mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-29 18:51:24 +01:00
Compare commits
No commits in common. "a281beba8d8f007cf220f96dd1d9412bb070c7d8" and "4d9dc0abe24ad5d9d22a16f40fc61137dcd103f7" have entirely different histories.
a281beba8d
...
4d9dc0abe2
|
@ -345,10 +345,6 @@ from .chingari import (
|
||||||
ChingariIE,
|
ChingariIE,
|
||||||
ChingariUserIE,
|
ChingariUserIE,
|
||||||
)
|
)
|
||||||
from .chzzk import (
|
|
||||||
CHZZKLiveIE,
|
|
||||||
CHZZKVideoIE,
|
|
||||||
)
|
|
||||||
from .cinemax import CinemaxIE
|
from .cinemax import CinemaxIE
|
||||||
from .cinetecamilano import CinetecaMilanoIE
|
from .cinetecamilano import CinetecaMilanoIE
|
||||||
from .cineverse import (
|
from .cineverse import (
|
||||||
|
@ -544,7 +540,6 @@ from .egghead import (
|
||||||
from .eighttracks import EightTracksIE
|
from .eighttracks import EightTracksIE
|
||||||
from .einthusan import EinthusanIE
|
from .einthusan import EinthusanIE
|
||||||
from .eitb import EitbIE
|
from .eitb import EitbIE
|
||||||
from .elementorembed import ElementorEmbedIE
|
|
||||||
from .elonet import ElonetIE
|
from .elonet import ElonetIE
|
||||||
from .elpais import ElPaisIE
|
from .elpais import ElPaisIE
|
||||||
from .eltrecetv import ElTreceTVIE
|
from .eltrecetv import ElTreceTVIE
|
||||||
|
@ -792,7 +787,6 @@ from .iheart import (
|
||||||
IHeartRadioIE,
|
IHeartRadioIE,
|
||||||
IHeartRadioPodcastIE,
|
IHeartRadioPodcastIE,
|
||||||
)
|
)
|
||||||
from .ilpost import IlPostIE
|
|
||||||
from .iltalehti import IltalehtiIE
|
from .iltalehti import IltalehtiIE
|
||||||
from .imdb import (
|
from .imdb import (
|
||||||
ImdbIE,
|
ImdbIE,
|
||||||
|
@ -1002,7 +996,7 @@ from .lynda import (
|
||||||
)
|
)
|
||||||
from .maariv import MaarivIE
|
from .maariv import MaarivIE
|
||||||
from .magellantv import MagellanTVIE
|
from .magellantv import MagellanTVIE
|
||||||
from .magentamusik import MagentaMusikIE
|
from .magentamusik360 import MagentaMusik360IE
|
||||||
from .mailru import (
|
from .mailru import (
|
||||||
MailRuIE,
|
MailRuIE,
|
||||||
MailRuMusicIE,
|
MailRuMusicIE,
|
||||||
|
|
|
@ -70,24 +70,7 @@ class ArteTVIE(ArteTVBaseIE):
|
||||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/q82dTTfyuCXupPsGxXsd7B/940x530',
|
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/q82dTTfyuCXupPsGxXsd7B/940x530',
|
||||||
'upload_date': '20230930',
|
'upload_date': '20230930',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
},
|
}
|
||||||
}, {
|
|
||||||
'url': 'https://www.arte.tv/de/videos/085374-003-A/im-hohen-norden-geboren/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '085374-003-A',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'description': 'md5:ab79ec7cc472a93164415b4e4916abf9',
|
|
||||||
'timestamp': 1702872000,
|
|
||||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/TnyHBfPxv3v2GEY3suXGZP/940x530',
|
|
||||||
'duration': 2594,
|
|
||||||
'title': 'Die kurze Zeit der Jugend',
|
|
||||||
'alt_title': 'Im hohen Norden geboren',
|
|
||||||
'upload_date': '20231218',
|
|
||||||
'subtitles': {
|
|
||||||
'fr': 'mincount:1',
|
|
||||||
'fr-acc': 'mincount:1',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_GEO_BYPASS = True
|
_GEO_BYPASS = True
|
||||||
|
@ -138,16 +121,6 @@ class ArteTVIE(ArteTVBaseIE):
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _fix_accessible_subs_locale(subs):
|
|
||||||
updated_subs = {}
|
|
||||||
for lang, sub_formats in subs.items():
|
|
||||||
for format in sub_formats:
|
|
||||||
if format.get('url', '').endswith('-MAL.m3u8'):
|
|
||||||
lang += '-acc'
|
|
||||||
updated_subs.setdefault(lang, []).append(format)
|
|
||||||
return updated_subs
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = self._match_valid_url(url)
|
mobj = self._match_valid_url(url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
@ -201,7 +174,6 @@ class ArteTVIE(ArteTVBaseIE):
|
||||||
secondary_formats.extend(fmts)
|
secondary_formats.extend(fmts)
|
||||||
else:
|
else:
|
||||||
formats.extend(fmts)
|
formats.extend(fmts)
|
||||||
subs = self._fix_accessible_subs_locale(subs)
|
|
||||||
self._merge_subtitles(subs, target=subtitles)
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
|
|
||||||
elif stream['protocol'] in ('HTTPS', 'RTMP'):
|
elif stream['protocol'] in ('HTTPS', 'RTMP'):
|
||||||
|
|
|
@ -18,7 +18,6 @@ from ..utils import (
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
bool_or_none,
|
bool_or_none,
|
||||||
clean_html,
|
clean_html,
|
||||||
determine_ext,
|
|
||||||
filter_dict,
|
filter_dict,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
format_field,
|
format_field,
|
||||||
|
@ -1659,34 +1658,19 @@ class BiliIntlBaseIE(InfoExtractor):
|
||||||
'aid': aid,
|
'aid': aid,
|
||||||
})) or {}
|
})) or {}
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
fetched_urls = set()
|
for sub in sub_json.get('subtitles') or []:
|
||||||
for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
|
sub_url = sub.get('url')
|
||||||
for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
|
if not sub_url:
|
||||||
if url in fetched_urls:
|
continue
|
||||||
continue
|
sub_data = self._download_json(
|
||||||
fetched_urls.add(url)
|
sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
|
||||||
sub_ext = determine_ext(url)
|
note='Downloading subtitles%s' % f' for {sub["lang"]}' if sub.get('lang') else '')
|
||||||
sub_lang = sub.get('lang_key') or 'en'
|
if not sub_data:
|
||||||
|
continue
|
||||||
if sub_ext == 'ass':
|
subtitles.setdefault(sub.get('lang_key', 'en'), []).append({
|
||||||
subtitles.setdefault(sub_lang, []).append({
|
'ext': 'srt',
|
||||||
'ext': 'ass',
|
'data': self.json2srt(sub_data)
|
||||||
'url': url,
|
})
|
||||||
})
|
|
||||||
elif sub_ext == 'json':
|
|
||||||
sub_data = self._download_json(
|
|
||||||
url, ep_id or aid, fatal=False,
|
|
||||||
note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
|
|
||||||
errnote='Unable to download subtitles')
|
|
||||||
|
|
||||||
if sub_data:
|
|
||||||
subtitles.setdefault(sub_lang, []).append({
|
|
||||||
'ext': 'srt',
|
|
||||||
'data': self.json2srt(sub_data),
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
self.report_warning('Unexpected subtitle extension', ep_id or aid)
|
|
||||||
|
|
||||||
return subtitles
|
return subtitles
|
||||||
|
|
||||||
def _get_formats(self, *, ep_id=None, aid=None):
|
def _get_formats(self, *, ep_id=None, aid=None):
|
||||||
|
|
|
@ -1,139 +0,0 @@
|
||||||
import functools
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
float_or_none,
|
|
||||||
int_or_none,
|
|
||||||
parse_iso8601,
|
|
||||||
url_or_none,
|
|
||||||
)
|
|
||||||
from ..utils.traversal import traverse_obj
|
|
||||||
|
|
||||||
|
|
||||||
class CHZZKLiveIE(InfoExtractor):
|
|
||||||
IE_NAME = 'chzzk:live'
|
|
||||||
_VALID_URL = r'https?://chzzk\.naver\.com/live/(?P<id>[\da-f]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://chzzk.naver.com/live/c68b8ef525fb3d2fa146344d84991753',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'c68b8ef525fb3d2fa146344d84991753',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': str,
|
|
||||||
'channel': '진짜도현',
|
|
||||||
'channel_id': 'c68b8ef525fb3d2fa146344d84991753',
|
|
||||||
'channel_is_verified': False,
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'timestamp': 1705510344,
|
|
||||||
'upload_date': '20240117',
|
|
||||||
'live_status': 'is_live',
|
|
||||||
'view_count': int,
|
|
||||||
'concurrent_view_count': int,
|
|
||||||
},
|
|
||||||
'skip': 'The channel is not currently live',
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
channel_id = self._match_id(url)
|
|
||||||
live_detail = self._download_json(
|
|
||||||
f'https://api.chzzk.naver.com/service/v2/channels/{channel_id}/live-detail', channel_id,
|
|
||||||
note='Downloading channel info', errnote='Unable to download channel info')['content']
|
|
||||||
|
|
||||||
if live_detail.get('status') == 'CLOSE':
|
|
||||||
raise ExtractorError('The channel is not currently live', expected=True)
|
|
||||||
|
|
||||||
live_playback = self._parse_json(live_detail['livePlaybackJson'], channel_id)
|
|
||||||
|
|
||||||
thumbnails = []
|
|
||||||
thumbnail_template = traverse_obj(
|
|
||||||
live_playback, ('thumbnail', 'snapshotThumbnailTemplate', {url_or_none}))
|
|
||||||
if thumbnail_template and '{type}' in thumbnail_template:
|
|
||||||
for width in traverse_obj(live_playback, ('thumbnail', 'types', ..., {str})):
|
|
||||||
thumbnails.append({
|
|
||||||
'id': width,
|
|
||||||
'url': thumbnail_template.replace('{type}', width),
|
|
||||||
'width': int_or_none(width),
|
|
||||||
})
|
|
||||||
|
|
||||||
formats, subtitles = [], {}
|
|
||||||
for media in traverse_obj(live_playback, ('media', lambda _, v: url_or_none(v['path']))):
|
|
||||||
is_low_latency = media.get('mediaId') == 'LLHLS'
|
|
||||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
|
||||||
media['path'], channel_id, 'mp4', fatal=False, live=True,
|
|
||||||
m3u8_id='hls-ll' if is_low_latency else 'hls')
|
|
||||||
for f in fmts:
|
|
||||||
if is_low_latency:
|
|
||||||
f['source_preference'] = -2
|
|
||||||
if '-afragalow.stream-audio.stream' in f['format_id']:
|
|
||||||
f['quality'] = -2
|
|
||||||
formats.extend(fmts)
|
|
||||||
self._merge_subtitles(subs, target=subtitles)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': channel_id,
|
|
||||||
'is_live': True,
|
|
||||||
'formats': formats,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
'thumbnails': thumbnails,
|
|
||||||
**traverse_obj(live_detail, {
|
|
||||||
'title': ('liveTitle', {str}),
|
|
||||||
'timestamp': ('openDate', {functools.partial(parse_iso8601, delimiter=' ')}),
|
|
||||||
'concurrent_view_count': ('concurrentUserCount', {int_or_none}),
|
|
||||||
'view_count': ('accumulateCount', {int_or_none}),
|
|
||||||
'channel': ('channel', 'channelName', {str}),
|
|
||||||
'channel_id': ('channel', 'channelId', {str}),
|
|
||||||
'channel_is_verified': ('channel', 'verifiedMark', {bool}),
|
|
||||||
}),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class CHZZKVideoIE(InfoExtractor):
|
|
||||||
IE_NAME = 'chzzk:video'
|
|
||||||
_VALID_URL = r'https?://chzzk\.naver\.com/video/(?P<id>\d+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://chzzk.naver.com/video/1754',
|
|
||||||
'md5': 'b0c0c1bb888d913b93d702b1512c7f06',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '1754',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '치지직 테스트 방송',
|
|
||||||
'channel': '침착맨',
|
|
||||||
'channel_id': 'bb382c2c0cc9fa7c86ab3b037fb5799c',
|
|
||||||
'channel_is_verified': False,
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'duration': 15577,
|
|
||||||
'timestamp': 1702970505.417,
|
|
||||||
'upload_date': '20231219',
|
|
||||||
'view_count': int,
|
|
||||||
},
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
video_meta = self._download_json(
|
|
||||||
f'https://api.chzzk.naver.com/service/v2/videos/{video_id}', video_id,
|
|
||||||
note='Downloading video info', errnote='Unable to download video info')['content']
|
|
||||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(
|
|
||||||
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id,
|
|
||||||
query={
|
|
||||||
'key': video_meta['inKey'],
|
|
||||||
'env': 'real',
|
|
||||||
'lc': 'en_US',
|
|
||||||
'cpl': 'en_US',
|
|
||||||
}, note='Downloading video playback', errnote='Unable to download video playback')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'formats': formats,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
**traverse_obj(video_meta, {
|
|
||||||
'title': ('videoTitle', {str}),
|
|
||||||
'thumbnail': ('thumbnailImageUrl', {url_or_none}),
|
|
||||||
'timestamp': ('publishDateAt', {functools.partial(float_or_none, scale=1000)}),
|
|
||||||
'view_count': ('readCount', {int_or_none}),
|
|
||||||
'duration': ('duration', {int_or_none}),
|
|
||||||
'channel': ('channel', 'channelName', {str}),
|
|
||||||
'channel_id': ('channel', 'channelId', {str}),
|
|
||||||
'channel_is_verified': ('channel', 'verifiedMark', {bool}),
|
|
||||||
}),
|
|
||||||
}
|
|
|
@ -1,72 +0,0 @@
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from .vimeo import VimeoIE
|
|
||||||
from .youtube import YoutubeIE
|
|
||||||
from ..utils import unescapeHTML, url_or_none
|
|
||||||
from ..utils.traversal import traverse_obj
|
|
||||||
|
|
||||||
|
|
||||||
class ElementorEmbedIE(InfoExtractor):
|
|
||||||
_VALID_URL = False
|
|
||||||
_WEBPAGE_TESTS = [{
|
|
||||||
'url': 'https://capitaltv.cy/2023/12/14/υγεια-και-ζωη-14-12-2023-δρ-ξενια-κωσταντινιδο/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'KgzuxwuQwM4',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'ΥΓΕΙΑ ΚΑΙ ΖΩΗ 14 12 2023 ΔΡ ΞΕΝΙΑ ΚΩΣΤΑΝΤΙΝΙΔΟΥ',
|
|
||||||
'thumbnail': 'https://i.ytimg.com/vi/KgzuxwuQwM4/maxresdefault.jpg',
|
|
||||||
'playable_in_embed': True,
|
|
||||||
'tags': 'count:16',
|
|
||||||
'like_count': int,
|
|
||||||
'channel': 'Capital TV Cyprus',
|
|
||||||
'channel_id': 'UCR8LwVKTLGEXt4ZAErpCMrg',
|
|
||||||
'availability': 'public',
|
|
||||||
'description': 'md5:7a3308a22881aea4612358c4ba121f77',
|
|
||||||
'duration': 2891,
|
|
||||||
'upload_date': '20231214',
|
|
||||||
'uploader_id': '@capitaltvcyprus6389',
|
|
||||||
'live_status': 'not_live',
|
|
||||||
'channel_url': 'https://www.youtube.com/channel/UCR8LwVKTLGEXt4ZAErpCMrg',
|
|
||||||
'uploader_url': 'https://www.youtube.com/@capitaltvcyprus6389',
|
|
||||||
'uploader': 'Capital TV Cyprus',
|
|
||||||
'age_limit': 0,
|
|
||||||
'categories': ['News & Politics'],
|
|
||||||
'view_count': int,
|
|
||||||
'channel_follower_count': int,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://elementor.com/academy/theme-builder-collection/?playlist=76011151&video=9e59909',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '?playlist=76011151&video=9e59909',
|
|
||||||
'title': 'Theme Builder Collection - Academy',
|
|
||||||
'age_limit': 0,
|
|
||||||
'timestamp': 1702196984.0,
|
|
||||||
'upload_date': '20231210',
|
|
||||||
'description': 'md5:7f52c52715ee9e54fd7f82210511673d',
|
|
||||||
'thumbnail': 'https://elementor.com/academy/wp-content/uploads/2021/07/Theme-Builder-1.png',
|
|
||||||
},
|
|
||||||
'playlist_count': 11,
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}]
|
|
||||||
_WIDGET_REGEX = r'<div[^>]+class="[^"]*elementor-widget-video(?:-playlist)?[^"]*"[^>]*data-settings="([^"]*)"'
|
|
||||||
|
|
||||||
def _extract_from_webpage(self, url, webpage):
|
|
||||||
for data_settings in re.findall(self._WIDGET_REGEX, webpage):
|
|
||||||
data = self._parse_json(data_settings, None, fatal=False, transform_source=unescapeHTML)
|
|
||||||
if youtube_url := traverse_obj(data, ('youtube_url', {url_or_none})):
|
|
||||||
yield self.url_result(youtube_url, ie=YoutubeIE)
|
|
||||||
|
|
||||||
for video in traverse_obj(data, ('tabs', lambda _, v: v['_id'], {dict})):
|
|
||||||
if youtube_url := traverse_obj(video, ('youtube_url', {url_or_none})):
|
|
||||||
yield self.url_result(youtube_url, ie=YoutubeIE)
|
|
||||||
if vimeo_url := traverse_obj(video, ('vimeo_url', {url_or_none})):
|
|
||||||
yield self.url_result(vimeo_url, ie=VimeoIE)
|
|
||||||
for direct_url in traverse_obj(video, (('hosted_url', 'external_url'), 'url', {url_or_none})):
|
|
||||||
yield {
|
|
||||||
'id': video['_id'],
|
|
||||||
'url': direct_url,
|
|
||||||
'title': video.get('title'),
|
|
||||||
}
|
|
|
@ -57,7 +57,7 @@ class FacebookIE(InfoExtractor):
|
||||||
)|
|
)|
|
||||||
facebook:
|
facebook:
|
||||||
)
|
)
|
||||||
(?P<id>pfbid[A-Za-z0-9]+|\d+)
|
(?P<id>[0-9]+)
|
||||||
'''
|
'''
|
||||||
_EMBED_REGEX = [
|
_EMBED_REGEX = [
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
|
r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
|
||||||
|
@ -247,24 +247,6 @@ class FacebookIE(InfoExtractor):
|
||||||
'thumbnail': r're:^https?://.*',
|
'thumbnail': r're:^https?://.*',
|
||||||
'duration': 148.435,
|
'duration': 148.435,
|
||||||
},
|
},
|
||||||
}, {
|
|
||||||
'url': 'https://www.facebook.com/attn/posts/pfbid0j1Czf2gGDVqeQ8KiMLFm3pWN8GxsQmeRrVhimWDzMuKQoR8r4b1knNsejELmUgyhl',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '6968553779868435',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'description': 'md5:2f2fcf93e97ac00244fe64521bbdb0cb',
|
|
||||||
'uploader': 'ATTN:',
|
|
||||||
'upload_date': '20231207',
|
|
||||||
'title': 'ATTN:',
|
|
||||||
'duration': 132.675,
|
|
||||||
'uploader_id': '100064451419378',
|
|
||||||
'view_count': int,
|
|
||||||
'thumbnail': r're:^https?://.*',
|
|
||||||
'timestamp': 1701975646,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://www.facebook.com/story.php?story_fbid=pfbid0Fnzhm8UuzjBYpPMNFzaSpFE9UmLdU4fJN8qTANi1Dmtj5q7DNrL5NERXfsAzDEV7l&id=100073071055552',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
|
|
@ -19,9 +19,9 @@ class GoogleDriveIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:docs|drive|drive\.usercontent)\.google\.com/
|
(?:docs|drive)\.google\.com/
|
||||||
(?:
|
(?:
|
||||||
(?:uc|open|download)\?.*?id=|
|
(?:uc|open)\?.*?id=|
|
||||||
file/d/
|
file/d/
|
||||||
)|
|
)|
|
||||||
video\.google\.com/get_player\?.*?docid=
|
video\.google\.com/get_player\?.*?docid=
|
||||||
|
@ -53,9 +53,6 @@ class GoogleDriveIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
|
||||||
'url': 'https://drive.usercontent.google.com/download?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
}]
|
||||||
_FORMATS_EXT = {
|
_FORMATS_EXT = {
|
||||||
'5': 'flv',
|
'5': 'flv',
|
||||||
|
@ -208,10 +205,9 @@ class GoogleDriveIE(InfoExtractor):
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
source_url = update_url_query(
|
source_url = update_url_query(
|
||||||
'https://drive.usercontent.google.com/download', {
|
'https://drive.google.com/uc', {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'export': 'download',
|
'export': 'download',
|
||||||
'confirm': 't',
|
|
||||||
})
|
})
|
||||||
|
|
||||||
def request_source_file(source_url, kind, data=None):
|
def request_source_file(source_url, kind, data=None):
|
||||||
|
|
|
@ -1,69 +0,0 @@
|
||||||
import functools
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
float_or_none,
|
|
||||||
int_or_none,
|
|
||||||
url_or_none,
|
|
||||||
urlencode_postdata,
|
|
||||||
)
|
|
||||||
from ..utils.traversal import traverse_obj
|
|
||||||
|
|
||||||
|
|
||||||
class IlPostIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?ilpost\.it/episodes/(?P<id>[^/?#]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://www.ilpost.it/episodes/1-avis-akvasas-ka/',
|
|
||||||
'md5': '43649f002d85e1c2f319bb478d479c40',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '2972047',
|
|
||||||
'ext': 'mp3',
|
|
||||||
'display_id': '1-avis-akvasas-ka',
|
|
||||||
'title': '1. Avis akvasas ka',
|
|
||||||
'url': 'https://www.ilpost.it/wp-content/uploads/2023/12/28/1703781217-l-invasione-pt1-v6.mp3',
|
|
||||||
'timestamp': 1703835014,
|
|
||||||
'upload_date': '20231229',
|
|
||||||
'duration': 2495.0,
|
|
||||||
'availability': 'public',
|
|
||||||
'series_id': '235598',
|
|
||||||
'description': '',
|
|
||||||
}
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
endpoint_metadata = self._search_json(
|
|
||||||
r'var\s+ilpostpodcast\s*=', webpage, 'metadata', display_id)
|
|
||||||
episode_id = endpoint_metadata['post_id']
|
|
||||||
podcast_id = endpoint_metadata['podcast_id']
|
|
||||||
podcast_metadata = self._download_json(
|
|
||||||
endpoint_metadata['ajax_url'], display_id, data=urlencode_postdata({
|
|
||||||
'action': 'checkpodcast',
|
|
||||||
'cookie': endpoint_metadata['cookie'],
|
|
||||||
'post_id': episode_id,
|
|
||||||
'podcast_id': podcast_id,
|
|
||||||
}))
|
|
||||||
|
|
||||||
episode = traverse_obj(podcast_metadata, (
|
|
||||||
'data', 'postcastList', lambda _, v: str(v['id']) == episode_id, {dict}), get_all=False)
|
|
||||||
if not episode:
|
|
||||||
raise ExtractorError('Episode could not be extracted')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': episode_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'series_id': podcast_id,
|
|
||||||
'vcodec': 'none',
|
|
||||||
**traverse_obj(episode, {
|
|
||||||
'title': ('title', {str}),
|
|
||||||
'description': ('description', {str}),
|
|
||||||
'url': ('podcast_raw_url', {url_or_none}),
|
|
||||||
'thumbnail': ('image', {url_or_none}),
|
|
||||||
'timestamp': ('timestamp', {int_or_none}),
|
|
||||||
'duration': ('milliseconds', {functools.partial(float_or_none, scale=1000)}),
|
|
||||||
'availability': ('free', {lambda v: 'public' if v else 'subscriber_only'}),
|
|
||||||
}),
|
|
||||||
}
|
|
|
@ -1,62 +0,0 @@
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import ExtractorError, int_or_none, join_nonempty, url_or_none
|
|
||||||
from ..utils.traversal import traverse_obj
|
|
||||||
|
|
||||||
|
|
||||||
class MagentaMusikIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?magentamusik\.de/(?P<id>[^/?#]+)'
|
|
||||||
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://www.magentamusik.de/marty-friedman-woa-2023-9208205928595409235',
|
|
||||||
'md5': 'd82dd4748f55fc91957094546aaf8584',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '9208205928595409235',
|
|
||||||
'display_id': 'marty-friedman-woa-2023-9208205928595409235',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Marty Friedman: W:O:A 2023',
|
|
||||||
'alt_title': 'Konzert vom: 05.08.2023 13:00',
|
|
||||||
'duration': 2760,
|
|
||||||
'categories': ['Musikkonzert'],
|
|
||||||
'release_year': 2023,
|
|
||||||
'location': 'Deutschland',
|
|
||||||
}
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
player_config = self._search_json(
|
|
||||||
r'data-js-element="o-video-player__config">', webpage, 'player config', display_id, fatal=False)
|
|
||||||
if not player_config:
|
|
||||||
raise ExtractorError('No video found', expected=True)
|
|
||||||
|
|
||||||
asset_id = player_config['assetId']
|
|
||||||
asset_details = self._download_json(
|
|
||||||
f'https://wcps.t-online.de/cvss/magentamusic/vodclient/v2/assetdetails/58938/{asset_id}',
|
|
||||||
display_id, note='Downloading asset details')
|
|
||||||
|
|
||||||
video_id = traverse_obj(
|
|
||||||
asset_details, ('content', 'partnerInformation', ..., 'reference', {str}), get_all=False)
|
|
||||||
if not video_id:
|
|
||||||
raise ExtractorError('Unable to extract video id')
|
|
||||||
|
|
||||||
vod_data = self._download_json(
|
|
||||||
f'https://wcps.t-online.de/cvss/magentamusic/vodclient/v2/player/58935/{video_id}/Main%20Movie', video_id)
|
|
||||||
smil_url = traverse_obj(
|
|
||||||
vod_data, ('content', 'feature', 'representations', ...,
|
|
||||||
'contentPackages', ..., 'media', 'href', {url_or_none}), get_all=False)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'formats': self._extract_smil_formats(smil_url, video_id),
|
|
||||||
**traverse_obj(vod_data, ('content', 'feature', 'metadata', {
|
|
||||||
'title': 'title',
|
|
||||||
'alt_title': 'originalTitle',
|
|
||||||
'description': 'longDescription',
|
|
||||||
'duration': ('runtimeInSeconds', {int_or_none}),
|
|
||||||
'location': ('countriesOfProduction', {list}, {lambda x: join_nonempty(*x, delim=', ')}),
|
|
||||||
'release_year': ('yearOfProduction', {int_or_none}),
|
|
||||||
'categories': ('mainGenre', {str}, {lambda x: x and [x]}),
|
|
||||||
})),
|
|
||||||
}
|
|
58
yt_dlp/extractor/magentamusik360.py
Normal file
58
yt_dlp/extractor/magentamusik360.py
Normal file
|
@ -0,0 +1,58 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class MagentaMusik360IE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?magenta-musik-360\.de/([a-z0-9-]+-(?P<id>[0-9]+)|festivals/.+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.magenta-musik-360.de/within-temptation-wacken-2019-1-9208205928595185932',
|
||||||
|
'md5': '65b6f060b40d90276ec6fb9b992c1216',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '9208205928595185932',
|
||||||
|
'ext': 'm3u8',
|
||||||
|
'title': 'WITHIN TEMPTATION',
|
||||||
|
'description': 'Robert Westerholt und Sharon Janny den Adel gründeten die Symphonic Metal-Band. Privat sind die Niederländer ein Paar und haben zwei Kinder. Die Single Ice Queen brachte ihnen Platin und Gold und verhalf 2002 zum internationalen Durchbruch. Charakteristisch für die Band war Anfangs der hohe Gesang von Frontfrau Sharon. Stilistisch fing die Band im Gothic Metal an. Mit neuem Sound, schnellen Gitarrenriffs und Gitarrensoli, avancierte Within Temptation zur erfolgreichen Rockband. Auch dieses Jahr wird die Band ihre Fangemeinde wieder mitreißen.',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.magenta-musik-360.de/festivals/wacken-world-wide-2020-body-count-feat-ice-t',
|
||||||
|
'md5': '81010d27d7cab3f7da0b0f681b983b7e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '9208205928595231363',
|
||||||
|
'ext': 'm3u8',
|
||||||
|
'title': 'Body Count feat. Ice-T',
|
||||||
|
'description': 'Body Count feat. Ice-T konnten bereits im vergangenen Jahr auf dem „Holy Ground“ in Wacken überzeugen. 2020 gehen die Crossover-Metaller aus einem Club in Los Angeles auf Sendung und bringen mit ihrer Mischung aus Metal und Hip-Hop Abwechslung und ordentlich Alarm zum WWW. Bereits seit 1990 stehen die beiden Gründer Ice-T (Gesang) und Ernie C (Gitarre) auf der Bühne. Sieben Studioalben hat die Gruppe bis jetzt veröffentlicht, darunter das Debüt „Body Count“ (1992) mit dem kontroversen Track „Cop Killer“.',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
# _match_id casts to string, but since "None" is not a valid video_id for magenta
|
||||||
|
# there is no risk for confusion
|
||||||
|
if video_id == "None":
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
video_id = self._html_search_regex(r'data-asset-id="([^"]+)"', webpage, 'video_id')
|
||||||
|
json = self._download_json("https://wcps.t-online.de/cvss/magentamusic/vodplayer/v3/player/58935/%s/Main%%20Movie" % video_id, video_id)
|
||||||
|
xml_url = json['content']['feature']['representations'][0]['contentPackages'][0]['media']['href']
|
||||||
|
metadata = json['content']['feature'].get('metadata')
|
||||||
|
title = None
|
||||||
|
description = None
|
||||||
|
duration = None
|
||||||
|
thumbnails = []
|
||||||
|
if metadata:
|
||||||
|
title = metadata.get('title')
|
||||||
|
description = metadata.get('fullDescription')
|
||||||
|
duration = metadata.get('runtimeInSeconds')
|
||||||
|
for img_key in ('teaserImageWide', 'smallCoverImage'):
|
||||||
|
if img_key in metadata:
|
||||||
|
thumbnails.append({'url': metadata[img_key].get('href')})
|
||||||
|
|
||||||
|
xml = self._download_xml(xml_url, video_id)
|
||||||
|
final_url = xml[0][0][0].attrib['src']
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'url': final_url,
|
||||||
|
'duration': duration,
|
||||||
|
'thumbnails': thumbnails
|
||||||
|
}
|
|
@ -1,25 +1,20 @@
|
||||||
import base64
|
|
||||||
import hashlib
|
|
||||||
import hmac
|
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
import time
|
from urllib.parse import urlparse, parse_qs
|
||||||
from urllib.parse import parse_qs, urlparse
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
clean_html,
|
||||||
dict_get,
|
dict_get,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
join_nonempty,
|
join_nonempty,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
parse_iso8601,
|
parse_duration,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
try_get,
|
try_get,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
url_or_none,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -115,18 +110,6 @@ class NaverBaseIE(InfoExtractor):
|
||||||
**self.process_subtitles(video_data, get_subs),
|
**self.process_subtitles(video_data, get_subs),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _call_api(self, path, video_id):
|
|
||||||
api_endpoint = f'https://apis.naver.com/now_web2/now_web_api/v1{path}'
|
|
||||||
key = b'nbxvs5nwNG9QKEWK0ADjYA4JZoujF4gHcIwvoCxFTPAeamq5eemvt5IWAYXxrbYM'
|
|
||||||
msgpad = int(time.time() * 1000)
|
|
||||||
md = base64.b64encode(hmac.HMAC(
|
|
||||||
key, f'{api_endpoint[:255]}{msgpad}'.encode(), digestmod=hashlib.sha1).digest()).decode()
|
|
||||||
|
|
||||||
return self._download_json(api_endpoint, video_id=video_id, headers=self.geo_verification_headers(), query={
|
|
||||||
'msgpad': msgpad,
|
|
||||||
'md': md,
|
|
||||||
})['result']
|
|
||||||
|
|
||||||
|
|
||||||
class NaverIE(NaverBaseIE):
|
class NaverIE(NaverBaseIE):
|
||||||
_VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/(?:v|embed)/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/(?:v|embed)/(?P<id>\d+)'
|
||||||
|
@ -142,32 +125,21 @@ class NaverIE(NaverBaseIE):
|
||||||
'upload_date': '20130903',
|
'upload_date': '20130903',
|
||||||
'uploader': '메가스터디, 합격불변의 법칙',
|
'uploader': '메가스터디, 합격불변의 법칙',
|
||||||
'uploader_id': 'megastudy',
|
'uploader_id': 'megastudy',
|
||||||
'uploader_url': 'https://tv.naver.com/megastudy',
|
|
||||||
'view_count': int,
|
|
||||||
'like_count': int,
|
|
||||||
'comment_count': int,
|
|
||||||
'duration': 2118,
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://tv.naver.com/v/395837',
|
'url': 'http://tv.naver.com/v/395837',
|
||||||
'md5': '7791205fa89dbed2f5e3eb16d287ff05',
|
'md5': '8a38e35354d26a17f73f4e90094febd3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '395837',
|
'id': '395837',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
|
'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
|
||||||
'description': 'md5:c76be23e21403a6473d8119678cdb5cb',
|
'description': 'md5:eb6aca9d457b922e43860a2a2b1984d3',
|
||||||
'timestamp': 1432030253,
|
'timestamp': 1432030253,
|
||||||
'upload_date': '20150519',
|
'upload_date': '20150519',
|
||||||
'uploader': '4가지쇼',
|
'uploader': '4가지쇼 시즌2',
|
||||||
'uploader_id': '4show',
|
'uploader_id': 'wrappinguser29',
|
||||||
'uploader_url': 'https://tv.naver.com/4show',
|
|
||||||
'view_count': int,
|
|
||||||
'like_count': int,
|
|
||||||
'comment_count': int,
|
|
||||||
'duration': 277,
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
|
||||||
},
|
},
|
||||||
|
'skip': 'Georestricted',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://tvcast.naver.com/v/81652',
|
'url': 'http://tvcast.naver.com/v/81652',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -175,63 +147,56 @@ class NaverIE(NaverBaseIE):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
data = self._call_api(f'/clips/{video_id}/play-info', video_id)
|
content = self._download_json(
|
||||||
|
'https://tv.naver.com/api/json/v/' + video_id,
|
||||||
|
video_id, headers=self.geo_verification_headers())
|
||||||
|
player_info_json = content.get('playerInfoJson') or {}
|
||||||
|
current_clip = player_info_json.get('currentClip') or {}
|
||||||
|
|
||||||
vid = traverse_obj(data, ('clip', 'videoId', {str}))
|
vid = current_clip.get('videoId')
|
||||||
in_key = traverse_obj(data, ('play', 'inKey', {str}))
|
in_key = current_clip.get('inKey')
|
||||||
|
|
||||||
if not vid or not in_key:
|
if not vid or not in_key:
|
||||||
raise ExtractorError('Unable to extract video info')
|
player_auth = try_get(player_info_json, lambda x: x['playerOption']['auth'])
|
||||||
|
if player_auth == 'notCountry':
|
||||||
|
self.raise_geo_restricted(countries=['KR'])
|
||||||
|
elif player_auth == 'notLogin':
|
||||||
|
self.raise_login_required()
|
||||||
|
raise ExtractorError('couldn\'t extract vid and key')
|
||||||
info = self._extract_video_info(video_id, vid, in_key)
|
info = self._extract_video_info(video_id, vid, in_key)
|
||||||
info.update(traverse_obj(data, ('clip', {
|
info.update({
|
||||||
'title': 'title',
|
'description': clean_html(current_clip.get('description')),
|
||||||
'description': 'description',
|
'timestamp': int_or_none(current_clip.get('firstExposureTime'), 1000),
|
||||||
'timestamp': ('firstExposureDatetime', {parse_iso8601}),
|
'duration': parse_duration(current_clip.get('displayPlayTime')),
|
||||||
'duration': ('playTime', {int_or_none}),
|
'like_count': int_or_none(current_clip.get('recommendPoint')),
|
||||||
'like_count': ('likeItCount', {int_or_none}),
|
'age_limit': 19 if current_clip.get('adult') else None,
|
||||||
'view_count': ('playCount', {int_or_none}),
|
})
|
||||||
'comment_count': ('commentCount', {int_or_none}),
|
|
||||||
'thumbnail': ('thumbnailImageUrl', {url_or_none}),
|
|
||||||
'uploader': 'channelName',
|
|
||||||
'uploader_id': 'channelId',
|
|
||||||
'uploader_url': ('channelUrl', {url_or_none}),
|
|
||||||
'age_limit': ('adultVideo', {lambda x: 19 if x else None}),
|
|
||||||
})))
|
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
||||||
class NaverLiveIE(NaverBaseIE):
|
class NaverLiveIE(InfoExtractor):
|
||||||
IE_NAME = 'Naver:live'
|
IE_NAME = 'Naver:live'
|
||||||
_VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/l/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/l/(?P<id>\d+)'
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://tv.naver.com/l/127062',
|
'url': 'https://tv.naver.com/l/52010',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '127062',
|
'id': '52010',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'live_status': 'is_live',
|
'title': '[LIVE] 뉴스특보 : "수도권 거리두기, 2주간 2단계로 조정"',
|
||||||
'channel': '뉴스는 YTN',
|
'description': 'md5:df7f0c237a5ed5e786ce5c91efbeaab3',
|
||||||
'channel_id': 'ytnnews24',
|
'channel_id': 'NTV-ytnnews24-0',
|
||||||
'title': 're:^대한민국 24시간 뉴스 채널 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
'start_time': 1597026780000,
|
||||||
'description': 'md5:f938b5956711beab6f882314ffadf4d5',
|
|
||||||
'start_time': 1677752280,
|
|
||||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
|
|
||||||
'like_count': int,
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://tv.naver.com/l/140535',
|
'url': 'https://tv.naver.com/l/51549',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '140535',
|
'id': '51549',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'live_status': 'is_live',
|
'title': '연합뉴스TV - 코로나19 뉴스특보',
|
||||||
'channel': 'KBS뉴스',
|
'description': 'md5:c655e82091bc21e413f549c0eaccc481',
|
||||||
'channel_id': 'kbsnews',
|
'channel_id': 'NTV-yonhapnewstv-0',
|
||||||
'start_time': 1696867320,
|
'start_time': 1596406380000,
|
||||||
'title': 're:^언제 어디서나! KBS 뉴스 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
|
||||||
'description': 'md5:6ad419c0bf2f332829bda3f79c295284',
|
|
||||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
|
|
||||||
'like_count': int,
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://tv.naver.com/l/54887',
|
'url': 'https://tv.naver.com/l/54887',
|
||||||
|
@ -240,27 +205,55 @@ class NaverLiveIE(NaverBaseIE):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
data = self._call_api(f'/live-end/normal/{video_id}/play-info?renewLastPlayDate=true', video_id)
|
page = self._download_webpage(url, video_id, 'Downloading Page', 'Unable to download Page')
|
||||||
|
secure_url = self._search_regex(r'sApiF:\s+(?:"|\')([^"\']+)', page, 'secureurl')
|
||||||
|
|
||||||
|
info = self._extract_video_info(video_id, secure_url)
|
||||||
|
info.update({
|
||||||
|
'description': self._og_search_description(page)
|
||||||
|
})
|
||||||
|
|
||||||
|
return info
|
||||||
|
|
||||||
|
def _extract_video_info(self, video_id, url):
|
||||||
|
video_data = self._download_json(url, video_id, headers=self.geo_verification_headers())
|
||||||
|
meta = video_data.get('meta')
|
||||||
|
status = meta.get('status')
|
||||||
|
|
||||||
status = traverse_obj(data, ('live', 'liveStatus'))
|
|
||||||
if status == 'CLOSED':
|
if status == 'CLOSED':
|
||||||
raise ExtractorError('Stream is offline.', expected=True)
|
raise ExtractorError('Stream is offline.', expected=True)
|
||||||
elif status != 'OPENED':
|
elif status != 'OPENED':
|
||||||
raise ExtractorError(f'Unknown status {status!r}')
|
raise ExtractorError('Unknown status %s' % status)
|
||||||
|
|
||||||
|
title = meta.get('title')
|
||||||
|
stream_list = video_data.get('streams')
|
||||||
|
|
||||||
|
if stream_list is None:
|
||||||
|
raise ExtractorError('Could not get stream data.', expected=True)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for quality in stream_list:
|
||||||
|
if not quality.get('url'):
|
||||||
|
continue
|
||||||
|
|
||||||
|
prop = quality.get('property')
|
||||||
|
if prop.get('abr'): # This abr doesn't mean Average audio bitrate.
|
||||||
|
continue
|
||||||
|
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
quality.get('url'), video_id, 'mp4',
|
||||||
|
m3u8_id=quality.get('qualityId'), live=True
|
||||||
|
))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': self._extract_m3u8_formats(
|
'title': title,
|
||||||
traverse_obj(data, ('playbackBody', {json.loads}, 'media', 0, 'path')), video_id, live=True),
|
'formats': formats,
|
||||||
**traverse_obj(data, ('live', {
|
'channel_id': meta.get('channelId'),
|
||||||
'title': 'title',
|
'channel_url': meta.get('channelUrl'),
|
||||||
'channel': 'channelName',
|
'thumbnail': meta.get('imgUrl'),
|
||||||
'channel_id': 'channelId',
|
'start_time': meta.get('startTime'),
|
||||||
'description': 'description',
|
'categories': [meta.get('categoryId')],
|
||||||
'like_count': (('likeCount', 'likeItCount'), {int_or_none}),
|
|
||||||
'thumbnail': ('thumbnailImageUrl', {url_or_none}),
|
|
||||||
'start_time': (('startTime', 'startDateTime', 'startYmdt'), {parse_iso8601}),
|
|
||||||
}), get_all=False),
|
|
||||||
'is_live': True
|
'is_live': True
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,20 +1,7 @@
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from ..utils import parse_duration, unescapeHTML
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
|
||||||
clean_html,
|
|
||||||
extract_attributes,
|
|
||||||
get_element_by_attribute,
|
|
||||||
get_element_by_class,
|
|
||||||
get_element_html_by_class,
|
|
||||||
get_elements_by_class,
|
|
||||||
int_or_none,
|
|
||||||
join_nonempty,
|
|
||||||
parse_count,
|
|
||||||
parse_duration,
|
|
||||||
unescapeHTML,
|
|
||||||
)
|
|
||||||
from ..utils.traversal import traverse_obj
|
|
||||||
|
|
||||||
|
|
||||||
class Rule34VideoIE(InfoExtractor):
|
class Rule34VideoIE(InfoExtractor):
|
||||||
|
@ -30,16 +17,7 @@ class Rule34VideoIE(InfoExtractor):
|
||||||
'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065157/preview.jpg',
|
'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065157/preview.jpg',
|
||||||
'duration': 347.0,
|
'duration': 347.0,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'view_count': int,
|
'tags': 'count:14'
|
||||||
'like_count': int,
|
|
||||||
'comment_count': int,
|
|
||||||
'timestamp': 1639872000,
|
|
||||||
'description': 'https://discord.gg/aBqPrHSHvv',
|
|
||||||
'upload_date': '20211219',
|
|
||||||
'uploader': 'Sweet HMV',
|
|
||||||
'uploader_url': 'https://rule34video.com/members/22119/',
|
|
||||||
'categories': ['3D', 'MMD', 'iwara'],
|
|
||||||
'tags': 'mincount:10'
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -52,17 +30,7 @@ class Rule34VideoIE(InfoExtractor):
|
||||||
'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065296/preview.jpg',
|
'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065296/preview.jpg',
|
||||||
'duration': 938.0,
|
'duration': 938.0,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'view_count': int,
|
'tags': 'count:50'
|
||||||
'like_count': int,
|
|
||||||
'comment_count': int,
|
|
||||||
'timestamp': 1640131200,
|
|
||||||
'description': '',
|
|
||||||
'creator': 'WildeerStudio',
|
|
||||||
'upload_date': '20211222',
|
|
||||||
'uploader': 'CerZule',
|
|
||||||
'uploader_url': 'https://rule34video.com/members/36281/',
|
|
||||||
'categories': ['3D', 'Tomb Raider'],
|
|
||||||
'tags': 'mincount:40'
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
@ -81,44 +49,17 @@ class Rule34VideoIE(InfoExtractor):
|
||||||
'quality': quality,
|
'quality': quality,
|
||||||
})
|
})
|
||||||
|
|
||||||
categories, creator, uploader, uploader_url = [None] * 4
|
title = self._html_extract_title(webpage)
|
||||||
for col in get_elements_by_class('col', webpage):
|
thumbnail = self._html_search_regex(r'preview_url:\s+\'([^\']+)\'', webpage, 'thumbnail', default=None)
|
||||||
label = clean_html(get_element_by_class('label', col))
|
duration = self._html_search_regex(r'"icon-clock"></i>\s+<span>((?:\d+:?)+)', webpage, 'duration', default=None)
|
||||||
if label == 'Categories:':
|
|
||||||
categories = list(map(clean_html, get_elements_by_class('item', col)))
|
|
||||||
elif label == 'Artist:':
|
|
||||||
creator = join_nonempty(*map(clean_html, get_elements_by_class('item', col)), delim=', ')
|
|
||||||
elif label == 'Uploaded By:':
|
|
||||||
uploader = clean_html(get_element_by_class('name', col))
|
|
||||||
uploader_url = extract_attributes(get_element_html_by_class('name', col) or '').get('href')
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
**traverse_obj(self._search_json_ld(webpage, video_id, default={}), ({
|
|
||||||
'title': 'title',
|
|
||||||
'view_count': 'view_count',
|
|
||||||
'like_count': 'like_count',
|
|
||||||
'duration': 'duration',
|
|
||||||
'timestamp': 'timestamp',
|
|
||||||
'description': 'description',
|
|
||||||
'thumbnail': ('thumbnails', 0, 'url'),
|
|
||||||
})),
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'title': self._html_extract_title(webpage),
|
'title': title,
|
||||||
'thumbnail': self._html_search_regex(
|
'thumbnail': thumbnail,
|
||||||
r'preview_url:\s+\'([^\']+)\'', webpage, 'thumbnail', default=None),
|
'duration': parse_duration(duration),
|
||||||
'duration': parse_duration(self._html_search_regex(
|
|
||||||
r'"icon-clock"></i>\s+<span>((?:\d+:?)+)', webpage, 'duration', default=None)),
|
|
||||||
'view_count': int_or_none(self._html_search_regex(
|
|
||||||
r'"icon-eye"></i>\s+<span>([ \d]+)', webpage, 'views', default='').replace(' ', '')),
|
|
||||||
'like_count': parse_count(get_element_by_class('voters count', webpage)),
|
|
||||||
'comment_count': int_or_none(self._search_regex(
|
|
||||||
r'[^(]+\((\d+)\)', get_element_by_attribute('href', '#tab_comments', webpage), 'comment count', fatal=False)),
|
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'creator': creator,
|
|
||||||
'uploader': uploader,
|
|
||||||
'uploader_url': uploader_url,
|
|
||||||
'categories': categories,
|
|
||||||
'tags': list(map(unescapeHTML, re.findall(
|
'tags': list(map(unescapeHTML, re.findall(
|
||||||
r'<a class="tag_item"[^>]+\bhref="https://rule34video\.com/tags/\d+/"[^>]*>(?P<tag>[^>]*)</a>', webpage))),
|
r'<a class="tag_item"[^>]+\bhref="https://rule34video\.com/tags/\d+/"[^>]*>(?P<tag>[^>]*)</a>', webpage))),
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user