mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-26 09:11:25 +01:00
Compare commits
9 Commits
53bafcc29e
...
86cdd2db37
Author | SHA1 | Date | |
---|---|---|---|
|
86cdd2db37 | ||
|
f2a4983df7 | ||
|
bacc31b05a | ||
|
a9f85670d0 | ||
|
180d2d1a9b | ||
|
c6a3a9b246 | ||
|
44f8f59c88 | ||
|
6b6f97f3c9 | ||
|
8c27ce471d |
|
@ -205,6 +205,26 @@ class ArchiveOrgIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
}, {
|
||||||
|
# The reviewbody is None for one of the reviews; just need to extract data without crashing
|
||||||
|
'url': 'https://archive.org/details/gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Stuck Inside of Mobile with the Memphis Blues Again',
|
||||||
|
'creators': ['Grateful Dead'],
|
||||||
|
'duration': 338.31,
|
||||||
|
'track': 'Stuck Inside of Mobile with the Memphis Blues Again',
|
||||||
|
'description': 'md5:764348a470b986f1217ffd38d6ac7b72',
|
||||||
|
'display_id': 'gd95-04-02d1t04.shn',
|
||||||
|
'location': 'Pyramid Arena',
|
||||||
|
'uploader': 'jon@archive.org',
|
||||||
|
'album': '1995-04-02 - Pyramid Arena',
|
||||||
|
'upload_date': '20040519',
|
||||||
|
'track_number': 4,
|
||||||
|
'release_date': '19950402',
|
||||||
|
'timestamp': 1084927901,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -335,7 +355,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||||
info['comments'].append({
|
info['comments'].append({
|
||||||
'id': review.get('review_id'),
|
'id': review.get('review_id'),
|
||||||
'author': review.get('reviewer'),
|
'author': review.get('reviewer'),
|
||||||
'text': str_or_none(review.get('reviewtitle'), '') + '\n\n' + review.get('reviewbody'),
|
'text': join_nonempty('reviewtitle', 'reviewbody', from_dict=review, delim='\n\n'),
|
||||||
'timestamp': unified_timestamp(review.get('createdate')),
|
'timestamp': unified_timestamp(review.get('createdate')),
|
||||||
'parent': 'root'})
|
'parent': 'root'})
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class ChaturbateIE(InfoExtractor):
|
class ChaturbateIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
|
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.(?P<tld>com|eu|global)/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.chaturbate.com/siswet19/',
|
'url': 'https://www.chaturbate.com/siswet19/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -29,15 +29,24 @@ class ChaturbateIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://en.chaturbate.com/siswet19/',
|
'url': 'https://en.chaturbate.com/siswet19/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://chaturbate.eu/siswet19/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://chaturbate.eu/fullvideo/?b=caylin',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://chaturbate.global/siswet19/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_ROOM_OFFLINE = 'Room is currently offline'
|
_ROOM_OFFLINE = 'Room is currently offline'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id, tld = self._match_valid_url(url).group('id', 'tld')
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
f'https://chaturbate.com/{video_id}/', video_id,
|
f'https://chaturbate.{tld}/{video_id}/', video_id,
|
||||||
headers=self.geo_verification_headers())
|
headers=self.geo_verification_headers())
|
||||||
|
|
||||||
found_m3u8_urls = []
|
found_m3u8_urls = []
|
||||||
|
|
|
@ -563,13 +563,13 @@ class FacebookIE(InfoExtractor):
|
||||||
return extract_video_data(try_get(
|
return extract_video_data(try_get(
|
||||||
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
||||||
|
|
||||||
def extract_dash_manifest(vid_data, formats, mpd_url=None):
    """Parse an inline DASH manifest from `vid_data` and append its formats.

    vid_data: mapping that may carry the manifest XML under one of the keys
        'dash_manifest', 'playlist', 'dash_manifest_xml_string' or
        'manifest_xml'.
    formats: list that is extended in place with the parsed formats.
    mpd_url: fallback manifest URL, used only when `vid_data` carries no
        valid 'dash_manifest_url' of its own.

    Does nothing when no manifest string is found.
    """
    dash_manifest = traverse_obj(
        vid_data, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', 'manifest_xml', expected_type=str)
    if not dash_manifest:
        return

    # Look the URL up on the argument itself: the parameter is `vid_data`,
    # so a leftover `video` reference would not resolve to the passed-in data
    manifest_url = url_or_none(vid_data.get('dash_manifest_url')) or mpd_url

    # The manifest string is URL-unquoted before being parsed as XML
    formats.extend(self._parse_mpd_formats(
        compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
        mpd_url=manifest_url))
|
||||||
|
|
||||||
def process_formats(info):
|
def process_formats(info):
|
||||||
# Downloads with browser's User-Agent are rate limited. Working around
|
# Downloads with browser's User-Agent are rate limited. Working around
|
||||||
|
@ -619,9 +619,12 @@ class FacebookIE(InfoExtractor):
|
||||||
video = video['creation_story']
|
video = video['creation_story']
|
||||||
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
|
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
|
||||||
video.update(reel_info)
|
video.update(reel_info)
|
||||||
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
|
|
||||||
formats = []
|
formats = []
|
||||||
q = qualities(['sd', 'hd'])
|
q = qualities(['sd', 'hd'])
|
||||||
|
|
||||||
|
# Legacy formats extraction
|
||||||
|
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
|
||||||
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
|
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
|
||||||
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
|
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
|
||||||
('browser_native_sd_url', 'sd')):
|
('browser_native_sd_url', 'sd')):
|
||||||
|
@ -629,7 +632,7 @@ class FacebookIE(InfoExtractor):
|
||||||
if not playable_url:
|
if not playable_url:
|
||||||
continue
|
continue
|
||||||
if determine_ext(playable_url) == 'mpd':
|
if determine_ext(playable_url) == 'mpd':
|
||||||
formats.extend(self._extract_mpd_formats(playable_url, video_id))
|
formats.extend(self._extract_mpd_formats(playable_url, video_id, fatal=False))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
|
@ -638,6 +641,28 @@ class FacebookIE(InfoExtractor):
|
||||||
'url': playable_url,
|
'url': playable_url,
|
||||||
})
|
})
|
||||||
extract_dash_manifest(fmt_data, formats)
|
extract_dash_manifest(fmt_data, formats)
|
||||||
|
|
||||||
|
# New videoDeliveryResponse formats extraction
|
||||||
|
fmt_data = traverse_obj(video, ('videoDeliveryResponseFragment', 'videoDeliveryResponseResult'))
|
||||||
|
mpd_urls = traverse_obj(fmt_data, ('dash_manifest_urls', ..., 'manifest_url', {url_or_none}))
|
||||||
|
dash_manifests = traverse_obj(fmt_data, ('dash_manifests', lambda _, v: v['manifest_xml']))
|
||||||
|
for idx, dash_manifest in enumerate(dash_manifests):
|
||||||
|
extract_dash_manifest(dash_manifest, formats, mpd_url=traverse_obj(mpd_urls, idx))
|
||||||
|
if not dash_manifests:
|
||||||
|
# Only extract from MPD URLs if the manifests are not already provided
|
||||||
|
for mpd_url in mpd_urls:
|
||||||
|
formats.extend(self._extract_mpd_formats(mpd_url, video_id, fatal=False))
|
||||||
|
for prog_fmt in traverse_obj(fmt_data, ('progressive_urls', lambda _, v: v['progressive_url'])):
|
||||||
|
format_id = traverse_obj(prog_fmt, ('metadata', 'quality', {str.lower}))
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
# sd, hd formats w/o resolution info should be deprioritized below DASH
|
||||||
|
'quality': q(format_id) - 3,
|
||||||
|
'url': prog_fmt['progressive_url'],
|
||||||
|
})
|
||||||
|
for m3u8_url in traverse_obj(fmt_data, ('hls_playlist_urls', ..., 'hls_playlist_url', {url_or_none})):
|
||||||
|
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False, m3u8_id='hls'))
|
||||||
|
|
||||||
if not formats:
|
if not formats:
|
||||||
# Do not append false positive entry w/o any formats
|
# Do not append false positive entry w/o any formats
|
||||||
return
|
return
|
||||||
|
|
|
@ -5,7 +5,6 @@ from ..utils import (
|
||||||
NO_DEFAULT,
|
NO_DEFAULT,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
extract_attributes,
|
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
join_nonempty,
|
join_nonempty,
|
||||||
|
@ -25,6 +24,11 @@ class ZDFBaseIE(InfoExtractor):
|
||||||
_GEO_COUNTRIES = ['DE']
|
_GEO_COUNTRIES = ['DE']
|
||||||
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd', 'fhd', 'uhd')
|
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd', 'fhd', 'uhd')
|
||||||
|
|
||||||
|
def _download_mediathekv2_document(self, document_id):
    """Download and return the mediathekV2 JSON document for `document_id`."""
    document_url = f'https://zdf-prod-futura.zdf.de/mediathekV2/document/{document_id}'
    return self._download_json(document_url, document_id)
|
||||||
|
|
||||||
def _call_api(self, url, video_id, item, api_token=None, referrer=None):
|
def _call_api(self, url, video_id, item, api_token=None, referrer=None):
|
||||||
headers = {}
|
headers = {}
|
||||||
if api_token:
|
if api_token:
|
||||||
|
@ -320,9 +324,7 @@ class ZDFIE(ZDFBaseIE):
|
||||||
return self._extract_entry(player['content'], player, content, video_id)
|
return self._extract_entry(player['content'], player, content, video_id)
|
||||||
|
|
||||||
def _extract_mobile(self, video_id):
|
def _extract_mobile(self, video_id):
|
||||||
video = self._download_json(
|
video = self._download_mediathekv2_document(video_id)
|
||||||
f'https://zdf-cdn.live.cellular.de/mediathekV2/document/{video_id}',
|
|
||||||
video_id)
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list)
|
formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list)
|
||||||
|
@ -387,18 +389,19 @@ class ZDFChannelIE(ZDFBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'planet-e',
|
'id': 'planet-e',
|
||||||
'title': 'planet e.',
|
'title': 'planet e.',
|
||||||
|
'description': 'md5:87e3b9c66a63cf1407ee443d2c4eb88e',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 50,
|
'playlist_mincount': 50,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest',
|
'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'aktenzeichen-xy-ungeloest',
|
'id': 'aktenzeichen-xy-ungeloest',
|
||||||
'title': 'Aktenzeichen XY... ungelöst',
|
'title': 'Aktenzeichen XY... Ungelöst',
|
||||||
'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)",
|
'description': 'md5:623ede5819c400c6d04943fa8100e6e7',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 2,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.zdf.de/filme/taunuskrimi/',
|
'url': 'https://www.zdf.de/serien/taunuskrimi/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@ -410,32 +413,72 @@ class ZDFChannelIE(ZDFBaseIE):
|
||||||
title = super()._og_search_title(webpage, fatal=fatal)
|
title = super()._og_search_title(webpage, fatal=fatal)
|
||||||
return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None
|
return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None
|
||||||
|
|
||||||
|
def _extract_document_id(self, webpage):
|
||||||
|
matches = re.search(r'docId\s*:\s*[\'"](?P<docid>[^\'"]+)[\'"]', webpage)
|
||||||
|
return matches and matches.group('docid')
|
||||||
|
|
||||||
|
def _get_playlist_description(self, page_data):
    """Build a playlist description from the `shortText` entry of `page_data`.

    Returns the headline and the text separated by a blank line when both
    are non-empty, whichever one is non-empty otherwise, or None when
    neither is available.
    """
    headline = traverse_obj(page_data, ('shortText', 'headline'))
    text = traverse_obj(page_data, ('shortText', 'text'))
    # join_nonempty skips empty parts, so an empty headline or text no longer
    # leaves a dangling blank-line separator in the description
    return join_nonempty(headline, text, delim='\n\n') or None
|
||||||
|
|
||||||
|
def _convert_thumbnails(self, thumbnails):
    """Convert a mapping of thumbnail entries into a list of thumbnail dicts.

    thumbnails: mapping of thumbnail id -> dict with 'url' and optionally
        'width' / 'height'. Anything that is not a dict (e.g. None) yields
        an empty list.

    Returns a list of dicts with 'id', 'url', 'width' and 'height'; entries
    that are not dicts or whose 'url' does not pass url_or_none are skipped.
    """
    if not isinstance(thumbnails, dict):
        return []

    converted = []
    for key, thumbnail_info in thumbnails.items():
        if not isinstance(thumbnail_info, dict):
            continue
        thumbnail_url = url_or_none(thumbnail_info.get('url'))
        if not thumbnail_url:
            continue
        converted.append({
            'id': key,
            'url': thumbnail_url,
            'width': int_or_none(thumbnail_info.get('width')),
            'height': int_or_none(thumbnail_info.get('height')),
        })
    return converted
|
||||||
|
|
||||||
|
def _teaser_to_url_result(self, teaser):
    """Turn a teaser dict into a url_result handled by ZDFIE.

    teaser: dict that must contain 'sharingUrl'; 'id', 'titel',
        'teaserBild', 'beschreibung', 'length', 'currentVideoType' /
        'contentType', 'seasonNumber' and 'episodeNumber' are read when
        present and passed along as metadata.
    """
    return self.url_result(
        teaser['sharingUrl'], ie=ZDFIE.ie_key(),
        id=teaser.get('id'), title=teaser.get('titel', ''),
        # `or {}` also covers a 'teaserBild' key that is present but null,
        # which a .get() default would let through as None
        thumbnails=self._convert_thumbnails(teaser.get('teaserBild') or {}),
        description=teaser.get('beschreibung'),
        duration=float_or_none(teaser.get('length')),
        media_type=teaser.get('currentVideoType') or teaser.get('contentType'),
        season_number=int_or_none(teaser.get('seasonNumber')),
        episode_number=int_or_none(teaser.get('episodeNumber')))
|
||||||
|
|
||||||
def _real_extract(self, url):
    """Extract a channel page as a playlist of its videos.

    Looks up the page's document id, downloads the matching mediathekV2
    document and collects its video teasers. With the 'noplaylist' param
    set, only the main video is returned; otherwise a playlist result with
    title, description and thumbnails is returned.
    """
    channel_id = self._match_id(url)

    webpage = self._download_webpage(url, channel_id)

    main_video = None
    playlist_videos = []
    # Default to an empty document so the metadata lookups further down still
    # work when the page carries no document id
    data = {}

    document_id = self._extract_document_id(webpage)
    if document_id is not None:
        data = self._download_mediathekv2_document(document_id)

        for cluster in data.get('cluster') or []:
            cluster_type = cluster.get('type')
            for teaser in cluster.get('teaser') or []:
                if teaser.get('type') != 'video':
                    continue
                if cluster_type == 'teaserContent':
                    # Only the first such teaser is kept as the main video
                    main_video = main_video or teaser
                elif cluster_type == 'teaser':
                    if teaser.get('brandId') != document_id:
                        # These are unrelated 'You might also like' videos, filter them out
                        continue
                    playlist_videos.append(teaser)

    if self._downloader.params.get('noplaylist', False):
        # NOTE(review): this yields None when no main video was found - confirm
        # that callers are fine with an empty result here
        return self._teaser_to_url_result(main_video) if main_video else None

    self.to_screen(f'Downloading playlist {channel_id} - add --no-playlist to download just the main video')

    thumbnails = (
        traverse_obj(data, ('document', 'image'))
        or traverse_obj(data, ('document', 'teaserBild'))
        or traverse_obj(data, ('stageHeader', 'image'))
        or {})

    return self.playlist_result(
        (self._teaser_to_url_result(video) for video in playlist_videos),
        playlist_id=channel_id,
        playlist_title=self._og_search_title(webpage, fatal=False),
        description=self._get_playlist_description(data),
        thumbnails=self._convert_thumbnails(thumbnails))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user