mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-25 16:51:26 +01:00
Compare commits
6 Commits
5ccb70ef9c
...
d73473d35c
Author | SHA1 | Date | |
---|---|---|---|
|
d73473d35c | ||
|
180d2d1a9b | ||
|
c6a3a9b246 | ||
|
44f8f59c88 | ||
|
6b6f97f3c9 | ||
|
8c27ce471d |
|
@ -5,7 +5,6 @@ from ..utils import (
|
|||
NO_DEFAULT,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
|
@ -25,6 +24,11 @@ class ZDFBaseIE(InfoExtractor):
|
|||
_GEO_COUNTRIES = ['DE']
|
||||
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd', 'fhd', 'uhd')
|
||||
|
||||
def _download_mediathekv2_document(self, document_id):
|
||||
return self._download_json(
|
||||
f'https://zdf-prod-futura.zdf.de/mediathekV2/document/{document_id}',
|
||||
document_id)
|
||||
|
||||
def _call_api(self, url, video_id, item, api_token=None, referrer=None):
|
||||
headers = {}
|
||||
if api_token:
|
||||
|
@ -320,9 +324,7 @@ class ZDFIE(ZDFBaseIE):
|
|||
return self._extract_entry(player['content'], player, content, video_id)
|
||||
|
||||
def _extract_mobile(self, video_id):
|
||||
video = self._download_json(
|
||||
f'https://zdf-cdn.live.cellular.de/mediathekV2/document/{video_id}',
|
||||
video_id)
|
||||
video = self._download_mediathekv2_document(video_id)
|
||||
|
||||
formats = []
|
||||
formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list)
|
||||
|
@ -387,18 +389,19 @@ class ZDFChannelIE(ZDFBaseIE):
|
|||
'info_dict': {
|
||||
'id': 'planet-e',
|
||||
'title': 'planet e.',
|
||||
'description': 'md5:87e3b9c66a63cf1407ee443d2c4eb88e',
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
}, {
|
||||
'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest',
|
||||
'info_dict': {
|
||||
'id': 'aktenzeichen-xy-ungeloest',
|
||||
'title': 'Aktenzeichen XY... ungelöst',
|
||||
'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)",
|
||||
'title': 'Aktenzeichen XY... Ungelöst',
|
||||
'description': 'md5:623ede5819c400c6d04943fa8100e6e7',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
}, {
|
||||
'url': 'https://www.zdf.de/filme/taunuskrimi/',
|
||||
'url': 'https://www.zdf.de/serien/taunuskrimi/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
@ -410,32 +413,72 @@ class ZDFChannelIE(ZDFBaseIE):
|
|||
title = super()._og_search_title(webpage, fatal=fatal)
|
||||
return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None
|
||||
|
||||
def _extract_document_id(self, webpage):
|
||||
matches = re.search(r'docId\s*:\s*[\'"](?P<docid>[^\'"]+)[\'"]', webpage)
|
||||
return matches and matches.group('docid')
|
||||
|
||||
def _get_playlist_description(self, page_data):
|
||||
headline = traverse_obj(page_data, ('shortText', 'headline'))
|
||||
text = traverse_obj(page_data, ('shortText', 'text'))
|
||||
if headline is not None and text is not None:
|
||||
return f'{headline}\n\n{text}'
|
||||
return headline or text
|
||||
|
||||
def _convert_thumbnails(self, thumbnails):
|
||||
return [{
|
||||
'id': key,
|
||||
'url': thumbnail_info['url'],
|
||||
'width': int_or_none(thumbnail_info.get('width')),
|
||||
'height': int_or_none(thumbnail_info.get('height')),
|
||||
} for key, thumbnail_info in thumbnails.items() if url_or_none(thumbnail_info.get('url'))]
|
||||
|
||||
def _teaser_to_url_result(self, teaser):
|
||||
return self.url_result(
|
||||
teaser['sharingUrl'], ie=ZDFIE.ie_key(),
|
||||
id=teaser.get('id'), title=teaser.get('titel', ''),
|
||||
thumbnails=self._convert_thumbnails(teaser.get('teaserBild', {})),
|
||||
description=teaser.get('beschreibung'),
|
||||
duration=float_or_none(teaser.get('length')),
|
||||
media_type=teaser.get('currentVideoType') or teaser.get('contentType'),
|
||||
season_number=int_or_none(teaser.get('seasonNumber')),
|
||||
episode_number=int_or_none(teaser.get('episodeNumber')))
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, channel_id)
|
||||
|
||||
matches = re.finditer(
|
||||
rf'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>{ZDFIE._VALID_URL})\1''',
|
||||
webpage)
|
||||
main_video = None
|
||||
playlist_videos = []
|
||||
|
||||
document_id = self._extract_document_id(webpage)
|
||||
if document_id is not None:
|
||||
data = self._download_mediathekv2_document(document_id)
|
||||
|
||||
for cluster in data['cluster']:
|
||||
for teaser in cluster['teaser']:
|
||||
if cluster['type'] == 'teaserContent' and teaser['type'] == 'video':
|
||||
main_video = main_video or teaser
|
||||
elif cluster['type'] == 'teaser' and teaser['type'] == 'video':
|
||||
if teaser['brandId'] != document_id:
|
||||
# These are unrelated 'You might also like' videos, filter them out
|
||||
continue
|
||||
playlist_videos.append(teaser)
|
||||
|
||||
if self._downloader.params.get('noplaylist', False):
|
||||
entry = next(
|
||||
(self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches),
|
||||
None)
|
||||
self.to_screen('Downloading just the main video because of --no-playlist')
|
||||
if entry:
|
||||
return entry
|
||||
else:
|
||||
self.to_screen(f'Downloading playlist {channel_id} - add --no-playlist to download just the main video')
|
||||
return self._teaser_to_url_result(main_video) if main_video else None
|
||||
|
||||
def check_video(m):
|
||||
v_ref = self._search_regex(
|
||||
r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["']){}\2[^>]*>)'''.format(m.group('p_id')),
|
||||
webpage, 'check id', default='')
|
||||
v_ref = extract_attributes(v_ref)
|
||||
return v_ref.get('data-target-video-type') != 'novideo'
|
||||
self.to_screen(f'Downloading playlist {channel_id} - add --no-playlist to download just the main video')
|
||||
|
||||
return self.playlist_from_matches(
|
||||
(m.group('url') for m in matches if check_video(m)),
|
||||
channel_id, self._og_search_title(webpage, fatal=False))
|
||||
thumbnails = (
|
||||
traverse_obj(data, ('document', 'image'))
|
||||
or traverse_obj(data, ('document', 'teaserBild'))
|
||||
or traverse_obj(data, ('stageHeader', 'image'))
|
||||
or {})
|
||||
|
||||
return self.playlist_result(
|
||||
(self._teaser_to_url_result(video) for video in playlist_videos),
|
||||
playlist_id=channel_id,
|
||||
playlist_title=self._og_search_title(webpage, fatal=False),
|
||||
description=self._get_playlist_description(data),
|
||||
thumbnails=self._convert_thumbnails(thumbnails))
|
||||
|
|
Loading…
Reference in New Issue
Block a user