Compare commits

...

13 Commits

Author SHA1 Message Date
HobbyistDev
a391e2aa07
Remove fmts and subs temporary value
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-02-01 09:26:17 +09:00
HobbyistDev
df007d8904
Wrap float_or_none in a set when traversing
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-02-01 09:24:27 +09:00
HobbyistDev
cc52ccd3be
Prefix manifest_id with the respective manifest_type
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-02-01 09:21:55 +09:00
HobbyistDev
6a2f6b71f8 lint, fix test 2024-02-01 08:59:37 +09:00
HobbyistDev
0272e00343
Branch metadata traversal better
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-02-01 08:56:04 +09:00
HobbyistDev
21941027b4
Get manifest_url directly using traverse_obj
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-02-01 08:55:00 +09:00
HobbyistDev
3944e4a692 Remove comment in _extract_video 2024-02-01 08:51:08 +09:00
HobbyistDev
a857660b61 change _call_api to _extract_video 2024-02-01 08:50:40 +09:00
HobbyistDev
ee470291e8 set json_ld_data as non-fatal 2024-02-01 08:48:45 +09:00
HobbyistDev
46ea6c17a8 Merge branch 'on-orf-at-issue-8903' of https://github.com/HobbyistDev/yt-dlp into on-orf-at-issue-8903 2024-02-01 08:44:55 +09:00
HobbyistDev
5f576e600b Unpack _call_api instead of using temporary variable (api_data) 2024-02-01 08:44:26 +09:00
HobbyistDev
ff8a13c160
Get manifest_type more safely
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-02-01 08:42:51 +09:00
HobbyistDev
4b87560356
Remove unnecessary fallback
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-02-01 08:19:49 +09:00

View File

@ -12,7 +12,6 @@ from ..utils import (
int_or_none, int_or_none,
join_nonempty, join_nonempty,
make_archive_id, make_archive_id,
merge_dicts,
mimetype2ext, mimetype2ext,
orderedSet, orderedSet,
remove_end, remove_end,
@ -581,57 +580,53 @@ class ORFONIE(InfoExtractor):
'title': 'School of Champions (4/8)', 'title': 'School of Champions (4/8)',
'description': 'md5:d09ad279fc2e8502611e7648484b6afd', 'description': 'md5:d09ad279fc2e8502611e7648484b6afd',
'media_type': 'episode', 'media_type': 'episode',
'timestamp': 1706472362,
'upload_date': '20240128',
} }
}] }]
def _call_api(self, video_id, display_id): def _extract_video(self, video_id, display_id):
# NOTE: the prefix `3dSlfek03nsLKdj4Jsd` is only based on my observation on several
# api call. This string may change in future
encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode() encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()
api_json = self._download_json( api_json = self._download_json(
f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id) f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id)
formats, subtitles = [], {} formats, subtitles = [], {}
for manifest_type in api_json.get('sources') or [{}]: for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):
for manifest_info in traverse_obj(api_json, ('sources', manifest_type, ...)): for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})):
fmt, subs = [], {}
if manifest_type == 'hls': if manifest_type == 'hls':
fmt, subs = self._extract_m3u8_formats_and_subtitles(manifest_info.get('src'), display_id) fmts, subs = self._extract_m3u8_formats_and_subtitles(
manifest_url, display_id, fatal=False, m3u8_id='hls')
elif manifest_type == 'dash': elif manifest_type == 'dash':
fmt, subs = self._extract_mpd_formats_and_subtitles(manifest_info.get('src'), display_id, fatal=False) fmts, subs = self._extract_mpd_formats_and_subtitles(
manifest_url, display_id, fatal=False, mpd_id='dash')
else: else:
continue continue
formats.extend(fmt) formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles) self._merge_subtitles(subs, target=subtitles)
return { return {
'id': video_id or api_json.get('id'), 'id': video_id,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
**traverse_obj(api_json, { **traverse_obj(api_json, {
'duration': ('duration_second', float_or_none), 'duration': ('duration_second', {float_or_none}),
'title': (('title'), ('headline')), 'title': (('title', 'headline'), {str}),
'description': (('description'), ('teaser_text')), 'description': (('description', 'teaser_text'), {str}),
'media_type': 'video_type' 'media_type': ('video_type', {str}),
}) }, get_all=False)
} }
def _real_extract(self, url): def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'slug') video_id, display_id = self._match_valid_url(url).group('id', 'slug')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
json_ld_data = self._search_json_ld(webpage, display_id) json_ld_data = self._search_json_ld(webpage, display_id, fatal=False)
api_data = self._call_api(video_id, display_id) return {
return merge_dicts(api_data, {
'id': video_id, 'id': video_id,
'title': (json_ld_data.get('title') 'title': (json_ld_data.get('title')
or self._html_search_meta(['og:title', 'twitter:title'], webpage)), or self._html_search_meta(['og:title', 'twitter:title'], webpage)),
'description': (json_ld_data.get('description') 'description': (json_ld_data.get('description')
or self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage)), or self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage)),
**traverse_obj(json_ld_data, { **json_ld_data,
'duration': ('duration', {float_or_none}), **self._extract_video(video_id, display_id)
'timestamp': ('timestamp', int_or_none), }
'thumbnails': 'thumbnails'
})
})