2024-10-02 15:21:23 +02:00
1 changed files with 26 additions and 21 deletions
--- a/yt_dlp/extractor/orf.py
+++ b/yt_dlp/extractor/orf.py
@ -12,6 +12,7 @@ from ..utils import (
    int_or_none,
    join_nonempty,
    make_archive_id,
+    merge_dicts,
    mimetype2ext,
    orderedSet,
    remove_end,
@ -580,53 +581,57 @@ class ORFONIE(InfoExtractor):
            'title': 'School of Champions (4/8)',
            'description': 'md5:d09ad279fc2e8502611e7648484b6afd',
            'media_type': 'episode',
-            'timestamp': 1706472362,
-            'upload_date': '20240128',
        }
    }]

-    def _extract_video(self, video_id, display_id):
+    def _call_api(self, video_id, display_id):
+        # NOTE: the prefix `3dSlfek03nsLKdj4Jsd` is based only on observation of several
+        # API calls. This string may change in the future.
        encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()
        api_json = self._download_json(
            f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id)

        formats, subtitles = [], {}
-        for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):
-            for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})):
+        for manifest_type in api_json.get('sources') or [{}]:
+            for manifest_info in traverse_obj(api_json, ('sources', manifest_type, ...)):
+                fmt, subs = [], {}
                if manifest_type == 'hls':
-                    fmts, subs = self._extract_m3u8_formats_and_subtitles(
-                        manifest_url, display_id, fatal=False, m3u8_id='hls')
+                    fmt, subs = self._extract_m3u8_formats_and_subtitles(manifest_info.get('src'), display_id)
                elif manifest_type == 'dash':
-                    fmts, subs = self._extract_mpd_formats_and_subtitles(
-                        manifest_url, display_id, fatal=False, mpd_id='dash')
+                    fmt, subs = self._extract_mpd_formats_and_subtitles(manifest_info.get('src'), display_id, fatal=False)
                else:
                    continue
-                formats.extend(fmts)
+                formats.extend(fmt)
                self._merge_subtitles(subs, target=subtitles)

        return {
-            'id': video_id,
+            'id': video_id or api_json.get('id'),
            'formats': formats,
            'subtitles': subtitles,
            **traverse_obj(api_json, {
-                'duration': ('duration_second', {float_or_none}),
-                'title': (('title', 'headline'), {str}),
-                'description': (('description', 'teaser_text'), {str}),
-                'media_type': ('video_type', {str}),
-            }, get_all=False)
+                'duration': ('duration_second', float_or_none),
+                'title': (('title'), ('headline')),
+                'description': (('description'), ('teaser_text')),
+                'media_type': 'video_type'
+            })
        }

    def _real_extract(self, url):
        video_id, display_id = self._match_valid_url(url).group('id', 'slug')
        webpage = self._download_webpage(url, display_id)
-        json_ld_data = self._search_json_ld(webpage, display_id, fatal=False)
+        json_ld_data = self._search_json_ld(webpage, display_id)

-        return {
+        api_data = self._call_api(video_id, display_id)
+
+        return merge_dicts(api_data, {
            'id': video_id,
            'title': (json_ld_data.get('title')
                      or self._html_search_meta(['og:title', 'twitter:title'], webpage)),
            'description': (json_ld_data.get('description')
                            or self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage)),
-            **json_ld_data,
-            **self._extract_video(video_id, display_id)
-        }
+            **traverse_obj(json_ld_data, {
+                'duration': ('duration', {float_or_none}),
+                'timestamp': ('timestamp', int_or_none),
+                'thumbnails': 'thumbnails'
+            })
+        })