Remove fmts and subs temporary value

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
Wrap float_or_none in a set when traversing
2024-10-02 07:11:24 +02:00 · 2024-02-01 09:26:17 +09:00 · 2024-02-01 09:24:27 +09:00 · 2024-02-01 09:21:55 +09:00 · 2024-02-01 08:59:37 +09:00 · 2024-02-01 08:56:04 +09:00
1 changed file with 21 additions and 26 deletions
--- a/yt_dlp/extractor/orf.py
+++ b/yt_dlp/extractor/orf.py
@@ -12,7 +12,6 @@ from ..utils import (
    int_or_none,
    join_nonempty,
    make_archive_id,
-    merge_dicts,
    mimetype2ext,
    orderedSet,
    remove_end,
@@ -581,57 +580,53 @@ class ORFONIE(InfoExtractor):
            'title': 'School of Champions (4/8)',
            'description': 'md5:d09ad279fc2e8502611e7648484b6afd',
            'media_type': 'episode',
+            'timestamp': 1706472362,
+            'upload_date': '20240128',
        }
    }]

-    def _call_api(self, video_id, display_id):
-        # NOTE: the prefix `3dSlfek03nsLKdj4Jsd` is only based on my observation on several
-        # api call. This string may change in future
+    def _extract_video(self, video_id, display_id):
        encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()
        api_json = self._download_json(
            f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id)

        formats, subtitles = [], {}
-        for manifest_type in api_json.get('sources') or [{}]:
-            for manifest_info in traverse_obj(api_json, ('sources', manifest_type, ...)):
-                fmt, subs = [], {}
+        for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):
+            for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})):
                if manifest_type == 'hls':
-                    fmt, subs = self._extract_m3u8_formats_and_subtitles(manifest_info.get('src'), display_id)
+                    fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                        manifest_url, display_id, fatal=False, m3u8_id='hls')
                elif manifest_type == 'dash':
-                    fmt, subs = self._extract_mpd_formats_and_subtitles(manifest_info.get('src'), display_id, fatal=False)
+                    fmts, subs = self._extract_mpd_formats_and_subtitles(
+                        manifest_url, display_id, fatal=False, mpd_id='dash')
                else:
                    continue
-                formats.extend(fmt)
+                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)

        return {
-            'id': video_id or api_json.get('id'),
+            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **traverse_obj(api_json, {
-                'duration': ('duration_second', float_or_none),
-                'title': (('title'), ('headline')),
-                'description': (('description'), ('teaser_text')),
-                'media_type': 'video_type'
-            })
+                'duration': ('duration_second', {float_or_none}),
+                'title': (('title', 'headline'), {str}),
+                'description': (('description', 'teaser_text'), {str}),
+                'media_type': ('video_type', {str}),
+            }, get_all=False)
        }

    def _real_extract(self, url):
        video_id, display_id = self._match_valid_url(url).group('id', 'slug')
        webpage = self._download_webpage(url, display_id)
-        json_ld_data = self._search_json_ld(webpage, display_id)
+        json_ld_data = self._search_json_ld(webpage, display_id, fatal=False)

-        api_data = self._call_api(video_id, display_id)
-
-        return merge_dicts(api_data, {
+        return {
            'id': video_id,
            'title': (json_ld_data.get('title')
                      or self._html_search_meta(['og:title', 'twitter:title'], webpage)),
            'description': (json_ld_data.get('description')
                            or self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage)),
-            **traverse_obj(json_ld_data, {
-                'duration': ('duration', {float_or_none}),
-                'timestamp': ('timestamp', int_or_none),
-                'thumbnails': 'thumbnails'
-            })
-        })
+            **json_ld_data,
+            **self._extract_video(video_id, display_id)
+        }
Author	SHA1	Message	Date
HobbyistDev	a391e2aa07	Remove `fmts` and `subs` temporary value Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2024-02-01 09:26:17 +09:00
HobbyistDev	df007d8904	Wrap `float_or_none` in a set when traversing Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2024-02-01 09:24:27 +09:00
HobbyistDev	cc52ccd3be	Prefix manifest_id with the respective `manifest_type` Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2024-02-01 09:21:55 +09:00
HobbyistDev	6a2f6b71f8	lint, fix test	2024-02-01 08:59:37 +09:00
HobbyistDev	0272e00343	Branch metadata traversal better Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2024-02-01 08:56:04 +09:00
HobbyistDev	21941027b4	Get `manifest_url` directly using `traverse_obj` Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2024-02-01 08:55:00 +09:00
HobbyistDev	3944e4a692	Remove comment in `_extract_video`	2024-02-01 08:51:08 +09:00
HobbyistDev	a857660b61	change `_call_api` to `_extract_video`	2024-02-01 08:50:40 +09:00
HobbyistDev	ee470291e8	set `json_ld_data` as non-fatal	2024-02-01 08:48:45 +09:00
HobbyistDev	46ea6c17a8	Merge branch 'on-orf-at-issue-8903' of https://github.com/HobbyistDev/yt-dlp into on-orf-at-issue-8903	2024-02-01 08:44:55 +09:00
HobbyistDev	5f576e600b	Unpack `_call_api` instead of using a temporary variable (`api_data`)	2024-02-01 08:44:26 +09:00
HobbyistDev	ff8a13c160	Get `manifest_type` more safely Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2024-02-01 08:42:51 +09:00
HobbyistDev	4b87560356	Remove unnecessary fallback Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2024-02-01 08:19:49 +09:00