Update facebook.py

Update yt_dlp/extractor/facebook.py
2024-09-24 03:11:24 +02:00 · 2023-11-21 12:16:51 +08:00 · 2023-11-21 12:00:15 +08:00 · 2023-11-21 10:51:53 +08:00 · 2023-11-21 10:51:24 +08:00 · 2023-11-21 10:50:33 +08:00
1 changed file with 21 additions and 19 deletions
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@@ -19,6 +19,7 @@ from ..utils import (
    get_element_by_id,
    get_first,
    int_or_none,
+    join_nonempty,
    js_to_json,
    merge_dicts,
    parse_count,
@@ -421,29 +422,25 @@ class FacebookIE(InfoExtractor):
            post = traverse_obj(post_data, (
                ..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
            snippet = traverse_obj(post, (..., 'video', ..., 'attachments', ..., lambda k, v: (
-                k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict) or {}
+                k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')))
            locale = self._html_search_meta(['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US')
            captions = get_first(snippet, 'video_available_captions_locales', 'captions_url')
-            useIsVideoBroadcast = get_first(snippet, ('is_video_broadcast')) or False
+            is_video_broadcast = get_first(snippet, 'is_video_broadcast', expected_type=bool)
            automatic_captions = {}
            subtitles = {}
-            if isinstance(captions, str):
-                subtitles[locale] = [{'ext': determine_ext(captions, default_ext='srt'), 'url': captions}]
-            elif isinstance(captions, list):
-                if len(captions) > 1:
-                    captions = sorted(captions, key=lambda c: (c['locale'] != locale, c['locale']))
-                for c in captions:
-                    s = {
-                        'ext': determine_ext(c['captions_url'], default_ext='srt'),
-                        'url': c['captions_url'],
-                        'name': (c['localized_language']
-                                 + (' (' + c['localized_country'] + ')' if c['localized_country'] else '')
-                                 + (' (' + c['localized_creation_method'] + ')' if c['localized_creation_method'] else '')),
-                    }
-                    if c['localized_creation_method'] or useIsVideoBroadcast:
-                        automatic_captions.setdefault(c['locale'], []).append(s)
+            if url_or_none(captions):  # snippet only had 'captions_url'
+                subtitles[locale] = [{'url': captions}]
            else:
-                        subtitles.setdefault(c['locale'], []).append(s)
+                captions = sorted(captions, key=lambda c: c['locale'])
+            for caption in traverse_obj(captions, lambda _, v: v['captions_url'] and v['locale']):
+                subs = {
+                    'url': caption['captions_url'],
+                    'name': join_nonempty('localized_language', 'localized_country', from_dict=caption),
+                }
+                if caption.get('localized_creation_method') or is_video_broadcast:
+                    automatic_captions.setdefault(caption['locale'], []).append(subs)
+                else:
+                    subtitles.setdefault(caption['locale'], []).append(subs)
            media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
                k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
            title = get_first(media, ('title', 'text'))
@@ -734,6 +731,7 @@ class FacebookIE(InfoExtractor):
        video_data = video_data[0]

        formats = []
+        subtitles = {}
        for f in video_data:
            format_id = f['stream_type']
            if f and isinstance(f, dict):
@@ -756,10 +754,14 @@ class FacebookIE(InfoExtractor):
                            'height': 720 if quality == 'hd' else None
                        })
            extract_dash_manifest(f[0], formats)
+            subtitles_src = f[0].get('subtitles_src')
+            if subtitles_src:
+                subtitles.setdefault('en', []).append({'url': subtitles_src})

        info_dict = {
            'id': video_id,
            'formats': formats,
+            'subtitles': subtitles,
        }
        process_formats(info_dict)
        info_dict.update(extract_metadata(webpage))
Author	SHA1	Message	Date
kclauhk	23e2bc581b	Update facebook.py	2023-11-21 12:16:51 +08:00
kclauhk	98ee02d751	Update facebook.py	2023-11-21 12:00:15 +08:00
kclauhk	1e973dfe62	Update yt_dlp/extractor/facebook.py Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2023-11-21 10:51:53 +08:00
kclauhk	31e28ccfd6	Update yt_dlp/extractor/facebook.py Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2023-11-21 10:51:24 +08:00
kclauhk	c7a1cb0d3b	Update yt_dlp/extractor/facebook.py Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2023-11-21 10:50:33 +08:00