2024-11-27 01:31:25 +01:00
4 changed files with 29 additions and 59 deletions
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -140,8 +140,6 @@ class TestFormatSelection(unittest.TestCase):
        test('example-with-dashes', 'example-with-dashes')
        test('all', '2', '47', '45', 'example-with-dashes', '35')
        test('mergeall', '2+47+45+example-with-dashes+35', multi=True)
-        # See: https://github.com/yt-dlp/yt-dlp/pulls/8797
-        test('7_a/worst', '35')

    def test_format_selection_audio(self):
        formats = [
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -2465,16 +2465,9 @@ class YoutubeDL:
                return selector_function(ctx_copy)
            return final_selector

-        # HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
-        #       Prefix numbers with random letters to avoid it being classified as a number
-        #       See: https://github.com/yt-dlp/yt-dlp/pulls/8797
-        # TODO: Implement parser not reliant on tokenize.tokenize
-        prefix = ''.join(random.choices(string.ascii_letters, k=32))
-        stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
+        stream = io.BytesIO(format_spec.encode())
        try:
-            tokens = list(_remove_unused_ops(
-                token._replace(string=token.string.replace(prefix, ''))
-                for token in tokenize.tokenize(stream.readline)))
+            tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

--- a/yt_dlp/extractor/ard.py
+++ b/yt_dlp/extractor/ard.py
@@ -292,7 +292,7 @@ class ARDIE(InfoExtractor):
    _TESTS = [{
        # available till 7.12.2023
        'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
-        'md5': '94812e6438488fb923c361a44469614b',
+        'md5': 'a438f671e87a7eba04000336a119ccc4',
        'info_dict': {
            'id': 'maischberger-video-424',
            'display_id': 'maischberger-video-424',
@@ -403,25 +403,26 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
    _VALID_URL = r'''(?x)https://
        (?:(?:beta|www)\.)?ardmediathek\.de/
        (?:(?P<client>[^/]+)/)?
-        (?:player|live|video|(?P<playlist>sendung|serie|sammlung))/
+        (?:player|live|video|(?P<playlist>sendung|sammlung))/
        (?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
        (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
        (?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''

    _TESTS = [{
-        'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
-        'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4',
+        'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI',
+        'md5': '3fd5fead7a370a819341129c8d713136',
        'info_dict': {
-            'display_id': 'filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen',
-            'id': '12939099',
-            'title': 'Liebe auf vier Pfoten',
-            'description': r're:^Claudia Schmitt, Anwältin in Salzburg',
-            'duration': 5222,
-            'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:aee7cbf8f06de976?w=960&ch=ae4d0f2ee47d8b9b',
-            'timestamp': 1701343800,
-            'upload_date': '20231130',
+            'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen',
+            'id': '12172961',
+            'title': 'Wolfsland - Die traurigen Schwestern',
+            'description': r're:^Als der Polizeiobermeister Raaben',
+            'duration': 5241,
+            'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957',
+            'timestamp': 1670710500,
+            'upload_date': '20221210',
            'ext': 'mp4',
-            'episode': 'Liebe auf vier Pfoten',
+            'age_limit': 12,
+            'episode': 'Wolfsland - Die traurigen Schwestern',
            'series': 'Filme im MDR'
        },
    }, {
@@ -453,7 +454,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
            'duration': 915,
            'episode': 'tagesschau, 20:00 Uhr',
            'series': 'tagesschau',
-            'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678',
+            'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49',
        },
    }, {
        'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
@@ -474,10 +475,6 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
        # playlist of type 'sendung'
        'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
        'only_matching': True,
-    }, {
-        # playlist of type 'serie'
-        'url': 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1',
-        'only_matching': True,
    }, {
        # playlist of type 'sammlung'
        'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
@@ -490,11 +487,10 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
        'only_matching': True,
    }]

-    def _ARD_load_playlist_snippet(self, playlist_id, display_id, client, mode, page_number):
+    def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber):
        """ Query the ARD server for playlist information
        and returns the data in "raw" format """
-        assert mode in ('sendung', 'serie', 'sammlung')
-        if mode in ('sendung', 'serie'):
+        if mode == 'sendung':
            graphQL = json.dumps({
                'query': '''{
                    showPage(
@@ -511,7 +507,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
                            links { target { id href title } }
                            type
                        }
-                    }}''' % (client, playlist_id, page_number),
+                    }}''' % (client, playlist_id, pageNumber),
            }).encode()
        else:  # mode == 'sammlung'
            graphQL = json.dumps({
@@ -532,7 +528,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
                                type
                            }
                        }
-                    }}''' % (client, playlist_id, page_number),
+                    }}''' % (client, playlist_id, pageNumber),
            }).encode()
        # Resources for ARD GraphQL debugging:
        # https://api-test.ardmediathek.de/public-gateway
@@ -542,7 +538,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
            data=graphQL,
            headers={'Content-Type': 'application/json'})['data']
        # align the structure of the returned data:
-        if mode in ('sendung', 'serie'):
+        if mode == 'sendung':
            show_page = show_page['showPage']
        else:  # mode == 'sammlung'
            show_page = show_page['morePage']['widget']
@@ -550,12 +546,12 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):

    def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
        """ Collects all playlist entries and returns them as info dict.
-        Supports playlists of mode 'sendung', 'serie', and 'sammlung',
-        as well as nested playlists. """
+        Supports playlists of mode 'sendung' and 'sammlung', and also nested
+        playlists. """
        entries = []
        pageNumber = 0
        while True:  # iterate by pageNumber
-            show_page = self._ARD_load_playlist_snippet(
+            show_page = self._ARD_load_playlist_snipped(
                playlist_id, display_id, client, mode, pageNumber)
            for teaser in show_page['teasers']:  # process playlist items
                if '/compilation/' in teaser['links']['target']['href']:
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@@ -52,7 +52,7 @@ class FacebookIE(InfoExtractor):
                            )\?(?:.*?)(?:v|video_id|story_fbid)=|
                            [^/]+/videos/(?:[^/]+/)?|
                            [^/]+/posts/|
-                            groups/[^/]+/(?:permalink|posts)/|
+                            groups/[^/]+/permalink/|
                            watchparty/
                        )|
                    facebook:
@@ -232,21 +232,6 @@ class FacebookIE(InfoExtractor):
            'uploader_id': '100013949973717',
        },
        'skip': 'Requires logging in',
-    }, {
-        # data.node.comet_sections.content.story.attachments[].throwbackStyles.attachment_target_renderer.attachment.target.attachments[].styles.attachment.media
-        'url': 'https://www.facebook.com/groups/1645456212344334/posts/3737828833107051/',
-        'info_dict': {
-            'id': '1569199726448814',
-            'ext': 'mp4',
-            'title': 'Pence MUST GO!',
-            'description': 'Vickie Gentry shared a memory.',
-            'timestamp': 1511548260,
-            'upload_date': '20171124',
-            'uploader': 'Vickie Gentry',
-            'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
-            'thumbnail': r're:^https?://.*',
-            'duration': 148.435,
-        },
    }, {
        'url': 'https://www.facebook.com/video.php?v=10204634152394104',
        'only_matching': True,
@@ -627,11 +612,9 @@ class FacebookIE(InfoExtractor):
                nodes = variadic(traverse_obj(data, 'nodes', 'node') or [])
                attachments = traverse_obj(nodes, (
                    ..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments',
-                    ..., ('styles', 'style_type_renderer', ('throwbackStyles', 'attachment_target_renderer')),
-                    'attachment', {dict}))
+                    ..., ('styles', 'style_type_renderer'), 'attachment'), expected_type=dict) or []
                for attachment in attachments:
-                    ns = traverse_obj(attachment, ('all_subattachments', 'nodes', ..., {dict}),
-                                      ('target', 'attachments', ..., 'styles', 'attachment', {dict}))
+                    ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
                    for n in ns:
                        parse_attachment(n)
                    parse_attachment(attachment)
@@ -654,7 +637,7 @@ class FacebookIE(InfoExtractor):
                if len(entries) > 1:
                    return self.playlist_result(entries, video_id)

-                video_info = entries[0] if entries else {'id': video_id}
+                video_info = entries[0]
                webpage_info = extract_metadata(webpage)
                # honor precise duration in video info
                if video_info.get('duration'):