Compare commits


7 Commits

Author  SHA1  Message  Date
InvalidUsernameException  d73473d35c  Merge 180d2d1a9b into 39d79c9b9c  2024-11-15 22:52:15 +01:00
Simon Sawicki  39d79c9b9c  [utils] Fix join_nonempty, add **kwargs to unpack (#11559) (Authored by: Grub4K)  2024-11-15 22:06:15 +01:00
InvalidUsernameException  180d2d1a9b  Update outdated API URL  2024-09-19 21:50:18 +02:00
InvalidUsernameException  c6a3a9b246  Fix tests  2024-09-19 21:50:18 +02:00
InvalidUsernameException  44f8f59c88  Extract video entry info  2024-09-19 21:50:18 +02:00
InvalidUsernameException  6b6f97f3c9  Extract playlist metadata  2024-09-19 21:50:18 +02:00
InvalidUsernameException  8c27ce471d  Rewrite ZDF channel extractor to use an API instead of web scraping  2024-09-19 21:50:18 +02:00
5 changed files with 74 additions and 37 deletions

test/test_traversal.py  (View File)

@@ -525,7 +525,7 @@ class TestTraversalHelpers:
     def test_unpack(self):
         assert unpack(lambda *x: ''.join(map(str, x)))([1, 2, 3]) == '123'
         assert unpack(join_nonempty)([1, 2, 3]) == '1-2-3'
-        assert unpack(join_nonempty(delim=' '))([1, 2, 3]) == '1 2 3'
+        assert unpack(join_nonempty, delim=' ')([1, 2, 3]) == '1 2 3'
         with pytest.raises(TypeError):
             unpack(join_nonempty)()
         with pytest.raises(TypeError):

test/test_utils.py  (View File)

@@ -72,7 +72,6 @@ from yt_dlp.utils import (
     intlist_to_bytes,
     iri_to_uri,
     is_html,
-    join_nonempty,
     js_to_json,
     limit_length,
     locked_file,
@@ -2158,10 +2157,6 @@ Line 1
         assert int_or_none(v=10) == 10, 'keyword passed positional should call function'
         assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function'
-
-        assert callable(join_nonempty(delim=', ')), 'varargs positional should apply partially'
-        assert callable(join_nonempty()), 'varargs positional should apply partially'
-        assert join_nonempty(None, delim=', ') == '', 'passed varargs should call the function'
 
 
 if __name__ == '__main__':
     unittest.main()

yt_dlp/extractor/zdf.py  (View File)

@@ -5,7 +5,6 @@ from ..utils import (
     NO_DEFAULT,
     ExtractorError,
     determine_ext,
-    extract_attributes,
     float_or_none,
     int_or_none,
     join_nonempty,
@@ -25,6 +24,11 @@ class ZDFBaseIE(InfoExtractor):
     _GEO_COUNTRIES = ['DE']
     _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd', 'fhd', 'uhd')
 
+    def _download_mediathekv2_document(self, document_id):
+        return self._download_json(
+            f'https://zdf-prod-futura.zdf.de/mediathekV2/document/{document_id}',
+            document_id)
+
     def _call_api(self, url, video_id, item, api_token=None, referrer=None):
         headers = {}
         if api_token:
@@ -320,9 +324,7 @@ class ZDFIE(ZDFBaseIE):
         return self._extract_entry(player['content'], player, content, video_id)
 
     def _extract_mobile(self, video_id):
-        video = self._download_json(
-            f'https://zdf-cdn.live.cellular.de/mediathekV2/document/{video_id}',
-            video_id)
+        video = self._download_mediathekv2_document(video_id)
 
         formats = []
         formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list)
@@ -387,18 +389,19 @@ class ZDFChannelIE(ZDFBaseIE):
         'info_dict': {
             'id': 'planet-e',
             'title': 'planet e.',
+            'description': 'md5:87e3b9c66a63cf1407ee443d2c4eb88e',
         },
         'playlist_mincount': 50,
     }, {
         'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest',
         'info_dict': {
             'id': 'aktenzeichen-xy-ungeloest',
-            'title': 'Aktenzeichen XY... ungelöst',
-            'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)",
+            'title': 'Aktenzeichen XY... Ungelöst',
+            'description': 'md5:623ede5819c400c6d04943fa8100e6e7',
         },
         'playlist_mincount': 2,
     }, {
-        'url': 'https://www.zdf.de/filme/taunuskrimi/',
+        'url': 'https://www.zdf.de/serien/taunuskrimi/',
         'only_matching': True,
     }]
 
@@ -410,32 +413,72 @@ class ZDFChannelIE(ZDFBaseIE):
         title = super()._og_search_title(webpage, fatal=fatal)
         return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None
 
+    def _extract_document_id(self, webpage):
+        matches = re.search(r'docId\s*:\s*[\'"](?P<docid>[^\'"]+)[\'"]', webpage)
+        return matches and matches.group('docid')
+
+    def _get_playlist_description(self, page_data):
+        headline = traverse_obj(page_data, ('shortText', 'headline'))
+        text = traverse_obj(page_data, ('shortText', 'text'))
+        if headline is not None and text is not None:
+            return f'{headline}\n\n{text}'
+        return headline or text
+
+    def _convert_thumbnails(self, thumbnails):
+        return [{
+            'id': key,
+            'url': thumbnail_info['url'],
+            'width': int_or_none(thumbnail_info.get('width')),
+            'height': int_or_none(thumbnail_info.get('height')),
+        } for key, thumbnail_info in thumbnails.items() if url_or_none(thumbnail_info.get('url'))]
+
+    def _teaser_to_url_result(self, teaser):
+        return self.url_result(
+            teaser['sharingUrl'], ie=ZDFIE.ie_key(),
+            id=teaser.get('id'), title=teaser.get('titel', ''),
+            thumbnails=self._convert_thumbnails(teaser.get('teaserBild', {})),
+            description=teaser.get('beschreibung'),
+            duration=float_or_none(teaser.get('length')),
+            media_type=teaser.get('currentVideoType') or teaser.get('contentType'),
+            season_number=int_or_none(teaser.get('seasonNumber')),
+            episode_number=int_or_none(teaser.get('episodeNumber')))
+
     def _real_extract(self, url):
         channel_id = self._match_id(url)
 
         webpage = self._download_webpage(url, channel_id)
 
-        matches = re.finditer(
-            rf'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>{ZDFIE._VALID_URL})\1''',
-            webpage)
+        main_video = None
+        playlist_videos = []
+
+        document_id = self._extract_document_id(webpage)
+        if document_id is not None:
+            data = self._download_mediathekv2_document(document_id)
+
+            for cluster in data['cluster']:
+                for teaser in cluster['teaser']:
+                    if cluster['type'] == 'teaserContent' and teaser['type'] == 'video':
+                        main_video = main_video or teaser
+                    elif cluster['type'] == 'teaser' and teaser['type'] == 'video':
+                        if teaser['brandId'] != document_id:
+                            # These are unrelated 'You might also like' videos, filter them out
+                            continue
+                        playlist_videos.append(teaser)
 
         if self._downloader.params.get('noplaylist', False):
-            entry = next(
-                (self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches),
-                None)
-            self.to_screen('Downloading just the main video because of --no-playlist')
-            if entry:
-                return entry
-        else:
-            self.to_screen(f'Downloading playlist {channel_id} - add --no-playlist to download just the main video')
+            return self._teaser_to_url_result(main_video) if main_video else None
 
-        def check_video(m):
-            v_ref = self._search_regex(
-                r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["']){}\2[^>]*>)'''.format(m.group('p_id')),
-                webpage, 'check id', default='')
-            v_ref = extract_attributes(v_ref)
-            return v_ref.get('data-target-video-type') != 'novideo'
+        self.to_screen(f'Downloading playlist {channel_id} - add --no-playlist to download just the main video')
 
-        return self.playlist_from_matches(
-            (m.group('url') for m in matches if check_video(m)),
-            channel_id, self._og_search_title(webpage, fatal=False))
+        thumbnails = (
+            traverse_obj(data, ('document', 'image'))
+            or traverse_obj(data, ('document', 'teaserBild'))
+            or traverse_obj(data, ('stageHeader', 'image'))
+            or {})
+
+        return self.playlist_result(
+            (self._teaser_to_url_result(video) for video in playlist_videos),
+            playlist_id=channel_id,
+            playlist_title=self._og_search_title(webpage, fatal=False),
+            description=self._get_playlist_description(data),
+            thumbnails=self._convert_thumbnails(thumbnails))
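
As an illustration of the new selection logic, here is a minimal standalone sketch of the cluster/teaser loop added to ZDFChannelIE._real_extract, exercised against a hypothetical mediathekV2 document (the field names follow the diff above; the sample values are invented for illustration only):

def select_teasers(data, document_id):
    # Mirror of the loop above: the first video teaser inside a 'teaserContent'
    # cluster becomes the main video, while video teasers inside 'teaser' clusters
    # are collected for the playlist only when they belong to the same brand.
    main_video = None
    playlist_videos = []
    for cluster in data['cluster']:
        for teaser in cluster['teaser']:
            if cluster['type'] == 'teaserContent' and teaser['type'] == 'video':
                main_video = main_video or teaser
            elif cluster['type'] == 'teaser' and teaser['type'] == 'video':
                if teaser['brandId'] != document_id:
                    continue  # unrelated 'You might also like' teaser
                playlist_videos.append(teaser)
    return main_video, playlist_videos

# Hypothetical document shape, only to show what the loop does:
sample = {'cluster': [
    {'type': 'teaserContent', 'teaser': [
        {'type': 'video', 'id': 'main-clip-100', 'brandId': 'planet-e'}]},
    {'type': 'teaser', 'teaser': [
        {'type': 'video', 'id': 'episode-102', 'brandId': 'planet-e'},
        {'type': 'video', 'id': 'recommended-104', 'brandId': 'another-brand'}]},
]}
main, playlist = select_teasers(sample, 'planet-e')
assert main['id'] == 'main-clip-100'
assert [t['id'] for t in playlist] == ['episode-102']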

yt_dlp/utils/_utils.py  (View File)

@@ -216,7 +216,7 @@ def partial_application(func):
     sig = inspect.signature(func)
     required_args = [
         param.name for param in sig.parameters.values()
-        if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.VAR_POSITIONAL)
+        if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
         if param.default is inspect.Parameter.empty
     ]
 
@@ -4837,7 +4837,6 @@ def number_of_digits(number):
     return len('%d' % number)
 
 
-@partial_application
 def join_nonempty(*values, delim='-', from_dict=None):
     if from_dict is not None:
         values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)
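
The net effect of dropping the decorator, shown with a simplified stand-in for join_nonempty (the real helper also handles from_dict and traversal); with VAR_POSITIONAL no longer counted as a required argument, a *values-only function is not treated as partially appliable, so calling it with only keyword arguments just joins an empty set of values:

def join_nonempty(*values, delim='-', from_dict=None):
    # Simplified stand-in: drop falsy values and join the rest.
    return delim.join(str(v) for v in values if v)

assert join_nonempty('a', None, 'b') == 'a-b'
assert join_nonempty(1, 2, 3, delim=' ') == '1 2 3'
# With @partial_application this call used to return a callable; now it is just '':
assert join_nonempty(delim=', ') == ''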

yt_dlp/utils/traversal.py  (View File)

@@ -452,9 +452,9 @@ def trim_str(*, start=None, end=None):
     return trim
 
 
-def unpack(func):
+def unpack(func, **kwargs):
     @functools.wraps(func)
-    def inner(items, **kwargs):
+    def inner(items):
         return func(*items, **kwargs)
 
     return inner
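
A short usage sketch of the new unpack signature (a standalone copy of the helper above, so it runs without yt-dlp installed): keyword arguments are now bound when the wrapper is created, and the wrapper itself only accepts the sequence to splat into func.

import functools

def unpack(func, **kwargs):
    # Same shape as the new helper: **kwargs are captured at wrap time.
    @functools.wraps(func)
    def inner(items):
        return func(*items, **kwargs)
    return inner

def tag(*values, delim='-'):
    return delim.join(str(v) for v in values)

assert unpack(tag)([1, 2, 3]) == '1-2-3'
assert unpack(tag, delim=' ')([1, 2, 3]) == '1 2 3'  # kwargs bound up front
# Passing kwargs at call time, e.g. unpack(tag)([1, 2, 3], delim=' '), now raises
# TypeError because inner() no longer accepts keyword arguments.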