Merge fd3f0e936e into 6365e92589

[ie/bandlab] Add extractors (#11535)
Closes #7750 Authored by: seproDev
2024-11-26 01:01:25 +01:00 · 2024-11-16 17:57:18 +01:00 · 2024-11-16 17:56:43 +01:00 · 2024-11-16 13:56:15 +01:00 · 2024-11-15 22:51:55 +00:00 · 2024-11-15 22:51:55 +00:00
11 changed files with 655 additions and 25 deletions
--- a/test/test_traversal.py
+++ b/test/test_traversal.py
@ -481,7 +481,7 @@ class TestTraversalHelpers:
            'id': 'name',
            'data': 'content',
            'url': 'url',
-        }, all, {subs_list_to_dict}]) == {
+        }, all, {subs_list_to_dict(lang=None)}]) == {
            'de': [{'url': 'https://example.com/subs/de.ass'}],
            'en': [{'data': 'content'}],
        }, 'subs with mandatory items missing should be filtered'
@ -507,6 +507,54 @@ class TestTraversalHelpers:
            {'url': 'https://example.com/subs/en1', 'ext': 'ext'},
            {'url': 'https://example.com/subs/en2', 'ext': 'ext'},
        ]}, '`quality` key should sort subtitle list accordingly'
        assert traverse_obj([
            {'name': 'de', 'url': 'https://example.com/subs/de.ass'},
            {'name': 'de'},
            {'name': 'en', 'content': 'content'},
            {'url': 'https://example.com/subs/en'},
        ], [..., {
            'id': 'name',
            'url': 'url',
            'data': 'content',
        }, all, {subs_list_to_dict(lang='en')}]) == {
            'de': [{'url': 'https://example.com/subs/de.ass'}],
            'en': [
                {'data': 'content'},
                {'url': 'https://example.com/subs/en'},
            ],
        }, 'optionally provided lang should be used if no id available'
        assert traverse_obj([
            {'name': 1, 'url': 'https://example.com/subs/de1'},
            {'name': {}, 'url': 'https://example.com/subs/de2'},
            {'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
            {'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
        ], [..., {
            'id': 'name',
            'url': 'url',
            'ext': 'ext',
        }, all, {subs_list_to_dict(lang=None)}]) == {
            'de': [
                {'url': 'https://example.com/subs/de3'},
                {'url': 'https://example.com/subs/de4'},
            ],
        }, 'non str types should be ignored for id and ext'
        assert traverse_obj([
            {'name': 1, 'url': 'https://example.com/subs/de1'},
            {'name': {}, 'url': 'https://example.com/subs/de2'},
            {'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
            {'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
        ], [..., {
            'id': 'name',
            'url': 'url',
            'ext': 'ext',
        }, all, {subs_list_to_dict(lang='de')}]) == {
            'de': [
                {'url': 'https://example.com/subs/de1'},
                {'url': 'https://example.com/subs/de2'},
                {'url': 'https://example.com/subs/de3'},
                {'url': 'https://example.com/subs/de4'},
            ],
        }, 'non str types should be replaced by default id'
    def test_trim_str(self):
        with pytest.raises(TypeError):
@ -525,7 +573,7 @@ class TestTraversalHelpers:
    def test_unpack(self):
        assert unpack(lambda *x: ''.join(map(str, x)))([1, 2, 3]) == '123'
        assert unpack(join_nonempty)([1, 2, 3]) == '1-2-3'
-        assert unpack(join_nonempty(delim=' '))([1, 2, 3]) == '1 2 3'
+        assert unpack(join_nonempty, delim=' ')([1, 2, 3]) == '1 2 3'
        with pytest.raises(TypeError):
            unpack(join_nonempty)()
        with pytest.raises(TypeError):
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -72,7 +72,6 @@ from yt_dlp.utils import (
    intlist_to_bytes,
    iri_to_uri,
    is_html,
-    join_nonempty,
    js_to_json,
    limit_length,
    locked_file,
@ -2158,10 +2157,6 @@ Line 1
        assert int_or_none(v=10) == 10, 'keyword passed positional should call function'
        assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function'
-        assert callable(join_nonempty(delim=', ')), 'varargs positional should apply partially'
-        assert callable(join_nonempty()), 'varargs positional should apply partially'
-        assert join_nonempty(None, delim=', ') == '', 'passed varargs should call the function'
 if __name__ == '__main__':
    unittest.main()
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@ -4381,7 +4381,9 @@ class YoutubeDL:
            return None
        for idx, t in list(enumerate(thumbnails))[::-1]:
-            thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
+            thumb_ext = t.get('ext') or determine_ext(t['url'], 'jpg')
            if multiple:
                thumb_ext = f'{t["id"]}.{thumb_ext}'
            thumb_display_id = f'{label} thumbnail {t["id"]}'
            thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
            thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -208,6 +208,10 @@ from .bandcamp import (
    BandcampUserIE,
    BandcampWeeklyIE,
 )
 from .bandlab import (
    BandlabIE,
    BandlabPlaylistIE,
 )
 from .bannedvideo import BannedVideoIE
 from .bbc import (
    BBCIE,
@ -1649,6 +1653,7 @@ from .radiokapital import (
    RadioKapitalIE,
    RadioKapitalShowIE,
 )
 from .radioradicale import RadioRadicaleIE
 from .radiozet import RadioZetPodcastIE
 from .radlive import (
    RadLiveChannelIE,
--- a/yt_dlp/extractor/afreecatv.py
+++ b/yt_dlp/extractor/afreecatv.py
@ -66,6 +66,14 @@ class AfreecaTVBaseIE(InfoExtractor):
            extensions={'legacy_ssl': True}), display_id,
            'Downloading API JSON', 'Unable to download API JSON')
    @staticmethod
    def _fixup_thumb(thumb_url):
        """Wrap a thumbnail URL in a one-item ``thumbnails`` list with an explicit ``jpg`` ext.

        Returns ``None`` when ``url_or_none`` rejects *thumb_url*.
        """
        # The ext is stated explicitly because core would otherwise determine_ext as 'php' from the url
        # See: https://github.com/yt-dlp/yt-dlp/issues/11537
        thumbnail = {'url': thumb_url, 'ext': 'jpg'}
        return [thumbnail] if url_or_none(thumb_url) else None
 class AfreecaTVIE(AfreecaTVBaseIE):
    IE_NAME = 'soop'
@ -155,7 +163,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
            'uploader': ('writer_nick', {str}),
            'uploader_id': ('bj_id', {str}),
            'duration': ('total_file_duration', {int_or_none(scale=1000)}),
-            'thumbnail': ('thumb', {url_or_none}),
+            'thumbnails': ('thumb', {self._fixup_thumb}),
        })
        entries = []
@ -226,8 +234,7 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
        return self.playlist_result(self._entries(data), video_id)
-    @staticmethod
-    def _entries(data):
+    def _entries(self, data):
        # 'files' is always a list with 1 element
        yield from traverse_obj(data, (
            'data', lambda _, v: v['story_type'] == 'catch',
@ -238,7 +245,7 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
                'title': ('title', {str}),
                'uploader': ('writer_nick', {str}),
                'uploader_id': ('writer_id', {str}),
-                'thumbnail': ('thumb', {url_or_none}),
+                'thumbnails': ('thumb', {self._fixup_thumb}),
                'timestamp': ('write_timestamp', {int_or_none}),
            }))
--- a/yt_dlp/extractor/bandlab.py
+++ b/yt_dlp/extractor/bandlab.py
@ -0,0 +1,438 @@
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    float_or_none,
    format_field,
    int_or_none,
    parse_iso8601,
    parse_qs,
    truncate_string,
    url_or_none,
 )
 from ..utils.traversal import traverse_obj, value
 class BandlabBaseIE(InfoExtractor):
    def _call_api(self, endpoint, asset_id, **kwargs):
        """Fetch JSON for ``{endpoint}/{asset_id}`` from the BandLab v1.3 API.

        A ``headers`` keyword argument, if given, is merged over the default
        request headers; every other keyword argument is forwarded unchanged
        to ``_download_json``.
        """
        extra_headers = kwargs.pop('headers', None) or {}
        api_url = f'https://www.bandlab.com/api/v1.3/{endpoint}/{asset_id}'
        request_headers = {
            'accept': 'application/json',
            'referer': 'https://www.bandlab.com/',
            'x-client-id': 'BandLab-Web',
            'x-client-version': '10.1.124',
        }
        # Caller-supplied headers win over the defaults above
        request_headers.update(extra_headers)
        return self._download_json(api_url, asset_id, headers=request_headers, **kwargs)
    def _parse_revision(self, revision_data, url=None):
        """Turn a revision object into an audio-only info dict attributed to ``BandlabIE``.

        revision_data: revision mapping to read the metadata from
        url: optional page URL offered as the first ``webpage_url`` candidate,
             ahead of a ``/revision/<id>`` link formatted from the revision id
        """
        info = {
            'vcodec': 'none',
            'media_type': 'revision',
            'extractor_key': BandlabIE.ie_key(),
            'extractor': BandlabIE.IE_NAME,
        }
        # NOTE(review): which candidate ends up as webpage_url depends on the
        # `traverse_obj` branching/`filter`/`any` semantics defined outside this file
        webpage_url_path = (
            'id', ({value(url)}, {format_field(template='https://www.bandlab.com/revision/%s')}), filter, any)
        # Traversed fields are merged last, on top of the constant fields above
        info.update(traverse_obj(revision_data, {
            'webpage_url': webpage_url_path,
            'id': (('revisionId', 'id'), {str}, any),
            'title': ('song', 'name', {str}),
            'track': ('song', 'name', {str}),
            'url': ('mixdown', 'file', {url_or_none}),
            'thumbnail': ('song', 'picture', 'url', {url_or_none}),
            'description': ('description', {str}),
            'uploader': ('creator', 'name', {str}),
            'uploader_id': ('creator', 'username', {str}),
            'timestamp': ('createdOn', {parse_iso8601}),
            'duration': ('mixdown', 'duration', {float_or_none}),
            'view_count': ('counters', 'plays', {int_or_none}),
            'like_count': ('counters', 'likes', {int_or_none}),
            'comment_count': ('counters', 'comments', {int_or_none}),
            'genres': ('genres', ..., 'name', {str}),
        }))
        return info
    def _parse_track(self, track_data, url=None):
        """Turn a track post into an audio-only info dict attributed to ``BandlabIE``.

        track_data: post mapping whose ``track`` entry holds the sample audio
        url: optional page URL offered as the first ``webpage_url`` candidate,
             ahead of a ``/post/<id>`` link formatted from the post id
        """
        info = {
            'vcodec': 'none',
            'media_type': 'track',
            'extractor_key': BandlabIE.ie_key(),
            'extractor': BandlabIE.IE_NAME,
        }
        # NOTE(review): which candidate ends up as webpage_url depends on the
        # `traverse_obj` branching/`filter`/`any` semantics defined outside this file
        webpage_url_path = (
            'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any)
        # Traversed fields are merged last, on top of the constant fields above
        info.update(traverse_obj(track_data, {
            'webpage_url': webpage_url_path,
            'id': (('revisionId', 'id'), {str}, any),
            'url': ('track', 'sample', 'audioUrl', {url_or_none}),
            'title': ('track', 'name', {str}),
            'track': ('track', 'name', {str}),
            'description': ('caption', {str}),
            'thumbnail': ('track', 'picture', ('original', 'url'), {url_or_none}, any),
            'view_count': ('counters', 'plays', {int_or_none}),
            'like_count': ('counters', 'likes', {int_or_none}),
            'comment_count': ('counters', 'comments', {int_or_none}),
            'duration': ('track', 'sample', 'duration', {float_or_none}),
            'uploader': ('creator', 'name', {str}),
            'uploader_id': ('creator', 'username', {str}),
            'timestamp': ('createdOn', {parse_iso8601}),
        }))
        return info
    def _parse_video(self, video_data, url=None):
        """Turn a video post into an info dict attributed to ``BandlabIE``.

        video_data: post mapping whose ``video`` entry holds the media details
        url: optional page URL offered as the first ``webpage_url`` candidate,
             ahead of a ``/post/<id>`` link formatted from the post id
        """
        info = {
            'media_type': 'video',
            'extractor_key': BandlabIE.ie_key(),
            'extractor': BandlabIE.IE_NAME,
        }
        # NOTE(review): which candidate ends up as webpage_url depends on the
        # `traverse_obj` branching/`filter`/`any` semantics defined outside this file
        webpage_url_path = (
            'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any)
        # The title is derived from the caption: newlines become spaces, then it is truncated
        title_path = ('caption', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)})
        # Traversed fields are merged last, on top of the constant fields above
        info.update(traverse_obj(video_data, {
            'id': ('id', {str}),
            'webpage_url': webpage_url_path,
            'url': ('video', 'url', {url_or_none}),
            'title': title_path,
            'description': ('caption', {str}),
            'thumbnail': ('video', 'picture', 'url', {url_or_none}),
            'view_count': ('video', 'counters', 'plays', {int_or_none}),
            'like_count': ('video', 'counters', 'likes', {int_or_none}),
            'comment_count': ('counters', 'comments', {int_or_none}),
            'duration': ('video', 'duration', {float_or_none}),
            'uploader': ('creator', 'name', {str}),
            'uploader_id': ('creator', 'username', {str}),
        }))
        return info
 class BandlabIE(BandlabBaseIE):
    # URL patterns handled by this extractor:
    #   [0] track / post / revision pages, id taken from the path
    #   [1] embed player, id taken from the `id` query parameter
    # The host dot is escaped so that only the literal "bandlab.com" domain matches
    _VALID_URL = [
        r'https?://(?:www\.)?bandlab\.com/(?P<url_type>track|post|revision)/(?P<id>[\da-f_-]+)',
        r'https?://(?:www\.)?bandlab\.com/(?P<url_type>embed)/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
    ]
    # Embedded players are found via iframes whose src matches the embed pattern above
    _EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
    _TESTS = [{
        'url': 'https://www.bandlab.com/track/04b37e88dba24967b9dac8eb8567ff39_07d7f906fc96ee11b75e000d3a428fff',
        'md5': '46f7b43367dd268bbcf0bbe466753b2c',
        'info_dict': {
            'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
            'ext': 'm4a',
            'uploader_id': 'ender_milze',
            'track': 'sweet black',
            'description': 'composed by juanjn3737',
            'timestamp': 1702171963,
            'view_count': int,
            'like_count': int,
            'duration': 54.629999999999995,
            'title': 'sweet black',
            'upload_date': '20231210',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
            'genres': ['Lofi'],
            'uploader': 'ender milze',
            'comment_count': int,
            'media_type': 'revision',
        },
    }, {
        # Same track as above but post URL
        'url': 'https://www.bandlab.com/post/07d7f906-fc96-ee11-b75e-000d3a428fff',
        'md5': '46f7b43367dd268bbcf0bbe466753b2c',
        'info_dict': {
            'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
            'ext': 'm4a',
            'uploader_id': 'ender_milze',
            'track': 'sweet black',
            'description': 'composed by juanjn3737',
            'timestamp': 1702171973,
            'view_count': int,
            'like_count': int,
            'duration': 54.629999999999995,
            'title': 'sweet black',
            'upload_date': '20231210',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
            'genres': ['Lofi'],
            'uploader': 'ender milze',
            'comment_count': int,
            'media_type': 'revision',
        },
    }, {
        # SharedKey Example
        'url': 'https://www.bandlab.com/track/048916c2-c6da-ee11-85f9-6045bd2e11f9?sharedKey=0NNWX8qYAEmI38lWAzCNDA',
        'md5': '15174b57c44440e2a2008be9cae00250',
        'info_dict': {
            'id': '038916c2-c6da-ee11-85f9-6045bd2e11f9',
            'ext': 'm4a',
            'comment_count': int,
            'genres': ['Other'],
            'uploader_id': 'user8353034818103753',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/51b18363-da23-4b9b-a29c-2933a3e561ca/',
            'timestamp': 1709625771,
            'track': 'PodcastMaerchen4b',
            'duration': 468.14,
            'view_count': int,
            'description': 'Podcast: Neues aus der Märchenwelt',
            'like_count': int,
            'upload_date': '20240305',
            'uploader': 'Erna Wageneder',
            'title': 'PodcastMaerchen4b',
            'media_type': 'revision',
        },
    }, {
        # Different Revision selected
        'url': 'https://www.bandlab.com/track/130343fc-148b-ea11-96d2-0003ffd1fc09?revId=110343fc-148b-ea11-96d2-0003ffd1fc09',
        'md5': '74e055ef9325d63f37088772fbfe4454',
        'info_dict': {
            'id': '110343fc-148b-ea11-96d2-0003ffd1fc09',
            'ext': 'm4a',
            'timestamp': 1588273294,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/b612e533-e4f7-4542-9f50-3fcfd8dd822c/',
            'description': 'Final Revision.',
            'title': 'Replay ( Instrumental)',
            'uploader': 'David R Sparks',
            'uploader_id': 'davesnothome69',
            'view_count': int,
            'comment_count': int,
            'track': 'Replay ( Instrumental)',
            'genres': ['Rock'],
            'upload_date': '20200430',
            'like_count': int,
            'duration': 279.43,
            'media_type': 'revision',
        },
    }, {
        # Video
        'url': 'https://www.bandlab.com/post/5cdf9036-3857-ef11-991a-6045bd36e0d9',
        'md5': '8caa2ef28e86c1dacf167293cfdbeba9',
        'info_dict': {
            'id': '5cdf9036-3857-ef11-991a-6045bd36e0d9',
            'ext': 'mp4',
            'duration': 44.705,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/videos/67c6cef1-cef6-40d3-831e-a55bc1dcb972/',
            'comment_count': int,
            'title': 'backing vocals',
            'uploader_id': 'marliashya',
            'uploader': 'auraa',
            'like_count': int,
            'description': 'backing vocals',
            'media_type': 'video',
        },
    }, {
        # Embed Example
        'url': 'https://www.bandlab.com/embed/?blur=false&id=014de0a4-7d82-ea11-a94c-0003ffd19c0f',
        'md5': 'a4ad05cb68c54faaed9b0a8453a8cf4a',
        'info_dict': {
            'id': '014de0a4-7d82-ea11-a94c-0003ffd19c0f',
            'ext': 'm4a',
            'comment_count': int,
            'genres': ['Electronic'],
            'uploader': 'Charlie Henson',
            'timestamp': 1587328674,
            'upload_date': '20200419',
            'view_count': int,
            'track': 'Positronic Meltdown',
            'duration': 318.55,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/87165bc3-5439-496e-b1f7-a9f13b541ff2/',
            'description': 'Checkout my tracks at AOMX http://aomxsounds.com/',
            'uploader_id': 'microfreaks',
            'title': 'Positronic Meltdown',
            'like_count': int,
            'media_type': 'revision',
        },
    }, {
        # Track without revisions available
        'url': 'https://www.bandlab.com/track/55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
        'md5': 'f05d68a3769952c2d9257c473e14c15f',
        'info_dict': {
            'id': '55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
            'ext': 'm4a',
            'track': 'insame',
            'like_count': int,
            'duration': 84.03,
            'title': 'insame',
            'view_count': int,
            'comment_count': int,
            'uploader': 'Sorakime',
            'uploader_id': 'sorakime',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/572a351a-0f3a-4c6a-ac39-1a5defdeeb1c/',
            'timestamp': 1691162128,
            'upload_date': '20230804',
            'media_type': 'track',
        },
    }, {
        'url': 'https://www.bandlab.com/revision/014de0a4-7d82-ea11-a94c-0003ffd19c0f',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://phantomluigi.github.io/',
        'info_dict': {
            'id': 'e14223c3-7871-ef11-bdfd-000d3a980db3',
            'ext': 'm4a',
            'view_count': int,
            'upload_date': '20240913',
            'uploader_id': 'phantommusicofficial',
            'timestamp': 1726194897,
            'uploader': 'Phantom',
            'comment_count': int,
            'genres': ['Progresive Rock'],
            'description': 'md5:a38cd668f7a2843295ef284114f18429',
            'duration': 225.23,
            'like_count': int,
            'title': 'Vermilion Pt. 2 (Cover)',
            'track': 'Vermilion Pt. 2 (Cover)',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/62b10750-7aef-4f42-ad08-1af52f577e97/',
            'media_type': 'revision',
        },
    }]
    def _real_extract(self, url):
        """Extract a single BandLab revision, track or video from *url*.

        The revision id comes from the URL itself (``/revision/<id>``) or from
        the ``revId``/``id`` query parameters; when neither is present the post
        is downloaded and either its revision is used or the post is parsed
        directly as a video/track.
        """
        mobj = self._match_valid_url(url)
        display_id = mobj.group('id')
        url_type = mobj.group('url_type')
        query = parse_qs(url)

        if url_type == 'revision':
            revision_id = display_id
        else:
            revision_id = traverse_obj(query, (('revId', 'id'), 0, any))

        revision_data = None
        if not revision_id:
            # No revision known yet: look the post up (forwarding a sharedKey if the URL had one)
            post_data = self._call_api(
                'posts', display_id, note='Downloading post data',
                query=traverse_obj(query, {'sharedKey': ('sharedKey', 0)}))
            revision_id = traverse_obj(post_data, (('revisionId', ('revision', 'id')), {str}, any))
            revision_data = traverse_obj(post_data, ('revision', {dict}))

            if not (revision_data or revision_id):
                # The post carries no revision at all, so it has to be parsed by its own type
                post_type = post_data.get('type')
                if post_type == 'Video':
                    parse_post = self._parse_video
                elif post_type == 'Track':
                    parse_post = self._parse_track
                else:
                    raise ExtractorError(f'Could not extract data for post type {post_type!r}')
                return parse_post(post_data, url=url)

        # Only hit the revisions endpoint when the post did not already embed the revision
        revision_data = revision_data or self._call_api(
            'revisions', revision_id, note='Downloading revision data', query={'edit': 'false'})

        return self._parse_revision(revision_data, url=url)
 class BandlabPlaylistIE(BandlabBaseIE):
    # URL patterns handled by this extractor:
    #   [0] album / collection pages, optionally prefixed by a user path segment
    #   [1] embed player for a collection, id taken from the `id` query parameter
    # The host dot is escaped so that only the literal "bandlab.com" domain matches
    _VALID_URL = [
        r'https?://(?:www\.)?bandlab\.com/(?:[\w]+/)?(?P<type>albums|collections)/(?P<id>[\da-f-]+)',
        r'https?://(?:www\.)?bandlab\.com/(?P<type>embed)/collection/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
    ]
    # Embedded players are found via iframes whose src matches the embed pattern above
    _EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
    _TESTS = [{
        'url': 'https://www.bandlab.com/davesnothome69/albums/89b79ea6-de42-ed11-b495-00224845aac7',
        'info_dict': {
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/69507ff3-579a-45be-afca-9e87eddec944/',
            'release_date': '20221003',
            'title': 'Remnants',
            'album': 'Remnants',
            'like_count': int,
            'album_type': 'LP',
            'description': 'A collection of some feel good, rock hits.',
            'comment_count': int,
            'view_count': int,
            'id': '89b79ea6-de42-ed11-b495-00224845aac7',
            'uploader': 'David R Sparks',
            'uploader_id': 'davesnothome69',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.bandlab.com/slytheband/collections/955102d4-1040-ef11-86c3-000d3a42581b',
        'info_dict': {
            'id': '955102d4-1040-ef11-86c3-000d3a42581b',
            'timestamp': 1720762659,
            'view_count': int,
            'title': 'My Shit 🖤',
            'uploader_id': 'slytheband',
            'uploader': '𝓢𝓛𝓨',
            'upload_date': '20240712',
            'like_count': int,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/collections/2c64ca12-b180-4b76-8587-7a8da76bddc8/',
        },
        'playlist_count': 15,
    }, {
        # Embeds can contain both albums and collections with the same URL pattern. This is an album
        'url': 'https://www.bandlab.com/embed/collection/?id=12cc6f7f-951b-ee11-907c-00224844f303',
        'info_dict': {
            'id': '12cc6f7f-951b-ee11-907c-00224844f303',
            'release_date': '20230706',
            'description': 'This is a collection of songs I created when I had an Amiga computer.',
            'view_count': int,
            'title': 'Mark Salud The Amiga Collection',
            'uploader_id': 'mssirmooth1962',
            'comment_count': int,
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/d618bd7b-0537-40d5-bdd8-61b066e77d59/',
            'like_count': int,
            'uploader': 'Mark Salud',
            'album': 'Mark Salud The Amiga Collection',
            'album_type': 'LP',
        },
        'playlist_count': 24,
    }, {
        # Tracks without revision id
        'url': 'https://www.bandlab.com/embed/collection/?id=e98aafb5-d932-ee11-b8f0-00224844c719',
        'info_dict': {
            'like_count': int,
            'uploader_id': 'sorakime',
            'comment_count': int,
            'uploader': 'Sorakime',
            'view_count': int,
            'description': 'md5:4ec31c568a5f5a5a2b17572ea64c3825',
            'release_date': '20230812',
            'title': 'Art',
            'album': 'Art',
            'album_type': 'Album',
            'id': 'e98aafb5-d932-ee11-b8f0-00224844c719',
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/20c890de-e94a-4422-828a-2da6377a13c8/',
        },
        'playlist_count': 13,
    }, {
        'url': 'https://www.bandlab.com/albums/89b79ea6-de42-ed11-b495-00224845aac7',
        'only_matching': True,
    }]
    def _entries(self, album_data):
        """Yield one info dict per post of the album/collection, dispatching on the post's ``type``.

        Posts with an unrecognised type are reported with a warning and skipped.
        """
        typed_posts = traverse_obj(album_data, ('posts', lambda _, v: v['type']))
        for post in typed_posts:
            kind = post['type']
            if kind == 'Revision':
                entry = self._parse_revision(post.get('revision'))
            elif kind == 'Track':
                entry = self._parse_track(post)
            elif kind == 'Video':
                entry = self._parse_video(post)
            else:
                self.report_warning(f'Skipping unknown post type: "{kind}"')
                continue
            yield entry
    def _real_extract(self, url):
        """Extract a BandLab album or collection as a playlist.

        The URL type selects which API endpoints are tried; embed URLs do not
        say whether they point at a collection or an album, so both are tried
        in turn and the first response without an ``errorCode`` is used.

        Raises ExtractorError when no endpoint returns usable data, or when
        the last usable response still carries an ``errorCode``.
        """
        playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')
        endpoints = {
            'albums': ['albums'],
            'collections': ['collections'],
            'embed': ['collections', 'albums'],
        }.get(playlist_type)

        playlist_data = None
        for endpoint in endpoints:
            response = self._call_api(
                endpoint, playlist_id, note=f'Downloading {endpoint[:-1]} data',
                fatal=False, expected_status=404)
            if not isinstance(response, dict):
                # The request is non-fatal, so a failed download may hand back a
                # non-dict value instead of raising; move on to the next endpoint
                # rather than crashing on `.get()` below
                continue
            playlist_data = response
            if not playlist_data.get('errorCode'):
                # Remember which endpoint answered so album-only fields can be added below
                playlist_type = endpoint
                break

        if playlist_data is None:
            raise ExtractorError('Could not download playlist data')
        if error_code := playlist_data.get('errorCode'):
            raise ExtractorError(f'Could not find playlist data. Error code: "{error_code}"')

        return self.playlist_result(
            self._entries(playlist_data), playlist_id,
            **traverse_obj(playlist_data, {
                'title': ('name', {str}),
                'description': ('description', {str}),
                'uploader': ('creator', 'name', {str}),
                'uploader_id': ('creator', 'username', {str}),
                'timestamp': ('createdOn', {parse_iso8601}),
                'release_date': ('releaseDate', {lambda x: x.replace('-', '')}, filter),
                'thumbnail': ('picture', ('original', 'url'), {url_or_none}, any),
                'like_count': ('counters', 'likes', {int_or_none}),
                'comment_count': ('counters', 'comments', {int_or_none}),
                'view_count': ('counters', 'plays', {int_or_none}),
            }),
            # Album metadata is only attached when the albums endpoint supplied the data
            **(traverse_obj(playlist_data, {
                'album': ('name', {str}),
                'album_type': ('type', {str}),
            }) if playlist_type == 'albums' else {}))
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@ -279,6 +279,7 @@ class InfoExtractor:
    thumbnails:     A list of dictionaries, with the following entries:
                        * "id" (optional, string) - Thumbnail format ID
                        * "url"
                        * "ext" (optional, string) - actual image extension if not given in URL
                        * "preference" (optional, int) - quality of the image
                        * "width" (optional, int)
                        * "height" (optional, int)
--- a/yt_dlp/extractor/radioradicale.py
+++ b/yt_dlp/extractor/radioradicale.py
@ -0,0 +1,105 @@
 from .common import InfoExtractor
 from ..utils import url_or_none
 from ..utils.traversal import traverse_obj
 class RadioRadicaleIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?radioradicale\.it/scheda/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.radioradicale.it/scheda/471591',
        'md5': 'eb0fbe43a601f1a361cbd00f3c45af4a',
        'info_dict': {
            'id': '471591',
            'ext': 'mp4',
            'title': 'md5:e8fbb8de57011a3255db0beca69af73d',
            'description': 'md5:5e15a789a2fe4d67da8d1366996e89ef',
            'location': 'Napoli',
            'duration': 2852.0,
            'timestamp': 1459987200,
            'upload_date': '20160407',
            'thumbnail': 'https://www.radioradicale.it/photo400/0/0/9/0/1/00901768.jpg',
        },
    }, {
        'url': 'https://www.radioradicale.it/scheda/742783/parlamento-riunito-in-seduta-comune-11a-della-xix-legislatura',
        'info_dict': {
            'id': '742783',
            'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
            'description': '-) Votazione per l\'elezione di un giudice della Corte Costituzionale (nono scrutinio)',
            'location': 'CAMERA',
            'duration': 5868.0,
            'timestamp': 1730246400,
            'upload_date': '20241030',
        },
        'playlist': [{
            'md5': 'aa48de55dcc45478e4cd200f299aab7d',
            'info_dict': {
                'id': '742783-0',
                'ext': 'mp4',
                'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
            },
        }, {
            'md5': 'be915c189c70ad2920e5810f32260ff5',
            'info_dict': {
                'id': '742783-1',
                'ext': 'mp4',
                'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
            },
        }, {
            'md5': 'f0ee4047342baf8ed3128a8417ac5e0a',
            'info_dict': {
                'id': '742783-2',
                'ext': 'mp4',
                'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
            },
        }],
    }]
    def _entries(self, videos_info, page_id):
        """Yield one info dict per playlist item of *videos_info* that has ``sources``.

        Entry ids are ``<page_id>-<index>``. Formats and subtitles are gathered
        from every source's m3u8 manifest, and the item's own subtitle tracks
        are merged in afterwards.
        """
        playable_items = traverse_obj(
            videos_info, ('playlist', lambda _, v: v['sources']))
        for idx, video in enumerate(playable_items):
            video_id = f'{page_id}-{idx}'
            formats, subtitles = [], {}

            manifest_urls = traverse_obj(video, ('sources', ..., 'src', {url_or_none}))
            for m3u8_url in manifest_urls:
                fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)

            # Only subtitle tracks whose 'src' passes url_or_none are considered
            subtitle_tracks = traverse_obj(
                video, ('subtitles', ..., lambda _, v: url_or_none(v['src'])))
            for sub in subtitle_tracks:
                # Tracks without a language are filed under 'und'
                lang = sub.get('srclang') or 'und'
                track = {
                    'url': sub['src'],
                    'name': sub.get('label'),
                }
                self._merge_subtitles({lang: [track]}, target=subtitles)

            yield {
                'id': video_id,
                'title': video.get('title'),
                'formats': formats,
                'subtitles': subtitles,
            }
    def _real_extract(self, url):
        """Extract the video(s) of a radioradicale.it "scheda" page.

        Returns a single info dict when the page yields exactly one entry,
        otherwise a multi-video playlist; the page-level metadata is applied
        in both cases.
        """
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        drupal_settings = self._search_json(
            r'jQuery\.extend\(Drupal\.settings\s*,',
            webpage, 'videos_info', page_id)
        videos_info = drupal_settings['RRscheda']
        entries = list(self._entries(videos_info, page_id))

        common_info = {
            'id': page_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'location': videos_info.get('luogo'),
        }
        # JSON-LD values are merged last, on top of the page-derived fields above
        common_info.update(self._search_json_ld(webpage, page_id))

        if len(entries) != 1:
            return self.playlist_result(entries, multi_video=True, **common_info)

        # Single video: page-level metadata overrides the entry's own fields
        single_entry = dict(entries[0])
        single_entry.update(common_info)
        return single_entry
--- a/yt_dlp/postprocessor/embedthumbnail.py
+++ b/yt_dlp/postprocessor/embedthumbnail.py
@ -18,10 +18,13 @@ from ..utils import (
 )
 if mutagen:
    from mutagen.aiff import AIFF
    from mutagen.flac import FLAC, Picture
    from mutagen.id3 import APIC
    from mutagen.mp4 import MP4, MP4Cover
    from mutagen.oggopus import OggOpus
    from mutagen.oggvorbis import OggVorbis
    from mutagen.wave import WAVE
 class EmbedThumbnailPPError(PostProcessingError):
@ -218,9 +221,30 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
                f['METADATA_BLOCK_PICTURE'] = base64.b64encode(pic.write()).decode('ascii')
            f.save()
            temp_filename = filename
        elif info['ext'] in ['aiff', 'wav']:
            if not mutagen:
                raise EmbedThumbnailPPError('module mutagen was not found. Please install using `python3 -m pip install mutagen`')
            self._report_run('mutagen', filename)
            success = True
            try:
                f = {'aiff': AIFF, 'wav': WAVE}[info['ext']](filename)
                with open(thumbnail_filename, 'rb') as thumbfile:
                    thumb_data = thumbfile.read()
                f['APIC'] = APIC(
                    encoding=3,  # utf-8
                    mime=f'image/{thumbnail_ext.replace("jpg", "jpeg")}',
                    type=3,  # front cover
                    desc='Cover (front)',
                    data=thumb_data,
                )
                f.save()
                temp_filename = filename
            except Exception as err:
                success = False
                raise EmbedThumbnailPPError(f'unable to embed using mutagen; {err}')
        else:
-            raise EmbedThumbnailPPError('Supported filetypes for thumbnail embedding are: mp3, mkv/mka, ogg/opus/flac, m4a/mp4/m4v/mov')
+            raise EmbedThumbnailPPError('Supported filetypes for thumbnail embedding are: mp3, mkv/mka, ogg/opus/flac, m4a/mp4/m4v/mov, aiff/wav')
        if success and temp_filename != filename:
            os.replace(temp_filename, filename)
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@ -216,7 +216,7 @@ def partial_application(func):
    sig = inspect.signature(func)
    required_args = [
        param.name for param in sig.parameters.values()
-        if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.VAR_POSITIONAL)
+        if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
        if param.default is inspect.Parameter.empty
    ]
@ -4837,7 +4837,6 @@ def number_of_digits(number):
    return len('%d' % number)
@partial_application
 def join_nonempty(*values, delim='-', from_dict=None):
    if from_dict is not None:
        values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)
--- a/yt_dlp/utils/traversal.py
+++ b/yt_dlp/utils/traversal.py
@ -332,14 +332,14 @@ class _RequiredError(ExtractorError):
@typing.overload
-def subs_list_to_dict(*, ext: str | None = None) -> collections.abc.Callable[[list[dict]], dict[str, list[dict]]]: ...
+def subs_list_to_dict(*, lang: str | None = 'und', ext: str | None = None) -> collections.abc.Callable[[list[dict]], dict[str, list[dict]]]: ...
@typing.overload
-def subs_list_to_dict(subs: list[dict] | None, /, *, ext: str | None = None) -> dict[str, list[dict]]: ...
+def subs_list_to_dict(subs: list[dict] | None, /, *, lang: str | None = 'und', ext: str | None = None) -> dict[str, list[dict]]: ...
-def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
+def subs_list_to_dict(subs: list[dict] | None = None, /, *, lang='und', ext=None):
    """
    Convert subtitles from a traversal into a subtitle dict.
    The path should have an `all` immediately before this function.
@ -352,7 +352,7 @@ def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
    `quality`  The sort order for each subtitle
    """
    if subs is None:
-        return functools.partial(subs_list_to_dict, ext=ext)
+        return functools.partial(subs_list_to_dict, lang=lang, ext=ext)
    result = collections.defaultdict(list)
@ -360,9 +360,15 @@ def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
        if not url_or_none(sub.get('url')) and not sub.get('data'):
            continue
        sub_id = sub.pop('id', None)
-        if sub_id is None:
+        if not isinstance(sub_id, str):
            if not lang:
                continue
-        if ext is not None and not sub.get('ext'):
+            sub_id = lang
        sub_ext = sub.get('ext')
        if not isinstance(sub_ext, str):
            if not ext:
                sub.pop('ext', None)
            else:
                sub['ext'] = ext
        result[sub_id].append(sub)
    result = dict(result)
@ -452,9 +458,9 @@ def trim_str(*, start=None, end=None):
    return trim
-def unpack(func):
+def unpack(func, **kwargs):
    @functools.wraps(func)
-    def inner(items, **kwargs):
+    def inner(items):
        return func(*items, **kwargs)
    return inner
Author	SHA1	Message	Date
gavin	5af79b5e6b	Merge `fd3f0e936e` into `6365e92589`	2024-11-16 17:57:18 +01:00
sepro	6365e92589	[ie/bandlab] Add extractors (#11535) Closes #7750 Authored by: seproDev	2024-11-16 17:56:43 +01:00
Alessandro Campolo	70c55cb08f	[ie/RadioRadicale] Add extractor (#5607) Authored by: a13ssandr0, pzhlkj6612 Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com>	2024-11-16 13:56:15 +01:00
bashonly	c699bafc50	[ie/soop] Fix thumbnail extraction (#11545) Closes #11537 Authored by: bashonly	2024-11-15 22:51:55 +00:00
bashonly	eb64ae7d5d	[ie] Allow `ext` override for thumbnails (#11545) Authored by: bashonly	2024-11-15 22:51:55 +00:00
Simon Sawicki	c014fbcddc	[utils] `subs_list_to_dict`: Add `lang` default parameter (#11508) Authored by: Grub4K	2024-11-15 23:25:52 +01:00
Simon Sawicki	39d79c9b9c	[utils] Fix `join_nonempty`, add `**kwargs` to `unpack` (#11559) Authored by: Grub4K	2024-11-15 22:06:15 +01:00
bashonly	fd3f0e936e	Merge branch 'yt-dlp:master' into pr/11485	2024-11-09 17:58:59 -06:00
7x11x13	300a988ce3	EmbedThumbnailPP: add support for WAVE and AIFF files	2024-11-09 12:01:37 -05:00