Compare commits

...

18 Commits

Author SHA1 Message Date
kclauhk
84e3660278
Merge 48416c3718 into 1d253b0a27 2024-11-16 22:24:54 +01:00
Jackson Humphrey
1d253b0a27
[ie/patreon] Fix comments extraction (#11530)
Closes #11483
Authored by: jshumphrey, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-11-16 20:02:14 +00:00
powergold1
720b3dc453
[ie/chaturbate] Extract from API and support impersonation (#11555)
Closes #6546, Closes #10359
Authored by: powergold1
2024-11-16 19:55:40 +00:00
Jackson Humphrey
d215fba7ed
[ie/RedGifsUser] Fix extraction (#11531)
Closes #7382, Closes #9131
Authored by: jshumphrey
2024-11-16 19:50:17 +00:00
Jackson Humphrey
8388ec256f
[ie/spankbang] Support browser impersonation (#11542)
Closes #6545
Authored by: jshumphrey
2024-11-16 19:48:47 +00:00
sepro
6365e92589
[ie/bandlab] Add extractors (#11535)
Closes #7750
Authored by: seproDev
2024-11-16 17:56:43 +01:00
Alessandro Campolo
70c55cb08f
[ie/RadioRadicale] Add extractor (#5607)
Authored by: a13ssandr0, pzhlkj6612

Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
2024-11-16 13:56:15 +01:00
bashonly
c699bafc50 [ie/soop] Fix thumbnail extraction (#11545)
Closes #11537

Authored by: bashonly
2024-11-15 22:51:55 +00:00
bashonly
eb64ae7d5d [ie] Allow ext override for thumbnails (#11545)
Authored by: bashonly
2024-11-15 22:51:55 +00:00
Simon Sawicki
c014fbcddc
[utils] subs_list_to_dict: Add lang default parameter (#11508)
Authored by: Grub4K
2024-11-15 23:25:52 +01:00
Simon Sawicki
39d79c9b9c
[utils] Fix join_nonempty, add **kwargs to unpack (#11559)
Authored by: Grub4K
2024-11-15 22:06:15 +01:00
kclauhk
48416c3718 remove trailing whitespace 2024-10-31 12:25:11 +08:00
kclauhk
2fd16fdc6e ver contains instead of exact match 2024-10-31 12:12:25 +08:00
kclauhk
245ca4e515 use one ie_key only 2024-10-31 02:42:58 +08:00
kclauhk
0f49100783 correct file ext 2024-09-28 20:39:10 +08:00
kclauhk
05b4719f6c refactor
- avoid repeatedly calling `self._configuration_arg`
- avoid unnecessary looping when 'ver' argument is not provided
- add/change error messages
2024-09-28 17:00:22 +08:00
kclauhk
52f73ffc61 use geo_bypass_country instead and modify _REQUEST_HEADERS 2024-09-21 15:23:56 +08:00
kclauhk
83dedc8369 [ie/extrememusic] Add extractor 2024-09-17 03:29:56 +08:00
15 changed files with 1090 additions and 51 deletions

View File

@ -481,7 +481,7 @@ class TestTraversalHelpers:
'id': 'name', 'id': 'name',
'data': 'content', 'data': 'content',
'url': 'url', 'url': 'url',
}, all, {subs_list_to_dict}]) == { }, all, {subs_list_to_dict(lang=None)}]) == {
'de': [{'url': 'https://example.com/subs/de.ass'}], 'de': [{'url': 'https://example.com/subs/de.ass'}],
'en': [{'data': 'content'}], 'en': [{'data': 'content'}],
}, 'subs with mandatory items missing should be filtered' }, 'subs with mandatory items missing should be filtered'
@ -507,6 +507,54 @@ class TestTraversalHelpers:
{'url': 'https://example.com/subs/en1', 'ext': 'ext'}, {'url': 'https://example.com/subs/en1', 'ext': 'ext'},
{'url': 'https://example.com/subs/en2', 'ext': 'ext'}, {'url': 'https://example.com/subs/en2', 'ext': 'ext'},
]}, '`quality` key should sort subtitle list accordingly' ]}, '`quality` key should sort subtitle list accordingly'
assert traverse_obj([
{'name': 'de', 'url': 'https://example.com/subs/de.ass'},
{'name': 'de'},
{'name': 'en', 'content': 'content'},
{'url': 'https://example.com/subs/en'},
], [..., {
'id': 'name',
'url': 'url',
'data': 'content',
}, all, {subs_list_to_dict(lang='en')}]) == {
'de': [{'url': 'https://example.com/subs/de.ass'}],
'en': [
{'data': 'content'},
{'url': 'https://example.com/subs/en'},
],
}, 'optionally provided lang should be used if no id available'
assert traverse_obj([
{'name': 1, 'url': 'https://example.com/subs/de1'},
{'name': {}, 'url': 'https://example.com/subs/de2'},
{'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
{'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
], [..., {
'id': 'name',
'url': 'url',
'ext': 'ext',
}, all, {subs_list_to_dict(lang=None)}]) == {
'de': [
{'url': 'https://example.com/subs/de3'},
{'url': 'https://example.com/subs/de4'},
],
}, 'non str types should be ignored for id and ext'
assert traverse_obj([
{'name': 1, 'url': 'https://example.com/subs/de1'},
{'name': {}, 'url': 'https://example.com/subs/de2'},
{'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
{'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
], [..., {
'id': 'name',
'url': 'url',
'ext': 'ext',
}, all, {subs_list_to_dict(lang='de')}]) == {
'de': [
{'url': 'https://example.com/subs/de1'},
{'url': 'https://example.com/subs/de2'},
{'url': 'https://example.com/subs/de3'},
{'url': 'https://example.com/subs/de4'},
],
}, 'non str types should be replaced by default id'
def test_trim_str(self): def test_trim_str(self):
with pytest.raises(TypeError): with pytest.raises(TypeError):
@ -525,7 +573,7 @@ class TestTraversalHelpers:
def test_unpack(self): def test_unpack(self):
assert unpack(lambda *x: ''.join(map(str, x)))([1, 2, 3]) == '123' assert unpack(lambda *x: ''.join(map(str, x)))([1, 2, 3]) == '123'
assert unpack(join_nonempty)([1, 2, 3]) == '1-2-3' assert unpack(join_nonempty)([1, 2, 3]) == '1-2-3'
assert unpack(join_nonempty(delim=' '))([1, 2, 3]) == '1 2 3' assert unpack(join_nonempty, delim=' ')([1, 2, 3]) == '1 2 3'
with pytest.raises(TypeError): with pytest.raises(TypeError):
unpack(join_nonempty)() unpack(join_nonempty)()
with pytest.raises(TypeError): with pytest.raises(TypeError):

View File

@ -72,7 +72,6 @@ from yt_dlp.utils import (
intlist_to_bytes, intlist_to_bytes,
iri_to_uri, iri_to_uri,
is_html, is_html,
join_nonempty,
js_to_json, js_to_json,
limit_length, limit_length,
locked_file, locked_file,
@ -2158,10 +2157,6 @@ Line 1
assert int_or_none(v=10) == 10, 'keyword passed positional should call function' assert int_or_none(v=10) == 10, 'keyword passed positional should call function'
assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function' assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function'
assert callable(join_nonempty(delim=', ')), 'varargs positional should apply partially'
assert callable(join_nonempty()), 'varargs positional should apply partially'
assert join_nonempty(None, delim=', ') == '', 'passed varargs should call the function'
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -4381,7 +4381,9 @@ class YoutubeDL:
return None return None
for idx, t in list(enumerate(thumbnails))[::-1]: for idx, t in list(enumerate(thumbnails))[::-1]:
thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg') thumb_ext = t.get('ext') or determine_ext(t['url'], 'jpg')
if multiple:
thumb_ext = f'{t["id"]}.{thumb_ext}'
thumb_display_id = f'{label} thumbnail {t["id"]}' thumb_display_id = f'{label} thumbnail {t["id"]}'
thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext')) thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext')) thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

View File

@ -208,6 +208,10 @@ from .bandcamp import (
BandcampUserIE, BandcampUserIE,
BandcampWeeklyIE, BandcampWeeklyIE,
) )
from .bandlab import (
BandlabIE,
BandlabPlaylistIE,
)
from .bannedvideo import BannedVideoIE from .bannedvideo import BannedVideoIE
from .bbc import ( from .bbc import (
BBCIE, BBCIE,
@ -622,6 +626,11 @@ from .europeantour import EuropeanTourIE
from .eurosport import EurosportIE from .eurosport import EurosportIE
from .euscreen import EUScreenIE from .euscreen import EUScreenIE
from .expressen import ExpressenIE from .expressen import ExpressenIE
from .extrememusic import (
ExtremeMusicAIE,
ExtremeMusicIE,
ExtremeMusicPIE,
)
from .eyedotv import EyedoTVIE from .eyedotv import EyedoTVIE
from .facebook import ( from .facebook import (
FacebookAdsIE, FacebookAdsIE,
@ -1649,6 +1658,7 @@ from .radiokapital import (
RadioKapitalIE, RadioKapitalIE,
RadioKapitalShowIE, RadioKapitalShowIE,
) )
from .radioradicale import RadioRadicaleIE
from .radiozet import RadioZetPodcastIE from .radiozet import RadioZetPodcastIE
from .radlive import ( from .radlive import (
RadLiveChannelIE, RadLiveChannelIE,

View File

@ -66,6 +66,14 @@ class AfreecaTVBaseIE(InfoExtractor):
extensions={'legacy_ssl': True}), display_id, extensions={'legacy_ssl': True}), display_id,
'Downloading API JSON', 'Unable to download API JSON') 'Downloading API JSON', 'Unable to download API JSON')
@staticmethod
def _fixup_thumb(thumb_url):
if not url_or_none(thumb_url):
return None
# Core would determine_ext as 'php' from the url, so we need to provide the real ext
# See: https://github.com/yt-dlp/yt-dlp/issues/11537
return [{'url': thumb_url, 'ext': 'jpg'}]
class AfreecaTVIE(AfreecaTVBaseIE): class AfreecaTVIE(AfreecaTVBaseIE):
IE_NAME = 'soop' IE_NAME = 'soop'
@ -155,7 +163,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
'uploader': ('writer_nick', {str}), 'uploader': ('writer_nick', {str}),
'uploader_id': ('bj_id', {str}), 'uploader_id': ('bj_id', {str}),
'duration': ('total_file_duration', {int_or_none(scale=1000)}), 'duration': ('total_file_duration', {int_or_none(scale=1000)}),
'thumbnail': ('thumb', {url_or_none}), 'thumbnails': ('thumb', {self._fixup_thumb}),
}) })
entries = [] entries = []
@ -226,8 +234,7 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
return self.playlist_result(self._entries(data), video_id) return self.playlist_result(self._entries(data), video_id)
@staticmethod def _entries(self, data):
def _entries(data):
# 'files' is always a list with 1 element # 'files' is always a list with 1 element
yield from traverse_obj(data, ( yield from traverse_obj(data, (
'data', lambda _, v: v['story_type'] == 'catch', 'data', lambda _, v: v['story_type'] == 'catch',
@ -238,7 +245,7 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
'title': ('title', {str}), 'title': ('title', {str}),
'uploader': ('writer_nick', {str}), 'uploader': ('writer_nick', {str}),
'uploader_id': ('writer_id', {str}), 'uploader_id': ('writer_id', {str}),
'thumbnail': ('thumb', {url_or_none}), 'thumbnails': ('thumb', {self._fixup_thumb}),
'timestamp': ('write_timestamp', {int_or_none}), 'timestamp': ('write_timestamp', {int_or_none}),
})) }))

438
yt_dlp/extractor/bandlab.py Normal file
View File

@ -0,0 +1,438 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
format_field,
int_or_none,
parse_iso8601,
parse_qs,
truncate_string,
url_or_none,
)
from ..utils.traversal import traverse_obj, value
class BandlabBaseIE(InfoExtractor):
def _call_api(self, endpoint, asset_id, **kwargs):
headers = kwargs.pop('headers', None) or {}
return self._download_json(
f'https://www.bandlab.com/api/v1.3/{endpoint}/{asset_id}',
asset_id, headers={
'accept': 'application/json',
'referer': 'https://www.bandlab.com/',
'x-client-id': 'BandLab-Web',
'x-client-version': '10.1.124',
**headers,
}, **kwargs)
def _parse_revision(self, revision_data, url=None):
return {
'vcodec': 'none',
'media_type': 'revision',
'extractor_key': BandlabIE.ie_key(),
'extractor': BandlabIE.IE_NAME,
**traverse_obj(revision_data, {
'webpage_url': (
'id', ({value(url)}, {format_field(template='https://www.bandlab.com/revision/%s')}), filter, any),
'id': (('revisionId', 'id'), {str}, any),
'title': ('song', 'name', {str}),
'track': ('song', 'name', {str}),
'url': ('mixdown', 'file', {url_or_none}),
'thumbnail': ('song', 'picture', 'url', {url_or_none}),
'description': ('description', {str}),
'uploader': ('creator', 'name', {str}),
'uploader_id': ('creator', 'username', {str}),
'timestamp': ('createdOn', {parse_iso8601}),
'duration': ('mixdown', 'duration', {float_or_none}),
'view_count': ('counters', 'plays', {int_or_none}),
'like_count': ('counters', 'likes', {int_or_none}),
'comment_count': ('counters', 'comments', {int_or_none}),
'genres': ('genres', ..., 'name', {str}),
}),
}
def _parse_track(self, track_data, url=None):
return {
'vcodec': 'none',
'media_type': 'track',
'extractor_key': BandlabIE.ie_key(),
'extractor': BandlabIE.IE_NAME,
**traverse_obj(track_data, {
'webpage_url': (
'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
'id': (('revisionId', 'id'), {str}, any),
'url': ('track', 'sample', 'audioUrl', {url_or_none}),
'title': ('track', 'name', {str}),
'track': ('track', 'name', {str}),
'description': ('caption', {str}),
'thumbnail': ('track', 'picture', ('original', 'url'), {url_or_none}, any),
'view_count': ('counters', 'plays', {int_or_none}),
'like_count': ('counters', 'likes', {int_or_none}),
'comment_count': ('counters', 'comments', {int_or_none}),
'duration': ('track', 'sample', 'duration', {float_or_none}),
'uploader': ('creator', 'name', {str}),
'uploader_id': ('creator', 'username', {str}),
'timestamp': ('createdOn', {parse_iso8601}),
}),
}
def _parse_video(self, video_data, url=None):
return {
'media_type': 'video',
'extractor_key': BandlabIE.ie_key(),
'extractor': BandlabIE.IE_NAME,
**traverse_obj(video_data, {
'id': ('id', {str}),
'webpage_url': (
'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
'url': ('video', 'url', {url_or_none}),
'title': ('caption', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
'description': ('caption', {str}),
'thumbnail': ('video', 'picture', 'url', {url_or_none}),
'view_count': ('video', 'counters', 'plays', {int_or_none}),
'like_count': ('video', 'counters', 'likes', {int_or_none}),
'comment_count': ('counters', 'comments', {int_or_none}),
'duration': ('video', 'duration', {float_or_none}),
'uploader': ('creator', 'name', {str}),
'uploader_id': ('creator', 'username', {str}),
}),
}
class BandlabIE(BandlabBaseIE):
_VALID_URL = [
r'https?://(?:www\.)?bandlab.com/(?P<url_type>track|post|revision)/(?P<id>[\da-f_-]+)',
r'https?://(?:www\.)?bandlab.com/(?P<url_type>embed)/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
]
_EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
_TESTS = [{
'url': 'https://www.bandlab.com/track/04b37e88dba24967b9dac8eb8567ff39_07d7f906fc96ee11b75e000d3a428fff',
'md5': '46f7b43367dd268bbcf0bbe466753b2c',
'info_dict': {
'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
'ext': 'm4a',
'uploader_id': 'ender_milze',
'track': 'sweet black',
'description': 'composed by juanjn3737',
'timestamp': 1702171963,
'view_count': int,
'like_count': int,
'duration': 54.629999999999995,
'title': 'sweet black',
'upload_date': '20231210',
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
'genres': ['Lofi'],
'uploader': 'ender milze',
'comment_count': int,
'media_type': 'revision',
},
}, {
# Same track as above but post URL
'url': 'https://www.bandlab.com/post/07d7f906-fc96-ee11-b75e-000d3a428fff',
'md5': '46f7b43367dd268bbcf0bbe466753b2c',
'info_dict': {
'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
'ext': 'm4a',
'uploader_id': 'ender_milze',
'track': 'sweet black',
'description': 'composed by juanjn3737',
'timestamp': 1702171973,
'view_count': int,
'like_count': int,
'duration': 54.629999999999995,
'title': 'sweet black',
'upload_date': '20231210',
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
'genres': ['Lofi'],
'uploader': 'ender milze',
'comment_count': int,
'media_type': 'revision',
},
}, {
# SharedKey Example
'url': 'https://www.bandlab.com/track/048916c2-c6da-ee11-85f9-6045bd2e11f9?sharedKey=0NNWX8qYAEmI38lWAzCNDA',
'md5': '15174b57c44440e2a2008be9cae00250',
'info_dict': {
'id': '038916c2-c6da-ee11-85f9-6045bd2e11f9',
'ext': 'm4a',
'comment_count': int,
'genres': ['Other'],
'uploader_id': 'user8353034818103753',
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/51b18363-da23-4b9b-a29c-2933a3e561ca/',
'timestamp': 1709625771,
'track': 'PodcastMaerchen4b',
'duration': 468.14,
'view_count': int,
'description': 'Podcast: Neues aus der Märchenwelt',
'like_count': int,
'upload_date': '20240305',
'uploader': 'Erna Wageneder',
'title': 'PodcastMaerchen4b',
'media_type': 'revision',
},
}, {
# Different Revision selected
'url': 'https://www.bandlab.com/track/130343fc-148b-ea11-96d2-0003ffd1fc09?revId=110343fc-148b-ea11-96d2-0003ffd1fc09',
'md5': '74e055ef9325d63f37088772fbfe4454',
'info_dict': {
'id': '110343fc-148b-ea11-96d2-0003ffd1fc09',
'ext': 'm4a',
'timestamp': 1588273294,
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/b612e533-e4f7-4542-9f50-3fcfd8dd822c/',
'description': 'Final Revision.',
'title': 'Replay ( Instrumental)',
'uploader': 'David R Sparks',
'uploader_id': 'davesnothome69',
'view_count': int,
'comment_count': int,
'track': 'Replay ( Instrumental)',
'genres': ['Rock'],
'upload_date': '20200430',
'like_count': int,
'duration': 279.43,
'media_type': 'revision',
},
}, {
# Video
'url': 'https://www.bandlab.com/post/5cdf9036-3857-ef11-991a-6045bd36e0d9',
'md5': '8caa2ef28e86c1dacf167293cfdbeba9',
'info_dict': {
'id': '5cdf9036-3857-ef11-991a-6045bd36e0d9',
'ext': 'mp4',
'duration': 44.705,
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/videos/67c6cef1-cef6-40d3-831e-a55bc1dcb972/',
'comment_count': int,
'title': 'backing vocals',
'uploader_id': 'marliashya',
'uploader': 'auraa',
'like_count': int,
'description': 'backing vocals',
'media_type': 'video',
},
}, {
# Embed Example
'url': 'https://www.bandlab.com/embed/?blur=false&id=014de0a4-7d82-ea11-a94c-0003ffd19c0f',
'md5': 'a4ad05cb68c54faaed9b0a8453a8cf4a',
'info_dict': {
'id': '014de0a4-7d82-ea11-a94c-0003ffd19c0f',
'ext': 'm4a',
'comment_count': int,
'genres': ['Electronic'],
'uploader': 'Charlie Henson',
'timestamp': 1587328674,
'upload_date': '20200419',
'view_count': int,
'track': 'Positronic Meltdown',
'duration': 318.55,
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/87165bc3-5439-496e-b1f7-a9f13b541ff2/',
'description': 'Checkout my tracks at AOMX http://aomxsounds.com/',
'uploader_id': 'microfreaks',
'title': 'Positronic Meltdown',
'like_count': int,
'media_type': 'revision',
},
}, {
# Track without revisions available
'url': 'https://www.bandlab.com/track/55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
'md5': 'f05d68a3769952c2d9257c473e14c15f',
'info_dict': {
'id': '55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
'ext': 'm4a',
'track': 'insame',
'like_count': int,
'duration': 84.03,
'title': 'insame',
'view_count': int,
'comment_count': int,
'uploader': 'Sorakime',
'uploader_id': 'sorakime',
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/572a351a-0f3a-4c6a-ac39-1a5defdeeb1c/',
'timestamp': 1691162128,
'upload_date': '20230804',
'media_type': 'track',
},
}, {
'url': 'https://www.bandlab.com/revision/014de0a4-7d82-ea11-a94c-0003ffd19c0f',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
'url': 'https://phantomluigi.github.io/',
'info_dict': {
'id': 'e14223c3-7871-ef11-bdfd-000d3a980db3',
'ext': 'm4a',
'view_count': int,
'upload_date': '20240913',
'uploader_id': 'phantommusicofficial',
'timestamp': 1726194897,
'uploader': 'Phantom',
'comment_count': int,
'genres': ['Progresive Rock'],
'description': 'md5:a38cd668f7a2843295ef284114f18429',
'duration': 225.23,
'like_count': int,
'title': 'Vermilion Pt. 2 (Cover)',
'track': 'Vermilion Pt. 2 (Cover)',
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/62b10750-7aef-4f42-ad08-1af52f577e97/',
'media_type': 'revision',
},
}]
def _real_extract(self, url):
display_id, url_type = self._match_valid_url(url).group('id', 'url_type')
qs = parse_qs(url)
revision_id = traverse_obj(qs, (('revId', 'id'), 0, any))
if url_type == 'revision':
revision_id = display_id
revision_data = None
if not revision_id:
post_data = self._call_api(
'posts', display_id, note='Downloading post data',
query=traverse_obj(qs, {'sharedKey': ('sharedKey', 0)}))
revision_id = traverse_obj(post_data, (('revisionId', ('revision', 'id')), {str}, any))
revision_data = traverse_obj(post_data, ('revision', {dict}))
if not revision_data and not revision_id:
post_type = post_data.get('type')
if post_type == 'Video':
return self._parse_video(post_data, url=url)
if post_type == 'Track':
return self._parse_track(post_data, url=url)
raise ExtractorError(f'Could not extract data for post type {post_type!r}')
if not revision_data:
revision_data = self._call_api(
'revisions', revision_id, note='Downloading revision data', query={'edit': 'false'})
return self._parse_revision(revision_data, url=url)
class BandlabPlaylistIE(BandlabBaseIE):
_VALID_URL = [
r'https?://(?:www\.)?bandlab.com/(?:[\w]+/)?(?P<type>albums|collections)/(?P<id>[\da-f-]+)',
r'https?://(?:www\.)?bandlab.com/(?P<type>embed)/collection/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
]
_EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
_TESTS = [{
'url': 'https://www.bandlab.com/davesnothome69/albums/89b79ea6-de42-ed11-b495-00224845aac7',
'info_dict': {
'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/69507ff3-579a-45be-afca-9e87eddec944/',
'release_date': '20221003',
'title': 'Remnants',
'album': 'Remnants',
'like_count': int,
'album_type': 'LP',
'description': 'A collection of some feel good, rock hits.',
'comment_count': int,
'view_count': int,
'id': '89b79ea6-de42-ed11-b495-00224845aac7',
'uploader': 'David R Sparks',
'uploader_id': 'davesnothome69',
},
'playlist_count': 10,
}, {
'url': 'https://www.bandlab.com/slytheband/collections/955102d4-1040-ef11-86c3-000d3a42581b',
'info_dict': {
'id': '955102d4-1040-ef11-86c3-000d3a42581b',
'timestamp': 1720762659,
'view_count': int,
'title': 'My Shit 🖤',
'uploader_id': 'slytheband',
'uploader': '𝓢𝓛𝓨',
'upload_date': '20240712',
'like_count': int,
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/collections/2c64ca12-b180-4b76-8587-7a8da76bddc8/',
},
'playlist_count': 15,
}, {
# Embeds can contain both albums and collections with the same URL pattern. This is an album
'url': 'https://www.bandlab.com/embed/collection/?id=12cc6f7f-951b-ee11-907c-00224844f303',
'info_dict': {
'id': '12cc6f7f-951b-ee11-907c-00224844f303',
'release_date': '20230706',
'description': 'This is a collection of songs I created when I had an Amiga computer.',
'view_count': int,
'title': 'Mark Salud The Amiga Collection',
'uploader_id': 'mssirmooth1962',
'comment_count': int,
'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/d618bd7b-0537-40d5-bdd8-61b066e77d59/',
'like_count': int,
'uploader': 'Mark Salud',
'album': 'Mark Salud The Amiga Collection',
'album_type': 'LP',
},
'playlist_count': 24,
}, {
# Tracks without revision id
'url': 'https://www.bandlab.com/embed/collection/?id=e98aafb5-d932-ee11-b8f0-00224844c719',
'info_dict': {
'like_count': int,
'uploader_id': 'sorakime',
'comment_count': int,
'uploader': 'Sorakime',
'view_count': int,
'description': 'md5:4ec31c568a5f5a5a2b17572ea64c3825',
'release_date': '20230812',
'title': 'Art',
'album': 'Art',
'album_type': 'Album',
'id': 'e98aafb5-d932-ee11-b8f0-00224844c719',
'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/20c890de-e94a-4422-828a-2da6377a13c8/',
},
'playlist_count': 13,
}, {
'url': 'https://www.bandlab.com/albums/89b79ea6-de42-ed11-b495-00224845aac7',
'only_matching': True,
}]
def _entries(self, album_data):
for post in traverse_obj(album_data, ('posts', lambda _, v: v['type'])):
post_type = post['type']
if post_type == 'Revision':
yield self._parse_revision(post.get('revision'))
elif post_type == 'Track':
yield self._parse_track(post)
elif post_type == 'Video':
yield self._parse_video(post)
else:
self.report_warning(f'Skipping unknown post type: "{post_type}"')
def _real_extract(self, url):
playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')
endpoints = {
'albums': ['albums'],
'collections': ['collections'],
'embed': ['collections', 'albums'],
}.get(playlist_type)
for endpoint in endpoints:
playlist_data = self._call_api(
endpoint, playlist_id, note=f'Downloading {endpoint[:-1]} data',
fatal=False, expected_status=404)
if not playlist_data.get('errorCode'):
playlist_type = endpoint
break
if error_code := playlist_data.get('errorCode'):
raise ExtractorError(f'Could not find playlist data. Error code: "{error_code}"')
return self.playlist_result(
self._entries(playlist_data), playlist_id,
**traverse_obj(playlist_data, {
'title': ('name', {str}),
'description': ('description', {str}),
'uploader': ('creator', 'name', {str}),
'uploader_id': ('creator', 'username', {str}),
'timestamp': ('createdOn', {parse_iso8601}),
'release_date': ('releaseDate', {lambda x: x.replace('-', '')}, filter),
'thumbnail': ('picture', ('original', 'url'), {url_or_none}, any),
'like_count': ('counters', 'likes', {int_or_none}),
'comment_count': ('counters', 'comments', {int_or_none}),
'view_count': ('counters', 'plays', {int_or_none}),
}),
**(traverse_obj(playlist_data, {
'album': ('name', {str}),
'album_type': ('type', {str}),
}) if playlist_type == 'albums' else {}))

View File

@ -5,6 +5,7 @@ from ..utils import (
ExtractorError, ExtractorError,
lowercase_escape, lowercase_escape,
url_or_none, url_or_none,
urlencode_postdata,
) )
@ -40,14 +41,48 @@ class ChaturbateIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
_ROOM_OFFLINE = 'Room is currently offline' _ERROR_MAP = {
'offline': 'Room is currently offline',
'private': 'Room is currently in a private show',
'away': 'Performer is currently away',
'password protected': 'Room is password protected',
'hidden': 'Hidden session in progress',
}
def _real_extract(self, url): def _extract_from_api(self, video_id, tld):
video_id, tld = self._match_valid_url(url).group('id', 'tld') response = self._download_json(
f'https://chaturbate.{tld}/get_edge_hls_url_ajax/', video_id,
data=urlencode_postdata({'room_slug': video_id}),
headers={
**self.geo_verification_headers(),
'X-Requested-With': 'XMLHttpRequest',
'Accept': 'application/json',
}, fatal=False, impersonate=True) or {}
status = response.get('room_status')
if status != 'public':
if error := self._ERROR_MAP.get(status):
raise ExtractorError(error, expected=True)
self.report_warning('Falling back to webpage extraction')
return None
m3u8_url = response.get('url')
if not m3u8_url:
self.raise_geo_restricted()
return {
'id': video_id,
'title': video_id,
'thumbnail': f'https://roomimg.stream.highwebmedia.com/ri/{video_id}.jpg',
'is_live': True,
'age_limit': 18,
'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True),
}
def _extract_from_webpage(self, video_id, tld):
webpage = self._download_webpage( webpage = self._download_webpage(
f'https://chaturbate.{tld}/{video_id}/', video_id, f'https://chaturbate.{tld}/{video_id}/', video_id,
headers=self.geo_verification_headers()) headers=self.geo_verification_headers(), impersonate=True)
found_m3u8_urls = [] found_m3u8_urls = []
@ -85,8 +120,8 @@ class ChaturbateIE(InfoExtractor):
webpage, 'error', group='error', default=None) webpage, 'error', group='error', default=None)
if not error: if not error:
if any(p in webpage for p in ( if any(p in webpage for p in (
self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')): self._ERROR_MAP['offline'], 'offline_tipping', 'tip_offline')):
error = self._ROOM_OFFLINE error = self._ERROR_MAP['offline']
if error: if error:
raise ExtractorError(error, expected=True) raise ExtractorError(error, expected=True)
raise ExtractorError('Unable to find stream URL') raise ExtractorError('Unable to find stream URL')
@ -113,3 +148,7 @@ class ChaturbateIE(InfoExtractor):
'is_live': True, 'is_live': True,
'formats': formats, 'formats': formats,
} }
def _real_extract(self, url):
video_id, tld = self._match_valid_url(url).group('id', 'tld')
return self._extract_from_api(video_id, tld) or self._extract_from_webpage(video_id, tld)

View File

@ -279,6 +279,7 @@ class InfoExtractor:
thumbnails: A list of dictionaries, with the following entries: thumbnails: A list of dictionaries, with the following entries:
* "id" (optional, string) - Thumbnail format ID * "id" (optional, string) - Thumbnail format ID
* "url" * "url"
* "ext" (optional, string) - actual image extension if not given in URL
* "preference" (optional, int) - quality of the image * "preference" (optional, int) - quality of the image
* "width" (optional, int) * "width" (optional, int)
* "height" (optional, int) * "height" (optional, int)

View File

@ -0,0 +1,358 @@
import itertools
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
join_nonempty,
merge_dicts,
str_or_none,
traverse_obj,
unified_strdate,
url_or_none,
)
class ExtremeMusicBaseIE(InfoExtractor):
_API_URL = 'https://snapi.extrememusic.com'
_REQUEST_HEADERS = None
_REQUIRE_VERSION = []
def _initialize(self, url, video_id, country=None):
self._REQUIRE_VERSION = (self._configuration_arg('ver', ie_key='extrememusic')
or self._configuration_arg('version', ie_key='extrememusic'))
# This site serves different versions of the same playlist id due to geo-restriction
# use user's own country code if no code (geo_bypass_country or pre-defined country code) is provided
if not country:
country = self._download_webpage('https://ipapi.co/country_code', video_id)
self.to_screen(f'Set country code to {country}')
env = self._download_json('https://www.extrememusic.com/env', video_id)
self._REQUEST_HEADERS = {
'Accept': 'application/json',
'Origin': 'https://www.extrememusic.com',
'Referer': url,
'Sec-Fetch-Mode': 'cors',
'X-API-Auth': env['token'],
'X-Site-Id': 4,
'X-Viewer-Country': country.upper(),
}
def _get_album_data(self, album_id, video_id, fatal=True):
album = self._download_json(f'{self._API_URL}/albums/{album_id}', video_id, fatal=fatal,
note='Downloading album data', errnote='Unable to download album data',
headers=self._REQUEST_HEADERS) or {}
if video_id == album_id:
bio = self._download_json(f'{self._API_URL}/albums/{album_id}/bio', video_id, fatal=False,
note='Downloading album data', errnote='Unable to download album data',
headers=self._REQUEST_HEADERS) or {}
return merge_dicts(album, bio)
else:
return album
def _extract_track(self, album_data, track_id=None, version_id=None):
if 'tracks' in album_data and 'track_sounds' in album_data:
if not track_id and version_id:
track_id = traverse_obj(album_data['track_sounds'],
(lambda _, v: v['id'] == int(version_id), 'track_id', {int}), get_all=False)
if track := traverse_obj(album_data['tracks'],
(lambda _, v: v['id'] == int(track_id), {dict}), get_all=False):
info = {**traverse_obj(track, {
'track': ('title', {str}),
'track_number': ('sort_order', {lambda v: v + 1}, {int}),
'track_id': ('track_no', {str}),
'description': ('description', {lambda v: str_or_none(v) or None}),
'artists': ('artists', {lambda v: v or traverse_obj(album_data, ('album', 'artist'))},
{lambda v: (v if isinstance(v, list) else [v]) if v else None}),
'composers': ('composers', ..., 'name'),
'genres': (('genre', 'subgenre'), ..., 'label'),
'tag': ('keywords', ..., 'label'),
'album': ('album_title', {lambda v: str_or_none(v) or None}),
}), **traverse_obj(album_data, {
'album_artists': ('album', 'artist', {lambda v: [v] if v else None}),
'upload_date': ('album', 'created', {unified_strdate}),
})}
entries, thumbnails = [], []
for image in traverse_obj(track, ('images', 'default')):
thumbnails.append(traverse_obj(image, {
'url': ('url', {url_or_none}),
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
}))
if not self._REQUIRE_VERSION:
version_id = version_id or traverse_obj(track, 'default_track_sound_id', ('track_sound_ids', 0))
for sound_id in [version_id] if version_id else track['track_sound_ids']:
if sound := traverse_obj(album_data['track_sounds'],
(lambda _, v: v['id'] == int(sound_id) and v['track_id'] == int(track_id),
{dict}), get_all=False):
if (version_id
or 'all' in self._REQUIRE_VERSION
or any(x in sound['version_type'].lower() for x in self._REQUIRE_VERSION)):
formats = []
for audio_url in traverse_obj(sound, ('assets', 'audio', ('preview_url',
'preview_url_hls'))):
if determine_ext(audio_url) == 'm3u8':
m3u8_url = re.sub(r'\.m3u8\?.*', '/HLS/128_v4.m3u8', audio_url)
for f in self._extract_m3u8_formats(m3u8_url, sound_id, 'm4a', fatal=False):
formats.append({
**f,
'vcodec': 'none',
'perference': -2,
})
else:
formats.append({
'url': audio_url,
'vcodec': 'none',
})
entries.append({
'id': str(sound_id),
'title': join_nonempty('title', 'version_type', from_dict=sound, delim=' - '),
'alt_title': sound['version_type'],
**info,
'thumbnails': thumbnails,
'duration': sound.get('duration'),
'formats': formats,
'webpage_url': f"https://www.extrememusic.com/albums/{track['album_id']}?item={track_id}&ver={sound_id}",
})
if len(entries) > 1:
return {
'id': track_id,
**info,
'entries': entries,
'_type': 'playlist',
}
elif len(entries) == 1:
return entries[0]
else:
self.raise_no_formats('Track data not found', video_id=track_id)
return []
class ExtremeMusicIE(ExtremeMusicBaseIE):
_VALID_URL = r'https?://(?:www\.)?extrememusic\.com/albums/(?P<album>\d+)\?(.*item=(?P<id>\d+))?(.*ver=(?P<ver>\d+))?'
_TESTS = [{
'url': 'https://www.extrememusic.com/albums/15875?item=263381&ver=1265009&sharedTrack=dHJ1ZQ==',
'info_dict': {
'id': '1265009',
'ext': 'mp3',
'title': 'FOLLOW - Instrumental',
'alt_title': 'Instrumental',
'track': 'FOLLOW',
'track_number': 5,
'track_id': 'HPE316_05',
'artists': ['PRAERS'],
'composers': ['Joseph Andrew Banfi', 'Thomas Louis James White'],
'genres': ['POP', 'DREAM', 'INDIE'],
'tag': 'count:7',
'album': 'AVALON',
'album_artists': ['PRAERS'],
'upload_date': '20240729',
'thumbnail': 'https://d2oet5a29f64lj.cloudfront.net/img-data/w/2480/album/600/HPE316.jpg',
'duration': 246,
},
}, {
'url': 'https://www.extrememusic.com/albums/15823?ver=1262087',
'info_dict': {
'id': '1262087',
'ext': 'mp3',
'title': 'MAGICAL HIGHWAY - VOCALS',
'alt_title': 'VOCALS',
'track': 'MAGICAL HIGHWAY',
'track_number': 2,
'track_id': 'ASM0002_02',
'description': 'Full version - a fun, happy and upbeat pop track with a medium - fast tempo - electronic, bouncy, bright',
'composers': ['ENB'],
'genres': ['POP', 'ELECTRO', 'JPOP'],
'tag': 'count:8',
'album': 'TOKYO POPPIN\'',
'upload_date': '20240709',
'thumbnail': 'https://d2oet5a29f64lj.cloudfront.net/img-data/w/2480/album/600/ASM0002.jpg',
'duration': 265,
},
}, {
'url': 'https://www.extrememusic.com/albums/15064?item=254704',
'info_dict': {
'id': '1178851',
'ext': 'mp3',
'title': 'SWEET TOOTH - Full Version',
'alt_title': 'Full Version',
'track': 'SWEET TOOTH',
'track_number': 2,
'track_id': 'HPE263_02',
'artists': ['PILOT PAISLEY-ROSE'],
'composers': ['PILOT PAISLEY ROSE SARACENO', 'SAMUEL JAMES BRANDT'],
'genres': ['POP', 'ELECTRO', 'ROCK'],
'tag': 'count:7',
'album': 'ADDICTED',
'album_artists': ['PILOT PAISLEY-ROSE'],
'upload_date': '20230629',
'thumbnail': 'https://d2oet5a29f64lj.cloudfront.net/img-data/w/2480/album/600/HPE263.jpg',
'duration': 161,
},
}, {
'url': 'https://www.extrememusic.com/albums/1315?item=24795',
'info_dict': {
'id': '61003',
'ext': 'mp3',
'title': 'JOY TO THE WORLD (INST) - Instrumental',
'alt_title': 'Instrumental',
'track': 'JOY TO THE WORLD (INST)',
'track_number': 6,
'track_id': 'XEL016_06',
'composers': ['TRADITIONAL'],
'genres': ['HOLIDAY', 'CHRISTMAS'],
'tag': 'count:5',
'album': 'CHRISTMAS SPARKLE',
'upload_date': '20041001',
'thumbnail': 'https://d2oet5a29f64lj.cloudfront.net/img-data/w/2480/album/600/XEL016.jpg',
'duration': 132,
},
}]
def _real_extract(self, url):
album_id, track_id, version_id = self._match_valid_url(url).group('album', 'id', 'ver')
self._initialize(url, version_id or track_id, self.get_param('geo_bypass_country') or 'DE')
album_data = self._get_album_data(album_id, version_id or track_id)
if result := self._extract_track(album_data, track_id, version_id):
return result
else:
self.raise_no_formats('No formats were found')
class ExtremeMusicAIE(ExtremeMusicBaseIE):
IE_NAME = 'ExtremeMusic:album'
_VALID_URL = r'https?://(?:www\.)?extrememusic\.com/albums/(?P<id>\d+)(?!.*(item|ver)=)'
_TESTS = [{
'url': 'https://www.extrememusic.com/albums/6778',
'info_dict': {
'id': '6778',
'album': 'Ethereal Voices',
},
'playlist_count': 11,
}, {
'url': 'https://www.extrememusic.com/albums/15835',
'info_dict': {
'id': '15835',
'album': 'BIGGEST BANG',
'description': 'Minus Aura, a minimalist duo who create deep drama and emotion to put you under their spell.',
'artists': ['MINUS AURA'],
'genres': ['ELECTRONICA', 'POP', 'SYNTH'],
'tag': ['ELECTRONIC', 'STRUGGLE'],
},
'playlist_count': 4,
}]
def _real_extract(self, url):
album_id = self._match_id(url)
self._initialize(url, album_id, self.get_param('geo_bypass_country') or 'DE')
album_data = self._get_album_data(album_id, album_id)
entries = []
for track_id in traverse_obj(album_data, ('tracks', ..., 'id')):
if track := self._extract_track(album_data, track_id=track_id):
if track.get('entries'):
entries.extend(track['entries'])
else:
entries.append(track)
if entries:
subgenres = traverse_obj(album_data, ('album', 'subgenres', {str_or_none}))
return merge_dicts(traverse_obj(album_data.get('album'), {
'id': ('id', {lambda v: str(v)}),
'album': ('title', {str_or_none}),
'description': ('description', {lambda v: str_or_none(v) or None}),
'artists': ('artist', {lambda v: [v] if v else None}),
'genres': ('genres', {str_or_none}, {lambda v: join_nonempty(v, subgenres, delim=', ')},
{lambda v: v.split(', ') if v else None}),
'tag': ('keywords', {lambda v: v.split(', ') if v else None}),
}), {
'description': traverse_obj(album_data, ('bio', 'description', {lambda v: str_or_none(v) or None})),
'entries': entries,
'_type': 'playlist',
})
else:
self.raise_no_formats('No formats were found')
class ExtremeMusicPIE(ExtremeMusicBaseIE):
IE_NAME = 'ExtremeMusic:playlist'
_VALID_URL = r'https?://(?:www\.)?extrememusic\.com/playlists/(?P<id>[^?]+)'
_TESTS = [{
'url': 'https://www.extrememusic.com/playlists/Kf3fAppAKK2UpAUUp7KK1pBDBMrC62c_Kf8UKAAppUUKppK2UAp92K7Appp8xMx',
'info_dict': {
'id': 'Kf3fAppAKK2UpAUUp7KK1pBDBMrC62c_Kf8UKAAppUUKppK2UAp92K7Appp8xMx',
'title': 'NICE',
'thumbnail': 'https://d2oet5a29f64lj.cloudfront.net/img-data/w/2480/featureditem/square/thumbnail_PLAYLIST_Nice-square-(formerly ChristmasTraditional).jpg',
},
'playlist_mincount': 29,
'expected_warnings': ['This playlist has geo-restricted items. Try using --xff to specify a different country code, e.g. DE'],
}, {
'url': 'https://www.extrememusic.com/playlists/fUKKU5KAfK61pAAKp4U4KpKUxsRk2ki_fU117KpUUAAUKAUfpA6UAfAKK8Ul5ji',
'info_dict': {
'id': 'fUKKU5KAfK61pAAKp4U4KpKUxsRk2ki_fU117KpUUAAUKAUfpA6UAfAKK8Ul5ji',
'title': 'NEO CLASSICAL',
'thumbnail': 'https://d2oet5a29f64lj.cloudfront.net/img-data/w/2480/featureditem/square/NeoClassical.jpg',
},
'playlist_mincount': 50,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
self._initialize(url, playlist_id, self.get_param('geo_bypass_country'))
def playlist_query(playlist_id, offset, limit):
# playlist api: https://snapi.extrememusic.com/playlists?id={playlist_id}&range={offset}%2C{limit}'
return self._download_json(
'https://snapi.extrememusic.com/playlists', playlist_id,
note=f'Downloading item {offset + 1}-{offset + limit}', query={
'id': playlist_id,
'range': f'{offset},{limit}',
}, headers=self._REQUEST_HEADERS)
thumbnails, entries = [], []
album_data, track_done, limit = {}, [], 50
for i in itertools.count():
playlist = playlist_query(playlist_id, i * limit, limit)
if len(playlist['playlist_items']) == 0:
break
else:
track_ids = traverse_obj(playlist, ('playlist_items', ..., 'track_id'))
for track_id in list(dict.fromkeys(track_ids)):
if track_id not in track_done:
album_id = traverse_obj(playlist,
('tracks', lambda _, v: v['id'] == track_id, 'album_id', {int}), get_all=False)
if album_id not in album_data:
album_data[album_id] = self._get_album_data(album_id, track_id, fatal=False)
playlist['album'] = traverse_obj(album_data, (album_id, 'album', {dict}))
if track := self._extract_track(playlist, track_id=track_id):
if track.get('entries'):
entries.extend(track['entries'])
else:
entries.append(track)
track_done.append(track_id)
if len(track_done) >= playlist['playlist']['playlist_items_count']:
break
if entries:
if len(track_done) < playlist['playlist']['playlist_items_count']:
self.report_warning('This playlist has geo-restricted items. Try using --xff to specify a different country code, e.g. DE')
for image in traverse_obj(playlist['playlist'], ('images', 'square')):
thumbnails.append(traverse_obj(image, {
'url': ('url', {url_or_none}),
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
}))
return {k: v for k, v in {
'id': playlist['playlist']['id'],
'title': playlist['playlist']['title'],
'thumbnail': traverse_obj(thumbnails, (0, 'url', {url_or_none})),
'thumbnails': thumbnails,
'uploader': playlist['playlist']['owner_name'],
'entries': entries,
'_type': 'playlist',
}.items() if v}
else:
self.raise_no_formats('No formats were found')

View File

@ -16,10 +16,10 @@ from ..utils import (
parse_iso8601, parse_iso8601,
smuggle_url, smuggle_url,
str_or_none, str_or_none,
traverse_obj,
url_or_none, url_or_none,
urljoin, urljoin,
) )
from ..utils.traversal import traverse_obj, value
class PatreonBaseIE(InfoExtractor): class PatreonBaseIE(InfoExtractor):
@ -252,6 +252,27 @@ class PatreonIE(PatreonBaseIE):
'thumbnail': r're:^https?://.+', 'thumbnail': r're:^https?://.+',
}, },
'skip': 'Patron-only content', 'skip': 'Patron-only content',
}, {
# Contains a comment reply in the 'included' section
'url': 'https://www.patreon.com/posts/114721679',
'info_dict': {
'id': '114721679',
'ext': 'mp4',
'upload_date': '20241025',
'uploader': 'Japanalysis',
'like_count': int,
'thumbnail': r're:^https?://.+',
'comment_count': int,
'title': 'Karasawa Part 2',
'description': 'Part 2 of this video https://www.youtube.com/watch?v=Azms2-VTASk',
'uploader_url': 'https://www.patreon.com/japanalysis',
'uploader_id': '80504268',
'channel_url': 'https://www.patreon.com/japanalysis',
'channel_follower_count': int,
'timestamp': 1729897015,
'channel_id': '9346307',
},
'params': {'getcomments': True},
}] }]
_RETURN_TYPE = 'video' _RETURN_TYPE = 'video'
@ -404,26 +425,24 @@ class PatreonIE(PatreonBaseIE):
f'posts/{post_id}/comments', post_id, query=params, note=f'Downloading comments page {page}') f'posts/{post_id}/comments', post_id, query=params, note=f'Downloading comments page {page}')
cursor = None cursor = None
for comment in traverse_obj(response, (('data', ('included', lambda _, v: v['type'] == 'comment')), ...)): for comment in traverse_obj(response, (('data', 'included'), lambda _, v: v['type'] == 'comment' and v['id'])):
count += 1 count += 1
comment_id = comment.get('id')
attributes = comment.get('attributes') or {}
if comment_id is None:
continue
author_id = traverse_obj(comment, ('relationships', 'commenter', 'data', 'id')) author_id = traverse_obj(comment, ('relationships', 'commenter', 'data', 'id'))
author_info = traverse_obj(
response, ('included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes'),
get_all=False, expected_type=dict, default={})
yield { yield {
'id': comment_id, **traverse_obj(comment, {
'text': attributes.get('body'), 'id': ('id', {str_or_none}),
'timestamp': parse_iso8601(attributes.get('created')), 'text': ('attributes', 'body', {str}),
'parent': traverse_obj(comment, ('relationships', 'parent', 'data', 'id'), default='root'), 'timestamp': ('attributes', 'created', {parse_iso8601}),
'author_is_uploader': attributes.get('is_by_creator'), 'parent': ('relationships', 'parent', 'data', ('id', {value('root')}), {str}, any),
'author_is_uploader': ('attributes', 'is_by_creator', {bool}),
}),
**traverse_obj(response, (
'included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes', {
'author': ('full_name', {str}),
'author_thumbnail': ('image_url', {url_or_none}),
}), get_all=False),
'author_id': author_id, 'author_id': author_id,
'author': author_info.get('full_name'),
'author_thumbnail': author_info.get('image_url'),
} }
if count < traverse_obj(response, ('meta', 'count')): if count < traverse_obj(response, ('meta', 'count')):

View File

@ -0,0 +1,105 @@
from .common import InfoExtractor
from ..utils import url_or_none
from ..utils.traversal import traverse_obj
class RadioRadicaleIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?radioradicale\.it/scheda/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://www.radioradicale.it/scheda/471591',
'md5': 'eb0fbe43a601f1a361cbd00f3c45af4a',
'info_dict': {
'id': '471591',
'ext': 'mp4',
'title': 'md5:e8fbb8de57011a3255db0beca69af73d',
'description': 'md5:5e15a789a2fe4d67da8d1366996e89ef',
'location': 'Napoli',
'duration': 2852.0,
'timestamp': 1459987200,
'upload_date': '20160407',
'thumbnail': 'https://www.radioradicale.it/photo400/0/0/9/0/1/00901768.jpg',
},
}, {
'url': 'https://www.radioradicale.it/scheda/742783/parlamento-riunito-in-seduta-comune-11a-della-xix-legislatura',
'info_dict': {
'id': '742783',
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
'description': '-) Votazione per l\'elezione di un giudice della Corte Costituzionale (nono scrutinio)',
'location': 'CAMERA',
'duration': 5868.0,
'timestamp': 1730246400,
'upload_date': '20241030',
},
'playlist': [{
'md5': 'aa48de55dcc45478e4cd200f299aab7d',
'info_dict': {
'id': '742783-0',
'ext': 'mp4',
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
},
}, {
'md5': 'be915c189c70ad2920e5810f32260ff5',
'info_dict': {
'id': '742783-1',
'ext': 'mp4',
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
},
}, {
'md5': 'f0ee4047342baf8ed3128a8417ac5e0a',
'info_dict': {
'id': '742783-2',
'ext': 'mp4',
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
},
}],
}]
def _entries(self, videos_info, page_id):
for idx, video in enumerate(traverse_obj(
videos_info, ('playlist', lambda _, v: v['sources']))):
video_id = f'{page_id}-{idx}'
formats = []
subtitles = {}
for m3u8_url in traverse_obj(video, ('sources', ..., 'src', {url_or_none})):
fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
for sub in traverse_obj(video, ('subtitles', ..., lambda _, v: url_or_none(v['src']))):
self._merge_subtitles({sub.get('srclang') or 'und': [{
'url': sub['src'],
'name': sub.get('label'),
}]}, target=subtitles)
yield {
'id': video_id,
'title': video.get('title'),
'formats': formats,
'subtitles': subtitles,
}
def _real_extract(self, url):
page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)
videos_info = self._search_json(
r'jQuery\.extend\(Drupal\.settings\s*,',
webpage, 'videos_info', page_id)['RRscheda']
entries = list(self._entries(videos_info, page_id))
common_info = {
'id': page_id,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'location': videos_info.get('luogo'),
**self._search_json_ld(webpage, page_id),
}
if len(entries) == 1:
return {
**entries[0],
**common_info,
}
return self.playlist_result(entries, multi_video=True, **common_info)

View File

@ -213,7 +213,7 @@ class RedGifsSearchIE(RedGifsBaseInfoExtractor):
class RedGifsUserIE(RedGifsBaseInfoExtractor): class RedGifsUserIE(RedGifsBaseInfoExtractor):
IE_DESC = 'Redgifs user' IE_DESC = 'Redgifs user'
_VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?' _VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?'
_PAGE_SIZE = 30 _PAGE_SIZE = 80
_TESTS = [ _TESTS = [
{ {
'url': 'https://www.redgifs.com/users/lamsinka89', 'url': 'https://www.redgifs.com/users/lamsinka89',
@ -222,7 +222,7 @@ class RedGifsUserIE(RedGifsBaseInfoExtractor):
'title': 'lamsinka89', 'title': 'lamsinka89',
'description': 'RedGifs user lamsinka89, ordered by recent', 'description': 'RedGifs user lamsinka89, ordered by recent',
}, },
'playlist_mincount': 100, 'playlist_mincount': 391,
}, },
{ {
'url': 'https://www.redgifs.com/users/lamsinka89?page=3', 'url': 'https://www.redgifs.com/users/lamsinka89?page=3',
@ -231,7 +231,7 @@ class RedGifsUserIE(RedGifsBaseInfoExtractor):
'title': 'lamsinka89', 'title': 'lamsinka89',
'description': 'RedGifs user lamsinka89, ordered by recent', 'description': 'RedGifs user lamsinka89, ordered by recent',
}, },
'playlist_count': 30, 'playlist_count': 80,
}, },
{ {
'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g', 'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g',
@ -240,7 +240,17 @@ class RedGifsUserIE(RedGifsBaseInfoExtractor):
'title': 'lamsinka89', 'title': 'lamsinka89',
'description': 'RedGifs user lamsinka89, ordered by best', 'description': 'RedGifs user lamsinka89, ordered by best',
}, },
'playlist_mincount': 100, 'playlist_mincount': 391,
},
{
'url': 'https://www.redgifs.com/users/ignored52',
'note': 'https://github.com/yt-dlp/yt-dlp/issues/7382',
'info_dict': {
'id': 'ignored52',
'title': 'ignored52',
'description': 'RedGifs user ignored52, ordered by recent',
},
'playlist_mincount': 121,
}, },
] ]

View File

@ -71,9 +71,11 @@ class SpankBangIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') or mobj.group('id_2') video_id = mobj.group('id') or mobj.group('id_2')
country = self.get_param('geo_bypass_country') or 'US'
self._set_cookie('.spankbang.com', 'country', country.upper())
webpage = self._download_webpage( webpage = self._download_webpage(
url.replace(f'/{video_id}/embed', f'/{video_id}/video'), url.replace(f'/{video_id}/embed', f'/{video_id}/video'),
video_id, headers={'Cookie': 'country=US'}) video_id, impersonate=True)
if re.search(r'<[^>]+\b(?:id|class)=["\']video_removed', webpage): if re.search(r'<[^>]+\b(?:id|class)=["\']video_removed', webpage):
raise ExtractorError( raise ExtractorError(

View File

@ -216,7 +216,7 @@ def partial_application(func):
sig = inspect.signature(func) sig = inspect.signature(func)
required_args = [ required_args = [
param.name for param in sig.parameters.values() param.name for param in sig.parameters.values()
if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.VAR_POSITIONAL) if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
if param.default is inspect.Parameter.empty if param.default is inspect.Parameter.empty
] ]
@ -4837,7 +4837,6 @@ def number_of_digits(number):
return len('%d' % number) return len('%d' % number)
@partial_application
def join_nonempty(*values, delim='-', from_dict=None): def join_nonempty(*values, delim='-', from_dict=None):
if from_dict is not None: if from_dict is not None:
values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values) values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)

View File

@ -332,14 +332,14 @@ class _RequiredError(ExtractorError):
@typing.overload @typing.overload
def subs_list_to_dict(*, ext: str | None = None) -> collections.abc.Callable[[list[dict]], dict[str, list[dict]]]: ... def subs_list_to_dict(*, lang: str | None = 'und', ext: str | None = None) -> collections.abc.Callable[[list[dict]], dict[str, list[dict]]]: ...
@typing.overload @typing.overload
def subs_list_to_dict(subs: list[dict] | None, /, *, ext: str | None = None) -> dict[str, list[dict]]: ... def subs_list_to_dict(subs: list[dict] | None, /, *, lang: str | None = 'und', ext: str | None = None) -> dict[str, list[dict]]: ...
def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None): def subs_list_to_dict(subs: list[dict] | None = None, /, *, lang='und', ext=None):
""" """
Convert subtitles from a traversal into a subtitle dict. Convert subtitles from a traversal into a subtitle dict.
The path should have an `all` immediately before this function. The path should have an `all` immediately before this function.
@ -352,7 +352,7 @@ def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
`quality` The sort order for each subtitle `quality` The sort order for each subtitle
""" """
if subs is None: if subs is None:
return functools.partial(subs_list_to_dict, ext=ext) return functools.partial(subs_list_to_dict, lang=lang, ext=ext)
result = collections.defaultdict(list) result = collections.defaultdict(list)
@ -360,10 +360,16 @@ def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
if not url_or_none(sub.get('url')) and not sub.get('data'): if not url_or_none(sub.get('url')) and not sub.get('data'):
continue continue
sub_id = sub.pop('id', None) sub_id = sub.pop('id', None)
if sub_id is None: if not isinstance(sub_id, str):
continue if not lang:
if ext is not None and not sub.get('ext'): continue
sub['ext'] = ext sub_id = lang
sub_ext = sub.get('ext')
if not isinstance(sub_ext, str):
if not ext:
sub.pop('ext', None)
else:
sub['ext'] = ext
result[sub_id].append(sub) result[sub_id].append(sub)
result = dict(result) result = dict(result)
@ -452,9 +458,9 @@ def trim_str(*, start=None, end=None):
return trim return trim
def unpack(func): def unpack(func, **kwargs):
@functools.wraps(func) @functools.wraps(func)
def inner(items, **kwargs): def inner(items):
return func(*items, **kwargs) return func(*items, **kwargs)
return inner return inner