Compare commits

...

19 Commits

Author SHA1 Message Date
Michal Kubeček
39df1402b0
Merge 560bcb5291 into 1d253b0a27 2024-11-17 02:32:18 +05:30
Jackson Humphrey
1d253b0a27
[ie/patreon] Fix comments extraction (#11530)
Closes #11483
Authored by: jshumphrey, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-11-16 20:02:14 +00:00
powergold1
720b3dc453
[ie/chaturbate] Extract from API and support impersonation (#11555)
Closes #6546, Closes #10359
Authored by: powergold1
2024-11-16 19:55:40 +00:00
Jackson Humphrey
d215fba7ed
[ie/RedGifsUser] Fix extraction (#11531)
Closes #7382, Closes #9131
Authored by: jshumphrey
2024-11-16 19:50:17 +00:00
Jackson Humphrey
8388ec256f
[ie/spankbang] Support browser impersonation (#11542)
Closes #6545
Authored by: jshumphrey
2024-11-16 19:48:47 +00:00
sepro
6365e92589
[ie/bandlab] Add extractors (#11535)
Closes #7750
Authored by: seproDev
2024-11-16 17:56:43 +01:00
Alessandro Campolo
70c55cb08f
[ie/RadioRadicale] Add extractor (#5607)
Authored by: a13ssandr0, pzhlkj6612

Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
2024-11-16 13:56:15 +01:00
bashonly
c699bafc50 [ie/soop] Fix thumbnail extraction (#11545)
Closes #11537

Authored by: bashonly
2024-11-15 22:51:55 +00:00
bashonly
eb64ae7d5d [ie] Allow ext override for thumbnails (#11545)
Authored by: bashonly
2024-11-15 22:51:55 +00:00
Simon Sawicki
c014fbcddc
[utils] subs_list_to_dict: Add lang default parameter (#11508)
Authored by: Grub4K
2024-11-15 23:25:52 +01:00
Simon Sawicki
39d79c9b9c
[utils] Fix join_nonempty, add **kwargs to unpack (#11559)
Authored by: Grub4K
2024-11-15 22:06:15 +01:00
bashonly
560bcb5291
Merge branch 'yt-dlp:master' into pr/6698 2024-11-07 11:51:23 -06:00
bashonly
44c8cdb728
post-merge cleanup 2024-10-26 15:19:13 +00:00
bashonly
19003f882b
Merge branch 'master' into mk/ct-2023-03 2024-10-26 15:14:05 +00:00
Michal Kubecek
6f1db75869
[ceskatelevize] update selftests
Most selftest metadata is no longer correct.

- use 'live_status' to identify live broadcast; this can be no longer
  recognized from item['type'] (which is always 'VOD') so move the
  detection to the code path where we can actually find out
- update outdated test metadata
- drop georestricted test, the URL is no longer valid and I failed to find
  out what should be the right one
- disable live broadcast tests for now; CT1 does not have a stable id any
  more and the selftest framework requires duration which depends on
  current program
2024-03-20 08:01:47 +01:00
Michal Kubecek
4f31126f07
[ceskatelevize] more robust sidp detection
For older style livestreams (all except CT4 Sport and CT24), sidp value
cannot be determined from URL; the value used by browsers can be found as
showID inside the next_data JSON.
2024-03-20 08:00:17 +01:00
Michal Kubecek
e1b623cea1
[ceskatelevize] fix live broadcast
After recent site changes, live streaming is handled in three different
ways. Update the code to deal with all of them. Also update the test URLs.
2024-03-20 08:00:13 +01:00
pukkandan
59cdcf7795
Update yt_dlp/extractor/ceskatelevize.py 2024-03-20 08:00:02 +01:00
Michal Kubecek
a03cd32b71
[ceskatelevize] update to March 2023 changes (#6539)
Note: we could even skip downloading the player.ceskatelevize.cz page
completely as we do not actually need it to get the information we used to
need before the recent website changes. However, we would not catch the
errors that are handled here and the resulting output could be quite
confusing if one of them does happen.
2024-03-20 07:59:57 +01:00
15 changed files with 840 additions and 116 deletions

View File

@ -481,7 +481,7 @@ class TestTraversalHelpers:
'id': 'name', 'id': 'name',
'data': 'content', 'data': 'content',
'url': 'url', 'url': 'url',
}, all, {subs_list_to_dict}]) == { }, all, {subs_list_to_dict(lang=None)}]) == {
'de': [{'url': 'https://example.com/subs/de.ass'}], 'de': [{'url': 'https://example.com/subs/de.ass'}],
'en': [{'data': 'content'}], 'en': [{'data': 'content'}],
}, 'subs with mandatory items missing should be filtered' }, 'subs with mandatory items missing should be filtered'
@ -507,6 +507,54 @@ class TestTraversalHelpers:
{'url': 'https://example.com/subs/en1', 'ext': 'ext'}, {'url': 'https://example.com/subs/en1', 'ext': 'ext'},
{'url': 'https://example.com/subs/en2', 'ext': 'ext'}, {'url': 'https://example.com/subs/en2', 'ext': 'ext'},
]}, '`quality` key should sort subtitle list accordingly' ]}, '`quality` key should sort subtitle list accordingly'
assert traverse_obj([
{'name': 'de', 'url': 'https://example.com/subs/de.ass'},
{'name': 'de'},
{'name': 'en', 'content': 'content'},
{'url': 'https://example.com/subs/en'},
], [..., {
'id': 'name',
'url': 'url',
'data': 'content',
}, all, {subs_list_to_dict(lang='en')}]) == {
'de': [{'url': 'https://example.com/subs/de.ass'}],
'en': [
{'data': 'content'},
{'url': 'https://example.com/subs/en'},
],
}, 'optionally provided lang should be used if no id available'
assert traverse_obj([
{'name': 1, 'url': 'https://example.com/subs/de1'},
{'name': {}, 'url': 'https://example.com/subs/de2'},
{'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
{'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
], [..., {
'id': 'name',
'url': 'url',
'ext': 'ext',
}, all, {subs_list_to_dict(lang=None)}]) == {
'de': [
{'url': 'https://example.com/subs/de3'},
{'url': 'https://example.com/subs/de4'},
],
}, 'non str types should be ignored for id and ext'
assert traverse_obj([
{'name': 1, 'url': 'https://example.com/subs/de1'},
{'name': {}, 'url': 'https://example.com/subs/de2'},
{'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
{'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
], [..., {
'id': 'name',
'url': 'url',
'ext': 'ext',
}, all, {subs_list_to_dict(lang='de')}]) == {
'de': [
{'url': 'https://example.com/subs/de1'},
{'url': 'https://example.com/subs/de2'},
{'url': 'https://example.com/subs/de3'},
{'url': 'https://example.com/subs/de4'},
],
}, 'non str types should be replaced by default id'
def test_trim_str(self): def test_trim_str(self):
with pytest.raises(TypeError): with pytest.raises(TypeError):
@ -525,7 +573,7 @@ class TestTraversalHelpers:
def test_unpack(self): def test_unpack(self):
assert unpack(lambda *x: ''.join(map(str, x)))([1, 2, 3]) == '123' assert unpack(lambda *x: ''.join(map(str, x)))([1, 2, 3]) == '123'
assert unpack(join_nonempty)([1, 2, 3]) == '1-2-3' assert unpack(join_nonempty)([1, 2, 3]) == '1-2-3'
assert unpack(join_nonempty(delim=' '))([1, 2, 3]) == '1 2 3' assert unpack(join_nonempty, delim=' ')([1, 2, 3]) == '1 2 3'
with pytest.raises(TypeError): with pytest.raises(TypeError):
unpack(join_nonempty)() unpack(join_nonempty)()
with pytest.raises(TypeError): with pytest.raises(TypeError):

View File

@ -72,7 +72,6 @@ from yt_dlp.utils import (
intlist_to_bytes, intlist_to_bytes,
iri_to_uri, iri_to_uri,
is_html, is_html,
join_nonempty,
js_to_json, js_to_json,
limit_length, limit_length,
locked_file, locked_file,
@ -2158,10 +2157,6 @@ Line 1
assert int_or_none(v=10) == 10, 'keyword passed positional should call function' assert int_or_none(v=10) == 10, 'keyword passed positional should call function'
assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function' assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function'
assert callable(join_nonempty(delim=', ')), 'varargs positional should apply partially'
assert callable(join_nonempty()), 'varargs positional should apply partially'
assert join_nonempty(None, delim=', ') == '', 'passed varargs should call the function'
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -4381,7 +4381,9 @@ class YoutubeDL:
return None return None
for idx, t in list(enumerate(thumbnails))[::-1]: for idx, t in list(enumerate(thumbnails))[::-1]:
thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg') thumb_ext = t.get('ext') or determine_ext(t['url'], 'jpg')
if multiple:
thumb_ext = f'{t["id"]}.{thumb_ext}'
thumb_display_id = f'{label} thumbnail {t["id"]}' thumb_display_id = f'{label} thumbnail {t["id"]}'
thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext')) thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext')) thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

View File

@ -208,6 +208,10 @@ from .bandcamp import (
BandcampUserIE, BandcampUserIE,
BandcampWeeklyIE, BandcampWeeklyIE,
) )
from .bandlab import (
BandlabIE,
BandlabPlaylistIE,
)
from .bannedvideo import BannedVideoIE from .bannedvideo import BannedVideoIE
from .bbc import ( from .bbc import (
BBCIE, BBCIE,
@ -1649,6 +1653,7 @@ from .radiokapital import (
RadioKapitalIE, RadioKapitalIE,
RadioKapitalShowIE, RadioKapitalShowIE,
) )
from .radioradicale import RadioRadicaleIE
from .radiozet import RadioZetPodcastIE from .radiozet import RadioZetPodcastIE
from .radlive import ( from .radlive import (
RadLiveChannelIE, RadLiveChannelIE,

View File

@ -66,6 +66,14 @@ class AfreecaTVBaseIE(InfoExtractor):
extensions={'legacy_ssl': True}), display_id, extensions={'legacy_ssl': True}), display_id,
'Downloading API JSON', 'Unable to download API JSON') 'Downloading API JSON', 'Unable to download API JSON')
@staticmethod
def _fixup_thumb(thumb_url):
if not url_or_none(thumb_url):
return None
# Core would determine_ext as 'php' from the url, so we need to provide the real ext
# See: https://github.com/yt-dlp/yt-dlp/issues/11537
return [{'url': thumb_url, 'ext': 'jpg'}]
class AfreecaTVIE(AfreecaTVBaseIE): class AfreecaTVIE(AfreecaTVBaseIE):
IE_NAME = 'soop' IE_NAME = 'soop'
@ -155,7 +163,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
'uploader': ('writer_nick', {str}), 'uploader': ('writer_nick', {str}),
'uploader_id': ('bj_id', {str}), 'uploader_id': ('bj_id', {str}),
'duration': ('total_file_duration', {int_or_none(scale=1000)}), 'duration': ('total_file_duration', {int_or_none(scale=1000)}),
'thumbnail': ('thumb', {url_or_none}), 'thumbnails': ('thumb', {self._fixup_thumb}),
}) })
entries = [] entries = []
@ -226,8 +234,7 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
return self.playlist_result(self._entries(data), video_id) return self.playlist_result(self._entries(data), video_id)
@staticmethod def _entries(self, data):
def _entries(data):
# 'files' is always a list with 1 element # 'files' is always a list with 1 element
yield from traverse_obj(data, ( yield from traverse_obj(data, (
'data', lambda _, v: v['story_type'] == 'catch', 'data', lambda _, v: v['story_type'] == 'catch',
@ -238,7 +245,7 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
'title': ('title', {str}), 'title': ('title', {str}),
'uploader': ('writer_nick', {str}), 'uploader': ('writer_nick', {str}),
'uploader_id': ('writer_id', {str}), 'uploader_id': ('writer_id', {str}),
'thumbnail': ('thumb', {url_or_none}), 'thumbnails': ('thumb', {self._fixup_thumb}),
'timestamp': ('write_timestamp', {int_or_none}), 'timestamp': ('write_timestamp', {int_or_none}),
})) }))

438
yt_dlp/extractor/bandlab.py Normal file
View File

@ -0,0 +1,438 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
format_field,
int_or_none,
parse_iso8601,
parse_qs,
truncate_string,
url_or_none,
)
from ..utils.traversal import traverse_obj, value
class BandlabBaseIE(InfoExtractor):
def _call_api(self, endpoint, asset_id, **kwargs):
headers = kwargs.pop('headers', None) or {}
return self._download_json(
f'https://www.bandlab.com/api/v1.3/{endpoint}/{asset_id}',
asset_id, headers={
'accept': 'application/json',
'referer': 'https://www.bandlab.com/',
'x-client-id': 'BandLab-Web',
'x-client-version': '10.1.124',
**headers,
}, **kwargs)
def _parse_revision(self, revision_data, url=None):
return {
'vcodec': 'none',
'media_type': 'revision',
'extractor_key': BandlabIE.ie_key(),
'extractor': BandlabIE.IE_NAME,
**traverse_obj(revision_data, {
'webpage_url': (
'id', ({value(url)}, {format_field(template='https://www.bandlab.com/revision/%s')}), filter, any),
'id': (('revisionId', 'id'), {str}, any),
'title': ('song', 'name', {str}),
'track': ('song', 'name', {str}),
'url': ('mixdown', 'file', {url_or_none}),
'thumbnail': ('song', 'picture', 'url', {url_or_none}),
'description': ('description', {str}),
'uploader': ('creator', 'name', {str}),
'uploader_id': ('creator', 'username', {str}),
'timestamp': ('createdOn', {parse_iso8601}),
'duration': ('mixdown', 'duration', {float_or_none}),
'view_count': ('counters', 'plays', {int_or_none}),
'like_count': ('counters', 'likes', {int_or_none}),
'comment_count': ('counters', 'comments', {int_or_none}),
'genres': ('genres', ..., 'name', {str}),
}),
}
def _parse_track(self, track_data, url=None):
return {
'vcodec': 'none',
'media_type': 'track',
'extractor_key': BandlabIE.ie_key(),
'extractor': BandlabIE.IE_NAME,
**traverse_obj(track_data, {
'webpage_url': (
'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
'id': (('revisionId', 'id'), {str}, any),
'url': ('track', 'sample', 'audioUrl', {url_or_none}),
'title': ('track', 'name', {str}),
'track': ('track', 'name', {str}),
'description': ('caption', {str}),
'thumbnail': ('track', 'picture', ('original', 'url'), {url_or_none}, any),
'view_count': ('counters', 'plays', {int_or_none}),
'like_count': ('counters', 'likes', {int_or_none}),
'comment_count': ('counters', 'comments', {int_or_none}),
'duration': ('track', 'sample', 'duration', {float_or_none}),
'uploader': ('creator', 'name', {str}),
'uploader_id': ('creator', 'username', {str}),
'timestamp': ('createdOn', {parse_iso8601}),
}),
}
def _parse_video(self, video_data, url=None):
return {
'media_type': 'video',
'extractor_key': BandlabIE.ie_key(),
'extractor': BandlabIE.IE_NAME,
**traverse_obj(video_data, {
'id': ('id', {str}),
'webpage_url': (
'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
'url': ('video', 'url', {url_or_none}),
'title': ('caption', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
'description': ('caption', {str}),
'thumbnail': ('video', 'picture', 'url', {url_or_none}),
'view_count': ('video', 'counters', 'plays', {int_or_none}),
'like_count': ('video', 'counters', 'likes', {int_or_none}),
'comment_count': ('counters', 'comments', {int_or_none}),
'duration': ('video', 'duration', {float_or_none}),
'uploader': ('creator', 'name', {str}),
'uploader_id': ('creator', 'username', {str}),
}),
}
class BandlabIE(BandlabBaseIE):
_VALID_URL = [
r'https?://(?:www\.)?bandlab.com/(?P<url_type>track|post|revision)/(?P<id>[\da-f_-]+)',
r'https?://(?:www\.)?bandlab.com/(?P<url_type>embed)/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
]
_EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
_TESTS = [{
'url': 'https://www.bandlab.com/track/04b37e88dba24967b9dac8eb8567ff39_07d7f906fc96ee11b75e000d3a428fff',
'md5': '46f7b43367dd268bbcf0bbe466753b2c',
'info_dict': {
'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
'ext': 'm4a',
'uploader_id': 'ender_milze',
'track': 'sweet black',
'description': 'composed by juanjn3737',
'timestamp': 1702171963,
'view_count': int,
'like_count': int,
'duration': 54.629999999999995,
'title': 'sweet black',
'upload_date': '20231210',
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
'genres': ['Lofi'],
'uploader': 'ender milze',
'comment_count': int,
'media_type': 'revision',
},
}, {
# Same track as above but post URL
'url': 'https://www.bandlab.com/post/07d7f906-fc96-ee11-b75e-000d3a428fff',
'md5': '46f7b43367dd268bbcf0bbe466753b2c',
'info_dict': {
'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
'ext': 'm4a',
'uploader_id': 'ender_milze',
'track': 'sweet black',
'description': 'composed by juanjn3737',
'timestamp': 1702171973,
'view_count': int,
'like_count': int,
'duration': 54.629999999999995,
'title': 'sweet black',
'upload_date': '20231210',
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
'genres': ['Lofi'],
'uploader': 'ender milze',
'comment_count': int,
'media_type': 'revision',
},
}, {
# SharedKey Example
'url': 'https://www.bandlab.com/track/048916c2-c6da-ee11-85f9-6045bd2e11f9?sharedKey=0NNWX8qYAEmI38lWAzCNDA',
'md5': '15174b57c44440e2a2008be9cae00250',
'info_dict': {
'id': '038916c2-c6da-ee11-85f9-6045bd2e11f9',
'ext': 'm4a',
'comment_count': int,
'genres': ['Other'],
'uploader_id': 'user8353034818103753',
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/51b18363-da23-4b9b-a29c-2933a3e561ca/',
'timestamp': 1709625771,
'track': 'PodcastMaerchen4b',
'duration': 468.14,
'view_count': int,
'description': 'Podcast: Neues aus der Märchenwelt',
'like_count': int,
'upload_date': '20240305',
'uploader': 'Erna Wageneder',
'title': 'PodcastMaerchen4b',
'media_type': 'revision',
},
}, {
# Different Revision selected
'url': 'https://www.bandlab.com/track/130343fc-148b-ea11-96d2-0003ffd1fc09?revId=110343fc-148b-ea11-96d2-0003ffd1fc09',
'md5': '74e055ef9325d63f37088772fbfe4454',
'info_dict': {
'id': '110343fc-148b-ea11-96d2-0003ffd1fc09',
'ext': 'm4a',
'timestamp': 1588273294,
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/b612e533-e4f7-4542-9f50-3fcfd8dd822c/',
'description': 'Final Revision.',
'title': 'Replay ( Instrumental)',
'uploader': 'David R Sparks',
'uploader_id': 'davesnothome69',
'view_count': int,
'comment_count': int,
'track': 'Replay ( Instrumental)',
'genres': ['Rock'],
'upload_date': '20200430',
'like_count': int,
'duration': 279.43,
'media_type': 'revision',
},
}, {
# Video
'url': 'https://www.bandlab.com/post/5cdf9036-3857-ef11-991a-6045bd36e0d9',
'md5': '8caa2ef28e86c1dacf167293cfdbeba9',
'info_dict': {
'id': '5cdf9036-3857-ef11-991a-6045bd36e0d9',
'ext': 'mp4',
'duration': 44.705,
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/videos/67c6cef1-cef6-40d3-831e-a55bc1dcb972/',
'comment_count': int,
'title': 'backing vocals',
'uploader_id': 'marliashya',
'uploader': 'auraa',
'like_count': int,
'description': 'backing vocals',
'media_type': 'video',
},
}, {
# Embed Example
'url': 'https://www.bandlab.com/embed/?blur=false&id=014de0a4-7d82-ea11-a94c-0003ffd19c0f',
'md5': 'a4ad05cb68c54faaed9b0a8453a8cf4a',
'info_dict': {
'id': '014de0a4-7d82-ea11-a94c-0003ffd19c0f',
'ext': 'm4a',
'comment_count': int,
'genres': ['Electronic'],
'uploader': 'Charlie Henson',
'timestamp': 1587328674,
'upload_date': '20200419',
'view_count': int,
'track': 'Positronic Meltdown',
'duration': 318.55,
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/87165bc3-5439-496e-b1f7-a9f13b541ff2/',
'description': 'Checkout my tracks at AOMX http://aomxsounds.com/',
'uploader_id': 'microfreaks',
'title': 'Positronic Meltdown',
'like_count': int,
'media_type': 'revision',
},
}, {
# Track without revisions available
'url': 'https://www.bandlab.com/track/55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
'md5': 'f05d68a3769952c2d9257c473e14c15f',
'info_dict': {
'id': '55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
'ext': 'm4a',
'track': 'insame',
'like_count': int,
'duration': 84.03,
'title': 'insame',
'view_count': int,
'comment_count': int,
'uploader': 'Sorakime',
'uploader_id': 'sorakime',
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/572a351a-0f3a-4c6a-ac39-1a5defdeeb1c/',
'timestamp': 1691162128,
'upload_date': '20230804',
'media_type': 'track',
},
}, {
'url': 'https://www.bandlab.com/revision/014de0a4-7d82-ea11-a94c-0003ffd19c0f',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
'url': 'https://phantomluigi.github.io/',
'info_dict': {
'id': 'e14223c3-7871-ef11-bdfd-000d3a980db3',
'ext': 'm4a',
'view_count': int,
'upload_date': '20240913',
'uploader_id': 'phantommusicofficial',
'timestamp': 1726194897,
'uploader': 'Phantom',
'comment_count': int,
'genres': ['Progresive Rock'],
'description': 'md5:a38cd668f7a2843295ef284114f18429',
'duration': 225.23,
'like_count': int,
'title': 'Vermilion Pt. 2 (Cover)',
'track': 'Vermilion Pt. 2 (Cover)',
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/62b10750-7aef-4f42-ad08-1af52f577e97/',
'media_type': 'revision',
},
}]
def _real_extract(self, url):
display_id, url_type = self._match_valid_url(url).group('id', 'url_type')
qs = parse_qs(url)
revision_id = traverse_obj(qs, (('revId', 'id'), 0, any))
if url_type == 'revision':
revision_id = display_id
revision_data = None
if not revision_id:
post_data = self._call_api(
'posts', display_id, note='Downloading post data',
query=traverse_obj(qs, {'sharedKey': ('sharedKey', 0)}))
revision_id = traverse_obj(post_data, (('revisionId', ('revision', 'id')), {str}, any))
revision_data = traverse_obj(post_data, ('revision', {dict}))
if not revision_data and not revision_id:
post_type = post_data.get('type')
if post_type == 'Video':
return self._parse_video(post_data, url=url)
if post_type == 'Track':
return self._parse_track(post_data, url=url)
raise ExtractorError(f'Could not extract data for post type {post_type!r}')
if not revision_data:
revision_data = self._call_api(
'revisions', revision_id, note='Downloading revision data', query={'edit': 'false'})
return self._parse_revision(revision_data, url=url)
class BandlabPlaylistIE(BandlabBaseIE):
_VALID_URL = [
r'https?://(?:www\.)?bandlab.com/(?:[\w]+/)?(?P<type>albums|collections)/(?P<id>[\da-f-]+)',
r'https?://(?:www\.)?bandlab.com/(?P<type>embed)/collection/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
]
_EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
_TESTS = [{
'url': 'https://www.bandlab.com/davesnothome69/albums/89b79ea6-de42-ed11-b495-00224845aac7',
'info_dict': {
'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/69507ff3-579a-45be-afca-9e87eddec944/',
'release_date': '20221003',
'title': 'Remnants',
'album': 'Remnants',
'like_count': int,
'album_type': 'LP',
'description': 'A collection of some feel good, rock hits.',
'comment_count': int,
'view_count': int,
'id': '89b79ea6-de42-ed11-b495-00224845aac7',
'uploader': 'David R Sparks',
'uploader_id': 'davesnothome69',
},
'playlist_count': 10,
}, {
'url': 'https://www.bandlab.com/slytheband/collections/955102d4-1040-ef11-86c3-000d3a42581b',
'info_dict': {
'id': '955102d4-1040-ef11-86c3-000d3a42581b',
'timestamp': 1720762659,
'view_count': int,
'title': 'My Shit 🖤',
'uploader_id': 'slytheband',
'uploader': '𝓢𝓛𝓨',
'upload_date': '20240712',
'like_count': int,
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/collections/2c64ca12-b180-4b76-8587-7a8da76bddc8/',
},
'playlist_count': 15,
}, {
# Embeds can contain both albums and collections with the same URL pattern. This is an album
'url': 'https://www.bandlab.com/embed/collection/?id=12cc6f7f-951b-ee11-907c-00224844f303',
'info_dict': {
'id': '12cc6f7f-951b-ee11-907c-00224844f303',
'release_date': '20230706',
'description': 'This is a collection of songs I created when I had an Amiga computer.',
'view_count': int,
'title': 'Mark Salud The Amiga Collection',
'uploader_id': 'mssirmooth1962',
'comment_count': int,
'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/d618bd7b-0537-40d5-bdd8-61b066e77d59/',
'like_count': int,
'uploader': 'Mark Salud',
'album': 'Mark Salud The Amiga Collection',
'album_type': 'LP',
},
'playlist_count': 24,
}, {
# Tracks without revision id
'url': 'https://www.bandlab.com/embed/collection/?id=e98aafb5-d932-ee11-b8f0-00224844c719',
'info_dict': {
'like_count': int,
'uploader_id': 'sorakime',
'comment_count': int,
'uploader': 'Sorakime',
'view_count': int,
'description': 'md5:4ec31c568a5f5a5a2b17572ea64c3825',
'release_date': '20230812',
'title': 'Art',
'album': 'Art',
'album_type': 'Album',
'id': 'e98aafb5-d932-ee11-b8f0-00224844c719',
'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/20c890de-e94a-4422-828a-2da6377a13c8/',
},
'playlist_count': 13,
}, {
'url': 'https://www.bandlab.com/albums/89b79ea6-de42-ed11-b495-00224845aac7',
'only_matching': True,
}]
def _entries(self, album_data):
for post in traverse_obj(album_data, ('posts', lambda _, v: v['type'])):
post_type = post['type']
if post_type == 'Revision':
yield self._parse_revision(post.get('revision'))
elif post_type == 'Track':
yield self._parse_track(post)
elif post_type == 'Video':
yield self._parse_video(post)
else:
self.report_warning(f'Skipping unknown post type: "{post_type}"')
def _real_extract(self, url):
playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')
endpoints = {
'albums': ['albums'],
'collections': ['collections'],
'embed': ['collections', 'albums'],
}.get(playlist_type)
for endpoint in endpoints:
playlist_data = self._call_api(
endpoint, playlist_id, note=f'Downloading {endpoint[:-1]} data',
fatal=False, expected_status=404)
if not playlist_data.get('errorCode'):
playlist_type = endpoint
break
if error_code := playlist_data.get('errorCode'):
raise ExtractorError(f'Could not find playlist data. Error code: "{error_code}"')
return self.playlist_result(
self._entries(playlist_data), playlist_id,
**traverse_obj(playlist_data, {
'title': ('name', {str}),
'description': ('description', {str}),
'uploader': ('creator', 'name', {str}),
'uploader_id': ('creator', 'username', {str}),
'timestamp': ('createdOn', {parse_iso8601}),
'release_date': ('releaseDate', {lambda x: x.replace('-', '')}, filter),
'thumbnail': ('picture', ('original', 'url'), {url_or_none}, any),
'like_count': ('counters', 'likes', {int_or_none}),
'comment_count': ('counters', 'comments', {int_or_none}),
'view_count': ('counters', 'plays', {int_or_none}),
}),
**(traverse_obj(playlist_data, {
'album': ('name', {str}),
'album_type': ('type', {str}),
}) if playlist_type == 'albums' else {}))

View File

@ -1,3 +1,4 @@
import json
import re import re
import urllib.parse import urllib.parse
@ -8,6 +9,7 @@ from ..utils import (
float_or_none, float_or_none,
str_or_none, str_or_none,
traverse_obj, traverse_obj,
unescapeHTML,
urlencode_postdata, urlencode_postdata,
) )
@ -19,7 +21,7 @@ USER_AGENTS = {
class CeskaTelevizeIE(InfoExtractor): class CeskaTelevizeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', 'url': 'https://www.ceskatelevize.cz/porady/10441294653-hyde-park-civilizace/bonus/20641/',
'info_dict': { 'info_dict': {
'id': '61924494877028507', 'id': '61924494877028507',
'ext': 'mp4', 'ext': 'mp4',
@ -27,6 +29,7 @@ class CeskaTelevizeIE(InfoExtractor):
'description': 'English Subtittles', 'description': 'English Subtittles',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'duration': 81.3, 'duration': 81.3,
'live_status': 'not_live',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
@ -34,13 +37,16 @@ class CeskaTelevizeIE(InfoExtractor):
}, },
}, { }, {
# live stream # live stream
'url': 'http://www.ceskatelevize.cz/zive/ct1/', 'url': 'https://www.ceskatelevize.cz/zive/ct1/',
'only_matching': True,
'info_dict': { 'info_dict': {
'id': '102', 'id': '61924494878124436',
'ext': 'mp4', 'ext': 'mp4',
'title': r'ČT1 - živé vysílání online', 'title': r're:^ČT1 - živé vysílání online \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.', 'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.',
'is_live': True, 'thumbnail': r're:^https?://.*\.jpg',
'duration': 5373.3,
'live_status': 'is_live',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
@ -48,18 +54,19 @@ class CeskaTelevizeIE(InfoExtractor):
}, },
}, { }, {
# another # another
'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', 'url': 'https://www.ceskatelevize.cz/zive/sport/',
'only_matching': True, 'only_matching': True,
'info_dict': { 'info_dict': {
'id': '402', 'id': '422',
'ext': 'mp4', 'ext': 'mp4',
'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True, 'thumbnail': r're:^https?://.*\.jpg',
'live_status': 'is_live',
},
'params': {
# m3u8 download
'skip_download': True,
}, },
# 'skip': 'Georestricted to Czech Republic',
}, {
'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
'only_matching': True,
}, { }, {
# video with 18+ caution trailer # video with 18+ caution trailer
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
@ -74,6 +81,7 @@ class CeskaTelevizeIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Bogotart - Queer (Varování 18+)', 'title': 'Bogotart - Queer (Varování 18+)',
'duration': 11.9, 'duration': 11.9,
'live_status': 'not_live',
}, },
}, { }, {
'info_dict': { 'info_dict': {
@ -82,6 +90,7 @@ class CeskaTelevizeIE(InfoExtractor):
'title': 'Bogotart - Queer (Queer)', 'title': 'Bogotart - Queer (Queer)',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'duration': 1558.3, 'duration': 1558.3,
'live_status': 'not_live',
}, },
}], }],
'params': { 'params': {
@ -91,7 +100,19 @@ class CeskaTelevizeIE(InfoExtractor):
}, { }, {
# iframe embed # iframe embed
'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/', 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
'only_matching': True, 'info_dict': {
'id': '61924494877628660',
'ext': 'mp4',
'title': 'Epizoda 1/13 - Neviditelní',
'description': 'Vypadají jako my, mluví jako my, ale mají něco navíc gen, který jim umožňuje dýchat vodu. Aniž to tušíme, žijí mezi námi.',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 3576.8,
'live_status': 'not_live',
},
'params': {
# m3u8 download
'skip_download': True,
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -106,26 +127,83 @@ class CeskaTelevizeIE(InfoExtractor):
if playlist_description: if playlist_description:
playlist_description = playlist_description.replace('\xa0', ' ') playlist_description = playlist_description.replace('\xa0', ' ')
type_ = 'IDEC' type_ = 'episode'
is_live = False
if re.search(r'(^/porady|/zive)/', parsed_url.path): if re.search(r'(^/porady|/zive)/', parsed_url.path):
next_data = self._search_nextjs_data(webpage, playlist_id) next_data = self._search_nextjs_data(webpage, playlist_id)
if '/zive/' in parsed_url.path: if '/zive/' in parsed_url.path:
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False) idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False)
sidp = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'showId'), get_all=False)
is_live = True
else: else:
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False) idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
if not idec: if not idec:
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False) idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False)
if idec: if idec:
type_ = 'bonus' type_ = 'bonus'
sidp = self._search_regex(r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/([0-9]+)-', url, playlist_id, default=playlist_id)
if not idec: if not idec:
raise ExtractorError('Failed to find IDEC id') raise ExtractorError('Failed to find IDEC id')
iframe_hash = self._download_webpage( sidp = sidp.rsplit('-')[0]
'https://www.ceskatelevize.cz/v-api/iframe-hash/', query = {'origin': 'iVysilani', 'autoStart': 'true', 'sidp': sidp, type_: idec}
playlist_id, note='Getting IFRAME hash')
query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec}
webpage = self._download_webpage( webpage = self._download_webpage(
'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', 'https://player.ceskatelevize.cz/',
playlist_id, note='Downloading player', query=query) playlist_id, note='Downloading player', query=query)
playlistpage_url = 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/'
data = {
'playlist[0][type]': type_,
'playlist[0][id]': idec,
'requestUrl': parsed_url.path,
'requestSource': 'iVysilani',
}
elif parsed_url.path == '/' and parsed_url.fragment == 'live':
if self._search_regex(r'(?s)<section[^>]+id=[\'"]live[\'"][^>]+data-ctcomp-data=\'([^\']+)\'[^>]*>', webpage, 'live video player', default=None):
# CT4
is_live = True
ctcomp_data = self._parse_json(
self._search_regex(
r'(?s)<section[^>]+id=[\'"]live[\'"][^>]+data-ctcomp-data=\'([^\']+)\'[^>]*>',
webpage, 'ctcomp data', fatal=True),
playlist_id, transform_source=unescapeHTML)
current_item = traverse_obj(ctcomp_data, ('items', ctcomp_data.get('currentItem'), 'items', 0, 'video', 'data', 'source', 'playlist', 0))
playlistpage_url = 'https://playlist.ceskatelevize.cz/'
data = {
'contentType': 'live',
'items': [{
'id': current_item.get('id'),
'key': current_item.get('key'),
'assetId': current_item.get('assetId'),
'playerType': 'dash',
'date': current_item.get('date'),
'requestSource': current_item.get('requestSource'),
'drm': current_item.get('drm'),
'quality': current_item.get('quality'),
}],
}
data = {'data': json.dumps(data).encode('utf-8')}
else:
# CT24
is_live = True
lvp_url = self._search_regex(
r'(?s)<div[^>]+id=[\'"]live-video-player[\'"][^>]+data-url=[\'"]([^\'"]+)[\'"][^>]*>',
webpage, 'live video player', fatal=True)
lvp_hash = self._search_regex(
r'(?s)media_ivysilani: *{ *hash *: *[\'"]([0-9a-f]+)[\'"] *}',
webpage, 'live video hash', fatal=True)
lvp_url += '&hash=' + lvp_hash
webpage = self._download_webpage(unescapeHTML(lvp_url), playlist_id)
playlistpage = self._search_regex(
r'(?s)getPlaylistUrl\((\[[^\]]+\])[,\)]',
webpage, 'playlist params', fatal=True)
playlistpage_params = self._parse_json(playlistpage, playlist_id)[0]
playlistpage_url = 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/'
idec = playlistpage_params.get('id')
data = {
'playlist[0][type]': playlistpage_params.get('type'),
'playlist[0][id]': idec,
'requestUrl': '/ivysilani/embed/iFramePlayer.php',
'requestSource': 'iVysilani',
}
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
if f'{NOT_AVAILABLE_STRING}</p>' in webpage: if f'{NOT_AVAILABLE_STRING}</p>' in webpage:
@ -133,40 +211,10 @@ class CeskaTelevizeIE(InfoExtractor):
if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen')): if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen')):
raise ExtractorError('no video with IDEC available', video_id=idec, expected=True) raise ExtractorError('no video with IDEC available', video_id=idec, expected=True)
type_ = None
episode_id = None
playlist = self._parse_json(
self._search_regex(
r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
default='{}'), playlist_id)
if playlist:
type_ = playlist.get('type')
episode_id = playlist.get('id')
if not type_:
type_ = self._html_search_regex(
r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
webpage, 'type')
if not episode_id:
episode_id = self._html_search_regex(
r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
webpage, 'episode_id')
data = {
'playlist[0][type]': type_,
'playlist[0][id]': episode_id,
'requestUrl': parsed_url.path,
'requestSource': 'iVysilani',
}
entries = [] entries = []
for user_agent in (None, USER_AGENTS['Safari']): for user_agent in (None, USER_AGENTS['Safari']):
req = Request( req = Request(playlistpage_url, data=urlencode_postdata(data))
'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
data=urlencode_postdata(data))
req.headers['Content-type'] = 'application/x-www-form-urlencoded' req.headers['Content-type'] = 'application/x-www-form-urlencoded'
req.headers['x-addr'] = '127.0.0.1' req.headers['x-addr'] = '127.0.0.1'
req.headers['X-Requested-With'] = 'XMLHttpRequest' req.headers['X-Requested-With'] = 'XMLHttpRequest'
@ -179,25 +227,25 @@ class CeskaTelevizeIE(InfoExtractor):
if not playlistpage: if not playlistpage:
continue continue
playlist_url = playlistpage['url'] playlist_url = playlistpage.get('url')
if playlist_url == 'error_region': if playlist_url:
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) if playlist_url == 'error_region':
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
req = Request(urllib.parse.unquote(playlist_url))
req.headers['Referer'] = url
playlist = self._download_json(req, playlist_id, fatal=False)
if not playlist:
continue
playlist = playlist.get('playlist')
else:
playlist = traverse_obj(playlistpage, ('RESULT', 'playlist'))
req = Request(urllib.parse.unquote(playlist_url))
req.headers['Referer'] = url
playlist = self._download_json(req, playlist_id, fatal=False)
if not playlist:
continue
playlist = playlist.get('playlist')
if not isinstance(playlist, list): if not isinstance(playlist, list):
continue continue
playlist_len = len(playlist) playlist_len = len(playlist)
for num, item in enumerate(playlist): for num, item in enumerate(playlist):
is_live = item.get('type') == 'LIVE'
formats = [] formats = []
for format_id, stream_url in item.get('streamUrls', {}).items(): for format_id, stream_url in item.get('streamUrls', {}).items():
if 'playerType=flash' in stream_url: if 'playerType=flash' in stream_url:
@ -222,7 +270,7 @@ class CeskaTelevizeIE(InfoExtractor):
continue continue
item_id = str_or_none(item.get('id') or item['assetId']) item_id = str_or_none(item.get('id') or item['assetId'])
title = item['title'] title = item.get('title') or 'live'
duration = float_or_none(item.get('duration')) duration = float_or_none(item.get('duration'))
thumbnail = item.get('previewImageUrl') thumbnail = item.get('previewImageUrl')
@ -231,7 +279,7 @@ class CeskaTelevizeIE(InfoExtractor):
if item.get('type') == 'VOD': if item.get('type') == 'VOD':
subs = item.get('subtitles') subs = item.get('subtitles')
if subs: if subs:
subtitles = self.extract_subtitles(episode_id, subs) subtitles = self.extract_subtitles(idec, subs)
if playlist_len == 1: if playlist_len == 1:
final_title = playlist_title or title final_title = playlist_title or title
@ -246,7 +294,7 @@ class CeskaTelevizeIE(InfoExtractor):
'duration': duration, 'duration': duration,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'is_live': is_live, 'live_status': 'is_live' if is_live else 'not_live',
}) })
if len(entries) == 1: if len(entries) == 1:

View File

@ -5,6 +5,7 @@ from ..utils import (
ExtractorError, ExtractorError,
lowercase_escape, lowercase_escape,
url_or_none, url_or_none,
urlencode_postdata,
) )
@ -40,14 +41,48 @@ class ChaturbateIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
_ROOM_OFFLINE = 'Room is currently offline' _ERROR_MAP = {
'offline': 'Room is currently offline',
'private': 'Room is currently in a private show',
'away': 'Performer is currently away',
'password protected': 'Room is password protected',
'hidden': 'Hidden session in progress',
}
def _real_extract(self, url): def _extract_from_api(self, video_id, tld):
video_id, tld = self._match_valid_url(url).group('id', 'tld') response = self._download_json(
f'https://chaturbate.{tld}/get_edge_hls_url_ajax/', video_id,
data=urlencode_postdata({'room_slug': video_id}),
headers={
**self.geo_verification_headers(),
'X-Requested-With': 'XMLHttpRequest',
'Accept': 'application/json',
}, fatal=False, impersonate=True) or {}
status = response.get('room_status')
if status != 'public':
if error := self._ERROR_MAP.get(status):
raise ExtractorError(error, expected=True)
self.report_warning('Falling back to webpage extraction')
return None
m3u8_url = response.get('url')
if not m3u8_url:
self.raise_geo_restricted()
return {
'id': video_id,
'title': video_id,
'thumbnail': f'https://roomimg.stream.highwebmedia.com/ri/{video_id}.jpg',
'is_live': True,
'age_limit': 18,
'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True),
}
def _extract_from_webpage(self, video_id, tld):
webpage = self._download_webpage( webpage = self._download_webpage(
f'https://chaturbate.{tld}/{video_id}/', video_id, f'https://chaturbate.{tld}/{video_id}/', video_id,
headers=self.geo_verification_headers()) headers=self.geo_verification_headers(), impersonate=True)
found_m3u8_urls = [] found_m3u8_urls = []
@ -85,8 +120,8 @@ class ChaturbateIE(InfoExtractor):
webpage, 'error', group='error', default=None) webpage, 'error', group='error', default=None)
if not error: if not error:
if any(p in webpage for p in ( if any(p in webpage for p in (
self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')): self._ERROR_MAP['offline'], 'offline_tipping', 'tip_offline')):
error = self._ROOM_OFFLINE error = self._ERROR_MAP['offline']
if error: if error:
raise ExtractorError(error, expected=True) raise ExtractorError(error, expected=True)
raise ExtractorError('Unable to find stream URL') raise ExtractorError('Unable to find stream URL')
@ -113,3 +148,7 @@ class ChaturbateIE(InfoExtractor):
'is_live': True, 'is_live': True,
'formats': formats, 'formats': formats,
} }
def _real_extract(self, url):
video_id, tld = self._match_valid_url(url).group('id', 'tld')
return self._extract_from_api(video_id, tld) or self._extract_from_webpage(video_id, tld)

View File

@ -279,6 +279,7 @@ class InfoExtractor:
thumbnails: A list of dictionaries, with the following entries: thumbnails: A list of dictionaries, with the following entries:
* "id" (optional, string) - Thumbnail format ID * "id" (optional, string) - Thumbnail format ID
* "url" * "url"
* "ext" (optional, string) - actual image extension if not given in URL
* "preference" (optional, int) - quality of the image * "preference" (optional, int) - quality of the image
* "width" (optional, int) * "width" (optional, int)
* "height" (optional, int) * "height" (optional, int)

View File

@ -16,10 +16,10 @@ from ..utils import (
parse_iso8601, parse_iso8601,
smuggle_url, smuggle_url,
str_or_none, str_or_none,
traverse_obj,
url_or_none, url_or_none,
urljoin, urljoin,
) )
from ..utils.traversal import traverse_obj, value
class PatreonBaseIE(InfoExtractor): class PatreonBaseIE(InfoExtractor):
@ -252,6 +252,27 @@ class PatreonIE(PatreonBaseIE):
'thumbnail': r're:^https?://.+', 'thumbnail': r're:^https?://.+',
}, },
'skip': 'Patron-only content', 'skip': 'Patron-only content',
}, {
# Contains a comment reply in the 'included' section
'url': 'https://www.patreon.com/posts/114721679',
'info_dict': {
'id': '114721679',
'ext': 'mp4',
'upload_date': '20241025',
'uploader': 'Japanalysis',
'like_count': int,
'thumbnail': r're:^https?://.+',
'comment_count': int,
'title': 'Karasawa Part 2',
'description': 'Part 2 of this video https://www.youtube.com/watch?v=Azms2-VTASk',
'uploader_url': 'https://www.patreon.com/japanalysis',
'uploader_id': '80504268',
'channel_url': 'https://www.patreon.com/japanalysis',
'channel_follower_count': int,
'timestamp': 1729897015,
'channel_id': '9346307',
},
'params': {'getcomments': True},
}] }]
_RETURN_TYPE = 'video' _RETURN_TYPE = 'video'
@ -404,26 +425,24 @@ class PatreonIE(PatreonBaseIE):
f'posts/{post_id}/comments', post_id, query=params, note=f'Downloading comments page {page}') f'posts/{post_id}/comments', post_id, query=params, note=f'Downloading comments page {page}')
cursor = None cursor = None
for comment in traverse_obj(response, (('data', ('included', lambda _, v: v['type'] == 'comment')), ...)): for comment in traverse_obj(response, (('data', 'included'), lambda _, v: v['type'] == 'comment' and v['id'])):
count += 1 count += 1
comment_id = comment.get('id')
attributes = comment.get('attributes') or {}
if comment_id is None:
continue
author_id = traverse_obj(comment, ('relationships', 'commenter', 'data', 'id')) author_id = traverse_obj(comment, ('relationships', 'commenter', 'data', 'id'))
author_info = traverse_obj(
response, ('included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes'),
get_all=False, expected_type=dict, default={})
yield { yield {
'id': comment_id, **traverse_obj(comment, {
'text': attributes.get('body'), 'id': ('id', {str_or_none}),
'timestamp': parse_iso8601(attributes.get('created')), 'text': ('attributes', 'body', {str}),
'parent': traverse_obj(comment, ('relationships', 'parent', 'data', 'id'), default='root'), 'timestamp': ('attributes', 'created', {parse_iso8601}),
'author_is_uploader': attributes.get('is_by_creator'), 'parent': ('relationships', 'parent', 'data', ('id', {value('root')}), {str}, any),
'author_is_uploader': ('attributes', 'is_by_creator', {bool}),
}),
**traverse_obj(response, (
'included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes', {
'author': ('full_name', {str}),
'author_thumbnail': ('image_url', {url_or_none}),
}), get_all=False),
'author_id': author_id, 'author_id': author_id,
'author': author_info.get('full_name'),
'author_thumbnail': author_info.get('image_url'),
} }
if count < traverse_obj(response, ('meta', 'count')): if count < traverse_obj(response, ('meta', 'count')):

View File

@ -0,0 +1,105 @@
from .common import InfoExtractor
from ..utils import url_or_none
from ..utils.traversal import traverse_obj
class RadioRadicaleIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?radioradicale\.it/scheda/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://www.radioradicale.it/scheda/471591',
'md5': 'eb0fbe43a601f1a361cbd00f3c45af4a',
'info_dict': {
'id': '471591',
'ext': 'mp4',
'title': 'md5:e8fbb8de57011a3255db0beca69af73d',
'description': 'md5:5e15a789a2fe4d67da8d1366996e89ef',
'location': 'Napoli',
'duration': 2852.0,
'timestamp': 1459987200,
'upload_date': '20160407',
'thumbnail': 'https://www.radioradicale.it/photo400/0/0/9/0/1/00901768.jpg',
},
}, {
'url': 'https://www.radioradicale.it/scheda/742783/parlamento-riunito-in-seduta-comune-11a-della-xix-legislatura',
'info_dict': {
'id': '742783',
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
'description': '-) Votazione per l\'elezione di un giudice della Corte Costituzionale (nono scrutinio)',
'location': 'CAMERA',
'duration': 5868.0,
'timestamp': 1730246400,
'upload_date': '20241030',
},
'playlist': [{
'md5': 'aa48de55dcc45478e4cd200f299aab7d',
'info_dict': {
'id': '742783-0',
'ext': 'mp4',
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
},
}, {
'md5': 'be915c189c70ad2920e5810f32260ff5',
'info_dict': {
'id': '742783-1',
'ext': 'mp4',
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
},
}, {
'md5': 'f0ee4047342baf8ed3128a8417ac5e0a',
'info_dict': {
'id': '742783-2',
'ext': 'mp4',
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
},
}],
}]
def _entries(self, videos_info, page_id):
for idx, video in enumerate(traverse_obj(
videos_info, ('playlist', lambda _, v: v['sources']))):
video_id = f'{page_id}-{idx}'
formats = []
subtitles = {}
for m3u8_url in traverse_obj(video, ('sources', ..., 'src', {url_or_none})):
fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
for sub in traverse_obj(video, ('subtitles', ..., lambda _, v: url_or_none(v['src']))):
self._merge_subtitles({sub.get('srclang') or 'und': [{
'url': sub['src'],
'name': sub.get('label'),
}]}, target=subtitles)
yield {
'id': video_id,
'title': video.get('title'),
'formats': formats,
'subtitles': subtitles,
}
def _real_extract(self, url):
page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)
videos_info = self._search_json(
r'jQuery\.extend\(Drupal\.settings\s*,',
webpage, 'videos_info', page_id)['RRscheda']
entries = list(self._entries(videos_info, page_id))
common_info = {
'id': page_id,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'location': videos_info.get('luogo'),
**self._search_json_ld(webpage, page_id),
}
if len(entries) == 1:
return {
**entries[0],
**common_info,
}
return self.playlist_result(entries, multi_video=True, **common_info)

View File

@ -213,7 +213,7 @@ class RedGifsSearchIE(RedGifsBaseInfoExtractor):
class RedGifsUserIE(RedGifsBaseInfoExtractor): class RedGifsUserIE(RedGifsBaseInfoExtractor):
IE_DESC = 'Redgifs user' IE_DESC = 'Redgifs user'
_VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?' _VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?'
_PAGE_SIZE = 30 _PAGE_SIZE = 80
_TESTS = [ _TESTS = [
{ {
'url': 'https://www.redgifs.com/users/lamsinka89', 'url': 'https://www.redgifs.com/users/lamsinka89',
@ -222,7 +222,7 @@ class RedGifsUserIE(RedGifsBaseInfoExtractor):
'title': 'lamsinka89', 'title': 'lamsinka89',
'description': 'RedGifs user lamsinka89, ordered by recent', 'description': 'RedGifs user lamsinka89, ordered by recent',
}, },
'playlist_mincount': 100, 'playlist_mincount': 391,
}, },
{ {
'url': 'https://www.redgifs.com/users/lamsinka89?page=3', 'url': 'https://www.redgifs.com/users/lamsinka89?page=3',
@ -231,7 +231,7 @@ class RedGifsUserIE(RedGifsBaseInfoExtractor):
'title': 'lamsinka89', 'title': 'lamsinka89',
'description': 'RedGifs user lamsinka89, ordered by recent', 'description': 'RedGifs user lamsinka89, ordered by recent',
}, },
'playlist_count': 30, 'playlist_count': 80,
}, },
{ {
'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g', 'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g',
@ -240,7 +240,17 @@ class RedGifsUserIE(RedGifsBaseInfoExtractor):
'title': 'lamsinka89', 'title': 'lamsinka89',
'description': 'RedGifs user lamsinka89, ordered by best', 'description': 'RedGifs user lamsinka89, ordered by best',
}, },
'playlist_mincount': 100, 'playlist_mincount': 391,
},
{
'url': 'https://www.redgifs.com/users/ignored52',
'note': 'https://github.com/yt-dlp/yt-dlp/issues/7382',
'info_dict': {
'id': 'ignored52',
'title': 'ignored52',
'description': 'RedGifs user ignored52, ordered by recent',
},
'playlist_mincount': 121,
}, },
] ]

View File

@ -71,9 +71,11 @@ class SpankBangIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') or mobj.group('id_2') video_id = mobj.group('id') or mobj.group('id_2')
country = self.get_param('geo_bypass_country') or 'US'
self._set_cookie('.spankbang.com', 'country', country.upper())
webpage = self._download_webpage( webpage = self._download_webpage(
url.replace(f'/{video_id}/embed', f'/{video_id}/video'), url.replace(f'/{video_id}/embed', f'/{video_id}/video'),
video_id, headers={'Cookie': 'country=US'}) video_id, impersonate=True)
if re.search(r'<[^>]+\b(?:id|class)=["\']video_removed', webpage): if re.search(r'<[^>]+\b(?:id|class)=["\']video_removed', webpage):
raise ExtractorError( raise ExtractorError(

View File

@ -216,7 +216,7 @@ def partial_application(func):
sig = inspect.signature(func) sig = inspect.signature(func)
required_args = [ required_args = [
param.name for param in sig.parameters.values() param.name for param in sig.parameters.values()
if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.VAR_POSITIONAL) if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
if param.default is inspect.Parameter.empty if param.default is inspect.Parameter.empty
] ]
@ -4837,7 +4837,6 @@ def number_of_digits(number):
return len('%d' % number) return len('%d' % number)
@partial_application
def join_nonempty(*values, delim='-', from_dict=None): def join_nonempty(*values, delim='-', from_dict=None):
if from_dict is not None: if from_dict is not None:
values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values) values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)

View File

@ -332,14 +332,14 @@ class _RequiredError(ExtractorError):
@typing.overload @typing.overload
def subs_list_to_dict(*, ext: str | None = None) -> collections.abc.Callable[[list[dict]], dict[str, list[dict]]]: ... def subs_list_to_dict(*, lang: str | None = 'und', ext: str | None = None) -> collections.abc.Callable[[list[dict]], dict[str, list[dict]]]: ...
@typing.overload @typing.overload
def subs_list_to_dict(subs: list[dict] | None, /, *, ext: str | None = None) -> dict[str, list[dict]]: ... def subs_list_to_dict(subs: list[dict] | None, /, *, lang: str | None = 'und', ext: str | None = None) -> dict[str, list[dict]]: ...
def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None): def subs_list_to_dict(subs: list[dict] | None = None, /, *, lang='und', ext=None):
""" """
Convert subtitles from a traversal into a subtitle dict. Convert subtitles from a traversal into a subtitle dict.
The path should have an `all` immediately before this function. The path should have an `all` immediately before this function.
@ -352,7 +352,7 @@ def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
`quality` The sort order for each subtitle `quality` The sort order for each subtitle
""" """
if subs is None: if subs is None:
return functools.partial(subs_list_to_dict, ext=ext) return functools.partial(subs_list_to_dict, lang=lang, ext=ext)
result = collections.defaultdict(list) result = collections.defaultdict(list)
@ -360,10 +360,16 @@ def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
if not url_or_none(sub.get('url')) and not sub.get('data'): if not url_or_none(sub.get('url')) and not sub.get('data'):
continue continue
sub_id = sub.pop('id', None) sub_id = sub.pop('id', None)
if sub_id is None: if not isinstance(sub_id, str):
continue if not lang:
if ext is not None and not sub.get('ext'): continue
sub['ext'] = ext sub_id = lang
sub_ext = sub.get('ext')
if not isinstance(sub_ext, str):
if not ext:
sub.pop('ext', None)
else:
sub['ext'] = ext
result[sub_id].append(sub) result[sub_id].append(sub)
result = dict(result) result = dict(result)
@ -452,9 +458,9 @@ def trim_str(*, start=None, end=None):
return trim return trim
def unpack(func): def unpack(func, **kwargs):
@functools.wraps(func) @functools.wraps(func)
def inner(items, **kwargs): def inner(items):
return func(*items, **kwargs) return func(*items, **kwargs)
return inner return inner