Compare commits

...

15 Commits

Author SHA1 Message Date
Mozi
6fb342c163
Merge 8779a8897c into c699bafc50 2024-11-16 07:40:24 +00:00
Mozi
8779a8897c simplify statements in traversal 2024-11-16 07:39:59 +00:00
Mozi
88de6d0c2d merge 'master' 2024-11-16 07:22:46 +00:00
bashonly
c699bafc50 [ie/soop] Fix thumbnail extraction (#11545)
Closes #11537

Authored by: bashonly
2024-11-15 22:51:55 +00:00
bashonly
eb64ae7d5d [ie] Allow ext override for thumbnails (#11545)
Authored by: bashonly
2024-11-15 22:51:55 +00:00
Simon Sawicki
c014fbcddc
[utils] subs_list_to_dict: Add lang default parameter (#11508)
Authored by: Grub4K
2024-11-15 23:25:52 +01:00
Simon Sawicki
39d79c9b9c
[utils] Fix join_nonempty, add **kwargs to unpack (#11559)
Authored by: Grub4K
2024-11-15 22:06:15 +01:00
Jackson Humphrey
f2a4983df7
[ie/archive.org] Fix comments extraction (#11527)
Closes #11526
Authored by: jshumphrey
2024-11-12 23:26:18 +00:00
bashonly
bacc31b05a
[ie/facebook] Fix formats extraction (#11513)
Closes #11497
Authored by: bashonly
2024-11-12 23:23:10 +00:00
Mozi
628ce197eb merge 'master' 2024-10-28 00:37:58 +00:00
Mozi
c0aa2e8160 fix usage of 'self._merge_subtitles' 2024-10-28 00:37:42 +00:00
Mozi
4b00360b4e [ie/vidio:live] the code I wrote does not seem to work. let's rewrite it
Those two URLs of Premier-exclusive livestreams are still not working!
2024-09-21 12:28:25 +00:00
Mozi
8155ed770b merge branch 'master' 2024-09-21 10:08:41 +00:00
Mozi
20c66ec13e [ie/vidio] Fix login; use new API; check DRM; extract comments 2024-09-21 10:07:45 +00:00
Mozi
3bb739f188 [ie/vidio:live] Add DASH support; use new API 2024-09-01 16:56:51 +00:00
10 changed files with 510 additions and 159 deletions

View File

@@ -481,7 +481,7 @@ class TestTraversalHelpers:
'id': 'name',
'data': 'content',
'url': 'url',
}, all, {subs_list_to_dict}]) == {
}, all, {subs_list_to_dict(lang=None)}]) == {
'de': [{'url': 'https://example.com/subs/de.ass'}],
'en': [{'data': 'content'}],
}, 'subs with mandatory items missing should be filtered'
@@ -507,6 +507,54 @@ class TestTraversalHelpers:
{'url': 'https://example.com/subs/en1', 'ext': 'ext'},
{'url': 'https://example.com/subs/en2', 'ext': 'ext'},
]}, '`quality` key should sort subtitle list accordingly'
assert traverse_obj([
{'name': 'de', 'url': 'https://example.com/subs/de.ass'},
{'name': 'de'},
{'name': 'en', 'content': 'content'},
{'url': 'https://example.com/subs/en'},
], [..., {
'id': 'name',
'url': 'url',
'data': 'content',
}, all, {subs_list_to_dict(lang='en')}]) == {
'de': [{'url': 'https://example.com/subs/de.ass'}],
'en': [
{'data': 'content'},
{'url': 'https://example.com/subs/en'},
],
}, 'optionally provided lang should be used if no id available'
assert traverse_obj([
{'name': 1, 'url': 'https://example.com/subs/de1'},
{'name': {}, 'url': 'https://example.com/subs/de2'},
{'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
{'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
], [..., {
'id': 'name',
'url': 'url',
'ext': 'ext',
}, all, {subs_list_to_dict(lang=None)}]) == {
'de': [
{'url': 'https://example.com/subs/de3'},
{'url': 'https://example.com/subs/de4'},
],
}, 'non str types should be ignored for id and ext'
assert traverse_obj([
{'name': 1, 'url': 'https://example.com/subs/de1'},
{'name': {}, 'url': 'https://example.com/subs/de2'},
{'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
{'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
], [..., {
'id': 'name',
'url': 'url',
'ext': 'ext',
}, all, {subs_list_to_dict(lang='de')}]) == {
'de': [
{'url': 'https://example.com/subs/de1'},
{'url': 'https://example.com/subs/de2'},
{'url': 'https://example.com/subs/de3'},
{'url': 'https://example.com/subs/de4'},
],
}, 'non str types should be replaced by default id'
def test_trim_str(self):
with pytest.raises(TypeError):
@@ -525,7 +573,7 @@ class TestTraversalHelpers:
def test_unpack(self):
assert unpack(lambda *x: ''.join(map(str, x)))([1, 2, 3]) == '123'
assert unpack(join_nonempty)([1, 2, 3]) == '1-2-3'
assert unpack(join_nonempty(delim=' '))([1, 2, 3]) == '1 2 3'
assert unpack(join_nonempty, delim=' ')([1, 2, 3]) == '1 2 3'
with pytest.raises(TypeError):
unpack(join_nonempty)()
with pytest.raises(TypeError):

View File

@@ -72,7 +72,6 @@ from yt_dlp.utils import (
intlist_to_bytes,
iri_to_uri,
is_html,
join_nonempty,
js_to_json,
limit_length,
locked_file,
@@ -2158,10 +2157,6 @@ Line 1
assert int_or_none(v=10) == 10, 'keyword passed positional should call function'
assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function'
assert callable(join_nonempty(delim=', ')), 'varargs positional should apply partially'
assert callable(join_nonempty()), 'varargs positional should apply partially'
assert join_nonempty(None, delim=', ') == '', 'passed varargs should call the function'
if __name__ == '__main__':
unittest.main()

View File

@@ -4381,7 +4381,9 @@ class YoutubeDL:
return None
for idx, t in list(enumerate(thumbnails))[::-1]:
thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
thumb_ext = t.get('ext') or determine_ext(t['url'], 'jpg')
if multiple:
thumb_ext = f'{t["id"]}.{thumb_ext}'
thumb_display_id = f'{label} thumbnail {t["id"]}'
thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

View File

@@ -66,6 +66,14 @@ class AfreecaTVBaseIE(InfoExtractor):
extensions={'legacy_ssl': True}), display_id,
'Downloading API JSON', 'Unable to download API JSON')
@staticmethod
def _fixup_thumb(thumb_url):
if not url_or_none(thumb_url):
return None
# Core would determine_ext as 'php' from the url, so we need to provide the real ext
# See: https://github.com/yt-dlp/yt-dlp/issues/11537
return [{'url': thumb_url, 'ext': 'jpg'}]
class AfreecaTVIE(AfreecaTVBaseIE):
IE_NAME = 'soop'
@@ -155,7 +163,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
'uploader': ('writer_nick', {str}),
'uploader_id': ('bj_id', {str}),
'duration': ('total_file_duration', {int_or_none(scale=1000)}),
'thumbnail': ('thumb', {url_or_none}),
'thumbnails': ('thumb', {self._fixup_thumb}),
})
entries = []
@@ -226,8 +234,7 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
return self.playlist_result(self._entries(data), video_id)
@staticmethod
def _entries(data):
def _entries(self, data):
# 'files' is always a list with 1 element
yield from traverse_obj(data, (
'data', lambda _, v: v['story_type'] == 'catch',
@@ -238,7 +245,7 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
'title': ('title', {str}),
'uploader': ('writer_nick', {str}),
'uploader_id': ('writer_id', {str}),
'thumbnail': ('thumb', {url_or_none}),
'thumbnails': ('thumb', {self._fixup_thumb}),
'timestamp': ('write_timestamp', {int_or_none}),
}))

View File

@@ -205,6 +205,26 @@ class ArchiveOrgIE(InfoExtractor):
},
},
],
}, {
# The reviewbody is None for one of the reviews; just need to extract data without crashing
'url': 'https://archive.org/details/gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
'info_dict': {
'id': 'gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
'ext': 'mp3',
'title': 'Stuck Inside of Mobile with the Memphis Blues Again',
'creators': ['Grateful Dead'],
'duration': 338.31,
'track': 'Stuck Inside of Mobile with the Memphis Blues Again',
'description': 'md5:764348a470b986f1217ffd38d6ac7b72',
'display_id': 'gd95-04-02d1t04.shn',
'location': 'Pyramid Arena',
'uploader': 'jon@archive.org',
'album': '1995-04-02 - Pyramid Arena',
'upload_date': '20040519',
'track_number': 4,
'release_date': '19950402',
'timestamp': 1084927901,
},
}]
@staticmethod
@@ -335,7 +355,7 @@ class ArchiveOrgIE(InfoExtractor):
info['comments'].append({
'id': review.get('review_id'),
'author': review.get('reviewer'),
'text': str_or_none(review.get('reviewtitle'), '') + '\n\n' + review.get('reviewbody'),
'text': join_nonempty('reviewtitle', 'reviewbody', from_dict=review, delim='\n\n'),
'timestamp': unified_timestamp(review.get('createdate')),
'parent': 'root'})

View File

@@ -279,6 +279,7 @@ class InfoExtractor:
thumbnails: A list of dictionaries, with the following entries:
* "id" (optional, string) - Thumbnail format ID
* "url"
* "ext" (optional, string) - actual image extension if not given in URL
* "preference" (optional, int) - quality of the image
* "width" (optional, int)
* "height" (optional, int)

View File

@@ -563,13 +563,13 @@ class FacebookIE(InfoExtractor):
return extract_video_data(try_get(
js_data, lambda x: x['jsmods']['instances'], list) or [])
def extract_dash_manifest(video, formats):
def extract_dash_manifest(vid_data, formats, mpd_url=None):
dash_manifest = traverse_obj(
video, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', expected_type=str)
vid_data, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', 'manifest_xml', expected_type=str)
if dash_manifest:
formats.extend(self._parse_mpd_formats(
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
mpd_url=url_or_none(video.get('dash_manifest_url'))))
mpd_url=url_or_none(video.get('dash_manifest_url')) or mpd_url))
def process_formats(info):
# Downloads with browser's User-Agent are rate limited. Working around
@@ -619,9 +619,12 @@
video = video['creation_story']
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
video.update(reel_info)
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
formats = []
q = qualities(['sd', 'hd'])
# Legacy formats extraction
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
('browser_native_sd_url', 'sd')):
@@ -629,7 +632,7 @@
if not playable_url:
continue
if determine_ext(playable_url) == 'mpd':
formats.extend(self._extract_mpd_formats(playable_url, video_id))
formats.extend(self._extract_mpd_formats(playable_url, video_id, fatal=False))
else:
formats.append({
'format_id': format_id,
@@ -638,6 +641,28 @@
'url': playable_url,
})
extract_dash_manifest(fmt_data, formats)
# New videoDeliveryResponse formats extraction
fmt_data = traverse_obj(video, ('videoDeliveryResponseFragment', 'videoDeliveryResponseResult'))
mpd_urls = traverse_obj(fmt_data, ('dash_manifest_urls', ..., 'manifest_url', {url_or_none}))
dash_manifests = traverse_obj(fmt_data, ('dash_manifests', lambda _, v: v['manifest_xml']))
for idx, dash_manifest in enumerate(dash_manifests):
extract_dash_manifest(dash_manifest, formats, mpd_url=traverse_obj(mpd_urls, idx))
if not dash_manifests:
# Only extract from MPD URLs if the manifests are not already provided
for mpd_url in mpd_urls:
formats.extend(self._extract_mpd_formats(mpd_url, video_id, fatal=False))
for prog_fmt in traverse_obj(fmt_data, ('progressive_urls', lambda _, v: v['progressive_url'])):
format_id = traverse_obj(prog_fmt, ('metadata', 'quality', {str.lower}))
formats.append({
'format_id': format_id,
# sd, hd formats w/o resolution info should be deprioritized below DASH
'quality': q(format_id) - 3,
'url': prog_fmt['progressive_url'],
})
for m3u8_url in traverse_obj(fmt_data, ('hls_playlist_urls', ..., 'hls_playlist_url', {url_or_none})):
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False, m3u8_id='hls'))
if not formats:
# Do not append false positive entry w/o any formats
return

View File

@@ -1,18 +1,24 @@
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
format_field,
extract_attributes,
get_element_by_class,
get_element_html_by_id,
int_or_none,
parse_iso8601,
remove_end,
smuggle_url,
str_or_none,
strip_or_none,
str_to_int,
try_get,
unsmuggle_url,
url_or_none,
urlencode_postdata,
)
from ..utils.traversal import traverse_obj
class VidioBaseIE(InfoExtractor):
@@ -35,6 +41,7 @@ class VidioBaseIE(InfoExtractor):
login_form.update({
'user[login]': username,
'user[password]': password,
'authenticity_token': self._html_search_meta('csrf-token', login_page, fatal=True),
})
login_post, login_post_urlh = self._download_webpage_handle(
self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), expected_status=[302, 401])
@@ -58,6 +65,7 @@
def _initialize_pre_login(self):
self._api_key = self._download_json(
'https://www.vidio.com/auth', None, data=b'')['api_key']
self._ua = self.get_param('http_headers')['User-Agent']
def _call_api(self, url, video_id, note=None):
return self._download_json(url, video_id, note=note, headers={
@@ -67,7 +75,9 @@
class VidioIE(VidioBaseIE):
_GEO_COUNTRIES = ['ID']
_VALID_URL = r'https?://(?:www\.)?vidio\.com/(watch|embed)/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
_EMBED_REGEX = [rf'(?x)<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
_TESTS = [{
'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015',
'md5': 'abac81b1a205a8d94c609a473b5ea62a',
@@ -77,113 +87,317 @@ class VidioIE(VidioBaseIE):
'ext': 'mp4',
'title': 'DJ_AMBRED - Booyah (Live 2015)',
'description': 'md5:27dc15f819b6a78a626490881adbadf8',
'thumbnail': r're:^https?://.*\.jpg$',
'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$',
'duration': 149,
'like_count': int,
'uploader': 'TWELVE Pic',
'timestamp': 1444902800,
'uploader': 'twelvepictures',
'timestamp': 1444902960,
'upload_date': '20151015',
'uploader_id': 'twelvepictures',
'channel': 'Cover Music Video',
'uploader_id': '270115',
'channel': 'cover-music-video',
'channel_id': '280236',
'view_count': int,
'dislike_count': int,
'comment_count': int,
'channel_url': 'https://www.vidio.com/@twelvepictures/channels/280236-cover-music-video',
'tags': 'count:3',
'uploader_url': 'https://www.vidio.com/@twelvepictures',
'live_status': 'not_live',
'genres': ['vlog', 'comedy', 'edm'],
'season_id': '',
'season_name': '',
'age_limit': 13,
'comment_count': int,
},
'params': {
'getcomments': True,
},
}, {
# DRM protected
'url': 'https://www.vidio.com/watch/7095853-ep-04-sketch-book',
'md5': 'abac81b1a205a8d94c609a473b5ea62a',
'info_dict': {
'id': '7095853',
'display_id': 'ep-04-sketch-book',
'ext': 'mp4',
'title': 'Ep 04 - Sketch Book',
'description': 'md5:9e22b4b1dbd65209c143d7009e899830',
'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$',
'duration': 2784,
'uploader': 'vidiooriginal',
'timestamp': 1658509200,
'upload_date': '20220722',
'uploader_id': '31052580',
'channel': 'cupcake-untuk-rain',
'channel_id': '52332655',
'channel_url': 'https://www.vidio.com/@vidiooriginal/channels/52332655-cupcake-untuk-rain',
'tags': [],
'uploader_url': 'https://www.vidio.com/@vidiooriginal',
'live_status': 'not_live',
'genres': ['romance', 'drama', 'comedy', 'Teen', 'love triangle'],
'season_id': '8220',
'season_name': 'Season 1',
'age_limit': 13,
'availability': 'premium_only',
'comment_count': int,
},
'expected_warnings': ['This video is DRM protected'],
'params': {
'getcomments': True,
'skip_download': True,
'ignore_no_formats_error': True,
},
}, {
'url': 'https://www.vidio.com/watch/7439193-episode-1-magic-5',
'md5': 'b1644c574aeb20c91503be367ac2d211',
'info_dict': {
'id': '7439193',
'display_id': 'episode-1-magic-5',
'ext': 'mp4',
'title': 'Episode 1 - Magic 5',
'description': 'md5:367255f9e8e7ad7192c26218f01b6260',
'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$',
'duration': 6126,
'uploader': 'indosiar',
'timestamp': 1679315400,
'upload_date': '20230320',
'uploader_id': '12',
'channel': 'magic-5',
'channel_id': '52350795',
'channel_url': 'https://www.vidio.com/@indosiar/channels/52350795-magic-5',
'tags': ['basmalah', 'raden-rakha', 'eby-da-5', 'sinetron', 'afan-da-5', 'sridevi-da5'],
'uploader_url': 'https://www.vidio.com/@indosiar',
'live_status': 'not_live',
'genres': ['drama', 'fantasy', 'friendship'],
'season_id': '11017',
'season_name': 'Episode',
'age_limit': 13,
},
}, {
'url': 'https://www.vidio.com/watch/1716926-mas-suka-masukin-aja',
'md5': 'acc4009eeac0033328419aada7bc6925',
'info_dict': {
'id': '1716926',
'display_id': 'mas-suka-masukin-aja',
'ext': 'mp4',
'title': 'Mas Suka, Masukin Aja',
'description': 'md5:667093b08e07b6fb92f68037f81f2267',
'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$',
'duration': 5080,
'uploader': 'vidiopremier',
'timestamp': 1564735560,
'upload_date': '20190802',
'uploader_id': '26094842',
'channel': 'mas-suka-masukin-aja',
'channel_id': '34112289',
'channel_url': 'https://www.vidio.com/@vidiopremier/channels/34112289-mas-suka-masukin-aja',
'tags': [],
'uploader_url': 'https://www.vidio.com/@vidiopremier',
'live_status': 'not_live',
'genres': ['comedy', 'romance'],
'season_id': '663',
'season_name': '',
'age_limit': 18,
'availability': 'premium_only',
},
'params': {
'ignore_no_formats_error': True,
},
'expected_warnings': ['This show isn\'t available in your country'],
}, {
'url': 'https://www.vidio.com/watch/2372948-first-day-of-school-kindergarten-life-song-beabeo-nursery-rhymes-kids-songs',
'md5': 'c6d1bde08eee88bea27cca9dc38bc3df',
'info_dict': {
'id': '2372948',
'display_id': 'first-day-of-school-kindergarten-life-song-beabeo-nursery-rhymes-kids-songs',
'ext': 'mp4',
'title': 'First Day of School | Kindergarten Life Song | BeaBeo Nursery Rhymes & Kids Songs',
'description': 'md5:d505486a67415903f7f3ab61adfd5a91',
'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$',
'duration': 517,
'uploader': 'kidsstartv',
'timestamp': 1638518400,
'upload_date': '20211203',
'uploader_id': '38247189',
'channel': 'beabeo-school-series',
'channel_id': '52311987',
'channel_url': 'https://www.vidio.com/@kidsstartv/channels/52311987-beabeo-school-series',
'tags': [],
'uploader_url': 'https://www.vidio.com/@kidsstartv',
'live_status': 'not_live',
'genres': ['animation', 'Cartoon'],
'season_id': '6023',
'season_name': 'school series',
},
}, {
'url': 'https://www.vidio.com/watch/1550718-stand-by-me-doraemon',
'md5': '405b61a2f06c74e052e0bd67cad6b891',
'info_dict': {
'id': '1550718',
'display_id': 'stand-by-me-doraemon',
'ext': 'mp4',
'title': 'Stand by Me Doraemon',
'description': 'md5:673d899f6a58dd4b0d18aebe30545e2a',
'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$',
'duration': 5429,
'uploader': 'vidiopremier',
'timestamp': 1545815634,
'upload_date': '20181226',
'uploader_id': '26094842',
'channel': 'stand-by-me-doraemon',
'channel_id': '29750953',
'channel_url': 'https://www.vidio.com/@vidiopremier/channels/29750953-stand-by-me-doraemon',
'tags': ['anime-lucu', 'top-10-this-week', 'kids', 'stand-by-me-doraemon-2'],
'uploader_url': 'https://www.vidio.com/@vidiopremier',
'live_status': 'not_live',
'genres': ['anime', 'family', 'adventure', 'comedy', 'coming of age'],
'season_id': '237',
'season_name': '',
'age_limit': 7,
'availability': 'premium_only',
},
'params': {
'ignore_no_formats_error': True,
},
'expected_warnings': ['This show isn\'t available in your country'],
}, {
# 404 Not Found
'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
'only_matching': True,
}, {
# Premier-exclusive video
'url': 'https://www.vidio.com/watch/1550718-stand-by-me-doraemon',
'only_matching': True,
}, {
# embed url from https://enamplus.liputan6.com/read/5033648/video-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah
'url': 'https://www.vidio.com/embed/7115874-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah',
}]
_WEBPAGE_TESTS = [{
# embed player: https://www.vidio.com/embed/7115874-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah
'url': 'https://enamplus.liputan6.com/read/5033648/video-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah',
'info_dict': {
'id': '7115874',
'ext': 'mp4',
'channel_id': '40172876',
'comment_count': int,
'uploader_id': 'liputan6',
'view_count': int,
'dislike_count': int,
'upload_date': '20220804',
'uploader': 'Liputan6.com',
'display_id': 'fakta-temuan-suspek-cacar-monyet-di-jawa-tengah',
'channel': 'ENAM PLUS 165',
'timestamp': 1659605520,
'ext': 'mp4',
'title': 'Fakta Temuan Suspek Cacar Monyet di Jawa Tengah',
'duration': 59,
'like_count': int,
'tags': ['monkeypox indonesia', 'cacar monyet menyebar', 'suspek cacar monyet di indonesia', 'fakta', 'hoax atau bukan?', 'jawa tengah'],
'thumbnail': 'https://thumbor.prod.vidiocdn.com/83PN-_BKm5sS7emLtRxl506MLqQ=/640x360/filters:quality(70)/vidio-web-prod-video/uploads/video/image/7115874/fakta-suspek-cacar-monyet-di-jawa-tengah-24555a.jpg',
'uploader_url': 'https://www.vidio.com/@liputan6',
'description': 'md5:6d595a18d3b19ee378e335a6f288d5ac',
'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$',
'duration': 59,
'uploader': 'liputan6',
'timestamp': 1659605693,
'upload_date': '20220804',
'uploader_id': '139',
'channel': 'enam-plus-165',
'channel_id': '40172876',
'channel_url': 'https://www.vidio.com/@liputan6/channels/40172876-enam-plus-165',
'tags': ['monkeypox-indonesia', 'cacar-monyet-menyebar', 'suspek-cacar-monyet-di-indonesia', 'fakta', 'hoax-atau-bukan', 'jawa-tengah'],
'uploader_url': 'https://www.vidio.com/@liputan6',
'live_status': 'not_live',
'genres': ['health'],
'season_id': '',
'season_name': '',
'age_limit': 13,
'comment_count': int,
},
'params': {
'getcomments': True,
},
}]
def _real_extract(self, url):
match = self._match_valid_url(url).groupdict()
video_id, display_id = match.get('id'), match.get('display_id')
data = self._call_api('https://api.vidio.com/videos/' + video_id, display_id)
video = data['videos'][0]
title = video['title'].strip()
is_premium = video.get('is_premium')
video_id, display_id = self._match_valid_url(url).groups()
if is_premium:
sources = self._download_json(
f'https://www.vidio.com/interactions_stream.json?video_id={video_id}&type=videos',
display_id, note='Downloading premier API JSON')
if not (sources.get('source') or sources.get('source_dash')):
self.raise_login_required('This video is only available for registered users with the appropriate subscription')
webpage = self._download_webpage(url, video_id)
api_data = self._call_api(f'https://api.vidio.com/videos/{video_id}', display_id, 'Downloading API data')
interactions_stream = self._download_json(
'https://www.vidio.com/interactions_stream.json', video_id,
query={'video_id': video_id, 'type': 'videos'}, note='Downloading stream info',
errnote='Unable to download stream info')
formats, subs = [], {}
if sources.get('source'):
hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
sources['source'], display_id, 'mp4', 'm3u8_native')
formats.extend(hls_formats)
subs.update(hls_subs)
if sources.get('source_dash'): # TODO: Find video example with source_dash
dash_formats, dash_subs = self._extract_mpd_formats_and_subtitles(
sources['source_dash'], display_id, 'dash')
formats.extend(dash_formats)
subs.update(dash_subs)
else:
hls_url = data['clips'][0]['hls_url']
formats, subs = self._extract_m3u8_formats_and_subtitles(
hls_url, display_id, 'mp4', 'm3u8_native')
attrs = extract_attributes(get_element_html_by_id(f'player-data-{video_id}', webpage))
get_first = lambda x: try_get(data, lambda y: y[x + 's'][0], dict) or {}
channel = get_first('channel')
user = get_first('user')
username = user.get('username')
get_count = lambda x: int_or_none(video.get('total_' + x))
if traverse_obj(attrs, ('data-drm-enabled', {lambda x: x == 'true'})):
self.report_drm(video_id)
if traverse_obj(attrs, ('data-geoblock', {lambda x: x == 'true'})):
self.raise_geo_restricted(
'This show isn\'t available in your country', countries=['ID'], metadata_available=True)
subtitles = dict(traverse_obj(attrs, ('data-subtitles', {json.loads}, ..., {
lambda x: (x['language'], [{'url': x['file']['url']}]),
})))
formats = []
# There are time-based strings in the playlist URL,
# so try the other URL iff no formats extracted from the prior one.
for m3u8_url in traverse_obj([
interactions_stream.get('source'),
attrs.get('data-vjs-clip-hls-url')], (..., {url_or_none})):
fmt, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, ext='mp4', m3u8_id='hls')
formats.extend(fmt)
self._merge_subtitles(subs, target=subtitles)
if fmt:
break
for mpd_url in traverse_obj([
interactions_stream.get('source_dash'),
attrs.get('data-vjs-clip-dash-url')], (..., {url_or_none})):
fmt, subs = self._extract_mpd_formats_and_subtitles(mpd_url, video_id, mpd_id='dash')
formats.extend(fmt)
self._merge_subtitles(subs, target=subtitles)
if fmt:
break
# TODO: extract also short previews of premier-exclusive videos from "attrs['data-content-preview-url']".
uploader = attrs.get('data-video-username')
uploader_url = f'https://www.vidio.com/@{uploader}'
channel = attrs.get('data-video-channel')
channel_id = attrs.get('data-video-channel-id')
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': strip_or_none(video.get('description')),
'thumbnail': video.get('image_url_medium'),
'duration': int_or_none(video.get('duration')),
'like_count': get_count('likes'),
'title': (traverse_obj(api_data, ('videos', 0, 'title'))
or attrs.get('data-video-title')
or self._html_extract_title(webpage)),
'live_status': 'not_live',
'formats': formats,
'subtitles': subs,
'uploader': user.get('name'),
'timestamp': parse_iso8601(video.get('created_at')),
'uploader_id': username,
'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
'channel': channel.get('name'),
'channel_id': str_or_none(channel.get('id')),
'view_count': get_count('view_count'),
'dislike_count': get_count('dislikes'),
'comment_count': get_count('comments'),
'tags': video.get('tag_list'),
'subtitles': subtitles,
'channel': channel,
'channel_id': channel_id,
'channel_url': f'{uploader_url}/channels/{channel_id}-{channel}',
'genres': traverse_obj(attrs, ('data-genres', {str_or_none}, {str.split(sep=',')}), default=[]),
'season_id': traverse_obj(attrs, ('data-season-id', {str_or_none})),
'season_name': traverse_obj(attrs, ('data-season-name', {str})),
'uploader': uploader,
'uploader_id': traverse_obj(attrs, ('data-video-user-id', {str_or_none})),
'uploader_url': uploader_url,
'thumbnail': traverse_obj(attrs, ('data-video-image-url', {url_or_none})),
'duration': traverse_obj(attrs, ('data-video-duration', {str_to_int})),
'description': traverse_obj(attrs, ('data-video-description', {str})),
'availability': self._availability(needs_premium=(attrs.get('data-access-type') == 'premium')),
'tags': traverse_obj(attrs, ('data-video-tags', {str_or_none}, {str.split(sep=',')}), default=[]),
'timestamp': traverse_obj(attrs, ('data-video-publish-date', {parse_iso8601(delimiter=' ')})),
'age_limit': (traverse_obj(attrs, ('data-adult', {lambda x: 18 if x == 'true' else 0}))
or traverse_obj(attrs, ('data-content-rating-option', {remove_end(end=' or more')}, {str_to_int}))),
'__post_extractor': self.extract_comments(video_id),
}
def _get_comments(self, video_id):
# TODO: extract replies under comments
def extract_comments(comments_data):
users = dict(traverse_obj(comments_data, ('included', ..., {
lambda x: (x['id'], {
'author': x['attributes']['username'],
'author_thumbnail': url_or_none(x['attributes']['avatar_url_big'] or x['attributes']['avatar_url_small']),
'author_url': url_or_none(x['links']['self']),
}),
})))
yield from traverse_obj(comments_data, ('data', ..., {
'id': 'id',
'text': ('attributes', 'content'),
'timestamp': ('attributes', 'created_at', {parse_iso8601}),
'like_count': ('attributes', 'likes'),
'author_id': ('attributes', 'user_id'),
}, {lambda x: {**x, **users.get(x['author_id'])}}))
comment_page_url = f'https://api.vidio.com/videos/{video_id}/comments'
while comment_page_url:
comments_data = self._call_api(comment_page_url, video_id, 'Downloading comments')
comment_page_url = traverse_obj(comments_data, ('links', 'next', {url_or_none}))
yield from extract_comments(comments_data)
class VidioPremierIE(VidioBaseIE):
_VALID_URL = r'https?://(?:www\.)?vidio\.com/premier/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
@@ -234,10 +448,43 @@ class VidioLiveIE(VidioBaseIE):
'url': 'https://www.vidio.com/live/204-sctv',
'info_dict': {
'id': '204',
'title': 'SCTV',
'uploader': 'SCTV',
'uploader_id': 'sctv',
'thumbnail': r're:^https?://.*\.jpg$',
'ext': 'mp4',
'title': r're:SCTV \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
'display_id': 'sctv',
'uploader': 'sctv',
'uploader_id': '4',
'uploader_url': 'https://www.vidio.com/@sctv',
'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$',
'live_status': 'is_live',
'description': r're:^SCTV merupakan stasiun televisi nasional terkemuka di Indonesia.+',
'like_count': int,
'dislike_count': int,
'timestamp': 1461258000,
'upload_date': '20160421',
'tags': [],
'genres': [],
'age_limit': 13,
},
}, {
'url': 'https://vidio.com/live/733-trans-tv',
'info_dict': {
'id': '733',
'ext': 'mp4',
'title': r're:TRANS TV \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
'display_id': 'trans-tv',
'uploader': 'transtv',
'uploader_id': '551300',
'uploader_url': 'https://www.vidio.com/@transtv',
'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$',
'live_status': 'is_live',
'description': r're:^Trans TV adalah stasiun televisi swasta Indonesia.+',
'like_count': int,
'dislike_count': int,
'timestamp': 1461355080,
'upload_date': '20160422',
'tags': [],
'genres': [],
'age_limit': 13,
},
}, {
# Premier-exclusive livestream
@@ -251,59 +498,60 @@
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).groups()
stream_data = self._call_api(
f'https://www.vidio.com/api/livestreamings/{video_id}/detail', display_id)
stream_meta = stream_data['livestreamings'][0]
user = stream_data.get('users', [{}])[0]
title = stream_meta.get('title')
username = user.get('username')
webpage = self._download_webpage(url, video_id)
stream_meta = traverse_obj(self._call_api(
f'https://www.vidio.com/api/livestreamings/{video_id}/detail', video_id),
('livestreamings', 0, {dict}), default={})
tokenized_playlist_urls = self._download_json(
f'https://www.vidio.com/live/{video_id}/tokens', video_id,
query={'type': 'dash'}, note='Downloading tokenized playlist',
errnote='Unable to download tokenized playlist', data=b'')
interactions_stream = self._download_json(
'https://www.vidio.com/interactions_stream.json', video_id,
query={'video_id': video_id, 'type': 'videos'}, note='Downloading stream info',
errnote='Unable to download stream info')
attrs = extract_attributes(get_element_html_by_id(f'player-data-{video_id}', webpage))
if traverse_obj(attrs, ('data-drm-enabled', {lambda x: x == 'true'})):
self.report_drm(video_id)
if traverse_obj(attrs, ('data-geoblock', {lambda x: x == 'true'})):
self.raise_geo_restricted(
'This show isn\'t available in your country', countries=['ID'], metadata_available=True)
formats = []
if stream_meta.get('is_drm'):
if not self.get_param('allow_unplayable_formats'):
self.report_drm(video_id)
if stream_meta.get('is_premium'):
sources = self._download_json(
f'https://www.vidio.com/interactions_stream.json?video_id={video_id}&type=livestreamings',
display_id, note='Downloading premier API JSON')
if not (sources.get('source') or sources.get('source_dash')):
self.raise_login_required('This video is only available for registered users with the appropriate subscription')
if str_or_none(sources.get('source')):
token_json = self._download_json(
f'https://www.vidio.com/live/{video_id}/tokens',
display_id, note='Downloading HLS token JSON', data=b'')
formats.extend(self._extract_m3u8_formats(
sources['source'] + '?' + token_json.get('token', ''), display_id, 'mp4', 'm3u8_native'))
if str_or_none(sources.get('source_dash')):
pass
else:
if stream_meta.get('stream_token_url'):
token_json = self._download_json(
f'https://www.vidio.com/live/{video_id}/tokens',
display_id, note='Downloading HLS token JSON', data=b'')
formats.extend(self._extract_m3u8_formats(
stream_meta['stream_token_url'] + '?' + token_json.get('token', ''),
display_id, 'mp4', 'm3u8_native'))
if stream_meta.get('stream_dash_url'):
pass
if stream_meta.get('stream_url'):
formats.extend(self._extract_m3u8_formats(
stream_meta['stream_url'], display_id, 'mp4', 'm3u8_native'))
for m3u8_url in traverse_obj([
tokenized_playlist_urls.get('hls_url'),
interactions_stream.get('source')], (..., {url_or_none})):
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='hls'))
for mpd_url in traverse_obj([
tokenized_playlist_urls.get('dash_url'),
interactions_stream.get('source_dash')], (..., {url_or_none})):
formats.extend(self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash'))
uploader = attrs.get('data-video-username')
uploader_url = f'https://www.vidio.com/@{uploader}'
return {
'id': video_id,
'display_id': display_id,
'title': title,
'is_live': True,
'description': strip_or_none(stream_meta.get('description')),
'thumbnail': stream_meta.get('image'),
'title': attrs.get('data-video-title'),
'live_status': 'is_live',
'formats': formats,
'genres': traverse_obj(attrs, ('data-genres', {str_or_none}, {str.split(sep=',')}), default=[]),
'uploader': uploader,
'uploader_id': traverse_obj(attrs, ('data-video-user-id', {str_or_none})),
'uploader_url': uploader_url,
'thumbnail': traverse_obj(attrs, ('data-video-image-url', {url_or_none})),
'description': traverse_obj(attrs, ('data-video-description', {str})),
'availability': self._availability(needs_premium=(attrs.get('data-access-type') == 'premium')),
'tags': traverse_obj(attrs, ('data-video-tags', {str_or_none}, {str.split(sep=',')}), default=[]),
'age_limit': (traverse_obj(attrs, ('data-adult', {lambda x: 18 if x == 'true' else 0}))
or traverse_obj(attrs, ('data-content-rating-option', {remove_end(end=' or more')}, {str_to_int}))),
'like_count': int_or_none(stream_meta.get('like')),
'dislike_count': int_or_none(stream_meta.get('dislike')),
'formats': formats,
'uploader': user.get('name'),
'timestamp': parse_iso8601(stream_meta.get('start_time')),
'uploader_id': username,
'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
}

View File

@ -216,7 +216,7 @@ def partial_application(func):
sig = inspect.signature(func)
required_args = [
param.name for param in sig.parameters.values()
if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.VAR_POSITIONAL)
if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
if param.default is inspect.Parameter.empty
]
@ -4837,7 +4837,6 @@ def number_of_digits(number):
return len('%d' % number)
@partial_application
def join_nonempty(*values, delim='-', from_dict=None):
if from_dict is not None:
values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)

View File

@ -332,14 +332,14 @@ class _RequiredError(ExtractorError):
@typing.overload
def subs_list_to_dict(*, ext: str | None = None) -> collections.abc.Callable[[list[dict]], dict[str, list[dict]]]: ...
def subs_list_to_dict(*, lang: str | None = 'und', ext: str | None = None) -> collections.abc.Callable[[list[dict]], dict[str, list[dict]]]: ...
@typing.overload
def subs_list_to_dict(subs: list[dict] | None, /, *, ext: str | None = None) -> dict[str, list[dict]]: ...
def subs_list_to_dict(subs: list[dict] | None, /, *, lang: str | None = 'und', ext: str | None = None) -> dict[str, list[dict]]: ...
def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
def subs_list_to_dict(subs: list[dict] | None = None, /, *, lang='und', ext=None):
"""
Convert subtitles from a traversal into a subtitle dict.
The path should have an `all` immediately before this function.
@ -352,7 +352,7 @@ def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
`quality` The sort order for each subtitle
"""
if subs is None:
return functools.partial(subs_list_to_dict, ext=ext)
return functools.partial(subs_list_to_dict, lang=lang, ext=ext)
result = collections.defaultdict(list)
@ -360,9 +360,15 @@ def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
if not url_or_none(sub.get('url')) and not sub.get('data'):
continue
sub_id = sub.pop('id', None)
if sub_id is None:
if not isinstance(sub_id, str):
if not lang:
continue
if ext is not None and not sub.get('ext'):
sub_id = lang
sub_ext = sub.get('ext')
if not isinstance(sub_ext, str):
if not ext:
sub.pop('ext', None)
else:
sub['ext'] = ext
result[sub_id].append(sub)
result = dict(result)
@ -452,9 +458,9 @@ def trim_str(*, start=None, end=None):
return trim
def unpack(func):
def unpack(func, **kwargs):
@functools.wraps(func)
def inner(items, **kwargs):
def inner(items):
return func(*items, **kwargs)
return inner