Compare commits

..

No commits in common. "b68bf3334c162bf56365f2f4d49326dc55ab5eab" and "b13be280c7f1693bac566eeaecab5e9805a911e8" have entirely different histories.

10 changed files with 371 additions and 578 deletions

View File

@ -1809,7 +1809,6 @@ The following extractors use this feature:
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8) * `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8)
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
* `innertube_key`: Innertube API key to use for all API requests * `innertube_key`: Innertube API key to use for all API requests
* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
#### youtubetab (YouTube playlists, channels, feeds, etc.) #### youtubetab (YouTube playlists, channels, feeds, etc.)
* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)

View File

@ -951,7 +951,6 @@ from .lastfm import (
from .lbry import ( from .lbry import (
LBRYIE, LBRYIE,
LBRYChannelIE, LBRYChannelIE,
LBRYPlaylistIE,
) )
from .lci import LCIIE from .lci import LCIIE
from .lcp import ( from .lcp import (

View File

@ -181,102 +181,18 @@ class ABCIViewIE(InfoExtractor):
_GEO_COUNTRIES = ['AU'] _GEO_COUNTRIES = ['AU']
_TESTS = [{ _TESTS = [{
'url': 'https://iview.abc.net.au/show/utopia/series/1/video/CO1211V001S00',
'md5': '52a942bfd7a0b79a6bfe9b4ce6c9d0ed',
'info_dict': {
'id': 'CO1211V001S00',
'ext': 'mp4',
'title': 'Series 1 Ep 1 Wood For The Trees',
'series': 'Utopia',
'description': 'md5:0cfb2c183c1b952d1548fd65c8a95c00',
'upload_date': '20230726',
'uploader_id': 'abc1',
'series_id': 'CO1211V',
'episode_id': 'CO1211V001S00',
'season_number': 1,
'season': 'Season 1',
'episode_number': 1,
'episode': 'Wood For The Trees',
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/co/CO1211V001S00_5ad8353f4df09_1280.jpg',
'timestamp': 1690403700,
},
'params': {
'skip_download': True,
},
}, {
'note': 'No episode name',
'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00', 'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
'md5': '67715ce3c78426b11ba167d875ac6abf', 'md5': '67715ce3c78426b11ba167d875ac6abf',
'info_dict': { 'info_dict': {
'id': 'LE1927H001S00', 'id': 'LE1927H001S00',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Series 11 Ep 1', 'title': "Series 11 Ep 1",
'series': 'Gruen', 'series': "Gruen",
'description': 'md5:52cc744ad35045baf6aded2ce7287f67', 'description': 'md5:52cc744ad35045baf6aded2ce7287f67',
'upload_date': '20190925', 'upload_date': '20190925',
'uploader_id': 'abc1', 'uploader_id': 'abc1',
'series_id': 'LE1927H',
'episode_id': 'LE1927H001S00',
'season_number': 11,
'season': 'Season 11',
'episode_number': 1,
'episode': 'Episode 1',
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/le/LE1927H001S00_5d954fbd79e25_1280.jpg',
'timestamp': 1569445289, 'timestamp': 1569445289,
}, },
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
'params': {
'skip_download': True,
},
}, {
'note': 'No episode number',
'url': 'https://iview.abc.net.au/show/four-corners/series/2022/video/NC2203H039S00',
'md5': '77cb7d8434440e3b28fbebe331c2456a',
'info_dict': {
'id': 'NC2203H039S00',
'ext': 'mp4',
'title': 'Series 2022 Locking Up Kids',
'series': 'Four Corners',
'description': 'md5:54829ca108846d1a70e1fcce2853e720',
'upload_date': '20221114',
'uploader_id': 'abc1',
'series_id': 'NC2203H',
'episode_id': 'NC2203H039S00',
'season_number': 2022,
'season': 'Season 2022',
'episode_number': None,
'episode': 'Locking Up Kids',
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/nc/NC2203H039S00_636d8a0944a22_1920.jpg',
'timestamp': 1668460497,
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
'params': {
'skip_download': True,
},
}, {
'note': 'No episode name or number',
'url': 'https://iview.abc.net.au/show/landline/series/2021/video/RF2004Q043S00',
'md5': '2e17dec06b13cc81dc119d2565289396',
'info_dict': {
'id': 'RF2004Q043S00',
'ext': 'mp4',
'title': 'Series 2021',
'series': 'Landline',
'description': 'md5:c9f30d9c0c914a7fd23842f6240be014',
'upload_date': '20211205',
'uploader_id': 'abc1',
'series_id': 'RF2004Q',
'episode_id': 'RF2004Q043S00',
'season_number': 2021,
'season': 'Season 2021',
'episode_number': None,
'episode': None,
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/rf/RF2004Q043S00_61a950639dbc0_1920.jpg',
'timestamp': 1638710705,
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
@ -338,8 +254,6 @@ class ABCIViewIE(InfoExtractor):
'episode_number': int_or_none(self._search_regex( 'episode_number': int_or_none(self._search_regex(
r'\bEp\s+(\d+)\b', title, 'episode number', default=None)), r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
'episode_id': house_number, 'episode_id': house_number,
'episode': self._search_regex(
r'^(?:Series\s+\d+)?\s*(?:Ep\s+\d+)?\s*(.*)$', title, 'episode', default='') or None,
'uploader_id': video_params.get('channel'), 'uploader_id': video_params.get('channel'),
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,

View File

@ -60,7 +60,7 @@ class GofileIE(InfoExtractor):
account_data = self._download_json( account_data = self._download_json(
'https://api.gofile.io/createAccount', None, note='Getting a new guest account') 'https://api.gofile.io/createAccount', None, note='Getting a new guest account')
self._TOKEN = account_data['data']['token'] self._TOKEN = account_data['data']['token']
self._set_cookie('.gofile.io', 'accountToken', self._TOKEN) self._set_cookie('gofile.io', 'accountToken', self._TOKEN)
def _entries(self, file_id): def _entries(self, file_id):
query_params = { query_params = {

View File

@ -22,11 +22,10 @@ from ..utils import (
class LBRYBaseIE(InfoExtractor): class LBRYBaseIE(InfoExtractor):
_BASE_URL_REGEX = r'(?x)(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)' _BASE_URL_REGEX = r'(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)'
_CLAIM_ID_REGEX = r'[0-9a-f]{1,40}' _CLAIM_ID_REGEX = r'[0-9a-f]{1,40}'
_OPT_CLAIM_ID = '[^$@:/?#&]+(?:[:#]%s)?' % _CLAIM_ID_REGEX _OPT_CLAIM_ID = '[^:/?#&]+(?:[:#]%s)?' % _CLAIM_ID_REGEX
_SUPPORTED_STREAM_TYPES = ['video', 'audio'] _SUPPORTED_STREAM_TYPES = ['video', 'audio']
_PAGE_SIZE = 50
def _call_api_proxy(self, method, display_id, params, resource): def _call_api_proxy(self, method, display_id, params, resource):
headers = {'Content-Type': 'application/json-rpc'} headers = {'Content-Type': 'application/json-rpc'}
@ -78,70 +77,10 @@ class LBRYBaseIE(InfoExtractor):
return info return info
def _fetch_page(self, display_id, url, params, page):
page += 1
page_params = {
'no_totals': True,
'page': page,
'page_size': self._PAGE_SIZE,
**params,
}
result = self._call_api_proxy(
'claim_search', display_id, page_params, f'page {page}')
for item in traverse_obj(result, ('items', lambda _, v: v['name'] and v['claim_id'])):
yield {
**self._parse_stream(item, url),
'_type': 'url',
'id': item['claim_id'],
'url': self._permanent_url(url, item['name'], item['claim_id']),
}
def _playlist_entries(self, url, display_id, claim_param, metadata):
qs = parse_qs(url)
content = qs.get('content', [None])[0]
params = {
'fee_amount': qs.get('fee_amount', ['>=0'])[0],
'order_by': {
'new': ['release_time'],
'top': ['effective_amount'],
'trending': ['trending_group', 'trending_mixed'],
}[qs.get('order', ['new'])[0]],
'claim_type': 'stream',
'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
**claim_param,
}
duration = qs.get('duration', [None])[0]
if duration:
params['duration'] = {
'long': '>=1200',
'short': '<=240',
}[duration]
language = qs.get('language', ['all'])[0]
if language != 'all':
languages = [language]
if language == 'en':
languages.append('none')
params['any_languages'] = languages
entries = OnDemandPagedList(
functools.partial(self._fetch_page, display_id, url, params),
self._PAGE_SIZE)
return self.playlist_result(
entries, display_id, **traverse_obj(metadata, ('value', {
'title': 'title',
'description': 'description',
})))
class LBRYIE(LBRYBaseIE): class LBRYIE(LBRYBaseIE):
IE_NAME = 'lbry' IE_NAME = 'lbry'
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf''' _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>\$/[^/]+/[^/]+/{1}|@{0}/{0}|(?!@){0})'.format(LBRYBaseIE._OPT_CLAIM_ID, LBRYBaseIE._CLAIM_ID_REGEX)
(?:\$/(?:download|embed)/)?
(?P<id>
[^$@:/?#]+/{LBRYBaseIE._CLAIM_ID_REGEX}
|(?:@{LBRYBaseIE._OPT_CLAIM_ID}/)?{LBRYBaseIE._OPT_CLAIM_ID}
)'''
_TESTS = [{ _TESTS = [{
# Video # Video
'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1', 'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
@ -212,7 +151,6 @@ class LBRYIE(LBRYBaseIE):
'channel': 'Gardening In Canada', 'channel': 'Gardening In Canada',
'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc', 'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc',
'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc', 'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc',
'uploader_id': '@gardeningincanada',
'formats': 'mincount:3', 'formats': 'mincount:3',
'thumbnail': 'https://thumbnails.lbry.com/AgHSc_HzrrE', 'thumbnail': 'https://thumbnails.lbry.com/AgHSc_HzrrE',
'license': 'Copyrighted (contact publisher)', 'license': 'Copyrighted (contact publisher)',
@ -249,13 +187,12 @@ class LBRYIE(LBRYBaseIE):
'id': '41fbfe805eb73c8d3012c0c49faa0f563274f634', 'id': '41fbfe805eb73c8d3012c0c49faa0f563274f634',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Biotechnological Invasion of Skin (April 2023)', 'title': 'Biotechnological Invasion of Skin (April 2023)',
'description': 'md5:fe28689db2cb7ba3436d819ac3ffc378', 'description': 'md5:709a2f4c07bd8891cda3a7cc2d6fcf5c',
'channel': 'Wicked Truths', 'channel': 'Wicked Truths',
'channel_id': '23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0', 'channel_id': '23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0',
'channel_url': 'https://odysee.com/@wickedtruths:23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0', 'channel_url': 'https://odysee.com/@wickedtruths:23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0',
'uploader_id': '@wickedtruths', 'timestamp': 1685790036,
'timestamp': 1695114347, 'upload_date': '20230603',
'upload_date': '20230919',
'release_timestamp': 1685617473, 'release_timestamp': 1685617473,
'release_date': '20230601', 'release_date': '20230601',
'duration': 1063, 'duration': 1063,
@ -295,10 +232,10 @@ class LBRYIE(LBRYBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
if display_id.startswith('@'): if display_id.startswith('$/'):
display_id = display_id.replace(':', '#') display_id = display_id.split('/', 2)[-1].replace('/', ':')
else: else:
display_id = display_id.replace('/', ':') display_id = display_id.replace(':', '#')
display_id = urllib.parse.unquote(display_id) display_id = urllib.parse.unquote(display_id)
uri = 'lbry://' + display_id uri = 'lbry://' + display_id
result = self._resolve_url(uri, display_id, 'stream') result = self._resolve_url(uri, display_id, 'stream')
@ -365,7 +302,7 @@ class LBRYIE(LBRYBaseIE):
class LBRYChannelIE(LBRYBaseIE): class LBRYChannelIE(LBRYBaseIE):
IE_NAME = 'lbry:channel' IE_NAME = 'lbry:channel'
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'(?P<id>@{LBRYBaseIE._OPT_CLAIM_ID})/?(?:[?&]|$)' _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>@%s)/?(?:[?&]|$)' % LBRYBaseIE._OPT_CLAIM_ID
_TESTS = [{ _TESTS = [{
'url': 'https://lbry.tv/@LBRYFoundation:0', 'url': 'https://lbry.tv/@LBRYFoundation:0',
'info_dict': { 'info_dict': {
@ -381,50 +318,65 @@ class LBRYChannelIE(LBRYBaseIE):
'url': 'lbry://@lbry#3f', 'url': 'lbry://@lbry#3f',
'only_matching': True, 'only_matching': True,
}] }]
_PAGE_SIZE = 50
def _fetch_page(self, claim_id, url, params, page):
page += 1
page_params = {
'channel_ids': [claim_id],
'claim_type': 'stream',
'no_totals': True,
'page': page,
'page_size': self._PAGE_SIZE,
}
page_params.update(params)
result = self._call_api_proxy(
'claim_search', claim_id, page_params, 'page %d' % page)
for item in (result.get('items') or []):
stream_claim_name = item.get('name')
stream_claim_id = item.get('claim_id')
if not (stream_claim_name and stream_claim_id):
continue
yield {
**self._parse_stream(item, url),
'_type': 'url',
'id': stream_claim_id,
'url': self._permanent_url(url, stream_claim_name, stream_claim_id),
}
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url).replace(':', '#') display_id = self._match_id(url).replace(':', '#')
result = self._resolve_url(f'lbry://{display_id}', display_id, 'channel') result = self._resolve_url(
'lbry://' + display_id, display_id, 'channel')
claim_id = result['claim_id'] claim_id = result['claim_id']
qs = parse_qs(url)
return self._playlist_entries(url, claim_id, {'channel_ids': [claim_id]}, result) content = qs.get('content', [None])[0]
params = {
'fee_amount': qs.get('fee_amount', ['>=0'])[0],
class LBRYPlaylistIE(LBRYBaseIE): 'order_by': {
IE_NAME = 'lbry:playlist' 'new': ['release_time'],
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'\$/(?:play)?list/(?P<id>[0-9a-f-]+)' 'top': ['effective_amount'],
_TESTS = [{ 'trending': ['trending_group', 'trending_mixed'],
'url': 'https://odysee.com/$/playlist/ffef782f27486f0ac138bde8777f72ebdd0548c2', }[qs.get('order', ['new'])[0]],
'info_dict': { 'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
'id': 'ffef782f27486f0ac138bde8777f72ebdd0548c2', }
'title': 'Théâtre Classique', duration = qs.get('duration', [None])[0]
'description': 'Théâtre Classique', if duration:
}, params['duration'] = {
'playlist_mincount': 4, 'long': '>=1200',
}, { 'short': '<=240',
'url': 'https://odysee.com/$/list/9c6658b3dd21e4f2a0602d523a13150e2b48b770', }[duration]
'info_dict': { language = qs.get('language', ['all'])[0]
'id': '9c6658b3dd21e4f2a0602d523a13150e2b48b770', if language != 'all':
'title': 'Social Media Exposed', languages = [language]
'description': 'md5:98af97317aacd5b85d595775ea37d80e', if language == 'en':
}, languages.append('none')
'playlist_mincount': 34, params['any_languages'] = languages
}, { entries = OnDemandPagedList(
'url': 'https://odysee.com/$/playlist/938fb11d-215f-4d1c-ad64-723954df2184', functools.partial(self._fetch_page, claim_id, url, params),
'info_dict': { self._PAGE_SIZE)
'id': '938fb11d-215f-4d1c-ad64-723954df2184', result_value = result.get('value') or {}
}, return self.playlist_result(
'playlist_mincount': 1000, entries, claim_id, result_value.get('title'),
}] result_value.get('description'))
def _real_extract(self, url):
display_id = self._match_id(url)
result = traverse_obj(self._call_api_proxy('claim_search', display_id, {
'claim_ids': [display_id],
'no_totals': True,
'page': 1,
'page_size': self._PAGE_SIZE,
}, 'playlist'), ('items', 0))
claim_param = {'claim_ids': traverse_obj(result, ('value', 'claims', ..., {str}))}
return self._playlist_entries(url, display_id, claim_param, result)

View File

@ -13,7 +13,7 @@ from ..utils import (
class LiTVIE(InfoExtractor): class LiTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)' _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)'
_URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?content_id=%s' _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s'
_TESTS = [{ _TESTS = [{
'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1', 'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
@ -21,18 +21,16 @@ class LiTVIE(InfoExtractor):
'id': 'VOD00041606', 'id': 'VOD00041606',
'title': '花千骨', 'title': '花千骨',
}, },
'playlist_count': 51, # 50 episodes + 1 trailer 'playlist_count': 50,
}, { }, {
'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1', 'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a', 'md5': '969e343d9244778cb29acec608e53640',
'info_dict': { 'info_dict': {
'id': 'VOD00041610', 'id': 'VOD00041610',
'ext': 'mp4', 'ext': 'mp4',
'title': '花千骨第1集', 'title': '花千骨第1集',
'thumbnail': r're:https?://.*\.jpg$', 'thumbnail': r're:https?://.*\.jpg$',
'description': '《花千骨》陸劇線上看。十六年前,平靜的村莊內,一名女嬰隨異相出生,途徑此地的蜀山掌門清虛道長算出此女命運非同一般,她體內散發的異香易招惹妖魔。一念慈悲下,他在村莊周邊設下結界阻擋妖魔入侵,讓其年滿十六後去蜀山,並賜名花千骨。', 'description': 'md5:c7017aa144c87467c4fb2909c4b05d6f',
'categories': ['奇幻', '愛情', '中國', '仙俠'],
'episode': 'Episode 1',
'episode_number': 1, 'episode_number': 1,
}, },
'params': { 'params': {
@ -48,17 +46,20 @@ class LiTVIE(InfoExtractor):
'title': '芈月傳第1集 霸星芈月降世楚國', 'title': '芈月傳第1集 霸星芈月降世楚國',
'description': '楚威王二年,太史令唐昧夜觀星象,發現霸星即將現世。王后得知霸星的預言後,想盡辦法不讓孩子順利出生,幸得莒姬相護化解危機。沒想到眾人期待下出生的霸星卻是位公主,楚威王對此失望至極。楚王后命人將女嬰丟棄河中,居然奇蹟似的被少司命像攔下,楚威王認為此女非同凡響,為她取名芈月。', 'description': '楚威王二年,太史令唐昧夜觀星象,發現霸星即將現世。王后得知霸星的預言後,想盡辦法不讓孩子順利出生,幸得莒姬相護化解危機。沒想到眾人期待下出生的霸星卻是位公主,楚威王對此失望至極。楚王后命人將女嬰丟棄河中,居然奇蹟似的被少司命像攔下,楚威王認為此女非同凡響,為她取名芈月。',
}, },
'skip': 'No longer exists', 'skip': 'Georestricted to Taiwan',
}] }]
def _extract_playlist(self, playlist_data, content_type): def _extract_playlist(self, season_list, video_id, program_info, prompt=True):
episode_title = program_info['title']
content_id = season_list['contentId']
all_episodes = [ all_episodes = [
self.url_result(smuggle_url( self.url_result(smuggle_url(
self._URL_TEMPLATE % (content_type, episode['contentId']), self._URL_TEMPLATE % (program_info['contentType'], episode['contentId']),
{'force_noplaylist': True})) # To prevent infinite recursion {'force_noplaylist': True})) # To prevent infinite recursion
for episode in traverse_obj(playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))] for episode in season_list['episode']]
return self.playlist_result(all_episodes, playlist_data['contentId'], playlist_data.get('title')) return self.playlist_result(all_episodes, content_id, episode_title)
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})
@ -67,31 +68,24 @@ class LiTVIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
if self._search_regex(
r'(?i)<meta\s[^>]*http-equiv="refresh"\s[^>]*content="[0-9]+;\s*url=https://www\.litv\.tv/"',
webpage, 'meta refresh redirect', default=False, group=0):
raise ExtractorError('No such content found', expected=True)
program_info = self._parse_json(self._search_regex( program_info = self._parse_json(self._search_regex(
r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'), r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
video_id) video_id)
# In browsers `getProgramInfo` request is always issued. Usually this season_list = list(program_info.get('seasonList', {}).values())
playlist_id = traverse_obj(season_list, 0, 'contentId')
if self._yes_playlist(playlist_id, video_id, smuggled_data):
return self._extract_playlist(season_list[0], video_id, program_info)
# In browsers `getMainUrl` request is always issued. Usually this
# endpoint gives the same result as the data embedded in the webpage. # endpoint gives the same result as the data embedded in the webpage.
# If, for some reason, there are no embedded data, we do an extra request. # If georestricted, there are no embedded data, so an extra request is
# necessary to get the error code
if 'assetId' not in program_info: if 'assetId' not in program_info:
program_info = self._download_json( program_info = self._download_json(
'https://www.litv.tv/vod/ajax/getProgramInfo', video_id, 'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
query={'contentId': video_id}, query={'contentId': video_id},
headers={'Accept': 'application/json'}) headers={'Accept': 'application/json'})
series_id = program_info['seriesId']
if self._yes_playlist(series_id, video_id, smuggled_data):
playlist_data = self._download_json(
'https://www.litv.tv/vod/ajax/getSeriesTree', video_id,
query={'seriesId': series_id}, headers={'Accept': 'application/json'})
return self._extract_playlist(playlist_data, program_info['contentType'])
video_data = self._parse_json(self._search_regex( video_data = self._parse_json(self._search_regex(
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);', r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
webpage, 'video data', default='{}'), video_id) webpage, 'video data', default='{}'), video_id)
@ -102,7 +96,7 @@ class LiTVIE(InfoExtractor):
'contentType': program_info['contentType'], 'contentType': program_info['contentType'],
} }
video_data = self._download_json( video_data = self._download_json(
'https://www.litv.tv/vod/ajax/getMainUrlNoAuth', video_id, 'https://www.litv.tv/vod/getMainUrl', video_id,
data=json.dumps(payload).encode('utf-8'), data=json.dumps(payload).encode('utf-8'),
headers={'Content-Type': 'application/json'}) headers={'Content-Type': 'application/json'})

View File

@ -2,74 +2,105 @@ import itertools
import json import json
import re import re
import time import time
from base64 import b64encode
from binascii import hexlify
from datetime import datetime
from hashlib import md5 from hashlib import md5
from random import randint from random import randint
from .common import InfoExtractor from .common import InfoExtractor
from ..aes import aes_ecb_encrypt, pkcs7_padding from ..aes import aes_ecb_encrypt, pkcs7_padding
from ..compat import compat_urllib_parse_urlencode
from ..networking import Request
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
bytes_to_intlist,
error_to_compat_str,
float_or_none,
int_or_none, int_or_none,
join_nonempty, intlist_to_bytes,
str_or_none, try_get,
strftime_or_none,
traverse_obj,
unified_strdate,
url_or_none,
urljoin,
variadic,
) )
class NetEaseMusicBaseIE(InfoExtractor): class NetEaseMusicBaseIE(InfoExtractor):
_FORMATS = ['bMusic', 'mMusic', 'hMusic'] _FORMATS = ['bMusic', 'mMusic', 'hMusic']
_NETEASE_SALT = '3go8&$8*3*3h0k(2)2'
_API_BASE = 'http://music.163.com/api/' _API_BASE = 'http://music.163.com/api/'
_GEO_BYPASS = False
@staticmethod @classmethod
def kilo_or_none(value): def _encrypt(cls, dfsid):
return int_or_none(value, scale=1000) salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
string_bytes = bytearray(str(dfsid).encode('ascii'))
salt_len = len(salt_bytes)
for i in range(len(string_bytes)):
string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len]
m = md5()
m.update(bytes(string_bytes))
result = b64encode(m.digest()).decode('ascii')
return result.replace('/', '_').replace('+', '-')
def _create_eapi_cipher(self, api_path, query_body, cookies): def make_player_api_request_data_and_headers(self, song_id, bitrate):
request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':')) KEY = b'e82ckenh8dichen8'
URL = '/api/song/enhance/player/url'
message = f'nobody{api_path}use{request_text}md5forencrypt'.encode('latin1') now = int(time.time() * 1000)
msg_digest = md5(message).hexdigest() rand = randint(0, 1000)
cookie = {
data = pkcs7_padding(list(str.encode( 'osver': None,
f'{api_path}-36cd479b6b5-{request_text}-36cd479b6b5-{msg_digest}'))) 'deviceId': None,
encrypted = bytes(aes_ecb_encrypt(data, list(b'e82ckenh8dichen8')))
return f'params={encrypted.hex().upper()}'.encode()
def _download_eapi_json(self, path, video_id, query_body, headers={}, **kwargs):
cookies = {
'osver': 'undefined',
'deviceId': 'undefined',
'appver': '8.0.0', 'appver': '8.0.0',
'versioncode': '140', 'versioncode': '140',
'mobilename': 'undefined', 'mobilename': None,
'buildver': '1623435496', 'buildver': '1623435496',
'resolution': '1920x1080', 'resolution': '1920x1080',
'__csrf': '', '__csrf': '',
'os': 'pc', 'os': 'pc',
'channel': 'undefined', 'channel': None,
'requestId': f'{int(time.time() * 1000)}_{randint(0, 1000):04}', 'requestId': '{0}_{1:04}'.format(now, rand),
**traverse_obj(self._get_cookies(self._API_BASE), {
'MUSIC_U': ('MUSIC_U', {lambda i: i.value}),
})
} }
return self._download_json( request_text = json.dumps(
urljoin('https://interface3.music.163.com/', f'/eapi{path}'), video_id, {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie},
data=self._create_eapi_cipher(f'/api{path}', query_body, cookies), headers={ separators=(',', ':'))
'Referer': 'https://music.163.com', message = 'nobody{0}use{1}md5forencrypt'.format(
'Cookie': '; '.join([f'{k}={v}' for k, v in cookies.items()]), URL, request_text).encode('latin1')
**headers, msg_digest = md5(message).hexdigest()
}, **kwargs)
data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
URL, request_text, msg_digest)
data = pkcs7_padding(bytes_to_intlist(data))
encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY)))
encrypted_params = hexlify(encrypted).decode('ascii').upper()
cookie = '; '.join(
['{0}={1}'.format(k, v if v is not None else 'undefined')
for [k, v] in cookie.items()])
headers = {
'User-Agent': self.extractor.get_param('http_headers')['User-Agent'],
'Content-Type': 'application/x-www-form-urlencoded',
'Referer': 'https://music.163.com',
'Cookie': cookie,
}
return ('params={0}'.format(encrypted_params), headers)
def _call_player_api(self, song_id, bitrate): def _call_player_api(self, song_id, bitrate):
return self._download_eapi_json( url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
'/song/enhance/player/url', song_id, {'ids': f'[{song_id}]', 'br': bitrate}, data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
note=f'Downloading song URL info: bitrate {bitrate}') try:
msg = 'empty result'
result = self._download_json(
url, song_id, data=data.encode('ascii'), headers=headers)
if result:
return result
except ExtractorError as e:
if type(e.cause) in (ValueError, TypeError):
# JSON load failure
raise
except Exception as e:
msg = error_to_compat_str(e)
self.report_warning('%s API call (%s) failed: %s' % (
song_id, bitrate, msg))
return {}
def extract_formats(self, info): def extract_formats(self, info):
err = 0 err = 0
@ -79,50 +110,45 @@ class NetEaseMusicBaseIE(InfoExtractor):
details = info.get(song_format) details = info.get(song_format)
if not details: if not details:
continue continue
bitrate = int_or_none(details.get('bitrate')) or 999000 bitrate = int_or_none(details.get('bitrate')) or 999000
for song in traverse_obj(self._call_player_api(song_id, bitrate), ('data', lambda _, v: url_or_none(v['url']))): data = self._call_player_api(song_id, bitrate)
song_url = song['url'] for song in try_get(data, lambda x: x['data'], list) or []:
song_url = try_get(song, lambda x: x['url'])
if not song_url:
continue
if self._is_valid_url(song_url, info['id'], 'song'): if self._is_valid_url(song_url, info['id'], 'song'):
formats.append({ formats.append({
'url': song_url, 'url': song_url,
'ext': details.get('extension'),
'abr': float_or_none(song.get('br'), scale=1000),
'format_id': song_format, 'format_id': song_format,
'asr': traverse_obj(details, ('sr', {int_or_none})), 'filesize': int_or_none(song.get('size')),
**traverse_obj(song, { 'asr': int_or_none(details.get('sr')),
'ext': ('type', {str}),
'abr': ('br', {self.kilo_or_none}),
'filesize': ('size', {int_or_none}),
}),
}) })
elif err == 0: elif err == 0:
err = traverse_obj(song, ('code', {int})) or 0 err = try_get(song, lambda x: x['code'], int)
if not formats: if not formats:
msg = 'No media links found'
if err != 0 and (err < 200 or err >= 400): if err != 0 and (err < 200 or err >= 400):
raise ExtractorError(f'No media links found (site code {err})', expected=True) raise ExtractorError(
'%s (site code %d)' % (msg, err, ), expected=True)
else: else:
self.raise_geo_restricted( self.raise_geo_restricted(
'No media links found: probably due to geo restriction.', countries=['CN']) msg + ': probably this video is not available from your location due to geo restriction.',
countries=['CN'])
return formats return formats
def query_api(self, endpoint, video_id, note): @classmethod
result = self._download_json( def convert_milliseconds(cls, ms):
f'{self._API_BASE}{endpoint}', video_id, note, headers={'Referer': self._API_BASE}) return int(round(ms / 1000.0))
code = traverse_obj(result, ('code', {int}))
message = traverse_obj(result, ('message', {str})) or ''
if code == -462:
self.raise_login_required(f'Login required to download: {message}')
elif code != 200:
raise ExtractorError(f'Failed to get meta info: {code} {message}')
return result
def _get_entries(self, songs_data, entry_keys=None, id_key='id', name_key='name'): def query_api(self, endpoint, video_id, note):
for song in traverse_obj(songs_data, ( req = Request('%s%s' % (self._API_BASE, endpoint))
*variadic(entry_keys, (str, bytes, dict, set)), req.headers['Referer'] = self._API_BASE
lambda _, v: int_or_none(v[id_key]) is not None)): return self._download_json(req, video_id, note)
song_id = str(song[id_key])
yield self.url_result(
f'http://music.163.com/#/song?id={song_id}', NetEaseMusicIE,
song_id, traverse_obj(song, (name_key, {str})))
class NetEaseMusicIE(NetEaseMusicBaseIE): class NetEaseMusicIE(NetEaseMusicBaseIE):
@ -130,18 +156,16 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
IE_DESC = '网易云音乐' IE_DESC = '网易云音乐'
_VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)' _VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://music.163.com/#/song?id=548648087', 'url': 'http://music.163.com/#/song?id=32102397',
'md5': '3e909614ce09b1ccef4a3eb205441190',
'info_dict': { 'info_dict': {
'id': '548648087', 'id': '32102397',
'ext': 'mp3', 'ext': 'mp3',
'title': '戒烟 (Live)', 'title': 'Bad Blood',
'creator': '李荣浩 / 朱正廷 / 陈立农 / 尤长靖 / ONER灵超 / ONER木子洋 / 杨非同 / 陆定昊', 'creator': 'Taylor Swift / Kendrick Lamar',
'timestamp': 1522944000, 'upload_date': '20150516',
'upload_date': '20180405', 'timestamp': 1431792000,
'description': 'md5:3650af9ee22c87e8637cb2dde22a765c', 'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
"duration": 256,
'thumbnail': r're:^http.*\.jpg',
}, },
}, { }, {
'note': 'No lyrics.', 'note': 'No lyrics.',
@ -152,9 +176,21 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'title': 'Opus 28', 'title': 'Opus 28',
'creator': 'Dustin O\'Halloran', 'creator': 'Dustin O\'Halloran',
'upload_date': '20080211', 'upload_date': '20080211',
'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
'timestamp': 1202745600, 'timestamp': 1202745600,
'duration': 263, },
'thumbnail': r're:^http.*\.jpg', }, {
'note': 'Has translated name.',
'url': 'http://music.163.com/#/song?id=22735043',
'info_dict': {
'id': '22735043',
'ext': 'mp3',
'title': '소원을 말해봐 (Genie)',
'creator': '少女时代',
'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184',
'upload_date': '20100127',
'timestamp': 1264608000,
'alt_title': '说出愿望吧(Genie)',
}, },
}, { }, {
'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846', 'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
@ -167,99 +203,59 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'upload_date': '19911130', 'upload_date': '19911130',
'timestamp': 691516800, 'timestamp': 691516800,
'description': 'md5:1ba2f911a2b0aa398479f595224f2141', 'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
'duration': 268,
'alt_title': '伴唱:现代人乐队 合唱:总政歌舞团',
'thumbnail': r're:^http.*\.jpg',
}, },
}, {
'url': 'http://music.163.com/#/song?id=32102397',
'md5': '3e909614ce09b1ccef4a3eb205441190',
'info_dict': {
'id': '32102397',
'ext': 'mp3',
'title': 'Bad Blood',
'creator': 'Taylor Swift / Kendrick Lamar',
'upload_date': '20150516',
'timestamp': 1431792000,
'description': 'md5:21535156efb73d6d1c355f95616e285a',
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
'duration': 199,
'thumbnail': r're:^http.*\.jpg',
},
'skip': 'Blocked outside Mainland China',
}, {
'note': 'Has translated name.',
'url': 'http://music.163.com/#/song?id=22735043',
'info_dict': {
'id': '22735043',
'ext': 'mp3',
'title': '소원을 말해봐 (Genie)',
'creator': '少女时代',
'upload_date': '20100127',
'timestamp': 1264608000,
'description': 'md5:03d1ffebec3139aa4bafe302369269c5',
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
'duration': 229,
'alt_title': '说出愿望吧(Genie)',
'thumbnail': r're:^http.*\.jpg',
},
'skip': 'Blocked outside Mainland China',
}] }]
def _process_lyrics(self, lyrics_info): def _process_lyrics(self, lyrics_info):
original = traverse_obj(lyrics_info, ('lrc', 'lyric', {str})) original = lyrics_info.get('lrc', {}).get('lyric')
translated = traverse_obj(lyrics_info, ('tlyric', 'lyric', {str})) translated = lyrics_info.get('tlyric', {}).get('lyric')
if not original or original == '[99:00.00]纯音乐,请欣赏\n':
return None
if not translated: if not translated:
return { return original
'lyrics': [{'data': original, 'ext': 'lrc'}],
}
lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)' lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
original_ts_texts = re.findall(lyrics_expr, original) original_ts_texts = re.findall(lyrics_expr, original)
translation_ts_dict = dict(re.findall(lyrics_expr, translated)) translation_ts_dict = dict(
(time_stamp, text) for time_stamp, text in re.findall(lyrics_expr, translated)
merged = '\n'.join( )
join_nonempty(f'{timestamp}{text}', translation_ts_dict.get(timestamp, ''), delim=' / ') lyrics = '\n'.join([
for timestamp, text in original_ts_texts) '%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, ''))
for time_stamp, text in original_ts_texts
return { ])
'lyrics_merged': [{'data': merged, 'ext': 'lrc'}], return lyrics
'lyrics': [{'data': original, 'ext': 'lrc'}],
'lyrics_translated': [{'data': translated, 'ext': 'lrc'}],
}
def _real_extract(self, url): def _real_extract(self, url):
song_id = self._match_id(url) song_id = self._match_id(url)
params = {
'id': song_id,
'ids': '[%s]' % song_id
}
info = self.query_api( info = self.query_api(
f'song/detail?id={song_id}&ids=%5B{song_id}%5D', song_id, 'Downloading song info')['songs'][0] 'song/detail?' + compat_urllib_parse_urlencode(params),
song_id, 'Downloading song info')['songs'][0]
formats = self.extract_formats(info) formats = self.extract_formats(info)
lyrics = self._process_lyrics(self.query_api( lyrics_info = self.query_api(
f'song/lyric?id={song_id}&lv=-1&tv=-1', song_id, 'Downloading lyrics data')) 'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
lyric_data = { song_id, 'Downloading lyrics data')
'description': traverse_obj(lyrics, (('lyrics_merged', 'lyrics'), 0, 'data'), get_all=False), lyrics = self._process_lyrics(lyrics_info)
'subtitles': lyrics,
} if lyrics else {} alt_title = None
if info.get('transNames'):
alt_title = '/'.join(info.get('transNames'))
return { return {
'id': song_id, 'id': song_id,
'title': info['name'],
'alt_title': alt_title,
'creator': ' / '.join([artist['name'] for artist in info.get('artists', [])]),
'timestamp': self.convert_milliseconds(info.get('album', {}).get('publishTime')),
'thumbnail': info.get('album', {}).get('picUrl'),
'duration': self.convert_milliseconds(info.get('duration', 0)),
'description': lyrics,
'formats': formats, 'formats': formats,
'alt_title': '/'.join(traverse_obj(info, (('transNames', 'alias'), ...))) or None,
'creator': ' / '.join(traverse_obj(info, ('artists', ..., 'name'))) or None,
**lyric_data,
**traverse_obj(info, {
'title': ('name', {str}),
'timestamp': ('album', 'publishTime', {self.kilo_or_none}),
'thumbnail': ('album', 'picUrl', {url_or_none}),
'duration': ('duration', {self.kilo_or_none}),
}),
} }
@ -267,44 +263,31 @@ class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:album' IE_NAME = 'netease:album'
IE_DESC = '网易云音乐 - 专辑' IE_DESC = '网易云音乐 - 专辑'
_VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)' _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
_TESTS = [{ _TEST = {
'url': 'https://music.163.com/#/album?id=133153666',
'info_dict': {
'id': '133153666',
'title': '桃几的翻唱',
'upload_date': '20210913',
'description': '桃几2021年翻唱合集',
'thumbnail': r're:^http.*\.jpg',
},
'playlist_mincount': 13,
}, {
'url': 'http://music.163.com/#/album?id=220780', 'url': 'http://music.163.com/#/album?id=220780',
'info_dict': { 'info_dict': {
'id': '220780', 'id': '220780',
'title': 'B\'Day', 'title': 'B\'day',
'upload_date': '20060904',
'description': 'md5:71a74e1d8f392d88cf1bbe48879ad0b0',
'thumbnail': r're:^http.*\.jpg',
}, },
'playlist_count': 23, 'playlist_count': 23,
}] 'skip': 'Blocked outside Mainland China',
}
def _real_extract(self, url): def _real_extract(self, url):
album_id = self._match_id(url) album_id = self._match_id(url)
webpage = self._download_webpage(f'https://music.163.com/album?id={album_id}', album_id)
songs = self._search_json( info = self.query_api(
r'<textarea[^>]+\bid="song-list-pre-data"[^>]*>', webpage, 'metainfo', album_id, 'album/%s?id=%s' % (album_id, album_id),
end_pattern=r'</textarea>', contains_pattern=r'\[(?s:.+)\]') album_id, 'Downloading album data')['album']
metainfo = {
'title': self._og_search_property('title', webpage, 'title', fatal=False), name = info['name']
'description': self._html_search_regex( desc = info.get('description')
(rf'<div[^>]+\bid="album-desc-{suffix}"[^>]*>(.*?)</div>' for suffix in ('more', 'dot')), entries = [
webpage, 'description', flags=re.S, fatal=False), self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
'thumbnail': self._og_search_property('image', webpage, 'thumbnail', fatal=False), 'NetEaseMusic', song['id'])
'upload_date': unified_strdate(self._html_search_meta('music:release_date', webpage, 'date', fatal=False)), for song in info['songs']
} ]
return self.playlist_result(self._get_entries(songs), album_id, **metainfo) return self.playlist_result(entries, album_id, name, desc)
class NetEaseMusicSingerIE(NetEaseMusicBaseIE): class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
@ -316,9 +299,10 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
'url': 'http://music.163.com/#/artist?id=10559', 'url': 'http://music.163.com/#/artist?id=10559',
'info_dict': { 'info_dict': {
'id': '10559', 'id': '10559',
'title': '张惠妹 - aMEI;阿妹;阿密特', 'title': '张惠妹 - aMEI;阿密特',
}, },
'playlist_count': 50, 'playlist_count': 50,
'skip': 'Blocked outside Mainland China',
}, { }, {
'note': 'Singer has translated name.', 'note': 'Singer has translated name.',
'url': 'http://music.163.com/#/artist?id=124098', 'url': 'http://music.163.com/#/artist?id=124098',
@ -327,28 +311,28 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
'title': '李昇基 - 이승기', 'title': '李昇基 - 이승기',
}, },
'playlist_count': 50, 'playlist_count': 50,
}, { 'skip': 'Blocked outside Mainland China',
'note': 'Singer with both translated and alias',
'url': 'https://music.163.com/#/artist?id=159692',
'info_dict': {
'id': '159692',
'title': '初音ミク - 初音未来;Hatsune Miku',
},
'playlist_count': 50,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
singer_id = self._match_id(url) singer_id = self._match_id(url)
info = self.query_api( info = self.query_api(
f'artist/{singer_id}?id={singer_id}', singer_id, note='Downloading singer data') 'artist/%s?id=%s' % (singer_id, singer_id),
singer_id, 'Downloading singer data')
name = join_nonempty( name = info['artist']['name']
traverse_obj(info, ('artist', 'name', {str})), if info['artist']['trans']:
join_nonempty(*traverse_obj(info, ('artist', ('trans', ('alias', ...)), {str})), delim=';'), name = '%s - %s' % (name, info['artist']['trans'])
delim=' - ') if info['artist']['alias']:
name = '%s - %s' % (name, ';'.join(info['artist']['alias']))
return self.playlist_result(self._get_entries(info, 'hotSongs'), singer_id, name) entries = [
self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
'NetEaseMusic', song['id'])
for song in info['hotSongs']
]
return self.playlist_result(entries, singer_id, name)
class NetEaseMusicListIE(NetEaseMusicBaseIE): class NetEaseMusicListIE(NetEaseMusicBaseIE):
@ -360,28 +344,10 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
'info_dict': { 'info_dict': {
'id': '79177352', 'id': '79177352',
'title': 'Billboard 2007 Top 100', 'title': 'Billboard 2007 Top 100',
'description': 'md5:12fd0819cab2965b9583ace0f8b7b022', 'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
'tags': ['欧美'],
'uploader': '浑然破灭',
'uploader_id': '67549805',
'timestamp': int,
'upload_date': r're:\d{8}',
}, },
'playlist_mincount': 95, 'playlist_count': 99,
}, { 'skip': 'Blocked outside Mainland China',
'note': 'Toplist/Charts sample',
'url': 'https://music.163.com/#/discover/toplist?id=60198',
'info_dict': {
'id': '60198',
'title': 're:美国Billboard榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
'description': '美国Billboard排行榜',
'tags': ['流行', '欧美', '榜单'],
'uploader': 'Billboard公告牌',
'uploader_id': '48171',
'timestamp': int,
'upload_date': r're:\d{8}',
},
'playlist_count': 100,
}, { }, {
'note': 'Toplist/Charts sample', 'note': 'Toplist/Charts sample',
'url': 'http://music.163.com/#/discover/toplist?id=3733003', 'url': 'http://music.163.com/#/discover/toplist?id=3733003',
@ -397,86 +363,64 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
list_id = self._match_id(url) list_id = self._match_id(url)
info = self._download_eapi_json( info = self.query_api(
'/v3/playlist/detail', list_id, 'playlist/detail?id=%s&lv=-1&tv=-1' % list_id,
{'id': list_id, 't': '-1', 'n': '500', 's': '0'}, list_id, 'Downloading playlist data')['result']
note="Downloading playlist info")
metainfo = traverse_obj(info, ('playlist', { name = info['name']
'title': ('name', {str}), desc = info.get('description')
'description': ('description', {str}),
'tags': ('tags', ..., {str}),
'uploader': ('creator', 'nickname', {str}),
'uploader_id': ('creator', 'userId', {str_or_none}),
'timestamp': ('updateTime', {self.kilo_or_none}),
}))
if traverse_obj(info, ('playlist', 'specialType')) == 10:
metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}'
return self.playlist_result(self._get_entries(info, ('playlist', 'tracks')), list_id, **metainfo) if info.get('specialType') == 10: # is a chart/toplist
datestamp = datetime.fromtimestamp(
self.convert_milliseconds(info['updateTime'])).strftime('%Y-%m-%d')
name = '%s %s' % (name, datestamp)
entries = [
self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
'NetEaseMusic', song['id'])
for song in info['tracks']
]
return self.playlist_result(entries, list_id, name, desc)
class NetEaseMusicMvIE(NetEaseMusicBaseIE): class NetEaseMusicMvIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:mv' IE_NAME = 'netease:mv'
IE_DESC = '网易云音乐 - MV' IE_DESC = '网易云音乐 - MV'
_VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)' _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
_TESTS = [{ _TEST = {
'url': 'https://music.163.com/#/mv?id=10958064',
'info_dict': {
'id': '10958064',
'ext': 'mp4',
'title': '交换余生',
'description': 'md5:e845872cff28820642a2b02eda428fea',
'creator': '林俊杰',
'upload_date': '20200916',
'thumbnail': r're:http.*\.jpg',
'duration': 364,
'view_count': int,
'like_count': int,
'comment_count': int,
},
}, {
'url': 'http://music.163.com/#/mv?id=415350', 'url': 'http://music.163.com/#/mv?id=415350',
'info_dict': { 'info_dict': {
'id': '415350', 'id': '415350',
'ext': 'mp4', 'ext': 'mp4',
'title': '이럴거면 그러지말지', 'title': '이럴거면 그러지말지',
'description': '白雅言自作曲唱甜蜜爱情', 'description': '白雅言自作曲唱甜蜜爱情',
'creator': '娥娟', 'creator': '白雅言',
'upload_date': '20150520', 'upload_date': '20150520',
'thumbnail': r're:http.*\.jpg',
'duration': 216,
'view_count': int,
'like_count': int,
'comment_count': int,
}, },
}] 'skip': 'Blocked outside Mainland China',
}
def _real_extract(self, url): def _real_extract(self, url):
mv_id = self._match_id(url) mv_id = self._match_id(url)
info = self.query_api( info = self.query_api(
f'mv/detail?id={mv_id}&type=mp4', mv_id, 'Downloading mv info')['data'] 'mv/detail?id=%s&type=mp4' % mv_id,
mv_id, 'Downloading mv info')['data']
formats = [ formats = [
{'url': mv_url, 'ext': 'mp4', 'format_id': f'{brs}p', 'height': int_or_none(brs)} {'url': mv_url, 'ext': 'mp4', 'format_id': '%sp' % brs, 'height': int(brs)}
for brs, mv_url in info['brs'].items() for brs, mv_url in info['brs'].items()
] ]
return { return {
'id': mv_id, 'id': mv_id,
'title': info['name'],
'description': info.get('desc') or info.get('briefDesc'),
'creator': info['artistName'],
'upload_date': info['publishTime'].replace('-', ''),
'formats': formats, 'formats': formats,
**traverse_obj(info, { 'thumbnail': info.get('cover'),
'title': ('name', {str}), 'duration': self.convert_milliseconds(info.get('duration', 0)),
'description': (('desc', 'briefDesc'), {str}, {lambda x: x or None}),
'creator': ('artistName', {str}),
'upload_date': ('publishTime', {unified_strdate}),
'thumbnail': ('cover', {url_or_none}),
'duration': ('duration', {self.kilo_or_none}),
'view_count': ('playCount', {int_or_none}),
'like_count': ('likeCount', {int_or_none}),
'comment_count': ('commentCount', {int_or_none}),
}, get_all=False),
} }
@ -487,74 +431,75 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
_TESTS = [{ _TESTS = [{
'url': 'http://music.163.com/#/program?id=10109055', 'url': 'http://music.163.com/#/program?id=10109055',
'info_dict': { 'info_dict': {
'id': '32593346', 'id': '10109055',
'ext': 'mp3', 'ext': 'mp3',
'title': '不丹足球背后的故事', 'title': '不丹足球背后的故事',
'description': '喜马拉雅人的足球梦 ...', 'description': '喜马拉雅人的足球梦 ...',
'creator': '大话西藏', 'creator': '大话西藏',
'timestamp': 1434179287, 'timestamp': 1434179342,
'upload_date': '20150613', 'upload_date': '20150613',
'thumbnail': r're:http.*\.jpg',
'duration': 900, 'duration': 900,
}, },
'skip': 'Blocked outside Mainland China',
}, { }, {
'note': 'This program has accompanying songs.', 'note': 'This program has accompanying songs.',
'url': 'http://music.163.com/#/program?id=10141022', 'url': 'http://music.163.com/#/program?id=10141022',
'info_dict': { 'info_dict': {
'id': '10141022', 'id': '10141022',
'title': '滚滚电台的有声节目', 'title': '25岁你是自在如风的少年<27°C>',
'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b', 'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
'creator': '滚滚电台ORZ',
'timestamp': 1434450733,
'upload_date': '20150616',
'thumbnail': r're:http.*\.jpg',
}, },
'playlist_count': 4, 'playlist_count': 4,
'skip': 'Blocked outside Mainland China',
}, { }, {
'note': 'This program has accompanying songs.', 'note': 'This program has accompanying songs.',
'url': 'http://music.163.com/#/program?id=10141022', 'url': 'http://music.163.com/#/program?id=10141022',
'info_dict': { 'info_dict': {
'id': '32647209', 'id': '10141022',
'ext': 'mp3', 'ext': 'mp3',
'title': '滚滚电台的有声节目', 'title': '25岁你是自在如风的少年<27°C>',
'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b', 'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
'creator': '滚滚电台ORZ', 'timestamp': 1434450841,
'timestamp': 1434450733,
'upload_date': '20150616', 'upload_date': '20150616',
'thumbnail': r're:http.*\.jpg',
'duration': 1104,
}, },
'params': { 'params': {
'noplaylist': True 'noplaylist': True
}, },
'skip': 'Blocked outside Mainland China',
}] }]
def _real_extract(self, url): def _real_extract(self, url):
program_id = self._match_id(url) program_id = self._match_id(url)
info = self.query_api( info = self.query_api(
f'dj/program/detail?id={program_id}', program_id, note='Downloading program info')['program'] 'dj/program/detail?id=%s' % program_id,
program_id, 'Downloading program info')['program']
metainfo = traverse_obj(info, { name = info['name']
'title': ('name', {str}), description = info['description']
'description': ('description', {str}),
'creator': ('dj', 'brand', {str}),
'thumbnail': ('coverUrl', {url_or_none}),
'timestamp': ('createTime', {self.kilo_or_none}),
})
if not self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']): if not self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']):
formats = self.extract_formats(info['mainSong']) formats = self.extract_formats(info['mainSong'])
return { return {
'id': str(info['mainSong']['id']), 'id': info['mainSong']['id'],
'title': name,
'description': description,
'creator': info['dj']['brand'],
'timestamp': self.convert_milliseconds(info['createTime']),
'thumbnail': info['coverUrl'],
'duration': self.convert_milliseconds(info.get('duration', 0)),
'formats': formats, 'formats': formats,
'duration': traverse_obj(info, ('mainSong', 'duration', {self.kilo_or_none})),
**metainfo,
} }
songs = traverse_obj(info, (('mainSong', ('songs', ...)),)) song_ids = [info['mainSong']['id']]
return self.playlist_result(self._get_entries(songs), program_id, **metainfo) song_ids.extend([song['id'] for song in info['songs']])
entries = [
self.url_result('http://music.163.com/#/song?id=%s' % song_id,
'NetEaseMusic', song_id)
for song_id in song_ids
]
return self.playlist_result(entries, program_id, name, description)
class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE): class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
@ -566,32 +511,38 @@ class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
'info_dict': { 'info_dict': {
'id': '42', 'id': '42',
'title': '声音蔓延', 'title': '声音蔓延',
'description': 'md5:c7381ebd7989f9f367668a5aee7d5f08' 'description': 'md5:766220985cbd16fdd552f64c578a6b15'
}, },
'playlist_mincount': 40, 'playlist_mincount': 40,
'skip': 'Blocked outside Mainland China',
} }
_PAGE_SIZE = 1000 _PAGE_SIZE = 1000
def _real_extract(self, url): def _real_extract(self, url):
dj_id = self._match_id(url) dj_id = self._match_id(url)
metainfo = {} name = None
desc = None
entries = [] entries = []
for offset in itertools.count(start=0, step=self._PAGE_SIZE): for offset in itertools.count(start=0, step=self._PAGE_SIZE):
info = self.query_api( info = self.query_api(
f'dj/program/byradio?asc=false&limit={self._PAGE_SIZE}&radioId={dj_id}&offset={offset}', 'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
dj_id, note=f'Downloading dj programs - {offset}') % (self._PAGE_SIZE, dj_id, offset),
dj_id, 'Downloading dj programs - %d' % offset)
entries.extend(self.url_result( entries.extend([
f'http://music.163.com/#/program?id={program["id"]}', NetEaseMusicProgramIE, self.url_result(
program['id'], program.get('name')) for program in info['programs']) 'http://music.163.com/#/program?id=%s' % program['id'],
if not metainfo: 'NetEaseMusicProgram', program['id'])
metainfo = traverse_obj(info, ('programs', 0, 'radio', { for program in info['programs']
'title': ('name', {str}), ])
'description': ('desc', {str}),
})) if name is None:
radio = info['programs'][0]['radio']
name = radio['name']
desc = radio['desc']
if not info['more']: if not info['more']:
break break
return self.playlist_result(entries, dj_id, **metainfo) return self.playlist_result(entries, dj_id, name, desc)

View File

@ -190,7 +190,10 @@ class WrestleUniverseVODIE(WrestleUniverseBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
lang, video_id = self._match_valid_url(url).group('lang', 'id') lang, video_id = self._match_valid_url(url).group('lang', 'id')
metadata = self._download_metadata(url, video_id, lang, 'videoEpisodeFallbackData') metadata = self._download_metadata(url, video_id, lang, 'videoEpisodeFallbackData')
video_data = self._call_api(video_id, ':watch', 'watch', data={'deviceId': self._DEVICE_ID}) video_data = self._call_api(video_id, ':watch', 'watch', data={
# 'deviceId' is required if ignoreDeviceRestriction is False
'ignoreDeviceRestriction': True,
})
return { return {
'id': video_id, 'id': video_id,

View File

@ -407,7 +407,7 @@ class XHamsterEmbedIE(InfoExtractor):
class XHamsterUserIE(InfoExtractor): class XHamsterUserIE(InfoExtractor):
_VALID_URL = rf'https?://(?:[^/?#]+\.)?{XHamsterIE._DOMAINS}/(?:(?P<user>users)|creators)/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:.+?\.)?%s/users/(?P<id>[^/?#&]+)' % XHamsterIE._DOMAINS
_TESTS = [{ _TESTS = [{
# Paginated user profile # Paginated user profile
'url': 'https://xhamster.com/users/netvideogirls/videos', 'url': 'https://xhamster.com/users/netvideogirls/videos',
@ -422,12 +422,6 @@ class XHamsterUserIE(InfoExtractor):
'id': 'firatkaan', 'id': 'firatkaan',
}, },
'playlist_mincount': 1, 'playlist_mincount': 1,
}, {
'url': 'https://xhamster.com/creators/squirt-orgasm-69',
'info_dict': {
'id': 'squirt-orgasm-69',
},
'playlist_mincount': 150,
}, { }, {
'url': 'https://xhday.com/users/mobhunter', 'url': 'https://xhday.com/users/mobhunter',
'only_matching': True, 'only_matching': True,
@ -436,9 +430,8 @@ class XHamsterUserIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
def _entries(self, user_id, is_user): def _entries(self, user_id):
prefix, suffix = ('users', 'videos') if is_user else ('creators', 'exclusive') next_page_url = 'https://xhamster.com/users/%s/videos/1' % user_id
next_page_url = f'https://xhamster.com/{prefix}/{user_id}/{suffix}/1'
for pagenum in itertools.count(1): for pagenum in itertools.count(1):
page = self._download_webpage( page = self._download_webpage(
next_page_url, user_id, 'Downloading page %s' % pagenum) next_page_url, user_id, 'Downloading page %s' % pagenum)
@ -461,5 +454,5 @@ class XHamsterUserIE(InfoExtractor):
break break
def _real_extract(self, url): def _real_extract(self, url):
user, user_id = self._match_valid_url(url).group('user', 'id') user_id = self._match_id(url)
return self.playlist_result(self._entries(user_id, bool(user)), user_id) return self.playlist_result(self._entries(user_id), user_id)

View File

@ -941,13 +941,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None, def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None, ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
default_client='web'): default_client='web'):
raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE)) for retry in self.RetryManager():
# Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
icd_rm = next(icd_retries)
main_retries = iter(self.RetryManager())
main_rm = next(main_retries)
for _ in range(main_rm.retries + icd_rm.retries + 1):
try: try:
response = self._call_api( response = self._call_api(
ep=ep, fatal=True, headers=headers, ep=ep, fatal=True, headers=headers,
@ -959,8 +953,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if not isinstance(e.cause, network_exceptions): if not isinstance(e.cause, network_exceptions):
return self._error_or_warning(e, fatal=fatal) return self._error_or_warning(e, fatal=fatal)
elif not isinstance(e.cause, HTTPError): elif not isinstance(e.cause, HTTPError):
main_rm.error = e retry.error = e
next(main_retries)
continue continue
first_bytes = e.cause.response.read(512) first_bytes = e.cause.response.read(512)
@ -972,32 +965,27 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if yt_error: if yt_error:
self._report_alerts([('ERROR', yt_error)], fatal=False) self._report_alerts([('ERROR', yt_error)], fatal=False)
# Downloading page may result in intermittent 5xx HTTP error # Downloading page may result in intermittent 5xx HTTP error
# Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289 # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
# We also want to catch all other network exceptions since errors in later pages can be troublesome # We also want to catch all other network exceptions since errors in later pages can be troublesome
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210 # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
if e.cause.status not in (403, 429): if e.cause.status not in (403, 429):
main_rm.error = e retry.error = e
next(main_retries)
continue continue
return self._error_or_warning(e, fatal=fatal) return self._error_or_warning(e, fatal=fatal)
try: try:
self._extract_and_report_alerts(response, only_once=True) self._extract_and_report_alerts(response, only_once=True)
except ExtractorError as e: except ExtractorError as e:
# YouTube's servers may return errors we want to retry on in a 200 OK response # YouTube servers may return errors we want to retry on in a 200 OK response
# See: https://github.com/yt-dlp/yt-dlp/issues/839 # See: https://github.com/yt-dlp/yt-dlp/issues/839
if 'unknown error' in e.msg.lower(): if 'unknown error' in e.msg.lower():
main_rm.error = e retry.error = e
next(main_retries)
continue continue
return self._error_or_warning(e, fatal=fatal) return self._error_or_warning(e, fatal=fatal)
# Youtube sometimes sends incomplete data # Youtube sometimes sends incomplete data
# See: https://github.com/ytdl-org/youtube-dl/issues/28194 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
if not traverse_obj(response, *variadic(check_get_keys)): if not traverse_obj(response, *variadic(check_get_keys)):
icd_rm.error = ExtractorError('Incomplete data received', expected=True) retry.error = ExtractorError('Incomplete data received', expected=True)
should_retry = next(icd_retries, None)
if not should_retry:
return None
continue continue
return response return response