mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-25 16:51:26 +01:00
Compare commits
23 Commits
2e307a5757
...
be6d011c43
Author | SHA1 | Date | |
---|---|---|---|
|
be6d011c43 | ||
|
0f6fb121ba | ||
|
69054b483f | ||
|
2da3e052ed | ||
|
eac8a89b47 | ||
|
b6f26805a7 | ||
|
50271dbd80 | ||
|
853a9224f5 | ||
|
4c16680c00 | ||
|
cbbf6ad2cd | ||
|
d079c1a67f | ||
|
a9ac7d7f99 | ||
|
ba46a9e0d1 | ||
|
ca780a228c | ||
|
d6842fcd7f | ||
|
ddca238423 | ||
|
72fac58401 | ||
|
41bb0c6b37 | ||
|
fc79d7325c | ||
|
31b0294bd6 | ||
|
31be8d3dbd | ||
|
af0eb72a8b | ||
|
f40c4b4ee2 |
|
@ -260,7 +260,9 @@ from .bilibili import (
|
||||||
BiliBiliIE,
|
BiliBiliIE,
|
||||||
BiliBiliPlayerIE,
|
BiliBiliPlayerIE,
|
||||||
BilibiliPlaylistIE,
|
BilibiliPlaylistIE,
|
||||||
|
BiliBiliSearchAllIE,
|
||||||
BiliBiliSearchIE,
|
BiliBiliSearchIE,
|
||||||
|
BiliBiliSearchPageIE,
|
||||||
BilibiliSeriesListIE,
|
BilibiliSeriesListIE,
|
||||||
BilibiliSpaceAudioIE,
|
BilibiliSpaceAudioIE,
|
||||||
BilibiliSpaceVideoIE,
|
BilibiliSpaceVideoIE,
|
||||||
|
|
|
@ -1660,7 +1660,96 @@ class BilibiliCategoryIE(InfoExtractor):
|
||||||
return self.playlist_result(self._entries(category, subcategory, query), query, query)
|
return self.playlist_result(self._entries(category, subcategory, query), query, query)
|
||||||
|
|
||||||
|
|
||||||
class BiliBiliSearchIE(SearchInfoExtractor):
|
class BiliBiliSearchBaseIE(BilibiliBaseIE):
    """Common base for the Bilibili search extractors."""

    def _extract_search_result(self, result_data):
        """Convert one search result item into a ``url_result``.

        The item's ``type`` field selects which key holds the target:
        ``arcurl`` for videos, ``roomid`` for live rooms, ``url`` for
        ``media_ft``/``media_bangumi`` and ``mid`` for users.

        Returns None when the type is not one of the handled values.
        """
        kind = result_data.get('type')
        if kind == 'video':
            target = result_data['arcurl']
        elif kind == 'live_room':
            target = 'https://live.bilibili.com/' + str(result_data['roomid'])
        elif kind in ('media_ft', 'media_bangumi'):
            target = result_data['url']
        elif kind == 'bili_user':
            target = 'https://space.bilibili.com/' + str(result_data['mid'])
        else:
            # Unhandled result type: nothing to extract
            return None
        return self.url_result(target)
|
||||||
|
|
||||||
|
|
||||||
|
class BiliBiliSearchAllIE(SearchInfoExtractor, BiliBiliSearchBaseIE):
    """Search-key extractor (``biliallsearch``) backed by the ``search/all/v2`` API."""

    IE_DESC = 'Bilibili all search'
    _MAX_RESULTS = 100000
    _SEARCH_KEY = 'biliallsearch'
    _TESTS = [{
        'url': 'biliallsearch3:靡烟 出道一年，我怎么还在等你单推的女人睡觉后开播啊',
        'playlist_count': 3,
        'info_dict': {
            'id': '靡烟 出道一年，我怎么还在等你单推的女人睡觉后开播啊',
            'title': '靡烟 出道一年，我怎么还在等你单推的女人睡觉后开播啊',
        },
        'playlist': [{
            'info_dict': {
                'id': 'BV1n44y1Q7sc',
                'ext': 'mp4',
                'title': '“出道一年，我怎么还在等你单推的女人睡觉后开播啊？”【一分钟了解靡烟miya】',
                'timestamp': 1669889987,
                'upload_date': '20221201',
                'description': 'md5:43343c0973defff527b5a4b403b4abf9',
                'tags': list,
                'uploader': '靡烟miya',
                'duration': 123.156,
                'uploader_id': '1958703906',
                'comment_count': int,
                'view_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                '_old_archive_ids': ['bilibili 988222410_part1'],
            },
        }],
    }, {
        'url': 'biliallsearch:LOL',
        'playlist_count': 1,
        'info_dict': {
            'id': 'LOL',
            'title': 'LOL',
        },
    }]

    def _search_results(self, query):
        """Yield a ``url_result`` for every supported item, page by page.

        Raises ExtractorError (expected) when the server answers HTTP 412,
        when the API reports code -400, or when the first page carries no
        result list. Iteration ends quietly once a later page is empty.
        """
        headers = self.geo_verification_headers()
        headers['Referer'] = 'https://www.bilibili.com/'
        page_size = 50
        api_url = r'https://api.bilibili.com/x/web-interface/wbi/search/all/v2'
        if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
            self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')

        for page_num in itertools.count(1):
            query_params = {
                'keyword': query,
                'page': page_num,
                'dynamic_offset': (page_num - 1) * page_size,
                'platform': 'pc',
            }
            try:
                search_all_result = self._download_json(
                    api_url, video_id=query, query=self._sign_wbi(query_params, query),
                    headers=headers,
                )
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 412:
                    raise ExtractorError('Request is blocked by server (-412).', expected=True)
                raise

            status_code = search_all_result['code']
            if status_code == -400:
                raise ExtractorError('Invalid request (-400).', expected=True)

            # traverse_obj tolerates a missing or null 'data' object
            result_list = traverse_obj(search_all_result, ('data', 'result'))
            if not result_list:
                if page_num > 1:
                    # Running out of pages is the normal end of the search
                    return
                self.write_debug(f'Response: {search_all_result}')
                raise ExtractorError(f'Result not found in the response ({status_code}).',
                                     expected=True)

            page_items = traverse_obj(result_list, (..., 'data', ...))
            if not page_items:
                # Result groups are present but hold no items; without this
                # stop the unbounded page counter would keep requesting pages
                return
            for result_data in page_items:
                entry = self._extract_search_result(result_data)
                # _extract_search_result gives None for unsupported types
                if entry is not None:
                    yield entry
|
||||||
|
|
||||||
|
|
||||||
|
class BiliBiliSearchIE(SearchInfoExtractor, BilibiliBaseIE):
|
||||||
IE_DESC = 'Bilibili video search'
|
IE_DESC = 'Bilibili video search'
|
||||||
_MAX_RESULTS = 100000
|
_MAX_RESULTS = 100000
|
||||||
_SEARCH_KEY = 'bilisearch'
|
_SEARCH_KEY = 'bilisearch'
|
||||||
|
@ -1695,21 +1784,16 @@ class BiliBiliSearchIE(SearchInfoExtractor):
|
||||||
def _search_results(self, query):
|
def _search_results(self, query):
|
||||||
if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
|
if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
|
||||||
self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
|
self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
|
||||||
|
headers = self.geo_verification_headers()
|
||||||
|
headers['Referer'] = 'https://www.bilibili.com/'
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
videos = self._download_json(
|
videos = self._download_json(
|
||||||
'https://api.bilibili.com/x/web-interface/search/type', query,
|
'https://api.bilibili.com/x/web-interface/wbi/search/type', query,
|
||||||
note=f'Extracting results from page {page_num}', query={
|
note=f'Extracting results from page {page_num}', query=self._sign_wbi({
|
||||||
'Search_key': query,
|
|
||||||
'keyword': query,
|
'keyword': query,
|
||||||
'page': page_num,
|
'page': page_num,
|
||||||
'context': '',
|
|
||||||
'duration': 0,
|
|
||||||
'tids_2': '',
|
|
||||||
'__refresh__': 'true',
|
|
||||||
'search_type': 'video',
|
'search_type': 'video',
|
||||||
'tids': 0,
|
}, query), headers=headers)['data'].get('result')
|
||||||
'highlight': 1,
|
|
||||||
})['data'].get('result')
|
|
||||||
if not videos:
|
if not videos:
|
||||||
break
|
break
|
||||||
for video in videos:
|
for video in videos:
|
||||||
|
@ -2406,3 +2490,126 @@ class BiliLiveIE(InfoExtractor):
|
||||||
'Referer': url,
|
'Referer': url,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class BiliBiliSearchPageIE(BiliBiliSearchBaseIE):
    """Extract the results listed by a ``search.bilibili.com`` page URL as a playlist."""

    IE_DESC = 'Bilibili Search Page URL Extractor'
    _VALID_URL = r'https?://search\.bilibili\.com/(?P<type>all|video|bangumi|pgc|live|upuser).*'
    _TESTS = [{
        'url': r'https://search.bilibili.com/all?keyword=yt+-+dlp+%E4%B8%8B%E8%BD%BD%E5%99%A8',
        'playlist_count': 36,
        'info_dict': {
            'id': 'yt - dlp 下载器',
            'title': 'yt - dlp 下载器',
        },
    }, {
        'url': r'https://search.bilibili.com/bangumi/?keyword=%E5%AD%A4%E7%8B%AC%E6%91%87%E6%BB%9A&from_source=webtop_search&spm_id_from=333.1007&search_source=5',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '孤独摇滚',
            'title': '孤独摇滚',
        },
        'skip': 'geo-restricted',
    }, {
        'url': r'https://search.bilibili.com/video?keyword=%E8%AE%A9%E5%AD%90%E5%BC%B9%E9%A3%9E&from_source=webtop_search&spm_id_from=333.1007&search_source=5&order=dm&duration=4&tids=181&page=3&o=72',
        'playlist_mincount': 4,
        'info_dict': {
            'id': '让子弹飞',
            'title': '让子弹飞',
        },
    }]

    # URL path segment -> ``search_type`` value sent to the search/type API
    _SEARCH_TYPE_MAPPING = {
        'video': 'video',
        'bangumi': 'media_bangumi',
        'pgc': 'media_ft',
        'live': 'live_room',
        'upuser': 'bili_user',
        'all': 'video',  # 'all' search calls video search after page 1
    }
    # URL parameters that are forwarded to the API unchanged
    _VALID_PARAMS = (
        'keyword',
        'page',
        'order',
        'duration',
        'tids',
        'search_type',  # Only when searching for live_room or live_user
        'order_sort',
        'user_type',
    )

    def _fetch_result_list(self, api_url, playlist_id, query, headers, hint=''):
        """Call a search endpoint and return its non-empty ``data.result``.

        Raises ExtractorError (expected) on HTTP 412, on API code -400, or
        when the result is missing/empty; ``hint`` is appended to the
        message of the last case.
        """
        try:
            response = self._download_json(
                api_url, video_id=playlist_id,
                query=self._sign_wbi(query, playlist_id), headers=headers)
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 412:
                raise ExtractorError('Request is blocked by server (-412).', expected=True)
            raise

        status_code = response['code']
        if status_code == -400:
            raise ExtractorError('Invalid request (-400).', expected=True)

        result_list = traverse_obj(response, ('data', 'result'))
        if not result_list:
            self.write_debug(f'Response: {response}')
            raise ExtractorError(
                f'Result not found in the response ({status_code}).{hint}', expected=True)
        return result_list

    def _real_extract(self, url):
        """Build a playlist from the search page that ``url`` points at.

        Page 1 of an ``all`` search uses the ``search/all/v2`` endpoint;
        everything else uses ``search/type``. Raises ExtractorError
        (expected) when no keyword is given or when live users are requested.
        """
        headers = self.geo_verification_headers()
        headers['Referer'] = url
        params = parse_qs(url)
        query = {
            'platform': 'pc',
            'page_size': 36,
        }
        if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
            self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')

        search_type = self._match_valid_url(url).group('type')
        # NOTE(review): assumes parse_qs is the yt-dlp helper wrapping
        # urllib.parse.parse_qs, which already percent/plus-decodes values;
        # decoding a second time would mangle keywords such as "C++"
        playlist_id = traverse_obj(params, ('keyword', 0))
        if not playlist_id:
            raise ExtractorError('Please specify the keyword to search for!', expected=True)

        for valid_param in self._VALID_PARAMS:
            param_value = traverse_obj(params, (valid_param, 0))
            if param_value is not None:
                query[valid_param] = param_value

        page_num = int(query.get('page', 1))
        param_offset = int_or_none(traverse_obj(params, ('o', 0)))
        if page_num == 1:
            query['dynamic_offset'] = 0
        elif param_offset is not None:
            # An explicit 'o' parameter in the URL wins over the computed offset
            query['dynamic_offset'] = param_offset
        else:
            query['dynamic_offset'] = query['page_size'] * (page_num - 1)

        if search_type == 'live' and traverse_obj(params, ('search_type', 0)) == 'live_user':
            raise ExtractorError('Live users are not downloadable!', expected=True)

        if search_type == 'all' and page_num == 1:
            result_list = self._fetch_result_list(
                r'https://api.bilibili.com/x/web-interface/wbi/search/all/v2',
                playlist_id, query, headers)
            # The all/v2 response groups items by category under 'data'
            results = traverse_obj(result_list, (..., 'data', ...))
        else:
            query = {
                'search_type': self._SEARCH_TYPE_MAPPING[search_type],
                **query,  # search_type in type is overridden when specified in url params
            }
            results = self._fetch_result_list(
                r'https://api.bilibili.com/x/web-interface/wbi/search/type',
                playlist_id, query, headers,
                hint=' You might want to try a VPN or a proxy server (with --proxy)')

        # _extract_search_result gives None for unsupported result types
        entries = [
            entry for entry in map(self._extract_search_result, results)
            if entry is not None]

        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=playlist_id)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user