mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-07 16:01:27 +01:00
Compare commits
7 Commits
23d9bfd585
...
ace367aaf3
| Author | SHA1 | Date | |
|---|---|---|---|
| | ace367aaf3 | | |
| | f1323ae863 | | |
| | 53e732deba | | |
| | 08f91358ab | | |
| | 54e0dbc355 | | |
| | fd62b4991b | | |
| | b224bfce9c | | |
|
@ -14,7 +14,6 @@ from ..networking.exceptions import HTTPError
|
|||
from ..utils import (
|
||||
ExtractorError,
|
||||
GeoRestrictedError,
|
||||
RegexNotFoundError,
|
||||
InAdvancePagedList,
|
||||
OnDemandPagedList,
|
||||
bool_or_none,
|
||||
|
@ -118,10 +117,11 @@ class BilibiliBaseIE(InfoExtractor):
|
|||
'https://api.bilibili.com/x/player/v2', video_id,
|
||||
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
|
||||
note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
|
||||
if not traverse_obj(subtitle_info, 'subtitles') and traverse_obj(subtitle_info, 'allow_submit'):
|
||||
subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
|
||||
if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
|
||||
if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
|
||||
self.report_warning(f'CC subtitles (if exist) are only visible when logged in. {self._login_hint()}')
|
||||
for s in traverse_obj(subtitle_info, ('subtitles', ...)):
|
||||
self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}')
|
||||
for s in subs_list:
|
||||
subtitles.setdefault(s['lan'], []).append({
|
||||
'ext': 'srt',
|
||||
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
|
||||
|
@ -173,7 +173,8 @@ class BilibiliBaseIE(InfoExtractor):
|
|||
lambda _, v: url_or_none(v['share_url']) and v['id'])):
|
||||
yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))
|
||||
|
||||
def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges={}):
|
||||
def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
|
||||
cid_edges = cid_edges or {}
|
||||
division_data = self._download_json(
|
||||
'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
|
||||
query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
|
||||
|
@ -192,9 +193,9 @@ class BilibiliBaseIE(InfoExtractor):
|
|||
'text': ('option', {str}),
|
||||
}),
|
||||
})))
|
||||
# use dict to combine edges that use the save video section (same cid)
|
||||
# use dict to combine edges that use the same video section (same cid)
|
||||
cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
|
||||
for choice in edges[edge_id].get('choices', []):
|
||||
for choice in traverse_obj(edges, ('edge_id', 'choices', ...)):
|
||||
if choice['edge_id'] not in edges:
|
||||
edges[choice['edge_id']] = {'cid': choice['cid']}
|
||||
self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
|
||||
|
@ -203,8 +204,8 @@ class BilibiliBaseIE(InfoExtractor):
|
|||
def _get_interactive_entries(self, video_id, cid, metainfo):
|
||||
graph_version = traverse_obj(
|
||||
self._download_json(
|
||||
f'https://api.bilibili.com/x/player/wbi/v2?bvid={video_id}&cid={cid}',
|
||||
video_id, note='Extracting graph version'),
|
||||
'https://api.bilibili.com/x/player/wbi/v2', video_id,
|
||||
'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
|
||||
('data', 'interaction', 'graph_version', {int_or_none}))
|
||||
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
|
||||
for cid, edges in cid_edges.items():
|
||||
|
@ -477,21 +478,25 @@ class BiliBiliIE(BilibiliBaseIE):
|
|||
if is_festival:
|
||||
video_data = initial_state['videoInfo']
|
||||
else:
|
||||
try:
|
||||
play_info_obj = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)
|
||||
play_info = play_info_obj['data']
|
||||
except KeyError:
|
||||
if play_info_obj.get('code') == 87007:
|
||||
toast = get_element_by_class('tips-toast', webpage) or ''
|
||||
msg = clean_html(f'{get_element_by_class("belongs-to", toast) or ""},{get_element_by_class("level", toast) or ""}')
|
||||
raise ExtractorError(f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
|
||||
raise ExtractorError('Failed to extract play_info')
|
||||
except RegexNotFoundError:
|
||||
play_info_obj = self._search_json(
|
||||
r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
|
||||
if not play_info_obj:
|
||||
if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
|
||||
self.raise_login_required()
|
||||
if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
|
||||
self.report_warning('This video may be deleted or geo-restricted. You might want to try a VPN or a proxy server (with --proxy)', video_id)
|
||||
raise
|
||||
raise ExtractorError(
|
||||
'This video may be deleted or geo-restricted. '
|
||||
'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
|
||||
play_info = traverse_obj(play_info_obj, ('data', {dict}))
|
||||
if not play_info:
|
||||
if traverse_obj(play_info_obj, 'code') == 87007:
|
||||
toast = get_element_by_class('tips-toast', webpage) or ''
|
||||
msg = clean_html(
|
||||
f'{get_element_by_class("belongs-to", toast) or ""},'
|
||||
+ (get_element_by_class('level', toast) or ''))
|
||||
raise ExtractorError(
|
||||
f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
|
||||
raise ExtractorError('Failed to extract play info')
|
||||
video_data = initial_state['videoData']
|
||||
|
||||
video_id, title = video_data['bvid'], video_data.get('title')
|
||||
|
@ -741,14 +746,16 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE):
|
|||
media_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, media_id)
|
||||
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
|
||||
initial_state = self._search_json(
|
||||
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
|
||||
ss_id = initial_state['mediaInfo']['season_id']
|
||||
metainfo = traverse_obj(initial_state, ('mediaInfo', {
|
||||
'title': ('title', {str}),
|
||||
'description': ('evaluate', {str}),
|
||||
}))
|
||||
|
||||
return self.playlist_result(self._get_episodes_from_season(ss_id, url), media_id, **metainfo)
|
||||
return self.playlist_result(
|
||||
self._get_episodes_from_season(ss_id, url), media_id,
|
||||
**traverse_obj(initial_state, ('mediaInfo', {
|
||||
'title': ('title', {str}),
|
||||
'description': ('evaluate', {str}),
|
||||
})))
|
||||
|
||||
|
||||
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||
|
@ -803,30 +810,10 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
|||
return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id, **metainfo)
|
||||
|
||||
|
||||
class BilibiliCheeseIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/cheese/play/ep229832',
|
||||
'info_dict': {
|
||||
'id': '229832',
|
||||
'ext': 'mp4',
|
||||
'title': '1 - 课程先导片',
|
||||
'alt_title': '视频课 · 3分41秒',
|
||||
'uploader': '马督工',
|
||||
'uploader_id': '316568752',
|
||||
'episode': '课程先导片',
|
||||
'episode_id': '229832',
|
||||
'episode_number': 1,
|
||||
'duration': 221,
|
||||
'timestamp': 1695549606,
|
||||
'upload_date': '20230924',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'view_count': int,
|
||||
}
|
||||
}]
|
||||
class BilibiliCheeseBaseIE(BilibiliBaseIE):
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
|
||||
def _extract_episode(self, season_info, ep_id, headers):
|
||||
def _extract_episode(self, season_info, ep_id):
|
||||
episode_info = traverse_obj(season_info, (
|
||||
'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
|
||||
aid, cid = episode_info['aid'], episode_info['cid']
|
||||
|
@ -837,9 +824,9 @@ class BilibiliCheeseIE(BilibiliBaseIE):
|
|||
self.raise_login_required('You need to purchase the course to download this episode')
|
||||
|
||||
play_info = self._download_json(
|
||||
'https://api.bilibili.com/pugv/player/web/playurl?fnval=16&fourk=1', ep_id,
|
||||
query={'avid': aid, 'cid': cid, 'ep_id': ep_id},
|
||||
headers=headers, note='Downloading playinfo')['data']
|
||||
'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
|
||||
query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
|
||||
headers=self._HEADERS, note='Downloading playinfo')['data']
|
||||
|
||||
return {
|
||||
'id': str_or_none(ep_id),
|
||||
|
@ -861,21 +848,43 @@ class BilibiliCheeseIE(BilibiliBaseIE):
|
|||
}),
|
||||
'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
'http_headers': headers,
|
||||
'http_headers': self._HEADERS,
|
||||
}
|
||||
|
||||
def _download_season_info(self, query_key, video_id, headers):
|
||||
def _download_season_info(self, query_key, video_id):
|
||||
return self._download_json(
|
||||
f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
|
||||
headers=headers, note='Downloading season info')['data']
|
||||
headers=self._HEADERS, note='Downloading season info')['data']
|
||||
|
||||
|
||||
class BilibiliCheeseIE(BilibiliCheeseBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/cheese/play/ep229832',
|
||||
'info_dict': {
|
||||
'id': '229832',
|
||||
'ext': 'mp4',
|
||||
'title': '1 - 课程先导片',
|
||||
'alt_title': '视频课 · 3分41秒',
|
||||
'uploader': '马督工',
|
||||
'uploader_id': '316568752',
|
||||
'episode': '课程先导片',
|
||||
'episode_id': '229832',
|
||||
'episode_number': 1,
|
||||
'duration': 221,
|
||||
'timestamp': 1695549606,
|
||||
'upload_date': '20230924',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'view_count': int,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
ep_id = self._match_id(url)
|
||||
return self._extract_episode(
|
||||
self._download_season_info('ep_id', ep_id, self._HEADERS), ep_id, self._HEADERS)
|
||||
return self._extract_episode(self._download_season_info('ep_id', ep_id), ep_id)
|
||||
|
||||
|
||||
class BilibiliCheeseSeasonIE(BilibiliCheeseIE):
|
||||
class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/cheese/play/ss5918',
|
||||
|
@ -917,20 +926,21 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseIE):
|
|||
def _get_cheese_entries(self, season_info):
|
||||
for ep_id in traverse_obj(season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id')):
|
||||
yield {
|
||||
**self._extract_episode(season_info, ep_id, self._HEADERS),
|
||||
**self._extract_episode(season_info, ep_id),
|
||||
'extractor_key': BilibiliCheeseIE.ie_key(),
|
||||
'extractor': BilibiliCheeseIE.IE_NAME,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
season_id = self._match_id(url)
|
||||
season_info = self._download_season_info('season_id', season_id, self._HEADERS)
|
||||
metainfo = traverse_obj(season_info, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('subtitle', {str}),
|
||||
})
|
||||
season_info = self._download_season_info('season_id', season_id)
|
||||
|
||||
return self.playlist_result(self._get_cheese_entries(season_info), season_id, **metainfo)
|
||||
return self.playlist_result(
|
||||
self._get_cheese_entries(season_info), season_id,
|
||||
**traverse_obj(season_info, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('subtitle', {str}),
|
||||
}))
|
||||
|
||||
|
||||
class BilibiliSpaceBaseIE(InfoExtractor):
|
||||
|
|
Loading…
Reference in New Issue
Block a user