Compare commits

...

19 Commits

Author SHA1 Message Date
c-basalt 2a38c65403
Merge df1b9ab688 into 5904853ae5 2024-05-07 14:32:00 +08:00
bashonly 5904853ae5
[ie/crunchyroll] Support browser impersonation (#9857)
Closes #7442
Authored by: bashonly
2024-05-05 23:15:32 +00:00
Chris Caruso c8bf48f3a8
[ie/cbc.ca:player] Improve `_VALID_URL` (#9866)
Closes #9825
Authored by: carusocr
2024-05-05 23:02:24 +00:00
The-MAGI 351368cb9a
[ie/youporn] Fix extractor (#8827)
Closes #7967
Authored by: The-MAGI
2024-05-05 22:57:38 +00:00
c-basalt df1b9ab688 rename and testcase fix 2024-02-26 00:09:06 -05:00
c-basalt 46de1f2b39 login hint for space video list 2024-02-12 13:35:14 -05:00
c-basalt 40e50ac6c0 Merge remote-tracking branch 'origin' into bili-legacy 2024-02-12 13:13:40 -05:00
c-basalt 1ce48dba7e wbi key cache 2024-02-04 01:44:43 -05:00
c-basalt 88db8b4679 drop flv when mp4 is available 2024-02-02 03:49:36 -05:00
c-basalt 3f46155e93 minor changes 2024-02-01 01:37:19 -05:00
c-basalt f949f7c79d fix after merge 2024-02-01 01:24:35 -05:00
c-basalt 8e67c2837c Merge branch 'master' into bili-legacy 2024-01-30 23:18:57 -05:00
c-basalt db1dc9b8df
Merge pull request #1 from GD-Slime/master
merge PR
2024-01-30 23:09:55 -05:00
GD-Slime 7c3cfab73a Accept some suggestions in code review 2023-11-09 21:06:42 +08:00
GD-Slime 77ed5d5bf9 fix some bugs in review 2023-11-09 19:39:14 +08:00
GD-Slime 493022e4a7 fix some unnecessary format fixes 2023-11-09 19:04:43 +08:00
GD-Slime 2c0052097f
Apply suggestions from code review
Co-authored-by: Simon Sawicki <accounts@grub4k.xyz>
2023-11-09 18:52:12 +08:00
GD-Slime 73324baa2d use flake8 to check code 2023-11-09 17:21:42 +08:00
GD-Slime 66db69f511 fix some issue when downloading non-dash video in bilibili.com
and fix some test params
2023-11-09 16:24:52 +08:00
4 changed files with 283 additions and 98 deletions

View File

@ -36,7 +36,6 @@
srt_subtitles_timecode,
str_or_none,
traverse_obj,
try_call,
unified_timestamp,
unsmuggle_url,
url_or_none,
@ -47,6 +46,18 @@
class BilibiliBaseIE(InfoExtractor):
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
_WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session
_wbi_key_cache = {}
def check_missing_formats(self, play_info, formats):
    """Report qualities listed in the play info that were not extracted.

    Each entry of ``play_info['support_formats']`` whose ``quality`` does not
    occur among the ``quality`` values of ``formats`` is named in a single
    ``to_screen`` message. The name is looked up with ``traverse_obj`` under
    ``new_description``, ``display_desc`` and ``quality``, in that order.
    Nothing is printed when no such entry exists.
    """
    extracted_qualities = set(traverse_obj(formats, (..., 'quality')))

    def not_extracted(_, supported):
        return supported['quality'] not in extracted_qualities

    labels = []
    for supported in traverse_obj(play_info, ('support_formats', not_extracted)):
        labels.append(str(traverse_obj(supported, 'new_description', 'display_desc', 'quality')))
    if not labels:
        return

    listing = ', '.join(labels)
    self.to_screen(f'Format(s) {listing} are missing; '
                   f'you have to login or become premium member to download them. {self._login_hint()}')
def extract_formats(self, play_info):
format_names = {
@ -86,18 +97,74 @@ def extract_formats(self, play_info):
'format': format_names.get(video.get('id')),
} for video in traverse_obj(play_info, ('dash', 'video', ...)))
missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
if missing_formats:
self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
f'you have to login or become premium member to download them. {self._login_hint()}')
if formats:
self.check_missing_formats(play_info, formats)
fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
'url': ('url', {url_or_none}),
'duration': ('length', {lambda x: float_or_none(x, scale=1000)}),
'filesize': ('size', {int_or_none}),
}))
if fragments:
formats.append({
'url': fragments[0]['url'],
'filesize': sum(traverse_obj(fragments, (..., 'filesize'))),
**({
'fragments': fragments,
'protocol': 'http_dash_segments'
} if len(fragments) > 1 else {}),
**traverse_obj(play_info, {
'quality': ('quality', {int_or_none}),
'format_id': ('quality', {str_or_none}),
'format': ('quality', {lambda x: format_names.get(x)}),
'resolution': ('quality', {lambda x: format_names.get(x)}),
'duration': ('timelength', {lambda x: float_or_none(x, scale=1000)}),
}),
})
return formats
def _download_playinfo(self, video_id, cid):
def _get_wbi_key(self, video_id):
    """Return the key that is appended to the query when signing wbi requests.

    The key and the time it was computed are kept in ``_wbi_key_cache`` and
    reused until ``_WBI_KEY_CACHE_TIMEOUT`` seconds have passed. Otherwise the
    nav API is queried, the name parts of ``data.wbi_img.img_url`` and
    ``data.wbi_img.sub_url`` are concatenated, 64 characters are picked from
    that string in a fixed order and the first 32 of them form the key.

    NOTE: picking raises IndexError when the concatenated string has fewer
    than 64 characters (e.g. the response carries no usable ``wbi_img`` URLs);
    the cache is left unchanged in that case.
    """
    cache = self._wbi_key_cache
    if time.time() < cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
        return cache['key']

    nav = self._download_json(
        'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')

    def name_part(url):
        # text between the last '/' and the first '.' that follows it
        return url.rpartition('/')[2].partition('.')[0]

    raw_key = ''.join(traverse_obj(nav, ('data', 'wbi_img', ('img_url', 'sub_url'), {name_part})))

    # positions in raw_key, in the order their characters are picked
    pick_order = (
        46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
        33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
        61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
        36, 20, 34, 44, 52,
    )
    key = ''.join(map(raw_key.__getitem__, pick_order))[:32]

    cache['key'] = key
    cache['ts'] = time.time()
    return key
def _sign_wbi(self, params, video_id):
    """Return a signed copy of ``params`` for a wbi API request.

    A ``wts`` entry holding the current time rounded to whole seconds is
    added (replacing any ``wts`` already present). The entries are then
    sorted by key, their values converted to ``str`` and stripped of the
    characters ``!'()*``. The MD5 hex digest of the urlencoded result
    followed by the wbi key is stored under ``w_rid``.

    The mapping passed in as ``params`` is not modified, so a caller may
    safely reuse it for further requests.

    :param params:   query parameters to sign
    :param video_id: id passed on to ``_get_wbi_key``
    :return: new dict with the sanitized parameters, ``wts`` and ``w_rid``
    """
    # work on a copy: writing wts into the caller's dict would leak a stale
    # timestamp into any later use of that dict
    stamped = {**params, 'wts': round(time.time())}
    strip_chars = str.maketrans('', '', "!'()*")
    signed = {
        key: str(value).translate(strip_chars)
        for key, value in sorted(stamped.items())
    }
    # the query is encoded before w_rid is added, so w_rid never signs itself
    query = urllib.parse.urlencode(signed)
    signed['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
    return signed
def _download_playinfo(self, bvid, cid, headers={}, qn=None):
params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
if qn:
params['qn'] = qn
return self._download_json(
'https://api.bilibili.com/x/player/playurl', video_id,
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
note=f'Downloading video formats for cid {cid}')['data']
'https://api.bilibili.com/x/player/wbi/playurl', bvid,
query=self._sign_wbi(params, bvid), headers=headers,
note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
def json2srt(self, json_data):
srt_data = ''
@ -211,7 +278,7 @@ def _get_interactive_entries(self, video_id, cid, metainfo):
('data', 'interaction', 'graph_version', {int_or_none}))
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
for cid, edges in cid_edges.items():
play_info = self._download_playinfo(video_id, cid)
play_info = self._download_playinfo(video_id, cid, metainfo.get('http_headers', {}))
yield {
**metainfo,
'id': f'{video_id}_{cid}',
@ -243,17 +310,17 @@ class BiliBiliIE(BilibiliBaseIE):
'timestamp': 1488353834,
'like_count': int,
'view_count': int,
'_old_archive_ids': ['bilibili 8903802_part1'],
},
}, {
'note': 'old av URL version',
'url': 'http://www.bilibili.com/video/av1074402/',
'info_dict': {
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
'id': 'BV11x411K7CN',
'ext': 'mp4',
'title': '【金坷垃】金泡沫',
'uploader': '菊子桑',
'uploader_id': '156160',
'id': 'BV11x411K7CN',
'title': '【金坷垃】金泡沫',
'duration': 308.36,
'upload_date': '20140420',
'timestamp': 1397983878,
@ -262,6 +329,8 @@ class BiliBiliIE(BilibiliBaseIE):
'comment_count': int,
'view_count': int,
'tags': list,
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
'_old_archive_ids': ['bilibili 1074402_part1'],
},
'params': {'skip_download': True},
}, {
@ -288,6 +357,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
'duration': 90.314,
'_old_archive_ids': ['bilibili 498159642_part1'],
}
}]
}, {
@ -308,28 +378,8 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
'duration': 90.314,
'_old_archive_ids': ['bilibili 498159642_part1'],
}
}, {
'note': 'video has subtitles',
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
'info_dict': {
'id': 'BV12N4y1M7rh',
'ext': 'mp4',
'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
'tags': list,
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
'duration': 313.557,
'upload_date': '20220709',
'uploader': '小夫太渴',
'timestamp': 1657347907,
'uploader_id': '1326814124',
'comment_count': int,
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'subtitles': 'count:2'
},
'params': {'listsubtitles': True},
}, {
'url': 'https://www.bilibili.com/video/av8903802/',
'info_dict': {
@ -347,6 +397,7 @@ class BiliBiliIE(BilibiliBaseIE):
'comment_count': int,
'view_count': int,
'like_count': int,
'_old_archive_ids': ['bilibili 8903802_part1'],
},
'params': {
'skip_download': True,
@ -370,6 +421,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 463665680_part1'],
},
'params': {'skip_download': True},
}, {
@ -388,8 +440,8 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 893839363_part1'],
},
'params': {'skip_download': True},
}, {
'note': 'newer festival video',
'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
@ -406,8 +458,57 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 778246196_part1'],
},
}, {
'note': 'legacy flv/mp4 video',
'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4',
'info_dict': {
'id': 'BV1ms411Q7vw_p4',
'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
'timestamp': 1458222815,
'upload_date': '20160317',
'description': '云南方言快乐生产线出品',
'duration': float,
'uploader': '一笑颠天',
'uploader_id': '3916081',
'view_count': int,
'comment_count': int,
'like_count': int,
'tags': list,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 4120229_part4'],
},
'params': {'extractor_args': {'bilibili': {'_prefer_multi_flv': ['32']}}},
'playlist_count': 19,
'playlist': [{
'info_dict': {
'id': 'BV1ms411Q7vw_p4_0',
'ext': 'flv',
'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
'duration': 399.102,
},
}],
}, {
'note': 'legacy mp4-only video',
'url': 'https://www.bilibili.com/video/BV1nx411u79K',
'info_dict': {
'id': 'BV1nx411u79K',
'ext': 'mp4',
'title': '【练习室】201603声乐练习《No Air》with VigoVan',
'timestamp': 1508893551,
'upload_date': '20171025',
'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van',
'duration': 80.384,
'uploader': '伯远',
'uploader_id': '10584494',
'comment_count': int,
'view_count': int,
'like_count': int,
'tags': list,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 15700301_part1'],
},
'params': {'skip_download': True},
}, {
'note': 'interactive/split-path video',
'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
@ -425,6 +526,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 292734508_part1'],
},
'playlist_count': 33,
'playlist': [{
@ -443,6 +545,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 292734508_part1'],
},
}],
}, {
@ -465,6 +568,29 @@ class BiliBiliIE(BilibiliBaseIE):
'upload_date': '20191021',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
}, {
'note': 'video has subtitles, which requires login',
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
'info_dict': {
'id': 'BV12N4y1M7rh',
'ext': 'mp4',
'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
'tags': list,
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
'duration': 313.557,
'upload_date': '20220709',
'uploader': '小夫太渴',
'timestamp': 1657347907,
'uploader_id': '1326814124',
'comment_count': int,
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'subtitles': 'count:2', # login required for CC subtitle
'_old_archive_ids': ['bilibili 898179753_part1'],
},
'params': {'listsubtitles': True},
'skip': 'login required for subtitle',
}, {
'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
'info_dict': {
@ -498,6 +624,7 @@ def _real_extract(self, url):
return self.url_result(urlh.url)
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
headers = {'Referer': url, **self.geo_verification_headers()}
is_festival = 'videoData' not in initial_state
if is_festival:
@ -547,12 +674,11 @@ def _real_extract(self, url):
aid = video_data.get('aid')
old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
festival_info = {}
if is_festival:
play_info = self._download_playinfo(video_id, cid)
play_info = self._download_playinfo(video_id, cid, headers)
festival_info = traverse_obj(initial_state, {
'uploader': ('videoInfo', 'upName'),
@ -579,7 +705,7 @@ def _real_extract(self, url):
'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
'_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
'title': title,
'http_headers': {'Referer': url},
'http_headers': headers,
}
is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
@ -590,14 +716,54 @@ def _real_extract(self, url):
'__post_extractor': self.extract_comments(aid),
})
else:
return {
**metainfo,
'duration': float_or_none(play_info.get('timelength'), scale=1000),
'chapters': self._get_chapters(aid, cid),
'subtitles': self.extract_subtitles(video_id, cid),
'formats': self.extract_formats(play_info),
'__post_extractor': self.extract_comments(aid),
}
formats = self.extract_formats(play_info)
if not traverse_obj(play_info, ('dash')): # for legacy-only formats
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
formats.extend(traverse_obj(
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
(lambda _, v: not has_qn(v.get('quality')))))
self.check_missing_formats(play_info, formats)
if traverse_obj(formats, lambda _, v: v['fragments']):
if not self._configuration_arg('_prefer_multi_flv'):
# `_prefer_multi_flv` is mainly for writing test case, user should hardly need this
dropping = ', '.join(traverse_obj(formats, (
lambda _, v: v['fragments'], {lambda x: f'{x["format"]} ({x["format_id"]})'})))
formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
if dropping:
self.to_screen(f'Dropping incompatible flv format(s) {dropping} when mp4 exists')
else:
formats = traverse_obj(
formats, lambda _, v: v['quality'] == int(self._configuration_arg('_prefer_multi_flv')[0])
) or [max(traverse_obj(formats, lambda _, v: v['fragments']), key=lambda x: x['quality'])]
if formats[0].get('fragments'): # transform multi_video format
return {
**metainfo,
'_type': 'multi_video',
'entries': [{
'id': f'{metainfo["id"]}_{idx}',
'title': metainfo['title'],
'http_headers': metainfo['http_headers'],
'formats': [{
**fragment,
'format_id': formats[0].get('format_id'),
}],
'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
'__post_extractor': self.extract_comments(aid) if idx == 0 else None,
} for idx, fragment in enumerate(formats[0]['fragments'])],
'duration': float_or_none(play_info.get('timelength'), scale=1000),
}
else:
return {
**metainfo,
'formats': formats,
'duration': float_or_none(play_info.get('timelength'), scale=1000),
'chapters': self._get_chapters(aid, cid),
'subtitles': self.extract_subtitles(video_id, cid),
'__post_extractor': self.extract_comments(aid),
}
class BiliBiliBangumiIE(BilibiliBaseIE):
@ -967,7 +1133,7 @@ def _real_extract(self, url):
}))
class BilibiliSpaceBaseIE(InfoExtractor):
class BilibiliSpaceBaseIE(BilibiliBaseIE):
def _extract_playlist(self, fetch_page, get_metadata, get_entries):
first_page = fetch_page(0)
metadata = get_metadata(first_page)
@ -987,45 +1153,22 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
'id': '3985676',
},
'playlist_mincount': 178,
'skip': 'login required',
}, {
'url': 'https://space.bilibili.com/313580179/video',
'info_dict': {
'id': '313580179',
},
'playlist_mincount': 92,
'skip': 'login required',
}]
def _extract_signature(self, playlist_id):
    """Build the signature key (at most 32 characters) for space API requests.

    The nav API is queried non-fatally. The name parts of its
    ``data.wbi_img.img_url`` and ``data.wbi_img.sub_url`` are concatenated,
    with a hard-coded value standing in for whichever part is unavailable or
    empty. Characters are then picked from that string in a fixed order;
    positions beyond its end are skipped.
    """
    nav = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)

    def key_from_url(url):
        # text between the last '/' and the first '.' that follows it
        return url.rpartition('/')[2].partition('.')[0]

    img_key = traverse_obj(nav, ('data', 'wbi_img', 'img_url', {key_from_url}))
    if not img_key:
        img_key = '34478ba821254d9d93542680e3b86100'
    sub_key = traverse_obj(nav, ('data', 'wbi_img', 'sub_url', {key_from_url}))
    if not sub_key:
        sub_key = '7e16a90d190a4355a78fd00b32a38de6'
    session_key = img_key + sub_key

    positions = (
        46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
        12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
        57, 62, 11, 36, 20, 34, 44, 52,
    )
    # try_call yields None for a position past the end of session_key
    picked = (try_call(lambda: session_key[position]) for position in positions)
    return ''.join(filter(None, picked))[:32]
def _real_extract(self, url):
playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
if not is_video_url:
self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
'To download audios, add a "/audio" to the URL')
signature = self._extract_signature(playlist_id)
def fetch_page(page_idx):
query = {
'keyword': '',
@ -1037,13 +1180,13 @@ def fetch_page(page_idx):
'ps': 30,
'tid': 0,
'web_location': 1550101,
'wts': int(time.time()),
}
query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
try:
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
playlist_id, note=f'Downloading page {page_idx}', query=query)
response = self._download_json(
'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
query=self._sign_wbi(query, playlist_id),
note=f'Downloading space page {page_idx}', headers={'Referer': url})
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
raise ExtractorError(
@ -1052,6 +1195,10 @@ def fetch_page(page_idx):
if response['code'] == -401:
raise ExtractorError(
'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
if response['code'] == -352 and not self._get_cookies('https://api.bilibili.com').get('SESSDATA'):
self.raise_login_required('Request is rejected, you need to login to access playlist')
if response['code'] != 0:
raise ExtractorError(f'Request failed ({response["code"]}): {response.get("message", "")}')
return response['data']
def get_metadata(page_data):
@ -1277,7 +1424,10 @@ class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
_TESTS = [{
'url': 'https://www.bilibili.com/watchlater/#/list',
'info_dict': {'id': 'watchlater'},
'info_dict': {
'id': r're:\d+',
'title': '稍后再看',
},
'playlist_mincount': 0,
'skip': 'login required',
}]
@ -1353,14 +1503,19 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
'skip': 'redirect url',
}, {
'url': 'https://www.bilibili.com/list/watchlater',
'info_dict': {'id': 'watchlater'},
'info_dict': {
'id': r're:2_\d+',
'title': '稍后再看',
'uploader': str,
'uploader_id': str,
},
'playlist_mincount': 0,
'skip': 'login required',
}, {
'url': 'https://www.bilibili.com/medialist/play/watchlater',
'info_dict': {'id': 'watchlater'},
'playlist_mincount': 0,
'skip': 'login required',
'skip': 'redirect url & login required',
}]
def _extract_medialist(self, query, list_id):
@ -1411,7 +1566,7 @@ def _real_extract(self, url):
'title': ('title', {str}),
'uploader': ('upper', 'name', {str}),
'uploader_id': ('upper', 'mid', {str_or_none}),
'timestamp': ('ctime', {int_or_none}),
'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
'thumbnail': ('cover', {url_or_none}),
})),
}
@ -1806,7 +1961,8 @@ def _perform_login(self, username, password):
public_key = Cryptodome.RSA.importKey(key_data['key'])
password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
login_post = self._download_json(
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
data=urlencode_postdata({
'username': username,
'password': base64.b64encode(password_hash).decode('ascii'),
'keep_me': 'true',
@ -2138,7 +2294,8 @@ def _entries(self, series_id):
def _real_extract(self, url):
series_id = self._match_id(url)
series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
series_info = self._call_api(
f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
return self.playlist_result(
self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),

View File

@ -151,7 +151,7 @@ def _real_extract(self, url):
class CBCPlayerIE(InfoExtractor):
IE_NAME = 'cbc.ca:player'
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
_TESTS = [{
'url': 'http://www.cbc.ca/player/play/2683190193',
'md5': '64d25f841ddf4ddb28a235338af32e2c',
@ -277,6 +277,28 @@ class CBCPlayerIE(InfoExtractor):
'location': 'Canada',
'media_type': 'Full Program',
},
}, {
'url': 'https://www.cbc.ca/player/play/video/1.7194274',
'md5': '188b96cf6bdcb2540e178a6caa957128',
'info_dict': {
'id': '2334524995812',
'ext': 'mp4',
'title': '#TheMoment a rare white spirit moose was spotted in Alberta',
'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
'timestamp': 1714788791,
'duration': 77.678,
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/201/543/THE_MOMENT.jpg',
'uploader': 'CBCC-NEW',
'chapters': 'count:0',
'upload_date': '20240504',
'categories': 'count:3',
'series': 'The National',
'tags': 'count:15',
'creators': ['encoder'],
'location': 'Canada',
'media_type': 'Excerpt',
},
}, {
'url': 'cbcplayer:1.7159484',
'only_matching': True,

View File

@ -53,15 +53,19 @@ def _set_auth_info(self, response):
CrunchyrollBaseIE._AUTH_EXPIRY = time_seconds(seconds=traverse_obj(response, ('expires_in', {float_or_none}), default=300) - 10)
def _request_token(self, headers, data, note='Requesting token', errnote='Failed to request token'):
try: # TODO: Add impersonation support here
try:
return self._download_json(
f'{self._BASE_URL}/auth/v1/token', None, note=note, errnote=errnote,
headers=headers, data=urlencode_postdata(data))
headers=headers, data=urlencode_postdata(data), impersonate=True)
except ExtractorError as error:
if not isinstance(error.cause, HTTPError) or error.cause.status != 403:
raise
if target := error.cause.response.extensions.get('impersonate'):
raise ExtractorError(f'Got HTTP Error 403 when using impersonate target "{target}"')
raise ExtractorError(
'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
'Request blocked by Cloudflare. '
'Install the required impersonation dependency if possible, '
'or else navigate to Crunchyroll in your browser, '
'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
'and your browser\'s User-Agent (with --user-agent)', expected=True)

View File

@ -72,15 +72,15 @@ class YouPornIE(InfoExtractor):
'id': '16290308',
'age_limit': 18,
'categories': [],
'description': 'md5:00ea70f642f431c379763c17c2f396bc',
'description': str, # TODO: detect/remove SEO spam description in ytdl backport
'display_id': 'tinderspecial-trailer1',
'duration': 298.0,
'ext': 'mp4',
'upload_date': '20201123',
'uploader': 'Ersties',
'tags': [],
'thumbnail': 'https://fi1.ypncdn.com/202011/23/16290308/original/8/tinderspecial-trailer1-8(m=eaAaaEPbaaaa).jpg',
'timestamp': 1606089600,
'thumbnail': r're:https://.+\.jpg',
'timestamp': 1606147564,
'title': 'Tinder In Real Life',
'view_count': int,
}
@ -88,11 +88,17 @@ class YouPornIE(InfoExtractor):
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
definitions = self._download_json(
f'https://www.youporn.com/api/video/media_definitions/{video_id}/', display_id or video_id)
self._set_cookie('.youporn.com', 'age_verified', '1')
webpage = self._download_webpage(f'https://www.youporn.com/watch/{video_id}', video_id)
definitions = self._search_json(r'\bplayervars\s*:', webpage, 'player vars', video_id)['mediaDefinitions']
def get_format_data(data, f):
return traverse_obj(data, lambda _, v: v['format'] == f and url_or_none(v['videoUrl']))
def get_format_data(data, stream_type):
info_url = traverse_obj(data, (lambda _, v: v['format'] == stream_type, 'videoUrl', {url_or_none}, any))
if not info_url:
return []
return traverse_obj(
self._download_json(info_url, video_id, f'Downloading {stream_type} info JSON', fatal=False),
lambda _, v: v['format'] == stream_type and url_or_none(v['videoUrl']))
formats = []
# Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
@ -123,10 +129,6 @@ def get_format_data(data, f):
f['height'] = height
formats.append(f)
webpage = self._download_webpage(
'http://www.youporn.com/watch/%s' % video_id, display_id,
headers={'Cookie': 'age_verified=1'})
title = self._html_search_regex(
r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
webpage, 'title', default=None) or self._og_search_title(