Merge df1b9ab688 into 5904853ae5

[ie/crunchyroll] Support browser impersonation (#9857)
Closes #7442 Authored by: bashonly
2024-05-07 14:32:00 +08:00 · 2024-05-05 23:15:32 +00:00 · 2024-05-05 23:02:24 +00:00 · 2024-05-05 22:57:38 +00:00 · 2024-02-26 00:09:06 -05:00 · 2024-02-12 13:35:14 -05:00
4 changed files with 283 additions and 98 deletions
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -36,7 +36,6 @@
    srt_subtitles_timecode,
    str_or_none,
    traverse_obj,
-    try_call,
    unified_timestamp,
    unsmuggle_url,
    url_or_none,
@@ -47,6 +46,18 @@

 class BilibiliBaseIE(InfoExtractor):
    _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
+    _WBI_KEY_CACHE_TIMEOUT = 30  # exact expire timeout is unclear, use 30s for one session
+    _wbi_key_cache = {}
+
+    def check_missing_formats(self, play_info, formats):
+        parsed_qualites = set(traverse_obj(formats, (..., 'quality')))
+        missing_formats = [
+            traverse_obj(missing, 'new_description', 'display_desc', 'quality')
+            for missing in traverse_obj(play_info, (
+                'support_formats', lambda _, v: v['quality'] not in parsed_qualites))]
+        if missing_formats:
+            self.to_screen(f'Format(s) {", ".join(map(str, missing_formats))} are missing; '
+                           f'you have to login or become premium member to download them. {self._login_hint()}')

    def extract_formats(self, play_info):
        format_names = {
@@ -86,18 +97,74 @@ def extract_formats(self, play_info):
            'format': format_names.get(video.get('id')),
        } for video in traverse_obj(play_info, ('dash', 'video', ...)))

-        missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
-        if missing_formats:
-            self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
-                           f'you have to login or become premium member to download them. {self._login_hint()}')
+        if formats:
+            self.check_missing_formats(play_info, formats)

+        fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
+            'url': ('url', {url_or_none}),
+            'duration': ('length', {lambda x: float_or_none(x, scale=1000)}),
+            'filesize': ('size', {int_or_none}),
+        }))
+        if fragments:
+            formats.append({
+                'url': fragments[0]['url'],
+                'filesize': sum(traverse_obj(fragments, (..., 'filesize'))),
+                **({
+                    'fragments': fragments,
+                    'protocol': 'http_dash_segments'
+                } if len(fragments) > 1 else {}),
+                **traverse_obj(play_info, {
+                    'quality': ('quality', {int_or_none}),
+                    'format_id': ('quality', {str_or_none}),
+                    'format': ('quality', {lambda x: format_names.get(x)}),
+                    'resolution': ('quality', {lambda x: format_names.get(x)}),
+                    'duration': ('timelength', {lambda x: float_or_none(x, scale=1000)}),
+                }),
+            })
        return formats

-    def _download_playinfo(self, video_id, cid):
+    def _get_wbi_key(self, video_id):
+        if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
+            return self._wbi_key_cache['key']
+
+        session_data = self._download_json(
+            'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')
+
+        lookup = ''.join(traverse_obj(session_data, (
+            'data', 'wbi_img', ('img_url', 'sub_url'),
+            {lambda x: x.rpartition('/')[2].partition('.')[0]})))
+
+        mixin_key_enc_tab = [
+            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
+            33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
+            61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
+            36, 20, 34, 44, 52
+        ]
+
+        self._wbi_key_cache.update({
+            'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32],
+            'ts': time.time(),
+        })
+        return self._wbi_key_cache['key']
+
+    def _sign_wbi(self, params, video_id):
+        params['wts'] = round(time.time())
+        params = {
+            k: ''.join(filter(lambda char: char not in "!'()*", str(v)))
+            for k, v in sorted(params.items())
+        }
+        query = urllib.parse.urlencode(params)
+        params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
+        return params
+
+    def _download_playinfo(self, bvid, cid, headers={}, qn=None):
+        params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
+        if qn:
+            params['qn'] = qn
        return self._download_json(
-            'https://api.bilibili.com/x/player/playurl', video_id,
-            query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
-            note=f'Downloading video formats for cid {cid}')['data']
+            'https://api.bilibili.com/x/player/wbi/playurl', bvid,
+            query=self._sign_wbi(params, bvid), headers=headers,
+            note=f'Downloading video formats for cid {cid} {qn or ""}')['data']

    def json2srt(self, json_data):
        srt_data = ''
@@ -211,7 +278,7 @@ def _get_interactive_entries(self, video_id, cid, metainfo):
            ('data', 'interaction', 'graph_version', {int_or_none}))
        cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
        for cid, edges in cid_edges.items():
-            play_info = self._download_playinfo(video_id, cid)
+            play_info = self._download_playinfo(video_id, cid, metainfo.get('http_headers', {}))
            yield {
                **metainfo,
                'id': f'{video_id}_{cid}',
@@ -243,17 +310,17 @@ class BiliBiliIE(BilibiliBaseIE):
            'timestamp': 1488353834,
            'like_count': int,
            'view_count': int,
+            '_old_archive_ids': ['bilibili 8903802_part1'],
        },
    }, {
        'note': 'old av URL version',
        'url': 'http://www.bilibili.com/video/av1074402/',
        'info_dict': {
-            'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
+            'id': 'BV11x411K7CN',
            'ext': 'mp4',
+            'title': '【金坷垃】金泡沫',
            'uploader': '菊子桑',
            'uploader_id': '156160',
-            'id': 'BV11x411K7CN',
-            'title': '【金坷垃】金泡沫',
            'duration': 308.36,
            'upload_date': '20140420',
            'timestamp': 1397983878,
@@ -262,6 +329,8 @@ class BiliBiliIE(BilibiliBaseIE):
            'comment_count': int,
            'view_count': int,
            'tags': list,
+            'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
+            '_old_archive_ids': ['bilibili 1074402_part1'],
        },
        'params': {'skip_download': True},
    }, {
@@ -288,6 +357,7 @@ class BiliBiliIE(BilibiliBaseIE):
                'view_count': int,
                'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
                'duration': 90.314,
+                '_old_archive_ids': ['bilibili 498159642_part1'],
            }
        }]
    }, {
@@ -308,28 +378,8 @@ class BiliBiliIE(BilibiliBaseIE):
            'view_count': int,
            'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
            'duration': 90.314,
+            '_old_archive_ids': ['bilibili 498159642_part1'],
        }
-    }, {
-        'note': 'video has subtitles',
-        'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
-        'info_dict': {
-            'id': 'BV12N4y1M7rh',
-            'ext': 'mp4',
-            'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
-            'tags': list,
-            'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
-            'duration': 313.557,
-            'upload_date': '20220709',
-            'uploader': '小夫太渴',
-            'timestamp': 1657347907,
-            'uploader_id': '1326814124',
-            'comment_count': int,
-            'view_count': int,
-            'like_count': int,
-            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
-            'subtitles': 'count:2'
-        },
-        'params': {'listsubtitles': True},
    }, {
        'url': 'https://www.bilibili.com/video/av8903802/',
        'info_dict': {
@@ -347,6 +397,7 @@ class BiliBiliIE(BilibiliBaseIE):
            'comment_count': int,
            'view_count': int,
            'like_count': int,
+            '_old_archive_ids': ['bilibili 8903802_part1'],
        },
        'params': {
            'skip_download': True,
@@ -370,6 +421,7 @@ class BiliBiliIE(BilibiliBaseIE):
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+            '_old_archive_ids': ['bilibili 463665680_part1'],
        },
        'params': {'skip_download': True},
    }, {
@@ -388,8 +440,8 @@ class BiliBiliIE(BilibiliBaseIE):
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+            '_old_archive_ids': ['bilibili 893839363_part1'],
        },
-        'params': {'skip_download': True},
    }, {
        'note': 'newer festival video',
        'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
@@ -406,8 +458,57 @@ class BiliBiliIE(BilibiliBaseIE):
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+            '_old_archive_ids': ['bilibili 778246196_part1'],
+        },
+    }, {
+        'note': 'legacy flv/mp4 video',
+        'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4',
+        'info_dict': {
+            'id': 'BV1ms411Q7vw_p4',
+            'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
+            'timestamp': 1458222815,
+            'upload_date': '20160317',
+            'description': '云南方言快乐生产线出品',
+            'duration': float,
+            'uploader': '一笑颠天',
+            'uploader_id': '3916081',
+            'view_count': int,
+            'comment_count': int,
+            'like_count': int,
+            'tags': list,
+            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+            '_old_archive_ids': ['bilibili 4120229_part4'],
+        },
+        'params': {'extractor_args': {'bilibili': {'_prefer_multi_flv': ['32']}}},
+        'playlist_count': 19,
+        'playlist': [{
+            'info_dict': {
+                'id': 'BV1ms411Q7vw_p4_0',
+                'ext': 'flv',
+                'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
+                'duration': 399.102,
+            },
+        }],
+    }, {
+        'note': 'legacy mp4-only video',
+        'url': 'https://www.bilibili.com/video/BV1nx411u79K',
+        'info_dict': {
+            'id': 'BV1nx411u79K',
+            'ext': 'mp4',
+            'title': '【练习室】201603声乐练习《No Air》with VigoVan',
+            'timestamp': 1508893551,
+            'upload_date': '20171025',
+            'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van',
+            'duration': 80.384,
+            'uploader': '伯远',
+            'uploader_id': '10584494',
+            'comment_count': int,
+            'view_count': int,
+            'like_count': int,
+            'tags': list,
+            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+            '_old_archive_ids': ['bilibili 15700301_part1'],
        },
-        'params': {'skip_download': True},
    }, {
        'note': 'interactive/split-path video',
        'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
@@ -425,6 +526,7 @@ class BiliBiliIE(BilibiliBaseIE):
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+            '_old_archive_ids': ['bilibili 292734508_part1'],
        },
        'playlist_count': 33,
        'playlist': [{
@@ -443,6 +545,7 @@ class BiliBiliIE(BilibiliBaseIE):
                'view_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+                '_old_archive_ids': ['bilibili 292734508_part1'],
            },
        }],
    }, {
@@ -465,6 +568,29 @@ class BiliBiliIE(BilibiliBaseIE):
            'upload_date': '20191021',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
+    }, {
+        'note': 'video has subtitles, which requires login',
+        'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
+        'info_dict': {
+            'id': 'BV12N4y1M7rh',
+            'ext': 'mp4',
+            'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
+            'tags': list,
+            'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
+            'duration': 313.557,
+            'upload_date': '20220709',
+            'uploader': '小夫太渴',
+            'timestamp': 1657347907,
+            'uploader_id': '1326814124',
+            'comment_count': int,
+            'view_count': int,
+            'like_count': int,
+            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+            'subtitles': 'count:2',  # login required for CC subtitle
+            '_old_archive_ids': ['bilibili 898179753_part1'],
+        },
+        'params': {'listsubtitles': True},
+        'skip': 'login required for subtitle',
    }, {
        'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
        'info_dict': {
@@ -498,6 +624,7 @@ def _real_extract(self, url):
            return self.url_result(urlh.url)

        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
+        headers = {'Referer': url, **self.geo_verification_headers()}

        is_festival = 'videoData' not in initial_state
        if is_festival:
@@ -547,12 +674,11 @@ def _real_extract(self, url):

        aid = video_data.get('aid')
        old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
-
        cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')

        festival_info = {}
        if is_festival:
-            play_info = self._download_playinfo(video_id, cid)
+            play_info = self._download_playinfo(video_id, cid, headers)

            festival_info = traverse_obj(initial_state, {
                'uploader': ('videoInfo', 'upName'),
@@ -579,7 +705,7 @@ def _real_extract(self, url):
            'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
            '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
            'title': title,
-            'http_headers': {'Referer': url},
+            'http_headers': headers,
        }

        is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
@@ -590,14 +716,54 @@ def _real_extract(self, url):
                    '__post_extractor': self.extract_comments(aid),
                })
        else:
-            return {
-                **metainfo,
-                'duration': float_or_none(play_info.get('timelength'), scale=1000),
-                'chapters': self._get_chapters(aid, cid),
-                'subtitles': self.extract_subtitles(video_id, cid),
-                'formats': self.extract_formats(play_info),
-                '__post_extractor': self.extract_comments(aid),
-            }
+            formats = self.extract_formats(play_info)
+
+            if not traverse_obj(play_info, ('dash')):  # for legacy-only formats
+                has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
+                for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
+                    formats.extend(traverse_obj(
+                        self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
+                        (lambda _, v: not has_qn(v.get('quality')))))
+                self.check_missing_formats(play_info, formats)
+                if traverse_obj(formats, lambda _, v: v['fragments']):
+                    if not self._configuration_arg('_prefer_multi_flv'):
+                        # `_prefer_multi_flv` is mainly for writing test case, user should hardly need this
+                        dropping = ', '.join(traverse_obj(formats, (
+                            lambda _, v: v['fragments'], {lambda x: f'{x["format"]} ({x["format_id"]})'})))
+                        formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
+                        if dropping:
+                            self.to_screen(f'Dropping incompatible flv format(s) {dropping} when mp4 exists')
+                    else:
+                        formats = traverse_obj(
+                            formats, lambda _, v: v['quality'] == int(self._configuration_arg('_prefer_multi_flv')[0])
+                        ) or [max(traverse_obj(formats, lambda _, v: v['fragments']), key=lambda x: x['quality'])]
+
+            if formats[0].get('fragments'):  # transform multi_video format
+                return {
+                    **metainfo,
+                    '_type': 'multi_video',
+                    'entries': [{
+                        'id': f'{metainfo["id"]}_{idx}',
+                        'title': metainfo['title'],
+                        'http_headers': metainfo['http_headers'],
+                        'formats': [{
+                            **fragment,
+                            'format_id': formats[0].get('format_id'),
+                        }],
+                        'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
+                        '__post_extractor': self.extract_comments(aid) if idx == 0 else None,
+                    } for idx, fragment in enumerate(formats[0]['fragments'])],
+                    'duration': float_or_none(play_info.get('timelength'), scale=1000),
+                }
+            else:
+                return {
+                    **metainfo,
+                    'formats': formats,
+                    'duration': float_or_none(play_info.get('timelength'), scale=1000),
+                    'chapters': self._get_chapters(aid, cid),
+                    'subtitles': self.extract_subtitles(video_id, cid),
+                    '__post_extractor': self.extract_comments(aid),
+                }


 class BiliBiliBangumiIE(BilibiliBaseIE):
@@ -967,7 +1133,7 @@ def _real_extract(self, url):
            }))


-class BilibiliSpaceBaseIE(InfoExtractor):
+class BilibiliSpaceBaseIE(BilibiliBaseIE):
    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        first_page = fetch_page(0)
        metadata = get_metadata(first_page)
@@ -987,45 +1153,22 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
            'id': '3985676',
        },
        'playlist_mincount': 178,
+        'skip': 'login required',
    }, {
        'url': 'https://space.bilibili.com/313580179/video',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 92,
+        'skip': 'login required',
    }]

-    def _extract_signature(self, playlist_id):
-        session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
-
-        key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
-        img_key = traverse_obj(
-            session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
-        sub_key = traverse_obj(
-            session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
-
-        session_key = img_key + sub_key
-
-        signature_values = []
-        for position in (
-            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
-            12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
-            57, 62, 11, 36, 20, 34, 44, 52
-        ):
-            char_at_position = try_call(lambda: session_key[position])
-            if char_at_position:
-                signature_values.append(char_at_position)
-
-        return ''.join(signature_values)[:32]
-
    def _real_extract(self, url):
        playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
        if not is_video_url:
            self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
                           'To download audios, add a "/audio" to the URL')

-        signature = self._extract_signature(playlist_id)
-
        def fetch_page(page_idx):
            query = {
                'keyword': '',
@@ -1037,13 +1180,13 @@ def fetch_page(page_idx):
                'ps': 30,
                'tid': 0,
                'web_location': 1550101,
-                'wts': int(time.time()),
            }
-            query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()

            try:
-                response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
-                                               playlist_id, note=f'Downloading page {page_idx}', query=query)
+                response = self._download_json(
+                    'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
+                    query=self._sign_wbi(query, playlist_id),
+                    note=f'Downloading space page {page_idx}', headers={'Referer': url})
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 412:
                    raise ExtractorError(
@@ -1052,6 +1195,10 @@ def fetch_page(page_idx):
            if response['code'] == -401:
                raise ExtractorError(
                    'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
+            if response['code'] == -352 and not self._get_cookies('https://api.bilibili.com').get('SESSDATA'):
+                self.raise_login_required('Request is rejected, you need to login to access playlist')
+            if response['code'] != 0:
+                raise ExtractorError(f'Request failed ({response["code"]}): {response.get("message", "")}')
            return response['data']

        def get_metadata(page_data):
@@ -1277,7 +1424,10 @@ class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/watchlater/#/list',
-        'info_dict': {'id': 'watchlater'},
+        'info_dict': {
+            'id': r're:\d+',
+            'title': '稍后再看',
+        },
        'playlist_mincount': 0,
        'skip': 'login required',
    }]
@@ -1353,14 +1503,19 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/watchlater',
-        'info_dict': {'id': 'watchlater'},
+        'info_dict': {
+            'id': r're:2_\d+',
+            'title': '稍后再看',
+            'uploader': str,
+            'uploader_id': str,
+        },
        'playlist_mincount': 0,
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/medialist/play/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
-        'skip': 'login required',
+        'skip': 'redirect url & login required',
    }]

    def _extract_medialist(self, query, list_id):
@@ -1411,7 +1566,7 @@ def _real_extract(self, url):
                'title': ('title', {str}),
                'uploader': ('upper', 'name', {str}),
                'uploader_id': ('upper', 'mid', {str_or_none}),
-                'timestamp': ('ctime', {int_or_none}),
+                'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
                'thumbnail': ('cover', {url_or_none}),
            })),
        }
@@ -1806,7 +1961,8 @@ def _perform_login(self, username, password):
        public_key = Cryptodome.RSA.importKey(key_data['key'])
        password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
        login_post = self._download_json(
-            'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
+            'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
+            data=urlencode_postdata({
                'username': username,
                'password': base64.b64encode(password_hash).decode('ascii'),
                'keep_me': 'true',
@@ -2138,7 +2294,8 @@ def _entries(self, series_id):

    def _real_extract(self, url):
        series_id = self._match_id(url)
-        series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
+        series_info = self._call_api(
+            f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
        return self.playlist_result(
            self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
            categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@@ -151,7 +151,7 @@ def _real_extract(self, url):

 class CBCPlayerIE(InfoExtractor):
    IE_NAME = 'cbc.ca:player'
-    _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
+    _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
    _TESTS = [{
        'url': 'http://www.cbc.ca/player/play/2683190193',
        'md5': '64d25f841ddf4ddb28a235338af32e2c',
@@ -277,6 +277,28 @@ class CBCPlayerIE(InfoExtractor):
            'location': 'Canada',
            'media_type': 'Full Program',
        },
+    }, {
+        'url': 'https://www.cbc.ca/player/play/video/1.7194274',
+        'md5': '188b96cf6bdcb2540e178a6caa957128',
+        'info_dict': {
+            'id': '2334524995812',
+            'ext': 'mp4',
+            'title': '#TheMoment a rare white spirit moose was spotted in Alberta',
+            'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
+            'timestamp': 1714788791,
+            'duration': 77.678,
+            'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
+            'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/201/543/THE_MOMENT.jpg',
+            'uploader': 'CBCC-NEW',
+            'chapters': 'count:0',
+            'upload_date': '20240504',
+            'categories': 'count:3',
+            'series': 'The National',
+            'tags': 'count:15',
+            'creators': ['encoder'],
+            'location': 'Canada',
+            'media_type': 'Excerpt',
+        },
    }, {
        'url': 'cbcplayer:1.7159484',
        'only_matching': True,
--- a/yt_dlp/extractor/crunchyroll.py
+++ b/yt_dlp/extractor/crunchyroll.py
@@ -53,15 +53,19 @@ def _set_auth_info(self, response):
        CrunchyrollBaseIE._AUTH_EXPIRY = time_seconds(seconds=traverse_obj(response, ('expires_in', {float_or_none}), default=300) - 10)

    def _request_token(self, headers, data, note='Requesting token', errnote='Failed to request token'):
-        try:  # TODO: Add impersonation support here
+        try:
            return self._download_json(
                f'{self._BASE_URL}/auth/v1/token', None, note=note, errnote=errnote,
-                headers=headers, data=urlencode_postdata(data))
+                headers=headers, data=urlencode_postdata(data), impersonate=True)
        except ExtractorError as error:
            if not isinstance(error.cause, HTTPError) or error.cause.status != 403:
                raise
+            if target := error.cause.response.extensions.get('impersonate'):
+                raise ExtractorError(f'Got HTTP Error 403 when using impersonate target "{target}"')
            raise ExtractorError(
-                'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
+                'Request blocked by Cloudflare. '
+                'Install the required impersonation dependency if possible, '
+                'or else navigate to Crunchyroll in your browser, '
                'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
                'and your browser\'s User-Agent (with --user-agent)', expected=True)

--- a/yt_dlp/extractor/youporn.py
+++ b/yt_dlp/extractor/youporn.py
@@ -72,15 +72,15 @@ class YouPornIE(InfoExtractor):
            'id': '16290308',
            'age_limit': 18,
            'categories': [],
-            'description': 'md5:00ea70f642f431c379763c17c2f396bc',
+            'description': str,  # TODO: detect/remove SEO spam description in ytdl backport
            'display_id': 'tinderspecial-trailer1',
            'duration': 298.0,
            'ext': 'mp4',
            'upload_date': '20201123',
            'uploader': 'Ersties',
            'tags': [],
-            'thumbnail': 'https://fi1.ypncdn.com/202011/23/16290308/original/8/tinderspecial-trailer1-8(m=eaAaaEPbaaaa).jpg',
-            'timestamp': 1606089600,
+            'thumbnail': r're:https://.+\.jpg',
+            'timestamp': 1606147564,
            'title': 'Tinder In Real Life',
            'view_count': int,
        }
@@ -88,11 +88,17 @@ class YouPornIE(InfoExtractor):

    def _real_extract(self, url):
        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
-        definitions = self._download_json(
-            f'https://www.youporn.com/api/video/media_definitions/{video_id}/', display_id or video_id)
+        self._set_cookie('.youporn.com', 'age_verified', '1')
+        webpage = self._download_webpage(f'https://www.youporn.com/watch/{video_id}', video_id)
+        definitions = self._search_json(r'\bplayervars\s*:', webpage, 'player vars', video_id)['mediaDefinitions']

-        def get_format_data(data, f):
-            return traverse_obj(data, lambda _, v: v['format'] == f and url_or_none(v['videoUrl']))
+        def get_format_data(data, stream_type):
+            info_url = traverse_obj(data, (lambda _, v: v['format'] == stream_type, 'videoUrl', {url_or_none}, any))
+            if not info_url:
+                return []
+            return traverse_obj(
+                self._download_json(info_url, video_id, f'Downloading {stream_type} info JSON', fatal=False),
+                lambda _, v: v['format'] == stream_type and url_or_none(v['videoUrl']))

        formats = []
        # Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
@@ -123,10 +129,6 @@ def get_format_data(data, f):
            f['height'] = height
            formats.append(f)

-        webpage = self._download_webpage(
-            'http://www.youporn.com/watch/%s' % video_id, display_id,
-            headers={'Cookie': 'age_verified=1'})
-
        title = self._html_search_regex(
            r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
            webpage, 'title', default=None) or self._og_search_title(
Author	SHA1	Message	Date
c-basalt	2a38c65403	Merge `df1b9ab688` into `5904853ae5`	2024-05-07 14:32:00 +08:00
bashonly	5904853ae5	[ie/crunchyroll] Support browser impersonation (#9857) Closes #7442 Authored by: bashonly	2024-05-05 23:15:32 +00:00
Chris Caruso	c8bf48f3a8	[ie/cbc.ca:player] Improve `_VALID_URL` (#9866) Closes #9825 Authored by: carusocr	2024-05-05 23:02:24 +00:00
The-MAGI	351368cb9a	[ie/youporn] Fix extractor (#8827) Closes #7967 Authored by: The-MAGI	2024-05-05 22:57:38 +00:00
c-basalt	df1b9ab688	rename and testcase fix	2024-02-26 00:09:06 -05:00
c-basalt	46de1f2b39	login hint for space video list	2024-02-12 13:35:14 -05:00
c-basalt	40e50ac6c0	Merge remote-tracking branch 'origin' into bili-legacy	2024-02-12 13:13:40 -05:00
c-basalt	1ce48dba7e	wbi key cache	2024-02-04 01:44:43 -05:00
c-basalt	88db8b4679	drop flv when mp4 is available	2024-02-02 03:49:36 -05:00
c-basalt	3f46155e93	minor changes	2024-02-01 01:37:19 -05:00
c-basalt	f949f7c79d	fix after merge	2024-02-01 01:24:35 -05:00
c-basalt	8e67c2837c	Merge branch 'master' into bili-legacy	2024-01-30 23:18:57 -05:00
c-basalt	db1dc9b8df	Merge pull request #1 from GD-Slime/master merge PR	2024-01-30 23:09:55 -05:00
GD-Slime	7c3cfab73a	Accept some suggestions in code review	2023-11-09 21:06:42 +08:00
GD-Slime	77ed5d5bf9	fix some bugs in review	2023-11-09 19:39:14 +08:00
GD-Slime	493022e4a7	fix some unnecessary format fixes	2023-11-09 19:04:43 +08:00
GD-Slime	2c0052097f	Apply suggestions from code review Co-authored-by: Simon Sawicki <accounts@grub4k.xyz>	2023-11-09 18:52:12 +08:00
GD-Slime	73324baa2d	use flake8 to check code	2023-11-09 17:21:42 +08:00
GD-Slime	66db69f511	fix some issue when downloading non-dash video in bilibili.com and fix some test params	2023-11-09 16:24:52 +08:00