[test] Skip source address tests if the address cannot be bound to (#8900)

Fixes https://github.com/yt-dlp/yt-dlp/issues/8890 Authored by: coletdjnz
[ie/MLBArticle] Fix extractor (#9021)
2024-11-30 03:01:25 +01:00 · 2024-01-20 10:39:49 +13:00 · 2024-01-19 20:31:06 +00:00 · 2024-01-19 20:27:16 +00:00 · 2024-01-19 20:11:00 +00:00 · 2024-01-19 17:49:15 +01:00
14 changed files with 598 additions and 82 deletions
--- a/README.md
+++ b/README.md
@ -1888,6 +1888,9 @@ The following extractors use this feature:
 #### nflplusreplay
 * `type`: Type(s) of game replays to extract. Valid types are: `full_game`, `full_game_spanish`, `condensed_game` and `all_22`. You can use `all` to extract all available replay types, which is the default
 #### jiosaavn
 * `bitrate`: Audio bitrates to request. One or more of `16`, `32`, `64`, `128`, `320`. Default is `128,320`
 **Note**: These options may be changed/removed in the future without concern for backward compatibility
 <!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
--- a/test/helper.py
+++ b/test/helper.py
@ -10,7 +10,7 @@ import types
 import yt_dlp.extractor
 from yt_dlp import YoutubeDL
 from yt_dlp.compat import compat_os_name
-from yt_dlp.utils import preferredencoding, try_call, write_string
+from yt_dlp.utils import preferredencoding, try_call, write_string, find_available_port
 if 'pytest' in sys.modules:
    import pytest
@ -329,3 +329,8 @@ def http_server_port(httpd):
    else:
        sock = httpd.socket
    return sock.getsockname()[1]
 def verify_address_availability(address):
    """Skip the current pytest test when ``address`` cannot be bound to.

    The address is considered unusable when ``find_available_port`` returns
    ``None`` for it (for example, the address may not exist on this system).
    """
    if find_available_port(address) is not None:
        return
    pytest.skip(f'Unable to bind to source address {address} (address may not exist)')
--- a/test/test_networking.py
+++ b/test/test_networking.py
@ -26,7 +26,7 @@ import zlib
 from email.message import Message
 from http.cookiejar import CookieJar
-from test.helper import FakeYDL, http_server_port
+from test.helper import FakeYDL, http_server_port, verify_address_availability
 from yt_dlp.cookies import YoutubeDLCookieJar
 from yt_dlp.dependencies import brotli, requests, urllib3
 from yt_dlp.networking import (
@ -538,6 +538,9 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_source_address(self, handler):
        source_address = f'127.0.0.{random.randint(5, 255)}'
        # on some systems these loopback addresses we need for testing may not be available
        # see: https://github.com/yt-dlp/yt-dlp/issues/8890
        verify_address_availability(source_address)
        with handler(source_address=source_address) as rh:
            data = validate_and_send(
                rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
--- a/test/test_socks.py
+++ b/test/test_socks.py
@ -25,7 +25,7 @@ from socketserver import (
    ThreadingTCPServer,
 )
-from test.helper import http_server_port
+from test.helper import http_server_port, verify_address_availability
 from yt_dlp.networking import Request
 from yt_dlp.networking.exceptions import ProxyError, TransportError
 from yt_dlp.socks import (
@ -326,6 +326,7 @@ class TestSocks4Proxy:
    def test_ipv4_client_source_address(self, handler, ctx):
        with ctx.socks_server(Socks4ProxyHandler) as server_address:
            source_address = f'127.0.0.{random.randint(5, 255)}'
            verify_address_availability(source_address)
            with handler(proxies={'all': f'socks4://{server_address}'},
                         source_address=source_address) as rh:
                response = ctx.socks_info_request(rh)
@ -441,6 +442,7 @@ class TestSocks5Proxy:
    def test_ipv4_client_source_address(self, handler, ctx):
        with ctx.socks_server(Socks5ProxyHandler) as server_address:
            source_address = f'127.0.0.{random.randint(5, 255)}'
            verify_address_availability(source_address)
            with handler(proxies={'all': f'socks5://{server_address}'}, source_address=source_address) as rh:
                response = ctx.socks_info_request(rh)
                assert response['client_address'][0] == source_address
--- a/test/test_websockets.py
+++ b/test/test_websockets.py
@ -6,6 +6,8 @@ import sys
 import pytest
 from test.helper import verify_address_availability
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import http.client
@ -227,6 +229,7 @@ class TestWebsSocketRequestHandlerConformance:
    @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
    def test_source_address(self, handler):
        source_address = f'127.0.0.{random.randint(5, 255)}'
        verify_address_availability(source_address)
        with handler(source_address=source_address) as rh:
            ws = validate_and_send(rh, Request(self.ws_base_url))
            ws.send('source_address')
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -47,7 +47,7 @@ from .acast import (
    ACastChannelIE,
 )
 from .acfun import AcFunVideoIE, AcFunBangumiIE
-from .adn import ADNIE
+from .adn import ADNIE, ADNSeasonIE
 from .adobeconnect import AdobeConnectIE
 from .adobetv import (
    AdobeTVEmbedIE,
@ -145,6 +145,7 @@ from .arte import (
    ArteTVCategoryIE,
 )
 from .arnes import ArnesIE
 from .asobichannel import AsobiChannelIE, AsobiChannelTagURLIE
 from .atresplayer import AtresPlayerIE
 from .atscaleconf import AtScaleConfEventIE
 from .atvat import ATVAtIE
@ -686,6 +687,10 @@ from .genius import (
    GeniusIE,
    GeniusLyricsIE,
 )
 from .getcourseru import (
    GetCourseRuPlayerIE,
    GetCourseRuIE
 )
 from .gettr import (
    GettrIE,
    GettrStreamingIE,
@ -1603,7 +1608,10 @@ from .restudy import RestudyIE
 from .reuters import ReutersIE
 from .reverbnation import ReverbNationIE
 from .rheinmaintv import RheinMainTVIE
-from .rinsefm import RinseFMIE
+from .rinsefm import (
    RinseFMIE,
    RinseFMArtistPlaylistIE,
 )
 from .rmcdecouverte import RMCDecouverteIE
 from .rockstargames import RockstarGamesIE
 from .rokfin import (
--- a/yt_dlp/extractor/adn.py
+++ b/yt_dlp/extractor/adn.py
@ -19,15 +19,35 @@ from ..utils import (
    long_to_bytes,
    pkcs1pad,
    strip_or_none,
    str_or_none,
    try_get,
    unified_strdate,
    urlencode_postdata,
 )
 from ..utils.traversal import traverse_obj
-class ADNIE(InfoExtractor):
+class ADNBaseIE(InfoExtractor):
    IE_DESC = 'Animation Digital Network'
-    _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
+    _NETRC_MACHINE = 'animationdigitalnetwork'
    _BASE = 'animationdigitalnetwork.fr'
    _API_BASE_URL = f'https://gw.api.{_BASE}/'
    _PLAYER_BASE_URL = f'{_API_BASE_URL}player/'
    _HEADERS = {}
    _LOGIN_ERR_MESSAGE = 'Unable to log in'
    _RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
    _POS_ALIGN_MAP = {
        'start': 1,
        'end': 3,
    }
    _LINE_ALIGN_MAP = {
        'middle': 8,
        'end': 4,
    }
 class ADNIE(ADNBaseIE):
    _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/[^/?#]+/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
        'md5': '1c9ef066ceb302c86f80c2b371615261',
@ -44,29 +64,35 @@ class ADNIE(InfoExtractor):
            'season_number': 1,
            'episode': 'À ce soir !',
            'episode_number': 1,
            'thumbnail': str,
            'season': 'Season 1',
        },
-        'skip': 'Only available in region (FR, ...)',
+        'skip': 'Only available in French and German speaking Europe',
    }, {
        'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
        'only_matching': True,
    }, {
        'url': 'https://animationdigitalnetwork.de/video/the-eminence-in-shadow/23550-folge-1',
        'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
        'info_dict': {
            'id': '23550',
            'ext': 'mp4',
            'episode_number': 1,
            'duration': 1417,
            'release_date': '20231004',
            'series': 'The Eminence in Shadow',
            'season_number': 2,
            'episode': str,
            'title': str,
            'thumbnail': str,
            'season': 'Season 2',
            'comment_count': int,
            'average_rating': float,
            'description': str,
        },
        # 'skip': 'Only available in French and German speaking Europe',
    }]
    _NETRC_MACHINE = 'animationdigitalnetwork'
    _BASE = 'animationdigitalnetwork.fr'
    _API_BASE_URL = 'https://gw.api.' + _BASE + '/'
    _PLAYER_BASE_URL = _API_BASE_URL + 'player/'
    _HEADERS = {}
    _LOGIN_ERR_MESSAGE = 'Unable to log in'
    _RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
    _POS_ALIGN_MAP = {
        'start': 1,
        'end': 3,
    }
    _LINE_ALIGN_MAP = {
        'middle': 8,
        'end': 4,
    }
    def _get_subtitles(self, sub_url, video_id):
        if not sub_url:
            return None
@ -116,6 +142,8 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
            if sub_lang == 'vostf':
                sub_lang = 'fr'
            elif sub_lang == 'vostde':
                sub_lang = 'de'
            subtitles.setdefault(sub_lang, []).extend([{
                'ext': 'json',
                'data': json.dumps(sub),
@ -147,7 +175,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
            self.report_warning(message or self._LOGIN_ERR_MESSAGE)
    def _real_extract(self, url):
-        video_id = self._match_id(url)
+        lang, video_id = self._match_valid_url(url).group('lang', 'id')
        video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
        player = self._download_json(
            video_base_url + 'configuration', video_id,
@ -162,7 +190,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
        token = self._download_json(
            user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
            video_id, 'Downloading access token', headers={
-                'x-player-refresh-token': user['refreshToken']
+                'X-Player-Refresh-Token': user['refreshToken'],
            }, data=b'')['token']
        links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
@ -184,7 +212,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
            try:
                links_data = self._download_json(
                    links_url, video_id, 'Downloading links JSON metadata', headers={
-                        'X-Player-Token': authorization
+                        'X-Player-Token': authorization,
                        'X-Target-Distribution': lang,
                        **self._HEADERS
                    }, query={
                        'freeWithAds': 'true',
                        'adaptive': 'false',
@ -232,6 +262,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
                if format_id == 'vf':
                    for f in m3u8_formats:
                        f['language'] = 'fr'
                elif format_id == 'vde':
                    for f in m3u8_formats:
                        f['language'] = 'de'
                formats.extend(m3u8_formats)
        video = (self._download_json(
@ -255,3 +288,40 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
            'average_rating': float_or_none(video.get('rating') or metas.get('rating')),
            'comment_count': int_or_none(video.get('commentsCount')),
        }
 class ADNSeasonIE(ADNBaseIE):
    """Playlist extractor for a show page; every episode is delegated to ADNIE."""

    _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/(?P<id>[^/?#]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'https://animationdigitalnetwork.fr/video/tokyo-mew-mew-new',
        'playlist_count': 12,
        'info_dict': {
            'id': '911',
            'title': 'Tokyo Mew Mew New',
        },
        # 'skip': 'Only available in French and German speaking Europe',
    }]

    def _real_extract(self, url):
        """Build a playlist of episode URLs for the show referenced by ``url``.

        The show slug from the URL is resolved to the show's id via the
        ``show/<slug>/`` endpoint, then the episode list is fetched from
        ``video/show/<id>``. The playlist id is the show id and the playlist
        title is the show's ``title`` (if present).
        """
        mobj = self._match_valid_url(url)
        lang = mobj.group('lang')
        slug = mobj.group('id')

        show = self._download_json(
            f'{self._API_BASE_URL}show/{slug}/', slug,
            'Downloading show JSON metadata', headers=self._HEADERS)['show']
        show_id = str(show['id'])

        # The language header comes first so entries in _HEADERS take precedence
        episode_headers = {
            'X-Target-Distribution': lang,
            **self._HEADERS
        }
        episode_query = {
            'order': 'asc',
            'limit': '-1',
        }
        episodes = self._download_json(
            f'{self._API_BASE_URL}video/show/{show_id}', slug,
            'Downloading episode list', headers=episode_headers, query=episode_query)

        episode_ids = traverse_obj(episodes, ('videos', ..., 'id', {str_or_none}))
        entries = (
            self.url_result(
                f'https://animationdigitalnetwork.{lang}/video/{slug}/{episode_id}',
                ADNIE, episode_id)
            for episode_id in episode_ids)

        return self.playlist_result(entries, show_id, show.get('title'))
--- a/yt_dlp/extractor/asobichannel.py
+++ b/yt_dlp/extractor/asobichannel.py
@ -0,0 +1,168 @@
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    clean_html,
    merge_dicts,
    parse_iso8601,
    url_or_none,
 )
 from ..utils.traversal import traverse_obj
 class AsobiChannelBaseIE(InfoExtractor):
    """Shared pieces for the ASOBI CHANNEL extractors (microCMS header and metadata mapping)."""

    # Header sent with every request to channel.microcms.io
    _MICROCMS_HEADER = {'X-MICROCMS-API-KEY': 'qRaKehul9AHU8KtL0dnq1OCLKnFec6yrbcz3'}

    def _extract_info(self, metadata):
        """Map a microCMS media object onto info-dict fields.

        Each output key is paired with the path it is read from in ``metadata``
        and the filter/converter applied to the value found there.
        """
        field_paths = {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'description': ('body', {clean_html}),
            'thumbnail': ('contents', 'video_thumb', 'url', {url_or_none}),
            'timestamp': ('publishedAt', {parse_iso8601}),
            'modified_timestamp': ('updatedAt', {parse_iso8601}),
            'channel': ('channel', 'name', {str}),
            'channel_id': ('channel', 'id', {str}),
        }
        return traverse_obj(metadata, field_paths)
 class AsobiChannelIE(AsobiChannelBaseIE):
    """Extractor for single ASOBI CHANNEL watch pages (VOD and live events)."""

    IE_NAME = 'asobichannel'
    IE_DESC = 'ASOBI CHANNEL'
    _VALID_URL = r'https?://asobichannel\.asobistore\.jp/watch/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://asobichannel.asobistore.jp/watch/1ypp48qd32p',
        'md5': '39df74e872afe032c4eb27b89144fc92',
        'info_dict': {
            'id': '1ypp48qd32p',
            'ext': 'mp4',
            'title': 'アイドルマスター ミリオンライブ！ 765プロch 原っぱ通信 #1',
            'description': 'md5:b930bd2199c9b2fd75951ce4aaa7efd2',
            'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/a8e6f84119f54eb9ab4ce16729239905/%E3%82%B5%E3%83%A0%E3%83%8D%20(1).png',
            'timestamp': 1697098247,
            'upload_date': '20231012',
            'modified_timestamp': 1698381162,
            'modified_date': '20231027',
            'channel': 'アイドルマスター',
            'channel_id': 'idolmaster',
        },
    }, {
        'url': 'https://asobichannel.asobistore.jp/watch/redigiwnjzqj',
        'md5': '229fa8fb5c591c75ce8c37a497f113f6',
        'info_dict': {
            'id': 'redigiwnjzqj',
            'ext': 'mp4',
            'title': '【おまけ放送】アイドルマスター ミリオンライブ！ 765プロch 原っぱ通信 #1',
            'description': 'md5:7d9cd35fb54425a6967822bd564ea2d9',
            'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/20e5c1d6184242eebc2512a5dec59bf0/P1_%E5%8E%9F%E3%81%A3%E3%81%B1%E3%82%B5%E3%83%A0%E3%83%8D.png',
            'modified_timestamp': 1697797125,
            'modified_date': '20231020',
            'timestamp': 1697261769,
            'upload_date': '20231014',
            'channel': 'アイドルマスター',
            'channel_id': 'idolmaster',
        },
    }]

    # Authorization header for survapi requests; filled in by _real_initialize
    _survapi_header = None

    def _real_initialize(self):
        """Fetch an API token and store it as the Bearer header for survapi calls."""
        token = self._download_json(
            'https://asobichannel-api.asobistore.jp/api/v1/vspf/token', None,
            note='Retrieving API token')
        self._survapi_header = {'Authorization': f'Bearer {token}'}

    def _process_vod(self, video_id, metadata):
        """Return the ``formats`` for a VOD entry.

        The content id is read from ``metadata['contents']['video_id']`` and the
        HLS manifest URL from the ``ex_content.streaming_url`` field of the
        survapi response.
        """
        content_id = metadata['contents']['video_id']
        vod_data = self._download_json(
            f'https://survapi.channel.or.jp/proxy/v1/contents/{content_id}/get_by_cuid', video_id,
            headers=self._survapi_header, note='Downloading vod data')

        return {
            'formats': self._extract_m3u8_formats(vod_data['ex_content']['streaming_url'], video_id),
        }

    def _process_live(self, video_id, metadata):
        """Return live status, formats and release timestamp for a LIVE entry.

        The event's ``Player_type`` decides the outcome: ``poster`` is treated as
        an upcoming event with no formats, ``player`` as a running live stream.

        Raises:
            ExtractorError: if the player type is anything else.
        """
        content_id = metadata['contents']['video_id']
        event_data = self._download_json(
            f'https://survapi.channel.or.jp/ex/events/{content_id}?embed=channel', video_id,
            headers=self._survapi_header, note='Downloading event data')

        player_type = traverse_obj(event_data, ('data', 'Player_type', {str}))
        if player_type == 'poster':
            self.raise_no_formats('Live event has not yet started', expected=True)
            # Reached only when raise_no_formats returns instead of raising
            live_status = 'is_upcoming'
            formats = []
        elif player_type == 'player':
            live_status = 'is_live'
            formats = self._extract_m3u8_formats(
                event_data['data']['Channel']['Custom_live_url'], video_id, live=True)
        else:
            # f-string so the offending value is actually shown in the message
            raise ExtractorError(f'Unsupported player type {player_type!r}')

        return {
            'release_timestamp': traverse_obj(metadata, ('period', 'start', {parse_iso8601})),
            'live_status': live_status,
            'formats': formats,
        }

    def _real_extract(self, url):
        """Download the microCMS media object and dispatch on its video type.

        Raises:
            ExtractorError: if the first ``contents.video_type`` entry is neither
                ``'VOD'`` nor ``'LIVE'``.
        """
        video_id = self._match_id(url)

        metadata = self._download_json(
            f'https://channel.microcms.io/api/v1/media/{video_id}', video_id,
            headers=self._MICROCMS_HEADER)

        info = self._extract_info(metadata)

        video_type = traverse_obj(metadata, ('contents', 'video_type', 0, {str}))
        if video_type == 'VOD':
            return merge_dicts(info, self._process_vod(video_id, metadata))
        if video_type == 'LIVE':
            return merge_dicts(info, self._process_live(video_id, metadata))

        raise ExtractorError(f'Unexpected video type {video_type!r}')
 class AsobiChannelTagURLIE(AsobiChannelBaseIE):
    """Playlist extractor for ASOBI CHANNEL tag pages."""

    IE_NAME = 'asobichannel:tag'
    IE_DESC = 'ASOBI CHANNEL'
    # NOTE: the id class was previously written as [a-z0-9-_], where "9-_" is a
    # character range that also matches "?", "=", ":" etc. and so pulled query
    # strings into the id. [\w-] mirrors the id pattern of the watch-page URLs.
    _VALID_URL = r'https?://asobichannel\.asobistore\.jp/tag/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://asobichannel.asobistore.jp/tag/bjhh-nbcja',
        'info_dict': {
            'id': 'bjhh-nbcja',
            'title': 'アイドルマスター ミリオンライブ！ 765プロch 原っぱ通信',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://asobichannel.asobistore.jp/tag/hvm5qw3c6od',
        'info_dict': {
            'id': 'hvm5qw3c6od',
            'title': 'アイマスMOIW2023ラジオ',
        },
        'playlist_mincount': 13,
    }]

    def _real_extract(self, url):
        """Return a playlist of watch-page URLs for every media item carrying the tag.

        The playlist title is read from the page's Next.js data (non-fatal if it
        is missing); the items come from a microCMS query filtered by the tag id.
        """
        tag_id = self._match_id(url)
        webpage = self._download_webpage(url, tag_id)
        title = traverse_obj(self._search_nextjs_data(
            webpage, tag_id, fatal=False), ('props', 'pageProps', 'data', 'name', {str}))

        media = self._download_json(
            f'https://channel.microcms.io/api/v1/media?limit=999&filters=(tag[contains]{tag_id})',
            tag_id, headers=self._MICROCMS_HEADER)

        def entries():
            # Only items that have a truthy "id" can be turned into a watch URL
            for metadata in traverse_obj(media, ('contents', lambda _, v: v['id'])):
                yield {
                    '_type': 'url',
                    'url': f'https://asobichannel.asobistore.jp/watch/{metadata["id"]}',
                    'ie_key': AsobiChannelIE.ie_key(),
                    **self._extract_info(metadata),
                }

        return self.playlist_result(entries(), tag_id, title)
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@ -1622,6 +1622,7 @@ class BiliBiliPlayerIE(InfoExtractor):
 class BiliIntlBaseIE(InfoExtractor):
    _API_URL = 'https://api.bilibili.tv/intl/gateway'
    _NETRC_MACHINE = 'biliintl'
    _HEADERS = {'Referer': 'https://www.bilibili.com/'}
    def _call_api(self, endpoint, *args, **kwargs):
        json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
@ -1732,7 +1733,9 @@ class BiliIntlBaseIE(InfoExtractor):
    def _parse_video_metadata(self, video_data):
        return {
            'title': video_data.get('title_display') or video_data.get('title'),
            'description': video_data.get('desc'),
            'thumbnail': video_data.get('cover'),
            'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
            'episode_number': int_or_none(self._search_regex(
                r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
        }
@ -1829,17 +1832,6 @@ class BiliIntlIE(BiliIntlBaseIE):
            'episode_number': 140,
        },
        'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
    }, {
        'url': 'https://www.bilibili.tv/en/video/2041863208',
        'info_dict': {
            'id': '2041863208',
            'ext': 'mp4',
            'timestamp': 1670874843,
            'description': 'Scheduled for April 2023.\nStudio: ufotable',
            'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
            'upload_date': '20221212',
            'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
        },
    }, {
        # episode comment extraction
        'url': 'https://www.bilibili.tv/en/play/34580/340317',
@ -1880,9 +1872,9 @@ class BiliIntlIE(BiliIntlBaseIE):
            'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
            'timestamp': 1667891924,
            'upload_date': '20221108',
-            'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
+            'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
            'comment_count': int,
-            'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
+            'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
        },
        'params': {
            'getcomments': True
@ -1945,10 +1937,12 @@ class BiliIntlIE(BiliIntlBaseIE):
        # XXX: webpage metadata may not be accurate; it is only used to avoid crashing when video_data is not found
        return merge_dicts(
-            self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
+            self._parse_video_metadata(video_data), {
-                'title': self._html_search_meta('og:title', webpage),
+                'title': get_element_by_class(
-                'description': self._html_search_meta('og:description', webpage)
+                    'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
-            })
+                'description': get_element_by_class(
                    'bstar-meta__desc', webpage) or self._html_search_meta('og:description'),
            }, self._search_json_ld(webpage, video_id, default={}))
    def _get_comments_reply(self, root_id, next_id=0, display_id=None):
        comment_api_raw_data = self._download_json(
@ -2036,7 +2030,8 @@ class BiliIntlIE(BiliIntlBaseIE):
            'formats': self._get_formats(ep_id=ep_id, aid=aid),
            'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
            'chapters': chapters,
-            '__post_extractor': self.extract_comments(video_id, ep_id)
+            '__post_extractor': self.extract_comments(video_id, ep_id),
            'http_headers': self._HEADERS,
        }
--- a/yt_dlp/extractor/getcourseru.py
+++ b/yt_dlp/extractor/getcourseru.py
@ -0,0 +1,179 @@
 import re
 import time
 import urllib.parse
 from .common import InfoExtractor
 from ..utils import ExtractorError, int_or_none, url_or_none, urlencode_postdata
 from ..utils.traversal import traverse_obj
 class GetCourseRuPlayerIE(InfoExtractor):
    """Extractor for the signed GetCourse player page (also used as an iframe embed)."""

    _VALID_URL = r'https?://player02\.getcourse\.ru/sign-player/?\?(?:[^#]+&)?json=[^#&]+'
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)']
    _TESTS = [{
        'url': 'http://player02.getcourse.ru/sign-player/?json=eyJ2aWRlb19oYXNoIjoiMTkwYmRmOTNmMWIyOTczNTMwOTg1M2E3YTE5ZTI0YjMiLCJ1c2VyX2lkIjozNTk1MjUxODMsInN1Yl9sb2dpbl91c2VyX2lkIjpudWxsLCJsZXNzb25faWQiOm51bGwsImlwIjoiNDYuMTQyLjE4Mi4yNDciLCJnY19ob3N0IjoiYWNhZGVteW1lbC5vbmxpbmUiLCJ0aW1lIjoxNzA1NDQ5NjQyLCJwYXlsb2FkIjoidV8zNTk1MjUxODMiLCJ1aV9sYW5ndWFnZSI6InJ1IiwiaXNfaGF2ZV9jdXN0b21fc3R5bGUiOnRydWV9&s=354ad2c993d95d5ac629e3133d6cefea&vh-static-feature=zigzag',
        'info_dict': {
            'id': '513573381',
            'title': '190bdf93f1b29735309853a7a19e24b3',
            'ext': 'mp4',
            'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
            'duration': 1693
        },
        'skip': 'JWT expired',
    }]

    def _real_extract(self, url):
        """Read ``window.configs`` from the player page and build the info dict.

        The video id comes from ``gcFileId`` and the HLS manifest from
        ``masterPlaylistUrl``; title, thumbnail and duration are optional.
        """
        webpage = self._download_webpage(url, None, 'Downloading player page')
        configs = self._search_json(
            r'window\.configs\s*=', webpage, 'config', None)

        video_id = str(configs['gcFileId'])
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            configs['masterPlaylistUrl'], video_id)

        # Optional metadata first, then the mandatory fields (same key order as a
        # single dict literal with the optional mapping unpacked at the top)
        info = dict(traverse_obj(configs, {
            'title': ('videoHash', {str}),
            'thumbnail': ('previewUrl', {url_or_none}),
            'duration': ('videoDuration', {int_or_none}),
        }))
        info.update({
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles
        })
        return info
 class GetCourseRuIE(InfoExtractor):
    """Extractor for GetCourse lesson/landing pages that embed the signed player.

    Each page is returned as a playlist of the GetCourseRuPlayerIE iframes found
    in it. Credentials, when available, are looked up per hostname.
    """

    _NETRC_MACHINE = 'getcourseru'
    # Custom (non-getcourse) domains recognised by this extractor
    _DOMAINS = [
        'academymel.online',
        'marafon.mani-beauty.com',
        'on.psbook.ru'
    ]
    _BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
    _VALID_URL = [
        rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
        # "(?:pl/)?" is an optional non-capturing prefix (was mistyped as "(:?pl/)?")
        rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
    ]
    _TESTS = [{
        'url': 'http://academymel.online/3video_1',
        'info_dict': {
            'id': '3059742',
            'display_id': '3video_1',
            'title': 'Промоуроки Академии МЕЛ',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '513573381',
                'ext': 'mp4',
                'title': 'Промоуроки Академии МЕЛ',
                'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
                'duration': 1693
            },
        }]
    }, {
        'url': 'https://academymel.getcourse.ru/3video_1',
        'info_dict': {
            'id': '3059742',
            'display_id': '3video_1',
            'title': 'Промоуроки Академии МЕЛ',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '513573381',
                'ext': 'mp4',
                'title': 'Промоуроки Академии МЕЛ',
                'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
                'duration': 1693
            },
        }]
    }, {
        'url': 'https://academymel.getcourse.ru/pl/teach/control/lesson/view?id=319141781&editMode=0',
        'info_dict': {
            'id': '319141781',
            'title': '1. Разминка у стены',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '4919601',
                'ext': 'mp4',
                'title': '1. Разминка у стены',
                'thumbnail': 'https://preview-htz.vhcdn.com/preview/5a521788e7dc25b4f70c3dff6512d90e/preview.jpg?version=1703223532&host=vh-81',
                'duration': 704
            },
        }],
        'skip': 'paid lesson'
    }, {
        'url': 'https://manibeauty.getcourse.ru/pl/teach/control/lesson/view?id=272499894',
        'info_dict': {
            'id': '272499894',
            'title': 'Мотивация к тренировкам',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '447479687',
                'ext': 'mp4',
                'title': 'Мотивация к тренировкам',
                'thumbnail': 'https://preview-htz.vhcdn.com/preview/70ed5b9f489dd03b4aff55bfdff71a26/preview.jpg?version=1685115787&host=vh-71',
                'duration': 30
            },
        }],
        'skip': 'paid lesson'
    }, {
        'url': 'https://gaismasmandalas.getcourse.io/ATLAUTSEVBUT',
        'only_matching': True,
    }]

    _LOGIN_URL_PATH = '/cms/system/login'

    def _login(self, hostname, username, password):
        """Log in on ``hostname`` unless a ``PHPSESSID5`` cookie is already present.

        The login form's ``data-xdget-id`` and the page's ``requestSimpleSign``
        value are scraped from the login page and posted back together with the
        credentials.
        """
        if self._get_cookies(f'https://{hostname}').get('PHPSESSID5'):
            return

        login_url = f'https://{hostname}{self._LOGIN_URL_PATH}'
        webpage = self._download_webpage(login_url, None)

        self._request_webpage(
            login_url, None, 'Logging in', 'Failed to log in',
            data=urlencode_postdata({
                'action': 'processXdget',
                'xdgetId': self._html_search_regex(
                    r'<form[^>]+\bclass="[^"]*\bstate-login[^"]*"[^>]+\bdata-xdget-id="([^"]+)"',
                    webpage, 'xdgetId'),
                'params[action]': 'login',
                'params[url]': login_url,
                'params[object_type]': 'cms_page',
                'params[object_id]': -1,
                'params[email]': username,
                'params[password]': password,
                'requestTime': int(time.time()),
                # The dot is escaped so only the literal "window.requestSimpleSign" matches
                'requestSimpleSign': self._html_search_regex(
                    r'window\.requestSimpleSign\s*=\s*"([\da-f]+)"', webpage, 'simple sign'),
            }))

    def _real_extract(self, url):
        """Return a playlist of the player iframes embedded in the page at ``url``.

        Raises:
            ExtractorError: (expected) if the request ends up on the login page
                or returns HTTP 404, which is reported as login being required.
        """
        hostname = urllib.parse.urlparse(url).hostname
        username, password = self._get_login_info(netrc_machine=hostname)
        if username:
            self._login(hostname, username, password)

        display_id = self._match_id(url)
        # NB: 404 is returned due to yt-dlp not properly following redirects #9020
        webpage, urlh = self._download_webpage_handle(url, display_id, expected_status=404)
        if self._LOGIN_URL_PATH in urlh.url or urlh.status == 404:
            raise ExtractorError(
                f'This video is only available for registered users. {self._login_hint("any", netrc=hostname)}',
                expected=True)

        # Fall back to the URL-derived id when the page exposes neither variable
        playlist_id = self._search_regex(
            r'window\.(?:lessonId|gcsObjectId)\s*=\s*(\d+)', webpage, 'playlist id', default=display_id)
        title = self._og_search_title(webpage) or self._html_extract_title(webpage)

        return self.playlist_from_matches(
            re.findall(GetCourseRuPlayerIE._EMBED_REGEX[0], webpage),
            playlist_id, title, display_id=display_id, ie=GetCourseRuPlayerIE, video_kwargs={
                'url_transparent': True,
                'title': title,
            })
--- a/yt_dlp/extractor/gopro.py
+++ b/yt_dlp/extractor/gopro.py
@ -57,8 +57,8 @@ class GoProIE(InfoExtractor):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
-        metadata = self._parse_json(
+        metadata = self._search_json(
-            self._html_search_regex(r'window\.__reflectData\s*=\s*([^;]+)', webpage, 'metadata'), video_id)
+            r'window\.__reflectData\s*=', webpage, 'metadata', video_id)
        video_info = metadata['collectionMedia'][0]
        media_data = self._download_json(
@ -99,7 +99,7 @@ class GoProIE(InfoExtractor):
            'duration': int_or_none(
                video_info.get('source_duration')),
            'artist': str_or_none(
-                video_info.get('music_track_artist')),
+                video_info.get('music_track_artist')) or None,
            'track': str_or_none(
-                video_info.get('music_track_name')),
+                video_info.get('music_track_name')) or None,
        }
--- a/yt_dlp/extractor/jiosaavn.py
+++ b/yt_dlp/extractor/jiosaavn.py
@ -1,5 +1,6 @@
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    js_to_json,
    url_or_none,
    urlencode_postdata,
@ -20,39 +21,64 @@ class JioSaavnSongIE(JioSaavnBaseIE):
    _VALID_URL = r'https?://(?:www\.)?(?:jiosaavn\.com/song/[^/?#]+/|saavn\.com/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk',
-        'md5': '7b1f70de088ede3a152ea34aece4df42',
+        'md5': '3b84396d15ed9e083c3106f1fa589c04',
        'info_dict': {
            'id': 'OQsEfQFVUXk',
-            'ext': 'mp3',
+            'ext': 'mp4',
            'title': 'Leja Re',
            'album': 'Leja Re',
            'thumbnail': 'https://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
            'duration': 205,
            'view_count': int,
            'release_year': 2018,
        },
    }, {
        'url': 'https://www.saavn.com/s/song/hindi/Saathiya/O-Humdum-Suniyo-Re/KAMiazoCblU',
        'only_matching': True,
    }]
    _VALID_BITRATES = ('16', '32', '64', '128', '320')
    def _real_extract(self, url):
        audio_id = self._match_id(url)
        extract_bitrates = self._configuration_arg('bitrate', ['128', '320'], ie_key='JioSaavn')
        if invalid_bitrates := [br for br in extract_bitrates if br not in self._VALID_BITRATES]:
            raise ValueError(
                f'Invalid bitrate(s): {", ".join(invalid_bitrates)}. '
                + f'Valid bitrates are: {", ".join(self._VALID_BITRATES)}')
        song_data = self._extract_initial_data(url, audio_id)['song']['song']
-        media_data = self._download_json(
+        formats = []
-            'https://www.jiosaavn.com/api.php', audio_id, data=urlencode_postdata({
+        for bitrate in extract_bitrates:
-                '__call': 'song.generateAuthToken',
+            media_data = self._download_json(
-                '_format': 'json',
+                'https://www.jiosaavn.com/api.php', audio_id, f'Downloading format info for {bitrate}',
-                'bitrate': '128',
+                fatal=False, data=urlencode_postdata({
-                'url': song_data['encrypted_media_url'],
+                    '__call': 'song.generateAuthToken',
-            }))
+                    '_format': 'json',
                    'bitrate': bitrate,
                    'url': song_data['encrypted_media_url'],
                }))
            if not media_data.get('auth_url'):
                self.report_warning(f'Unable to extract format info for {bitrate}')
                continue
            formats.append({
                'url': media_data['auth_url'],
                'ext': media_data.get('type'),
                'format_id': bitrate,
                'abr': int(bitrate),
                'vcodec': 'none',
            })
        return {
            'id': audio_id,
-            'url': media_data['auth_url'],
+            'formats': formats,
            'ext': media_data.get('type'),
            'vcodec': 'none',
            **traverse_obj(song_data, {
                'title': ('title', 'text'),
                'album': ('album', 'text'),
                'thumbnail': ('image', 0, {url_or_none}),
                'duration': ('duration', {int_or_none}),
                'view_count': ('play_count', {int_or_none}),
                'release_year': ('year', {int_or_none}),
            }),
        }
--- a/yt_dlp/extractor/mlb.py
+++ b/yt_dlp/extractor/mlb.py
@ -355,11 +355,11 @@ class MLBArticleIE(InfoExtractor):
        'info_dict': {
            'id': '36db7394-343c-4ea3-b8ca-ead2e61bca9a',
            'title': 'Machado\'s grab draws hilarious irate reaction',
-            'modified_timestamp': 1650130737,
+            'modified_timestamp': 1675888370,
            'description': 'md5:a19d4eb0487b2cb304e9a176f6b67676',
-            'modified_date': '20220416',
+            'modified_date': '20230208',
        },
-        'playlist_count': 2,
+        'playlist_mincount': 2,
    }]
    def _real_extract(self, url):
@ -367,15 +367,13 @@ class MLBArticleIE(InfoExtractor):
        webpage = self._download_webpage(url, display_id)
        apollo_cache_json = self._search_json(r'window\.initState\s*=', webpage, 'window.initState', display_id)['apolloCache']
-        content_data_id = traverse_obj(
+        content_real_info = traverse_obj(
-            apollo_cache_json, ('ROOT_QUERY', lambda k, _: k.startswith('getForgeContent'), 'id'), get_all=False)
+            apollo_cache_json, ('ROOT_QUERY', lambda k, _: k.startswith('getArticle')), get_all=False)
        content_real_info = apollo_cache_json[content_data_id]
        return self.playlist_from_matches(
-            traverse_obj(content_real_info, ('parts', lambda _, v: v['typename'] == 'Video', 'id')),
+            traverse_obj(content_real_info, ('parts', lambda _, v: v['__typename'] == 'Video' or v['type'] == 'video')),
-            getter=lambda x: f'https://www.mlb.com/video/{apollo_cache_json[x]["slug"]}',
+            getter=lambda x: f'https://www.mlb.com/video/{x["slug"]}',
-            ie=MLBVideoIE, playlist_id=content_real_info.get('_translationId'),
+            ie=MLBVideoIE, playlist_id=content_real_info.get('translationId'),
            title=self._html_search_meta('og:title', webpage),
            description=content_real_info.get('summary'),
            modified_timestamp=parse_iso8601(content_real_info.get('lastUpdatedDate')))
--- a/yt_dlp/extractor/rinsefm.py
+++ b/yt_dlp/extractor/rinsefm.py
@ -1,8 +1,34 @@
 from .common import InfoExtractor
-from ..utils import format_field, parse_iso8601
+from ..utils import (
    MEDIA_EXTENSIONS,
    determine_ext,
    parse_iso8601,
    traverse_obj,
    url_or_none,
 )
-class RinseFMIE(InfoExtractor):
+class RinseFMBaseIE(InfoExtractor):
    @staticmethod
    def _parse_entry(entry):
        """Build the info dict for a single episode from its `entry` data.

        The mapped fields come from `entry` via traverse_obj; the result is
        marked as having no video codec and attributed to RinseFMIE.
        """
        def image_url(filename):
            # Pass falsy values through unchanged instead of formatting them
            return filename and f'https://rinse.imgix.net/media/{filename}'

        def episode_url(slug):
            return slug and f'https://rinse.fm/episodes/{slug}'

        episode_fields = traverse_obj(entry, {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'url': ('fileUrl', {url_or_none}),
            'release_timestamp': ('episodeDate', {parse_iso8601}),
            'thumbnail': ('featuredImage', 0, 'filename', {str}, {image_url}),
            'webpage_url': ('slug', {str}, {episode_url}),
        })
        return {
            **episode_fields,
            'vcodec': 'none',
            'extractor_key': RinseFMIE.ie_key(),
            'extractor': RinseFMIE.IE_NAME,
        }
 class RinseFMIE(RinseFMBaseIE):
    _VALID_URL = r'https?://(?:www\.)?rinse\.fm/episodes/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://rinse.fm/episodes/club-glow-15-12-2023-2000/',
@ -22,12 +48,42 @@ class RinseFMIE(InfoExtractor):
        webpage = self._download_webpage(url, display_id)
        entry = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['entry']
-        return {
+        return self._parse_entry(entry)
-            'id': entry['id'],
+
-            'title': entry.get('title'),
+
-            'url': entry['fileUrl'],
+class RinseFMArtistPlaylistIE(RinseFMBaseIE):
-            'vcodec': 'none',
+    _VALID_URL = r'https?://(?:www\.)?rinse\.fm/shows/(?P<id>[^/?#]+)'
-            'release_timestamp': parse_iso8601(entry.get('episodeDate')),
+    _TESTS = [{
-            'thumbnail': format_field(
+        'url': 'https://rinse.fm/shows/resources/',
-                entry, [('featuredImage', 0, 'filename')], 'https://rinse.imgix.net/media/%s', default=None),
+        'info_dict': {
-        }
+            'id': 'resources',
            'title': '[re]sources',
            'description': '[re]sources est un label parisien piloté par le DJ et producteur Tommy Kid.'
        },
        'playlist_mincount': 40
    }, {
        'url': 'https://rinse.fm/shows/ivy/',
        'info_dict': {
            'id': 'ivy',
            'title': '[IVY]',
            'description': 'A dedicated space for DNB/Turbo House and 4x4.'
        },
        'playlist_mincount': 7
    }]
    def _entries(self, data):
        """Yield a parsed entry for each episode in `data` whose file URL has an audio extension."""
        def is_audio_episode(_, episode):
            return determine_ext(episode['fileUrl']) in MEDIA_EXTENSIONS.audio

        episodes_path = ('props', 'pageProps', 'episodes', is_audio_episode)
        yield from map(self._parse_entry, traverse_obj(data, episodes_path))
    def _real_extract(self, url):
        """Extract the show page at `url` as a playlist of its episodes.

        Title and description are taken from the Open Graph data, falling back
        to the corresponding HTML meta tags when the former are empty.
        """
        show_id = self._match_id(url)
        page = self._download_webpage(url, show_id)

        title = self._og_search_title(page)
        if not title:
            title = self._html_search_meta('title', page)

        description = self._og_search_description(page)
        if not description:
            description = self._html_search_meta('description', page)

        nextjs_data = self._search_nextjs_data(page, show_id)
        return self.playlist_result(
            self._entries(nextjs_data), show_id, title, description=description)
Author	SHA1	Message	Date
coletdjnz	69d3191495	[test] Skip source address tests if the address cannot be bound to (#8900 ) Fixes https://github.com/yt-dlp/yt-dlp/issues/8890 Authored by: coletdjnz	2024-01-20 10:39:49 +13:00
HobbyistDev	50e06e21a6	[ie/MLBArticle] Fix extractor (#9021 ) Closes #8682 Authored by: HobbyistDev	2024-01-19 20:31:06 +00:00
divStar	4310b6650e	[ie/getcourseru] Add extractors (#8873 ) Authored by: divStar, seproDev Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>	2024-01-19 20:27:16 +00:00
SirElderling	1713c88273	[ie/bilibili] Add referer header and fix metadata extraction (#8832 ) Closes #6640 Authored by: SirElderling	2024-01-19 20:11:00 +00:00
Alexey Neyman	4a07a455bb	[ie/GoPro] Fix extractor (#9019 ) Authored by: stilor	2024-01-19 17:49:15 +01:00
Christopher Schreiner	5eb1458be4	[ie/adn] Add support for German site (#8708 ) - Add extractor for seasons Closes #6643, Closes #8945 Authored by: infanf	2024-01-19 17:38:21 +01:00
SirElderling	1a36dbad71	[ie/RinseFMArtistPlaylist] Add extractor (#8794 ) Authored by: SirElderling	2024-01-19 17:29:48 +01:00
Snack	12f0427405	[ie/asobichannel] Add extractors (#8700 ) Authored by: Snack-X	2024-01-19 17:16:07 +01:00
alien-developers	5154dc0a68	[ie/JioSaavnSong] Support more bitrates (#8834 ) Authored by: alien-developers, bashonly Co-authored-by: bashonly <bashonly@protonmail.com>	2024-01-19 16:48:45 +01:00