[PromoDJ] Add codecs

[PromoDJ] Update radio extractor and add tests
[PromoDJ] Fix login regex
2024-10-04 16:21:24 +02:00 · 2024-02-17 05:24:49 +03:00 · 2024-02-17 05:06:06 +03:00 · 2024-02-17 04:16:05 +03:00
1 changed file with 66 additions and 13 deletions
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -6,16 +6,17 @@ import urllib.parse
 from .common import InfoExtractor
 from .youtube import YoutubeIE
 from ..utils import (
    OnDemandPagedList,
    clean_html,
    dict_get,
    extract_attributes,
    ExtractorError,
    get_element_by_class,
    get_element_html_by_id,
    get_elements_html_by_class,
    int_or_none,
    js_to_json,
    merge_dicts,
    OnDemandPagedList,
    parse_duration,
    str_or_none,
    traverse_obj,
@@ -65,8 +66,8 @@ class PromoDJBaseIE(InfoExtractor):
    _PAGES = ['featured', 'shop', *_MEDIA_TYPES]
    _BASE_URL_RE = r'https?://(?:www\.)?promodj\.com'
-    _NOT_LOGIN_LIST = '|'.join(['radio', *_PAGES])
+    _NOT_LOGIN_LIST = '|'.join(['radio', 'embed', *_PAGES])
-    _LOGIN_RE = rf'(?!{_NOT_LOGIN_LIST})[\w.-]+'
+    _LOGIN_RE = rf'(?!(?:{_NOT_LOGIN_LIST})(?:/|$))[\w.-]+'
    def _set_url_page(self, url, page):
        parsed_url = urllib.parse.urlparse(url)
@@ -154,6 +155,8 @@ class PromoDJBaseIE(InfoExtractor):
            'format_id': 'lossy',
            'url': traverse_obj(source, ('URL', {url_or_none})),
            'size': traverse_obj(source, ('size', {int_or_none})),
            'acodec': 'mp3',
            'vcodec': 'none',
        } for source in traverse_obj(media_data, ('sources'))]
        thumbnails = [{
            'url': url,
@@ -247,6 +250,10 @@ class PromoDJUserIE(PromoDJBaseIE):
            'id': 'slim96',
        },
        'playlist_count': 0,
    }, {
        # login starts with page name
        'url': 'https://promodj.com/radio.remix',
        'only_matching': True,
    }]
    def _real_extract(self, url):
@@ -288,6 +295,10 @@ class PromoDJUserMediaIE(PromoDJBaseIE):
            'id': 'worobyev-video',
        },
        'playlist_count': 0,
    }, {
        # login starts with page name
        'url': 'https://promodj.com/radio.remix/music',
        'only_matching': True,
    }]
    def _real_extract(self, url):
@@ -378,15 +389,18 @@ class PromoDJUserPageIE(PromoDJBaseIE):
        *PromoDJBaseIE._MEDIA_TYPES,
    ]
    _NOT_USER_PAGE_LIST = '|'.join(_USER_PATHS)
    _USER_PAGE_RE = rf'(?!{_NOT_USER_PAGE_LIST})[\w-]+'
-    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<slug>{_USER_PAGE_RE})$'
+    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<slug>(?!(?:{_NOT_USER_PAGE_LIST})$)[\w-]+$)'
    _TESTS = [{
        'url': 'https://promodj.com/djperetse/MaxMixes',
        'info_dict': {
            'id': 'djperetse-MaxMixes',
        },
        'playlist_count': 5,
    }, {
        # user page starts with media type (not a real link)
        'url': 'https://promodj.com/djperetse/remixes-best',
        'only_matching': True,
    }]
    def _real_extract(self, url):
@@ -768,7 +782,7 @@ class PromoDJIE(PromoDJBaseIE):
    # examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит | FLAC, 1509 Кбит
    # https://regex101.com/r/2AuaxB/1
-    _FORMATS_RE = r'(?:<a\s+href=\"(?P<url>[^\"]+)\">)?\s*\w+, (?P<bitrate>\d+) Кбит'
+    _FORMATS_RE = r'(?:<a\s+href=\"(?P<url>[^\"]+)\">)?\s*(?P<format>\w+), (?P<bitrate>\d+) Кбит'
    _VIEW_COUNT_RE = r'<b>(?:Прослушиваний|Просмотров):</b>\s*(\d+)'
    # examples: 0:21 | 1:07 | 74:38
    _DURATION_RE = r'<b>Продолжительность:</b>\s*(\d+:\d{2})'
@@ -835,18 +849,19 @@ class PromoDJIE(PromoDJBaseIE):
        # size field describes best quality
        size = self._parse_ru_size(*re.search(self._SIZE_RE, meta_html).groups())
        if type == 'videos':
-            for url, bitrate in formats_from_html:
+            for url, format, bitrate in formats_from_html:
                if url_or_none(url):
                    metadata['formats'].append({
                        'format_id': 'source',
                        'url': url,
                        'tbr': int(bitrate),
                        'size': size,
                        'container': format.lower(),
                        'quality': 1,
                    })
        elif not is_paid:
            for i, match in enumerate(formats_from_html):
-                url, bitrate = match
+                url, format, bitrate = match
                is_last = i == len(formats_from_html) - 1
                if is_last:
                    metadata['formats'][0]['abr'] = int(bitrate)
@@ -855,6 +870,8 @@ class PromoDJIE(PromoDJBaseIE):
                        'format_id': 'lossless',
                        'url': url,
                        'abr': int(bitrate),
                        'acodec': format.lower(),
                        'vcodec': 'none',
                    })
            metadata['formats'][-1]['size'] = size
@@ -978,19 +995,55 @@ class PromoDJRadioIE(PromoDJBaseIE):
    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/radio#(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://promodj.com/radio#dubstep',
-        'only_matching': True,
+        'info_dict': {
            'id': 'dubstep',
            'ext': 'mp3',
            'title': r're:^Dubstep ',
            'description': 'Всё лучше под дабстеп',
            'thumbnail': r're:^https?://',
            'live_status': 'is_live',
        },
    }, {
        'url': 'https://promodj.com/radio#oldschool',
-        'only_matching': True,
+        'info_dict': {
            'id': 'oldschool',
            'ext': 'mp3',
            'title': r're:^Old-School ',
            'description': 'То самое доброе, старое, вечное',
            'thumbnail': r're:^https?://',
            'live_status': 'is_live',
        },
    }]
    def _real_extract(self, url):
-        id = self._match_id(url)
+        slug = self._match_id(url)
        html = self._download_webpage(url, slug)
        radio_span = get_element_html_by_id(f'radio_{slug}', html)
        if not radio_span:
            raise ExtractorError('Radio channel is offline or not exists', expected=True)
        id = self._search_regex(r'amba="radio:(\d+)"', radio_span, 'id')
        tooltip_html = self._download_webpage(
            f'https://promodj.com/ajax/tooltip.html?wtf=radio:{id}', slug,
            note='Downloading tooltip webpage')
        title = clean_html(self._search_regex(
            r'<h1[^>]*><b>([^<]+)</b></h1>', tooltip_html, 'title', default=None))
        description = clean_html(self._search_regex(
            r'<div>([^<]+)</div>', tooltip_html, 'description', default=None))
        thumbnail = self._search_regex(
            rf'#radio_{slug}:after {{ background-image: url\(([^)]+)\); }}',
            html, 'thumbnail', default=None)
        return {
-            'id': id,
+            'id': slug,
            'title': title,
            'description': description,
            'thumbnail': url_or_none(thumbnail),
            'formats': [{
-                'url': f'https://radio.promodj.com/{id}-192',
+                'url': f'https://radio.promodj.com/{slug}-192',
                'abr': 192,
                'ext': 'mp3',
                'acodec': 'mp3',
                'vcodec': 'none',
            }],
            'is_live': True,
        }
Author	SHA1	Message	Date
DmitryScaletta	2416fddcfb	[PromoDJ] Add codecs	2024-02-17 05:24:49 +03:00
DmitryScaletta	49ac5d31a3	[PromoDJ] Update radio extractor and add tests	2024-02-17 05:06:06 +03:00
DmitryScaletta	e32ba3fc21	[PromoDJ] Fix login regex	2024-02-17 04:16:05 +03:00