Compare commits

..

No commits in common. "2416fddcfbd3dd22caa78fc2cf7018d82a7d2efc" and "345d01a175f0609c175a2141f7d552d919fae05e" have entirely different histories.

View File

@ -6,17 +6,16 @@ import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from .youtube import YoutubeIE from .youtube import YoutubeIE
from ..utils import ( from ..utils import (
OnDemandPagedList,
clean_html, clean_html,
dict_get, dict_get,
extract_attributes, extract_attributes,
ExtractorError, ExtractorError,
get_element_by_class, get_element_by_class,
get_element_html_by_id,
get_elements_html_by_class, get_elements_html_by_class,
int_or_none, int_or_none,
js_to_json, js_to_json,
merge_dicts, merge_dicts,
OnDemandPagedList,
parse_duration, parse_duration,
str_or_none, str_or_none,
traverse_obj, traverse_obj,
@ -66,8 +65,8 @@ class PromoDJBaseIE(InfoExtractor):
_PAGES = ['featured', 'shop', *_MEDIA_TYPES] _PAGES = ['featured', 'shop', *_MEDIA_TYPES]
_BASE_URL_RE = r'https?://(?:www\.)?promodj\.com' _BASE_URL_RE = r'https?://(?:www\.)?promodj\.com'
_NOT_LOGIN_LIST = '|'.join(['radio', 'embed', *_PAGES]) _NOT_LOGIN_LIST = '|'.join(['radio', *_PAGES])
_LOGIN_RE = rf'(?!(?:{_NOT_LOGIN_LIST})(?:/|$))[\w.-]+' _LOGIN_RE = rf'(?!{_NOT_LOGIN_LIST})[\w.-]+'
def _set_url_page(self, url, page): def _set_url_page(self, url, page):
parsed_url = urllib.parse.urlparse(url) parsed_url = urllib.parse.urlparse(url)
@ -155,8 +154,6 @@ class PromoDJBaseIE(InfoExtractor):
'format_id': 'lossy', 'format_id': 'lossy',
'url': traverse_obj(source, ('URL', {url_or_none})), 'url': traverse_obj(source, ('URL', {url_or_none})),
'size': traverse_obj(source, ('size', {int_or_none})), 'size': traverse_obj(source, ('size', {int_or_none})),
'acodec': 'mp3',
'vcodec': 'none',
} for source in traverse_obj(media_data, ('sources'))] } for source in traverse_obj(media_data, ('sources'))]
thumbnails = [{ thumbnails = [{
'url': url, 'url': url,
@ -250,10 +247,6 @@ class PromoDJUserIE(PromoDJBaseIE):
'id': 'slim96', 'id': 'slim96',
}, },
'playlist_count': 0, 'playlist_count': 0,
}, {
# login starts with page name
'url': 'https://promodj.com/radio.remix',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -295,10 +288,6 @@ class PromoDJUserMediaIE(PromoDJBaseIE):
'id': 'worobyev-video', 'id': 'worobyev-video',
}, },
'playlist_count': 0, 'playlist_count': 0,
}, {
# login starts with page name
'url': 'https://promodj.com/radio.remix/music',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -389,18 +378,15 @@ class PromoDJUserPageIE(PromoDJBaseIE):
*PromoDJBaseIE._MEDIA_TYPES, *PromoDJBaseIE._MEDIA_TYPES,
] ]
_NOT_USER_PAGE_LIST = '|'.join(_USER_PATHS) _NOT_USER_PAGE_LIST = '|'.join(_USER_PATHS)
_USER_PAGE_RE = rf'(?!{_NOT_USER_PAGE_LIST})[\w-]+'
_VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<slug>(?!(?:{_NOT_USER_PAGE_LIST})$)[\w-]+$)' _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<slug>{_USER_PAGE_RE})$'
_TESTS = [{ _TESTS = [{
'url': 'https://promodj.com/djperetse/MaxMixes', 'url': 'https://promodj.com/djperetse/MaxMixes',
'info_dict': { 'info_dict': {
'id': 'djperetse-MaxMixes', 'id': 'djperetse-MaxMixes',
}, },
'playlist_count': 5, 'playlist_count': 5,
}, {
# user page starts with media type (not a real link)
'url': 'https://promodj.com/djperetse/remixes-best',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -782,7 +768,7 @@ class PromoDJIE(PromoDJBaseIE):
# examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит | FLAC, 1509 Кбит # examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит | FLAC, 1509 Кбит
# https://regex101.com/r/2AuaxB/1 # https://regex101.com/r/2AuaxB/1
_FORMATS_RE = r'(?:<a\s+href=\"(?P<url>[^\"]+)\">)?\s*(?P<format>\w+), (?P<bitrate>\d+) Кбит' _FORMATS_RE = r'(?:<a\s+href=\"(?P<url>[^\"]+)\">)?\s*\w+, (?P<bitrate>\d+) Кбит'
_VIEW_COUNT_RE = r'<b>(?:Прослушиваний|Просмотров):</b>\s*(\d+)' _VIEW_COUNT_RE = r'<b>(?:Прослушиваний|Просмотров):</b>\s*(\d+)'
# examples: 0:21 | 1:07 | 74:38 # examples: 0:21 | 1:07 | 74:38
_DURATION_RE = r'<b>Продолжительность:</b>\s*(\d+:\d{2})' _DURATION_RE = r'<b>Продолжительность:</b>\s*(\d+:\d{2})'
@ -849,19 +835,18 @@ class PromoDJIE(PromoDJBaseIE):
# size field describes best quality # size field describes best quality
size = self._parse_ru_size(*re.search(self._SIZE_RE, meta_html).groups()) size = self._parse_ru_size(*re.search(self._SIZE_RE, meta_html).groups())
if type == 'videos': if type == 'videos':
for url, format, bitrate in formats_from_html: for url, bitrate in formats_from_html:
if url_or_none(url): if url_or_none(url):
metadata['formats'].append({ metadata['formats'].append({
'format_id': 'source', 'format_id': 'source',
'url': url, 'url': url,
'tbr': int(bitrate), 'tbr': int(bitrate),
'size': size, 'size': size,
'container': format.lower(),
'quality': 1, 'quality': 1,
}) })
elif not is_paid: elif not is_paid:
for i, match in enumerate(formats_from_html): for i, match in enumerate(formats_from_html):
url, format, bitrate = match url, bitrate = match
is_last = i == len(formats_from_html) - 1 is_last = i == len(formats_from_html) - 1
if is_last: if is_last:
metadata['formats'][0]['abr'] = int(bitrate) metadata['formats'][0]['abr'] = int(bitrate)
@ -870,8 +855,6 @@ class PromoDJIE(PromoDJBaseIE):
'format_id': 'lossless', 'format_id': 'lossless',
'url': url, 'url': url,
'abr': int(bitrate), 'abr': int(bitrate),
'acodec': format.lower(),
'vcodec': 'none',
}) })
metadata['formats'][-1]['size'] = size metadata['formats'][-1]['size'] = size
@ -995,55 +978,19 @@ class PromoDJRadioIE(PromoDJBaseIE):
_VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/radio#(?P<id>\w+)' _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/radio#(?P<id>\w+)'
_TESTS = [{ _TESTS = [{
'url': 'https://promodj.com/radio#dubstep', 'url': 'https://promodj.com/radio#dubstep',
'info_dict': { 'only_matching': True,
'id': 'dubstep',
'ext': 'mp3',
'title': r're:^Dubstep ',
'description': 'Всё лучше под дабстеп',
'thumbnail': r're:^https?://',
'live_status': 'is_live',
},
}, { }, {
'url': 'https://promodj.com/radio#oldschool', 'url': 'https://promodj.com/radio#oldschool',
'info_dict': { 'only_matching': True,
'id': 'oldschool',
'ext': 'mp3',
'title': r're:^Old-School ',
'description': 'То самое доброе, старое, вечное',
'thumbnail': r're:^https?://',
'live_status': 'is_live',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
slug = self._match_id(url) id = self._match_id(url)
html = self._download_webpage(url, slug)
radio_span = get_element_html_by_id(f'radio_{slug}', html)
if not radio_span:
raise ExtractorError('Radio channel is offline or not exists', expected=True)
id = self._search_regex(r'amba="radio:(\d+)"', radio_span, 'id')
tooltip_html = self._download_webpage(
f'https://promodj.com/ajax/tooltip.html?wtf=radio:{id}', slug,
note='Downloading tooltip webpage')
title = clean_html(self._search_regex(
r'<h1[^>]*><b>([^<]+)</b></h1>', tooltip_html, 'title', default=None))
description = clean_html(self._search_regex(
r'<div>([^<]+)</div>', tooltip_html, 'description', default=None))
thumbnail = self._search_regex(
rf'#radio_{slug}:after {{ background-image: url\(([^)]+)\); }}',
html, 'thumbnail', default=None)
return { return {
'id': slug, 'id': id,
'title': title,
'description': description,
'thumbnail': url_or_none(thumbnail),
'formats': [{ 'formats': [{
'url': f'https://radio.promodj.com/{slug}-192', 'url': f'https://radio.promodj.com/{id}-192',
'abr': 192, 'abr': 192,
'ext': 'mp3',
'acodec': 'mp3',
'vcodec': 'none',
}], }],
'is_live': True, 'is_live': True,
} }