Compare commits

..

No commits in common. "2416fddcfbd3dd22caa78fc2cf7018d82a7d2efc" and "345d01a175f0609c175a2141f7d552d919fae05e" have entirely different histories.

View File

@@ -6,17 +6,16 @@ import urllib.parse
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
OnDemandPagedList,
clean_html,
dict_get,
extract_attributes,
ExtractorError,
get_element_by_class,
get_element_html_by_id,
get_elements_html_by_class,
int_or_none,
js_to_json,
merge_dicts,
OnDemandPagedList,
parse_duration,
str_or_none,
traverse_obj,
@@ -66,8 +65,8 @@ class PromoDJBaseIE(InfoExtractor):
_PAGES = ['featured', 'shop', *_MEDIA_TYPES]
_BASE_URL_RE = r'https?://(?:www\.)?promodj\.com'
_NOT_LOGIN_LIST = '|'.join(['radio', 'embed', *_PAGES])
_LOGIN_RE = rf'(?!(?:{_NOT_LOGIN_LIST})(?:/|$))[\w.-]+'
_NOT_LOGIN_LIST = '|'.join(['radio', *_PAGES])
_LOGIN_RE = rf'(?!{_NOT_LOGIN_LIST})[\w.-]+'
def _set_url_page(self, url, page):
parsed_url = urllib.parse.urlparse(url)
@@ -155,8 +154,6 @@ class PromoDJBaseIE(InfoExtractor):
'format_id': 'lossy',
'url': traverse_obj(source, ('URL', {url_or_none})),
'size': traverse_obj(source, ('size', {int_or_none})),
'acodec': 'mp3',
'vcodec': 'none',
} for source in traverse_obj(media_data, ('sources'))]
thumbnails = [{
'url': url,
@@ -250,10 +247,6 @@ class PromoDJUserIE(PromoDJBaseIE):
'id': 'slim96',
},
'playlist_count': 0,
}, {
# login starts with page name
'url': 'https://promodj.com/radio.remix',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -295,10 +288,6 @@ class PromoDJUserMediaIE(PromoDJBaseIE):
'id': 'worobyev-video',
},
'playlist_count': 0,
}, {
# login starts with page name
'url': 'https://promodj.com/radio.remix/music',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -389,18 +378,15 @@ class PromoDJUserPageIE(PromoDJBaseIE):
*PromoDJBaseIE._MEDIA_TYPES,
]
_NOT_USER_PAGE_LIST = '|'.join(_USER_PATHS)
_USER_PAGE_RE = rf'(?!{_NOT_USER_PAGE_LIST})[\w-]+'
_VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<slug>(?!(?:{_NOT_USER_PAGE_LIST})$)[\w-]+$)'
_VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<slug>{_USER_PAGE_RE})$'
_TESTS = [{
'url': 'https://promodj.com/djperetse/MaxMixes',
'info_dict': {
'id': 'djperetse-MaxMixes',
},
'playlist_count': 5,
}, {
# user page starts with media type (not a real link)
'url': 'https://promodj.com/djperetse/remixes-best',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -782,7 +768,7 @@ class PromoDJIE(PromoDJBaseIE):
# examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит | FLAC, 1509 Кбит
# https://regex101.com/r/2AuaxB/1
_FORMATS_RE = r'(?:<a\s+href=\"(?P<url>[^\"]+)\">)?\s*(?P<format>\w+), (?P<bitrate>\d+) Кбит'
_FORMATS_RE = r'(?:<a\s+href=\"(?P<url>[^\"]+)\">)?\s*\w+, (?P<bitrate>\d+) Кбит'
_VIEW_COUNT_RE = r'<b>(?:Прослушиваний|Просмотров):</b>\s*(\d+)'
# examples: 0:21 | 1:07 | 74:38
_DURATION_RE = r'<b>Продолжительность:</b>\s*(\d+:\d{2})'
@@ -849,19 +835,18 @@ class PromoDJIE(PromoDJBaseIE):
# size field describes best quality
size = self._parse_ru_size(*re.search(self._SIZE_RE, meta_html).groups())
if type == 'videos':
for url, format, bitrate in formats_from_html:
for url, bitrate in formats_from_html:
if url_or_none(url):
metadata['formats'].append({
'format_id': 'source',
'url': url,
'tbr': int(bitrate),
'size': size,
'container': format.lower(),
'quality': 1,
})
elif not is_paid:
for i, match in enumerate(formats_from_html):
url, format, bitrate = match
url, bitrate = match
is_last = i == len(formats_from_html) - 1
if is_last:
metadata['formats'][0]['abr'] = int(bitrate)
@@ -870,8 +855,6 @@ class PromoDJIE(PromoDJBaseIE):
'format_id': 'lossless',
'url': url,
'abr': int(bitrate),
'acodec': format.lower(),
'vcodec': 'none',
})
metadata['formats'][-1]['size'] = size
@@ -995,55 +978,19 @@ class PromoDJRadioIE(PromoDJBaseIE):
_VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/radio#(?P<id>\w+)'
_TESTS = [{
'url': 'https://promodj.com/radio#dubstep',
'info_dict': {
'id': 'dubstep',
'ext': 'mp3',
'title': r're:^Dubstep ',
'description': 'Всё лучше под дабстеп',
'thumbnail': r're:^https?://',
'live_status': 'is_live',
},
'only_matching': True,
}, {
'url': 'https://promodj.com/radio#oldschool',
'info_dict': {
'id': 'oldschool',
'ext': 'mp3',
'title': r're:^Old-School ',
'description': 'То самое доброе, старое, вечное',
'thumbnail': r're:^https?://',
'live_status': 'is_live',
},
'only_matching': True,
}]
def _real_extract(self, url):
slug = self._match_id(url)
html = self._download_webpage(url, slug)
radio_span = get_element_html_by_id(f'radio_{slug}', html)
if not radio_span:
raise ExtractorError('Radio channel is offline or not exists', expected=True)
id = self._search_regex(r'amba="radio:(\d+)"', radio_span, 'id')
tooltip_html = self._download_webpage(
f'https://promodj.com/ajax/tooltip.html?wtf=radio:{id}', slug,
note='Downloading tooltip webpage')
title = clean_html(self._search_regex(
r'<h1[^>]*><b>([^<]+)</b></h1>', tooltip_html, 'title', default=None))
description = clean_html(self._search_regex(
r'<div>([^<]+)</div>', tooltip_html, 'description', default=None))
thumbnail = self._search_regex(
rf'#radio_{slug}:after {{ background-image: url\(([^)]+)\); }}',
html, 'thumbnail', default=None)
id = self._match_id(url)
return {
'id': slug,
'title': title,
'description': description,
'thumbnail': url_or_none(thumbnail),
'id': id,
'formats': [{
'url': f'https://radio.promodj.com/{slug}-192',
'url': f'https://radio.promodj.com/{id}-192',
'abr': 192,
'ext': 'mp3',
'acodec': 'mp3',
'vcodec': 'none',
}],
'is_live': True,
}