Compare commits

..

No commits in common. "99dec4d6ed065ba8da0ac2a7533d72f1b8759b6b" and "0c8466572fff01e9ec87fe4189d64d351314f8b9" have entirely different histories.

View File

@ -114,6 +114,10 @@ class PromoDJBaseIE(InfoExtractor):
if YoutubeIE.suitable(iframe_url): if YoutubeIE.suitable(iframe_url):
yield self.url_result(iframe_url, YoutubeIE) yield self.url_result(iframe_url, YoutubeIE)
def _get_playlist_page_size(self, url):
is_default_playlist = '/groups/' not in url
return 30 if is_default_playlist else 20
def _get_current_page(self, html): def _get_current_page(self, html):
return int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1') return int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1')
@ -155,11 +159,10 @@ class PromoDJBaseIE(InfoExtractor):
}) })
} }
formats = [{ formats = [traverse_obj(source, {
'format_id': 'lossy', 'url': ('URL', {url_or_none}),
'url': traverse_obj(source, ('URL', {url_or_none})), 'size': ('size', {int_or_none}),
'size': traverse_obj(source, ('size', {int_or_none})), }) for source in traverse_obj(media_data, ('sources'))]
} for source in traverse_obj(media_data, ('sources'))]
thumbnails = [{ thumbnails = [{
'url': url, 'url': url,
} for url in traverse_obj(media_data, ('coverURL', ('600', '1200', '2000'))) if url_or_none(url)] } for url in traverse_obj(media_data, ('coverURL', ('600', '1200', '2000'))) if url_or_none(url)]
@ -379,7 +382,6 @@ class PromoDJUserPageIE(PromoDJBaseIE):
'blog', 'blog',
'feedback', 'feedback',
'contact', 'contact',
'uenno',
*PromoDJBaseIE._MEDIA_TYPES, *PromoDJBaseIE._MEDIA_TYPES,
] ]
_NOT_USER_PAGE_RE = '|'.join(_USER_PAGES) _NOT_USER_PAGE_RE = '|'.join(_USER_PAGES)
@ -445,11 +447,8 @@ class PromoDJBlogPageIE(PromoDJBaseIE):
class PromoDJPlaylistIE(PromoDJBaseIE): class PromoDJPlaylistIE(PromoDJBaseIE):
_PLAYLIST_TYPES = ['uenno', *PromoDJBaseIE._MEDIA_TYPES]
_PLAYLIST_TYPES_RE = '|'.join(_PLAYLIST_TYPES)
_VALID_URL = [ _VALID_URL = [
rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>{_PLAYLIST_TYPES_RE})$', rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>{PromoDJBaseIE._MEDIA_TYPES_RE})$',
rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>groups)/(?P<id>\d+)(?:/(?P<slug>\w+))?', rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>groups)/(?P<id>\d+)(?:/(?P<slug>\w+))?',
] ]
_TESTS = [{ _TESTS = [{
@ -508,36 +507,20 @@ class PromoDJPlaylistIE(PromoDJBaseIE):
# 900+ items # 900+ items
'url': 'https://promodj.com/fonarev/groups/17350/Digital_Emotions_Podcast', 'url': 'https://promodj.com/fonarev/groups/17350/Digital_Emotions_Podcast',
'only_matching': True, 'only_matching': True,
}, {
# user's best music and video
'url': 'https://promodj.com/djbaribyn/uenno',
'info_dict': {
'id': 'djbaribyn-uenno',
},
'playlist_count': 15,
'params': {
'playlistend': 15,
}
}] }]
_ALLOWED_MEDIA_CATS = ['music', 'video'] _ALLOWED_MEDIA_CATS = ['music', 'video']
def _get_page_size(self, type):
if type == 'uenno':
return 15
if type == 'groups':
return 20
return 30
def _real_extract(self, url): def _real_extract(self, url):
match = self._match_valid_url(url) match = self._match_valid_url(url)
login = match.group('login') login = match.group('login')
type = match.group('type') type = match.group('type')
playlist_id = f'{login}-{type}' if len(match.groups()) == 2 else f'{login}-{type}-{match.group("id")}' playlist_id = f'{login}-{type}' if len(match.groups()) == 2 else f'{login}-{type}-{match.group("id")}'
page_size = self._get_playlist_page_size(url)
entries = OnDemandPagedList( entries = OnDemandPagedList(
functools.partial(self._fetch_page, url, self._ALLOWED_MEDIA_CATS, playlist_id), functools.partial(self._fetch_page, url, self._ALLOWED_MEDIA_CATS, playlist_id),
self._get_page_size(type)) page_size)
return self.playlist_result(entries, playlist_id=playlist_id) return self.playlist_result(entries, playlist_id=playlist_id)
@ -770,6 +753,7 @@ class PromoDJIE(PromoDJBaseIE):
}, },
}] }]
_IS_PAID_RE = r'<b>Цена:</b>'
# examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит | FLAC, 1509 Кбит # examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит | FLAC, 1509 Кбит
# https://regex101.com/r/2AuaxB/1 # https://regex101.com/r/2AuaxB/1
_FORMATS_RE = r'(?:<a\s+href=\"(?P<url>[^\"]+)\">)?\s*\w+, (?P<bitrate>\d+) Кбит' _FORMATS_RE = r'(?:<a\s+href=\"(?P<url>[^\"]+)\">)?\s*\w+, (?P<bitrate>\d+) Кбит'
@ -777,7 +761,7 @@ class PromoDJIE(PromoDJBaseIE):
# examples: 0:21 | 1:07 | 74:38 # examples: 0:21 | 1:07 | 74:38
_DURATION_RE = r'<b>Продолжительность:</b>\s*(\d+:\d{2})' _DURATION_RE = r'<b>Продолжительность:</b>\s*(\d+:\d{2})'
# examples: 818.4 Кб | 12.9 Мб | 4 Гб | 1.76 Гб | 1001.5 Мб # examples: 818.4 Кб | 12.9 Мб | 4 Гб | 1.76 Гб | 1001.5 Мб
_SIZE_RE = r'<b>Размер:</b>\s*(?P<size>\d+(?:\.\d+)?)\s*(?P<unit>Б|Кб|Мбб|Тб)' _SIZE_RE = r'<b>Размер:</b>\s*(?P<size>\d+(?:\.\d+)?)\s*(?P<unit>Кб|Мб|Гб)'
# examples: сегодня 2:55 | вчера 23:17 | 1 июня 2016 3:46 # examples: сегодня 2:55 | вчера 23:17 | 1 июня 2016 3:46
_TIMESTAMP_RE = r'<b>Публикация:</b>\s*(?P<day>вчера|сегодня|\d{1,2})(?: (?P<month>[а-я]+) (?P<year>\d{4}))?\s*(?P<hours>\d{1,2}):(?P<minutes>\d{2})' _TIMESTAMP_RE = r'<b>Публикация:</b>\s*(?P<day>вчера|сегодня|\d{1,2})(?: (?P<month>[а-я]+) (?P<year>\d{4}))?\s*(?P<hours>\d{1,2}):(?P<minutes>\d{2})'
_TAGS_RE = r'<span\s+class=\"styles\">([^\n]+)</span>' _TAGS_RE = r'<span\s+class=\"styles\">([^\n]+)</span>'
@ -787,8 +771,9 @@ class PromoDJIE(PromoDJBaseIE):
# https://regex101.com/r/b9utBf/1 # https://regex101.com/r/b9utBf/1
_VIDEO_DATA_REGEX = r'({\"video\":true,\"config\":[^\n]+)\);' _VIDEO_DATA_REGEX = r'({\"video\":true,\"config\":[^\n]+)\);'
def _parse_ru_date(self, day, month, year, hours, minutes): def _parse_ru_date(self, raw_date):
RU_MONTHS = ['января', 'февраля', 'марта', 'апреля', 'мая', 'июня', 'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря'] RU_MONTHS = ['января', 'февраля', 'марта', 'апреля', 'мая', 'июня', 'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря']
day, month, year, hours, minutes = raw_date
if day == 'сегодня': if day == 'сегодня':
d = datetime.date.today() d = datetime.date.today()
day = d.day day = d.day
@ -805,9 +790,10 @@ class PromoDJIE(PromoDJBaseIE):
year = int(year) year = int(year)
return datetime.datetime(year, month, day, int(hours), int(minutes)).timestamp() return datetime.datetime(year, month, day, int(hours), int(minutes)).timestamp()
def _parse_ru_size(self, size, unit): def _parse_ru_size(self, raw_size):
RU_SIZE_UNITS = ['Б', 'Кб', 'Мб', 'Гб', 'Тб'] RU_SIZE_UNITS = ['Б', 'Кб', 'Мб', 'Гб']
return int(float(size) * pow(1024, RU_SIZE_UNITS.index(unit))) size, size_unit = raw_size
return int(float(size) * pow(1024, RU_SIZE_UNITS.index(size_unit)))
# music: always have lossy format (mp3), sometimes have lossless (wav or flac) format # music: always have lossy format (mp3), sometimes have lossless (wav or flac) format
# video: sometimes have source format (mp4, avi, asf), always have converted for web format (mp4) # video: sometimes have source format (mp4, avi, asf), always have converted for web format (mp4)
@ -835,9 +821,9 @@ class PromoDJIE(PromoDJBaseIE):
# download links can be missing # download links can be missing
# best quality format always comes first # best quality format always comes first
formats_from_html = re.findall(self._FORMATS_RE, meta_html) formats_from_html = re.findall(self._FORMATS_RE, meta_html)
is_paid = '<b>Цена:</b>' in meta_html is_paid = re.search(self._IS_PAID_RE, meta_html)
# size field describes best quality # size field describes best quality
size = self._parse_ru_size(*re.search(self._SIZE_RE, meta_html).groups()) size = self._parse_ru_size(re.search(self._SIZE_RE, meta_html).groups())
if type == 'videos': if type == 'videos':
for url, bitrate in formats_from_html: for url, bitrate in formats_from_html:
if url_or_none(url): if url_or_none(url):
@ -848,15 +834,14 @@ class PromoDJIE(PromoDJBaseIE):
'size': size, 'size': size,
'quality': 1, 'quality': 1,
}) })
elif not is_paid: else:
for i, match in enumerate(formats_from_html): for i, match in enumerate(formats_from_html):
url, bitrate = match url, bitrate = match
is_last = i == len(formats_from_html) - 1 is_last = i == len(formats_from_html) - 1
if is_last: if is_last:
metadata['formats'][0]['abr'] = int(bitrate) metadata['formats'][0]['abr'] = int(bitrate)
elif url_or_none(url): elif url_or_none(url) and not is_paid:
metadata['formats'].append({ metadata['formats'].append({
'format_id': 'lossless',
'url': url, 'url': url,
'abr': int(bitrate), 'abr': int(bitrate),
}) })
@ -866,7 +851,7 @@ class PromoDJIE(PromoDJBaseIE):
'title': clean_html(get_element_by_class('file_title', html)), 'title': clean_html(get_element_by_class('file_title', html)),
'view_count': int_or_none(self._search_regex(self._VIEW_COUNT_RE, meta_html, 'view_count', default=None)), 'view_count': int_or_none(self._search_regex(self._VIEW_COUNT_RE, meta_html, 'view_count', default=None)),
'duration': parse_duration(self._search_regex(self._DURATION_RE, meta_html, 'duration')), 'duration': parse_duration(self._search_regex(self._DURATION_RE, meta_html, 'duration')),
'timestamp': self._parse_ru_date(*re.search(self._TIMESTAMP_RE, meta_html).groups()), 'timestamp': self._parse_ru_date(re.search(self._TIMESTAMP_RE, meta_html).groups()),
'tags': self._html_search_regex(self._TAGS_RE, meta_html, 'tags').split(', '), 'tags': self._html_search_regex(self._TAGS_RE, meta_html, 'tags').split(', '),
}) })