mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-10-04 08:11:25 +02:00
Compare commits
6 Commits
0c8466572f
...
99dec4d6ed
Author | SHA1 | Date | |
---|---|---|---|
|
99dec4d6ed | ||
|
1b3c186424 | ||
|
7e96492ba0 | ||
|
e6f3e6de0e | ||
|
c837d90e12 | ||
|
c820715205 |
|
@ -114,10 +114,6 @@ class PromoDJBaseIE(InfoExtractor):
|
|||
if YoutubeIE.suitable(iframe_url):
|
||||
yield self.url_result(iframe_url, YoutubeIE)
|
||||
|
||||
def _get_playlist_page_size(self, url):
|
||||
is_default_playlist = '/groups/' not in url
|
||||
return 30 if is_default_playlist else 20
|
||||
|
||||
def _get_current_page(self, html):
|
||||
return int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1')
|
||||
|
||||
|
@ -159,10 +155,11 @@ class PromoDJBaseIE(InfoExtractor):
|
|||
})
|
||||
}
|
||||
|
||||
formats = [traverse_obj(source, {
|
||||
'url': ('URL', {url_or_none}),
|
||||
'size': ('size', {int_or_none}),
|
||||
}) for source in traverse_obj(media_data, ('sources'))]
|
||||
formats = [{
|
||||
'format_id': 'lossy',
|
||||
'url': traverse_obj(source, ('URL', {url_or_none})),
|
||||
'size': traverse_obj(source, ('size', {int_or_none})),
|
||||
} for source in traverse_obj(media_data, ('sources'))]
|
||||
thumbnails = [{
|
||||
'url': url,
|
||||
} for url in traverse_obj(media_data, ('coverURL', ('600', '1200', '2000'))) if url_or_none(url)]
|
||||
|
@ -382,6 +379,7 @@ class PromoDJUserPageIE(PromoDJBaseIE):
|
|||
'blog',
|
||||
'feedback',
|
||||
'contact',
|
||||
'uenno',
|
||||
*PromoDJBaseIE._MEDIA_TYPES,
|
||||
]
|
||||
_NOT_USER_PAGE_RE = '|'.join(_USER_PAGES)
|
||||
|
@ -447,8 +445,11 @@ class PromoDJBlogPageIE(PromoDJBaseIE):
|
|||
|
||||
|
||||
class PromoDJPlaylistIE(PromoDJBaseIE):
|
||||
_PLAYLIST_TYPES = ['uenno', *PromoDJBaseIE._MEDIA_TYPES]
|
||||
_PLAYLIST_TYPES_RE = '|'.join(_PLAYLIST_TYPES)
|
||||
|
||||
_VALID_URL = [
|
||||
rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>{PromoDJBaseIE._MEDIA_TYPES_RE})$',
|
||||
rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>{_PLAYLIST_TYPES_RE})$',
|
||||
rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>groups)/(?P<id>\d+)(?:/(?P<slug>\w+))?',
|
||||
]
|
||||
_TESTS = [{
|
||||
|
@ -507,20 +508,36 @@ class PromoDJPlaylistIE(PromoDJBaseIE):
|
|||
# 900+ items
|
||||
'url': 'https://promodj.com/fonarev/groups/17350/Digital_Emotions_Podcast',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# user's best music and video
|
||||
'url': 'https://promodj.com/djbaribyn/uenno',
|
||||
'info_dict': {
|
||||
'id': 'djbaribyn-uenno',
|
||||
},
|
||||
'playlist_count': 15,
|
||||
'params': {
|
||||
'playlistend': 15,
|
||||
}
|
||||
}]
|
||||
|
||||
_ALLOWED_MEDIA_CATS = ['music', 'video']
|
||||
|
||||
def _get_page_size(self, type):
|
||||
if type == 'uenno':
|
||||
return 15
|
||||
if type == 'groups':
|
||||
return 20
|
||||
return 30
|
||||
|
||||
def _real_extract(self, url):
|
||||
match = self._match_valid_url(url)
|
||||
login = match.group('login')
|
||||
type = match.group('type')
|
||||
playlist_id = f'{login}-{type}' if len(match.groups()) == 2 else f'{login}-{type}-{match.group("id")}'
|
||||
page_size = self._get_playlist_page_size(url)
|
||||
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, url, self._ALLOWED_MEDIA_CATS, playlist_id),
|
||||
page_size)
|
||||
self._get_page_size(type))
|
||||
return self.playlist_result(entries, playlist_id=playlist_id)
|
||||
|
||||
|
||||
|
@ -753,7 +770,6 @@ class PromoDJIE(PromoDJBaseIE):
|
|||
},
|
||||
}]
|
||||
|
||||
_IS_PAID_RE = r'<b>Цена:</b>'
|
||||
# examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит | FLAC, 1509 Кбит
|
||||
# https://regex101.com/r/2AuaxB/1
|
||||
_FORMATS_RE = r'(?:<a\s+href=\"(?P<url>[^\"]+)\">)?\s*\w+, (?P<bitrate>\d+) Кбит'
|
||||
|
@ -761,7 +777,7 @@ class PromoDJIE(PromoDJBaseIE):
|
|||
# examples: 0:21 | 1:07 | 74:38
|
||||
_DURATION_RE = r'<b>Продолжительность:</b>\s*(\d+:\d{2})'
|
||||
# examples: 818.4 Кб | 12.9 Мб | 4 Гб | 1.76 Гб | 1001.5 Мб
|
||||
_SIZE_RE = r'<b>Размер:</b>\s*(?P<size>\d+(?:\.\d+)?)\s*(?P<unit>Кб|Мб|Гб)'
|
||||
_SIZE_RE = r'<b>Размер:</b>\s*(?P<size>\d+(?:\.\d+)?)\s*(?P<unit>Б|Кб|Мб|Гб|Тб)'
|
||||
# examples: сегодня 2:55 | вчера 23:17 | 1 июня 2016 3:46
|
||||
_TIMESTAMP_RE = r'<b>Публикация:</b>\s*(?P<day>вчера|сегодня|\d{1,2})(?: (?P<month>[а-я]+) (?P<year>\d{4}))?\s*(?P<hours>\d{1,2}):(?P<minutes>\d{2})'
|
||||
_TAGS_RE = r'<span\s+class=\"styles\">([^\n]+)</span>'
|
||||
|
@ -771,9 +787,8 @@ class PromoDJIE(PromoDJBaseIE):
|
|||
# https://regex101.com/r/b9utBf/1
|
||||
_VIDEO_DATA_REGEX = r'({\"video\":true,\"config\":[^\n]+)\);'
|
||||
|
||||
def _parse_ru_date(self, raw_date):
|
||||
def _parse_ru_date(self, day, month, year, hours, minutes):
|
||||
RU_MONTHS = ['января', 'февраля', 'марта', 'апреля', 'мая', 'июня', 'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря']
|
||||
day, month, year, hours, minutes = raw_date
|
||||
if day == 'сегодня':
|
||||
d = datetime.date.today()
|
||||
day = d.day
|
||||
|
@ -790,10 +805,9 @@ class PromoDJIE(PromoDJBaseIE):
|
|||
year = int(year)
|
||||
return datetime.datetime(year, month, day, int(hours), int(minutes)).timestamp()
|
||||
|
||||
def _parse_ru_size(self, raw_size):
|
||||
RU_SIZE_UNITS = ['Б', 'Кб', 'Мб', 'Гб']
|
||||
size, size_unit = raw_size
|
||||
return int(float(size) * pow(1024, RU_SIZE_UNITS.index(size_unit)))
|
||||
def _parse_ru_size(self, size, unit):
|
||||
RU_SIZE_UNITS = ['Б', 'Кб', 'Мб', 'Гб', 'Тб']
|
||||
return int(float(size) * pow(1024, RU_SIZE_UNITS.index(unit)))
|
||||
|
||||
# music: always have lossy format (mp3), sometimes have lossless (wav or flac) format
|
||||
# video: sometimes have source format (mp4, avi, asf), always have converted for web format (mp4)
|
||||
|
@ -821,9 +835,9 @@ class PromoDJIE(PromoDJBaseIE):
|
|||
# download links can be missing
|
||||
# best quality format always comes first
|
||||
formats_from_html = re.findall(self._FORMATS_RE, meta_html)
|
||||
is_paid = re.search(self._IS_PAID_RE, meta_html)
|
||||
is_paid = '<b>Цена:</b>' in meta_html
|
||||
# size field describes best quality
|
||||
size = self._parse_ru_size(re.search(self._SIZE_RE, meta_html).groups())
|
||||
size = self._parse_ru_size(*re.search(self._SIZE_RE, meta_html).groups())
|
||||
if type == 'videos':
|
||||
for url, bitrate in formats_from_html:
|
||||
if url_or_none(url):
|
||||
|
@ -834,14 +848,15 @@ class PromoDJIE(PromoDJBaseIE):
|
|||
'size': size,
|
||||
'quality': 1,
|
||||
})
|
||||
else:
|
||||
elif not is_paid:
|
||||
for i, match in enumerate(formats_from_html):
|
||||
url, bitrate = match
|
||||
is_last = i == len(formats_from_html) - 1
|
||||
if is_last:
|
||||
metadata['formats'][0]['abr'] = int(bitrate)
|
||||
elif url_or_none(url) and not is_paid:
|
||||
elif url_or_none(url):
|
||||
metadata['formats'].append({
|
||||
'format_id': 'lossless',
|
||||
'url': url,
|
||||
'abr': int(bitrate),
|
||||
})
|
||||
|
@ -851,7 +866,7 @@ class PromoDJIE(PromoDJBaseIE):
|
|||
'title': clean_html(get_element_by_class('file_title', html)),
|
||||
'view_count': int_or_none(self._search_regex(self._VIEW_COUNT_RE, meta_html, 'view_count', default=None)),
|
||||
'duration': parse_duration(self._search_regex(self._DURATION_RE, meta_html, 'duration')),
|
||||
'timestamp': self._parse_ru_date(re.search(self._TIMESTAMP_RE, meta_html).groups()),
|
||||
'timestamp': self._parse_ru_date(*re.search(self._TIMESTAMP_RE, meta_html).groups()),
|
||||
'tags': self._html_search_regex(self._TAGS_RE, meta_html, 'tags').split(', '),
|
||||
})
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user