mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-10-04 16:21:24 +02:00
Compare commits
No commits in common. "99dec4d6ed065ba8da0ac2a7533d72f1b8759b6b" and "0c8466572fff01e9ec87fe4189d64d351314f8b9" have entirely different histories.
99dec4d6ed
...
0c8466572f
|
@ -114,6 +114,10 @@ class PromoDJBaseIE(InfoExtractor):
|
||||||
if YoutubeIE.suitable(iframe_url):
|
if YoutubeIE.suitable(iframe_url):
|
||||||
yield self.url_result(iframe_url, YoutubeIE)
|
yield self.url_result(iframe_url, YoutubeIE)
|
||||||
|
|
||||||
|
def _get_playlist_page_size(self, url):
|
||||||
|
is_default_playlist = '/groups/' not in url
|
||||||
|
return 30 if is_default_playlist else 20
|
||||||
|
|
||||||
def _get_current_page(self, html):
|
def _get_current_page(self, html):
|
||||||
return int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1')
|
return int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1')
|
||||||
|
|
||||||
|
@ -155,11 +159,10 @@ class PromoDJBaseIE(InfoExtractor):
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
formats = [{
|
formats = [traverse_obj(source, {
|
||||||
'format_id': 'lossy',
|
'url': ('URL', {url_or_none}),
|
||||||
'url': traverse_obj(source, ('URL', {url_or_none})),
|
'size': ('size', {int_or_none}),
|
||||||
'size': traverse_obj(source, ('size', {int_or_none})),
|
}) for source in traverse_obj(media_data, ('sources'))]
|
||||||
} for source in traverse_obj(media_data, ('sources'))]
|
|
||||||
thumbnails = [{
|
thumbnails = [{
|
||||||
'url': url,
|
'url': url,
|
||||||
} for url in traverse_obj(media_data, ('coverURL', ('600', '1200', '2000'))) if url_or_none(url)]
|
} for url in traverse_obj(media_data, ('coverURL', ('600', '1200', '2000'))) if url_or_none(url)]
|
||||||
|
@ -379,7 +382,6 @@ class PromoDJUserPageIE(PromoDJBaseIE):
|
||||||
'blog',
|
'blog',
|
||||||
'feedback',
|
'feedback',
|
||||||
'contact',
|
'contact',
|
||||||
'uenno',
|
|
||||||
*PromoDJBaseIE._MEDIA_TYPES,
|
*PromoDJBaseIE._MEDIA_TYPES,
|
||||||
]
|
]
|
||||||
_NOT_USER_PAGE_RE = '|'.join(_USER_PAGES)
|
_NOT_USER_PAGE_RE = '|'.join(_USER_PAGES)
|
||||||
|
@ -445,11 +447,8 @@ class PromoDJBlogPageIE(PromoDJBaseIE):
|
||||||
|
|
||||||
|
|
||||||
class PromoDJPlaylistIE(PromoDJBaseIE):
|
class PromoDJPlaylistIE(PromoDJBaseIE):
|
||||||
_PLAYLIST_TYPES = ['uenno', *PromoDJBaseIE._MEDIA_TYPES]
|
|
||||||
_PLAYLIST_TYPES_RE = '|'.join(_PLAYLIST_TYPES)
|
|
||||||
|
|
||||||
_VALID_URL = [
|
_VALID_URL = [
|
||||||
rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>{_PLAYLIST_TYPES_RE})$',
|
rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>{PromoDJBaseIE._MEDIA_TYPES_RE})$',
|
||||||
rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>groups)/(?P<id>\d+)(?:/(?P<slug>\w+))?',
|
rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>groups)/(?P<id>\d+)(?:/(?P<slug>\w+))?',
|
||||||
]
|
]
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -508,36 +507,20 @@ class PromoDJPlaylistIE(PromoDJBaseIE):
|
||||||
# 900+ items
|
# 900+ items
|
||||||
'url': 'https://promodj.com/fonarev/groups/17350/Digital_Emotions_Podcast',
|
'url': 'https://promodj.com/fonarev/groups/17350/Digital_Emotions_Podcast',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
|
||||||
# user's best music and video
|
|
||||||
'url': 'https://promodj.com/djbaribyn/uenno',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'djbaribyn-uenno',
|
|
||||||
},
|
|
||||||
'playlist_count': 15,
|
|
||||||
'params': {
|
|
||||||
'playlistend': 15,
|
|
||||||
}
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_ALLOWED_MEDIA_CATS = ['music', 'video']
|
_ALLOWED_MEDIA_CATS = ['music', 'video']
|
||||||
|
|
||||||
def _get_page_size(self, type):
|
|
||||||
if type == 'uenno':
|
|
||||||
return 15
|
|
||||||
if type == 'groups':
|
|
||||||
return 20
|
|
||||||
return 30
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
match = self._match_valid_url(url)
|
match = self._match_valid_url(url)
|
||||||
login = match.group('login')
|
login = match.group('login')
|
||||||
type = match.group('type')
|
type = match.group('type')
|
||||||
playlist_id = f'{login}-{type}' if len(match.groups()) == 2 else f'{login}-{type}-{match.group("id")}'
|
playlist_id = f'{login}-{type}' if len(match.groups()) == 2 else f'{login}-{type}-{match.group("id")}'
|
||||||
|
page_size = self._get_playlist_page_size(url)
|
||||||
|
|
||||||
entries = OnDemandPagedList(
|
entries = OnDemandPagedList(
|
||||||
functools.partial(self._fetch_page, url, self._ALLOWED_MEDIA_CATS, playlist_id),
|
functools.partial(self._fetch_page, url, self._ALLOWED_MEDIA_CATS, playlist_id),
|
||||||
self._get_page_size(type))
|
page_size)
|
||||||
return self.playlist_result(entries, playlist_id=playlist_id)
|
return self.playlist_result(entries, playlist_id=playlist_id)
|
||||||
|
|
||||||
|
|
||||||
|
@ -770,6 +753,7 @@ class PromoDJIE(PromoDJBaseIE):
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
_IS_PAID_RE = r'<b>Цена:</b>'
|
||||||
# examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит | FLAC, 1509 Кбит
|
# examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит | FLAC, 1509 Кбит
|
||||||
# https://regex101.com/r/2AuaxB/1
|
# https://regex101.com/r/2AuaxB/1
|
||||||
_FORMATS_RE = r'(?:<a\s+href=\"(?P<url>[^\"]+)\">)?\s*\w+, (?P<bitrate>\d+) Кбит'
|
_FORMATS_RE = r'(?:<a\s+href=\"(?P<url>[^\"]+)\">)?\s*\w+, (?P<bitrate>\d+) Кбит'
|
||||||
|
@ -777,7 +761,7 @@ class PromoDJIE(PromoDJBaseIE):
|
||||||
# examples: 0:21 | 1:07 | 74:38
|
# examples: 0:21 | 1:07 | 74:38
|
||||||
_DURATION_RE = r'<b>Продолжительность:</b>\s*(\d+:\d{2})'
|
_DURATION_RE = r'<b>Продолжительность:</b>\s*(\d+:\d{2})'
|
||||||
# examples: 818.4 Кб | 12.9 Мб | 4 Гб | 1.76 Гб | 1001.5 Мб
|
# examples: 818.4 Кб | 12.9 Мб | 4 Гб | 1.76 Гб | 1001.5 Мб
|
||||||
_SIZE_RE = r'<b>Размер:</b>\s*(?P<size>\d+(?:\.\d+)?)\s*(?P<unit>Б|Кб|Мб|Гб|Тб)'
|
_SIZE_RE = r'<b>Размер:</b>\s*(?P<size>\d+(?:\.\d+)?)\s*(?P<unit>Кб|Мб|Гб)'
|
||||||
# examples: сегодня 2:55 | вчера 23:17 | 1 июня 2016 3:46
|
# examples: сегодня 2:55 | вчера 23:17 | 1 июня 2016 3:46
|
||||||
_TIMESTAMP_RE = r'<b>Публикация:</b>\s*(?P<day>вчера|сегодня|\d{1,2})(?: (?P<month>[а-я]+) (?P<year>\d{4}))?\s*(?P<hours>\d{1,2}):(?P<minutes>\d{2})'
|
_TIMESTAMP_RE = r'<b>Публикация:</b>\s*(?P<day>вчера|сегодня|\d{1,2})(?: (?P<month>[а-я]+) (?P<year>\d{4}))?\s*(?P<hours>\d{1,2}):(?P<minutes>\d{2})'
|
||||||
_TAGS_RE = r'<span\s+class=\"styles\">([^\n]+)</span>'
|
_TAGS_RE = r'<span\s+class=\"styles\">([^\n]+)</span>'
|
||||||
|
@ -787,8 +771,9 @@ class PromoDJIE(PromoDJBaseIE):
|
||||||
# https://regex101.com/r/b9utBf/1
|
# https://regex101.com/r/b9utBf/1
|
||||||
_VIDEO_DATA_REGEX = r'({\"video\":true,\"config\":[^\n]+)\);'
|
_VIDEO_DATA_REGEX = r'({\"video\":true,\"config\":[^\n]+)\);'
|
||||||
|
|
||||||
def _parse_ru_date(self, day, month, year, hours, minutes):
|
def _parse_ru_date(self, raw_date):
|
||||||
RU_MONTHS = ['января', 'февраля', 'марта', 'апреля', 'мая', 'июня', 'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря']
|
RU_MONTHS = ['января', 'февраля', 'марта', 'апреля', 'мая', 'июня', 'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря']
|
||||||
|
day, month, year, hours, minutes = raw_date
|
||||||
if day == 'сегодня':
|
if day == 'сегодня':
|
||||||
d = datetime.date.today()
|
d = datetime.date.today()
|
||||||
day = d.day
|
day = d.day
|
||||||
|
@ -805,9 +790,10 @@ class PromoDJIE(PromoDJBaseIE):
|
||||||
year = int(year)
|
year = int(year)
|
||||||
return datetime.datetime(year, month, day, int(hours), int(minutes)).timestamp()
|
return datetime.datetime(year, month, day, int(hours), int(minutes)).timestamp()
|
||||||
|
|
||||||
def _parse_ru_size(self, size, unit):
|
def _parse_ru_size(self, raw_size):
|
||||||
RU_SIZE_UNITS = ['Б', 'Кб', 'Мб', 'Гб', 'Тб']
|
RU_SIZE_UNITS = ['Б', 'Кб', 'Мб', 'Гб']
|
||||||
return int(float(size) * pow(1024, RU_SIZE_UNITS.index(unit)))
|
size, size_unit = raw_size
|
||||||
|
return int(float(size) * pow(1024, RU_SIZE_UNITS.index(size_unit)))
|
||||||
|
|
||||||
# music: always have lossy format (mp3), sometimes have lossless (wav or flac) format
|
# music: always have lossy format (mp3), sometimes have lossless (wav or flac) format
|
||||||
# video: sometimes have source format (mp4, avi, asf), always have converted for web format (mp4)
|
# video: sometimes have source format (mp4, avi, asf), always have converted for web format (mp4)
|
||||||
|
@ -835,9 +821,9 @@ class PromoDJIE(PromoDJBaseIE):
|
||||||
# download links can be missing
|
# download links can be missing
|
||||||
# best quality format always comes first
|
# best quality format always comes first
|
||||||
formats_from_html = re.findall(self._FORMATS_RE, meta_html)
|
formats_from_html = re.findall(self._FORMATS_RE, meta_html)
|
||||||
is_paid = '<b>Цена:</b>' in meta_html
|
is_paid = re.search(self._IS_PAID_RE, meta_html)
|
||||||
# size field describes best quality
|
# size field describes best quality
|
||||||
size = self._parse_ru_size(*re.search(self._SIZE_RE, meta_html).groups())
|
size = self._parse_ru_size(re.search(self._SIZE_RE, meta_html).groups())
|
||||||
if type == 'videos':
|
if type == 'videos':
|
||||||
for url, bitrate in formats_from_html:
|
for url, bitrate in formats_from_html:
|
||||||
if url_or_none(url):
|
if url_or_none(url):
|
||||||
|
@ -848,15 +834,14 @@ class PromoDJIE(PromoDJBaseIE):
|
||||||
'size': size,
|
'size': size,
|
||||||
'quality': 1,
|
'quality': 1,
|
||||||
})
|
})
|
||||||
elif not is_paid:
|
else:
|
||||||
for i, match in enumerate(formats_from_html):
|
for i, match in enumerate(formats_from_html):
|
||||||
url, bitrate = match
|
url, bitrate = match
|
||||||
is_last = i == len(formats_from_html) - 1
|
is_last = i == len(formats_from_html) - 1
|
||||||
if is_last:
|
if is_last:
|
||||||
metadata['formats'][0]['abr'] = int(bitrate)
|
metadata['formats'][0]['abr'] = int(bitrate)
|
||||||
elif url_or_none(url):
|
elif url_or_none(url) and not is_paid:
|
||||||
metadata['formats'].append({
|
metadata['formats'].append({
|
||||||
'format_id': 'lossless',
|
|
||||||
'url': url,
|
'url': url,
|
||||||
'abr': int(bitrate),
|
'abr': int(bitrate),
|
||||||
})
|
})
|
||||||
|
@ -866,7 +851,7 @@ class PromoDJIE(PromoDJBaseIE):
|
||||||
'title': clean_html(get_element_by_class('file_title', html)),
|
'title': clean_html(get_element_by_class('file_title', html)),
|
||||||
'view_count': int_or_none(self._search_regex(self._VIEW_COUNT_RE, meta_html, 'view_count', default=None)),
|
'view_count': int_or_none(self._search_regex(self._VIEW_COUNT_RE, meta_html, 'view_count', default=None)),
|
||||||
'duration': parse_duration(self._search_regex(self._DURATION_RE, meta_html, 'duration')),
|
'duration': parse_duration(self._search_regex(self._DURATION_RE, meta_html, 'duration')),
|
||||||
'timestamp': self._parse_ru_date(*re.search(self._TIMESTAMP_RE, meta_html).groups()),
|
'timestamp': self._parse_ru_date(re.search(self._TIMESTAMP_RE, meta_html).groups()),
|
||||||
'tags': self._html_search_regex(self._TAGS_RE, meta_html, 'tags').split(', '),
|
'tags': self._html_search_regex(self._TAGS_RE, meta_html, 'tags').split(', '),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user