mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-28 02:01:25 +01:00
Compare commits
No commits in common. "901e78af62180c469d67a023a14b85c3754ac69b" and "8ef229428206db0b5a936408a5a37f3c6cb8d667" have entirely different histories.
901e78af62
...
8ef2294282
|
@ -15,12 +15,11 @@ from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
join_nonempty,
|
join_nonempty,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
|
orderedSet,
|
||||||
parse_count,
|
parse_count,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
smuggle_url,
|
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
unsmuggle_url,
|
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urljoin,
|
urljoin,
|
||||||
|
@ -46,7 +45,7 @@ class BoomplayBaseIE(InfoExtractor):
|
||||||
"""
|
"""
|
||||||
# get_elements_text_and_html_by_attribute returns a generator
|
# get_elements_text_and_html_by_attribute returns a generator
|
||||||
return get_elements_text_and_html_by_attribute(
|
return get_elements_text_and_html_by_attribute(
|
||||||
attribute='class', value=rf'''[^'"]*(?<=['"\s]){class_}(?=['"\s])[^'"]*''', html=html,
|
'class', rf'''[^'"]*(?<=['"\s]){class_}(?=['"\s])[^'"]*''', html,
|
||||||
tag=tag, escape_value=False)
|
tag=tag, escape_value=False)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -112,7 +111,7 @@ class BoomplayBaseIE(InfoExtractor):
|
||||||
metadata_div = self._get_element_by_class_and_tag('summary', 'div', webpage) or ''
|
metadata_div = self._get_element_by_class_and_tag('summary', 'div', webpage) or ''
|
||||||
metadata_entries = re.findall(r'(?si)<strong>(?P<entry>.*?)</strong>', metadata_div) or []
|
metadata_entries = re.findall(r'(?si)<strong>(?P<entry>.*?)</strong>', metadata_div) or []
|
||||||
description = re.sub(
|
description = re.sub(
|
||||||
r'(?i)Listen and download music for free on Boomplay!', '',
|
'(?i)Listen and download music for free on Boomplay!', '',
|
||||||
clean_html(self._get_element_by_class_and_tag(
|
clean_html(self._get_element_by_class_and_tag(
|
||||||
'description_content', 'span', webpage)) or '') or None
|
'description_content', 'span', webpage)) or '') or None
|
||||||
|
|
||||||
|
@ -146,53 +145,39 @@ class BoomplayBaseIE(InfoExtractor):
|
||||||
page_metadata['release_year'] = int_or_none(v)
|
page_metadata['release_year'] = int_or_none(v)
|
||||||
return page_metadata
|
return page_metadata
|
||||||
|
|
||||||
@classmethod
|
def _extract_suitable_links(self, webpage, media_types=None):
|
||||||
def _extract_from_webpage(cls, url, webpage, **kwargs):
|
if media_types is None:
|
||||||
if kwargs:
|
media_types = self._MEDIA_TYPES
|
||||||
url = smuggle_url(url, kwargs)
|
media_types = list(variadic(media_types))
|
||||||
return super()._extract_from_webpage(url, webpage)
|
|
||||||
|
|
||||||
@classmethod
|
for idx, v in enumerate(media_types):
|
||||||
def _extract_embed_urls(cls, url, webpage):
|
media_types[idx] = re.escape(v) if v in self._MEDIA_TYPES else ''
|
||||||
url, smuggled_data = unsmuggle_url(url)
|
media_types = join_nonempty(*media_types, delim='|')
|
||||||
media_types = variadic(smuggled_data.get('media_types', cls._MEDIA_TYPES))
|
return orderedSet(traverse_obj(re.finditer(
|
||||||
media_types = join_nonempty(*(
|
rf'''(?x)
|
||||||
re.escape(v)for v in media_types if v in cls._MEDIA_TYPES),
|
|
||||||
delim='|')
|
|
||||||
|
|
||||||
for mobj in re.finditer(
|
|
||||||
rf'''(?ix)
|
|
||||||
<a
|
<a
|
||||||
(?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
|
(?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
|
||||||
(?<=\s)href\s*=\s*(?P<_q>['"])
|
(?<=\s)href\s*=\s*(?P<_q>['"])
|
||||||
(?!javascript:)(?P<href>/(?:{media_types})/\d+/?[\-\w=?&#:;@]*)
|
(?:
|
||||||
|
(?!javascript:)(?P<link>/(?:{media_types})/\d+/?[\-a-zA-Z=?&#:;@]*)
|
||||||
|
)
|
||||||
(?P=_q)
|
(?P=_q)
|
||||||
(?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
|
(?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
|
||||||
>''', webpage):
|
>''', webpage), (..., 'link', {self._urljoin}, {self.url_result})))
|
||||||
if url := cls._urljoin(mobj.group('href')):
|
|
||||||
yield url
|
|
||||||
|
|
||||||
@classmethod
|
def _extract_playlist_entries(self, webpage, media_types, warn=True):
|
||||||
def _extract_playlist_entries(cls, webpage, media_types, warn=True):
|
|
||||||
song_list = strip_or_none(
|
song_list = strip_or_none(
|
||||||
cls._get_element_by_class_and_tag('morePart_musics', 'ol', webpage)
|
self._get_element_by_class_and_tag('morePart_musics', 'ol', webpage)
|
||||||
or cls._get_element_by_class_and_tag('morePart', 'ol', webpage)
|
or self._get_element_by_class_and_tag('morePart', 'ol', webpage)
|
||||||
or '')
|
or '')
|
||||||
|
|
||||||
entries = traverse_obj(cls.__yield_elements_html_by_class_and_tag(
|
entries = traverse_obj(self.__yield_elements_html_by_class_and_tag(
|
||||||
'songName', 'a', song_list),
|
'songName', 'a', song_list),
|
||||||
(..., {extract_attributes}, 'href', {cls._urljoin}, {cls.url_result}))
|
(..., {extract_attributes}, 'href', {self._urljoin}, {self.url_result}))
|
||||||
if not entries:
|
if not entries:
|
||||||
if warn:
|
if warn:
|
||||||
cls.report_warning('Failed to extract playlist entries, finding suitable links instead!')
|
self.report_warning('Failed to extract playlist entries, finding suitable links instead!')
|
||||||
|
return self._extract_suitable_links(webpage, media_types)
|
||||||
def strip_ie(entry):
|
|
||||||
# All our IEs have a _VALID_URL and set a key: don't use it
|
|
||||||
entry.pop('ie_key', None)
|
|
||||||
return entry
|
|
||||||
|
|
||||||
return (strip_ie(result) for result in
|
|
||||||
cls._extract_from_webpage(cls._BASE, webpage, media_types=media_types))
|
|
||||||
|
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
|
@ -317,7 +302,7 @@ class BoomplayPodcastIE(BoomplayBaseIE):
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
song_list = self._get_element_by_class_and_tag('morePart_musics', 'ol', webpage)
|
song_list = self._get_element_by_class_and_tag('morePart_musics', 'ol', webpage)
|
||||||
song_list = traverse_obj(re.finditer(
|
song_list = traverse_obj(re.finditer(
|
||||||
r'''(?ix)
|
r'''(?x)
|
||||||
<li
|
<li
|
||||||
(?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
|
(?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
|
||||||
\sdata-id\s*=\s*
|
\sdata-id\s*=\s*
|
||||||
|
|
Loading…
Reference in New Issue
Block a user