mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-14 11:21:24 +01:00
Compare commits
No commits in common. "0e6ad22f82f88fb4eadc5f5adf51c913ff37241a" and "4a9b2116a00ac7449ca5a5f7e96482b4fd04ad0f" have entirely different histories.
0e6ad22f82
...
4a9b2116a0
|
@ -1,9 +1,8 @@
|
||||||
import itertools
|
import functools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..networking.exceptions import HTTPError
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
OnDemandPagedList,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
get_element_html_by_class,
|
get_element_html_by_class,
|
||||||
|
@ -20,7 +19,7 @@ from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class RadioComercialIE(InfoExtractor):
|
class RadioComercialIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/[^/?#]+/t?(?P<season>\d+)/(?P<id>[\w-]+)/?(?:$|[?#])'
|
_VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/[^/?#]+/t?(?P<season>\d+)/(?P<id>[\w-]+)(?:$|[?#|/])'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao/t6/taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas#page-content-wrapper',
|
'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao/t6/taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas#page-content-wrapper',
|
||||||
'md5': '5f4fe8e485b29d2e8fd495605bc2c7e4',
|
'md5': '5f4fe8e485b29d2e8fd495605bc2c7e4',
|
||||||
|
@ -75,12 +74,12 @@ class RadioComercialIE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id, season = self._match_valid_url(url).group('id', 'season')
|
video_id, season = self._match_valid_url(url).group('id', 'season')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
print(season)
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._html_extract_title(webpage),
|
'title': self._html_extract_title(webpage),
|
||||||
'description': self._og_search_description(webpage, default=None),
|
'description': self._og_search_description(webpage, default=None),
|
||||||
'release_date': unified_strdate(get_element_by_class(
|
'release_date': unified_strdate(get_element_by_class('date', get_element_html_by_class('descriptions', webpage) or '')),
|
||||||
'date', get_element_html_by_class('descriptions', webpage) or '')),
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'season': int_or_none(season),
|
'season': int_or_none(season),
|
||||||
'url': extract_attributes(get_element_html_by_class('audiofile', webpage) or '').get('href'),
|
'url': extract_attributes(get_element_html_by_class('audiofile', webpage) or '').get('href'),
|
||||||
|
@ -88,7 +87,8 @@ class RadioComercialIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class RadioComercialPlaylistIE(InfoExtractor):
|
class RadioComercialPlaylistIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/(?P<id>[\w-]+)(?:/t?(?P<season>\d+))?/?(?:$|[?#])'
|
_VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/(?P<id>[\w-]+)(?:/t?(?P<season>\d+))?/?(?:$|[?#|/])'
|
||||||
|
_PAGE_SIZE = 19
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3',
|
'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -116,26 +116,22 @@ class RadioComercialPlaylistIE(InfoExtractor):
|
||||||
'id': 'tnt-todos-no-top_t2023',
|
'id': 'tnt-todos-no-top_t2023',
|
||||||
'title': 'TNT - Todos No Top - Temporada 2023',
|
'title': 'TNT - Todos No Top - Temporada 2023',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 39
|
'playlist_mincount': 41
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _entries(self, url, playlist_id):
|
def _fetch_page(self, url, playlist_id, page):
|
||||||
for page in itertools.count(1):
|
page += 1
|
||||||
try:
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
f'{url}/{page}', playlist_id, f'Downloading page {page}')
|
f'{url}/{page}', playlist_id, note=f'Downloading page: {page}', expected_status=404)
|
||||||
except ExtractorError as e:
|
|
||||||
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
|
|
||||||
break
|
|
||||||
raise
|
|
||||||
|
|
||||||
episodes = get_elements_html_by_class('tm-ouvir-podcast', webpage)
|
# Note: episodes not available will default to the URL https://radiocomercial.pt/podcasts/<season>.
|
||||||
if not episodes:
|
episodies_html = ''.join(get_elements_html_by_class('position-relative', webpage) or '')
|
||||||
break
|
episodes = traverse_obj(
|
||||||
for url_path in traverse_obj(episodes, (..., {extract_attributes}, 'href')):
|
get_elements_html_by_class('tm-ouvir-podcast', episodies_html),
|
||||||
episode_url = urljoin(url, url_path)
|
(..., {extract_attributes}, 'href'))
|
||||||
if RadioComercialIE.suitable(episode_url):
|
|
||||||
yield episode_url
|
for entry in episodes:
|
||||||
|
yield self.url_result(urljoin('https://radiocomercial.pt', entry), RadioComercialIE)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
podcast, season = self._match_valid_url(url).group('id', 'season')
|
podcast, season = self._match_valid_url(url).group('id', 'season')
|
||||||
|
@ -146,5 +142,6 @@ class RadioComercialPlaylistIE(InfoExtractor):
|
||||||
name = try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0])
|
name = try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0])
|
||||||
title = name if name == season else join_nonempty(name, season, delim=' - Temporada ')
|
title = name if name == season else join_nonempty(name, season, delim=' - Temporada ')
|
||||||
|
|
||||||
return self.playlist_from_matches(
|
return self.playlist_result(OnDemandPagedList(
|
||||||
self._entries(url, playlist_id), playlist_id, title, ie=RadioComercialIE)
|
functools.partial(self._fetch_page, url, playlist_id), self._PAGE_SIZE),
|
||||||
|
playlist_id, title)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user