Compare commits

..

7 Commits

Author SHA1 Message Date
Elan Ruusamäe
834a5907c1 Fix .groups() arguments
- https://docs.python.org/3/library/re.html#re.Match.groups
> Match.groups(default=None)
2023-11-10 12:21:46 +02:00
Elan Ruusamäe
f84846415d Update test data 2023-11-10 11:54:56 +02:00
Elan Ruusamäe
5beb56f2aa Fix groups arguments
    telecast_id, episode = self._match_valid_url(url).groups('id', 'ep')
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: groups() takes at most 1 argument (2 given)
2023-11-10 11:50:05 +02:00
Elan Ruusamäe
091229b479
Update yt_dlp/extractor/duoplay.py: Update imports
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
2023-11-10 11:39:26 +02:00
Elan Ruusamäe
1c1d5b2524
Update yt_dlp/extractor/duoplay.py: Take ep from url
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
2023-11-10 11:38:25 +02:00
Elan Ruusamäe
761c2cfb36
Update yt_dlp/extractor/duoplay.py: Inline manifest_url
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
2023-11-10 11:37:26 +02:00
Elan Ruusamäe
8d687ca4fa
Update yt_dlp/extractor/duoplay.py: use **traverse_obj
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
2023-11-10 11:37:02 +02:00

View File

@@ -1,19 +1,24 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
traverse_obj, extract_attributes,
get_element_text_and_html_by_tag,
int_or_none,
join_nonempty,
str_or_none,
try_call,
unified_timestamp, unified_timestamp,
strip_or_none,
) )
from ..utils.traversal import traverse_obj
class DuoplayIE(InfoExtractor): class DuoplayIE(InfoExtractor):
_VALID_URL = r'https://duoplay\.ee/(?P<id>\d+)/' _VALID_URL = r'https://duoplay\.ee/(?P<id>\d+)/[\w-]+/?(?:\?(?:[^#]+&)?ep=(?P<ep>\d+))?'
_TESTS = [{ _TESTS = [{
'note': 'Siberi võmm S02E12', 'note': 'Siberi võmm S02E12',
'url': 'https://duoplay.ee/4312/siberi-vomm?ep=24', 'url': 'https://duoplay.ee/4312/siberi-vomm?ep=24',
'md5': '1ff59d535310ac9c5cf5f287d8f91b2d', 'md5': '1ff59d535310ac9c5cf5f287d8f91b2d',
'info_dict': { 'info_dict': {
'id': '4312', 'id': '4312_24',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Operatsioon "Öö"', 'title': 'Operatsioon "Öö"',
'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$', 'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
@@ -21,19 +26,18 @@ class DuoplayIE(InfoExtractor):
'upload_date': '20170523', 'upload_date': '20170523',
'timestamp': 1495567800, 'timestamp': 1495567800,
'series': 'Siberi võmm', 'series': 'Siberi võmm',
'series_id': 4312, 'series_id': '4312',
'season': 'Season 2', 'season': 'Season 2',
'season_number': 2, 'season_number': 2,
'episode': 'Operatsioon "Öö"', 'episode': 'Operatsioon "Öö"',
'episode_number': 12, 'episode_number': 12,
'episode_id': 24,
}, },
}, { }, {
'note': 'Empty title', 'note': 'Empty title',
'url': 'https://duoplay.ee/17/uhikarotid?ep=14', 'url': 'https://duoplay.ee/17/uhikarotid?ep=14',
'md5': '6aca68be71112314738dd17cced7f8bf', 'md5': '6aca68be71112314738dd17cced7f8bf',
'info_dict': { 'info_dict': {
'id': '17', 'id': '17_14',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Episode 14', 'title': 'Episode 14',
'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$', 'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
@@ -41,36 +45,37 @@ class DuoplayIE(InfoExtractor):
'upload_date': '20100916', 'upload_date': '20100916',
'timestamp': 1284661800, 'timestamp': 1284661800,
'series': 'Ühikarotid', 'series': 'Ühikarotid',
'series_id': 17, 'series_id': '17',
'season': 'Season 2', 'season': 'Season 2',
'season_number': 2, 'season_number': 2,
'episode_id': 14, 'episode': 'Episode 14',
'episode_number': 14,
}, },
}] }]
def _real_extract(self, url): def _real_extract(self, url):
def decode_quot(s: str): telecast_id, episode = self._match_valid_url(url).groups()
return s.replace("&quot;", '"') video_id = join_nonempty(telecast_id, episode, delim='_')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
manifest_url = self._search_regex(r'<video-player[^>]+manifest-url="([^"]+)"', webpage, 'video-player') video_player = try_call(lambda: extract_attributes(
episode_attr = self._search_regex(r'<video-player[^>]+:episode="([^"]+)"', webpage, 'episode data') get_element_text_and_html_by_tag('video-player', webpage)[1]))
ep = self._parse_json(episode_attr, video_id, decode_quot) if not video_player or not video_player.get('manifest-url'):
self.raise_no_formats('No video found', expected=True)
episode_attr = self._parse_json(video_player.get(':episode') or '', video_id, fatal=False) or {}
return { return {
'id': video_id, 'id': video_id,
# fallback to absolute "episode_id" value 'formats': self._extract_m3u8_formats(video_player['manifest-url'], video_id, 'mp4'),
'title': traverse_obj(ep, 'subtitle') or f"Episode {traverse_obj(ep, 'episode_id')}", **traverse_obj(episode_attr, {
'description': strip_or_none(traverse_obj(ep, 'synopsis')), 'title': (None, ('subtitle', ('episode_id', {lambda x: f'Episode {x}'}))),
'thumbnail': traverse_obj(ep, ('images', 'original')), 'description': 'synopsis',
'formats': self._extract_m3u8_formats(manifest_url, video_id, 'mp4'), 'thumbnail': ('images', 'original'),
'timestamp': unified_timestamp(traverse_obj(ep, 'airtime') + ' +0200'), 'timestamp': ('airtime', {lambda x: unified_timestamp(x + ' +0200')}),
'series': traverse_obj(ep, 'title'), 'series': 'title',
'series_id': traverse_obj(ep, 'telecast_id'), 'series_id': ('telecast_id', {str_or_none}),
'season_number': traverse_obj(ep, 'season_id'), 'season_number': ('season_id', {int_or_none}),
'episode': traverse_obj(ep, 'subtitle'), 'episode': 'subtitle',
# fallback to absolute "episode_id" value 'episode_number': (None, ('episode_nr', 'episode_id'), {int_or_none}),
'episode_number': traverse_obj(ep, 'episode_nr') or traverse_obj(ep, 'episode_id'), }, get_all=False),
'episode_id': traverse_obj(ep, 'episode_id'),
} }