Compare commits

...

4 Commits

Author SHA1 Message Date
bashonly
009ab8646c
better episode number extraction
Authored by: bashonly
2024-01-29 14:33:03 -06:00
bashonly
6dadce6529
Add /embed/player/ test
Authored by: bashonly
2024-01-29 14:13:46 -06:00
bashonly
9211e5d9e7
Add player page note
Authored by: bashonly
2024-01-29 14:07:11 -06:00
bashonly
84b51653b4
simplify
Authored by: bashonly
2024-01-29 13:59:54 -06:00

View File

@@ -5,6 +5,7 @@ from ..utils import (
merge_dicts,
parse_count,
url_or_none,
urljoin,
)
from ..utils.traversal import traverse_obj
@@ -18,7 +19,7 @@ class NFBBaseIE(InfoExtractor):
r'const\s+episodesData\s*=', webpage, 'episode data', video_id,
contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal) or []
def _extract_ep_info(self, data, video_id):
def _extract_ep_info(self, data, video_id, slug=None):
info = traverse_obj(data, (lambda _, v: video_id in v['embed_url'], {
'description': ('description', {str}),
'thumbnail': ('thumbnail_url', {url_or_none}),
@@ -35,7 +36,7 @@ class NFBBaseIE(InfoExtractor):
'id': video_id,
'title': join_nonempty('series', 'episode', from_dict=info, delim=' - '),
'episode_number': int_or_none(self._search_regex(
r'-e(?:pisode)?-?(\d+)(?:-|$)', video_id, 'episode number', default=None)),
r'[/-]e(?:pisode)?-?(\d+)(?:[/-]|$)', slug or video_id, 'episode number', default=None)),
}
@@ -200,29 +201,33 @@ class NFBIE(NFBBaseIE):
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'NFB film /embed/player/ page',
'url': 'https://www.nfb.ca/film/afterlife/embed/player/',
'info_dict': {
'id': 'afterlife',
'ext': 'mp4',
'title': 'Afterlife',
'description': 'md5:84951394f594f1fb1e62d9c43242fdf5',
'release_year': 1978,
'duration': 420.0,
'uploader': 'Ishu Patel',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):
site, type_, slug = self._match_valid_url(url).group('site', 'type', 'id')
# Need to construct the URL since we match /embed/player/ URLs as well
webpage, urlh = self._download_webpage_handle(f'https://www.{site}.ca/{type_}/{slug}/', slug)
json_ld = self._yield_json_ld(webpage, slug)
# type_ can change from film to serie(s) after redirect; new slug may have episode number
type_, slug = self._match_valid_url(urlh.url).group('type', 'id')
# /film/ URLs have unique slugs used in the embed url
video_id = slug if type_ == 'film' else traverse_obj(
json_ld, (lambda _, v: 'VideoObject' in v['@type'], 'embedUrl', {self._match_id}),
get_all=False) or self._match_id(self._og_search_property('url', webpage, 'video id'))
# type_ may have changed from film to serie(s) after redirect
type_ = self._match_valid_url(urlh.url).group('type')
player = self._download_webpage(
f'https://www.{site}.ca/film/{video_id}/embed/player/', video_id,
'Downloading player page', query={
'player_mode': '',
'embed_mode': '0',
'auto_focus': '1',
'context_type': type_ if type_ == 'film' else 'episode',
})
embed_url = urljoin(f'https://www.{site}.ca', self._html_search_regex(
r'<[^>]+\bid=["\']player-iframe["\'][^>]*\bsrc=["\']([^"\']+)', webpage, 'embed url'))
video_id = self._match_id(embed_url) # embed url has unique slug
player = self._download_webpage(embed_url, video_id, 'Downloading player page')
if 'MESSAGE_GEOBLOCKED' in player:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
@@ -244,12 +249,12 @@ class NFBIE(NFBBaseIE):
'release_year': int_or_none(self._html_search_regex(
r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)',
webpage, 'release_year', default=None)),
} if type_ == 'film' else self._extract_ep_info(self._extract_ep_data(webpage, video_id), video_id)
} if type_ == 'film' else self._extract_ep_info(self._extract_ep_data(webpage, video_id, slug), video_id)
return merge_dicts({
'formats': formats,
'subtitles': subtitles,
}, info, self._json_ld(json_ld, video_id))
}, info, self._search_json_ld(webpage, video_id, default={}))
class NFBSeriesIE(NFBBaseIE):