mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-10-02 07:11:24 +02:00
Compare commits
4 Commits
bc16d5f1b0
...
009ab8646c
Author | SHA1 | Date | |
---|---|---|---|
|
009ab8646c | ||
|
6dadce6529 | ||
|
9211e5d9e7 | ||
|
84b51653b4 |
|
@ -5,6 +5,7 @@ from ..utils import (
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
parse_count,
|
parse_count,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
from ..utils.traversal import traverse_obj
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
@ -18,7 +19,7 @@ class NFBBaseIE(InfoExtractor):
|
||||||
r'const\s+episodesData\s*=', webpage, 'episode data', video_id,
|
r'const\s+episodesData\s*=', webpage, 'episode data', video_id,
|
||||||
contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal) or []
|
contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal) or []
|
||||||
|
|
||||||
def _extract_ep_info(self, data, video_id):
|
def _extract_ep_info(self, data, video_id, slug=None):
|
||||||
info = traverse_obj(data, (lambda _, v: video_id in v['embed_url'], {
|
info = traverse_obj(data, (lambda _, v: video_id in v['embed_url'], {
|
||||||
'description': ('description', {str}),
|
'description': ('description', {str}),
|
||||||
'thumbnail': ('thumbnail_url', {url_or_none}),
|
'thumbnail': ('thumbnail_url', {url_or_none}),
|
||||||
|
@ -35,7 +36,7 @@ class NFBBaseIE(InfoExtractor):
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': join_nonempty('series', 'episode', from_dict=info, delim=' - '),
|
'title': join_nonempty('series', 'episode', from_dict=info, delim=' - '),
|
||||||
'episode_number': int_or_none(self._search_regex(
|
'episode_number': int_or_none(self._search_regex(
|
||||||
r'-e(?:pisode)?-?(\d+)(?:-|$)', video_id, 'episode number', default=None)),
|
r'[/-]e(?:pisode)?-?(\d+)(?:[/-]|$)', slug or video_id, 'episode number', default=None)),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -200,29 +201,33 @@ class NFBIE(NFBBaseIE):
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
'params': {'skip_download': 'm3u8'},
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'note': 'NFB film /embed/player/ page',
|
||||||
|
'url': 'https://www.nfb.ca/film/afterlife/embed/player/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'afterlife',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Afterlife',
|
||||||
|
'description': 'md5:84951394f594f1fb1e62d9c43242fdf5',
|
||||||
|
'release_year': 1978,
|
||||||
|
'duration': 420.0,
|
||||||
|
'uploader': 'Ishu Patel',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
site, type_, slug = self._match_valid_url(url).group('site', 'type', 'id')
|
site, type_, slug = self._match_valid_url(url).group('site', 'type', 'id')
|
||||||
|
# Need to construct the URL since we match /embed/player/ URLs as well
|
||||||
webpage, urlh = self._download_webpage_handle(f'https://www.{site}.ca/{type_}/{slug}/', slug)
|
webpage, urlh = self._download_webpage_handle(f'https://www.{site}.ca/{type_}/{slug}/', slug)
|
||||||
json_ld = self._yield_json_ld(webpage, slug)
|
# type_ can change from film to serie(s) after redirect; new slug may have episode number
|
||||||
|
type_, slug = self._match_valid_url(urlh.url).group('type', 'id')
|
||||||
|
|
||||||
# /film/ URLs have unique slugs used in the embed url
|
embed_url = urljoin(f'https://www.{site}.ca', self._html_search_regex(
|
||||||
video_id = slug if type_ == 'film' else traverse_obj(
|
r'<[^>]+\bid=["\']player-iframe["\'][^>]*\bsrc=["\']([^"\']+)', webpage, 'embed url'))
|
||||||
json_ld, (lambda _, v: 'VideoObject' in v['@type'], 'embedUrl', {self._match_id}),
|
video_id = self._match_id(embed_url) # embed url has unique slug
|
||||||
get_all=False) or self._match_id(self._og_search_property('url', webpage, 'video id'))
|
player = self._download_webpage(embed_url, video_id, 'Downloading player page')
|
||||||
|
|
||||||
# type_ may have changed from film to serie(s) after redirect
|
|
||||||
type_ = self._match_valid_url(urlh.url).group('type')
|
|
||||||
|
|
||||||
player = self._download_webpage(
|
|
||||||
f'https://www.{site}.ca/film/{video_id}/embed/player/', video_id,
|
|
||||||
'Downloading player page', query={
|
|
||||||
'player_mode': '',
|
|
||||||
'embed_mode': '0',
|
|
||||||
'auto_focus': '1',
|
|
||||||
'context_type': type_ if type_ == 'film' else 'episode',
|
|
||||||
})
|
|
||||||
if 'MESSAGE_GEOBLOCKED' in player:
|
if 'MESSAGE_GEOBLOCKED' in player:
|
||||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||||
|
|
||||||
|
@ -244,12 +249,12 @@ class NFBIE(NFBBaseIE):
|
||||||
'release_year': int_or_none(self._html_search_regex(
|
'release_year': int_or_none(self._html_search_regex(
|
||||||
r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)',
|
r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)',
|
||||||
webpage, 'release_year', default=None)),
|
webpage, 'release_year', default=None)),
|
||||||
} if type_ == 'film' else self._extract_ep_info(self._extract_ep_data(webpage, video_id), video_id)
|
} if type_ == 'film' else self._extract_ep_info(self._extract_ep_data(webpage, video_id, slug), video_id)
|
||||||
|
|
||||||
return merge_dicts({
|
return merge_dicts({
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}, info, self._json_ld(json_ld, video_id))
|
}, info, self._search_json_ld(webpage, video_id, default={}))
|
||||||
|
|
||||||
|
|
||||||
class NFBSeriesIE(NFBBaseIE):
|
class NFBSeriesIE(NFBBaseIE):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user