Compare commits

...

3 Commits

Author SHA1 Message Date
bashonly
151d7b38d3
make findall pattern more robust 2024-11-11 21:55:58 +00:00
bashonly
3bb4fa5c40
_find_json: default to None 2024-11-11 21:54:31 +00:00
bashonly
cc186aa008
replace dead test 2024-11-11 21:53:27 +00:00

View File

@@ -39,12 +39,12 @@ class GoPlayIE(InfoExtractor):
'params': {'skip_download': True}, 'params': {'skip_download': True},
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
}, { }, {
'url': 'https://www.goplay.be/video/fantastic-beasts-the-secrets-of-dumbledore', 'url': 'https://www.goplay.be/video/1917',
'info_dict': { 'info_dict': {
'id': '046a91f1-db9c-41ff-8652-d35881ea72c4', 'id': '40cac41d-8d29-4ef5-aa11-75047b9f0907',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Fantastic Beasts: The Secrets of Dumbledore', 'title': '1917',
'description': r're:Professor Albus Dumbledore ontdekt dat de duistere tovenaar .{132}', 'description': r're:Op het hoogtepunt van de Eerste Wereldoorlog krijgen twee jonge .{94}',
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
@@ -78,14 +78,14 @@ class GoPlayIE(InfoExtractor):
def _find_json(self, s): def _find_json(self, s):
return self._search_json( return self._search_json(
r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=[]) r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
nextjs_data = traverse_obj( nextjs_data = traverse_obj(
re.findall(r'<script>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\)\s*</script>', webpage), re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
(..., {js_to_json}, {json.loads}, ..., {self._find_json}, ...)) (..., {js_to_json}, {json.loads}, ..., {self._find_json}, ...))
meta = traverse_obj(nextjs_data, ( meta = traverse_obj(nextjs_data, (
..., lambda _, v: v['meta']['path'] == urllib.parse.urlparse(url).path, 'meta', any)) ..., lambda _, v: v['meta']['path'] == urllib.parse.urlparse(url).path, 'meta', any))