Compare commits

..

5 Commits

Author SHA1 Message Date
Atsushi Watanabe
c1d71d0d9f
[ie/twitcasting] Support --wait-for-video (#7975)
Authored by: at-wat
2023-09-21 23:04:05 +00:00
bashonly
661c9a1d02
[test:download] Test for expected_exception
Authored by: at-wat

Co-authored-by: Atsushi Watanabe <atsushi.w@ieee.org>
2023-09-21 17:48:57 -05:00
std-move
568f080518
[ie/iprima] Fix extractor (#7216)
Closes #7229
Authored by: std-move
2023-09-21 22:20:52 +00:00
bashonly
904a19ee93
[ie] Make _search_nuxt_data more lenient
Authored by: std-move

Co-authored-by: std-move <26625259+std-move@users.noreply.github.com>
2023-09-21 16:54:57 -05:00
bashonly
52414d64ca
[utils] js_to_json: Handle Array objects
Authored by: Grub4K, std-move

Co-authored-by: std-move <26625259+std-move@users.noreply.github.com>
Co-authored-by: Simon Sawicki <accounts@grub4k.xyz>
2023-09-21 16:51:57 -05:00
6 changed files with 42 additions and 4 deletions

View File

@ -31,6 +31,7 @@ from yt_dlp.utils import (
DownloadError, DownloadError,
ExtractorError, ExtractorError,
UnavailableVideoError, UnavailableVideoError,
YoutubeDLError,
format_bytes, format_bytes,
join_nonempty, join_nonempty,
) )
@ -100,6 +101,8 @@ def generator(test_case, tname):
print_skipping('IE marked as not _WORKING') print_skipping('IE marked as not _WORKING')
for tc in test_cases: for tc in test_cases:
if tc.get('expected_exception'):
continue
info_dict = tc.get('info_dict', {}) info_dict = tc.get('info_dict', {})
params = tc.get('params', {}) params = tc.get('params', {})
if not info_dict.get('id'): if not info_dict.get('id'):
@ -139,6 +142,17 @@ def generator(test_case, tname):
res_dict = None res_dict = None
def match_exception(err):
    """Tell whether ``err`` is the failure this test case expects.

    The expected exception is read by class name from the enclosing
    ``test_case`` under the ``'expected_exception'`` key. ``err`` matches
    when its own class has that name, or when any entry of
    ``err.exc_info`` is an instance of a class with that name.

    Returns False when the test case declares no expected exception.
    """
    expected_exception = test_case.get('expected_exception')
    if not expected_exception:
        return False
    if err.__class__.__name__ == expected_exception:
        return True
    # This helper is also called for every YoutubeDLError caught by the
    # caller, so do not assume ``exc_info`` exists or is iterable: a
    # missing attribute or a None value is treated as "nothing wrapped"
    # instead of raising and masking the original error.
    wrapped = getattr(err, 'exc_info', None) or ()
    return any(exc.__class__.__name__ == expected_exception for exc in wrapped)
def try_rm_tcs_files(tcs=None): def try_rm_tcs_files(tcs=None):
if tcs is None: if tcs is None:
tcs = test_cases tcs = test_cases
@ -161,6 +175,8 @@ def generator(test_case, tname):
except (DownloadError, ExtractorError) as err: except (DownloadError, ExtractorError) as err:
# Check if the exception is not a network related one # Check if the exception is not a network related one
if not isinstance(err.exc_info[1], (TransportError, UnavailableVideoError)) or (isinstance(err.exc_info[1], HTTPError) and err.exc_info[1].status == 503): if not isinstance(err.exc_info[1], (TransportError, UnavailableVideoError)) or (isinstance(err.exc_info[1], HTTPError) and err.exc_info[1].status == 503):
if match_exception(err):
return
err.msg = f'{getattr(err, "msg", err)} ({tname})' err.msg = f'{getattr(err, "msg", err)} ({tname})'
raise raise
@ -171,6 +187,10 @@ def generator(test_case, tname):
print(f'Retrying: {try_num} failed tries\n\n##########\n\n') print(f'Retrying: {try_num} failed tries\n\n##########\n\n')
try_num += 1 try_num += 1
except YoutubeDLError as err:
if match_exception(err):
return
raise
else: else:
break break

View File

@ -1218,6 +1218,12 @@ class TestUtil(unittest.TestCase):
self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""') self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
self.assertEqual(js_to_json('`${name}`', {}), '"name"') self.assertEqual(js_to_json('`${name}`', {}), '"name"')
def test_js_to_json_map_array_constructors(self):
    """Check js_to_json on JavaScript ``Map`` and ``Array`` constructor calls."""
    # Each pair is (JavaScript input, value expected once the converted
    # text has been parsed with json.loads).
    cases = (
        ('new Map([["a", 5]])', {'a': 5}),
        ('Array(5, 10)', [5, 10]),
        ('new Array(15,5)', [15, 5]),
        ('new Map([Array(5, 10),new Array(15,5)])', {'5': 10, '15': 5}),
    )
    for js_code, expected in cases:
        self.assertEqual(json.loads(js_to_json(js_code)), expected)
def test_extract_attributes(self): def test_extract_attributes(self):
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'}) self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})

View File

@ -1687,7 +1687,7 @@ class InfoExtractor:
def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)): def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function""" """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
rectx = re.escape(context_name) rectx = re.escape(context_name)
FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)' FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){(?:.*?)return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
js, arg_keys, arg_vals = self._search_regex( js, arg_keys, arg_vals = self._search_regex(
(rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'), (rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
webpage, context_name, group=('js', 'arg_keys', 'arg_vals'), webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),

View File

@ -134,10 +134,17 @@ class IPrimaIE(InfoExtractor):
), webpage, 'real id', group='id', default=None) ), webpage, 'real id', group='id', default=None)
if not video_id: if not video_id:
nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data') nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data', fatal=False)
video_id = traverse_obj( video_id = traverse_obj(
nuxt_data, (..., 'content', 'additionals', 'videoPlayId', {str}), get_all=False) nuxt_data, (..., 'content', 'additionals', 'videoPlayId', {str}), get_all=False)
if not video_id:
nuxt_data = self._search_json(
r'<script[^>]+\bid=["\']__NUXT_DATA__["\'][^>]*>',
webpage, 'nuxt data', None, end_pattern=r'</script>', contains_pattern=r'\[(?s:.+)\]')
video_id = traverse_obj(nuxt_data, lambda _, v: re.fullmatch(r'p\d+', v), get_all=False)
if not video_id: if not video_id:
self.raise_no_formats('Unable to extract video ID from webpage') self.raise_no_formats('Unable to extract video ID from webpage')

View File

@ -5,8 +5,9 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..dependencies import websockets from ..dependencies import websockets
from ..utils import ( from ..utils import (
clean_html,
ExtractorError, ExtractorError,
UserNotLive,
clean_html,
float_or_none, float_or_none,
get_element_by_class, get_element_by_class,
get_element_by_id, get_element_by_id,
@ -235,6 +236,9 @@ class TwitCastingLiveIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'https://twitcasting.tv/ivetesangalo', 'url': 'https://twitcasting.tv/ivetesangalo',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://twitcasting.tv/c:unusedlive',
'expected_exception': 'UserNotLive',
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -260,7 +264,7 @@ class TwitCastingLiveIE(InfoExtractor):
r'(?s)<a\s+class="tw-movie-thumbnail"\s*href="/[^/]+/movie/(?P<video_id>\d+)"\s*>.+?</a>', r'(?s)<a\s+class="tw-movie-thumbnail"\s*href="/[^/]+/movie/(?P<video_id>\d+)"\s*>.+?</a>',
webpage, 'current live ID 2', default=None, group='video_id') webpage, 'current live ID 2', default=None, group='video_id')
if not current_live: if not current_live:
raise ExtractorError('The user is not currently live') raise UserNotLive(video_id=uploader_id)
return self.url_result('https://twitcasting.tv/%s/movie/%s' % (uploader_id, current_live)) return self.url_result('https://twitcasting.tv/%s/movie/%s' % (uploader_id, current_live))

View File

@ -2727,6 +2727,7 @@ def js_to_json(code, vars={}, *, strict=False):
def create_map(mobj): def create_map(mobj):
return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars)))) return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
code = re.sub(r'(?:new\s+)?Array\((.*?)\)', r'[\g<1>]', code)
code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code) code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
if not strict: if not strict:
code = re.sub(r'new Date\((".+")\)', r'\g<1>', code) code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)