Compare commits

..

3 Commits

Author SHA1 Message Date
Anonymous
389f903005 use search_json instead of search_regex and parse_json 2023-09-20 14:08:04 +02:00
Anonymous
99bd7b77a2 add simple test for Javascript Array/Map constructors in js_to_json 2023-09-20 12:18:29 +02:00
std-move
f60f0c387e
Update yt_dlp/utils/_utils.py
simplify Array constructor replacement by using backreferences

Co-authored-by: Simon Sawicki <accounts@grub4k.xyz>
2023-09-20 12:17:10 +02:00
3 changed files with 10 additions and 13 deletions

View File

@ -1200,6 +1200,12 @@ class TestUtil(unittest.TestCase):
self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""') self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
self.assertEqual(js_to_json('`${name}`', {}), '"name"') self.assertEqual(js_to_json('`${name}`', {}), '"name"')
def test_js_to_json_map_array_constructors(self):
    """Check js_to_json on JavaScript ``Array``/``Map`` constructor calls.

    Each snippet is converted with js_to_json, the result is parsed with
    json.loads, and the parsed value is compared with the expected Python
    object.
    """
    # (JavaScript snippet, value expected after json.loads) pairs; they are
    # checked in this order and the first mismatch fails the test.
    cases = (
        ('new Map([["a", 5]])', {'a': 5}),
        ('Array(5, 10)', [5, 10]),
        ('new Array(15,5)', [15, 5]),
        ('new Map([Array(5, 10),new Array(15,5)])', {'5': 10, '15': 5}),
    )
    for js_code, expected in cases:
        converted = js_to_json(js_code)
        self.assertEqual(json.loads(converted), expected)
def test_extract_attributes(self): def test_extract_attributes(self):
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'}) self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})

View File

@ -139,11 +139,9 @@ class IPrimaIE(InfoExtractor):
nuxt_data, (..., 'content', 'additionals', 'videoPlayId', {str}), get_all=False) nuxt_data, (..., 'content', 'additionals', 'videoPlayId', {str}), get_all=False)
if not video_id: if not video_id:
nuxt_data = self._parse_json( nuxt_data = self._search_json(
self._search_regex( r'<script[^>]+\bid=["\']__NUXT_DATA__["\'][^>]*>',
r'(?s)<script[^>]+\bid=["\']__NUXT_DATA__["\'][^>]+>(.+?)</script>', webpage, 'nuxt data', None, end_pattern=r'</script>', contains_pattern=r'(?s:.+?)')
webpage, 'nuxt data'),
'nuxt data')
video_id = traverse_obj(nuxt_data, lambda _, v: re.fullmatch(r'p\d+', v), get_all=False) video_id = traverse_obj(nuxt_data, lambda _, v: re.fullmatch(r'p\d+', v), get_all=False)

View File

@ -3234,15 +3234,8 @@ def js_to_json(code, vars={}, *, strict=False):
def create_map(mobj): def create_map(mobj):
return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars)))) return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
def create_array(mobj): code = re.sub(r'(?:new\s+)?Array\((.*?)\)', r'[\g<1>]', code)
return mobj.group(1) + js_to_json(f'[{mobj.group(2)}]', vars=vars) + mobj.group(3)
code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code) code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
while True:
old_code = code
code = re.sub(r'^(.*?)(?:new\s+)?Array\((.*?)\)(.*?)$', create_array, old_code)
if old_code == code:
break
if not strict: if not strict:
code = re.sub(r'new Date\((".+")\)', r'\g<1>', code) code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code) code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)