[ie/twitcasting] Support --wait-for-video (#7975)

Authored by: at-wat
[test:download] Test for expected_exception
2024-09-20 01:11:23 +02:00 · 2023-09-21 23:04:05 +00:00 · 2023-09-21 17:48:57 -05:00 · 2023-09-21 22:20:52 +00:00 · 2023-09-21 16:54:57 -05:00 · 2023-09-21 16:51:57 -05:00
6 changed files with 42 additions and 4 deletions
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -31,6 +31,7 @@ from yt_dlp.utils import (
    DownloadError,
    ExtractorError,
    UnavailableVideoError,
+    YoutubeDLError,
    format_bytes,
    join_nonempty,
 )
@@ -100,6 +101,8 @@ def generator(test_case, tname):
            print_skipping('IE marked as not _WORKING')

        for tc in test_cases:
+            if tc.get('expected_exception'):
+                continue
            info_dict = tc.get('info_dict', {})
            params = tc.get('params', {})
            if not info_dict.get('id'):
@@ -139,6 +142,17 @@ def generator(test_case, tname):

        res_dict = None

+        def match_exception(err):
+            expected_exception = test_case.get('expected_exception')
+            if not expected_exception:
+                return False
+            if err.__class__.__name__ == expected_exception:
+                return True
+            for exc in err.exc_info:
+                if exc.__class__.__name__ == expected_exception:
+                    return True
+            return False
+
        def try_rm_tcs_files(tcs=None):
            if tcs is None:
                tcs = test_cases
@@ -161,6 +175,8 @@ def generator(test_case, tname):
                except (DownloadError, ExtractorError) as err:
                    # Check if the exception is not a network related one
                    if not isinstance(err.exc_info[1], (TransportError, UnavailableVideoError)) or (isinstance(err.exc_info[1], HTTPError) and err.exc_info[1].status == 503):
+                        if match_exception(err):
+                            return
                        err.msg = f'{getattr(err, "msg", err)} ({tname})'
                        raise

@@ -171,6 +187,10 @@ def generator(test_case, tname):
                    print(f'Retrying: {try_num} failed tries\n\n##########\n\n')

                    try_num += 1
+                except YoutubeDLError as err:
+                    if match_exception(err):
+                        return
+                    raise
                else:
                    break

--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1218,6 +1218,12 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
        self.assertEqual(js_to_json('`${name}`', {}), '"name"')

+    def test_js_to_json_map_array_constructors(self):
+        self.assertEqual(json.loads(js_to_json('new Map([["a", 5]])')), {'a': 5})
+        self.assertEqual(json.loads(js_to_json('Array(5, 10)')), [5, 10])
+        self.assertEqual(json.loads(js_to_json('new Array(15,5)')), [15, 5])
+        self.assertEqual(json.loads(js_to_json('new Map([Array(5, 10),new Array(15,5)])')), {'5': 10, '15': 5})
+
    def test_extract_attributes(self):
        self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
        self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1687,7 +1687,7 @@ class InfoExtractor:
    def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
        """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
        rectx = re.escape(context_name)
-        FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
+        FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){(?:.*?)return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
        js, arg_keys, arg_vals = self._search_regex(
            (rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
            webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
--- a/yt_dlp/extractor/iprima.py
+++ b/yt_dlp/extractor/iprima.py
@@ -134,10 +134,17 @@ class IPrimaIE(InfoExtractor):
        ), webpage, 'real id', group='id', default=None)

        if not video_id:
-            nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data')
+            nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data', fatal=False)
            video_id = traverse_obj(
                nuxt_data, (..., 'content', 'additionals', 'videoPlayId', {str}), get_all=False)

+        if not video_id:
+            nuxt_data = self._search_json(
+                r'<script[^>]+\bid=["\']__NUXT_DATA__["\'][^>]*>',
+                webpage, 'nuxt data', None, end_pattern=r'</script>', contains_pattern=r'\[(?s:.+)\]')
+
+            video_id = traverse_obj(nuxt_data, lambda _, v: re.fullmatch(r'p\d+', v), get_all=False)
+
        if not video_id:
            self.raise_no_formats('Unable to extract video ID from webpage')

--- a/yt_dlp/extractor/twitcasting.py
+++ b/yt_dlp/extractor/twitcasting.py
@@ -5,8 +5,9 @@ import re
 from .common import InfoExtractor
 from ..dependencies import websockets
 from ..utils import (
-    clean_html,
    ExtractorError,
+    UserNotLive,
+    clean_html,
    float_or_none,
    get_element_by_class,
    get_element_by_id,
@@ -235,6 +236,9 @@ class TwitCastingLiveIE(InfoExtractor):
    _TESTS = [{
        'url': 'https://twitcasting.tv/ivetesangalo',
        'only_matching': True,
+    }, {
+        'url': 'https://twitcasting.tv/c:unusedlive',
+        'expected_exception': 'UserNotLive',
    }]

    def _real_extract(self, url):
@@ -260,7 +264,7 @@ class TwitCastingLiveIE(InfoExtractor):
                    r'(?s)<a\s+class="tw-movie-thumbnail"\s*href="/[^/]+/movie/(?P<video_id>\d+)"\s*>.+?</a>',
                    webpage, 'current live ID 2', default=None, group='video_id')
        if not current_live:
-            raise ExtractorError('The user is not currently live')
+            raise UserNotLive(video_id=uploader_id)
        return self.url_result('https://twitcasting.tv/%s/movie/%s' % (uploader_id, current_live))


--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -2727,6 +2727,7 @@ def js_to_json(code, vars={}, *, strict=False):
    def create_map(mobj):
        return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))

+    code = re.sub(r'(?:new\s+)?Array\((.*?)\)', r'[\g<1>]', code)
    code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
    if not strict:
        code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
Author	SHA1	Message	Date
Atsushi Watanabe	c1d71d0d9f	[ie/twitcasting] Support `--wait-for-video` (#7975) Authored by: at-wat	2023-09-21 23:04:05 +00:00
bashonly	661c9a1d02	[test:download] Test for `expected_exception` Authored by: at-wat Co-authored-by: Atsushi Watanabe <atsushi.w@ieee.org>	2023-09-21 17:48:57 -05:00
std-move	568f080518	[ie/iprima] Fix extractor (#7216) Closes #7229 Authored by: std-move	2023-09-21 22:20:52 +00:00
bashonly	904a19ee93	[ie] Make `_search_nuxt_data` more lenient Authored by: std-move Co-authored-by: std-move <26625259+std-move@users.noreply.github.com>	2023-09-21 16:54:57 -05:00
bashonly	52414d64ca	[utils] `js_to_json`: Handle `Array` objects Authored by: Grub4K, std-move Co-authored-by: std-move <26625259+std-move@users.noreply.github.com> Co-authored-by: Simon Sawicki <accounts@grub4k.xyz>	2023-09-21 16:51:57 -05:00