Improve readme even more than more

Improve readme even more
Improve readme
2024-09-20 01:11:23 +02:00 · 2023-10-02 22:46:48 +02:00 · 2023-10-02 21:40:30 +02:00 · 2023-10-02 21:33:30 +02:00 · 2023-10-02 21:00:33 +02:00
2 changed files with 57 additions and 49 deletions
--- a/README.md
+++ b/README.md
@@ -1809,7 +1809,7 @@ The following extractors use this feature:
 * `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8)
 * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
 * `innertube_key`: Innertube API key to use for all API requests
-* `raise_incomplete_data`: `Incomplete Data` error should raise an error instead of a warning.
+* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
 #### youtubetab (YouTube playlists, channels, feeds, etc.)
 * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -941,57 +941,65 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
-        for icd_retry in self.RetryManager(
+        raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
-            fatal=self._configuration_arg('raise_incomplete_data', [False], ie_key=YoutubeIE)[0] is not False
+        icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
-        ):
+        icd_rm = next(icd_retries)
-            for retry in self.RetryManager():
+        main_retries = iter(self.RetryManager())
-                try:
+        main_rm = next(main_retries)
-                    response = self._call_api(
+        for _ in range(main_rm.retries + icd_rm.retries + 1):
-                        ep=ep, fatal=True, headers=headers,
+            try:
-                        video_id=item_id, query=query, note=note,
+                response = self._call_api(
-                        context=self._extract_context(ytcfg, default_client),
+                    ep=ep, fatal=True, headers=headers,
-                        api_key=self._extract_api_key(ytcfg, default_client),
+                    video_id=item_id, query=query, note=note,
-                        api_hostname=api_hostname, default_client=default_client)
+                    context=self._extract_context(ytcfg, default_client),
-                except ExtractorError as e:
+                    api_key=self._extract_api_key(ytcfg, default_client),
-                    if not isinstance(e.cause, network_exceptions):
+                    api_hostname=api_hostname, default_client=default_client)
-                        return self._error_or_warning(e, fatal=fatal)
+            except ExtractorError as e:
-                    elif not isinstance(e.cause, HTTPError):
+                if not isinstance(e.cause, network_exceptions):
                        retry.error = e
                        continue
                    first_bytes = e.cause.response.read(512)
                    if not is_html(first_bytes):
                        yt_error = try_get(
                            self._parse_json(
                                self._webpage_read_content(e.cause.response, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                            lambda x: x['error']['message'], str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if e.cause.status not in (403, 429):
                        retry.error = e
                        continue
                    return self._error_or_warning(e, fatal=fatal)
-
+                elif not isinstance(e.cause, HTTPError):
-                try:
+                    main_rm.error = e
-                    self._extract_and_report_alerts(response, only_once=True)
+                    next(main_retries)
                except ExtractorError as e:
                    # YouTube servers may return errors we want to retry on in a 200 OK response
                    # See: https://github.com/yt-dlp/yt-dlp/issues/839
                    if 'unknown error' in e.msg.lower():
                        retry.error = e
                        continue
                    return self._error_or_warning(e, fatal=fatal)
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                if not traverse_obj(response, *variadic(check_get_keys)):
                    icd_retry.error = ExtractorError('Incomplete data received', expected=True)
                    continue
-                return response
+                first_bytes = e.cause.response.read(512)
                if not is_html(first_bytes):
                    yt_error = try_get(
                        self._parse_json(
                            self._webpage_read_content(e.cause.response, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                        lambda x: x['error']['message'], str)
                    if yt_error:
                        self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if e.cause.status not in (403, 429):
                    main_rm.error = e
                    next(main_retries)
                    continue
                return self._error_or_warning(e, fatal=fatal)
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                if 'unknown error' in e.msg.lower():
                    main_rm.error = e
                    next(main_retries)
                    continue
                return self._error_or_warning(e, fatal=fatal)
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            if not traverse_obj(response, *variadic(check_get_keys)):
                icd_rm.error = ExtractorError('Incomplete data received', expected=True)
                should_retry = next(icd_retries, None)
                if not should_retry:
                    return None
                continue
            return response
    @staticmethod
    def is_music_url(url):
Author	SHA1	Message	Date
Simon Sawicki	716492e82d	Improve readme even more than more	2023-10-02 22:46:48 +02:00
Simon Sawicki	5e867eb794	Improve readme even more	2023-10-02 21:40:30 +02:00
Simon Sawicki	636751f1fd	Improve readme	2023-10-02 21:33:30 +02:00
Simon Sawicki	c3e9646ecc	Rework to use flat loop	2023-10-02 21:00:33 +02:00