Merge bcace2019a into c699bafc50

impersonate=True in download_json, set age limit
inline request params to download_json
2024-11-26 01:01:25 +01:00 · 2024-11-16 00:09:14 +01:00 · 2024-11-15 21:31:49 +01:00 · 2024-11-15 21:13:27 +01:00 · 2024-11-15 21:04:57 +01:00 · 2024-11-15 21:04:13 +01:00
1 changed file with 45 additions and 6 deletions
--- a/yt_dlp/extractor/chaturbate.py
+++ b/yt_dlp/extractor/chaturbate.py
@ -5,6 +5,7 @@ from ..utils import (
    ExtractorError,
    lowercase_escape,
    url_or_none,
    urlencode_postdata,
 )
@ -40,14 +41,48 @@ class ChaturbateIE(InfoExtractor):
        'only_matching': True,
    }]
-    _ROOM_OFFLINE = 'Room is currently offline'
+    _ERROR_MAP = {
        'offline': 'Room is currently offline',
        'private': 'Room is currently in a private show',
        'away': 'Performer is currently away',
        'password protected': 'Room is password protected',
        'hidden': 'Hidden session in progress',
    }
-    def _real_extract(self, url):
+    def _extract_from_api(self, video_id, tld):
-        video_id, tld = self._match_valid_url(url).group('id', 'tld')
        # Query the site's get_edge_hls_url_ajax endpoint for this room by
        # POSTing the room slug. The request is non-fatal and the result is
        # coerced to a dict with `or {}`, so the status check below still runs
        # when no usable JSON came back (presumably a falsy return on failure;
        # confirm against _download_json).
+        response = self._download_json(
            f'https://chaturbate.{tld}/get_edge_hls_url_ajax/', video_id,
            data=urlencode_postdata({'room_slug': video_id}),
            headers={
                **self.geo_verification_headers(),
                'X-Requested-With': 'XMLHttpRequest',
                'Accept': 'application/json',
            }, fatal=False, impersonate=True) or {}
        status = response.get('room_status')
        if status != 'public':
            # A status listed in _ERROR_MAP is reported as an expected error.
            # Any other status (including a missing one) returns None so the
            # caller can fall back to webpage extraction.
            if error := self._ERROR_MAP.get(status):
                raise ExtractorError(error, expected=True)
            self.report_warning('Falling back to webpage extraction')
            return None
        m3u8_url = response.get('url')
        if not m3u8_url:
            # A public room without a stream URL is reported as geo-restricted.
            self.raise_geo_restricted()
        # Info dict for the live stream; the room slug doubles as id and title.
        return {
            'id': video_id,
            'title': video_id,
            'thumbnail': f'https://roomimg.stream.highwebmedia.com/ri/{video_id}.jpg',
            'is_live': True,
            'age_limit': 18,
            'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True),
        }
    def _extract_from_webpage(self, video_id, tld):
        webpage = self._download_webpage(
            f'https://chaturbate.{tld}/{video_id}/', video_id,
-            headers=self.geo_verification_headers())
+            headers=self.geo_verification_headers(), impersonate=True)
        found_m3u8_urls = []
@ -85,8 +120,8 @@ class ChaturbateIE(InfoExtractor):
                webpage, 'error', group='error', default=None)
            if not error:
                if any(p in webpage for p in (
-                        self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')):
+                        self._ERROR_MAP['offline'], 'offline_tipping', 'tip_offline')):
-                    error = self._ROOM_OFFLINE
+                    error = self._ERROR_MAP['offline']
            if error:
                raise ExtractorError(error, expected=True)
            raise ExtractorError('Unable to find stream URL')
@ -113,3 +148,7 @@ class ChaturbateIE(InfoExtractor):
            'is_live': True,
            'formats': formats,
        }
    def _real_extract(self, url):
        video_id, tld = self._match_valid_url(url).group('id', 'tld')
        # Try the API first; it returns None (falsy) when it cannot classify
        # the room, in which case the webpage-based extraction is used instead.
        return self._extract_from_api(video_id, tld) or self._extract_from_webpage(video_id, tld)
Author	SHA1	Message	Date
powergold1	daef233092	Merge `bcace2019a` into `c699bafc50`	2024-11-16 00:09:14 +01:00
Leonhard Staut	bcace2019a	impersonate=True in download_json, set age limit	2024-11-15 21:31:49 +01:00
Leonhard Staut	7cdc93e736	inline request params to download_json	2024-11-15 21:13:27 +01:00
Leonhard Staut	260f282b16	inline formats extraction	2024-11-15 21:04:57 +01:00
Leonhard Staut	fc9163c101	use raise_geo_restricted	2024-11-15 21:04:13 +01:00
Leonhard Staut	6c590de17a	error map for more compact code	2024-11-15 21:03:40 +01:00
Leonhard Staut	548d50db25	[Chaturbate] Use API to get the stream URL. Use the AJAX API to get the stream URL. This also allows easier extraction of the room status. I left the existing webpage-based extraction as a fallback.	2024-11-15 18:25:06 +01:00