Merge f242b7c42d into 4b5eec0aaa

[ie/chaturbate] Fix support for non-public streams (#11624)
Fix bug in 720b3dc453. Closes #11623. Authored by: jkruse
2024-11-27 01:31:25 +01:00 · 2024-11-25 14:26:52 +01:00 · 2024-11-24 22:20:30 +00:00 · 2024-11-13 16:15:41 -06:00 · 2024-11-13 16:12:55 -06:00 · 2024-11-13 16:00:29 -06:00
2 changed files with 116 additions and 50 deletions
--- a/yt_dlp/extractor/alphaporno.py
+++ b/yt_dlp/extractor/alphaporno.py
@@ -1,70 +1,136 @@
 import re
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    parse_duration,
    parse_filesize,
-    parse_iso8601,
+    parse_resolution,
    unified_timestamp,
    urljoin,
 )
 class AlphaPornoIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?alphaporno\.com/videos/(?P<id>[^/]+)'
-    _TEST = {
+    _TESTS = [
-        'url': 'http://www.alphaporno.com/videos/sensual-striptease-porn-with-samantha-alexandra/',
+        {
-        'md5': 'feb6d3bba8848cd54467a87ad34bd38e',
+            'url': 'http://www.alphaporno.com/videos/sensual-striptease-porn-with-samantha-alexandra/',
-        'info_dict': {
+            'md5': '7e6a1cdd48fa67362a5a11d7039164e7',
-            'id': '258807',
+            'info_dict': {
-            'display_id': 'sensual-striptease-porn-with-samantha-alexandra',
+                'id': '258807',
-            'ext': 'mp4',
+                'display_id': 'sensual-striptease-porn-with-samantha-alexandra',
-            'title': 'Sensual striptease porn with Samantha Alexandra',
+                'ext': 'mp4',
-            'thumbnail': r're:https?://.*\.jpg$',
+                'title': 'Sensual striptease porn with Samantha Alexandra',
-            'timestamp': 1418694611,
+                'description': 'md5:3c6d31008980654acaeb11451454a62c',
-            'upload_date': '20141216',
+                'thumbnail': r're:https?://.*\.jpg$',
-            'duration': 387,
+                'timestamp': 1418701811,
-            'filesize_approx': 54120000,
+                'upload_date': '20141216',
-            'tbr': 1145,
+                'duration': 387,
-            'categories': list,
+                'categories': list,
-            'age_limit': 18,
+                'age_limit': 18,
            },
        },
-    }
+        {
            'url': 'https://www.alphaporno.com/videos/amazing-inches-hammering-her-pussy-in-such-addictive-ways/',
            'info_dict': {
                'id': '433761',
                'ext': 'mp4',
                'title': 'Amazing inches hammering her pussy in such addictive ways',
                'display_id': 'amazing-inches-hammering-her-pussy-in-such-addictive-ways',
                'timestamp': 1641065820,
                'upload_date': '20220101',
                'description': 'md5:8bf8e04807b890b847cc9238c445783a',
                'categories': 'count:12',
                'age_limit': 18,
                'thumbnail': r're:https?://.*\.jpg$',
                'duration': 298.0,
            },
        },
        {
            'url': 'https://www.alphaporno.com/videos/anal-threesome-for-girls-younger-than-the-average/',
            'info_dict': {
                'id': '435603',
                'ext': 'mp4',
                'duration': 358.0,
                'description': 'md5:bf8ca502575c20e15f4f33740cd20a94',
                'categories': 'count:21',
                'title': 'Anal threesome for girls y***er than the average',
                'display_id': 'anal-threesome-for-girls-younger-than-the-average',
                'upload_date': '20220209',
                'thumbnail': r're:https?://.*\.jpg$',
                'age_limit': 18,
                'timestamp': 1644387720,
            },
        },
    ]
    def _real_extract(self, url):
        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
+        webpage, urlh = self._download_webpage_handle(url, display_id)
        info = {
            'display_id': display_id,
        }
        video_id = self._search_regex(
            r"video_id\s*:\s*'([^']+)'", webpage, 'video id', default=None)
        if video_id:
            info['url'] = self._search_regex(
                r"video_url\s*:\s*'([^']+)'", webpage, 'video url')
            info['ext'] = self._html_search_meta(
                'encodingFormat', webpage, 'ext', default='.mp4')[1:]
        else:
            video_id = self._search_regex(
                r'video_id=(\d+)\b', webpage, 'video id')
            formats = []
            joined_url = urljoin(urlh.url, rf'/get_file/\d.+?/{video_id}/{video_id}_(\w+)\..+?')
            for video_url, res in re.findall(rf'''({joined_url})(?:'|"|\b)\s''', webpage):
                fmt = {
                    'format_id': f'f{res}',
                    'url': video_url,
                }
                fmt.update(parse_resolution(res) or {})
                formats.append(fmt)
            info['formats'] = formats
-        video_url = self._search_regex(
+        title = (
-            r"video_url\s*:\s*'([^']+)'", webpage, 'video url')
+            self._html_search_regex(r'<title[^>]*>([^<]+?)(?:\s*-\s*Alpha\s*Porno\s*)?<', webpage, 'title', default=None)
-        ext = self._html_search_meta(
+            or self._og_search_title(webpage, default=None)
-            'encodingFormat', webpage, 'ext', default='.mp4')[1:]
+            or self._search_regex(
-
+                (r'<meta content="([^"]+)" itemprop="description">',
-        title = self._search_regex(
+                 r'class="title" itemprop="name">([^<]+)<'),
-            [r'<meta content="([^"]+)" itemprop="description">',
+                webpage, 'title')
-             r'class="title" itemprop="name">([^<]+)<'],
+        )
-            webpage, 'title')
+        description = (
-        thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail')
+            self._og_search_description(webpage)
-        timestamp = parse_iso8601(self._html_search_meta(
+            or self._search_regex(
-            'uploadDate', webpage, 'upload date'))
+                r'<meta content="([^"]+)" itemprop="description">',
-        duration = parse_duration(self._html_search_meta(
+                webpage, 'description')
-            'duration', webpage, 'duration'))
+        )
        thumbnail = (
            self._og_search_thumbnail(webpage)
            or self._html_search_meta('thumbnail', webpage, 'thumbnail')
        )
        timestamp = unified_timestamp(self._og_search_property('video:release_date', webpage)
                                      or self._html_search_meta('uploadDate', webpage, 'upload date'))
        duration = parse_duration(self._og_search_property('video:duration', webpage)
                                  or self._html_search_meta('duration', webpage, 'duration'))
        filesize_approx = parse_filesize(self._html_search_meta(
-            'contentSize', webpage, 'file size'))
+            'contentSize', webpage, 'file size', default=None))
        bitrate = int_or_none(self._html_search_meta(
-            'bitrate', webpage, 'bitrate'))
+            'bitrate', webpage, 'bitrate', default=None))
-        categories = self._html_search_meta(
+        categories = re.split(
-            'keywords', webpage, 'categories', default='').split(',')
+            r'\s*,\s*',
            self._html_search_meta(
                'keywords', webpage, 'categories', default=''))
        age_limit = self._rta_search(webpage)
-        return {
+        info.update({
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'ext': ext,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
@@ -72,4 +138,5 @@ class AlphaPornoIE(InfoExtractor):
            'tbr': bitrate,
            'categories': categories,
            'age_limit': age_limit,
-        }
+        })
        return info
--- a/yt_dlp/extractor/chaturbate.py
+++ b/yt_dlp/extractor/chaturbate.py
@@ -59,16 +59,15 @@ class ChaturbateIE(InfoExtractor):
                'Accept': 'application/json',
            }, fatal=False, impersonate=True) or {}
        status = response.get('room_status')
        if status != 'public':
            if error := self._ERROR_MAP.get(status):
                raise ExtractorError(error, expected=True)
            self.report_warning('Falling back to webpage extraction')
            return None
        m3u8_url = response.get('url')
        if not m3u8_url:
-            self.raise_geo_restricted()
+            status = response.get('room_status')
            if error := self._ERROR_MAP.get(status):
                raise ExtractorError(error, expected=True)
            if status == 'public':
                self.raise_geo_restricted()
            self.report_warning(f'Got status "{status}" from API; falling back to webpage extraction')
            return None
        return {
            'id': video_id,
Author	SHA1	Message	Date
Jackson Humphrey	3fb88b8f7e	Merge `f242b7c42d` into `4b5eec0aaa`	2024-11-25 14:26:52 +01:00
Jakob Kruse	4b5eec0aaa	[ie/chaturbate] Fix support for non-public streams (#11624). Fix bug in `720b3dc453`. Closes #11623. Authored by: jkruse	2024-11-24 22:20:30 +00:00
Jackson Humphrey	f242b7c42d	Added a test for the URL referenced in Issue #2702.	2024-11-13 16:15:41 -06:00
Jackson Humphrey	d45461e4b3	Added a test for the URL reported in youtube-dl Issue #30458.	2024-11-13 16:12:55 -06:00
Jackson Humphrey	10834f196d	Removed deprecated constructions to satisfy DeprecationWarnings when running tests. Tests now pass.	2024-11-13 16:00:29 -06:00
Jackson Humphrey	e719bf1fbb	Applied the patch provided by dirkf in https://github.com/ytdl-org/youtube-dl/issues/30458 .	2024-11-13 15:59:21 -06:00