2024-10-03 07:41:27 +02:00
2 changed files with 46 additions and 80 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1319,8 +1319,9 @@ from .nytimes import (
 )
 from .nuum import (
    NuumLiveIE,
-    NuumTabsIE,
-    NuumMediaIE,
+    NuumStreamIE,
+    NuumVideoIE,
+    NuumClipIE,
 )
 from .nuvid import NuvidIE
 from .nzherald import NZHeraldIE
--- a/yt_dlp/extractor/nuum.py
+++ b/yt_dlp/extractor/nuum.py
@@ -1,40 +1,25 @@
-import urllib.parse
-
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    int_or_none,
    parse_iso8601,
    traverse_obj,
+    try_get,
 )


 class NuumBaseIE(InfoExtractor):

-    def _call_api(self, path, video_id, description, query={}):
+    def _fetch(self, path, video_id, description, query={}):
        response = self._download_json(
            f'https://nuum.ru/api/v2/{path}', video_id, query=query,
            note=f'Downloading {description} metadata',
            errnote=f'Unable to download {description} metadata')
        error = response.get('error')
        if error:
-            raise ExtractorError(f'API returned error: {error!r}')
+            raise ExtractorError(f'{self.IE_NAME} returned error: {error}', expected=True)
        return response.get('result')

-    def _get_container(self, url):
-        container_id = self._match_id(url)
-        return self._call_api(
-            f'media-containers/{container_id}', container_id, 'media container')
-
-    def _get_broadcast(self, channel_name):
-        return self._call_api(
-            'broadcasts/public', video_id=channel_name, description='channel',
-            query={
-                'with_extra': 'true',
-                'channel_name': channel_name,
-                'with_deleted': 'true',
-            })
-
    def _extract_thumbnails(self, thumbnails_dict):
        return [{
            'url': url,
@@ -42,22 +27,30 @@ class NuumBaseIE(InfoExtractor):
        } for index, url in enumerate(
            traverse_obj(thumbnails_dict, (('small', 'medium', 'large'),))) if url]

+    def _get_container(self, url):
+        container_id = self._match_id(url)
+        return self._fetch(
+            f'media-containers/{container_id}', container_id, 'media container')
+
    def _get_media_url(self, media_meta):
        media_archive_url = media_meta.get('media_archive_url')
        if media_archive_url:
            return media_archive_url, False
        return media_meta['media_url'], True

-    def _extract_container(self, container):
+    def _real_extract(self, url):
+        container = self._get_container(url)
        stream = traverse_obj(container, ('media_container_streams', 0))
-        media = traverse_obj(stream, ('stream_media', 0))
+        media = try_get(stream, lambda x: x['stream_media'][0])
+        if not media:
+            raise ExtractorError('Cannot extract media data', expected=True)
        media_meta = media.get('media_meta')
        media_url, is_live = self._get_media_url(media_meta)
        video_id = media.get('media_id') or container.get('media_container_id')
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(media_url, video_id, 'mp4')
        return {
            'id': str(video_id),
-            'title': container.get('media_container_name'),
+            'title': container.get('media_container_name') or self._og_search_title(self._download_webpage(url, video_id)),
            'description': container.get('media_container_description'),
            'thumbnails': self._extract_thumbnails(media_meta.get('media_preview_images' if is_live else 'media_preview_archive_images')),
            'timestamp': parse_iso8601(container.get('created_at')),
@@ -67,80 +60,46 @@ class NuumBaseIE(InfoExtractor):
            'subtitles': subtitles,
        }

-    def _real_extract(self, url):
-        return self._extract_container(self._get_container(url))
-

 class NuumLiveIE(NuumBaseIE):
    IE_NAME = 'nuum:live'
-    _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)$'
+    _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)'
    _TESTS = [{
        'url': 'https://nuum.ru/channel/mts_live',
        'only_matching': True,
    }]

    def _get_container(self, url):
-        broadcast = self._get_broadcast(self._match_id(url))
+        channel_name = self._match_id(url)
+        broadcast = self._fetch(
+            'broadcasts/public', video_id=channel_name, description='channel',
+            query={
+                'with_extra': 'true',
+                'channel_name': channel_name,
+                'with_deleted': 'true',
+            }
+        )
        if not traverse_obj(broadcast, ('channel', 'channel_is_live')):
            raise ExtractorError('The channel is not currently live', expected=True)
        return broadcast.get('media_container')

-
-class NuumTabsIE(NuumBaseIE):
-    IE_NAME = 'nuum:tabs'
-    _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/(?P<type>streams|videos|clips)'
-    _TESTS = [{
-        'url': 'https://nuum.ru/channel/mts_live/clips',
-        'only_matching': True,
-    }, {
-        'url': 'https://nuum.ru/channel/mts_live/videos',
-        'only_matching': True,
-    }, {
-        'url': 'https://nuum.ru/channel/mts_live/streams',
-        'only_matching': True,
-    }]
-
-    def _get_containers(self, channel_name, tab_type):
-        MAX_LIMIT = 50
-        CONTAINER_TYPES = {
-            'clips': ['SHORT_VIDEO', 'REVIEW_VIDEO'],
-            'videos': ['LONG_VIDEO'],
-            'streams': ['SINGLE'],
-        }
-        channel_id = traverse_obj(self._get_broadcast(channel_name), ('channel', 'channel_id'))
-        qs_types = ''.join([f'&media_container_type={type}' for type in CONTAINER_TYPES[tab_type]])
-        query = {
-            'limit': MAX_LIMIT,
-            'offset': 0,
-            'channel_id': channel_id,
-            'media_container_status': 'STOPPED'
-        }
-        media_containers = []
-        while True:
-            qs_main = urllib.parse.urlencode(query)
-            res = self._call_api(
-                f'media-containers?{qs_main}{qs_types}', video_id=channel_name, description=tab_type)
-            query['offset'] += MAX_LIMIT
-            media_containers.extend(res)
-            if len(res) == 0 or len(res) < MAX_LIMIT:
-                break
-        return media_containers
-
-    def _real_extract(self, url):
-        channel_name, tab_type = self._match_valid_url(url).group('id', 'type')
-        containers = self._get_containers(channel_name, tab_type)
-        return self.playlist_result(
-            [self._extract_container(container) for container in containers],
-            channel_name, tab_type)
+    def _get_media_url(self, media_meta):
+        return media_meta['media_url'], True


-class NuumMediaIE(NuumBaseIE):
-    IE_NAME = 'nuum:media'
-    _VALID_URL = r'https?://nuum\.ru/(streams|videos|clips)/(?P<id>[\d]+)'
+class NuumStreamIE(NuumBaseIE):
+    IE_NAME = 'nuum:stream'
+    _VALID_URL = r'https?://nuum\.ru/streams/(?P<id>[\d]+)'
    _TESTS = [{
        'url': 'https://nuum.ru/streams/1592713-7-days-to-die',
        'only_matching': True,
-    }, {
+    }]
+
+
+class NuumVideoIE(NuumBaseIE):
+    IE_NAME = 'nuum:video'
+    _VALID_URL = r'https?://nuum\.ru/videos/(?P<id>[\d]+)'
+    _TESTS = [{
        'url': 'https://nuum.ru/videos/1567547-toxi-hurtz',
        'md5': 'f1d9118a30403e32b702a204eb03aca3',
        'info_dict': {
@@ -153,7 +112,13 @@ class NuumMediaIE(NuumBaseIE):
            'thumbnail': r're:^https?://.+\.jpg',
            'view_count': int,
        },
-    }, {
+    }]
+
+
+class NuumClipIE(NuumBaseIE):
+    IE_NAME = 'nuum:clip'
+    _VALID_URL = r'https?://nuum\.ru/clips/(?P<id>[\d]+)'
+    _TESTS = [{
        'url': 'https://nuum.ru/clips/1552564-pro-misu',
        'md5': 'b248ae1565b1e55433188f11beeb0ca1',
        'info_dict': {