[NUUM] Small fixes

[NUUM] Fix quotes
[NUUM] Add tabs extractor
2024-10-03 07:41:27 +02:00 · 2024-02-04 02:01:26 +03:00 · 2024-02-03 23:06:35 +03:00 · 2024-02-03 23:01:34 +03:00 · 2024-02-03 20:34:38 +03:00
2 changed files with 78 additions and 44 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -1319,9 +1319,8 @@ from .nytimes import (
 )
 from .nuum import (
    NuumLiveIE,
-    NuumStreamIE,
+    NuumTabsIE,
-    NuumVideoIE,
+    NuumMediaIE,
    NuumClipIE,
 )
 from .nuvid import NuvidIE
 from .nzherald import NZHeraldIE
--- a/yt_dlp/extractor/nuum.py
+++ b/yt_dlp/extractor/nuum.py
@ -1,25 +1,40 @@
 import urllib.parse
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    int_or_none,
    parse_iso8601,
    traverse_obj,
    try_get,
 )
 class NuumBaseIE(InfoExtractor):
-    def _fetch(self, path, video_id, description, query={}):
+    def _call_api(self, path, video_id, description, query={}):
        response = self._download_json(
            f'https://nuum.ru/api/v2/{path}', video_id, query=query,
            note=f'Downloading {description} metadata',
            errnote=f'Unable to download {description} metadata')
        error = response.get('error')
        if error:
-            raise ExtractorError(f'{self.IE_NAME} returned error: {error}', expected=True)
+            raise ExtractorError(f'API returned error: {error!r}')
        return response.get('result')
    def _get_container(self, url):
        container_id = self._match_id(url)
        return self._call_api(
            f'media-containers/{container_id}', container_id, 'media container')
    def _get_broadcast(self, channel_name):
        return self._call_api(
            'broadcasts/public', video_id=channel_name, description='channel',
            query={
                'with_extra': 'true',
                'channel_name': channel_name,
                'with_deleted': 'true',
            })
    def _extract_thumbnails(self, thumbnails_dict):
        return [{
            'url': url,
@ -27,30 +42,22 @@ class NuumBaseIE(InfoExtractor):
        } for index, url in enumerate(
            traverse_obj(thumbnails_dict, (('small', 'medium', 'large'),))) if url]
    def _get_container(self, url):
        container_id = self._match_id(url)
        return self._fetch(
            f'media-containers/{container_id}', container_id, 'media container')
    def _get_media_url(self, media_meta):
        media_archive_url = media_meta.get('media_archive_url')
        if media_archive_url:
            return media_archive_url, False
        return media_meta['media_url'], True
-    def _real_extract(self, url):
+    def _extract_container(self, container):
        container = self._get_container(url)
        stream = traverse_obj(container, ('media_container_streams', 0))
-        media = try_get(stream, lambda x: x['stream_media'][0])
+        media = traverse_obj(stream, ('stream_media', 0))
        if not media:
            raise ExtractorError('Cannot extract media data', expected=True)
        media_meta = media.get('media_meta')
        media_url, is_live = self._get_media_url(media_meta)
        video_id = media.get('media_id') or container.get('media_container_id')
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(media_url, video_id, 'mp4')
        return {
            'id': str(video_id),
-            'title': container.get('media_container_name') or self._og_search_title(self._download_webpage(url, video_id)),
+            'title': container.get('media_container_name'),
            'description': container.get('media_container_description'),
            'thumbnails': self._extract_thumbnails(media_meta.get('media_preview_images' if is_live else 'media_preview_archive_images')),
            'timestamp': parse_iso8601(container.get('created_at')),
@ -60,46 +67,80 @@ class NuumBaseIE(InfoExtractor):
            'subtitles': subtitles,
        }
    def _real_extract(self, url):
        return self._extract_container(self._get_container(url))
 class NuumLiveIE(NuumBaseIE):
    IE_NAME = 'nuum:live'
-    _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)'
+    _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)$'
    _TESTS = [{
        'url': 'https://nuum.ru/channel/mts_live',
        'only_matching': True,
    }]
    def _get_container(self, url):
-        channel_name = self._match_id(url)
+        broadcast = self._get_broadcast(self._match_id(url))
        broadcast = self._fetch(
            'broadcasts/public', video_id=channel_name, description='channel',
            query={
                'with_extra': 'true',
                'channel_name': channel_name,
                'with_deleted': 'true',
            }
        )
        if not traverse_obj(broadcast, ('channel', 'channel_is_live')):
            raise ExtractorError('The channel is not currently live', expected=True)
        return broadcast.get('media_container')
    def _get_media_url(self, media_meta):
        return media_meta['media_url'], True
-
+class NuumTabsIE(NuumBaseIE):
-class NuumStreamIE(NuumBaseIE):
+    IE_NAME = 'nuum:tabs'
-    IE_NAME = 'nuum:stream'
+    _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/(?P<type>streams|videos|clips)'
    _VALID_URL = r'https?://nuum\.ru/streams/(?P<id>[\d]+)'
    _TESTS = [{
-        'url': 'https://nuum.ru/streams/1592713-7-days-to-die',
+        'url': 'https://nuum.ru/channel/mts_live/clips',
        'only_matching': True,
    }, {
        'url': 'https://nuum.ru/channel/mts_live/videos',
        'only_matching': True,
    }, {
        'url': 'https://nuum.ru/channel/mts_live/streams',
        'only_matching': True,
    }]
    def _get_containers(self, channel_name, tab_type):
        """Collect every media container listed under one tab of a channel.

        ``tab_type`` must be a key of ``CONTAINER_TYPES`` ('clips', 'videos'
        or 'streams'); any other value raises ``KeyError``. The channel ID is
        looked up through ``_get_broadcast``, then containers are requested
        ``MAX_LIMIT`` at a time until a short or empty page comes back.

        Returns a list with the containers of all pages, in request order.
        """
        MAX_LIMIT = 50
        CONTAINER_TYPES = {
            'clips': ['SHORT_VIDEO', 'REVIEW_VIDEO'],
            'videos': ['LONG_VIDEO'],
            'streams': ['SINGLE'],
        }

        channel_id = traverse_obj(self._get_broadcast(channel_name), ('channel', 'channel_id'))
        # NOTE(review): if the lookup yields no channel_id, None is sent as the
        # literal string "None" - confirm whether this should raise instead.
        query = {
            'limit': MAX_LIMIT,
            'offset': 0,
            'channel_id': channel_id,
            'media_container_status': 'STOPPED',
            # The list is expanded by urlencode(doseq=True) into one
            # media_container_type parameter per entry.
            'media_container_type': CONTAINER_TYPES[tab_type],
        }

        media_containers = []
        while True:
            # _call_api hands back the response's 'result' entry, which may be
            # absent; treat that as an empty page rather than failing in extend().
            page = self._call_api(
                f'media-containers?{urllib.parse.urlencode(query, doseq=True)}',
                video_id=channel_name, description=tab_type) or []
            media_containers.extend(page)
            # A page shorter than the limit (including an empty one) is the last.
            if len(page) < MAX_LIMIT:
                break
            query['offset'] += MAX_LIMIT
        return media_containers
-class NuumVideoIE(NuumBaseIE):
+    def _real_extract(self, url):
-    IE_NAME = 'nuum:video'
+        channel_name, tab_type = self._match_valid_url(url).group('id', 'type')
-    _VALID_URL = r'https?://nuum\.ru/videos/(?P<id>[\d]+)'
+        containers = self._get_containers(channel_name, tab_type)
        return self.playlist_result(
            [self._extract_container(container) for container in containers],
            channel_name, tab_type)
 class NuumMediaIE(NuumBaseIE):
    IE_NAME = 'nuum:media'
    _VALID_URL = r'https?://nuum\.ru/(streams|videos|clips)/(?P<id>[\d]+)'
    _TESTS = [{
        'url': 'https://nuum.ru/streams/1592713-7-days-to-die',
        'only_matching': True,
    }, {
        'url': 'https://nuum.ru/videos/1567547-toxi-hurtz',
        'md5': 'f1d9118a30403e32b702a204eb03aca3',
        'info_dict': {
@ -112,13 +153,7 @@ class NuumVideoIE(NuumBaseIE):
            'thumbnail': r're:^https?://.+\.jpg',
            'view_count': int,
        },
-    }]
+    }, {
 class NuumClipIE(NuumBaseIE):
    IE_NAME = 'nuum:clip'
    _VALID_URL = r'https?://nuum\.ru/clips/(?P<id>[\d]+)'
    _TESTS = [{
        'url': 'https://nuum.ru/clips/1552564-pro-misu',
        'md5': 'b248ae1565b1e55433188f11beeb0ca1',
        'info_dict': {
Author	SHA1	Message	Date
DmitryScaletta	85ecedd72b	[NUUM] Small fixes	2024-02-04 02:01:26 +03:00
DmitryScaletta	27b3c4d2db	[NUUM] Fix quotes	2024-02-03 23:06:35 +03:00
DmitryScaletta	da53c0cac3	[NUUM] Add tabs extractor	2024-02-03 23:01:34 +03:00
DmitryScaletta	4c93640887	[NUUM] Combine stream, media and clip extractors	2024-02-03 20:34:38 +03:00