Compare commits

..

4 Commits

Author SHA1 Message Date
DmitryScaletta
85ecedd72b
[NUUM] Small fixes 2024-02-04 02:01:26 +03:00
DmitryScaletta
27b3c4d2db
[NUUM] Fix quotes 2024-02-03 23:06:35 +03:00
DmitryScaletta
da53c0cac3
[NUUM] Add tabs extractor 2024-02-03 23:01:34 +03:00
DmitryScaletta
4c93640887
[NUUM] Combine stream, media and clip extractors 2024-02-03 20:34:38 +03:00
2 changed files with 78 additions and 44 deletions

View File

@ -1319,9 +1319,8 @@ from .nytimes import (
) )
from .nuum import ( from .nuum import (
NuumLiveIE, NuumLiveIE,
NuumStreamIE, NuumTabsIE,
NuumVideoIE, NuumMediaIE,
NuumClipIE,
) )
from .nuvid import NuvidIE from .nuvid import NuvidIE
from .nzherald import NZHeraldIE from .nzherald import NZHeraldIE

View File

@ -1,25 +1,40 @@
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
traverse_obj, traverse_obj,
try_get,
) )
class NuumBaseIE(InfoExtractor): class NuumBaseIE(InfoExtractor):
def _fetch(self, path, video_id, description, query={}): def _call_api(self, path, video_id, description, query={}):
response = self._download_json( response = self._download_json(
f'https://nuum.ru/api/v2/{path}', video_id, query=query, f'https://nuum.ru/api/v2/{path}', video_id, query=query,
note=f'Downloading {description} metadata', note=f'Downloading {description} metadata',
errnote=f'Unable to download {description} metadata') errnote=f'Unable to download {description} metadata')
error = response.get('error') error = response.get('error')
if error: if error:
raise ExtractorError(f'{self.IE_NAME} returned error: {error}', expected=True) raise ExtractorError(f'API returned error: {error!r}')
return response.get('result') return response.get('result')
def _get_container(self, url):
    """Fetch the media container identified by ``url``.

    The container ID is taken from ``self._match_id(url)``; it is used
    both to build the ``media-containers/<id>`` API path and as the video
    ID handed to ``self._call_api``. Whatever ``_call_api`` returns is
    returned unchanged.
    """
    media_container_id = self._match_id(url)
    api_path = f'media-containers/{media_container_id}'
    return self._call_api(api_path, media_container_id, 'media container')
def _get_broadcast(self, channel_name):
    """Request the ``broadcasts/public`` record for ``channel_name``.

    ``channel_name`` doubles as the video ID passed to ``self._call_api``.
    The result of ``_call_api`` is returned as-is.
    """
    # The flags are sent as the literal strings 'true', not Python booleans.
    broadcast_query = {
        'with_extra': 'true',
        'channel_name': channel_name,
        'with_deleted': 'true',
    }
    return self._call_api(
        'broadcasts/public', video_id=channel_name,
        description='channel', query=broadcast_query)
def _extract_thumbnails(self, thumbnails_dict): def _extract_thumbnails(self, thumbnails_dict):
return [{ return [{
'url': url, 'url': url,
@ -27,30 +42,22 @@ class NuumBaseIE(InfoExtractor):
} for index, url in enumerate( } for index, url in enumerate(
traverse_obj(thumbnails_dict, (('small', 'medium', 'large'),))) if url] traverse_obj(thumbnails_dict, (('small', 'medium', 'large'),))) if url]
def _get_container(self, url):
container_id = self._match_id(url)
return self._fetch(
f'media-containers/{container_id}', container_id, 'media container')
def _get_media_url(self, media_meta): def _get_media_url(self, media_meta):
media_archive_url = media_meta.get('media_archive_url') media_archive_url = media_meta.get('media_archive_url')
if media_archive_url: if media_archive_url:
return media_archive_url, False return media_archive_url, False
return media_meta['media_url'], True return media_meta['media_url'], True
def _real_extract(self, url): def _extract_container(self, container):
container = self._get_container(url)
stream = traverse_obj(container, ('media_container_streams', 0)) stream = traverse_obj(container, ('media_container_streams', 0))
media = try_get(stream, lambda x: x['stream_media'][0]) media = traverse_obj(stream, ('stream_media', 0))
if not media:
raise ExtractorError('Cannot extract media data', expected=True)
media_meta = media.get('media_meta') media_meta = media.get('media_meta')
media_url, is_live = self._get_media_url(media_meta) media_url, is_live = self._get_media_url(media_meta)
video_id = media.get('media_id') or container.get('media_container_id') video_id = media.get('media_id') or container.get('media_container_id')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(media_url, video_id, 'mp4') formats, subtitles = self._extract_m3u8_formats_and_subtitles(media_url, video_id, 'mp4')
return { return {
'id': str(video_id), 'id': str(video_id),
'title': container.get('media_container_name') or self._og_search_title(self._download_webpage(url, video_id)), 'title': container.get('media_container_name'),
'description': container.get('media_container_description'), 'description': container.get('media_container_description'),
'thumbnails': self._extract_thumbnails(media_meta.get('media_preview_images' if is_live else 'media_preview_archive_images')), 'thumbnails': self._extract_thumbnails(media_meta.get('media_preview_images' if is_live else 'media_preview_archive_images')),
'timestamp': parse_iso8601(container.get('created_at')), 'timestamp': parse_iso8601(container.get('created_at')),
@ -60,46 +67,80 @@ class NuumBaseIE(InfoExtractor):
'subtitles': subtitles, 'subtitles': subtitles,
} }
def _real_extract(self, url):
    """Resolve ``url`` to a media container and extract its info.

    The container is obtained through ``self._get_container`` and then
    handed to ``self._extract_container``, whose result is returned.
    """
    container = self._get_container(url)
    return self._extract_container(container)
class NuumLiveIE(NuumBaseIE): class NuumLiveIE(NuumBaseIE):
IE_NAME = 'nuum:live' IE_NAME = 'nuum:live'
_VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)' _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)$'
_TESTS = [{ _TESTS = [{
'url': 'https://nuum.ru/channel/mts_live', 'url': 'https://nuum.ru/channel/mts_live',
'only_matching': True, 'only_matching': True,
}] }]
def _get_container(self, url): def _get_container(self, url):
channel_name = self._match_id(url) broadcast = self._get_broadcast(self._match_id(url))
broadcast = self._fetch(
'broadcasts/public', video_id=channel_name, description='channel',
query={
'with_extra': 'true',
'channel_name': channel_name,
'with_deleted': 'true',
}
)
if not traverse_obj(broadcast, ('channel', 'channel_is_live')): if not traverse_obj(broadcast, ('channel', 'channel_is_live')):
raise ExtractorError('The channel is not currently live', expected=True) raise ExtractorError('The channel is not currently live', expected=True)
return broadcast.get('media_container') return broadcast.get('media_container')
def _get_media_url(self, media_meta):
return media_meta['media_url'], True
class NuumTabsIE(NuumBaseIE):
class NuumStreamIE(NuumBaseIE): IE_NAME = 'nuum:tabs'
IE_NAME = 'nuum:stream' _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/(?P<type>streams|videos|clips)'
_VALID_URL = r'https?://nuum\.ru/streams/(?P<id>[\d]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://nuum.ru/streams/1592713-7-days-to-die', 'url': 'https://nuum.ru/channel/mts_live/clips',
'only_matching': True,
}, {
'url': 'https://nuum.ru/channel/mts_live/videos',
'only_matching': True,
}, {
'url': 'https://nuum.ru/channel/mts_live/streams',
'only_matching': True, 'only_matching': True,
}] }]
def _get_containers(self, channel_name, tab_type):
    """Collect all stopped media containers shown on one channel tab.

    The channel ID is looked up from the broadcast record of
    ``channel_name``; the ``media-containers`` endpoint is then requested
    page by page (``MAX_LIMIT`` items per request) until a page comes back
    shorter than ``MAX_LIMIT``.

    :param channel_name: channel name, also used as the video ID for
        the API calls.
    :param tab_type: one of ``'clips'``, ``'videos'`` or ``'streams'``;
        selects the ``media_container_type`` values to request.
    :return: list with the items of every fetched page, in order.
    :raises KeyError: if ``tab_type`` is not a key of ``CONTAINER_TYPES``.
    """
    MAX_LIMIT = 50
    CONTAINER_TYPES = {
        'clips': ['SHORT_VIDEO', 'REVIEW_VIDEO'],
        'videos': ['LONG_VIDEO'],
        'streams': ['SINGLE'],
    }

    channel_id = traverse_obj(self._get_broadcast(channel_name), ('channel', 'channel_id'))
    query = {
        'limit': MAX_LIMIT,
        'offset': 0,
        'channel_id': channel_id,
        'media_container_status': 'STOPPED',
        # Kept last so the encoded string has the same parameter order as
        # before; doseq=True below repeats the key once per list item.
        'media_container_type': CONTAINER_TYPES[tab_type],
    }

    media_containers = []
    while True:
        query_string = urllib.parse.urlencode(query, doseq=True)
        # _call_api returns response.get('result'), which may be None;
        # treat that as an empty (and therefore final) page.
        page = self._call_api(
            f'media-containers?{query_string}',
            video_id=channel_name, description=tab_type) or []
        media_containers.extend(page)
        # A short page (including an empty one) means nothing is left.
        if len(page) < MAX_LIMIT:
            break
        query['offset'] += MAX_LIMIT
    return media_containers
class NuumVideoIE(NuumBaseIE): def _real_extract(self, url):
IE_NAME = 'nuum:video' channel_name, tab_type = self._match_valid_url(url).group('id', 'type')
_VALID_URL = r'https?://nuum\.ru/videos/(?P<id>[\d]+)' containers = self._get_containers(channel_name, tab_type)
return self.playlist_result(
[self._extract_container(container) for container in containers],
channel_name, tab_type)
class NuumMediaIE(NuumBaseIE):
IE_NAME = 'nuum:media'
_VALID_URL = r'https?://nuum\.ru/(streams|videos|clips)/(?P<id>[\d]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://nuum.ru/streams/1592713-7-days-to-die',
'only_matching': True,
}, {
'url': 'https://nuum.ru/videos/1567547-toxi-hurtz', 'url': 'https://nuum.ru/videos/1567547-toxi-hurtz',
'md5': 'f1d9118a30403e32b702a204eb03aca3', 'md5': 'f1d9118a30403e32b702a204eb03aca3',
'info_dict': { 'info_dict': {
@ -112,13 +153,7 @@ class NuumVideoIE(NuumBaseIE):
'thumbnail': r're:^https?://.+\.jpg', 'thumbnail': r're:^https?://.+\.jpg',
'view_count': int, 'view_count': int,
}, },
}] }, {
class NuumClipIE(NuumBaseIE):
IE_NAME = 'nuum:clip'
_VALID_URL = r'https?://nuum\.ru/clips/(?P<id>[\d]+)'
_TESTS = [{
'url': 'https://nuum.ru/clips/1552564-pro-misu', 'url': 'https://nuum.ru/clips/1552564-pro-misu',
'md5': 'b248ae1565b1e55433188f11beeb0ca1', 'md5': 'b248ae1565b1e55433188f11beeb0ca1',
'info_dict': { 'info_dict': {