Compare commits

...

2 Commits

Author SHA1 Message Date
DmitryScaletta
ef465aeace
[CHZZK] Refactor video extractor 2024-01-17 22:48:27 +03:00
DmitryScaletta
3f9ad7124e
[CHZZK] Refactor and add tests 2024-01-17 19:52:04 +03:00

View File

@@ -1,60 +1,81 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
traverse_obj, int_or_none,
parse_iso8601, parse_iso8601,
traverse_obj,
url_or_none,
) )
class CHZZKLiveIE(InfoExtractor): class CHZZKLiveIE(InfoExtractor):
IE_NAME = 'chzzk:live' IE_NAME = 'chzzk:live'
_VALID_URL = r'https?://chzzk\.naver\.com/live/(?P<id>[^/#?]+)' _VALID_URL = r'https?://chzzk\.naver\.com/live/(?P<id>[\da-f]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://chzzk.naver.com/live/c68b8ef525fb3d2fa146344d84991753', 'url': 'https://chzzk.naver.com/live/c68b8ef525fb3d2fa146344d84991753',
'only_matching': True, 'info_dict': {
"id": "c68b8ef525fb3d2fa146344d84991753",
"ext": "mp4",
'title': str,
'channel': '진짜도현',
'channel_id': 'c68b8ef525fb3d2fa146344d84991753',
'channel_is_verified': False,
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1705510344,
'upload_date': '20240117',
'live_status': 'is_live',
'view_count': int,
},
'skip': 'The channel is not currently live',
}] }]
def _real_extract(self, url): def _real_extract(self, url):
channel_id = self._match_id(url) channel_id = self._match_id(url)
live_detail_response = self._download_json( live_detail = self._download_json(
f'https://api.chzzk.naver.com/service/v1/channels/{channel_id}/live-detail', channel_id, f'https://api.chzzk.naver.com/service/v2/channels/{channel_id}/live-detail', channel_id,
note='Downloading channel info', note='Downloading channel info', errnote='Unable to download channel info')['content']
errnote='Unable to download channel info')
live_detail = live_detail_response.get('content')
if live_detail.get('status') == 'CLOSE': if live_detail.get('status') == 'CLOSE':
raise ExtractorError('The channel is not currently live', expected=True) raise ExtractorError('The channel is not currently live', expected=True)
live_playback = self._parse_json(live_detail.get('livePlaybackJson'), channel_id) live_playback = self._parse_json(live_detail['livePlaybackJson'], channel_id)
thumbnails = [] thumbnails = []
thumbnail_template = traverse_obj(live_playback, ('thumbnail', 'snapshotThumbnailTemplate')) thumbnail_template = traverse_obj(
for width in traverse_obj(live_playback, ('thumbnail', 'types')): live_playback, ('thumbnail', 'snapshotThumbnailTemplate', {url_or_none}))
thumbnails.append({ if thumbnail_template:
'id': width, for width in traverse_obj(live_playback, ('thumbnail', 'types', ..., {str})):
'url': thumbnail_template.replace('{type}', width), thumbnails.append({
'width': int(width), 'id': width,
}) 'url': thumbnail_template.replace('{type}', width),
'width': int_or_none(width),
})
formats, subtitles = [], {} formats, subtitles = [], {}
for media in live_playback.get('media'): for media in traverse_obj(live_playback, ('media', lambda _, v: url_or_none(v['path']))):
media_url = media.get('path') is_low_latency = media.get('mediaId') == 'LLHLS'
fmts, subs = self._extract_m3u8_formats_and_subtitles(media_url, channel_id, 'mp4') fmts, subs = self._extract_m3u8_formats_and_subtitles(
if media.get('mediaId') == 'LLHLS': media['path'], channel_id, 'mp4', fatal=False, live=True,
for fmt in fmts: m3u8_id='hls-ll' if is_low_latency else 'hls')
fmt['format_id'] += '-ll' for f in fmts:
if is_low_latency:
f['source_preference'] = -2
if '-afragalow.stream-audio.stream' in f['format_id']:
f['quality'] = -2
formats.extend(fmts) formats.extend(fmts)
self._merge_subtitles(subtitles, subs) self._merge_subtitles(subs, target=subtitles)
return { return {
'id': str(channel_id), 'id': str(channel_id),
'title': live_detail.get('liveTitle'), 'title': live_detail.get('liveTitle'),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'timestamp': parse_iso8601(live_detail.get('openDate')), **traverse_obj(live_detail, {
'view_count': live_detail.get('concurrentUserCount'), 'timestamp': ('openDate', {lambda d: parse_iso8601(d, ' ')}),
'channel': traverse_obj(live_detail, ('channel', 'channelName')), 'view_count': ('concurrentUserCount', {int_or_none}),
'channel_id': traverse_obj(live_detail, ('channel', 'channelId')), 'channel': ('channel', 'channelName', {str}),
'channel_is_verified': traverse_obj(live_detail, ('channel', 'verifiedMark')), 'channel_id': ('channel', 'channelId', {str}),
'channel_is_verified': ('channel', 'verifiedMark', {bool}),
}),
'is_live': True, 'is_live': True,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
@@ -66,56 +87,48 @@ class CHZZKVideoIE(InfoExtractor):
_VALID_URL = r'https?://chzzk\.naver\.com/video/(?P<id>\d+)' _VALID_URL = r'https?://chzzk\.naver\.com/video/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://chzzk.naver.com/video/1754', 'url': 'https://chzzk.naver.com/video/1754',
'only_matching': True, "md5": "b0c0c1bb888d913b93d702b1512c7f06",
'info_dict': {
"id": "1754",
"ext": "mp4",
'title': '치지직 테스트 방송',
'channel': '침착맨',
'channel_id': 'bb382c2c0cc9fa7c86ab3b037fb5799c',
'channel_is_verified': False,
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 15577,
'timestamp': 1702970505,
'upload_date': '20231219',
'view_count': int,
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
NS_MAP = {
'nvod': "urn:naver:vod:2020",
'': "urn:mpeg:dash:schema:mpd:2011",
}
video_id = self._match_id(url) video_id = self._match_id(url)
video_meta_response = self._download_json( video_meta = self._download_json(
f'https://api.chzzk.naver.com/service/v1/videos/{video_id}', video_id, f'https://api.chzzk.naver.com/service/v2/videos/{video_id}', video_id,
note='Downloading video info', note='Downloading video info', errnote='Unable to download video info')['content']
errnote='Unable to download video info') formats, subtitles = self._extract_mpd_formats_and_subtitles(
video_meta = video_meta_response.get('content') f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id,
vod_id = video_meta.get('videoId')
in_key = video_meta.get('inKey')
playback_xml = self._download_xml(
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{vod_id}', video_id,
query={ query={
'key': in_key, 'key': video_meta['inKey'],
'env': 'real', 'env': 'real',
'lc': 'en_US', 'lc': 'en_US',
'cpl': 'en_US', 'cpl': 'en_US',
}, }, note='Downloading video playback', errnote='Unable to download video playback')
note='Downloading video playback',
errnote='Unable to download video playback')
thumbnails = []
i = 0
for source in playback_xml.iterfind(
'./Period/SupplementalProperty/nvod:Thumbnails/nvod:ThumbnailSet/nvod:Thumbnail/nvod:Source',
NS_MAP,
):
thumbnails.append({'id': str(i), 'url': source.text.split('?')[0]})
i += 1
formats, subtitles = self._parse_mpd_formats_and_subtitles(playback_xml)
return { return {
'id': video_id, 'id': video_id,
'title': video_meta.get('videoTitle'),
'thumbnail': video_meta.get('thumbnailImageUrl'),
'thumbnails': thumbnails,
'timestamp': video_meta.get('publishDateAt'),
'view_count': video_meta.get('readCount'),
'duration': video_meta.get('duration'),
'channel': traverse_obj(video_meta, ('channel', 'channelName')),
'channel_id': traverse_obj(video_meta, ('channel', 'channelId')),
'channel_is_verified': traverse_obj(video_meta, ('channel', 'verifiedMark')),
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
**traverse_obj(video_meta, {
'title': ('videoTitle', {str}),
'thumbnail': ('thumbnailImageUrl', {url_or_none}),
'timestamp': ('publishDateAt', {lambda t: int_or_none(t / 1000)}),
'view_count': ('readCount', {int_or_none}),
'duration': ('duration', {int_or_none}),
'channel': ('channel', 'channelName', {str}),
'channel_id': ('channel', 'channelId', {str}),
'channel_is_verified': ('channel', 'verifiedMark', {bool}),
}),
} }