Compare commits

...

4 Commits

Author SHA1 Message Date
sepro
0fff1bd353 Simplify traverse_obj 2023-10-10 23:10:17 +02:00
sepro
7f8051ebed Additional adjustments 2023-10-10 22:07:57 +02:00
bashonly
27b163397e
typo 2023-10-10 20:06:26 +00:00
sepro
89311b43ea
Apply suggestions from code review
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2023-10-10 21:40:34 +02:00

View File

@ -2,14 +2,20 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none,
parse_duration, parse_duration,
traverse_obj, traverse_obj,
url_or_none,
) )
class JTBCIE(InfoExtractor): class JTBCIE(InfoExtractor):
IE_DESC = 'jtbc.co.kr' IE_DESC = 'jtbc.co.kr'
_VALID_URL = r'https?://(?:vod\.jtbc\.co\.kr/player/(?:program|clip)|tv\.jtbc\.co\.kr/(?:replay|trailer|clip)/pr[0-9]+/pm[0-9]+)/(?P<id>(?:ep|vo)[0-9]+)' _VALID_URL = r'''(?x)
https?://(?:
vod\.jtbc\.co\.kr/player/(?:program|clip)
|tv\.jtbc\.co\.kr/(?:replay|trailer|clip)/pr\d+/pm\d+
)/(?P<id>(?:ep|vo)\d+)'''
_GEO_COUNTRIES = ['KR'] _GEO_COUNTRIES = ['KR']
_TESTS = [{ _TESTS = [{
@ -74,7 +80,7 @@ class JTBCIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
file_id = self._search_regex(r'data-vod="(VO[0-9]+)"', webpage, 'vod_id') file_id = self._search_regex(r'data-vod="(VO\d+)"', webpage, 'vod id')
metadata = self._download_json( metadata = self._download_json(
f'https://now-api.jtbc.co.kr/v1/vod/detail?vodFileId={file_id}', f'https://now-api.jtbc.co.kr/v1/vod/detail?vodFileId={file_id}',
@ -84,23 +90,23 @@ class JTBCIE(InfoExtractor):
f'https://api.jtbc.co.kr/vod/{file_id}', video_id, note='Downloading VOD playback data') f'https://api.jtbc.co.kr/vod/{file_id}', video_id, note='Downloading VOD playback data')
subtitles = {} subtitles = {}
for sub in playback_data.get('tracks', []): for sub in traverse_obj(playback_data, ('tracks', lambda _, v: v['file'])):
subtitles.setdefault(sub.get('label', 'und'), []).append({'url': sub.get('file')}) subtitles.setdefault(sub.get('label', 'und'), []).append({'url': sub['file']})
formats = [] formats = []
for format_id, stream in traverse_obj(playback_data, ('sources', 'HLS'), default={}).items(): for stream_url in traverse_obj(playback_data, ('sources', 'HLS', ..., 'file')):
m3u8_url = re.sub(r'/playlist(?:_pd180000)?\.m3u8', '/index.m3u8', stream.get('file')) m3u8_url = re.sub(r'/playlist(?:_pd\d+)?\.m3u8', '/index.m3u8', stream_url)
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id=format_id)) formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, fatal=False))
return { return {
'id': video_id, 'id': video_id,
**traverse_obj(metadata, ('vodDetail', { **traverse_obj(metadata, ('vodDetail', {
'title': 'vodTitleView', 'title': 'vodTitleView',
'series': 'programTitle', 'series': 'programTitle',
'age_limit': 'watchAge', 'age_limit': ('watchAge', {int_or_none}),
'release_date': ('broadcastDate', {lambda x: re.match(r'\d{8}', x.replace('.', ''))}, 0), 'release_date': ('broadcastDate', {lambda x: re.match(r'\d{8}', x.replace('.', ''))}, 0),
'description': 'episodeContents', 'description': 'episodeContents',
'thumbnail': 'imgFileUrl', 'thumbnail': ('imgFileUrl', {url_or_none}),
})), })),
'duration': parse_duration(playback_data.get('playTime')), 'duration': parse_duration(playback_data.get('playTime')),
'formats': formats, 'formats': formats,
@ -110,7 +116,7 @@ class JTBCIE(InfoExtractor):
class JTBCProgramIE(InfoExtractor): class JTBCProgramIE(InfoExtractor):
IE_NAME = 'JTBC:program' IE_NAME = 'JTBC:program'
_VALID_URL = r'https?://(?:vod\.jtbc\.co\.kr/program|tv\.jtbc\.co\.kr/replay)/(?P<id>pr[0-9]+)/(?:replay|pm[0-9]+)$' _VALID_URL = r'https?://(?:vod\.jtbc\.co\.kr/program|tv\.jtbc\.co\.kr/replay)/(?P<id>pr\d+)/(?:replay|pm\d+)/?(?:$|[?#])'
_TESTS = [{ _TESTS = [{
'url': 'https://tv.jtbc.co.kr/replay/pr10010392/pm10032710', 'url': 'https://tv.jtbc.co.kr/replay/pr10010392/pm10032710',
@ -132,11 +138,13 @@ class JTBCProgramIE(InfoExtractor):
program_id = self._match_id(url) program_id = self._match_id(url)
vod_list = self._download_json( vod_list = self._download_json(
f'https://now-api.jtbc.co.kr/v1/vodClip/programHome/programReplayVodList?programId={program_id}&rowCount=10000', 'https://now-api.jtbc.co.kr/v1/vodClip/programHome/programReplayVodList', program_id,
program_id, note='Downloading program replay list') note='Downloading program replay list', query={
'programId': program_id,
'rowCount': '10000',
})
entries = [self.url_result( entries = [self.url_result(f'https://vod.jtbc.co.kr/player/program/{video_id}', JTBCIE, video_id)
'https://vod.jtbc.co.kr/player/program/' + video.get('episodeId'), JTBCIE.ie_key() for video_id in traverse_obj(vod_list, ('programReplayVodList', ..., 'episodeId'))]
) for video in vod_list.get('programReplayVodList', [])]
return self.playlist_result(entries, program_id) return self.playlist_result(entries, program_id)