Compare commits

..

1 Commits

Author SHA1 Message Date
doe1080
f8d640e0c2
Merge 7622770586 into 52c0ffe40a 2024-11-17 11:32:36 +00:00
4 changed files with 19 additions and 183 deletions

View File

@ -946,10 +946,6 @@ from .kaltura import KalturaIE
from .kankanews import KankaNewsIE from .kankanews import KankaNewsIE
from .karaoketv import KaraoketvIE from .karaoketv import KaraoketvIE
from .kelbyone import KelbyOneIE from .kelbyone import KelbyOneIE
from .kenh14 import (
Kenh14PlaylistIE,
Kenh14VideoIE,
)
from .khanacademy import ( from .khanacademy import (
KhanAcademyIE, KhanAcademyIE,
KhanAcademyUnitIE, KhanAcademyUnitIE,

View File

@ -79,7 +79,7 @@ class ChaturbateIE(InfoExtractor):
'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True), 'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True),
} }
def _extract_from_html(self, video_id, tld): def _extract_from_webpage(self, video_id, tld):
webpage = self._download_webpage( webpage = self._download_webpage(
f'https://chaturbate.{tld}/{video_id}/', video_id, f'https://chaturbate.{tld}/{video_id}/', video_id,
headers=self.geo_verification_headers(), impersonate=True) headers=self.geo_verification_headers(), impersonate=True)
@ -151,4 +151,4 @@ class ChaturbateIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id, tld = self._match_valid_url(url).group('id', 'tld') video_id, tld = self._match_valid_url(url).group('id', 'tld')
return self._extract_from_api(video_id, tld) or self._extract_from_html(video_id, tld) return self._extract_from_api(video_id, tld) or self._extract_from_webpage(video_id, tld)

View File

@ -1,160 +0,0 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
extract_attributes,
get_element_by_class,
get_element_html_by_attribute,
get_elements_html_by_class,
int_or_none,
parse_duration,
parse_iso8601,
remove_start,
strip_or_none,
unescapeHTML,
update_url,
url_or_none,
)
from ..utils.traversal import traverse_obj
class Kenh14VideoIE(InfoExtractor):
_VALID_URL = r'https?://video\.kenh14\.vn/(?:video/)?[\w-]+-(?P<id>[0-9]+)\.chn'
_TESTS = [{
'url': 'https://video.kenh14.vn/video/mo-hop-iphone-14-pro-max-nguon-unbox-therapy-316173.chn',
'md5': '1ed67f9c3a1e74acf15db69590cf6210',
'info_dict': {
'id': '316173',
'ext': 'mp4',
'title': 'Video mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
'description': 'Video mở hộp iPhone 14 Pro MaxVideo mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
'tags': [],
'uploader': 'Unbox Therapy',
'upload_date': '20220517',
'view_count': int,
'duration': 722.86,
'timestamp': 1652764468,
},
}, {
'url': 'https://video.kenh14.vn/video-316174.chn',
'md5': '2b41877d2afaf4a3f487ceda8e5c7cbd',
'info_dict': {
'id': '316174',
'ext': 'mp4',
'title': 'Khoảnh khắc VĐV nằm gục khóc sau chiến thắng: 7 năm trời Việt Nam mới có HCV kiếm chém nữ, chỉ có 8 tháng để khổ luyện trước khi lên sàn đấu',
'description': 'md5:de86aa22e143e2b277bce8ec9c6f17dc',
'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
'tags': [],
'upload_date': '20220517',
'view_count': int,
'duration': 70.04,
'timestamp': 1652766021,
},
}, {
'url': 'https://video.kenh14.vn/0-344740.chn',
'md5': 'b843495d5e728142c8870c09b46df2a9',
'info_dict': {
'id': '344740',
'ext': 'mov',
'title': 'Kỳ Duyên đầy căng thẳng trong buổi ra quân đi Miss Universe, nghi thức tuyên thuệ lần đầu xuất hiện gây nhiều tranh cãi',
'description': 'md5:2a2dbb4a7397169fb21ee68f09160497',
'thumbnail': r're:^https?://kenh14cdn\.com/.*\.jpg$',
'tags': ['kỳ duyên', 'Kỳ Duyên tuyên thuệ', 'miss universe'],
'uploader': 'Quang Vũ',
'upload_date': '20241024',
'view_count': int,
'duration': 198.88,
'timestamp': 1729741590,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
attrs = extract_attributes(get_element_html_by_attribute('type', 'VideoStream', webpage) or '')
direct_url = attrs['data-vid']
metadata = self._download_json(
'https://api.kinghub.vn/video/api/v1/detailVideoByGet?FileName={}'.format(
remove_start(direct_url, 'kenh14cdn.com/')), video_id, fatal=False)
formats = [{'url': f'https://{direct_url}', 'format_id': 'http', 'quality': 1}]
subtitles = {}
video_data = self._download_json(
f'https://{direct_url}.json', video_id, note='Downloading video data', fatal=False)
if hls_url := traverse_obj(video_data, ('hls', {url_or_none})):
fmts, subs = self._extract_m3u8_formats_and_subtitles(
hls_url, video_id, m3u8_id='hls', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
if dash_url := traverse_obj(video_data, ('mpd', {url_or_none})):
fmts, subs = self._extract_mpd_formats_and_subtitles(
dash_url, video_id, mpd_id='dash', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
return {
**traverse_obj(metadata, {
'duration': ('duration', {parse_duration}),
'uploader': ('author', {strip_or_none}),
'timestamp': ('uploadtime', {parse_iso8601(delimiter=' ')}),
'view_count': ('views', {int_or_none}),
}),
'id': video_id,
'title': (
traverse_obj(metadata, ('title', {strip_or_none}))
or clean_html(self._og_search_title(webpage))
or clean_html(get_element_by_class('vdbw-title', webpage))),
'formats': formats,
'subtitles': subtitles,
'description': (
clean_html(self._og_search_description(webpage))
or clean_html(get_element_by_class('vdbw-sapo', webpage))),
'thumbnail': (self._og_search_thumbnail(webpage) or attrs.get('data-thumb')),
'tags': traverse_obj(self._html_search_meta('keywords', webpage), (
{lambda x: x.split(';')}, ..., filter)),
}
class Kenh14PlaylistIE(InfoExtractor):
_VALID_URL = r'https?://video\.kenh14\.vn/playlist/[\w-]+-(?P<id>[0-9]+)\.chn'
_TESTS = [{
'url': 'https://video.kenh14.vn/playlist/tran-tinh-naked-love-mua-2-71.chn',
'info_dict': {
'id': '71',
'title': 'Trần Tình (Naked love) mùa 2',
'description': 'md5:e9522339304956dea931722dd72eddb2',
'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
},
'playlist_count': 9,
}, {
'url': 'https://video.kenh14.vn/playlist/0-72.chn',
'info_dict': {
'id': '72',
'title': 'Lau Lại Đầu Từ',
'description': 'Cùng xem xưa và nay có gì khác biệt nhé!',
'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
},
'playlist_count': 6,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
category_detail = get_element_by_class('category-detail', webpage) or ''
embed_info = traverse_obj(
self._yield_json_ld(webpage, playlist_id),
(lambda _, v: v['name'] and v['alternateName'], any)) or {}
return self.playlist_from_matches(
get_elements_html_by_class('video-item', webpage), playlist_id,
(clean_html(get_element_by_class('name', category_detail)) or unescapeHTML(embed_info.get('name'))),
getter=lambda x: 'https://video.kenh14.vn/video/video-{}.chn'.format(extract_attributes(x)['data-id']),
ie=Kenh14VideoIE, playlist_description=(
clean_html(get_element_by_class('description', category_detail))
or unescapeHTML(embed_info.get('alternateName'))),
thumbnail=traverse_obj(
self._og_search_thumbnail(webpage),
({url_or_none}, {update_url(query=None)})))

View File

@ -20,40 +20,40 @@ class OnsenIE(InfoExtractor):
_NETRC_MACHINE = 'onsen' _NETRC_MACHINE = 'onsen'
_VALID_URL = r'https?://(?:(?:share|www)\.)onsen\.ag/program/(?P<id>[\w-]+)' _VALID_URL = r'https?://(?:(?:share|www)\.)onsen\.ag/program/(?P<id>[\w-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://share.onsen.ag/program/onsenking?p=90&c=MTA0NjI', 'url': 'https://share.onsen.ag/program/tricolor?p=393&c=MTk2NjE',
'info_dict': { 'info_dict': {
'id': '10462', 'id': '19661',
'title': '第0回',
'cast': ['礒部花凜', '土屋李央', '林鼓子'],
'ext': 'm4a', 'ext': 'm4a',
'title': '第SP回', 'description': 'md5:8435d68dcb7a43bc2c993911b0db245b',
'cast': ['下野紘', '佐藤元', '守屋亨香'], 'display_id': 'MTk2NjE=',
'description': 'md5:083c1eddf198694cd3cc83f4d5c03863',
'display_id': 'MTA0NjI=',
'http_headers': {'Referer': 'https://www.onsen.ag/'}, 'http_headers': {'Referer': 'https://www.onsen.ag/'},
'media_type': 'sound', 'media_type': 'sound',
'tags': ['かりこ'],
'thumbnail': 'https://d3bzklg4lms4gh.cloudfront.net/program_info/image/default/production/31/ea/c1db117c9b41655120d3a212b2038d15811f/image',
'section_start': 0, 'section_start': 0,
'series': '音泉キング「下野紘」のラジオ きみはもちろん、<音泉>ファミリーだよね?', 'series': '礒部花凜・土屋李央・林鼓子 トリコロールカラー',
'series_id': 'onsenking', 'series_id': 'tricolor',
'tags': ['音泉キング', '音泉ジュニア'], 'upload_date': '20240907',
'thumbnail': r're:https://d3bzklg4lms4gh\.cloudfront\.net/program_info/image/default/production/.+$', 'webpage_url': 'https://www.onsen.ag/program/tricolor?c=MTk2NjE=',
'upload_date': '20220627',
'webpage_url': 'https://www.onsen.ag/program/onsenking?c=MTA0NjI=',
}, },
}, { }, {
'url': 'https://share.onsen.ag/program/girls-band-cry-radio?p=370&c=MTgwMDE', 'url': 'https://share.onsen.ag/program/girls-band-cry-radio?p=370&c=MTgwMDE',
'info_dict': { 'info_dict': {
'id': '18001', 'id': '18001',
'ext': 'mp4',
'title': '第4回', 'title': '第4回',
'cast': ['夕莉', '理名', '朱李', '凪都', '美怜'], 'cast': ['夕莉', '理名', '朱李', '凪都', '美怜'],
'ext': 'mp4',
'description': 'md5:1d7f6a2f1f5a3e2a8ada4e9f652262dd', 'description': 'md5:1d7f6a2f1f5a3e2a8ada4e9f652262dd',
'display_id': 'MTgwMDE=', 'display_id': 'MTgwMDE=',
'http_headers': {'Referer': 'https://www.onsen.ag/'}, 'http_headers': {'Referer': 'https://www.onsen.ag/'},
'media_type': 'movie', 'media_type': 'movie',
'tags': ['ガールズバンドクライ', 'ガルクラ', 'ガルクラジオ'],
'thumbnail': 'https://d3bzklg4lms4gh.cloudfront.net/program_info/image/default/production/95/a7/6a848c87bebf3ec085d8890f3ce038f9b4dd/image',
'section_start': 0, 'section_start': 0,
'series': 'TVアニメ『ガールズバンドクライ』WEBラジオ「ガールズバンドクライラジオにも全部ぶち込め。', 'series': 'TVアニメ『ガールズバンドクライ』WEBラジオ「ガールズバンドクライラジオにも全部ぶち込め。',
'series_id': 'girls-band-cry-radio', 'series_id': 'girls-band-cry-radio',
'tags': ['ガールズバンドクライ', 'ガルクラ', 'ガルクラジオ'],
'thumbnail': r're:https://d3bzklg4lms4gh\.cloudfront\.net/program_info/image/default/production/.+$',
'upload_date': '20240425', 'upload_date': '20240425',
'webpage_url': 'https://www.onsen.ag/program/girls-band-cry-radio?c=MTgwMDE=', 'webpage_url': 'https://www.onsen.ag/program/girls-band-cry-radio?c=MTgwMDE=',
}, },
@ -83,7 +83,7 @@ class OnsenIE(InfoExtractor):
def _get_info(self, program, program_id, metadata): def _get_info(self, program, program_id, metadata):
m3u8 = program['streaming_url'] m3u8 = program['streaming_url']
rd = self._search_regex(rf'{program_id}(\d{{6}})', m3u8, 'release_date', default=None) rd = self._search_regex(f'{program_id}(\\d{{6}})', m3u8, 'release_date', default=None)
display_id = base64.b64encode(str(program['id']).encode()).decode() display_id = base64.b64encode(str(program['id']).encode()).decode()
return { return {
@ -96,8 +96,8 @@ class OnsenIE(InfoExtractor):
**traverse_obj(program, { **traverse_obj(program, {
'id': ('id', {str_or_none}), 'id': ('id', {str_or_none}),
'title': ('title', {strip_or_none}), 'title': ('title', {strip_or_none}),
'media_type': ('media_type', {str}),
'thumbnail': ('poster_image_url', {lambda x: x.split('?')[0]}), 'thumbnail': ('poster_image_url', {lambda x: x.split('?')[0]}),
'media_type': ('media_type', {str}),
}), }),
'cast': metadata['cast'] + traverse_obj(program, ('guests', ..., 'name', {str})), 'cast': metadata['cast'] + traverse_obj(program, ('guests', ..., 'name', {str})),
} }