Compare commits

...

6 Commits

Author SHA1 Message Date
Mozi
a52172fc19
Merge e767973d8c into eb15fd5a32 2024-11-17 17:03:36 +01:00
krichbanana
eb15fd5a32
[ie/kenh14] Add extractor (#3996)
Closes #3937
Authored by: krichbanana, pzhlkj6612

Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
2024-11-17 14:12:26 +00:00
sepro
7cecd299e4
[ie/chaturbate] Don't break embed detection (#11565)
Bugfix for 720b3dc453

Authored by: seproDev
2024-11-17 13:32:12 +01:00
Mozi
e767973d8c merge 'master' 2024-11-16 07:57:41 +00:00
Mozi
dc2239391b imports 2024-11-16 07:57:30 +00:00
Mozi
507b7c6d4a [ie/nova:embed] Support live and VOD on markiza.sk and tvnoviny.sk 2024-10-13 10:01:53 +00:00
5 changed files with 342 additions and 136 deletions

View File

@ -946,6 +946,10 @@ from .kaltura import KalturaIE
from .kankanews import KankaNewsIE from .kankanews import KankaNewsIE
from .karaoketv import KaraoketvIE from .karaoketv import KaraoketvIE
from .kelbyone import KelbyOneIE from .kelbyone import KelbyOneIE
from .kenh14 import (
Kenh14PlaylistIE,
Kenh14VideoIE,
)
from .khanacademy import ( from .khanacademy import (
KhanAcademyIE, KhanAcademyIE,
KhanAcademyUnitIE, KhanAcademyUnitIE,
@ -1089,10 +1093,6 @@ from .manoto import (
) )
from .manyvids import ManyVidsIE from .manyvids import ManyVidsIE
from .maoritv import MaoriTVIE from .maoritv import MaoriTVIE
from .markiza import (
MarkizaIE,
MarkizaPageIE,
)
from .massengeschmacktv import MassengeschmackTVIE from .massengeschmacktv import MassengeschmackTVIE
from .masters import MastersIE from .masters import MastersIE
from .matchtv import MatchTVIE from .matchtv import MatchTVIE

View File

@ -79,7 +79,7 @@ class ChaturbateIE(InfoExtractor):
'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True), 'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True),
} }
def _extract_from_webpage(self, video_id, tld): def _extract_from_html(self, video_id, tld):
webpage = self._download_webpage( webpage = self._download_webpage(
f'https://chaturbate.{tld}/{video_id}/', video_id, f'https://chaturbate.{tld}/{video_id}/', video_id,
headers=self.geo_verification_headers(), impersonate=True) headers=self.geo_verification_headers(), impersonate=True)
@ -151,4 +151,4 @@ class ChaturbateIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id, tld = self._match_valid_url(url).group('id', 'tld') video_id, tld = self._match_valid_url(url).group('id', 'tld')
return self._extract_from_api(video_id, tld) or self._extract_from_webpage(video_id, tld) return self._extract_from_api(video_id, tld) or self._extract_from_html(video_id, tld)

160
yt_dlp/extractor/kenh14.py Normal file
View File

@ -0,0 +1,160 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
extract_attributes,
get_element_by_class,
get_element_html_by_attribute,
get_elements_html_by_class,
int_or_none,
parse_duration,
parse_iso8601,
remove_start,
strip_or_none,
unescapeHTML,
update_url,
url_or_none,
)
from ..utils.traversal import traverse_obj
class Kenh14VideoIE(InfoExtractor):
    # Single-video pages on video.kenh14.vn; the digits right before ".chn"
    # are captured as the video id.
    _VALID_URL = r'https?://video\.kenh14\.vn/(?:video/)?[\w-]+-(?P<id>[0-9]+)\.chn'
    _TESTS = [{
        'url': 'https://video.kenh14.vn/video/mo-hop-iphone-14-pro-max-nguon-unbox-therapy-316173.chn',
        'md5': '1ed67f9c3a1e74acf15db69590cf6210',
        'info_dict': {
            'id': '316173',
            'ext': 'mp4',
            'title': 'Video mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
            'description': 'Video mở hộp iPhone 14 Pro MaxVideo mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
            'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
            'tags': [],
            'uploader': 'Unbox Therapy',
            'upload_date': '20220517',
            'view_count': int,
            'duration': 722.86,
            'timestamp': 1652764468,
        },
    }, {
        'url': 'https://video.kenh14.vn/video-316174.chn',
        'md5': '2b41877d2afaf4a3f487ceda8e5c7cbd',
        'info_dict': {
            'id': '316174',
            'ext': 'mp4',
            'title': 'Khoảnh khắc VĐV nằm gục khóc sau chiến thắng: 7 năm trời Việt Nam mới có HCV kiếm chém nữ, chỉ có 8 tháng để khổ luyện trước khi lên sàn đấu',
            'description': 'md5:de86aa22e143e2b277bce8ec9c6f17dc',
            'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
            'tags': [],
            'upload_date': '20220517',
            'view_count': int,
            'duration': 70.04,
            'timestamp': 1652766021,
        },
    }, {
        'url': 'https://video.kenh14.vn/0-344740.chn',
        'md5': 'b843495d5e728142c8870c09b46df2a9',
        'info_dict': {
            'id': '344740',
            'ext': 'mov',
            'title': 'Kỳ Duyên đầy căng thẳng trong buổi ra quân đi Miss Universe, nghi thức tuyên thuệ lần đầu xuất hiện gây nhiều tranh cãi',
            'description': 'md5:2a2dbb4a7397169fb21ee68f09160497',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.jpg$',
            'tags': ['kỳ duyên', 'Kỳ Duyên tuyên thuệ', 'miss universe'],
            'uploader': 'Quang Vũ',
            'upload_date': '20241024',
            'view_count': int,
            'duration': 198.88,
            'timestamp': 1729741590,
        },
    }]

    def _real_extract(self, url):
        """Assemble the info dict for one kenh14 video page.

        The media path is read from the ``data-vid`` attribute of the page
        element whose ``type`` attribute is ``VideoStream``; that attribute is
        looked up with a plain subscript, so it is mandatory. The metadata
        API call and the ``.json`` manifest lookup are both requested with
        ``fatal=False``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        player_html = get_element_html_by_attribute('type', 'VideoStream', webpage)
        attrs = extract_attributes(player_html or '')
        direct_url = attrs['data-vid']

        # The API is queried with the media path minus its CDN host prefix.
        api_file_name = remove_start(direct_url, 'kenh14cdn.com/')
        metadata = self._download_json(
            'https://api.kinghub.vn/video/api/v1/detailVideoByGet?FileName={}'.format(api_file_name),
            video_id, fatal=False)

        # The direct file is always offered as a progressive format.
        formats = [{
            'url': f'https://{direct_url}',
            'format_id': 'http',
            'quality': 1,
        }]
        subtitles = {}

        video_data = self._download_json(
            f'https://{direct_url}.json', video_id, note='Downloading video data', fatal=False)

        # (key in video_data, extraction method, manifest-id keyword argument)
        manifest_handlers = (
            ('hls', self._extract_m3u8_formats_and_subtitles, {'m3u8_id': 'hls'}),
            ('mpd', self._extract_mpd_formats_and_subtitles, {'mpd_id': 'dash'}),
        )
        for data_key, extract_manifest, id_kwargs in manifest_handlers:
            manifest_url = traverse_obj(video_data, (data_key, {url_or_none}))
            if not manifest_url:
                continue
            fmts, subs = extract_manifest(manifest_url, video_id, fatal=False, **id_kwargs)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        api_fields = traverse_obj(metadata, {
            'duration': ('duration', {parse_duration}),
            'uploader': ('author', {strip_or_none}),
            'timestamp': ('uploadtime', {parse_iso8601(delimiter=' ')}),
            'view_count': ('views', {int_or_none}),
        })

        # Title: API value first, then OpenGraph, then the on-page heading.
        title = traverse_obj(metadata, ('title', {strip_or_none}))
        if not title:
            title = clean_html(self._og_search_title(webpage))
        if not title:
            title = clean_html(get_element_by_class('vdbw-title', webpage))

        # Description: OpenGraph first, then the on-page summary element.
        description = clean_html(self._og_search_description(webpage))
        if not description:
            description = clean_html(get_element_by_class('vdbw-sapo', webpage))

        thumbnail = self._og_search_thumbnail(webpage) or attrs.get('data-thumb')

        # The keywords meta value is split on ';' and empty pieces are dropped.
        tags = traverse_obj(
            self._html_search_meta('keywords', webpage),
            ({lambda x: x.split(';')}, ..., filter))

        return {
            **api_fields,
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'description': description,
            'thumbnail': thumbnail,
            'tags': tags,
        }
class Kenh14PlaylistIE(InfoExtractor):
    # Playlist pages on video.kenh14.vn; every "video-item" element on the
    # page is handed to Kenh14VideoIE.
    _VALID_URL = r'https?://video\.kenh14\.vn/playlist/[\w-]+-(?P<id>[0-9]+)\.chn'
    _TESTS = [{
        'url': 'https://video.kenh14.vn/playlist/tran-tinh-naked-love-mua-2-71.chn',
        'info_dict': {
            'id': '71',
            'title': 'Trần Tình (Naked love) mùa 2',
            'description': 'md5:e9522339304956dea931722dd72eddb2',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
        },
        'playlist_count': 9,
    }, {
        'url': 'https://video.kenh14.vn/playlist/0-72.chn',
        'info_dict': {
            'id': '72',
            'title': 'Lau Lại Đầu Từ',
            'description': 'Cùng xem xưa và nay có gì khác biệt nhé!',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
        },
        'playlist_count': 6,
    }]

    def _real_extract(self, url):
        """Return a playlist result built from the page's ``video-item`` elements.

        Title and description come from the ``category-detail`` element when
        present, otherwise from the first JSON-LD object that has both a
        ``name`` and an ``alternateName``.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        category_detail = get_element_by_class('category-detail', webpage) or ''
        embed_info = traverse_obj(
            self._yield_json_ld(webpage, playlist_id),
            (lambda _, v: v['name'] and v['alternateName'], any)) or {}

        def video_page_url(item_html):
            # Each item carries its video id in the "data-id" attribute.
            return 'https://video.kenh14.vn/video/video-{}.chn'.format(
                extract_attributes(item_html)['data-id'])

        video_items = get_elements_html_by_class('video-item', webpage)

        playlist_title = clean_html(get_element_by_class('name', category_detail))
        if not playlist_title:
            playlist_title = unescapeHTML(embed_info.get('name'))

        playlist_description = clean_html(get_element_by_class('description', category_detail))
        if not playlist_description:
            playlist_description = unescapeHTML(embed_info.get('alternateName'))

        # The thumbnail URL is validated and then rebuilt with query=None.
        thumbnail = traverse_obj(
            self._og_search_thumbnail(webpage),
            ({url_or_none}, {update_url(query=None)}))

        return self.playlist_from_matches(
            video_items, playlist_id, playlist_title,
            getter=video_page_url, ie=Kenh14VideoIE,
            playlist_description=playlist_description,
            thumbnail=thumbnail)

View File

@ -1,123 +0,0 @@
import re
from .common import InfoExtractor
from ..utils import (
orderedSet,
parse_duration,
try_get,
)
class MarkizaIE(InfoExtractor):
    # Extractor for videoarchiv.markiza.sk video/embed URLs; flagged as not
    # working via _WORKING.
    _WORKING = False
    _VALID_URL = r'https?://(?:www\.)?videoarchiv\.markiza\.sk/(?:video/(?:[^/]+/)*|embed/)(?P<id>\d+)(?:[_/]|$)'
    _TESTS = [{
        'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109',
        'md5': 'ada4e9fad038abeed971843aa028c7b0',
        'info_dict': {
            'id': '139078',
            'ext': 'mp4',
            'title': 'Oteckovia 109',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2760,
        },
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/televizne-noviny/televizne-noviny/85430_televizne-noviny',
        'info_dict': {
            'id': '85430',
            'title': 'Televízne noviny',
        },
        'playlist_count': 23,
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723',
        'only_matching': True,
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/84723',
        'only_matching': True,
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/filmy/85190_kamenak',
        'only_matching': True,
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/reflex/zo-zakulisia/84651_pribeh-alzbetky',
        'only_matching': True,
    }, {
        'url': 'http://videoarchiv.markiza.sk/embed/85295',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the JWPlayer JSON for the video id and turn it into an info dict.

        A playlist result receives the page id and the ``details.name`` value
        as title; any other result receives a duration parsed from
        ``details.duration``.
        """
        video_id = self._match_id(url)

        data = self._download_json(
            'http://videoarchiv.markiza.sk/json/video_jwplayer7.json',
            video_id, query={'id': video_id})

        info = self._parse_jwplayer_data(data, m3u8_id='hls', mpd_id='dash')

        if info.get('_type') != 'playlist':
            raw_duration = try_get(data, lambda x: x['details']['duration'], str)
            info['duration'] = parse_duration(raw_duration)
            return info

        info['id'] = video_id
        info['title'] = try_get(data, lambda x: x['details']['name'], str)
        return info
class MarkizaPageIE(InfoExtractor):
    # Article pages on markiza.sk subdomains and tvnoviny.sk that embed
    # videoarchiv players; flagged as not working via _WORKING.
    _WORKING = False
    _VALID_URL = r'https?://(?:www\.)?(?:(?:[^/]+\.)?markiza|tvnoviny)\.sk/(?:[^/]+/)*(?P<id>\d+)_'
    _TESTS = [{
        'url': 'http://www.markiza.sk/soubiz/zahranicny/1923705_oteckovia-maju-svoj-den-ti-slavni-nie-su-o-nic-menej-rozkosni',
        'md5': 'ada4e9fad038abeed971843aa028c7b0',
        'info_dict': {
            'id': '139355',
            'ext': 'mp4',
            'title': 'Oteckovia 110',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2604,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://dajto.markiza.sk/filmy-a-serialy/1774695_frajeri-vo-vegas',
        'only_matching': True,
    }, {
        'url': 'http://superstar.markiza.sk/aktualne/1923870_to-je-ale-telo-spevacka-ukazala-sexy-postavicku-v-bikinach',
        'only_matching': True,
    }, {
        'url': 'http://hybsa.markiza.sk/aktualne/1923790_uzasna-atmosfera-na-hybsa-v-poprade-superstaristi-si-prve-koncerty-pred-davom-ludi-poriadne-uzili',
        'only_matching': True,
    }, {
        'url': 'http://doma.markiza.sk/filmy/1885250_moja-vysnivana-svadba',
        'only_matching': True,
    }, {
        'url': 'http://www.tvnoviny.sk/domace/1923887_po-smrti-manzela-ju-cakalo-poriadne-prekvapenie',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline any URL that MarkizaIE already accepts; otherwise defer to the base check."""
        if MarkizaIE.suitable(url):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Collect every player id found in the page into a playlist of videoarchiv URLs."""
        playlist_id = self._match_id(url)

        # Downloading for some hosts (e.g. dajto, doma) fails with 500
        # although everything seems to be OK, so considering 500
        # status code to be expected.
        webpage = self._download_webpage(url, playlist_id, expected_status=500)

        # De-duplicated player ids, kept in the order they appear in the page.
        found_ids = orderedSet(re.findall(
            r'(?:initPlayer_|data-entity=["\']|id=["\']player_)(\d+)',
            webpage))

        entries = []
        for video_id in found_ids:
            entries.append(self.url_result(f'http://videoarchiv.markiza.sk/video/{video_id}'))

        return self.playlist_result(entries, playlist_id)

View File

@ -6,14 +6,20 @@ from ..utils import (
determine_ext, determine_ext,
int_or_none, int_or_none,
js_to_json, js_to_json,
traverse_obj, strip_or_none,
unified_strdate, unified_strdate,
url_or_none, url_or_none,
) )
from ..utils.traversal import traverse_obj
class NovaEmbedIE(InfoExtractor): class NovaEmbedIE(InfoExtractor):
_VALID_URL = r'https?://media(?:tn)?\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)' _DOMAINS = [
r'media(?:tn)?\.cms\.nova\.cz',
r'media\.cms\.(?:markiza|tvnoviny)\.sk',
]
_VALID_URL = [rf'https?://{domain}/embed/(?P<id>[^/?#&"\']+)' for domain in _DOMAINS]
_EMBED_REGEX = [rf'(?x)<iframe[^>]+\b(?:data-)?src=["\'](?P<url>{url})' for url in _VALID_URL]
_TESTS = [{ _TESTS = [{
'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1', 'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
'info_dict': { 'info_dict': {
@ -43,19 +49,179 @@ class NovaEmbedIE(InfoExtractor):
'id': 'EU5ELEsmOHt', 'id': 'EU5ELEsmOHt',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Haptické křeslo, bionická ruka nebo roboti. Reportérka se podívala na Týden inovací', 'title': 'Haptické křeslo, bionická ruka nebo roboti. Reportérka se podívala na Týden inovací',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://cloudia\.cms\.nova\.cz/.+',
'duration': 1780, 'duration': 1780,
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}] }]
_WEBPAGE_TESTS = [{
'url': 'http://www.markiza.sk/soubiz/zahranicny/1923705_oteckovia-maju-svoj-den-ti-slavni-nie-su-o-nic-menej-rozkosni',
'md5': 'a478390ea7f36aeb36004a107db8b031',
'info_dict': {
'id': '4q3zP2DsORO',
'ext': 'mp4',
'title': 'Oteckovia 110',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2603,
},
}, {
'url': 'https://tvnoviny.sk/domace/clanok/141815-byvaly-sportovec-udajne-vyrabal-mast-z-marihuany-sud-mu-vymeral-20-rocny-trest-a-vzal-aj-rodinny-dom',
'md5': '51de0754352a36b4d623f98c9636a5e1',
'info_dict': {
'id': '2LcfYRqGuYP',
'ext': 'mp4',
'title': 'Marihuanový mastičkár si vypočul vysoký trest a prepad majetku',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 119,
},
}, {
'url': 'https://tvnoviny.sk/domace/clanok/144055-robert-z-kosic-dostal-najnizsi-mozny-trest-za-to-co-spravil-je-to-aj-tak-vela-tvrdia-blizki',
'md5': 'c9a8467b37951877336a9ae6309558b0',
'info_dict': {
'id': '82N7FrJK7cR',
'ext': 'mp4',
'title': 'Robovi z Košíc znížili trest za marihuanu, odsúdili ho na päť rokov',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 152,
},
}, {
'url': 'https://tvnoviny.sk/domace/clanok/338907-preco-sa-mnozia-utoky-tinedzerov-podla-psychologiciek-je-za-tym-rastuca-frustracia',
'md5': '869b589e99d7c19dd66f024a7d088502',
'info_dict': {
'id': 'DeiezcjCJmg',
'ext': 'mp4',
'title': '2022-11-03-TN-2-Nasilie-medzi-mladymi',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 142,
},
}, {
'url': 'http://tvnoviny.sk/domace/clanok/890183-vlada-chysta-postavit-novu-nemocnicu-v-presove-informoval-premier-robert-fico',
'md5': 'b9ef0b4917deee2c930f2248b568a90c',
'info_dict': {
'id': '7VCyuyfGsNZ',
'ext': 'mp4',
'title': '2024-04-15-PTN-1-Co-caka-zdravotnictvo',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 137,
},
}, {
'url': 'https://www.markiza.sk/live/1-markiza',
'info_dict': {
'id': 'markiza-live',
'ext': 'mp4',
'title': r're:^CRA Markiza SD \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'thumbnail': r're:^https?://cloudia\.cms\.markiza\.sk/.+',
'live_status': 'is_live',
},
}, {
'url': 'http://www.tvnoviny.sk/domace/1923887_po-smrti-manzela-ju-cakalo-poriadne-prekvapenie',
'md5': 'e3e0f1e98172ea64147cada308276df8',
'info_dict': {
'id': 'JxqRvQkFwHK',
'ext': 'mp4',
'title': 'Po smrti manžela ju čakalo prekvapenie',
'thumbnail': r're:^https?://.*\.(?:jpg)',
'duration': 108,
},
}, {
'url': 'http://videoarchiv.markiza.sk/video/reflex/zo-zakulisia/84651_pribeh-alzbetky',
'md5': 'b40d04d5cb4cf529e2ff14d6726a3548',
'info_dict': {
'id': '9ZnlOQp2MRa',
'ext': 'mp4',
'title': 'Príbeh Alžbetky',
'thumbnail': r're:^https?://.*\.(?:jpg)',
'duration': 361,
},
}, {
'url': 'https://www.markiza.sk/relacie/superstar/clanok/549972-v-zakulisi-superstar-to-bolo-obcas-drsne-moderator-priznal-ze-musel-pouzit-aj-hrubu-silu',
'info_dict': {
'id': '549972-v-zakulisi-superstar-to-bolo-obcas-drsne-moderator-priznal-ze-musel-pouzit-aj-hrubu-silu',
'title': 'V zákulisí SuperStar to bolo občas drsné. Moderátor priznal, že musel použiť aj hrubú silu | TV Markíza',
'description': 'md5:02e240e302bddfd0cd352bc886d95161',
'thumbnail': r're:^https?://cmesk-ott-images-avod\.ssl\.cdn\.cra\.cz/.+',
'age_limit': 0,
},
'playlist_count': 2,
}, {
'url': 'https://voyo.markiza.sk/filmy/6702-vysnivana-svadba',
'info_dict': {
'id': '20kSOHBD8DQ',
'title': 'Vysnívaná svadba - 0000',
'thumbnail': r're:^https?://.*\.(?:jpg)',
'duration': 4924,
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
'expected_warnings': [
'Requested format is not available',
'This video is DRM protected',
],
'skip': 'premium member only',
}, {
# Other URLs:
# http://videoarchiv.markiza.sk/video/84723
'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109',
'info_dict': {
'id': '2a5fQmhjvYm',
'title': 'Oteckovia 109',
'thumbnail': r're:^https?://.*\.(?:jpg)',
'duration': 2759,
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
'expected_warnings': [
'Requested format is not available',
'This video is DRM protected',
],
'skip': 'premium member only',
}, {
'url': 'https://voyo.markiza.sk/filmy/1377-frajeri-vo-vegas#player-fullscreen',
'info_dict': {
'id': '1377-frajeri-vo-vegas#player-fullscreen',
'title': 'Frajeri vo Vegas | Voyo',
'description': 'md5:7f16168f669f144986d862312949627c',
'thumbnail': r're:^https?://cmesk-ott-images-svod\.ssl\.cdn\.cra\.cz/.+',
'age_limit': 0,
},
'playlist': [{
'info_dict': {
'id': 'K8H4IvKNBbw',
'ext': 'mp4',
'title': 'frajeri-vo-vegas-hd-15_frajeri-trailer',
'duration': 90,
'thumbnail': r're:^https?://.*\.(?:jpg)',
},
},
# BUG: The 2nd item (CDjGcqcCYKy) is the movie itself and it's DRM-protected.
# The "ext" field can neither be here nor omitted.
],
'playlist_count': 2,
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
'expected_warnings': [
'Requested format is not available',
'This video is DRM protected',
],
'skip': 'premium member only',
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
if 'player_not_logged_in' in webpage:
self.raise_login_required()
has_drm = False has_drm = False
duration = None duration = None
is_live = False
formats = [] formats = []
def process_format_list(format_list, format_id=''): def process_format_list(format_list, format_id=''):
@ -77,11 +243,11 @@ class NovaEmbedIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', format_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls', entry_protocol='m3u8_native', m3u8_id='hls',
fatal=False)) fatal=False, headers={'Referer': url}))
elif (format_type == 'application/dash+xml' elif (format_type == 'application/dash+xml'
or format_id == 'DASH' or ext == 'mpd'): or format_id == 'DASH' or ext == 'mpd'):
formats.extend(self._extract_mpd_formats( formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False)) format_url, video_id, mpd_id='dash', fatal=False, headers={'Referer': url}))
else: else:
formats.append({ formats.append({
'url': format_url, 'url': format_url,
@ -93,6 +259,7 @@ class NovaEmbedIE(InfoExtractor):
for src in traverse_obj(player, ('lib', 'source', 'sources', ...)): for src in traverse_obj(player, ('lib', 'source', 'sources', ...)):
process_format_list(src) process_format_list(src)
duration = traverse_obj(player, ('sourceInfo', 'duration', {int_or_none})) duration = traverse_obj(player, ('sourceInfo', 'duration', {int_or_none}))
is_live = player.get('isLive', False)
if not formats and not has_drm: if not formats and not has_drm:
# older code path, in use before August 2023 # older code path, in use before August 2023
player = self._parse_json( player = self._parse_json(
@ -108,11 +275,11 @@ class NovaEmbedIE(InfoExtractor):
if not formats and has_drm: if not formats and has_drm:
self.report_drm(video_id) self.report_drm(video_id)
title = self._og_search_title( title = strip_or_none(self._og_search_title(
webpage, default=None) or self._search_regex( webpage, default=None) or self._search_regex(
(r'<value>(?P<title>[^<]+)', (r'<value>(?P<title>[^<]+)',
r'videoTitle\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage, r'videoTitle\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
'title', group='value') 'title', group='value'))
thumbnail = self._og_search_thumbnail( thumbnail = self._og_search_thumbnail(
webpage, default=None) or self._search_regex( webpage, default=None) or self._search_regex(
r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
@ -127,6 +294,8 @@ class NovaEmbedIE(InfoExtractor):
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'duration': duration, 'duration': duration,
'formats': formats, 'formats': formats,
'http_headers': {'Referer': url},
'is_live': is_live,
} }