mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-25 08:41:28 +01:00
Compare commits
23 Commits
99db988aa4
...
c6b59d5c16
Author | SHA1 | Date | |
---|---|---|---|
|
c6b59d5c16 | ||
|
37cd7660ea | ||
|
d867f99622 | ||
|
10fc719bc7 | ||
|
eb15fd5a32 | ||
|
7cecd299e4 | ||
|
7ab6662997 | ||
|
2b5eaf8601 | ||
|
725ab6ef3e | ||
|
17b667c2fa | ||
|
38746cb1af | ||
|
670ac229d9 | ||
|
3213c07265 | ||
|
5cc9b64268 | ||
|
0048ed894e | ||
|
b691d1dadb | ||
|
4cd8abfc08 | ||
|
4522cce417 | ||
|
6b2b7dbc42 | ||
|
7e8e6cb621 | ||
|
34236d0b95 | ||
|
6d7eb0e827 | ||
|
b5a111eeb8 |
|
@ -11,7 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
from test.helper import FakeYDL, is_download_test, md5
|
from test.helper import FakeYDL, is_download_test, md5
|
||||||
from yt_dlp.extractor import (
|
from yt_dlp.extractor import (
|
||||||
NPOIE,
|
NPOIE,
|
||||||
NRKTVIE,
|
NRKIE,
|
||||||
PBSIE,
|
PBSIE,
|
||||||
CeskaTelevizeIE,
|
CeskaTelevizeIE,
|
||||||
ComedyCentralIE,
|
ComedyCentralIE,
|
||||||
|
@ -299,15 +299,16 @@ class TestMTVSubtitles(BaseTestSubtitles):
|
||||||
|
|
||||||
@is_download_test
|
@is_download_test
|
||||||
class TestNRKSubtitles(BaseTestSubtitles):
|
class TestNRKSubtitles(BaseTestSubtitles):
|
||||||
url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
|
url = 'nrk:DMPV73000411' # http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1
|
||||||
IE = NRKTVIE
|
IE = NRKIE
|
||||||
|
|
||||||
def test_allsubtitles(self):
|
def test_allsubtitles(self):
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(set(subtitles.keys()), {'nb-ttv'})
|
self.assertEqual(set(subtitles.keys()), {'nb-ttv', 'no'})
|
||||||
self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149')
|
self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149')
|
||||||
|
self.assertEqual(md5(subtitles['no']), 'fc01036074116d245ddc6ba6f679263b')
|
||||||
|
|
||||||
|
|
||||||
@is_download_test
|
@is_download_test
|
||||||
|
|
|
@ -946,6 +946,10 @@ from .kaltura import KalturaIE
|
||||||
from .kankanews import KankaNewsIE
|
from .kankanews import KankaNewsIE
|
||||||
from .karaoketv import KaraoketvIE
|
from .karaoketv import KaraoketvIE
|
||||||
from .kelbyone import KelbyOneIE
|
from .kelbyone import KelbyOneIE
|
||||||
|
from .kenh14 import (
|
||||||
|
Kenh14PlaylistIE,
|
||||||
|
Kenh14VideoIE,
|
||||||
|
)
|
||||||
from .khanacademy import (
|
from .khanacademy import (
|
||||||
KhanAcademyIE,
|
KhanAcademyIE,
|
||||||
KhanAcademyUnitIE,
|
KhanAcademyUnitIE,
|
||||||
|
@ -1135,12 +1139,6 @@ from .microsoftembed import (
|
||||||
MicrosoftMediusIE,
|
MicrosoftMediusIE,
|
||||||
)
|
)
|
||||||
from .microsoftstream import MicrosoftStreamIE
|
from .microsoftstream import MicrosoftStreamIE
|
||||||
from .mildom import (
|
|
||||||
MildomClipIE,
|
|
||||||
MildomIE,
|
|
||||||
MildomUserVodIE,
|
|
||||||
MildomVodIE,
|
|
||||||
)
|
|
||||||
from .minds import (
|
from .minds import (
|
||||||
MindsChannelIE,
|
MindsChannelIE,
|
||||||
MindsGroupIE,
|
MindsGroupIE,
|
||||||
|
@ -1404,7 +1402,6 @@ from .nrk import (
|
||||||
NRKSkoleIE,
|
NRKSkoleIE,
|
||||||
NRKTVDirekteIE,
|
NRKTVDirekteIE,
|
||||||
NRKTVEpisodeIE,
|
NRKTVEpisodeIE,
|
||||||
NRKTVEpisodesIE,
|
|
||||||
NRKTVSeasonIE,
|
NRKTVSeasonIE,
|
||||||
NRKTVSeriesIE,
|
NRKTVSeriesIE,
|
||||||
)
|
)
|
||||||
|
@ -1522,8 +1519,8 @@ from .pgatour import PGATourIE
|
||||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||||
from .phoenix import PhoenixIE
|
from .phoenix import PhoenixIE
|
||||||
from .photobucket import PhotobucketIE
|
from .photobucket import PhotobucketIE
|
||||||
|
from .pialive import PiaLiveIE
|
||||||
from .piapro import PiaproIE
|
from .piapro import PiaproIE
|
||||||
from .piaulizaportal import PIAULIZAPortalIE
|
|
||||||
from .picarto import (
|
from .picarto import (
|
||||||
PicartoIE,
|
PicartoIE,
|
||||||
PicartoVodIE,
|
PicartoVodIE,
|
||||||
|
@ -1559,10 +1556,6 @@ from .podbayfm import (
|
||||||
)
|
)
|
||||||
from .podchaser import PodchaserIE
|
from .podchaser import PodchaserIE
|
||||||
from .podomatic import PodomaticIE
|
from .podomatic import PodomaticIE
|
||||||
from .pokemon import (
|
|
||||||
PokemonIE,
|
|
||||||
PokemonWatchIE,
|
|
||||||
)
|
|
||||||
from .pokergo import (
|
from .pokergo import (
|
||||||
PokerGoCollectionIE,
|
PokerGoCollectionIE,
|
||||||
PokerGoIE,
|
PokerGoIE,
|
||||||
|
@ -2256,6 +2249,10 @@ from .ufctv import (
|
||||||
)
|
)
|
||||||
from .ukcolumn import UkColumnIE
|
from .ukcolumn import UkColumnIE
|
||||||
from .uktvplay import UKTVPlayIE
|
from .uktvplay import UKTVPlayIE
|
||||||
|
from .uliza import (
|
||||||
|
UlizaPlayerIE,
|
||||||
|
UlizaPortalIE,
|
||||||
|
)
|
||||||
from .umg import UMGDeIE
|
from .umg import UMGDeIE
|
||||||
from .unistra import UnistraIE
|
from .unistra import UnistraIE
|
||||||
from .unity import UnityIE
|
from .unity import UnityIE
|
||||||
|
@ -2284,10 +2281,6 @@ from .utreon import UtreonIE
|
||||||
from .varzesh3 import Varzesh3IE
|
from .varzesh3 import Varzesh3IE
|
||||||
from .vbox7 import Vbox7IE
|
from .vbox7 import Vbox7IE
|
||||||
from .veo import VeoIE
|
from .veo import VeoIE
|
||||||
from .veoh import (
|
|
||||||
VeohIE,
|
|
||||||
VeohUserIE,
|
|
||||||
)
|
|
||||||
from .vesti import VestiIE
|
from .vesti import VestiIE
|
||||||
from .vevo import (
|
from .vevo import (
|
||||||
VevoIE,
|
VevoIE,
|
||||||
|
|
|
@ -79,7 +79,7 @@ class ChaturbateIE(InfoExtractor):
|
||||||
'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True),
|
'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_from_webpage(self, video_id, tld):
|
def _extract_from_html(self, video_id, tld):
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
f'https://chaturbate.{tld}/{video_id}/', video_id,
|
f'https://chaturbate.{tld}/{video_id}/', video_id,
|
||||||
headers=self.geo_verification_headers(), impersonate=True)
|
headers=self.geo_verification_headers(), impersonate=True)
|
||||||
|
@ -151,4 +151,4 @@ class ChaturbateIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id, tld = self._match_valid_url(url).group('id', 'tld')
|
video_id, tld = self._match_valid_url(url).group('id', 'tld')
|
||||||
return self._extract_from_api(video_id, tld) or self._extract_from_webpage(video_id, tld)
|
return self._extract_from_api(video_id, tld) or self._extract_from_html(video_id, tld)
|
||||||
|
|
160
yt_dlp/extractor/kenh14.py
Normal file
160
yt_dlp/extractor/kenh14.py
Normal file
|
@ -0,0 +1,160 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
extract_attributes,
|
||||||
|
get_element_by_class,
|
||||||
|
get_element_html_by_attribute,
|
||||||
|
get_elements_html_by_class,
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
parse_iso8601,
|
||||||
|
remove_start,
|
||||||
|
strip_or_none,
|
||||||
|
unescapeHTML,
|
||||||
|
update_url,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class Kenh14VideoIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://video\.kenh14\.vn/(?:video/)?[\w-]+-(?P<id>[0-9]+)\.chn'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://video.kenh14.vn/video/mo-hop-iphone-14-pro-max-nguon-unbox-therapy-316173.chn',
|
||||||
|
'md5': '1ed67f9c3a1e74acf15db69590cf6210',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '316173',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Video mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
|
||||||
|
'description': 'Video mở hộp iPhone 14 Pro MaxVideo mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
|
||||||
|
'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
|
||||||
|
'tags': [],
|
||||||
|
'uploader': 'Unbox Therapy',
|
||||||
|
'upload_date': '20220517',
|
||||||
|
'view_count': int,
|
||||||
|
'duration': 722.86,
|
||||||
|
'timestamp': 1652764468,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.kenh14.vn/video-316174.chn',
|
||||||
|
'md5': '2b41877d2afaf4a3f487ceda8e5c7cbd',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '316174',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Khoảnh khắc VĐV nằm gục khóc sau chiến thắng: 7 năm trời Việt Nam mới có HCV kiếm chém nữ, chỉ có 8 tháng để khổ luyện trước khi lên sàn đấu',
|
||||||
|
'description': 'md5:de86aa22e143e2b277bce8ec9c6f17dc',
|
||||||
|
'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
|
||||||
|
'tags': [],
|
||||||
|
'upload_date': '20220517',
|
||||||
|
'view_count': int,
|
||||||
|
'duration': 70.04,
|
||||||
|
'timestamp': 1652766021,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.kenh14.vn/0-344740.chn',
|
||||||
|
'md5': 'b843495d5e728142c8870c09b46df2a9',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '344740',
|
||||||
|
'ext': 'mov',
|
||||||
|
'title': 'Kỳ Duyên đầy căng thẳng trong buổi ra quân đi Miss Universe, nghi thức tuyên thuệ lần đầu xuất hiện gây nhiều tranh cãi',
|
||||||
|
'description': 'md5:2a2dbb4a7397169fb21ee68f09160497',
|
||||||
|
'thumbnail': r're:^https?://kenh14cdn\.com/.*\.jpg$',
|
||||||
|
'tags': ['kỳ duyên', 'Kỳ Duyên tuyên thuệ', 'miss universe'],
|
||||||
|
'uploader': 'Quang Vũ',
|
||||||
|
'upload_date': '20241024',
|
||||||
|
'view_count': int,
|
||||||
|
'duration': 198.88,
|
||||||
|
'timestamp': 1729741590,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
attrs = extract_attributes(get_element_html_by_attribute('type', 'VideoStream', webpage) or '')
|
||||||
|
direct_url = attrs['data-vid']
|
||||||
|
|
||||||
|
metadata = self._download_json(
|
||||||
|
'https://api.kinghub.vn/video/api/v1/detailVideoByGet?FileName={}'.format(
|
||||||
|
remove_start(direct_url, 'kenh14cdn.com/')), video_id, fatal=False)
|
||||||
|
|
||||||
|
formats = [{'url': f'https://{direct_url}', 'format_id': 'http', 'quality': 1}]
|
||||||
|
subtitles = {}
|
||||||
|
video_data = self._download_json(
|
||||||
|
f'https://{direct_url}.json', video_id, note='Downloading video data', fatal=False)
|
||||||
|
if hls_url := traverse_obj(video_data, ('hls', {url_or_none})):
|
||||||
|
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
|
hls_url, video_id, m3u8_id='hls', fatal=False)
|
||||||
|
formats.extend(fmts)
|
||||||
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
|
if dash_url := traverse_obj(video_data, ('mpd', {url_or_none})):
|
||||||
|
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||||
|
dash_url, video_id, mpd_id='dash', fatal=False)
|
||||||
|
formats.extend(fmts)
|
||||||
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
|
|
||||||
|
return {
|
||||||
|
**traverse_obj(metadata, {
|
||||||
|
'duration': ('duration', {parse_duration}),
|
||||||
|
'uploader': ('author', {strip_or_none}),
|
||||||
|
'timestamp': ('uploadtime', {parse_iso8601(delimiter=' ')}),
|
||||||
|
'view_count': ('views', {int_or_none}),
|
||||||
|
}),
|
||||||
|
'id': video_id,
|
||||||
|
'title': (
|
||||||
|
traverse_obj(metadata, ('title', {strip_or_none}))
|
||||||
|
or clean_html(self._og_search_title(webpage))
|
||||||
|
or clean_html(get_element_by_class('vdbw-title', webpage))),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'description': (
|
||||||
|
clean_html(self._og_search_description(webpage))
|
||||||
|
or clean_html(get_element_by_class('vdbw-sapo', webpage))),
|
||||||
|
'thumbnail': (self._og_search_thumbnail(webpage) or attrs.get('data-thumb')),
|
||||||
|
'tags': traverse_obj(self._html_search_meta('keywords', webpage), (
|
||||||
|
{lambda x: x.split(';')}, ..., filter)),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class Kenh14PlaylistIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://video\.kenh14\.vn/playlist/[\w-]+-(?P<id>[0-9]+)\.chn'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://video.kenh14.vn/playlist/tran-tinh-naked-love-mua-2-71.chn',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '71',
|
||||||
|
'title': 'Trần Tình (Naked love) mùa 2',
|
||||||
|
'description': 'md5:e9522339304956dea931722dd72eddb2',
|
||||||
|
'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
|
||||||
|
},
|
||||||
|
'playlist_count': 9,
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.kenh14.vn/playlist/0-72.chn',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '72',
|
||||||
|
'title': 'Lau Lại Đầu Từ',
|
||||||
|
'description': 'Cùng xem xưa và nay có gì khác biệt nhé!',
|
||||||
|
'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
|
||||||
|
},
|
||||||
|
'playlist_count': 6,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
playlist_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
|
category_detail = get_element_by_class('category-detail', webpage) or ''
|
||||||
|
embed_info = traverse_obj(
|
||||||
|
self._yield_json_ld(webpage, playlist_id),
|
||||||
|
(lambda _, v: v['name'] and v['alternateName'], any)) or {}
|
||||||
|
|
||||||
|
return self.playlist_from_matches(
|
||||||
|
get_elements_html_by_class('video-item', webpage), playlist_id,
|
||||||
|
(clean_html(get_element_by_class('name', category_detail)) or unescapeHTML(embed_info.get('name'))),
|
||||||
|
getter=lambda x: 'https://video.kenh14.vn/video/video-{}.chn'.format(extract_attributes(x)['data-id']),
|
||||||
|
ie=Kenh14VideoIE, playlist_description=(
|
||||||
|
clean_html(get_element_by_class('description', category_detail))
|
||||||
|
or unescapeHTML(embed_info.get('alternateName'))),
|
||||||
|
thumbnail=traverse_obj(
|
||||||
|
self._og_search_thumbnail(webpage),
|
||||||
|
({url_or_none}, {update_url(query=None)})))
|
|
@ -1,291 +0,0 @@
|
||||||
import functools
|
|
||||||
import json
|
|
||||||
import uuid
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
OnDemandPagedList,
|
|
||||||
determine_ext,
|
|
||||||
dict_get,
|
|
||||||
float_or_none,
|
|
||||||
traverse_obj,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class MildomBaseIE(InfoExtractor):
|
|
||||||
_GUEST_ID = None
|
|
||||||
|
|
||||||
def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None):
|
|
||||||
if not self._GUEST_ID:
|
|
||||||
self._GUEST_ID = f'pc-gp-{uuid.uuid4()}'
|
|
||||||
|
|
||||||
content = self._download_json(
|
|
||||||
url, video_id, note=note, data=json.dumps(body).encode() if body else None,
|
|
||||||
headers={'Content-Type': 'application/json'} if body else {},
|
|
||||||
query={
|
|
||||||
'__guest_id': self._GUEST_ID,
|
|
||||||
'__platform': 'web',
|
|
||||||
**(query or {}),
|
|
||||||
})
|
|
||||||
|
|
||||||
if content['code'] != 0:
|
|
||||||
raise ExtractorError(
|
|
||||||
f'Mildom says: {content["message"]} (code {content["code"]})',
|
|
||||||
expected=True)
|
|
||||||
return content['body']
|
|
||||||
|
|
||||||
|
|
||||||
class MildomIE(MildomBaseIE):
|
|
||||||
IE_NAME = 'mildom'
|
|
||||||
IE_DESC = 'Record ongoing live by specific user in Mildom'
|
|
||||||
_VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/(?P<id>\d+)'
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(f'https://www.mildom.com/{video_id}', video_id)
|
|
||||||
|
|
||||||
enterstudio = self._call_api(
|
|
||||||
'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', video_id,
|
|
||||||
note='Downloading live metadata', query={'user_id': video_id})
|
|
||||||
result_video_id = enterstudio.get('log_id', video_id)
|
|
||||||
|
|
||||||
servers = self._call_api(
|
|
||||||
'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', result_video_id,
|
|
||||||
note='Downloading live server list', query={
|
|
||||||
'user_id': video_id,
|
|
||||||
'live_server_type': 'hls',
|
|
||||||
})
|
|
||||||
|
|
||||||
playback_token = self._call_api(
|
|
||||||
'https://cloudac.mildom.com/nonolive/gappserv/live/token', result_video_id,
|
|
||||||
note='Obtaining live playback token', body={'host_id': video_id, 'type': 'hls'})
|
|
||||||
playback_token = traverse_obj(playback_token, ('data', ..., 'token'), get_all=False)
|
|
||||||
if not playback_token:
|
|
||||||
raise ExtractorError('Failed to obtain live playback token')
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
f'{servers["stream_server"]}/{video_id}_master.m3u8?{playback_token}',
|
|
||||||
result_video_id, 'mp4', headers={
|
|
||||||
'Referer': 'https://www.mildom.com/',
|
|
||||||
'Origin': 'https://www.mildom.com',
|
|
||||||
})
|
|
||||||
|
|
||||||
for fmt in formats:
|
|
||||||
fmt.setdefault('http_headers', {})['Referer'] = 'https://www.mildom.com/'
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': result_video_id,
|
|
||||||
'title': self._html_search_meta('twitter:description', webpage, default=None) or traverse_obj(enterstudio, 'anchor_intro'),
|
|
||||||
'description': traverse_obj(enterstudio, 'intro', 'live_intro', expected_type=str),
|
|
||||||
'timestamp': float_or_none(enterstudio.get('live_start_ms'), scale=1000),
|
|
||||||
'uploader': self._html_search_meta('twitter:title', webpage, default=None) or traverse_obj(enterstudio, 'loginname'),
|
|
||||||
'uploader_id': video_id,
|
|
||||||
'formats': formats,
|
|
||||||
'is_live': True,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class MildomVodIE(MildomBaseIE):
|
|
||||||
IE_NAME = 'mildom:vod'
|
|
||||||
IE_DESC = 'VOD in Mildom'
|
|
||||||
_VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+-?[0-9]*)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://www.mildom.com/playback/10882672/10882672-1597662269',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '10882672-1597662269',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '始めてのミルダム配信じゃぃ!',
|
|
||||||
'thumbnail': r're:^https?://.*\.(png|jpg)$',
|
|
||||||
'upload_date': '20200817',
|
|
||||||
'duration': 4138.37,
|
|
||||||
'description': 'ゲームをしたくて!',
|
|
||||||
'timestamp': 1597662269.0,
|
|
||||||
'uploader_id': '10882672',
|
|
||||||
'uploader': 'kson組長(けいそん)',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://www.mildom.com/playback/10882672/10882672-1597758589870-477',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '10882672-1597758589870-477',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '【kson】感染メイズ!麻酔銃で無双する',
|
|
||||||
'thumbnail': r're:^https?://.*\.(png|jpg)$',
|
|
||||||
'timestamp': 1597759093.0,
|
|
||||||
'uploader': 'kson組長(けいそん)',
|
|
||||||
'duration': 4302.58,
|
|
||||||
'uploader_id': '10882672',
|
|
||||||
'description': 'このステージ絶対乗り越えたい',
|
|
||||||
'upload_date': '20200818',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://www.mildom.com/playback/10882672/10882672-buha9td2lrn97fk2jme0',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '10882672-buha9td2lrn97fk2jme0',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '【kson組長】CART RACER!!!',
|
|
||||||
'thumbnail': r're:^https?://.*\.(png|jpg)$',
|
|
||||||
'uploader_id': '10882672',
|
|
||||||
'uploader': 'kson組長(けいそん)',
|
|
||||||
'upload_date': '20201104',
|
|
||||||
'timestamp': 1604494797.0,
|
|
||||||
'duration': 4657.25,
|
|
||||||
'description': 'WTF',
|
|
||||||
},
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
|
|
||||||
webpage = self._download_webpage(f'https://www.mildom.com/playback/{user_id}/{video_id}', video_id)
|
|
||||||
|
|
||||||
autoplay = self._call_api(
|
|
||||||
'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id,
|
|
||||||
note='Downloading playback metadata', query={
|
|
||||||
'v_id': video_id,
|
|
||||||
})['playback']
|
|
||||||
|
|
||||||
formats = [{
|
|
||||||
'url': autoplay['audio_url'],
|
|
||||||
'format_id': 'audio',
|
|
||||||
'protocol': 'm3u8_native',
|
|
||||||
'vcodec': 'none',
|
|
||||||
'acodec': 'aac',
|
|
||||||
'ext': 'm4a',
|
|
||||||
}]
|
|
||||||
for fmt in autoplay['video_link']:
|
|
||||||
formats.append({
|
|
||||||
'format_id': 'video-{}'.format(fmt['name']),
|
|
||||||
'url': fmt['url'],
|
|
||||||
'protocol': 'm3u8_native',
|
|
||||||
'width': fmt['level'] * autoplay['video_width'] // autoplay['video_height'],
|
|
||||||
'height': fmt['level'],
|
|
||||||
'vcodec': 'h264',
|
|
||||||
'acodec': 'aac',
|
|
||||||
'ext': 'mp4',
|
|
||||||
})
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': self._html_search_meta(('og:description', 'description'), webpage, default=None) or autoplay.get('title'),
|
|
||||||
'description': traverse_obj(autoplay, 'video_intro'),
|
|
||||||
'timestamp': float_or_none(autoplay.get('publish_time'), scale=1000),
|
|
||||||
'duration': float_or_none(autoplay.get('video_length'), scale=1000),
|
|
||||||
'thumbnail': dict_get(autoplay, ('upload_pic', 'video_pic')),
|
|
||||||
'uploader': traverse_obj(autoplay, ('author_info', 'login_name')),
|
|
||||||
'uploader_id': user_id,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class MildomClipIE(MildomBaseIE):
|
|
||||||
IE_NAME = 'mildom:clip'
|
|
||||||
IE_DESC = 'Clip in Mildom'
|
|
||||||
_VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/clip/(?P<id>(?P<user_id>\d+)-[a-zA-Z0-9]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://www.mildom.com/clip/10042245-63921673e7b147ebb0806d42b5ba5ce9',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '10042245-63921673e7b147ebb0806d42b5ba5ce9',
|
|
||||||
'title': '全然違ったよ',
|
|
||||||
'timestamp': 1619181890,
|
|
||||||
'duration': 59,
|
|
||||||
'thumbnail': r're:https?://.+',
|
|
||||||
'uploader': 'ざきんぽ',
|
|
||||||
'uploader_id': '10042245',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://www.mildom.com/clip/10111524-ebf4036e5aa8411c99fb3a1ae0902864',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '10111524-ebf4036e5aa8411c99fb3a1ae0902864',
|
|
||||||
'title': 'かっこいい',
|
|
||||||
'timestamp': 1621094003,
|
|
||||||
'duration': 59,
|
|
||||||
'thumbnail': r're:https?://.+',
|
|
||||||
'uploader': '(ルーキー',
|
|
||||||
'uploader_id': '10111524',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://www.mildom.com/clip/10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
|
|
||||||
'title': 'あ',
|
|
||||||
'timestamp': 1614769431,
|
|
||||||
'duration': 31,
|
|
||||||
'thumbnail': r're:https?://.+',
|
|
||||||
'uploader': 'ドルゴルスレンギーン=ダグワドルジ',
|
|
||||||
'uploader_id': '10660174',
|
|
||||||
},
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
|
|
||||||
webpage = self._download_webpage(f'https://www.mildom.com/clip/{video_id}', video_id)
|
|
||||||
|
|
||||||
clip_detail = self._call_api(
|
|
||||||
'https://cloudac-cf-jp.mildom.com/nonolive/videocontent/clip/detail', video_id,
|
|
||||||
note='Downloading playback metadata', query={
|
|
||||||
'clip_id': video_id,
|
|
||||||
})
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': self._html_search_meta(
|
|
||||||
('og:description', 'description'), webpage, default=None) or clip_detail.get('title'),
|
|
||||||
'timestamp': float_or_none(clip_detail.get('create_time')),
|
|
||||||
'duration': float_or_none(clip_detail.get('length')),
|
|
||||||
'thumbnail': clip_detail.get('cover'),
|
|
||||||
'uploader': traverse_obj(clip_detail, ('user_info', 'loginname')),
|
|
||||||
'uploader_id': user_id,
|
|
||||||
|
|
||||||
'url': clip_detail['url'],
|
|
||||||
'ext': determine_ext(clip_detail.get('url'), 'mp4'),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class MildomUserVodIE(MildomBaseIE):
|
|
||||||
IE_NAME = 'mildom:user:vod'
|
|
||||||
IE_DESC = 'Download all VODs from specific user in Mildom'
|
|
||||||
_VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/profile/(?P<id>\d+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://www.mildom.com/profile/10093333',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '10093333',
|
|
||||||
'title': 'Uploads from ねこばたけ',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 732,
|
|
||||||
}, {
|
|
||||||
'url': 'https://www.mildom.com/profile/10882672',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '10882672',
|
|
||||||
'title': 'Uploads from kson組長(けいそん)',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 201,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _fetch_page(self, user_id, page):
|
|
||||||
page += 1
|
|
||||||
reply = self._call_api(
|
|
||||||
'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
|
|
||||||
user_id, note=f'Downloading page {page}', query={
|
|
||||||
'user_id': user_id,
|
|
||||||
'page': page,
|
|
||||||
'limit': '30',
|
|
||||||
})
|
|
||||||
if not reply:
|
|
||||||
return
|
|
||||||
for x in reply:
|
|
||||||
v_id = x.get('v_id')
|
|
||||||
if not v_id:
|
|
||||||
continue
|
|
||||||
yield self.url_result(f'https://www.mildom.com/playback/{user_id}/{v_id}')
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
user_id = self._match_id(url)
|
|
||||||
self.to_screen(f'This will download all VODs belonging to user. To download ongoing live video, use "https://www.mildom.com/{user_id}" instead')
|
|
||||||
|
|
||||||
profile = self._call_api(
|
|
||||||
'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id,
|
|
||||||
query={'user_id': user_id}, note='Downloading user profile')['user_info']
|
|
||||||
|
|
||||||
return self.playlist_result(
|
|
||||||
OnDemandPagedList(functools.partial(self._fetch_page, user_id), 30),
|
|
||||||
user_id, f'Uploads from {profile["loginname"]}')
|
|
|
@ -1,4 +1,5 @@
|
||||||
import itertools
|
import itertools
|
||||||
|
import json
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
@ -7,11 +8,12 @@ from ..networking.exceptions import HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
try_get,
|
traverse_obj,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
@ -25,18 +27,23 @@ class NRKBaseIE(InfoExtractor):
|
||||||
nrk-od-no\.telenorcdn\.net|
|
nrk-od-no\.telenorcdn\.net|
|
||||||
minicdn-od\.nrk\.no/od/nrkhd-osl-rr\.netwerk\.no/no
|
minicdn-od\.nrk\.no/od/nrkhd-osl-rr\.netwerk\.no/no
|
||||||
)/'''
|
)/'''
|
||||||
|
_NETRC_MACHINE = 'nrk'
|
||||||
|
_LOGIN_URL = 'https://innlogging.nrk.no/logginn'
|
||||||
|
_AUTH_TOKEN = ''
|
||||||
|
_API_CALL_HEADERS = {'Accept': 'application/json;device=player-core'}
|
||||||
|
|
||||||
|
def _extract_nrk_formats_and_subtitles(self, asset_url, video_id):
|
||||||
|
|
||||||
def _extract_nrk_formats(self, asset_url, video_id):
|
|
||||||
if re.match(r'https?://[^/]+\.akamaihd\.net/i/', asset_url):
|
if re.match(r'https?://[^/]+\.akamaihd\.net/i/', asset_url):
|
||||||
return self._extract_akamai_formats(asset_url, video_id)
|
return self._extract_akamai_formats(asset_url, video_id)
|
||||||
asset_url = re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url)
|
asset_url = re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only|adap=.+?\b)&?', '', asset_url)
|
||||||
formats = self._extract_m3u8_formats(
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||||
asset_url, video_id, 'mp4', 'm3u8_native', fatal=False)
|
asset_url, video_id, 'mp4', 'm3u8_native', fatal=False)
|
||||||
if not formats and re.search(self._CDN_REPL_REGEX, asset_url):
|
if not formats and re.search(self._CDN_REPL_REGEX, asset_url):
|
||||||
formats = self._extract_m3u8_formats(
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||||
re.sub(self._CDN_REPL_REGEX, '://nrk-od-%02d.akamaized.net/no/' % random.randint(0, 99), asset_url),
|
re.sub(self._CDN_REPL_REGEX, '://nrk-od-%02d.akamaized.net/no/' % random.randint(0, 99), asset_url),
|
||||||
video_id, 'mp4', 'm3u8_native', fatal=False)
|
video_id, 'mp4', 'm3u8_native', fatal=False)
|
||||||
return formats
|
return formats, subtitles
|
||||||
|
|
||||||
def _raise_error(self, data):
|
def _raise_error(self, data):
|
||||||
MESSAGES = {
|
MESSAGES = {
|
||||||
|
@ -47,7 +54,7 @@ class NRKBaseIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
message_type = data.get('messageType', '')
|
message_type = data.get('messageType', '')
|
||||||
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
|
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
|
||||||
if 'IsGeoBlocked' in message_type or try_get(data, lambda x: x['usageRights']['isGeoBlocked']) is True:
|
if 'IsGeoBlocked' in message_type or traverse_obj(data, ('usageRights', 'isGeoBlocked')) is True:
|
||||||
self.raise_geo_restricted(
|
self.raise_geo_restricted(
|
||||||
msg=MESSAGES.get('ProgramIsGeoBlocked'),
|
msg=MESSAGES.get('ProgramIsGeoBlocked'),
|
||||||
countries=self._GEO_COUNTRIES)
|
countries=self._GEO_COUNTRIES)
|
||||||
|
@ -58,7 +65,7 @@ class NRKBaseIE(InfoExtractor):
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
urljoin('https://psapi.nrk.no/', path),
|
urljoin('https://psapi.nrk.no/', path),
|
||||||
video_id, note or f'Downloading {item} JSON',
|
video_id, note or f'Downloading {item} JSON',
|
||||||
fatal=fatal, query=query)
|
fatal=fatal, query=query, headers=self._API_CALL_HEADERS)
|
||||||
|
|
||||||
|
|
||||||
class NRKIE(NRKBaseIE):
|
class NRKIE(NRKBaseIE):
|
||||||
|
@ -73,17 +80,20 @@ class NRKIE(NRKBaseIE):
|
||||||
)
|
)
|
||||||
(?P<id>[^?\#&]+)
|
(?P<id>[^?\#&]+)
|
||||||
'''
|
'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# video
|
# video
|
||||||
'url': 'http://www.nrk.no/video/PS*150533',
|
'url': 'http://www.nrk.no/video/PS*150533',
|
||||||
'md5': 'f46be075326e23ad0e524edfcb06aeb6',
|
'md5': '2b88a652ad2e275591e61cf550887eec',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '150533',
|
'id': '150533',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Dompap og andre fugler i Piip-Show',
|
'title': 'Dompap og andre fugler i Piip-Show',
|
||||||
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
||||||
'duration': 262,
|
'duration': 262,
|
||||||
|
'timestamp': 1395751833,
|
||||||
|
'upload_date': '20140325',
|
||||||
|
'thumbnail': 'https://gfx.nrk.no/0mZgeckEzRU6qTWrbQHD2QcyralHrYB08wBvh-K-AtAQ',
|
||||||
|
'alt_title': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# audio
|
# audio
|
||||||
|
@ -95,6 +105,10 @@ class NRKIE(NRKBaseIE):
|
||||||
'title': 'Slik høres internett ut når du er blind',
|
'title': 'Slik høres internett ut når du er blind',
|
||||||
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
|
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
|
||||||
'duration': 20,
|
'duration': 20,
|
||||||
|
'alt_title': 'Cathrine Lie Wathne er blind, og bruker hurtigtaster for å navigere seg rundt på ulike nettsider.',
|
||||||
|
'upload_date': '20140425',
|
||||||
|
'timestamp': 1398429565,
|
||||||
|
'thumbnail': 'https://gfx.nrk.no/urxQMSXF-WnbfjBH5ke2igLGyN27EdJVWZ6FOsEAclhA',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
|
'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
|
||||||
|
@ -144,18 +158,10 @@ class NRKIE(NRKBaseIE):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url).split('/')[-1]
|
video_id = self._match_id(url).split('/')[-1]
|
||||||
|
|
||||||
def call_playback_api(item, query=None):
|
|
||||||
try:
|
|
||||||
return self._call_api(f'playback/{item}/program/{video_id}', video_id, item, query=query)
|
|
||||||
except ExtractorError as e:
|
|
||||||
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
|
||||||
return self._call_api(f'playback/{item}/{video_id}', video_id, item, query=query)
|
|
||||||
raise
|
|
||||||
|
|
||||||
# known values for preferredCdn: akamai, iponly, minicdn and telenor
|
# known values for preferredCdn: akamai, iponly, minicdn and telenor
|
||||||
manifest = call_playback_api('manifest', {'preferredCdn': 'akamai'})
|
manifest = self._call_api(f'playback/manifest/{video_id}', video_id, 'manifest', query={'preferredCdn': 'akamai'})
|
||||||
|
|
||||||
video_id = try_get(manifest, lambda x: x['id'], str) or video_id
|
video_id = manifest.get('id') or video_id
|
||||||
|
|
||||||
if manifest.get('playability') == 'nonPlayable':
|
if manifest.get('playability') == 'nonPlayable':
|
||||||
self._raise_error(manifest['nonPlayable'])
|
self._raise_error(manifest['nonPlayable'])
|
||||||
|
@ -163,17 +169,22 @@ class NRKIE(NRKBaseIE):
|
||||||
playable = manifest['playable']
|
playable = manifest['playable']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for asset in playable['assets']:
|
subtitles = {}
|
||||||
if not isinstance(asset, dict):
|
has_drm = False
|
||||||
continue
|
for asset in traverse_obj(playable, ('assets', ..., {dict})):
|
||||||
if asset.get('encrypted'):
|
encryption_scheme = asset.get('encryptionScheme')
|
||||||
|
if encryption_scheme not in (None, 'none', 'statickey'):
|
||||||
|
self.report_warning(f'Skipping asset with unsupported encryption scheme "{encryption_scheme}"')
|
||||||
|
has_drm = True
|
||||||
continue
|
continue
|
||||||
format_url = url_or_none(asset.get('url'))
|
format_url = url_or_none(asset.get('url'))
|
||||||
if not format_url:
|
if not format_url:
|
||||||
continue
|
continue
|
||||||
asset_format = (asset.get('format') or '').lower()
|
asset_format = (asset.get('format') or '').lower()
|
||||||
if asset_format == 'hls' or determine_ext(format_url) == 'm3u8':
|
if asset_format == 'hls' or determine_ext(format_url) == 'm3u8':
|
||||||
formats.extend(self._extract_nrk_formats(format_url, video_id))
|
fmts, subs = self._extract_nrk_formats_and_subtitles(format_url, video_id)
|
||||||
|
formats.extend(fmts)
|
||||||
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
elif asset_format == 'mp3':
|
elif asset_format == 'mp3':
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
|
@ -181,19 +192,22 @@ class NRKIE(NRKBaseIE):
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
})
|
})
|
||||||
|
|
||||||
data = call_playback_api('metadata')
|
if not formats and has_drm:
|
||||||
|
self.report_drm(video_id)
|
||||||
|
|
||||||
preplay = data['preplay']
|
data = self._call_api(traverse_obj(manifest, ('_links', 'metadata', 'href', {str})), video_id, 'metadata')
|
||||||
titles = preplay['titles']
|
|
||||||
title = titles['title']
|
preplay = data.get('preplay')
|
||||||
|
titles = preplay.get('titles')
|
||||||
|
title = titles.get('title')
|
||||||
alt_title = titles.get('subtitle')
|
alt_title = titles.get('subtitle')
|
||||||
|
|
||||||
description = try_get(preplay, lambda x: x['description'].replace('\r', '\n'))
|
description = preplay.get('description')
|
||||||
duration = parse_duration(playable.get('duration')) or parse_duration(data.get('duration'))
|
# Use m3u8 vod dueration for NRKSkoleIE because of incorrect duration in metadata
|
||||||
|
duration = parse_duration(playable.get('duration')) or parse_duration(data.get('duration')) or self._extract_m3u8_vod_duration(formats[0]['url'], video_id)
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
for image in try_get(
|
for image in traverse_obj(preplay, ('poster', 'images', {list})) or []:
|
||||||
preplay, lambda x: x['poster']['images'], list) or []:
|
|
||||||
if not isinstance(image, dict):
|
if not isinstance(image, dict):
|
||||||
continue
|
continue
|
||||||
image_url = url_or_none(image.get('url'))
|
image_url = url_or_none(image.get('url'))
|
||||||
|
@ -205,13 +219,13 @@ class NRKIE(NRKBaseIE):
|
||||||
'height': int_or_none(image.get('pixelHeight')),
|
'height': int_or_none(image.get('pixelHeight')),
|
||||||
})
|
})
|
||||||
|
|
||||||
subtitles = {}
|
for sub in traverse_obj(playable, ('subtitles', {list})) or []:
|
||||||
for sub in try_get(playable, lambda x: x['subtitles'], list) or []:
|
|
||||||
if not isinstance(sub, dict):
|
if not isinstance(sub, dict):
|
||||||
continue
|
continue
|
||||||
sub_url = url_or_none(sub.get('webVtt'))
|
sub_url = url_or_none(sub.get('webVtt'))
|
||||||
if not sub_url:
|
if not sub_url:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
sub_key = str_or_none(sub.get('language')) or 'nb'
|
sub_key = str_or_none(sub.get('language')) or 'nb'
|
||||||
sub_type = str_or_none(sub.get('type'))
|
sub_type = str_or_none(sub.get('type'))
|
||||||
if sub_type:
|
if sub_type:
|
||||||
|
@ -220,8 +234,26 @@ class NRKIE(NRKBaseIE):
|
||||||
'url': sub_url,
|
'url': sub_url,
|
||||||
})
|
})
|
||||||
|
|
||||||
legal_age = try_get(
|
chapters = []
|
||||||
data, lambda x: x['legalAge']['body']['rating']['code'], str)
|
if data.get('skipDialogInfo'):
|
||||||
|
chapters = [item for item in [{
|
||||||
|
'start_time': float_or_none(traverse_obj(data, ('skipDialogInfo', 'startIntroInSeconds'))),
|
||||||
|
'end_time': float_or_none(traverse_obj(data, ('skipDialogInfo', 'endIntroInSeconds'))),
|
||||||
|
'title': 'Intro',
|
||||||
|
}, {
|
||||||
|
'start_time': float_or_none(traverse_obj(data, ('skipDialogInfo', 'startCreditsInSeconds'))),
|
||||||
|
'end_time': duration,
|
||||||
|
'title': 'Outro',
|
||||||
|
}] if item['start_time'] != item['end_time']]
|
||||||
|
if preplay.get('indexPoints'):
|
||||||
|
seconds_or_none = lambda x: float_or_none(parse_duration(x))
|
||||||
|
chapters += traverse_obj(preplay, ('indexPoints', ..., {
|
||||||
|
'start_time': ('startPoint', {seconds_or_none}),
|
||||||
|
'end_time': ('endPoint', {seconds_or_none}),
|
||||||
|
'title': ('title', {lambda x: x}),
|
||||||
|
}))
|
||||||
|
chapters = sorted(chapters, key=lambda x: x['start_time']) if chapters else None
|
||||||
|
legal_age = traverse_obj(data, ('legalAge', 'body', 'rating', 'code'))
|
||||||
# https://en.wikipedia.org/wiki/Norwegian_Media_Authority
|
# https://en.wikipedia.org/wiki/Norwegian_Media_Authority
|
||||||
age_limit = None
|
age_limit = None
|
||||||
if legal_age:
|
if legal_age:
|
||||||
|
@ -230,7 +262,7 @@ class NRKIE(NRKBaseIE):
|
||||||
elif legal_age.isdigit():
|
elif legal_age.isdigit():
|
||||||
age_limit = int_or_none(legal_age)
|
age_limit = int_or_none(legal_age)
|
||||||
|
|
||||||
is_series = try_get(data, lambda x: x['_links']['series']['name']) == 'series'
|
is_series = traverse_obj(data, ('_links', 'series', 'name')) == 'series'
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -242,13 +274,23 @@ class NRKIE(NRKBaseIE):
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'timestamp': parse_iso8601(try_get(manifest, lambda x: x['availability']['onDemand']['from'], str)),
|
'chapters': chapters,
|
||||||
|
'timestamp': parse_iso8601(traverse_obj(data, ('availability', 'onDemand', 'from'))),
|
||||||
}
|
}
|
||||||
|
|
||||||
if is_series:
|
if is_series:
|
||||||
series = season_id = season_number = episode = episode_number = None
|
series = season_id = season_number = episode = episode_number = None
|
||||||
|
|
||||||
programs = self._call_api(
|
programs = self._call_api(
|
||||||
f'programs/{video_id}', video_id, 'programs', fatal=False)
|
f'programs/{video_id}', video_id, 'programs', fatal=False)
|
||||||
|
matched_dates = [
|
||||||
|
int(match.group()) // 1000
|
||||||
|
for date in [
|
||||||
|
traverse_obj(programs, ('firstTimeTransmitted', 'publicationDate')),
|
||||||
|
traverse_obj(programs, ('usageRights', 'availableFrom')),
|
||||||
|
] if date for match in [re.search(r'\d+', date)] if match
|
||||||
|
]
|
||||||
|
if matched_dates:
|
||||||
|
info.update({'timestamp': min(info['timestamp'], *matched_dates)})
|
||||||
if programs and isinstance(programs, dict):
|
if programs and isinstance(programs, dict):
|
||||||
series = str_or_none(programs.get('seriesTitle'))
|
series = str_or_none(programs.get('seriesTitle'))
|
||||||
season_id = str_or_none(programs.get('seasonId'))
|
season_id = str_or_none(programs.get('seasonId'))
|
||||||
|
@ -284,8 +326,38 @@ class NRKIE(NRKBaseIE):
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
def _perform_login(self, username, password):
|
||||||
|
try:
|
||||||
|
self._download_json(
|
||||||
|
self._LOGIN_URL, None, headers={'Content-Type': 'application/json; charset=UTF-8', 'accept': 'application/json; charset=utf-8'},
|
||||||
|
data=json.dumps({
|
||||||
|
'clientId': '',
|
||||||
|
'hashedPassword': {'current': {
|
||||||
|
'hash': password,
|
||||||
|
'recipe': {
|
||||||
|
'algorithm': 'cleartext',
|
||||||
|
'salt': '',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'password': password,
|
||||||
|
'username': username,
|
||||||
|
}).encode())
|
||||||
|
|
||||||
class NRKTVIE(InfoExtractor):
|
self._download_webpage('https://tv.nrk.no/auth/web/login/opsession', None)
|
||||||
|
response = self._download_json('https://tv.nrk.no/auth/session/tokenforsub/_', None)
|
||||||
|
self._AUTH_TOKEN = traverse_obj(response, ('session', 'accessToken'))
|
||||||
|
self._API_CALL_HEADERS['authorization'] = f'Bearer {self._AUTH_TOKEN}'
|
||||||
|
except ExtractorError as e:
|
||||||
|
message = None
|
||||||
|
if isinstance(e.cause, HTTPError) and e.cause.status in (401, 400):
|
||||||
|
resp = self._parse_json(
|
||||||
|
e.cause.response.read().decode(), None, fatal=False) or {}
|
||||||
|
message = next((error['message'] for error in resp['errors'] if error['field'] == 'Password'), None)
|
||||||
|
self.report_warning(message or 'Unable to log in')
|
||||||
|
|
||||||
|
|
||||||
|
class NRKTVIE(NRKBaseIE):
|
||||||
IE_DESC = 'NRK TV and NRK Radio'
|
IE_DESC = 'NRK TV and NRK Radio'
|
||||||
_EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
|
_EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
|
||||||
_VALID_URL = rf'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:[^/]+/)*{_EPISODE_RE}'
|
_VALID_URL = rf'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:[^/]+/)*{_EPISODE_RE}'
|
||||||
|
@ -307,6 +379,14 @@ class NRKTVIE(InfoExtractor):
|
||||||
'ext': 'vtt',
|
'ext': 'vtt',
|
||||||
}],
|
}],
|
||||||
},
|
},
|
||||||
|
'upload_date': '20170627',
|
||||||
|
'chapters': [{'start_time': 0, 'end_time': 2213.0, 'title': '<Untitled Chapter 1>'}, {'start_time': 2213.0, 'end_time': 2223.44, 'title': 'Outro'}],
|
||||||
|
'timestamp': 1498591822,
|
||||||
|
'thumbnail': 'https://gfx.nrk.no/myRSc4vuFlahB60P3n6swwRTQUZI1LqJZl9B7icZFgzA',
|
||||||
|
'alt_title': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||||
|
@ -318,9 +398,31 @@ class NRKTVIE(InfoExtractor):
|
||||||
'alt_title': '23. mai 2014',
|
'alt_title': '23. mai 2014',
|
||||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||||
'duration': 1741,
|
'duration': 1741,
|
||||||
|
'age_limit': 0,
|
||||||
'series': '20 spørsmål',
|
'series': '20 spørsmål',
|
||||||
'episode': '23. mai 2014',
|
'episode': '23. mai 2014',
|
||||||
'age_limit': 0,
|
'upload_date': '20140523',
|
||||||
|
'thumbnail': 'https://gfx.nrk.no/u7uCe79SEfPVGRAGVp2_uAZnNc4mfz_kjXg6Bgek8lMQ',
|
||||||
|
'season_id': '126936',
|
||||||
|
'season_number': 2014,
|
||||||
|
'season': 'Season 2014',
|
||||||
|
'chapters': [
|
||||||
|
{'start_time': 0.0, 'end_time': 39.0, 'title': 'Intro'},
|
||||||
|
{'start_time': 0.0, 'title': 'Velkommen', 'end_time': 152.32},
|
||||||
|
{'start_time': 152.32, 'title': 'Tannpirker', 'end_time': 304.76},
|
||||||
|
{'start_time': 304.76, 'title': 'Orgelbrus', 'end_time': 513.48},
|
||||||
|
{'start_time': 513.48, 'title': 'G-streng', 'end_time': 712.96},
|
||||||
|
{'start_time': 712.96, 'title': 'Medalje', 'end_time': 837.76},
|
||||||
|
{'start_time': 837.76, 'title': 'Globus', 'end_time': 1124.48},
|
||||||
|
{'start_time': 1124.48, 'title': 'Primstav', 'end_time': 1417.4},
|
||||||
|
{'start_time': 1417.4, 'title': 'Fyr', 'end_time': 1721.0},
|
||||||
|
{'start_time': 1721.0, 'end_time': 1741.0, 'title': 'Outro'},
|
||||||
|
],
|
||||||
|
'episode_number': 3,
|
||||||
|
'timestamp': 1400871900,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||||
|
@ -333,6 +435,18 @@ class NRKTVIE(InfoExtractor):
|
||||||
'series': 'Kunnskapskanalen',
|
'series': 'Kunnskapskanalen',
|
||||||
'episode': 'Grunnlovsjubiléet - Stor ståhei for ingenting',
|
'episode': 'Grunnlovsjubiléet - Stor ståhei for ingenting',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'upload_date': '20140524',
|
||||||
|
'episode_number': 17,
|
||||||
|
'chapters': [
|
||||||
|
{'start_time': 0, 'end_time': 4595.0, 'title': '<Untitled Chapter 1>'},
|
||||||
|
{'start_time': 4595.0, 'end_time': 4605.08, 'title': 'Outro'},
|
||||||
|
],
|
||||||
|
'season': 'Season 2014',
|
||||||
|
'timestamp': 1400937600,
|
||||||
|
'thumbnail': 'https://gfx.nrk.no/D2u6-EyVUZpVCq0PdSNHRgdBZCV40ekpk6s9fZWiMtyg',
|
||||||
|
'season_number': 2014,
|
||||||
|
'season_id': '39240',
|
||||||
|
'alt_title': 'Grunnlovsjubiléet - Stor ståhei for ingenting',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -343,23 +457,51 @@ class NRKTVIE(InfoExtractor):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'MSPO40010515',
|
'id': 'MSPO40010515',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
'title': 'Tour de Ski - Sprint fri teknikk, kvinner og menn',
|
||||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'episode': 'Sprint fri teknikk, kvinner og menn',
|
||||||
|
'series': 'Tour de Ski',
|
||||||
|
'thumbnail': 'https://gfx.nrk.no/s9vNwGPGN-Un-UCvitD09we9HRLDxisnipA9K__d5c3Q',
|
||||||
|
'season_id': '53512',
|
||||||
|
'chapters': [
|
||||||
|
{'start_time': 0, 'end_time': 6938.0, 'title': '<Untitled Chapter 1>'},
|
||||||
|
{'start_time': 6938.0, 'end_time': 6947.52, 'title': 'Outro'},
|
||||||
|
],
|
||||||
|
'season_number': 2015,
|
||||||
|
'episode_number': 5,
|
||||||
|
'upload_date': '20150106',
|
||||||
|
'duration': 6947.52,
|
||||||
|
'timestamp': 1420545563,
|
||||||
|
'alt_title': 'Sprint fri teknikk, kvinner og menn',
|
||||||
|
'season': 'Season 2015',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'expected_warnings': ['Failed to download m3u8 information'],
|
|
||||||
'skip': 'particular part is not supported currently',
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
|
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'MSPO40010515',
|
'id': 'MSPO40010515',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
'title': 'Tour de Ski - Sprint fri teknikk, kvinner og menn',
|
||||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'episode': 'Sprint fri teknikk, kvinner og menn',
|
||||||
|
'series': 'Tour de Ski',
|
||||||
|
'thumbnail': 'https://gfx.nrk.no/s9vNwGPGN-Un-UCvitD09we9HRLDxisnipA9K__d5c3Q',
|
||||||
|
'season_id': '53512',
|
||||||
|
'chapters': [
|
||||||
|
{'start_time': 0, 'end_time': 6938.0, 'title': '<Untitled Chapter 1>'},
|
||||||
|
{'start_time': 6938.0, 'end_time': 6947.52, 'title': 'Outro'},
|
||||||
|
],
|
||||||
|
'season_number': 2015,
|
||||||
|
'episode_number': 5,
|
||||||
|
'upload_date': '20150106',
|
||||||
|
'duration': 6947.52,
|
||||||
|
'timestamp': 1420545563,
|
||||||
|
'alt_title': 'Sprint fri teknikk, kvinner og menn',
|
||||||
|
'season': 'Season 2015',
|
||||||
},
|
},
|
||||||
'expected_warnings': ['Failed to download m3u8 information'],
|
'expected_warnings': ['Failed to download m3u8 information'],
|
||||||
'skip': 'Ikke tilgjengelig utenfor Norge',
|
'skip': 'Ikke tilgjengelig utenfor Norge',
|
||||||
|
@ -380,6 +522,7 @@ class NRKTVIE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'ProgramRightsHasExpired',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
|
'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -413,7 +556,7 @@ class NRKTVIE(InfoExtractor):
|
||||||
f'nrk:{video_id}', ie=NRKIE.ie_key(), video_id=video_id)
|
f'nrk:{video_id}', ie=NRKIE.ie_key(), video_id=video_id)
|
||||||
|
|
||||||
|
|
||||||
class NRKTVEpisodeIE(InfoExtractor):
|
class NRKTVEpisodeIE(NRKBaseIE):
|
||||||
_VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/(?P<season_number>\d+)/episode/(?P<episode_number>\d+))'
|
_VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/(?P<season_number>\d+)/episode/(?P<episode_number>\d+))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://tv.nrk.no/serie/hellums-kro/sesong/1/episode/2',
|
'url': 'https://tv.nrk.no/serie/hellums-kro/sesong/1/episode/2',
|
||||||
|
@ -421,13 +564,24 @@ class NRKTVEpisodeIE(InfoExtractor):
|
||||||
'id': 'MUHH36005220',
|
'id': 'MUHH36005220',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Hellums kro - 2. Kro, krig og kjærlighet',
|
'title': 'Hellums kro - 2. Kro, krig og kjærlighet',
|
||||||
'description': 'md5:ad92ddffc04cea8ce14b415deef81787',
|
'description': 'md5:b32a7dc0b1ed27c8064f58b97bda4350',
|
||||||
'duration': 1563.92,
|
'duration': 1563.92,
|
||||||
'series': 'Hellums kro',
|
'series': 'Hellums kro',
|
||||||
'season_number': 1,
|
'season_number': 1,
|
||||||
'episode_number': 2,
|
'episode_number': 2,
|
||||||
'episode': '2. Kro, krig og kjærlighet',
|
'episode': '2. Kro, krig og kjærlighet',
|
||||||
'age_limit': 6,
|
'age_limit': 6,
|
||||||
|
'timestamp': 1572584520,
|
||||||
|
'upload_date': '20191101',
|
||||||
|
'thumbnail': 'https://gfx.nrk.no/2_4mhU2JhR-8IYRC_OMmAQDbbOHgwcHqgi2sBrNrsjkg',
|
||||||
|
'alt_title': '2. Kro, krig og kjærlighet',
|
||||||
|
'season': 'Season 1',
|
||||||
|
'season_id': '124163',
|
||||||
|
'chapters': [
|
||||||
|
{'start_time': 0, 'end_time': 29.0, 'title': '<Untitled Chapter 1>'},
|
||||||
|
{'start_time': 29.0, 'end_time': 50.0, 'title': 'Intro'},
|
||||||
|
{'start_time': 1530.0, 'end_time': 1563.92, 'title': 'Outro'},
|
||||||
|
],
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -453,26 +607,14 @@ class NRKTVEpisodeIE(InfoExtractor):
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id, season_number, episode_number = self._match_valid_url(url).groups()
|
# HEADRequest(url) only works if a regular GET request was recently made by anyone for the specific URL being requested.
|
||||||
|
response = self._request_webpage(url, None, expected_status=True)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
nrk_id = self._match_id(url)
|
||||||
|
|
||||||
info = self._search_json_ld(webpage, display_id, default={})
|
return self.url_result(
|
||||||
nrk_id = info.get('@id') or self._html_search_meta(
|
response.url, NRKTVIE.ie_key(), nrk_id, url_transparent=True,
|
||||||
'nrk:program-id', webpage, default=None) or self._search_regex(
|
)
|
||||||
rf'data-program-id=["\']({NRKTVIE._EPISODE_RE})', webpage,
|
|
||||||
'nrk id')
|
|
||||||
assert re.match(NRKTVIE._EPISODE_RE, nrk_id)
|
|
||||||
|
|
||||||
info.update({
|
|
||||||
'_type': 'url',
|
|
||||||
'id': nrk_id,
|
|
||||||
'url': f'nrk:{nrk_id}',
|
|
||||||
'ie_key': NRKIE.ie_key(),
|
|
||||||
'season_number': int(season_number),
|
|
||||||
'episode_number': int(episode_number),
|
|
||||||
})
|
|
||||||
return info
|
|
||||||
|
|
||||||
|
|
||||||
class NRKTVSerieBaseIE(NRKBaseIE):
|
class NRKTVSerieBaseIE(NRKBaseIE):
|
||||||
|
@ -482,6 +624,9 @@ class NRKTVSerieBaseIE(NRKBaseIE):
|
||||||
entries = []
|
entries = []
|
||||||
for episode in entry_list:
|
for episode in entry_list:
|
||||||
nrk_id = episode.get('prfId') or episode.get('episodeId')
|
nrk_id = episode.get('prfId') or episode.get('episodeId')
|
||||||
|
if traverse_obj(episode, ('availability', 'status')) == 'expired':
|
||||||
|
self.report_warning(episode['availability'].get('label'), nrk_id)
|
||||||
|
continue
|
||||||
if not nrk_id or not isinstance(nrk_id, str):
|
if not nrk_id or not isinstance(nrk_id, str):
|
||||||
continue
|
continue
|
||||||
entries.append(self.url_result(
|
entries.append(self.url_result(
|
||||||
|
@ -508,18 +653,18 @@ class NRKTVSerieBaseIE(NRKBaseIE):
|
||||||
if not assets_key:
|
if not assets_key:
|
||||||
break
|
break
|
||||||
# Extract entries
|
# Extract entries
|
||||||
entries = try_get(
|
entries = traverse_obj(
|
||||||
embedded,
|
embedded,
|
||||||
(lambda x: x[assets_key]['_embedded'][assets_key],
|
(assets_key, '_embedded', assets_key, {list}),
|
||||||
lambda x: x[assets_key]),
|
(assets_key, {list}),
|
||||||
list)
|
)
|
||||||
yield from self._extract_entries(entries)
|
yield from self._extract_entries(entries)
|
||||||
# Find next URL
|
# Find next URL
|
||||||
next_url_path = try_get(
|
next_url_path = traverse_obj(
|
||||||
data,
|
data,
|
||||||
(lambda x: x['_links']['next']['href'],
|
('_links', 'next', 'href'),
|
||||||
lambda x: x['_embedded'][assets_key]['_links']['next']['href']),
|
('_embedded', assets_key, '_links', 'next', 'href'),
|
||||||
str)
|
)
|
||||||
if not next_url_path:
|
if not next_url_path:
|
||||||
break
|
break
|
||||||
data = self._call_api(
|
data = self._call_api(
|
||||||
|
@ -548,6 +693,27 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE):
|
||||||
'title': 'Sesong 1',
|
'title': 'Sesong 1',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 30,
|
'playlist_mincount': 30,
|
||||||
|
}, {
|
||||||
|
'url': 'https://tv.nrk.no/serie/presten/sesong/ekstramateriale',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MUHH47005117',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'description': '',
|
||||||
|
'thumbnail': 'https://gfx.nrk.no/sJZroQqD2P8wGMMl5ADznwqiIlAXaCpNofA2pIhe3udA',
|
||||||
|
'alt_title': 'Bloopers: Episode 1',
|
||||||
|
'chapters': [
|
||||||
|
{'start_time': 0, 'end_time': 356.0, 'title': '<Untitled Chapter 1>'},
|
||||||
|
{'start_time': 356.0, 'end_time': 365.8, 'title': 'Outro'},
|
||||||
|
],
|
||||||
|
'upload_date': '20180302',
|
||||||
|
'timestamp': 1519966800,
|
||||||
|
'title': 'Presten',
|
||||||
|
'age_limit': 0,
|
||||||
|
'duration': 365.8,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# no /sesong/ in path
|
# no /sesong/ in path
|
||||||
'url': 'https://tv.nrk.no/serie/lindmo/2016',
|
'url': 'https://tv.nrk.no/serie/lindmo/2016',
|
||||||
|
@ -572,6 +738,7 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE):
|
||||||
'title': 'September 2015',
|
'title': 'September 2015',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 841,
|
'playlist_mincount': 841,
|
||||||
|
'skip': 'ProgramRightsHasExpired',
|
||||||
}, {
|
}, {
|
||||||
# 180 entries, single page
|
# 180 entries, single page
|
||||||
'url': 'https://tv.nrk.no/serie/spangas/sesong/1',
|
'url': 'https://tv.nrk.no/serie/spangas/sesong/1',
|
||||||
|
@ -594,21 +761,20 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE):
|
||||||
else super().suitable(url))
|
else super().suitable(url))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = self._match_valid_url(url)
|
domain, serie_kind, serie, season_id, season_id_2 = self._match_valid_url(url).group(
|
||||||
domain = mobj.group('domain')
|
'domain', 'serie_kind', 'serie', 'id', 'id_2')
|
||||||
serie_kind = mobj.group('serie_kind')
|
season_id = season_id or season_id_2
|
||||||
serie = mobj.group('serie')
|
|
||||||
season_id = mobj.group('id') or mobj.group('id_2')
|
|
||||||
display_id = f'{serie}/{season_id}'
|
display_id = f'{serie}/{season_id}'
|
||||||
|
|
||||||
|
api_suffix = f'/seasons/{season_id}' if season_id != 'ekstramateriale' else '/extramaterial'
|
||||||
|
|
||||||
data = self._call_api(
|
data = self._call_api(
|
||||||
f'{domain}/catalog/{self._catalog_name(serie_kind)}/{serie}/seasons/{season_id}',
|
f'{domain}/catalog/{self._catalog_name(serie_kind)}/{serie}{api_suffix}',
|
||||||
display_id, 'season', query={'pageSize': 50})
|
display_id, 'season', query={'pageSize': 50})
|
||||||
|
|
||||||
title = try_get(data, lambda x: x['titles']['title'], str) or display_id
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
self._entries(data, display_id),
|
self._entries(data, display_id), display_id,
|
||||||
display_id, title)
|
title=traverse_obj(data, ('titles', 'title', {str})))
|
||||||
|
|
||||||
|
|
||||||
class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||||
|
@ -666,7 +832,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'dickie-dick-dickens',
|
'id': 'dickie-dick-dickens',
|
||||||
'title': 'Dickie Dick Dickens',
|
'title': 'Dickie Dick Dickens',
|
||||||
'description': 'md5:19e67411ffe57f7dce08a943d7a0b91f',
|
'description': 'md5:605464fab26d06b1ce6a11c3ea37d36d',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 8,
|
'playlist_mincount': 8,
|
||||||
}, {
|
}, {
|
||||||
|
@ -676,6 +842,8 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||||
'url': 'https://radio.nrk.no/podkast/ulrikkes_univers',
|
'url': 'https://radio.nrk.no/podkast/ulrikkes_univers',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'ulrikkes_univers',
|
'id': 'ulrikkes_univers',
|
||||||
|
'title': 'Ulrikkes univers',
|
||||||
|
'description': 'md5:8af9fc2ee4aecd7f91777383fde50dcc',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 10,
|
'playlist_mincount': 10,
|
||||||
}, {
|
}, {
|
||||||
|
@ -699,16 +867,18 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||||
series = self._call_api(
|
series = self._call_api(
|
||||||
f'{domain}/catalog/{self._catalog_name(serie_kind)}/{series_id}',
|
f'{domain}/catalog/{self._catalog_name(serie_kind)}/{series_id}',
|
||||||
series_id, 'serie', query={size_prefix + 'ageSize': 50})
|
series_id, 'serie', query={size_prefix + 'ageSize': 50})
|
||||||
titles = try_get(series, [
|
titles = traverse_obj(
|
||||||
lambda x: x['titles'],
|
series,
|
||||||
lambda x: x[x['type']]['titles'],
|
(..., 'titles'),
|
||||||
lambda x: x[x['seriesType']]['titles'],
|
(..., 'type', 'titles'),
|
||||||
]) or {}
|
(..., 'seriesType', 'titles'),
|
||||||
|
get_all=False,
|
||||||
|
|
||||||
|
)
|
||||||
entries = []
|
entries = []
|
||||||
entries.extend(self._entries(series, series_id))
|
entries.extend(self._entries(series, series_id))
|
||||||
embedded = series.get('_embedded') or {}
|
embedded = series.get('_embedded') or {}
|
||||||
linked_seasons = try_get(series, lambda x: x['_links']['seasons']) or []
|
linked_seasons = traverse_obj(series, ('_links', 'seasons')) or []
|
||||||
embedded_seasons = embedded.get('seasons') or []
|
embedded_seasons = embedded.get('seasons') or []
|
||||||
if len(linked_seasons) > len(embedded_seasons):
|
if len(linked_seasons) > len(embedded_seasons):
|
||||||
for season in linked_seasons:
|
for season in linked_seasons:
|
||||||
|
@ -731,7 +901,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||||
entries, series_id, titles.get('title'), titles.get('subtitle'))
|
entries, series_id, titles.get('title'), titles.get('subtitle'))
|
||||||
|
|
||||||
|
|
||||||
class NRKTVDirekteIE(NRKTVIE): # XXX: Do not subclass from concrete IE
|
class NRKTVDirekteIE(NRKBaseIE):
|
||||||
IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
|
IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
|
||||||
_VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
|
@ -743,21 +913,29 @@ class NRKTVDirekteIE(NRKTVIE): # XXX: Do not subclass from concrete IE
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
return self.url_result(
|
||||||
|
f'nrk:{video_id}', ie=NRKIE.ie_key(), video_id=video_id)
|
||||||
|
|
||||||
class NRKRadioPodkastIE(InfoExtractor):
|
|
||||||
|
class NRKRadioPodkastIE(NRKBaseIE):
|
||||||
_VALID_URL = r'https?://radio\.nrk\.no/pod[ck]ast/(?:[^/]+/)+(?P<id>l_[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
_VALID_URL = r'https?://radio\.nrk\.no/pod[ck]ast/(?:[^/]+/)+(?P<id>l_[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://radio.nrk.no/podkast/ulrikkes_univers/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
|
'url': 'https://radio.nrk.no/podkast/ulrikkes_univers/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
|
||||||
'md5': '8d40dab61cea8ab0114e090b029a0565',
|
'md5': 'a68c3564be2f4426254f026c95a06348',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'MUHH48000314AA',
|
'id': 'l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
|
||||||
'ext': 'mp4',
|
'ext': 'mp3',
|
||||||
'title': '20 spørsmål 23.05.2014',
|
'timestamp': 1522897200,
|
||||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
'alt_title': 'md5:06eae9f8c8ccf0718b54c83654e65550',
|
||||||
'duration': 1741,
|
'upload_date': '20180405',
|
||||||
'series': '20 spørsmål',
|
'thumbnail': 'https://gfx.nrk.no/CEDlVkEKxLYiBZ-CXjxSxgduDdaL-a4XTZlar9AoJFOA',
|
||||||
'episode': '23.05.2014',
|
'description': '',
|
||||||
|
'title': 'Jeg er sinna og det må du tåle!',
|
||||||
|
'age_limit': 0,
|
||||||
|
'duration': 1682.0,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://radio.nrk.no/podcast/ulrikkes_univers/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
|
'url': 'https://radio.nrk.no/podcast/ulrikkes_univers/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
|
||||||
|
@ -776,15 +954,16 @@ class NRKRadioPodkastIE(InfoExtractor):
|
||||||
f'nrk:{video_id}', ie=NRKIE.ie_key(), video_id=video_id)
|
f'nrk:{video_id}', ie=NRKIE.ie_key(), video_id=video_id)
|
||||||
|
|
||||||
|
|
||||||
class NRKPlaylistBaseIE(InfoExtractor):
|
class NRKPlaylistBaseIE(NRKBaseIE):
|
||||||
def _extract_description(self, webpage):
|
def _extract_description(self, webpage):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
# Uses the render HTML endpoint instead of the regular article URL to prevent unrelated videos from being downloaded
|
||||||
|
# if .rich[data-video-id] elements appear in the "related articles" section too instead of just the main article.
|
||||||
|
webpage = self._download_webpage(f'https://www.nrk.no/serum/api/render/{playlist_id.split("-")[-1]}', playlist_id)
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result(f'nrk:{video_id}', NRKIE.ie_key())
|
self.url_result(f'nrk:{video_id}', NRKIE.ie_key())
|
||||||
for video_id in re.findall(self._ITEM_RE, webpage)
|
for video_id in re.findall(self._ITEM_RE, webpage)
|
||||||
|
@ -800,6 +979,8 @@ class NRKPlaylistBaseIE(InfoExtractor):
|
||||||
class NRKPlaylistIE(NRKPlaylistBaseIE):
|
class NRKPlaylistIE(NRKPlaylistBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
|
||||||
_ITEM_RE = r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"'
|
_ITEM_RE = r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"'
|
||||||
|
_TITLE_RE = r'class="[^"]*\barticle-title\b[^"]*"[^>]*>([^<]+)<'
|
||||||
|
_DESCRIPTION_RE = r'class="[^"]*[\s"]article-lead[\s"][^>]*>[^<]*<p>([^<]*)<'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
|
'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -819,42 +1000,29 @@ class NRKPlaylistIE(NRKPlaylistBaseIE):
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_title(self, webpage):
|
def _extract_title(self, webpage):
|
||||||
return self._og_search_title(webpage, fatal=False)
|
return re.search(self._TITLE_RE, webpage).group(1)
|
||||||
|
|
||||||
def _extract_description(self, webpage):
|
def _extract_description(self, webpage):
|
||||||
return self._og_search_description(webpage)
|
return re.search(self._DESCRIPTION_RE, webpage).group(1)
|
||||||
|
|
||||||
|
|
||||||
class NRKTVEpisodesIE(NRKPlaylistBaseIE):
|
class NRKSkoleIE(NRKBaseIE):
|
||||||
_VALID_URL = r'https?://tv\.nrk\.no/program/[Ee]pisodes/[^/]+/(?P<id>\d+)'
|
|
||||||
_ITEM_RE = rf'data-episode=["\']{NRKTVIE._EPISODE_RE}'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://tv.nrk.no/program/episodes/nytt-paa-nytt/69031',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '69031',
|
|
||||||
'title': 'Nytt på nytt, sesong: 201210',
|
|
||||||
},
|
|
||||||
'playlist_count': 4,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _extract_title(self, webpage):
|
|
||||||
return self._html_search_regex(
|
|
||||||
r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
|
|
||||||
|
|
||||||
|
|
||||||
class NRKSkoleIE(InfoExtractor):
|
|
||||||
IE_DESC = 'NRK Skole'
|
IE_DESC = 'NRK Skole'
|
||||||
_VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.nrk.no/skole/?page=search&q=&mediaId=14099',
|
'url': 'https://www.nrk.no/skole/?page=search&q=&mediaId=14099',
|
||||||
'md5': '18c12c3d071953c3bf8d54ef6b2587b7',
|
'md5': '1d54ec4cff70d8f2c7909d1922514af2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6021',
|
'id': '6021',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Genetikk og eneggede tvillinger',
|
'title': 'Genetikk og eneggede tvillinger',
|
||||||
'description': 'md5:3aca25dcf38ec30f0363428d2b265f8d',
|
'description': 'md5:7c0cc42d35d99bbc58f45639cdbcc163',
|
||||||
'duration': 399,
|
'duration': 399,
|
||||||
|
'thumbnail': 'https://gfx.nrk.no/5SN-Uq11iR3ADwrCwTv0bAKbbBXXNpVJsaCLGiU8lFoQ',
|
||||||
|
'timestamp': 1205622000,
|
||||||
|
'upload_date': '20080315',
|
||||||
|
'alt_title': '',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.nrk.no/skole/?page=objectives&subject=naturfag&objective=K15114&mediaId=19355',
|
'url': 'https://www.nrk.no/skole/?page=objectives&subject=naturfag&objective=K15114&mediaId=19355',
|
||||||
|
@ -863,9 +1031,14 @@ class NRKSkoleIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
response = self._download_json(
|
||||||
nrk_id = self._download_json(
|
|
||||||
f'https://nrkno-skole-prod.kube.nrk.no/skole/api/media/{video_id}',
|
f'https://nrkno-skole-prod.kube.nrk.no/skole/api/media/{video_id}',
|
||||||
video_id)['psId']
|
video_id)
|
||||||
|
nrk_id = response['psId']
|
||||||
return self.url_result(f'nrk:{nrk_id}')
|
return self.url_result(
|
||||||
|
f'nrk:{nrk_id}', NRKIE, nrk_id, url_transparent=True,
|
||||||
|
**traverse_obj(response, {
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'timestamp': ('airedDate', {parse_iso8601}),
|
||||||
|
'description': ('summary', {str}),
|
||||||
|
}))
|
||||||
|
|
122
yt_dlp/extractor/pialive.py
Normal file
122
yt_dlp/extractor/pialive.py
Normal file
|
@ -0,0 +1,122 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
clean_html,
|
||||||
|
extract_attributes,
|
||||||
|
get_element_by_class,
|
||||||
|
get_element_html_by_class,
|
||||||
|
multipart_encode,
|
||||||
|
str_or_none,
|
||||||
|
unified_timestamp,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class PiaLiveIE(InfoExtractor):
    """Extractor for PIA LIVE STREAM player pages (player.pia-live.jp).

    The player page only carries the program/article codes; the actual media
    is embedded through a player tag returned by the PIA LIVE API, so the
    result is delegated (url_transparent) to whichever extractor handles the
    embedded player URL.
    """

    _VALID_URL = r'https?://player\.pia-live\.jp/stream/(?P<id>[\w-]+)'
    _PLAYER_ROOT_URL = 'https://player.pia-live.jp/'
    _PIA_LIVE_API_URL = 'https://api.pia-live.jp'
    _API_KEY = 'kfds)FKFps-dms9e'
    _TESTS = [{
        'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krUDqGOwN4d61dCWQYOd6CTxl4hjya9dsfEZGsM4uGOUdax60lEI4twsXGXf7crmz8Gk__GhupTrWxA7RFRVt76',
        'info_dict': {
            'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
            'display_id': '2431867_001',
            'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
            'live_status': 'was_live',
            'comment_count': int,
        },
        'params': {
            'getcomments': True,
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
        'skip': 'The video is no longer available',
    }, {
        'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krJdu0GVBVbVy01IwpJ6J3qBEm3d9TCTt1d0eWpsZGj7DrOjVOmS7GAWGwyscMgiThopJvzgWC4H5b-7XQjAfRZ',
        'info_dict': {
            'id': '9ce8b8ba-f6d1-4d1f-83a0-18c3148ded93',
            'display_id': '2431867_002',
            'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
            'live_status': 'was_live',
            'comment_count': int,
        },
        'params': {
            'getcomments': True,
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
        'skip': 'The video is no longer available',
    }]

    def _extract_var(self, variable, html):
        """Return the quoted string assigned to the JS variable `variable` in `html`."""
        return self._search_regex(
            rf'(?:var|const|let)\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
            html, f'variable {variable}', group='value')

    def _real_extract(self, url):
        """Resolve a player page to the embedded player URL.

        Raises ExtractorError when the page carries the 'play-end' element.
        Returns an 'is_upcoming' stub when the page shows a parsable start date,
        otherwise a url_transparent result for the embedded player.
        """
        video_key = self._match_id(url)
        webpage = self._download_webpage(url, video_key)

        program_code = self._extract_var('programCode', webpage)
        article_code = self._extract_var('articleCode', webpage)
        title = self._html_extract_title(webpage)

        if get_element_html_by_class('play-end', webpage):
            raise ExtractorError('The video is no longer available', expected=True, video_id=program_code)

        if start_info := clean_html(get_element_by_class('play-waiting__date', webpage)):
            # With fatal=False a failed match yields None rather than a tuple;
            # fall back to a pair of Nones so the unpacking cannot raise TypeError
            start_date, start_time = self._search_regex(
                r'(?P<date>\d{4}/\d{1,2}/\d{1,2})\([月火水木金土日]\)(?P<time>\d{2}:\d{2})',
                start_info, 'start_info', fatal=False, group=('date', 'time')) or (None, None)
            if start_date and start_time:
                # The timestamp is built with a fixed +09:00 offset
                release_timestamp_str = f'{start_date} {start_time} +09:00'
                release_timestamp = unified_timestamp(release_timestamp_str)
                self.raise_no_formats(f'The video will be available after {release_timestamp_str}', expected=True)
                return {
                    'id': program_code,
                    'title': title,
                    'live_status': 'is_upcoming',
                    'release_timestamp': release_timestamp,
                }

        # Both API endpoints take the same multipart body and headers
        payload, content_type = multipart_encode({
            'play_url': video_key,
            'api_key': self._API_KEY,
        })
        api_data_and_headers = {
            'data': payload,
            'headers': {'Content-Type': content_type, 'Referer': self._PLAYER_ROOT_URL},
        }

        player_tag_list = self._download_json(
            f'{self._PIA_LIVE_API_URL}/perf/player-tag-list/{program_code}', program_code,
            'Fetching player tag list', 'Unable to fetch player tag list', **api_data_and_headers)

        return self.url_result(
            extract_attributes(player_tag_list['data']['movie_one_tag'])['src'],
            url_transparent=True, title=title, display_id=program_code,
            __post_extractor=self.extract_comments(program_code, article_code, api_data_and_headers))

    def _get_comments(self, program_code, article_code, api_data_and_headers):
        """Yield comment dicts parsed from the chat page's `_history` array.

        Every download here is non-fatal; the generator simply ends when the
        chat URL or the chat page cannot be obtained.
        """
        chat_room_url = traverse_obj(self._download_json(
            f'{self._PIA_LIVE_API_URL}/perf/chat-tag-list/{program_code}/{article_code}', program_code,
            'Fetching chat info', 'Unable to fetch chat info', fatal=False, **api_data_and_headers),
            ('data', 'chat_one_tag', {extract_attributes}, 'src', {url_or_none}))
        if not chat_room_url:
            return
        comment_page = self._download_webpage(
            chat_room_url, program_code, 'Fetching comment page', 'Unable to fetch comment page',
            fatal=False, headers={'Referer': self._PLAYER_ROOT_URL})
        if not comment_page:
            return
        # Each history entry is a positional array; fields are picked by index
        yield from traverse_obj(self._search_json(
            r'var\s+_history\s*=', comment_page, 'comment list',
            program_code, contains_pattern=r'\[(?s:.+)\]', fatal=False), (..., {
                'timestamp': (0, {int}),
                'author_is_uploader': (1, {lambda x: x == 2}),
                'author': (2, {str}),
                'text': (3, {str}),
                'id': (4, {str_or_none}),
            }))
|
|
@ -1,70 +0,0 @@
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
int_or_none,
|
|
||||||
parse_qs,
|
|
||||||
time_seconds,
|
|
||||||
traverse_obj,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class PIAULIZAPortalIE(InfoExtractor):
    """Extractor for ulizaportal.jp pages that embed a ULIZA player script."""

    IE_DESC = 'ulizaportal.jp - PIA LIVE STREAM'
    _VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
    _TESTS = [{
        'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
        'info_dict': {
            'id': '005f18b7-e810-5618-cb82-0987c5755d44',
            'title': 'プレゼンテーションプレイヤーのサンプル',
            'live_status': 'not_live',
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }, {
        'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
        'info_dict': {
            'id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
            'title': '【確認用】視聴サンプルページ(ULIZA)',
            'live_status': 'not_live',
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }]

    def _real_extract(self, url):
        """Fetch the page, its player script, and the HLS formats referenced by it."""
        video_id = self._match_id(url)

        # Reject links whose `expires` query parameter is already in the past
        expiry = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
        if expiry and expiry <= time_seconds():
            raise ExtractorError('The link is expired.', video_id=video_id, expected=True)

        webpage = self._download_webpage(url, video_id)

        player_script_url = self._search_regex(
            r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
            webpage, 'player data url')
        player_data = self._download_webpage(
            player_script_url, video_id, headers={'Referer': 'https://ulizaportal.jp/'},
            note='Fetching player data', errnote='Unable to fetch player data')

        playlist_url = self._search_regex(
            r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data,
            'm3u8 url', default=None)
        formats = self._extract_m3u8_formats(playlist_url, video_id, fatal=False)

        first_format_url = traverse_obj(formats, (0, 'url'))
        m3u8_type = self._search_regex(
            r'/hls/(dvr|video)/', first_format_url, 'm3u8 type', default=None)

        if m3u8_type == 'video':
            live_status = 'is_live'
        elif m3u8_type == 'dvr':
            live_status = 'was_live'  # short-term archives
        else:
            live_status = 'not_live'  # VOD or long-term archives

        return {
            'id': video_id,
            'title': self._html_extract_title(webpage),
            'formats': formats,
            'live_status': live_status,
        }
|
|
|
@ -1,136 +0,0 @@
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
extract_attributes,
|
|
||||||
int_or_none,
|
|
||||||
js_to_json,
|
|
||||||
merge_dicts,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class PokemonIE(InfoExtractor):
    """Extractor for pokemon.com pages; playback is delegated to LimelightMedia."""

    _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))'
    _TESTS = [{
        'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/',
        'md5': '2fe8eaec69768b25ef898cda9c43062e',
        'info_dict': {
            'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4',
            'ext': 'mp4',
            'title': 'The Ol’ Raise and Switch!',
            'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
        },
        'add_id': ['LimelightMedia'],
    }, {
        # no data-video-title
        'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008',
        'info_dict': {
            'id': 'dfbaf830d7e54e179837c50c0c6cc0e1',
            'ext': 'mp4',
            'title': "Pokémon : L'ascension de Darkrai",
            'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5',
        },
        'add_id': ['LimelightMedia'],
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
        'only_matching': True,
    }, {
        'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/',
        'only_matching': True,
    }, {
        'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Read the element carrying `data-video-id` and build a url_transparent result."""
        video_id, display_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, video_id or display_id)

        # Without an explicit id in the URL, accept any 32-character id on the page
        id_pattern = video_id or '[a-z0-9]{32}'
        element = self._search_regex(
            r'(<[^>]+data-video-id="{}"[^>]*>)'.format(id_pattern),
            webpage, 'video data element')
        video_data = extract_attributes(element)
        video_id = video_data['data-video-id']

        # Title sources, in order: element attribute, meta tag, page heading
        title = video_data.get('data-video-title')
        if not title:
            title = self._html_search_meta('pkm-title', webpage, ' title', default=None)
        if not title:
            title = self._search_regex(
                r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title')

        season_number = int_or_none(video_data.get('data-video-season'))
        episode_number = int_or_none(video_data.get('data-video-episode'))
        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': f'limelight:media:{video_id}',
            'title': title,
            'description': video_data.get('data-video-summary'),
            'thumbnail': video_data.get('data-video-poster'),
            'series': 'Pokémon',
            'season_number': season_number,
            'episode': title,
            'episode_number': episode_number,
            'ie_key': 'LimelightMedia',
        }
|
|
||||||
|
|
||||||
|
|
||||||
class PokemonWatchIE(InfoExtractor):
    """Extractor for watch.pokemon.com player URLs; playback is delegated to LimelightMedia."""

    _VALID_URL = r'https?://watch\.pokemon\.com/[a-z]{2}-[a-z]{2}/(?:#/)?player(?:\.html)?\?id=(?P<id>[a-z0-9]{32})'
    _API_URL = 'https://www.pokemon.com/api/pokemontv/v2/channels/{0:}'
    _TESTS = [{
        'url': 'https://watch.pokemon.com/en-us/player.html?id=8309a40969894a8e8d5bc1311e9c5667',
        'md5': '62833938a31e61ab49ada92f524c42ff',
        'info_dict': {
            'id': '8309a40969894a8e8d5bc1311e9c5667',
            'ext': 'mp4',
            'title': 'Lillier and the Staff!',
            'description': 'md5:338841b8c21b283d24bdc9b568849f04',
        },
    }, {
        'url': 'https://watch.pokemon.com/en-us/#/player?id=3fe7752ba09141f0b0f7756d1981c6b2',
        'only_matching': True,
    }, {
        'url': 'https://watch.pokemon.com/de-de/player.html?id=b3c402e111a4459eb47e12160ab0ba07',
        'only_matching': True,
    }]

    def _extract_media(self, channel_array, video_id):
        """Return the media entry whose 'id' equals `video_id`, or None if there is none."""
        for channel in channel_array:
            # A channel without (or with a null) 'media' list is skipped instead of raising TypeError
            for media in channel.get('media') or []:
                if media.get('id') == video_id:
                    return media
        return None

    def _real_extract(self, url):
        """Build a LimelightMedia result, enriched with channel API metadata when fetched.

        Raises ExtractorError when the channel listing has no entry for the video id.
        """
        video_id = self._match_id(url)

        info = {
            '_type': 'url',
            'id': video_id,
            'url': f'limelight:media:{video_id}',
            'ie_key': 'LimelightMedia',
        }

        # API call can be avoided entirely if we are listing formats
        if self.get_param('listformats', False):
            return info

        webpage = self._download_webpage(url, video_id)
        build_vars = self._parse_json(self._search_regex(
            r'(?s)buildVars\s*=\s*({.*?})', webpage, 'build vars'),
            video_id, transform_source=js_to_json)
        region = build_vars.get('region')
        channel_array = self._download_json(self._API_URL.format(region), video_id)
        video_data = self._extract_media(channel_array, video_id)

        if video_data is None:
            raise ExtractorError(
                f'Video {video_id} does not exist', expected=True)

        info['_type'] = 'url_transparent'
        # 'images' is optional metadata; a missing/null value must not abort extraction
        images = video_data.get('images') or {}

        return merge_dicts(info, {
            'title': video_data.get('title'),
            'description': video_data.get('description'),
            'thumbnail': images.get('medium') or images.get('small'),
            'series': 'Pokémon',
            'season_number': int_or_none(video_data.get('season')),
            'episode': video_data.get('title'),
            'episode_number': int_or_none(video_data.get('episode')),
        })
|
|
113
yt_dlp/extractor/uliza.py
Normal file
113
yt_dlp/extractor/uliza.py
Normal file
|
@ -0,0 +1,113 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
make_archive_id,
|
||||||
|
parse_qs,
|
||||||
|
time_seconds,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class UlizaPlayerIE(InfoExtractor):
    """Extractor for ULIZA player-api script URLs (player-api.p.uliza.jp)."""

    _VALID_URL = r'https://player-api\.p\.uliza\.jp/v1/players/[^?#]+\?(?:[^#]*&)?name=(?P<id>[^#&]+)'
    _TESTS = [{
        'url': 'https://player-api.p.uliza.jp/v1/players/timeshift-disabled/pia/admin?type=normal&playerobjectname=ulizaPlayer&name=livestream01_dvr&repeatable=true',
        'info_dict': {
            'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
            'ext': 'mp4',
            'title': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
            'live_status': 'was_live',
            '_old_archive_ids': ['piaulizaportal 88f3109a-f503-4d0f-a9f7-9f39ac745d84'],
        },
    }, {
        'url': 'https://player-api.p.uliza.jp/v1/players/uliza_jp_gallery_normal/promotion/admin?type=presentation&name=cookings&targetid=player1',
        'info_dict': {
            'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
            'ext': 'mp4',
            'title': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
            'live_status': 'not_live',
            '_old_archive_ids': ['piaulizaportal ae350126-5e22-4a7f-a8ac-8d0fd448b800'],
        },
    }, {
        'url': 'https://player-api.p.uliza.jp/v1/players/default-player/pia/admin?type=normal&name=pia_movie_uliza_fix&targetid=ulizahtml5&repeatable=true',
        'info_dict': {
            'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
            'ext': 'mp4',
            'title': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
            'live_status': 'not_live',
            '_old_archive_ids': ['piaulizaportal 0644ecc8-e354-41b4-b957-3b08a2d63df1'],
        },
    }]

    def _real_extract(self, url):
        """Download the player script and extract the HLS formats it points to."""
        display_id = self._match_id(url)
        player_data = self._download_webpage(
            url, display_id, headers={'Referer': 'https://player-api.p.uliza.jp/'},
            note='Fetching player data', errnote='Unable to fetch player data')

        m3u8_url = self._search_regex(
            r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data, 'm3u8 url')

        # The playlist's `ss` query value serves as the id; otherwise keep the player name
        playlist_query = parse_qs(m3u8_url)
        video_id = playlist_query['ss'][0] if 'ss' in playlist_query else display_id

        formats = self._extract_m3u8_formats(m3u8_url, video_id)
        first_format_url = traverse_obj(formats, (0, 'url'))
        m3u8_type = self._search_regex(
            r'/hls/(dvr|video)/', first_format_url, 'm3u8 type', default=None)

        if m3u8_type == 'video':
            live_status = 'is_live'
        elif m3u8_type == 'dvr':
            live_status = 'was_live'  # short-term archives
        else:
            live_status = 'not_live'  # VOD or long-term archives

        return {
            'id': video_id,
            'title': video_id,
            'formats': formats,
            'live_status': live_status,
            '_old_archive_ids': [make_archive_id('PIAULIZAPortal', video_id)],
        }
|
||||||
|
|
||||||
|
|
||||||
|
class UlizaPortalIE(InfoExtractor):
    """Extractor for ulizaportal.jp pages; the embedded player is handled by UlizaPlayerIE."""

    IE_DESC = 'ulizaportal.jp'
    _VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
    _TESTS = [{
        'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
        'info_dict': {
            'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
            'display_id': '005f18b7-e810-5618-cb82-0987c5755d44',
            'title': 'プレゼンテーションプレイヤーのサンプル',
            'live_status': 'not_live',
            '_old_archive_ids': ['piaulizaportal ae350126-5e22-4a7f-a8ac-8d0fd448b800'],
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }, {
        'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
        'info_dict': {
            'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
            'display_id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
            'title': '【確認用】視聴サンプルページ(ULIZA)',
            'live_status': 'not_live',
            '_old_archive_ids': ['piaulizaportal 0644ecc8-e354-41b4-b957-3b08a2d63df1'],
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }]

    def _real_extract(self, url):
        """Locate the player script on the page and hand it over as a url_transparent result."""
        video_id = self._match_id(url)

        # Reject links whose `expires` query parameter is already in the past
        expiry = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
        if expiry and expiry <= time_seconds():
            raise ExtractorError('The link is expired', video_id=video_id, expected=True)

        webpage = self._download_webpage(url, video_id)

        player_data_url = self._search_regex(
            r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
            webpage, 'player data url')
        page_title = self._html_extract_title(webpage)
        return self.url_result(
            player_data_url, UlizaPlayerIE, url_transparent=True,
            display_id=video_id, video_title=page_title)
|
|
@ -1,189 +0,0 @@
|
||||||
import functools
|
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
OnDemandPagedList,
|
|
||||||
int_or_none,
|
|
||||||
parse_duration,
|
|
||||||
qualities,
|
|
||||||
remove_start,
|
|
||||||
strip_or_none,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class VeohIE(InfoExtractor):
    """Extractor for single veoh.com videos, backed by the `watch/getVideo` JSON endpoint."""

    _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|videos|embed|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'

    _TESTS = [{
        'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
        'md5': '620e68e6a3cff80086df3348426c9ca3',
        'info_dict': {
            'id': 'v56314296nk7Zdmz3',
            'ext': 'mp4',
            'title': 'Straight Backs Are Stronger',
            'description': 'md5:203f976279939a6dc664d4001e13f5f4',
            'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th56314296\\.jpg(\\?.*)?',
            'uploader': 'LUMOback',
            'duration': 46,
            'view_count': int,
            'average_rating': int,
            'comment_count': int,
            'age_limit': 0,
            'categories': ['technology_and_gaming'],
            'tags': ['posture', 'posture', 'sensor', 'back', 'pain', 'wearable', 'tech', 'lumo'],
        },
    }, {
        'url': 'http://www.veoh.com/embed/v56314296nk7Zdmz3',
        'only_matching': True,
    }, {
        'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
        'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
        'info_dict': {
            'id': '27701988',
            'ext': 'mp4',
            'title': 'Chile workers cover up to avoid skin damage',
            'description': 'md5:2bd151625a60a32822873efc246ba20d',
            'uploader': 'afp-news',
            'duration': 123,
        },
        'skip': 'This video has been deleted.',
    }, {
        'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
        'md5': '4fde7b9e33577bab2f2f8f260e30e979',
        'note': 'Embedded ooyala video',
        'info_dict': {
            'id': '69525809',
            'ext': 'mp4',
            'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
            'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
            'uploader': 'newsy-videos',
        },
        'skip': 'This video has been deleted.',
    }, {
        'url': 'http://www.veoh.com/watch/e152215AJxZktGS',
        'only_matching': True,
    }, {
        'url': 'https://www.veoh.com/videos/v16374379WA437rMH',
        'md5': 'cceb73f3909063d64f4b93d4defca1b3',
        'info_dict': {
            'id': 'v16374379WA437rMH',
            'ext': 'mp4',
            'title': 'Phantasmagoria 2, pt. 1-3',
            'description': 'Phantasmagoria: a Puzzle of Flesh',
            'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th16374379\\.jpg(\\?.*)?',
            'uploader': 'davidspackage',
            'duration': 968,
            'view_count': int,
            'average_rating': int,
            'comment_count': int,
            'age_limit': 18,
            'categories': ['technology_and_gaming', 'gaming'],
            'tags': ['puzzle', 'of', 'flesh'],
        },
    }]

    def _real_extract(self, url):
        """Download the video's JSON metadata and map it onto an info dict.

        The 'video' object and its 'title' are mandatory (KeyError otherwise);
        all other fields are optional.
        """
        video_id = self._match_id(url)
        metadata = self._download_json(
            'https://www.veoh.com/watch/getVideo/' + video_id,
            video_id)
        video = metadata['video']
        title = video['title']

        thumbnail_url = None
        q = qualities(['Regular', 'HQ'])
        formats = []
        # `or {}` also covers an explicit JSON null, which the `.get` default does not
        for f_id, f_url in (video.get('src') or {}).items():
            if not f_url:
                continue
            # The 'poster' entry of the source map is the thumbnail, not a format
            if f_id == 'poster':
                thumbnail_url = f_url
            else:
                formats.append({
                    'format_id': f_id,
                    'quality': q(f_id),
                    'url': f_url,
                })

        categories = metadata.get('categoryPath')
        if not categories:
            category = remove_start(strip_or_none(video.get('category')), 'category_')
            categories = [category] if category else None
        tags = video.get('tags')

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': thumbnail_url,
            # Null-safe for the same reason as the source map above
            'uploader': (video.get('author') or {}).get('nickname'),
            'duration': int_or_none(video.get('lengthBySec')) or parse_duration(video.get('length')),
            'view_count': int_or_none(video.get('views')),
            'formats': formats,
            'average_rating': int_or_none(video.get('rating')),
            'comment_count': int_or_none(video.get('numOfComments')),
            'age_limit': 18 if video.get('contentRatingId') == 2 else 0,
            'categories': categories,
            'tags': tags.split(', ') if tags else None,
        }
|
|
||||||
|
|
||||||
|
|
||||||
class VeohUserIE(VeohIE):  # XXX: Do not subclass from concrete IE
    """Playlist extractor for the uploads of a veoh.com user."""

    _VALID_URL = r'https?://(?:www\.)?veoh\.com/users/(?P<id>[\w-]+)'
    IE_NAME = 'veoh:user'

    _TESTS = [{
        'url': 'https://www.veoh.com/users/valentinazoe',
        'info_dict': {
            'id': 'valentinazoe',
            'title': 'valentinazoe (Uploads)',
        },
        'playlist_mincount': 75,
    }, {
        'url': 'https://www.veoh.com/users/PiensaLibre',
        'info_dict': {
            'id': 'PiensaLibre',
            'title': 'PiensaLibre (Uploads)',
        },
        'playlist_mincount': 2,
    }]

    _PAGE_SIZE = 16

    def _fetch_page(self, uploader, page):
        """Yield url results for one page of the user's published videos.

        `page` is zero-based; the request is sent with `page + 1`.
        Raises ExtractorError when the response is not flagged as successful.
        """
        page_number = page + 1
        request_headers = {
            'x-csrf-token': self._TOKEN,
            'content-type': 'application/json;charset=UTF-8',
        }
        request_body = json.dumps({
            'username': uploader,
            'maxResults': self._PAGE_SIZE,
            'page': page_number,
            'requestName': 'userPage',
        }).encode()

        response = self._download_json(
            'https://www.veoh.com/users/published/videos', uploader,
            note=f'Downloading videos page {page_number}',
            headers=request_headers, data=request_body)
        if not response.get('success'):
            raise ExtractorError(response['message'])

        for video in response['videos']:
            permalink_id = video['permalinkId']
            yield self.url_result(
                f'https://www.veoh.com/watch/{permalink_id}', VeohIE,
                permalink_id, video.get('title'))

    def _real_initialize(self):
        """Scrape the CSRF token from the front page; `_fetch_page` sends it as a header."""
        front_page = self._download_webpage(
            'https://www.veoh.com', None, note='Downloading authorization token')
        self._TOKEN = self._search_regex(
            r'csrfToken:\s*(["\'])(?P<token>[0-9a-zA-Z]{40})\1', front_page,
            'request token', group='token')

    def _real_extract(self, url):
        """Return a lazily paged playlist of the user's uploads."""
        uploader = self._match_id(url)
        page_fetcher = functools.partial(self._fetch_page, uploader)
        entries = OnDemandPagedList(page_fetcher, self._PAGE_SIZE)
        return self.playlist_result(entries, uploader, f'{uploader} (Uploads)')
|
|
|
@ -5087,7 +5087,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
||||||
def _rich_entries(self, rich_grid_renderer):
|
def _rich_entries(self, rich_grid_renderer):
|
||||||
renderer = traverse_obj(
|
renderer = traverse_obj(
|
||||||
rich_grid_renderer,
|
rich_grid_renderer,
|
||||||
('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer', 'shortsLockupViewModel'), any)) or {}
|
('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer', 'shortsLockupViewModel', 'lockupViewModel'), any)) or {}
|
||||||
video_id = renderer.get('videoId')
|
video_id = renderer.get('videoId')
|
||||||
if video_id:
|
if video_id:
|
||||||
yield self._extract_video(renderer)
|
yield self._extract_video(renderer)
|
||||||
|
@ -5114,6 +5114,18 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
||||||
})),
|
})),
|
||||||
thumbnails=self._extract_thumbnails(renderer, 'thumbnail', final_key='sources'))
|
thumbnails=self._extract_thumbnails(renderer, 'thumbnail', final_key='sources'))
|
||||||
return
|
return
|
||||||
|
# lockupViewModel extraction
|
||||||
|
content_id = renderer.get('contentId')
|
||||||
|
if content_id and renderer.get('contentType') == 'LOCKUP_CONTENT_TYPE_PODCAST':
|
||||||
|
yield self.url_result(
|
||||||
|
f'https://www.youtube.com/playlist?list={content_id}',
|
||||||
|
ie=YoutubeTabIE, video_id=content_id,
|
||||||
|
**traverse_obj(renderer, {
|
||||||
|
'title': ('metadata', 'lockupMetadataViewModel', 'title', 'content', {str}),
|
||||||
|
}),
|
||||||
|
thumbnails=self._extract_thumbnails(renderer, (
|
||||||
|
'contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail', 'thumbnailViewModel', 'image'), final_key='sources'))
|
||||||
|
return
|
||||||
|
|
||||||
def _video_entry(self, video_renderer):
|
def _video_entry(self, video_renderer):
|
||||||
video_id = video_renderer.get('videoId')
|
video_id = video_renderer.get('videoId')
|
||||||
|
@ -6706,22 +6718,22 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||||
},
|
},
|
||||||
'playlist_count': 0,
|
'playlist_count': 0,
|
||||||
}, {
|
}, {
|
||||||
# Podcasts tab, with rich entry playlistRenderers
|
# Podcasts tab, with rich entry lockupViewModel
|
||||||
'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
|
'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
|
'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
|
||||||
'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
|
'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
|
||||||
'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
|
'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
|
||||||
'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
|
'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
|
||||||
'title': '99 Percent Invisible - Podcasts',
|
'title': '99% Invisible - Podcasts',
|
||||||
'uploader': '99 Percent Invisible',
|
'uploader': '99% Invisible',
|
||||||
'channel_follower_count': int,
|
'channel_follower_count': int,
|
||||||
'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
|
'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'channel': '99 Percent Invisible',
|
'channel': '99% Invisible',
|
||||||
'uploader_id': '@99percentinvisiblepodcast',
|
'uploader_id': '@99percentinvisiblepodcast',
|
||||||
},
|
},
|
||||||
'playlist_count': 0,
|
'playlist_count': 5,
|
||||||
}, {
|
}, {
|
||||||
# Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
|
# Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
|
||||||
'url': 'https://www.youtube.com/@AHimitsu/releases',
|
'url': 'https://www.youtube.com/@AHimitsu/releases',
|
||||||
|
|
Loading…
Reference in New Issue
Block a user