Compare commits

...

11 Commits

Author SHA1 Message Date
D Trombett
9f5ef16b3f
Merge aef78fdfa8 into 10fc719bc7 2024-11-17 17:34:25 +01:00
doe1080
10fc719bc7
[cleanup] Remove dead extractors (#11566)
- Removes MildomClipIE, MildomIE, MildomUserVodIE, MildomVodIE
- Removes PokemonIE, PokemonWatchIE
- Removes VeohIE, VeohUserIE

Closes #3373, Closes #7059
Authored by: doe1080
2024-11-17 16:22:40 +00:00
krichbanana
eb15fd5a32
[ie/kenh14] Add extractor (#3996)
Closes #3937
Authored by: krichbanana, pzhlkj6612

Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
2024-11-17 14:12:26 +00:00
sepro
7cecd299e4
[ie/chaturbate] Don't break embed detection (#11565)
Bugfix for 720b3dc453

Authored by: seproDev
2024-11-17 13:32:12 +01:00
DTrombett
aef78fdfa8
[ie/npo] Add Zapp extractor 2024-10-12 13:38:15 +02:00
DTrombett
9768d62bf8
[ie/npo] Fix ntr extractor 2024-10-12 13:13:42 +02:00
DTrombett
e569c03196
[ie/npo] Fix hetklokhuis extractor 2024-10-11 20:25:03 +02:00
DTrombett
2e9c00649c
[ie/npo] Fix schooltv extractor 2024-10-11 19:27:09 +02:00
DTrombett
d1175cf7a3
[ie/npo] Add npo3 extractor 2024-10-11 18:52:07 +02:00
DTrombett
5f600dc613
[ie/npo] Fix radio extractor 2024-10-11 17:16:50 +02:00
DTrombett
79795b9502
[ie/npo.nl:start] Split extractor 2024-10-10 20:56:28 +02:00
7 changed files with 484 additions and 1118 deletions

View File

@ -946,6 +946,10 @@ from .kaltura import KalturaIE
from .kankanews import KankaNewsIE from .kankanews import KankaNewsIE
from .karaoketv import KaraoketvIE from .karaoketv import KaraoketvIE
from .kelbyone import KelbyOneIE from .kelbyone import KelbyOneIE
from .kenh14 import (
Kenh14PlaylistIE,
Kenh14VideoIE,
)
from .khanacademy import ( from .khanacademy import (
KhanAcademyIE, KhanAcademyIE,
KhanAcademyUnitIE, KhanAcademyUnitIE,
@ -1135,12 +1139,6 @@ from .microsoftembed import (
MicrosoftMediusIE, MicrosoftMediusIE,
) )
from .microsoftstream import MicrosoftStreamIE from .microsoftstream import MicrosoftStreamIE
from .mildom import (
MildomClipIE,
MildomIE,
MildomUserVodIE,
MildomVodIE,
)
from .minds import ( from .minds import (
MindsChannelIE, MindsChannelIE,
MindsGroupIE, MindsGroupIE,
@ -1385,15 +1383,15 @@ from .nowness import (
) )
from .noz import NozIE from .noz import NozIE
from .npo import ( from .npo import (
NPOIE, NPO3IE,
NTRIE,
VPROIE, VPROIE,
WNLIE,
AndereTijdenIE, AndereTijdenIE,
HetKlokhuisIE, HetKlokhuisIE,
NPOLiveIE,
NPORadioFragmentIE,
NPORadioIE, NPORadioIE,
NPOStartIE,
SchoolTVIE, SchoolTVIE,
ZappIE,
) )
from .npr import NprIE from .npr import NprIE
from .nrk import ( from .nrk import (
@ -1559,10 +1557,6 @@ from .podbayfm import (
) )
from .podchaser import PodchaserIE from .podchaser import PodchaserIE
from .podomatic import PodomaticIE from .podomatic import PodomaticIE
from .pokemon import (
PokemonIE,
PokemonWatchIE,
)
from .pokergo import ( from .pokergo import (
PokerGoCollectionIE, PokerGoCollectionIE,
PokerGoIE, PokerGoIE,
@ -2284,10 +2278,6 @@ from .utreon import UtreonIE
from .varzesh3 import Varzesh3IE from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE from .vbox7 import Vbox7IE
from .veo import VeoIE from .veo import VeoIE
from .veoh import (
VeohIE,
VeohUserIE,
)
from .vesti import VestiIE from .vesti import VestiIE
from .vevo import ( from .vevo import (
VevoIE, VevoIE,

View File

@ -79,7 +79,7 @@ class ChaturbateIE(InfoExtractor):
'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True), 'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True),
} }
def _extract_from_webpage(self, video_id, tld): def _extract_from_html(self, video_id, tld):
webpage = self._download_webpage( webpage = self._download_webpage(
f'https://chaturbate.{tld}/{video_id}/', video_id, f'https://chaturbate.{tld}/{video_id}/', video_id,
headers=self.geo_verification_headers(), impersonate=True) headers=self.geo_verification_headers(), impersonate=True)
@ -151,4 +151,4 @@ class ChaturbateIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id, tld = self._match_valid_url(url).group('id', 'tld') video_id, tld = self._match_valid_url(url).group('id', 'tld')
return self._extract_from_api(video_id, tld) or self._extract_from_webpage(video_id, tld) return self._extract_from_api(video_id, tld) or self._extract_from_html(video_id, tld)

160
yt_dlp/extractor/kenh14.py Normal file
View File

@ -0,0 +1,160 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
extract_attributes,
get_element_by_class,
get_element_html_by_attribute,
get_elements_html_by_class,
int_or_none,
parse_duration,
parse_iso8601,
remove_start,
strip_or_none,
unescapeHTML,
update_url,
url_or_none,
)
from ..utils.traversal import traverse_obj
class Kenh14VideoIE(InfoExtractor):
    """Extractor for individual video pages on video.kenh14.vn."""

    _VALID_URL = r'https?://video\.kenh14\.vn/(?:video/)?[\w-]+-(?P<id>[0-9]+)\.chn'
    _TESTS = [{
        'url': 'https://video.kenh14.vn/video/mo-hop-iphone-14-pro-max-nguon-unbox-therapy-316173.chn',
        'md5': '1ed67f9c3a1e74acf15db69590cf6210',
        'info_dict': {
            'id': '316173',
            'ext': 'mp4',
            'title': 'Video mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
            'description': 'Video mở hộp iPhone 14 Pro MaxVideo mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
            'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
            'tags': [],
            'uploader': 'Unbox Therapy',
            'upload_date': '20220517',
            'view_count': int,
            'duration': 722.86,
            'timestamp': 1652764468,
        },
    }, {
        'url': 'https://video.kenh14.vn/video-316174.chn',
        'md5': '2b41877d2afaf4a3f487ceda8e5c7cbd',
        'info_dict': {
            'id': '316174',
            'ext': 'mp4',
            'title': 'Khoảnh khắc VĐV nằm gục khóc sau chiến thắng: 7 năm trời Việt Nam mới có HCV kiếm chém nữ, chỉ có 8 tháng để khổ luyện trước khi lên sàn đấu',
            'description': 'md5:de86aa22e143e2b277bce8ec9c6f17dc',
            'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
            'tags': [],
            'upload_date': '20220517',
            'view_count': int,
            'duration': 70.04,
            'timestamp': 1652766021,
        },
    }, {
        'url': 'https://video.kenh14.vn/0-344740.chn',
        'md5': 'b843495d5e728142c8870c09b46df2a9',
        'info_dict': {
            'id': '344740',
            'ext': 'mov',
            'title': 'Kỳ Duyên đầy căng thẳng trong buổi ra quân đi Miss Universe, nghi thức tuyên thuệ lần đầu xuất hiện gây nhiều tranh cãi',
            'description': 'md5:2a2dbb4a7397169fb21ee68f09160497',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.jpg$',
            'tags': ['kỳ duyên', 'Kỳ Duyên tuyên thuệ', 'miss universe'],
            'uploader': 'Quang Vũ',
            'upload_date': '20241024',
            'view_count': int,
            'duration': 198.88,
            'timestamp': 1729741590,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for one video page.

        The direct file URL is read from the page's ``VideoStream`` element;
        extra metadata and HLS/DASH manifests are fetched non-fatally.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The element with type="VideoStream" carries the scheme-less file
        # location in `data-vid`; a missing attribute raises KeyError here.
        player_html = get_element_html_by_attribute('type', 'VideoStream', webpage)
        attrs = extract_attributes(player_html or '')
        direct_url = attrs['data-vid']

        file_name = remove_start(direct_url, 'kenh14cdn.com/')
        metadata = self._download_json(
            f'https://api.kinghub.vn/video/api/v1/detailVideoByGet?FileName={file_name}',
            video_id, fatal=False)

        # The plain HTTP file is always offered; manifests are added when listed
        formats = [{'url': f'https://{direct_url}', 'format_id': 'http', 'quality': 1}]
        subtitles = {}
        video_data = self._download_json(
            f'https://{direct_url}.json', video_id, note='Downloading video data', fatal=False)

        manifest_extractors = (
            ('hls', self._extract_m3u8_formats_and_subtitles, {'m3u8_id': 'hls'}),
            ('mpd', self._extract_mpd_formats_and_subtitles, {'mpd_id': 'dash'}),
        )
        for manifest_key, extract_manifest, id_kwarg in manifest_extractors:
            manifest_url = traverse_obj(video_data, (manifest_key, {url_or_none}))
            if not manifest_url:
                continue
            fmts, subs = extract_manifest(manifest_url, video_id, fatal=False, **id_kwarg)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        api_fields = traverse_obj(metadata, {
            'duration': ('duration', {parse_duration}),
            'uploader': ('author', {strip_or_none}),
            'timestamp': ('uploadtime', {parse_iso8601(delimiter=' ')}),
            'view_count': ('views', {int_or_none}),
        })

        # Title preference: API metadata, then OpenGraph, then the page heading
        title = traverse_obj(metadata, ('title', {strip_or_none}))
        if not title:
            title = clean_html(self._og_search_title(webpage))
        if not title:
            title = clean_html(get_element_by_class('vdbw-title', webpage))

        description = clean_html(self._og_search_description(webpage))
        if not description:
            description = clean_html(get_element_by_class('vdbw-sapo', webpage))

        thumbnail = self._og_search_thumbnail(webpage) or attrs.get('data-thumb')

        # Keywords are a ';'-separated list; empty entries are filtered out
        keywords = self._html_search_meta('keywords', webpage)
        tags = traverse_obj(keywords, (
            {lambda x: x.split(';')}, ..., filter))

        return {
            **api_fields,
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'description': description,
            'thumbnail': thumbnail,
            'tags': tags,
        }
class Kenh14PlaylistIE(InfoExtractor):
    """Extractor for playlist pages on video.kenh14.vn."""

    _VALID_URL = r'https?://video\.kenh14\.vn/playlist/[\w-]+-(?P<id>[0-9]+)\.chn'
    _TESTS = [{
        'url': 'https://video.kenh14.vn/playlist/tran-tinh-naked-love-mua-2-71.chn',
        'info_dict': {
            'id': '71',
            'title': 'Trần Tình (Naked love) mùa 2',
            'description': 'md5:e9522339304956dea931722dd72eddb2',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
        },
        'playlist_count': 9,
    }, {
        'url': 'https://video.kenh14.vn/playlist/0-72.chn',
        'info_dict': {
            'id': '72',
            'title': 'Lau Lại Đầu Từ',
            'description': 'Cùng xem xưa và nay có gì khác biệt nhé!',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
        },
        'playlist_count': 6,
    }]

    def _real_extract(self, url):
        """Return a playlist whose entries are the page's ``video-item`` elements.

        Title and description come from the ``category-detail`` element, with
        the page's JSON-LD (``name`` / ``alternateName``) as fallback.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        category_detail = get_element_by_class('category-detail', webpage) or ''
        # First JSON-LD object that has both a name and an alternateName
        embed_info = traverse_obj(
            self._yield_json_ld(webpage, playlist_id),
            (lambda _, v: v['name'] and v['alternateName'], any)) or {}

        def entry_url(item_html):
            # Each item exposes its numeric video id through `data-id`
            item_id = extract_attributes(item_html)['data-id']
            return f'https://video.kenh14.vn/video/video-{item_id}.chn'

        video_items = get_elements_html_by_class('video-item', webpage)

        playlist_title = clean_html(get_element_by_class('name', category_detail))
        if not playlist_title:
            playlist_title = unescapeHTML(embed_info.get('name'))

        playlist_description = clean_html(get_element_by_class('description', category_detail))
        if not playlist_description:
            playlist_description = unescapeHTML(embed_info.get('alternateName'))

        # The query string is dropped from the OpenGraph thumbnail URL
        thumbnail = traverse_obj(
            self._og_search_thumbnail(webpage),
            ({url_or_none}, {update_url(query=None)}))

        return self.playlist_from_matches(
            video_items, playlist_id, playlist_title,
            getter=entry_url, ie=Kenh14VideoIE,
            playlist_description=playlist_description,
            thumbnail=thumbnail)

View File

@ -1,291 +0,0 @@
import functools
import json
import uuid
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
determine_ext,
dict_get,
float_or_none,
traverse_obj,
)
class MildomBaseIE(InfoExtractor):
    """Shared base for the Mildom extractors; provides the API call helper."""

    _GUEST_ID = None

    def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None):
        """Call a Mildom JSON endpoint and return the ``body`` of its response.

        A guest ID is generated on first use and sent with every request.
        When ``body`` is truthy it is sent as a JSON payload. Raises
        ExtractorError (expected) when the response ``code`` is not 0.
        """
        if not self._GUEST_ID:
            self._GUEST_ID = f'pc-gp-{uuid.uuid4()}'

        if body:
            payload = json.dumps(body).encode()
            request_headers = {'Content-Type': 'application/json'}
        else:
            payload = None
            request_headers = {}

        # Caller-supplied query entries override the two defaults
        params = {
            '__guest_id': self._GUEST_ID,
            '__platform': 'web',
        }
        params.update(query or {})

        content = self._download_json(
            url, video_id, note=note, data=payload,
            headers=request_headers, query=params)

        if content['code'] == 0:
            return content['body']
        raise ExtractorError(
            f'Mildom says: {content["message"]} (code {content["code"]})',
            expected=True)
class MildomIE(MildomBaseIE):
    """Extractor for a Mildom user's ongoing live stream."""

    IE_NAME = 'mildom'
    IE_DESC = 'Record ongoing live by specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/(?P<id>\d+)'

    def _real_extract(self, url):
        """Resolve the live stream of the user whose numeric ID is in ``url``."""
        user_id = self._match_id(url)
        live_api = 'https://cloudac.mildom.com/nonolive/gappserv/live'
        referer = 'https://www.mildom.com/'

        webpage = self._download_webpage(f'https://www.mildom.com/{user_id}', user_id)

        enterstudio = self._call_api(
            f'{live_api}/enterstudio', user_id,
            note='Downloading live metadata', query={'user_id': user_id})
        # Use the API's log_id as the result id when present, else the user id
        live_id = enterstudio.get('log_id', user_id)

        servers = self._call_api(
            f'{live_api}/liveserver', live_id,
            note='Downloading live server list', query={
                'user_id': user_id,
                'live_server_type': 'hls',
            })

        token_reply = self._call_api(
            f'{live_api}/token', live_id,
            note='Obtaining live playback token', body={'host_id': user_id, 'type': 'hls'})
        playback_token = traverse_obj(token_reply, ('data', ..., 'token'), get_all=False)
        if not playback_token:
            raise ExtractorError('Failed to obtain live playback token')

        master_url = f'{servers["stream_server"]}/{user_id}_master.m3u8?{playback_token}'
        formats = self._extract_m3u8_formats(
            master_url, live_id, 'mp4', headers={
                'Referer': referer,
                'Origin': 'https://www.mildom.com',
            })

        # Attach the same Referer to every format's own HTTP headers
        for stream_format in formats:
            stream_format.setdefault('http_headers', {})['Referer'] = referer

        title = self._html_search_meta('twitter:description', webpage, default=None)
        if not title:
            title = traverse_obj(enterstudio, 'anchor_intro')
        description = traverse_obj(enterstudio, 'intro', 'live_intro', expected_type=str)
        timestamp = float_or_none(enterstudio.get('live_start_ms'), scale=1000)
        uploader = self._html_search_meta('twitter:title', webpage, default=None)
        if not uploader:
            uploader = traverse_obj(enterstudio, 'loginname')

        return {
            'id': live_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': user_id,
            'formats': formats,
            'is_live': True,
        }
class MildomVodIE(MildomBaseIE):
    """Extractor for Mildom playback (VOD) pages."""

    IE_NAME = 'mildom:vod'
    IE_DESC = 'VOD in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+-?[0-9]*)'
    _TESTS = [{
        'url': 'https://www.mildom.com/playback/10882672/10882672-1597662269',
        'info_dict': {
            'id': '10882672-1597662269',
            'ext': 'mp4',
            'title': '始めてのミルダム配信じゃぃ!',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'upload_date': '20200817',
            'duration': 4138.37,
            'description': 'ゲームをしたくて!',
            'timestamp': 1597662269.0,
            'uploader_id': '10882672',
            'uploader': 'kson組長(けいそん)',
        },
    }, {
        'url': 'https://www.mildom.com/playback/10882672/10882672-1597758589870-477',
        'info_dict': {
            'id': '10882672-1597758589870-477',
            'ext': 'mp4',
            'title': '【kson】感染メイズ麻酔銃で無双する',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'timestamp': 1597759093.0,
            'uploader': 'kson組長(けいそん)',
            'duration': 4302.58,
            'uploader_id': '10882672',
            'description': 'このステージ絶対乗り越えたい',
            'upload_date': '20200818',
        },
    }, {
        'url': 'https://www.mildom.com/playback/10882672/10882672-buha9td2lrn97fk2jme0',
        'info_dict': {
            'id': '10882672-buha9td2lrn97fk2jme0',
            'ext': 'mp4',
            'title': '【kson組長】CART RACER!!!',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'uploader_id': '10882672',
            'uploader': 'kson組長(けいそん)',
            'upload_date': '20201104',
            'timestamp': 1604494797.0,
            'duration': 4657.25,
            'description': 'WTF',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for one VOD from its playback detail record."""
        user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
        webpage = self._download_webpage(f'https://www.mildom.com/playback/{user_id}/{video_id}', video_id)

        detail = self._call_api(
            'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id,
            note='Downloading playback metadata', query={
                'v_id': video_id,
            })
        autoplay = detail['playback']

        # One audio-only format, followed by one format per listed video link
        formats = [{
            'url': autoplay['audio_url'],
            'format_id': 'audio',
            'protocol': 'm3u8_native',
            'vcodec': 'none',
            'acodec': 'aac',
            'ext': 'm4a',
        }]
        formats.extend({
            'format_id': f'video-{link["name"]}',
            'url': link['url'],
            'protocol': 'm3u8_native',
            # Width is derived from the link's `level` (used as the height)
            # and the source's width/height ratio
            'width': link['level'] * autoplay['video_width'] // autoplay['video_height'],
            'height': link['level'],
            'vcodec': 'h264',
            'acodec': 'aac',
            'ext': 'mp4',
        } for link in autoplay['video_link'])

        title = self._html_search_meta(('og:description', 'description'), webpage, default=None)
        if not title:
            title = autoplay.get('title')

        return {
            'id': video_id,
            'title': title,
            'description': traverse_obj(autoplay, 'video_intro'),
            'timestamp': float_or_none(autoplay.get('publish_time'), scale=1000),
            'duration': float_or_none(autoplay.get('video_length'), scale=1000),
            'thumbnail': dict_get(autoplay, ('upload_pic', 'video_pic')),
            'uploader': traverse_obj(autoplay, ('author_info', 'login_name')),
            'uploader_id': user_id,
            'formats': formats,
        }
class MildomClipIE(MildomBaseIE):
    """Extractor for Mildom clip pages."""

    IE_NAME = 'mildom:clip'
    IE_DESC = 'Clip in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/clip/(?P<id>(?P<user_id>\d+)-[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'https://www.mildom.com/clip/10042245-63921673e7b147ebb0806d42b5ba5ce9',
        'info_dict': {
            'id': '10042245-63921673e7b147ebb0806d42b5ba5ce9',
            'title': '全然違ったよ',
            'timestamp': 1619181890,
            'duration': 59,
            'thumbnail': r're:https?://.+',
            'uploader': 'ざきんぽ',
            'uploader_id': '10042245',
        },
    }, {
        'url': 'https://www.mildom.com/clip/10111524-ebf4036e5aa8411c99fb3a1ae0902864',
        'info_dict': {
            'id': '10111524-ebf4036e5aa8411c99fb3a1ae0902864',
            'title': 'かっこいい',
            'timestamp': 1621094003,
            'duration': 59,
            'thumbnail': r're:https?://.+',
            'uploader': '(ルーキー',
            'uploader_id': '10111524',
        },
    }, {
        'url': 'https://www.mildom.com/clip/10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
        'info_dict': {
            'id': '10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
            'title': '',
            'timestamp': 1614769431,
            'duration': 31,
            'thumbnail': r're:https?://.+',
            'uploader': 'ドルゴルスレンギーン=ダグワドルジ',
            'uploader_id': '10660174',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for one clip from the clip detail endpoint."""
        user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
        webpage = self._download_webpage(f'https://www.mildom.com/clip/{video_id}', video_id)

        clip_detail = self._call_api(
            'https://cloudac-cf-jp.mildom.com/nonolive/videocontent/clip/detail', video_id,
            note='Downloading playback metadata', query={
                'clip_id': video_id,
            })

        # Page meta description is preferred; the API title is the fallback
        title = self._html_search_meta(
            ('og:description', 'description'), webpage, default=None)
        if not title:
            title = clip_detail.get('title')

        return {
            'id': video_id,
            'title': title,
            'timestamp': float_or_none(clip_detail.get('create_time')),
            'duration': float_or_none(clip_detail.get('length')),
            'thumbnail': clip_detail.get('cover'),
            'uploader': traverse_obj(clip_detail, ('user_info', 'loginname')),
            'uploader_id': user_id,
            'url': clip_detail['url'],
            # Extension is guessed from the URL, defaulting to mp4
            'ext': determine_ext(clip_detail.get('url'), 'mp4'),
        }
class MildomUserVodIE(MildomBaseIE):
    """Extractor that lists every VOD on a Mildom user profile."""

    IE_NAME = 'mildom:user:vod'
    IE_DESC = 'Download all VODs from specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/profile/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.mildom.com/profile/10093333',
        'info_dict': {
            'id': '10093333',
            'title': 'Uploads from ねこばたけ',
        },
        'playlist_mincount': 732,
    }, {
        'url': 'https://www.mildom.com/profile/10882672',
        'info_dict': {
            'id': '10882672',
            'title': 'Uploads from kson組長(けいそん)',
        },
        'playlist_mincount': 201,
    }]

    def _fetch_page(self, user_id, page):
        """Yield URL results for one page of the user's playback list.

        ``page`` is incremented by one before being sent to the API.
        """
        page_number = page + 1
        reply = self._call_api(
            'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
            user_id, note=f'Downloading page {page_number}', query={
                'user_id': user_id,
                'page': page_number,
                'limit': '30',
            })

        # An empty/falsy reply yields nothing; entries lacking a v_id are skipped
        for entry in reply or ():
            v_id = entry.get('v_id')
            if v_id:
                yield self.url_result(f'https://www.mildom.com/playback/{user_id}/{v_id}')

    def _real_extract(self, url):
        """Return a lazily paged playlist of the user's VODs."""
        user_id = self._match_id(url)
        self.to_screen(f'This will download all VODs belonging to user. To download ongoing live video, use "https://www.mildom.com/{user_id}" instead')

        profile_reply = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id,
            query={'user_id': user_id}, note='Downloading user profile')
        profile = profile_reply['user_info']

        # Pages are requested on demand, 30 entries per page
        page_fetcher = functools.partial(self._fetch_page, user_id)
        entries = OnDemandPagedList(page_fetcher, 30)
        return self.playlist_result(
            entries, user_id, f'Uploads from {profile["loginname"]}')

View File

@ -1,606 +1,438 @@
import random import json
import re import re
import urllib.parse
from yt_dlp.utils._utils import ExtractorError
from yt_dlp.utils.traversal import traverse_obj
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
int_or_none, int_or_none,
merge_dicts,
orderedSet, orderedSet,
str_or_none,
try_call,
unified_timestamp,
url_or_none,
urlencode_postdata,
) )
class NPOIE(InfoExtractor): class NPOBaseIE(InfoExtractor):
IE_NAME = 'npo' def _extract_product_id_information(self, product_id):
IE_DESC = 'npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl' token = self._download_json(
_VALID_URL = r'''(?x) f'https://npo.nl/start/api/domain/player-token?productId={product_id}', product_id,
(?: 'Downloading token')['token']
npo:| return self._extract_info_from_token(product_id, token)
https?://
(?:www\.)?
(?:
npo\.nl/(?:[^/]+/)*|
(?:ntr|npostart)\.nl/(?:[^/]+/){2,}|
omroepwnl\.nl/video/fragment/[^/]+__|
(?:zapp|npo3)\.nl/(?:[^/]+/){2,}
)
)
(?P<id>[^/?#]+)
'''
_TESTS = [{ def _extract_info_from_token(self, video_id, token):
'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719', data = {
'md5': '4b3f9c429157ec4775f2c9cb7b911016', 'id': video_id,
'info_dict': { }
'id': 'VPWON_1220719',
'ext': 'm4v',
'title': 'Nieuwsuur',
'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
'upload_date': '20140622',
},
'skip': 'Video was removed',
}, {
'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800',
'md5': 'da50a5787dbfc1603c4ad80f31c5120b',
'info_dict': {
'id': 'VARA_101191800',
'ext': 'm4v',
'title': 'De Mega Mike & Mega Thomas show: The best of.',
'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4',
'upload_date': '20090227',
'duration': 2400,
},
'skip': 'Video was removed',
}, {
'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289',
'md5': '1b279c0547f6b270e014c576415268c5',
'info_dict': {
'id': 'VPWON_1169289',
'ext': 'mp4',
'title': 'Zwart geld: de toekomst komt uit Afrika',
'description': 'md5:dffaf3d628a9c36f78ca48d834246261',
'upload_date': '20130225',
'duration': 3000,
'creator': 'NED2',
'series': 'Tegenlicht',
'timestamp': 1361822340,
'thumbnail': 'https://images.npo.nl/tile/1280x720/142854.jpg',
'episode': 'Zwart geld: de toekomst komt uit Afrika',
'episode_number': 18,
},
}, {
'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706',
'info_dict': {
'id': 'WO_VPRO_043706',
'ext': 'mp4',
'title': 'De nieuwe mens - Deel 1',
'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b',
'duration': 4680,
'episode': 'De nieuwe mens - Deel 1',
'thumbnail': 'https://images.npo.nl/tile/1280x720/6289.jpg',
'timestamp': 1279716057,
'series': 'De nieuwe mens - Deel 1',
'upload_date': '20100721',
},
'params': {
'skip_download': True,
},
}, {
# non asf in streams
'url': 'http://www.npo.nl/hoe-gaat-europa-verder-na-parijs/10-01-2015/WO_NOS_762771',
'info_dict': {
'id': 'WO_NOS_762771',
'ext': 'mp4',
'title': 'Hoe gaat Europa verder na Parijs?',
},
'params': {
'skip_download': True,
},
'skip': 'Video was removed',
}, {
'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content',
'info_dict': {
'id': 'VPWON_1233944',
'ext': 'mp4',
'title': 'Aap, poot, pies',
'description': 'md5:4b46b1b9553b4c036a04d2a532a137e6',
'upload_date': '20150508',
'duration': 599,
'episode': 'Aap, poot, pies',
'thumbnail': 'https://images.poms.omroep.nl/image/s1280/c1280x720/608118.jpg',
'timestamp': 1431064200,
'series': 'Aap, poot, pies',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698',
'info_dict': {
'id': 'POW_00996502',
'ext': 'm4v',
'title': '''"Dit is wel een 'landslide'..."''',
'description': 'md5:f8d66d537dfb641380226e31ca57b8e8',
'upload_date': '20150508',
'duration': 462,
},
'params': {
'skip_download': True,
},
'skip': 'Video was removed',
}, {
# audio
'url': 'http://www.npo.nl/jouw-stad-rotterdam/29-01-2017/RBX_FUNX_6683215/RBX_FUNX_7601437',
'info_dict': {
'id': 'RBX_FUNX_6683215',
'ext': 'mp3',
'title': 'Jouw Stad Rotterdam',
'description': 'md5:db251505244f097717ec59fabc372d9f',
},
'params': {
'skip_download': True,
},
'skip': 'Video was removed',
}, {
'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547',
'only_matching': True,
}, {
'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118',
'only_matching': True,
}, {
'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990',
'only_matching': True,
}, {
'url': 'https://www.npo3.nl/3onderzoekt/16-09-2015/VPWON_1239870',
'only_matching': True,
}, {
# live stream
'url': 'npo:LI_NL1_4188102',
'only_matching': True,
}, {
'url': 'http://www.npo.nl/radio-gaga/13-06-2017/BNN_101383373',
'only_matching': True,
}, {
'url': 'https://www.zapp.nl/1803-skelterlab/instructie-video-s/740-instructievideo-s/POMS_AT_11736927',
'only_matching': True,
}, {
'url': 'https://www.npostart.nl/broodje-gezond-ei/28-05-2018/KN_1698996',
'only_matching': True,
}, {
'url': 'https://npo.nl/KN_1698996',
'only_matching': True,
}, {
'url': 'https://www.npo3.nl/the-genius/21-11-2022/VPWON_1341105',
'info_dict': {
'id': 'VPWON_1341105',
'ext': 'mp4',
'duration': 2658,
'series': 'The Genius',
'description': 'md5:db02f1456939ca63f7c408f858044e94',
'title': 'The Genius',
'timestamp': 1669062000,
'creator': 'NED3',
'episode': 'The Genius',
'thumbnail': 'https://images.npo.nl/tile/1280x720/1827650.jpg',
'episode_number': 8,
'upload_date': '20221121',
},
'params': {
'skip_download': True,
},
}]
@classmethod
def suitable(cls, url):
return (False if any(ie.suitable(url)
for ie in (NPOLiveIE, NPORadioIE, NPORadioFragmentIE))
else super().suitable(url))
def _real_extract(self, url):
video_id = self._match_id(url)
if urllib.parse.urlparse(url).netloc in ['www.ntr.nl', 'ntr.nl']:
player = self._download_json(
f'https://www.ntr.nl/ajax/player/embed/{video_id}', video_id,
'Downloading player JSON', query={
'parameters[elementId]': f'npo{random.randint(0, 999)}',
'parameters[sterReferralUrl]': url,
'parameters[autoplay]': 0,
})
else:
self._request_webpage(
'https://www.npostart.nl/api/token', video_id,
'Downloading token', headers={
'Referer': url,
'X-Requested-With': 'XMLHttpRequest',
})
player = self._download_json(
f'https://www.npostart.nl/player/{video_id}', video_id,
'Downloading player JSON', data=urlencode_postdata({
'autoplay': 0,
'share': 1,
'pageUrl': url,
'hasAdConsent': 0,
}), headers={
'x-xsrf-token': try_call(lambda: urllib.parse.unquote(
self._get_cookies('https://www.npostart.nl')['XSRF-TOKEN'].value)),
})
player_token = player['token']
drm = False
format_urls = set()
formats = [] formats = []
for profile in ('hls', 'dash-widevine', 'dash-playready', 'smooth'): thumbnails = []
streams = self._download_json( subtitles = {}
f'https://start-player.npo.nl/video/{video_id}/streams', for profile_name in ('dash', 'hls', 'smooth'):
video_id, f'Downloading {profile} profile JSON', fatal=False, profile = self._download_json(
query={ 'https://prod.npoplayer.nl/stream-link',
'profile': profile, video_id,
'quality': 'npoplus', f'Downloading profile {profile_name} JSON',
'tokenId': player_token, data=json.dumps({'profileName': profile_name}).encode(),
'streamType': 'broadcast', headers={'Authorization': token},
}, data=b'') # endpoint requires POST fatal=False,
if not streams: )
continue metadata = profile.get('metadata')
stream = streams.get('stream') if metadata is not None:
if not isinstance(stream, dict): duration = metadata.get('duration')
continue thumbnail = metadata.get('poster')
stream_url = url_or_none(stream.get('src')) data['title'] = metadata.get('title')
if not stream_url or stream_url in format_urls: data['description'] = metadata.get('description')
continue data['channel_id'] = metadata.get('channel')
format_urls.add(stream_url) data['uploader_id'] = metadata.get('channel')
if stream.get('protection') is not None or stream.get('keySystemOptions') is not None: data['genres'] = metadata.get('genres')
drm = True if duration:
continue data['duration'] = duration / 1000
stream_type = stream.get('type') if thumbnail and not any(thumb['url'] == thumbnail for thumb in thumbnails):
thumbnails.append({
'url': thumbnail,
})
raw_subtitles = traverse_obj(profile, ('assets', 'subtitles'))
stream_url = traverse_obj(profile, ('stream', 'streamURL'))
stream_ext = determine_ext(stream_url) stream_ext = determine_ext(stream_url)
if stream_type == 'application/dash+xml' or stream_ext == 'mpd': if stream_ext == 'mpd':
formats.extend(self._extract_mpd_formats( formats.extend(self._extract_mpd_formats(
stream_url, video_id, mpd_id='dash', fatal=False)) stream_url, video_id=video_id, mpd_id='dash', fatal=False))
elif stream_type == 'application/vnd.apple.mpegurl' or stream_ext == 'm3u8': elif stream_ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
stream_url, video_id, ext='mp4', stream_url, video_id=video_id, ext='mp4',
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
elif re.search(r'\.isml?/Manifest', stream_url): elif re.search(r'\.isml?/Manifest', stream_url):
formats.extend(self._extract_ism_formats( formats.extend(self._extract_ism_formats(
stream_url, video_id, ism_id='mss', fatal=False)) stream_url, video_id=video_id, ism_id='mss', fatal=False))
else: else:
formats.append({ formats.append({
'url': stream_url, 'url': stream_url,
}) })
if (raw_subtitles):
if not formats: for subtitle in raw_subtitles:
if not self.get_param('allow_unplayable_formats') and drm: tag = subtitle.get('iso')
self.report_drm(video_id) if tag not in subtitles:
subtitles[tag] = []
info = { if not any(sub['url'] == subtitle['location'] for sub in subtitles[tag]):
'id': video_id, subtitles[tag].append({
'title': video_id, 'url': subtitle.get('location'),
'formats': formats, 'name': subtitle.get('name'),
}
embed_url = url_or_none(player.get('embedUrl'))
if embed_url:
webpage = self._download_webpage(
embed_url, video_id, 'Downloading embed page', fatal=False)
if webpage:
video = self._parse_json(
self._search_regex(
r'\bvideo\s*=\s*({.+?})\s*;', webpage, 'video',
default='{}'), video_id)
if video:
title = video.get('episodeTitle')
subtitles = {}
subtitles_list = video.get('subtitles')
if isinstance(subtitles_list, list):
for cc in subtitles_list:
cc_url = url_or_none(cc.get('src'))
if not cc_url:
continue
lang = str_or_none(cc.get('language')) or 'nl'
subtitles.setdefault(lang, []).append({
'url': cc_url,
}) })
return merge_dicts({ data['formats'] = formats
'title': title, data['subtitles'] = subtitles
'description': video.get('description'), data['thumbnails'] = thumbnails
'thumbnail': url_or_none( return data
video.get('still_image_url') or video.get('orig_image_url')),
'duration': int_or_none(video.get('duration')),
'timestamp': unified_timestamp(video.get('broadcastDate')),
'creator': video.get('channel'),
'series': video.get('title'),
'episode': title,
'episode_number': int_or_none(video.get('episodeNumber')),
'subtitles': subtitles,
}, info)
return info
class NPOLiveIE(InfoExtractor): class NPOStartIE(NPOBaseIE):
IE_NAME = 'npo.nl:live' IE_NAME = 'npo.nl:start'
_VALID_URL = r'https?://(?:www\.)?npo(?:start)?\.nl/live(?:/(?P<id>[^/?#&]+))?' _VALID_URL = r'https?://(?:www\.)?npo\.nl/start/serie/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.npo.nl/live/npo-1', 'url': 'https://npo.nl/start/serie/vpro-tegenlicht/seizoen-11/zwart-geld-de-toekomst-komt-uit-afrika/afspelen',
'md5': '8c30593a81ac80d65b531eaf2a92ac02',
'info_dict': { 'info_dict': {
'id': 'LI_NL1_4188102', 'id': 'VPWON_1169289',
'display_id': 'npo-1',
'ext': 'mp4', 'ext': 'mp4',
'title': 're:^NPO 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'title': 'Zwart geld: de toekomst komt uit Afrika',
'is_live': True, 'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
'duration': 3000,
'uploader_id': 'NED2',
'series': 'VPRO Tegenlicht',
'timestamp': 1361822340,
'thumbnail': 'https://assets-start.npo.nl/resources/2023/06/30/d9879593-1944-4249-990c-1561dac14d8e.jpg',
'episode': 'Zwart geld: de toekomst komt uit Afrika',
'episode_number': 18,
'channel_id': 'NED2',
'genres': [],
'release_date': '20130225',
'release_timestamp': 1361822340,
'season': 'Season 11',
'season_id': 'df5e2334-e07a-4301-b3d3-8e224d8c1f07',
'season_number': 11,
'series_id': '6727dcdf-4bd2-477c-bf96-1ead69fad6c9',
'upload_date': '20130225',
}, },
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.npo.nl/live',
'only_matching': True,
}, {
'url': 'https://www.npostart.nl/live/npo-1',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) or 'npo-1' slug = self._match_id(url)
metadata = self._download_json(f'https://npo.nl/start/api/domain/program-detail?slug={slug}', video_id=slug, note='Downloading program details JSON')
video_id = metadata['productId']
data = self._extract_product_id_information(video_id)
thumbnails = []
for image in metadata.get('images'):
thumbnails.append({
'id': image.get('guid'),
'url': image.get('url'),
})
break
webpage = self._download_webpage(url, display_id) data['title'] = metadata.get('title') or data.get('title')
data['episode'] = metadata.get('title') or data.get('title')
live_id = self._search_regex( data['episode_number'] = int_or_none(metadata.get('programKey'))
[r'media-id="([^"]+)"', r'data-prid="([^"]+)"'], webpage, 'live id') data['duration'] = int_or_none(metadata.get('durationInSeconds'), default=data.get('duration'))
data['description'] = traverse_obj(metadata, ('synopsis', 'long')) or traverse_obj(metadata, ('synopsis', 'short')) or traverse_obj(metadata, ('synopsis', 'brief')) or data.get('description')
return { data['thumbnails'] = thumbnails
'_type': 'url_transparent', data['genres'] = metadata.get('genres') or data.get('genres')
'url': f'npo:{live_id}', data['series'] = traverse_obj(metadata, ('series', 'title'))
'ie_key': NPOIE.ie_key(), data['series_id'] = traverse_obj(metadata, ('series', 'guid'))
'id': live_id, data['season_number'] = int_or_none(traverse_obj(metadata, ('season', 'seasonKey')))
'display_id': display_id, data['season_id'] = traverse_obj(metadata, ('season', 'guid'))
} data['release_timestamp'] = int_or_none(metadata.get('firstBroadcastDate'))
data['timestamp'] = int_or_none(metadata.get('publishedDateTime'))
return data
class NPORadioIE(InfoExtractor): class NPORadioIE(NPOBaseIE):
IE_NAME = 'npo.nl:radio' IE_NAME = 'npo.nl:radio'
_VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)' _VALID_URL = r'https?://(?:www\.)?nporadio(?P<n>\d)\.nl(?:/[^/]+)*/(?P<id>[^/]+)?'
_TEST = { _TESTS = [{
'url': 'http://www.npo.nl/radio/radio-1', 'url': 'https://www.nporadio1.nl/',
'info_dict': { 'info_dict': {
'id': 'radio-1', 'id': 'live',
'ext': 'mp3', 'ext': 'mp4',
'title': 're:^NPO Radio 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'title': r're:^NPO Radio 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'live_status': 'is_live',
'thumbnail': r're:^https?://.*\.jpg',
'description': 'Live programmering',
'is_live': True, 'is_live': True,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
} },
{
@classmethod 'url': 'https://www.nporadio1.nl/nieuws/binnenland/15bcad75-22c5-4226-a3fe-d54a76175da3/utrecht-zet-rolmodellen-in-voor-bewustwording-mentale-gezondheid',
def suitable(cls, url): 'md5': '8ad04123febc07716f45e324d7fb792d',
return False if NPORadioFragmentIE.suitable(url) else super().suitable(url) 'info_dict': {
'id': 'utrecht-zet-rolmodellen-in-voor-bewustwording-mentale-gezondheid',
@staticmethod 'ext': 'mp4',
def _html_get_attribute_regex(attribute): 'duration': 262,
return rf'{attribute}\s*=\s*\'([^\']+)\'' 'channel_id': 'RAD1',
'description': 'md5:7d36b72407e757e6c748a6cdf27c7628',
'title': 'Utrecht zet rolmodellen in voor bewustzijn mentale gezondheid ',
'genres': ['Informatief'],
'uploader_id': 'RAD1',
'thumbnail': 'https://images.poms.omroep.nl/image/s1080/2217026',
},
},
{
'url': 'https://www.nporadio2.nl/fragmenten/janwillemstartop/9d35b8fb-a07b-41f9-9cc5-a9c89dd60dbb/2024-10-10-nancy-zet-zich-in-voor-daklozen-voor-mij-was-het-op-het-randje',
'md5': '432b0e106082ffaa0e31c4549db09b0c',
'info_dict': {
'id': '2024-10-10-nancy-zet-zich-in-voor-daklozen-voor-mij-was-het-op-het-randje',
'ext': 'mp4',
'genres': ['Muziek'],
'title': 'Nancy zet zich in voor daklozen: "Voor mij was het op het randje" ',
'duration': 235,
'thumbnail': 'https://images.poms.omroep.nl/image/s1080/2216783',
'description': 'md5:26925e8bd2c715b160cc864efa731583',
'uploader_id': 'RAD2',
'channel_id': 'RAD2',
},
},
{
'url': 'https://www.nporadio2.nl/uitzendingen/dit-is-tannaz/9bc1ab7e-77f6-4444-986b-1cd7c25ff4bf/2024-10-11-dit-is-tannaz',
'md5': 'a1212f4d2fe361aafcced5bcd3cf939b',
'info_dict': {
'id': '2024-10-11-dit-is-tannaz',
'ext': 'mp3',
'uploader_id': 'RAD2',
'genres': ['Muziek'],
'title': 'Dit is Tannaz',
'channel_id': 'RAD2',
'description': 'md5:3f2b5dad3e965ae7915a5f9a5a2decc5',
'thumbnail': 'https://images.poms.omroep.nl/image/s1080/2190854',
'duration': 7200.026,
},
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) parsed = self._match_valid_url(url)
video_id = parsed.group('id') or 'live'
webpage = self._download_webpage(url, video_id) if video_id == 'live':
radio_number = parsed.group('n')
title = self._html_search_regex( token_url = self._download_json(f'https://www.nporadio{radio_number}.nl/api/player/npo-radio-{radio_number}', video_id)['tokenUrl']
self._html_get_attribute_regex('data-channel'), webpage, 'title') else:
props = self._search_nextjs_data(self._download_webpage(url, video_id), video_id)['props']['pageProps']
stream = self._parse_json( token_url = traverse_obj(props, ('article', 'content', 0, 'value', 'player', 'tokenUrl')) or traverse_obj(props, ('fragmentDetail', 'bodyContent', 0, 'payload', 'player', 'tokenUrl')) or traverse_obj(props, ('radioBroadcast', 'showAssets', 0, 'player', 'tokenUrl'))
self._html_search_regex(self._html_get_attribute_regex('data-streams'), webpage, 'data-streams'), if token_url is None:
video_id) raise ExtractorError('Token url not found')
data = self._extract_info_from_token(video_id, self._download_json(token_url, video_id, 'Downloading token JSON')['playerToken'])
codec = stream.get('codec') data['is_live'] = video_id == 'live'
return data
return {
'id': video_id,
'url': stream['url'],
'title': title,
'acodec': codec,
'ext': codec,
'is_live': True,
}
class NPORadioFragmentIE(InfoExtractor): class NPO3IE(NPOBaseIE):
IE_NAME = 'npo.nl:radio:fragment' IE_NAME = 'npo.nl:npo3'
_VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/[^/]+/fragment/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?npo\.nl/npo3/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
_TEST = { _TEST = {
'url': 'http://www.npo.nl/radio/radio-5/fragment/174356', 'url': 'https://npo.nl/npo3/vlees-smakelijk/11-10-2024/WO_KN_20222563',
'md5': 'dd8cc470dad764d0fdc70a9a1e2d18c2', 'md5': 'e0cd5b96c712edea2e7f0700d348bc98',
'info_dict': { 'info_dict': {
'id': '174356', 'id': 'WO_KN_20222563',
'ext': 'mp3', 'ext': 'mp4',
'title': 'Jubileumconcert Willeke Alberti', 'description': 'md5:31f5ffff8c70af1635cbb93a8205e0c4',
'duration': 1021.994,
'title': 'Vlees smakelijk',
'thumbnail': 'https://images.poms.omroep.nl/image/s1080/2215940',
'genres': ['Human Interest', 'Reality TV'],
}, },
} }
def _real_extract(self, url): def _real_extract(self, url):
audio_id = self._match_id(url) return self._extract_product_id_information(self._match_id(url))
webpage = self._download_webpage(url, audio_id)
title = self._html_search_regex(
rf'href="/radio/[^/]+/fragment/{audio_id}" title="([^"]+)"',
webpage, 'title')
audio_url = self._search_regex(
r"data-streams='([^']+)'", webpage, 'audio url')
return {
'id': audio_id,
'url': audio_url,
'title': title,
}
class NPODataMidEmbedIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor class SchoolTVIE(NPOBaseIE):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video_id', group='id')
return {
'_type': 'url_transparent',
'ie_key': 'NPO',
'url': f'npo:{video_id}',
'display_id': display_id,
}
class SchoolTVIE(NPODataMidEmbedIE):
IE_NAME = 'schooltv' IE_NAME = 'schooltv'
_VALID_URL = r'https?://(?:www\.)?schooltv\.nl/video/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?schooltv\.nl/video-item/(?P<id>[^/?#&]+)'
_TEST = { _TEST = {
'url': 'http://www.schooltv.nl/video/ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam/', 'url': 'https://schooltv.nl/video-item/ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam',
'info_dict': { 'info_dict': {
'id': 'WO_NTR_429477', 'id': 'WO_NTR_429477',
'display_id': 'ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam',
'title': 'Ademhaling: De hele dag haal je adem. Maar wat gebeurt er dan eigenlijk in je lichaam?',
'ext': 'mp4', 'ext': 'mp4',
'description': 'md5:abfa0ff690adb73fd0297fd033aaa631', 'duration': 51.0,
}, 'genres': ['Jeugd'],
'params': { 'thumbnail': 'https://images.poms.omroep.nl/image/s1080/242560',
# Skip because of m3u8 download 'title': 'Ademhaling',
'skip_download': True, 'description': 'md5:db41d874d9ebe597686dda69e892ba49',
}, },
} }
def _real_extract(self, url):
    """Resolve a schooltv.nl video page to its media information.

    The video id is read from the ``id=`` query parameter of the page's
    ``og:video`` / ``og:video:secure_url`` meta tag, a player token is
    requested from api3.schooltv.nl for that id, and the token is handed
    to ``_extract_info_from_token``.
    """
    display_id = self._match_id(url)
    webpage = self._download_webpage(url, display_id)
    # The meta lookup is requested as fatal: otherwise a page without an
    # og:video tag yields None here, and the regex search below would fail
    # with an opaque TypeError instead of a regular extraction error.
    player_url = self._html_search_meta(
        ('og:video', 'og:video:secure_url'), webpage, fatal=True)
    video_id = self._search_regex(r'id=([a-zA-Z0-9_]+)', player_url, 'video id')
    token = self._download_json(
        f'https://api3.schooltv.nl/player/{video_id}', video_id,
        'Downloading token JSON')['data']['token']
    return self._extract_info_from_token(video_id, token)
class HetKlokhuisIE(NPODataMidEmbedIE):
class HetKlokhuisIE(NPOBaseIE):
IE_NAME = 'hetklokhuis' IE_NAME = 'hetklokhuis'
_VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/[^/]+/\d+/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TEST = { _TEST = {
'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven', 'url': 'https://hetklokhuis.nl/dossier/142/zoek-het-uit/tv-uitzending/2987/aliens',
'md5': '679d610017689ecc798b316b8daa34e7',
'info_dict': { 'info_dict': {
'id': 'VPWON_1260528', 'id': 'VPWON_1225126',
'display_id': 'Zwaartekrachtsgolven', 'ext': 'mp4',
'ext': 'm4v', 'duration': 852.27,
'title': 'Het Klokhuis: Zwaartekrachtsgolven', 'channel_id': 'NED3',
'description': 'md5:c94f31fb930d76c2efa4a4a71651dd48', 'description': 'md5:17681c9644521344a0573e04c78912d7',
'upload_date': '20170223', 'title': 'Aliens',
}, 'genres': ['Jeugd', 'Informatief'],
'params': { 'thumbnail': 'https://images.poms.omroep.nl/image/s1080/624824',
'skip_download': True, 'uploader_id': 'NED3',
'series': 'Zoek Het Uit!',
}, },
} }
def _real_extract(self, url):
    """Resolve a hetklokhuis.nl page to its media information.

    The og:title is expected to look like ``<series> - <title>``. The
    video id comes from the page's ``data-mid`` attribute and the player
    token from api3.schooltv.nl; the token is then passed to
    ``_extract_info_from_token`` and the result is updated with the
    series and title taken from the page.
    """
    webpage = self._download_webpage(url, self._match_id(url))
    og_title = self._og_search_title(webpage)
    # Split on the first separator only. A plain two-way unpack of
    # split(' - ') raises ValueError when the title holds no separator
    # or more than one.
    series, separator, title = og_title.partition(' - ')
    if not separator:
        # No series prefix present: the whole og:title is the title.
        series, title = None, og_title
    video_id = self._html_search_regex(r'data-mid="([a-zA-Z0-9_]+)"', webpage, 'video id')
    token = self._download_json(
        f'https://api3.schooltv.nl/player/{video_id}', video_id,
        'Downloading token JSON')['data']['token']
    data = self._extract_info_from_token(video_id, token)
    if series:
        data['series'] = series
    data['title'] = title
    return data
class ZappIE(NPOBaseIE):
    """Extractor for zapp.nl programme pages.

    The URL segment captured as ``id`` is passed straight to
    ``_extract_product_id_information``.
    """

    IE_NAME = 'zapp.nl'
    _VALID_URL = r'https?://(?:www\.)?zapp\.nl/programmas/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.zapp.nl/programmas/zappsport/gemist/POMS_AT_811523',
        'md5': 'faf6811abea03ba8a52298c97bd0146b',
        'info_dict': {
            'id': 'POMS_AT_811523',
            'ext': 'mp4',
            'title': 'Running Team 2015 - aflevering 1',
            'description': 'Kindersportprogramma waarin alle takken van sport voorbijkomen.',
            'thumbnail': 'https://images.poms.omroep.nl/image/s1080/586056',
            'duration': 900.0,
            'genres': [],
            'channel_id': 'NED3',
            'uploader_id': 'NED3',
        },
    }

    def _real_extract(self, url):
        """Return the information for the product id found in *url*."""
        product_id = self._match_id(url)
        return self._extract_product_id_information(product_id)
class NPOPlaylistBaseIE(NPOBaseIE):
def _mid_result(self, mid, title, url):
    """Build the entry for media id *mid*, forcing its title to *title*.

    A token is obtained through ``_download_token(mid, url)`` and turned
    into an info dict by ``_extract_info_from_token``.
    """
    info = self._extract_info_from_token(mid, self._download_token(mid, url))
    # The API returns the same title for every video so overwrite it with the video title
    info['title'] = title
    return info
class NPOPlaylistBaseIE(NPOIE): # XXX: Do not subclass from concrete IE
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id) webpage = self._download_webpage(url, playlist_id)
entries = [
self.url_result(f'npo:{video_id}' if not video_id.startswith('http') else video_id)
for video_id in orderedSet(re.findall(self._PLAYLIST_ENTRY_RE, webpage))
]
playlist_title = self._html_search_regex( playlist_title = self._html_search_regex(
self._PLAYLIST_TITLE_RE, webpage, 'playlist title', self._PLAYLIST_TITLE_RE, webpage, 'playlist title',
default=None) or self._og_search_title(webpage) default=None) or self._og_search_title(webpage)
entries = [
self._mid_result(video_id, playlist_title, url)
for video_id in orderedSet(re.findall(self._PLAYLIST_ENTRY_RE, webpage))
]
return self.playlist_result(entries, playlist_id, playlist_title) return self.playlist_result(entries, playlist_id, playlist_title)
class VPROIE(NPOPlaylistBaseIE): class VPROIE(NPOPlaylistBaseIE):
IE_NAME = 'vpro' IE_NAME = 'vpro'
_VALID_URL = r'https?://(?:www\.)?(?:(?:tegenlicht\.)?vpro|2doc)\.nl/(?:[^/]+/)*(?P<id>[^/]+)\.html' _VALID_URL = r'https?://(?:www\.)?(?:vpro|2doc)\.nl/(?:[^/]+/)*(?P<id>[^/.]+)'
_PLAYLIST_TITLE_RE = (r'<h1[^>]+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)', _PLAYLIST_TITLE_RE = (r'<h1[^>]+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)')
r'<h5[^>]+class=["\'].*?\bmedia-platform-subtitle\b.*?["\'][^>]*>([^<]+)')
_PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"' _PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"'
_TESTS = [ _TESTS = [
{ {
'url': 'http://tegenlicht.vpro.nl/afleveringen/2012-2013/de-toekomst-komt-uit-afrika.html', 'url': 'https://www.vpro.nl/programmas/3doc/2015/sergio-herman.html',
'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
'info_dict': {
'id': 'VPWON_1169289',
'ext': 'm4v',
'title': 'De toekomst komt uit Afrika',
'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
'upload_date': '20130225',
},
'skip': 'Video gone',
},
{
'url': 'http://www.vpro.nl/programmas/2doc/2015/sergio-herman.html',
'info_dict': { 'info_dict': {
'id': 'sergio-herman', 'id': 'sergio-herman',
'title': 'sergio herman: fucking perfect', 'title': 'Sergio Herman: Fucking Perfect',
},
'playlist_count': 2,
},
{
# playlist with youtube embed
'url': 'http://www.vpro.nl/programmas/2doc/2015/education-education.html',
'info_dict': {
'id': 'education-education',
'title': 'education education',
}, },
'playlist_count': 2, 'playlist_count': 2,
}, },
{ {
'url': 'http://www.2doc.nl/documentaires/series/2doc/2015/oktober/de-tegenprestatie.html', 'url': 'http://www.2doc.nl/documentaires/series/2doc/2015/oktober/de-tegenprestatie.html',
'md5': '87a4cc5a62da942ca9270e4da1d2b8b5',
'info_dict': { 'info_dict': {
'id': 'de-tegenprestatie', 'id': 'VPWON_1223413',
'ext': 'mp4',
'title': 'De Tegenprestatie', 'title': 'De Tegenprestatie',
'duration': 2992.45,
'genres': ['Documentaire'],
'channel_id': 'NED2',
'description': 'Er wordt van burgers steeds meer eigen verantwoordelijkheid en zelfredzaamheid gevraagd.',
'uploader_id': 'NED2',
'thumbnail': 'https://images.poms.omroep.nl/image/s1080/663342',
}, },
'playlist_count': 2,
}, { }, {
'url': 'http://www.2doc.nl/speel~VARA_101375237~mh17-het-verdriet-van-nederland~.html', 'url': 'https://www.2doc.nl/documentaires/2018/07/het-verdriet-van-nederland.html',
'md5': '9ae67f6bdcb16a62f99e01198674fe10',
'info_dict': { 'info_dict': {
'id': 'VARA_101375237', 'id': 'VARA_101375237',
'ext': 'm4v', 'ext': 'mp4',
'title': 'MH17: Het verdriet van Nederland', 'uploader_id': 'NED1',
'description': 'md5:09e1a37c1fdb144621e22479691a9f18', 'channel_id': 'NED1',
'upload_date': '20150716', 'thumbnail': 'https://images.poms.omroep.nl/image/s1080/1608566',
}, 'duration': 4259.63,
'params': { 'genres': ['Documentaire'],
# Skip because of m3u8 download 'title': 'Het verdriet van Nederland',
'skip_download': True, 'description': 'md5:d19b745af196f6cb98daf50ac7cb01fc',
}, },
}, },
] ]
def _download_token(self, mid, url):
    """Request a player token for *mid* from the VPRO token endpoint.

    The media id is sent as a JSON body and *url* is sent as the Referer
    header; the ``token`` field of the JSON response is returned.
    """
    request_body = json.dumps({'mid': mid}).encode()
    request_headers = {
        'Content-Type': 'application/json',
        'Referer': url,
    }
    response = self._download_json(
        'https://rs.vpro.nl/v3/api/npoplayer/token', mid, 'Downloading token JSON',
        data=request_body, headers=request_headers)
    return response['token']
class WNLIE(NPOPlaylistBaseIE):
IE_NAME = 'wnl' class NTRIE(NPOPlaylistBaseIE):
_VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P<id>[^/]+)__\d+' IE_NAME = 'ntr'
_PLAYLIST_TITLE_RE = r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>' _VALID_URL = r'https?://(?:www\.)?ntr\.nl/(?:[^/]+/)+(?P<id>[^/?#&]+)'
_PLAYLIST_ENTRY_RE = r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>Deel \d+' _PLAYLIST_TITLE_RE = r'<h2[^>]*>\s*(.+)\s*</h2>'
_PLAYLIST_ENTRY_RE = r'<div[^>]+class=["\']npo_wrapper["\'][^>]+data-mid=["\'](.+?)["\']'
_TESTS = [{ _TESTS = [{
'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515', 'url': 'https://ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944',
'md5': '5b7b45c15750c9a74f8b70ea69f0b55a',
'info_dict': { 'info_dict': {
'id': 'vandaag-de-dag-6-mei', 'id': 'VPWON_1233944',
'title': 'Vandaag de Dag 6 mei', 'ext': 'mp4',
'duration': 599.98,
'channel_id': 'NED3',
'genres': ['Jeugd', 'Serie'],
'description': 'Appie en Aisia mogen beurtelings mee met hun vader Luc, die in de dierentuin werkt.',
'uploader_id': 'NED3',
'title': 'Aap, poot, pies',
'thumbnail': 'https://images.poms.omroep.nl/image/s1080/608116',
}, },
'playlist_count': 4,
}] }]
def _download_token(self, mid, url):
    """Fetch the ``jwt`` field of the ntr.nl embed endpoint response for *mid*.

    *url* is accepted for signature compatibility and is not used here.
    """
    embed_info = self._download_json(
        f'https://ntr.nl/ajax/player/embed/{mid}', mid, 'Downloading token JSON')
    return embed_info['jwt']
class AndereTijdenIE(NPOPlaylistBaseIE):
class AndereTijdenIE(NTRIE):
IE_NAME = 'anderetijden' IE_NAME = 'anderetijden'
_VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/programma/(?:[^/]+/)+(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/programma/(?:[^/]+/)+(?P<id>[^/?#&]+)'
_PLAYLIST_TITLE_RE = r'(?s)<h1[^>]+class=["\'].*?\bpage-title\b.*?["\'][^>]*>(.+?)</h1>' _PLAYLIST_TITLE_RE = r'(?s)<h1[^>]+class=["\'].*?\bpage-title\b.*?["\'][^>]*>(.+?)</h1>'
_PLAYLIST_ENTRY_RE = r'<figure[^>]+class=["\']episode-container episode-page["\'][^>]+data-prid=["\'](.+?)["\']' _PLAYLIST_ENTRY_RE = r'<figure[^>]+class=["\']episode-container\s+episode-page["\'][^>]+data-mid=["\'](.+?)["\']'
_TESTS = [{ _TESTS = [{
'url': 'http://anderetijden.nl/programma/1/Andere-Tijden/aflevering/676/Duitse-soldaten-over-de-Slag-bij-Arnhem', 'url': 'http://anderetijden.nl/programma/1/Andere-Tijden/aflevering/676/Duitse-soldaten-over-de-Slag-bij-Arnhem',

View File

@ -1,136 +0,0 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
extract_attributes,
int_or_none,
js_to_json,
merge_dicts,
)
class PokemonIE(InfoExtractor):
    """Extractor for pokemon.com pages; playback is handed off to LimelightMedia."""

    _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))'
    _TESTS = [{
        'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/',
        'md5': '2fe8eaec69768b25ef898cda9c43062e',
        'info_dict': {
            'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4',
            'ext': 'mp4',
            'title': 'The Ol Raise and Switch!',
            'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
        },
        'add_id': ['LimelightMedia'],
    }, {
        # no data-video-title
        'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008',
        'info_dict': {
            'id': 'dfbaf830d7e54e179837c50c0c6cc0e1',
            'ext': 'mp4',
            'title': "Pokémon : L'ascension de Darkrai",
            'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5',
        },
        'add_id': ['LimelightMedia'],
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
        'only_matching': True,
    }, {
        'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/',
        'only_matching': True,
    }, {
        'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Locate the page's video element and return a url_transparent result."""
        play_id, display_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, play_id or display_id)

        # When the URL carries no ?play= id, accept any element whose
        # data-video-id is a 32-character lowercase alphanumeric id.
        id_pattern = play_id if play_id else '[a-z0-9]{32}'
        element = self._search_regex(
            rf'(<[^>]+data-video-id="{id_pattern}"[^>]*>)',
            webpage, 'video data element')
        attrs = extract_attributes(element)
        video_id = attrs['data-video-id']

        # Title sources in order of preference; only the last lookup is fatal.
        title = attrs.get('data-video-title')
        if not title:
            title = self._html_search_meta(
                'pkm-title', webpage, ' title', default=None)
        if not title:
            title = self._search_regex(
                r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title')

        season_number = int_or_none(attrs.get('data-video-season'))
        episode_number = int_or_none(attrs.get('data-video-episode'))
        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': f'limelight:media:{video_id}',
            'title': title,
            'description': attrs.get('data-video-summary'),
            'thumbnail': attrs.get('data-video-poster'),
            'series': 'Pokémon',
            'season_number': season_number,
            'episode': title,
            'episode_number': episode_number,
            'ie_key': 'LimelightMedia',
        }
class PokemonWatchIE(InfoExtractor):
    """Extractor for watch.pokemon.com player URLs; playback is handed off to LimelightMedia."""

    _VALID_URL = r'https?://watch\.pokemon\.com/[a-z]{2}-[a-z]{2}/(?:#/)?player(?:\.html)?\?id=(?P<id>[a-z0-9]{32})'
    _API_URL = 'https://www.pokemon.com/api/pokemontv/v2/channels/{0:}'
    _TESTS = [{
        'url': 'https://watch.pokemon.com/en-us/player.html?id=8309a40969894a8e8d5bc1311e9c5667',
        'md5': '62833938a31e61ab49ada92f524c42ff',
        'info_dict': {
            'id': '8309a40969894a8e8d5bc1311e9c5667',
            'ext': 'mp4',
            'title': 'Lillier and the Staff!',
            'description': 'md5:338841b8c21b283d24bdc9b568849f04',
        },
    }, {
        'url': 'https://watch.pokemon.com/en-us/#/player?id=3fe7752ba09141f0b0f7756d1981c6b2',
        'only_matching': True,
    }, {
        'url': 'https://watch.pokemon.com/de-de/player.html?id=b3c402e111a4459eb47e12160ab0ba07',
        'only_matching': True,
    }]

    def _extract_media(self, channel_array, video_id):
        """Return the media dict whose ``id`` equals *video_id*, or None.

        *channel_array* is iterated as a sequence of channel dicts, each
        optionally carrying a ``media`` list.
        """
        for channel in channel_array:
            # A channel without (or with a null) 'media' entry is skipped
            # instead of raising TypeError while iterating None.
            for media in channel.get('media') or []:
                if media.get('id') == video_id:
                    return media
        return None

    def _real_extract(self, url):
        """Return a LimelightMedia result, enriched with channel API metadata.

        Raises ExtractorError (expected) when the id is absent from the
        channel listing for the page's region.
        """
        video_id = self._match_id(url)

        info = {
            '_type': 'url',
            'id': video_id,
            'url': f'limelight:media:{video_id}',
            'ie_key': 'LimelightMedia',
        }

        # API call can be avoided entirely if we are listing formats
        if self.get_param('listformats', False):
            return info

        webpage = self._download_webpage(url, video_id)
        build_vars = self._parse_json(self._search_regex(
            r'(?s)buildVars\s*=\s*({.*?})', webpage, 'build vars'),
            video_id, transform_source=js_to_json)
        region = build_vars.get('region')
        channel_array = self._download_json(self._API_URL.format(region), video_id)
        video_data = self._extract_media(channel_array, video_id)

        if video_data is None:
            raise ExtractorError(
                f'Video {video_id} does not exist', expected=True)

        info['_type'] = 'url_transparent'
        # Thumbnails are optional metadata: fall back to an empty mapping so
        # a missing or null 'images' entry does not abort the extraction.
        images = video_data.get('images') or {}

        return merge_dicts(info, {
            'title': video_data.get('title'),
            'description': video_data.get('description'),
            'thumbnail': images.get('medium') or images.get('small'),
            'series': 'Pokémon',
            'season_number': int_or_none(video_data.get('season')),
            'episode': video_data.get('title'),
            'episode_number': int_or_none(video_data.get('episode')),
        })

View File

@ -1,189 +0,0 @@
import functools
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
int_or_none,
parse_duration,
qualities,
remove_start,
strip_or_none,
)
class VeohIE(InfoExtractor):
    """Extractor for single veoh.com videos, using the site's getVideo JSON endpoint."""

    _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|videos|embed|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'

    _TESTS = [{
        'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
        'md5': '620e68e6a3cff80086df3348426c9ca3',
        'info_dict': {
            'id': 'v56314296nk7Zdmz3',
            'ext': 'mp4',
            'title': 'Straight Backs Are Stronger',
            'description': 'md5:203f976279939a6dc664d4001e13f5f4',
            'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th56314296\\.jpg(\\?.*)?',
            'uploader': 'LUMOback',
            'duration': 46,
            'view_count': int,
            'average_rating': int,
            'comment_count': int,
            'age_limit': 0,
            'categories': ['technology_and_gaming'],
            'tags': ['posture', 'posture', 'sensor', 'back', 'pain', 'wearable', 'tech', 'lumo'],
        },
    }, {
        'url': 'http://www.veoh.com/embed/v56314296nk7Zdmz3',
        'only_matching': True,
    }, {
        'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
        'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
        'info_dict': {
            'id': '27701988',
            'ext': 'mp4',
            'title': 'Chile workers cover up to avoid skin damage',
            'description': 'md5:2bd151625a60a32822873efc246ba20d',
            'uploader': 'afp-news',
            'duration': 123,
        },
        'skip': 'This video has been deleted.',
    }, {
        'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
        'md5': '4fde7b9e33577bab2f2f8f260e30e979',
        'note': 'Embedded ooyala video',
        'info_dict': {
            'id': '69525809',
            'ext': 'mp4',
            'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
            'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
            'uploader': 'newsy-videos',
        },
        'skip': 'This video has been deleted.',
    }, {
        'url': 'http://www.veoh.com/watch/e152215AJxZktGS',
        'only_matching': True,
    }, {
        'url': 'https://www.veoh.com/videos/v16374379WA437rMH',
        'md5': 'cceb73f3909063d64f4b93d4defca1b3',
        'info_dict': {
            'id': 'v16374379WA437rMH',
            'ext': 'mp4',
            'title': 'Phantasmagoria 2, pt. 1-3',
            'description': 'Phantasmagoria: a Puzzle of Flesh',
            'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th16374379\\.jpg(\\?.*)?',
            'uploader': 'davidspackage',
            'duration': 968,
            'view_count': int,
            'average_rating': int,
            'comment_count': int,
            'age_limit': 18,
            'categories': ['technology_and_gaming', 'gaming'],
            'tags': ['puzzle', 'of', 'flesh'],
        },
    }]

    def _real_extract(self, url):
        """Download the video's JSON metadata and build its info dict.

        ``video`` and its ``title`` are treated as mandatory (KeyError if
        absent); every other field is optional.
        """
        video_id = self._match_id(url)
        metadata = self._download_json(
            'https://www.veoh.com/watch/getVideo/' + video_id,
            video_id)
        video = metadata['video']
        title = video['title']

        thumbnail_url = None
        q = qualities(['Regular', 'HQ'])
        formats = []
        # `or {}` rather than a .get() default: the default is only used when
        # the key is missing, not when the API sends an explicit null.
        for f_id, f_url in (video.get('src') or {}).items():
            if not f_url:
                continue
            if f_id == 'poster':
                # The 'src' mapping mixes the poster image in with the streams.
                thumbnail_url = f_url
            else:
                formats.append({
                    'format_id': f_id,
                    'quality': q(f_id),
                    'url': f_url,
                })

        categories = metadata.get('categoryPath')
        if not categories:
            category = remove_start(strip_or_none(video.get('category')), 'category_')
            categories = [category] if category else None
        tags = video.get('tags')

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': thumbnail_url,
            # Same null-safety as for 'src' above.
            'uploader': (video.get('author') or {}).get('nickname'),
            'duration': int_or_none(video.get('lengthBySec')) or parse_duration(video.get('length')),
            'view_count': int_or_none(video.get('views')),
            'formats': formats,
            'average_rating': int_or_none(video.get('rating')),
            'comment_count': int_or_none(video.get('numOfComments')),
            'age_limit': 18 if video.get('contentRatingId') == 2 else 0,
            'categories': categories,
            'tags': tags.split(', ') if tags else None,
        }
class VeohUserIE(VeohIE):  # XXX: Do not subclass from concrete IE
    """Playlist extractor for the published videos of a veoh.com user."""

    _VALID_URL = r'https?://(?:www\.)?veoh\.com/users/(?P<id>[\w-]+)'
    IE_NAME = 'veoh:user'

    _TESTS = [
        {
            'url': 'https://www.veoh.com/users/valentinazoe',
            'info_dict': {
                'id': 'valentinazoe',
                'title': 'valentinazoe (Uploads)',
            },
            'playlist_mincount': 75,
        },
        {
            'url': 'https://www.veoh.com/users/PiensaLibre',
            'info_dict': {
                'id': 'PiensaLibre',
                'title': 'PiensaLibre (Uploads)',
            },
            'playlist_mincount': 2,
        }]

    _PAGE_SIZE = 16

    def _fetch_page(self, uploader, page):
        """Yield url results for one page of *uploader*'s published videos.

        *page* is zero-based; the request sent to the site is one-based.
        Raises ExtractorError with the response message when the response
        does not report success.
        """
        page_number = page + 1
        request_headers = {
            'x-csrf-token': self._TOKEN,
            'content-type': 'application/json;charset=UTF-8',
        }
        payload = json.dumps({
            'username': uploader,
            'maxResults': self._PAGE_SIZE,
            'page': page_number,
            'requestName': 'userPage',
        }).encode()
        response = self._download_json(
            'https://www.veoh.com/users/published/videos', uploader,
            note=f'Downloading videos page {page_number}',
            headers=request_headers,
            data=payload)
        if not response.get('success'):
            raise ExtractorError(response['message'])

        for video in response['videos']:
            permalink_id = video['permalinkId']
            yield self.url_result(
                f'https://www.veoh.com/watch/{permalink_id}', VeohIE,
                permalink_id, video.get('title'))

    def _real_initialize(self):
        """Read the CSRF token from the veoh.com front page and store it as ``_TOKEN``."""
        front_page = self._download_webpage(
            'https://www.veoh.com', None, note='Downloading authorization token')
        self._TOKEN = self._search_regex(
            r'csrfToken:\s*(["\'])(?P<token>[0-9a-zA-Z]{40})\1', front_page,
            'request token', group='token')

    def _real_extract(self, url):
        """Return a lazily paged playlist of the user's uploads."""
        uploader = self._match_id(url)
        fetch_page = functools.partial(self._fetch_page, uploader)
        entries = OnDemandPagedList(fetch_page, self._PAGE_SIZE)
        return self.playlist_result(entries, uploader, f'{uploader} (Uploads)')