Compare commits

..

4 Commits

Author SHA1 Message Date
sepro
45d82be65f
[ie/nebula] Overhaul extractors (#8566)
Closes #4300, Closes #5814, Closes #7588, Closes #6334, Closes #6538
Authored by: elyse0, pukkandan, seproDev

Co-authored-by: Elyse <26639800+elyse0@users.noreply.github.com>
Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com>
2023-11-20 01:03:33 +00:00
Safouane Aarab
3237f8ba29
[ie/allstar] Add extractors (#8274)
Closes #6917
Authored by: S-Aarab
2023-11-20 00:07:19 +00:00
Kyraminol Endyeran
1725e943b0
[ie/vvvvid] Set user-agent to fix extraction (#8615)
Authored by: Kyraminol
2023-11-19 21:30:21 +00:00
c-basalt
9f09bdcfcb
[ie/bilibili] Support courses and interactive videos (#8343)
Closes #6135, Closes #8428
Authored by: c-basalt
2023-11-19 21:26:46 +00:00
6 changed files with 1119 additions and 294 deletions

View File

@ -214,8 +214,9 @@ def sanitize_got_info_dict(got_dict):
test_info_dict = {
key: sanitize(key, value) for key, value in got_dict.items()
if value is not None and key not in IGNORED_FIELDS and not any(
key.startswith(f'{prefix}_') for prefix in IGNORED_PREFIXES)
if value is not None and key not in IGNORED_FIELDS and (
not any(key.startswith(f'{prefix}_') for prefix in IGNORED_PREFIXES)
or key == '_old_archive_ids')
}
# display_id may be generated from id

View File

@ -81,16 +81,20 @@ from .airmozilla import AirMozillaIE
from .airtv import AirTVIE
from .aitube import AitubeKZVideoIE
from .aljazeera import AlJazeeraIE
from .allstar import (
AllstarIE,
AllstarProfileIE,
)
from .alphaporno import AlphaPornoIE
from .altcensored import (
AltCensoredIE,
AltCensoredChannelIE,
)
from .amara import AmaraIE
from .alura import (
AluraIE,
AluraCourseIE
)
from .amara import AmaraIE
from .amcnetworks import AMCNetworksIE
from .amazon import (
AmazonStoreIE,
@ -216,6 +220,8 @@ from .bilibili import (
BiliBiliBangumiIE,
BiliBiliBangumiSeasonIE,
BiliBiliBangumiMediaIE,
BilibiliCheeseIE,
BilibiliCheeseSeasonIE,
BiliBiliSearchIE,
BilibiliCategoryIE,
BilibiliAudioIE,
@ -1241,6 +1247,7 @@ from .ndr import (
from .ndtv import NDTVIE
from .nebula import (
NebulaIE,
NebulaClassIE,
NebulaSubscriptionsIE,
NebulaChannelIE,
)

253
yt_dlp/extractor/allstar.py Normal file
View File

@ -0,0 +1,253 @@
import functools
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
int_or_none,
join_nonempty,
parse_qs,
urljoin,
)
from ..utils.traversal import traverse_obj
_FIELDS = '''
_id
clipImageSource
clipImageThumb
clipLink
clipTitle
createdDate
shareId
user { _id }
username
views'''
_EXTRA_FIELDS = '''
clipLength
clipSizeBytes'''
_QUERIES = {
'clip': '''query ($id: String!) {
video: getClip(clipIdentifier: $id) {
%s %s
}
}''' % (_FIELDS, _EXTRA_FIELDS),
'montage': '''query ($id: String!) {
video: getMontage(clipIdentifier: $id) {
%s
}
}''' % _FIELDS,
'Clips': '''query ($page: Int!, $user: String!, $game: Int) {
videos: clips(search: createdDate, page: $page, user: $user, mobile: false, game: $game) {
data { %s %s }
}
}''' % (_FIELDS, _EXTRA_FIELDS),
'Montages': '''query ($page: Int!, $user: String!) {
videos: montages(search: createdDate, page: $page, user: $user) {
data { %s }
}
}''' % _FIELDS,
'Mobile Clips': '''query ($page: Int!, $user: String!) {
videos: clips(search: createdDate, page: $page, user: $user, mobile: true) {
data { %s %s }
}
}''' % (_FIELDS, _EXTRA_FIELDS),
}
class AllstarBaseIE(InfoExtractor):
@staticmethod
def _parse_video_data(video_data):
def media_url_or_none(path):
return urljoin('https://media.allstar.gg/', path)
info = traverse_obj(video_data, {
'id': ('_id', {str}),
'display_id': ('shareId', {str}),
'title': ('clipTitle', {str}),
'url': ('clipLink', {media_url_or_none}),
'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {media_url_or_none}}),
'duration': ('clipLength', {int_or_none}),
'filesize': ('clipSizeBytes', {int_or_none}),
'timestamp': ('createdDate', {functools.partial(int_or_none, scale=1000)}),
'uploader': ('username', {str}),
'uploader_id': ('user', '_id', {str}),
'view_count': ('views', {int_or_none}),
})
if info.get('id') and info.get('url'):
basename = 'clip' if '/clips/' in info['url'] else 'montage'
info['webpage_url'] = f'https://allstar.gg/{basename}?{basename}={info["id"]}'
info.update({
'extractor_key': AllstarIE.ie_key(),
'extractor': AllstarIE.IE_NAME,
'uploader_url': urljoin('https://allstar.gg/u/', info.get('uploader_id')),
})
return info
def _call_api(self, query, variables, path, video_id=None, note=None):
response = self._download_json(
'https://a1.allstar.gg/graphql', video_id, note=note,
headers={'content-type': 'application/json'},
data=json.dumps({'variables': variables, 'query': query}).encode())
errors = traverse_obj(response, ('errors', ..., 'message', {str}))
if errors:
raise ExtractorError('; '.join(errors))
return traverse_obj(response, path)
class AllstarIE(AllstarBaseIE):
_VALID_URL = r'https?://(?:www\.)?allstar\.gg/(?P<type>(?:clip|montage))\?(?P=type)=(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://allstar.gg/clip?clip=64482c2da9eec30008a67d1b',
'info_dict': {
'id': '64482c2da9eec30008a67d1b',
'title': '4K on Inferno',
'url': 'md5:66befb5381eef0c9456026386c25fa55',
'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
'uploader': 'chrk.',
'ext': 'mp4',
'duration': 20,
'filesize': 21199257,
'timestamp': 1682451501,
'uploader_id': '62b8bdfc9021052f7905882d',
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
'upload_date': '20230425',
'view_count': int,
}
}, {
'url': 'https://allstar.gg/clip?clip=8LJLY4JKB',
'info_dict': {
'id': '64a1ec6b887f4c0008dc50b8',
'display_id': '8LJLY4JKB',
'title': 'AK-47 3K on Mirage',
'url': 'md5:dde224fd12f035c0e2529a4ae34c4283',
'ext': 'mp4',
'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
'duration': 16,
'filesize': 30175859,
'timestamp': 1688333419,
'uploader': 'cherokee',
'uploader_id': '62b8bdfc9021052f7905882d',
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
'upload_date': '20230702',
'view_count': int,
}
}, {
'url': 'https://allstar.gg/montage?montage=643e64089da7e9363e1fa66c',
'info_dict': {
'id': '643e64089da7e9363e1fa66c',
'display_id': 'APQLGM2IMXW',
'title': 'cherokee Rapid Fire Snipers Montage',
'url': 'md5:a3ee356022115db2b27c81321d195945',
'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
'ext': 'mp4',
'timestamp': 1681810448,
'uploader': 'cherokee',
'uploader_id': '62b8bdfc9021052f7905882d',
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
'upload_date': '20230418',
'view_count': int,
}
}, {
'url': 'https://allstar.gg/montage?montage=RILJMH6QOS',
'info_dict': {
'id': '64a2697372ce3703de29e868',
'display_id': 'RILJMH6QOS',
'title': 'cherokee Rapid Fire Snipers Montage',
'url': 'md5:d5672e6f88579730c2310a80fdbc4030',
'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
'ext': 'mp4',
'timestamp': 1688365434,
'uploader': 'cherokee',
'uploader_id': '62b8bdfc9021052f7905882d',
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
'upload_date': '20230703',
'view_count': int,
}
}]
def _real_extract(self, url):
query_id, video_id = self._match_valid_url(url).group('type', 'id')
return self._parse_video_data(
self._call_api(
_QUERIES.get(query_id), {'id': video_id}, ('data', 'video'), video_id))
class AllstarProfileIE(AllstarBaseIE):
_VALID_URL = r'https?://(?:www\.)?allstar\.gg/(?:profile\?user=|u/)(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://allstar.gg/profile?user=62b8bdfc9021052f7905882d',
'info_dict': {
'id': '62b8bdfc9021052f7905882d-clips',
'title': 'cherokee - Clips',
},
'playlist_mincount': 15
}, {
'url': 'https://allstar.gg/u/cherokee?game=730&view=Clips',
'info_dict': {
'id': '62b8bdfc9021052f7905882d-clips-730',
'title': 'cherokee - Clips - 730',
},
'playlist_mincount': 15
}, {
'url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d?view=Montages',
'info_dict': {
'id': '62b8bdfc9021052f7905882d-montages',
'title': 'cherokee - Montages',
},
'playlist_mincount': 4
}, {
'url': 'https://allstar.gg/profile?user=cherokee&view=Mobile Clips',
'info_dict': {
'id': '62b8bdfc9021052f7905882d-mobile',
'title': 'cherokee - Mobile Clips',
},
'playlist_mincount': 1
}]
_PAGE_SIZE = 10
def _get_page(self, user_id, display_id, game, query, page_num):
page_num += 1
for video_data in self._call_api(
query, {
'user': user_id,
'page': page_num,
'game': game,
}, ('data', 'videos', 'data'), display_id, f'Downloading page {page_num}'):
yield self._parse_video_data(video_data)
def _real_extract(self, url):
display_id = self._match_id(url)
profile_data = self._download_json(
urljoin('https://api.allstar.gg/v1/users/profile/', display_id), display_id)
user_id = traverse_obj(profile_data, ('data', ('_id'), {str}))
if not user_id:
raise ExtractorError('Unable to extract the user id')
username = traverse_obj(profile_data, ('data', 'profile', ('username'), {str}))
url_query = parse_qs(url)
game = traverse_obj(url_query, ('game', 0, {int_or_none}))
query_id = traverse_obj(url_query, ('view', 0), default='Clips')
if query_id not in ('Clips', 'Montages', 'Mobile Clips'):
raise ExtractorError(f'Unsupported playlist URL type {query_id!r}')
return self.playlist_result(
OnDemandPagedList(
functools.partial(
self._get_page, user_id, display_id, game, _QUERIES.get(query_id)), self._PAGE_SIZE),
playlist_id=join_nonempty(user_id, query_id.lower().split()[0], game),
playlist_title=join_nonempty((username or display_id), query_id, game, delim=' - '))

View File

@ -2,6 +2,7 @@ import base64
import functools
import hashlib
import itertools
import json
import math
import re
import time
@ -16,9 +17,11 @@ from ..utils import (
InAdvancePagedList,
OnDemandPagedList,
bool_or_none,
clean_html,
filter_dict,
float_or_none,
format_field,
get_element_by_class,
int_or_none,
join_nonempty,
make_archive_id,
@ -88,6 +91,12 @@ class BilibiliBaseIE(InfoExtractor):
return formats
def _download_playinfo(self, video_id, cid):
return self._download_json(
'https://api.bilibili.com/x/player/playurl', video_id,
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
note=f'Downloading video formats for cid {cid}')['data']
def json2srt(self, json_data):
srt_data = ''
for idx, line in enumerate(json_data.get('body') or []):
@ -96,7 +105,7 @@ class BilibiliBaseIE(InfoExtractor):
f'{line["content"]}\n\n')
return srt_data
def _get_subtitles(self, video_id, aid, cid):
def _get_subtitles(self, video_id, cid, aid=None):
subtitles = {
'danmaku': [{
'ext': 'xml',
@ -104,8 +113,15 @@ class BilibiliBaseIE(InfoExtractor):
}]
}
video_info_json = self._download_json(f'https://api.bilibili.com/x/player/v2?aid={aid}&cid={cid}', video_id)
for s in traverse_obj(video_info_json, ('data', 'subtitle', 'subtitles', ...)):
subtitle_info = traverse_obj(self._download_json(
'https://api.bilibili.com/x/player/v2', video_id,
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
for s in subs_list:
subtitles.setdefault(s['lan'], []).append({
'ext': 'srt',
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
@ -155,7 +171,54 @@ class BilibiliBaseIE(InfoExtractor):
for entry in traverse_obj(season_info, (
'result', 'main_section', 'episodes',
lambda _, v: url_or_none(v['share_url']) and v['id'])):
yield self.url_result(entry['share_url'], BiliBiliBangumiIE, f'ep{entry["id"]}')
yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))
def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
cid_edges = cid_edges or {}
division_data = self._download_json(
'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
note=f'Extracting divisions from edge {edge_id}')
edges.setdefault(edge_id, {}).update(
traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
'title': ('title', {str}),
'cid': ('cid', {int_or_none}),
}), get_all=False))
edges[edge_id].update(traverse_obj(division_data, ('data', {
'title': ('title', {str}),
'choices': ('edges', 'questions', ..., 'choices', ..., {
'edge_id': ('id', {int_or_none}),
'cid': ('cid', {int_or_none}),
'text': ('option', {str}),
}),
})))
# use dict to combine edges that use the same video section (same cid)
cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
if choice['edge_id'] not in edges:
edges[choice['edge_id']] = {'cid': choice['cid']}
self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
return cid_edges
def _get_interactive_entries(self, video_id, cid, metainfo):
graph_version = traverse_obj(
self._download_json(
'https://api.bilibili.com/x/player/wbi/v2', video_id,
'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
('data', 'interaction', 'graph_version', {int_or_none}))
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
for cid, edges in cid_edges.items():
play_info = self._download_playinfo(video_id, cid)
yield {
**metainfo,
'id': f'{video_id}_{cid}',
'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}',
'formats': self.extract_formats(play_info),
'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
'duration': float_or_none(play_info.get('timelength'), scale=1000),
'subtitles': self.extract_subtitles(video_id, cid),
}
class BiliBiliIE(BilibiliBaseIE):
@ -180,7 +243,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
},
}, {
# old av URL version
'note': 'old av URL version',
'url': 'http://www.bilibili.com/video/av1074402/',
'info_dict': {
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
@ -212,7 +275,7 @@ class BiliBiliIE(BilibiliBaseIE):
'id': 'BV1bK411W797_p1',
'ext': 'mp4',
'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
'tags': 'count:11',
'tags': 'count:10',
'timestamp': 1589601697,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'uploader': '打牌还是打桩',
@ -232,7 +295,7 @@ class BiliBiliIE(BilibiliBaseIE):
'id': 'BV1bK411W797_p1',
'ext': 'mp4',
'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
'tags': 'count:11',
'tags': 'count:10',
'timestamp': 1589601697,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'uploader': '打牌还是打桩',
@ -343,18 +406,120 @@ class BiliBiliIE(BilibiliBaseIE):
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
'params': {'skip_download': True},
}, {
'note': 'interactive/split-path video',
'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
'info_dict': {
'id': 'BV1af4y1H7ga',
'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
'timestamp': 1630500414,
'upload_date': '20210901',
'description': 'md5:01113e39ab06e28042d74ac356a08786',
'tags': list,
'uploader': '钉宫妮妮Ninico',
'duration': 1503,
'uploader_id': '8881297',
'comment_count': int,
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
'playlist_count': 33,
'playlist': [{
'info_dict': {
'id': 'BV1af4y1H7ga_400950101',
'ext': 'mp4',
'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
'timestamp': 1630500414,
'upload_date': '20210901',
'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
'tags': list,
'uploader': '钉宫妮妮Ninico',
'duration': 11.605,
'uploader_id': '8881297',
'comment_count': int,
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
}],
}, {
'note': '301 redirect to bangumi link',
'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
'info_dict': {
'id': '288525',
'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
'ext': 'mp4',
'series': '我和我的祖国',
'series_id': '4780',
'season': '幕后纪实',
'season_id': '28609',
'season_number': 1,
'episode': '钱学森弹道和乘波体飞行器是什么?',
'episode_id': '288525',
'episode_number': 105,
'duration': 1183.957,
'timestamp': 1571648124,
'upload_date': '20191021',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
}, {
'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
'info_dict': {
'id': 'BV1jL41167ZG',
'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
'ext': 'mp4',
},
'skip': 'supporter-only video',
}, {
'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
'info_dict': {
'id': 'BV1Ks411f7aQ',
'title': '【BD1080P】狼与香辛料I【华盟】',
'ext': 'mp4',
},
'skip': 'login required',
}, {
'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
'info_dict': {
'id': 'BV1GJ411x7h7',
'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
'ext': 'mp4',
},
'skip': 'geo-restricted',
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
webpage, urlh = self._download_webpage_handle(url, video_id)
if not self._match_valid_url(urlh.url):
return self.url_result(urlh.url)
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
is_festival = 'videoData' not in initial_state
if is_festival:
video_data = initial_state['videoInfo']
else:
play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
play_info_obj = self._search_json(
r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
if not play_info_obj:
if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
self.raise_login_required()
if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
raise ExtractorError(
'This video may be deleted or geo-restricted. '
'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
play_info = traverse_obj(play_info_obj, ('data', {dict}))
if not play_info:
if traverse_obj(play_info_obj, 'code') == 87007:
toast = get_element_by_class('tips-toast', webpage) or ''
msg = clean_html(
f'{get_element_by_class("belongs-to", toast) or ""}'
+ (get_element_by_class('level', toast) or ''))
raise ExtractorError(
f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
raise ExtractorError('Failed to extract play info')
video_data = initial_state['videoData']
video_id, title = video_data['bvid'], video_data.get('title')
@ -385,10 +550,7 @@ class BiliBiliIE(BilibiliBaseIE):
festival_info = {}
if is_festival:
play_info = self._download_json(
'https://api.bilibili.com/x/player/playurl', video_id,
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
note='Extracting festival video formats')['data']
play_info = self._download_playinfo(video_id, cid)
festival_info = traverse_obj(initial_state, {
'uploader': ('videoInfo', 'upName'),
@ -397,7 +559,7 @@ class BiliBiliIE(BilibiliBaseIE):
'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
}, get_all=False)
return {
metainfo = {
**traverse_obj(initial_state, {
'uploader': ('upData', 'name'),
'uploader_id': ('upData', 'mid', {str_or_none}),
@ -413,28 +575,59 @@ class BiliBiliIE(BilibiliBaseIE):
'comment_count': ('stat', 'reply', {int_or_none}),
}, get_all=False),
'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
'formats': self.extract_formats(play_info),
'_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
'title': title,
'http_headers': {'Referer': url},
}
is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
if is_interactive:
return self.playlist_result(
self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{
'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
'__post_extractor': self.extract_comments(aid),
})
else:
return {
**metainfo,
'duration': float_or_none(play_info.get('timelength'), scale=1000),
'chapters': self._get_chapters(aid, cid),
'subtitles': self.extract_subtitles(video_id, aid, cid),
'subtitles': self.extract_subtitles(video_id, cid),
'formats': self.extract_formats(play_info),
'__post_extractor': self.extract_comments(aid),
'http_headers': {'Referer': url},
}
class BiliBiliBangumiIE(BilibiliBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)'
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
'info_dict': {
'id': '21495',
'ext': 'mp4',
'series': '悠久之翼',
'series_id': '774',
'season': '第二季',
'season_id': '1182',
'season_number': 2,
'episode': 'foreveref',
'episode_id': '21495',
'episode_number': 12,
'title': '12 foreveref',
'duration': 1420.791,
'timestamp': 1320412200,
'upload_date': '20111104',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
}, {
'url': 'https://www.bilibili.com/bangumi/play/ep267851',
'info_dict': {
'id': '267851',
'ext': 'mp4',
'series': '鬼灭之刃',
'series_id': '4358',
'season': '鬼灭之刃',
'season': '立志篇',
'season_id': '26801',
'season_number': 1,
'episode': '残酷',
@ -446,13 +639,32 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
'upload_date': '20190406',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
},
'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.'
'skip': 'Geo-restricted',
}, {
'note': 'a making-of which falls outside main section',
'url': 'https://www.bilibili.com/bangumi/play/ep345120',
'info_dict': {
'id': '345120',
'ext': 'mp4',
'series': '鬼灭之刃',
'series_id': '4358',
'season': '立志篇',
'season_id': '26801',
'season_number': 1,
'episode': '炭治郎篇',
'episode_id': '345120',
'episode_number': 27,
'title': '#1 炭治郎篇',
'duration': 1922.129,
'timestamp': 1602853860,
'upload_date': '20201016',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
episode_id = video_id[2:]
webpage = self._download_webpage(url, video_id)
episode_id = self._match_id(url)
webpage = self._download_webpage(url, episode_id)
if '您所在的地区无法观看本片' in webpage:
raise GeoRestrictedError('This video is restricted')
@ -461,7 +673,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
headers = {'Referer': url, **self.geo_verification_headers()}
play_info = self._download_json(
'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id,
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
headers=headers)
premium_only = play_info.get('code') == -10403
@ -472,40 +684,43 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
self.raise_login_required('This video is for premium members only')
bangumi_info = self._download_json(
'https://api.bilibili.com/pgc/view/web/season', video_id, 'Get episode details',
'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
query={'ep_id': episode_id}, headers=headers)['result']
episode_number, episode_info = next((
(idx, ep) for idx, ep in enumerate(traverse_obj(
bangumi_info, ('episodes', ..., {dict})), 1)
bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
if str_or_none(ep.get('id')) == episode_id), (1, {}))
season_id = bangumi_info.get('season_id')
season_number = season_id and next((
idx + 1 for idx, e in enumerate(
season_number, season_title = season_id and next((
(idx + 1, e.get('season_title')) for idx, e in enumerate(
traverse_obj(bangumi_info, ('seasons', ...)))
if e.get('season_id') == season_id
), None)
), (None, None))
aid = episode_info.get('aid')
return {
'id': video_id,
'id': episode_id,
'formats': formats,
**traverse_obj(bangumi_info, {
'series': ('series', 'series_title', {str}),
'series_id': ('series', 'series_id', {str_or_none}),
'thumbnail': ('square_cover', {url_or_none}),
}),
'title': join_nonempty('title', 'long_title', delim=' ', from_dict=episode_info),
'episode': episode_info.get('long_title'),
**traverse_obj(episode_info, {
'episode': ('long_title', {str}),
'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
'timestamp': ('pub_time', {int_or_none}),
'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
}),
'episode_id': episode_id,
'episode_number': int_or_none(episode_info.get('title')) or episode_number,
'season': str_or_none(season_title),
'season_id': str_or_none(season_id),
'season_number': season_number,
'timestamp': int_or_none(episode_info.get('pub_time')),
'duration': float_or_none(play_info.get('timelength'), scale=1000),
'subtitles': self.extract_subtitles(video_id, aid, episode_info.get('cid')),
'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
'__post_extractor': self.extract_comments(aid),
'http_headers': headers,
}
@ -517,17 +732,53 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE):
'url': 'https://www.bilibili.com/bangumi/media/md24097891',
'info_dict': {
'id': '24097891',
'title': 'CAROLE & TUESDAY',
'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
},
'playlist_mincount': 25,
}, {
'url': 'https://www.bilibili.com/bangumi/media/md1565/',
'info_dict': {
'id': '1565',
'title': '攻壳机动队 S.A.C. 2nd GIG',
'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
},
'playlist_count': 26,
'playlist': [{
'info_dict': {
'id': '68540',
'ext': 'mp4',
'series': '攻壳机动队',
'series_id': '1077',
'season': '第二季',
'season_id': '1565',
'season_number': 2,
'episode': '再启动 REEMBODY',
'episode_id': '68540',
'episode_number': 1,
'title': '1 再启动 REEMBODY',
'duration': 1525.777,
'timestamp': 1425074413,
'upload_date': '20150227',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
},
}],
}]
def _real_extract(self, url):
media_id = self._match_id(url)
webpage = self._download_webpage(url, media_id)
ss_id = self._search_json(
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)['mediaInfo']['season_id']
return self.playlist_result(self._get_episodes_from_season(ss_id, url), media_id)
initial_state = self._search_json(
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
ss_id = initial_state['mediaInfo']['season_id']
return self.playlist_result(
self._get_episodes_from_season(ss_id, url), media_id,
**traverse_obj(initial_state, ('mediaInfo', {
'title': ('title', {str}),
'description': ('evaluate', {str}),
})))
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
@ -535,15 +786,183 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
_TESTS = [{
'url': 'https://www.bilibili.com/bangumi/play/ss26801',
'info_dict': {
'id': '26801'
'id': '26801',
'title': '鬼灭之刃',
'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
},
'playlist_mincount': 26
}, {
'url': 'https://www.bilibili.com/bangumi/play/ss2251',
'info_dict': {
'id': '2251',
'title': '玲音',
'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
},
'playlist_count': 13,
'playlist': [{
'info_dict': {
'id': '50188',
'ext': 'mp4',
'series': '玲音',
'series_id': '1526',
'season': 'TV',
'season_id': '2251',
'season_number': 1,
'episode': 'WEIRD',
'episode_id': '50188',
'episode_number': 1,
'title': '1 WEIRD',
'duration': 1436.992,
'timestamp': 1343185080,
'upload_date': '20120725',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
},
}],
}]
def _real_extract(self, url):
ss_id = self._match_id(url)
webpage = self._download_webpage(url, ss_id)
metainfo = traverse_obj(
self._search_json(r'<script[^>]+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id),
('itemListElement', ..., {
'title': ('name', {str}),
'description': ('description', {str}),
}), get_all=False)
return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id)
return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id, **metainfo)
class BilibiliCheeseBaseIE(BilibiliBaseIE):
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
def _extract_episode(self, season_info, ep_id):
episode_info = traverse_obj(season_info, (
'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
aid, cid = episode_info['aid'], episode_info['cid']
if traverse_obj(episode_info, 'ep_status') == -1:
raise ExtractorError('This course episode is not yet available.', expected=True)
if not traverse_obj(episode_info, 'playable'):
self.raise_login_required('You need to purchase the course to download this episode')
play_info = self._download_json(
'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
headers=self._HEADERS, note='Downloading playinfo')['data']
return {
'id': str_or_none(ep_id),
'episode_id': str_or_none(ep_id),
'formats': self.extract_formats(play_info),
'extractor_key': BilibiliCheeseIE.ie_key(),
'extractor': BilibiliCheeseIE.IE_NAME,
'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
**traverse_obj(episode_info, {
'episode': ('title', {str}),
'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
'alt_title': ('subtitle', {str}),
'duration': ('duration', {int_or_none}),
'episode_number': ('index', {int_or_none}),
'thumbnail': ('cover', {url_or_none}),
'timestamp': ('release_date', {int_or_none}),
'view_count': ('play', {int_or_none}),
}),
**traverse_obj(season_info, {
'uploader': ('up_info', 'uname', {str}),
'uploader_id': ('up_info', 'mid', {str_or_none}),
}),
'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
'__post_extractor': self.extract_comments(aid),
'http_headers': self._HEADERS,
}
def _download_season_info(self, query_key, video_id):
return self._download_json(
f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
headers=self._HEADERS, note='Downloading season info')['data']
class BilibiliCheeseIE(BilibiliCheeseBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.bilibili.com/cheese/play/ep229832',
'info_dict': {
'id': '229832',
'ext': 'mp4',
'title': '1 - 课程先导片',
'alt_title': '视频课·3分41秒',
'uploader': '马督工',
'uploader_id': '316568752',
'episode': '课程先导片',
'episode_id': '229832',
'episode_number': 1,
'duration': 221,
'timestamp': 1695549606,
'upload_date': '20230924',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'view_count': int,
}
}]
def _real_extract(self, url):
ep_id = self._match_id(url)
return self._extract_episode(self._download_season_info('ep_id', ep_id), ep_id)
class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.bilibili.com/cheese/play/ss5918',
'info_dict': {
'id': '5918',
'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
},
'playlist': [{
'info_dict': {
'id': '229832',
'ext': 'mp4',
'title': '1 - 课程先导片',
'alt_title': '视频课·3分41秒',
'uploader': '马督工',
'uploader_id': '316568752',
'episode': '课程先导片',
'episode_id': '229832',
'episode_number': 1,
'duration': 221,
'timestamp': 1695549606,
'upload_date': '20230924',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'view_count': int,
}
}],
'params': {'playlist_items': '1'},
}, {
'url': 'https://www.bilibili.com/cheese/play/ss5918',
'info_dict': {
'id': '5918',
'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
},
'playlist_mincount': 5,
'skip': 'paid video in list',
}]
def _get_cheese_entries(self, season_info):
for ep_id in traverse_obj(season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id')):
yield self._extract_episode(season_info, ep_id)
def _real_extract(self, url):
season_id = self._match_id(url)
season_info = self._download_season_info('season_id', season_id)
return self.playlist_result(
self._get_cheese_entries(season_info), season_id,
**traverse_obj(season_info, {
'title': ('title', {str}),
'description': ('subtitle', {str}),
}))
class BilibiliSpaceBaseIE(InfoExtractor):

View File

@ -3,140 +3,140 @@ import json
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import ExtractorError, make_archive_id, parse_iso8601, remove_start
from ..utils import (
ExtractorError,
int_or_none,
make_archive_id,
parse_iso8601,
smuggle_url,
try_call,
unsmuggle_url,
update_url_query,
url_or_none,
urljoin,
)
from ..utils.traversal import traverse_obj
_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
class NebulaBaseIE(InfoExtractor):
_NETRC_MACHINE = 'watchnebula'
_token = _api_token = None
_nebula_api_token = None
_nebula_bearer_token = None
def _perform_nebula_auth(self, username, password):
if not username or not password:
self.raise_login_required(method='password')
data = json.dumps({'email': username, 'password': password}).encode('utf8')
response = self._download_json(
'https://api.watchnebula.com/api/v1/auth/login/',
data=data, fatal=False, video_id=None,
headers={
'content-type': 'application/json',
# Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
'cookie': ''
},
note='Logging in to Nebula with supplied credentials',
errnote='Authentication failed or rejected')
if not response or not response.get('key'):
self.raise_login_required(method='password')
return response['key']
def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
assert method in ('GET', 'POST',)
assert auth_type in ('api', 'bearer',)
def inner_call():
authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
return self._download_json(
url, video_id, note=note, headers={'Authorization': authorization},
data=b'' if method == 'POST' else None)
def _perform_login(self, username, password):
try:
return inner_call()
except ExtractorError as exc:
# if 401 or 403, attempt credential re-auth and retry
if exc.cause and isinstance(exc.cause, HTTPError) and exc.cause.status in (401, 403):
self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
self._perform_login()
return inner_call()
else:
response = self._download_json(
'https://nebula.tv/auth/login/', None,
'Logging in to Nebula', 'Login failed',
data=json.dumps({'email': username, 'password': password}).encode(),
headers={'content-type': 'application/json'})
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
raise ExtractorError('Login failed: Invalid username or password', expected=True)
raise
self._api_token = traverse_obj(response, ('key', {str}))
if not self._api_token:
raise ExtractorError('Login failed: No token')
def _call_api(self, *args, **kwargs):
if self._token:
kwargs.setdefault('headers', {})['Authorization'] = f'Bearer {self._token}'
try:
return self._download_json(*args, **kwargs)
except ExtractorError as e:
if not isinstance(e.cause, HTTPError) or e.cause.status not in (401, 403):
raise
self.to_screen(
f'Reauthorizing with Nebula and retrying, because last API call resulted in error {e.cause.status}')
self._real_initialize()
if self._token:
kwargs.setdefault('headers', {})['Authorization'] = f'Bearer {self._token}'
return self._download_json(*args, **kwargs)
def _real_initialize(self):
if not self._api_token:
self._api_token = try_call(
lambda: self._get_cookies('https://nebula.tv')['nebula_auth.apiToken'].value)
self._token = self._download_json(
'https://users.api.nebula.app/api/v1/authorization/', None,
headers={'Authorization': f'Token {self._api_token}'} if self._api_token else None,
note='Authorizing to Nebula', data=b'')['token']
def _extract_formats(self, content_id, slug):
for retry in (False, True):
try:
fmts, subs = self._extract_m3u8_formats_and_subtitles(
f'https://content.api.nebula.app/{content_id.split(":")[0]}s/{content_id}/manifest.m3u8',
slug, 'mp4', query={
'token': self._token,
'app_version': '23.10.0',
'platform': 'ios',
})
return {'formats': fmts, 'subtitles': subs}
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
self.raise_login_required()
if not retry and isinstance(e.cause, HTTPError) and e.cause.status == 403:
self.to_screen('Reauthorizing with Nebula and retrying, because fetching video resulted in error')
self._real_initialize()
continue
raise
def _fetch_nebula_bearer_token(self):
"""
Get a Bearer token for the Nebula API. This will be required to fetch video meta data.
"""
response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
method='POST',
note='Authorizing to Nebula')
return response['token']
def _fetch_video_formats(self, slug):
stream_info = self._call_nebula_api(f'https://content.api.nebula.app/video/{slug}/stream/',
video_id=slug,
auth_type='bearer',
note='Fetching video stream info')
manifest_url = stream_info['manifest']
return self._extract_m3u8_formats_and_subtitles(manifest_url, slug, 'mp4')
def _build_video_info(self, episode):
fmts, subs = self._fetch_video_formats(episode['slug'])
channel_slug = episode['channel_slug']
channel_title = episode['channel_title']
zype_id = episode.get('zype_id')
def _extract_video_metadata(self, episode):
channel_url = traverse_obj(
episode, (('channel_slug', 'class_slug'), {lambda x: urljoin('https://nebula.tv/', x)}), get_all=False)
return {
'id': remove_start(episode['id'], 'video_episode:'),
'display_id': episode['slug'],
'formats': fmts,
'subtitles': subs,
'webpage_url': f'https://nebula.tv/{episode["slug"]}',
'title': episode['title'],
'description': episode['description'],
'timestamp': parse_iso8601(episode['published_at']),
'thumbnails': [{
# 'id': tn.get('name'), # this appears to be null
'url': tn['original'],
'height': key,
} for key, tn in episode['assets']['thumbnail'].items()],
'duration': episode['duration'],
'channel': channel_title,
'channel_id': channel_slug,
'channel_url': f'https://nebula.tv/{channel_slug}',
'uploader': channel_title,
'uploader_id': channel_slug,
'uploader_url': f'https://nebula.tv/{channel_slug}',
'series': channel_title,
'creator': channel_title,
'extractor_key': NebulaIE.ie_key(),
'extractor': NebulaIE.IE_NAME,
'_old_archive_ids': [make_archive_id(NebulaIE, zype_id)] if zype_id else None,
'id': episode['id'].partition(':')[2],
**traverse_obj(episode, {
'display_id': 'slug',
'title': 'title',
'description': 'description',
'timestamp': ('published_at', {parse_iso8601}),
'duration': ('duration', {int_or_none}),
'channel_id': 'channel_slug',
'uploader_id': 'channel_slug',
'channel': 'channel_title',
'uploader': 'channel_title',
'series': 'channel_title',
'creator': 'channel_title',
'thumbnail': ('images', 'thumbnail', 'src', {url_or_none}),
'episode_number': ('order', {int_or_none}),
# Old code was wrongly setting extractor_key from NebulaSubscriptionsIE
'_old_archive_ids': ('zype_id', {lambda x: [
make_archive_id(NebulaIE, x), make_archive_id(NebulaSubscriptionsIE, x)] if x else None}),
}),
'channel_url': channel_url,
'uploader_url': channel_url,
}
def _perform_login(self, username=None, password=None):
self._nebula_api_token = self._perform_nebula_auth(username, password)
self._nebula_bearer_token = self._fetch_nebula_bearer_token()
class NebulaIE(NebulaBaseIE):
_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
_TESTS = [
{
_TESTS = [{
'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
'md5': '14944cfee8c7beeea106320c47560efc',
'info_dict': {
'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
'ext': 'mp4',
'title': 'That Time Disney Remade Beauty and the Beast',
'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We werent able to remove it without reducing video quality, so its presented here in its original context.',
'description': 'md5:2aae3c4cfc5ee09a1ecdff0909618cf4',
'upload_date': '20180731',
'timestamp': 1533009600,
'channel': 'Lindsay Ellis',
'channel_id': 'lindsayellis',
'uploader': 'Lindsay Ellis',
'uploader_id': 'lindsayellis',
'uploader_url': 'https://nebula.tv/lindsayellis',
'uploader_url': r're:https://nebula\.(tv|app)/lindsayellis',
'series': 'Lindsay Ellis',
'display_id': 'that-time-disney-remade-beauty-and-the-beast',
'channel_url': 'https://nebula.tv/lindsayellis',
'channel_url': r're:https://nebula\.(tv|app)/lindsayellis',
'creator': 'Lindsay Ellis',
'duration': 2212,
'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
'_old_archive_ids': ['nebula 5c271b40b13fd613090034fd', 'nebulasubscriptions 5c271b40b13fd613090034fd'],
},
},
{
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
'md5': 'd05739cf6c38c09322422f696b569c23',
'info_dict': {
@ -156,10 +156,11 @@ class NebulaIE(NebulaBaseIE):
'duration': 841,
'channel_url': 'https://nebula.tv/d-day',
'uploader_url': 'https://nebula.tv/d-day',
'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
'_old_archive_ids': ['nebula 5e7e78171aaf320001fbd6be', 'nebulasubscriptions 5e7e78171aaf320001fbd6be'],
},
},
{
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
'md5': 'ebe28a7ad822b9ee172387d860487868',
'info_dict': {
@ -178,55 +179,130 @@ class NebulaIE(NebulaBaseIE):
'channel_url': 'https://nebula.tv/tom-scott-presents-money',
'series': 'Tom Scott Presents: Money',
'display_id': 'money-episode-1-the-draw',
'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
'creator': 'Tom Scott Presents: Money',
'_old_archive_ids': ['nebula 5e779ebdd157bc0001d1c75a', 'nebulasubscriptions 5e779ebdd157bc0001d1c75a'],
},
},
{
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
'only_matching': True,
}, {
'url': 'https://nebula.tv/videos/tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
'info_dict': {
'id': 'e389af9d-1dab-44f2-8788-ee24deb7ff0d',
'ext': 'mp4',
'display_id': 'tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
'title': 'Did the US Really Blow Up the NordStream Pipelines?',
'description': 'md5:b4e2a14e3ff08f546a3209c75261e789',
'upload_date': '20230223',
'timestamp': 1677144070,
'channel': 'TLDR News EU',
'channel_id': 'tldrnewseu',
'uploader': 'TLDR News EU',
'uploader_id': 'tldrnewseu',
'uploader_url': r're:https://nebula\.(tv|app)/tldrnewseu',
'duration': 524,
'channel_url': r're:https://nebula\.(tv|app)/tldrnewseu',
'series': 'TLDR News EU',
'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
'creator': 'TLDR News EU',
'_old_archive_ids': ['nebula 63f64c74366fcd00017c1513', 'nebulasubscriptions 63f64c74366fcd00017c1513'],
},
{
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
'only_matching': True,
},
]
def _fetch_video_metadata(self, slug):
return self._call_nebula_api(f'https://content.api.nebula.app/video/{slug}/',
video_id=slug,
auth_type='bearer',
note='Fetching video meta data')
}]
def _real_extract(self, url):
slug = self._match_id(url)
video = self._fetch_video_metadata(slug)
return self._build_video_info(video)
url, smuggled_data = unsmuggle_url(url, {})
if smuggled_data.get('id'):
return {
'id': smuggled_data['id'],
'display_id': slug,
'title': '',
**self._extract_formats(smuggled_data['id'], slug),
}
metadata = self._call_api(
f'https://content.api.nebula.app/content/videos/{slug}',
slug, note='Fetching video metadata')
return {
**self._extract_video_metadata(metadata),
**self._extract_formats(metadata['id'], slug),
}
class NebulaClassIE(NebulaBaseIE):
IE_NAME = 'nebula:class'
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>[-\w]+)/(?P<ep>\d+)'
_TESTS = [{
'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
'info_dict': {
'id': 'd7432cdc-c608-474d-942c-f74345daed7b',
'ext': 'mp4',
'display_id': '14',
'channel_url': 'https://nebula.tv/copyright-for-fun-and-profit',
'episode_number': 14,
'thumbnail': 'https://dj423fildxgac.cloudfront.net/d533718d-9307-42d4-8fb0-e283285e99c9',
'uploader_url': 'https://nebula.tv/copyright-for-fun-and-profit',
'duration': 646,
'episode': 'Episode 14',
'title': 'Photos, Sculpture, and Video',
},
'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):
slug, episode = self._match_valid_url(url).group('id', 'ep')
url, smuggled_data = unsmuggle_url(url, {})
if smuggled_data.get('id'):
return {
'id': smuggled_data['id'],
'display_id': slug,
'title': '',
**self._extract_formats(smuggled_data['id'], slug),
}
metadata = self._call_api(
f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons',
slug, note='Fetching video metadata')
return {
**self._extract_video_metadata(metadata),
**self._extract_formats(metadata['id'], slug),
}
class NebulaSubscriptionsIE(NebulaBaseIE):
IE_NAME = 'nebula:subscriptions'
_VALID_URL = rf'{_BASE_URL_RE}/myshows'
_TESTS = [
{
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)'
_TESTS = [{
'url': 'https://nebula.tv/myshows',
'playlist_mincount': 1,
'info_dict': {
'id': 'myshows',
},
},
]
}]
def _generate_playlist_entries(self):
next_url = 'https://content.watchnebula.com/library/video/?page_size=100'
page_num = 1
while next_url:
channel = self._call_nebula_api(next_url, 'myshows', auth_type='bearer',
note=f'Retrieving subscriptions page {page_num}')
next_url = update_url_query('https://content.api.nebula.app/video_episodes/', {
'following': 'true',
'include': 'engagement',
'ordering': '-published_at',
})
for page_num in itertools.count(1):
channel = self._call_api(
next_url, 'myshows', note=f'Retrieving subscriptions page {page_num}')
for episode in channel['results']:
yield self._build_video_info(episode)
next_url = channel['next']
page_num += 1
metadata = self._extract_video_metadata(episode)
yield self.url_result(smuggle_url(
f'https://nebula.tv/videos/{metadata["display_id"]}',
{'id': episode['id']}), NebulaIE, url_transparent=True, **metadata)
next_url = channel.get('next')
if not next_url:
return
def _real_extract(self, url):
return self.playlist_result(self._generate_playlist_entries(), 'myshows')
@ -234,9 +310,8 @@ class NebulaSubscriptionsIE(NebulaBaseIE):
class NebulaChannelIE(NebulaBaseIE):
IE_NAME = 'nebula:channel'
_VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|videos/)(?P<id>[-\w]+)'
_TESTS = [
{
_VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos/)(?P<id>[-\w]+)/?(?:$|[?#])'
_TESTS = [{
'url': 'https://nebula.tv/tom-scott-presents-money',
'info_dict': {
'id': 'tom-scott-presents-money',
@ -252,30 +327,57 @@ class NebulaChannelIE(NebulaBaseIE):
'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
},
'playlist_mincount': 2,
}, {
'url': 'https://nebula.tv/johnnyharris',
'info_dict': {
'id': 'johnnyharris',
'title': 'Johnny Harris',
'description': 'I make videos about maps and many other things.',
},
]
'playlist_mincount': 90,
}, {
'url': 'https://nebula.tv/copyright-for-fun-and-profit',
'info_dict': {
'id': 'copyright-for-fun-and-profit',
'title': 'Copyright for Fun and Profit',
'description': 'md5:6690248223eed044a9f11cd5a24f9742',
},
'playlist_count': 23,
}]
def _generate_playlist_entries(self, collection_id, channel):
episodes = channel['episodes']['results']
for page_num in itertools.count(2):
for episode in episodes:
yield self._build_video_info(episode)
next_url = channel['episodes']['next']
def _generate_playlist_entries(self, collection_id, collection_slug):
next_url = f'https://content.api.nebula.app/video_channels/{collection_id}/video_episodes/?ordering=-published_at'
for page_num in itertools.count(1):
episodes = self._call_api(next_url, collection_slug, note=f'Retrieving channel page {page_num}')
for episode in episodes['results']:
metadata = self._extract_video_metadata(episode)
yield self.url_result(smuggle_url(
episode.get('share_url') or f'https://nebula.tv/videos/{metadata["display_id"]}',
{'id': episode['id']}), NebulaIE, url_transparent=True, **metadata)
next_url = episodes.get('next')
if not next_url:
break
channel = self._call_nebula_api(next_url, collection_id, auth_type='bearer',
note=f'Retrieving channel page {page_num}')
episodes = channel['episodes']['results']
def _generate_class_entries(self, channel):
for lesson in channel['lessons']:
metadata = self._extract_video_metadata(lesson)
yield self.url_result(smuggle_url(
lesson.get('share_url') or f'https://nebula.tv/{metadata["class_slug"]}/{metadata["slug"]}',
{'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata)
def _real_extract(self, url):
collection_id = self._match_id(url)
channel_url = f'https://content.watchnebula.com/video/channels/{collection_id}/'
channel = self._call_nebula_api(channel_url, collection_id, auth_type='bearer', note='Retrieving channel')
channel_details = channel['details']
collection_slug = self._match_id(url)
channel = self._call_api(
f'https://content.api.nebula.app/content/{collection_slug}/?include=lessons',
collection_slug, note='Retrieving channel')
if channel.get('type') == 'class':
entries = self._generate_class_entries(channel)
else:
entries = self._generate_playlist_entries(channel['id'], collection_slug)
return self.playlist_result(
entries=self._generate_playlist_entries(collection_id, channel),
playlist_id=collection_id,
playlist_title=channel_details['title'],
playlist_description=channel_details['description']
)
entries=entries,
playlist_id=collection_slug,
playlist_title=channel.get('title'),
playlist_description=channel.get('description'))

View File

@ -1,3 +1,4 @@
import functools
import re
from .common import InfoExtractor
@ -14,21 +15,21 @@ class VVVVIDIE(InfoExtractor):
_VALID_URL = r'%s(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)' % _VALID_URL_BASE
_TESTS = [{
# video_type == 'video/vvvvid'
'url': 'https://www.vvvvid.it/#!show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048/ping-pong',
'md5': 'b8d3cecc2e981adc3835adf07f6df91b',
'url': 'https://www.vvvvid.it/show/498/the-power-of-computing/518/505692/playstation-vr-cambiera-il-nostro-modo-di-giocare',
'info_dict': {
'id': '489048',
'id': '505692',
'ext': 'mp4',
'title': 'Ping Pong',
'duration': 239,
'series': '"Perché dovrei guardarlo?" di Dario Moccia',
'season_id': '437',
'episode': 'Ping Pong',
'episode_number': 1,
'episode_id': '3334',
'title': 'Playstation VR cambierà il nostro modo di giocare',
'duration': 93,
'series': 'The Power of Computing',
'season_id': '518',
'episode': 'Playstation VR cambierà il nostro modo di giocare',
'episode_number': None,
'episode_id': '4747',
'view_count': int,
'like_count': int,
'repost_count': int,
'thumbnail': 'https://static.vvvvid.it/img/zoomin/28CA2409-E663-34F0-2B02E72356556EA3_500k.jpg',
},
'params': {
'skip_download': True,
@ -36,7 +37,6 @@ class VVVVIDIE(InfoExtractor):
}, {
# video_type == 'video/rcs'
'url': 'https://www.vvvvid.it/#!show/376/death-note-live-action/377/482493/episodio-01',
'md5': '33e0edfba720ad73a8782157fdebc648',
'info_dict': {
'id': '482493',
'ext': 'mp4',
@ -45,6 +45,7 @@ class VVVVIDIE(InfoExtractor):
'params': {
'skip_download': True,
},
'skip': 'Every video/rcs is not working even in real website',
}, {
# video_type == 'video/youtube'
'url': 'https://www.vvvvid.it/show/404/one-punch-man/406/486683/trailer',
@ -55,19 +56,54 @@ class VVVVIDIE(InfoExtractor):
'title': 'Trailer',
'upload_date': '20150906',
'description': 'md5:a5e802558d35247fee285875328c0b80',
'uploader_id': 'BandaiVisual',
'uploader': 'BANDAI NAMCO Arts Channel',
'uploader_id': '@EMOTIONLabelChannel',
'uploader': 'EMOTION Label Channel',
'episode_number': None,
'episode_id': '3115',
'view_count': int,
'like_count': int,
'repost_count': int,
'availability': str,
'categories': list,
'age_limit': 0,
'channel': 'EMOTION Label Channel',
'channel_follower_count': int,
'channel_id': 'UCQ5URCSs1f5Cz9rh-cDGxNQ',
'channel_url': 'https://www.youtube.com/channel/UCQ5URCSs1f5Cz9rh-cDGxNQ',
'comment_count': int,
'duration': 133,
'episode': 'Trailer',
'heatmap': list,
'live_status': 'not_live',
'playable_in_embed': True,
'season_id': '406',
'series': 'One-Punch Man',
'tags': list,
'uploader_url': 'https://www.youtube.com/@EMOTIONLabelChannel',
'thumbnail': 'https://i.ytimg.com/vi/RzmFKUDOUgw/maxresdefault.jpg',
},
'params': {
'skip_download': True,
},
}, {
# video_type == 'video/dash'
'url': 'https://www.vvvvid.it/show/683/made-in-abyss/1542/693786/nanachi',
'url': 'https://www.vvvvid.it/show/844/le-bizzarre-avventure-di-jojo-vento-aureo/938/527551/golden-wind',
'info_dict': {
'id': '693786',
'id': '527551',
'ext': 'mp4',
'title': 'Nanachi',
'title': 'Golden Wind',
'duration': 1430,
'series': 'Le bizzarre avventure di Jojo - Vento Aureo',
'season_id': '938',
'episode': 'Golden Wind',
'episode_number': 1,
'episode_id': '9089',
'view_count': int,
'like_count': int,
'repost_count': int,
'thumbnail': 'https://static.vvvvid.it/img/thumbs/Dynit/Jojo/Jojo_S05Ep01-t.jpg',
'season': 'Season 5',
'season_number': 5,
},
'params': {
'skip_download': True,
@ -79,10 +115,17 @@ class VVVVIDIE(InfoExtractor):
}]
_conn_id = None
@functools.cached_property
def _headers(self):
return {
**self.geo_verification_headers(),
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.50 Safari/537.37',
}
def _real_initialize(self):
self._conn_id = self._download_json(
'https://www.vvvvid.it/user/login',
None, headers=self.geo_verification_headers())['data']['conn_id']
None, headers=self._headers)['data']['conn_id']
def _download_info(self, show_id, path, video_id, fatal=True, query=None):
q = {
@ -92,7 +135,7 @@ class VVVVIDIE(InfoExtractor):
q.update(query)
response = self._download_json(
'https://www.vvvvid.it/vvvvid/ondemand/%s/%s' % (show_id, path),
video_id, headers=self.geo_verification_headers(), query=q, fatal=fatal)
video_id, headers=self._headers, query=q, fatal=fatal)
if not (response or fatal):
return
if response.get('result') == 'error':
@ -219,7 +262,7 @@ class VVVVIDIE(InfoExtractor):
embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False))
else:
formats.extend(self._extract_wowza_formats(
'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id, skip_protocols=['f4m']))
metadata_from_url(embed_code)
if not is_youtube: