Compare commits

...

11 Commits

Author SHA1 Message Date
N/Ame
bd656f7655
Merge 13ed574168 into 70c55cb08f 2024-11-16 23:10:15 +08:00
Alessandro Campolo
70c55cb08f
[ie/RadioRadicale] Add extractor (#5607)
Authored by: a13ssandr0, pzhlkj6612

Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
2024-11-16 13:56:15 +01:00
grqx_wsl
13ed574168 [BiliBiliBangumiIE] supports play_info extraction from webpage
- i.e. extracts premium formats with logged-in cookies; not yet tested with format `12240`.
  * test url: https://www.bilibili.com/bangumi/play/ep829434, cookies: logged-in, non-premium
2024-08-20 00:46:21 +12:00
grqx_wsl
79bb63957d Merge remote-tracking branch 'upstream' into biliTryLook 2024-08-17 10:58:16 +12:00
grqx_wsl
d5dbdbccd3 _download_playinfo: more understandable note 2024-07-27 23:10:53 +12:00
grqx_wsl
b2965fa3b2 [BiliBiliBangumiIE] support format 12240(format name 智能修复, premium only)
[cleanup] code formatting
2024-07-27 22:51:15 +12:00
grqx_wsl
510e29a42c add support for _get_interactive_entries 2024-07-27 22:09:44 +12:00
grqx_wsl
90f4203632 keep the original play_info traversal 2024-07-26 10:46:41 +12:00
grqx_wsl
b01183f904 pops param try_look when logged in. 2024-07-26 10:04:18 +12:00
grqx_wsl
29a5968278 - Applied try_look to festival videos
- Removed redundant calls to `_download_playinfo`
2024-07-26 03:07:32 +12:00
grqx_wsl
e187799c58 patch from https://github.com/yt-dlp/yt-dlp/issues/10554#issuecomment-2250014807
modified:   yt_dlp/extractor/bilibili.py
2024-07-26 02:36:04 +12:00
3 changed files with 131 additions and 14 deletions

View File

@ -1649,6 +1649,7 @@ from .radiokapital import (
RadioKapitalIE,
RadioKapitalShowIE,
)
from .radioradicale import RadioRadicaleIE
from .radiozet import RadioZetPodcastIE
from .radlive import (
RadLiveChannelIE,

View File

@ -165,14 +165,18 @@ class BilibiliBaseIE(InfoExtractor):
params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
return params
def _download_playinfo(self, bvid, cid, headers=None, qn=None):
params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
if qn:
params['qn'] = qn
def _download_playinfo(self, bvid, cid, headers=None, **kwargs):
params = {'bvid': bvid, 'cid': cid, 'fnval': 4048, **kwargs}
if self.is_logged_in:
params.pop('try_look', None)
if kwargs.get('qn'):
note = f'Downloading video format {kwargs["qn"]} for cid {cid}'
else:
note = f'Downloading video formats for cid {cid}'
return self._download_json(
'https://api.bilibili.com/x/player/wbi/playurl', bvid,
query=self._sign_wbi(params, bvid), headers=headers,
note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
query=self._sign_wbi(params, bvid), headers=headers, note=note)['data']
def json2srt(self, json_data):
srt_data = ''
@ -286,7 +290,7 @@ class BilibiliBaseIE(InfoExtractor):
('data', 'interaction', 'graph_version', {int_or_none}))
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
for cid, edges in cid_edges.items():
play_info = self._download_playinfo(video_id, cid, headers=headers)
play_info = self._download_playinfo(video_id, cid, headers=headers, try_look=1)
yield {
**metainfo,
'id': f'{video_id}_{cid}',
@ -688,11 +692,12 @@ class BiliBiliIE(BilibiliBaseIE):
aid = video_data.get('aid')
old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
if is_festival or not self.is_logged_in:
query = {'try_look': 1} if not self.is_logged_in else {}
play_info = self._download_playinfo(video_id, cid, headers=headers, **query)
festival_info = {}
if is_festival:
play_info = self._download_playinfo(video_id, cid, headers=headers)
festival_info = traverse_obj(initial_state, {
'uploader': ('videoInfo', 'upName'),
'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
@ -730,7 +735,7 @@ class BiliBiliIE(BilibiliBaseIE):
else:
formats = self.extract_formats(play_info)
if not traverse_obj(play_info, ('dash')):
if not play_info.get('dash'):
# we only have legacy formats and need additional work
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
@ -860,10 +865,16 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
self.raise_login_required('This video is for premium members only')
headers['Referer'] = url
play_info = self._download_json(
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
headers=headers)
play_info = self._search_json(
r'playurlSSRData\s*?=\s*?', webpage, 'embedded page info', episode_id,
end_pattern='\n', default=None)
if not play_info:
play_info = self._download_json(
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id},
headers=headers)
premium_only = play_info.get('code') == -10403
play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}

View File

@ -0,0 +1,105 @@
from .common import InfoExtractor
from ..utils import url_or_none
from ..utils.traversal import traverse_obj
class RadioRadicaleIE(InfoExtractor):
    # Extractor for radioradicale.it "scheda" pages. The numeric id is taken
    # from the URL; any slug after it is ignored by the pattern.
    _VALID_URL = r'https?://(?:www\.)?radioradicale\.it/scheda/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.radioradicale.it/scheda/471591',
        'md5': 'eb0fbe43a601f1a361cbd00f3c45af4a',
        'info_dict': {
            'id': '471591',
            'ext': 'mp4',
            'title': 'md5:e8fbb8de57011a3255db0beca69af73d',
            'description': 'md5:5e15a789a2fe4d67da8d1366996e89ef',
            'location': 'Napoli',
            'duration': 2852.0,
            'timestamp': 1459987200,
            'upload_date': '20160407',
            'thumbnail': 'https://www.radioradicale.it/photo400/0/0/9/0/1/00901768.jpg',
        },
    }, {
        'url': 'https://www.radioradicale.it/scheda/742783/parlamento-riunito-in-seduta-comune-11a-della-xix-legislatura',
        'info_dict': {
            'id': '742783',
            'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
            'description': '-) Votazione per l\'elezione di un giudice della Corte Costituzionale (nono scrutinio)',
            'location': 'CAMERA',
            'duration': 5868.0,
            'timestamp': 1730246400,
            'upload_date': '20241030',
        },
        'playlist': [{
            'md5': 'aa48de55dcc45478e4cd200f299aab7d',
            'info_dict': {
                'id': '742783-0',
                'ext': 'mp4',
                'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
            },
        }, {
            'md5': 'be915c189c70ad2920e5810f32260ff5',
            'info_dict': {
                'id': '742783-1',
                'ext': 'mp4',
                'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
            },
        }, {
            'md5': 'f0ee4047342baf8ed3128a8417ac5e0a',
            'info_dict': {
                'id': '742783-2',
                'ext': 'mp4',
                'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
            },
        }],
    }]

    def _entries(self, videos_info, page_id):
        """Yield one info dict per item of ``videos_info['playlist']`` that has sources.

        Items whose ``sources`` value is falsy are dropped before numbering,
        so each yielded id is ``f'{page_id}-{index}'`` with ``index`` counting
        only the items that were kept.
        """
        playable_items = traverse_obj(
            videos_info, ('playlist', lambda _, v: v['sources']))

        for index, item in enumerate(playable_items):
            entry_id = f'{page_id}-{index}'
            collected_formats, collected_subtitles = [], {}

            # Every source `src` that passes url_or_none is handed to the m3u8 extractor.
            manifest_urls = traverse_obj(item, ('sources', ..., 'src', {url_or_none}))
            for manifest_url in manifest_urls:
                manifest_formats, manifest_subtitles = self._extract_m3u8_formats_and_subtitles(
                    manifest_url, entry_id)
                collected_formats.extend(manifest_formats)
                self._merge_subtitles(manifest_subtitles, target=collected_subtitles)

            # Subtitle tracks listed in the page data are merged on top of
            # whatever the manifests provided.
            page_tracks = traverse_obj(
                item, ('subtitles', ..., lambda _, v: url_or_none(v['src'])))
            for track in page_tracks:
                # Tracks without a `srclang` are filed under 'und'.
                language = track.get('srclang') or 'und'
                track_info = {
                    'url': track['src'],
                    'name': track.get('label'),
                }
                self._merge_subtitles({language: [track_info]}, target=collected_subtitles)

            yield {
                'id': entry_id,
                'title': item.get('title'),
                'formats': collected_formats,
                'subtitles': collected_subtitles,
            }

    def _real_extract(self, url):
        """Extract a single video or a multi-video playlist from a scheda page.

        The page embeds its data as the JSON argument following
        ``jQuery.extend(Drupal.settings,``; the ``RRscheda`` key of that
        object is what ``_entries`` consumes.
        """
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)
        drupal_settings = self._search_json(
            r'jQuery\.extend\(Drupal\.settings\s*,',
            webpage, 'videos_info', page_id)
        videos_info = drupal_settings['RRscheda']

        # Materialize the generator up front: the entry count decides the
        # shape of the result.
        entries = list(self._entries(videos_info, page_id))

        # Page-level metadata; keys found in JSON-LD override the ones set here.
        common_info = dict(
            id=page_id,
            title=self._og_search_title(webpage),
            description=self._og_search_description(webpage),
            location=videos_info.get('luogo'),
        )
        common_info.update(self._search_json_ld(webpage, page_id))

        if len(entries) != 1:
            return self.playlist_result(entries, multi_video=True, **common_info)

        # Exactly one entry: return it as a plain video, with the page-level
        # metadata (including the bare page id) taking precedence.
        return {
            **entries[0],
            **common_info,
        }