mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-09 17:01:25 +01:00
Compare commits
No commits in common. "cf11b40ac40e3d23a6352753296f3a732886efb9" and "20fbbd9249a2f26c7ae579bde5ba5d69aa8fac69" have entirely different histories.
cf11b40ac4
...
20fbbd9249
|
@ -223,11 +223,7 @@ from .bilibili import (
|
|||
BiliBiliPlayerIE,
|
||||
BilibiliSpaceVideoIE,
|
||||
BilibiliSpaceAudioIE,
|
||||
BilibiliCollectionListIE,
|
||||
BilibiliSeriesListIE,
|
||||
BilibiliFavoritesListIE,
|
||||
BilibiliWatchlaterIE,
|
||||
BilibiliPlaylistIE,
|
||||
BilibiliSpacePlaylistIE,
|
||||
BiliIntlIE,
|
||||
BiliIntlSeriesIE,
|
||||
BiliLiveIE,
|
||||
|
@ -1505,7 +1501,6 @@ from .polskieradio import (
|
|||
from .popcorntimes import PopcorntimesIE
|
||||
from .popcorntv import PopcornTVIE
|
||||
from .porn91 import Porn91IE
|
||||
from .pornbox import PornboxIE
|
||||
from .porncom import PornComIE
|
||||
from .pornflip import PornFlipIE
|
||||
from .pornhd import PornHdIE
|
||||
|
@ -1560,14 +1555,7 @@ from .radiocanada import (
|
|||
from .radiode import RadioDeIE
|
||||
from .radiojavan import RadioJavanIE
|
||||
from .radiobremen import RadioBremenIE
|
||||
from .radiofrance import (
|
||||
FranceCultureIE,
|
||||
RadioFranceIE,
|
||||
RadioFranceLiveIE,
|
||||
RadioFrancePodcastIE,
|
||||
RadioFranceProfileIE,
|
||||
RadioFranceProgramScheduleIE,
|
||||
)
|
||||
from .radiofrance import FranceCultureIE, RadioFranceIE
|
||||
from .radiozet import RadioZetPodcastIE
|
||||
from .radiokapital import (
|
||||
RadioKapitalIE,
|
||||
|
@ -2372,8 +2360,7 @@ from .webofstories import (
|
|||
)
|
||||
from .weibo import (
|
||||
WeiboIE,
|
||||
WeiboVideoIE,
|
||||
WeiboUserIE,
|
||||
WeiboMobileIE
|
||||
)
|
||||
from .weiqitv import WeiqiTVIE
|
||||
from .weverse import (
|
||||
|
|
|
@ -15,7 +15,6 @@ from ..utils import (
|
|||
GeoRestrictedError,
|
||||
InAdvancePagedList,
|
||||
OnDemandPagedList,
|
||||
bool_or_none,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
format_field,
|
||||
|
@ -36,7 +35,6 @@ from ..utils import (
|
|||
unsmuggle_url,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
variadic,
|
||||
)
|
||||
|
||||
|
||||
|
@ -158,7 +156,7 @@ class BilibiliBaseIE(InfoExtractor):
|
|||
|
||||
|
||||
class BiliBiliIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/video/BV13x41117TL',
|
||||
|
@ -254,7 +252,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
|||
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
|
||||
'duration': 313.557,
|
||||
'upload_date': '20220709',
|
||||
'uploader': '小夫太渴',
|
||||
'uploader': '小夫Tech',
|
||||
'timestamp': 1657347907,
|
||||
'uploader_id': '1326814124',
|
||||
'comment_count': int,
|
||||
|
@ -511,7 +509,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
|||
|
||||
|
||||
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/bangumi/media/md24097891',
|
||||
'info_dict': {
|
||||
|
@ -530,7 +528,7 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE):
|
|||
|
||||
|
||||
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
|
||||
_VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/ss(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ss26801',
|
||||
'info_dict': {
|
||||
|
@ -681,35 +679,13 @@ class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
|
|||
return self.playlist_result(paged_list, playlist_id)
|
||||
|
||||
|
||||
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
|
||||
def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
|
||||
for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
|
||||
yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)
|
||||
|
||||
def _get_uploader(self, uid, playlist_id):
|
||||
webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
|
||||
return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)
|
||||
|
||||
def _extract_playlist(self, fetch_page, get_metadata, get_entries):
|
||||
metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
|
||||
metadata.pop('page_count', None)
|
||||
metadata.pop('page_size', None)
|
||||
return metadata, page_list
|
||||
|
||||
|
||||
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
|
||||
class BilibiliSpacePlaylistIE(BilibiliSpaceBaseIE):
|
||||
_VALID_URL = r'https?://space.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail\?sid=(?P<sid>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
|
||||
'info_dict': {
|
||||
'id': '2142762_57445',
|
||||
'title': '【完结】《底特律 变人》全结局流程解说',
|
||||
'description': '',
|
||||
'uploader': '老戴在此',
|
||||
'uploader_id': '2142762',
|
||||
'timestamp': int,
|
||||
'upload_date': str,
|
||||
'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
|
||||
'title': '《底特律 变人》'
|
||||
},
|
||||
'playlist_mincount': 31,
|
||||
}]
|
||||
|
@ -730,251 +706,22 @@ class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
|
|||
return {
|
||||
'page_count': math.ceil(entry_count / page_size),
|
||||
'page_size': page_size,
|
||||
'uploader': self._get_uploader(mid, playlist_id),
|
||||
**traverse_obj(page_data, {
|
||||
'title': ('meta', 'name', {str}),
|
||||
'description': ('meta', 'description', {str}),
|
||||
'uploader_id': ('meta', 'mid', {str_or_none}),
|
||||
'timestamp': ('meta', 'ptime', {int_or_none}),
|
||||
'thumbnail': ('meta', 'cover', {url_or_none}),
|
||||
})
|
||||
'title': traverse_obj(page_data, ('meta', 'name'))
|
||||
}
|
||||
|
||||
def get_entries(page_data):
|
||||
return self._get_entries(page_data, 'archives')
|
||||
for entry in page_data.get('archives', []):
|
||||
yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}',
|
||||
BiliBiliIE, entry['bvid'])
|
||||
|
||||
metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
|
||||
return self.playlist_result(paged_list, playlist_id, **metadata)
|
||||
|
||||
|
||||
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
|
||||
'info_dict': {
|
||||
'id': '1958703906_547718',
|
||||
'title': '直播回放',
|
||||
'description': '直播回放',
|
||||
'uploader': '靡烟miya',
|
||||
'uploader_id': '1958703906',
|
||||
'timestamp': 1637985853,
|
||||
'upload_date': '20211127',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
},
|
||||
'playlist_mincount': 513,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mid, sid = self._match_valid_url(url).group('mid', 'sid')
|
||||
playlist_id = f'{mid}_{sid}'
|
||||
playlist_meta = traverse_obj(self._download_json(
|
||||
f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
|
||||
), {
|
||||
'title': ('data', 'meta', 'name', {str}),
|
||||
'description': ('data', 'meta', 'description', {str}),
|
||||
'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
|
||||
'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
|
||||
'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
|
||||
})
|
||||
|
||||
def fetch_page(page_idx):
|
||||
return self._download_json(
|
||||
'https://api.bilibili.com/x/series/archives',
|
||||
playlist_id, note=f'Downloading page {page_idx}',
|
||||
query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']
|
||||
|
||||
def get_metadata(page_data):
|
||||
page_size = page_data['page']['size']
|
||||
entry_count = page_data['page']['total']
|
||||
return {
|
||||
'page_count': math.ceil(entry_count / page_size),
|
||||
'page_size': page_size,
|
||||
'uploader': self._get_uploader(mid, playlist_id),
|
||||
**playlist_meta
|
||||
}
|
||||
|
||||
def get_entries(page_data):
|
||||
return self._get_entries(page_data, 'archives')
|
||||
|
||||
metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
|
||||
return self.playlist_result(paged_list, playlist_id, **metadata)
|
||||
|
||||
|
||||
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
|
||||
'info_dict': {
|
||||
'id': '1103407912',
|
||||
'title': '【V2】(旧)',
|
||||
'description': '',
|
||||
'uploader': '晓月春日',
|
||||
'uploader_id': '84912',
|
||||
'timestamp': 1604905176,
|
||||
'upload_date': '20201109',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
'playlist_mincount': 22,
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
fid = self._match_id(url)
|
||||
|
||||
list_info = self._download_json(
|
||||
f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
|
||||
fid, note='Downloading favlist metadata')
|
||||
if list_info['code'] == -403:
|
||||
self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')
|
||||
|
||||
entries = self._get_entries(self._download_json(
|
||||
f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
|
||||
fid, note='Download favlist entries'), 'data')
|
||||
|
||||
return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
|
||||
'title': ('title', {str}),
|
||||
'description': ('intro', {str}),
|
||||
'uploader': ('upper', 'name', {str}),
|
||||
'uploader_id': ('upper', 'mid', {str_or_none}),
|
||||
'timestamp': ('ctime', {int_or_none}),
|
||||
'modified_timestamp': ('mtime', {int_or_none}),
|
||||
'thumbnail': ('cover', {url_or_none}),
|
||||
'view_count': ('cnt_info', 'play', {int_or_none}),
|
||||
'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
|
||||
})))
|
||||
|
||||
|
||||
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/watchlater/#/list',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = getattr(self._get_cookies(url).get('DedeUserID'), 'value', 'watchlater')
|
||||
watchlater_info = self._download_json(
|
||||
'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
|
||||
if watchlater_info['code'] == -101:
|
||||
self.raise_login_required(msg='You need to login to access your watchlater list')
|
||||
entries = self._get_entries(watchlater_info, ('data', 'list'))
|
||||
return self.playlist_result(entries, id=list_id, title='稍后再看')
|
||||
|
||||
|
||||
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
|
||||
'info_dict': {
|
||||
'id': '5_547718',
|
||||
'title': '直播回放',
|
||||
'uploader': '靡烟miya',
|
||||
'uploader_id': '1958703906',
|
||||
'timestamp': 1637985853,
|
||||
'upload_date': '20211127',
|
||||
},
|
||||
'playlist_mincount': 513,
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
|
||||
'info_dict': {
|
||||
'id': '5_547718',
|
||||
},
|
||||
'playlist_mincount': 513,
|
||||
'skip': 'redirect url',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/list/ml1103407912',
|
||||
'info_dict': {
|
||||
'id': '3_1103407912',
|
||||
'title': '【V2】(旧)',
|
||||
'uploader': '晓月春日',
|
||||
'uploader_id': '84912',
|
||||
'timestamp': 1604905176,
|
||||
'upload_date': '20201109',
|
||||
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
|
||||
},
|
||||
'playlist_mincount': 22,
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
|
||||
'info_dict': {
|
||||
'id': '3_1103407912',
|
||||
},
|
||||
'playlist_mincount': 22,
|
||||
'skip': 'redirect url',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/list/watchlater',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/medialist/play/watchlater',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
}]
|
||||
|
||||
def _extract_medialist(self, query, list_id):
|
||||
for page_num in itertools.count(1):
|
||||
page_data = self._download_json(
|
||||
'https://api.bilibili.com/x/v2/medialist/resource/list',
|
||||
list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}'
|
||||
)['data']
|
||||
yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
|
||||
query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
|
||||
if not page_data.get('has_more', False):
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, list_id)
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
|
||||
if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
|
||||
error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
|
||||
error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
|
||||
if error_code == -400 and list_id == 'watchlater':
|
||||
self.raise_login_required('You need to login to access your watchlater playlist')
|
||||
elif error_code == -403:
|
||||
self.raise_login_required('This is a private playlist. You need to login as its owner')
|
||||
elif error_code == 11010:
|
||||
raise ExtractorError('Playlist is no longer available', expected=True)
|
||||
raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')
|
||||
|
||||
query = {
|
||||
'ps': 20,
|
||||
'with_current': False,
|
||||
**traverse_obj(initial_state, {
|
||||
'type': ('playlist', 'type', {int_or_none}),
|
||||
'biz_id': ('playlist', 'id', {int_or_none}),
|
||||
'tid': ('tid', {int_or_none}),
|
||||
'sort_field': ('sortFiled', {int_or_none}),
|
||||
'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
|
||||
})
|
||||
}
|
||||
metadata = {
|
||||
'id': f'{query["type"]}_{query["biz_id"]}',
|
||||
**traverse_obj(initial_state, ('mediaListInfo', {
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('upper', 'name', {str}),
|
||||
'uploader_id': ('upper', 'mid', {str_or_none}),
|
||||
'timestamp': ('ctime', {int_or_none}),
|
||||
'thumbnail': ('cover', {url_or_none}),
|
||||
})),
|
||||
}
|
||||
return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
|
||||
return self.playlist_result(paged_list, playlist_id, metadata['title'])
|
||||
|
||||
|
||||
class BilibiliCategoryIE(InfoExtractor):
|
||||
IE_NAME = 'Bilibili category extractor'
|
||||
_MAX_RESULTS = 1000000
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
|
||||
_VALID_URL = r'https?://www\.bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/v/kichiku/mad',
|
||||
'info_dict': {
|
||||
|
@ -1659,7 +1406,7 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
|
|||
|
||||
|
||||
class BiliLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://live.bilibili.com/(?:blanc/)?(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://live.bilibili.com/196',
|
||||
|
|
|
@ -90,17 +90,10 @@ class CCCPlaylistIE(InfoExtractor):
|
|||
'id': '30c3',
|
||||
},
|
||||
'playlist_count': 135,
|
||||
}, {
|
||||
'url': 'https://media.ccc.de/c/DS2023',
|
||||
'info_dict': {
|
||||
'title': 'Datenspuren 2023',
|
||||
'id': 'DS2023',
|
||||
},
|
||||
'playlist_count': 37
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
playlist_id = self._match_id(url).lower()
|
||||
|
||||
conf = self._download_json(
|
||||
'https://media.ccc.de/public/conferences/' + playlist_id,
|
||||
|
|
|
@ -33,7 +33,7 @@ class N1InfoAssetIE(InfoExtractor):
|
|||
|
||||
class N1InfoIIE(InfoExtractor):
|
||||
IE_NAME = 'N1Info:article'
|
||||
_VALID_URL = r'https?://(?:(?:\w+\.)?n1info\.\w+|nova\.rs)/(?:[^/?#]+/){1,2}(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:(?:(?:ba|rs|hr)\.)?n1info\.(?:com|si)|nova\.rs)/(?:[^/]+/){1,2}(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
# Youtube embedded
|
||||
'url': 'https://rs.n1info.com/sport-klub/tenis/kako-je-djokovic-propustio-istorijsku-priliku-video/',
|
||||
|
@ -94,16 +94,6 @@ class N1InfoIIE(InfoExtractor):
|
|||
'upload_date': '20211102',
|
||||
'timestamp': 1635861677,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://n1info.rs/vesti/cuta-biti-u-kosovskoj-mitrovici-znaci-da-te-docekaju-eksplozivnim-napravama/',
|
||||
'info_dict': {
|
||||
'id': '1332368',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ćuta: Biti u Kosovskoj Mitrovici znači da te dočekaju eksplozivnim napravama',
|
||||
'upload_date': '20230620',
|
||||
'timestamp': 1687290536,
|
||||
'thumbnail': 'https://cdn.brid.tv/live/partners/26827/snapshot/1332368_th_6492013a8356f_1687290170.jpg'
|
||||
},
|
||||
}, {
|
||||
'url': 'https://hr.n1info.com/vijesti/pravobraniteljica-o-ubojstvu-u-zagrebu-radi-se-o-doista-nezapamcenoj-situaciji/',
|
||||
'only_matching': True,
|
||||
|
@ -115,24 +105,9 @@ class N1InfoIIE(InfoExtractor):
|
|||
|
||||
title = self._html_search_regex(r'<h1[^>]+>(.+?)</h1>', webpage, 'title')
|
||||
timestamp = unified_timestamp(self._html_search_meta('article:published_time', webpage))
|
||||
plugin_data = self._html_search_meta('BridPlugin', webpage)
|
||||
entries = []
|
||||
if plugin_data:
|
||||
site_id = self._html_search_regex(r'site:(\d+)', webpage, 'site id')
|
||||
for video_data in re.findall(r'\$bp\("Brid_\d+", (.+)\);', webpage):
|
||||
video_id = self._parse_json(video_data, title)['video']
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': self._html_search_meta('thumbnailURL', webpage),
|
||||
'formats': self._extract_m3u8_formats(
|
||||
f'https://cdn-uc.brid.tv/live/partners/{site_id}/streaming/{video_id}/{video_id}.m3u8',
|
||||
video_id, fatal=False),
|
||||
})
|
||||
else:
|
||||
# Old player still present in older articles
|
||||
|
||||
videos = re.findall(r'(?m)(<video[^>]+>)', webpage)
|
||||
entries = []
|
||||
for video in videos:
|
||||
video_data = extract_attributes(video)
|
||||
entries.append({
|
||||
|
@ -142,8 +117,7 @@ class N1InfoIIE(InfoExtractor):
|
|||
'title': title,
|
||||
'thumbnail': video_data.get('data-thumbnail'),
|
||||
'timestamp': timestamp,
|
||||
'ie_key': 'N1InfoAsset',
|
||||
})
|
||||
'ie_key': 'N1InfoAsset'})
|
||||
|
||||
embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage)
|
||||
for embedded_video in embedded_videos:
|
||||
|
|
|
@ -1,113 +0,0 @@
|
|||
from .common import InfoExtractor
|
||||
from ..compat import functools
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class PornboxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?pornbox\.com/application/watch-page/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://pornbox.com/application/watch-page/212108',
|
||||
'md5': '3ff6b6e206f263be4c5e987a3162ac6e',
|
||||
'info_dict': {
|
||||
'id': '212108',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:ececc5c6e6c9dd35d290c45fed05fd49',
|
||||
'uploader': 'Lily Strong',
|
||||
'timestamp': 1665871200,
|
||||
'upload_date': '20221015',
|
||||
'age_limit': 18,
|
||||
'availability': 'needs_auth',
|
||||
'duration': 1505,
|
||||
'cast': ['Lily Strong', 'John Strong'],
|
||||
'tags': 'count:11',
|
||||
'description': 'md5:589c7f33e183aa8aa939537300efb859',
|
||||
'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$'
|
||||
}
|
||||
}, {
|
||||
'url': 'https://pornbox.com/application/watch-page/216045',
|
||||
'info_dict': {
|
||||
'id': '216045',
|
||||
'title': 'md5:3e48528e73a9a2b12f7a2772ed0b26a2',
|
||||
'description': 'md5:3e631dcaac029f15ed434e402d1b06c7',
|
||||
'uploader': 'VK Studio',
|
||||
'timestamp': 1618264800,
|
||||
'upload_date': '20210412',
|
||||
'age_limit': 18,
|
||||
'availability': 'premium_only',
|
||||
'duration': 2710,
|
||||
'cast': 'count:3',
|
||||
'tags': 'count:29',
|
||||
'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$',
|
||||
'subtitles': 'count:6'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'ignore_no_formats_error': True
|
||||
},
|
||||
'expected_warnings': [
|
||||
'You are either not logged in or do not have access to this scene',
|
||||
'No video formats found', 'Requested format is not available']
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
public_data = self._download_json(f'https://pornbox.com/contents/{video_id}', video_id)
|
||||
|
||||
subtitles = {country_code: [{
|
||||
'url': f'https://pornbox.com/contents/{video_id}/subtitles/{country_code}',
|
||||
'ext': 'srt'
|
||||
}] for country_code in traverse_obj(public_data, ('subtitles', ..., {str}))}
|
||||
|
||||
is_free_scene = traverse_obj(
|
||||
public_data, ('price', 'is_available_for_free', {bool}), default=False)
|
||||
|
||||
metadata = {
|
||||
'id': video_id,
|
||||
**traverse_obj(public_data, {
|
||||
'title': ('scene_name', {str.strip}),
|
||||
'description': ('small_description', {str.strip}),
|
||||
'uploader': 'studio',
|
||||
'duration': ('runtime', {parse_duration}),
|
||||
'cast': (('models', 'male_models'), ..., 'model_name'),
|
||||
'thumbnail': ('player_poster', {url_or_none}),
|
||||
'tags': ('niches', ..., 'niche'),
|
||||
}),
|
||||
'age_limit': 18,
|
||||
'timestamp': parse_iso8601(traverse_obj(
|
||||
public_data, ('studios', 'release_date'), 'publish_date')),
|
||||
'availability': self._availability(needs_auth=True, needs_premium=not is_free_scene),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
if not public_data.get('is_purchased') or not is_free_scene:
|
||||
self.raise_login_required(
|
||||
'You are either not logged in or do not have access to this scene', metadata_available=True)
|
||||
return metadata
|
||||
|
||||
media_id = traverse_obj(public_data, (
|
||||
'medias', lambda _, v: v['title'] == 'Full video', 'media_id', {int}), get_all=False)
|
||||
if not media_id:
|
||||
self.raise_no_formats('Could not find stream id', video_id=video_id)
|
||||
|
||||
stream_data = self._download_json(
|
||||
f'https://pornbox.com/media/{media_id}/stream', video_id=video_id, note='Getting manifest urls')
|
||||
|
||||
get_quality = qualities(['web', 'vga', 'hd', '1080p', '4k', '8k'])
|
||||
metadata['formats'] = traverse_obj(stream_data, ('qualities', lambda _, v: v['src'], {
|
||||
'url': 'src',
|
||||
'vbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
|
||||
'format_id': ('quality', {str_or_none}),
|
||||
'quality': ('quality', {get_quality}),
|
||||
'width': ('size', {lambda x: int(x[:-1])}),
|
||||
}))
|
||||
|
||||
return metadata
|
|
@ -1,18 +1,7 @@
|
|||
import itertools
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
strftime_or_none,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils import parse_duration, unified_strdate
|
||||
|
||||
|
||||
class RadioFranceIE(InfoExtractor):
|
||||
|
@ -67,32 +56,8 @@ class RadioFranceIE(InfoExtractor):
|
|||
}
|
||||
|
||||
|
||||
class RadioFranceBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?radiofrance\.fr'
|
||||
|
||||
_STATIONS_RE = '|'.join(map(re.escape, (
|
||||
'franceculture',
|
||||
'franceinfo',
|
||||
'franceinter',
|
||||
'francemusique',
|
||||
'fip',
|
||||
'mouv',
|
||||
)))
|
||||
|
||||
def _extract_data_from_webpage(self, webpage, display_id, key):
|
||||
return traverse_obj(self._search_json(
|
||||
r'\bconst\s+data\s*=', webpage, key, display_id,
|
||||
contains_pattern=r'(\[\{.*?\}\]);', transform_source=js_to_json),
|
||||
(..., 'data', key, {dict}), get_all=False) or {}
|
||||
|
||||
|
||||
class FranceCultureIE(RadioFranceBaseIE):
|
||||
_VALID_URL = rf'''(?x)
|
||||
{RadioFranceBaseIE._VALID_URL_BASE}
|
||||
/(?:{RadioFranceBaseIE._STATIONS_RE})
|
||||
/podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d{{6,}})(?:$|[?#])
|
||||
'''
|
||||
|
||||
class FranceCultureIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?radiofrance\.fr/(?:franceculture|fip|francemusique|mouv|franceinter)/podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d+)($|[?#])'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.radiofrance.fr/franceculture/podcasts/science-en-questions/la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau-8440487',
|
||||
|
@ -102,30 +67,14 @@ class FranceCultureIE(RadioFranceBaseIE):
|
|||
'ext': 'mp3',
|
||||
'title': 'La physique d’Einstein aiderait-elle à comprendre le cerveau ?',
|
||||
'description': 'Existerait-il un pont conceptuel entre la physique de l’espace-temps et les neurosciences ?',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'thumbnail': 'https://cdn.radiofrance.fr/s3/cruiser-production/2022/05/d184e7a3-4827-4494-bf94-04ed7b120db4/1200x630_gettyimages-200171095-001.jpg',
|
||||
'upload_date': '20220514',
|
||||
'duration': 2750,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.radiofrance.fr/franceinter/podcasts/le-7-9-30/le-7-9-30-du-vendredi-10-mars-2023-2107675',
|
||||
'info_dict': {
|
||||
'id': '2107675',
|
||||
'display_id': 'le-7-9-30-du-vendredi-10-mars-2023',
|
||||
'title': 'Inflation alimentaire : comment en sortir ? - Régis Debray et Claude Grange - Cybèle Idelot',
|
||||
'description': 'md5:36ee74351ede77a314fdebb94026b916',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'upload_date': '20230310',
|
||||
'duration': 8977,
|
||||
'ext': 'mp3',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.radiofrance.fr/franceinter/podcasts/la-rafle-du-vel-d-hiv-une-affaire-d-etat/les-racines-du-crime-episode-1-3715507',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-billet-sciences/sante-bientot-un-vaccin-contre-l-asthme-allergique-3057200',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
|
@ -140,6 +89,7 @@ class FranceCultureIE(RadioFranceBaseIE):
|
|||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_data['contentUrl'],
|
||||
'ext': video_data.get('encodingFormat'),
|
||||
'vcodec': 'none' if video_data.get('encodingFormat') == 'mp3' else None,
|
||||
'duration': parse_duration(video_data.get('duration')),
|
||||
'title': self._html_search_regex(r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>',
|
||||
|
@ -152,322 +102,3 @@ class FranceCultureIE(RadioFranceBaseIE):
|
|||
'upload_date': unified_strdate(self._search_regex(
|
||||
r'"datePublished"\s*:\s*"([^"]+)', webpage, 'timestamp', fatal=False))
|
||||
}
|
||||
|
||||
|
||||
class RadioFranceLiveIE(RadioFranceBaseIE):
|
||||
_VALID_URL = rf'''(?x)
|
||||
https?://(?:www\.)?radiofrance\.fr
|
||||
/(?P<id>{RadioFranceBaseIE._STATIONS_RE})
|
||||
/?(?P<substation_id>radio-[\w-]+)?(?:[#?]|$)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.radiofrance.fr/franceinter/',
|
||||
'info_dict': {
|
||||
'id': 'franceinter',
|
||||
'title': str,
|
||||
'live_status': 'is_live',
|
||||
'ext': 'aac',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'Livestream',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.radiofrance.fr/franceculture',
|
||||
'info_dict': {
|
||||
'id': 'franceculture',
|
||||
'title': str,
|
||||
'live_status': 'is_live',
|
||||
'ext': 'aac',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'Livestream',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.radiofrance.fr/mouv/radio-musique-kids-family',
|
||||
'info_dict': {
|
||||
'id': 'mouv-radio-musique-kids-family',
|
||||
'title': str,
|
||||
'live_status': 'is_live',
|
||||
'ext': 'aac',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'Livestream',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.radiofrance.fr/mouv/radio-rnb-soul',
|
||||
'info_dict': {
|
||||
'id': 'mouv-radio-rnb-soul',
|
||||
'title': str,
|
||||
'live_status': 'is_live',
|
||||
'ext': 'aac',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'Livestream',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.radiofrance.fr/mouv/radio-musique-mix',
|
||||
'info_dict': {
|
||||
'id': 'mouv-radio-musique-mix',
|
||||
'title': str,
|
||||
'live_status': 'is_live',
|
||||
'ext': 'aac',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'Livestream',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.radiofrance.fr/fip/radio-rock',
|
||||
'info_dict': {
|
||||
'id': 'fip-radio-rock',
|
||||
'title': str,
|
||||
'live_status': 'is_live',
|
||||
'ext': 'aac',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'Livestream',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.radiofrance.fr/mouv',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
station_id, substation_id = self._match_valid_url(url).group('id', 'substation_id')
|
||||
|
||||
if substation_id:
|
||||
webpage = self._download_webpage(url, station_id)
|
||||
api_response = self._extract_data_from_webpage(webpage, station_id, 'webRadioData')
|
||||
else:
|
||||
api_response = self._download_json(
|
||||
f'https://www.radiofrance.fr/{station_id}/api/live', station_id)
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for media_source in traverse_obj(api_response, (('now', None), 'media', 'sources', lambda _, v: v['url'])):
|
||||
if media_source.get('format') == 'hls':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(media_source['url'], station_id, fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
else:
|
||||
formats.append({
|
||||
'url': media_source['url'],
|
||||
'abr': media_source.get('bitrate'),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': join_nonempty(station_id, substation_id),
|
||||
'title': traverse_obj(api_response, ('visual', 'legend')) or join_nonempty(
|
||||
('now', 'firstLine', 'title'), ('now', 'secondLine', 'title'), from_dict=api_response, delim=' - '),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'is_live': True,
|
||||
}
|
||||
|
||||
|
||||
class RadioFrancePlaylistBase(RadioFranceBaseIE):
    """Shared playlist extraction for cursor-paginated listings.

    Subclasses must set _METADATA_KEY and implement _call_api().
    """

    def _call_api(self, content_id, cursor, page_num):
        """Return the page that starts at ``cursor``; subclasses must override this."""
        raise NotImplementedError('This method must be implemented by subclasses')

    def _generate_playlist_entries(self, content_id, content_response):
        """Yield one url_transparent result per item, following ``next`` cursors.

        ``content_response`` is the first page. Further pages are requested
        through _call_api(), numbered from 2, until no cursor is found.
        """
        page_num = 2
        while True:
            for item in content_response['items']:
                item_url = f'https://www.radiofrance.fr/{item["path"]}'
                item_info = traverse_obj(item, {
                    'title': 'title',
                    'description': 'standFirst',
                    'timestamp': ('publishedDate', {int_or_none}),
                    'thumbnail': ('visual', 'src'),
                })
                yield self.url_result(item_url, url_transparent=True, **item_info)

            # The cursor is looked up under 'pagination' first, then at the top level
            cursor = traverse_obj(content_response, (('pagination', None), 'next'), get_all=False)
            if not cursor:
                return

            content_response = self._call_api(content_id, cursor, page_num)
            page_num += 1

    def _real_extract(self, url):
        """Resolve the URL path through the ``path`` API and return a playlist result."""
        display_id = self._match_id(url)

        path_response = self._download_json(
            'https://www.radiofrance.fr/api/v2.1/path', display_id,
            query={'value': urllib.parse.urlparse(url).path})
        metadata = path_response['content']
        content_id = metadata['id']

        entries = self._generate_playlist_entries(content_id, metadata[self._METADATA_KEY])
        content_fields = traverse_obj(metadata, {
            'title': 'title',
            'description': 'standFirst',
            'thumbnail': ('visual', 'src'),
        })
        # 'name'/'role' take precedence over 'title'/'standFirst' when present
        name_fields = traverse_obj(metadata, {
            'title': 'name',
            'description': 'role',
        })

        return self.playlist_result(
            entries, content_id, display_id=display_id, **{**content_fields, **name_fields})
|
||||
|
||||
|
||||
class RadioFrancePodcastIE(RadioFrancePlaylistBase):
    """Playlist extractor for ``/<station>/podcasts/<id>`` pages."""

    _VALID_URL = rf'''(?x)
        {RadioFranceBaseIE._VALID_URL_BASE}
        /(?:{RadioFranceBaseIE._STATIONS_RE})
        /podcasts/(?P<id>[\w-]+)/?(?:[?#]|$)
    '''

    # Key of the first page of entries inside the ``path`` API content
    _METADATA_KEY = 'expressions'

    _TESTS = [{
        'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-billet-vert',
        'info_dict': {
            'id': 'eaf6ef81-a980-4f1c-a7d1-8a75ecd54b17',
            'display_id': 'le-billet-vert',
            'title': 'Le billet sciences',
            'description': 'md5:eb1007b34b0c0a680daaa71525bbd4c1',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://www.radiofrance.fr/franceinter/podcasts/jean-marie-le-pen-l-obsession-nationale',
        'info_dict': {
            'id': '566fd524-3074-4fbc-ac69-8696f2152a54',
            'display_id': 'jean-marie-le-pen-l-obsession-nationale',
            'title': 'Jean-Marie Le Pen, l\'obsession nationale',
            'description': 'md5:a07c0cfb894f6d07a62d0ad12c4b7d73',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_count': 7,
    }, {
        'url': 'https://www.radiofrance.fr/franceculture/podcasts/serie-thomas-grjebine',
        'info_dict': {
            'id': '63c1ddc9-9f15-457a-98b2-411bac63f48d',
            'display_id': 'serie-thomas-grjebine',
            'title': 'Thomas Grjebine',
        },
        'playlist_count': 1,
    }, {
        'url': 'https://www.radiofrance.fr/fip/podcasts/certains-l-aiment-fip',
        'info_dict': {
            'id': '143dff38-e956-4a5d-8576-1c0b7242b99e',
            'display_id': 'certains-l-aiment-fip',
            'title': 'Certains l’aiment Fip',
            'description': 'md5:ff974672ba00d4fd5be80fb001c5b27e',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 321,
    }, {
        'url': 'https://www.radiofrance.fr/franceinter/podcasts/le-7-9',
        'only_matching': True,
    }, {
        'url': 'https://www.radiofrance.fr/mouv/podcasts/dirty-mix',
        'only_matching': True,
    }]

    def _call_api(self, podcast_id, cursor, page_num):
        """Download the page of the podcast's expressions that starts at ``cursor``."""
        api_url = f'https://www.radiofrance.fr/api/v2.1/concepts/{podcast_id}/expressions'
        return self._download_json(
            api_url, podcast_id,
            query={'pageCursor': cursor}, note=f'Downloading page {page_num}')
|
||||
|
||||
|
||||
class RadioFranceProfileIE(RadioFrancePlaylistBase):
    """Playlist extractor for ``/personnes/<id>`` pages."""

    _VALID_URL = rf'{RadioFranceBaseIE._VALID_URL_BASE}/personnes/(?P<id>[\w-]+)'

    # Key of the first page of entries inside the ``path`` API content
    _METADATA_KEY = 'documents'

    _TESTS = [{
        'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet?p=3',
        'info_dict': {
            'id': '86c62790-e481-11e2-9f7b-782bcb6744eb',
            'display_id': 'thomas-pesquet',
            'title': 'Thomas Pesquet',
            'description': 'Astronaute à l\'agence spatiale européenne',
        },
        'playlist_mincount': 212,
    }, {
        'url': 'https://www.radiofrance.fr/personnes/eugenie-bastie',
        'info_dict': {
            'id': '9593050b-0183-4972-a0b5-d8f699079e02',
            'display_id': 'eugenie-bastie',
            'title': 'Eugénie Bastié',
            'description': 'Journaliste et essayiste',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 39,
    }, {
        'url': 'https://www.radiofrance.fr/personnes/lea-salame',
        'only_matching': True,
    }]

    def _call_api(self, profile_id, cursor, page_num):
        """Download the page of the profile's documents that starts at ``cursor``.

        The nested ``pagination.next`` value is also stored under a
        top-level ``next`` key of the returned response.
        """
        request_query = {
            'relation': 'personality',
            'cursor': cursor,
        }
        documents = self._download_json(
            f'https://www.radiofrance.fr/api/v2.1/taxonomy/{profile_id}/documents', profile_id,
            note=f'Downloading page {page_num}', query=request_query)

        documents['next'] = traverse_obj(documents, ('pagination', 'next'))
        return documents
|
||||
|
||||
|
||||
class RadioFranceProgramScheduleIE(RadioFranceBaseIE):
    """Playlist extractor for ``/<station>/grille-programmes`` schedule pages."""

    _VALID_URL = rf'''(?x)
        {RadioFranceBaseIE._VALID_URL_BASE}
        /(?P<station>{RadioFranceBaseIE._STATIONS_RE})
        /grille-programmes(?:\?date=(?P<date>[\d-]+))?
    '''

    _TESTS = [{
        'url': 'https://www.radiofrance.fr/franceinter/grille-programmes?date=17-02-2023',
        'info_dict': {
            'id': 'franceinter-program-20230217',
            'upload_date': '20230217',
        },
        'playlist_count': 25,
    }, {
        'url': 'https://www.radiofrance.fr/franceculture/grille-programmes?date=01-02-2023',
        'info_dict': {
            'id': 'franceculture-program-20230201',
            'upload_date': '20230201',
        },
        'playlist_count': 25,
    }, {
        'url': 'https://www.radiofrance.fr/mouv/grille-programmes?date=19-03-2023',
        'info_dict': {
            'id': 'mouv-program-20230319',
            'upload_date': '20230319',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.radiofrance.fr/francemusique/grille-programmes?date=18-03-2023',
        'info_dict': {
            'id': 'francemusique-program-20230318',
            'upload_date': '20230318',
        },
        'playlist_count': 15,
    }, {
        'url': 'https://www.radiofrance.fr/franceculture/grille-programmes',
        'only_matching': True,
    }]

    def _generate_playlist_entries(self, webpage_url, api_response):
        """Yield a url_transparent result for every step that has an expression path.

        Steps whose ``expression.path`` is missing or empty are skipped by
        the traversal filter. Entry URLs are resolved against ``webpage_url``.
        """
        for entry in traverse_obj(api_response, ('steps', lambda _, v: v['expression']['path'])):
            yield self.url_result(
                urljoin(webpage_url, f'/{entry["expression"]["path"]}'), ie=FranceCultureIE,
                url_transparent=True, **traverse_obj(entry, {
                    'title': ('expression', 'title'),
                    'thumbnail': ('expression', 'visual', 'src'),
                    'timestamp': ('startTime', {int_or_none}),
                    'series_id': ('concept', 'id'),
                    'series': ('concept', 'title'),
                }))

    def _real_extract(self, url):
        """Return the schedule page's programme grid as a playlist.

        The playlist id is built from the station, the literal ``program``
        and the upload date. The date is taken from the page's embedded
        ``grid`` data, not from the URL's ``date`` query value.
        """
        # Only the station is needed; the URL's optional `date` group is not read
        station = self._match_valid_url(url).group('station')
        webpage = self._download_webpage(url, station)
        grid_data = self._extract_data_from_webpage(webpage, station, 'grid')
        upload_date = strftime_or_none(grid_data.get('date'), '%Y%m%d')

        return self.playlist_result(
            self._generate_playlist_entries(url, grid_data),
            join_nonempty(station, 'program', upload_date), upload_date=upload_date)
|
||||
|
|
|
@ -1,241 +1,134 @@
|
|||
import random
|
||||
import itertools
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
make_archive_id,
|
||||
mimetype2ext,
|
||||
parse_resolution,
|
||||
str_or_none,
|
||||
js_to_json,
|
||||
strip_jsonp,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class WeiboBaseIE(InfoExtractor):
|
||||
def _update_visitor_cookies(self, video_id):
|
||||
class WeiboIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?weibo\.com/[0-9]+/(?P<id>[a-zA-Z0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://weibo.com/6275294458/Fp6RGfbff?type=comment',
|
||||
'info_dict': {
|
||||
'id': 'Fp6RGfbff',
|
||||
'ext': 'mp4',
|
||||
'title': 'You should have servants to massage you,... 来自Hosico_猫 - 微博',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
# to get Referer url for genvisitor
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||
|
||||
visitor_url = urlh.url
|
||||
|
||||
if 'passport.weibo.com' in visitor_url:
|
||||
# first visit
|
||||
visitor_data = self._download_json(
|
||||
'https://passport.weibo.com/visitor/genvisitor', video_id,
|
||||
note='Generating first-visit guest request',
|
||||
note='Generating first-visit data',
|
||||
transform_source=strip_jsonp,
|
||||
headers={'Referer': visitor_url},
|
||||
data=urlencode_postdata({
|
||||
'cb': 'gen_callback',
|
||||
'fp': '{"os":"2","browser":"Gecko57,0,0,0","fonts":"undefined","screenInfo":"1440*900*24","plugins":""}',
|
||||
'fp': json.dumps({
|
||||
'os': '2',
|
||||
'browser': 'Gecko57,0,0,0',
|
||||
'fonts': 'undefined',
|
||||
'screenInfo': '1440*900*24',
|
||||
'plugins': '',
|
||||
}),
|
||||
}))
|
||||
|
||||
tid = visitor_data['data']['tid']
|
||||
cnfd = '%03d' % visitor_data['data']['confidence']
|
||||
|
||||
self._download_webpage(
|
||||
'https://passport.weibo.com/visitor/visitor', video_id,
|
||||
note='Running first-visit callback to get guest cookies',
|
||||
note='Running first-visit callback',
|
||||
query={
|
||||
'a': 'incarnate',
|
||||
't': visitor_data['data']['tid'],
|
||||
't': tid,
|
||||
'w': 2,
|
||||
'c': '%03d' % visitor_data['data']['confidence'],
|
||||
'c': cnfd,
|
||||
'cb': 'cross_domain',
|
||||
'from': 'weibo',
|
||||
'_rand': random.random(),
|
||||
})
|
||||
|
||||
def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs):
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs)
|
||||
if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com':
|
||||
self._update_visitor_cookies(video_id)
|
||||
webpage = self._download_webpage(url, video_id, *args, fatal=fatal, note=note, **kwargs)
|
||||
return self._parse_json(webpage, video_id, fatal=fatal)
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, note='Revisiting webpage')
|
||||
|
||||
def _extract_formats(self, video_info):
|
||||
media_info = traverse_obj(video_info, ('page_info', 'media_info'))
|
||||
formats = traverse_obj(media_info, (
|
||||
'playback_list', lambda _, v: url_or_none(v['play_info']['url']), 'play_info', {
|
||||
'url': 'url',
|
||||
'format': ('quality_desc', {str}),
|
||||
'format_id': ('label', {str}),
|
||||
'ext': ('mime', {mimetype2ext}),
|
||||
'tbr': ('bitrate', {int_or_none}, {lambda x: x or None}),
|
||||
'vcodec': ('video_codecs', {str}),
|
||||
'fps': ('fps', {int_or_none}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
'acodec': ('audio_codecs', {str}),
|
||||
'asr': ('audio_sample_rate', {int_or_none}),
|
||||
'audio_channels': ('audio_channels', {int_or_none}),
|
||||
}))
|
||||
if not formats: # fallback, should be barely used
|
||||
for url in set(traverse_obj(media_info, (..., {url_or_none}))):
|
||||
if 'label=' in url: # filter out non-video urls
|
||||
format_id, resolution = self._search_regex(
|
||||
r'label=(\w+)&template=(\d+x\d+)', url, 'format info',
|
||||
group=(1, 2), default=(None, None))
|
||||
title = self._html_extract_title(webpage)
|
||||
|
||||
video_formats = compat_parse_qs(self._search_regex(
|
||||
r'video-sources=\\\"(.+?)\"', webpage, 'video_sources'))
|
||||
|
||||
formats = []
|
||||
supported_resolutions = (480, 720)
|
||||
for res in supported_resolutions:
|
||||
vid_urls = video_formats.get(compat_str(res))
|
||||
if not vid_urls or not isinstance(vid_urls, list):
|
||||
continue
|
||||
|
||||
vid_url = vid_urls[0]
|
||||
formats.append({
|
||||
'url': url,
|
||||
'format_id': format_id,
|
||||
**parse_resolution(resolution),
|
||||
**traverse_obj(media_info, (
|
||||
'video_details', lambda _, v: v['label'].startswith(format_id), {
|
||||
'size': ('size', {int_or_none}),
|
||||
'tbr': ('bitrate', {int_or_none}),
|
||||
}
|
||||
), get_all=False),
|
||||
'url': vid_url,
|
||||
'height': res,
|
||||
})
|
||||
return formats
|
||||
|
||||
def _parse_video_info(self, video_info, video_id=None):
|
||||
uploader = self._og_search_property(
|
||||
'nick-name', webpage, 'uploader', default=None)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'extractor_key': WeiboIE.ie_key(),
|
||||
'extractor': WeiboIE.IE_NAME,
|
||||
'formats': self._extract_formats(video_info),
|
||||
'http_headers': {'Referer': 'https://weibo.com/'},
|
||||
'_old_archive_ids': [make_archive_id('WeiboMobile', video_id)],
|
||||
**traverse_obj(video_info, {
|
||||
'id': (('id', 'id_str', 'mid'), {str_or_none}),
|
||||
'display_id': ('mblogid', {str_or_none}),
|
||||
'title': ('page_info', 'media_info', ('video_title', 'kol_title', 'name'), {str}, {lambda x: x or None}),
|
||||
'description': ('text_raw', {str}),
|
||||
'duration': ('page_info', 'media_info', 'duration', {int_or_none}),
|
||||
'timestamp': ('page_info', 'media_info', 'video_publish_time', {int_or_none}),
|
||||
'thumbnail': ('page_info', 'page_pic', {url_or_none}),
|
||||
'uploader': ('user', 'screen_name', {str}),
|
||||
'uploader_id': ('user', ('id', 'id_str'), {str_or_none}),
|
||||
'uploader_url': ('user', 'profile_url', {lambda x: urljoin('https://weibo.com/', x)}),
|
||||
'view_count': ('page_info', 'media_info', 'online_users_number', {int_or_none}),
|
||||
'like_count': ('attitudes_count', {int_or_none}),
|
||||
'repost_count': ('reposts_count', {int_or_none}),
|
||||
}, get_all=False),
|
||||
'tags': traverse_obj(video_info, ('topic_struct', ..., 'topic_title', {str})) or None,
|
||||
'title': title,
|
||||
'uploader': uploader,
|
||||
'formats': formats
|
||||
}
|
||||
|
||||
|
||||
class WeiboIE(WeiboBaseIE):
|
||||
_VALID_URL = r'https?://(?:m\.weibo\.cn/status|(?:www\.)?weibo\.com/\d+)/(?P<id>[a-zA-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://weibo.com/7827771738/N4xlMvjhI',
|
||||
'info_dict': {
|
||||
'id': '4910815147462302',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'N4xlMvjhI',
|
||||
'title': '【睡前消息暑假版第一期:拉泰国一把 对中国有好处】',
|
||||
'description': 'md5:e2637a7673980d68694ea7c43cf12a5f',
|
||||
'duration': 918,
|
||||
'timestamp': 1686312819,
|
||||
'upload_date': '20230609',
|
||||
'thumbnail': r're:https://.*\.jpg',
|
||||
'uploader': '睡前视频基地',
|
||||
'uploader_id': '7827771738',
|
||||
'uploader_url': 'https://weibo.com/u/7827771738',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'tags': ['泰国大选远进党获胜', '睡前消息', '暑期版'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://m.weibo.cn/status/4189191225395228',
|
||||
class WeiboMobileIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://m\.weibo\.cn/status/(?P<id>[0-9]+)(\?.+)?'
|
||||
_TEST = {
|
||||
'url': 'https://m.weibo.cn/status/4189191225395228?wm=3333_2001&sourcetype=weixin&featurecode=newtitle&from=singlemessage&isappinstalled=0',
|
||||
'info_dict': {
|
||||
'id': '4189191225395228',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'FBqgOmDxO',
|
||||
'title': '柴犬柴犬的秒拍视频',
|
||||
'description': 'md5:80f461ab5cdae6bbdb70efbf5a1db24f',
|
||||
'duration': 53,
|
||||
'timestamp': 1514264429,
|
||||
'upload_date': '20171226',
|
||||
'thumbnail': r're:https://.*\.jpg',
|
||||
'uploader': '柴犬柴犬',
|
||||
'uploader_id': '5926682210',
|
||||
'uploader_url': 'https://weibo.com/u/5926682210',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'title': '午睡当然是要甜甜蜜蜜的啦',
|
||||
'uploader': '柴犬柴犬'
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://weibo.com/0/4224132150961381',
|
||||
'note': 'no playback_list example',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
# to get Referer url for genvisitor
|
||||
webpage = self._download_webpage(url, video_id, note='visit the page')
|
||||
|
||||
return self._parse_video_info(self._weibo_download_json(
|
||||
f'https://weibo.com/ajax/statuses/show?id={video_id}', video_id))
|
||||
weibo_info = self._parse_json(self._search_regex(
|
||||
r'var\s+\$render_data\s*=\s*\[({.*})\]\[0\]\s*\|\|\s*{};',
|
||||
webpage, 'js_code', flags=re.DOTALL),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
status_data = weibo_info.get('status', {})
|
||||
page_info = status_data.get('page_info')
|
||||
title = status_data['status_title']
|
||||
uploader = status_data.get('user', {}).get('screen_name')
|
||||
|
||||
class WeiboVideoIE(WeiboBaseIE):
    """Resolve ``weibo.com/tv/show/<id>`` URLs to a ``weibo.com/0/<mid>`` URL for WeiboIE."""

    _VALID_URL = r'https?://(?:www\.)?weibo\.com/tv/show/(?P<id>\d+:\d+)'
    _TESTS = [{
        'url': 'https://weibo.com/tv/show/1034:4797699866951785?from=old_pc_videoshow',
        'info_dict': {
            'id': '4797700463137878',
            'ext': 'mp4',
            'display_id': 'LEZDodaiW',
            'title': '呃,稍微了解了一下靡烟miya,感觉这东西也太二了',
            'description': '呃,稍微了解了一下靡烟miya,感觉这东西也太二了 http://t.cn/A6aerGsM ',
            'duration': 76,
            'timestamp': 1659344278,
            'upload_date': '20220801',
            'thumbnail': r're:https://.*\.jpg',
            'uploader': '君子爱财陈平安',
            'uploader_id': '3905382233',
            'uploader_url': 'https://weibo.com/u/3905382233',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
        }
    }]

    def _real_extract(self, url):
        """Query the component API for the video's ``mid`` and delegate to WeiboIE."""
        video_id = self._match_id(url)

        payload = f'data={{"Component_Play_Playinfo":{{"oid":"{video_id}"}}}}'.encode()
        # The ':' of the id is percent-encoded inside the `page` query value
        api_url = f'https://weibo.com/tv/api/component?page=%2Ftv%2Fshow%2F{video_id.replace(":", "%3A")}'
        response = self._weibo_download_json(
            api_url, video_id, headers={'Referer': url}, data=payload)
        play_info = response['data']['Component_Play_Playinfo']

        return self.url_result(f'https://weibo.com/0/{play_info["mid"]}', WeiboIE)
|
||||
|
||||
|
||||
class WeiboUserIE(WeiboBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?weibo\.com/u/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://weibo.com/u/2066652961?tabtype=video',
|
||||
'info_dict': {
|
||||
'id': '2066652961',
|
||||
'title': '萧影殿下的视频',
|
||||
'description': '萧影殿下的全部视频',
|
||||
'uploader': '萧影殿下',
|
||||
},
|
||||
'playlist_mincount': 195,
|
||||
}]
|
||||
|
||||
def _fetch_page(self, uid, cursor=0, page=1):
    """Download one page of the user's waterfall content and return its ``data`` member.

    ``page`` is only used in the progress note; ``cursor`` is sent to the API.
    """
    response = self._weibo_download_json(
        'https://weibo.com/ajax/profile/getWaterFallContent', uid,
        query={'uid': uid, 'cursor': cursor},
        note=f'Downloading videos page {page}')
    return response['data']
|
||||
|
||||
def _entries(self, uid, first_page):
    """Yield parsed video info for every dict in each page's ``list``.

    Starts from the already-downloaded ``first_page`` and keeps fetching
    pages with the returned ``next_cursor``. It stops when that cursor is
    missing, zero, non-numeric or negative.
    """
    response = first_page
    page = 1
    while True:
        for video_info in traverse_obj(response, ('list', ..., {dict})):
            yield self._parse_video_info(video_info)

        cursor = response.get('next_cursor')
        # A falsy cursor (None, 0, unparsable) is mapped to -1 and ends the listing
        if (int_or_none(cursor) or -1) < 0:
            return

        page += 1
        response = self._fetch_page(uid, cursor, page)
|
||||
|
||||
def _real_extract(self, url):
|
||||
uid = self._match_id(url)
|
||||
first_page = self._fetch_page(uid)
|
||||
uploader = traverse_obj(first_page, ('list', ..., 'user', 'screen_name', {str}), get_all=False)
|
||||
metainfo = {
|
||||
'title': f'{uploader}的视频',
|
||||
'description': f'{uploader}的全部视频',
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'uploader': uploader,
|
||||
} if uploader else {}
|
||||
|
||||
return self.playlist_result(self._entries(uid, first_page), uid, **metainfo)
|
||||
'url': page_info['media_info']['stream_url']
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user