Compare commits

...

21 Commits

Author SHA1 Message Date
Lev
1029c0e373
Merge 8b3127cf67 into f9d98509a8 2024-11-17 20:35:13 +01:00
qbnu
f9d98509a8
[ie/ctvnews] Fix playlist ID extraction (#8892)
Authored by: qbnu
2024-11-17 19:35:10 +00:00
sepro
37cd7660ea
[ie/youtube:tab] Fix podcasts tab extraction (#11567)
Authored by: seproDev
2024-11-17 19:46:04 +01:00
ChocoLZS
d867f99622
[ie/PiaLive] Add extractor (#10811)
Authored by: ChocoLZS
2024-11-17 19:41:57 +01:00
doe1080
10fc719bc7
[cleanup] Remove dead extractors (#11566)
- Removes MildomClipIE, MildomIE, MildomUserVodIE, MildomVodIE
- Removes PokemonIE, PokemonWatchIE
- Removes VeohIE, VeohUserIE

Closes #3373, Closes #7059
Authored by: doe1080
2024-11-17 16:22:40 +00:00
krichbanana
eb15fd5a32
[ie/kenh14] Add extractor (#3996)
Closes #3937
Authored by: krichbanana, pzhlkj6612

Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
2024-11-17 14:12:26 +00:00
sepro
7cecd299e4
[ie/chaturbate] Don't break embed detection (#11565)
Bugfix for 720b3dc453

Authored by: seproDev
2024-11-17 13:32:12 +01:00
bashonly
8b3127cf67
Merge branch 'master' into mutagen-metadata 2024-02-21 21:10:05 -06:00
Lev Plyusnin
aa3498b17e
Remove Musepack from supported formats 2024-01-03 16:42:24 +07:00
Lev Plyusnin
bbc5ee2b97
Bring MutagenMetadataPP back
This reverts commit 2598790093.
2024-01-03 15:20:26 +07:00
Lev Plyusnin
2598790093
Revert MutagenMetadataPP 2024-01-03 15:16:10 +07:00
Lev Plyusnin
dca6384283
Update README and fix IE documentation typo 2024-01-03 15:05:08 +07:00
Lev Plyusnin
c6246594cf
Update README 2024-01-03 14:55:47 +07:00
Lev Plyusnin
c3fe956e87
Revert unrelated change 2024-01-03 14:52:19 +07:00
Lev Plyusnin
41c3dab547
Revert unrelated changes 2024-01-03 14:50:55 +07:00
Lev Plyusnin
265e0f7154
Rename new fields
- Moved fix_deprecated_fields into _fill_common_fields
2024-01-03 14:12:02 +07:00
pukkandan
ac52bf0952
Update yt_dlp/YoutubeDL.py 2024-01-03 09:16:21 +05:30
pukkandan
d60ad19944
Update yt_dlp/extractor/common.py 2024-01-03 09:10:53 +05:30
pukkandan
a691696290
Apply suggestions from code review 2024-01-03 09:10:07 +05:30
pukkandan
698199b0e8
Apply suggestions from code review 2024-01-03 09:07:28 +05:30
Lev Plyusnin
071326c0cc
[ie] Add new fields with proper support for multiple values 2024-01-03 08:35:28 +07:00
14 changed files with 488 additions and 711 deletions

View File

@ -691,6 +691,11 @@ def get_postprocessors(opts):
'add_metadata': opts.addmetadata,
'add_infojson': opts.embed_infojson,
}
# MutagenMetadata must run after FFmpegMetadata
if opts.addmetadata:
yield {
'key': 'MutagenMetadata',
}
# Deprecated
# This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment
# but must be below EmbedSubtitle and FFmpegMetadata

View File

@ -946,6 +946,10 @@ from .kaltura import KalturaIE
from .kankanews import KankaNewsIE
from .karaoketv import KaraoketvIE
from .kelbyone import KelbyOneIE
from .kenh14 import (
Kenh14PlaylistIE,
Kenh14VideoIE,
)
from .khanacademy import (
KhanAcademyIE,
KhanAcademyUnitIE,
@ -1135,12 +1139,6 @@ from .microsoftembed import (
MicrosoftMediusIE,
)
from .microsoftstream import MicrosoftStreamIE
from .mildom import (
MildomClipIE,
MildomIE,
MildomUserVodIE,
MildomVodIE,
)
from .minds import (
MindsChannelIE,
MindsGroupIE,
@ -1522,8 +1520,8 @@ from .pgatour import PGATourIE
from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
from .pialive import PiaLiveIE
from .piapro import PiaproIE
from .piaulizaportal import PIAULIZAPortalIE
from .picarto import (
PicartoIE,
PicartoVodIE,
@ -1559,10 +1557,6 @@ from .podbayfm import (
)
from .podchaser import PodchaserIE
from .podomatic import PodomaticIE
from .pokemon import (
PokemonIE,
PokemonWatchIE,
)
from .pokergo import (
PokerGoCollectionIE,
PokerGoIE,
@ -2256,6 +2250,10 @@ from .ufctv import (
)
from .ukcolumn import UkColumnIE
from .uktvplay import UKTVPlayIE
from .uliza import (
UlizaPlayerIE,
UlizaPortalIE,
)
from .umg import UMGDeIE
from .unistra import UnistraIE
from .unity import UnityIE
@ -2284,10 +2282,6 @@ from .utreon import UtreonIE
from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE
from .veo import VeoIE
from .veoh import (
VeohIE,
VeohUserIE,
)
from .vesti import VestiIE
from .vevo import (
VevoIE,

View File

@ -79,7 +79,7 @@ class ChaturbateIE(InfoExtractor):
'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True),
}
def _extract_from_webpage(self, video_id, tld):
def _extract_from_html(self, video_id, tld):
webpage = self._download_webpage(
f'https://chaturbate.{tld}/{video_id}/', video_id,
headers=self.geo_verification_headers(), impersonate=True)
@ -151,4 +151,4 @@ class ChaturbateIE(InfoExtractor):
def _real_extract(self, url):
video_id, tld = self._match_valid_url(url).group('id', 'tld')
return self._extract_from_api(video_id, tld) or self._extract_from_webpage(video_id, tld)
return self._extract_from_api(video_id, tld) or self._extract_from_html(video_id, tld)

View File

@ -5,10 +5,10 @@ from ..utils import orderedSet
class CTVNewsIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
_VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)(?:$|[#?&])'
_TESTS = [{
'url': 'http://www.ctvnews.ca/video?clipId=901995',
'md5': '9b8624ba66351a23e0b6e1391971f9af',
'md5': 'b608f466c7fa24b9666c6439d766ab7e',
'info_dict': {
'id': '901995',
'ext': 'flv',
@ -16,6 +16,14 @@ class CTVNewsIE(InfoExtractor):
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
'timestamp': 1467286284,
'upload_date': '20160630',
'categories': [],
'tags': [],
'season_id': 57981,
'duration': 764.631,
'series': 'CTV News National story',
'thumbnail': r're:^https?://.*\.jpg$',
'season': 'Season 0',
'season_number': 0,
},
}, {
'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
@ -31,6 +39,13 @@ class CTVNewsIE(InfoExtractor):
'id': '1.2876780',
},
'playlist_mincount': 100,
}, {
'url': 'https://www.ctvnews.ca/it-s-been-23-years-since-toronto-called-in-the-army-after-a-major-snowstorm-1.5736957',
'info_dict':
{
'id': '1.5736957',
},
'playlist_mincount': 6,
}, {
'url': 'http://www.ctvnews.ca/1.810401',
'only_matching': True,

160
yt_dlp/extractor/kenh14.py Normal file
View File

@ -0,0 +1,160 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
extract_attributes,
get_element_by_class,
get_element_html_by_attribute,
get_elements_html_by_class,
int_or_none,
parse_duration,
parse_iso8601,
remove_start,
strip_or_none,
unescapeHTML,
update_url,
url_or_none,
)
from ..utils.traversal import traverse_obj
class Kenh14VideoIE(InfoExtractor):
    """Extract a single video from video.kenh14.vn.

    The media path is taken from the ``data-vid`` attribute of the page's
    ``type="VideoStream"`` element. The metadata API and the manifest
    description are both requested non-fatally.
    """

    _VALID_URL = r'https?://video\.kenh14\.vn/(?:video/)?[\w-]+-(?P<id>[0-9]+)\.chn'
    _TESTS = [{
        'url': 'https://video.kenh14.vn/video/mo-hop-iphone-14-pro-max-nguon-unbox-therapy-316173.chn',
        'md5': '1ed67f9c3a1e74acf15db69590cf6210',
        'info_dict': {
            'id': '316173',
            'ext': 'mp4',
            'title': 'Video mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
            'description': 'Video mở hộp iPhone 14 Pro MaxVideo mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
            'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
            'tags': [],
            'uploader': 'Unbox Therapy',
            'upload_date': '20220517',
            'view_count': int,
            'duration': 722.86,
            'timestamp': 1652764468,
        },
    }, {
        'url': 'https://video.kenh14.vn/video-316174.chn',
        'md5': '2b41877d2afaf4a3f487ceda8e5c7cbd',
        'info_dict': {
            'id': '316174',
            'ext': 'mp4',
            'title': 'Khoảnh khắc VĐV nằm gục khóc sau chiến thắng: 7 năm trời Việt Nam mới có HCV kiếm chém nữ, chỉ có 8 tháng để khổ luyện trước khi lên sàn đấu',
            'description': 'md5:de86aa22e143e2b277bce8ec9c6f17dc',
            'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
            'tags': [],
            'upload_date': '20220517',
            'view_count': int,
            'duration': 70.04,
            'timestamp': 1652766021,
        },
    }, {
        'url': 'https://video.kenh14.vn/0-344740.chn',
        'md5': 'b843495d5e728142c8870c09b46df2a9',
        'info_dict': {
            'id': '344740',
            'ext': 'mov',
            'title': 'Kỳ Duyên đầy căng thẳng trong buổi ra quân đi Miss Universe, nghi thức tuyên thuệ lần đầu xuất hiện gây nhiều tranh cãi',
            'description': 'md5:2a2dbb4a7397169fb21ee68f09160497',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.jpg$',
            'tags': ['kỳ duyên', 'Kỳ Duyên tuyên thuệ', 'miss universe'],
            'uploader': 'Quang Vũ',
            'upload_date': '20241024',
            'view_count': int,
            'duration': 198.88,
            'timestamp': 1729741590,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        player_html = get_element_html_by_attribute('type', 'VideoStream', webpage)
        attrs = extract_attributes(player_html or '')
        # Scheme-less media path; a missing player element surfaces here as KeyError
        direct_url = attrs['data-vid']

        metadata_url = 'https://api.kinghub.vn/video/api/v1/detailVideoByGet?FileName={}'.format(
            remove_start(direct_url, 'kenh14cdn.com/'))
        metadata = self._download_json(metadata_url, video_id, fatal=False)

        # The direct file is always offered; HLS/DASH are added only when advertised
        formats = [{'url': f'https://{direct_url}', 'format_id': 'http', 'quality': 1}]
        subtitles = {}
        video_data = self._download_json(
            f'https://{direct_url}.json', video_id, note='Downloading video data', fatal=False)

        hls_url = traverse_obj(video_data, ('hls', {url_or_none}))
        if hls_url:
            hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
                hls_url, video_id, m3u8_id='hls', fatal=False)
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subtitles, target=subtitles)

        dash_url = traverse_obj(video_data, ('mpd', {url_or_none}))
        if dash_url:
            dash_formats, dash_subtitles = self._extract_mpd_formats_and_subtitles(
                dash_url, video_id, mpd_id='dash', fatal=False)
            formats.extend(dash_formats)
            self._merge_subtitles(dash_subtitles, target=subtitles)

        api_fields = traverse_obj(metadata, {
            'duration': ('duration', {parse_duration}),
            'uploader': ('author', {strip_or_none}),
            'timestamp': ('uploadtime', {parse_iso8601(delimiter=' ')}),
            'view_count': ('views', {int_or_none}),
        })

        # Prefer the API title, then OpenGraph, then the on-page heading
        title = (
            traverse_obj(metadata, ('title', {strip_or_none}))
            or clean_html(self._og_search_title(webpage))
            or clean_html(get_element_by_class('vdbw-title', webpage)))
        description = (
            clean_html(self._og_search_description(webpage))
            or clean_html(get_element_by_class('vdbw-sapo', webpage)))
        thumbnail = self._og_search_thumbnail(webpage) or attrs.get('data-thumb')
        # The keywords meta tag is ';'-separated; empty entries are dropped
        tags = traverse_obj(self._html_search_meta('keywords', webpage), (
            {lambda x: x.split(';')}, ..., filter))

        return {
            **api_fields,
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'description': description,
            'thumbnail': thumbnail,
            'tags': tags,
        }
class Kenh14PlaylistIE(InfoExtractor):
    """Extract a playlist page from video.kenh14.vn as a list of Kenh14VideoIE entries."""

    _VALID_URL = r'https?://video\.kenh14\.vn/playlist/[\w-]+-(?P<id>[0-9]+)\.chn'
    _TESTS = [{
        'url': 'https://video.kenh14.vn/playlist/tran-tinh-naked-love-mua-2-71.chn',
        'info_dict': {
            'id': '71',
            'title': 'Trần Tình (Naked love) mùa 2',
            'description': 'md5:e9522339304956dea931722dd72eddb2',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
        },
        'playlist_count': 9,
    }, {
        'url': 'https://video.kenh14.vn/playlist/0-72.chn',
        'info_dict': {
            'id': '72',
            'title': 'Lau Lại Đầu Từ',
            'description': 'Cùng xem xưa và nay có gì khác biệt nhé!',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
        },
        'playlist_count': 6,
    }]

    def _real_extract(self, url):
        """Return a playlist result whose entries point at the individual video pages."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        category_detail = get_element_by_class('category-detail', webpage) or ''
        # First JSON-LD object that has both a name and an alternateName
        embed_info = traverse_obj(
            self._yield_json_ld(webpage, playlist_id),
            (lambda _, v: v['name'] and v['alternateName'], any)) or {}

        def entry_url(item_html):
            # Each "video-item" element carries the numeric video id in data-id
            return 'https://video.kenh14.vn/video/video-{}.chn'.format(
                extract_attributes(item_html)['data-id'])

        item_elements = get_elements_html_by_class('video-item', webpage)
        playlist_title = (
            clean_html(get_element_by_class('name', category_detail))
            or unescapeHTML(embed_info.get('name')))
        playlist_description = (
            clean_html(get_element_by_class('description', category_detail))
            or unescapeHTML(embed_info.get('alternateName')))
        # The query string is removed from the OpenGraph thumbnail URL
        thumbnail = traverse_obj(
            self._og_search_thumbnail(webpage),
            ({url_or_none}, {update_url(query=None)}))

        return self.playlist_from_matches(
            item_elements, playlist_id, playlist_title,
            getter=entry_url, ie=Kenh14VideoIE,
            playlist_description=playlist_description, thumbnail=thumbnail)

View File

@ -1,291 +0,0 @@
import functools
import json
import uuid
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
determine_ext,
dict_get,
float_or_none,
traverse_obj,
)
class MildomBaseIE(InfoExtractor):
    """Shared API helper for the Mildom extractors."""

    # Generated on first API call and then stored on the instance
    _GUEST_ID = None

    def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None):
        """Call a Mildom JSON endpoint and return the ``body`` member of its reply.

        A truthy *body* is sent as a JSON payload with a matching Content-Type
        header. Raises ExtractorError (expected) when the reply's ``code`` is
        not 0.
        """
        if not self._GUEST_ID:
            self._GUEST_ID = f'pc-gp-{uuid.uuid4()}'

        if body:
            payload = json.dumps(body).encode()
            headers = {'Content-Type': 'application/json'}
        else:
            payload = None
            headers = {}

        full_query = {
            '__guest_id': self._GUEST_ID,
            '__platform': 'web',
            **(query or {}),
        }
        content = self._download_json(
            url, video_id, note=note, data=payload, headers=headers, query=full_query)

        if content['code'] != 0:
            raise ExtractorError(
                f'Mildom says: {content["message"]} (code {content["code"]})',
                expected=True)
        return content['body']
class MildomIE(MildomBaseIE):
    """Extract the ongoing live stream of a Mildom user."""

    IE_NAME = 'mildom'
    IE_DESC = 'Record ongoing live by specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/(?P<id>\d+)'

    def _real_extract(self, url):
        """Return a live info dict; the id from the URL is used as the uploader id."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(f'https://www.mildom.com/{video_id}', video_id)

        live_api = 'https://cloudac.mildom.com/nonolive/gappserv/live'

        enterstudio = self._call_api(
            f'{live_api}/enterstudio', video_id,
            note='Downloading live metadata', query={'user_id': video_id})
        # Falls back to the user id when the API gives no log_id
        result_video_id = enterstudio.get('log_id', video_id)

        servers = self._call_api(
            f'{live_api}/liveserver', result_video_id,
            note='Downloading live server list', query={
                'user_id': video_id,
                'live_server_type': 'hls',
            })

        token_reply = self._call_api(
            f'{live_api}/token', result_video_id,
            note='Obtaining live playback token', body={'host_id': video_id, 'type': 'hls'})
        playback_token = traverse_obj(token_reply, ('data', ..., 'token'), get_all=False)
        if not playback_token:
            raise ExtractorError('Failed to obtain live playback token')

        master_url = f'{servers["stream_server"]}/{video_id}_master.m3u8?{playback_token}'
        formats = self._extract_m3u8_formats(
            master_url, result_video_id, 'mp4', headers={
                'Referer': 'https://www.mildom.com/',
                'Origin': 'https://www.mildom.com',
            })
        # Attach the Referer to every format's http_headers as well
        for fmt in formats:
            fmt.setdefault('http_headers', {})['Referer'] = 'https://www.mildom.com/'

        title = (
            self._html_search_meta('twitter:description', webpage, default=None)
            or traverse_obj(enterstudio, 'anchor_intro'))
        description = traverse_obj(enterstudio, 'intro', 'live_intro', expected_type=str)
        timestamp = float_or_none(enterstudio.get('live_start_ms'), scale=1000)
        uploader = (
            self._html_search_meta('twitter:title', webpage, default=None)
            or traverse_obj(enterstudio, 'loginname'))

        return {
            'id': result_video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': video_id,
            'formats': formats,
            'is_live': True,
        }
class MildomVodIE(MildomBaseIE):
    """Extract a single Mildom playback (VOD) page."""

    IE_NAME = 'mildom:vod'
    IE_DESC = 'VOD in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+-?[0-9]*)'
    _TESTS = [{
        'url': 'https://www.mildom.com/playback/10882672/10882672-1597662269',
        'info_dict': {
            'id': '10882672-1597662269',
            'ext': 'mp4',
            'title': '始めてのミルダム配信じゃぃ!',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'upload_date': '20200817',
            'duration': 4138.37,
            'description': 'ゲームをしたくて!',
            'timestamp': 1597662269.0,
            'uploader_id': '10882672',
            'uploader': 'kson組長(けいそん)',
        },
    }, {
        'url': 'https://www.mildom.com/playback/10882672/10882672-1597758589870-477',
        'info_dict': {
            'id': '10882672-1597758589870-477',
            'ext': 'mp4',
            'title': '【kson】感染メイズ麻酔銃で無双する',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'timestamp': 1597759093.0,
            'uploader': 'kson組長(けいそん)',
            'duration': 4302.58,
            'uploader_id': '10882672',
            'description': 'このステージ絶対乗り越えたい',
            'upload_date': '20200818',
        },
    }, {
        'url': 'https://www.mildom.com/playback/10882672/10882672-buha9td2lrn97fk2jme0',
        'info_dict': {
            'id': '10882672-buha9td2lrn97fk2jme0',
            'ext': 'mp4',
            'title': '【kson組長】CART RACER!!!',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'uploader_id': '10882672',
            'uploader': 'kson組長(けいそん)',
            'upload_date': '20201104',
            'timestamp': 1604494797.0,
            'duration': 4657.25,
            'description': 'WTF',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for a playback URL (one audio-only plus per-level video formats)."""
        user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
        webpage = self._download_webpage(f'https://www.mildom.com/playback/{user_id}/{video_id}', video_id)

        playback_detail = self._call_api(
            'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id,
            note='Downloading playback metadata', query={
                'v_id': video_id,
            })
        autoplay = playback_detail['playback']

        audio_format = {
            'url': autoplay['audio_url'],
            'format_id': 'audio',
            'protocol': 'm3u8_native',
            'vcodec': 'none',
            'acodec': 'aac',
            'ext': 'm4a',
        }
        video_formats = [{
            'format_id': 'video-{}'.format(variant['name']),
            'url': variant['url'],
            'protocol': 'm3u8_native',
            # Width is derived from the variant's level and the source width/height ratio
            'width': variant['level'] * autoplay['video_width'] // autoplay['video_height'],
            'height': variant['level'],
            'vcodec': 'h264',
            'acodec': 'aac',
            'ext': 'mp4',
        } for variant in autoplay['video_link']]
        formats = [audio_format, *video_formats]

        page_title = self._html_search_meta(('og:description', 'description'), webpage, default=None)

        return {
            'id': video_id,
            'title': page_title or autoplay.get('title'),
            'description': traverse_obj(autoplay, 'video_intro'),
            'timestamp': float_or_none(autoplay.get('publish_time'), scale=1000),
            'duration': float_or_none(autoplay.get('video_length'), scale=1000),
            'thumbnail': dict_get(autoplay, ('upload_pic', 'video_pic')),
            'uploader': traverse_obj(autoplay, ('author_info', 'login_name')),
            'uploader_id': user_id,
            'formats': formats,
        }
class MildomClipIE(MildomBaseIE):
    """Extract a single Mildom clip."""

    IE_NAME = 'mildom:clip'
    IE_DESC = 'Clip in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/clip/(?P<id>(?P<user_id>\d+)-[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'https://www.mildom.com/clip/10042245-63921673e7b147ebb0806d42b5ba5ce9',
        'info_dict': {
            'id': '10042245-63921673e7b147ebb0806d42b5ba5ce9',
            'title': '全然違ったよ',
            'timestamp': 1619181890,
            'duration': 59,
            'thumbnail': r're:https?://.+',
            'uploader': 'ざきんぽ',
            'uploader_id': '10042245',
        },
    }, {
        'url': 'https://www.mildom.com/clip/10111524-ebf4036e5aa8411c99fb3a1ae0902864',
        'info_dict': {
            'id': '10111524-ebf4036e5aa8411c99fb3a1ae0902864',
            'title': 'かっこいい',
            'timestamp': 1621094003,
            'duration': 59,
            'thumbnail': r're:https?://.+',
            'uploader': '(ルーキー',
            'uploader_id': '10111524',
        },
    }, {
        'url': 'https://www.mildom.com/clip/10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
        'info_dict': {
            'id': '10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
            'title': '',
            'timestamp': 1614769431,
            'duration': 31,
            'thumbnail': r're:https?://.+',
            'uploader': 'ドルゴルスレンギーン=ダグワドルジ',
            'uploader_id': '10660174',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for a clip URL; the media URL comes from the clip detail API."""
        user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
        webpage = self._download_webpage(f'https://www.mildom.com/clip/{video_id}', video_id)

        clip_detail = self._call_api(
            'https://cloudac-cf-jp.mildom.com/nonolive/videocontent/clip/detail', video_id,
            note='Downloading playback metadata', query={
                'clip_id': video_id,
            })

        # The page meta description is preferred over the API title
        page_title = self._html_search_meta(
            ('og:description', 'description'), webpage, default=None)

        return {
            'id': video_id,
            'title': page_title or clip_detail.get('title'),
            'timestamp': float_or_none(clip_detail.get('create_time')),
            'duration': float_or_none(clip_detail.get('length')),
            'thumbnail': clip_detail.get('cover'),
            'uploader': traverse_obj(clip_detail, ('user_info', 'loginname')),
            'uploader_id': user_id,
            'url': clip_detail['url'],
            'ext': determine_ext(clip_detail.get('url'), 'mp4'),
        }
class MildomUserVodIE(MildomBaseIE):
    """Extract all VODs listed on a Mildom user profile as a lazily paged playlist."""

    IE_NAME = 'mildom:user:vod'
    IE_DESC = 'Download all VODs from specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/profile/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.mildom.com/profile/10093333',
        'info_dict': {
            'id': '10093333',
            'title': 'Uploads from ねこばたけ',
        },
        'playlist_mincount': 732,
    }, {
        'url': 'https://www.mildom.com/profile/10882672',
        'info_dict': {
            'id': '10882672',
            'title': 'Uploads from kson組長(けいそん)',
        },
        'playlist_mincount': 201,
    }]

    def _fetch_page(self, user_id, page):
        """Yield url results for one playback-list page (*page* is incremented before the request)."""
        page_number = page + 1
        reply = self._call_api(
            'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
            user_id, note=f'Downloading page {page_number}', query={
                'user_id': user_id,
                'page': page_number,
                'limit': '30',
            })
        if not reply:
            return
        for item in reply:
            v_id = item.get('v_id')
            # Entries without a v_id are skipped
            if v_id:
                yield self.url_result(f'https://www.mildom.com/playback/{user_id}/{v_id}')

    def _real_extract(self, url):
        """Return a playlist titled after the profile's login name."""
        user_id = self._match_id(url)
        self.to_screen(f'This will download all VODs belonging to user. To download ongoing live video, use "https://www.mildom.com/{user_id}" instead')

        profile_reply = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id,
            query={'user_id': user_id}, note='Downloading user profile')
        profile = profile_reply['user_info']

        # Page size 30 matches the 'limit' sent by _fetch_page
        entries = OnDemandPagedList(functools.partial(self._fetch_page, user_id), 30)
        return self.playlist_result(
            entries, user_id, f'Uploads from {profile["loginname"]}')

122
yt_dlp/extractor/pialive.py Normal file
View File

@ -0,0 +1,122 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
extract_attributes,
get_element_by_class,
get_element_html_by_class,
multipart_encode,
str_or_none,
unified_timestamp,
url_or_none,
)
from ..utils.traversal import traverse_obj
class PiaLiveIE(InfoExtractor):
    """Extract streams from player.pia-live.jp.

    The page's ``programCode``/``articleCode`` script variables identify the
    program. The actual player URL is obtained from the PIA LIVE API and
    returned as a url_transparent result; comments are fetched lazily.
    """

    _VALID_URL = r'https?://player\.pia-live\.jp/stream/(?P<id>[\w-]+)'
    _PLAYER_ROOT_URL = 'https://player.pia-live.jp/'
    _PIA_LIVE_API_URL = 'https://api.pia-live.jp'
    _API_KEY = 'kfds)FKFps-dms9e'

    _TESTS = [{
        'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krUDqGOwN4d61dCWQYOd6CTxl4hjya9dsfEZGsM4uGOUdax60lEI4twsXGXf7crmz8Gk__GhupTrWxA7RFRVt76',
        'info_dict': {
            'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
            'display_id': '2431867_001',
            'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
            'live_status': 'was_live',
            'comment_count': int,
        },
        'params': {
            'getcomments': True,
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
        'skip': 'The video is no longer available',
    }, {
        'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krJdu0GVBVbVy01IwpJ6J3qBEm3d9TCTt1d0eWpsZGj7DrOjVOmS7GAWGwyscMgiThopJvzgWC4H5b-7XQjAfRZ',
        'info_dict': {
            'id': '9ce8b8ba-f6d1-4d1f-83a0-18c3148ded93',
            'display_id': '2431867_002',
            'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
            'live_status': 'was_live',
            'comment_count': int,
        },
        'params': {
            'getcomments': True,
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
        'skip': 'The video is no longer available',
    }]

    def _extract_var(self, variable, html):
        """Return the quoted string assigned to the JS variable *variable* in *html*."""
        return self._search_regex(
            rf'(?:var|const|let)\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
            html, f'variable {variable}', group='value')

    def _real_extract(self, url):
        """Resolve the player URL for the stream page at *url*.

        Raises ExtractorError (expected) when the page carries a ``play-end``
        element. Returns an ``is_upcoming`` info dict when the page shows a
        parsable start time.
        """
        video_key = self._match_id(url)
        webpage = self._download_webpage(url, video_key)

        program_code = self._extract_var('programCode', webpage)
        article_code = self._extract_var('articleCode', webpage)
        title = self._html_extract_title(webpage)

        if get_element_html_by_class('play-end', webpage):
            raise ExtractorError('The video is no longer available', expected=True, video_id=program_code)

        if start_info := clean_html(get_element_by_class('play-waiting__date', webpage)):
            # With fatal=False a non-matching regex yields None rather than a tuple,
            # so normalise before unpacking to avoid a TypeError.
            date, time = self._search_regex(
                r'(?P<date>\d{4}/\d{1,2}/\d{1,2})\([月火水木金土日]\)(?P<time>\d{2}:\d{2})',
                start_info, 'start_info', fatal=False, group=('date', 'time')) or (None, None)
            if date and time:
                release_timestamp_str = f'{date} {time} +09:00'
                release_timestamp = unified_timestamp(release_timestamp_str)
                self.raise_no_formats(f'The video will be available after {release_timestamp_str}', expected=True)
                return {
                    'id': program_code,
                    'title': title,
                    'live_status': 'is_upcoming',
                    'release_timestamp': release_timestamp,
                }

        payload, content_type = multipart_encode({
            'play_url': video_key,
            'api_key': self._API_KEY,
        })
        # The same multipart body and headers are reused for the chat API in _get_comments
        api_data_and_headers = {
            'data': payload,
            'headers': {'Content-Type': content_type, 'Referer': self._PLAYER_ROOT_URL},
        }

        player_tag_list = self._download_json(
            f'{self._PIA_LIVE_API_URL}/perf/player-tag-list/{program_code}', program_code,
            'Fetching player tag list', 'Unable to fetch player tag list', **api_data_and_headers)

        return self.url_result(
            extract_attributes(player_tag_list['data']['movie_one_tag'])['src'],
            url_transparent=True, title=title, display_id=program_code,
            __post_extractor=self.extract_comments(program_code, article_code, api_data_and_headers))

    def _get_comments(self, program_code, article_code, api_data_and_headers):
        """Yield comment dicts parsed from the chat page's ``_history`` array.

        Every request is non-fatal; nothing is yielded when the chat URL or the
        comment page cannot be obtained.
        """
        chat_room_url = traverse_obj(self._download_json(
            f'{self._PIA_LIVE_API_URL}/perf/chat-tag-list/{program_code}/{article_code}', program_code,
            'Fetching chat info', 'Unable to fetch chat info', fatal=False, **api_data_and_headers),
            ('data', 'chat_one_tag', {extract_attributes}, 'src', {url_or_none}))
        if not chat_room_url:
            return

        comment_page = self._download_webpage(
            chat_room_url, program_code, 'Fetching comment page', 'Unable to fetch comment page',
            fatal=False, headers={'Referer': self._PLAYER_ROOT_URL})
        if not comment_page:
            return

        # Each history entry is read positionally by index 0-4
        yield from traverse_obj(self._search_json(
            r'var\s+_history\s*=', comment_page, 'comment list',
            program_code, contains_pattern=r'\[(?s:.+)\]', fatal=False), (..., {
                'timestamp': (0, {int}),
                'author_is_uploader': (1, {lambda x: x == 2}),
                'author': (2, {str}),
                'text': (3, {str}),
                'id': (4, {str_or_none}),
            }))

View File

@ -1,70 +0,0 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
parse_qs,
time_seconds,
traverse_obj,
)
class PIAULIZAPortalIE(InfoExtractor):
    """Extract ulizaportal.jp pages by following the embedded player-api script."""

    IE_DESC = 'ulizaportal.jp - PIA LIVE STREAM'
    _VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
    _TESTS = [{
        'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
        'info_dict': {
            'id': '005f18b7-e810-5618-cb82-0987c5755d44',
            'title': 'プレゼンテーションプレイヤーのサンプル',
            'live_status': 'not_live',
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }, {
        'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
        'info_dict': {
            'id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
            'title': '【確認用】視聴サンプルページULIZA',
            'live_status': 'not_live',
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for a portal page; rejects URLs whose ``expires`` has passed."""
        video_id = self._match_id(url)

        # Signed links carry an 'expires' query parameter; refuse them once it has passed
        expires = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
        if expires and expires <= time_seconds():
            raise ExtractorError('The link is expired.', video_id=video_id, expected=True)

        webpage = self._download_webpage(url, video_id)

        player_data_url = self._search_regex(
            r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
            webpage, 'player data url')
        player_data = self._download_webpage(
            player_data_url, video_id, headers={'Referer': 'https://ulizaportal.jp/'},
            note='Fetching player data', errnote='Unable to fetch player data')

        m3u8_url = self._search_regex(
            r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data,
            'm3u8 url', default=None)
        formats = self._extract_m3u8_formats(m3u8_url, video_id, fatal=False)

        # The path segment after /hls/ in the first format URL selects the live status
        m3u8_type = self._search_regex(
            r'/hls/(dvr|video)/', traverse_obj(formats, (0, 'url')), 'm3u8 type', default=None)
        status_by_type = {
            'video': 'is_live',
            'dvr': 'was_live',  # short-term archives
        }

        return {
            'id': video_id,
            'title': self._html_extract_title(webpage),
            'formats': formats,
            'live_status': status_by_type.get(m3u8_type, 'not_live'),  # VOD or long-term archives
        }

View File

@ -1,136 +0,0 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
extract_attributes,
int_or_none,
js_to_json,
merge_dicts,
)
class PokemonIE(InfoExtractor):
    """Extract pokemon.com episode pages and hand the media over to LimelightMedia."""

    _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))'
    _TESTS = [{
        'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/',
        'md5': '2fe8eaec69768b25ef898cda9c43062e',
        'info_dict': {
            'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4',
            'ext': 'mp4',
            'title': 'The Ol Raise and Switch!',
            'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
        },
        'add_id': ['LimelightMedia'],
    }, {
        # no data-video-title
        'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008',
        'info_dict': {
            'id': 'dfbaf830d7e54e179837c50c0c6cc0e1',
            'ext': 'mp4',
            'title': "Pokémon : L'ascension de Darkrai",
            'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5',
        },
        'add_id': ['LimelightMedia'],
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
        'only_matching': True,
    }, {
        'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/',
        'only_matching': True,
    }, {
        'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_transparent result pointing at ``limelight:media:<id>``."""
        video_id, display_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, video_id or display_id)

        # When the URL has no play= id, accept the first element with any 32-char id
        id_pattern = video_id or '[a-z0-9]{32}'
        video_element = self._search_regex(
            r'(<[^>]+data-video-id="{}"[^>]*>)'.format(id_pattern),
            webpage, 'video data element')
        video_data = extract_attributes(video_element)

        video_id = video_data['data-video-id']

        # Title fallbacks: element attribute, then pkm-title meta, then the us-title <h1>
        title = video_data.get('data-video-title')
        if not title:
            title = self._html_search_meta('pkm-title', webpage, ' title', default=None)
        if not title:
            title = self._search_regex(
                r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title')

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': f'limelight:media:{video_id}',
            'title': title,
            'description': video_data.get('data-video-summary'),
            'thumbnail': video_data.get('data-video-poster'),
            'series': 'Pokémon',
            'season_number': int_or_none(video_data.get('data-video-season')),
            'episode': title,
            'episode_number': int_or_none(video_data.get('data-video-episode')),
            'ie_key': 'LimelightMedia',
        }
class PokemonWatchIE(InfoExtractor):
    """Extract watch.pokemon.com player pages and hand the media over to LimelightMedia."""

    _VALID_URL = r'https?://watch\.pokemon\.com/[a-z]{2}-[a-z]{2}/(?:#/)?player(?:\.html)?\?id=(?P<id>[a-z0-9]{32})'
    _API_URL = 'https://www.pokemon.com/api/pokemontv/v2/channels/{0:}'
    _TESTS = [{
        'url': 'https://watch.pokemon.com/en-us/player.html?id=8309a40969894a8e8d5bc1311e9c5667',
        'md5': '62833938a31e61ab49ada92f524c42ff',
        'info_dict': {
            'id': '8309a40969894a8e8d5bc1311e9c5667',
            'ext': 'mp4',
            'title': 'Lillier and the Staff!',
            'description': 'md5:338841b8c21b283d24bdc9b568849f04',
        },
    }, {
        'url': 'https://watch.pokemon.com/en-us/#/player?id=3fe7752ba09141f0b0f7756d1981c6b2',
        'only_matching': True,
    }, {
        'url': 'https://watch.pokemon.com/de-de/player.html?id=b3c402e111a4459eb47e12160ab0ba07',
        'only_matching': True,
    }]

    def _extract_media(self, channel_array, video_id):
        """Return the media dict whose ``id`` equals *video_id*, or None if no channel has it."""
        for channel in channel_array:
            # A channel with a missing or null 'media' entry is skipped instead of raising TypeError
            for media in channel.get('media') or []:
                if media.get('id') == video_id:
                    return media
        return None

    def _real_extract(self, url):
        """Return a LimelightMedia url result, enriched with channel metadata when available.

        Raises ExtractorError (expected) when no channel lists the video id.
        """
        video_id = self._match_id(url)

        info = {
            '_type': 'url',
            'id': video_id,
            'url': f'limelight:media:{video_id}',
            'ie_key': 'LimelightMedia',
        }

        # API call can be avoided entirely if we are listing formats
        if self.get_param('listformats', False):
            return info

        webpage = self._download_webpage(url, video_id)
        build_vars = self._parse_json(self._search_regex(
            r'(?s)buildVars\s*=\s*({.*?})', webpage, 'build vars'),
            video_id, transform_source=js_to_json)
        region = build_vars.get('region')

        channel_array = self._download_json(self._API_URL.format(region), video_id)
        video_data = self._extract_media(channel_array, video_id)

        if video_data is None:
            raise ExtractorError(
                f'Video {video_id} does not exist', expected=True)

        info['_type'] = 'url_transparent'
        # Entries without an 'images' object get no thumbnail instead of raising AttributeError
        images = video_data.get('images') or {}

        return merge_dicts(info, {
            'title': video_data.get('title'),
            'description': video_data.get('description'),
            'thumbnail': images.get('medium') or images.get('small'),
            'series': 'Pokémon',
            'season_number': int_or_none(video_data.get('season')),
            'episode': video_data.get('title'),
            'episode_number': int_or_none(video_data.get('episode')),
        })

113
yt_dlp/extractor/uliza.py Normal file
View File

@ -0,0 +1,113 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
make_archive_id,
parse_qs,
time_seconds,
)
from ..utils.traversal import traverse_obj
class UlizaPlayerIE(InfoExtractor):
    """Extractor for player-api.p.uliza.jp player URLs (matched by their `name` query parameter)."""

    _VALID_URL = r'https://player-api\.p\.uliza\.jp/v1/players/[^?#]+\?(?:[^#]*&)?name=(?P<id>[^#&]+)'
    _TESTS = [{
        'url': 'https://player-api.p.uliza.jp/v1/players/timeshift-disabled/pia/admin?type=normal&playerobjectname=ulizaPlayer&name=livestream01_dvr&repeatable=true',
        'info_dict': {
            'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
            'ext': 'mp4',
            'title': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
            'live_status': 'was_live',
            '_old_archive_ids': ['piaulizaportal 88f3109a-f503-4d0f-a9f7-9f39ac745d84'],
        },
    }, {
        'url': 'https://player-api.p.uliza.jp/v1/players/uliza_jp_gallery_normal/promotion/admin?type=presentation&name=cookings&targetid=player1',
        'info_dict': {
            'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
            'ext': 'mp4',
            'title': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
            'live_status': 'not_live',
            '_old_archive_ids': ['piaulizaportal ae350126-5e22-4a7f-a8ac-8d0fd448b800'],
        },
    }, {
        'url': 'https://player-api.p.uliza.jp/v1/players/default-player/pia/admin?type=normal&name=pia_movie_uliza_fix&targetid=ulizahtml5&repeatable=true',
        'info_dict': {
            'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
            'ext': 'mp4',
            'title': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
            'live_status': 'not_live',
            '_old_archive_ids': ['piaulizaportal 0644ecc8-e354-41b4-b957-3b08a2d63df1'],
        },
    }]

    def _real_extract(self, url):
        """Download the player data, locate its m3u8 URL and return the formats.

        The video ID is the `ss` query parameter of the m3u8 URL, falling back
        to the `name` matched from the player URL. The live status is derived
        from the `/hls/<type>/` path segment of the first format URL.
        """
        player_name = self._match_id(url)
        player_source = self._download_webpage(
            url, player_name,
            note='Fetching player data', errnote='Unable to fetch player data',
            headers={'Referer': 'https://player-api.p.uliza.jp/'})

        playlist_url = self._search_regex(
            r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_source, 'm3u8 url')
        id_candidates = parse_qs(playlist_url).get('ss', [player_name])
        video_id = id_candidates[0]

        formats = self._extract_m3u8_formats(playlist_url, video_id)
        first_format_url = traverse_obj(formats, (0, 'url'))
        stream_kind = self._search_regex(
            r'/hls/(dvr|video)/', first_format_url, 'm3u8 type', default=None)

        if stream_kind == 'video':
            live_status = 'is_live'
        elif stream_kind == 'dvr':
            live_status = 'was_live'  # short-term archives
        else:
            live_status = 'not_live'  # VOD or long-term archives

        return {
            'id': video_id,
            'title': video_id,
            'formats': formats,
            'live_status': live_status,
            '_old_archive_ids': [make_archive_id('PIAULIZAPortal', video_id)],
        }
class UlizaPortalIE(InfoExtractor):
    """Extractor for ulizaportal.jp pages; hands the embedded player script over to UlizaPlayerIE."""

    IE_DESC = 'ulizaportal.jp'
    _VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
    _TESTS = [{
        'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
        'info_dict': {
            'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
            'display_id': '005f18b7-e810-5618-cb82-0987c5755d44',
            'title': 'プレゼンテーションプレイヤーのサンプル',
            'live_status': 'not_live',
            '_old_archive_ids': ['piaulizaportal ae350126-5e22-4a7f-a8ac-8d0fd448b800'],
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }, {
        'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
        'info_dict': {
            'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
            'display_id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
            'title': '【確認用】視聴サンプルページULIZA',
            'live_status': 'not_live',
            '_old_archive_ids': ['piaulizaportal 0644ecc8-e354-41b4-b957-3b08a2d63df1'],
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }]

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the page's player script.

        Raises ExtractorError (expected) without downloading anything when the
        URL carries an `expires` query parameter that is not in the future.
        """
        page_id = self._match_id(url)

        query = parse_qs(url)
        expiry = int_or_none(traverse_obj(query, ('expires', 0)))
        if expiry and time_seconds() >= expiry:
            raise ExtractorError('The link is expired', video_id=page_id, expected=True)

        webpage = self._download_webpage(url, page_id)
        player_script_url = self._search_regex(
            r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
            webpage, 'player data url')
        page_title = self._html_extract_title(webpage)

        return self.url_result(
            player_script_url, UlizaPlayerIE, url_transparent=True,
            display_id=page_id, video_title=page_title)

View File

@ -1,189 +0,0 @@
import functools
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
int_or_none,
parse_duration,
qualities,
remove_start,
strip_or_none,
)
class VeohIE(InfoExtractor):
    """Extractor for single veoh.com videos (watch, videos, embed and iphone URLs)."""

    _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|videos|embed|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'

    _TESTS = [{
        'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
        'md5': '620e68e6a3cff80086df3348426c9ca3',
        'info_dict': {
            'id': 'v56314296nk7Zdmz3',
            'ext': 'mp4',
            'title': 'Straight Backs Are Stronger',
            'description': 'md5:203f976279939a6dc664d4001e13f5f4',
            'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th56314296\\.jpg(\\?.*)?',
            'uploader': 'LUMOback',
            'duration': 46,
            'view_count': int,
            'average_rating': int,
            'comment_count': int,
            'age_limit': 0,
            'categories': ['technology_and_gaming'],
            'tags': ['posture', 'posture', 'sensor', 'back', 'pain', 'wearable', 'tech', 'lumo'],
        },
    }, {
        'url': 'http://www.veoh.com/embed/v56314296nk7Zdmz3',
        'only_matching': True,
    }, {
        'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
        'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
        'info_dict': {
            'id': '27701988',
            'ext': 'mp4',
            'title': 'Chile workers cover up to avoid skin damage',
            'description': 'md5:2bd151625a60a32822873efc246ba20d',
            'uploader': 'afp-news',
            'duration': 123,
        },
        'skip': 'This video has been deleted.',
    }, {
        'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
        'md5': '4fde7b9e33577bab2f2f8f260e30e979',
        'note': 'Embedded ooyala video',
        'info_dict': {
            'id': '69525809',
            'ext': 'mp4',
            'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
            'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
            'uploader': 'newsy-videos',
        },
        'skip': 'This video has been deleted.',
    }, {
        'url': 'http://www.veoh.com/watch/e152215AJxZktGS',
        'only_matching': True,
    }, {
        'url': 'https://www.veoh.com/videos/v16374379WA437rMH',
        'md5': 'cceb73f3909063d64f4b93d4defca1b3',
        'info_dict': {
            'id': 'v16374379WA437rMH',
            'ext': 'mp4',
            'title': 'Phantasmagoria 2, pt. 1-3',
            'description': 'Phantasmagoria: a Puzzle of Flesh',
            'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th16374379\\.jpg(\\?.*)?',
            'uploader': 'davidspackage',
            'duration': 968,
            'view_count': int,
            'average_rating': int,
            'comment_count': int,
            'age_limit': 18,
            'categories': ['technology_and_gaming', 'gaming'],
            'tags': ['puzzle', 'of', 'flesh'],
        },
    }]

    def _real_extract(self, url):
        """Fetch the getVideo JSON for the matched ID and build the info dict.

        Every non-empty entry of the video's `src` mapping becomes a format,
        except `poster`, which is used as the thumbnail. Categories come from
        the top-level `categoryPath`, falling back to the video's single
        `category` with its `category_` prefix removed.
        """
        video_id = self._match_id(url)
        metadata = self._download_json(
            'https://www.veoh.com/watch/getVideo/' + video_id,
            video_id)
        video = metadata['video']
        title = video['title']

        thumbnail_url = None
        q = qualities(['Regular', 'HQ'])
        formats = []
        # `.get('src', {})` would still return None for an explicit JSON null,
        # so normalise falsy values to an empty mapping before iterating
        for f_id, f_url in (video.get('src') or {}).items():
            if not f_url:
                continue
            if f_id == 'poster':
                thumbnail_url = f_url
            else:
                formats.append({
                    'format_id': f_id,
                    'quality': q(f_id),
                    'url': f_url,
                })

        categories = metadata.get('categoryPath')
        if not categories:
            category = remove_start(strip_or_none(video.get('category')), 'category_')
            categories = [category] if category else None
        tags = video.get('tags')

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': thumbnail_url,
            # same null-safety as for 'src': 'author' may be present but null
            'uploader': (video.get('author') or {}).get('nickname'),
            'duration': int_or_none(video.get('lengthBySec')) or parse_duration(video.get('length')),
            'view_count': int_or_none(video.get('views')),
            'formats': formats,
            'average_rating': int_or_none(video.get('rating')),
            'comment_count': int_or_none(video.get('numOfComments')),
            'age_limit': 18 if video.get('contentRatingId') == 2 else 0,
            'categories': categories,
            'tags': tags.split(', ') if tags else None,
        }
class VeohUserIE(VeohIE):  # XXX: Do not subclass from concrete IE
    """Extractor for veoh.com user pages; yields the user's published videos as a paged playlist."""

    _VALID_URL = r'https?://(?:www\.)?veoh\.com/users/(?P<id>[\w-]+)'
    IE_NAME = 'veoh:user'

    _TESTS = [
        {
            'url': 'https://www.veoh.com/users/valentinazoe',
            'info_dict': {
                'id': 'valentinazoe',
                'title': 'valentinazoe (Uploads)',
            },
            'playlist_mincount': 75,
        },
        {
            'url': 'https://www.veoh.com/users/PiensaLibre',
            'info_dict': {
                'id': 'PiensaLibre',
                'title': 'PiensaLibre (Uploads)',
            },
            'playlist_mincount': 2,
        }]

    _PAGE_SIZE = 16

    def _real_initialize(self):
        """Download the veoh.com front page and store its CSRF token in `self._TOKEN`."""
        front_page = self._download_webpage(
            'https://www.veoh.com', None, note='Downloading authorization token')
        self._TOKEN = self._search_regex(
            r'csrfToken:\s*(["\'])(?P<token>[0-9a-zA-Z]{40})\1', front_page,
            'request token', group='token')

    def _fetch_page(self, uploader, page):
        """Yield url results for one page of the uploader's published videos.

        `page` is zero-based; the request sends it as a one-based page number.
        Raises ExtractorError with the response message when the response does
        not report success.
        """
        request_headers = {
            'x-csrf-token': self._TOKEN,
            'content-type': 'application/json;charset=UTF-8',
        }
        request_body = json.dumps({
            'username': uploader,
            'maxResults': self._PAGE_SIZE,
            'page': page + 1,
            'requestName': 'userPage',
        }).encode()

        response = self._download_json(
            'https://www.veoh.com/users/published/videos', uploader,
            note=f'Downloading videos page {page + 1}',
            headers=request_headers, data=request_body)
        if not response.get('success'):
            raise ExtractorError(response['message'])

        for video in response['videos']:
            yield self.url_result(
                f'https://www.veoh.com/watch/{video["permalinkId"]}', VeohIE,
                video['permalinkId'], video.get('title'))

    def _real_extract(self, url):
        """Return a lazily paged playlist of the user's uploads."""
        uploader = self._match_id(url)
        page_fetcher = functools.partial(self._fetch_page, uploader)
        entries = OnDemandPagedList(page_fetcher, self._PAGE_SIZE)
        return self.playlist_result(entries, uploader, f'{uploader} (Uploads)')

View File

@ -5087,7 +5087,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
def _rich_entries(self, rich_grid_renderer):
renderer = traverse_obj(
rich_grid_renderer,
('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer', 'shortsLockupViewModel'), any)) or {}
('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer', 'shortsLockupViewModel', 'lockupViewModel'), any)) or {}
video_id = renderer.get('videoId')
if video_id:
yield self._extract_video(renderer)
@ -5114,6 +5114,18 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
})),
thumbnails=self._extract_thumbnails(renderer, 'thumbnail', final_key='sources'))
return
# lockupViewModel extraction
content_id = renderer.get('contentId')
if content_id and renderer.get('contentType') == 'LOCKUP_CONTENT_TYPE_PODCAST':
yield self.url_result(
f'https://www.youtube.com/playlist?list={content_id}',
ie=YoutubeTabIE, video_id=content_id,
**traverse_obj(renderer, {
'title': ('metadata', 'lockupMetadataViewModel', 'title', 'content', {str}),
}),
thumbnails=self._extract_thumbnails(renderer, (
'contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail', 'thumbnailViewModel', 'image'), final_key='sources'))
return
def _video_entry(self, video_renderer):
video_id = video_renderer.get('videoId')
@ -6706,22 +6718,22 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
},
'playlist_count': 0,
}, {
# Podcasts tab, with rich entry playlistRenderers
# Podcasts tab, with rich entry lockupViewModel
'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
'info_dict': {
'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
'title': '99 Percent Invisible - Podcasts',
'uploader': '99 Percent Invisible',
'title': '99% Invisible - Podcasts',
'uploader': '99% Invisible',
'channel_follower_count': int,
'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
'tags': [],
'channel': '99 Percent Invisible',
'channel': '99% Invisible',
'uploader_id': '@99percentinvisiblepodcast',
},
'playlist_count': 0,
'playlist_count': 5,
}, {
# Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
'url': 'https://www.youtube.com/@AHimitsu/releases',

View File

@ -30,6 +30,7 @@ from .metadataparser import (
)
from .modify_chapters import ModifyChaptersPP
from .movefilesafterdownload import MoveFilesAfterDownloadPP
from .mutagenmetadata import MutagenMetadataPP
from .sponskrub import SponSkrubPP
from .sponsorblock import SponsorBlockPP
from .xattrpp import XAttrMetadataPP

View File

@ -0,0 +1,41 @@
from .common import PostProcessor
from ..dependencies import mutagen
if mutagen:
from mutagen.easymp4 import EasyMP4
from mutagen.flac import FLAC
from mutagen.mp3 import EasyMP3
from mutagen.oggopus import OggOpus
from mutagen.oggvorbis import OggVorbis
class MutagenMetadataPP(PostProcessor):
    """Write multi-valued tags (artists, album artists, genres, composers) with mutagen.

    For each supported audio file, the list-valued fields of the info dict
    are written to the corresponding mutagen tag keys. When mutagen is not
    installed, the postprocessor only warns (for audio extensions it would
    otherwise have handled) and leaves the file untouched.
    """

    def __init__(self, downloader):
        super().__init__(downloader)

    @PostProcessor._restrict_to(images=False)
    def run(self, information):
        """Tag ``information['filepath']`` and return ``([], information)``.

        Nothing is written when mutagen is unavailable, when mutagen does not
        recognise the file as one of the supported formats, or when none of
        the mapped info dict fields has a value.
        """
        extension = information['ext']
        ret = [], information
        if not mutagen:
            if extension in ['mp3', 'm4a', 'ogg', 'opus', 'flac']:
                self.report_warning('module mutagen was not found. Tags with multiple values (e.g. artist, album artist and genre) may be set incorrectly. Please install using `python -m pip install mutagen`')
            return ret

        # mutagen tag key -> info dict field holding the values to write
        tag_mapping = {
            'artist': 'artists',
            'albumartist': 'album_artists',
            'genre': 'genres',
            'composer': 'composers',
        }
        supported_formats = [EasyMP3, EasyMP4, OggVorbis, OggOpus, FLAC]
        audio = mutagen.File(information['filepath'], supported_formats)
        # mutagen.File returns None for unrecognised files. Compare against None
        # explicitly: mutagen file objects are dict-like, so a recognised file
        # that carries no tags yet is falsy and would wrongly be skipped
        if audio is None:
            return ret

        if isinstance(audio, EasyMP4):
            # EasyMP4 has no 'composer' key by default; map it to the MP4 composer atom
            audio.RegisterTextKey('composer', '\251wrt')

        modified = False
        for tag_key, info_key in tag_mapping.items():
            value = information.get(info_key)
            if value:
                audio[tag_key] = value
                modified = True

        # Avoid rewriting the file when there was nothing to set
        if modified:
            audio.save()
        return ret