Compare commits

...

11 Commits

Author SHA1 Message Date
clodobox
fef8d527ec
Merge e67441a1e4 into e079ffbda6 2024-11-17 22:59:03 +00:00
gillux
e079ffbda6
[ie/litv] Fix extractor (#11071)
Authored by: jiru
2024-11-17 21:37:15 +00:00
bashonly
2009cb27e1
[ie/SonyLIVSeries] Add sort_order extractor-arg (#11569)
Authored by: bashonly
2024-11-17 21:16:22 +00:00
Jackson Humphrey
f351440f1d
[ie/ctvnews] Fix extractor (#11534)
Closes #8689
Authored by: jshumphrey, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-11-17 21:06:50 +00:00
qbnu
f9d98509a8
[ie/ctvnews] Fix playlist ID extraction (#8892)
Authored by: qbnu
2024-11-17 19:35:10 +00:00
sepro
37cd7660ea
[ie/youtube:tab] Fix podcasts tab extraction (#11567)
Authored by: seproDev
2024-11-17 19:46:04 +01:00
ChocoLZS
d867f99622
[ie/PiaLive] Add extractor (#10811)
Authored by: ChocoLZS
2024-11-17 19:41:57 +01:00
doe1080
10fc719bc7
[cleanup] Remove dead extractors (#11566)
- Removes MildomClipIE, MildomIE, MildomUserVodIE, MildomVodIE
- Removes PokemonIE, PokemonWatchIE
- Removes VeohIE, VeohUserIE

Closes #3373, Closes #7059
Authored by: doe1080
2024-11-17 16:22:40 +00:00
krichbanana
eb15fd5a32
[ie/kenh14] Add extractor (#3996)
Closes #3937
Authored by: krichbanana, pzhlkj6612

Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
2024-11-17 14:12:26 +00:00
sepro
7cecd299e4
[ie/chaturbate] Don't break embed detection (#11565)
Bugfix for 720b3dc453

Authored by: seproDev
2024-11-17 13:32:12 +01:00
clodobox
e67441a1e4
Update playsuisse.py
Improve PlaySuisse extractor: enhance episode naming and error handling

- Modified episode naming to include series name: "Series Name - Episode Name"
- Added robust error handling to prevent NoneType errors
- Improved playlist handling for series with multiple episodes
- Enhanced data validation in _extract_single method
- Updated _real_extract to handle potential None returns from _extract_single

These changes improve the reliability of the extractor and provide more
informative file names for series episodes, making content easier to identify
and organize.
2024-07-21 13:28:21 +02:00
15 changed files with 649 additions and 912 deletions

View File

@ -1869,6 +1869,9 @@ The following extractors use this feature:
#### digitalconcerthall #### digitalconcerthall
* `prefer_combined_hls`: Prefer extracting combined/pre-merged video and audio HLS formats. This will exclude 4K/HEVC video and lossless/FLAC audio formats, which are only available as split video/audio HLS formats * `prefer_combined_hls`: Prefer extracting combined/pre-merged video and audio HLS formats. This will exclude 4K/HEVC video and lossless/FLAC audio formats, which are only available as split video/audio HLS formats
#### sonylivseries
* `sort_order`: Episode sort order for series extraction - one of `asc` (ascending, oldest first) or `desc` (descending, newest first). Default is `asc`
**Note**: These options may be changed/removed in the future without concern for backward compatibility **Note**: These options may be changed/removed in the future without concern for backward compatibility
<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE --> <!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->

View File

@ -946,6 +946,10 @@ from .kaltura import KalturaIE
from .kankanews import KankaNewsIE from .kankanews import KankaNewsIE
from .karaoketv import KaraoketvIE from .karaoketv import KaraoketvIE
from .kelbyone import KelbyOneIE from .kelbyone import KelbyOneIE
from .kenh14 import (
Kenh14PlaylistIE,
Kenh14VideoIE,
)
from .khanacademy import ( from .khanacademy import (
KhanAcademyIE, KhanAcademyIE,
KhanAcademyUnitIE, KhanAcademyUnitIE,
@ -1135,12 +1139,6 @@ from .microsoftembed import (
MicrosoftMediusIE, MicrosoftMediusIE,
) )
from .microsoftstream import MicrosoftStreamIE from .microsoftstream import MicrosoftStreamIE
from .mildom import (
MildomClipIE,
MildomIE,
MildomUserVodIE,
MildomVodIE,
)
from .minds import ( from .minds import (
MindsChannelIE, MindsChannelIE,
MindsGroupIE, MindsGroupIE,
@ -1522,8 +1520,8 @@ from .pgatour import PGATourIE
from .philharmoniedeparis import PhilharmonieDeParisIE from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE from .photobucket import PhotobucketIE
from .pialive import PiaLiveIE
from .piapro import PiaproIE from .piapro import PiaproIE
from .piaulizaportal import PIAULIZAPortalIE
from .picarto import ( from .picarto import (
PicartoIE, PicartoIE,
PicartoVodIE, PicartoVodIE,
@ -1559,10 +1557,6 @@ from .podbayfm import (
) )
from .podchaser import PodchaserIE from .podchaser import PodchaserIE
from .podomatic import PodomaticIE from .podomatic import PodomaticIE
from .pokemon import (
PokemonIE,
PokemonWatchIE,
)
from .pokergo import ( from .pokergo import (
PokerGoCollectionIE, PokerGoCollectionIE,
PokerGoIE, PokerGoIE,
@ -2256,6 +2250,10 @@ from .ufctv import (
) )
from .ukcolumn import UkColumnIE from .ukcolumn import UkColumnIE
from .uktvplay import UKTVPlayIE from .uktvplay import UKTVPlayIE
from .uliza import (
UlizaPlayerIE,
UlizaPortalIE,
)
from .umg import UMGDeIE from .umg import UMGDeIE
from .unistra import UnistraIE from .unistra import UnistraIE
from .unity import UnityIE from .unity import UnityIE
@ -2284,10 +2282,6 @@ from .utreon import UtreonIE
from .varzesh3 import Varzesh3IE from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE from .vbox7 import Vbox7IE
from .veo import VeoIE from .veo import VeoIE
from .veoh import (
VeohIE,
VeohUserIE,
)
from .vesti import VestiIE from .vesti import VestiIE
from .vevo import ( from .vevo import (
VevoIE, VevoIE,

View File

@ -79,7 +79,7 @@ class ChaturbateIE(InfoExtractor):
'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True), 'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True),
} }
def _extract_from_webpage(self, video_id, tld): def _extract_from_html(self, video_id, tld):
webpage = self._download_webpage( webpage = self._download_webpage(
f'https://chaturbate.{tld}/{video_id}/', video_id, f'https://chaturbate.{tld}/{video_id}/', video_id,
headers=self.geo_verification_headers(), impersonate=True) headers=self.geo_verification_headers(), impersonate=True)
@ -151,4 +151,4 @@ class ChaturbateIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id, tld = self._match_valid_url(url).group('id', 'tld') video_id, tld = self._match_valid_url(url).group('id', 'tld')
return self._extract_from_api(video_id, tld) or self._extract_from_webpage(video_id, tld) return self._extract_from_api(video_id, tld) or self._extract_from_html(video_id, tld)

View File

@ -1,14 +1,27 @@
import json
import re import re
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import orderedSet from .ninecninemedia import NineCNineMediaIE
from ..utils import extract_attributes, orderedSet
from ..utils.traversal import find_element, traverse_obj
class CTVNewsIE(InfoExtractor): class CTVNewsIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)' _BASE_REGEX = r'https?://(?:[^.]+\.)?ctvnews\.ca/'
_VIDEO_ID_RE = r'(?P<id>\d{5,})'
_PLAYLIST_ID_RE = r'(?P<id>\d\.\d{5,})'
_VALID_URL = [
rf'{_BASE_REGEX}video/c{_VIDEO_ID_RE}',
rf'{_BASE_REGEX}video(?:-gallery)?/?\?clipId={_VIDEO_ID_RE}',
rf'{_BASE_REGEX}video/?\?(?:playlist|bin)Id={_PLAYLIST_ID_RE}',
rf'{_BASE_REGEX}(?!video/)[^?#]*?{_PLAYLIST_ID_RE}/?(?:$|[?#])',
rf'{_BASE_REGEX}(?!video/)[^?#]+\?binId={_PLAYLIST_ID_RE}',
]
_TESTS = [{ _TESTS = [{
'url': 'http://www.ctvnews.ca/video?clipId=901995', 'url': 'http://www.ctvnews.ca/video?clipId=901995',
'md5': '9b8624ba66351a23e0b6e1391971f9af', 'md5': 'b608f466c7fa24b9666c6439d766ab7e',
'info_dict': { 'info_dict': {
'id': '901995', 'id': '901995',
'ext': 'flv', 'ext': 'flv',
@ -16,6 +29,33 @@ class CTVNewsIE(InfoExtractor):
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285', 'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
'timestamp': 1467286284, 'timestamp': 1467286284,
'upload_date': '20160630', 'upload_date': '20160630',
'categories': [],
'season_number': 0,
'season': 'Season 0',
'tags': [],
'series': 'CTV News National | Archive | Stories 2',
'season_id': '57981',
'thumbnail': r're:https?://.*\.jpg$',
'duration': 764.631,
},
}, {
'url': 'https://barrie.ctvnews.ca/video/c3030933-here_s-what_s-making-news-for-nov--15?binId=1272429',
'md5': '8b8c2b33c5c1803e3c26bc74ff8694d5',
'info_dict': {
'id': '3030933',
'ext': 'flv',
'title': 'Heres whats making news for Nov. 15',
'description': 'Here are the top stories were working on for CTV News at 11 for Nov. 15',
'thumbnail': 'http://images2.9c9media.com/image_asset/2021_2_22_a602e68e-1514-410e-a67a-e1f7cccbacab_png_2000x1125.jpg',
'season_id': '58104',
'season_number': 0,
'tags': [],
'season': 'Season 0',
'categories': [],
'series': 'CTV News Barrie',
'upload_date': '20241116',
'duration': 42.943,
'timestamp': 1731722452,
}, },
}, { }, {
'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224', 'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
@ -31,6 +71,72 @@ class CTVNewsIE(InfoExtractor):
'id': '1.2876780', 'id': '1.2876780',
}, },
'playlist_mincount': 100, 'playlist_mincount': 100,
}, {
'url': 'https://www.ctvnews.ca/it-s-been-23-years-since-toronto-called-in-the-army-after-a-major-snowstorm-1.5736957',
'info_dict':
{
'id': '1.5736957',
},
'playlist_mincount': 6,
}, {
'url': 'https://www.ctvnews.ca/business/respondents-to-bank-of-canada-questionnaire-largely-oppose-creating-a-digital-loonie-1.6665797',
'md5': '24bc4b88cdc17d8c3fc01dfc228ab72c',
'info_dict': {
'id': '2695026',
'ext': 'flv',
'season_id': '89852',
'series': 'From CTV News Channel',
'description': 'md5:796a985a23cacc7e1e2fafefd94afd0a',
'season': '2023',
'title': 'Bank of Canada asks public about digital currency',
'categories': [],
'tags': [],
'upload_date': '20230526',
'season_number': 2023,
'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
'timestamp': 1685105157,
'duration': 253.553,
},
}, {
'url': 'https://stox.ctvnews.ca/video-gallery?clipId=582589',
'md5': '135cc592df607d29dddc931f1b756ae2',
'info_dict': {
'id': '582589',
'ext': 'flv',
'categories': [],
'timestamp': 1427906183,
'season_number': 0,
'duration': 125.559,
'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
'series': 'CTV News Stox',
'description': 'CTV original footage of the rise and fall of the Berlin Wall.',
'title': 'Berlin Wall',
'season_id': '63817',
'season': 'Season 0',
'tags': [],
'upload_date': '20150401',
},
}, {
'url': 'https://ottawa.ctvnews.ca/features/regional-contact/regional-contact-archive?binId=1.1164587#3023759',
'md5': 'a14c0603557decc6531260791c23cc5e',
'info_dict': {
'id': '3023759',
'ext': 'flv',
'season_number': 2024,
'timestamp': 1731798000,
'season': '2024',
'episode': 'Episode 125',
'description': 'CTV News Ottawa at Six',
'duration': 2712.076,
'episode_number': 125,
'upload_date': '20241116',
'title': 'CTV News Ottawa at Six for Saturday, November 16, 2024',
'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
'categories': [],
'tags': [],
'series': 'CTV News Ottawa at Six',
'season_id': '92667',
},
}, { }, {
'url': 'http://www.ctvnews.ca/1.810401', 'url': 'http://www.ctvnews.ca/1.810401',
'only_matching': True, 'only_matching': True,
@ -42,29 +148,35 @@ class CTVNewsIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
def _ninecninemedia_url_result(self, clip_id):
return self.url_result(f'9c9media:ctvnews_web:{clip_id}', NineCNineMediaIE, clip_id)
def _real_extract(self, url): def _real_extract(self, url):
page_id = self._match_id(url) page_id = self._match_id(url)
def ninecninemedia_url_result(clip_id): if mobj := re.fullmatch(self._VIDEO_ID_RE, urllib.parse.urlparse(url).fragment):
return { page_id = mobj.group('id')
'_type': 'url_transparent',
'id': clip_id,
'url': f'9c9media:ctvnews_web:{clip_id}',
'ie_key': 'NineCNineMedia',
}
if page_id.isdigit(): if re.fullmatch(self._VIDEO_ID_RE, page_id):
return ninecninemedia_url_result(page_id) return self._ninecninemedia_url_result(page_id)
else:
webpage = self._download_webpage(f'http://www.ctvnews.ca/{page_id}', page_id, query={ webpage = self._download_webpage(f'https://www.ctvnews.ca/{page_id}', page_id, query={
'ot': 'example.AjaxPageLayout.ot', 'ot': 'example.AjaxPageLayout.ot',
'maxItemsPerPage': 1000000, 'maxItemsPerPage': 1000000,
}) })
entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet( entries = [self._ninecninemedia_url_result(clip_id)
re.findall(r'clip\.id\s*=\s*(\d+);', webpage))] for clip_id in orderedSet(re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
if not entries: if not entries:
webpage = self._download_webpage(url, page_id) webpage = self._download_webpage(url, page_id)
if 'getAuthStates("' in webpage: if 'getAuthStates("' in webpage:
entries = [ninecninemedia_url_result(clip_id) for clip_id in entries = [self._ninecninemedia_url_result(clip_id) for clip_id in
self._search_regex(r'getAuthStates\("([\d+,]+)"', webpage, 'clip ids').split(',')] self._search_regex(r'getAuthStates\("([\d+,]+)"', webpage, 'clip ids').split(',')]
return self.playlist_result(entries, page_id) else:
entries = [
self._ninecninemedia_url_result(clip_id) for clip_id in
traverse_obj(webpage, (
{find_element(tag='jasper-player-container', html=True)},
{extract_attributes}, 'axis-ids', {json.loads}, ..., 'axisId'))
]
return self.playlist_result(entries, page_id)

160
yt_dlp/extractor/kenh14.py Normal file
View File

@ -0,0 +1,160 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
extract_attributes,
get_element_by_class,
get_element_html_by_attribute,
get_elements_html_by_class,
int_or_none,
parse_duration,
parse_iso8601,
remove_start,
strip_or_none,
unescapeHTML,
update_url,
url_or_none,
)
from ..utils.traversal import traverse_obj
class Kenh14VideoIE(InfoExtractor):
_VALID_URL = r'https?://video\.kenh14\.vn/(?:video/)?[\w-]+-(?P<id>[0-9]+)\.chn'
_TESTS = [{
'url': 'https://video.kenh14.vn/video/mo-hop-iphone-14-pro-max-nguon-unbox-therapy-316173.chn',
'md5': '1ed67f9c3a1e74acf15db69590cf6210',
'info_dict': {
'id': '316173',
'ext': 'mp4',
'title': 'Video mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
'description': 'Video mở hộp iPhone 14 Pro MaxVideo mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
'tags': [],
'uploader': 'Unbox Therapy',
'upload_date': '20220517',
'view_count': int,
'duration': 722.86,
'timestamp': 1652764468,
},
}, {
'url': 'https://video.kenh14.vn/video-316174.chn',
'md5': '2b41877d2afaf4a3f487ceda8e5c7cbd',
'info_dict': {
'id': '316174',
'ext': 'mp4',
'title': 'Khoảnh khắc VĐV nằm gục khóc sau chiến thắng: 7 năm trời Việt Nam mới có HCV kiếm chém nữ, chỉ có 8 tháng để khổ luyện trước khi lên sàn đấu',
'description': 'md5:de86aa22e143e2b277bce8ec9c6f17dc',
'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
'tags': [],
'upload_date': '20220517',
'view_count': int,
'duration': 70.04,
'timestamp': 1652766021,
},
}, {
'url': 'https://video.kenh14.vn/0-344740.chn',
'md5': 'b843495d5e728142c8870c09b46df2a9',
'info_dict': {
'id': '344740',
'ext': 'mov',
'title': 'Kỳ Duyên đầy căng thẳng trong buổi ra quân đi Miss Universe, nghi thức tuyên thuệ lần đầu xuất hiện gây nhiều tranh cãi',
'description': 'md5:2a2dbb4a7397169fb21ee68f09160497',
'thumbnail': r're:^https?://kenh14cdn\.com/.*\.jpg$',
'tags': ['kỳ duyên', 'Kỳ Duyên tuyên thuệ', 'miss universe'],
'uploader': 'Quang Vũ',
'upload_date': '20241024',
'view_count': int,
'duration': 198.88,
'timestamp': 1729741590,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
attrs = extract_attributes(get_element_html_by_attribute('type', 'VideoStream', webpage) or '')
direct_url = attrs['data-vid']
metadata = self._download_json(
'https://api.kinghub.vn/video/api/v1/detailVideoByGet?FileName={}'.format(
remove_start(direct_url, 'kenh14cdn.com/')), video_id, fatal=False)
formats = [{'url': f'https://{direct_url}', 'format_id': 'http', 'quality': 1}]
subtitles = {}
video_data = self._download_json(
f'https://{direct_url}.json', video_id, note='Downloading video data', fatal=False)
if hls_url := traverse_obj(video_data, ('hls', {url_or_none})):
fmts, subs = self._extract_m3u8_formats_and_subtitles(
hls_url, video_id, m3u8_id='hls', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
if dash_url := traverse_obj(video_data, ('mpd', {url_or_none})):
fmts, subs = self._extract_mpd_formats_and_subtitles(
dash_url, video_id, mpd_id='dash', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
return {
**traverse_obj(metadata, {
'duration': ('duration', {parse_duration}),
'uploader': ('author', {strip_or_none}),
'timestamp': ('uploadtime', {parse_iso8601(delimiter=' ')}),
'view_count': ('views', {int_or_none}),
}),
'id': video_id,
'title': (
traverse_obj(metadata, ('title', {strip_or_none}))
or clean_html(self._og_search_title(webpage))
or clean_html(get_element_by_class('vdbw-title', webpage))),
'formats': formats,
'subtitles': subtitles,
'description': (
clean_html(self._og_search_description(webpage))
or clean_html(get_element_by_class('vdbw-sapo', webpage))),
'thumbnail': (self._og_search_thumbnail(webpage) or attrs.get('data-thumb')),
'tags': traverse_obj(self._html_search_meta('keywords', webpage), (
{lambda x: x.split(';')}, ..., filter)),
}
class Kenh14PlaylistIE(InfoExtractor):
_VALID_URL = r'https?://video\.kenh14\.vn/playlist/[\w-]+-(?P<id>[0-9]+)\.chn'
_TESTS = [{
'url': 'https://video.kenh14.vn/playlist/tran-tinh-naked-love-mua-2-71.chn',
'info_dict': {
'id': '71',
'title': 'Trần Tình (Naked love) mùa 2',
'description': 'md5:e9522339304956dea931722dd72eddb2',
'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
},
'playlist_count': 9,
}, {
'url': 'https://video.kenh14.vn/playlist/0-72.chn',
'info_dict': {
'id': '72',
'title': 'Lau Lại Đầu Từ',
'description': 'Cùng xem xưa và nay có gì khác biệt nhé!',
'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
},
'playlist_count': 6,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
category_detail = get_element_by_class('category-detail', webpage) or ''
embed_info = traverse_obj(
self._yield_json_ld(webpage, playlist_id),
(lambda _, v: v['name'] and v['alternateName'], any)) or {}
return self.playlist_from_matches(
get_elements_html_by_class('video-item', webpage), playlist_id,
(clean_html(get_element_by_class('name', category_detail)) or unescapeHTML(embed_info.get('name'))),
getter=lambda x: 'https://video.kenh14.vn/video/video-{}.chn'.format(extract_attributes(x)['data-id']),
ie=Kenh14VideoIE, playlist_description=(
clean_html(get_element_by_class('description', category_detail))
or unescapeHTML(embed_info.get('alternateName'))),
thumbnail=traverse_obj(
self._og_search_thumbnail(webpage),
({url_or_none}, {update_url(query=None)})))

View File

@ -1,30 +1,32 @@
import json import json
import uuid
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
join_nonempty,
smuggle_url, smuggle_url,
traverse_obj, traverse_obj,
try_call, try_call,
unsmuggle_url, unsmuggle_url,
urljoin,
) )
class LiTVIE(InfoExtractor): class LiTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)' _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:[^/?#]+/watch/|vod/[^/?#]+/content\.do\?content_id=)(?P<id>[\w-]+)'
_URL_TEMPLATE = 'https://www.litv.tv/%s/watch/%s'
_URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?content_id=%s' _GEO_COUNTRIES = ['TW']
_TESTS = [{ _TESTS = [{
'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1', 'url': 'https://www.litv.tv/drama/watch/VOD00041610',
'info_dict': { 'info_dict': {
'id': 'VOD00041606', 'id': 'VOD00041606',
'title': '花千骨', 'title': '花千骨',
}, },
'playlist_count': 51, # 50 episodes + 1 trailer 'playlist_count': 51, # 50 episodes + 1 trailer
}, { }, {
'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1', 'url': 'https://www.litv.tv/drama/watch/VOD00041610',
'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a', 'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a',
'info_dict': { 'info_dict': {
'id': 'VOD00041610', 'id': 'VOD00041610',
@ -32,16 +34,15 @@ class LiTVIE(InfoExtractor):
'title': '花千骨第1集', 'title': '花千骨第1集',
'thumbnail': r're:https?://.*\.jpg$', 'thumbnail': r're:https?://.*\.jpg$',
'description': '《花千骨》陸劇線上看。十六年前,平靜的村莊內,一名女嬰隨異相出生,途徑此地的蜀山掌門清虛道長算出此女命運非同一般,她體內散發的異香易招惹妖魔。一念慈悲下,他在村莊周邊設下結界阻擋妖魔入侵,讓其年滿十六後去蜀山,並賜名花千骨。', 'description': '《花千骨》陸劇線上看。十六年前,平靜的村莊內,一名女嬰隨異相出生,途徑此地的蜀山掌門清虛道長算出此女命運非同一般,她體內散發的異香易招惹妖魔。一念慈悲下,他在村莊周邊設下結界阻擋妖魔入侵,讓其年滿十六後去蜀山,並賜名花千骨。',
'categories': ['奇幻', '愛情', '中國', '仙俠'], 'categories': ['奇幻', '愛情', '仙俠', '古裝'],
'episode': 'Episode 1', 'episode': 'Episode 1',
'episode_number': 1, 'episode_number': 1,
}, },
'params': { 'params': {
'noplaylist': True, 'noplaylist': True,
}, },
'skip': 'Georestricted to Taiwan',
}, { }, {
'url': 'https://www.litv.tv/promo/miyuezhuan/?content_id=VOD00044841&', 'url': 'https://www.litv.tv/drama/watch/VOD00044841',
'md5': '88322ea132f848d6e3e18b32a832b918', 'md5': '88322ea132f848d6e3e18b32a832b918',
'info_dict': { 'info_dict': {
'id': 'VOD00044841', 'id': 'VOD00044841',
@ -55,94 +56,62 @@ class LiTVIE(InfoExtractor):
def _extract_playlist(self, playlist_data, content_type): def _extract_playlist(self, playlist_data, content_type):
all_episodes = [ all_episodes = [
self.url_result(smuggle_url( self.url_result(smuggle_url(
self._URL_TEMPLATE % (content_type, episode['contentId']), self._URL_TEMPLATE % (content_type, episode['content_id']),
{'force_noplaylist': True})) # To prevent infinite recursion {'force_noplaylist': True})) # To prevent infinite recursion
for episode in traverse_obj(playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))] for episode in traverse_obj(playlist_data, ('seasons', ..., 'episodes', lambda _, v: v['content_id']))]
return self.playlist_result(all_episodes, playlist_data['contentId'], playlist_data.get('title')) return self.playlist_result(all_episodes, playlist_data['content_id'], playlist_data.get('title'))
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
vod_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']
if self._search_regex( program_info = traverse_obj(vod_data, ('programInformation', {dict})) or {}
r'(?i)<meta\s[^>]*http-equiv="refresh"\s[^>]*content="[0-9]+;\s*url=https://www\.litv\.tv/"', playlist_data = traverse_obj(vod_data, ('seriesTree'))
webpage, 'meta refresh redirect', default=False, group=0): if playlist_data and self._yes_playlist(program_info.get('series_id'), video_id, smuggled_data):
raise ExtractorError('No such content found', expected=True) return self._extract_playlist(playlist_data, program_info.get('content_type'))
program_info = self._parse_json(self._search_regex( asset_id = traverse_obj(program_info, ('assets', 0, 'asset_id', {str}))
r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'), if asset_id: # This is a VOD
video_id) media_type = 'vod'
else: # This is a live stream
asset_id = program_info['content_id']
media_type = program_info['content_type']
puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
if puid:
endpoint = 'get-urls'
else:
puid = str(uuid.uuid4())
endpoint = 'get-urls-no-auth'
video_data = self._download_json(
f'https://www.litv.tv/api/{endpoint}', video_id,
data=json.dumps({'AssetId': asset_id, 'MediaType': media_type, 'puid': puid}).encode(),
headers={'Content-Type': 'application/json'})
# In browsers `getProgramInfo` request is always issued. Usually this if error := traverse_obj(video_data, ('error', {dict})):
# endpoint gives the same result as the data embedded in the webpage. error_msg = traverse_obj(error, ('message', {str}))
# If, for some reason, there are no embedded data, we do an extra request. if error_msg and 'OutsideRegionError' in error_msg:
if 'assetId' not in program_info:
program_info = self._download_json(
'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
query={'contentId': video_id},
headers={'Accept': 'application/json'})
series_id = program_info['seriesId']
if self._yes_playlist(series_id, video_id, smuggled_data):
playlist_data = self._download_json(
'https://www.litv.tv/vod/ajax/getSeriesTree', video_id,
query={'seriesId': series_id}, headers={'Accept': 'application/json'})
return self._extract_playlist(playlist_data, program_info['contentType'])
video_data = self._parse_json(self._search_regex(
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
webpage, 'video data', default='{}'), video_id)
if not video_data:
payload = {'assetId': program_info['assetId']}
puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
if puid:
payload.update({
'type': 'auth',
'puid': puid,
})
endpoint = 'getUrl'
else:
payload.update({
'watchDevices': program_info['watchDevices'],
'contentType': program_info['contentType'],
})
endpoint = 'getMainUrlNoAuth'
video_data = self._download_json(
f'https://www.litv.tv/vod/ajax/{endpoint}', video_id,
data=json.dumps(payload).encode(),
headers={'Content-Type': 'application/json'})
if not video_data.get('fullpath'):
error_msg = video_data.get('errorMessage')
if error_msg == 'vod.error.outsideregionerror':
self.raise_geo_restricted('This video is available in Taiwan only') self.raise_geo_restricted('This video is available in Taiwan only')
if error_msg: elif error_msg:
raise ExtractorError(f'{self.IE_NAME} said: {error_msg}', expected=True) raise ExtractorError(f'{self.IE_NAME} said: {error_msg}', expected=True)
raise ExtractorError(f'Unexpected result from {self.IE_NAME}') raise ExtractorError(f'Unexpected error from {self.IE_NAME}')
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
video_data['fullpath'], video_id, ext='mp4', video_data['result']['AssetURLs'][0], video_id, ext='mp4', m3u8_id='hls')
entry_protocol='m3u8_native', m3u8_id='hls')
for a_format in formats: for a_format in formats:
# LiTV HLS segments doesn't like compressions # LiTV HLS segments doesn't like compressions
a_format.setdefault('http_headers', {})['Accept-Encoding'] = 'identity' a_format.setdefault('http_headers', {})['Accept-Encoding'] = 'identity'
title = program_info['title'] + program_info.get('secondaryMark', '')
description = program_info.get('description')
thumbnail = program_info.get('imageFile')
categories = [item['name'] for item in program_info.get('category', [])]
episode = int_or_none(program_info.get('episode'))
return { return {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'title': title, 'title': join_nonempty('title', 'secondary_mark', delim='', from_dict=program_info),
'description': description, **traverse_obj(program_info, {
'thumbnail': thumbnail, 'description': ('description', {str}),
'categories': categories, 'thumbnail': ('picture', {urljoin('https://p-cdnstatic.svc.litv.tv/')}),
'episode_number': episode, 'categories': ('genres', ..., 'name', {str}),
'episode_number': ('episode', {int_or_none}),
}),
} }

View File

@ -1,291 +0,0 @@
import functools
import json
import uuid
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
determine_ext,
dict_get,
float_or_none,
traverse_obj,
)
class MildomBaseIE(InfoExtractor):
_GUEST_ID = None
def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None):
if not self._GUEST_ID:
self._GUEST_ID = f'pc-gp-{uuid.uuid4()}'
content = self._download_json(
url, video_id, note=note, data=json.dumps(body).encode() if body else None,
headers={'Content-Type': 'application/json'} if body else {},
query={
'__guest_id': self._GUEST_ID,
'__platform': 'web',
**(query or {}),
})
if content['code'] != 0:
raise ExtractorError(
f'Mildom says: {content["message"]} (code {content["code"]})',
expected=True)
return content['body']
class MildomIE(MildomBaseIE):
IE_NAME = 'mildom'
IE_DESC = 'Record ongoing live by specific user in Mildom'
_VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/(?P<id>\d+)'
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(f'https://www.mildom.com/{video_id}', video_id)
enterstudio = self._call_api(
'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', video_id,
note='Downloading live metadata', query={'user_id': video_id})
result_video_id = enterstudio.get('log_id', video_id)
servers = self._call_api(
'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', result_video_id,
note='Downloading live server list', query={
'user_id': video_id,
'live_server_type': 'hls',
})
playback_token = self._call_api(
'https://cloudac.mildom.com/nonolive/gappserv/live/token', result_video_id,
note='Obtaining live playback token', body={'host_id': video_id, 'type': 'hls'})
playback_token = traverse_obj(playback_token, ('data', ..., 'token'), get_all=False)
if not playback_token:
raise ExtractorError('Failed to obtain live playback token')
formats = self._extract_m3u8_formats(
f'{servers["stream_server"]}/{video_id}_master.m3u8?{playback_token}',
result_video_id, 'mp4', headers={
'Referer': 'https://www.mildom.com/',
'Origin': 'https://www.mildom.com',
})
for fmt in formats:
fmt.setdefault('http_headers', {})['Referer'] = 'https://www.mildom.com/'
return {
'id': result_video_id,
'title': self._html_search_meta('twitter:description', webpage, default=None) or traverse_obj(enterstudio, 'anchor_intro'),
'description': traverse_obj(enterstudio, 'intro', 'live_intro', expected_type=str),
'timestamp': float_or_none(enterstudio.get('live_start_ms'), scale=1000),
'uploader': self._html_search_meta('twitter:title', webpage, default=None) or traverse_obj(enterstudio, 'loginname'),
'uploader_id': video_id,
'formats': formats,
'is_live': True,
}
class MildomVodIE(MildomBaseIE):
IE_NAME = 'mildom:vod'
IE_DESC = 'VOD in Mildom'
_VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+-?[0-9]*)'
_TESTS = [{
'url': 'https://www.mildom.com/playback/10882672/10882672-1597662269',
'info_dict': {
'id': '10882672-1597662269',
'ext': 'mp4',
'title': '始めてのミルダム配信じゃぃ!',
'thumbnail': r're:^https?://.*\.(png|jpg)$',
'upload_date': '20200817',
'duration': 4138.37,
'description': 'ゲームをしたくて!',
'timestamp': 1597662269.0,
'uploader_id': '10882672',
'uploader': 'kson組長(けいそん)',
},
}, {
'url': 'https://www.mildom.com/playback/10882672/10882672-1597758589870-477',
'info_dict': {
'id': '10882672-1597758589870-477',
'ext': 'mp4',
'title': '【kson】感染メイズ麻酔銃で無双する',
'thumbnail': r're:^https?://.*\.(png|jpg)$',
'timestamp': 1597759093.0,
'uploader': 'kson組長(けいそん)',
'duration': 4302.58,
'uploader_id': '10882672',
'description': 'このステージ絶対乗り越えたい',
'upload_date': '20200818',
},
}, {
'url': 'https://www.mildom.com/playback/10882672/10882672-buha9td2lrn97fk2jme0',
'info_dict': {
'id': '10882672-buha9td2lrn97fk2jme0',
'ext': 'mp4',
'title': '【kson組長】CART RACER!!!',
'thumbnail': r're:^https?://.*\.(png|jpg)$',
'uploader_id': '10882672',
'uploader': 'kson組長(けいそん)',
'upload_date': '20201104',
'timestamp': 1604494797.0,
'duration': 4657.25,
'description': 'WTF',
},
}]
def _real_extract(self, url):
user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
webpage = self._download_webpage(f'https://www.mildom.com/playback/{user_id}/{video_id}', video_id)
autoplay = self._call_api(
'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id,
note='Downloading playback metadata', query={
'v_id': video_id,
})['playback']
formats = [{
'url': autoplay['audio_url'],
'format_id': 'audio',
'protocol': 'm3u8_native',
'vcodec': 'none',
'acodec': 'aac',
'ext': 'm4a',
}]
for fmt in autoplay['video_link']:
formats.append({
'format_id': 'video-{}'.format(fmt['name']),
'url': fmt['url'],
'protocol': 'm3u8_native',
'width': fmt['level'] * autoplay['video_width'] // autoplay['video_height'],
'height': fmt['level'],
'vcodec': 'h264',
'acodec': 'aac',
'ext': 'mp4',
})
return {
'id': video_id,
'title': self._html_search_meta(('og:description', 'description'), webpage, default=None) or autoplay.get('title'),
'description': traverse_obj(autoplay, 'video_intro'),
'timestamp': float_or_none(autoplay.get('publish_time'), scale=1000),
'duration': float_or_none(autoplay.get('video_length'), scale=1000),
'thumbnail': dict_get(autoplay, ('upload_pic', 'video_pic')),
'uploader': traverse_obj(autoplay, ('author_info', 'login_name')),
'uploader_id': user_id,
'formats': formats,
}
class MildomClipIE(MildomBaseIE):
IE_NAME = 'mildom:clip'
IE_DESC = 'Clip in Mildom'
_VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/clip/(?P<id>(?P<user_id>\d+)-[a-zA-Z0-9]+)'
_TESTS = [{
'url': 'https://www.mildom.com/clip/10042245-63921673e7b147ebb0806d42b5ba5ce9',
'info_dict': {
'id': '10042245-63921673e7b147ebb0806d42b5ba5ce9',
'title': '全然違ったよ',
'timestamp': 1619181890,
'duration': 59,
'thumbnail': r're:https?://.+',
'uploader': 'ざきんぽ',
'uploader_id': '10042245',
},
}, {
'url': 'https://www.mildom.com/clip/10111524-ebf4036e5aa8411c99fb3a1ae0902864',
'info_dict': {
'id': '10111524-ebf4036e5aa8411c99fb3a1ae0902864',
'title': 'かっこいい',
'timestamp': 1621094003,
'duration': 59,
'thumbnail': r're:https?://.+',
'uploader': '(ルーキー',
'uploader_id': '10111524',
},
}, {
'url': 'https://www.mildom.com/clip/10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
'info_dict': {
'id': '10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
'title': '',
'timestamp': 1614769431,
'duration': 31,
'thumbnail': r're:https?://.+',
'uploader': 'ドルゴルスレンギーン=ダグワドルジ',
'uploader_id': '10660174',
},
}]
def _real_extract(self, url):
user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
webpage = self._download_webpage(f'https://www.mildom.com/clip/{video_id}', video_id)
clip_detail = self._call_api(
'https://cloudac-cf-jp.mildom.com/nonolive/videocontent/clip/detail', video_id,
note='Downloading playback metadata', query={
'clip_id': video_id,
})
return {
'id': video_id,
'title': self._html_search_meta(
('og:description', 'description'), webpage, default=None) or clip_detail.get('title'),
'timestamp': float_or_none(clip_detail.get('create_time')),
'duration': float_or_none(clip_detail.get('length')),
'thumbnail': clip_detail.get('cover'),
'uploader': traverse_obj(clip_detail, ('user_info', 'loginname')),
'uploader_id': user_id,
'url': clip_detail['url'],
'ext': determine_ext(clip_detail.get('url'), 'mp4'),
}
class MildomUserVodIE(MildomBaseIE):
IE_NAME = 'mildom:user:vod'
IE_DESC = 'Download all VODs from specific user in Mildom'
_VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/profile/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.mildom.com/profile/10093333',
'info_dict': {
'id': '10093333',
'title': 'Uploads from ねこばたけ',
},
'playlist_mincount': 732,
}, {
'url': 'https://www.mildom.com/profile/10882672',
'info_dict': {
'id': '10882672',
'title': 'Uploads from kson組長(けいそん)',
},
'playlist_mincount': 201,
}]
def _fetch_page(self, user_id, page):
page += 1
reply = self._call_api(
'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
user_id, note=f'Downloading page {page}', query={
'user_id': user_id,
'page': page,
'limit': '30',
})
if not reply:
return
for x in reply:
v_id = x.get('v_id')
if not v_id:
continue
yield self.url_result(f'https://www.mildom.com/playback/{user_id}/{v_id}')
def _real_extract(self, url):
user_id = self._match_id(url)
self.to_screen(f'This will download all VODs belonging to user. To download ongoing live video, use "https://www.mildom.com/{user_id}" instead')
profile = self._call_api(
'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id,
query={'user_id': user_id}, note='Downloading user profile')['user_info']
return self.playlist_result(
OnDemandPagedList(functools.partial(self._fetch_page, user_id), 30),
user_id, f'Uploads from {profile["loginname"]}')

122
yt_dlp/extractor/pialive.py Normal file
View File

@ -0,0 +1,122 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
extract_attributes,
get_element_by_class,
get_element_html_by_class,
multipart_encode,
str_or_none,
unified_timestamp,
url_or_none,
)
from ..utils.traversal import traverse_obj
class PiaLiveIE(InfoExtractor):
_VALID_URL = r'https?://player\.pia-live\.jp/stream/(?P<id>[\w-]+)'
_PLAYER_ROOT_URL = 'https://player.pia-live.jp/'
_PIA_LIVE_API_URL = 'https://api.pia-live.jp'
_API_KEY = 'kfds)FKFps-dms9e'
_TESTS = [{
'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krUDqGOwN4d61dCWQYOd6CTxl4hjya9dsfEZGsM4uGOUdax60lEI4twsXGXf7crmz8Gk__GhupTrWxA7RFRVt76',
'info_dict': {
'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
'display_id': '2431867_001',
'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
'live_status': 'was_live',
'comment_count': int,
},
'params': {
'getcomments': True,
'skip_download': True,
'ignore_no_formats_error': True,
},
'skip': 'The video is no longer available',
}, {
'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krJdu0GVBVbVy01IwpJ6J3qBEm3d9TCTt1d0eWpsZGj7DrOjVOmS7GAWGwyscMgiThopJvzgWC4H5b-7XQjAfRZ',
'info_dict': {
'id': '9ce8b8ba-f6d1-4d1f-83a0-18c3148ded93',
'display_id': '2431867_002',
'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
'live_status': 'was_live',
'comment_count': int,
},
'params': {
'getcomments': True,
'skip_download': True,
'ignore_no_formats_error': True,
},
'skip': 'The video is no longer available',
}]
def _extract_var(self, variable, html):
return self._search_regex(
rf'(?:var|const|let)\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
html, f'variable {variable}', group='value')
def _real_extract(self, url):
video_key = self._match_id(url)
webpage = self._download_webpage(url, video_key)
program_code = self._extract_var('programCode', webpage)
article_code = self._extract_var('articleCode', webpage)
title = self._html_extract_title(webpage)
if get_element_html_by_class('play-end', webpage):
raise ExtractorError('The video is no longer available', expected=True, video_id=program_code)
if start_info := clean_html(get_element_by_class('play-waiting__date', webpage)):
date, time = self._search_regex(
r'(?P<date>\d{4}/\d{1,2}/\d{1,2})\([月火水木金土日]\)(?P<time>\d{2}:\d{2})',
start_info, 'start_info', fatal=False, group=('date', 'time'))
if date and time:
release_timestamp_str = f'{date} {time} +09:00'
release_timestamp = unified_timestamp(release_timestamp_str)
self.raise_no_formats(f'The video will be available after {release_timestamp_str}', expected=True)
return {
'id': program_code,
'title': title,
'live_status': 'is_upcoming',
'release_timestamp': release_timestamp,
}
payload, content_type = multipart_encode({
'play_url': video_key,
'api_key': self._API_KEY,
})
api_data_and_headers = {
'data': payload,
'headers': {'Content-Type': content_type, 'Referer': self._PLAYER_ROOT_URL},
}
player_tag_list = self._download_json(
f'{self._PIA_LIVE_API_URL}/perf/player-tag-list/{program_code}', program_code,
'Fetching player tag list', 'Unable to fetch player tag list', **api_data_and_headers)
return self.url_result(
extract_attributes(player_tag_list['data']['movie_one_tag'])['src'],
url_transparent=True, title=title, display_id=program_code,
__post_extractor=self.extract_comments(program_code, article_code, api_data_and_headers))
def _get_comments(self, program_code, article_code, api_data_and_headers):
chat_room_url = traverse_obj(self._download_json(
f'{self._PIA_LIVE_API_URL}/perf/chat-tag-list/{program_code}/{article_code}', program_code,
'Fetching chat info', 'Unable to fetch chat info', fatal=False, **api_data_and_headers),
('data', 'chat_one_tag', {extract_attributes}, 'src', {url_or_none}))
if not chat_room_url:
return
comment_page = self._download_webpage(
chat_room_url, program_code, 'Fetching comment page', 'Unable to fetch comment page',
fatal=False, headers={'Referer': self._PLAYER_ROOT_URL})
if not comment_page:
return
yield from traverse_obj(self._search_json(
r'var\s+_history\s*=', comment_page, 'comment list',
program_code, contains_pattern=r'\[(?s:.+)\]', fatal=False), (..., {
'timestamp': (0, {int}),
'author_is_uploader': (1, {lambda x: x == 2}),
'author': (2, {str}),
'text': (3, {str}),
'id': (4, {str_or_none}),
}))

View File

@ -1,70 +0,0 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
parse_qs,
time_seconds,
traverse_obj,
)
class PIAULIZAPortalIE(InfoExtractor):
IE_DESC = 'ulizaportal.jp - PIA LIVE STREAM'
_VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
_TESTS = [{
'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
'info_dict': {
'id': '005f18b7-e810-5618-cb82-0987c5755d44',
'title': 'プレゼンテーションプレイヤーのサンプル',
'live_status': 'not_live',
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
}, {
'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
'info_dict': {
'id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
'title': '【確認用】視聴サンプルページULIZA',
'live_status': 'not_live',
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
expires = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
if expires and expires <= time_seconds():
raise ExtractorError('The link is expired.', video_id=video_id, expected=True)
webpage = self._download_webpage(url, video_id)
player_data = self._download_webpage(
self._search_regex(
r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
webpage, 'player data url'),
video_id, headers={'Referer': 'https://ulizaportal.jp/'},
note='Fetching player data', errnote='Unable to fetch player data')
formats = self._extract_m3u8_formats(
self._search_regex(
r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data,
'm3u8 url', default=None),
video_id, fatal=False)
m3u8_type = self._search_regex(
r'/hls/(dvr|video)/', traverse_obj(formats, (0, 'url')), 'm3u8 type', default=None)
return {
'id': video_id,
'title': self._html_extract_title(webpage),
'formats': formats,
'live_status': {
'video': 'is_live',
'dvr': 'was_live', # short-term archives
}.get(m3u8_type, 'not_live'), # VOD or long-term archives
}

View File

@ -14,95 +14,6 @@ from ..utils import (
class PlaySuisseIE(InfoExtractor): class PlaySuisseIE(InfoExtractor):
_NETRC_MACHINE = 'playsuisse' _NETRC_MACHINE = 'playsuisse'
_VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/(?:watch|detail)/(?:[^#]*[?&]episodeId=)?(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/(?:watch|detail)/(?:[^#]*[?&]episodeId=)?(?P<id>[0-9]+)'
_TESTS = [
{
# Old URL
'url': 'https://www.playsuisse.ch/watch/763211/0',
'only_matching': True,
},
{
# episode in a series
'url': 'https://www.playsuisse.ch/watch/763182?episodeId=763211',
'md5': '82df2a470b2dfa60c2d33772a8a60cf8',
'info_dict': {
'id': '763211',
'ext': 'mp4',
'title': 'Knochen',
'description': 'md5:8ea7a8076ba000cd9e8bc132fd0afdd8',
'duration': 3344,
'series': 'Wilder',
'season': 'Season 1',
'season_number': 1,
'episode': 'Knochen',
'episode_number': 1,
'thumbnail': 're:https://playsuisse-img.akamaized.net/',
},
}, {
# film
'url': 'https://www.playsuisse.ch/watch/808675',
'md5': '818b94c1d2d7c4beef953f12cb8f3e75',
'info_dict': {
'id': '808675',
'ext': 'mp4',
'title': 'Der Läufer',
'description': 'md5:9f61265c7e6dcc3e046137a792b275fd',
'duration': 5280,
'thumbnail': 're:https://playsuisse-img.akamaized.net/',
},
}, {
# series (treated as a playlist)
'url': 'https://www.playsuisse.ch/detail/1115687',
'info_dict': {
'description': 'md5:e4a2ae29a8895823045b5c3145a02aa3',
'id': '1115687',
'series': 'They all came out to Montreux',
'title': 'They all came out to Montreux',
},
'playlist': [{
'info_dict': {
'description': 'md5:f2462744834b959a31adc6292380cda2',
'duration': 3180,
'episode': 'Folge 1',
'episode_number': 1,
'id': '1112663',
'season': 'Season 1',
'season_number': 1,
'series': 'They all came out to Montreux',
'thumbnail': 're:https://playsuisse-img.akamaized.net/',
'title': 'Folge 1',
'ext': 'mp4',
},
}, {
'info_dict': {
'description': 'md5:9dfd308699fe850d3bce12dc1bad9b27',
'duration': 2935,
'episode': 'Folge 2',
'episode_number': 2,
'id': '1112661',
'season': 'Season 1',
'season_number': 1,
'series': 'They all came out to Montreux',
'thumbnail': 're:https://playsuisse-img.akamaized.net/',
'title': 'Folge 2',
'ext': 'mp4',
},
}, {
'info_dict': {
'description': 'md5:14a93a3356b2492a8f786ab2227ef602',
'duration': 2994,
'episode': 'Folge 3',
'episode_number': 3,
'id': '1112664',
'season': 'Season 1',
'season_number': 1,
'series': 'They all came out to Montreux',
'thumbnail': 're:https://playsuisse-img.akamaized.net/',
'title': 'Folge 3',
'ext': 'mp4',
},
}],
},
]
_GRAPHQL_QUERY = ''' _GRAPHQL_QUERY = '''
query AssetWatch($assetId: ID!) { query AssetWatch($assetId: ID!) {
@ -179,8 +90,6 @@ class PlaySuisseIE(InfoExtractor):
raise ExtractorError('Login failed') raise ExtractorError('Login failed')
def _get_media_data(self, media_id): def _get_media_data(self, media_id):
# NOTE In the web app, the "locale" header is used to switch between languages,
# However this doesn't seem to take effect when passing the header here.
response = self._download_json( response = self._download_json(
'https://www.playsuisse.ch/api/graphql', 'https://www.playsuisse.ch/api/graphql',
media_id, data=json.dumps({ media_id, data=json.dumps({
@ -188,7 +97,7 @@ class PlaySuisseIE(InfoExtractor):
'query': self._GRAPHQL_QUERY, 'query': self._GRAPHQL_QUERY,
'variables': {'assetId': media_id}, 'variables': {'assetId': media_id},
}).encode(), }).encode(),
headers={'Content-Type': 'application/json', 'locale': 'de'}) headers={'Content-Type': 'application/json', 'locale': 'fr'})
return response['data']['assetV2'] return response['data']['assetV2']
@ -199,14 +108,25 @@ class PlaySuisseIE(InfoExtractor):
media_id = self._match_id(url) media_id = self._match_id(url)
media_data = self._get_media_data(media_id) media_data = self._get_media_data(media_id)
info = self._extract_single(media_data) info = self._extract_single(media_data)
if info is None:
raise ExtractorError('Unable to extract media information')
if media_data.get('episodes'): if media_data.get('episodes'):
info.update({ info.update({
'_type': 'playlist', '_type': 'playlist',
'entries': map(self._extract_single, media_data['episodes']), 'entries': [
self._extract_single(episode)
for episode in media_data['episodes']
if self._extract_single(episode) is not None
],
}) })
return info return info
def _extract_single(self, media_data): def _extract_single(self, media_data):
if not media_data or 'id' not in media_data:
return None
thumbnails = traverse_obj(media_data, lambda k, _: k.startswith('thumbnail')) thumbnails = traverse_obj(media_data, lambda k, _: k.startswith('thumbnail'))
formats, subtitles = [], {} formats, subtitles = [], {}
@ -219,16 +139,24 @@ class PlaySuisseIE(InfoExtractor):
formats.extend(f) formats.extend(f)
self._merge_subtitles(subs, target=subtitles) self._merge_subtitles(subs, target=subtitles)
series_name = media_data.get('seriesName', '')
episode_name = media_data.get('name', '')
if series_name and episode_name:
title = f"{series_name} - {episode_name}"
else:
title = episode_name or series_name or 'Unknown Title'
return { return {
'id': media_data['id'], 'id': media_data['id'],
'title': media_data.get('name'), 'title': title,
'description': media_data.get('description'), 'description': media_data.get('description'),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'duration': int_or_none(media_data.get('duration')), 'duration': int_or_none(media_data.get('duration')),
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'series': media_data.get('seriesName'), 'series': series_name,
'season_number': int_or_none(media_data.get('seasonNumber')), 'season_number': int_or_none(media_data.get('seasonNumber')),
'episode': media_data.get('name') if media_data.get('episodeNumber') else None, 'episode': episode_name if media_data.get('episodeNumber') else None,
'episode_number': int_or_none(media_data.get('episodeNumber')), 'episode_number': int_or_none(media_data.get('episodeNumber')),
} }

View File

@ -1,136 +0,0 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
extract_attributes,
int_or_none,
js_to_json,
merge_dicts,
)
class PokemonIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))'
_TESTS = [{
'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/',
'md5': '2fe8eaec69768b25ef898cda9c43062e',
'info_dict': {
'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4',
'ext': 'mp4',
'title': 'The Ol Raise and Switch!',
'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
},
'add_id': ['LimelightMedia'],
}, {
# no data-video-title
'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008',
'info_dict': {
'id': 'dfbaf830d7e54e179837c50c0c6cc0e1',
'ext': 'mp4',
'title': "Pokémon : L'ascension de Darkrai",
'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5',
},
'add_id': ['LimelightMedia'],
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
'only_matching': True,
}, {
'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/',
'only_matching': True,
}, {
'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, video_id or display_id)
video_data = extract_attributes(self._search_regex(
r'(<[^>]+data-video-id="{}"[^>]*>)'.format(video_id if video_id else '[a-z0-9]{32}'),
webpage, 'video data element'))
video_id = video_data['data-video-id']
title = video_data.get('data-video-title') or self._html_search_meta(
'pkm-title', webpage, ' title', default=None) or self._search_regex(
r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title')
return {
'_type': 'url_transparent',
'id': video_id,
'url': f'limelight:media:{video_id}',
'title': title,
'description': video_data.get('data-video-summary'),
'thumbnail': video_data.get('data-video-poster'),
'series': 'Pokémon',
'season_number': int_or_none(video_data.get('data-video-season')),
'episode': title,
'episode_number': int_or_none(video_data.get('data-video-episode')),
'ie_key': 'LimelightMedia',
}
class PokemonWatchIE(InfoExtractor):
_VALID_URL = r'https?://watch\.pokemon\.com/[a-z]{2}-[a-z]{2}/(?:#/)?player(?:\.html)?\?id=(?P<id>[a-z0-9]{32})'
_API_URL = 'https://www.pokemon.com/api/pokemontv/v2/channels/{0:}'
_TESTS = [{
'url': 'https://watch.pokemon.com/en-us/player.html?id=8309a40969894a8e8d5bc1311e9c5667',
'md5': '62833938a31e61ab49ada92f524c42ff',
'info_dict': {
'id': '8309a40969894a8e8d5bc1311e9c5667',
'ext': 'mp4',
'title': 'Lillier and the Staff!',
'description': 'md5:338841b8c21b283d24bdc9b568849f04',
},
}, {
'url': 'https://watch.pokemon.com/en-us/#/player?id=3fe7752ba09141f0b0f7756d1981c6b2',
'only_matching': True,
}, {
'url': 'https://watch.pokemon.com/de-de/player.html?id=b3c402e111a4459eb47e12160ab0ba07',
'only_matching': True,
}]
def _extract_media(self, channel_array, video_id):
for channel in channel_array:
for media in channel.get('media'):
if media.get('id') == video_id:
return media
return None
def _real_extract(self, url):
video_id = self._match_id(url)
info = {
'_type': 'url',
'id': video_id,
'url': f'limelight:media:{video_id}',
'ie_key': 'LimelightMedia',
}
# API call can be avoided entirely if we are listing formats
if self.get_param('listformats', False):
return info
webpage = self._download_webpage(url, video_id)
build_vars = self._parse_json(self._search_regex(
r'(?s)buildVars\s*=\s*({.*?})', webpage, 'build vars'),
video_id, transform_source=js_to_json)
region = build_vars.get('region')
channel_array = self._download_json(self._API_URL.format(region), video_id)
video_data = self._extract_media(channel_array, video_id)
if video_data is None:
raise ExtractorError(
f'Video {video_id} does not exist', expected=True)
info['_type'] = 'url_transparent'
images = video_data.get('images')
return merge_dicts(info, {
'title': video_data.get('title'),
'description': video_data.get('description'),
'thumbnail': images.get('medium') or images.get('small'),
'series': 'Pokémon',
'season_number': int_or_none(video_data.get('season')),
'episode': video_data.get('title'),
'episode_number': int_or_none(video_data.get('episode')),
})

View File

@ -199,8 +199,9 @@ class SonyLIVSeriesIE(InfoExtractor):
}, },
}] }]
_API_BASE = 'https://apiv2.sonyliv.com/AGL' _API_BASE = 'https://apiv2.sonyliv.com/AGL'
_SORT_ORDERS = ('asc', 'desc')
def _entries(self, show_id): def _entries(self, show_id, sort_order):
headers = { headers = {
'Accept': 'application/json, text/plain, */*', 'Accept': 'application/json, text/plain, */*',
'Referer': 'https://www.sonyliv.com', 'Referer': 'https://www.sonyliv.com',
@ -215,6 +216,9 @@ class SonyLIVSeriesIE(InfoExtractor):
'from': '0', 'from': '0',
'to': '49', 'to': '49',
}), ('resultObj', 'containers', 0, 'containers', lambda _, v: int_or_none(v['id']))) }), ('resultObj', 'containers', 0, 'containers', lambda _, v: int_or_none(v['id'])))
if sort_order == 'desc':
seasons = reversed(seasons)
for season in seasons: for season in seasons:
season_id = str(season['id']) season_id = str(season['id'])
note = traverse_obj(season, ('metadata', 'title', {str})) or 'season' note = traverse_obj(season, ('metadata', 'title', {str})) or 'season'
@ -226,7 +230,7 @@ class SonyLIVSeriesIE(InfoExtractor):
'from': str(cursor), 'from': str(cursor),
'to': str(cursor + 99), 'to': str(cursor + 99),
'orderBy': 'episodeNumber', 'orderBy': 'episodeNumber',
'sortOrder': 'asc', 'sortOrder': sort_order,
}), ('resultObj', 'containers', 0, 'containers', lambda _, v: int_or_none(v['id']))) }), ('resultObj', 'containers', 0, 'containers', lambda _, v: int_or_none(v['id'])))
if not episodes: if not episodes:
break break
@ -237,4 +241,10 @@ class SonyLIVSeriesIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
show_id = self._match_id(url) show_id = self._match_id(url)
return self.playlist_result(self._entries(show_id), playlist_id=show_id)
sort_order = self._configuration_arg('sort_order', [self._SORT_ORDERS[0]])[0]
if sort_order not in self._SORT_ORDERS:
raise ValueError(
f'Invalid sort order "{sort_order}". Allowed values are: {", ".join(self._SORT_ORDERS)}')
return self.playlist_result(self._entries(show_id, sort_order), playlist_id=show_id)

113
yt_dlp/extractor/uliza.py Normal file
View File

@ -0,0 +1,113 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
make_archive_id,
parse_qs,
time_seconds,
)
from ..utils.traversal import traverse_obj
class UlizaPlayerIE(InfoExtractor):
_VALID_URL = r'https://player-api\.p\.uliza\.jp/v1/players/[^?#]+\?(?:[^#]*&)?name=(?P<id>[^#&]+)'
_TESTS = [{
'url': 'https://player-api.p.uliza.jp/v1/players/timeshift-disabled/pia/admin?type=normal&playerobjectname=ulizaPlayer&name=livestream01_dvr&repeatable=true',
'info_dict': {
'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
'ext': 'mp4',
'title': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
'live_status': 'was_live',
'_old_archive_ids': ['piaulizaportal 88f3109a-f503-4d0f-a9f7-9f39ac745d84'],
},
}, {
'url': 'https://player-api.p.uliza.jp/v1/players/uliza_jp_gallery_normal/promotion/admin?type=presentation&name=cookings&targetid=player1',
'info_dict': {
'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
'ext': 'mp4',
'title': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
'live_status': 'not_live',
'_old_archive_ids': ['piaulizaportal ae350126-5e22-4a7f-a8ac-8d0fd448b800'],
},
}, {
'url': 'https://player-api.p.uliza.jp/v1/players/default-player/pia/admin?type=normal&name=pia_movie_uliza_fix&targetid=ulizahtml5&repeatable=true',
'info_dict': {
'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
'ext': 'mp4',
'title': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
'live_status': 'not_live',
'_old_archive_ids': ['piaulizaportal 0644ecc8-e354-41b4-b957-3b08a2d63df1'],
},
}]
def _real_extract(self, url):
display_id = self._match_id(url)
player_data = self._download_webpage(
url, display_id, headers={'Referer': 'https://player-api.p.uliza.jp/'},
note='Fetching player data', errnote='Unable to fetch player data')
m3u8_url = self._search_regex(
r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data, 'm3u8 url')
video_id = parse_qs(m3u8_url).get('ss', [display_id])[0]
formats = self._extract_m3u8_formats(m3u8_url, video_id)
m3u8_type = self._search_regex(
r'/hls/(dvr|video)/', traverse_obj(formats, (0, 'url')), 'm3u8 type', default=None)
return {
'id': video_id,
'title': video_id,
'formats': formats,
'live_status': {
'video': 'is_live',
'dvr': 'was_live', # short-term archives
}.get(m3u8_type, 'not_live'), # VOD or long-term archives
'_old_archive_ids': [make_archive_id('PIAULIZAPortal', video_id)],
}
class UlizaPortalIE(InfoExtractor):
IE_DESC = 'ulizaportal.jp'
_VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
_TESTS = [{
'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
'info_dict': {
'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
'display_id': '005f18b7-e810-5618-cb82-0987c5755d44',
'title': 'プレゼンテーションプレイヤーのサンプル',
'live_status': 'not_live',
'_old_archive_ids': ['piaulizaportal ae350126-5e22-4a7f-a8ac-8d0fd448b800'],
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
}, {
'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
'info_dict': {
'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
'display_id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
'title': '【確認用】視聴サンプルページULIZA',
'live_status': 'not_live',
'_old_archive_ids': ['piaulizaportal 0644ecc8-e354-41b4-b957-3b08a2d63df1'],
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
expires = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
if expires and expires <= time_seconds():
raise ExtractorError('The link is expired', video_id=video_id, expected=True)
webpage = self._download_webpage(url, video_id)
player_data_url = self._search_regex(
r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
webpage, 'player data url')
return self.url_result(
player_data_url, UlizaPlayerIE, url_transparent=True,
display_id=video_id, video_title=self._html_extract_title(webpage))

View File

@ -1,189 +0,0 @@
import functools
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
int_or_none,
parse_duration,
qualities,
remove_start,
strip_or_none,
)
class VeohIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|videos|embed|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'
_TESTS = [{
'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
'md5': '620e68e6a3cff80086df3348426c9ca3',
'info_dict': {
'id': 'v56314296nk7Zdmz3',
'ext': 'mp4',
'title': 'Straight Backs Are Stronger',
'description': 'md5:203f976279939a6dc664d4001e13f5f4',
'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th56314296\\.jpg(\\?.*)?',
'uploader': 'LUMOback',
'duration': 46,
'view_count': int,
'average_rating': int,
'comment_count': int,
'age_limit': 0,
'categories': ['technology_and_gaming'],
'tags': ['posture', 'posture', 'sensor', 'back', 'pain', 'wearable', 'tech', 'lumo'],
},
}, {
'url': 'http://www.veoh.com/embed/v56314296nk7Zdmz3',
'only_matching': True,
}, {
'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
'info_dict': {
'id': '27701988',
'ext': 'mp4',
'title': 'Chile workers cover up to avoid skin damage',
'description': 'md5:2bd151625a60a32822873efc246ba20d',
'uploader': 'afp-news',
'duration': 123,
},
'skip': 'This video has been deleted.',
}, {
'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
'md5': '4fde7b9e33577bab2f2f8f260e30e979',
'note': 'Embedded ooyala video',
'info_dict': {
'id': '69525809',
'ext': 'mp4',
'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
'uploader': 'newsy-videos',
},
'skip': 'This video has been deleted.',
}, {
'url': 'http://www.veoh.com/watch/e152215AJxZktGS',
'only_matching': True,
}, {
'url': 'https://www.veoh.com/videos/v16374379WA437rMH',
'md5': 'cceb73f3909063d64f4b93d4defca1b3',
'info_dict': {
'id': 'v16374379WA437rMH',
'ext': 'mp4',
'title': 'Phantasmagoria 2, pt. 1-3',
'description': 'Phantasmagoria: a Puzzle of Flesh',
'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th16374379\\.jpg(\\?.*)?',
'uploader': 'davidspackage',
'duration': 968,
'view_count': int,
'average_rating': int,
'comment_count': int,
'age_limit': 18,
'categories': ['technology_and_gaming', 'gaming'],
'tags': ['puzzle', 'of', 'flesh'],
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
metadata = self._download_json(
'https://www.veoh.com/watch/getVideo/' + video_id,
video_id)
video = metadata['video']
title = video['title']
thumbnail_url = None
q = qualities(['Regular', 'HQ'])
formats = []
for f_id, f_url in video.get('src', {}).items():
if not f_url:
continue
if f_id == 'poster':
thumbnail_url = f_url
else:
formats.append({
'format_id': f_id,
'quality': q(f_id),
'url': f_url,
})
categories = metadata.get('categoryPath')
if not categories:
category = remove_start(strip_or_none(video.get('category')), 'category_')
categories = [category] if category else None
tags = video.get('tags')
return {
'id': video_id,
'title': title,
'description': video.get('description'),
'thumbnail': thumbnail_url,
'uploader': video.get('author', {}).get('nickname'),
'duration': int_or_none(video.get('lengthBySec')) or parse_duration(video.get('length')),
'view_count': int_or_none(video.get('views')),
'formats': formats,
'average_rating': int_or_none(video.get('rating')),
'comment_count': int_or_none(video.get('numOfComments')),
'age_limit': 18 if video.get('contentRatingId') == 2 else 0,
'categories': categories,
'tags': tags.split(', ') if tags else None,
}
class VeohUserIE(VeohIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?veoh\.com/users/(?P<id>[\w-]+)'
IE_NAME = 'veoh:user'
_TESTS = [
{
'url': 'https://www.veoh.com/users/valentinazoe',
'info_dict': {
'id': 'valentinazoe',
'title': 'valentinazoe (Uploads)',
},
'playlist_mincount': 75,
},
{
'url': 'https://www.veoh.com/users/PiensaLibre',
'info_dict': {
'id': 'PiensaLibre',
'title': 'PiensaLibre (Uploads)',
},
'playlist_mincount': 2,
}]
_PAGE_SIZE = 16
def _fetch_page(self, uploader, page):
response = self._download_json(
'https://www.veoh.com/users/published/videos', uploader,
note=f'Downloading videos page {page + 1}',
headers={
'x-csrf-token': self._TOKEN,
'content-type': 'application/json;charset=UTF-8',
},
data=json.dumps({
'username': uploader,
'maxResults': self._PAGE_SIZE,
'page': page + 1,
'requestName': 'userPage',
}).encode())
if not response.get('success'):
raise ExtractorError(response['message'])
for video in response['videos']:
yield self.url_result(f'https://www.veoh.com/watch/{video["permalinkId"]}', VeohIE,
video['permalinkId'], video.get('title'))
def _real_initialize(self):
webpage = self._download_webpage(
'https://www.veoh.com', None, note='Downloading authorization token')
self._TOKEN = self._search_regex(
r'csrfToken:\s*(["\'])(?P<token>[0-9a-zA-Z]{40})\1', webpage,
'request token', group='token')
def _real_extract(self, url):
uploader = self._match_id(url)
return self.playlist_result(OnDemandPagedList(
functools.partial(self._fetch_page, uploader),
self._PAGE_SIZE), uploader, f'{uploader} (Uploads)')

View File

@ -5087,7 +5087,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
def _rich_entries(self, rich_grid_renderer): def _rich_entries(self, rich_grid_renderer):
renderer = traverse_obj( renderer = traverse_obj(
rich_grid_renderer, rich_grid_renderer,
('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer', 'shortsLockupViewModel'), any)) or {} ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer', 'shortsLockupViewModel', 'lockupViewModel'), any)) or {}
video_id = renderer.get('videoId') video_id = renderer.get('videoId')
if video_id: if video_id:
yield self._extract_video(renderer) yield self._extract_video(renderer)
@ -5114,6 +5114,18 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
})), })),
thumbnails=self._extract_thumbnails(renderer, 'thumbnail', final_key='sources')) thumbnails=self._extract_thumbnails(renderer, 'thumbnail', final_key='sources'))
return return
# lockupViewModel extraction
content_id = renderer.get('contentId')
if content_id and renderer.get('contentType') == 'LOCKUP_CONTENT_TYPE_PODCAST':
yield self.url_result(
f'https://www.youtube.com/playlist?list={content_id}',
ie=YoutubeTabIE, video_id=content_id,
**traverse_obj(renderer, {
'title': ('metadata', 'lockupMetadataViewModel', 'title', 'content', {str}),
}),
thumbnails=self._extract_thumbnails(renderer, (
'contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail', 'thumbnailViewModel', 'image'), final_key='sources'))
return
def _video_entry(self, video_renderer): def _video_entry(self, video_renderer):
video_id = video_renderer.get('videoId') video_id = video_renderer.get('videoId')
@ -6706,22 +6718,22 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
'playlist_count': 0, 'playlist_count': 0,
}, { }, {
# Podcasts tab, with rich entry playlistRenderers # Podcasts tab, with rich entry lockupViewModel
'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts', 'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
'info_dict': { 'info_dict': {
'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw', 'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw', 'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast', 'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c', 'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
'title': '99 Percent Invisible - Podcasts', 'title': '99% Invisible - Podcasts',
'uploader': '99 Percent Invisible', 'uploader': '99% Invisible',
'channel_follower_count': int, 'channel_follower_count': int,
'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw', 'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
'tags': [], 'tags': [],
'channel': '99 Percent Invisible', 'channel': '99% Invisible',
'uploader_id': '@99percentinvisiblepodcast', 'uploader_id': '@99percentinvisiblepodcast',
}, },
'playlist_count': 0, 'playlist_count': 5,
}, { }, {
# Releases tab, with rich entry playlistRenderers (same as Podcasts tab) # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
'url': 'https://www.youtube.com/@AHimitsu/releases', 'url': 'https://www.youtube.com/@AHimitsu/releases',