Compare commits

...

16 Commits

Author SHA1 Message Date
Frank Aurich
8afd9468b0
[ie/n-tv.de] Fix extractor (#8414)
Closes #3179
Authored by: 1100101
2023-11-11 21:00:06 +00:00
SirElderling
ef12dbdcd3
[ie/radiocomercial] Add extractors (#8508)
Authored by: SirElderling
2023-11-11 20:10:19 +00:00
LoserFox
46acc418a5
[ie/neteasemusic] Improve metadata extraction (#8531)
Closes #8530
Authored by: LoserFox
2023-11-11 20:08:53 +00:00
Esokrates
6ba3085616
[ie/orf:podcast] Add extractor (#8486)
Closes #5265
Authored by: Esokrates
2023-11-11 20:06:25 +00:00
bashonly
f6e97090d2
[ie/twitter:broadcast] Support --wait-for-video (#8475)
Closes #8473
Authored by: bashonly
2023-11-11 20:05:07 +00:00
bashonly
2863fcf2b6
[ie/theatercomplextown] Add extractors (#8560)
Closes #8491
Authored by: bashonly
2023-11-11 20:04:29 +00:00
bashonly
c76c96677f
[ie/thisoldhouse] Add login support (#8561)
Closes #8257
Authored by: bashonly
2023-11-11 20:03:50 +00:00
c-basalt
15b252dfd2
[ie/weibo] Fix extraction (#8463)
Closes #8445
Authored by: c-basalt
2023-11-11 20:02:59 +00:00
Aniol Pagès
312a2d1e8b
[ie/LaXarxaMes] Add extractor (#8412)
Authored by: aniolpages
2023-11-11 20:00:31 +00:00
garret
54579be436
[ie/nhk] Improve metadata extraction (#8388)
Authored by: garret1317
2023-11-11 19:59:01 +00:00
sepro
05adfd883a
[ie/ondemandkorea] Overhaul extractor (#8386)
Closes #8374
Authored by: seproDev
2023-11-11 19:57:56 +00:00
Martin Pecka
3ff494f6f4
[ie/NovaEmbed] Improve _VALID_URL (#8368)
Authored by: peci1
2023-11-11 19:56:29 +00:00
Mozi
9b5bedf13a
[ie/brilliantpala] Fix cookies support (#8352)
Authored by: pzhlkj6612
2023-11-11 19:54:53 +00:00
bashonly
cb480e390d
[ie/thisav] Remove (#8346)
Authored by: bashonly
2023-11-11 19:53:59 +00:00
sepro
25a4bd345a
[ie/sbs.co.kr] Add extractors (#8326)
Authored by: seproDev
2023-11-11 19:53:10 +00:00
Tom
3906de0755
[ie/zoom] Extract combined view formats (#7847)
Authored by: Mipsters
2023-11-11 19:51:54 +00:00
19 changed files with 1022 additions and 249 deletions

View File

@ -953,6 +953,7 @@ from .lastfm import (
LastFMPlaylistIE,
LastFMUserIE,
)
from .laxarxames import LaXarxaMesIE
from .lbry import (
LBRYIE,
LBRYChannelIE,
@ -1387,7 +1388,10 @@ from .oftv import (
from .oktoberfesttv import OktoberfestTVIE
from .olympics import OlympicsReplayIE
from .on24 import On24IE
from .ondemandkorea import OnDemandKoreaIE
from .ondemandkorea import (
OnDemandKoreaIE,
OnDemandKoreaProgramIE,
)
from .onefootball import OneFootballIE
from .onenewsnz import OneNewsNZIE
from .oneplace import OnePlacePodcastIE
@ -1416,6 +1420,7 @@ from .orf import (
ORFTVthekIE,
ORFFM4StoryIE,
ORFRadioIE,
ORFPodcastIE,
ORFIPTVIE,
)
from .outsidetv import OutsideTVIE
@ -1578,6 +1583,10 @@ from .radiocanada import (
RadioCanadaIE,
RadioCanadaAudioVideoIE,
)
from .radiocomercial import (
RadioComercialIE,
RadioComercialPlaylistIE,
)
from .radiode import RadioDeIE
from .radiojavan import RadioJavanIE
from .radiobremen import RadioBremenIE
@ -1758,6 +1767,11 @@ from .samplefocus import SampleFocusIE
from .sapo import SapoIE
from .savefrom import SaveFromIE
from .sbs import SBSIE
from .sbscokr import (
SBSCoKrIE,
SBSCoKrAllvodProgramIE,
SBSCoKrProgramsVodIE,
)
from .screen9 import Screen9IE
from .screencast import ScreencastIE
from .screencastify import ScreencastifyIE
@ -1902,6 +1916,8 @@ from .srmediathek import SRMediathekIE
from .stacommu import (
StacommuLiveIE,
StacommuVODIE,
TheaterComplexTownVODIE,
TheaterComplexTownPPVIE,
)
from .stanfordoc import StanfordOpenClassroomIE
from .startv import StarTVIE
@ -2014,7 +2030,6 @@ from .thestar import TheStarIE
from .thesun import TheSunIE
from .theweatherchannel import TheWeatherChannelIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
from .thisoldhouse import ThisOldHouseIE
from .thisvid import (
ThisVidIE,

View File

@ -21,10 +21,10 @@ class BrilliantpalaBaseIE(InfoExtractor):
def _get_logged_in_username(self, url, video_id):
webpage, urlh = self._download_webpage_handle(url, video_id)
if self._LOGIN_API == urlh.url:
if urlh.url.startswith(self._LOGIN_API):
self.raise_login_required()
return self._html_search_regex(
r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'stream page info', 'username')
r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'logged-in username')
def _perform_login(self, username, password):
login_form = self._hidden_inputs(self._download_webpage(

View File

@ -0,0 +1,73 @@
import json
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from ..utils import ExtractorError
from ..utils.traversal import traverse_obj
class LaXarxaMesIE(InfoExtractor):
    # Extractor for laxarxames.cat "player" and "movie-details" pages.
    # A login token is required; the media itself is handed over to BrightcoveNewIE.
    _VALID_URL = r'https?://(?:www\.)?laxarxames\.cat/(?:[^/?#]+/)*?(player|movie-details)/(?P<id>\d+)'
    _NETRC_MACHINE = 'laxarxames'
    # Token returned by the SignIn endpoint; falsy until _perform_login() obtained one
    _TOKEN = None
    _TESTS = [{
        'url': 'https://www.laxarxames.cat/player/3459421',
        'md5': '0966f46c34275934c19af78f3df6e2bc',
        'info_dict': {
            'id': '6339612436112',
            'ext': 'mp4',
            'title': 'Resum | UA Horta — UD Viladecans',
            'timestamp': 1697905186,
            'thumbnail': r're:https?://.*\.jpg',
            'description': '',
            'upload_date': '20231021',
            'duration': 129.44,
            'tags': ['ott', 'esports', '23-24', ' futbol', ' futbol-partits', 'elit', 'resum'],
            'uploader_id': '5779379807001',
        },
        'skip': 'Requires login',
    }]

    def _perform_login(self, username, password):
        # Sign in with the given credentials and remember the returned token.
        # Does nothing when a token is already present.
        if self._TOKEN:
            return

        sign_in_headers = {
            'X-Tenantorigin': 'https://laxarxames.cat',
            'Content-Type': 'application/json',
        }
        sign_in_payload = {
            'Username': username,
            'Password': password,
            'Device': {
                'PlatformCode': 'WEB',
                'Name': 'Mac OS ()',
            },
        }
        # NOTE(review): expected_status=401 presumably lets a rejected login still
        # come back as JSON so it ends up in the "Login failed" branch - confirm
        login = self._download_json(
            'https://api.laxarxames.cat/Authorization/SignIn', None, note='Logging in',
            headers=sign_in_headers, data=json.dumps(sign_in_payload).encode(),
            expected_status=401)

        # Only a string token is accepted; anything else leaves _TOKEN as None
        self._TOKEN = traverse_obj(login, ('AuthorizationToken', 'Token', {str}))
        if self._TOKEN:
            return
        raise ExtractorError('Login failed', expected=True)

    def _real_extract(self, url):
        # Ask the API for the play info of the media id in the URL and
        # return a url_result pointing at the matching Brightcove player.
        video_id = self._match_id(url)
        if not self._TOKEN:
            self.raise_login_required()

        play_info_request = json.dumps({
            'MediaId': int(video_id),
            'StreamType': 'MAIN'
        }).encode()
        media_play_info = self._download_json(
            'https://api.laxarxames.cat/Media/GetMediaPlayInfo', video_id,
            data=play_info_request, headers={
                'Authorization': f'Bearer {self._TOKEN}',
                'X-Tenantorigin': 'https://laxarxames.cat',
                'Content-Type': 'application/json',
            })

        has_content_url = bool(traverse_obj(media_play_info, ('ContentUrl', {str})))
        if not has_content_url:
            # NOTE(review): if raise_no_formats() can return instead of raising,
            # the "ContentUrl" lookup below fails with KeyError - confirm
            self.raise_no_formats('No video found', expected=True)

        # "ContentUrl" is used as the Brightcove videoId
        brightcove_url = f'https://players.brightcove.net/5779379807001/default_default/index.html?videoId={media_play_info["ContentUrl"]}'
        return self.url_result(
            brightcove_url, BrightcoveNewIE, video_id, media_play_info.get('Title'))

View File

@ -142,6 +142,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
"duration": 256,
'thumbnail': r're:^http.*\.jpg',
'album': '偶像练习生 表演曲目合集',
'average_rating': int,
'album_artist': '偶像练习生',
},
}, {
'note': 'No lyrics.',
@ -155,6 +158,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'timestamp': 1202745600,
'duration': 263,
'thumbnail': r're:^http.*\.jpg',
'album': 'Piano Solos Vol. 2',
'album_artist': 'Dustin O\'Halloran',
'average_rating': int,
},
}, {
'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
@ -171,6 +177,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'duration': 268,
'alt_title': '伴唱:现代人乐队 合唱:总政歌舞团',
'thumbnail': r're:^http.*\.jpg',
'average_rating': int,
'album': '红色摇滚',
'album_artist': '侯牧人',
},
}, {
'url': 'http://music.163.com/#/song?id=32102397',
@ -186,6 +195,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
'duration': 199,
'thumbnail': r're:^http.*\.jpg',
'album': 'Bad Blood',
'average_rating': int,
'album_artist': 'Taylor Swift',
},
'skip': 'Blocked outside Mainland China',
}, {
@ -203,6 +215,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'duration': 229,
'alt_title': '说出愿望吧(Genie)',
'thumbnail': r're:^http.*\.jpg',
'average_rating': int,
'album': 'Oh!',
'album_artist': '少女时代',
},
'skip': 'Blocked outside Mainland China',
}]
@ -253,12 +268,15 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'formats': formats,
'alt_title': '/'.join(traverse_obj(info, (('transNames', 'alias'), ...))) or None,
'creator': ' / '.join(traverse_obj(info, ('artists', ..., 'name'))) or None,
'album_artist': ' / '.join(traverse_obj(info, ('album', 'artists', ..., 'name'))) or None,
**lyric_data,
**traverse_obj(info, {
'title': ('name', {str}),
'timestamp': ('album', 'publishTime', {self.kilo_or_none}),
'thumbnail': ('album', 'picUrl', {url_or_none}),
'duration': ('duration', {self.kilo_or_none}),
'album': ('album', 'name', {str}),
'average_rating': ('score', {int_or_none}),
}),
}

View File

@ -3,6 +3,8 @@ import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
get_element_by_class,
int_or_none,
join_nonempty,
parse_duration,
@ -45,25 +47,36 @@ class NhkBaseIE(InfoExtractor):
self.cache.store('nhk', 'api_info', api_info)
return api_info
def _extract_formats_and_subtitles(self, vod_id):
def _extract_stream_info(self, vod_id):
for refresh in (False, True):
api_info = self._get_api_info(refresh)
if not api_info:
continue
api_url = api_info.pop('url')
stream_url = traverse_obj(
meta = traverse_obj(
self._download_json(
api_url, vod_id, 'Downloading stream url info', fatal=False, query={
**api_info,
'type': 'json',
'optional_id': vod_id,
'active_flg': 1,
}),
('meta', 0, 'movie_url', ('mb_auto', 'auto_sp', 'auto_pc'), {url_or_none}), get_all=False)
if stream_url:
return self._extract_m3u8_formats_and_subtitles(stream_url, vod_id)
}), ('meta', 0))
stream_url = traverse_obj(
meta, ('movie_url', ('mb_auto', 'auto_sp', 'auto_pc'), {url_or_none}), get_all=False)
if stream_url:
formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, vod_id)
return {
**traverse_obj(meta, {
'duration': ('duration', {int_or_none}),
'timestamp': ('publication_date', {unified_timestamp}),
'release_timestamp': ('insert_date', {unified_timestamp}),
'modified_timestamp': ('update_date', {unified_timestamp}),
}),
'formats': formats,
'subtitles': subtitles,
}
raise ExtractorError('Unable to extract stream url')
def _extract_episode_info(self, url, episode=None):
@ -77,11 +90,11 @@ class NhkBaseIE(InfoExtractor):
if fetch_episode:
episode = self._call_api(
episode_id, lang, is_video, True, episode_id[:4] == '9999')[0]
title = episode.get('sub_title_clean') or episode['sub_title']
def get_clean_field(key):
return episode.get(key + '_clean') or episode.get(key)
return clean_html(episode.get(key + '_clean') or episode.get(key))
title = get_clean_field('sub_title')
series = get_clean_field('title')
thumbnails = []
@ -96,22 +109,30 @@ class NhkBaseIE(InfoExtractor):
'url': 'https://www3.nhk.or.jp' + img_path,
})
episode_name = title
if series and title:
title = f'{series} - {title}'
elif series and not title:
title = series
series = None
episode_name = None
else: # title, no series
episode_name = None
info = {
'id': episode_id + '-' + lang,
'title': '%s - %s' % (series, title) if series and title else title,
'title': title,
'description': get_clean_field('description'),
'thumbnails': thumbnails,
'series': series,
'episode': title,
'episode': episode_name,
}
if is_video:
vod_id = episode['vod_id']
formats, subs = self._extract_formats_and_subtitles(vod_id)
info.update({
**self._extract_stream_info(vod_id),
'id': vod_id,
'formats': formats,
'subtitles': subs,
})
else:
@ -148,6 +169,14 @@ class NhkVodIE(NhkBaseIE):
'thumbnail': 'md5:51bcef4a21936e7fea1ff4e06353f463',
'episode': 'The Tohoku Shinkansen: Full Speed Ahead',
'series': 'Japan Railway Journal',
'modified_timestamp': 1694243656,
'timestamp': 1681428600,
'release_timestamp': 1693883728,
'duration': 1679,
'upload_date': '20230413',
'modified_date': '20230909',
'release_date': '20230905',
},
}, {
# video clip
@ -161,6 +190,13 @@ class NhkVodIE(NhkBaseIE):
'thumbnail': 'md5:d6a4d9b6e9be90aaadda0bcce89631ed',
'series': 'Dining with the Chef',
'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU',
'duration': 148,
'upload_date': '20190816',
'release_date': '20230902',
'release_timestamp': 1693619292,
'modified_timestamp': 1694168033,
'modified_date': '20230908',
'timestamp': 1565997540,
},
}, {
# radio
@ -170,7 +206,7 @@ class NhkVodIE(NhkBaseIE):
'ext': 'm4a',
'title': 'Living in Japan - Tips for Travelers to Japan / Ramen Vending Machines',
'series': 'Living in Japan',
'description': 'md5:850611969932874b4a3309e0cae06c2f',
'description': 'md5:0a0e2077d8f07a03071e990a6f51bfab',
'thumbnail': 'md5:960622fb6e06054a4a1a0c97ea752545',
'episode': 'Tips for Travelers to Japan / Ramen Vending Machines'
},
@ -212,6 +248,23 @@ class NhkVodIE(NhkBaseIE):
'description': 'md5:9c1d6cbeadb827b955b20e99ab920ff0',
},
'skip': 'expires 2023-10-15',
}, {
# a one-off (single-episode series). title from the api is just '<p></p>'
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/3004952/',
'info_dict': {
'id': 'nw_vod_v_en_3004_952_20230723091000_01_1690074552',
'ext': 'mp4',
'title': 'Barakan Discovers AMAMI OSHIMA: Isson\'s Treasure Island',
'description': 'md5:5db620c46a0698451cc59add8816b797',
'thumbnail': 'md5:67d9ff28009ba379bfa85ad1aaa0e2bd',
'release_date': '20230905',
'timestamp': 1690103400,
'duration': 2939,
'release_timestamp': 1693898699,
'modified_timestamp': 1698057495,
'modified_date': '20231023',
'upload_date': '20230723',
},
}]
def _real_extract(self, url):
@ -226,13 +279,15 @@ class NhkVodProgramIE(NhkBaseIE):
'info_dict': {
'id': 'sumo',
'title': 'GRAND SUMO Highlights',
'description': 'md5:fc20d02dc6ce85e4b72e0273aa52fdbf',
},
'playlist_mincount': 12,
'playlist_mincount': 0,
}, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway',
'info_dict': {
'id': 'japanrailway',
'title': 'Japan Railway Journal',
'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
},
'playlist_mincount': 12,
}, {
@ -241,6 +296,7 @@ class NhkVodProgramIE(NhkBaseIE):
'info_dict': {
'id': 'japanrailway',
'title': 'Japan Railway Journal',
'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
},
'playlist_mincount': 5,
}, {
@ -265,11 +321,11 @@ class NhkVodProgramIE(NhkBaseIE):
entries.append(self._extract_episode_info(
urljoin(url, episode_path), episode))
program_title = None
if entries:
program_title = entries[0].get('series')
html = self._download_webpage(url, program_id)
program_title = clean_html(get_element_by_class('p-programDetail__title', html))
program_description = clean_html(get_element_by_class('p-programDetail__text', html))
return self.playlist_result(entries, program_id, program_title)
return self.playlist_result(entries, program_id, program_title, program_description)
class NhkForSchoolBangumiIE(InfoExtractor):
@ -421,6 +477,7 @@ class NhkRadiruIE(InfoExtractor):
'skip': 'Episode expired on 2023-04-16',
'info_dict': {
'channel': 'NHK-FM',
'uploader': 'NHK-FM',
'description': 'md5:94b08bdeadde81a97df4ec882acce3e9',
'ext': 'm4a',
'id': '0449_01_3853544',
@ -441,6 +498,7 @@ class NhkRadiruIE(InfoExtractor):
'title': 'ベストオブクラシック',
'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。',
'channel': 'NHK-FM',
'uploader': 'NHK-FM',
'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg',
},
'playlist_mincount': 3,
@ -454,6 +512,7 @@ class NhkRadiruIE(InfoExtractor):
'title': '有島武郎「一房のぶどう」',
'description': '朗読:川野一宇(ラジオ深夜便アンカー)\r\n\r\n2016年12月8日放送「ラジオ深夜便『アンカー朗読シリーズ』」より',
'channel': 'NHKラジオ第1、NHK-FM',
'uploader': 'NHKラジオ第1、NHK-FM',
'timestamp': 1635757200,
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F300/img/corner/box_109_thumbnail.jpg',
'release_date': '20161207',
@ -469,6 +528,7 @@ class NhkRadiruIE(InfoExtractor):
'id': 'F261_01_3855109',
'ext': 'm4a',
'channel': 'NHKラジオ第1',
'uploader': 'NHKラジオ第1',
'timestamp': 1681635900,
'release_date': '20230416',
'series': 'NHKラジオニュース',
@ -513,6 +573,7 @@ class NhkRadiruIE(InfoExtractor):
series_meta = traverse_obj(meta, {
'title': 'program_name',
'channel': 'media_name',
'uploader': 'media_name',
'thumbnail': (('thumbnail_c', 'thumbnail_p'), {url_or_none}),
}, get_all=False)
@ -541,6 +602,7 @@ class NhkRadioNewsPageIE(InfoExtractor):
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
'description': 'md5:bf2c5b397e44bc7eb26de98d8f15d79d',
'channel': 'NHKラジオ第1',
'uploader': 'NHKラジオ第1',
'title': 'NHKラジオニュース',
}
}]

View File

@ -13,7 +13,7 @@ from ..utils import (
class NovaEmbedIE(InfoExtractor):
_VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://media(?:tn)?\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
'info_dict': {
@ -37,6 +37,16 @@ class NovaEmbedIE(InfoExtractor):
'duration': 114,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://mediatn.cms.nova.cz/embed/EU5ELEsmOHt?autoplay=1',
'info_dict': {
'id': 'EU5ELEsmOHt',
'ext': 'mp4',
'title': 'Haptické křeslo, bionická ruka nebo roboti. Reportérka se podívala na Týden inovací',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 1780,
},
'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):

View File

@ -1,21 +1,21 @@
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
js_to_json,
parse_duration,
url_or_none,
)
from ..utils.traversal import traverse_obj
class NTVDeIE(InfoExtractor):
IE_NAME = 'n-tv.de'
_VALID_URL = r'https?://(?:www\.)?n-tv\.de/mediathek/videos/[^/?#]+/[^/?#]+-article(?P<id>.+)\.html'
_VALID_URL = r'https?://(?:www\.)?n-tv\.de/mediathek/(?:videos|magazine)/[^/?#]+/[^/?#]+-article(?P<id>[^/?#]+)\.html'
_TESTS = [{
'url': 'http://www.n-tv.de/mediathek/videos/panorama/Schnee-und-Glaette-fuehren-zu-zahlreichen-Unfaellen-und-Staus-article14438086.html',
'md5': '6ef2514d4b1e8e03ca24b49e2f167153',
'md5': '6bcf2a6638cb83f45d5561659a1cb498',
'info_dict': {
'id': '14438086',
'ext': 'mp4',
@ -23,51 +23,61 @@ class NTVDeIE(InfoExtractor):
'title': 'Schnee und Glätte führen zu zahlreichen Unfällen und Staus',
'alt_title': 'Winterchaos auf deutschen Straßen',
'description': 'Schnee und Glätte sorgen deutschlandweit für einen chaotischen Start in die Woche: Auf den Straßen kommt es zu kilometerlangen Staus und Dutzenden Glätteunfällen. In Düsseldorf und München wirbelt der Schnee zudem den Flugplan durcheinander. Dutzende Flüge landen zu spät, einige fallen ganz aus.',
'duration': 4020,
'duration': 67,
'timestamp': 1422892797,
'upload_date': '20150202',
},
}, {
'url': 'https://www.n-tv.de/mediathek/magazine/auslandsreport/Juedische-Siedler-wollten-Rache-die-wollten-nur-toeten-article24523089.html',
'md5': 'c5c6014c014ccc3359470e1d34472bfd',
'info_dict': {
'id': '24523089',
'ext': 'mp4',
'thumbnail': r're:^https?://.*\.jpg$',
'title': 'Jüdische Siedler "wollten Rache, die wollten nur töten"',
'alt_title': 'Israelische Gewalt fern von Gaza',
'description': 'Vier Tage nach dem Massaker der Hamas greifen jüdische Siedler das Haus einer palästinensischen Familie im Westjordanland an. Die Überlebenden berichten, sie waren unbewaffnet, die Angreifer seien nur auf "Rache und Töten" aus gewesen. Als die Toten beerdigt werden sollen, eröffnen die Siedler erneut das Feuer.',
'duration': 326,
'timestamp': 1699688294,
'upload_date': '20231111',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
info = self._parse_json(self._search_regex(
r'(?s)ntv\.pageInfo\.article\s*=\s*(\{.*?\});', webpage, 'info'),
video_id, transform_source=js_to_json)
timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp'))
vdata = self._parse_json(self._search_regex(
r'(?s)\$\(\s*"\#player"\s*\)\s*\.data\(\s*"player",\s*(\{.*?\})\);',
webpage, 'player data'), video_id,
transform_source=lambda s: js_to_json(re.sub(r'advertising:\s*{[^}]+},', '', s)))
duration = parse_duration(vdata.get('duration'))
info = self._search_json(
r'article:', webpage, 'info', video_id, transform_source=js_to_json)
vdata = self._search_json(
r'\$\(\s*"#playerwrapper"\s*\)\s*\.data\(\s*"player",',
webpage, 'player data', video_id,
transform_source=lambda s: js_to_json(re.sub(r'ivw:[^},]+', '', s)))['setup']['source']
formats = []
if vdata.get('video'):
if vdata.get('progressive'):
formats.append({
'format_id': 'flash',
'url': 'rtmp://fms.n-tv.de/%s' % vdata['video'],
'format_id': 'http',
'url': vdata['progressive'],
})
if vdata.get('videoMp4'):
formats.append({
'format_id': 'mobile',
'url': compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoMp4']),
'tbr': 400, # estimation
})
if vdata.get('videoM3u8'):
m3u8_url = compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoM3u8'])
if vdata.get('hls'):
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
quality=1, m3u8_id='hls', fatal=False))
vdata['hls'], video_id, 'mp4', m3u8_id='hls', fatal=False))
if vdata.get('dash'):
formats.extend(self._extract_mpd_formats(vdata['dash'], video_id, fatal=False, mpd_id='dash'))
return {
'id': video_id,
'title': info['headline'],
'description': info.get('intro'),
'alt_title': info.get('kicker'),
'timestamp': timestamp,
'thumbnail': vdata.get('html5VideoPoster'),
'duration': duration,
**traverse_obj(info, {
'title': 'headline',
'description': 'intro',
'alt_title': 'kicker',
'timestamp': ('publishedDateAsUnixTimeStamp', {int_or_none}),
}),
**traverse_obj(vdata, {
'thumbnail': ('poster', {url_or_none}),
'duration': ('length', {int_or_none}),
}),
'formats': formats,
}

View File

@ -1,87 +1,167 @@
import functools
import re
import uuid
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
ExtractorError,
js_to_json,
OnDemandPagedList,
float_or_none,
int_or_none,
join_nonempty,
parse_age_limit,
parse_qs,
unified_strdate,
url_or_none,
)
from ..utils.traversal import traverse_obj
class OnDemandKoreaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
_VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?:en/)?player/vod/[a-z0-9-]+\?(?:[^#]+&)?contentId=(?P<id>\d+)'
_GEO_COUNTRIES = ['US', 'CA']
_TESTS = [{
'url': 'https://www.ondemandkorea.com/ask-us-anything-e351.html',
'url': 'https://www.ondemandkorea.com/player/vod/ask-us-anything?contentId=686471',
'md5': 'e2ff77255d989e3135bde0c5889fbce8',
'info_dict': {
'id': 'ask-us-anything-e351',
'id': '686471',
'ext': 'mp4',
'title': 'Ask Us Anything : Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won - 09/24/2022',
'description': 'A talk show/game show with a school theme where celebrity guests appear as “transfer students.”',
'thumbnail': r're:^https?://.*\.jpg$',
'title': 'Ask Us Anything: Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
'duration': 5486.955,
'release_date': '20220924',
'series': 'Ask Us Anything',
'series_id': 11790,
'episode_number': 351,
'episode': 'Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won',
},
'params': {
'skip_download': 'm3u8 download'
}
}, {
'url': 'https://www.ondemandkorea.com/work-later-drink-now-e1.html',
'url': 'https://www.ondemandkorea.com/player/vod/breakup-probation-a-week?contentId=1595796',
'md5': '57266c720006962be7ff415b24775caa',
'info_dict': {
'id': 'work-later-drink-now-e1',
'id': '1595796',
'ext': 'mp4',
'title': 'Work Later, Drink Now : E01',
'description': 'Work Later, Drink First follows three women who find solace in a glass of liquor at the end of the day. So-hee, who gets comfort from a cup of soju af',
'thumbnail': r're:^https?://.*\.png$',
'subtitles': {
'English': 'mincount:1',
},
'title': 'Breakup Probation, A Week: E08',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
'duration': 1586.0,
'release_date': '20231001',
'series': 'Breakup Probation, A Week',
'series_id': 22912,
'episode_number': 8,
'episode': 'E08',
},
'params': {
'skip_download': 'm3u8 download'
}
}, {
'url': 'https://www.ondemandkorea.com/player/vod/the-outlaws?contentId=369531',
'md5': 'fa5523b87aa1f6d74fc622a97f2b47cd',
'info_dict': {
'id': '369531',
'ext': 'mp4',
'release_date': '20220519',
'duration': 7267.0,
'title': 'The Outlaws: Main Movie',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
'age_limit': 18,
},
}, {
'url': 'https://www.ondemandkorea.com/en/player/vod/capture-the-moment-how-is-that-possible?contentId=1605006',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id, fatal=False)
if not webpage:
# Page sometimes returns captcha page with HTTP 403
raise ExtractorError(
'Unable to access page. You may have been blocked.',
expected=True)
data = self._download_json(
f'https://odkmedia.io/odx/api/v3/playback/{video_id}/', video_id, fatal=False,
headers={'service-name': 'odk'}, query={'did': str(uuid.uuid4())}, expected_status=(403, 404))
if not traverse_obj(data, ('result', {dict})):
msg = traverse_obj(data, ('messages', '__default'), 'title', expected_type=str)
raise ExtractorError(msg or 'Got empty response from playback API', expected=True)
if 'msg_block_01.png' in webpage:
self.raise_geo_restricted(
msg='This content is not available in your region',
countries=self._GEO_COUNTRIES)
data = data['result']
if 'This video is only available to ODK PLUS members.' in webpage:
raise ExtractorError(
'This video is only available to ODK PLUS members.',
expected=True)
def try_geo_bypass(url):
return traverse_obj(url, ({parse_qs}, 'stream_url', 0, {url_or_none})) or url
if 'ODK PREMIUM Members Only' in webpage:
raise ExtractorError(
'This video is only available to ODK PREMIUM members.',
expected=True)
def try_upgrade_quality(url):
mod_url = re.sub(r'_720(p?)\.m3u8', r'_1080\1.m3u8', url)
return mod_url if mod_url != url and self._request_webpage(
HEADRequest(mod_url), video_id, note='Checking for higher quality format',
errnote='No higher quality format found', fatal=False) else url
title = self._search_regex(
r'class=["\']episode_title["\'][^>]*>([^<]+)',
webpage, 'episode_title', fatal=False) or self._og_search_title(webpage)
formats = []
for m3u8_url in traverse_obj(data, (('sources', 'manifest'), ..., 'url', {url_or_none}, {try_geo_bypass})):
formats.extend(self._extract_m3u8_formats(try_upgrade_quality(m3u8_url), video_id, fatal=False))
jw_config = self._parse_json(
self._search_regex((
r'(?P<options>{\s*[\'"]tracks[\'"].*?})[)\];]+$',
r'playlist\s*=\s*\[(?P<options>.+)];?$',
r'odkPlayer\.init.*?(?P<options>{[^;]+}).*?;',
), webpage, 'jw config', flags=re.MULTILINE | re.DOTALL, group='options'),
video_id, transform_source=js_to_json)
info = self._parse_jwplayer_data(
jw_config, video_id, require_title=False, m3u8_id='hls',
base_url=url)
subtitles = {}
for track in traverse_obj(data, ('text_tracks', lambda _, v: url_or_none(v['url']))):
subtitles.setdefault(track.get('language', 'und'), []).append({
'url': track['url'],
'ext': track.get('codec'),
'name': track.get('label'),
})
info.update({
'title': title,
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage)
})
return info
def if_series(key=None):
return lambda obj: obj[key] if key and obj['kind'] == 'series' else None
return {
'id': video_id,
'title': join_nonempty(
('episode', 'program', 'title'),
('episode', 'title'), from_dict=data, delim=': '),
**traverse_obj(data, {
'thumbnail': ('episode', 'images', 'thumbnail', {url_or_none}),
'release_date': ('episode', 'release_date', {lambda x: x.replace('-', '')}, {unified_strdate}),
'duration': ('duration', {functools.partial(float_or_none, scale=1000)}),
'age_limit': ('age_rating', 'name', {lambda x: x.replace('R', '')}, {parse_age_limit}),
'series': ('episode', {if_series(key='program')}, 'title'),
'series_id': ('episode', {if_series(key='program')}, 'id'),
'episode': ('episode', {if_series(key='title')}),
'episode_number': ('episode', {if_series(key='number')}, {int_or_none}),
}, get_all=False),
'formats': formats,
'subtitles': subtitles,
}
class OnDemandKoreaProgramIE(InfoExtractor):
    # Playlist extractor for ondemandkorea.com program pages (URLs without a contentId query).
    # Every episode is delegated to OnDemandKoreaIE.
    _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?:en/)?player/vod/(?P<id>[a-z0-9-]+)(?:$|#)'
    _GEO_COUNTRIES = ['US', 'CA']
    _TESTS = [{
        'url': 'https://www.ondemandkorea.com/player/vod/uskn-news',
        'info_dict': {
            'id': 'uskn-news',
        },
        'playlist_mincount': 755,
    }, {
        'url': 'https://www.ondemandkorea.com/en/player/vod/the-land',
        'info_dict': {
            'id': 'the-land',
        },
        'playlist_count': 52,
    }]

    # Number of episodes requested per API page
    _PAGE_SIZE = 100

    def _fetch_page(self, display_id, page):
        # Yield one url_result per episode listed on the requested page.
        # The incoming page index is shifted by one before it is sent to the API.
        page = page + 1
        episodes_query = {
            'page': page,
            'page_size': self._PAGE_SIZE,
        }
        page_data = self._download_json(
            f'https://odkmedia.io/odx/api/v3/program/{display_id}/episodes/', display_id,
            headers={'service-name': 'odk'}, query=episodes_query,
            note=f'Downloading page {page}', expected_status=404)

        yield from (
            self.url_result(
                f'https://www.ondemandkorea.com/player/vod/{display_id}?contentId={episode["id"]}',
                ie=OnDemandKoreaIE, video_title=episode.get('title'))
            for episode in traverse_obj(page_data, ('result', 'results', ...)))

    def _real_extract(self, url):
        # The program slug from the URL doubles as the playlist id
        display_id = self._match_id(url)
        page_fetcher = functools.partial(self._fetch_page, display_id)
        return self.playlist_result(
            OnDemandPagedList(page_fetcher, self._PAGE_SIZE), display_id)

View File

@ -4,15 +4,16 @@ import re
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
InAdvancePagedList,
clean_html,
determine_ext,
float_or_none,
InAdvancePagedList,
int_or_none,
join_nonempty,
make_archive_id,
mimetype2ext,
orderedSet,
remove_end,
make_archive_id,
smuggle_url,
strip_jsonp,
try_call,
@ -21,6 +22,7 @@ from ..utils import (
unsmuggle_url,
url_or_none,
)
from ..utils.traversal import traverse_obj
class ORFTVthekIE(InfoExtractor):
@ -334,6 +336,45 @@ class ORFRadioIE(InfoExtractor):
self._entries(data, station or station2), show_id, data.get('title'), clean_html(data.get('subtitle')))
class ORFPodcastIE(InfoExtractor):
    # Extractor for single podcast episodes on sound.orf.at
    IE_NAME = 'orf:podcast'
    # Alternation of the station codes accepted in the URL
    _STATION_RE = '|'.join(re.escape(station_code) for station_code in (
        'bgl', 'fm4', 'ktn', 'noe', 'oe1', 'oe3',
        'ooe', 'sbg', 'stm', 'tir', 'tv', 'vbg', 'wie'))
    _VALID_URL = rf'https?://sound\.orf\.at/podcast/(?P<station>{_STATION_RE})/(?P<show>[\w-]+)/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://sound.orf.at/podcast/oe3/fruehstueck-bei-mir/nicolas-stockhammer-15102023',
        'md5': '526a5700e03d271a1505386a8721ab9b',
        'info_dict': {
            'id': 'nicolas-stockhammer-15102023',
            'ext': 'mp3',
            'title': 'Nicolas Stockhammer (15.10.2023)',
            'duration': 3396.0,
            'series': 'Frühstück bei mir',
        },
        'skip': 'ORF podcasts are only available for a limited time'
    }]

    def _real_extract(self, url):
        # Look the episode up in the audio API and map its "payload" to an info dict
        url_match = self._match_valid_url(url)
        station, show, show_id = url_match.group('station', 'show', 'id')
        data = self._download_json(
            f'https://audioapi.orf.at/radiothek/api/2.0/podcast/{station}/{show}/{show_id}', show_id)

        # The duration value is divided by 1000 via float_or_none(scale=1000)
        scaled_duration = functools.partial(float_or_none, scale=1000)
        episode_fields = traverse_obj(data, ('payload', {
            'url': ('enclosures', 0, 'url'),
            'ext': ('enclosures', 0, 'type', {mimetype2ext}),
            'title': 'title',
            'description': ('description', {clean_html}),
            'duration': ('duration', {scaled_duration}),
            'series': ('podcast', 'title'),
        }))

        # 'mp3' is only the fallback; an 'ext' found in the payload overrides it
        return {
            'id': show_id,
            'ext': 'mp3',
            'vcodec': 'none',
            **episode_fields,
        }
class ORFIPTVIE(InfoExtractor):
IE_NAME = 'orf:iptv'
IE_DESC = 'iptv.ORF.at'

View File

@ -4,6 +4,7 @@ from ..utils import (
parse_iso8601,
unescapeHTML,
)
from ..utils.traversal import traverse_obj
class PeriscopeBaseIE(InfoExtractor):
@ -20,8 +21,6 @@ class PeriscopeBaseIE(InfoExtractor):
title = broadcast.get('status') or 'Periscope Broadcast'
uploader = broadcast.get('user_display_name') or broadcast.get('username')
title = '%s - %s' % (uploader, title) if uploader else title
is_live = broadcast.get('state').lower() == 'running'
thumbnails = [{
'url': broadcast[image],
} for image in ('image_url', 'image_url_medium', 'image_url_small') if broadcast.get(image)]
@ -31,12 +30,16 @@ class PeriscopeBaseIE(InfoExtractor):
'title': title,
'timestamp': parse_iso8601(broadcast.get('created_at')) or int_or_none(
broadcast.get('created_at_ms'), scale=1000),
'release_timestamp': int_or_none(broadcast.get('scheduled_start_ms'), scale=1000),
'uploader': uploader,
'uploader_id': broadcast.get('user_id') or broadcast.get('username'),
'thumbnails': thumbnails,
'view_count': int_or_none(broadcast.get('total_watched')),
'tags': broadcast.get('tags'),
'is_live': is_live,
'live_status': {
'running': 'is_live',
'not_started': 'is_upcoming',
}.get(traverse_obj(broadcast, ('state', {str.lower}))) or 'was_live'
}
@staticmethod

View File

@ -0,0 +1,150 @@
import itertools
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
extract_attributes,
get_element_by_class,
get_element_html_by_class,
get_element_text_and_html_by_tag,
get_elements_html_by_class,
int_or_none,
join_nonempty,
try_call,
unified_strdate,
update_url,
urljoin
)
from ..utils.traversal import traverse_obj
class RadioComercialIE(InfoExtractor):
    """Extractor for individual podcast episodes on radiocomercial.pt."""

    _VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/[^/?#]+/t?(?P<season>\d+)/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao/t6/taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas#page-content-wrapper',
        'md5': '5f4fe8e485b29d2e8fd495605bc2c7e4',
        'info_dict': {
            'id': 'taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas',
            'ext': 'mp3',
            'title': 'Taylor Swift entranhando-se que nem uma espada no ventre dos fãs.',
            'release_date': '20231025',
            'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
            'season': 6
        }
    }, {
        'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3/convenca-me-num-minuto-que-os-lobisomens-existem',
        'md5': '47e96c273aef96a8eb160cd6cf46d782',
        'info_dict': {
            'id': 'convenca-me-num-minuto-que-os-lobisomens-existem',
            'ext': 'mp3',
            'title': 'Convença-me num minuto que os lobisomens existem',
            'release_date': '20231026',
            'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
            'season': 3
        }
    }, {
        'url': 'https://radiocomercial.pt/podcasts/inacreditavel-by-ines-castel-branco/t2/o-desastre-de-aviao',
        'md5': '69be64255420fec23b7259955d771e54',
        'info_dict': {
            'id': 'o-desastre-de-aviao',
            'ext': 'mp3',
            'title': 'O desastre de avião',
            'description': 'md5:8a82beeb372641614772baab7246245f',
            'release_date': '20231101',
            'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
            'season': 2
        },
        'params': {
            # inconsistent md5
            'skip_download': True,
        },
    }, {
        'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/2023/t-n-t-29-de-outubro',
        'md5': '91d32d4d4b1407272068b102730fc9fa',
        'info_dict': {
            'id': 't-n-t-29-de-outubro',
            'ext': 'mp3',
            'title': 'T.N.T 29 de outubro',
            'release_date': '20231029',
            'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
            'season': 2023
        }
    }]

    def _real_extract(self, url):
        """Download the episode page and scrape its metadata and audio link."""
        video_id, season = self._match_valid_url(url).group('id', 'season')
        webpage = self._download_webpage(url, video_id)

        title = self._html_extract_title(webpage)
        description = self._og_search_description(webpage, default=None)

        # The date element is looked up inside the 'descriptions' element of the page
        descriptions_html = get_element_html_by_class('descriptions', webpage) or ''
        release_date = unified_strdate(get_element_by_class('date', descriptions_html))

        thumbnail = self._og_search_thumbnail(webpage)

        # The media URL is the href attribute of the element with class 'audiofile'
        audio_attrs = extract_attributes(get_element_html_by_class('audiofile', webpage) or '')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'release_date': release_date,
            'thumbnail': thumbnail,
            'season': int_or_none(season),
            'url': audio_attrs.get('href'),
        }
class RadioComercialPlaylistIE(InfoExtractor):
    """Extractor for podcast (optionally per-season) listings on radiocomercial.pt."""

    _VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/(?P<id>[\w-]+)(?:/t?(?P<season>\d+))?/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3',
        'info_dict': {
            'id': 'convenca-me-num-minuto_t3',
            'title': 'Convença-me num Minuto - Temporada 3',
        },
        'playlist_mincount': 32
    }, {
        'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao',
        'info_dict': {
            'id': 'o-homem-que-mordeu-o-cao',
            'title': 'O Homem Que Mordeu o Cão',
        },
        'playlist_mincount': 19
    }, {
        'url': 'https://radiocomercial.pt/podcasts/as-minhas-coisas-favoritas',
        'info_dict': {
            'id': 'as-minhas-coisas-favoritas',
            'title': 'As Minhas Coisas Favoritas',
        },
        'playlist_mincount': 131
    }, {
        'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/t2023',
        'info_dict': {
            'id': 'tnt-todos-no-top_t2023',
            'title': 'TNT - Todos No Top - Temporada 2023',
        },
        'playlist_mincount': 39
    }]

    def _entries(self, url, playlist_id):
        """Yield episode URLs page by page until a 404 or a page without episodes."""
        for page_num in itertools.count(1):
            try:
                webpage = self._download_webpage(
                    f'{url}/{page_num}', playlist_id, f'Downloading page {page_num}')
            except ExtractorError as e:
                past_last_page = isinstance(e.cause, HTTPError) and e.cause.status == 404
                if not past_last_page:
                    raise
                # an HTTP 404 ends the pagination
                return

            episodes = get_elements_html_by_class('tm-ouvir-podcast', webpage)
            if not episodes:
                return

            hrefs = traverse_obj(episodes, (..., {extract_attributes}, 'href'))
            # Only pass on links that the single-episode extractor accepts
            yield from filter(
                RadioComercialIE.suitable, (urljoin(url, href) for href in hrefs))

    def _real_extract(self, url):
        """Build the playlist id/title and hand the lazily paged entries to the playlist helper."""
        podcast, season = self._match_valid_url(url).group('id', 'season')
        playlist_id = join_nonempty(podcast, season, delim='_t')
        # Drop query and fragment so page numbers can be appended to the URL
        url = update_url(url, query=None, fragment=None)

        webpage = self._download_webpage(url, playlist_id)
        name = try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0])
        if name == season:
            title = name
        else:
            title = join_nonempty(name, season, delim=' - Temporada ')

        return self.playlist_from_matches(
            self._entries(url, playlist_id), playlist_id, title, ie=RadioComercialIE)

200
yt_dlp/extractor/sbscokr.py Normal file
View File

@ -0,0 +1,200 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
int_or_none,
parse_iso8601,
parse_resolution,
url_or_none,
)
from ..utils.traversal import traverse_obj
class SBSCoKrIE(InfoExtractor):
    """Extractor for single videos on allvod.sbs.co.kr and programs.sbs.co.kr."""

    IE_NAME = 'sbs.co.kr'
    _VALID_URL = [r'https?://allvod\.sbs\.co\.kr/allvod/vod(?:Package)?EndPage\.do\?(?:[^#]+&)?mdaId=(?P<id>\d+)',
                  r'https?://programs\.sbs\.co\.kr/(?:enter|drama|culture|sports|plus|mtv|kth)/[a-z0-9]+/(?:vod|clip|movie)/\d+/(?P<id>(?:OC)?\d+)']
    _TESTS = [{
        'url': 'https://programs.sbs.co.kr/enter/dongsang2/clip/52007/OC467706746?div=main_pop_clip',
        'md5': 'c3f6d45e1fb5682039d94cda23c36f19',
        'info_dict': {
            'id': 'OC467706746',
            'ext': 'mp4',
            'title': '‘아슬아슬’ 박군♥한영의 새 집 인테리어 대첩♨',
            'description': 'md5:6a71eb1979ee4a94ea380310068ccab4',
            'thumbnail': 'https://img2.sbs.co.kr/ops_clip_img/2023/10/10/34c4c0f9-a9a5-4ff6-a92e-9bb4b5f6fa65915w1280.jpg',
            'release_timestamp': 1696889400,
            'release_date': '20231009',
            'view_count': int,
            'like_count': int,
            'duration': 238,
            'age_limit': 15,
            'series': '동상이몽2_너는 내 운명',
            'episode': '레이디제인, ‘혼전임신설’ 3개월 앞당긴 결혼식 비하인드 스토리 최초 공개!',
            'episode_number': 311,
        },
    }, {
        'url': 'https://allvod.sbs.co.kr/allvod/vodPackageEndPage.do?mdaId=22000489324&combiId=PA000000284&packageType=A&isFreeYN=',
        'md5': 'bf46b2e89fda7ae7de01f5743cef7236',
        'info_dict': {
            'id': '22000489324',
            'ext': 'mp4',
            'title': '[다시보기] 트롤리 15회',
            'description': 'md5:0e55d74bef1ac55c61ae90c73ac485f4',
            'thumbnail': 'https://img2.sbs.co.kr/img/sbs_cms/WE/2023/02/14/arC1676333794938-1280-720.jpg',
            'release_timestamp': 1676325600,
            'release_date': '20230213',
            'view_count': int,
            'like_count': int,
            'duration': 5931,
            'age_limit': 15,
            'series': '트롤리',
            'episode': '이거 다 거짓말이야',
            'episode_number': 15,
        },
    }, {
        'url': 'https://programs.sbs.co.kr/enter/fourman/vod/69625/22000508948',
        'md5': '41e8ae4cc6c8424f4e4d76661a4becbf',
        'info_dict': {
            'id': '22000508948',
            'ext': 'mp4',
            'title': '[다시보기] 신발 벗고 돌싱포맨 104회',
            'description': 'md5:c6a247383c4dd661e4b956bf4d3b586e',
            'thumbnail': 'https://img2.sbs.co.kr/img/sbs_cms/WE/2023/08/30/2vb1693355446261-1280-720.jpg',
            'release_timestamp': 1693342800,
            'release_date': '20230829',
            'view_count': int,
            'like_count': int,
            'duration': 7036,
            'age_limit': 15,
            'series': '신발 벗고 돌싱포맨',
            'episode': '돌싱포맨 저격수들 등장!',
            'episode_number': 104,
        },
    }]

    def _call_api(self, video_id, rscuse=''):
        """Fetch the sbs_vodall JSON for `video_id`.

        `rscuse` is forwarded as the 'rscuse' query parameter and shown in the
        progress note; the default empty string requests the default media source.
        """
        return self._download_json(
            f'https://api.play.sbs.co.kr/1.0/sbs_vodall/{video_id}', video_id,
            note=f'Downloading m3u8 information {rscuse}',
            query={
                'platform': 'pcweb',
                'protocol': 'download',
                'absolute_show': 'Y',
                'service': 'program',
                'ssl': 'Y',
                'rscuse': rscuse,
            })

    def _real_extract(self, url):
        """Collect one format per usable media source and the video metadata."""
        video_id = self._match_id(url)

        details = self._call_api(video_id)
        source = traverse_obj(details, ('vod', 'source', 'mediasource', {dict})) or {}

        formats = []
        # When 'mediasourcelist' yields no usable entry, fall back to the single default source
        for stream in traverse_obj(details, (
            'vod', 'source', 'mediasourcelist', lambda _, v: v['mediaurl'] or v['mediarscuse']
        ), default=[source]):
            if not stream.get('mediaurl'):
                rscuse = stream.get('mediarscuse')
                if not rscuse:
                    # The fallback source may be an empty dict; there is nothing to
                    # resolve then, and indexing it would raise KeyError
                    continue
                # The entry has no direct URL; request this specific source from the API
                new_source = traverse_obj(
                    self._call_api(video_id, rscuse=rscuse),
                    ('vod', 'source', 'mediasource', {dict})) or {}
                # Skip if the API answered with the default source again or without a URL
                if new_source.get('mediarscuse') == source.get('mediarscuse') or not new_source.get('mediaurl'):
                    continue
                stream = new_source
            formats.append({
                'url': stream['mediaurl'],
                'format_id': stream.get('mediarscuse'),
                'format_note': stream.get('medianame'),
                **parse_resolution(stream.get('quality')),
                'preference': int_or_none(stream.get('mediarscuse'))
            })

        caption_url = traverse_obj(details, ('vod', 'source', 'subtitle', {url_or_none}))

        return {
            'id': video_id,
            **traverse_obj(details, ('vod', {
                'title': ('info', 'title'),
                'duration': ('info', 'duration', {int_or_none}),
                'view_count': ('info', 'viewcount', {int_or_none}),
                'like_count': ('info', 'likecount', {int_or_none}),
                'description': ('info', 'synopsis', {clean_html}),
                'episode': ('info', 'content', ('contenttitle', 'title')),
                'episode_number': ('info', 'content', 'number', {int_or_none}),
                'series': ('info', 'program', 'programtitle'),
                'age_limit': ('info', 'targetage', {int_or_none}),
                'release_timestamp': ('info', 'broaddate', {parse_iso8601}),
                'thumbnail': ('source', 'thumbnail', 'origin', {url_or_none}),
            }), get_all=False),
            'formats': formats,
            'subtitles': {'ko': [{'url': caption_url}]} if caption_url else None,
        }
class SBSCoKrAllvodProgramIE(InfoExtractor):
    """Playlist extractor for program detail pages on allvod.sbs.co.kr."""

    IE_NAME = 'sbs.co.kr:allvod_program'
    _VALID_URL = r'https?://allvod\.sbs\.co\.kr/allvod/vod(?:Free)?ProgramDetail\.do\?(?:[^#]+&)?pgmId=(?P<id>P?\d+)'
    _TESTS = [{
        'url': 'https://allvod.sbs.co.kr/allvod/vodFreeProgramDetail.do?type=legend&pgmId=22000010159&listOrder=vodCntAsc',
        'info_dict': {
            '_type': 'playlist',
            'id': '22000010159',
        },
        'playlist_count': 18,
    }, {
        'url': 'https://allvod.sbs.co.kr/allvod/vodProgramDetail.do?pgmId=P460810577',
        'info_dict': {
            '_type': 'playlist',
            'id': 'P460810577',
        },
        'playlist_count': 13,
    }]

    def _real_extract(self, url):
        """Fetch the program's episode list and wrap every 'mdaId' as a SBSCoKrIE entry."""
        program_id = self._match_id(url)

        query = {
            'pgmId': program_id,
            'currentCount': '10000',
        }
        details = self._download_json(
            'https://allvod.sbs.co.kr/allvod/vodProgramDetail/vodProgramDetailAjax.do',
            program_id, note='Downloading program details', query=query)

        entries = []
        for media_id in traverse_obj(details, ('list', ..., 'mdaId')):
            entries.append(self.url_result(
                f'https://allvod.sbs.co.kr/allvod/vodEndPage.do?mdaId={media_id}', SBSCoKrIE))

        return self.playlist_result(entries, program_id)
class SBSCoKrProgramsVodIE(InfoExtractor):
    """Resolves a programs.sbs.co.kr 'vods' listing to the matching allvod program page."""

    IE_NAME = 'sbs.co.kr:programs_vod'
    _VALID_URL = r'https?://programs\.sbs\.co\.kr/(?:enter|drama|culture|sports|plus|mtv)/(?P<id>[a-z0-9]+)/vods'
    _TESTS = [{
        'url': 'https://programs.sbs.co.kr/culture/morningwide/vods/65007',
        'info_dict': {
            '_type': 'playlist',
            'id': '00000210215',
        },
        'playlist_mincount': 9782,
    }, {
        'url': 'https://programs.sbs.co.kr/enter/dongsang2/vods/52006',
        'info_dict': {
            '_type': 'playlist',
            'id': '22000010476',
        },
        'playlist_mincount': 312,
    }]

    def _real_extract(self, url):
        """Look up the program id for the URL slug and delegate to SBSCoKrAllvodProgramIE."""
        program_slug = self._match_id(url)

        menu = self._download_json(
            f'https://static.apis.sbs.co.kr/program-api/1.0/menu/{program_slug}', program_slug,
            note='Downloading program menu data')
        # Both keys are required; a missing one raises KeyError
        program_id = menu['program']['programid']

        allvod_url = f'https://allvod.sbs.co.kr/allvod/vodProgramDetail.do?pgmId={program_id}'
        return self.url_result(allvod_url, SBSCoKrAllvodProgramIE)

View File

@ -38,9 +38,48 @@ class StacommuBaseIE(WrestleUniverseBaseIE):
return None
return traverse_obj(encryption_data, {'key': ('key', {decrypt}), 'iv': ('iv', {decrypt})})
def _extract_vod(self, url):
    """Build the info dict for a VOD episode URL.

    Page metadata is fetched with `_download_metadata` and stream data with
    the ':watch' encrypted API call; the HLS URL and key are read from the
    'protocolHls' entry of the stream data.
    """
    video_id = self._match_id(url)
    metadata = self._download_metadata(
        url, video_id, 'ja', ('dehydratedState', 'queries', 0, 'state', 'data'))
    stream_info, decrypt = self._call_encrypted_api(
        video_id, ':watch', 'stream information', data={'method': 1})

    formats = self._get_formats(stream_info, ('protocolHls', 'url', {url_or_none}), video_id)
    hls_aes = self._extract_hls_key(stream_info, 'protocolHls', decrypt)

    return {
        'id': video_id,
        'formats': formats,
        'hls_aes': hls_aes,
        **traverse_obj(metadata, {
            'title': ('displayName', {str}),
            'description': ('description', {str}),
            'timestamp': ('watchStartTime', {int_or_none}),
            'thumbnail': ('keyVisualUrl', {url_or_none}),
            'cast': ('casts', ..., 'displayName', {str}),
            'duration': ('duration', {int}),
        }),
    }
def _extract_ppv(self, url):
    """Build the info dict for a PPV/event URL.

    Event metadata comes from an unauthenticated `_call_api` request and
    stream data from the ':watchArchive' encrypted API call; HLS URLs and
    key are read from the 'hls' entry of the stream data.
    """
    video_id = self._match_id(url)
    metadata = self._call_api(video_id, msg='video information', query={'al': 'ja'}, auth=False)
    stream_info, decrypt = self._call_encrypted_api(
        video_id, ':watchArchive', 'stream information', data={'method': 1})

    formats = self._get_formats(stream_info, ('hls', 'urls', ..., {url_or_none}), video_id)
    hls_aes = self._extract_hls_key(stream_info, 'hls', decrypt)

    return {
        'id': video_id,
        'formats': formats,
        'hls_aes': hls_aes,
        **traverse_obj(metadata, {
            'title': ('displayName', {str}),
            'timestamp': ('startTime', {int_or_none}),
            'thumbnail': ('keyVisualUrl', {url_or_none}),
            'duration': ('duration', {int_or_none}),
        }),
    }
class StacommuVODIE(StacommuBaseIE):
_VALID_URL = r'https?://www\.stacommu\.jp/videos/episodes/(?P<id>[\da-zA-Z]+)'
_VALID_URL = r'https?://www\.stacommu\.jp/(?:en/)?videos/episodes/(?P<id>[\da-zA-Z]+)'
_TESTS = [{
# not encrypted
'url': 'https://www.stacommu.jp/videos/episodes/aXcVKjHyAENEjard61soZZ',
@ -79,34 +118,19 @@ class StacommuVODIE(StacommuBaseIE):
'params': {
'skip_download': 'm3u8',
},
}, {
'url': 'https://www.stacommu.jp/en/videos/episodes/aXcVKjHyAENEjard61soZZ',
'only_matching': True,
}]
_API_PATH = 'videoEpisodes'
def _real_extract(self, url):
video_id = self._match_id(url)
video_info = self._download_metadata(
url, video_id, 'ja', ('dehydratedState', 'queries', 0, 'state', 'data'))
hls_info, decrypt = self._call_encrypted_api(
video_id, ':watch', 'stream information', data={'method': 1})
return {
'id': video_id,
'formats': self._get_formats(hls_info, ('protocolHls', 'url', {url_or_none}), video_id),
'hls_aes': self._extract_hls_key(hls_info, 'protocolHls', decrypt),
**traverse_obj(video_info, {
'title': ('displayName', {str}),
'description': ('description', {str}),
'timestamp': ('watchStartTime', {int_or_none}),
'thumbnail': ('keyVisualUrl', {url_or_none}),
'cast': ('casts', ..., 'displayName', {str}),
'duration': ('duration', {int}),
}),
}
return self._extract_vod(url)
class StacommuLiveIE(StacommuBaseIE):
_VALID_URL = r'https?://www\.stacommu\.jp/live/(?P<id>[\da-zA-Z]+)'
_VALID_URL = r'https?://www\.stacommu\.jp/(?:en/)?live/(?P<id>[\da-zA-Z]+)'
_TESTS = [{
'url': 'https://www.stacommu.jp/live/d2FJ3zLnndegZJCAEzGM3m',
'info_dict': {
@ -125,24 +149,83 @@ class StacommuLiveIE(StacommuBaseIE):
'params': {
'skip_download': 'm3u8',
},
}, {
'url': 'https://www.stacommu.jp/en/live/d2FJ3zLnndegZJCAEzGM3m',
'only_matching': True,
}]
_API_PATH = 'events'
def _real_extract(self, url):
video_id = self._match_id(url)
video_info = self._call_api(video_id, msg='video information', query={'al': 'ja'}, auth=False)
hls_info, decrypt = self._call_encrypted_api(
video_id, ':watchArchive', 'stream information', data={'method': 1})
return self._extract_ppv(url)
return {
'id': video_id,
'formats': self._get_formats(hls_info, ('hls', 'urls', ..., {url_or_none}), video_id),
'hls_aes': self._extract_hls_key(hls_info, 'hls', decrypt),
**traverse_obj(video_info, {
'title': ('displayName', {str}),
'timestamp': ('startTime', {int_or_none}),
'thumbnail': ('keyVisualUrl', {url_or_none}),
'duration': ('duration', {int_or_none}),
}),
}
class TheaterComplexTownBaseIE(StacommuBaseIE):
    """Site-specific settings for theater-complex.town on top of StacommuBaseIE."""

    # Machine name under which credentials are looked up in .netrc
    _NETRC_MACHINE = 'theatercomplextown'

    # Host name of the site's API
    _API_HOST = 'api.theater-complex.town'

    # Query parameters and HTTP headers used for the login request
    _LOGIN_QUERY = {'key': 'AIzaSyAgNCqToaIz4a062EeIrkhI_xetVfAOrfc'}
    _LOGIN_HEADERS = {
        'Accept': '*/*',
        'Content-Type': 'application/json',
        'X-Client-Version': 'Chrome/JsCore/9.23.0/FirebaseCore-web',
        'Referer': 'https://www.theater-complex.town/',
        'Origin': 'https://www.theater-complex.town',
    }
class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
    """Extractor for VOD episodes on theater-complex.town."""

    IE_NAME = 'theatercomplextown:vod'
    _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?videos/episodes/(?P<id>\w+)'
    _API_PATH = 'videoEpisodes'
    _TESTS = [{
        'url': 'https://www.theater-complex.town/videos/episodes/hoxqidYNoAn7bP92DN6p78',
        'info_dict': {
            'id': 'hoxqidYNoAn7bP92DN6p78',
            'ext': 'mp4',
            'title': '演劇ドラフトグランプリ2023 劇団『恋のぼり』〜劇団名決定秘話ラジオ',
            'description': 'md5:a7e2e9cf570379ea67fb630f345ff65d',
            'cast': ['玉城 裕規', '石川 凌雅'],
            'thumbnail': 'https://image.theater-complex.town/5URnXX6KCeDysuFrPkP38o/5URnXX6KCeDysuFrPkP38o',
            'upload_date': '20231103',
            'timestamp': 1699016400,
            'duration': 868,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://www.theater-complex.town/en/videos/episodes/6QT7XYwM9dJz5Gf9VB6K5y',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to the inherited VOD extraction routine."""
        info = self._extract_vod(url)
        return info
class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
    """Extractor for pay-per-view events on theater-complex.town."""

    IE_NAME = 'theatercomplextown:ppv'
    _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?ppv/(?P<id>\w+)'
    _API_PATH = 'events'
    _TESTS = [{
        'url': 'https://www.theater-complex.town/ppv/wytW3X7khrjJBUpKuV3jen',
        'info_dict': {
            'id': 'wytW3X7khrjJBUpKuV3jen',
            'ext': 'mp4',
            'title': 'BREAK FREE STARS 11月5日12:30千秋楽公演',
            'thumbnail': 'https://image.theater-complex.town/5GWEB31JcTUfjtgdeV5t6o/5GWEB31JcTUfjtgdeV5t6o',
            'upload_date': '20231105',
            'timestamp': 1699155000,
            'duration': 8378,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://www.theater-complex.town/en/ppv/wytW3X7khrjJBUpKuV3jen',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to the inherited PPV extraction routine."""
        info = self._extract_ppv(url)
        return info

View File

@ -1,66 +0,0 @@
from .common import InfoExtractor
from ..utils import remove_end
class ThisAVIE(InfoExtractor):
    """Extractor for thisav.com video pages (flash variable, HTML5 media or JW Player)."""

    _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
    _TESTS = [{
        # jwplayer
        'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html',
        'md5': '0480f1ef3932d901f0e0e719f188f19b',
        'info_dict': {
            'id': '47734',
            'ext': 'flv',
            'title': '高樹マリア - Just fit',
            'uploader': 'dj7970',
            'uploader_id': 'dj7970'
        }
    }, {
        # html5 media
        'url': 'http://www.thisav.com/video/242352/nerdy-18yo-big-ass-tattoos-and-glasses.html',
        'md5': 'ba90c076bd0f80203679e5b60bf523ee',
        'info_dict': {
            'id': '242352',
            'ext': 'mp4',
            'title': 'Nerdy 18yo Big Ass Tattoos and Glasses',
            'uploader': 'cybersluts',
            'uploader_id': 'cybersluts',
        },
    }]

    def _real_extract(self, url):
        """Locate the media via the first source that works and attach page metadata."""
        video_id = self._match_valid_url(url).group('id')
        webpage = self._download_webpage(url, video_id)

        title = remove_end(self._html_extract_title(webpage), ' - 視頻 - ThisAV.com-世界第一中文成人娛樂網站')

        # Source 1: a direct file URL passed to the player through addVariable(...)
        video_url = self._html_search_regex(
            r"addVariable\('file','([^']+)'\);", webpage, 'video url', default=None)
        if video_url:
            info_dict = {'formats': [{'url': video_url}]}
        else:
            # Source 2: HTML5 media entries; source 3: JW Player data
            entries = self._parse_html5_media_entries(url, webpage, video_id)
            info_dict = entries[0] if entries else self._extract_jwplayer_data(
                webpage, video_id, require_title=False)

        # Both values come from the same user link: its text and its last path component
        uploader = self._html_search_regex(
            r': <a href="http://www\.thisav\.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
            webpage, 'uploader name', fatal=False)
        uploader_id = self._html_search_regex(
            r': <a href="http://www\.thisav\.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
            webpage, 'uploader id', fatal=False)

        info_dict.update(
            id=video_id,
            uploader=uploader,
            uploader_id=uploader_id,
            title=title,
        )
        return info_dict

View File

@ -1,11 +1,23 @@
import json
from .common import InfoExtractor
from .zype import ZypeIE
from ..networking import HEADRequest
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
filter_dict,
parse_qs,
try_call,
urlencode_postdata,
)
class ThisOldHouseIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/]+/)?\d+)/(?P<id>[^/?#]+)'
_NETRC_MACHINE = 'thisoldhouse'
_VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/?#]+/)?\d+)/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
'info_dict': {
'id': '5dcdddf673c3f956ef5db202',
'ext': 'mp4',
@ -23,13 +35,16 @@ class ThisOldHouseIE(InfoExtractor):
'skip_download': True,
},
}, {
# Page no longer has video
'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
'only_matching': True,
}, {
# 404 Not Found
'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric',
'only_matching': True,
}, {
'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
# 404 Not Found
'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
'only_matching': True,
}, {
'url': 'https://www.thisoldhouse.com/21113884/s41-e13-paradise-lost',
@ -39,17 +54,51 @@ class ThisOldHouseIE(InfoExtractor):
'url': 'https://www.thisoldhouse.com/21083431/seaside-transformation-the-westerly-project',
'only_matching': True,
}]
_ZYPE_TMPL = 'https://player.zype.com/embed/%s.html?api_key=hsOk_yMSPYNrT22e9pu8hihLXjaZf0JW5jsOWv4ZqyHJFvkJn6rtToHl09tbbsbe'
_LOGIN_URL = 'https://login.thisoldhouse.com/usernamepassword/login'
def _perform_login(self, username, password):
    """Log in with username and password.

    Steps: request session cookies, follow wp-login.php to learn the login
    parameters from the final URL, submit the credentials as JSON, then post
    the returned hidden form fields to the login callback.

    Raises ExtractorError('Invalid username or password') on HTTP 401.
    """
    self._request_webpage(
        HEADRequest('https://www.thisoldhouse.com/insider'), None, 'Requesting session cookies')
    login_handle = self._request_webpage(
        'https://www.thisoldhouse.com/wp-login.php', None, 'Requesting login info',
        errnote='Unable to login', query={'redirect_to': 'https://www.thisoldhouse.com/insider'})

    # Start from the query parameters of the final URL; 'client' is sent as 'client_id'
    payload = {
        ('client_id' if key == 'client' else key): values[0]
        for key, values in parse_qs(login_handle.url).items()
    }
    payload.update({
        'tenant': 'thisoldhouse',
        'username': username,
        'password': password,
        'popup_options': {},
        'sso': True,
        '_csrf': try_call(lambda: self._get_cookies(self._LOGIN_URL)['_csrf'].value),
        '_intstate': 'deprecated',
    })
    request_body = json.dumps(filter_dict(payload), separators=(',', ':')).encode()
    request_headers = {
        'Content-Type': 'application/json',
        'Referer': login_handle.url,
    }

    try:
        auth_form = self._download_webpage(
            self._LOGIN_URL, None, 'Submitting credentials',
            headers=request_headers, data=request_body)
    except ExtractorError as e:
        if isinstance(e.cause, HTTPError) and e.cause.status == 401:
            raise ExtractorError('Invalid username or password', expected=True)
        raise

    self._request_webpage(
        'https://login.thisoldhouse.com/login/callback', None, 'Completing login',
        data=urlencode_postdata(self._hidden_inputs(auth_form)))
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
if 'To Unlock This content' in webpage:
self.raise_login_required(method='cookies')
video_url = self._search_regex(
self.raise_login_required(
'This video is only available for subscribers. '
'Note that --cookies-from-browser may not work due to this site using session cookies')
video_url, video_id = self._search_regex(
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})[^\'"]*)[\'"]',
webpage, 'video url')
if 'subscription_required=true' in video_url or 'c-entry-group-labels__image' in webpage:
return self.url_result(self._request_webpage(HEADRequest(video_url), display_id).url, 'Zype', display_id)
video_id = self._search_regex(r'(?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})', video_url, 'video id')
return self.url_result(self._ZYPE_TMPL % video_id, 'Zype', video_id)
webpage, 'video url', group=(1, 2))
video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Zype URL').url
return self.url_result(video_url, ZypeIE, video_id)

View File

@ -1619,6 +1619,9 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
info['title'] = broadcast.get('status') or info.get('title')
info['uploader_id'] = broadcast.get('twitter_username') or info.get('uploader_id')
info['uploader_url'] = format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None)
if info['live_status'] == 'is_upcoming':
return info
media_key = broadcast['media_key']
source = self._call_api(
f'live_video_stream/status/{media_key}', media_key)['source']

View File

@ -164,11 +164,15 @@ class KnownPiracyIE(UnsupportedInfoExtractor):
r'viewsb\.com',
r'filemoon\.sx',
r'hentai\.animestigma\.com',
r'thisav\.com',
)
_TESTS = [{
'url': 'http://dood.to/e/5s1wmbdacezb',
'only_matching': True,
}, {
'url': 'https://thisav.com/en/terms',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -1,3 +1,4 @@
import json
import random
import itertools
import urllib.parse
@ -18,24 +19,33 @@ from ..utils import (
class WeiboBaseIE(InfoExtractor):
def _update_visitor_cookies(self, video_id):
def _update_visitor_cookies(self, visitor_url, video_id):
headers = {'Referer': visitor_url}
chrome_ver = self._search_regex(
r'Chrome/(\d+)', self.get_param('http_headers')['User-Agent'], 'user agent version', default='90')
visitor_data = self._download_json(
'https://passport.weibo.com/visitor/genvisitor', video_id,
note='Generating first-visit guest request',
transform_source=strip_jsonp,
headers=headers, transform_source=strip_jsonp,
data=urlencode_postdata({
'cb': 'gen_callback',
'fp': '{"os":"2","browser":"Gecko57,0,0,0","fonts":"undefined","screenInfo":"1440*900*24","plugins":""}',
}))
'fp': json.dumps({
'os': '1',
'browser': f'Chrome{chrome_ver},0,0,0',
'fonts': 'undefined',
'screenInfo': '1920*1080*24',
'plugins': ''
}, separators=(',', ':'))}))['data']
self._download_webpage(
'https://passport.weibo.com/visitor/visitor', video_id,
note='Running first-visit callback to get guest cookies',
query={
headers=headers, query={
'a': 'incarnate',
't': visitor_data['data']['tid'],
'w': 2,
'c': '%03d' % visitor_data['data']['confidence'],
't': visitor_data['tid'],
'w': 3 if visitor_data.get('new_tid') else 2,
'c': f'{visitor_data.get("confidence", 100):03d}',
'gc': '',
'cb': 'cross_domain',
'from': 'weibo',
'_rand': random.random(),
@ -44,7 +54,7 @@ class WeiboBaseIE(InfoExtractor):
def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs):
webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs)
if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com':
self._update_visitor_cookies(video_id)
self._update_visitor_cookies(urlh.url, video_id)
webpage = self._download_webpage(url, video_id, *args, fatal=fatal, note=note, **kwargs)
return self._parse_json(webpage, video_id, fatal=fatal)

View File

@ -2,10 +2,12 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
str_or_none,
js_to_json,
parse_filesize,
parse_resolution,
str_or_none,
traverse_obj,
url_basename,
urlencode_postdata,
urljoin,
)
@ -41,6 +43,18 @@ class ZoomIE(InfoExtractor):
'ext': 'mp4',
'title': 'Timea Andrea Lelik\'s Personal Meeting Room',
},
'skip': 'This recording has expired',
}, {
# view_with_share URL
'url': 'https://cityofdetroit.zoom.us/rec/share/VjE-5kW3xmgbEYqR5KzRgZ1OFZvtMtiXk5HyRJo5kK4m5PYE6RF4rF_oiiO_9qaM.UTAg1MI7JSnF3ZjX',
'md5': 'bdc7867a5934c151957fb81321b3c024',
'info_dict': {
'id': 'VjE-5kW3xmgbEYqR5KzRgZ1OFZvtMtiXk5HyRJo5kK4m5PYE6RF4rF_oiiO_9qaM.UTAg1MI7JSnF3ZjX',
'ext': 'mp4',
'title': 'February 2022 Detroit Revenue Estimating Conference',
'duration': 7299,
'formats': 'mincount:3',
},
}]
def _get_page_data(self, webpage, video_id):
@ -72,6 +86,7 @@ class ZoomIE(InfoExtractor):
def _real_extract(self, url):
base_url, url_type, video_id = self._match_valid_url(url).group('base_url', 'type', 'id')
query = {}
if url_type == 'share':
webpage = self._get_real_webpage(url, base_url, video_id, 'share')
@ -80,6 +95,7 @@ class ZoomIE(InfoExtractor):
f'{base_url}nws/recording/1.0/play/share-info/{meeting_id}',
video_id, note='Downloading share info JSON')['result']['redirectUrl']
url = urljoin(base_url, redirect_path)
query['continueMode'] = 'true'
webpage = self._get_real_webpage(url, base_url, video_id, 'play')
file_id = self._get_page_data(webpage, video_id)['fileId']
@ -88,7 +104,7 @@ class ZoomIE(InfoExtractor):
raise ExtractorError('Unable to extract file ID')
data = self._download_json(
f'{base_url}nws/recording/1.0/play/info/{file_id}', video_id,
f'{base_url}nws/recording/1.0/play/info/{file_id}', video_id, query=query,
note='Downloading play info JSON')['result']
subtitles = {}
@ -104,10 +120,10 @@ class ZoomIE(InfoExtractor):
if data.get('viewMp4Url'):
formats.append({
'format_note': 'Camera stream',
'url': str_or_none(data.get('viewMp4Url')),
'url': data['viewMp4Url'],
'width': int_or_none(traverse_obj(data, ('viewResolvtions', 0))),
'height': int_or_none(traverse_obj(data, ('viewResolvtions', 1))),
'format_id': str_or_none(traverse_obj(data, ('recording', 'id'))),
'format_id': 'view',
'ext': 'mp4',
'filesize_approx': parse_filesize(str_or_none(traverse_obj(data, ('recording', 'fileSizeInMB')))),
'preference': 0
@ -116,14 +132,26 @@ class ZoomIE(InfoExtractor):
if data.get('shareMp4Url'):
formats.append({
'format_note': 'Screen share stream',
'url': str_or_none(data.get('shareMp4Url')),
'url': data['shareMp4Url'],
'width': int_or_none(traverse_obj(data, ('shareResolvtions', 0))),
'height': int_or_none(traverse_obj(data, ('shareResolvtions', 1))),
'format_id': str_or_none(traverse_obj(data, ('shareVideo', 'id'))),
'format_id': 'share',
'ext': 'mp4',
'preference': -1
})
view_with_share_url = data.get('viewMp4WithshareUrl')
if view_with_share_url:
formats.append({
**parse_resolution(self._search_regex(
r'_(\d+x\d+)\.mp4', url_basename(view_with_share_url), 'resolution', default=None)),
'format_note': 'Screen share with camera',
'url': view_with_share_url,
'format_id': 'view_with_share',
'ext': 'mp4',
'preference': 1
})
return {
'id': video_id,
'title': str_or_none(traverse_obj(data, ('meet', 'topic'))),