Compare commits

16 Commits

Author SHA1 Message Date
Frank Aurich
8afd9468b0
[ie/n-tv.de] Fix extractor (#8414)
Closes #3179
Authored by: 1100101
2023-11-11 21:00:06 +00:00
SirElderling
ef12dbdcd3
[ie/radiocomercial] Add extractors (#8508)
Authored by: SirElderling
2023-11-11 20:10:19 +00:00
LoserFox
46acc418a5
[ie/neteasemusic] Improve metadata extraction (#8531)
Closes #8530
Authored by: LoserFox
2023-11-11 20:08:53 +00:00
Esokrates
6ba3085616
[ie/orf:podcast] Add extractor (#8486)
Closes #5265
Authored by: Esokrates
2023-11-11 20:06:25 +00:00
bashonly
f6e97090d2
[ie/twitter:broadcast] Support --wait-for-video (#8475)
Closes #8473
Authored by: bashonly
2023-11-11 20:05:07 +00:00
bashonly
2863fcf2b6
[ie/theatercomplextown] Add extractors (#8560)
Closes #8491
Authored by: bashonly
2023-11-11 20:04:29 +00:00
bashonly
c76c96677f
[ie/thisoldhouse] Add login support (#8561)
Closes #8257
Authored by: bashonly
2023-11-11 20:03:50 +00:00
c-basalt
15b252dfd2
[ie/weibo] Fix extraction (#8463)
Closes #8445
Authored by: c-basalt
2023-11-11 20:02:59 +00:00
Aniol Pagès
312a2d1e8b
[ie/LaXarxaMes] Add extractor (#8412)
Authored by: aniolpages
2023-11-11 20:00:31 +00:00
garret
54579be436
[ie/nhk] Improve metadata extraction (#8388)
Authored by: garret1317
2023-11-11 19:59:01 +00:00
sepro
05adfd883a
[ie/ondemandkorea] Overhaul extractor (#8386)
Closes #8374
Authored by: seproDev
2023-11-11 19:57:56 +00:00
Martin Pecka
3ff494f6f4
[ie/NovaEmbed] Improve _VALID_URL (#8368)
Authored by: peci1
2023-11-11 19:56:29 +00:00
Mozi
9b5bedf13a
[ie/brilliantpala] Fix cookies support (#8352)
Authored by: pzhlkj6612
2023-11-11 19:54:53 +00:00
bashonly
cb480e390d
[ie/thisav] Remove (#8346)
Authored by: bashonly
2023-11-11 19:53:59 +00:00
sepro
25a4bd345a
[ie/sbs.co.kr] Add extractors (#8326)
Authored by: seproDev
2023-11-11 19:53:10 +00:00
Tom
3906de0755
[ie/zoom] Extract combined view formats (#7847)
Authored by: Mipsters
2023-11-11 19:51:54 +00:00
19 changed files with 1022 additions and 249 deletions

yt_dlp/extractor/_extractors.py

@@ -953,6 +953,7 @@ from .lastfm import (
     LastFMPlaylistIE,
     LastFMUserIE,
 )
+from .laxarxames import LaXarxaMesIE
 from .lbry import (
     LBRYIE,
     LBRYChannelIE,
@@ -1387,7 +1388,10 @@ from .oftv import (
 from .oktoberfesttv import OktoberfestTVIE
 from .olympics import OlympicsReplayIE
 from .on24 import On24IE
-from .ondemandkorea import OnDemandKoreaIE
+from .ondemandkorea import (
+    OnDemandKoreaIE,
+    OnDemandKoreaProgramIE,
+)
 from .onefootball import OneFootballIE
 from .onenewsnz import OneNewsNZIE
 from .oneplace import OnePlacePodcastIE
@@ -1416,6 +1420,7 @@ from .orf import (
     ORFTVthekIE,
     ORFFM4StoryIE,
     ORFRadioIE,
+    ORFPodcastIE,
     ORFIPTVIE,
 )
 from .outsidetv import OutsideTVIE
@@ -1578,6 +1583,10 @@ from .radiocanada import (
     RadioCanadaIE,
     RadioCanadaAudioVideoIE,
 )
+from .radiocomercial import (
+    RadioComercialIE,
+    RadioComercialPlaylistIE,
+)
 from .radiode import RadioDeIE
 from .radiojavan import RadioJavanIE
 from .radiobremen import RadioBremenIE
@@ -1758,6 +1767,11 @@ from .samplefocus import SampleFocusIE
 from .sapo import SapoIE
 from .savefrom import SaveFromIE
 from .sbs import SBSIE
+from .sbscokr import (
+    SBSCoKrIE,
+    SBSCoKrAllvodProgramIE,
+    SBSCoKrProgramsVodIE,
+)
 from .screen9 import Screen9IE
 from .screencast import ScreencastIE
 from .screencastify import ScreencastifyIE
@@ -1902,6 +1916,8 @@ from .srmediathek import SRMediathekIE
 from .stacommu import (
     StacommuLiveIE,
     StacommuVODIE,
+    TheaterComplexTownVODIE,
+    TheaterComplexTownPPVIE,
 )
 from .stanfordoc import StanfordOpenClassroomIE
 from .startv import StarTVIE
@@ -2014,7 +2030,6 @@ from .thestar import TheStarIE
 from .thesun import TheSunIE
 from .theweatherchannel import TheWeatherChannelIE
 from .thisamericanlife import ThisAmericanLifeIE
-from .thisav import ThisAVIE
 from .thisoldhouse import ThisOldHouseIE
 from .thisvid import (
     ThisVidIE,

yt_dlp/extractor/brilliantpala.py

@@ -21,10 +21,10 @@ class BrilliantpalaBaseIE(InfoExtractor):

     def _get_logged_in_username(self, url, video_id):
         webpage, urlh = self._download_webpage_handle(url, video_id)
-        if self._LOGIN_API == urlh.url:
+        if urlh.url.startswith(self._LOGIN_API):
             self.raise_login_required()
         return self._html_search_regex(
-            r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'stream page info', 'username')
+            r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'logged-in username')

     def _perform_login(self, username, password):
         login_form = self._hidden_inputs(self._download_webpage(

yt_dlp/extractor/laxarxames.py (new file)

@@ -0,0 +1,73 @@
+import json
+
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
+from ..utils import ExtractorError
+from ..utils.traversal import traverse_obj
+
+
+class LaXarxaMesIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?laxarxames\.cat/(?:[^/?#]+/)*?(player|movie-details)/(?P<id>\d+)'
+    _NETRC_MACHINE = 'laxarxames'
+    _TOKEN = None
+    _TESTS = [{
+        'url': 'https://www.laxarxames.cat/player/3459421',
+        'md5': '0966f46c34275934c19af78f3df6e2bc',
+        'info_dict': {
+            'id': '6339612436112',
+            'ext': 'mp4',
+            'title': 'Resum | UA Horta — UD Viladecans',
+            'timestamp': 1697905186,
+            'thumbnail': r're:https?://.*\.jpg',
+            'description': '',
+            'upload_date': '20231021',
+            'duration': 129.44,
+            'tags': ['ott', 'esports', '23-24', ' futbol', ' futbol-partits', 'elit', 'resum'],
+            'uploader_id': '5779379807001',
+        },
+        'skip': 'Requires login',
+    }]
+
+    def _perform_login(self, username, password):
+        if self._TOKEN:
+            return
+
+        login = self._download_json(
+            'https://api.laxarxames.cat/Authorization/SignIn', None, note='Logging in', headers={
+                'X-Tenantorigin': 'https://laxarxames.cat',
+                'Content-Type': 'application/json',
+            }, data=json.dumps({
+                'Username': username,
+                'Password': password,
+                'Device': {
+                    'PlatformCode': 'WEB',
+                    'Name': 'Mac OS ()',
+                },
+            }).encode(), expected_status=401)
+
+        self._TOKEN = traverse_obj(login, ('AuthorizationToken', 'Token', {str}))
+        if not self._TOKEN:
+            raise ExtractorError('Login failed', expected=True)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        if not self._TOKEN:
+            self.raise_login_required()
+
+        media_play_info = self._download_json(
+            'https://api.laxarxames.cat/Media/GetMediaPlayInfo', video_id,
+            data=json.dumps({
+                'MediaId': int(video_id),
+                'StreamType': 'MAIN'
+            }).encode(), headers={
+                'Authorization': f'Bearer {self._TOKEN}',
+                'X-Tenantorigin': 'https://laxarxames.cat',
+                'Content-Type': 'application/json',
+            })
+
+        if not traverse_obj(media_play_info, ('ContentUrl', {str})):
+            self.raise_no_formats('No video found', expected=True)
+
+        return self.url_result(
+            f'https://players.brightcove.net/5779379807001/default_default/index.html?videoId={media_play_info["ContentUrl"]}',
+            BrightcoveNewIE, video_id, media_play_info.get('Title'))
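
A minimal usage sketch for the new LaXarxaMes extractor, driven through yt-dlp's Python embedding API; the credentials below are hypothetical placeholders, not values from this changeset:

# Hedged sketch: the 'username'/'password' options feed _perform_login(),
# which stores the bearer token before _real_extract() runs.
import yt_dlp

opts = {
    'username': 'viewer@example.com',  # hypothetical account
    'password': 'correct-horse',       # hypothetical password
}
with yt_dlp.YoutubeDL(opts) as ydl:
    # resolves the media to a Brightcove player URL, then hands off to BrightcoveNewIE
    ydl.download(['https://www.laxarxames.cat/player/3459421'])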

yt_dlp/extractor/neteasemusic.py

@@ -142,6 +142,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
             'subtitles': {'lyrics': [{'ext': 'lrc'}]},
             "duration": 256,
             'thumbnail': r're:^http.*\.jpg',
+            'album': '偶像练习生 表演曲目合集',
+            'average_rating': int,
+            'album_artist': '偶像练习生',
         },
     }, {
         'note': 'No lyrics.',
@@ -155,6 +158,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
             'timestamp': 1202745600,
             'duration': 263,
             'thumbnail': r're:^http.*\.jpg',
+            'album': 'Piano Solos Vol. 2',
+            'album_artist': 'Dustin O\'Halloran',
+            'average_rating': int,
         },
     }, {
         'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
@@ -171,6 +177,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
             'duration': 268,
             'alt_title': '伴唱:现代人乐队 合唱:总政歌舞团',
             'thumbnail': r're:^http.*\.jpg',
+            'average_rating': int,
+            'album': '红色摇滚',
+            'album_artist': '侯牧人',
         },
     }, {
         'url': 'http://music.163.com/#/song?id=32102397',
@@ -186,6 +195,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
             'subtitles': {'lyrics': [{'ext': 'lrc'}]},
             'duration': 199,
             'thumbnail': r're:^http.*\.jpg',
+            'album': 'Bad Blood',
+            'average_rating': int,
+            'album_artist': 'Taylor Swift',
         },
         'skip': 'Blocked outside Mainland China',
     }, {
@@ -203,6 +215,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
             'duration': 229,
             'alt_title': '说出愿望吧(Genie)',
             'thumbnail': r're:^http.*\.jpg',
+            'average_rating': int,
+            'album': 'Oh!',
+            'album_artist': '少女时代',
         },
         'skip': 'Blocked outside Mainland China',
     }]
@@ -253,12 +268,15 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
             'formats': formats,
             'alt_title': '/'.join(traverse_obj(info, (('transNames', 'alias'), ...))) or None,
             'creator': ' / '.join(traverse_obj(info, ('artists', ..., 'name'))) or None,
+            'album_artist': ' / '.join(traverse_obj(info, ('album', 'artists', ..., 'name'))) or None,
             **lyric_data,
             **traverse_obj(info, {
                 'title': ('name', {str}),
                 'timestamp': ('album', 'publishTime', {self.kilo_or_none}),
                 'thumbnail': ('album', 'picUrl', {url_or_none}),
                 'duration': ('duration', {self.kilo_or_none}),
+                'album': ('album', 'name', {str}),
+                'average_rating': ('score', {int_or_none}),
             }),
         }

yt_dlp/extractor/nhk.py

@@ -3,6 +3,8 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
+    clean_html,
+    get_element_by_class,
     int_or_none,
     join_nonempty,
     parse_duration,
@@ -45,25 +47,36 @@ class NhkBaseIE(InfoExtractor):
         self.cache.store('nhk', 'api_info', api_info)
         return api_info

-    def _extract_formats_and_subtitles(self, vod_id):
+    def _extract_stream_info(self, vod_id):
         for refresh in (False, True):
             api_info = self._get_api_info(refresh)
             if not api_info:
                 continue

             api_url = api_info.pop('url')
-            stream_url = traverse_obj(
+            meta = traverse_obj(
                 self._download_json(
                     api_url, vod_id, 'Downloading stream url info', fatal=False, query={
                         **api_info,
                         'type': 'json',
                         'optional_id': vod_id,
                         'active_flg': 1,
-                    }),
-                ('meta', 0, 'movie_url', ('mb_auto', 'auto_sp', 'auto_pc'), {url_or_none}), get_all=False)
-            if stream_url:
-                return self._extract_m3u8_formats_and_subtitles(stream_url, vod_id)
+                    }), ('meta', 0))
+            stream_url = traverse_obj(
+                meta, ('movie_url', ('mb_auto', 'auto_sp', 'auto_pc'), {url_or_none}), get_all=False)
+
+            if stream_url:
+                formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, vod_id)
+                return {
+                    **traverse_obj(meta, {
+                        'duration': ('duration', {int_or_none}),
+                        'timestamp': ('publication_date', {unified_timestamp}),
+                        'release_timestamp': ('insert_date', {unified_timestamp}),
+                        'modified_timestamp': ('update_date', {unified_timestamp}),
+                    }),
+                    'formats': formats,
+                    'subtitles': subtitles,
+                }
         raise ExtractorError('Unable to extract stream url')

     def _extract_episode_info(self, url, episode=None):
@@ -77,11 +90,11 @@ class NhkBaseIE(InfoExtractor):
         if fetch_episode:
             episode = self._call_api(
                 episode_id, lang, is_video, True, episode_id[:4] == '9999')[0]
-        title = episode.get('sub_title_clean') or episode['sub_title']

         def get_clean_field(key):
-            return episode.get(key + '_clean') or episode.get(key)
+            return clean_html(episode.get(key + '_clean') or episode.get(key))

+        title = get_clean_field('sub_title')
         series = get_clean_field('title')

         thumbnails = []
@@ -96,22 +109,30 @@ class NhkBaseIE(InfoExtractor):
                 'url': 'https://www3.nhk.or.jp' + img_path,
             })

+        episode_name = title
+        if series and title:
+            title = f'{series} - {title}'
+        elif series and not title:
+            title = series
+            series = None
+            episode_name = None
+        else:  # title, no series
+            episode_name = None
+
         info = {
             'id': episode_id + '-' + lang,
-            'title': '%s - %s' % (series, title) if series and title else title,
+            'title': title,
             'description': get_clean_field('description'),
             'thumbnails': thumbnails,
             'series': series,
-            'episode': title,
+            'episode': episode_name,
         }

         if is_video:
             vod_id = episode['vod_id']
-            formats, subs = self._extract_formats_and_subtitles(vod_id)
-
             info.update({
+                **self._extract_stream_info(vod_id),
                 'id': vod_id,
-                'formats': formats,
-                'subtitles': subs,
             })

         else:
@@ -148,6 +169,14 @@ class NhkVodIE(NhkBaseIE):
             'thumbnail': 'md5:51bcef4a21936e7fea1ff4e06353f463',
             'episode': 'The Tohoku Shinkansen: Full Speed Ahead',
             'series': 'Japan Railway Journal',
+            'modified_timestamp': 1694243656,
+            'timestamp': 1681428600,
+            'release_timestamp': 1693883728,
+            'duration': 1679,
+            'upload_date': '20230413',
+            'modified_date': '20230909',
+            'release_date': '20230905',
         },
     }, {
         # video clip
@@ -161,6 +190,13 @@ class NhkVodIE(NhkBaseIE):
             'thumbnail': 'md5:d6a4d9b6e9be90aaadda0bcce89631ed',
             'series': 'Dining with the Chef',
             'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU',
+            'duration': 148,
+            'upload_date': '20190816',
+            'release_date': '20230902',
+            'release_timestamp': 1693619292,
+            'modified_timestamp': 1694168033,
+            'modified_date': '20230908',
+            'timestamp': 1565997540,
         },
     }, {
         # radio
@@ -170,7 +206,7 @@ class NhkVodIE(NhkBaseIE):
             'ext': 'm4a',
             'title': 'Living in Japan - Tips for Travelers to Japan / Ramen Vending Machines',
             'series': 'Living in Japan',
-            'description': 'md5:850611969932874b4a3309e0cae06c2f',
+            'description': 'md5:0a0e2077d8f07a03071e990a6f51bfab',
             'thumbnail': 'md5:960622fb6e06054a4a1a0c97ea752545',
             'episode': 'Tips for Travelers to Japan / Ramen Vending Machines'
         },
@@ -212,6 +248,23 @@ class NhkVodIE(NhkBaseIE):
             'description': 'md5:9c1d6cbeadb827b955b20e99ab920ff0',
         },
         'skip': 'expires 2023-10-15',
+    }, {
+        # a one-off (single-episode series). title from the api is just '<p></p>'
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/3004952/',
+        'info_dict': {
+            'id': 'nw_vod_v_en_3004_952_20230723091000_01_1690074552',
+            'ext': 'mp4',
+            'title': 'Barakan Discovers AMAMI OSHIMA: Isson\'s Treasure Island',
+            'description': 'md5:5db620c46a0698451cc59add8816b797',
+            'thumbnail': 'md5:67d9ff28009ba379bfa85ad1aaa0e2bd',
+            'release_date': '20230905',
+            'timestamp': 1690103400,
+            'duration': 2939,
+            'release_timestamp': 1693898699,
+            'modified_timestamp': 1698057495,
+            'modified_date': '20231023',
+            'upload_date': '20230723',
+        },
     }]

     def _real_extract(self, url):
@@ -226,13 +279,15 @@ class NhkVodProgramIE(NhkBaseIE):
         'info_dict': {
             'id': 'sumo',
             'title': 'GRAND SUMO Highlights',
+            'description': 'md5:fc20d02dc6ce85e4b72e0273aa52fdbf',
         },
-        'playlist_mincount': 12,
+        'playlist_mincount': 0,
     }, {
         'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway',
         'info_dict': {
             'id': 'japanrailway',
             'title': 'Japan Railway Journal',
+            'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
         },
         'playlist_mincount': 12,
     }, {
@@ -241,6 +296,7 @@ class NhkVodProgramIE(NhkBaseIE):
         'info_dict': {
             'id': 'japanrailway',
             'title': 'Japan Railway Journal',
+            'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
         },
         'playlist_mincount': 5,
     }, {
@@ -265,11 +321,11 @@ class NhkVodProgramIE(NhkBaseIE):
             entries.append(self._extract_episode_info(
                 urljoin(url, episode_path), episode))

-        program_title = None
-        if entries:
-            program_title = entries[0].get('series')
+        html = self._download_webpage(url, program_id)
+        program_title = clean_html(get_element_by_class('p-programDetail__title', html))
+        program_description = clean_html(get_element_by_class('p-programDetail__text', html))

-        return self.playlist_result(entries, program_id, program_title)
+        return self.playlist_result(entries, program_id, program_title, program_description)


 class NhkForSchoolBangumiIE(InfoExtractor):
@@ -421,6 +477,7 @@ class NhkRadiruIE(InfoExtractor):
         'skip': 'Episode expired on 2023-04-16',
         'info_dict': {
             'channel': 'NHK-FM',
+            'uploader': 'NHK-FM',
             'description': 'md5:94b08bdeadde81a97df4ec882acce3e9',
             'ext': 'm4a',
             'id': '0449_01_3853544',
@@ -441,6 +498,7 @@ class NhkRadiruIE(InfoExtractor):
             'title': 'ベストオブクラシック',
             'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。',
             'channel': 'NHK-FM',
+            'uploader': 'NHK-FM',
             'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg',
         },
         'playlist_mincount': 3,
@@ -454,6 +512,7 @@ class NhkRadiruIE(InfoExtractor):
             'title': '有島武郎「一房のぶどう」',
             'description': '朗読:川野一宇(ラジオ深夜便アンカー)\r\n\r\n2016年12月8日放送「ラジオ深夜便『アンカー朗読シリーズ』」より',
             'channel': 'NHKラジオ第1、NHK-FM',
+            'uploader': 'NHKラジオ第1、NHK-FM',
             'timestamp': 1635757200,
             'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F300/img/corner/box_109_thumbnail.jpg',
             'release_date': '20161207',
@@ -469,6 +528,7 @@ class NhkRadiruIE(InfoExtractor):
             'id': 'F261_01_3855109',
             'ext': 'm4a',
             'channel': 'NHKラジオ第1',
+            'uploader': 'NHKラジオ第1',
             'timestamp': 1681635900,
             'release_date': '20230416',
             'series': 'NHKラジオニュース',
@@ -513,6 +573,7 @@ class NhkRadiruIE(InfoExtractor):
         series_meta = traverse_obj(meta, {
             'title': 'program_name',
             'channel': 'media_name',
+            'uploader': 'media_name',
             'thumbnail': (('thumbnail_c', 'thumbnail_p'), {url_or_none}),
         }, get_all=False)

@@ -541,6 +602,7 @@ class NhkRadioNewsPageIE(InfoExtractor):
             'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
             'description': 'md5:bf2c5b397e44bc7eb26de98d8f15d79d',
             'channel': 'NHKラジオ第1',
+            'uploader': 'NHKラジオ第1',
             'title': 'NHKラジオニュース',
         }
     }]

yt_dlp/extractor/novaembed.py

@@ -13,7 +13,7 @@ from ..utils import (


 class NovaEmbedIE(InfoExtractor):
-    _VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://media(?:tn)?\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
         'info_dict': {
@@ -37,6 +37,16 @@ class NovaEmbedIE(InfoExtractor):
             'duration': 114,
         },
         'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://mediatn.cms.nova.cz/embed/EU5ELEsmOHt?autoplay=1',
+        'info_dict': {
+            'id': 'EU5ELEsmOHt',
+            'ext': 'mp4',
+            'title': 'Haptické křeslo, bionická ruka nebo roboti. Reportérka se podívala na Týden inovací',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 1780,
+        },
+        'params': {'skip_download': 'm3u8'},
     }]

     def _real_extract(self, url):

yt_dlp/extractor/ntvde.py

@@ -1,21 +1,21 @@
 import re

 from .common import InfoExtractor
-from ..compat import compat_urlparse
 from ..utils import (
     int_or_none,
     js_to_json,
-    parse_duration,
+    url_or_none,
 )
+from ..utils.traversal import traverse_obj


 class NTVDeIE(InfoExtractor):
     IE_NAME = 'n-tv.de'
-    _VALID_URL = r'https?://(?:www\.)?n-tv\.de/mediathek/videos/[^/?#]+/[^/?#]+-article(?P<id>.+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?n-tv\.de/mediathek/(?:videos|magazine)/[^/?#]+/[^/?#]+-article(?P<id>[^/?#]+)\.html'

     _TESTS = [{
         'url': 'http://www.n-tv.de/mediathek/videos/panorama/Schnee-und-Glaette-fuehren-zu-zahlreichen-Unfaellen-und-Staus-article14438086.html',
-        'md5': '6ef2514d4b1e8e03ca24b49e2f167153',
+        'md5': '6bcf2a6638cb83f45d5561659a1cb498',
         'info_dict': {
             'id': '14438086',
             'ext': 'mp4',
@@ -23,51 +23,61 @@ class NTVDeIE(InfoExtractor):
             'title': 'Schnee und Glätte führen zu zahlreichen Unfällen und Staus',
             'alt_title': 'Winterchaos auf deutschen Straßen',
             'description': 'Schnee und Glätte sorgen deutschlandweit für einen chaotischen Start in die Woche: Auf den Straßen kommt es zu kilometerlangen Staus und Dutzenden Glätteunfällen. In Düsseldorf und München wirbelt der Schnee zudem den Flugplan durcheinander. Dutzende Flüge landen zu spät, einige fallen ganz aus.',
-            'duration': 4020,
+            'duration': 67,
             'timestamp': 1422892797,
             'upload_date': '20150202',
         },
+    }, {
+        'url': 'https://www.n-tv.de/mediathek/magazine/auslandsreport/Juedische-Siedler-wollten-Rache-die-wollten-nur-toeten-article24523089.html',
+        'md5': 'c5c6014c014ccc3359470e1d34472bfd',
+        'info_dict': {
+            'id': '24523089',
+            'ext': 'mp4',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'title': 'Jüdische Siedler "wollten Rache, die wollten nur töten"',
+            'alt_title': 'Israelische Gewalt fern von Gaza',
+            'description': 'Vier Tage nach dem Massaker der Hamas greifen jüdische Siedler das Haus einer palästinensischen Familie im Westjordanland an. Die Überlebenden berichten, sie waren unbewaffnet, die Angreifer seien nur auf "Rache und Töten" aus gewesen. Als die Toten beerdigt werden sollen, eröffnen die Siedler erneut das Feuer.',
+            'duration': 326,
+            'timestamp': 1699688294,
+            'upload_date': '20231111',
+        },
     }]

     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)

-        info = self._parse_json(self._search_regex(
-            r'(?s)ntv\.pageInfo\.article\s*=\s*(\{.*?\});', webpage, 'info'),
-            video_id, transform_source=js_to_json)
-        timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp'))
-        vdata = self._parse_json(self._search_regex(
-            r'(?s)\$\(\s*"\#player"\s*\)\s*\.data\(\s*"player",\s*(\{.*?\})\);',
-            webpage, 'player data'), video_id,
-            transform_source=lambda s: js_to_json(re.sub(r'advertising:\s*{[^}]+},', '', s)))
-        duration = parse_duration(vdata.get('duration'))
+        info = self._search_json(
+            r'article:', webpage, 'info', video_id, transform_source=js_to_json)
+        vdata = self._search_json(
+            r'\$\(\s*"#playerwrapper"\s*\)\s*\.data\(\s*"player",',
+            webpage, 'player data', video_id,
+            transform_source=lambda s: js_to_json(re.sub(r'ivw:[^},]+', '', s)))['setup']['source']

         formats = []
-        if vdata.get('video'):
+        if vdata.get('progressive'):
             formats.append({
-                'format_id': 'flash',
-                'url': 'rtmp://fms.n-tv.de/%s' % vdata['video'],
+                'format_id': 'http',
+                'url': vdata['progressive'],
             })
-        if vdata.get('videoMp4'):
-            formats.append({
-                'format_id': 'mobile',
-                'url': compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoMp4']),
-                'tbr': 400,  # estimation
-            })
-        if vdata.get('videoM3u8'):
-            m3u8_url = compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoM3u8'])
+        if vdata.get('hls'):
             formats.extend(self._extract_m3u8_formats(
-                m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
-                quality=1, m3u8_id='hls', fatal=False))
+                vdata['hls'], video_id, 'mp4', m3u8_id='hls', fatal=False))
+        if vdata.get('dash'):
+            formats.extend(self._extract_mpd_formats(vdata['dash'], video_id, fatal=False, mpd_id='dash'))

         return {
             'id': video_id,
-            'title': info['headline'],
-            'description': info.get('intro'),
-            'alt_title': info.get('kicker'),
-            'timestamp': timestamp,
-            'thumbnail': vdata.get('html5VideoPoster'),
-            'duration': duration,
+            **traverse_obj(info, {
+                'title': 'headline',
+                'description': 'intro',
+                'alt_title': 'kicker',
+                'timestamp': ('publishedDateAsUnixTimeStamp', {int_or_none}),
+            }),
+            **traverse_obj(vdata, {
+                'thumbnail': ('poster', {url_or_none}),
+                'duration': ('length', {int_or_none}),
+            }),
             'formats': formats,
         }
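
For context on the transform_source step above: js_to_json converts a JavaScript object literal (unquoted keys, single-quoted strings) into strict JSON before parsing. A minimal sketch, with an invented input string shaped roughly like the n-tv.de player config:

import json

from yt_dlp.utils import js_to_json

# made-up JS blob for illustration only
js_blob = "{title: 'Clip', length: 67, source: {hls: 'https://example.com/x.m3u8'}}"
print(json.loads(js_to_json(js_blob)))
# {'title': 'Clip', 'length': 67, 'source': {'hls': 'https://example.com/x.m3u8'}}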

yt_dlp/extractor/ondemandkorea.py

@@ -1,87 +1,167 @@
+import functools
 import re
+import uuid

 from .common import InfoExtractor
+from ..networking import HEADRequest
 from ..utils import (
     ExtractorError,
-    js_to_json,
+    OnDemandPagedList,
+    float_or_none,
+    int_or_none,
+    join_nonempty,
+    parse_age_limit,
+    parse_qs,
+    unified_strdate,
+    url_or_none,
 )
+from ..utils.traversal import traverse_obj


 class OnDemandKoreaIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?:en/)?player/vod/[a-z0-9-]+\?(?:[^#]+&)?contentId=(?P<id>\d+)'
     _GEO_COUNTRIES = ['US', 'CA']
+
     _TESTS = [{
-        'url': 'https://www.ondemandkorea.com/ask-us-anything-e351.html',
+        'url': 'https://www.ondemandkorea.com/player/vod/ask-us-anything?contentId=686471',
+        'md5': 'e2ff77255d989e3135bde0c5889fbce8',
         'info_dict': {
-            'id': 'ask-us-anything-e351',
+            'id': '686471',
             'ext': 'mp4',
-            'title': 'Ask Us Anything : Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won - 09/24/2022',
-            'description': 'A talk show/game show with a school theme where celebrity guests appear as “transfer students.”',
-            'thumbnail': r're:^https?://.*\.jpg$',
+            'title': 'Ask Us Anything: Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won',
+            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
+            'duration': 5486.955,
+            'release_date': '20220924',
+            'series': 'Ask Us Anything',
+            'series_id': 11790,
+            'episode_number': 351,
+            'episode': 'Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won',
         },
-        'params': {
-            'skip_download': 'm3u8 download'
-        }
     }, {
-        'url': 'https://www.ondemandkorea.com/work-later-drink-now-e1.html',
+        'url': 'https://www.ondemandkorea.com/player/vod/breakup-probation-a-week?contentId=1595796',
+        'md5': '57266c720006962be7ff415b24775caa',
         'info_dict': {
-            'id': 'work-later-drink-now-e1',
+            'id': '1595796',
             'ext': 'mp4',
-            'title': 'Work Later, Drink Now : E01',
-            'description': 'Work Later, Drink First follows three women who find solace in a glass of liquor at the end of the day. So-hee, who gets comfort from a cup of soju af',
-            'thumbnail': r're:^https?://.*\.png$',
-            'subtitles': {
-                'English': 'mincount:1',
-            },
+            'title': 'Breakup Probation, A Week: E08',
+            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
+            'duration': 1586.0,
+            'release_date': '20231001',
+            'series': 'Breakup Probation, A Week',
+            'series_id': 22912,
+            'episode_number': 8,
+            'episode': 'E08',
         },
-        'params': {
-            'skip_download': 'm3u8 download'
-        }
+    }, {
+        'url': 'https://www.ondemandkorea.com/player/vod/the-outlaws?contentId=369531',
+        'md5': 'fa5523b87aa1f6d74fc622a97f2b47cd',
+        'info_dict': {
+            'id': '369531',
+            'ext': 'mp4',
+            'release_date': '20220519',
+            'duration': 7267.0,
+            'title': 'The Outlaws: Main Movie',
+            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
+            'age_limit': 18,
+        },
+    }, {
+        'url': 'https://www.ondemandkorea.com/en/player/vod/capture-the-moment-how-is-that-possible?contentId=1605006',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id, fatal=False)

-        if not webpage:
-            # Page sometimes returns captcha page with HTTP 403
-            raise ExtractorError(
-                'Unable to access page. You may have been blocked.',
-                expected=True)
+        data = self._download_json(
+            f'https://odkmedia.io/odx/api/v3/playback/{video_id}/', video_id, fatal=False,
+            headers={'service-name': 'odk'}, query={'did': str(uuid.uuid4())}, expected_status=(403, 404))
+        if not traverse_obj(data, ('result', {dict})):
+            msg = traverse_obj(data, ('messages', '__default'), 'title', expected_type=str)
+            raise ExtractorError(msg or 'Got empty response from playback API', expected=True)

-        if 'msg_block_01.png' in webpage:
-            self.raise_geo_restricted(
-                msg='This content is not available in your region',
-                countries=self._GEO_COUNTRIES)
+        data = data['result']

-        if 'This video is only available to ODK PLUS members.' in webpage:
-            raise ExtractorError(
-                'This video is only available to ODK PLUS members.',
-                expected=True)
+        def try_geo_bypass(url):
+            return traverse_obj(url, ({parse_qs}, 'stream_url', 0, {url_or_none})) or url

-        if 'ODK PREMIUM Members Only' in webpage:
-            raise ExtractorError(
-                'This video is only available to ODK PREMIUM members.',
-                expected=True)
+        def try_upgrade_quality(url):
+            mod_url = re.sub(r'_720(p?)\.m3u8', r'_1080\1.m3u8', url)
+            return mod_url if mod_url != url and self._request_webpage(
+                HEADRequest(mod_url), video_id, note='Checking for higher quality format',
+                errnote='No higher quality format found', fatal=False) else url

-        title = self._search_regex(
-            r'class=["\']episode_title["\'][^>]*>([^<]+)',
-            webpage, 'episode_title', fatal=False) or self._og_search_title(webpage)
+        formats = []
+        for m3u8_url in traverse_obj(data, (('sources', 'manifest'), ..., 'url', {url_or_none}, {try_geo_bypass})):
+            formats.extend(self._extract_m3u8_formats(try_upgrade_quality(m3u8_url), video_id, fatal=False))

-        jw_config = self._parse_json(
-            self._search_regex((
-                r'(?P<options>{\s*[\'"]tracks[\'"].*?})[)\];]+$',
-                r'playlist\s*=\s*\[(?P<options>.+)];?$',
-                r'odkPlayer\.init.*?(?P<options>{[^;]+}).*?;',
-            ), webpage, 'jw config', flags=re.MULTILINE | re.DOTALL, group='options'),
-            video_id, transform_source=js_to_json)
-        info = self._parse_jwplayer_data(
-            jw_config, video_id, require_title=False, m3u8_id='hls',
-            base_url=url)
+        subtitles = {}
+        for track in traverse_obj(data, ('text_tracks', lambda _, v: url_or_none(v['url']))):
+            subtitles.setdefault(track.get('language', 'und'), []).append({
+                'url': track['url'],
+                'ext': track.get('codec'),
+                'name': track.get('label'),
+            })

-        info.update({
-            'title': title,
-            'description': self._og_search_description(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage)
-        })
-        return info
+        def if_series(key=None):
+            return lambda obj: obj[key] if key and obj['kind'] == 'series' else None
+
+        return {
+            'id': video_id,
+            'title': join_nonempty(
+                ('episode', 'program', 'title'),
+                ('episode', 'title'), from_dict=data, delim=': '),
+            **traverse_obj(data, {
+                'thumbnail': ('episode', 'images', 'thumbnail', {url_or_none}),
+                'release_date': ('episode', 'release_date', {lambda x: x.replace('-', '')}, {unified_strdate}),
+                'duration': ('duration', {functools.partial(float_or_none, scale=1000)}),
+                'age_limit': ('age_rating', 'name', {lambda x: x.replace('R', '')}, {parse_age_limit}),
+                'series': ('episode', {if_series(key='program')}, 'title'),
+                'series_id': ('episode', {if_series(key='program')}, 'id'),
+                'episode': ('episode', {if_series(key='title')}),
+                'episode_number': ('episode', {if_series(key='number')}, {int_or_none}),
+            }, get_all=False),
+            'formats': formats,
+            'subtitles': subtitles,
+        }
+
+
+class OnDemandKoreaProgramIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?:en/)?player/vod/(?P<id>[a-z0-9-]+)(?:$|#)'
+    _GEO_COUNTRIES = ['US', 'CA']
+
+    _TESTS = [{
+        'url': 'https://www.ondemandkorea.com/player/vod/uskn-news',
+        'info_dict': {
+            'id': 'uskn-news',
+        },
+        'playlist_mincount': 755,
+    }, {
+        'url': 'https://www.ondemandkorea.com/en/player/vod/the-land',
+        'info_dict': {
+            'id': 'the-land',
+        },
+        'playlist_count': 52,
+    }]
+
+    _PAGE_SIZE = 100
+
+    def _fetch_page(self, display_id, page):
+        page += 1
+        page_data = self._download_json(
+            f'https://odkmedia.io/odx/api/v3/program/{display_id}/episodes/', display_id,
+            headers={'service-name': 'odk'}, query={
+                'page': page,
+                'page_size': self._PAGE_SIZE,
+            }, note=f'Downloading page {page}', expected_status=404)
+        for episode in traverse_obj(page_data, ('result', 'results', ...)):
+            yield self.url_result(
+                f'https://www.ondemandkorea.com/player/vod/{display_id}?contentId={episode["id"]}',
+                ie=OnDemandKoreaIE, video_title=episode.get('title'))
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        entries = OnDemandPagedList(functools.partial(
+            self._fetch_page, display_id), self._PAGE_SIZE)
+
+        return self.playlist_result(entries, display_id)
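
The new program extractor leans on yt-dlp's OnDemandPagedList, which fetches pages lazily so slicing a playlist only downloads the pages actually needed. A toy sketch of that pattern, with an in-memory fetcher standing in for the real ODK episodes API (data and page size are invented):

from yt_dlp.utils import OnDemandPagedList

PAGE_SIZE = 3
EPISODES = [f'episode-{n}' for n in range(8)]  # invented stand-in data

def fetch_page(page):
    # OnDemandPagedList passes a 0-indexed page number; _fetch_page() above
    # bumps it by one because the ODK API counts pages from 1
    start = page * PAGE_SIZE
    yield from EPISODES[start:start + PAGE_SIZE]

entries = OnDemandPagedList(fetch_page, PAGE_SIZE)
print(list(entries.getslice(0, 4)))  # episode-0 .. episode-3; only two pages fetched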

yt_dlp/extractor/orf.py

@@ -4,15 +4,16 @@ import re
 from .common import InfoExtractor
 from ..networking import HEADRequest
 from ..utils import (
+    InAdvancePagedList,
     clean_html,
     determine_ext,
     float_or_none,
-    InAdvancePagedList,
     int_or_none,
     join_nonempty,
+    make_archive_id,
+    mimetype2ext,
     orderedSet,
     remove_end,
-    make_archive_id,
     smuggle_url,
     strip_jsonp,
     try_call,
@@ -21,6 +22,7 @@ from ..utils import (
     unsmuggle_url,
     url_or_none,
 )
+from ..utils.traversal import traverse_obj


 class ORFTVthekIE(InfoExtractor):
@@ -334,6 +336,45 @@ class ORFRadioIE(InfoExtractor):
             self._entries(data, station or station2), show_id, data.get('title'), clean_html(data.get('subtitle')))


+class ORFPodcastIE(InfoExtractor):
+    IE_NAME = 'orf:podcast'
+    _STATION_RE = '|'.join(map(re.escape, (
+        'bgl', 'fm4', 'ktn', 'noe', 'oe1', 'oe3',
+        'ooe', 'sbg', 'stm', 'tir', 'tv', 'vbg', 'wie')))
+    _VALID_URL = rf'https?://sound\.orf\.at/podcast/(?P<station>{_STATION_RE})/(?P<show>[\w-]+)/(?P<id>[\w-]+)'
+    _TESTS = [{
+        'url': 'https://sound.orf.at/podcast/oe3/fruehstueck-bei-mir/nicolas-stockhammer-15102023',
+        'md5': '526a5700e03d271a1505386a8721ab9b',
+        'info_dict': {
+            'id': 'nicolas-stockhammer-15102023',
+            'ext': 'mp3',
+            'title': 'Nicolas Stockhammer (15.10.2023)',
+            'duration': 3396.0,
+            'series': 'Frühstück bei mir',
+        },
+        'skip': 'ORF podcasts are only available for a limited time'
+    }]
+
+    def _real_extract(self, url):
+        station, show, show_id = self._match_valid_url(url).group('station', 'show', 'id')
+        data = self._download_json(
+            f'https://audioapi.orf.at/radiothek/api/2.0/podcast/{station}/{show}/{show_id}', show_id)
+
+        return {
+            'id': show_id,
+            'ext': 'mp3',
+            'vcodec': 'none',
+            **traverse_obj(data, ('payload', {
+                'url': ('enclosures', 0, 'url'),
+                'ext': ('enclosures', 0, 'type', {mimetype2ext}),
+                'title': 'title',
+                'description': ('description', {clean_html}),
+                'duration': ('duration', {functools.partial(float_or_none, scale=1000)}),
+                'series': ('podcast', 'title'),
+            })),
+        }
+
+
 class ORFIPTVIE(InfoExtractor):
     IE_NAME = 'orf:iptv'
     IE_DESC = 'iptv.ORF.at'
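
The ('payload', {...}) pattern in ORFPodcastIE applies a dict template at a nested path in one pass. A standalone sketch of that traverse_obj form, with an invented input dict mirroring the podcast API's payload shape:

from yt_dlp.utils import mimetype2ext
from yt_dlp.utils.traversal import traverse_obj

data = {'payload': {
    'title': 'Nicolas Stockhammer (15.10.2023)',
    'enclosures': [{'url': 'https://example.com/a.mp3', 'type': 'audio/mpeg'}],
    'podcast': {'title': 'Frühstück bei mir'},
}}
print(traverse_obj(data, ('payload', {
    'url': ('enclosures', 0, 'url'),
    'ext': ('enclosures', 0, 'type', {mimetype2ext}),  # 'audio/mpeg' -> 'mp3'
    'series': ('podcast', 'title'),
})))
# {'url': 'https://example.com/a.mp3', 'ext': 'mp3', 'series': 'Frühstück bei mir'}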

yt_dlp/extractor/periscope.py

@@ -4,6 +4,7 @@ from ..utils import (
     parse_iso8601,
     unescapeHTML,
 )
+from ..utils.traversal import traverse_obj


 class PeriscopeBaseIE(InfoExtractor):
@@ -20,8 +21,6 @@ class PeriscopeBaseIE(InfoExtractor):
         title = broadcast.get('status') or 'Periscope Broadcast'
         uploader = broadcast.get('user_display_name') or broadcast.get('username')
         title = '%s - %s' % (uploader, title) if uploader else title
-        is_live = broadcast.get('state').lower() == 'running'
-
         thumbnails = [{
             'url': broadcast[image],
         } for image in ('image_url', 'image_url_medium', 'image_url_small') if broadcast.get(image)]
@@ -31,12 +30,16 @@ class PeriscopeBaseIE(InfoExtractor):
             'title': title,
             'timestamp': parse_iso8601(broadcast.get('created_at')) or int_or_none(
                 broadcast.get('created_at_ms'), scale=1000),
+            'release_timestamp': int_or_none(broadcast.get('scheduled_start_ms'), scale=1000),
             'uploader': uploader,
             'uploader_id': broadcast.get('user_id') or broadcast.get('username'),
             'thumbnails': thumbnails,
             'view_count': int_or_none(broadcast.get('total_watched')),
             'tags': broadcast.get('tags'),
-            'is_live': is_live,
+            'live_status': {
+                'running': 'is_live',
+                'not_started': 'is_upcoming',
+            }.get(traverse_obj(broadcast, ('state', {str.lower}))) or 'was_live',
         }

     @staticmethod

yt_dlp/extractor/radiocomercial.py (new file)

@@ -0,0 +1,150 @@
+import itertools
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+    ExtractorError,
+    extract_attributes,
+    get_element_by_class,
+    get_element_html_by_class,
+    get_element_text_and_html_by_tag,
+    get_elements_html_by_class,
+    int_or_none,
+    join_nonempty,
+    try_call,
+    unified_strdate,
+    update_url,
+    urljoin
+)
+from ..utils.traversal import traverse_obj
+
+
+class RadioComercialIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/[^/?#]+/t?(?P<season>\d+)/(?P<id>[\w-]+)'
+    _TESTS = [{
+        'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao/t6/taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas#page-content-wrapper',
+        'md5': '5f4fe8e485b29d2e8fd495605bc2c7e4',
+        'info_dict': {
+            'id': 'taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas',
+            'ext': 'mp3',
+            'title': 'Taylor Swift entranhando-se que nem uma espada no ventre dos fãs.',
+            'release_date': '20231025',
+            'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
+            'season': 6
+        }
+    }, {
+        'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3/convenca-me-num-minuto-que-os-lobisomens-existem',
+        'md5': '47e96c273aef96a8eb160cd6cf46d782',
+        'info_dict': {
+            'id': 'convenca-me-num-minuto-que-os-lobisomens-existem',
+            'ext': 'mp3',
+            'title': 'Convença-me num minuto que os lobisomens existem',
+            'release_date': '20231026',
+            'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
+            'season': 3
+        }
+    }, {
+        'url': 'https://radiocomercial.pt/podcasts/inacreditavel-by-ines-castel-branco/t2/o-desastre-de-aviao',
+        'md5': '69be64255420fec23b7259955d771e54',
+        'info_dict': {
+            'id': 'o-desastre-de-aviao',
+            'ext': 'mp3',
+            'title': 'O desastre de avião',
+            'description': 'md5:8a82beeb372641614772baab7246245f',
+            'release_date': '20231101',
+            'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
+            'season': 2
+        },
+        'params': {
+            # inconsistent md5
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/2023/t-n-t-29-de-outubro',
+        'md5': '91d32d4d4b1407272068b102730fc9fa',
+        'info_dict': {
+            'id': 't-n-t-29-de-outubro',
+            'ext': 'mp3',
+            'title': 'T.N.T 29 de outubro',
+            'release_date': '20231029',
+            'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg',
+            'season': 2023
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id, season = self._match_valid_url(url).group('id', 'season')
+        webpage = self._download_webpage(url, video_id)
+        return {
+            'id': video_id,
+            'title': self._html_extract_title(webpage),
+            'description': self._og_search_description(webpage, default=None),
+            'release_date': unified_strdate(get_element_by_class(
+                'date', get_element_html_by_class('descriptions', webpage) or '')),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'season': int_or_none(season),
+            'url': extract_attributes(get_element_html_by_class('audiofile', webpage) or '').get('href'),
+        }
+
+
+class RadioComercialPlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/(?P<id>[\w-]+)(?:/t?(?P<season>\d+))?/?(?:$|[?#])'
+    _TESTS = [{
+        'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3',
+        'info_dict': {
+            'id': 'convenca-me-num-minuto_t3',
+            'title': 'Convença-me num Minuto - Temporada 3',
+        },
+        'playlist_mincount': 32
+    }, {
+        'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao',
+        'info_dict': {
+            'id': 'o-homem-que-mordeu-o-cao',
+            'title': 'O Homem Que Mordeu o Cão',
+        },
+        'playlist_mincount': 19
+    }, {
+        'url': 'https://radiocomercial.pt/podcasts/as-minhas-coisas-favoritas',
+        'info_dict': {
+            'id': 'as-minhas-coisas-favoritas',
+            'title': 'As Minhas Coisas Favoritas',
+        },
+        'playlist_mincount': 131
+    }, {
+        'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/t2023',
+        'info_dict': {
+            'id': 'tnt-todos-no-top_t2023',
+            'title': 'TNT - Todos No Top - Temporada 2023',
+        },
+        'playlist_mincount': 39
+    }]
+
+    def _entries(self, url, playlist_id):
+        for page in itertools.count(1):
+            try:
+                webpage = self._download_webpage(
+                    f'{url}/{page}', playlist_id, f'Downloading page {page}')
+            except ExtractorError as e:
+                if isinstance(e.cause, HTTPError) and e.cause.status == 404:
+                    break
+                raise
+
+            episodes = get_elements_html_by_class('tm-ouvir-podcast', webpage)
+            if not episodes:
+                break
+            for url_path in traverse_obj(episodes, (..., {extract_attributes}, 'href')):
+                episode_url = urljoin(url, url_path)
+                if RadioComercialIE.suitable(episode_url):
+                    yield episode_url
+
+    def _real_extract(self, url):
+        podcast, season = self._match_valid_url(url).group('id', 'season')
+        playlist_id = join_nonempty(podcast, season, delim='_t')
+        url = update_url(url, query=None, fragment=None)
+        webpage = self._download_webpage(url, playlist_id)
+
+        name = try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0])
+        title = name if name == season else join_nonempty(name, season, delim=' - Temporada ')
+
+        return self.playlist_from_matches(
+            self._entries(url, playlist_id), playlist_id, title, ie=RadioComercialIE)

yt_dlp/extractor/sbscokr.py (new file, 200 lines)

@@ -0,0 +1,200 @@
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    int_or_none,
+    parse_iso8601,
+    parse_resolution,
+    url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class SBSCoKrIE(InfoExtractor):
+    IE_NAME = 'sbs.co.kr'
+    _VALID_URL = [r'https?://allvod\.sbs\.co\.kr/allvod/vod(?:Package)?EndPage\.do\?(?:[^#]+&)?mdaId=(?P<id>\d+)',
+                  r'https?://programs\.sbs\.co\.kr/(?:enter|drama|culture|sports|plus|mtv|kth)/[a-z0-9]+/(?:vod|clip|movie)/\d+/(?P<id>(?:OC)?\d+)']
+
+    _TESTS = [{
+        'url': 'https://programs.sbs.co.kr/enter/dongsang2/clip/52007/OC467706746?div=main_pop_clip',
+        'md5': 'c3f6d45e1fb5682039d94cda23c36f19',
+        'info_dict': {
+            'id': 'OC467706746',
+            'ext': 'mp4',
+            'title': '‘아슬아슬’ 박군♥한영의 새 집 인테리어 대첩♨',
+            'description': 'md5:6a71eb1979ee4a94ea380310068ccab4',
+            'thumbnail': 'https://img2.sbs.co.kr/ops_clip_img/2023/10/10/34c4c0f9-a9a5-4ff6-a92e-9bb4b5f6fa65915w1280.jpg',
+            'release_timestamp': 1696889400,
+            'release_date': '20231009',
+            'view_count': int,
+            'like_count': int,
+            'duration': 238,
+            'age_limit': 15,
+            'series': '동상이몽2_너는 내 운명',
+            'episode': '레이디제인, ‘혼전임신설’ 3개월 앞당긴 결혼식 비하인드 스토리 최초 공개!',
+            'episode_number': 311,
+        },
+    }, {
+        'url': 'https://allvod.sbs.co.kr/allvod/vodPackageEndPage.do?mdaId=22000489324&combiId=PA000000284&packageType=A&isFreeYN=',
+        'md5': 'bf46b2e89fda7ae7de01f5743cef7236',
+        'info_dict': {
+            'id': '22000489324',
+            'ext': 'mp4',
+            'title': '[다시보기] 트롤리 15회',
+            'description': 'md5:0e55d74bef1ac55c61ae90c73ac485f4',
+            'thumbnail': 'https://img2.sbs.co.kr/img/sbs_cms/WE/2023/02/14/arC1676333794938-1280-720.jpg',
+            'release_timestamp': 1676325600,
+            'release_date': '20230213',
+            'view_count': int,
+            'like_count': int,
+            'duration': 5931,
+            'age_limit': 15,
+            'series': '트롤리',
+            'episode': '이거 다 거짓말이야',
+            'episode_number': 15,
+        },
+    }, {
+        'url': 'https://programs.sbs.co.kr/enter/fourman/vod/69625/22000508948',
+        'md5': '41e8ae4cc6c8424f4e4d76661a4becbf',
+        'info_dict': {
+            'id': '22000508948',
+            'ext': 'mp4',
+            'title': '[다시보기] 신발 벗고 돌싱포맨 104회',
+            'description': 'md5:c6a247383c4dd661e4b956bf4d3b586e',
+            'thumbnail': 'https://img2.sbs.co.kr/img/sbs_cms/WE/2023/08/30/2vb1693355446261-1280-720.jpg',
+            'release_timestamp': 1693342800,
+            'release_date': '20230829',
+            'view_count': int,
+            'like_count': int,
+            'duration': 7036,
+            'age_limit': 15,
+            'series': '신발 벗고 돌싱포맨',
+            'episode': '돌싱포맨 저격수들 등장!',
+            'episode_number': 104,
+        },
+    }]
+
+    def _call_api(self, video_id, rscuse=''):
+        return self._download_json(
+            f'https://api.play.sbs.co.kr/1.0/sbs_vodall/{video_id}', video_id,
+            note=f'Downloading m3u8 information {rscuse}',
+            query={
+                'platform': 'pcweb',
+                'protocol': 'download',
+                'absolute_show': 'Y',
+                'service': 'program',
+                'ssl': 'Y',
+                'rscuse': rscuse,
+            })
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        details = self._call_api(video_id)
+        source = traverse_obj(details, ('vod', 'source', 'mediasource', {dict})) or {}
+
+        formats = []
+        for stream in traverse_obj(details, (
+                'vod', 'source', 'mediasourcelist', lambda _, v: v['mediaurl'] or v['mediarscuse']
+        ), default=[source]):
+            if not stream.get('mediaurl'):
+                new_source = traverse_obj(
+                    self._call_api(video_id, rscuse=stream['mediarscuse']),
+                    ('vod', 'source', 'mediasource', {dict})) or {}
+                if new_source.get('mediarscuse') == source.get('mediarscuse') or not new_source.get('mediaurl'):
+                    continue
+                stream = new_source
+            formats.append({
+                'url': stream['mediaurl'],
+                'format_id': stream.get('mediarscuse'),
+                'format_note': stream.get('medianame'),
+                **parse_resolution(stream.get('quality')),
+                'preference': int_or_none(stream.get('mediarscuse'))
+            })
+
+        caption_url = traverse_obj(details, ('vod', 'source', 'subtitle', {url_or_none}))
+
+        return {
+            'id': video_id,
+            **traverse_obj(details, ('vod', {
+                'title': ('info', 'title'),
+                'duration': ('info', 'duration', {int_or_none}),
+                'view_count': ('info', 'viewcount', {int_or_none}),
+                'like_count': ('info', 'likecount', {int_or_none}),
+                'description': ('info', 'synopsis', {clean_html}),
+                'episode': ('info', 'content', ('contenttitle', 'title')),
+                'episode_number': ('info', 'content', 'number', {int_or_none}),
+                'series': ('info', 'program', 'programtitle'),
+                'age_limit': ('info', 'targetage', {int_or_none}),
+                'release_timestamp': ('info', 'broaddate', {parse_iso8601}),
+                'thumbnail': ('source', 'thumbnail', 'origin', {url_or_none}),
+            }), get_all=False),
+            'formats': formats,
+            'subtitles': {'ko': [{'url': caption_url}]} if caption_url else None,
+        }
+
+
+class SBSCoKrAllvodProgramIE(InfoExtractor):
+    IE_NAME = 'sbs.co.kr:allvod_program'
+    _VALID_URL = r'https?://allvod\.sbs\.co\.kr/allvod/vod(?:Free)?ProgramDetail\.do\?(?:[^#]+&)?pgmId=(?P<id>P?\d+)'
+
+    _TESTS = [{
+        'url': 'https://allvod.sbs.co.kr/allvod/vodFreeProgramDetail.do?type=legend&pgmId=22000010159&listOrder=vodCntAsc',
+        'info_dict': {
+            '_type': 'playlist',
+            'id': '22000010159',
+        },
+        'playlist_count': 18,
+    }, {
+        'url': 'https://allvod.sbs.co.kr/allvod/vodProgramDetail.do?pgmId=P460810577',
+        'info_dict': {
+            '_type': 'playlist',
+            'id': 'P460810577',
+        },
+        'playlist_count': 13,
+    }]
+
+    def _real_extract(self, url):
+        program_id = self._match_id(url)
+
+        details = self._download_json(
+            'https://allvod.sbs.co.kr/allvod/vodProgramDetail/vodProgramDetailAjax.do',
+            program_id, note='Downloading program details',
+            query={
+                'pgmId': program_id,
+                'currentCount': '10000',
+            })
+
+        return self.playlist_result(
+            [self.url_result(f'https://allvod.sbs.co.kr/allvod/vodEndPage.do?mdaId={video_id}', SBSCoKrIE)
+             for video_id in traverse_obj(details, ('list', ..., 'mdaId'))], program_id)
+
+
+class SBSCoKrProgramsVodIE(InfoExtractor):
+    IE_NAME = 'sbs.co.kr:programs_vod'
+    _VALID_URL = r'https?://programs\.sbs\.co\.kr/(?:enter|drama|culture|sports|plus|mtv)/(?P<id>[a-z0-9]+)/vods'
+
+    _TESTS = [{
+        'url': 'https://programs.sbs.co.kr/culture/morningwide/vods/65007',
+        'info_dict': {
+            '_type': 'playlist',
+            'id': '00000210215',
+        },
+        'playlist_mincount': 9782,
+    }, {
+        'url': 'https://programs.sbs.co.kr/enter/dongsang2/vods/52006',
+        'info_dict': {
+            '_type': 'playlist',
+            'id': '22000010476',
+        },
+        'playlist_mincount': 312,
+    }]
+
+    def _real_extract(self, url):
+        program_slug = self._match_id(url)
+
+        program_id = self._download_json(
+            f'https://static.apis.sbs.co.kr/program-api/1.0/menu/{program_slug}', program_slug,
+            note='Downloading program menu data')['program']['programid']
+
+        return self.url_result(
+            f'https://allvod.sbs.co.kr/allvod/vodProgramDetail.do?pgmId={program_id}', SBSCoKrAllvodProgramIE)

yt_dlp/extractor/stacommu.py

@ -38,9 +38,48 @@ class StacommuBaseIE(WrestleUniverseBaseIE):
return None return None
return traverse_obj(encryption_data, {'key': ('key', {decrypt}), 'iv': ('iv', {decrypt})}) return traverse_obj(encryption_data, {'key': ('key', {decrypt}), 'iv': ('iv', {decrypt})})
def _extract_vod(self, url):
video_id = self._match_id(url)
video_info = self._download_metadata(
url, video_id, 'ja', ('dehydratedState', 'queries', 0, 'state', 'data'))
hls_info, decrypt = self._call_encrypted_api(
video_id, ':watch', 'stream information', data={'method': 1})
return {
'id': video_id,
'formats': self._get_formats(hls_info, ('protocolHls', 'url', {url_or_none}), video_id),
'hls_aes': self._extract_hls_key(hls_info, 'protocolHls', decrypt),
**traverse_obj(video_info, {
'title': ('displayName', {str}),
'description': ('description', {str}),
'timestamp': ('watchStartTime', {int_or_none}),
'thumbnail': ('keyVisualUrl', {url_or_none}),
'cast': ('casts', ..., 'displayName', {str}),
'duration': ('duration', {int}),
}),
}
def _extract_ppv(self, url):
video_id = self._match_id(url)
video_info = self._call_api(video_id, msg='video information', query={'al': 'ja'}, auth=False)
hls_info, decrypt = self._call_encrypted_api(
video_id, ':watchArchive', 'stream information', data={'method': 1})
return {
'id': video_id,
'formats': self._get_formats(hls_info, ('hls', 'urls', ..., {url_or_none}), video_id),
'hls_aes': self._extract_hls_key(hls_info, 'hls', decrypt),
**traverse_obj(video_info, {
'title': ('displayName', {str}),
'timestamp': ('startTime', {int_or_none}),
'thumbnail': ('keyVisualUrl', {url_or_none}),
'duration': ('duration', {int_or_none}),
}),
}
class StacommuVODIE(StacommuBaseIE): class StacommuVODIE(StacommuBaseIE):
_VALID_URL = r'https?://www\.stacommu\.jp/videos/episodes/(?P<id>[\da-zA-Z]+)' _VALID_URL = r'https?://www\.stacommu\.jp/(?:en/)?videos/episodes/(?P<id>[\da-zA-Z]+)'
_TESTS = [{ _TESTS = [{
# not encrypted # not encrypted
'url': 'https://www.stacommu.jp/videos/episodes/aXcVKjHyAENEjard61soZZ', 'url': 'https://www.stacommu.jp/videos/episodes/aXcVKjHyAENEjard61soZZ',
@ -79,34 +118,19 @@ class StacommuVODIE(StacommuBaseIE):
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
}, },
}, {
'url': 'https://www.stacommu.jp/en/videos/episodes/aXcVKjHyAENEjard61soZZ',
'only_matching': True,
}] }]
_API_PATH = 'videoEpisodes' _API_PATH = 'videoEpisodes'
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) return self._extract_vod(url)
video_info = self._download_metadata(
url, video_id, 'ja', ('dehydratedState', 'queries', 0, 'state', 'data'))
hls_info, decrypt = self._call_encrypted_api(
video_id, ':watch', 'stream information', data={'method': 1})
return {
'id': video_id,
'formats': self._get_formats(hls_info, ('protocolHls', 'url', {url_or_none}), video_id),
'hls_aes': self._extract_hls_key(hls_info, 'protocolHls', decrypt),
**traverse_obj(video_info, {
'title': ('displayName', {str}),
'description': ('description', {str}),
'timestamp': ('watchStartTime', {int_or_none}),
'thumbnail': ('keyVisualUrl', {url_or_none}),
'cast': ('casts', ..., 'displayName', {str}),
'duration': ('duration', {int}),
}),
}
 class StacommuLiveIE(StacommuBaseIE):
-    _VALID_URL = r'https?://www\.stacommu\.jp/live/(?P<id>[\da-zA-Z]+)'
+    _VALID_URL = r'https?://www\.stacommu\.jp/(?:en/)?live/(?P<id>[\da-zA-Z]+)'
     _TESTS = [{
         'url': 'https://www.stacommu.jp/live/d2FJ3zLnndegZJCAEzGM3m',
         'info_dict': {
@@ -125,24 +149,83 @@ class StacommuLiveIE(StacommuBaseIE):
         'params': {
             'skip_download': 'm3u8',
         },
+    }, {
+        'url': 'https://www.stacommu.jp/en/live/d2FJ3zLnndegZJCAEzGM3m',
+        'only_matching': True,
     }]
     _API_PATH = 'events'

     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        video_info = self._call_api(video_id, msg='video information', query={'al': 'ja'}, auth=False)
-        hls_info, decrypt = self._call_encrypted_api(
-            video_id, ':watchArchive', 'stream information', data={'method': 1})
-        return {
-            'id': video_id,
-            'formats': self._get_formats(hls_info, ('hls', 'urls', ..., {url_or_none}), video_id),
-            'hls_aes': self._extract_hls_key(hls_info, 'hls', decrypt),
-            **traverse_obj(video_info, {
-                'title': ('displayName', {str}),
-                'timestamp': ('startTime', {int_or_none}),
-                'thumbnail': ('keyVisualUrl', {url_or_none}),
-                'duration': ('duration', {int_or_none}),
-            }),
-        }
+        return self._extract_ppv(url)
+
+
+class TheaterComplexTownBaseIE(StacommuBaseIE):
+    _NETRC_MACHINE = 'theatercomplextown'
+    _API_HOST = 'api.theater-complex.town'
+    _LOGIN_QUERY = {'key': 'AIzaSyAgNCqToaIz4a062EeIrkhI_xetVfAOrfc'}
+    _LOGIN_HEADERS = {
+        'Accept': '*/*',
+        'Content-Type': 'application/json',
+        'X-Client-Version': 'Chrome/JsCore/9.23.0/FirebaseCore-web',
+        'Referer': 'https://www.theater-complex.town/',
+        'Origin': 'https://www.theater-complex.town',
+    }
+
+
+class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?videos/episodes/(?P<id>\w+)'
+    IE_NAME = 'theatercomplextown:vod'
+    _TESTS = [{
+        'url': 'https://www.theater-complex.town/videos/episodes/hoxqidYNoAn7bP92DN6p78',
+        'info_dict': {
+            'id': 'hoxqidYNoAn7bP92DN6p78',
+            'ext': 'mp4',
+            'title': '演劇ドラフトグランプリ2023 劇団『恋のぼり』〜劇団名決定秘話ラジオ',
+            'description': 'md5:a7e2e9cf570379ea67fb630f345ff65d',
+            'cast': ['玉城 裕規', '石川 凌雅'],
+            'thumbnail': 'https://image.theater-complex.town/5URnXX6KCeDysuFrPkP38o/5URnXX6KCeDysuFrPkP38o',
+            'upload_date': '20231103',
+            'timestamp': 1699016400,
+            'duration': 868,
+        },
+        'params': {
+            'skip_download': 'm3u8',
+        },
+    }, {
+        'url': 'https://www.theater-complex.town/en/videos/episodes/6QT7XYwM9dJz5Gf9VB6K5y',
+        'only_matching': True,
+    }]
+    _API_PATH = 'videoEpisodes'
+
+    def _real_extract(self, url):
+        return self._extract_vod(url)
+
+
+class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?ppv/(?P<id>\w+)'
+    IE_NAME = 'theatercomplextown:ppv'
+    _TESTS = [{
+        'url': 'https://www.theater-complex.town/ppv/wytW3X7khrjJBUpKuV3jen',
+        'info_dict': {
+            'id': 'wytW3X7khrjJBUpKuV3jen',
+            'ext': 'mp4',
+            'title': 'BREAK FREE STARS 11月5日12:30千秋楽公演',
+            'thumbnail': 'https://image.theater-complex.town/5GWEB31JcTUfjtgdeV5t6o/5GWEB31JcTUfjtgdeV5t6o',
+            'upload_date': '20231105',
+            'timestamp': 1699155000,
+            'duration': 8378,
+        },
+        'params': {
+            'skip_download': 'm3u8',
+        },
+    }, {
+        'url': 'https://www.theater-complex.town/en/ppv/wytW3X7khrjJBUpKuV3jen',
+        'only_matching': True,
+    }]
+    _API_PATH = 'events'
+
+    def _real_extract(self, url):
+        return self._extract_ppv(url)
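Theater Complex Town runs on the same platform as Stacommu, so the new extractors above only declare URL patterns, API paths and login constants, while the shared _extract_vod/_extract_ppv helpers on StacommuBaseIE (the tail of that addition is visible at the top of this diff) do the actual extraction. A minimal sketch of exercising the new extractors through the embedding API, using test URLs from the diff (assumes a yt-dlp build containing this patch):

import yt_dlp

urls = [
    'https://www.theater-complex.town/videos/episodes/hoxqidYNoAn7bP92DN6p78',
    'https://www.stacommu.jp/en/videos/episodes/aXcVKjHyAENEjard61soZZ',  # new (?:en/)? path
]

# skip_download runs the extractor (metadata + formats) without fetching media
with yt_dlp.YoutubeDL({'skip_download': True}) as ydl:
    for url in urls:
        info = ydl.extract_info(url, download=False)
        print(info['id'], info.get('title'), info.get('duration'))

PPV pages need an account; credentials can be supplied via the username/password options or a .netrc entry for the theatercomplextown machine declared by _NETRC_MACHINE.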


@@ -1,66 +0,0 @@
-from .common import InfoExtractor
-from ..utils import remove_end
-
-
-class ThisAVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
-    _TESTS = [{
-        # jwplayer
-        'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html',
-        'md5': '0480f1ef3932d901f0e0e719f188f19b',
-        'info_dict': {
-            'id': '47734',
-            'ext': 'flv',
-            'title': '高樹マリア - Just fit',
-            'uploader': 'dj7970',
-            'uploader_id': 'dj7970'
-        }
-    }, {
-        # html5 media
-        'url': 'http://www.thisav.com/video/242352/nerdy-18yo-big-ass-tattoos-and-glasses.html',
-        'md5': 'ba90c076bd0f80203679e5b60bf523ee',
-        'info_dict': {
-            'id': '242352',
-            'ext': 'mp4',
-            'title': 'Nerdy 18yo Big Ass Tattoos and Glasses',
-            'uploader': 'cybersluts',
-            'uploader_id': 'cybersluts',
-        },
-    }]
-
-    def _real_extract(self, url):
-        mobj = self._match_valid_url(url)
-        video_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, video_id)
-        title = remove_end(self._html_extract_title(webpage), ' - 視頻 - ThisAV.com-世界第一中文成人娛樂網站')
-        video_url = self._html_search_regex(
-            r"addVariable\('file','([^']+)'\);", webpage, 'video url', default=None)
-        if video_url:
-            info_dict = {
-                'formats': [{
-                    'url': video_url,
-                }],
-            }
-        else:
-            entries = self._parse_html5_media_entries(url, webpage, video_id)
-            if entries:
-                info_dict = entries[0]
-            else:
-                info_dict = self._extract_jwplayer_data(
-                    webpage, video_id, require_title=False)
-
-        uploader = self._html_search_regex(
-            r': <a href="http://www\.thisav\.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
-            webpage, 'uploader name', fatal=False)
-        uploader_id = self._html_search_regex(
-            r': <a href="http://www\.thisav\.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
-            webpage, 'uploader id', fatal=False)
-
-        info_dict.update({
-            'id': video_id,
-            'uploader': uploader,
-            'uploader_id': uploader_id,
-            'title': title,
-        })
-
-        return info_dict


@@ -1,11 +1,23 @@
+import json
+
 from .common import InfoExtractor
+from .zype import ZypeIE
 from ..networking import HEADRequest
+from ..networking.exceptions import HTTPError
+from ..utils import (
+    ExtractorError,
+    filter_dict,
+    parse_qs,
+    try_call,
+    urlencode_postdata,
+)


 class ThisOldHouseIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/]+/)?\d+)/(?P<id>[^/?#]+)'
+    _NETRC_MACHINE = 'thisoldhouse'
+    _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/?#]+/)?\d+)/(?P<id>[^/?#]+)'
     _TESTS = [{
-        'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
+        'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
         'info_dict': {
             'id': '5dcdddf673c3f956ef5db202',
             'ext': 'mp4',
@@ -23,13 +35,16 @@ class ThisOldHouseIE(InfoExtractor):
             'skip_download': True,
         },
     }, {
+        # Page no longer has video
         'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
         'only_matching': True,
     }, {
+        # 404 Not Found
        'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric',
         'only_matching': True,
     }, {
-        'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
+        # 404 Not Found
+        'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
         'only_matching': True,
     }, {
         'url': 'https://www.thisoldhouse.com/21113884/s41-e13-paradise-lost',
@@ -39,17 +54,51 @@ class ThisOldHouseIE(InfoExtractor):
         'url': 'https://www.thisoldhouse.com/21083431/seaside-transformation-the-westerly-project',
         'only_matching': True,
     }]
-    _ZYPE_TMPL = 'https://player.zype.com/embed/%s.html?api_key=hsOk_yMSPYNrT22e9pu8hihLXjaZf0JW5jsOWv4ZqyHJFvkJn6rtToHl09tbbsbe'
+    _LOGIN_URL = 'https://login.thisoldhouse.com/usernamepassword/login'
+
+    def _perform_login(self, username, password):
+        self._request_webpage(
+            HEADRequest('https://www.thisoldhouse.com/insider'), None, 'Requesting session cookies')
+        urlh = self._request_webpage(
+            'https://www.thisoldhouse.com/wp-login.php', None, 'Requesting login info',
+            errnote='Unable to login', query={'redirect_to': 'https://www.thisoldhouse.com/insider'})
+
+        try:
+            auth_form = self._download_webpage(
+                self._LOGIN_URL, None, 'Submitting credentials', headers={
+                    'Content-Type': 'application/json',
+                    'Referer': urlh.url,
+                }, data=json.dumps(filter_dict({
+                    **{('client_id' if k == 'client' else k): v[0] for k, v in parse_qs(urlh.url).items()},
+                    'tenant': 'thisoldhouse',
+                    'username': username,
+                    'password': password,
+                    'popup_options': {},
+                    'sso': True,
+                    '_csrf': try_call(lambda: self._get_cookies(self._LOGIN_URL)['_csrf'].value),
+                    '_intstate': 'deprecated',
+                }), separators=(',', ':')).encode())
+        except ExtractorError as e:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+                raise ExtractorError('Invalid username or password', expected=True)
+            raise
+
+        self._request_webpage(
+            'https://login.thisoldhouse.com/login/callback', None, 'Completing login',
+            data=urlencode_postdata(self._hidden_inputs(auth_form)))

     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
         if 'To Unlock This content' in webpage:
-            self.raise_login_required(method='cookies')
-        video_url = self._search_regex(
+            self.raise_login_required(
+                'This video is only available for subscribers. '
+                'Note that --cookies-from-browser may not work due to this site using session cookies')
+
+        video_url, video_id = self._search_regex(
             r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})[^\'"]*)[\'"]',
-            webpage, 'video url')
-        if 'subscription_required=true' in video_url or 'c-entry-group-labels__image' in webpage:
-            return self.url_result(self._request_webpage(HEADRequest(video_url), display_id).url, 'Zype', display_id)
-        video_id = self._search_regex(r'(?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})', video_url, 'video id')
-        return self.url_result(self._ZYPE_TMPL % video_id, 'Zype', video_id)
+            webpage, 'video url', group=(1, 2))
+
+        video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Zype URL').url
+        return self.url_result(video_url, ZypeIE, video_id)
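The login flow added above chains three hops: a HEAD request to /insider primes session cookies, wp-login.php redirects to the Auth0 tenant whose query string supplies the client/state parameters that must be echoed back, and the JSON credential POST returns a hidden-input form that is re-submitted to /login/callback. A rough standalone sketch of the same handshake with requests, for illustration only (the regex is a crude stand-in for the extractor's _hidden_inputs helper, and error handling is simplified):

import json
import re
from urllib.parse import parse_qs, urlparse

import requests

def login(username, password):
    s = requests.Session()
    s.head('https://www.thisoldhouse.com/insider')  # prime session cookies
    r = s.get('https://www.thisoldhouse.com/wp-login.php',
              params={'redirect_to': 'https://www.thisoldhouse.com/insider'})
    # echo back the Auth0 query parameters, renaming 'client' to 'client_id'
    params = {('client_id' if k == 'client' else k): v[0]
              for k, v in parse_qs(urlparse(r.url).query).items()}
    resp = s.post('https://login.thisoldhouse.com/usernamepassword/login',
                  headers={'Content-Type': 'application/json', 'Referer': r.url},
                  data=json.dumps({**params, 'tenant': 'thisoldhouse',
                                   'username': username, 'password': password,
                                   'popup_options': {}, 'sso': True,
                                   '_csrf': s.cookies.get('_csrf', ''),
                                   '_intstate': 'deprecated'}))
    if resp.status_code == 401:
        raise ValueError('Invalid username or password')
    # the response is a form of hidden inputs; re-POST it to finish the session
    fields = dict(re.findall(r'name="([^"]+)"\s+value="([^"]*)"', resp.text))
    s.post('https://login.thisoldhouse.com/login/callback', data=fields)
    return s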


@@ -1619,6 +1619,9 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
         info['title'] = broadcast.get('status') or info.get('title')
         info['uploader_id'] = broadcast.get('twitter_username') or info.get('uploader_id')
         info['uploader_url'] = format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None)
+        if info['live_status'] == 'is_upcoming':
+            return info
+
         media_key = broadcast['media_key']
         source = self._call_api(
             f'live_video_stream/status/{media_key}', media_key)['source']
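Returning early for is_upcoming broadcasts means the metadata (including live_status) reaches the downloader before any stream URL exists, which is what lets --wait-for-video poll until the broadcast starts instead of erroring on the missing media_key. A sketch of the equivalent embedding usage (the broadcast ID is a placeholder; the tuple is the min/max seconds to wait between retries):

import yt_dlp

# Poll an upcoming broadcast every 30-300 seconds until it goes live, then download
with yt_dlp.YoutubeDL({'wait_for_video': (30, 300)}) as ydl:
    ydl.download(['https://twitter.com/i/broadcasts/1yNGaYwZWqOxj'])  # placeholder ID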


@@ -164,11 +164,15 @@ class KnownPiracyIE(UnsupportedInfoExtractor):
         r'viewsb\.com',
         r'filemoon\.sx',
         r'hentai\.animestigma\.com',
+        r'thisav\.com',
     )

     _TESTS = [{
         'url': 'http://dood.to/e/5s1wmbdacezb',
         'only_matching': True,
+    }, {
+        'url': 'https://thisav.com/en/terms',
+        'only_matching': True,
     }]

     def _real_extract(self, url):


@@ -1,3 +1,4 @@
+import json
 import random
 import itertools
 import urllib.parse
@@ -18,24 +19,33 @@ from ..utils import (
 class WeiboBaseIE(InfoExtractor):
-    def _update_visitor_cookies(self, video_id):
+    def _update_visitor_cookies(self, visitor_url, video_id):
+        headers = {'Referer': visitor_url}
+        chrome_ver = self._search_regex(
+            r'Chrome/(\d+)', self.get_param('http_headers')['User-Agent'], 'user agent version', default='90')
         visitor_data = self._download_json(
             'https://passport.weibo.com/visitor/genvisitor', video_id,
             note='Generating first-visit guest request',
-            transform_source=strip_jsonp,
+            headers=headers, transform_source=strip_jsonp,
             data=urlencode_postdata({
                 'cb': 'gen_callback',
-                'fp': '{"os":"2","browser":"Gecko57,0,0,0","fonts":"undefined","screenInfo":"1440*900*24","plugins":""}',
-            }))
+                'fp': json.dumps({
+                    'os': '1',
+                    'browser': f'Chrome{chrome_ver},0,0,0',
+                    'fonts': 'undefined',
+                    'screenInfo': '1920*1080*24',
+                    'plugins': ''
+                }, separators=(',', ':'))}))['data']

         self._download_webpage(
             'https://passport.weibo.com/visitor/visitor', video_id,
             note='Running first-visit callback to get guest cookies',
-            query={
+            headers=headers, query={
                 'a': 'incarnate',
-                't': visitor_data['data']['tid'],
-                'w': 2,
-                'c': '%03d' % visitor_data['data']['confidence'],
+                't': visitor_data['tid'],
+                'w': 3 if visitor_data.get('new_tid') else 2,
+                'c': f'{visitor_data.get("confidence", 100):03d}',
+                'gc': '',
                 'cb': 'cross_domain',
                 'from': 'weibo',
                 '_rand': random.random(),
@@ -44,7 +54,7 @@ class WeiboBaseIE(InfoExtractor):
     def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs):
         webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs)
         if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com':
-            self._update_visitor_cookies(video_id)
+            self._update_visitor_cookies(urlh.url, video_id)
             webpage = self._download_webpage(url, video_id, *args, fatal=fatal, note=note, **kwargs)
         return self._parse_json(webpage, video_id, fatal=fatal)
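The reworked handshake mirrors a real browser more closely: the Referer comes from the URL that bounced the request to the passport host, the fingerprint's browser field is derived from the configured User-Agent, and the incarnate callback echoes the returned tid/confidence, switching w to 3 when the server issues a new_tid. A standalone sketch of the same two-step flow (requests-based illustration only; the JSONP unwrapping regex approximates strip_jsonp):

import json
import random
import re

import requests

USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0'  # any Chrome-like UA

def get_guest_cookies(visitor_url):
    s = requests.Session()
    s.headers.update({'User-Agent': USER_AGENT, 'Referer': visitor_url})
    chrome_ver = re.search(r'Chrome/(\d+)', USER_AGENT).group(1)
    # step 1: genvisitor returns JSONP wrapping {'data': {'tid': ..., 'confidence': ..., 'new_tid': ...}}
    resp = s.post('https://passport.weibo.com/visitor/genvisitor', data={
        'cb': 'gen_callback',
        'fp': json.dumps({'os': '1', 'browser': f'Chrome{chrome_ver},0,0,0',
                          'fonts': 'undefined', 'screenInfo': '1920*1080*24',
                          'plugins': ''}, separators=(',', ':')),
    })
    data = json.loads(re.search(r'\((\{.+\})\)', resp.text).group(1))['data']
    # step 2: the incarnate callback is what actually sets the guest cookies
    s.get('https://passport.weibo.com/visitor/visitor', params={
        'a': 'incarnate',
        't': data['tid'],
        'w': 3 if data.get('new_tid') else 2,
        'c': '%03d' % data.get('confidence', 100),
        'gc': '',
        'cb': 'cross_domain',
        'from': 'weibo',
        '_rand': random.random(),
    })
    return s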


@@ -2,10 +2,12 @@ from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
     int_or_none,
-    str_or_none,
     js_to_json,
     parse_filesize,
+    parse_resolution,
+    str_or_none,
     traverse_obj,
+    url_basename,
     urlencode_postdata,
     urljoin,
 )
@@ -41,6 +43,18 @@ class ZoomIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Timea Andrea Lelik\'s Personal Meeting Room',
         },
+        'skip': 'This recording has expired',
+    }, {
+        # view_with_share URL
+        'url': 'https://cityofdetroit.zoom.us/rec/share/VjE-5kW3xmgbEYqR5KzRgZ1OFZvtMtiXk5HyRJo5kK4m5PYE6RF4rF_oiiO_9qaM.UTAg1MI7JSnF3ZjX',
+        'md5': 'bdc7867a5934c151957fb81321b3c024',
+        'info_dict': {
+            'id': 'VjE-5kW3xmgbEYqR5KzRgZ1OFZvtMtiXk5HyRJo5kK4m5PYE6RF4rF_oiiO_9qaM.UTAg1MI7JSnF3ZjX',
+            'ext': 'mp4',
+            'title': 'February 2022 Detroit Revenue Estimating Conference',
+            'duration': 7299,
+            'formats': 'mincount:3',
+        },
     }]

     def _get_page_data(self, webpage, video_id):
@@ -72,6 +86,7 @@ class ZoomIE(InfoExtractor):
     def _real_extract(self, url):
         base_url, url_type, video_id = self._match_valid_url(url).group('base_url', 'type', 'id')
+        query = {}

         if url_type == 'share':
             webpage = self._get_real_webpage(url, base_url, video_id, 'share')
@@ -80,6 +95,7 @@ class ZoomIE(InfoExtractor):
                 f'{base_url}nws/recording/1.0/play/share-info/{meeting_id}',
                 video_id, note='Downloading share info JSON')['result']['redirectUrl']
             url = urljoin(base_url, redirect_path)
+            query['continueMode'] = 'true'

         webpage = self._get_real_webpage(url, base_url, video_id, 'play')
         file_id = self._get_page_data(webpage, video_id)['fileId']
@@ -88,7 +104,7 @@ class ZoomIE(InfoExtractor):
             raise ExtractorError('Unable to extract file ID')

         data = self._download_json(
-            f'{base_url}nws/recording/1.0/play/info/{file_id}', video_id,
+            f'{base_url}nws/recording/1.0/play/info/{file_id}', video_id, query=query,
             note='Downloading play info JSON')['result']

         subtitles = {}
@@ -104,10 +120,10 @@ class ZoomIE(InfoExtractor):
         if data.get('viewMp4Url'):
             formats.append({
                 'format_note': 'Camera stream',
-                'url': str_or_none(data.get('viewMp4Url')),
+                'url': data['viewMp4Url'],
                 'width': int_or_none(traverse_obj(data, ('viewResolvtions', 0))),
                 'height': int_or_none(traverse_obj(data, ('viewResolvtions', 1))),
-                'format_id': str_or_none(traverse_obj(data, ('recording', 'id'))),
+                'format_id': 'view',
                 'ext': 'mp4',
                 'filesize_approx': parse_filesize(str_or_none(traverse_obj(data, ('recording', 'fileSizeInMB')))),
                 'preference': 0
@@ -116,14 +132,26 @@ class ZoomIE(InfoExtractor):
         if data.get('shareMp4Url'):
             formats.append({
                 'format_note': 'Screen share stream',
-                'url': str_or_none(data.get('shareMp4Url')),
+                'url': data['shareMp4Url'],
                 'width': int_or_none(traverse_obj(data, ('shareResolvtions', 0))),
                 'height': int_or_none(traverse_obj(data, ('shareResolvtions', 1))),
-                'format_id': str_or_none(traverse_obj(data, ('shareVideo', 'id'))),
+                'format_id': 'share',
                 'ext': 'mp4',
                 'preference': -1
             })

+        view_with_share_url = data.get('viewMp4WithshareUrl')
+        if view_with_share_url:
+            formats.append({
+                **parse_resolution(self._search_regex(
+                    r'_(\d+x\d+)\.mp4', url_basename(view_with_share_url), 'resolution', default=None)),
+                'format_note': 'Screen share with camera',
+                'url': view_with_share_url,
+                'format_id': 'view_with_share',
+                'ext': 'mp4',
+                'preference': 1
+            })
+
         return {
             'id': video_id,
             'title': str_or_none(traverse_obj(data, ('meet', 'topic'))),
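The combined view format carries no resolution fields in the play-info JSON, so the code above recovers width and height from the _WxH suffix that Zoom embeds in the file name. A quick illustration of that helper chain (the URL is a made-up example of the naming pattern):

import re

from yt_dlp.utils import parse_resolution, url_basename

url = 'https://ssrweb.zoom.us/replay/2022/GMT20220201_view_with_share_1920x1080.mp4'  # made-up example
resolution = re.search(r'_(\d+x\d+)\.mp4', url_basename(url)).group(1)
print(parse_resolution(resolution))  # -> {'width': 1920, 'height': 1080}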