Compare commits

..

No commits in common. "e21545c18f47b43b758f73f86afe7526b838fa17" and "64e39ede9a12d071206e883c7b0c937f50fec4fe" have entirely different histories.

6 changed files with 585 additions and 0 deletions

View File

@ -354,6 +354,7 @@ from .clipchamp import ClipchampIE
from .cliphunter import CliphunterIE
from .clippit import ClippitIE
from .cliprs import ClipRsIE
from .clipsyndicate import ClipsyndicateIE
from .closertotruth import CloserToTruthIE
from .cloudflarestream import CloudflareStreamIE
from .clubic import ClubicIE
@ -642,6 +643,7 @@ from .funimation import (
)
from .funk import FunkIE
from .funker530 import Funker530IE
from .fusion import FusionIE
from .fuyintv import FuyinTVIE
from .gab import (
GabTVIE,
@ -721,6 +723,7 @@ from .hgtv import HGTVComShowIE
from .hketv import HKETVIE
from .hidive import HiDiveIE
from .historicfilms import HistoricFilmsIE
from .hitbox import HitboxIE, HitboxLiveIE
from .hitrecord import HitRecordIE
from .hollywoodreporter import (
HollywoodReporterIE,
@ -1076,6 +1079,7 @@ from .mlb import (
MLBArticleIE,
)
from .mlssoccer import MLSSoccerIE
from .mnet import MnetIE
from .mocha import MochaVideoIE
from .mojvideo import MojvideoIE
from .monstercat import MonstercatIE
@ -2255,6 +2259,7 @@ from .wasdtv import (
WASDTVClipIE,
)
from .wat import WatIE
from .watchbox import WatchBoxIE
from .wdr import (
WDRIE,
WDRPageIE,

View File

@ -0,0 +1,52 @@
from .common import InfoExtractor
from ..utils import (
find_xpath_attr,
fix_xml_ampersands
)
class ClipsyndicateIE(InfoExtractor):
    """Information extractor for clipsyndicate.com video pages."""

    _VALID_URL = r'https?://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
        'md5': '4d7d549451bad625e0ff3d7bd56d776c',
        'info_dict': {
            'id': '4629301',
            'ext': 'mp4',
            'title': 'Brick Briscoe',
            'duration': 612,
            'thumbnail': r're:^https?://.+\.jpg',
        },
    }, {
        'url': 'http://chic.clipsyndicate.com/video/play/5844117/shark_attack',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        Downloads the embed player script to obtain the ``flvars`` query
        string, then uses it to fetch the XML playlist whose first track
        carries the media location and its metadata params.
        """
        video_id = self._match_id(url)
        js_player = self._download_webpage(
            'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
            video_id, 'Downloading player')
        # it includes a required token
        flvars = self._search_regex(r'flvars: "(.*?)"', js_player, 'flvars')

        pdoc = self._download_xml(
            'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
            video_id, 'Downloading video info',
            transform_source=fix_xml_ampersands)

        track_doc = pdoc.find('trackList/track')

        def find_param(name):
            """Return the ``value`` attribute of the named <param>, or None."""
            node = find_xpath_attr(track_doc, './/param', 'name', name)
            if node is not None:
                return node.attrib.get('value')
            return None

        def find_int_param(name):
            """Return the named param as an int, or None if absent or non-numeric."""
            try:
                return int(find_param(name))
            except (TypeError, ValueError):
                # Optional metadata must not abort the extraction.
                return None

        return {
            'id': video_id,
            'title': find_param('title'),
            'url': track_doc.find('location').text,
            'thumbnail': find_param('thumbnail'),
            'duration': find_int_param('duration'),
        }

View File

@ -0,0 +1,81 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
mimetype2ext,
parse_iso8601,
)
class FusionIE(InfoExtractor):
    """Information extractor for fusion.net / fusion.tv video pages."""

    _VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/(?:video/|show/.+?\bvideo=)(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://fusion.tv/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
        'info_dict': {
            'id': '3145868',
            'ext': 'mp4',
            'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs',
            'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7',
            'duration': 140.0,
            'timestamp': 1442589635,
            'uploader': 'UNIVISON',
            'upload_date': '20150918',
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': ['Anvato'],
    }, {
        'url': 'http://fusion.tv/video/201781',
        'only_matching': True,
    }, {
        'url': 'https://fusion.tv/show/food-exposed-with-nelufar-hedayat/?ancla=full-episodes&video=588644',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        Formats are collected from the ``src`` mapping of the video JSON.
        When that yields nothing, the result is turned into a ``url``-type
        entry that hands the ``anvato`` id over to the Anvato extractor.
        """
        video_id = self._match_id(url)
        video = self._download_json(
            'https://platform.fusion.net/wp-json/fusiondotnet/v1/video/' + video_id, video_id)

        info = {
            'id': video_id,
            'title': video['title'],
            'description': video.get('excerpt'),
            'timestamp': parse_iso8601(video.get('published')),
            'series': video.get('show'),
        }

        formats = []
        src = video.get('src') or {}
        for f_id, f in src.items():
            for q_id, q in f.items():
                q_url = q.get('url')
                if not q_url:
                    continue
                # Prefer the extension from the URL, fall back to the MIME type.
                ext = determine_ext(q_url, mimetype2ext(q.get('type')))
                if ext == 'smil':
                    formats.extend(self._extract_smil_formats(q_url, video_id, fatal=False))
                elif f_id == 'm3u8-variant' or (ext == 'm3u8' and q_id == 'Variant'):
                    formats.extend(self._extract_m3u8_formats(
                        q_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
                else:
                    formats.append({
                        'format_id': '-'.join([f_id, q_id]),
                        'url': q_url,
                        'width': int_or_none(q.get('width')),
                        'height': int_or_none(q.get('height')),
                        # The bitrate is optional metadata: default=None keeps a
                        # URL without the "_<digits>.mp4" suffix from aborting
                        # the whole extraction.
                        'tbr': int_or_none(self._search_regex(
                            r'_(\d+)\.m(?:p4|3u8)', q_url, 'bitrate', default=None)),
                        'ext': 'mp4' if ext == 'm3u8' else ext,
                        'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
                    })
        if formats:
            info['formats'] = formats
        else:
            info.update({
                '_type': 'url',
                'url': 'anvato:uni:' + video['video_ids']['anvato'],
                'ie_key': 'Anvato',
            })

        return info

209
yt_dlp/extractor/hitbox.py Normal file
View File

@ -0,0 +1,209 @@
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
clean_html,
determine_ext,
float_or_none,
int_or_none,
parse_iso8601,
)
class HitboxIE(InfoExtractor):
    """Information extractor for recorded videos on hitbox.tv / smashcast.tv."""

    IE_NAME = 'hitbox'
    _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?:[^/]+/)*videos?/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.hitbox.tv/video/203213',
        'info_dict': {
            'id': '203213',
            'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
            'alt_title': 'hitboxlive - Aug 9th #6',
            'description': '',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 215.1666,
            'resolution': 'HD 720p',
            'uploader': 'hitboxlive',
            'view_count': int,
            'timestamp': 1407576133,
            'upload_date': '20140809',
            'categories': ['Live Show'],
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.smashcast.tv/hitboxlive/videos/203213',
        'only_matching': True,
    }]

    def _extract_metadata(self, url, video_id):
        """Download and normalise the media metadata for *video_id*.

        *url* is the API base URL; *video_id* is appended to it as a path
        segment. Returns an info dict without the ``formats`` key, which the
        caller is expected to add.
        """
        thumb_base = 'https://edge.sf.hitbox.tv'
        metadata = self._download_json(
            '%s/%s' % (url, video_id), video_id, 'Downloading metadata JSON')

        # Recorded videos and livestreams live under different keys and use
        # different date fields.
        date = 'media_live_since'
        media_type = 'livestream'
        if metadata.get('media_type') == 'video':
            media_type = 'video'
            date = 'media_date_added'

        # Fails when the response has no entry for this media type.
        video_meta = metadata.get(media_type, [])[0]
        title = video_meta.get('media_status')
        alt_title = video_meta.get('media_title')
        description = clean_html(
            video_meta.get('media_description')
            or video_meta.get('media_description_md'))
        duration = float_or_none(video_meta.get('media_duration'))
        uploader = video_meta.get('media_user_name')
        views = int_or_none(video_meta.get('media_views'))
        timestamp = parse_iso8601(video_meta.get(date), ' ')

        # Do not report [None] when the category is absent.
        category = video_meta.get('category_name')
        categories = [category] if category else None

        # Only emit thumbnails whose path is actually present; concatenating
        # the base with a missing path would raise TypeError.
        thumbs = []
        for key, width, height in (
                ('media_thumbnail', 320, 180),
                ('media_thumbnail_large', 768, 432)):
            thumb_path = video_meta.get(key)
            if not thumb_path:
                continue
            thumbs.append({
                'url': thumb_base + thumb_path,
                'width': width,
                'height': height,
            })

        return {
            'id': video_id,
            'title': title,
            'alt_title': alt_title,
            'description': description,
            'ext': 'mp4',
            'thumbnails': thumbs,
            'duration': duration,
            'uploader': uploader,
            'view_count': views,
            'timestamp': timestamp,
            'categories': categories,
        }

    def _real_extract(self, url):
        """Return the info dict (metadata plus formats) for a recorded video."""
        video_id = self._match_id(url)

        player_config = self._download_json(
            'https://www.smashcast.tv/api/player/config/video/%s' % video_id,
            video_id, 'Downloading video JSON')

        formats = []
        for video in player_config['clip']['bitrates']:
            label = video.get('label')
            # Entries labelled 'Auto' are skipped.
            if label == 'Auto':
                continue
            video_url = video.get('url')
            if not video_url:
                continue
            bitrate = int_or_none(video.get('bitrate'))
            if determine_ext(video_url) == 'm3u8':
                # Skip m3u8 entries that are not absolute HTTP(S) URLs.
                if not video_url.startswith('http'):
                    continue
                formats.append({
                    'url': video_url,
                    'ext': 'mp4',
                    'tbr': bitrate,
                    'format_note': label,
                    'protocol': 'm3u8_native',
                })
            else:
                formats.append({
                    'url': video_url,
                    'tbr': bitrate,
                    'format_note': label,
                })

        metadata = self._extract_metadata(
            'https://www.smashcast.tv/api/media/video', video_id)
        metadata['formats'] = formats

        return metadata
class HitboxLiveIE(HitboxIE):  # XXX: Do not subclass from concrete IE
    """Information extractor for live channels on hitbox.tv / smashcast.tv."""

    IE_NAME = 'hitbox:live'
    _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.hitbox.tv/dimak',
        'info_dict': {
            'id': 'dimak',
            'ext': 'mp4',
            'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
            'timestamp': int,
            'upload_date': compat_str,
            'title': compat_str,
            'uploader': 'Dimak',
        },
        'params': {
            # live
            'skip_download': True,
        },
    }, {
        'url': 'https://www.smashcast.tv/dimak',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Match channel URLs, deferring to HitboxIE for anything it accepts."""
        return False if HitboxIE.suitable(url) else super(HitboxLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return the info dict (metadata plus formats) for a live channel."""
        video_id = self._match_id(url)

        player_config = self._download_json(
            'https://www.smashcast.tv/api/player/config/live/%s' % video_id,
            video_id)

        formats = []
        servers = []
        # 'cdns' and 'bitrates' may be missing or null; treat both as empty.
        for cdn in player_config.get('cdns') or []:
            # Subscribe URLs are not playable
            if cdn.get('rtmpSubscribe') is True:
                continue
            base_url = cdn.get('netConnectionUrl')
            if base_url:
                # Each distinct connection URL is processed only once.
                if base_url in servers:
                    continue
                servers.append(base_url)
            # Used as the format note of RTMP entries; None when the
            # connection URL is missing or does not match the pattern.
            host_match = re.search(r'.+\.([^\.]+\.[^\./]+)/.+', base_url or '')
            host = host_match.group(1) if host_match else None
            for stream in cdn.get('bitrates') or []:
                label = stream.get('label')
                if label == 'Auto':
                    continue
                stream_url = stream.get('url')
                if not stream_url:
                    continue
                bitrate = int_or_none(stream.get('bitrate'))
                if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
                    if not stream_url.startswith('http'):
                        continue
                    formats.append({
                        'url': stream_url,
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'format_note': label,
                        'rtmp_live': True,
                    })
                else:
                    if not base_url:
                        # The stream path is relative to the connection URL;
                        # without one no usable URL can be built.
                        continue
                    formats.append({
                        'url': '%s/%s' % (base_url, stream_url),
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'rtmp_live': True,
                        'format_note': host,
                        'page_url': url,
                        'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
                    })

        metadata = self._extract_metadata(
            'https://www.smashcast.tv/api/media/live', video_id)
        metadata['formats'] = formats
        metadata['is_live'] = True

        return metadata

85
yt_dlp/extractor/mnet.py Normal file
View File

@ -0,0 +1,85 @@
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
)
class MnetIE(InfoExtractor):
    """Information extractor for mnet.com / mnet.interest.me VOD clips."""

    _VALID_URL = r'https?://(?:www\.)?mnet\.(?:com|interest\.me)/tv/vod/(?:.*?\bclip_id=)?(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.mnet.com/tv/vod/171008',
        'info_dict': {
            'id': '171008',
            'title': 'SS_이해인@히든박스',
            'description': 'md5:b9efa592c3918b615ba69fe9f8a05c55',
            'duration': 88,
            'upload_date': '20151231',
            'timestamp': 1451564040,
            'age_limit': 0,
            'thumbnails': 'mincount:5',
            'thumbnail': r're:^https?://.*\.jpg$',
            'ext': 'flv',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }, {
        'url': 'http://mnet.interest.me/tv/vod/172790',
        'only_matching': True,
    }, {
        'url': 'http://www.mnet.com/tv/vod/vod_view.asp?clip_id=172790&tabMenu=',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the clip referenced by *url*.

        Fetches the VOD config, follows its ``cdn`` URL to obtain the stream
        URL (with an optional token appended as the query string) and passes
        that to the Wowza format extraction.
        """
        video_id = self._match_id(url)

        # TODO: extract rtmp formats
        # no stype -> rtmp url
        # stype=H -> m3u8 url
        # stype=M -> mpd url
        info = self._download_json(
            'http://content.api.mnet.com/player/vodConfig',
            video_id, 'Downloading vod config JSON', query={
                'id': video_id,
                'ctype': 'CLIP',
                'stype': 'H',
            })['data']['info']

        title = info['title']

        cdn_data = self._download_json(
            info['cdn'], video_id, 'Downloading vod cdn JSON')['data'][0]
        m3u8_url = cdn_data['url']
        token = cdn_data.get('token')
        # A token of '-' is treated the same as no token.
        if token and token != '-':
            m3u8_url += '?' + token
        formats = self._extract_wowza_formats(
            m3u8_url, video_id, skip_protocols=['rtmp', 'rtsp', 'f4m'])

        description = info.get('ment')
        duration = parse_duration(info.get('time'))
        timestamp = parse_iso8601(info.get('date'), delimiter=' ')
        age_limit = info.get('adult')
        if age_limit is not None:
            age_limit = 0 if age_limit == 'N' else 18
        # `or {}` also covers an explicit JSON null, which a .get() default
        # alone would not; non-dict entries are skipped rather than crashing.
        thumbnails = [{
            'id': thumb_format,
            'url': thumb['url'],
            'width': int_or_none(thumb.get('width')),
            'height': int_or_none(thumb.get('height')),
        } for thumb_format, thumb in (info.get('cover') or {}).items()
            if isinstance(thumb, dict) and thumb.get('url')]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'age_limit': age_limit,
            'thumbnails': thumbnails,
            'formats': formats,
        }

View File

@ -0,0 +1,153 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
js_to_json,
strip_or_none,
try_get,
unescapeHTML,
unified_timestamp,
)
class WatchBoxIE(InfoExtractor):
    """Information extractor for films and series episodes on watchbox.de."""

    _VALID_URL = r'https?://(?:www\.)?watchbox\.de/(?P<kind>serien|filme)/(?:[^/]+/)*[^/]+-(?P<id>\d+)'
    _TESTS = [{
        # film
        'url': 'https://www.watchbox.de/filme/free-jimmy-12325.html',
        'info_dict': {
            'id': '341368',
            'ext': 'mp4',
            'title': 'Free Jimmy',
            'description': 'md5:bcd8bafbbf9dc0ef98063d344d7cc5f6',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 4890,
            'age_limit': 16,
            'release_year': 2009,
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Failed to download m3u8 information'],
    }, {
        # episode
        'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-1/date-in-der-hoelle-328286.html',
        'info_dict': {
            'id': '328286',
            'ext': 'mp4',
            'title': 'S01 E01 - Date in der Hölle',
            'description': 'md5:2f31c74a8186899f33cb5114491dae2b',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 1291,
            'age_limit': 12,
            'release_year': 2010,
            'series': 'Ugly Americans',
            'season_number': 1,
            'episode': 'Date in der Hölle',
            'episode_number': 1,
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Failed to download m3u8 information'],
    }, {
        'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-2/der-ring-des-powers-328270',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the film or episode referenced by *url*.

        Metadata comes from two places: the player configuration embedded in
        the page and the ``devapi`` JSON endpoint. API values are preferred
        and the page configuration serves as the fallback. Series pages
        additionally get series/season/episode fields.
        """
        url_match = self._match_valid_url(url)
        kind = url_match.group('kind')
        video_id = url_match.group('id')

        webpage = self._download_webpage(url, video_id)

        # First choice: HTML-escaped JSON in a data-player-conf attribute.
        attr_conf = self._search_regex(
            r'data-player-conf=(["\'])(?P<data>{.+?})\1', webpage,
            'player config', default='{}', group='data')
        player_config = self._parse_json(
            attr_conf, video_id, transform_source=unescapeHTML, fatal=False)

        if not player_config:
            # Second choice: a JavaScript object assigned to playerConf.
            script_conf = self._search_regex(
                r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config',
                default='{}')
            player_config = self._parse_json(
                script_conf, video_id, transform_source=js_to_json,
                fatal=False) or {}

        player_source = player_config.get('source') or {}

        # The player's own video id, when present, replaces the URL id.
        video_id = compat_str(player_source.get('videoId') or video_id)

        api_response = self._download_json(
            'http://api.watchbox.de/devapi/id/%s' % video_id, video_id, query={
                'format': 'json',
                'apikey': 'hbbtv',
            }, fatal=False)

        entry = try_get(api_response, lambda data: data['items'][0], dict) or {}

        title = entry.get('title')
        if not title:
            title = try_get(
                entry, lambda data: data['movie']['headline_movie'], compat_str)
        if not title:
            title = player_source['title']

        formats = []

        hls_url = entry.get('media_videourl_hls') or player_source.get('hls')
        if hls_url:
            hls_formats = self._extract_m3u8_formats(
                hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False)
            formats.extend(hls_formats)

        dash_url = entry.get('media_videourl_wv') or player_source.get('dash')
        if dash_url:
            dash_formats = self._extract_mpd_formats(
                dash_url, video_id, mpd_id='dash', fatal=False)
            formats.extend(dash_formats)

        mp4_url = entry.get('media_videourl')
        if mp4_url:
            formats.append({
                'url': mp4_url,
                'format_id': 'mp4',
                'width': int_or_none(entry.get('width')),
                'height': int_or_none(entry.get('height')),
                'tbr': int_or_none(entry.get('bitrate')),
            })

        info = {
            'id': video_id,
            'title': title,
            'description': strip_or_none(entry.get('descr')),
            'thumbnail': (
                entry.get('media_content_thumbnail_large')
                or player_source.get('poster')
                or entry.get('media_thumbnail')),
            'duration': int_or_none(
                entry.get('media_length') or player_source.get('length')),
            'timestamp': unified_timestamp(entry.get('pubDate')),
            'view_count': int_or_none(entry.get('media_views')),
            'age_limit': int_or_none(
                try_get(entry, lambda data: data['movie']['fsk'])),
            'release_year': int_or_none(
                try_get(entry, lambda data: data['movie']['rel_year'])),
            'formats': formats,
        }

        if kind.lower() != 'serien':
            return info

        # Season: taken from an "Sxx Eyy" title prefix, else from the
        # /staffel-N/ URL segment.
        season = self._search_regex(
            r'^S(\d{1,2})\s*E\d{1,2}', title, 'season number', default=None)
        if not season:
            season = self._search_regex(
                r'/staffel-(\d+)/', url, 'season number', default=None)

        episode_num = self._search_regex(
            r'^S\d{1,2}\s*E(\d{1,2})', title, 'episode number', default=None)

        info['series'] = try_get(
            entry, lambda data: data['special']['title'],
            compat_str) or player_source.get('format')
        info['season_number'] = int_or_none(season)
        info['episode'] = player_source.get('title')
        info['episode_number'] = int_or_none(episode_num)

        return info