Compare commits

..

No commits in common. "64e39ede9a12d071206e883c7b0c937f50fec4fe" and "71402e86497539ed59cd44b1d3683b07162b59a6" have entirely different histories.

9 changed files with 320 additions and 135 deletions

View File

@ -1081,6 +1081,7 @@ from .mlb import (
from .mlssoccer import MLSSoccerIE
from .mnet import MnetIE
from .mocha import MochaVideoIE
from .moevideo import MoeVideoIE
from .mojvideo import MojvideoIE
from .monstercat import MonstercatIE
from .morningstar import MorningstarIE
@ -1090,6 +1091,7 @@ from .motherless import (
MotherlessGalleryIE,
)
from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE
from .moviepilot import MoviepilotIE
from .moview import MoviewPlayIE
from .moviezine import MoviezineIE
@ -1253,7 +1255,6 @@ from .niconicochannelplus import (
)
from .ninegag import NineGagIE
from .ninenow import NineNowIE
from .nintendo import NintendoIE
from .nitter import NitterIE
from .nobelprize import NobelPrizeIE
from .noice import NoicePodcastIE
@ -1560,6 +1561,7 @@ from .rcti import (
RCTIPlusTVIE,
)
from .rds import RDSIE
from .recurbate import RecurbateIE
from .redbee import ParliamentLiveUKIE, RTBFIE
from .redbulltv import (
RedBullTVIE,
@ -2248,6 +2250,7 @@ from .vvvvid import (
VVVVIDIE,
VVVVIDShowIE,
)
from .vzaar import VzaarIE
from .walla import WallaIE
from .washingtonpost import (
WashingtonPostIE,

View File

@ -30,6 +30,5 @@ class BFIPlayerIE(InfoExtractor):
if not ooyala_id:
continue
entries.append(self.url_result(
'ooyala:' + ooyala_id, 'Ooyala',
ooyala_id, player_attr.get('data-label')))
'ooyala:' + ooyala_id, video_id=ooyala_id, video_title=player_attr.get('data-label')))
return self.playlist_result(entries)

View File

@ -374,6 +374,47 @@ class GenericIE(InfoExtractor):
},
'skip': 'There is a limit of 200 free downloads / month for the test song',
},
# ooyala video
{
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
'md5': '166dd577b433b4d4ebfee10b0824d8ff',
'info_dict': {
'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
'ext': 'mp4',
'title': '2cc213299525360.mov', # that's what we get
'duration': 238.231,
},
'skip': 'Ooyala was shut down',
},
{
# ooyala video embedded with http://player.ooyala.com/iframe.js
'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
'info_dict': {
'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
'ext': 'mp4',
'title': '"Steve Jobs: Man in the Machine" trailer',
'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
'duration': 135.427,
},
'params': {
'skip_download': True,
},
'skip': 'movie expired',
},
# ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
{
'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
'info_dict': {
'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
'ext': 'mp4',
'title': 'Steampunk Fest Comes to Honesdale',
'duration': 43.276,
},
'params': {
'skip_download': True,
},
'skip': 'Ooyala was shut down',
},
# embed.ly video
{
'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
@ -1530,6 +1571,16 @@ class GenericIE(InfoExtractor):
'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
},
},
{
# vzaar embed
'url': 'http://help.vzaar.com/article/165-embedding-video',
'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
'info_dict': {
'id': '8707641',
'ext': 'mp4',
'title': 'Building A Business Online: Principal Chairs Q & A',
},
},
{
# multiple HTML5 videos on one page
'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',

View File

@ -0,0 +1,74 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
int_or_none,
)
class MoeVideoIE(InfoExtractor):
IE_DESC = 'LetitBit video services: moevideo.net, playreplay.net and videochart.net'
_VALID_URL = r'''(?x)
https?://(?P<host>(?:www\.)?
(?:(?:moevideo|playreplay|videochart)\.net|thesame\.tv))/
(?:video|framevideo|embed)/(?P<id>[0-9a-z]+\.[0-9A-Za-z]+)'''
_API_URL = 'http://api.letitbit.net/'
_API_KEY = 'tVL0gjqo5'
_TESTS = [
{
'url': 'http://moevideo.net/video/00297.0036103fe3d513ef27915216fd29',
'md5': '129f5ae1f6585d0e9bb4f38e774ffb3a',
'info_dict': {
'id': '00297.0036103fe3d513ef27915216fd29',
'ext': 'flv',
'title': 'Sink cut out machine',
'description': 'md5:f29ff97b663aefa760bf7ca63c8ca8a8',
'thumbnail': r're:^https?://.*\.jpg$',
'width': 540,
'height': 360,
'duration': 179,
'filesize': 17822500,
},
'skip': 'Video has been removed',
},
{
'url': 'http://playreplay.net/video/77107.7f325710a627383d40540d8e991a',
'md5': '74f0a014d5b661f0f0e2361300d1620e',
'info_dict': {
'id': '77107.7f325710a627383d40540d8e991a',
'ext': 'flv',
'title': 'Operacion Condor.',
'description': 'md5:7e68cb2fcda66833d5081c542491a9a3',
'thumbnail': r're:^https?://.*\.jpg$',
'width': 480,
'height': 296,
'duration': 6027,
'filesize': 588257923,
},
'skip': 'Video has been removed',
},
]
def _real_extract(self, url):
host, video_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(
'http://%s/video/%s' % (host, video_id),
video_id, 'Downloading webpage')
title = self._og_search_title(webpage)
embed_webpage = self._download_webpage(
'http://%s/embed/%s' % (host, video_id),
video_id, 'Downloading embed webpage')
video = self._parse_json(self._search_regex(
r'mvplayer\("#player"\s*,\s*({.+})',
embed_webpage, 'mvplayer'), video_id)['video']
return {
'id': video_id,
'title': title,
'thumbnail': video.get('poster') or self._og_search_thumbnail(webpage),
'description': clean_html(self._og_search_description(webpage)),
'duration': int_or_none(self._og_search_property('video:duration', webpage)),
'url': video['ourUrl'],
}

View File

@ -0,0 +1,47 @@
from .common import InfoExtractor
from ..utils import (
smuggle_url,
float_or_none,
parse_iso8601,
update_url_query,
)
class MovieClipsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?movieclips\.com/videos/.+-(?P<id>\d+)(?:\?|$)'
_TEST = {
'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597',
'md5': '42b5a0352d4933a7bd54f2104f481244',
'info_dict': {
'id': 'pKIGmG83AqD9',
'ext': 'mp4',
'title': 'Warcraft Trailer 1',
'description': 'Watch Trailer 1 from Warcraft (2016). Legendarys WARCRAFT is a 3D epic adventure of world-colliding conflict based.',
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1446843055,
'upload_date': '20151106',
'uploader': 'Movieclips',
},
'add_ie': ['ThePlatform'],
'skip': 'redirects to YouTube',
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video = next(v for v in self._parse_json(self._search_regex(
r'var\s+__REACT_ENGINE__\s*=\s*({.+});',
webpage, 'react engine'), video_id)['playlist']['videos'] if v['id'] == video_id)
return {
'_type': 'url_transparent',
'ie_key': 'ThePlatform',
'url': smuggle_url(update_url_query(
video['contentUrl'], {'mbr': 'true'}), {'force_smil_url': True}),
'title': self._og_search_title(webpage),
'description': self._html_search_meta('description', webpage),
'duration': float_or_none(video.get('duration')),
'timestamp': parse_iso8601(video.get('dateCreated')),
'thumbnail': video.get('defaultImage'),
'uploader': video.get('provider'),
}

View File

@ -1,131 +0,0 @@
import json
import urllib.parse
from .common import InfoExtractor
from ..utils import (
ExtractorError,
make_archive_id,
unified_timestamp,
urljoin,
)
from ..utils.traversal import traverse_obj
class NintendoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nintendo\.com/(?:(?P<locale>\w{2}(?:-\w{2})?)/)?nintendo-direct/(?P<slug>[^/?#]+)'
_TESTS = [{
'url': 'https://www.nintendo.com/nintendo-direct/09-04-2019/',
'info_dict': {
'ext': 'mp4',
'id': '2oPmiviVePUA1IqAZzjuVh',
'display_id': '09-04-2019',
'title': 'Nintendo Direct 9.4.2019',
'timestamp': 1567580400,
'description': 'md5:8aac2780361d8cb772b6d1de66d7d6f4',
'upload_date': '20190904',
'age_limit': 17,
'_old_archive_ids': ['nintendo J2bXdmaTE6fe3dWJTPcc7m23FNbc_A1V'],
},
}, {
'url': 'https://www.nintendo.com/en-ca/nintendo-direct/08-31-2023/',
'info_dict': {
'ext': 'mp4',
'id': '2TB2w2rJhNYF84qQ9E57hU',
'display_id': '08-31-2023',
'title': 'Super Mario Bros. Wonder Direct 8.31.2023',
'timestamp': 1693465200,
'description': 'md5:3067c5b824bcfdae9090a7f38ab2d200',
'tags': ['Mild Fantasy Violence', 'In-Game Purchases'],
'upload_date': '20230831',
'age_limit': 6,
},
}, {
'url': 'https://www.nintendo.com/us/nintendo-direct/50-fact-extravaganza/',
'info_dict': {
'ext': 'mp4',
'id': 'j0BBGzfw0pQ',
'channel_follower_count': int,
'view_count': int,
'description': 'Learn new details about Super Smash Bros. for Wii U, which launches on November 21.',
'duration': 2123,
'availability': 'public',
'thumbnail': 'https://i.ytimg.com/vi_webp/j0BBGzfw0pQ/maxresdefault.webp',
'timestamp': 1414047600,
'channel_id': 'UCGIY_O-8vW4rfX98KlMkvRg',
'chapters': 'count:53',
'heatmap': 'count:100',
'upload_date': '20141023',
'uploader_id': '@NintendoAmerica',
'playable_in_embed': True,
'categories': ['Gaming'],
'display_id': '50-fact-extravaganza',
'channel': 'Nintendo of America',
'tags': ['Comic Mischief', 'Cartoon Violence', 'Mild Suggestive Themes'],
'like_count': int,
'channel_url': 'https://www.youtube.com/channel/UCGIY_O-8vW4rfX98KlMkvRg',
'age_limit': 10,
'uploader_url': 'https://www.youtube.com/@NintendoAmerica',
'comment_count': int,
'live_status': 'not_live',
'uploader': 'Nintendo of America',
'title': '50-FACT Extravaganza',
},
}]
def _create_asset_url(self, path):
return urljoin('https://assets.nintendo.com/', urllib.parse.quote(path))
def _real_extract(self, url):
locale, slug = self._match_valid_url(url).group('locale', 'slug')
language, _, country = (locale or 'US').rpartition('-')
parsed_locale = f'{language.lower() or "en"}_{country.upper()}'
self.write_debug(f'Using locale {parsed_locale} (from {locale})', only_once=True)
response = self._download_json('https://graph.nintendo.com/', slug, query={
'operationName': 'NintendoDirect',
'variables': json.dumps({
'locale': parsed_locale,
'slug': slug,
}, separators=(',', ':')),
'extensions': json.dumps({
'persistedQuery': {
'version': 1,
'sha256Hash': '969b16fe9f08b686fa37bc44d1fd913b6188e65794bb5e341c54fa683a8004cb'
},
}, separators=(',', ':')),
})
# API returns `{"data": {"direct": null}}` if no matching id
direct_info = traverse_obj(response, ('data', 'direct', {dict}))
if not direct_info:
raise ExtractorError(f'No Nintendo Direct with id {slug} exists', expected=True)
errors = ', '.join(traverse_obj(response, ('errors', ..., 'message')))
if errors:
raise ExtractorError(f'GraphQL API error: {errors or "Unknown error"}')
result = traverse_obj(direct_info, {
'id': ('id', {str}),
'title': ('name', {str}),
'timestamp': ('startDate', {unified_timestamp}),
'description': ('description', 'text', {str}),
'age_limit': ('contentRating', 'order', {int}),
'tags': ('contentDescriptors', ..., 'label', {str}),
'thumbnail': ('thumbnail', {self._create_asset_url}),
})
result['display_id'] = slug
asset_id = traverse_obj(direct_info, ('video', 'publicId', {str}))
if not asset_id:
youtube_id = traverse_obj(direct_info, ('liveStream', {str}))
if not youtube_id:
self.raise_no_formats('Could not find any video formats', video_id=slug)
return self.url_result(youtube_id, **result, url_transparent=True)
if asset_id.startswith('Legacy Videos/'):
result['_old_archive_ids'] = [make_archive_id(self, asset_id[14:])]
result['formats'] = self._extract_m3u8_formats(
self._create_asset_url(f'/video/upload/sp_full_hd/v1/{asset_id}.m3u8'), slug)
return result

View File

@ -24,4 +24,4 @@ class NRLTVIE(InfoExtractor):
r'(?s)q-data="({.+?})"', webpage, 'player data'), display_id)
ooyala_id = q_data['videoId']
return self.url_result(
'ooyala:' + ooyala_id, 'Ooyala', ooyala_id, q_data.get('title'))
'ooyala:' + ooyala_id, video_id=ooyala_id, video_title=q_data.get('title'))

View File

@ -0,0 +1,42 @@
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import ExtractorError, merge_dicts
class RecurbateIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?recurbate\.com/play\.php\?video=(?P<id>\d+)'
_TESTS = [{
'url': 'https://recurbate.com/play.php?video=39161415',
'md5': 'dd2b4ec57aa3e3572cb5cf0997fca99f',
'info_dict': {
'id': '39161415',
'ext': 'mp4',
'description': 'md5:db48d09e4d93fc715f47fd3d6b7edd51',
'title': 'Performer zsnicole33 show on 2022-10-25 20:23, Chaturbate Archive Recurbate',
'age_limit': 18,
},
'skip': 'Website require membership.',
}]
def _real_extract(self, url):
SUBSCRIPTION_MISSING_MESSAGE = 'This video is only available for registered users; Set your authenticated browser user agent via the --user-agent parameter.'
video_id = self._match_id(url)
try:
webpage = self._download_webpage(url, video_id)
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
self.raise_login_required(msg=SUBSCRIPTION_MISSING_MESSAGE, method='cookies')
raise
token = self._html_search_regex(r'data-token="([^"]+)"', webpage, 'token')
video_url = f'https://recurbate.com/api/get.php?video={video_id}&token={token}'
video_webpage = self._download_webpage(video_url, video_id)
if video_webpage == 'shall_subscribe':
self.raise_login_required(msg=SUBSCRIPTION_MISSING_MESSAGE, method='cookies')
entries = self._parse_html5_media_entries(video_url, video_webpage, video_id)
return merge_dicts({
'id': video_id,
'title': self._html_extract_title(webpage, 'title'),
'description': self._og_search_description(webpage),
'age_limit': self._rta_search(webpage),
}, entries[0])

100
yt_dlp/extractor/vzaar.py Normal file
View File

@ -0,0 +1,100 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
float_or_none,
unified_timestamp,
url_or_none,
)
class VzaarIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
_EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)']
_TESTS = [{
# HTTP and HLS
'url': 'https://vzaar.com/videos/1152805',
'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
'info_dict': {
'id': '1152805',
'ext': 'mp4',
'title': 'sample video (public)',
},
}, {
'url': 'https://view.vzaar.com/27272/player',
'md5': '3b50012ac9bbce7f445550d54e0508f2',
'info_dict': {
'id': '27272',
'ext': 'mp3',
'title': 'MP3',
},
}, {
# hlsAes = true
'url': 'https://view.vzaar.com/11379930/player',
'info_dict': {
'id': '11379930',
'ext': 'mp4',
'title': 'Videoaula',
},
'params': {
# m3u8 download
'skip_download': True,
},
}, {
# with null videoTitle
'url': 'https://view.vzaar.com/20313539/download',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
title = video_data.get('videoTitle') or video_id
formats = []
source_url = url_or_none(video_data.get('sourceUrl'))
if source_url:
f = {
'url': source_url,
'format_id': 'http',
'quality': 1,
}
if 'audio' in source_url:
f.update({
'vcodec': 'none',
'ext': 'mp3',
})
else:
f.update({
'width': int_or_none(video_data.get('width')),
'height': int_or_none(video_data.get('height')),
'ext': 'mp4',
'fps': float_or_none(video_data.get('fps')),
})
formats.append(f)
video_guid = video_data.get('guid')
usp = video_data.get('usp')
if video_data.get('uspEnabled') and isinstance(video_guid, compat_str) and isinstance(usp, dict):
hls_aes = video_data.get('hlsAes')
qs = '&'.join('%s=%s' % (k, v) for k, v in usp.items())
url_templ = 'http://%%s.vzaar.com/v5/usp%s/%s/%s.ism%%s?' % ('aes' if hls_aes else '', video_guid, video_id)
m3u8_formats = self._extract_m3u8_formats(
url_templ % ('fable', '/.m3u8') + qs, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)
if hls_aes:
for f in m3u8_formats:
f['hls_aes'] = {'uri': url_templ % ('goose', '') + qs}
formats.extend(m3u8_formats)
return {
'id': video_id,
'title': title,
'thumbnail': self._proto_relative_url(video_data.get('poster')),
'duration': float_or_none(video_data.get('videoDuration')),
'timestamp': unified_timestamp(video_data.get('ts')),
'formats': formats,
}