2024-09-23 19:01:24 +02:00
9 changed files with 320 additions and 135 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1081,6 +1081,7 @@ from .mlb import (
 from .mlssoccer import MLSSoccerIE
 from .mnet import MnetIE
 from .mocha import MochaVideoIE
+from .moevideo import MoeVideoIE
 from .mojvideo import MojvideoIE
 from .monstercat import MonstercatIE
 from .morningstar import MorningstarIE
@@ -1090,6 +1091,7 @@ from .motherless import (
    MotherlessGalleryIE,
 )
 from .motorsport import MotorsportIE
+from .movieclips import MovieClipsIE
 from .moviepilot import MoviepilotIE
 from .moview import MoviewPlayIE
 from .moviezine import MoviezineIE
@@ -1253,7 +1255,6 @@ from .niconicochannelplus import (
 )
 from .ninegag import NineGagIE
 from .ninenow import NineNowIE
-from .nintendo import NintendoIE
 from .nitter import NitterIE
 from .nobelprize import NobelPrizeIE
 from .noice import NoicePodcastIE
@@ -1560,6 +1561,7 @@ from .rcti import (
    RCTIPlusTVIE,
 )
 from .rds import RDSIE
+from .recurbate import RecurbateIE
 from .redbee import ParliamentLiveUKIE, RTBFIE
 from .redbulltv import (
    RedBullTVIE,
@@ -2248,6 +2250,7 @@ from .vvvvid import (
    VVVVIDIE,
    VVVVIDShowIE,
 )
+from .vzaar import VzaarIE
 from .walla import WallaIE
 from .washingtonpost import (
    WashingtonPostIE,
--- a/yt_dlp/extractor/bfi.py
+++ b/yt_dlp/extractor/bfi.py
@@ -30,6 +30,5 @@ class BFIPlayerIE(InfoExtractor):
            if not ooyala_id:
                continue
            entries.append(self.url_result(
-                'ooyala:' + ooyala_id, 'Ooyala',
-                ooyala_id, player_attr.get('data-label')))
+                'ooyala:' + ooyala_id, video_id=ooyala_id, video_title=player_attr.get('data-label')))
        return self.playlist_result(entries)
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -374,6 +374,47 @@ class GenericIE(InfoExtractor):
            },
            'skip': 'There is a limit of 200 free downloads / month for the test song',
        },
+        # ooyala video
+        {
+            'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
+            'md5': '166dd577b433b4d4ebfee10b0824d8ff',
+            'info_dict': {
+                'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
+                'ext': 'mp4',
+                'title': '2cc213299525360.mov',  # that's what we get
+                'duration': 238.231,
+            },
+            'skip': 'Ooyala was shut down',
+        },
+        {
+            # ooyala video embedded with http://player.ooyala.com/iframe.js
+            'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
+            'info_dict': {
+                'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
+                'ext': 'mp4',
+                'title': '"Steve Jobs: Man in the Machine" trailer',
+                'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
+                'duration': 135.427,
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'skip': 'movie expired',
+        },
+        # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
+        {
+            'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
+            'info_dict': {
+                'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
+                'ext': 'mp4',
+                'title': 'Steampunk Fest Comes to Honesdale',
+                'duration': 43.276,
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'skip': 'Ooyala was shut down',
+        },
        # embed.ly video
        {
            'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
@@ -1530,6 +1571,16 @@ class GenericIE(InfoExtractor):
                'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
            },
        },
+        {
+            # vzaar embed
+            'url': 'http://help.vzaar.com/article/165-embedding-video',
+            'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
+            'info_dict': {
+                'id': '8707641',
+                'ext': 'mp4',
+                'title': 'Building A Business Online: Principal Chairs Q & A',
+            },
+        },
        {
            # multiple HTML5 videos on one page
            'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
--- a/yt_dlp/extractor/moevideo.py
+++ b/yt_dlp/extractor/moevideo.py
@@ -0,0 +1,74 @@
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    int_or_none,
+)
+
+
+class MoeVideoIE(InfoExtractor):
+    """Extract videos from the LetitBit-family sites matched by ``_VALID_URL``."""
+
+    IE_DESC = 'LetitBit video services: moevideo.net, playreplay.net and videochart.net'
+    _VALID_URL = r'''(?x)
+        https?://(?P<host>(?:www\.)?
+        (?:(?:moevideo|playreplay|videochart)\.net|thesame\.tv))/
+        (?:video|framevideo|embed)/(?P<id>[0-9a-z]+\.[0-9A-Za-z]+)'''
+    # NOTE: the two API constants are not referenced by the extraction code in this class
+    _API_URL = 'http://api.letitbit.net/'
+    _API_KEY = 'tVL0gjqo5'
+    _TESTS = [{
+        'url': 'http://moevideo.net/video/00297.0036103fe3d513ef27915216fd29',
+        'md5': '129f5ae1f6585d0e9bb4f38e774ffb3a',
+        'info_dict': {
+            'id': '00297.0036103fe3d513ef27915216fd29',
+            'ext': 'flv',
+            'title': 'Sink cut out machine',
+            'description': 'md5:f29ff97b663aefa760bf7ca63c8ca8a8',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'width': 540,
+            'height': 360,
+            'duration': 179,
+            'filesize': 17822500,
+        },
+        'skip': 'Video has been removed',
+    }, {
+        'url': 'http://playreplay.net/video/77107.7f325710a627383d40540d8e991a',
+        'md5': '74f0a014d5b661f0f0e2361300d1620e',
+        'info_dict': {
+            'id': '77107.7f325710a627383d40540d8e991a',
+            'ext': 'flv',
+            'title': 'Operacion Condor.',
+            'description': 'md5:7e68cb2fcda66833d5081c542491a9a3',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'width': 480,
+            'height': 296,
+            'duration': 6027,
+            'filesize': 588257923,
+        },
+        'skip': 'Video has been removed',
+    }]
+
+    def _real_extract(self, url):
+        """Merge OpenGraph metadata from the watch page with the embed page's player config."""
+        host, video_id = self._match_valid_url(url).groups()
+
+        # The watch page is the source of the OpenGraph metadata (title, description, ...)
+        webpage = self._download_webpage(
+            f'http://{host}/video/{video_id}', video_id, 'Downloading webpage')
+        title = self._og_search_title(webpage)
+
+        # The embed page holds the mvplayer(...) configuration that contains the media URL
+        embed_webpage = self._download_webpage(
+            f'http://{host}/embed/{video_id}', video_id, 'Downloading embed webpage')
+        video = self._parse_json(self._search_regex(
+            r'mvplayer\("#player"\s*,\s*({.+})', embed_webpage, 'mvplayer'), video_id)['video']
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': video.get('poster') or self._og_search_thumbnail(webpage),
+            'description': clean_html(self._og_search_description(webpage)),
+            # Optional field: a page without the video:duration property must not abort extraction
+            'duration': int_or_none(self._og_search_property('video:duration', webpage, fatal=False)),
+            'url': video['ourUrl'],
+        }
--- a/yt_dlp/extractor/movieclips.py
+++ b/yt_dlp/extractor/movieclips.py
@@ -0,0 +1,47 @@
+from .common import InfoExtractor
+from ..utils import (
+    smuggle_url,
+    float_or_none,
+    parse_iso8601,
+    update_url_query,
+)
+
+
+class MovieClipsIE(InfoExtractor):
+    """Resolve a movieclips.com video page to a ``url_transparent`` ThePlatform result."""
+    _VALID_URL = r'https?://(?:www\.)?movieclips\.com/videos/.+-(?P<id>\d+)(?:\?|$)'
+    _TEST = {
+        'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597',
+        'md5': '42b5a0352d4933a7bd54f2104f481244',
+        'info_dict': {
+            'id': 'pKIGmG83AqD9',
+            'ext': 'mp4',
+            'title': 'Warcraft Trailer 1',
+            'description': 'Watch Trailer 1 from Warcraft (2016). Legendary’s WARCRAFT is a 3D epic adventure of world-colliding conflict based.',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1446843055,
+            'upload_date': '20151106',
+            'uploader': 'Movieclips',
+        },
+        'add_ie': ['ThePlatform'],
+        'skip': 'redirects to YouTube',
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        react_state = self._parse_json(self._search_regex(
+            r'var\s+__REACT_ENGINE__\s*=\s*({.+});', webpage, 'react engine'), video_id)
+        video = next(item for item in react_state['playlist']['videos'] if item['id'] == video_id)
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'ThePlatform',
+            'url': smuggle_url(update_url_query(video['contentUrl'], {'mbr': 'true'}), {'force_smil_url': True}),
+            'title': self._og_search_title(webpage),
+            'description': self._html_search_meta('description', webpage),
+            'duration': float_or_none(video.get('duration')),
+            'timestamp': parse_iso8601(video.get('dateCreated')),
+            'thumbnail': video.get('defaultImage'),
+            'uploader': video.get('provider'),
+        }
--- a/yt_dlp/extractor/nintendo.py
+++ b/yt_dlp/extractor/nintendo.py
@@ -1,131 +0,0 @@
-import json
-import urllib.parse
-
-from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    make_archive_id,
-    unified_timestamp,
-    urljoin,
-)
-from ..utils.traversal import traverse_obj
-
-
-class NintendoIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nintendo\.com/(?:(?P<locale>\w{2}(?:-\w{2})?)/)?nintendo-direct/(?P<slug>[^/?#]+)'
-    _TESTS = [{
-        'url': 'https://www.nintendo.com/nintendo-direct/09-04-2019/',
-        'info_dict': {
-            'ext': 'mp4',
-            'id': '2oPmiviVePUA1IqAZzjuVh',
-            'display_id': '09-04-2019',
-            'title': 'Nintendo Direct 9.4.2019',
-            'timestamp': 1567580400,
-            'description': 'md5:8aac2780361d8cb772b6d1de66d7d6f4',
-            'upload_date': '20190904',
-            'age_limit': 17,
-            '_old_archive_ids': ['nintendo J2bXdmaTE6fe3dWJTPcc7m23FNbc_A1V'],
-        },
-    }, {
-        'url': 'https://www.nintendo.com/en-ca/nintendo-direct/08-31-2023/',
-        'info_dict': {
-            'ext': 'mp4',
-            'id': '2TB2w2rJhNYF84qQ9E57hU',
-            'display_id': '08-31-2023',
-            'title': 'Super Mario Bros. Wonder Direct 8.31.2023',
-            'timestamp': 1693465200,
-            'description': 'md5:3067c5b824bcfdae9090a7f38ab2d200',
-            'tags': ['Mild Fantasy Violence', 'In-Game Purchases'],
-            'upload_date': '20230831',
-            'age_limit': 6,
-        },
-    }, {
-        'url': 'https://www.nintendo.com/us/nintendo-direct/50-fact-extravaganza/',
-        'info_dict': {
-            'ext': 'mp4',
-            'id': 'j0BBGzfw0pQ',
-            'channel_follower_count': int,
-            'view_count': int,
-            'description': 'Learn new details about Super Smash Bros. for Wii U, which launches on November 21.',
-            'duration': 2123,
-            'availability': 'public',
-            'thumbnail': 'https://i.ytimg.com/vi_webp/j0BBGzfw0pQ/maxresdefault.webp',
-            'timestamp': 1414047600,
-            'channel_id': 'UCGIY_O-8vW4rfX98KlMkvRg',
-            'chapters': 'count:53',
-            'heatmap': 'count:100',
-            'upload_date': '20141023',
-            'uploader_id': '@NintendoAmerica',
-            'playable_in_embed': True,
-            'categories': ['Gaming'],
-            'display_id': '50-fact-extravaganza',
-            'channel': 'Nintendo of America',
-            'tags': ['Comic Mischief', 'Cartoon Violence', 'Mild Suggestive Themes'],
-            'like_count': int,
-            'channel_url': 'https://www.youtube.com/channel/UCGIY_O-8vW4rfX98KlMkvRg',
-            'age_limit': 10,
-            'uploader_url': 'https://www.youtube.com/@NintendoAmerica',
-            'comment_count': int,
-            'live_status': 'not_live',
-            'uploader': 'Nintendo of America',
-            'title': '50-FACT Extravaganza',
-        },
-    }]
-
-    def _create_asset_url(self, path):
-        return urljoin('https://assets.nintendo.com/', urllib.parse.quote(path))
-
-    def _real_extract(self, url):
-        locale, slug = self._match_valid_url(url).group('locale', 'slug')
-
-        language, _, country = (locale or 'US').rpartition('-')
-        parsed_locale = f'{language.lower() or "en"}_{country.upper()}'
-        self.write_debug(f'Using locale {parsed_locale} (from {locale})', only_once=True)
-
-        response = self._download_json('https://graph.nintendo.com/', slug, query={
-            'operationName': 'NintendoDirect',
-            'variables': json.dumps({
-                'locale': parsed_locale,
-                'slug': slug,
-            }, separators=(',', ':')),
-            'extensions': json.dumps({
-                'persistedQuery': {
-                    'version': 1,
-                    'sha256Hash': '969b16fe9f08b686fa37bc44d1fd913b6188e65794bb5e341c54fa683a8004cb'
-                },
-            }, separators=(',', ':')),
-        })
-        # API returns `{"data": {"direct": null}}` if no matching id
-        direct_info = traverse_obj(response, ('data', 'direct', {dict}))
-        if not direct_info:
-            raise ExtractorError(f'No Nintendo Direct with id {slug} exists', expected=True)
-
-        errors = ', '.join(traverse_obj(response, ('errors', ..., 'message')))
-        if errors:
-            raise ExtractorError(f'GraphQL API error: {errors or "Unknown error"}')
-
-        result = traverse_obj(direct_info, {
-            'id': ('id', {str}),
-            'title': ('name', {str}),
-            'timestamp': ('startDate', {unified_timestamp}),
-            'description': ('description', 'text', {str}),
-            'age_limit': ('contentRating', 'order', {int}),
-            'tags': ('contentDescriptors', ..., 'label', {str}),
-            'thumbnail': ('thumbnail', {self._create_asset_url}),
-        })
-        result['display_id'] = slug
-
-        asset_id = traverse_obj(direct_info, ('video', 'publicId', {str}))
-        if not asset_id:
-            youtube_id = traverse_obj(direct_info, ('liveStream', {str}))
-            if not youtube_id:
-                self.raise_no_formats('Could not find any video formats', video_id=slug)
-
-            return self.url_result(youtube_id, **result, url_transparent=True)
-
-        if asset_id.startswith('Legacy Videos/'):
-            result['_old_archive_ids'] = [make_archive_id(self, asset_id[14:])]
-        result['formats'] = self._extract_m3u8_formats(
-            self._create_asset_url(f'/video/upload/sp_full_hd/v1/{asset_id}.m3u8'), slug)
-
-        return result
--- a/yt_dlp/extractor/nrl.py
+++ b/yt_dlp/extractor/nrl.py
@@ -24,4 +24,4 @@ class NRLTVIE(InfoExtractor):
            r'(?s)q-data="({.+?})"', webpage, 'player data'), display_id)
        ooyala_id = q_data['videoId']
        return self.url_result(
-            'ooyala:' + ooyala_id, 'Ooyala', ooyala_id, q_data.get('title'))
+            'ooyala:' + ooyala_id, video_id=ooyala_id, video_title=q_data.get('title'))
--- a/yt_dlp/extractor/recurbate.py
+++ b/yt_dlp/extractor/recurbate.py
@@ -0,0 +1,42 @@
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import ExtractorError, merge_dicts
+
+
+class RecurbateIE(InfoExtractor):
+    """Extract recurbate.com ``play.php`` videos via the page's ``data-token`` and ``api/get.php``."""
+    _VALID_URL = r'https?://(?:www\.)?recurbate\.com/play\.php\?video=(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://recurbate.com/play.php?video=39161415',
+        'md5': 'dd2b4ec57aa3e3572cb5cf0997fca99f',
+        'info_dict': {
+            'id': '39161415',
+            'ext': 'mp4',
+            'description': 'md5:db48d09e4d93fc715f47fd3d6b7edd51',
+            'title': 'Performer zsnicole33 show on 2022-10-25 20:23, Chaturbate Archive – Recurbate',
+            'age_limit': 18,
+        },
+        'skip': 'Website require membership.',
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        login_hint = 'This video is only available for registered users; Set your authenticated browser user agent via the --user-agent parameter.'
+        try:
+            webpage = self._download_webpage(url, video_id)
+        except ExtractorError as err:
+            if isinstance(err.cause, HTTPError) and err.cause.status == 403:
+                self.raise_login_required(msg=login_hint, method='cookies')
+            raise
+        token = self._html_search_regex(r'data-token="([^"]+)"', webpage, 'token')
+        api_url = f'https://recurbate.com/api/get.php?video={video_id}&token={token}'
+        api_response = self._download_webpage(api_url, video_id)
+        if api_response == 'shall_subscribe':
+            self.raise_login_required(msg=login_hint, method='cookies')
+        media_entries = self._parse_html5_media_entries(api_url, api_response, video_id)
+        return merge_dicts({
+            'id': video_id,
+            'title': self._html_extract_title(webpage, 'title'),
+            'description': self._og_search_description(webpage),
+            'age_limit': self._rta_search(webpage),
+        }, media_entries[0])
--- a/yt_dlp/extractor/vzaar.py
+++ b/yt_dlp/extractor/vzaar.py
@@ -0,0 +1,100 @@
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    int_or_none,
+    float_or_none,
+    unified_timestamp,
+    url_or_none,
+)
+
+
+class VzaarIE(InfoExtractor):
+    """Extract vzaar.com videos, including ``view.vzaar.com`` iframes found via ``_EMBED_REGEX``."""
+
+    _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
+    _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)']
+    _TESTS = [{
+        # HTTP and HLS
+        'url': 'https://vzaar.com/videos/1152805',
+        'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
+        'info_dict': {
+            'id': '1152805',
+            'ext': 'mp4',
+            'title': 'sample video (public)',
+        },
+    }, {
+        'url': 'https://view.vzaar.com/27272/player',
+        'md5': '3b50012ac9bbce7f445550d54e0508f2',
+        'info_dict': {
+            'id': '27272',
+            'ext': 'mp3',
+            'title': 'MP3',
+        },
+    }, {
+        # hlsAes = true
+        'url': 'https://view.vzaar.com/11379930/player',
+        'info_dict': {
+            'id': '11379930',
+            'ext': 'mp4',
+            'title': 'Videoaula',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        # with null videoTitle
+        'url': 'https://view.vzaar.com/20313539/download',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Collect the direct source format and, when enabled, the USP HLS formats."""
+        video_id = self._match_id(url)
+        # All metadata comes from the v2 JSON endpoint
+        video_data = self._download_json(f'http://view.vzaar.com/v2/{video_id}/video', video_id)
+        title = video_data.get('videoTitle') or video_id
+
+        formats = []
+        # Direct HTTP format, only when the JSON carries a usable source URL
+        source_url = url_or_none(video_data.get('sourceUrl'))
+        if source_url:
+            if 'audio' in source_url:
+                # Source URLs containing 'audio' are tagged as video-less MP3
+                details = {'vcodec': 'none', 'ext': 'mp3'}
+            else:
+                details = {
+                    'width': int_or_none(video_data.get('width')),
+                    'height': int_or_none(video_data.get('height')),
+                    'ext': 'mp4',
+                    'fps': float_or_none(video_data.get('fps')),
+                }
+            formats.append({'url': source_url, 'format_id': 'http', 'quality': 1, **details})
+
+        # HLS formats require the uspEnabled flag, a string guid and a dict of query parameters
+        video_guid = video_data.get('guid')
+        usp = video_data.get('usp')
+        if video_data.get('uspEnabled') and isinstance(video_guid, compat_str) and isinstance(usp, dict):
+            hls_aes = video_data.get('hlsAes')
+            aes_suffix = 'aes' if hls_aes else ''
+            qs = '&'.join(f'{k}={v}' for k, v in usp.items())
+            # The two remaining %s placeholders take the subdomain and the path suffix
+            url_templ = f'http://%s.vzaar.com/v5/usp{aes_suffix}/{video_guid}/{video_id}.ism%s?'
+            m3u8_formats = self._extract_m3u8_formats(
+                url_templ % ('fable', '/.m3u8') + qs, video_id, 'mp4', 'm3u8_native',
+                m3u8_id='hls', fatal=False)
+            # When hlsAes is set, every HLS format gets an hls_aes URI built from the 'goose' host
+            if hls_aes:
+                key_uri = url_templ % ('goose', '') + qs
+                for fmt in m3u8_formats:
+                    fmt['hls_aes'] = {'uri': key_uri}
+            formats.extend(m3u8_formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': self._proto_relative_url(video_data.get('poster')),
+            'duration': float_or_none(video_data.get('videoDuration')),
+            'timestamp': unified_timestamp(video_data.get('ts')),
+            'formats': formats,
+        }