Remove vzaar

The site has shut down and its API is dead. Direct download links still work, but they are handled by the generic extractor.
Revert some ooyala changes
2024-09-23 19:01:24 +02:00 · 2023-11-19 00:11:03 +01:00 · 2023-11-18 23:23:40 +01:00 · 2023-11-18 22:50:39 +01:00 · 2023-11-18 22:39:25 +01:00 · 2023-11-18 21:56:11 +01:00
9 changed files with 135 additions and 320 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -1081,7 +1081,6 @@ from .mlb import (
 from .mlssoccer import MLSSoccerIE
 from .mnet import MnetIE
 from .mocha import MochaVideoIE
 from .moevideo import MoeVideoIE
 from .mojvideo import MojvideoIE
 from .monstercat import MonstercatIE
 from .morningstar import MorningstarIE
@ -1091,7 +1090,6 @@ from .motherless import (
    MotherlessGalleryIE,
 )
 from .motorsport import MotorsportIE
 from .movieclips import MovieClipsIE
 from .moviepilot import MoviepilotIE
 from .moview import MoviewPlayIE
 from .moviezine import MoviezineIE
@ -1255,6 +1253,7 @@ from .niconicochannelplus import (
 )
 from .ninegag import NineGagIE
 from .ninenow import NineNowIE
 from .nintendo import NintendoIE
 from .nitter import NitterIE
 from .nobelprize import NobelPrizeIE
 from .noice import NoicePodcastIE
@ -1561,7 +1560,6 @@ from .rcti import (
    RCTIPlusTVIE,
 )
 from .rds import RDSIE
 from .recurbate import RecurbateIE
 from .redbee import ParliamentLiveUKIE, RTBFIE
 from .redbulltv import (
    RedBullTVIE,
@ -2250,7 +2248,6 @@ from .vvvvid import (
    VVVVIDIE,
    VVVVIDShowIE,
 )
 from .vzaar import VzaarIE
 from .walla import WallaIE
 from .washingtonpost import (
    WashingtonPostIE,
--- a/yt_dlp/extractor/bfi.py
+++ b/yt_dlp/extractor/bfi.py
@ -30,5 +30,6 @@ class BFIPlayerIE(InfoExtractor):
            if not ooyala_id:
                continue
            entries.append(self.url_result(
-                'ooyala:' + ooyala_id, video_id=ooyala_id, video_title=player_attr.get('data-label')))
+                'ooyala:' + ooyala_id, 'Ooyala',
                ooyala_id, player_attr.get('data-label')))
        return self.playlist_result(entries)
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@ -374,47 +374,6 @@ class GenericIE(InfoExtractor):
            },
            'skip': 'There is a limit of 200 free downloads / month for the test song',
        },
        # ooyala video
        {
            'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
            'md5': '166dd577b433b4d4ebfee10b0824d8ff',
            'info_dict': {
                'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
                'ext': 'mp4',
                'title': '2cc213299525360.mov',  # that's what we get
                'duration': 238.231,
            },
            'skip': 'Ooyala was shut down',
        },
        {
            # ooyala video embedded with http://player.ooyala.com/iframe.js
            'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
            'info_dict': {
                'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
                'ext': 'mp4',
                'title': '"Steve Jobs: Man in the Machine" trailer',
                'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
                'duration': 135.427,
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'movie expired',
        },
        # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
        {
            'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
            'info_dict': {
                'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
                'ext': 'mp4',
                'title': 'Steampunk Fest Comes to Honesdale',
                'duration': 43.276,
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'Ooyala was shut down',
        },
        # embed.ly video
        {
            'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
@ -1571,16 +1530,6 @@ class GenericIE(InfoExtractor):
                'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
            },
        },
        {
            # vzaar embed
            'url': 'http://help.vzaar.com/article/165-embedding-video',
            'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
            'info_dict': {
                'id': '8707641',
                'ext': 'mp4',
                'title': 'Building A Business Online: Principal Chairs Q & A',
            },
        },
        {
            # multiple HTML5 videos on one page
            'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
--- a/yt_dlp/extractor/moevideo.py
+++ b/yt_dlp/extractor/moevideo.py
@ -1,74 +0,0 @@
 from .common import InfoExtractor
 from ..utils import (
    clean_html,
    int_or_none,
 )
 class MoeVideoIE(InfoExtractor):
    IE_DESC = 'LetitBit video services: moevideo.net, playreplay.net and videochart.net'
    _VALID_URL = r'''(?x)
        https?://(?P<host>(?:www\.)?
        (?:(?:moevideo|playreplay|videochart)\.net|thesame\.tv))/
        (?:video|framevideo|embed)/(?P<id>[0-9a-z]+\.[0-9A-Za-z]+)'''
    _API_URL = 'http://api.letitbit.net/'
    _API_KEY = 'tVL0gjqo5'
    _TESTS = [
        {
            'url': 'http://moevideo.net/video/00297.0036103fe3d513ef27915216fd29',
            'md5': '129f5ae1f6585d0e9bb4f38e774ffb3a',
            'info_dict': {
                'id': '00297.0036103fe3d513ef27915216fd29',
                'ext': 'flv',
                'title': 'Sink cut out machine',
                'description': 'md5:f29ff97b663aefa760bf7ca63c8ca8a8',
                'thumbnail': r're:^https?://.*\.jpg$',
                'width': 540,
                'height': 360,
                'duration': 179,
                'filesize': 17822500,
            },
            'skip': 'Video has been removed',
        },
        {
            'url': 'http://playreplay.net/video/77107.7f325710a627383d40540d8e991a',
            'md5': '74f0a014d5b661f0f0e2361300d1620e',
            'info_dict': {
                'id': '77107.7f325710a627383d40540d8e991a',
                'ext': 'flv',
                'title': 'Operacion Condor.',
                'description': 'md5:7e68cb2fcda66833d5081c542491a9a3',
                'thumbnail': r're:^https?://.*\.jpg$',
                'width': 480,
                'height': 296,
                'duration': 6027,
                'filesize': 588257923,
            },
            'skip': 'Video has been removed',
        },
    ]
    def _real_extract(self, url):
        """Build the info dict for one video from its watch page and embed page.

        Title, description, duration and the fallback thumbnail are read from
        the watch page through the ``_og_search_*`` helpers. The media URL and
        the preferred thumbnail come from the ``video`` object of the JSON
        passed to ``mvplayer(...)`` on the embed page.
        """
        host, video_id = self._match_valid_url(url).group('host', 'id')
        # The /video/ page is always requested, whichever path variant matched
        watch_url = f'http://{host}/video/{video_id}'
        webpage = self._download_webpage(watch_url, video_id, 'Downloading webpage')
        title = self._og_search_title(webpage)
        embed_url = f'http://{host}/embed/{video_id}'
        embed_webpage = self._download_webpage(
            embed_url, video_id, 'Downloading embed webpage')
        player_json = self._search_regex(
            r'mvplayer\("#player"\s*,\s*({.+})', embed_webpage, 'mvplayer')
        video = self._parse_json(player_json, video_id)['video']
        # The player's poster wins; the watch page thumbnail is only a fallback
        thumbnail = video.get('poster') or self._og_search_thumbnail(webpage)
        description = clean_html(self._og_search_description(webpage))
        duration = int_or_none(self._og_search_property('video:duration', webpage))
        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'description': description,
            'duration': duration,
            'url': video['ourUrl'],
        }
--- a/yt_dlp/extractor/movieclips.py
+++ b/yt_dlp/extractor/movieclips.py
@ -1,47 +0,0 @@
 from .common import InfoExtractor
 from ..utils import (
    smuggle_url,
    float_or_none,
    parse_iso8601,
    update_url_query,
 )
 class MovieClipsIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?movieclips\.com/videos/.+-(?P<id>\d+)(?:\?|$)'
    _TEST = {
        'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597',
        'md5': '42b5a0352d4933a7bd54f2104f481244',
        'info_dict': {
            'id': 'pKIGmG83AqD9',
            'ext': 'mp4',
            'title': 'Warcraft Trailer 1',
            'description': 'Watch Trailer 1 from Warcraft (2016). Legendary’s WARCRAFT is a 3D epic adventure of world-colliding conflict based.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1446843055,
            'upload_date': '20151106',
            'uploader': 'Movieclips',
        },
        'add_ie': ['ThePlatform'],
        'skip': 'redirects to YouTube',
    }
    def _real_extract(self, url):
        """Hand a movieclips.com video over to the ThePlatform extractor.

        The page embeds a ``__REACT_ENGINE__`` JSON object. The entry of its
        ``playlist.videos`` list whose ``id`` equals the ID taken from the URL
        supplies the content URL and the per-video metadata returned alongside
        the ``url_transparent`` result.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        react_engine = self._parse_json(
            self._search_regex(
                r'var\s+__REACT_ENGINE__\s*=\s*({.+});', webpage, 'react engine'),
            video_id)
        # next() has no default: a page without a matching entry raises StopIteration
        video = next(
            entry for entry in react_engine['playlist']['videos']
            if entry['id'] == video_id)
        content_url = update_url_query(video['contentUrl'], {'mbr': 'true'})
        platform_url = smuggle_url(content_url, {'force_smil_url': True})
        return {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            'url': platform_url,
            'title': self._og_search_title(webpage),
            'description': self._html_search_meta('description', webpage),
            'duration': float_or_none(video.get('duration')),
            'timestamp': parse_iso8601(video.get('dateCreated')),
            'thumbnail': video.get('defaultImage'),
            'uploader': video.get('provider'),
        }
--- a/yt_dlp/extractor/nintendo.py
+++ b/yt_dlp/extractor/nintendo.py
@ -0,0 +1,131 @@
 import json
 import urllib.parse
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    make_archive_id,
    unified_timestamp,
    urljoin,
 )
 from ..utils.traversal import traverse_obj
 class NintendoIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?nintendo\.com/(?:(?P<locale>\w{2}(?:-\w{2})?)/)?nintendo-direct/(?P<slug>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.nintendo.com/nintendo-direct/09-04-2019/',
        'info_dict': {
            'ext': 'mp4',
            'id': '2oPmiviVePUA1IqAZzjuVh',
            'display_id': '09-04-2019',
            'title': 'Nintendo Direct 9.4.2019',
            'timestamp': 1567580400,
            'description': 'md5:8aac2780361d8cb772b6d1de66d7d6f4',
            'upload_date': '20190904',
            'age_limit': 17,
            '_old_archive_ids': ['nintendo J2bXdmaTE6fe3dWJTPcc7m23FNbc_A1V'],
        },
    }, {
        'url': 'https://www.nintendo.com/en-ca/nintendo-direct/08-31-2023/',
        'info_dict': {
            'ext': 'mp4',
            'id': '2TB2w2rJhNYF84qQ9E57hU',
            'display_id': '08-31-2023',
            'title': 'Super Mario Bros. Wonder Direct 8.31.2023',
            'timestamp': 1693465200,
            'description': 'md5:3067c5b824bcfdae9090a7f38ab2d200',
            'tags': ['Mild Fantasy Violence', 'In-Game Purchases'],
            'upload_date': '20230831',
            'age_limit': 6,
        },
    }, {
        'url': 'https://www.nintendo.com/us/nintendo-direct/50-fact-extravaganza/',
        'info_dict': {
            'ext': 'mp4',
            'id': 'j0BBGzfw0pQ',
            'channel_follower_count': int,
            'view_count': int,
            'description': 'Learn new details about Super Smash Bros. for Wii U, which launches on November 21.',
            'duration': 2123,
            'availability': 'public',
            'thumbnail': 'https://i.ytimg.com/vi_webp/j0BBGzfw0pQ/maxresdefault.webp',
            'timestamp': 1414047600,
            'channel_id': 'UCGIY_O-8vW4rfX98KlMkvRg',
            'chapters': 'count:53',
            'heatmap': 'count:100',
            'upload_date': '20141023',
            'uploader_id': '@NintendoAmerica',
            'playable_in_embed': True,
            'categories': ['Gaming'],
            'display_id': '50-fact-extravaganza',
            'channel': 'Nintendo of America',
            'tags': ['Comic Mischief', 'Cartoon Violence', 'Mild Suggestive Themes'],
            'like_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCGIY_O-8vW4rfX98KlMkvRg',
            'age_limit': 10,
            'uploader_url': 'https://www.youtube.com/@NintendoAmerica',
            'comment_count': int,
            'live_status': 'not_live',
            'uploader': 'Nintendo of America',
            'title': '50-FACT Extravaganza',
        },
    }]
    def _create_asset_url(self, path):
        """Return ``path`` percent-quoted and joined onto https://assets.nintendo.com/."""
        quoted_path = urllib.parse.quote(path)
        return urljoin('https://assets.nintendo.com/', quoted_path)
    def _real_extract(self, url):
        """Extract a Nintendo Direct through the ``NintendoDirect`` persisted GraphQL query.

        Returns an info dict with HLS formats when the Direct has a hosted
        video asset (``video.publicId``). Otherwise returns a
        ``url_transparent`` result for the Direct's ``liveStream`` value,
        carrying the metadata collected here.

        Raises ExtractorError when the API response contains ``errors`` or
        when it has no ``data.direct`` object for the slug.
        """
        locale, slug = self._match_valid_url(url).group('locale', 'slug')
        # 'en-ca' splits into ('en', 'ca'); a bare 'us' leaves the language part
        # empty, so "en" is substituted. URLs without a locale use 'US'.
        language, _, country = (locale or 'US').rpartition('-')
        parsed_locale = f'{language.lower() or "en"}_{country.upper()}'
        self.write_debug(f'Using locale {parsed_locale} (from {locale})', only_once=True)

        response = self._download_json('https://graph.nintendo.com/', slug, query={
            'operationName': 'NintendoDirect',
            'variables': json.dumps({
                'locale': parsed_locale,
                'slug': slug,
            }, separators=(',', ':')),
            'extensions': json.dumps({
                'persistedQuery': {
                    'version': 1,
                    'sha256Hash': '969b16fe9f08b686fa37bc44d1fd913b6188e65794bb5e341c54fa683a8004cb',
                },
            }, separators=(',', ':')),
        })

        # Check for API errors before looking at the data: a failed query may
        # come back without `data.direct`, and would otherwise be misreported
        # below as a non-existent Direct.
        api_errors = traverse_obj(response, ('errors', ...))
        if api_errors:
            # Only string messages are joined; entries without one fall back
            # to the generic text instead of being silently ignored.
            messages = ', '.join(traverse_obj(api_errors, (..., 'message', {str})))
            raise ExtractorError(f'GraphQL API error: {messages or "Unknown error"}')

        # API returns `{"data": {"direct": null}}` if no matching id
        direct_info = traverse_obj(response, ('data', 'direct', {dict}))
        if not direct_info:
            raise ExtractorError(f'No Nintendo Direct with id {slug} exists', expected=True)

        result = traverse_obj(direct_info, {
            'id': ('id', {str}),
            'title': ('name', {str}),
            'timestamp': ('startDate', {unified_timestamp}),
            'description': ('description', 'text', {str}),
            'age_limit': ('contentRating', 'order', {int}),
            'tags': ('contentDescriptors', ..., 'label', {str}),
            'thumbnail': ('thumbnail', {self._create_asset_url}),
        })
        result['display_id'] = slug

        asset_id = traverse_obj(direct_info, ('video', 'publicId', {str}))
        if not asset_id:
            # No hosted asset: defer to whatever `liveStream` points at
            youtube_id = traverse_obj(direct_info, ('liveStream', {str}))
            if not youtube_id:
                # NOTE(review): if raise_no_formats can return instead of raising,
                # url_result below receives None as its URL - confirm
                self.raise_no_formats('Could not find any video formats', video_id=slug)
            return self.url_result(youtube_id, **result, url_transparent=True)

        legacy_prefix = 'Legacy Videos/'
        if asset_id.startswith(legacy_prefix):
            # The part after the prefix is recorded as an old archive ID
            # (via make_archive_id)
            result['_old_archive_ids'] = [make_archive_id(self, asset_id[len(legacy_prefix):])]

        result['formats'] = self._extract_m3u8_formats(
            self._create_asset_url(f'/video/upload/sp_full_hd/v1/{asset_id}.m3u8'), slug)
        return result
--- a/yt_dlp/extractor/nrl.py
+++ b/yt_dlp/extractor/nrl.py
@ -24,4 +24,4 @@ class NRLTVIE(InfoExtractor):
            r'(?s)q-data="({.+?})"', webpage, 'player data'), display_id)
        ooyala_id = q_data['videoId']
        return self.url_result(
-            'ooyala:' + ooyala_id, video_id=ooyala_id, video_title=q_data.get('title'))
+            'ooyala:' + ooyala_id, 'Ooyala', ooyala_id, q_data.get('title'))
--- a/yt_dlp/extractor/recurbate.py
+++ b/yt_dlp/extractor/recurbate.py
@ -1,42 +0,0 @@
 from .common import InfoExtractor
 from ..networking.exceptions import HTTPError
 from ..utils import ExtractorError, merge_dicts
 class RecurbateIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?recurbate\.com/play\.php\?video=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://recurbate.com/play.php?video=39161415',
        'md5': 'dd2b4ec57aa3e3572cb5cf0997fca99f',
        'info_dict': {
            'id': '39161415',
            'ext': 'mp4',
            'description': 'md5:db48d09e4d93fc715f47fd3d6b7edd51',
            'title': 'Performer zsnicole33 show on 2022-10-25 20:23, Chaturbate Archive – Recurbate',
            'age_limit': 18,
        },
        'skip': 'Website require membership.',
    }]
    def _real_extract(self, url):
        """Extract a recording by exchanging the page's ``data-token`` for the media markup.

        The play page yields a token; ``api/get.php`` is then requested with the
        video ID and that token, and its response is parsed for HTML5 media.
        A 403 on the play page, or a literal ``shall_subscribe`` response from
        the API, is reported as a login requirement.
        """
        video_id = self._match_id(url)
        login_hint = 'This video is only available for registered users; Set your authenticated browser user agent via the --user-agent parameter.'

        try:
            webpage = self._download_webpage(url, video_id)
        except ExtractorError as error:
            http_error = error.cause
            if isinstance(http_error, HTTPError) and http_error.status == 403:
                self.raise_login_required(msg=login_hint, method='cookies')
            # Any other failure is passed on unchanged
            raise

        token = self._html_search_regex(r'data-token="([^"]+)"', webpage, 'token')
        video_url = f'https://recurbate.com/api/get.php?video={video_id}&token={token}'
        video_webpage = self._download_webpage(video_url, video_id)
        if video_webpage == 'shall_subscribe':
            self.raise_login_required(msg=login_hint, method='cookies')

        media_entries = self._parse_html5_media_entries(video_url, video_webpage, video_id)
        page_metadata = {
            'id': video_id,
            'title': self._html_extract_title(webpage, 'title'),
            'description': self._og_search_description(webpage),
            'age_limit': self._rta_search(webpage),
        }
        # Only the first parsed media entry is used
        return merge_dicts(page_metadata, media_entries[0])
--- a/yt_dlp/extractor/vzaar.py
+++ b/yt_dlp/extractor/vzaar.py
@ -1,100 +0,0 @@
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
    int_or_none,
    float_or_none,
    unified_timestamp,
    url_or_none,
 )
 class VzaarIE(InfoExtractor):
    _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
    _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)']
    _TESTS = [{
        # HTTP and HLS
        'url': 'https://vzaar.com/videos/1152805',
        'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
        'info_dict': {
            'id': '1152805',
            'ext': 'mp4',
            'title': 'sample video (public)',
        },
    }, {
        'url': 'https://view.vzaar.com/27272/player',
        'md5': '3b50012ac9bbce7f445550d54e0508f2',
        'info_dict': {
            'id': '27272',
            'ext': 'mp3',
            'title': 'MP3',
        },
    }, {
        # hlsAes = true
        'url': 'https://view.vzaar.com/11379930/player',
        'info_dict': {
            'id': '11379930',
            'ext': 'mp4',
            'title': 'Videoaula',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # with null videoTitle
        'url': 'https://view.vzaar.com/20313539/download',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        """Collect the direct-download and HLS formats of a vzaar video.

        Metadata comes from the ``view.vzaar.com/v2/<id>/video`` JSON. A valid
        ``sourceUrl`` becomes the ``http`` format (treated as mp3 audio when the
        URL contains ``audio``, otherwise as mp4 video). HLS formats are added
        when ``uspEnabled`` is set and both ``guid`` and ``usp`` have the
        expected types.
        """
        video_id = self._match_id(url)
        api_url = 'http://view.vzaar.com/v2/%s/video' % video_id
        video_data = self._download_json(api_url, video_id)
        # Fall back to the ID when the title is missing or empty
        title = video_data.get('videoTitle') or video_id

        formats = []
        source_url = url_or_none(video_data.get('sourceUrl'))
        if source_url:
            if 'audio' in source_url:
                media_fields = {
                    'vcodec': 'none',
                    'ext': 'mp3',
                }
            else:
                media_fields = {
                    'width': int_or_none(video_data.get('width')),
                    'height': int_or_none(video_data.get('height')),
                    'ext': 'mp4',
                    'fps': float_or_none(video_data.get('fps')),
                }
            formats.append({
                'url': source_url,
                'format_id': 'http',
                'quality': 1,
                **media_fields,
            })

        video_guid, usp = video_data.get('guid'), video_data.get('usp')
        usp_usable = isinstance(video_guid, compat_str) and isinstance(usp, dict)
        if video_data.get('uspEnabled') and usp_usable:
            hls_aes = video_data.get('hlsAes')
            # The `usp` mapping is sent as the query string, joined without escaping
            qs = '&'.join(f'{key}={value}' for key, value in usp.items())
            # The two remaining %s slots take the host name and the path suffix
            url_templ = 'http://%%s.vzaar.com/v5/usp%s/%s/%s.ism%%s?' % (
                'aes' if hls_aes else '', video_guid, video_id)
            manifest_url = url_templ % ('fable', '/.m3u8') + qs
            m3u8_formats = self._extract_m3u8_formats(
                manifest_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
            if hls_aes:
                # With hlsAes set, each format gets a key URI on the "goose" host
                key_uri = url_templ % ('goose', '') + qs
                for hls_format in m3u8_formats:
                    hls_format['hls_aes'] = {'uri': key_uri}
            formats.extend(m3u8_formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': self._proto_relative_url(video_data.get('poster')),
            'duration': float_or_none(video_data.get('videoDuration')),
            'timestamp': unified_timestamp(video_data.get('ts')),
            'formats': formats,
        }
Author	SHA1	Message	Date
sepro	64e39ede9a	Remove vzaar Site shut down with api dead. Direct download links still work, but handled by generic.	2023-11-19 00:11:03 +01:00
sepro	5d1d0f5ac7	Revert some ooyala changes	2023-11-18 23:23:40 +01:00
sepro	f8a7bd44a9	Remove recurbate Domain was seized	2023-11-18 22:50:39 +01:00
sepro	ba03401ad2	Remove movieclips Site has shut down and only redirects to YouTube Channel	2023-11-18 22:39:25 +01:00
sepro	3a4abc0863	Remove moevideo	2023-11-18 21:56:11 +01:00
sepro	71fa331c34	Merge branch 'master' into dead	2023-11-18 21:10:09 +01:00
Simon Sawicki	1d24da6c89	[ie/nintendo] Fix Nintendo Direct extraction (#8609 ) Authored by: Grub4K	2023-11-18 21:04:42 +01:00
sepro	d369afa23b	Restore NintendoIE Will be updated by separate PR	2023-11-18 19:12:49 +01:00
sepro	91a9087e8f	Remove ooyala tests from generic	2023-11-18 19:10:49 +01:00