Merge e767973d8c into da252d9d32

[cleanup] Misc (#11554)
Closes #6884 Authored by: bashonly, Grub4K, seproDev Co-authored-by: Simon Sawicki <contact@grub4k.xyz> Co-authored-by: sepro <sepro@sepr0.com>
2024-11-25 00:31:26 +01:00 · 2024-11-18 02:47:53 +02:00 · 2024-11-17 23:25:05 +00:00 · 2024-11-17 21:37:15 +00:00 · 2024-11-17 21:16:22 +00:00 · 2024-11-17 21:06:50 +00:00
21 changed files with 657 additions and 960 deletions
--- a/README.md
+++ b/README.md
@ -342,8 +342,9 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
                                    extractor plugins; postprocessor plugins can
                                    only be loaded from the default plugin
                                    directories
-    --flat-playlist                 Do not extract the videos of a playlist,
+    --flat-playlist                 Do not extract a playlist's URL result
-                                    only list them
+                                    entries; some entry metadata may be missing
                                    and downloading may be bypassed
    --no-flat-playlist              Fully extract the videos of a playlist
                                    (default)
    --live-from-start               Download livestreams from the start.
@ -1869,6 +1870,9 @@ The following extractors use this feature:
 #### digitalconcerthall
 * `prefer_combined_hls`: Prefer extracting combined/pre-merged video and audio HLS formats. This will exclude 4K/HEVC video and lossless/FLAC audio formats, which are only available as split video/audio HLS formats
 #### sonylivseries
 * `sort_order`: Episode sort order for series extraction - one of `asc` (ascending, oldest first) or `desc` (descending, newest first). Default is `asc`
 **Note**: These options may be changed/removed in the future without concern for backward compatibility
 <!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
--- a/devscripts/changelog_override.json
+++ b/devscripts/changelog_override.json
@ -234,5 +234,10 @@
        "when": "57212a5f97ce367590aaa5c3e9a135eead8f81f7",
        "short": "[ie/vimeo] Fix API retries (#11351)",
        "authors": ["bashonly"]
    },
    {
        "action": "add",
        "when": "52c0ffe40ad6e8404d93296f575007b05b04c686",
        "short": "[priority] **Login with OAuth is no longer supported for YouTube**\nDue to a change made by the site, yt-dlp is no longer able to support OAuth login for YouTube. [Read more](https://github.com/yt-dlp/yt-dlp/issues/11462#issuecomment-2471703090)"
    }
 ]
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -1093,10 +1093,6 @@ from .manoto import (
 )
 from .manyvids import ManyVidsIE
 from .maoritv import MaoriTVIE
 from .markiza import (
    MarkizaIE,
    MarkizaPageIE,
 )
 from .massengeschmacktv import MassengeschmackTVIE
 from .masters import MastersIE
 from .matchtv import MatchTVIE
@ -1139,12 +1135,6 @@ from .microsoftembed import (
    MicrosoftMediusIE,
 )
 from .microsoftstream import MicrosoftStreamIE
 from .mildom import (
    MildomClipIE,
    MildomIE,
    MildomUserVodIE,
    MildomVodIE,
 )
 from .minds import (
    MindsChannelIE,
    MindsGroupIE,
@ -1526,8 +1516,8 @@ from .pgatour import PGATourIE
 from .philharmoniedeparis import PhilharmonieDeParisIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
 from .pialive import PiaLiveIE
 from .piapro import PiaproIE
 from .piaulizaportal import PIAULIZAPortalIE
 from .picarto import (
    PicartoIE,
    PicartoVodIE,
@ -1563,10 +1553,6 @@ from .podbayfm import (
 )
 from .podchaser import PodchaserIE
 from .podomatic import PodomaticIE
 from .pokemon import (
    PokemonIE,
    PokemonWatchIE,
 )
 from .pokergo import (
    PokerGoCollectionIE,
    PokerGoIE,
@ -2260,6 +2246,10 @@ from .ufctv import (
 )
 from .ukcolumn import UkColumnIE
 from .uktvplay import UKTVPlayIE
 from .uliza import (
    UlizaPlayerIE,
    UlizaPortalIE,
 )
 from .umg import UMGDeIE
 from .unistra import UnistraIE
 from .unity import UnityIE
@ -2288,10 +2278,6 @@ from .utreon import UtreonIE
 from .varzesh3 import Varzesh3IE
 from .vbox7 import Vbox7IE
 from .veo import VeoIE
 from .veoh import (
    VeohIE,
    VeohUserIE,
 )
 from .vesti import VestiIE
 from .vevo import (
    VevoIE,
--- a/yt_dlp/extractor/bandlab.py
+++ b/yt_dlp/extractor/bandlab.py
@ -1,4 +1,3 @@
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@ -3767,7 +3767,7 @@ class InfoExtractor:
        """ Merge subtitle dictionaries, language by language. """
        if target is None:
            target = {}
-        for d in dicts:
+        for d in filter(None, dicts):
            for lang, subs in d.items():
                target[lang] = cls._merge_subtitle_items(target.get(lang, []), subs)
        return target
--- a/yt_dlp/extractor/ctvnews.py
+++ b/yt_dlp/extractor/ctvnews.py
@ -1,14 +1,27 @@
 import json
 import re
 import urllib.parse
 from .common import InfoExtractor
-from ..utils import orderedSet
+from .ninecninemedia import NineCNineMediaIE
 from ..utils import extract_attributes, orderedSet
 from ..utils.traversal import find_element, traverse_obj
 class CTVNewsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
+    _BASE_REGEX = r'https?://(?:[^.]+\.)?ctvnews\.ca/'
    _VIDEO_ID_RE = r'(?P<id>\d{5,})'
    _PLAYLIST_ID_RE = r'(?P<id>\d\.\d{5,})'
    _VALID_URL = [
        rf'{_BASE_REGEX}video/c{_VIDEO_ID_RE}',
        rf'{_BASE_REGEX}video(?:-gallery)?/?\?clipId={_VIDEO_ID_RE}',
        rf'{_BASE_REGEX}video/?\?(?:playlist|bin)Id={_PLAYLIST_ID_RE}',
        rf'{_BASE_REGEX}(?!video/)[^?#]*?{_PLAYLIST_ID_RE}/?(?:$|[?#])',
        rf'{_BASE_REGEX}(?!video/)[^?#]+\?binId={_PLAYLIST_ID_RE}',
    ]
    _TESTS = [{
        'url': 'http://www.ctvnews.ca/video?clipId=901995',
-        'md5': '9b8624ba66351a23e0b6e1391971f9af',
+        'md5': 'b608f466c7fa24b9666c6439d766ab7e',
        'info_dict': {
            'id': '901995',
            'ext': 'flv',
@ -16,6 +29,33 @@ class CTVNewsIE(InfoExtractor):
            'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
            'timestamp': 1467286284,
            'upload_date': '20160630',
            'categories': [],
            'season_number': 0,
            'season': 'Season 0',
            'tags': [],
            'series': 'CTV News National | Archive | Stories 2',
            'season_id': '57981',
            'thumbnail': r're:https?://.*\.jpg$',
            'duration': 764.631,
        },
    }, {
        'url': 'https://barrie.ctvnews.ca/video/c3030933-here_s-what_s-making-news-for-nov--15?binId=1272429',
        'md5': '8b8c2b33c5c1803e3c26bc74ff8694d5',
        'info_dict': {
            'id': '3030933',
            'ext': 'flv',
            'title': 'Here’s what’s making news for Nov. 15',
            'description': 'Here are the top stories we’re working on for CTV News at 11 for Nov. 15',
            'thumbnail': 'http://images2.9c9media.com/image_asset/2021_2_22_a602e68e-1514-410e-a67a-e1f7cccbacab_png_2000x1125.jpg',
            'season_id': '58104',
            'season_number': 0,
            'tags': [],
            'season': 'Season 0',
            'categories': [],
            'series': 'CTV News Barrie',
            'upload_date': '20241116',
            'duration': 42.943,
            'timestamp': 1731722452,
        },
    }, {
        'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
@ -31,6 +71,72 @@ class CTVNewsIE(InfoExtractor):
            'id': '1.2876780',
        },
        'playlist_mincount': 100,
    }, {
        'url': 'https://www.ctvnews.ca/it-s-been-23-years-since-toronto-called-in-the-army-after-a-major-snowstorm-1.5736957',
        'info_dict':
        {
            'id': '1.5736957',
        },
        'playlist_mincount': 6,
    }, {
        'url': 'https://www.ctvnews.ca/business/respondents-to-bank-of-canada-questionnaire-largely-oppose-creating-a-digital-loonie-1.6665797',
        'md5': '24bc4b88cdc17d8c3fc01dfc228ab72c',
        'info_dict': {
            'id': '2695026',
            'ext': 'flv',
            'season_id': '89852',
            'series': 'From CTV News Channel',
            'description': 'md5:796a985a23cacc7e1e2fafefd94afd0a',
            'season': '2023',
            'title': 'Bank of Canada asks public about digital currency',
            'categories': [],
            'tags': [],
            'upload_date': '20230526',
            'season_number': 2023,
            'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
            'timestamp': 1685105157,
            'duration': 253.553,
        },
    }, {
        'url': 'https://stox.ctvnews.ca/video-gallery?clipId=582589',
        'md5': '135cc592df607d29dddc931f1b756ae2',
        'info_dict': {
            'id': '582589',
            'ext': 'flv',
            'categories': [],
            'timestamp': 1427906183,
            'season_number': 0,
            'duration': 125.559,
            'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
            'series': 'CTV News Stox',
            'description': 'CTV original footage of the rise and fall of the Berlin Wall.',
            'title': 'Berlin Wall',
            'season_id': '63817',
            'season': 'Season 0',
            'tags': [],
            'upload_date': '20150401',
        },
    }, {
        'url': 'https://ottawa.ctvnews.ca/features/regional-contact/regional-contact-archive?binId=1.1164587#3023759',
        'md5': 'a14c0603557decc6531260791c23cc5e',
        'info_dict': {
            'id': '3023759',
            'ext': 'flv',
            'season_number': 2024,
            'timestamp': 1731798000,
            'season': '2024',
            'episode': 'Episode 125',
            'description': 'CTV News Ottawa at Six',
            'duration': 2712.076,
            'episode_number': 125,
            'upload_date': '20241116',
            'title': 'CTV News Ottawa at Six for Saturday, November 16, 2024',
            'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
            'categories': [],
            'tags': [],
            'series': 'CTV News Ottawa at Six',
            'season_id': '92667',
        },
    }, {
        'url': 'http://www.ctvnews.ca/1.810401',
        'only_matching': True,
@ -42,29 +148,35 @@ class CTVNewsIE(InfoExtractor):
        'only_matching': True,
    }]
    def _ninecninemedia_url_result(self, clip_id):
        return self.url_result(f'9c9media:ctvnews_web:{clip_id}', NineCNineMediaIE, clip_id)
    def _real_extract(self, url):
        page_id = self._match_id(url)
-        def ninecninemedia_url_result(clip_id):
+        if mobj := re.fullmatch(self._VIDEO_ID_RE, urllib.parse.urlparse(url).fragment):
-            return {
+            page_id = mobj.group('id')
                '_type': 'url_transparent',
                'id': clip_id,
                'url': f'9c9media:ctvnews_web:{clip_id}',
                'ie_key': 'NineCNineMedia',
            }
-        if page_id.isdigit():
+        if re.fullmatch(self._VIDEO_ID_RE, page_id):
-            return ninecninemedia_url_result(page_id)
+            return self._ninecninemedia_url_result(page_id)
-        else:
+
-            webpage = self._download_webpage(f'http://www.ctvnews.ca/{page_id}', page_id, query={
+        webpage = self._download_webpage(f'https://www.ctvnews.ca/{page_id}', page_id, query={
-                'ot': 'example.AjaxPageLayout.ot',
+            'ot': 'example.AjaxPageLayout.ot',
-                'maxItemsPerPage': 1000000,
+            'maxItemsPerPage': 1000000,
-            })
+        })
-            entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet(
+        entries = [self._ninecninemedia_url_result(clip_id)
-                re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
+                   for clip_id in orderedSet(re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
-            if not entries:
+        if not entries:
-                webpage = self._download_webpage(url, page_id)
+            webpage = self._download_webpage(url, page_id)
-                if 'getAuthStates("' in webpage:
+            if 'getAuthStates("' in webpage:
-                    entries = [ninecninemedia_url_result(clip_id) for clip_id in
+                entries = [self._ninecninemedia_url_result(clip_id) for clip_id in
-                               self._search_regex(r'getAuthStates\("([\d+,]+)"', webpage, 'clip ids').split(',')]
+                           self._search_regex(r'getAuthStates\("([\d+,]+)"', webpage, 'clip ids').split(',')]
-            return self.playlist_result(entries, page_id)
+            else:
                entries = [
                    self._ninecninemedia_url_result(clip_id) for clip_id in
                    traverse_obj(webpage, (
                        {find_element(tag='jasper-player-container', html=True)},
                        {extract_attributes}, 'axis-ids', {json.loads}, ..., 'axisId', {str}))
                ]
        return self.playlist_result(entries, page_id)
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@ -569,7 +569,7 @@ class FacebookIE(InfoExtractor):
            if dash_manifest:
                formats.extend(self._parse_mpd_formats(
                    compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
-                    mpd_url=url_or_none(video.get('dash_manifest_url')) or mpd_url))
+                    mpd_url=url_or_none(vid_data.get('dash_manifest_url')) or mpd_url))
        def process_formats(info):
            # Downloads with browser's User-Agent are rate limited. Working around
--- a/yt_dlp/extractor/litv.py
+++ b/yt_dlp/extractor/litv.py
@ -1,30 +1,32 @@
 import json
 import uuid
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    int_or_none,
    join_nonempty,
    smuggle_url,
    traverse_obj,
    try_call,
    unsmuggle_url,
    urljoin,
 )
 class LiTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)'
+    _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:[^/?#]+/watch/|vod/[^/?#]+/content\.do\?content_id=)(?P<id>[\w-]+)'
-
+    _URL_TEMPLATE = 'https://www.litv.tv/%s/watch/%s'
-    _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?content_id=%s'
+    _GEO_COUNTRIES = ['TW']
    _TESTS = [{
-        'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
+        'url': 'https://www.litv.tv/drama/watch/VOD00041610',
        'info_dict': {
            'id': 'VOD00041606',
            'title': '花千骨',
        },
        'playlist_count': 51,  # 50 episodes + 1 trailer
    }, {
-        'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
+        'url': 'https://www.litv.tv/drama/watch/VOD00041610',
        'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a',
        'info_dict': {
            'id': 'VOD00041610',
@ -32,16 +34,15 @@ class LiTVIE(InfoExtractor):
            'title': '花千骨第1集',
            'thumbnail': r're:https?://.*\.jpg$',
            'description': '《花千骨》陸劇線上看。十六年前，平靜的村莊內，一名女嬰隨異相出生，途徑此地的蜀山掌門清虛道長算出此女命運非同一般，她體內散發的異香易招惹妖魔。一念慈悲下，他在村莊周邊設下結界阻擋妖魔入侵，讓其年滿十六後去蜀山，並賜名花千骨。',
-            'categories': ['奇幻', '愛情', '中國', '仙俠'],
+            'categories': ['奇幻', '愛情', '仙俠', '古裝'],
            'episode': 'Episode 1',
            'episode_number': 1,
        },
        'params': {
            'noplaylist': True,
        },
        'skip': 'Georestricted to Taiwan',
    }, {
-        'url': 'https://www.litv.tv/promo/miyuezhuan/?content_id=VOD00044841&',
+        'url': 'https://www.litv.tv/drama/watch/VOD00044841',
        'md5': '88322ea132f848d6e3e18b32a832b918',
        'info_dict': {
            'id': 'VOD00044841',
@ -55,94 +56,62 @@ class LiTVIE(InfoExtractor):
    def _extract_playlist(self, playlist_data, content_type):
        all_episodes = [
            self.url_result(smuggle_url(
-                self._URL_TEMPLATE % (content_type, episode['contentId']),
+                self._URL_TEMPLATE % (content_type, episode['content_id']),
                {'force_noplaylist': True}))  # To prevent infinite recursion
-            for episode in traverse_obj(playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))]
+            for episode in traverse_obj(playlist_data, ('seasons', ..., 'episodes', lambda _, v: v['content_id']))]
-        return self.playlist_result(all_episodes, playlist_data['contentId'], playlist_data.get('title'))
+        return self.playlist_result(all_episodes, playlist_data['content_id'], playlist_data.get('title'))
    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        vod_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']
-        if self._search_regex(
+        program_info = traverse_obj(vod_data, ('programInformation', {dict})) or {}
-                r'(?i)<meta\s[^>]*http-equiv="refresh"\s[^>]*content="[0-9]+;\s*url=https://www\.litv\.tv/"',
+        playlist_data = traverse_obj(vod_data, ('seriesTree'))
-                webpage, 'meta refresh redirect', default=False, group=0):
+        if playlist_data and self._yes_playlist(program_info.get('series_id'), video_id, smuggled_data):
-            raise ExtractorError('No such content found', expected=True)
+            return self._extract_playlist(playlist_data, program_info.get('content_type'))
-        program_info = self._parse_json(self._search_regex(
+        asset_id = traverse_obj(program_info, ('assets', 0, 'asset_id', {str}))
-            r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
+        if asset_id:  # This is a VOD
-            video_id)
+            media_type = 'vod'
        else:  # This is a live stream
            asset_id = program_info['content_id']
            media_type = program_info['content_type']
        puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
        if puid:
            endpoint = 'get-urls'
        else:
            puid = str(uuid.uuid4())
            endpoint = 'get-urls-no-auth'
        video_data = self._download_json(
            f'https://www.litv.tv/api/{endpoint}', video_id,
            data=json.dumps({'AssetId': asset_id, 'MediaType': media_type, 'puid': puid}).encode(),
            headers={'Content-Type': 'application/json'})
-        # In browsers `getProgramInfo` request is always issued. Usually this
+        if error := traverse_obj(video_data, ('error', {dict})):
-        # endpoint gives the same result as the data embedded in the webpage.
+            error_msg = traverse_obj(error, ('message', {str}))
-        # If, for some reason, there are no embedded data, we do an extra request.
+            if error_msg and 'OutsideRegionError' in error_msg:
        if 'assetId' not in program_info:
            program_info = self._download_json(
                'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
                query={'contentId': video_id},
                headers={'Accept': 'application/json'})
        series_id = program_info['seriesId']
        if self._yes_playlist(series_id, video_id, smuggled_data):
            playlist_data = self._download_json(
                'https://www.litv.tv/vod/ajax/getSeriesTree', video_id,
                query={'seriesId': series_id}, headers={'Accept': 'application/json'})
            return self._extract_playlist(playlist_data, program_info['contentType'])
        video_data = self._parse_json(self._search_regex(
            r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
            webpage, 'video data', default='{}'), video_id)
        if not video_data:
            payload = {'assetId': program_info['assetId']}
            puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
            if puid:
                payload.update({
                    'type': 'auth',
                    'puid': puid,
                })
                endpoint = 'getUrl'
            else:
                payload.update({
                    'watchDevices': program_info['watchDevices'],
                    'contentType': program_info['contentType'],
                })
                endpoint = 'getMainUrlNoAuth'
            video_data = self._download_json(
                f'https://www.litv.tv/vod/ajax/{endpoint}', video_id,
                data=json.dumps(payload).encode(),
                headers={'Content-Type': 'application/json'})
        if not video_data.get('fullpath'):
            error_msg = video_data.get('errorMessage')
            if error_msg == 'vod.error.outsideregionerror':
                self.raise_geo_restricted('This video is available in Taiwan only')
-            if error_msg:
+            elif error_msg:
                raise ExtractorError(f'{self.IE_NAME} said: {error_msg}', expected=True)
-            raise ExtractorError(f'Unexpected result from {self.IE_NAME}')
+            raise ExtractorError(f'Unexpected error from {self.IE_NAME}')
        formats = self._extract_m3u8_formats(
-            video_data['fullpath'], video_id, ext='mp4',
+            video_data['result']['AssetURLs'][0], video_id, ext='mp4', m3u8_id='hls')
            entry_protocol='m3u8_native', m3u8_id='hls')
        for a_format in formats:
            # LiTV HLS segments doesn't like compressions
            a_format.setdefault('http_headers', {})['Accept-Encoding'] = 'identity'
        title = program_info['title'] + program_info.get('secondaryMark', '')
        description = program_info.get('description')
        thumbnail = program_info.get('imageFile')
        categories = [item['name'] for item in program_info.get('category', [])]
        episode = int_or_none(program_info.get('episode'))
        return {
            'id': video_id,
            'formats': formats,
-            'title': title,
+            'title': join_nonempty('title', 'secondary_mark', delim='', from_dict=program_info),
-            'description': description,
+            **traverse_obj(program_info, {
-            'thumbnail': thumbnail,
+                'description': ('description', {str}),
-            'categories': categories,
+                'thumbnail': ('picture', {urljoin('https://p-cdnstatic.svc.litv.tv/')}),
-            'episode_number': episode,
+                'categories': ('genres', ..., 'name', {str}),
                'episode_number': ('episode', {int_or_none}),
            }),
        }
--- a/yt_dlp/extractor/markiza.py
+++ b/yt_dlp/extractor/markiza.py
@ -1,123 +0,0 @@
 import re
 from .common import InfoExtractor
 from ..utils import (
    orderedSet,
    parse_duration,
    try_get,
 )
 class MarkizaIE(InfoExtractor):
    """Extractor for videoarchiv.markiza.sk video and embed URLs.

    The class is flagged with ``_WORKING = False``.
    """

    _WORKING = False
    _VALID_URL = r'https?://(?:www\.)?videoarchiv\.markiza\.sk/(?:video/(?:[^/]+/)*|embed/)(?P<id>\d+)(?:[_/]|$)'
    _TESTS = [{
        'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109',
        'md5': 'ada4e9fad038abeed971843aa028c7b0',
        'info_dict': {
            'id': '139078',
            'ext': 'mp4',
            'title': 'Oteckovia 109',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2760,
        },
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/televizne-noviny/televizne-noviny/85430_televizne-noviny',
        'info_dict': {
            'id': '85430',
            'title': 'Televízne noviny',
        },
        'playlist_count': 23,
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723',
        'only_matching': True,
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/84723',
        'only_matching': True,
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/filmy/85190_kamenak',
        'only_matching': True,
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/reflex/zo-zakulisia/84651_pribeh-alzbetky',
        'only_matching': True,
    }, {
        'url': 'http://videoarchiv.markiza.sk/embed/85295',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the JWPlayer JSON for the matched id and turn it into an info dict.

        A playlist result gets its id and title filled in from the JSON
        ``details``; any other result gets a ``duration`` parsed from it.
        """
        video_id = self._match_id(url)
        player_json = self._download_json(
            'http://videoarchiv.markiza.sk/json/video_jwplayer7.json',
            video_id, query={'id': video_id})
        result = self._parse_jwplayer_data(player_json, m3u8_id='hls', mpd_id='dash')

        # Single video: only the duration has to be added
        if result.get('_type') != 'playlist':
            result['duration'] = parse_duration(
                try_get(player_json, lambda x: x['details']['duration'], str))
            return result

        # Playlist: identify it by the requested id and the name from the details
        result.update({
            'id': video_id,
            'title': try_get(
                player_json, lambda x: x['details']['name'], str),
        })
        return result
 class MarkizaPageIE(InfoExtractor):
    """Extractor for markiza.sk / tvnoviny.sk article pages that embed player ids.

    Each player id found in the page is delegated to a
    ``videoarchiv.markiza.sk/video/<id>`` URL. The class is flagged with
    ``_WORKING = False``.
    """

    _WORKING = False
    _VALID_URL = r'https?://(?:www\.)?(?:(?:[^/]+\.)?markiza|tvnoviny)\.sk/(?:[^/]+/)*(?P<id>\d+)_'
    _TESTS = [{
        'url': 'http://www.markiza.sk/soubiz/zahranicny/1923705_oteckovia-maju-svoj-den-ti-slavni-nie-su-o-nic-menej-rozkosni',
        'md5': 'ada4e9fad038abeed971843aa028c7b0',
        'info_dict': {
            'id': '139355',
            'ext': 'mp4',
            'title': 'Oteckovia 110',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2604,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://dajto.markiza.sk/filmy-a-serialy/1774695_frajeri-vo-vegas',
        'only_matching': True,
    }, {
        'url': 'http://superstar.markiza.sk/aktualne/1923870_to-je-ale-telo-spevacka-ukazala-sexy-postavicku-v-bikinach',
        'only_matching': True,
    }, {
        'url': 'http://hybsa.markiza.sk/aktualne/1923790_uzasna-atmosfera-na-hybsa-v-poprade-superstaristi-si-prve-koncerty-pred-davom-ludi-poriadne-uzili',
        'only_matching': True,
    }, {
        'url': 'http://doma.markiza.sk/filmy/1885250_moja-vysnivana-svadba',
        'only_matching': True,
    }, {
        'url': 'http://www.tvnoviny.sk/domace/1923887_po-smrti-manzela-ju-cakalo-poriadne-prekvapenie',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that MarkizaIE accepts; otherwise defer to the parent check."""
        if MarkizaIE.suitable(url):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Download the page and build a playlist from the player ids found in it."""
        playlist_id = self._match_id(url)

        # Some hosts (e.g. dajto, doma) answer with HTTP 500 even though the
        # page content appears fine, so a 500 status is treated as expected.
        webpage = self._download_webpage(url, playlist_id, expected_status=500)

        # De-duplicated player ids, in order of first appearance
        player_ids = orderedSet(re.findall(
            r'(?:initPlayer_|data-entity=["\']|id=["\']player_)(\d+)',
            webpage))
        entries = [
            self.url_result(f'http://videoarchiv.markiza.sk/video/{video_id}')
            for video_id in player_ids]

        return self.playlist_result(entries, playlist_id)
--- a/yt_dlp/extractor/mildom.py
+++ b/yt_dlp/extractor/mildom.py
@ -1,291 +0,0 @@
 import functools
 import json
 import uuid
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    OnDemandPagedList,
    determine_ext,
    dict_get,
    float_or_none,
    traverse_obj,
 )
 class MildomBaseIE(InfoExtractor):
    """Base class providing the JSON API helper shared by the Mildom extractors."""
    _GUEST_ID = None
    def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None):
        """Call a Mildom JSON endpoint and return the `body` member of the reply.

        A guest id is generated on first use and sent with every request.
        When `body` is truthy it is sent as a JSON payload; otherwise no
        request data is passed.
        Raises ExtractorError (expected) when the reply's `code` is not 0.
        """
        if not self._GUEST_ID:
            self._GUEST_ID = f'pc-gp-{uuid.uuid4()}'
        if body:
            payload = json.dumps(body).encode()
            request_headers = {'Content-Type': 'application/json'}
        else:
            payload = None
            request_headers = {}
        # Caller-supplied query values override the two default parameters
        api_query = {
            '__guest_id': self._GUEST_ID,
            '__platform': 'web',
            **(query or {}),
        }
        content = self._download_json(
            url, video_id, note=note, data=payload,
            headers=request_headers, query=api_query)
        if content['code'] == 0:
            return content['body']
        raise ExtractorError(
            f'Mildom says: {content["message"]} (code {content["code"]})',
            expected=True)
 class MildomIE(MildomBaseIE):
    """Extractor for the live stream of the Mildom user given in the URL."""
    IE_NAME = 'mildom'
    IE_DESC = 'Record ongoing live by specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/(?P<id>\d+)'
    def _real_extract(self, url):
        """Collect live metadata, server list and playback token, then the HLS formats."""
        # The id captured from the URL is sent to the API as the user/host id
        user_id = self._match_id(url)
        live_api = 'https://cloudac.mildom.com/nonolive/gappserv/live'
        site_referer = 'https://www.mildom.com/'
        webpage = self._download_webpage(f'https://www.mildom.com/{user_id}', user_id)
        enterstudio = self._call_api(
            f'{live_api}/enterstudio', user_id,
            note='Downloading live metadata', query={'user_id': user_id})
        # Fall back to the URL id when the API reply carries no log_id
        result_video_id = enterstudio.get('log_id', user_id)
        servers = self._call_api(
            f'{live_api}/liveserver', result_video_id,
            note='Downloading live server list', query={
                'user_id': user_id,
                'live_server_type': 'hls',
            })
        token_reply = self._call_api(
            f'{live_api}/token', result_video_id,
            note='Obtaining live playback token', body={'host_id': user_id, 'type': 'hls'})
        playback_token = traverse_obj(token_reply, ('data', ..., 'token'), get_all=False)
        if not playback_token:
            raise ExtractorError('Failed to obtain live playback token')
        master_url = f'{servers["stream_server"]}/{user_id}_master.m3u8?{playback_token}'
        formats = self._extract_m3u8_formats(
            master_url, result_video_id, 'mp4', headers={
                'Referer': site_referer,
                'Origin': 'https://www.mildom.com',
            })
        # Attach the Referer to every format as well
        for fmt in formats:
            fmt.setdefault('http_headers', {})['Referer'] = site_referer
        title = (
            self._html_search_meta('twitter:description', webpage, default=None)
            or traverse_obj(enterstudio, 'anchor_intro'))
        description = traverse_obj(enterstudio, 'intro', 'live_intro', expected_type=str)
        timestamp = float_or_none(enterstudio.get('live_start_ms'), scale=1000)
        uploader = (
            self._html_search_meta('twitter:title', webpage, default=None)
            or traverse_obj(enterstudio, 'loginname'))
        return {
            'id': result_video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': user_id,
            'formats': formats,
            'is_live': True,
        }
 class MildomVodIE(MildomBaseIE):
    """Extractor for a single Mildom playback (VOD) page."""
    IE_NAME = 'mildom:vod'
    IE_DESC = 'VOD in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+-?[0-9]*)'
    _TESTS = [{
        'url': 'https://www.mildom.com/playback/10882672/10882672-1597662269',
        'info_dict': {
            'id': '10882672-1597662269',
            'ext': 'mp4',
            'title': '始めてのミルダム配信じゃぃ！',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'upload_date': '20200817',
            'duration': 4138.37,
            'description': 'ゲームをしたくて！',
            'timestamp': 1597662269.0,
            'uploader_id': '10882672',
            'uploader': 'kson組長(けいそん)',
        },
    }, {
        'url': 'https://www.mildom.com/playback/10882672/10882672-1597758589870-477',
        'info_dict': {
            'id': '10882672-1597758589870-477',
            'ext': 'mp4',
            'title': '【kson】感染メイズ！麻酔銃で無双する',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'timestamp': 1597759093.0,
            'uploader': 'kson組長(けいそん)',
            'duration': 4302.58,
            'uploader_id': '10882672',
            'description': 'このステージ絶対乗り越えたい',
            'upload_date': '20200818',
        },
    }, {
        'url': 'https://www.mildom.com/playback/10882672/10882672-buha9td2lrn97fk2jme0',
        'info_dict': {
            'id': '10882672-buha9td2lrn97fk2jme0',
            'ext': 'mp4',
            'title': '【kson組長】CART RACER!!!',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'uploader_id': '10882672',
            'uploader': 'kson組長(けいそん)',
            'upload_date': '20201104',
            'timestamp': 1604494797.0,
            'duration': 4657.25,
            'description': 'WTF',
        },
    }]
    def _real_extract(self, url):
        """Fetch playback details and return one audio format plus one format per video link."""
        match = self._match_valid_url(url)
        user_id, video_id = match.group('user_id', 'id')
        webpage = self._download_webpage(
            f'https://www.mildom.com/playback/{user_id}/{video_id}', video_id)
        autoplay = self._call_api(
            'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id,
            note='Downloading playback metadata', query={
                'v_id': video_id,
            })['playback']
        audio_format = {
            'url': autoplay['audio_url'],
            'format_id': 'audio',
            'protocol': 'm3u8_native',
            'vcodec': 'none',
            'acodec': 'aac',
            'ext': 'm4a',
        }
        # Width is derived from each link's level (used as the height) and the
        # video_width/video_height pair reported for the playback.
        video_formats = [{
            'format_id': f'video-{link["name"]}',
            'url': link['url'],
            'protocol': 'm3u8_native',
            'width': link['level'] * autoplay['video_width'] // autoplay['video_height'],
            'height': link['level'],
            'vcodec': 'h264',
            'acodec': 'aac',
            'ext': 'mp4',
        } for link in autoplay['video_link']]
        formats = [audio_format, *video_formats]
        title = (
            self._html_search_meta(('og:description', 'description'), webpage, default=None)
            or autoplay.get('title'))
        return {
            'id': video_id,
            'title': title,
            'description': traverse_obj(autoplay, 'video_intro'),
            'timestamp': float_or_none(autoplay.get('publish_time'), scale=1000),
            'duration': float_or_none(autoplay.get('video_length'), scale=1000),
            'thumbnail': dict_get(autoplay, ('upload_pic', 'video_pic')),
            'uploader': traverse_obj(autoplay, ('author_info', 'login_name')),
            'uploader_id': user_id,
            'formats': formats,
        }
 class MildomClipIE(MildomBaseIE):
    """Extractor for a single Mildom clip page."""
    IE_NAME = 'mildom:clip'
    IE_DESC = 'Clip in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/clip/(?P<id>(?P<user_id>\d+)-[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'https://www.mildom.com/clip/10042245-63921673e7b147ebb0806d42b5ba5ce9',
        'info_dict': {
            'id': '10042245-63921673e7b147ebb0806d42b5ba5ce9',
            'title': '全然違ったよ',
            'timestamp': 1619181890,
            'duration': 59,
            'thumbnail': r're:https?://.+',
            'uploader': 'ざきんぽ',
            'uploader_id': '10042245',
        },
    }, {
        'url': 'https://www.mildom.com/clip/10111524-ebf4036e5aa8411c99fb3a1ae0902864',
        'info_dict': {
            'id': '10111524-ebf4036e5aa8411c99fb3a1ae0902864',
            'title': 'かっこいい',
            'timestamp': 1621094003,
            'duration': 59,
            'thumbnail': r're:https?://.+',
            'uploader': '(ルーキー',
            'uploader_id': '10111524',
        },
    }, {
        'url': 'https://www.mildom.com/clip/10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
        'info_dict': {
            'id': '10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
            'title': 'あ',
            'timestamp': 1614769431,
            'duration': 31,
            'thumbnail': r're:https?://.+',
            'uploader': 'ドルゴルスレンギーン＝ダグワドルジ',
            'uploader_id': '10660174',
        },
    }]
    def _real_extract(self, url):
        """Fetch the clip detail record and return a single direct-URL entry."""
        match = self._match_valid_url(url)
        user_id, video_id = match.group('user_id', 'id')
        webpage = self._download_webpage(f'https://www.mildom.com/clip/{video_id}', video_id)
        clip_detail = self._call_api(
            'https://cloudac-cf-jp.mildom.com/nonolive/videocontent/clip/detail', video_id,
            note='Downloading playback metadata', query={
                'clip_id': video_id,
            })
        # Page metadata is preferred for the title; the API title is the fallback
        page_title = self._html_search_meta(
            ('og:description', 'description'), webpage, default=None)
        info = {
            'id': video_id,
            'title': page_title or clip_detail.get('title'),
            'timestamp': float_or_none(clip_detail.get('create_time')),
            'duration': float_or_none(clip_detail.get('length')),
            'thumbnail': clip_detail.get('cover'),
            'uploader': traverse_obj(clip_detail, ('user_info', 'loginname')),
            'uploader_id': user_id,
            'url': clip_detail['url'],
            'ext': determine_ext(clip_detail.get('url'), 'mp4'),
        }
        return info
 class MildomUserVodIE(MildomBaseIE):
    """Playlist extractor listing the playback (VOD) entries of a Mildom profile."""
    IE_NAME = 'mildom:user:vod'
    IE_DESC = 'Download all VODs from specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/profile/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.mildom.com/profile/10093333',
        'info_dict': {
            'id': '10093333',
            'title': 'Uploads from ねこばたけ',
        },
        'playlist_mincount': 732,
    }, {
        'url': 'https://www.mildom.com/profile/10882672',
        'info_dict': {
            'id': '10882672',
            'title': 'Uploads from kson組長(けいそん)',
        },
        'playlist_mincount': 201,
    }]
    def _fetch_page(self, user_id, page):
        """Yield url_result entries for one page of the user's playback list.

        The API is queried with `page + 1`; presumably the pager supplies a
        zero-based index (confirm against OnDemandPagedList).
        """
        page_number = page + 1
        reply = self._call_api(
            'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
            user_id, note=f'Downloading page {page_number}', query={
                'user_id': user_id,
                'page': page_number,
                'limit': '30',
            })
        if reply:
            for item in reply:
                v_id = item.get('v_id')
                # Entries without a v_id cannot be turned into a playback URL
                if v_id:
                    yield self.url_result(f'https://www.mildom.com/playback/{user_id}/{v_id}')
    def _real_extract(self, url):
        """Return a lazily paged playlist titled after the profile's login name."""
        user_id = self._match_id(url)
        self.to_screen(f'This will download all VODs belonging to user. To download ongoing live video, use "https://www.mildom.com/{user_id}" instead')
        profile_reply = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id,
            query={'user_id': user_id}, note='Downloading user profile')
        profile = profile_reply['user_info']
        # Page size matches the 'limit' sent by _fetch_page
        pager = OnDemandPagedList(functools.partial(self._fetch_page, user_id), 30)
        return self.playlist_result(pager, user_id, f'Uploads from {profile["loginname"]}')
--- a/yt_dlp/extractor/nova.py
+++ b/yt_dlp/extractor/nova.py
@ -6,14 +6,20 @@ from ..utils import (
    determine_ext,
    int_or_none,
    js_to_json,
-    traverse_obj,
+    strip_or_none,
    unified_strdate,
    url_or_none,
 )
 from ..utils.traversal import traverse_obj
 class NovaEmbedIE(InfoExtractor):
-    _VALID_URL = r'https?://media(?:tn)?\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
+    _DOMAINS = [
        r'media(?:tn)?\.cms\.nova\.cz',
        r'media\.cms\.(?:markiza|tvnoviny)\.sk',
    ]
    _VALID_URL = [rf'https?://{domain}/embed/(?P<id>[^/?#&"\']+)' for domain in _DOMAINS]
    _EMBED_REGEX = [rf'(?x)<iframe[^>]+\b(?:data-)?src=["\'](?P<url>{url})' for url in _VALID_URL]
    _TESTS = [{
        'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
        'info_dict': {
@ -43,19 +49,179 @@ class NovaEmbedIE(InfoExtractor):
            'id': 'EU5ELEsmOHt',
            'ext': 'mp4',
            'title': 'Haptické křeslo, bionická ruka nebo roboti. Reportérka se podívala na Týden inovací',
-            'thumbnail': r're:^https?://.*\.jpg',
+            'thumbnail': r're:^https?://cloudia\.cms\.nova\.cz/.+',
            'duration': 1780,
        },
        'params': {'skip_download': 'm3u8'},
    }]
    _WEBPAGE_TESTS = [{
        'url': 'http://www.markiza.sk/soubiz/zahranicny/1923705_oteckovia-maju-svoj-den-ti-slavni-nie-su-o-nic-menej-rozkosni',
        'md5': 'a478390ea7f36aeb36004a107db8b031',
        'info_dict': {
            'id': '4q3zP2DsORO',
            'ext': 'mp4',
            'title': 'Oteckovia 110',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2603,
        },
    }, {
        'url': 'https://tvnoviny.sk/domace/clanok/141815-byvaly-sportovec-udajne-vyrabal-mast-z-marihuany-sud-mu-vymeral-20-rocny-trest-a-vzal-aj-rodinny-dom',
        'md5': '51de0754352a36b4d623f98c9636a5e1',
        'info_dict': {
            'id': '2LcfYRqGuYP',
            'ext': 'mp4',
            'title': 'Marihuanový mastičkár si vypočul vysoký trest a prepad majetku',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 119,
        },
    }, {
        'url': 'https://tvnoviny.sk/domace/clanok/144055-robert-z-kosic-dostal-najnizsi-mozny-trest-za-to-co-spravil-je-to-aj-tak-vela-tvrdia-blizki',
        'md5': 'c9a8467b37951877336a9ae6309558b0',
        'info_dict': {
            'id': '82N7FrJK7cR',
            'ext': 'mp4',
            'title': 'Robovi z Košíc znížili trest za marihuanu, odsúdili ho na päť rokov',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 152,
        },
    }, {
        'url': 'https://tvnoviny.sk/domace/clanok/338907-preco-sa-mnozia-utoky-tinedzerov-podla-psychologiciek-je-za-tym-rastuca-frustracia',
        'md5': '869b589e99d7c19dd66f024a7d088502',
        'info_dict': {
            'id': 'DeiezcjCJmg',
            'ext': 'mp4',
            'title': '2022-11-03-TN-2-Nasilie-medzi-mladymi',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 142,
        },
    }, {
        'url': 'http://tvnoviny.sk/domace/clanok/890183-vlada-chysta-postavit-novu-nemocnicu-v-presove-informoval-premier-robert-fico',
        'md5': 'b9ef0b4917deee2c930f2248b568a90c',
        'info_dict': {
            'id': '7VCyuyfGsNZ',
            'ext': 'mp4',
            'title': '2024-04-15-PTN-1-Co-caka-zdravotnictvo',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 137,
        },
    }, {
        'url': 'https://www.markiza.sk/live/1-markiza',
        'info_dict': {
            'id': 'markiza-live',
            'ext': 'mp4',
            'title': r're:^CRA Markiza SD \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'thumbnail': r're:^https?://cloudia\.cms\.markiza\.sk/.+',
            'live_status': 'is_live',
        },
    }, {
        'url': 'http://www.tvnoviny.sk/domace/1923887_po-smrti-manzela-ju-cakalo-poriadne-prekvapenie',
        'md5': 'e3e0f1e98172ea64147cada308276df8',
        'info_dict': {
            'id': 'JxqRvQkFwHK',
            'ext': 'mp4',
            'title': 'Po smrti manžela ju čakalo prekvapenie',
            'thumbnail': r're:^https?://.*\.(?:jpg)',
            'duration': 108,
        },
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/reflex/zo-zakulisia/84651_pribeh-alzbetky',
        'md5': 'b40d04d5cb4cf529e2ff14d6726a3548',
        'info_dict': {
            'id': '9ZnlOQp2MRa',
            'ext': 'mp4',
            'title': 'Príbeh Alžbetky',
            'thumbnail': r're:^https?://.*\.(?:jpg)',
            'duration': 361,
        },
    }, {
        'url': 'https://www.markiza.sk/relacie/superstar/clanok/549972-v-zakulisi-superstar-to-bolo-obcas-drsne-moderator-priznal-ze-musel-pouzit-aj-hrubu-silu',
        'info_dict': {
            'id': '549972-v-zakulisi-superstar-to-bolo-obcas-drsne-moderator-priznal-ze-musel-pouzit-aj-hrubu-silu',
            'title': 'V zákulisí SuperStar to bolo občas drsné. Moderátor priznal, že musel použiť aj hrubú silu | TV Markíza',
            'description': 'md5:02e240e302bddfd0cd352bc886d95161',
            'thumbnail': r're:^https?://cmesk-ott-images-avod\.ssl\.cdn\.cra\.cz/.+',
            'age_limit': 0,
        },
        'playlist_count': 2,
    }, {
        'url': 'https://voyo.markiza.sk/filmy/6702-vysnivana-svadba',
        'info_dict': {
            'id': '20kSOHBD8DQ',
            'title': 'Vysnívaná svadba - 0000',
            'thumbnail': r're:^https?://.*\.(?:jpg)',
            'duration': 4924,
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
        'expected_warnings': [
            'Requested format is not available',
            'This video is DRM protected',
        ],
        'skip': 'premium member only',
    }, {
        # Other URLs:
        #   http://videoarchiv.markiza.sk/video/84723
        'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109',
        'info_dict': {
            'id': '2a5fQmhjvYm',
            'title': 'Oteckovia 109',
            'thumbnail': r're:^https?://.*\.(?:jpg)',
            'duration': 2759,
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
        'expected_warnings': [
            'Requested format is not available',
            'This video is DRM protected',
        ],
        'skip': 'premium member only',
    }, {
        'url': 'https://voyo.markiza.sk/filmy/1377-frajeri-vo-vegas#player-fullscreen',
        'info_dict': {
            'id': '1377-frajeri-vo-vegas#player-fullscreen',
            'title': 'Frajeri vo Vegas | Voyo',
            'description': 'md5:7f16168f669f144986d862312949627c',
            'thumbnail': r're:^https?://cmesk-ott-images-svod\.ssl\.cdn\.cra\.cz/.+',
            'age_limit': 0,
        },
        'playlist': [{
            'info_dict': {
                'id': 'K8H4IvKNBbw',
                'ext': 'mp4',
                'title': 'frajeri-vo-vegas-hd-15_frajeri-trailer',
                'duration': 90,
                'thumbnail': r're:^https?://.*\.(?:jpg)',
            },
        },
            # BUG: The 2nd item (CDjGcqcCYKy) is the movie itself and it's DRM-protected.
            #      The "ext" field can neither be here nor omitted.
        ],
        'playlist_count': 2,
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
        'expected_warnings': [
            'Requested format is not available',
            'This video is DRM protected',
        ],
        'skip': 'premium member only',
    }]
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        if 'player_not_logged_in' in webpage:
            self.raise_login_required()
        has_drm = False
        duration = None
        is_live = False
        formats = []
        def process_format_list(format_list, format_id=''):
@ -77,11 +243,11 @@ class NovaEmbedIE(InfoExtractor):
                    formats.extend(self._extract_m3u8_formats(
                        format_url, video_id, 'mp4',
                        entry_protocol='m3u8_native', m3u8_id='hls',
-                        fatal=False))
+                        fatal=False, headers={'Referer': url}))
                elif (format_type == 'application/dash+xml'
                      or format_id == 'DASH' or ext == 'mpd'):
                    formats.extend(self._extract_mpd_formats(
-                        format_url, video_id, mpd_id='dash', fatal=False))
+                        format_url, video_id, mpd_id='dash', fatal=False, headers={'Referer': url}))
                else:
                    formats.append({
                        'url': format_url,
@ -93,6 +259,7 @@ class NovaEmbedIE(InfoExtractor):
            for src in traverse_obj(player, ('lib', 'source', 'sources', ...)):
                process_format_list(src)
            duration = traverse_obj(player, ('sourceInfo', 'duration', {int_or_none}))
            is_live = player.get('isLive', False)
        if not formats and not has_drm:
            # older code path, in use before August 2023
            player = self._parse_json(
@ -108,11 +275,11 @@ class NovaEmbedIE(InfoExtractor):
        if not formats and has_drm:
            self.report_drm(video_id)
-        title = self._og_search_title(
+        title = strip_or_none(self._og_search_title(
            webpage, default=None) or self._search_regex(
            (r'<value>(?P<title>[^<]+)',
             r'videoTitle\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
-            'title', group='value')
+            'title', group='value'))
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._search_regex(
            r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
@ -127,6 +294,8 @@ class NovaEmbedIE(InfoExtractor):
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
            'http_headers': {'Referer': url},
            'is_live': is_live,
        }
--- a/yt_dlp/extractor/pialive.py
+++ b/yt_dlp/extractor/pialive.py
@ -0,0 +1,122 @@
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    clean_html,
    extract_attributes,
    get_element_by_class,
    get_element_html_by_class,
    multipart_encode,
    str_or_none,
    unified_timestamp,
    url_or_none,
 )
 from ..utils.traversal import traverse_obj
 class PiaLiveIE(InfoExtractor):
    """Extractor for PIA LIVE STREAM player pages (player.pia-live.jp/stream/...)."""
    _VALID_URL = r'https?://player\.pia-live\.jp/stream/(?P<id>[\w-]+)'
    _PLAYER_ROOT_URL = 'https://player.pia-live.jp/'
    _PIA_LIVE_API_URL = 'https://api.pia-live.jp'
    _API_KEY = 'kfds)FKFps-dms9e'
    _TESTS = [{
        'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krUDqGOwN4d61dCWQYOd6CTxl4hjya9dsfEZGsM4uGOUdax60lEI4twsXGXf7crmz8Gk__GhupTrWxA7RFRVt76',
        'info_dict': {
            'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
            'display_id': '2431867_001',
            'title': 'こながめでたい日２０２４の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
            'live_status': 'was_live',
            'comment_count': int,
        },
        'params': {
            'getcomments': True,
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
        'skip': 'The video is no longer available',
    }, {
        'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krJdu0GVBVbVy01IwpJ6J3qBEm3d9TCTt1d0eWpsZGj7DrOjVOmS7GAWGwyscMgiThopJvzgWC4H5b-7XQjAfRZ',
        'info_dict': {
            'id': '9ce8b8ba-f6d1-4d1f-83a0-18c3148ded93',
            'display_id': '2431867_002',
            'title': 'こながめでたい日２０２４の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
            'live_status': 'was_live',
            'comment_count': int,
        },
        'params': {
            'getcomments': True,
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
        'skip': 'The video is no longer available',
    }]
    def _extract_var(self, variable, html):
        """Return the quoted string assigned to the `var`/`const`/`let` named `variable` in `html`."""
        return self._search_regex(
            rf'(?:var|const|let)\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
            html, f'variable {variable}', group='value')
    def _real_extract(self, url):
        """Resolve the player page to the embedded player URL.

        Raises ExtractorError (expected) when the page carries the 'play-end'
        element. When a waiting-date element with a parsable start time is
        present, returns an 'is_upcoming' entry instead of formats. Otherwise
        delegates to the player URL found in the API's 'movie_one_tag' markup
        and attaches the comment extractor.
        """
        video_key = self._match_id(url)
        webpage = self._download_webpage(url, video_key)
        program_code = self._extract_var('programCode', webpage)
        article_code = self._extract_var('articleCode', webpage)
        title = self._html_extract_title(webpage)
        if get_element_html_by_class('play-end', webpage):
            raise ExtractorError('The video is no longer available', expected=True, video_id=program_code)
        if start_info := clean_html(get_element_by_class('play-waiting__date', webpage)):
            # With fatal=False a failed match yields None rather than a tuple;
            # fall back to a pair of Nones so the unpacking cannot raise and
            # extraction continues with the regular code path below.
            date, time = self._search_regex(
                r'(?P<date>\d{4}/\d{1,2}/\d{1,2})\([月火水木金土日]\)(?P<time>\d{2}:\d{2})',
                start_info, 'start_info', fatal=False, group=('date', 'time')) or (None, None)
            if date and time:
                # The page shows the start time without an offset; +09:00 is appended here
                release_timestamp_str = f'{date} {time} +09:00'
                release_timestamp = unified_timestamp(release_timestamp_str)
                self.raise_no_formats(f'The video will be available after {release_timestamp_str}', expected=True)
                return {
                    'id': program_code,
                    'title': title,
                    'live_status': 'is_upcoming',
                    'release_timestamp': release_timestamp,
                }
        payload, content_type = multipart_encode({
            'play_url': video_key,
            'api_key': self._API_KEY,
        })
        # The same request body and headers are reused for the chat API call
        api_data_and_headers = {
            'data': payload,
            'headers': {'Content-Type': content_type, 'Referer': self._PLAYER_ROOT_URL},
        }
        player_tag_list = self._download_json(
            f'{self._PIA_LIVE_API_URL}/perf/player-tag-list/{program_code}', program_code,
            'Fetching player tag list', 'Unable to fetch player tag list', **api_data_and_headers)
        return self.url_result(
            extract_attributes(player_tag_list['data']['movie_one_tag'])['src'],
            url_transparent=True, title=title, display_id=program_code,
            __post_extractor=self.extract_comments(program_code, article_code, api_data_and_headers))
    def _get_comments(self, program_code, article_code, api_data_and_headers):
        """Yield comment dicts parsed from the chat page's `_history` array.

        Every step is non-fatal: if the chat URL or the comment page cannot be
        obtained, nothing is yielded.
        """
        chat_room_url = traverse_obj(self._download_json(
            f'{self._PIA_LIVE_API_URL}/perf/chat-tag-list/{program_code}/{article_code}', program_code,
            'Fetching chat info', 'Unable to fetch chat info', fatal=False, **api_data_and_headers),
            ('data', 'chat_one_tag', {extract_attributes}, 'src', {url_or_none}))
        if not chat_room_url:
            return
        comment_page = self._download_webpage(
            chat_room_url, program_code, 'Fetching comment page', 'Unable to fetch comment page',
            fatal=False, headers={'Referer': self._PLAYER_ROOT_URL})
        if not comment_page:
            return
        # Each history item is read positionally (indexes 0-4) into comment fields
        yield from traverse_obj(self._search_json(
            r'var\s+_history\s*=', comment_page, 'comment list',
            program_code, contains_pattern=r'\[(?s:.+)\]', fatal=False), (..., {
                'timestamp': (0, {int}),
                'author_is_uploader': (1, {lambda x: x == 2}),
                'author': (2, {str}),
                'text': (3, {str}),
                'id': (4, {str_or_none}),
            }))
--- a/yt_dlp/extractor/piaulizaportal.py
+++ b/yt_dlp/extractor/piaulizaportal.py
@ -1,70 +0,0 @@
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    int_or_none,
    parse_qs,
    time_seconds,
    traverse_obj,
 )
 class PIAULIZAPortalIE(InfoExtractor):
    """Extractor for ulizaportal.jp page URLs."""
    IE_DESC = 'ulizaportal.jp - PIA LIVE STREAM'
    _VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
    _TESTS = [{
        'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
        'info_dict': {
            'id': '005f18b7-e810-5618-cb82-0987c5755d44',
            'title': 'プレゼンテーションプレイヤーのサンプル',
            'live_status': 'not_live',
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }, {
        'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
        'info_dict': {
            'id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
            'title': '【確認用】視聴サンプルページ（ULIZA）',
            'live_status': 'not_live',
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }]
    def _real_extract(self, url):
        """Locate the player script, pull the m3u8 URL out of it and classify the stream.

        Raises ExtractorError (expected) when the URL's `expires` query value
        is not later than the current time.
        """
        video_id = self._match_id(url)
        expiry = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
        if expiry and time_seconds() >= expiry:
            raise ExtractorError('The link is expired.', video_id=video_id, expected=True)
        webpage = self._download_webpage(url, video_id)
        player_api_url = self._search_regex(
            r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
            webpage, 'player data url')
        player_data = self._download_webpage(
            player_api_url, video_id, headers={'Referer': 'https://ulizaportal.jp/'},
            note='Fetching player data', errnote='Unable to fetch player data')
        m3u8_url = self._search_regex(
            r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data,
            'm3u8 url', default=None)
        formats = self._extract_m3u8_formats(m3u8_url, video_id, fatal=False)
        first_format_url = traverse_obj(formats, (0, 'url'))
        m3u8_type = self._search_regex(
            r'/hls/(dvr|video)/', first_format_url, 'm3u8 type', default=None)
        status_by_type = {
            'video': 'is_live',
            'dvr': 'was_live',  # short-term archives
        }
        return {
            'id': video_id,
            'title': self._html_extract_title(webpage),
            'formats': formats,
            # VOD or long-term archives fall through to 'not_live'
            'live_status': status_by_type.get(m3u8_type, 'not_live'),
        }
--- a/yt_dlp/extractor/pokemon.py
+++ b/yt_dlp/extractor/pokemon.py
@ -1,136 +0,0 @@
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    extract_attributes,
    int_or_none,
    js_to_json,
    merge_dicts,
 )
 class PokemonIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))'
    _TESTS = [{
        'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/',
        'md5': '2fe8eaec69768b25ef898cda9c43062e',
        'info_dict': {
            'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4',
            'ext': 'mp4',
            'title': 'The Ol’ Raise and Switch!',
            'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
        },
        'add_id': ['LimelightMedia'],
    }, {
        # no data-video-title
        'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008',
        'info_dict': {
            'id': 'dfbaf830d7e54e179837c50c0c6cc0e1',
            'ext': 'mp4',
            'title': "Pokémon : L'ascension de Darkrai",
            'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5',
        },
        'add_id': ['LimelightMedia'],
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
        'only_matching': True,
    }, {
        'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/',
        'only_matching': True,
    }, {
        'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        """Build a url_transparent result that hands the video to LimelightMedia.

        The video ID, title and episode metadata are read from the attributes
        of the page element carrying ``data-video-id``.
        """
        video_id, display_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, video_id or display_id)

        # Match the exact ID when the URL supplied one, otherwise any 32-character ID
        id_pattern = video_id or '[a-z0-9]{32}'
        video_element = self._search_regex(
            r'(<[^>]+data-video-id="{}"[^>]*>)'.format(id_pattern),
            webpage, 'video data element')
        video_data = extract_attributes(video_element)
        video_id = video_data['data-video-id']

        # Title fallbacks, in order: element attribute, meta tag, page heading
        title = video_data.get('data-video-title')
        if not title:
            title = self._html_search_meta('pkm-title', webpage, ' title', default=None)
        if not title:
            title = self._search_regex(
                r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title')

        season_number = int_or_none(video_data.get('data-video-season'))
        episode_number = int_or_none(video_data.get('data-video-episode'))

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': f'limelight:media:{video_id}',
            'title': title,
            'description': video_data.get('data-video-summary'),
            'thumbnail': video_data.get('data-video-poster'),
            'series': 'Pokémon',
            'season_number': season_number,
            'episode': title,
            'episode_number': episode_number,
            'ie_key': 'LimelightMedia',
        }
 class PokemonWatchIE(InfoExtractor):
    _VALID_URL = r'https?://watch\.pokemon\.com/[a-z]{2}-[a-z]{2}/(?:#/)?player(?:\.html)?\?id=(?P<id>[a-z0-9]{32})'
    _API_URL = 'https://www.pokemon.com/api/pokemontv/v2/channels/{0:}'
    _TESTS = [{
        'url': 'https://watch.pokemon.com/en-us/player.html?id=8309a40969894a8e8d5bc1311e9c5667',
        'md5': '62833938a31e61ab49ada92f524c42ff',
        'info_dict': {
            'id': '8309a40969894a8e8d5bc1311e9c5667',
            'ext': 'mp4',
            'title': 'Lillier and the Staff!',
            'description': 'md5:338841b8c21b283d24bdc9b568849f04',
        },
    }, {
        'url': 'https://watch.pokemon.com/en-us/#/player?id=3fe7752ba09141f0b0f7756d1981c6b2',
        'only_matching': True,
    }, {
        'url': 'https://watch.pokemon.com/de-de/player.html?id=b3c402e111a4459eb47e12160ab0ba07',
        'only_matching': True,
    }]
    def _extract_media(self, channel_array, video_id):
        for channel in channel_array:
            for media in channel.get('media'):
                if media.get('id') == video_id:
                    return media
        return None
    def _real_extract(self, url):
        """Return a LimelightMedia URL result, enriched with channel API metadata.

        When the 'listformats' param is set, the bare URL result is returned
        without any network request. Otherwise the watch page and the channel
        API are queried and the matching media entry supplies the metadata.

        Raises ExtractorError (expected) when no media entry matches the ID.
        """
        video_id = self._match_id(url)
        info = {
            '_type': 'url',
            'id': video_id,
            'url': f'limelight:media:{video_id}',
            'ie_key': 'LimelightMedia',
        }
        # API call can be avoided entirely if we are listing formats
        if self.get_param('listformats', False):
            return info
        webpage = self._download_webpage(url, video_id)
        build_vars = self._parse_json(self._search_regex(
            r'(?s)buildVars\s*=\s*({.*?})', webpage, 'build vars'),
            video_id, transform_source=js_to_json)
        region = build_vars.get('region')
        channel_array = self._download_json(self._API_URL.format(region), video_id)
        video_data = self._extract_media(channel_array, video_id)
        if video_data is None:
            raise ExtractorError(
                f'Video {video_id} does not exist', expected=True)
        info['_type'] = 'url_transparent'
        # The thumbnail is optional metadata: a missing or null 'images' value
        # must not abort extraction
        images = video_data.get('images') or {}
        return merge_dicts(info, {
            'title': video_data.get('title'),
            'description': video_data.get('description'),
            'thumbnail': images.get('medium') or images.get('small'),
            'series': 'Pokémon',
            'season_number': int_or_none(video_data.get('season')),
            'episode': video_data.get('title'),
            'episode_number': int_or_none(video_data.get('episode')),
        })
--- a/yt_dlp/extractor/rutube.py
+++ b/yt_dlp/extractor/rutube.py
@ -13,7 +13,10 @@ from ..utils import (
    unified_timestamp,
    url_or_none,
 )
-from ..utils.traversal import traverse_obj
+from ..utils.traversal import (
    subs_list_to_dict,
    traverse_obj,
 )
 class RutubeBaseIE(InfoExtractor):
@ -92,11 +95,11 @@ class RutubeBaseIE(InfoExtractor):
                hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls')
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)
-        for caption in traverse_obj(options, ('captions', lambda _, v: url_or_none(v['file']))):
+        self._merge_subtitles(traverse_obj(options, ('captions', ..., {
-            subtitles.setdefault(caption.get('code') or 'ru', []).append({
+            'id': 'code',
-                'url': caption['file'],
+            'url': 'file',
-                'name': caption.get('langTitle'),
+            'name': ('langTitle', {str}),
-            })
+        }, all, {subs_list_to_dict(lang='ru')})), target=subtitles)
        return formats, subtitles
    def _download_and_extract_formats_and_subtitles(self, video_id, query=None):
--- a/yt_dlp/extractor/sonyliv.py
+++ b/yt_dlp/extractor/sonyliv.py
@ -199,8 +199,9 @@ class SonyLIVSeriesIE(InfoExtractor):
        },
    }]
    _API_BASE = 'https://apiv2.sonyliv.com/AGL'
    _SORT_ORDERS = ('asc', 'desc')
-    def _entries(self, show_id):
+    def _entries(self, show_id, sort_order):
        headers = {
            'Accept': 'application/json, text/plain, */*',
            'Referer': 'https://www.sonyliv.com',
@ -215,6 +216,9 @@ class SonyLIVSeriesIE(InfoExtractor):
                'from': '0',
                'to': '49',
            }), ('resultObj', 'containers', 0, 'containers', lambda _, v: int_or_none(v['id'])))
        if sort_order == 'desc':
            seasons = reversed(seasons)
        for season in seasons:
            season_id = str(season['id'])
            note = traverse_obj(season, ('metadata', 'title', {str})) or 'season'
@ -226,7 +230,7 @@ class SonyLIVSeriesIE(InfoExtractor):
                        'from': str(cursor),
                        'to': str(cursor + 99),
                        'orderBy': 'episodeNumber',
-                        'sortOrder': 'asc',
+                        'sortOrder': sort_order,
                    }), ('resultObj', 'containers', 0, 'containers', lambda _, v: int_or_none(v['id'])))
                if not episodes:
                    break
@ -237,4 +241,10 @@ class SonyLIVSeriesIE(InfoExtractor):
    def _real_extract(self, url):
        show_id = self._match_id(url)
-        return self.playlist_result(self._entries(show_id), playlist_id=show_id)
+
        sort_order = self._configuration_arg('sort_order', [self._SORT_ORDERS[0]])[0]
        if sort_order not in self._SORT_ORDERS:
            raise ValueError(
                f'Invalid sort order "{sort_order}". Allowed values are: {", ".join(self._SORT_ORDERS)}')
        return self.playlist_result(self._entries(show_id, sort_order), playlist_id=show_id)
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@ -241,7 +241,7 @@ class SoundcloudBaseIE(InfoExtractor):
                    format_urls.add(format_url)
                    formats.append({
                        'format_id': 'download',
-                        'ext': urlhandle_detect_ext(urlh) or 'mp3',
+                        'ext': urlhandle_detect_ext(urlh, default='mp3'),
                        'filesize': int_or_none(urlh.headers.get('Content-Length')),
                        'url': format_url,
                        'quality': 10,
--- a/yt_dlp/extractor/uliza.py
+++ b/yt_dlp/extractor/uliza.py
@ -0,0 +1,113 @@
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    int_or_none,
    make_archive_id,
    parse_qs,
    time_seconds,
 )
 from ..utils.traversal import traverse_obj
 class UlizaPlayerIE(InfoExtractor):
    _VALID_URL = r'https://player-api\.p\.uliza\.jp/v1/players/[^?#]+\?(?:[^#]*&)?name=(?P<id>[^#&]+)'
    _TESTS = [{
        'url': 'https://player-api.p.uliza.jp/v1/players/timeshift-disabled/pia/admin?type=normal&playerobjectname=ulizaPlayer&name=livestream01_dvr&repeatable=true',
        'info_dict': {
            'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
            'ext': 'mp4',
            'title': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
            'live_status': 'was_live',
            '_old_archive_ids': ['piaulizaportal 88f3109a-f503-4d0f-a9f7-9f39ac745d84'],
        },
    }, {
        'url': 'https://player-api.p.uliza.jp/v1/players/uliza_jp_gallery_normal/promotion/admin?type=presentation&name=cookings&targetid=player1',
        'info_dict': {
            'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
            'ext': 'mp4',
            'title': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
            'live_status': 'not_live',
            '_old_archive_ids': ['piaulizaportal ae350126-5e22-4a7f-a8ac-8d0fd448b800'],
        },
    }, {
        'url': 'https://player-api.p.uliza.jp/v1/players/default-player/pia/admin?type=normal&name=pia_movie_uliza_fix&targetid=ulizahtml5&repeatable=true',
        'info_dict': {
            'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
            'ext': 'mp4',
            'title': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
            'live_status': 'not_live',
            '_old_archive_ids': ['piaulizaportal 0644ecc8-e354-41b4-b957-3b08a2d63df1'],
        },
    }]
    def _real_extract(self, url):
        """Extract HLS formats and live status from a ULIZA player API URL.

        The video ID is the ``ss`` query value of the m3u8 URL found in the
        player data, falling back to the ``name`` value matched from the URL.
        """
        player_name = self._match_id(url)
        player_data = self._download_webpage(
            url, player_name,
            headers={'Referer': 'https://player-api.p.uliza.jp/'},
            note='Fetching player data', errnote='Unable to fetch player data')

        m3u8_url = self._search_regex(
            r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data, 'm3u8 url')
        video_id = parse_qs(m3u8_url).get('ss', [player_name])[0]
        formats = self._extract_m3u8_formats(m3u8_url, video_id)

        # The path segment of the first format URL tells the stream type apart
        first_format_url = traverse_obj(formats, (0, 'url'))
        m3u8_type = self._search_regex(
            r'/hls/(dvr|video)/', first_format_url, 'm3u8 type', default=None)
        live_status_by_type = {
            'video': 'is_live',
            'dvr': 'was_live',  # short-term archives
        }

        return {
            'id': video_id,
            'title': video_id,
            'formats': formats,
            # anything else is VOD or a long-term archive
            'live_status': live_status_by_type.get(m3u8_type, 'not_live'),
            '_old_archive_ids': [make_archive_id('PIAULIZAPortal', video_id)],
        }
 class UlizaPortalIE(InfoExtractor):
    IE_DESC = 'ulizaportal.jp'
    _VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
    _TESTS = [{
        'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
        'info_dict': {
            'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
            'display_id': '005f18b7-e810-5618-cb82-0987c5755d44',
            'title': 'プレゼンテーションプレイヤーのサンプル',
            'live_status': 'not_live',
            '_old_archive_ids': ['piaulizaportal ae350126-5e22-4a7f-a8ac-8d0fd448b800'],
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }, {
        'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
        'info_dict': {
            'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
            'display_id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
            'title': '【確認用】視聴サンプルページ（ULIZA）',
            'live_status': 'not_live',
            '_old_archive_ids': ['piaulizaportal 0644ecc8-e354-41b4-b957-3b08a2d63df1'],
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }]
    def _real_extract(self, url):
        """Resolve a portal page to its embedded player and delegate to UlizaPlayerIE.

        Raises ExtractorError (expected) when the URL carries an ``expires``
        query value that is not later than the current time.
        """
        page_id = self._match_id(url)

        # Signed links carry their expiry timestamp in the query string
        expiry = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
        if expiry and time_seconds() >= expiry:
            raise ExtractorError('The link is expired', video_id=page_id, expected=True)

        webpage = self._download_webpage(url, page_id)
        # The page embeds the player through a <script src=...> tag
        player_data_url = self._search_regex(
            r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
            webpage, 'player data url')
        page_title = self._html_extract_title(webpage)

        return self.url_result(
            player_data_url, UlizaPlayerIE, url_transparent=True,
            display_id=page_id, video_title=page_title)
--- a/yt_dlp/extractor/veoh.py
+++ b/yt_dlp/extractor/veoh.py
@ -1,189 +0,0 @@
 import functools
 import json
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    OnDemandPagedList,
    int_or_none,
    parse_duration,
    qualities,
    remove_start,
    strip_or_none,
 )
 class VeohIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|videos|embed|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'
    _TESTS = [{
        'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
        'md5': '620e68e6a3cff80086df3348426c9ca3',
        'info_dict': {
            'id': 'v56314296nk7Zdmz3',
            'ext': 'mp4',
            'title': 'Straight Backs Are Stronger',
            'description': 'md5:203f976279939a6dc664d4001e13f5f4',
            'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th56314296\\.jpg(\\?.*)?',
            'uploader': 'LUMOback',
            'duration': 46,
            'view_count': int,
            'average_rating': int,
            'comment_count': int,
            'age_limit': 0,
            'categories': ['technology_and_gaming'],
            'tags': ['posture', 'posture', 'sensor', 'back', 'pain', 'wearable', 'tech', 'lumo'],
        },
    }, {
        'url': 'http://www.veoh.com/embed/v56314296nk7Zdmz3',
        'only_matching': True,
    }, {
        'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
        'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
        'info_dict': {
            'id': '27701988',
            'ext': 'mp4',
            'title': 'Chile workers cover up to avoid skin damage',
            'description': 'md5:2bd151625a60a32822873efc246ba20d',
            'uploader': 'afp-news',
            'duration': 123,
        },
        'skip': 'This video has been deleted.',
    }, {
        'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
        'md5': '4fde7b9e33577bab2f2f8f260e30e979',
        'note': 'Embedded ooyala video',
        'info_dict': {
            'id': '69525809',
            'ext': 'mp4',
            'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
            'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
            'uploader': 'newsy-videos',
        },
        'skip': 'This video has been deleted.',
    }, {
        'url': 'http://www.veoh.com/watch/e152215AJxZktGS',
        'only_matching': True,
    }, {
        'url': 'https://www.veoh.com/videos/v16374379WA437rMH',
        'md5': 'cceb73f3909063d64f4b93d4defca1b3',
        'info_dict': {
            'id': 'v16374379WA437rMH',
            'ext': 'mp4',
            'title': 'Phantasmagoria 2, pt. 1-3',
            'description': 'Phantasmagoria: a Puzzle of Flesh',
            'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th16374379\\.jpg(\\?.*)?',
            'uploader': 'davidspackage',
            'duration': 968,
            'view_count': int,
            'average_rating': int,
            'comment_count': int,
            'age_limit': 18,
            'categories': ['technology_and_gaming', 'gaming'],
            'tags': ['puzzle', 'of', 'flesh'],
        },
    }]
    def _real_extract(self, url):
        """Extract a single Veoh video from the getVideo JSON endpoint.

        'video' and its 'title' are required (a missing key raises KeyError);
        every other field is read as optional.
        """
        video_id = self._match_id(url)
        metadata = self._download_json(
            'https://www.veoh.com/watch/getVideo/' + video_id,
            video_id)
        video = metadata['video']
        title = video['title']
        thumbnail_url = None
        q = qualities(['Regular', 'HQ'])
        formats = []
        # `.get(key, {})` only covers a missing key; `or {}` also covers an explicit null
        for f_id, f_url in (video.get('src') or {}).items():
            if not f_url:
                continue
            # The 'poster' entry is the thumbnail, every other entry is a media URL
            if f_id == 'poster':
                thumbnail_url = f_url
            else:
                formats.append({
                    'format_id': f_id,
                    'quality': q(f_id),
                    'url': f_url,
                })
        categories = metadata.get('categoryPath')
        if not categories:
            # Fall back to the single category, dropping its 'category_' prefix
            category = remove_start(strip_or_none(video.get('category')), 'category_')
            categories = [category] if category else None
        tags = video.get('tags')
        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': thumbnail_url,
            # Same null-safety as for 'src' above
            'uploader': (video.get('author') or {}).get('nickname'),
            'duration': int_or_none(video.get('lengthBySec')) or parse_duration(video.get('length')),
            'view_count': int_or_none(video.get('views')),
            'formats': formats,
            'average_rating': int_or_none(video.get('rating')),
            'comment_count': int_or_none(video.get('numOfComments')),
            'age_limit': 18 if video.get('contentRatingId') == 2 else 0,
            'categories': categories,
            'tags': tags.split(', ') if tags else None,
        }
 class VeohUserIE(VeohIE):  # XXX: Do not subclass from concrete IE
    _VALID_URL = r'https?://(?:www\.)?veoh\.com/users/(?P<id>[\w-]+)'
    IE_NAME = 'veoh:user'
    _TESTS = [
        {
            'url': 'https://www.veoh.com/users/valentinazoe',
            'info_dict': {
                'id': 'valentinazoe',
                'title': 'valentinazoe (Uploads)',
            },
            'playlist_mincount': 75,
        },
        {
            'url': 'https://www.veoh.com/users/PiensaLibre',
            'info_dict': {
                'id': 'PiensaLibre',
                'title': 'PiensaLibre (Uploads)',
            },
            'playlist_mincount': 2,
        }]
    _PAGE_SIZE = 16
    def _fetch_page(self, uploader, page):
        """Yield a URL result for each video on one page of an uploader's videos.

        `page` is zero-based; the request and the progress note use page + 1.
        Raises ExtractorError with the response's 'message' when the response
        does not report success.
        """
        page_number = page + 1
        request_headers = {
            'x-csrf-token': self._TOKEN,
            'content-type': 'application/json;charset=UTF-8',
        }
        payload = json.dumps({
            'username': uploader,
            'maxResults': self._PAGE_SIZE,
            'page': page_number,
            'requestName': 'userPage',
        }).encode()

        response = self._download_json(
            'https://www.veoh.com/users/published/videos', uploader,
            note=f'Downloading videos page {page_number}',
            headers=request_headers, data=payload)
        if not response.get('success'):
            raise ExtractorError(response['message'])

        for video in response['videos']:
            yield self.url_result(
                f'https://www.veoh.com/watch/{video["permalinkId"]}', VeohIE,
                video['permalinkId'], video.get('title'))
    def _real_initialize(self):
        """Download the Veoh front page and store its CSRF token in self._TOKEN."""
        homepage = self._download_webpage(
            'https://www.veoh.com', None,
            note='Downloading authorization token')
        # The token is a 40-character alphanumeric value quoted after `csrfToken:`
        token = self._search_regex(
            r'csrfToken:\s*(["\'])(?P<token>[0-9a-zA-Z]{40})\1', webpage := homepage,
            'request token', group='token') if False else self._search_regex(
            r'csrfToken:\s*(["\'])(?P<token>[0-9a-zA-Z]{40})\1', homepage,
            'request token', group='token')
        self._TOKEN = token
    def _real_extract(self, url):
        """Return a playlist of the uploader's videos, paged through _fetch_page."""
        uploader = self._match_id(url)
        # Pages are requested through OnDemandPagedList, _PAGE_SIZE entries at a time
        fetch_page = functools.partial(self._fetch_page, uploader)
        entries = OnDemandPagedList(fetch_page, self._PAGE_SIZE)
        return self.playlist_result(entries, uploader, f'{uploader} (Uploads)')
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@ -5087,7 +5087,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
    def _rich_entries(self, rich_grid_renderer):
        renderer = traverse_obj(
            rich_grid_renderer,
-            ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer', 'shortsLockupViewModel'), any)) or {}
+            ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer', 'shortsLockupViewModel', 'lockupViewModel'), any)) or {}
        video_id = renderer.get('videoId')
        if video_id:
            yield self._extract_video(renderer)
@ -5114,6 +5114,18 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
                })),
                thumbnails=self._extract_thumbnails(renderer, 'thumbnail', final_key='sources'))
            return
        # lockupViewModel extraction
        content_id = renderer.get('contentId')
        if content_id and renderer.get('contentType') == 'LOCKUP_CONTENT_TYPE_PODCAST':
            yield self.url_result(
                f'https://www.youtube.com/playlist?list={content_id}',
                ie=YoutubeTabIE, video_id=content_id,
                **traverse_obj(renderer, {
                    'title': ('metadata', 'lockupMetadataViewModel', 'title', 'content', {str}),
                }),
                thumbnails=self._extract_thumbnails(renderer, (
                    'contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail', 'thumbnailViewModel', 'image'), final_key='sources'))
            return
    def _video_entry(self, video_renderer):
        video_id = video_renderer.get('videoId')
@ -6706,22 +6718,22 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
        },
        'playlist_count': 0,
    }, {
-        # Podcasts tab, with rich entry playlistRenderers
+        # Podcasts tab, with rich entry lockupViewModel
        'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
        'info_dict': {
            'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
            'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
            'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
            'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
-            'title': '99 Percent Invisible - Podcasts',
+            'title': '99% Invisible - Podcasts',
-            'uploader': '99 Percent Invisible',
+            'uploader': '99% Invisible',
            'channel_follower_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
            'tags': [],
-            'channel': '99 Percent Invisible',
+            'channel': '99% Invisible',
            'uploader_id': '@99percentinvisiblepodcast',
        },
-        'playlist_count': 0,
+        'playlist_count': 5,
    }, {
        # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
        'url': 'https://www.youtube.com/@AHimitsu/releases',
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@ -419,7 +419,9 @@ def create_parser():
    general.add_option(
        '--flat-playlist',
        action='store_const', dest='extract_flat', const='in_playlist', default=False,
-        help='Do not extract the videos of a playlist, only list them')
+        help=(
            'Do not extract a playlist\'s URL result entries; '
            'some entry metadata may be missing and downloading may be bypassed'))
    general.add_option(
        '--no-flat-playlist',
        action='store_false', dest='extract_flat',
Author	SHA1	Message	Date
Mozi	0164ee9dd8	Merge `e767973d8c` into `da252d9d32`	2024-11-18 02:47:53 +02:00
bashonly	da252d9d32	[cleanup] Misc (#11554 ) Closes #6884 Authored by: bashonly, Grub4K, seproDev Co-authored-by: Simon Sawicki <contact@grub4k.xyz> Co-authored-by: sepro <sepro@sepr0.com>	2024-11-17 23:25:05 +00:00
gillux	e079ffbda6	[ie/litv] Fix extractor (#11071 ) Authored by: jiru	2024-11-17 21:37:15 +00:00
bashonly	2009cb27e1	[ie/SonyLIVSeries] Add `sort_order` extractor-arg (#11569 ) Authored by: bashonly	2024-11-17 21:16:22 +00:00
Jackson Humphrey	f351440f1d	[ie/ctvnews] Fix extractor (#11534 ) Closes #8689 Authored by: jshumphrey, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2024-11-17 21:06:50 +00:00
qbnu	f9d98509a8	[ie/ctvnews] Fix playlist ID extraction (#8892 ) Authored by: qbnu	2024-11-17 19:35:10 +00:00
sepro	37cd7660ea	[ie/youtube:tab] Fix podcasts tab extraction (#11567 ) Authored by: seproDev	2024-11-17 19:46:04 +01:00
ChocoLZS	d867f99622	[ie/PiaLive] Add extractor (#10811 ) Authored by: ChocoLZS	2024-11-17 19:41:57 +01:00
doe1080	10fc719bc7	[cleanup] Remove dead extractors (#11566 ) - Removes MildomClipIE, MildomIE, MildomUserVodIE, MildomVodIE - Removes PokemonIE, PokemonWatchIE - Removes VeohIE, VeohUserIE Closes #3373, Closes #7059 Authored by: doe1080	2024-11-17 16:22:40 +00:00
Mozi	e767973d8c	merge 'master'	2024-11-16 07:57:41 +00:00
Mozi	dc2239391b	imports	2024-11-16 07:57:30 +00:00
Mozi	507b7c6d4a	[ie/nova:embed] Support live and VOD on markiza.sk and tvnoviny.sk	2024-10-13 10:01:53 +00:00