Merge fc5ae7f534 into 37cd7660ea

[ie/youtube:tab] Fix podcasts tab extraction (#11567)
Authored by: seproDev
2024-11-25 08:41:28 +01:00 · 2024-11-17 19:46:31 +01:00 · 2024-11-17 19:46:04 +01:00 · 2024-11-17 19:41:57 +01:00 · 2024-11-17 16:22:40 +00:00 · 2024-11-17 14:12:26 +00:00
11 changed files with 472 additions and 785 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -946,6 +946,10 @@ from .kaltura import KalturaIE
 from .kankanews import KankaNewsIE
 from .karaoketv import KaraoketvIE
 from .kelbyone import KelbyOneIE
 from .kenh14 import (
    Kenh14PlaylistIE,
    Kenh14VideoIE,
 )
 from .khanacademy import (
    KhanAcademyIE,
    KhanAcademyUnitIE,
@ -1135,12 +1139,6 @@ from .microsoftembed import (
    MicrosoftMediusIE,
 )
 from .microsoftstream import MicrosoftStreamIE
 from .mildom import (
    MildomClipIE,
    MildomIE,
    MildomUserVodIE,
    MildomVodIE,
 )
 from .minds import (
    MindsChannelIE,
    MindsGroupIE,
@ -1522,8 +1520,8 @@ from .pgatour import PGATourIE
 from .philharmoniedeparis import PhilharmonieDeParisIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
 from .pialive import PiaLiveIE
 from .piapro import PiaproIE
 from .piaulizaportal import PIAULIZAPortalIE
 from .picarto import (
    PicartoIE,
    PicartoVodIE,
@ -1559,10 +1557,6 @@ from .podbayfm import (
 )
 from .podchaser import PodchaserIE
 from .podomatic import PodomaticIE
 from .pokemon import (
    PokemonIE,
    PokemonWatchIE,
 )
 from .pokergo import (
    PokerGoCollectionIE,
    PokerGoIE,
@ -2256,6 +2250,10 @@ from .ufctv import (
 )
 from .ukcolumn import UkColumnIE
 from .uktvplay import UKTVPlayIE
 from .uliza import (
    UlizaPlayerIE,
    UlizaPortalIE,
 )
 from .umg import UMGDeIE
 from .unistra import UnistraIE
 from .unity import UnityIE
@ -2284,10 +2282,6 @@ from .utreon import UtreonIE
 from .varzesh3 import Varzesh3IE
 from .vbox7 import Vbox7IE
 from .veo import VeoIE
 from .veoh import (
    VeohIE,
    VeohUserIE,
 )
 from .vesti import VestiIE
 from .vevo import (
    VevoIE,
--- a/yt_dlp/extractor/chaturbate.py
+++ b/yt_dlp/extractor/chaturbate.py
@ -79,7 +79,7 @@ class ChaturbateIE(InfoExtractor):
            'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True),
        }
-    def _extract_from_webpage(self, video_id, tld):
+    def _extract_from_html(self, video_id, tld):
        webpage = self._download_webpage(
            f'https://chaturbate.{tld}/{video_id}/', video_id,
            headers=self.geo_verification_headers(), impersonate=True)
@ -151,4 +151,4 @@ class ChaturbateIE(InfoExtractor):
    def _real_extract(self, url):
        video_id, tld = self._match_valid_url(url).group('id', 'tld')
-        return self._extract_from_api(video_id, tld) or self._extract_from_webpage(video_id, tld)
+        return self._extract_from_api(video_id, tld) or self._extract_from_html(video_id, tld)
--- a/yt_dlp/extractor/kenh14.py
+++ b/yt_dlp/extractor/kenh14.py
@ -0,0 +1,160 @@
 from .common import InfoExtractor
 from ..utils import (
    clean_html,
    extract_attributes,
    get_element_by_class,
    get_element_html_by_attribute,
    get_elements_html_by_class,
    int_or_none,
    parse_duration,
    parse_iso8601,
    remove_start,
    strip_or_none,
    unescapeHTML,
    update_url,
    url_or_none,
 )
 from ..utils.traversal import traverse_obj
 class Kenh14VideoIE(InfoExtractor):
    """Extractor for single video pages on video.kenh14.vn."""

    _VALID_URL = r'https?://video\.kenh14\.vn/(?:video/)?[\w-]+-(?P<id>[0-9]+)\.chn'
    _TESTS = [{
        'url': 'https://video.kenh14.vn/video/mo-hop-iphone-14-pro-max-nguon-unbox-therapy-316173.chn',
        'md5': '1ed67f9c3a1e74acf15db69590cf6210',
        'info_dict': {
            'id': '316173',
            'ext': 'mp4',
            'title': 'Video mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
            'description': 'Video mở hộp iPhone 14 Pro MaxVideo mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
            'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
            'tags': [],
            'uploader': 'Unbox Therapy',
            'upload_date': '20220517',
            'view_count': int,
            'duration': 722.86,
            'timestamp': 1652764468,
        },
    }, {
        'url': 'https://video.kenh14.vn/video-316174.chn',
        'md5': '2b41877d2afaf4a3f487ceda8e5c7cbd',
        'info_dict': {
            'id': '316174',
            'ext': 'mp4',
            'title': 'Khoảnh khắc VĐV nằm gục khóc sau chiến thắng: 7 năm trời Việt Nam mới có HCV kiếm chém nữ, chỉ có 8 tháng để khổ luyện trước khi lên sàn đấu',
            'description': 'md5:de86aa22e143e2b277bce8ec9c6f17dc',
            'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
            'tags': [],
            'upload_date': '20220517',
            'view_count': int,
            'duration': 70.04,
            'timestamp': 1652766021,
        },
    }, {
        'url': 'https://video.kenh14.vn/0-344740.chn',
        'md5': 'b843495d5e728142c8870c09b46df2a9',
        'info_dict': {
            'id': '344740',
            'ext': 'mov',
            'title': 'Kỳ Duyên đầy căng thẳng trong buổi ra quân đi Miss Universe, nghi thức tuyên thuệ lần đầu xuất hiện gây nhiều tranh cãi',
            'description': 'md5:2a2dbb4a7397169fb21ee68f09160497',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.jpg$',
            'tags': ['kỳ duyên', 'Kỳ Duyên tuyên thuệ', 'miss universe'],
            'uploader': 'Quang Vũ',
            'upload_date': '20241024',
            'view_count': int,
            'duration': 198.88,
            'timestamp': 1729741590,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict from the page's player element and two JSON lookups.

        The direct file named by the player's ``data-vid`` attribute is always
        offered as the ``http`` format; HLS/DASH manifests are added when the
        ``<direct_url>.json`` document lists them.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The element with type="VideoStream" carries the media location in `data-vid`
        player_html = get_element_html_by_attribute('type', 'VideoStream', webpage)
        attrs = extract_attributes(player_html or '')
        direct_url = attrs['data-vid']

        # Metadata lookup is best-effort (fatal=False); page fallbacks are used below
        file_name = remove_start(direct_url, 'kenh14cdn.com/')
        metadata = self._download_json(
            f'https://api.kinghub.vn/video/api/v1/detailVideoByGet?FileName={file_name}',
            video_id, fatal=False)

        formats = [{'url': f'https://{direct_url}', 'format_id': 'http', 'quality': 1}]
        subtitles = {}

        video_data = self._download_json(
            f'https://{direct_url}.json', video_id, note='Downloading video data', fatal=False)

        # (key in video_data, manifest extractor, keyword naming the format-id prefix)
        manifest_extractors = (
            ('hls', self._extract_m3u8_formats_and_subtitles, {'m3u8_id': 'hls'}),
            ('mpd', self._extract_mpd_formats_and_subtitles, {'mpd_id': 'dash'}),
        )
        for key, extract_manifest, id_kwarg in manifest_extractors:
            manifest_url = traverse_obj(video_data, (key, {url_or_none}))
            if not manifest_url:
                continue
            fmts, subs = extract_manifest(manifest_url, video_id, fatal=False, **id_kwarg)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        api_fields = traverse_obj(metadata, {
            'duration': ('duration', {parse_duration}),
            'uploader': ('author', {strip_or_none}),
            'timestamp': ('uploadtime', {parse_iso8601(delimiter=' ')}),
            'view_count': ('views', {int_or_none}),
        })
        # Title preference: API metadata, then og:title, then the on-page title element
        title = (
            traverse_obj(metadata, ('title', {strip_or_none}))
            or clean_html(self._og_search_title(webpage))
            or clean_html(get_element_by_class('vdbw-title', webpage)))
        description = (
            clean_html(self._og_search_description(webpage))
            or clean_html(get_element_by_class('vdbw-sapo', webpage)))
        thumbnail = self._og_search_thumbnail(webpage) or attrs.get('data-thumb')
        # The keywords meta value is split on ';' and empty pieces are filtered out
        tags = traverse_obj(self._html_search_meta('keywords', webpage), (
            {lambda keywords: keywords.split(';')}, ..., filter))

        return {
            **api_fields,
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'description': description,
            'thumbnail': thumbnail,
            'tags': tags,
        }
 class Kenh14PlaylistIE(InfoExtractor):
    """Extractor for playlist pages on video.kenh14.vn."""

    _VALID_URL = r'https?://video\.kenh14\.vn/playlist/[\w-]+-(?P<id>[0-9]+)\.chn'
    _TESTS = [{
        'url': 'https://video.kenh14.vn/playlist/tran-tinh-naked-love-mua-2-71.chn',
        'info_dict': {
            'id': '71',
            'title': 'Trần Tình (Naked love) mùa 2',
            'description': 'md5:e9522339304956dea931722dd72eddb2',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
        },
        'playlist_count': 9,
    }, {
        'url': 'https://video.kenh14.vn/playlist/0-72.chn',
        'info_dict': {
            'id': '72',
            'title': 'Lau Lại Đầu Từ',
            'description': 'Cùng xem xưa và nay có gì khác biệt nhé!',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
        },
        'playlist_count': 6,
    }]

    def _real_extract(self, url):
        """Return a playlist whose entries are the page's ``video-item`` elements.

        Title and description come from the ``category-detail`` element, falling
        back to the first JSON-LD object that has both ``name`` and
        ``alternateName``.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        category_detail = get_element_by_class('category-detail', webpage) or ''
        embed_info = traverse_obj(
            self._yield_json_ld(webpage, playlist_id),
            (lambda _, v: v['name'] and v['alternateName'], any)) or {}

        def video_page_url(item_html):
            # Each item is turned into a canonical video page URL from its `data-id`
            return 'https://video.kenh14.vn/video/video-{}.chn'.format(
                extract_attributes(item_html)['data-id'])

        video_items = get_elements_html_by_class('video-item', webpage)
        title = (
            clean_html(get_element_by_class('name', category_detail))
            or unescapeHTML(embed_info.get('name')))
        description = (
            clean_html(get_element_by_class('description', category_detail))
            or unescapeHTML(embed_info.get('alternateName')))
        # The og:image URL is kept only if valid, and its query string is dropped
        thumbnail = traverse_obj(
            self._og_search_thumbnail(webpage),
            ({url_or_none}, {update_url(query=None)}))

        return self.playlist_from_matches(
            video_items, playlist_id, title,
            getter=video_page_url, ie=Kenh14VideoIE,
            playlist_description=description, thumbnail=thumbnail)
--- a/yt_dlp/extractor/litv.py
+++ b/yt_dlp/extractor/litv.py
@ -1,30 +1,35 @@
 import json
 import uuid
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    int_or_none,
    join_nonempty,
    smuggle_url,
    traverse_obj,
    try_call,
    unsmuggle_url,
    urljoin,
 )
 class LiTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)'
+    _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:[^/?#]+/watch/|vod/[^/?#]+/content\.do\?content_id=)(?P<id>[\w-]+)'
-    _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?content_id=%s'
+    _URL_TEMPLATE = 'https://www.litv.tv/%s/watch/%s'
    _GEO_COUNTRIES = ['TW']
    _TESTS = [{
-        'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
+        'url': 'https://www.litv.tv/drama/watch/VOD00041610',
        'info_dict': {
            'id': 'VOD00041606',
            'title': '花千骨',
        },
        'playlist_count': 51,  # 50 episodes + 1 trailer
    }, {
-        'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
+        'url': 'https://www.litv.tv/drama/watch/VOD00041610',
        'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a',
        'info_dict': {
            'id': 'VOD00041610',
@ -32,16 +37,15 @@ class LiTVIE(InfoExtractor):
            'title': '花千骨第1集',
            'thumbnail': r're:https?://.*\.jpg$',
            'description': '《花千骨》陸劇線上看。十六年前，平靜的村莊內，一名女嬰隨異相出生，途徑此地的蜀山掌門清虛道長算出此女命運非同一般，她體內散發的異香易招惹妖魔。一念慈悲下，他在村莊周邊設下結界阻擋妖魔入侵，讓其年滿十六後去蜀山，並賜名花千骨。',
-            'categories': ['奇幻', '愛情', '中國', '仙俠'],
+            'categories': ['奇幻', '愛情', '仙俠', '古裝'],
            'episode': 'Episode 1',
            'episode_number': 1,
        },
        'params': {
            'noplaylist': True,
        },
        'skip': 'Georestricted to Taiwan',
    }, {
-        'url': 'https://www.litv.tv/promo/miyuezhuan/?content_id=VOD00044841&',
+        'url': 'https://www.litv.tv/drama/watch/VOD00044841',
        'md5': '88322ea132f848d6e3e18b32a832b918',
        'info_dict': {
            'id': 'VOD00044841',
@ -55,94 +59,62 @@ class LiTVIE(InfoExtractor):
    def _extract_playlist(self, playlist_data, content_type):
        all_episodes = [
            self.url_result(smuggle_url(
-                self._URL_TEMPLATE % (content_type, episode['contentId']),
+                self._URL_TEMPLATE % (content_type, episode['content_id']),
                {'force_noplaylist': True}))  # To prevent infinite recursion
-            for episode in traverse_obj(playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))]
+            for episode in traverse_obj(playlist_data, ('seasons', ..., 'episodes', lambda _, v: v['content_id']))]
-        return self.playlist_result(all_episodes, playlist_data['contentId'], playlist_data.get('title'))
+        return self.playlist_result(all_episodes, playlist_data['content_id'], playlist_data.get('title'))
    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        vod_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']
-        if self._search_regex(
+        program_info = traverse_obj(vod_data, ('programInformation', {dict})) or {}
-                r'(?i)<meta\s[^>]*http-equiv="refresh"\s[^>]*content="[0-9]+;\s*url=https://www\.litv\.tv/"',
+        playlist_data = traverse_obj(vod_data, ('seriesTree'))
-                webpage, 'meta refresh redirect', default=False, group=0):
+        if playlist_data and self._yes_playlist(program_info.get('series_id'), video_id, smuggled_data):
-            raise ExtractorError('No such content found', expected=True)
+            return self._extract_playlist(playlist_data, program_info.get('content_type'))
-        program_info = self._parse_json(self._search_regex(
+        asset_id = traverse_obj(program_info, ('assets', 0, 'asset_id', {str}))
-            r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
+        if asset_id:  # This is a VOD
-            video_id)
+            media_type = 'vod'
-
+        else:  # This is a live stream
-        # In browsers `getProgramInfo` request is always issued. Usually this
+            asset_id = program_info['content_id']
-        # endpoint gives the same result as the data embedded in the webpage.
+            media_type = program_info['content_type']
        # If, for some reason, there are no embedded data, we do an extra request.
        if 'assetId' not in program_info:
            program_info = self._download_json(
                'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
                query={'contentId': video_id},
                headers={'Accept': 'application/json'})
        series_id = program_info['seriesId']
        if self._yes_playlist(series_id, video_id, smuggled_data):
            playlist_data = self._download_json(
                'https://www.litv.tv/vod/ajax/getSeriesTree', video_id,
                query={'seriesId': series_id}, headers={'Accept': 'application/json'})
            return self._extract_playlist(playlist_data, program_info['contentType'])
        video_data = self._parse_json(self._search_regex(
            r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
            webpage, 'video data', default='{}'), video_id)
        if not video_data:
            payload = {'assetId': program_info['assetId']}
        puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
        if puid:
-                payload.update({
+            endpoint = 'get-urls'
                    'type': 'auth',
                    'puid': puid,
                })
                endpoint = 'getUrl'
        else:
-                payload.update({
+            puid = str(uuid.uuid4())
-                    'watchDevices': program_info['watchDevices'],
+            endpoint = 'get-urls-no-auth'
                    'contentType': program_info['contentType'],
                })
                endpoint = 'getMainUrlNoAuth'
        video_data = self._download_json(
-                f'https://www.litv.tv/vod/ajax/{endpoint}', video_id,
+            f'https://www.litv.tv/api/{endpoint}', video_id,
-                data=json.dumps(payload).encode(),
+            data=json.dumps({'AssetId': asset_id, 'MediaType': media_type, 'puid': puid}).encode(),
            headers={'Content-Type': 'application/json'})
-        if not video_data.get('fullpath'):
+        if error := traverse_obj(video_data, ('error', {dict})):
-            error_msg = video_data.get('errorMessage')
+            error_msg = traverse_obj(error, ('message', {str}))
-            if error_msg == 'vod.error.outsideregionerror':
+            if error_msg and 'OutsideRegionError' in error_msg:
                self.raise_geo_restricted('This video is available in Taiwan only')
-            if error_msg:
+            elif error_msg:
                raise ExtractorError(f'{self.IE_NAME} said: {error_msg}', expected=True)
-            raise ExtractorError(f'Unexpected result from {self.IE_NAME}')
+            raise ExtractorError(f'Unexpected error from {self.IE_NAME}')
        formats = self._extract_m3u8_formats(
-            video_data['fullpath'], video_id, ext='mp4',
+            video_data['result']['AssetURLs'][0], video_id, ext='mp4', m3u8_id='hls')
            entry_protocol='m3u8_native', m3u8_id='hls')
        for a_format in formats:
            # LiTV HLS segments doesn't like compressions
            a_format.setdefault('http_headers', {})['Accept-Encoding'] = 'identity'
        title = program_info['title'] + program_info.get('secondaryMark', '')
        description = program_info.get('description')
        thumbnail = program_info.get('imageFile')
        categories = [item['name'] for item in program_info.get('category', [])]
        episode = int_or_none(program_info.get('episode'))
        return {
            'id': video_id,
            'formats': formats,
-            'title': title,
+            'title': join_nonempty('title', 'secondary_mark', delim='', from_dict=program_info),
-            'description': description,
+            **traverse_obj(program_info, {
-            'thumbnail': thumbnail,
+                'description': ('description', {str}),
-            'categories': categories,
+                'thumbnail': ('picture', {urljoin('https://p-cdnstatic.svc.litv.tv/')}),
-            'episode_number': episode,
+                'categories': ('genres', ..., 'name', {str}),
                'episode_number': ('episode', {int_or_none}),
            }),
        }
--- a/yt_dlp/extractor/mildom.py
+++ b/yt_dlp/extractor/mildom.py
@ -1,291 +0,0 @@
 import functools
 import json
 import uuid
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    OnDemandPagedList,
    determine_ext,
    dict_get,
    float_or_none,
    traverse_obj,
 )
 class MildomBaseIE(InfoExtractor):
    """Shared base for the Mildom extractors; provides the JSON API helper."""

    _GUEST_ID = None

    def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None):
        """Call a Mildom JSON endpoint and return the ``body`` of its reply.

        A guest id is generated on first use and sent with every request.
        When ``body`` is truthy it is sent as a JSON payload.
        Raises ExtractorError when the reply's ``code`` is not 0.
        """
        if not self._GUEST_ID:
            self._GUEST_ID = f'pc-gp-{uuid.uuid4()}'

        payload, headers = None, {}
        if body:
            payload = json.dumps(body).encode()
            headers = {'Content-Type': 'application/json'}

        # Caller-supplied query entries are applied last, so they win on key clashes
        params = {
            '__guest_id': self._GUEST_ID,
            '__platform': 'web',
        }
        params.update(query or {})

        content = self._download_json(
            url, video_id, note=note, data=payload, headers=headers, query=params)
        if content['code'] == 0:
            return content['body']
        raise ExtractorError(
            f'Mildom says: {content["message"]} (code {content["code"]})',
            expected=True)
 class MildomIE(MildomBaseIE):
    """Extractor for a Mildom user's ongoing live stream."""

    IE_NAME = 'mildom'
    IE_DESC = 'Record ongoing live by specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/(?P<id>\d+)'

    def _real_extract(self, url):
        """Fetch studio metadata, the server list and a playback token, then build HLS formats."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(f'https://www.mildom.com/{video_id}', video_id)

        studio = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', video_id,
            note='Downloading live metadata', query={'user_id': video_id})
        # The studio's log_id becomes the result id, falling back to the user id
        live_id = studio.get('log_id', video_id)

        servers = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', live_id,
            note='Downloading live server list', query={
                'user_id': video_id,
                'live_server_type': 'hls',
            })

        token_reply = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/token', live_id,
            note='Obtaining live playback token', body={'host_id': video_id, 'type': 'hls'})
        token = traverse_obj(token_reply, ('data', ..., 'token'), get_all=False)
        if not token:
            raise ExtractorError('Failed to obtain live playback token')

        referer = 'https://www.mildom.com/'
        manifest_headers = {
            'Referer': referer,
            'Origin': 'https://www.mildom.com',
        }
        formats = self._extract_m3u8_formats(
            f'{servers["stream_server"]}/{video_id}_master.m3u8?{token}',
            live_id, 'mp4', headers=manifest_headers)
        # The Referer header is also attached to every format's own http_headers
        for stream in formats:
            stream.setdefault('http_headers', {})['Referer'] = referer

        title = (
            self._html_search_meta('twitter:description', webpage, default=None)
            or traverse_obj(studio, 'anchor_intro'))
        description = traverse_obj(studio, 'intro', 'live_intro', expected_type=str)
        timestamp = float_or_none(studio.get('live_start_ms'), scale=1000)
        uploader = (
            self._html_search_meta('twitter:title', webpage, default=None)
            or traverse_obj(studio, 'loginname'))

        return {
            'id': live_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': video_id,
            'formats': formats,
            'is_live': True,
        }
 class MildomVodIE(MildomBaseIE):
    """Extractor for Mildom playback (VOD) pages."""

    IE_NAME = 'mildom:vod'
    IE_DESC = 'VOD in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+-?[0-9]*)'
    _TESTS = [{
        'url': 'https://www.mildom.com/playback/10882672/10882672-1597662269',
        'info_dict': {
            'id': '10882672-1597662269',
            'ext': 'mp4',
            'title': '始めてのミルダム配信じゃぃ！',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'upload_date': '20200817',
            'duration': 4138.37,
            'description': 'ゲームをしたくて！',
            'timestamp': 1597662269.0,
            'uploader_id': '10882672',
            'uploader': 'kson組長(けいそん)',
        },
    }, {
        'url': 'https://www.mildom.com/playback/10882672/10882672-1597758589870-477',
        'info_dict': {
            'id': '10882672-1597758589870-477',
            'ext': 'mp4',
            'title': '【kson】感染メイズ！麻酔銃で無双する',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'timestamp': 1597759093.0,
            'uploader': 'kson組長(けいそん)',
            'duration': 4302.58,
            'uploader_id': '10882672',
            'description': 'このステージ絶対乗り越えたい',
            'upload_date': '20200818',
        },
    }, {
        'url': 'https://www.mildom.com/playback/10882672/10882672-buha9td2lrn97fk2jme0',
        'info_dict': {
            'id': '10882672-buha9td2lrn97fk2jme0',
            'ext': 'mp4',
            'title': '【kson組長】CART RACER!!!',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'uploader_id': '10882672',
            'uploader': 'kson組長(けいそん)',
            'upload_date': '20201104',
            'timestamp': 1604494797.0,
            'duration': 4657.25,
            'description': 'WTF',
        },
    }]

    def _real_extract(self, url):
        """Build one audio-only format plus one video format per ``video_link`` entry."""
        user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
        webpage = self._download_webpage(f'https://www.mildom.com/playback/{user_id}/{video_id}', video_id)

        detail = self._call_api(
            'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id,
            note='Downloading playback metadata', query={
                'v_id': video_id,
            })
        autoplay = detail['playback']

        audio_format = {
            'url': autoplay['audio_url'],
            'format_id': 'audio',
            'protocol': 'm3u8_native',
            'vcodec': 'none',
            'acodec': 'aac',
            'ext': 'm4a',
        }
        formats = [audio_format, *({
            'format_id': f'video-{link["name"]}',
            'url': link['url'],
            'protocol': 'm3u8_native',
            # `level` is used as the height; width is scaled from the source dimensions
            'width': link['level'] * autoplay['video_width'] // autoplay['video_height'],
            'height': link['level'],
            'vcodec': 'h264',
            'acodec': 'aac',
            'ext': 'mp4',
        } for link in autoplay['video_link'])]

        title = (
            self._html_search_meta(('og:description', 'description'), webpage, default=None)
            or autoplay.get('title'))

        return {
            'id': video_id,
            'title': title,
            'description': traverse_obj(autoplay, 'video_intro'),
            # publish_time and video_length are divided by 1000 before being reported
            'timestamp': float_or_none(autoplay.get('publish_time'), scale=1000),
            'duration': float_or_none(autoplay.get('video_length'), scale=1000),
            'thumbnail': dict_get(autoplay, ('upload_pic', 'video_pic')),
            'uploader': traverse_obj(autoplay, ('author_info', 'login_name')),
            'uploader_id': user_id,
            'formats': formats,
        }
 class MildomClipIE(MildomBaseIE):
    """Extractor for Mildom clip pages."""

    IE_NAME = 'mildom:clip'
    IE_DESC = 'Clip in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/clip/(?P<id>(?P<user_id>\d+)-[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'https://www.mildom.com/clip/10042245-63921673e7b147ebb0806d42b5ba5ce9',
        'info_dict': {
            'id': '10042245-63921673e7b147ebb0806d42b5ba5ce9',
            'title': '全然違ったよ',
            'timestamp': 1619181890,
            'duration': 59,
            'thumbnail': r're:https?://.+',
            'uploader': 'ざきんぽ',
            'uploader_id': '10042245',
        },
    }, {
        'url': 'https://www.mildom.com/clip/10111524-ebf4036e5aa8411c99fb3a1ae0902864',
        'info_dict': {
            'id': '10111524-ebf4036e5aa8411c99fb3a1ae0902864',
            'title': 'かっこいい',
            'timestamp': 1621094003,
            'duration': 59,
            'thumbnail': r're:https?://.+',
            'uploader': '(ルーキー',
            'uploader_id': '10111524',
        },
    }, {
        'url': 'https://www.mildom.com/clip/10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
        'info_dict': {
            'id': '10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
            'title': 'あ',
            'timestamp': 1614769431,
            'duration': 31,
            'thumbnail': r're:https?://.+',
            'uploader': 'ドルゴルスレンギーン＝ダグワドルジ',
            'uploader_id': '10660174',
        },
    }]

    def _real_extract(self, url):
        """Return a single-URL result built from the clip detail endpoint."""
        user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
        webpage = self._download_webpage(f'https://www.mildom.com/clip/{video_id}', video_id)

        clip_detail = self._call_api(
            'https://cloudac-cf-jp.mildom.com/nonolive/videocontent/clip/detail', video_id,
            note='Downloading playback metadata', query={
                'clip_id': video_id,
            })

        # Page meta tags take precedence over the API-provided title
        page_title = self._html_search_meta(
            ('og:description', 'description'), webpage, default=None)
        title = page_title or clip_detail.get('title')

        return {
            'id': video_id,
            'title': title,
            'timestamp': float_or_none(clip_detail.get('create_time')),
            'duration': float_or_none(clip_detail.get('length')),
            'thumbnail': clip_detail.get('cover'),
            'uploader': traverse_obj(clip_detail, ('user_info', 'loginname')),
            'uploader_id': user_id,
            # A missing `url` raises here, since the clip cannot be downloaded without it
            'url': clip_detail['url'],
            'ext': determine_ext(clip_detail.get('url'), 'mp4'),
        }
 class MildomUserVodIE(MildomBaseIE):
    """Extractor that lists all VODs on a Mildom user profile as a playlist."""

    IE_NAME = 'mildom:user:vod'
    IE_DESC = 'Download all VODs from specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/profile/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.mildom.com/profile/10093333',
        'info_dict': {
            'id': '10093333',
            'title': 'Uploads from ねこばたけ',
        },
        'playlist_mincount': 732,
    }, {
        'url': 'https://www.mildom.com/profile/10882672',
        'info_dict': {
            'id': '10882672',
            'title': 'Uploads from kson組長(けいそん)',
        },
        'playlist_mincount': 201,
    }]

    def _fetch_page(self, user_id, page):
        """Yield a url_result for every playback-list entry that has a ``v_id``.

        ``page`` is incremented by one before being sent to the API.
        """
        page_number = page + 1
        reply = self._call_api(
            'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
            user_id, note=f'Downloading page {page_number}', query={
                'user_id': user_id,
                'page': page_number,
                'limit': '30',
            })
        # An empty or missing reply simply produces no entries
        for item in reply or ():
            v_id = item.get('v_id')
            if v_id:
                yield self.url_result(f'https://www.mildom.com/playback/{user_id}/{v_id}')

    def _real_extract(self, url):
        """Return a lazily paged playlist titled after the profile's login name."""
        user_id = self._match_id(url)
        self.to_screen(f'This will download all VODs belonging to user. To download ongoing live video, use "https://www.mildom.com/{user_id}" instead')

        profile_reply = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id,
            query={'user_id': user_id}, note='Downloading user profile')
        profile = profile_reply['user_info']

        # Page size of 30 matches the `limit` requested in _fetch_page
        entries = OnDemandPagedList(functools.partial(self._fetch_page, user_id), 30)
        playlist_title = f'Uploads from {profile["loginname"]}'
        return self.playlist_result(entries, user_id, playlist_title)
--- a/yt_dlp/extractor/pialive.py
+++ b/yt_dlp/extractor/pialive.py
@ -0,0 +1,122 @@
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    clean_html,
    extract_attributes,
    get_element_by_class,
    get_element_html_by_class,
    multipart_encode,
    str_or_none,
    unified_timestamp,
    url_or_none,
 )
 from ..utils.traversal import traverse_obj
 class PiaLiveIE(InfoExtractor):
    """Extractor for PIA LIVE STREAM player pages (player.pia-live.jp).

    The player page only carries program metadata. The media itself is
    delegated to whichever extractor matches the ``src`` of the player tag
    returned by the PIA LIVE API.
    """

    _VALID_URL = r'https?://player\.pia-live\.jp/stream/(?P<id>[\w-]+)'
    _PLAYER_ROOT_URL = 'https://player.pia-live.jp/'
    _PIA_LIVE_API_URL = 'https://api.pia-live.jp'
    # Sent as the 'api_key' form field on the API requests made by this extractor
    _API_KEY = 'kfds)FKFps-dms9e'
    _TESTS = [{
        'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krUDqGOwN4d61dCWQYOd6CTxl4hjya9dsfEZGsM4uGOUdax60lEI4twsXGXf7crmz8Gk__GhupTrWxA7RFRVt76',
        'info_dict': {
            'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
            'display_id': '2431867_001',
            'title': 'こながめでたい日２０２４の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
            'live_status': 'was_live',
            'comment_count': int,
        },
        'params': {
            'getcomments': True,
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
        'skip': 'The video is no longer available',
    }, {
        'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krJdu0GVBVbVy01IwpJ6J3qBEm3d9TCTt1d0eWpsZGj7DrOjVOmS7GAWGwyscMgiThopJvzgWC4H5b-7XQjAfRZ',
        'info_dict': {
            'id': '9ce8b8ba-f6d1-4d1f-83a0-18c3148ded93',
            'display_id': '2431867_002',
            'title': 'こながめでたい日２０２４の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
            'live_status': 'was_live',
            'comment_count': int,
        },
        'params': {
            'getcomments': True,
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
        'skip': 'The video is no longer available',
    }]

    def _extract_var(self, variable, html):
        """Return the quoted string assigned to a JS ``var``/``const``/``let`` named ``variable``."""
        return self._search_regex(
            rf'(?:var|const|let)\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
            html, f'variable {variable}', group='value')

    def _real_extract(self, url):
        """Resolve a player page to a url_transparent result for the embedded player.

        Raises ExtractorError when the page is marked as ended. Returns an
        ``is_upcoming`` stub when the page shows a parseable start date.
        """
        video_key = self._match_id(url)
        webpage = self._download_webpage(url, video_key)
        program_code = self._extract_var('programCode', webpage)
        article_code = self._extract_var('articleCode', webpage)
        title = self._html_extract_title(webpage)

        if get_element_html_by_class('play-end', webpage):
            raise ExtractorError('The video is no longer available', expected=True, video_id=program_code)

        if start_info := clean_html(get_element_by_class('play-waiting__date', webpage)):
            # With fatal=False a failed match yields None, which cannot be unpacked
            # into two names; fall back to a (None, None) pair so we simply skip
            # the "upcoming" branch instead of crashing with a TypeError.
            date, time = self._search_regex(
                r'(?P<date>\d{4}/\d{1,2}/\d{1,2})\([月火水木金土日]\)(?P<time>\d{2}:\d{2})',
                start_info, 'start_info', fatal=False, group=('date', 'time')) or (None, None)
            if date and time:
                # The page shows a bare date/time; it is interpreted here as UTC+09:00
                release_timestamp_str = f'{date} {time} +09:00'
                release_timestamp = unified_timestamp(release_timestamp_str)
                self.raise_no_formats(f'The video will be available after {release_timestamp_str}', expected=True)
                return {
                    'id': program_code,
                    'title': title,
                    'live_status': 'is_upcoming',
                    'release_timestamp': release_timestamp,
                }

        payload, content_type = multipart_encode({
            'play_url': video_key,
            'api_key': self._API_KEY,
        })
        # Shared by the player-tag request below and the chat-tag request in _get_comments
        api_data_and_headers = {
            'data': payload,
            'headers': {'Content-Type': content_type, 'Referer': self._PLAYER_ROOT_URL},
        }

        player_tag_list = self._download_json(
            f'{self._PIA_LIVE_API_URL}/perf/player-tag-list/{program_code}', program_code,
            'Fetching player tag list', 'Unable to fetch player tag list', **api_data_and_headers)

        return self.url_result(
            extract_attributes(player_tag_list['data']['movie_one_tag'])['src'],
            url_transparent=True, title=title, display_id=program_code,
            __post_extractor=self.extract_comments(program_code, article_code, api_data_and_headers))

    def _get_comments(self, program_code, article_code, api_data_and_headers):
        """Yield comment dicts parsed from the chat page's ``_history`` array.

        Every network step is non-fatal; nothing is yielded when the chat URL
        or the comment page cannot be obtained.
        """
        chat_room_url = traverse_obj(self._download_json(
            f'{self._PIA_LIVE_API_URL}/perf/chat-tag-list/{program_code}/{article_code}', program_code,
            'Fetching chat info', 'Unable to fetch chat info', fatal=False, **api_data_and_headers),
            ('data', 'chat_one_tag', {extract_attributes}, 'src', {url_or_none}))
        if not chat_room_url:
            return

        comment_page = self._download_webpage(
            chat_room_url, program_code, 'Fetching comment page', 'Unable to fetch comment page',
            fatal=False, headers={'Referer': self._PLAYER_ROOT_URL})
        if not comment_page:
            return

        # Each history entry is a positional array; fields are picked by index
        yield from traverse_obj(self._search_json(
            r'var\s+_history\s*=', comment_page, 'comment list',
            program_code, contains_pattern=r'\[(?s:.+)\]', fatal=False), (..., {
                'timestamp': (0, {int}),
                'author_is_uploader': (1, {lambda x: x == 2}),
                'author': (2, {str}),
                'text': (3, {str}),
                'id': (4, {str_or_none}),
            }))
--- a/yt_dlp/extractor/piaulizaportal.py
+++ b/yt_dlp/extractor/piaulizaportal.py
@ -1,70 +0,0 @@
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    int_or_none,
    parse_qs,
    time_seconds,
    traverse_obj,
 )
 class PIAULIZAPortalIE(InfoExtractor):
    """Extractor for ulizaportal.jp pages that embed a ULIZA player script."""

    IE_DESC = 'ulizaportal.jp - PIA LIVE STREAM'
    _VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
    _TESTS = [{
        'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
        'info_dict': {
            'id': '005f18b7-e810-5618-cb82-0987c5755d44',
            'title': 'プレゼンテーションプレイヤーのサンプル',
            'live_status': 'not_live',
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }, {
        'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
        'info_dict': {
            'id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
            'title': '【確認用】視聴サンプルページ（ULIZA）',
            'live_status': 'not_live',
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }]

    def _real_extract(self, url):
        """Follow the page's player script to its HLS manifest and build the info dict.

        Raises ExtractorError when the URL's ``expires`` query value is in the past.
        """
        video_id = self._match_id(url)

        # Signed links carry their expiry in the query string; reject stale ones up front
        expires = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
        if expires and expires <= time_seconds():
            raise ExtractorError('The link is expired.', video_id=video_id, expected=True)

        webpage = self._download_webpage(url, video_id)

        player_data_url = self._search_regex(
            r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
            webpage, 'player data url')
        player_data = self._download_webpage(
            player_data_url, video_id, headers={'Referer': 'https://ulizaportal.jp/'},
            note='Fetching player data', errnote='Unable to fetch player data')

        m3u8_url = self._search_regex(
            r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data,
            'm3u8 url', default=None)
        formats = self._extract_m3u8_formats(m3u8_url, video_id, fatal=False)

        # The path segment of the first format URL tells the stream kind apart
        m3u8_type = self._search_regex(
            r'/hls/(dvr|video)/', traverse_obj(formats, (0, 'url')), 'm3u8 type', default=None)
        live_status_by_type = {
            'video': 'is_live',
            'dvr': 'was_live',  # short-term archives
        }

        return {
            'id': video_id,
            'title': self._html_extract_title(webpage),
            'formats': formats,
            'live_status': live_status_by_type.get(m3u8_type, 'not_live'),  # VOD or long-term archives
        }
--- a/yt_dlp/extractor/pokemon.py
+++ b/yt_dlp/extractor/pokemon.py
@ -1,136 +0,0 @@
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    extract_attributes,
    int_or_none,
    js_to_json,
    merge_dicts,
 )
 class PokemonIE(InfoExtractor):
    """Extractor for pokemon.com episode pages; playback is delegated to LimelightMedia."""

    _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))'
    _TESTS = [{
        'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/',
        'md5': '2fe8eaec69768b25ef898cda9c43062e',
        'info_dict': {
            'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4',
            'ext': 'mp4',
            'title': 'The Ol’ Raise and Switch!',
            'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
        },
        'add_id': ['LimelightMedia'],
    }, {
        # no data-video-title
        'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008',
        'info_dict': {
            'id': 'dfbaf830d7e54e179837c50c0c6cc0e1',
            'ext': 'mp4',
            'title': "Pokémon : L'ascension de Darkrai",
            'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5',
        },
        'add_id': ['LimelightMedia'],
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
        'only_matching': True,
    }, {
        'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/',
        'only_matching': True,
    }, {
        'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Read the ``data-video-*`` attributes off the page and return a url_transparent result."""
        video_id, display_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, video_id or display_id)

        # When the URL did not carry an id, accept any 32-character id on the page
        id_pattern = video_id if video_id else '[a-z0-9]{32}'
        video_data = extract_attributes(self._search_regex(
            rf'(<[^>]+data-video-id="{id_pattern}"[^>]*>)',
            webpage, 'video data element'))
        video_id = video_data['data-video-id']

        # Title sources in order of preference: element attribute, meta tag, page heading
        title = video_data.get('data-video-title')
        if not title:
            title = self._html_search_meta('pkm-title', webpage, ' title', default=None)
        if not title:
            title = self._search_regex(
                r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title')

        season_number = int_or_none(video_data.get('data-video-season'))
        episode_number = int_or_none(video_data.get('data-video-episode'))
        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': f'limelight:media:{video_id}',
            'title': title,
            'description': video_data.get('data-video-summary'),
            'thumbnail': video_data.get('data-video-poster'),
            'series': 'Pokémon',
            'season_number': season_number,
            'episode': title,
            'episode_number': episode_number,
            'ie_key': 'LimelightMedia',
        }
 class PokemonWatchIE(InfoExtractor):
    """Extractor for watch.pokemon.com player URLs; playback is delegated to LimelightMedia."""

    _VALID_URL = r'https?://watch\.pokemon\.com/[a-z]{2}-[a-z]{2}/(?:#/)?player(?:\.html)?\?id=(?P<id>[a-z0-9]{32})'
    _API_URL = 'https://www.pokemon.com/api/pokemontv/v2/channels/{0:}'
    _TESTS = [{
        'url': 'https://watch.pokemon.com/en-us/player.html?id=8309a40969894a8e8d5bc1311e9c5667',
        'md5': '62833938a31e61ab49ada92f524c42ff',
        'info_dict': {
            'id': '8309a40969894a8e8d5bc1311e9c5667',
            'ext': 'mp4',
            'title': 'Lillier and the Staff!',
            'description': 'md5:338841b8c21b283d24bdc9b568849f04',
        },
    }, {
        'url': 'https://watch.pokemon.com/en-us/#/player?id=3fe7752ba09141f0b0f7756d1981c6b2',
        'only_matching': True,
    }, {
        'url': 'https://watch.pokemon.com/de-de/player.html?id=b3c402e111a4459eb47e12160ab0ba07',
        'only_matching': True,
    }]

    def _extract_media(self, channel_array, video_id):
        """Return the first media dict whose ``id`` equals ``video_id``, or None if absent.

        Channels with a missing or null ``media`` list are skipped.
        """
        for channel in channel_array:
            # 'media' may be missing or null; treat both as "no media in this channel"
            for media in channel.get('media') or []:
                if media.get('id') == video_id:
                    return media
        return None

    def _real_extract(self, url):
        """Return a Limelight url result, enriched with channel API metadata when available.

        Raises ExtractorError when the video id is not present in the channel listing.
        """
        video_id = self._match_id(url)

        info = {
            '_type': 'url',
            'id': video_id,
            'url': f'limelight:media:{video_id}',
            'ie_key': 'LimelightMedia',
        }

        # API call can be avoided entirely if we are listing formats
        if self.get_param('listformats', False):
            return info

        webpage = self._download_webpage(url, video_id)
        build_vars = self._parse_json(self._search_regex(
            r'(?s)buildVars\s*=\s*({.*?})', webpage, 'build vars'),
            video_id, transform_source=js_to_json)
        region = build_vars.get('region')

        channel_array = self._download_json(self._API_URL.format(region), video_id)
        video_data = self._extract_media(channel_array, video_id)

        if video_data is None:
            raise ExtractorError(
                f'Video {video_id} does not exist', expected=True)

        info['_type'] = 'url_transparent'
        # 'images' may be missing or null; an empty dict keeps the lookups below safe
        images = video_data.get('images') or {}

        return merge_dicts(info, {
            'title': video_data.get('title'),
            'description': video_data.get('description'),
            'thumbnail': images.get('medium') or images.get('small'),
            'series': 'Pokémon',
            'season_number': int_or_none(video_data.get('season')),
            'episode': video_data.get('title'),
            'episode_number': int_or_none(video_data.get('episode')),
        })
--- a/yt_dlp/extractor/uliza.py
+++ b/yt_dlp/extractor/uliza.py
@ -0,0 +1,113 @@
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    int_or_none,
    make_archive_id,
    parse_qs,
    time_seconds,
 )
 from ..utils.traversal import traverse_obj
 class UlizaPlayerIE(InfoExtractor):
    """Extractor for ULIZA player-api script URLs (player-api.p.uliza.jp)."""

    _VALID_URL = r'https://player-api\.p\.uliza\.jp/v1/players/[^?#]+\?(?:[^#]*&)?name=(?P<id>[^#&]+)'
    _TESTS = [{
        'url': 'https://player-api.p.uliza.jp/v1/players/timeshift-disabled/pia/admin?type=normal&playerobjectname=ulizaPlayer&name=livestream01_dvr&repeatable=true',
        'info_dict': {
            'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
            'ext': 'mp4',
            'title': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
            'live_status': 'was_live',
            '_old_archive_ids': ['piaulizaportal 88f3109a-f503-4d0f-a9f7-9f39ac745d84'],
        },
    }, {
        'url': 'https://player-api.p.uliza.jp/v1/players/uliza_jp_gallery_normal/promotion/admin?type=presentation&name=cookings&targetid=player1',
        'info_dict': {
            'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
            'ext': 'mp4',
            'title': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
            'live_status': 'not_live',
            '_old_archive_ids': ['piaulizaportal ae350126-5e22-4a7f-a8ac-8d0fd448b800'],
        },
    }, {
        'url': 'https://player-api.p.uliza.jp/v1/players/default-player/pia/admin?type=normal&name=pia_movie_uliza_fix&targetid=ulizahtml5&repeatable=true',
        'info_dict': {
            'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
            'ext': 'mp4',
            'title': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
            'live_status': 'not_live',
            '_old_archive_ids': ['piaulizaportal 0644ecc8-e354-41b4-b957-3b08a2d63df1'],
        },
    }]

    def _real_extract(self, url):
        """Download the player script, locate its HLS manifest and return the info dict.

        The video id is the manifest URL's ``ss`` query value, falling back to
        the ``name`` value matched from the player URL.
        """
        display_id = self._match_id(url)
        player_data = self._download_webpage(
            url, display_id, headers={'Referer': 'https://player-api.p.uliza.jp/'},
            note='Fetching player data', errnote='Unable to fetch player data')

        m3u8_url = self._search_regex(
            r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data, 'm3u8 url')

        ss_values = parse_qs(m3u8_url).get('ss', [display_id])
        video_id = ss_values[0]
        formats = self._extract_m3u8_formats(m3u8_url, video_id)

        # The path segment of the first format URL tells the stream kind apart
        first_format_url = traverse_obj(formats, (0, 'url'))
        m3u8_type = self._search_regex(
            r'/hls/(dvr|video)/', first_format_url, 'm3u8 type', default=None)
        live_status_by_type = {
            'video': 'is_live',
            'dvr': 'was_live',  # short-term archives
        }

        return {
            'id': video_id,
            'title': video_id,
            'formats': formats,
            'live_status': live_status_by_type.get(m3u8_type, 'not_live'),  # VOD or long-term archives
            # Keeps download-archive entries written under the former PIAULIZAPortal key valid
            '_old_archive_ids': [make_archive_id('PIAULIZAPortal', video_id)],
        }
 class UlizaPortalIE(InfoExtractor):
    """Extractor for ulizaportal.jp pages; hands the embedded player script to UlizaPlayerIE."""

    IE_DESC = 'ulizaportal.jp'
    _VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
    _TESTS = [{
        'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
        'info_dict': {
            'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
            'display_id': '005f18b7-e810-5618-cb82-0987c5755d44',
            'title': 'プレゼンテーションプレイヤーのサンプル',
            'live_status': 'not_live',
            '_old_archive_ids': ['piaulizaportal ae350126-5e22-4a7f-a8ac-8d0fd448b800'],
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }, {
        'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
        'info_dict': {
            'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
            'display_id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
            'title': '【確認用】視聴サンプルページ（ULIZA）',
            'live_status': 'not_live',
            '_old_archive_ids': ['piaulizaportal 0644ecc8-e354-41b4-b957-3b08a2d63df1'],
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }]

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the page's player script.

        Raises ExtractorError when the URL's ``expires`` query value is in the past.
        """
        page_id = self._match_id(url)

        # Signed links carry their expiry in the query string; reject stale ones up front
        expiry = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
        if expiry and expiry <= time_seconds():
            raise ExtractorError('The link is expired', video_id=page_id, expected=True)

        webpage = self._download_webpage(url, page_id)
        player_data_url = self._search_regex(
            r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
            webpage, 'player data url')
        page_title = self._html_extract_title(webpage)

        return self.url_result(
            player_data_url, UlizaPlayerIE, url_transparent=True,
            display_id=page_id, video_title=page_title)
--- a/yt_dlp/extractor/veoh.py
+++ b/yt_dlp/extractor/veoh.py
@ -1,189 +0,0 @@
 import functools
 import json
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    OnDemandPagedList,
    int_or_none,
    parse_duration,
    qualities,
    remove_start,
    strip_or_none,
 )
 class VeohIE(InfoExtractor):
    """Extractor for single veoh.com videos, backed by the ``watch/getVideo`` JSON endpoint."""

    _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|videos|embed|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'
    _TESTS = [{
        'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
        'md5': '620e68e6a3cff80086df3348426c9ca3',
        'info_dict': {
            'id': 'v56314296nk7Zdmz3',
            'ext': 'mp4',
            'title': 'Straight Backs Are Stronger',
            'description': 'md5:203f976279939a6dc664d4001e13f5f4',
            'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th56314296\\.jpg(\\?.*)?',
            'uploader': 'LUMOback',
            'duration': 46,
            'view_count': int,
            'average_rating': int,
            'comment_count': int,
            'age_limit': 0,
            'categories': ['technology_and_gaming'],
            'tags': ['posture', 'posture', 'sensor', 'back', 'pain', 'wearable', 'tech', 'lumo'],
        },
    }, {
        'url': 'http://www.veoh.com/embed/v56314296nk7Zdmz3',
        'only_matching': True,
    }, {
        'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
        'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
        'info_dict': {
            'id': '27701988',
            'ext': 'mp4',
            'title': 'Chile workers cover up to avoid skin damage',
            'description': 'md5:2bd151625a60a32822873efc246ba20d',
            'uploader': 'afp-news',
            'duration': 123,
        },
        'skip': 'This video has been deleted.',
    }, {
        'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
        'md5': '4fde7b9e33577bab2f2f8f260e30e979',
        'note': 'Embedded ooyala video',
        'info_dict': {
            'id': '69525809',
            'ext': 'mp4',
            'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
            'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
            'uploader': 'newsy-videos',
        },
        'skip': 'This video has been deleted.',
    }, {
        'url': 'http://www.veoh.com/watch/e152215AJxZktGS',
        'only_matching': True,
    }, {
        'url': 'https://www.veoh.com/videos/v16374379WA437rMH',
        'md5': 'cceb73f3909063d64f4b93d4defca1b3',
        'info_dict': {
            'id': 'v16374379WA437rMH',
            'ext': 'mp4',
            'title': 'Phantasmagoria 2, pt. 1-3',
            'description': 'Phantasmagoria: a Puzzle of Flesh',
            'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th16374379\\.jpg(\\?.*)?',
            'uploader': 'davidspackage',
            'duration': 968,
            'view_count': int,
            'average_rating': int,
            'comment_count': int,
            'age_limit': 18,
            'categories': ['technology_and_gaming', 'gaming'],
            'tags': ['puzzle', 'of', 'flesh'],
        },
    }]

    def _real_extract(self, url):
        """Fetch the video's JSON metadata and map it onto an info dict.

        ``video`` and ``title`` are mandatory (a missing key raises KeyError);
        every other field is optional.
        """
        video_id = self._match_id(url)
        metadata = self._download_json(
            'https://www.veoh.com/watch/getVideo/' + video_id,
            video_id)
        video = metadata['video']
        title = video['title']

        thumbnail_url = None
        q = qualities(['Regular', 'HQ'])
        formats = []
        # `or {}` rather than a .get() default: the default only covers a missing
        # key, not an explicit JSON null, which would crash on .items()
        for f_id, f_url in (video.get('src') or {}).items():
            if not f_url:
                continue
            # The 'src' mapping mixes the poster image in with the media URLs
            if f_id == 'poster':
                thumbnail_url = f_url
            else:
                formats.append({
                    'format_id': f_id,
                    'quality': q(f_id),
                    'url': f_url,
                })

        categories = metadata.get('categoryPath')
        if not categories:
            # Fall back to the single category on the video, minus its 'category_' prefix
            category = remove_start(strip_or_none(video.get('category')), 'category_')
            categories = [category] if category else None
        tags = video.get('tags')

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': thumbnail_url,
            # Same null-safety as for 'src' above
            'uploader': (video.get('author') or {}).get('nickname'),
            'duration': int_or_none(video.get('lengthBySec')) or parse_duration(video.get('length')),
            'view_count': int_or_none(video.get('views')),
            'formats': formats,
            'average_rating': int_or_none(video.get('rating')),
            'comment_count': int_or_none(video.get('numOfComments')),
            'age_limit': 18 if video.get('contentRatingId') == 2 else 0,
            'categories': categories,
            'tags': tags.split(', ') if tags else None,
        }
 class VeohUserIE(VeohIE):  # XXX: Do not subclass from concrete IE
    """Playlist extractor listing the videos published by a veoh.com user."""

    _VALID_URL = r'https?://(?:www\.)?veoh\.com/users/(?P<id>[\w-]+)'
    IE_NAME = 'veoh:user'
    _TESTS = [
        {
            'url': 'https://www.veoh.com/users/valentinazoe',
            'info_dict': {
                'id': 'valentinazoe',
                'title': 'valentinazoe (Uploads)',
            },
            'playlist_mincount': 75,
        },
        {
            'url': 'https://www.veoh.com/users/PiensaLibre',
            'info_dict': {
                'id': 'PiensaLibre',
                'title': 'PiensaLibre (Uploads)',
            },
            'playlist_mincount': 2,
        }]

    _PAGE_SIZE = 16

    def _fetch_page(self, uploader, page):
        """Yield a VeohIE url result for each video on one page of the user's uploads.

        The API is queried with page number ``page + 1``. Raises ExtractorError
        carrying the API's message when the response is not marked successful.
        """
        page_number = page + 1
        # The CSRF token is scraped from the home page in _real_initialize
        request_headers = {
            'x-csrf-token': self._TOKEN,
            'content-type': 'application/json;charset=UTF-8',
        }
        request_body = json.dumps({
            'username': uploader,
            'maxResults': self._PAGE_SIZE,
            'page': page_number,
            'requestName': 'userPage',
        }).encode()

        response = self._download_json(
            'https://www.veoh.com/users/published/videos', uploader,
            note=f'Downloading videos page {page_number}',
            headers=request_headers, data=request_body)
        if not response.get('success'):
            raise ExtractorError(response['message'])

        for video in response['videos']:
            permalink_id = video['permalinkId']
            yield self.url_result(
                f'https://www.veoh.com/watch/{permalink_id}', VeohIE,
                permalink_id, video.get('title'))

    def _real_initialize(self):
        """Scrape the 40-character CSRF token from the veoh.com home page into ``self._TOKEN``."""
        webpage = self._download_webpage(
            'https://www.veoh.com', None, note='Downloading authorization token')
        self._TOKEN = self._search_regex(
            r'csrfToken:\s*(["\'])(?P<token>[0-9a-zA-Z]{40})\1', webpage,
            'request token', group='token')

    def _real_extract(self, url):
        """Return a lazily paged playlist of the user's uploads."""
        uploader = self._match_id(url)
        entries = OnDemandPagedList(
            functools.partial(self._fetch_page, uploader), self._PAGE_SIZE)
        return self.playlist_result(entries, uploader, f'{uploader} (Uploads)')
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@ -5087,7 +5087,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
    def _rich_entries(self, rich_grid_renderer):
        renderer = traverse_obj(
            rich_grid_renderer,
-            ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer', 'shortsLockupViewModel'), any)) or {}
+            ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer', 'shortsLockupViewModel', 'lockupViewModel'), any)) or {}
        video_id = renderer.get('videoId')
        if video_id:
            yield self._extract_video(renderer)
@ -5114,6 +5114,18 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
                })),
                thumbnails=self._extract_thumbnails(renderer, 'thumbnail', final_key='sources'))
            return
        # lockupViewModel extraction
        content_id = renderer.get('contentId')
        if content_id and renderer.get('contentType') == 'LOCKUP_CONTENT_TYPE_PODCAST':
            yield self.url_result(
                f'https://www.youtube.com/playlist?list={content_id}',
                ie=YoutubeTabIE, video_id=content_id,
                **traverse_obj(renderer, {
                    'title': ('metadata', 'lockupMetadataViewModel', 'title', 'content', {str}),
                }),
                thumbnails=self._extract_thumbnails(renderer, (
                    'contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail', 'thumbnailViewModel', 'image'), final_key='sources'))
            return
    def _video_entry(self, video_renderer):
        video_id = video_renderer.get('videoId')
@ -6706,22 +6718,22 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
        },
        'playlist_count': 0,
    }, {
-        # Podcasts tab, with rich entry playlistRenderers
+        # Podcasts tab, with rich entry lockupViewModel
        'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
        'info_dict': {
            'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
            'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
            'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
            'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
-            'title': '99 Percent Invisible - Podcasts',
+            'title': '99% Invisible - Podcasts',
-            'uploader': '99 Percent Invisible',
+            'uploader': '99% Invisible',
            'channel_follower_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
            'tags': [],
-            'channel': '99 Percent Invisible',
+            'channel': '99% Invisible',
            'uploader_id': '@99percentinvisiblepodcast',
        },
-        'playlist_count': 0,
+        'playlist_count': 5,
    }, {
        # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
        'url': 'https://www.youtube.com/@AHimitsu/releases',
Author	SHA1	Message	Date
gillux	a80b037748	Merge `fc5ae7f534` into `37cd7660ea`	2024-11-17 19:46:31 +01:00
sepro	37cd7660ea	[ie/youtube:tab] Fix podcasts tab extraction (#11567 ) Authored by: seproDev	2024-11-17 19:46:04 +01:00
ChocoLZS	d867f99622	[ie/PiaLive] Add extractor (#10811 ) Authored by: ChocoLZS	2024-11-17 19:41:57 +01:00
doe1080	10fc719bc7	[cleanup] Remove dead extractors (#11566 ) - Removes MildomClipIE, MildomIE, MildomUserVodIE, MildomVodIE - Removes PokemonIE, PokemonWatchIE - Removes VeohIE, VeohUserIE Closes #3373, Closes #7059 Authored by: doe1080	2024-11-17 16:22:40 +00:00
krichbanana	eb15fd5a32	[ie/kenh14] Add extractor (#3996 ) Closes #3937 Authored by: krichbanana, pzhlkj6612 Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com>	2024-11-17 14:12:26 +00:00
sepro	7cecd299e4	[ie/chaturbate] Don't break embed detection (#11565 ) Bugfix for `720b3dc453` Authored by: seproDev	2024-11-17 13:32:12 +01:00
bashonly	fc5ae7f534	cleanup	2024-11-16 18:54:15 +00:00
時流	fd6888d6b2	[ie/litv] Remove unnecessary functools	2024-11-16 12:50:35 +08:00
時流	b0f4f71a10	[ie/litv] Remove unnecessary object traversal	2024-11-16 12:50:35 +08:00
時流	81c2c60073	[ie/litv] Fix comments	2024-11-16 12:50:35 +08:00
bashonly	cc3e61855a	Apply suggestions from code review	2024-11-15 23:21:09 +00:00
時流	21c38e5cb4	[ie/litv] remove unnecessary parameter 'm3u8_native' is already the default for entry_protocol parameter	2024-11-14 13:56:32 +08:00
時流	25b793b8cd	[ie/litv] improve error handling Returned error might not even be a dict, so we need traverse_obj() here. On the other hand, no need to try/except around asset_url = ... since the following request will fail anyway without it.	2024-11-14 13:56:32 +08:00
時流	2efc271b9b	[ie/litv] clarify if/else order	2024-11-14 13:56:32 +08:00
時流	ddeb70eaed	[ie/litv] inline variables	2024-11-14 13:56:32 +08:00
時流	c5315c16d7	[ie/litv] lint	2024-11-14 13:06:41 +08:00
時流	0c942a029e	[ie/litv] Use _search_nextjs_data()	2024-11-14 13:06:39 +08:00
時流	652bf9c1f4	Merge branch 'master' into fix-litv	2024-11-14 12:32:17 +08:00
時流	7fa603bcc3	[litv] Fix test Fixes the following failing test: ____________________ TestAllURLsMatching.test_no_duplicates ____________________ test/test_all_urls.py:79: in test_no_duplicates self.assertTrue(ie.suitable(url), f'{type(ie).__name__} should match URL {url!r}') E AssertionError: False is not true : LiTVIE should match URL 'https://www.litv.tv/promo/miyuezhuan/?content_id=VOD00044841&'	2024-09-24 08:04:51 +00:00
時流	29964cb9a5	[ie/litv] Bypass georestriction Adds X-Forwarded-For header with Taiwanese IP.	2024-09-24 07:03:03 +00:00
時流	48160e64c6	[ie/litv] Fix extractor - The URL scheme changed. - The general process to get the m3u8 playlist did not changed, but a lot of implementation details changed.	2024-09-24 07:03:03 +00:00