Merge c59ce7d6a6 into f9d98509a8

[ie/ctvnews] Fix playlist ID extraction (#8892 )
Authored by: qbnu
2024-11-25 00:31:26 +01:00 · 2024-11-17 20:50:56 +01:00 · 2024-11-17 19:35:10 +00:00 · 2024-11-17 19:46:04 +01:00 · 2024-11-17 19:41:57 +01:00 · 2024-11-17 16:22:40 +00:00
12 changed files with 962 additions and 711 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -285,6 +285,16 @@ from .bloomberg import BloombergIE
 from .bluesky import BlueskyIE
 from .bokecc import BokeCCIE
 from .bongacams import BongaCamsIE
 from .boomplay import (
    BoomplayEpisodeIE,
    BoomplayGenericPlaylistIE,
    BoomplayMusicIE,
    BoomplayPlaylistIE,
    BoomplayPodcastIE,
    BoomplaySearchIE,
    BoomplaySearchURLIE,
    BoomplayVideoIE,
 )
 from .boosty import BoostyIE
 from .bostonglobe import BostonGlobeIE
 from .box import BoxIE
@ -946,6 +956,10 @@ from .kaltura import KalturaIE
 from .kankanews import KankaNewsIE
 from .karaoketv import KaraoketvIE
 from .kelbyone import KelbyOneIE
 from .kenh14 import (
    Kenh14PlaylistIE,
    Kenh14VideoIE,
 )
 from .khanacademy import (
    KhanAcademyIE,
    KhanAcademyUnitIE,
@ -1135,12 +1149,6 @@ from .microsoftembed import (
    MicrosoftMediusIE,
 )
 from .microsoftstream import MicrosoftStreamIE
 from .mildom import (
    MildomClipIE,
    MildomIE,
    MildomUserVodIE,
    MildomVodIE,
 )
 from .minds import (
    MindsChannelIE,
    MindsGroupIE,
@ -1522,8 +1530,8 @@ from .pgatour import PGATourIE
 from .philharmoniedeparis import PhilharmonieDeParisIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
 from .pialive import PiaLiveIE
 from .piapro import PiaproIE
 from .piaulizaportal import PIAULIZAPortalIE
 from .picarto import (
    PicartoIE,
    PicartoVodIE,
@ -1559,10 +1567,6 @@ from .podbayfm import (
 )
 from .podchaser import PodchaserIE
 from .podomatic import PodomaticIE
 from .pokemon import (
    PokemonIE,
    PokemonWatchIE,
 )
 from .pokergo import (
    PokerGoCollectionIE,
    PokerGoIE,
@ -2256,6 +2260,10 @@ from .ufctv import (
 )
 from .ukcolumn import UkColumnIE
 from .uktvplay import UKTVPlayIE
 from .uliza import (
    UlizaPlayerIE,
    UlizaPortalIE,
 )
 from .umg import UMGDeIE
 from .unistra import UnistraIE
 from .unity import UnityIE
@ -2284,10 +2292,6 @@ from .utreon import UtreonIE
 from .varzesh3 import Varzesh3IE
 from .vbox7 import Vbox7IE
 from .veo import VeoIE
 from .veoh import (
    VeohIE,
    VeohUserIE,
 )
 from .vesti import VestiIE
 from .vevo import (
    VevoIE,
--- a/yt_dlp/extractor/boomplay.py
+++ b/yt_dlp/extractor/boomplay.py
@ -0,0 +1,511 @@
 import base64
 import functools
 import json
 import re
 import urllib.parse
 from .common import InfoExtractor, SearchInfoExtractor
 from ..aes import aes_cbc_decrypt_bytes, aes_cbc_encrypt_bytes, unpad_pkcs7
 from ..utils import (
    ExtractorError,
    classproperty,
    clean_html,
    extract_attributes,
    get_elements_text_and_html_by_attribute,
    int_or_none,
    join_nonempty,
    merge_dicts,
    parse_count,
    parse_duration,
    smuggle_url,
    strip_or_none,
    unified_strdate,
    unsmuggle_url,
    url_or_none,
    urlencode_postdata,
    urljoin,
    variadic,
 )
 from ..utils.traversal import traverse_obj
 class BoomplayBaseIE(InfoExtractor):
    # Calculated from const values, see lhx.AESUtils.encrypt in public.js
    # Note that the real key/iv differs from `lhx.AESUtils.key`/`lhx.AESUtils.iv`
    _KEY = b'boomplayVr3xopAM'
    _IV = b'boomplay8xIsKTn9'
    _BASE = 'https://www.boomplay.com'
    _MEDIA_TYPES = ('songs', 'video', 'episode', 'podcasts', 'playlists', 'artists', 'albums')
    _GEO_COUNTRIES = ['NG']
    @staticmethod
    def __yield_elements_text_and_html_by_class_and_tag(class_, tag, html):
        """
        Find every `tag` element in `html` whose class attribute contains `class_`

        `class_` is interpolated verbatim into a regular expression, so it
        must already be regex-escaped by the caller.
        The items produced are unpacked by the sibling helpers as
        (content, whole element) pairs.
        """
        # Accept `class_` only as a whole token of the quoted attribute value
        class_value_re = rf'''[^'"]*(?<=['"\s]){class_}(?=['"\s])[^'"]*'''
        # get_elements_text_and_html_by_attribute returns a generator
        return get_elements_text_and_html_by_attribute(
            attribute='class', value=class_value_re, html=html,
            tag=tag, escape_value=False)
    @classmethod
    def __yield_elements_by_class_and_tag(cls, *args, **kwargs):
        """Yield the content (first item of each pair) of all matching elements"""
        pairs = cls.__yield_elements_text_and_html_by_class_and_tag(*args, **kwargs)
        return (content for content, _whole in pairs)
    @classmethod
    def __yield_elements_html_by_class_and_tag(cls, *args, **kwargs):
        """Yield the whole element (second item of each pair) of all matching elements"""
        pairs = cls.__yield_elements_text_and_html_by_class_and_tag(*args, **kwargs)
        return (whole for _content, whole in pairs)
    @classmethod
    def _get_elements_by_class_and_tag(cls, class_, tag, html):
        """Return a list with the content of every `tag` element carrying class `class_`"""
        return [*cls.__yield_elements_by_class_and_tag(class_, tag, html)]
    @classmethod
    def _get_element_by_class_and_tag(cls, class_, tag, html):
        """Return the content of the first `tag` element carrying class `class_`, or None"""
        for content in cls.__yield_elements_by_class_and_tag(class_, tag, html):
            return content
        return None
    @classmethod
    def _urljoin(cls, path):
        """Resolve `path` against the site base (`_BASE`) and pass the result through url_or_none"""
        joined = urljoin(base=cls._BASE, path=path)
        return url_or_none(joined)
    def _get_playurl(self, item_id, item_type):
        """
        Request the encrypted play URL of a resource and return it decrypted

        The request carries a single `param` form field: the JSON object
        {'itemID', 'itemType'} encrypted with `_KEY`/`_IV` and base64 encoded.
        The `source` field of the response is base64 decoded, decrypted with
        the same key/iv and unpadded.

        Returns the decrypted string, or None when the response contains
        neither a `source` nor an error `code`.
        Raises a geo restriction error if the response description says the
        item is unavailable in the user's country, and ExtractorError for any
        other response that has an error code but no `source`.
        """
        param = base64.b64encode(aes_cbc_encrypt_bytes(json.dumps({
            'itemID': item_id,
            'itemType': item_type,
        }).encode(), self._KEY, self._IV)).decode()
        resp = self._download_json(
            'https://www.boomplay.com/getResourceAddr', item_id,
            note='Downloading play URL', errnote='Failed to download play URL',
            data=urlencode_postdata({'param': param}),
            headers={
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            })

        source = resp.get('source')
        if not source:
            code = resp.get('code')
            if not code:
                # Nothing to decode and no error reported: let the caller handle the missing URL
                return None
            # `desc` can be absent or null; normalise it *before* the substring test
            desc = resp.get('desc') or ''
            if 'unavailable in your country' in desc:
                # since NG must have failed ...
                self.raise_geo_restricted(countries=['GH', 'KE', 'TZ', 'CM', 'CI'])
            else:
                raise ExtractorError(desc or f'Failed to get play url, code: {code}')

        return unpad_pkcs7(aes_cbc_decrypt_bytes(
            base64.b64decode(source),
            self._KEY, self._IV)).decode()
    def _extract_formats(self, item_id, item_type='MUSIC', **kwargs):
        """
        Build the single-format list of a Boomplay resource

        `kwargs` are merged into the format dict after the fixed fields.
        When no valid play URL is available, raise_no_formats is called; if it
        returns, the result is None.
        """
        play_url = url_or_none(self._get_playurl(item_id, item_type))
        if not play_url:
            self.raise_no_formats('No formats found')
            return None

        request_headers = {
            'Origin': 'https://www.boomplay.com',
            'Referer': 'https://www.boomplay.com',
            'X-Boomplay-Ref': 'Boomplay_WEBV1',
        }
        return [{
            'format_id': '0',
            'url': play_url,
            'http_headers': request_headers,
            **kwargs,
        }]
    def _extract_page_metadata(self, webpage, item_id):
        """
        Collect the metadata shown on a Boomplay page into an info-dict fragment

        Sources, all read from `webpage`:
          - the title (see _extract_title_from_webpage) and the og/twitter image
          - the counters, duration and publication date buttons inside `div.summary`
          - `span.description_content`, with the site's promotional sentence removed
          - `Key: value` entries found in <strong> tags of `div.summary` and in
            <li> tags of `section.songDetailInfo` (artist, album, genre, year of release)

        Returns a dict that always contains `id`; values that could not be
        extracted are left to the parsing helpers' failure values.
        """
        metadata_div = self._get_element_by_class_and_tag('summary', 'div', webpage) or ''
        details_section = self._get_element_by_class_and_tag('songDetailInfo', 'section', webpage) or ''
        metadata_entries = [
            *re.findall(r'(?si)<strong>(?P<entry>.*?)</strong>', metadata_div),
            *re.findall(r'(?si)<li>(?P<entry>.*?)</li>', details_section),
        ]
        description = re.sub(
            r'(?i)Listen and download music for free on Boomplay!', '',
            clean_html(self._get_element_by_class_and_tag(
                'description_content', 'span', webpage)) or '') or None

        page_metadata = {
            'id': item_id,
            **self._extract_title_from_webpage(webpage),
            'thumbnail': self._html_search_meta(['og:image', 'twitter:image'],
                                                webpage, 'thumbnail', default=None),
            'like_count': parse_count(self._get_element_by_class_and_tag('btn_favorite', 'button', metadata_div)),
            'repost_count': parse_count(self._get_element_by_class_and_tag('btn_share', 'button', metadata_div)),
            'comment_count': parse_count(self._get_element_by_class_and_tag('btn_comment', 'button', metadata_div)),
            'duration': parse_duration(self._get_element_by_class_and_tag('btn_duration', 'button', metadata_div)),
            'upload_date': unified_strdate(strip_or_none(
                self._get_element_by_class_and_tag('btn_pubDate', 'button', metadata_div))),
            'description': description,
        }

        for metadata_entry in metadata_entries:
            # Split the visible text, not the raw HTML: markup can contain a ':'
            # (URLs, inline styles) that the cleaned text does not have
            key, separator, value = (clean_html(metadata_entry) or '').partition(':')
            if not separator:
                continue
            key, value = key.lower(), value.strip()
            if 'artist' in key:
                page_metadata['artists'] = [value]
            elif 'album' in key:
                page_metadata['album'] = value
            elif 'genre' in key:
                page_metadata['genres'] = [value]
            elif 'year of release' in key:
                page_metadata['release_year'] = int_or_none(value)
        return page_metadata
    def _extract_title_from_webpage(self, webpage):
        """
        Return a dict holding the page title (and possibly `artists`)

        The first <h1> is used as-is when present. Otherwise the og/twitter
        title, or failing that the <title> tag, is normalised by _fix_title.
        """
        h1_title = self._html_search_regex(
            r'(?i)<h1[^>]*>([^<]+)</h1>', webpage, 'title', default=None)
        if h1_title:
            return {'title': h1_title}

        head_title = self._html_search_meta(
            ['og:title', 'twitter:title'], webpage, 'title', default=None)
        if not head_title:
            head_title = self._html_search_regex(
                r'(?i)<title[^>]*>([^<]+)</title>', webpage, 'title', default=None)
        return self._fix_title(head_title)
    @staticmethod
    def _fix_title(title):
        """
        fix various types of titles(og:title, twitter:title, title tag in html head)
        """
        if not title:
            return {}
        title_patterns = (
            r'^(?P<title>(?P<artist>.+)) Songs MP3 Download, New Songs \& Albums \| Boomplay$',  # artists
            r'^(?P<artist>.+?) - (?P<title>.+) MP3\ Download \& Lyrics \| Boomplay$',  # music
            r'^Download (?P<artist>.+) album songs: (?P<title>.+?) \| Boomplay Music$',  # album
            r'^Search:(?P<title>.+) \| Boomplay Music$',  # search url
            r'^(?P<title>.+) \| Podcast \| Boomplay$',  # podcast, episode
            r'^(?P<title>.+) \| Boomplay(?: Music)?$',  # video, playlist, generic playlists
        )
        for pattern in title_patterns:
            if match := re.search(pattern, title):
                return {
                    'title': match.group('title'),
                    'artists': [match.group('artist')] if 'artist' in match.groupdict() else None,
                }
        return {'title': title}
    @classmethod
    def _extract_from_webpage(cls, url, webpage, **kwargs):
        """
        Delegate to the parent implementation, carrying extra options inside the URL

        Any keyword arguments are smuggled into `url`; _extract_embed_urls
        unsmuggles them again.
        """
        target_url = smuggle_url(url, kwargs) if kwargs else url
        return super()._extract_from_webpage(target_url, webpage)
    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """
        Yield the absolute URL of every Boomplay media link found in `webpage`

        A link qualifies when it is an <a> tag whose href is a site-relative
        path of the form `/<media type>/<digits>...`. The accepted media types
        default to `_MEDIA_TYPES` and can be narrowed through the `media_types`
        option smuggled into `url` (a single name or an iterable of names);
        names not present in `_MEDIA_TYPES` are ignored.
        """
        # Default to {} so that a plain (non-smuggled) URL does not leave us with None
        _, smuggled_data = unsmuggle_url(url, {})
        requested_types = variadic(smuggled_data.get('media_types', cls._MEDIA_TYPES))
        media_types_re = join_nonempty(*(
            re.escape(v) for v in requested_types if v in cls._MEDIA_TYPES),
            delim='|')
        if not media_types_re:
            # An empty alternation would make the pattern match hrefs like `//123`
            return

        for mobj in re.finditer(
                rf'''(?ix)
                <a
                    (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
                        (?<=\s)href\s*=\s*(?P<_q>['"])
                            (?P<href>/(?:{media_types_re})/\d+/?[\-\w=?&#:;@]*)
                        (?P=_q)
                    (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
                >''', webpage):
            if embed_url := cls._urljoin(mobj.group('href')):
                yield embed_url
    def _extract_playlist_entries(self, webpage, media_types, warn=True):
        """
        Build the playlist entries of a list page

        The `songName` links inside the song list (`ol.morePart_musics`, else
        `ol.morePart`) are preferred. When they yield nothing, every link in
        `webpage` that targets one of `media_types` is used instead, after a
        warning unless `warn` is false.

        This is an instance method because it emits a warning.
        NOTE(review): report_warning is understood to be an instance method of
        InfoExtractor, so calling it on the class would pass the message as
        `self` - confirm against common.py.
        """
        song_list = strip_or_none(
            self._get_element_by_class_and_tag('morePart_musics', 'ol', webpage)
            or self._get_element_by_class_and_tag('morePart', 'ol', webpage)
            or '')

        entries = traverse_obj(self.__yield_elements_html_by_class_and_tag(
            'songName', 'a', song_list),
            (..., {extract_attributes}, 'href', {self._urljoin}, {self.url_result}))
        if entries:
            return entries

        if warn:
            self.report_warning('Failed to extract playlist entries, finding suitable links instead!')

        def strip_ie(entry):
            # All our IEs have a _VALID_URL and set a key: don't use it
            entry.pop('ie_key', None)
            return entry

        return (strip_ie(result) for result in
                self._extract_from_webpage(self._BASE, webpage, media_types=media_types))
 class BoomplayMusicIE(BoomplayBaseIE):
    _VALID_URL = r'https?://(?:www\.)?boomplay\.com/songs/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.boomplay.com/songs/165481965',
        'md5': 'c5fb4f23e6aae98064230ef3c39c2178',
        'info_dict': {
            'title': 'Rise of the Fallen Heroes',
            'ext': 'mp3',
            'id': '165481965',
            'artists': ['fatbunny'],
            'thumbnail': 'https://source.boomplaymusic.com/group10/M00/04/29/375ecda38f6f48179a93c72ab909118f_464_464.jpg',
            'channel_url': 'https://www.boomplay.com/artists/52723101',
            'duration': 125.0,
            'release_year': 2024,
            'comment_count': int,
            'like_count': int,
            'repost_count': int,
            'album': 'Legendary Battle',
            'genres': ['Metal'],
        },
    }]
    def _real_extract(self, url):
        """
        Extract a single song

        Metadata scraped from the page is merged with the first JSON-LD object
        of the page (earlier sources come first in the merge_dicts call); the
        audio-only format comes from the play URL API.
        """
        song_id = self._match_id(url)
        webpage = self._download_webpage(url, song_id)
        # Default to None: a page without JSON-LD must not abort with StopIteration
        ld_json_meta = next(self._yield_json_ld(webpage, song_id), None)
        # TODO: extract comments(and lyrics? they don't have timestamps)
        # example: https://www.boomplay.com/songs/96352673?from=home
        return merge_dicts(
            self._extract_page_metadata(webpage, song_id),
            traverse_obj(ld_json_meta, {
                'title': 'name',
                'thumbnail': 'image',
                'channel_url': ('byArtist', 0, '@id'),
                'artists': ('byArtist', ..., 'name'),
                'duration': ('duration', {parse_duration}),
            }), {
                'formats': self._extract_formats(song_id, 'MUSIC', vcodec='none'),
            })
 class BoomplayVideoIE(BoomplayBaseIE):
    _VALID_URL = r'https?://(?:www\.)?boomplay\.com/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.boomplay.com/video/1154892',
        'md5': 'd9b67ad333d2292a82922062d065352d',
        'info_dict': {
            'id': '1154892',
            'ext': 'mp4',
            'title': 'Autumn blues',
            'thumbnail': 'https://source.boomplaymusic.com/group10/M00/10/10/2171dee9e1f8452e84021560729edb88.jpg',
            'upload_date': '20241010',
            'timestamp': 1728599214,
            'view_count': int,
            'duration': 177.0,
            'description': 'Autumn blues by Lugo',
        },
    }]
    def _real_extract(self, url):
        """Extract a single video: page metadata, then JSON-LD, then the mp4 format from the play URL API"""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        page_metadata = self._extract_page_metadata(webpage, video_id)
        json_ld_metadata = self._search_json_ld(webpage, video_id)
        formats = self._extract_formats(video_id, 'VIDEO', ext='mp4')
        return merge_dicts(page_metadata, json_ld_metadata, {'formats': formats})
 class BoomplayEpisodeIE(BoomplayBaseIE):
    _VALID_URL = r'https?://(?:www\.)?boomplay\.com/episode/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.boomplay.com/episode/7132706',
        'md5': 'f26e236b764baa53d7a2cbb7e9ce6dc4',
        'info_dict': {
            'id': '7132706',
            'ext': 'mp3',
            'title': 'Letting Go',
            'repost_count': int,
            'thumbnail': 'https://source.boomplaymusic.com/group10/M00/05/06/fc535eaa25714b43a47185a9831887a5_320_320.jpg',
            'comment_count': int,
            'duration': 921.0,
            'upload_date': '20240506',
            'description': 'md5:5ec684b281fa0f9e4c31b3ee20c5e57a',
        },
    }]
    def _real_extract(self, url):
        """Extract a single podcast episode: page metadata plus meta-tag description and the audio-only format"""
        ep_id = self._match_id(url)
        webpage = self._download_webpage(url, ep_id)

        page_metadata = self._extract_page_metadata(webpage, ep_id)
        meta_description = self._html_search_meta(
            ['description', 'og:description', 'twitter:description'], webpage)
        formats = self._extract_formats(ep_id, 'EPISODE', vcodec='none')
        return merge_dicts(page_metadata, {
            'description': meta_description,
            'formats': formats,
        })
 class BoomplayPodcastIE(BoomplayBaseIE):
    _VALID_URL = r'https?://(?:www\.)?boomplay\.com/podcasts/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.boomplay.com/podcasts/5372',
        'playlist_count': 200,
        'info_dict': {
            'id': '5372',
            'title': 'TED Talks Daily',
            'description': r're:(?s)Every weekday, TED Talks Daily brings you the latest talks .{328} learn something new\.$',
            'thumbnail': 'https://source.boomplaymusic.com/group10/M00/12/22/6f9cf97ad6f846a0a7882c98dfcf4f8c_320_320.jpg',
            'repost_count': int,
            'comment_count': int,
            'like_count': int,
        },
    }]
    def _real_extract(self, url):
        """
        Extract a podcast as a playlist of its episodes

        Each <li> carrying a numeric `data-id` inside `ol.morePart_musics`
        becomes one episode entry handled by BoomplayEpisodeIE. A page without
        that list results in a playlist with no entries.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        # Fall back to '' so that a missing list yields no entries instead of a TypeError
        song_list_html = self._get_element_by_class_and_tag('morePart_musics', 'ol', webpage) or ''
        entries = traverse_obj(re.finditer(
            r'''(?ix)
            <li
                (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
                    \sdata-id\s*=\s*
                        (?P<_q>['"]?)
                            (?P<id>\d+)
                        (?P=_q)
                (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
            >''',
            song_list_html),
            (..., 'id', {
                lambda x: self.url_result(
                    f'https://www.boomplay.com/episode/{x}', BoomplayEpisodeIE, x),
            }))
        return self.playlist_result(
            entries, playlist_id,
            **self._extract_page_metadata(webpage, playlist_id))
 class BoomplayPlaylistIE(BoomplayBaseIE):
    _VALID_URL = r'https?://(?:www\.)?boomplay\.com/(?:playlists|artists|albums)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.boomplay.com/playlists/33792494',
        'info_dict': {
            'id': '33792494',
            'title': 'Daily Trending Indonesia',
            'thumbnail': 'https://source.boomplaymusic.com/group10/M00/08/19/d05d431ee616412caeacd7f78f4f68f5_320_320.jpeg',
            'repost_count': int,
            'comment_count': int,
            'like_count': int,
            'description': 'md5:7ebdffc5137c77acb62acb3c89248445',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.boomplay.com/artists/52723101',
        'only_matching': True,
    }, {
        'url': 'https://www.boomplay.com/albums/89611238?from=home#google_vignette',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        """
        Extract a playlist, artist or album page as a playlist of songs

        Page metadata is merged with the first JSON-LD object of the page,
        whose `track` URLs become the entries (handled by BoomplayMusicIE).
        Without JSON-LD, or without tracks in it, the playlist has no entries.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        # Default to None: a page without JSON-LD must not abort with StopIteration
        json_ld_metadata = next(self._yield_json_ld(webpage, playlist_id), None)
        # schema `MusicGroup` not supported by self._json_ld()
        playlist_info = merge_dicts(
            self._extract_page_metadata(webpage, playlist_id),
            traverse_obj(json_ld_metadata, {
                'entries': ('track', ..., 'url', {
                    functools.partial(self.url_result, ie=BoomplayMusicIE),
                }),
                'playlist_title': 'name',
                'thumbnail': 'image',
                'artists': ('byArtist', ..., 'name'),
                'channel_url': ('byArtist', 0, '@id'),
            }))
        # The other callers in this file always pass `entries` to playlist_result;
        # make sure the key exists even when the JSON-LD lists no tracks
        playlist_info.setdefault('entries', [])
        return self.playlist_result(**playlist_info)
 class BoomplayGenericPlaylistIE(BoomplayBaseIE):
    _VALID_URL = r'https?://(?:www\.)?boomplay\.com/.+'
    _TESTS = [{
        'url': 'https://www.boomplay.com/new-songs',
        'playlist_mincount': 20,
        'info_dict': {
            'id': 'new-songs',
            'title': 'New Songs',
            'thumbnail': 'http://www.boomplay.com/pc/img/og_default_v3.jpg',
        },
    }, {
        'url': 'https://www.boomplay.com/trending-songs',
        'playlist_mincount': 20,
        'info_dict': {
            'id': 'trending-songs',
            'title': 'Trending Songs',
            'thumbnail': 'http://www.boomplay.com/pc/img/og_default_v3.jpg',
        },
    }]
    @classmethod
    def suitable(cls, url):
        """Accept a URL only if none of the dedicated Boomplay extractors listed below claims it"""
        dedicated_extractors = (
            BoomplayEpisodeIE,
            BoomplayMusicIE,
            BoomplayPlaylistIE,
            BoomplayPodcastIE,
            BoomplaySearchURLIE,
            BoomplayVideoIE,
        )
        return super().suitable(url) and not any(
            ie.suitable(url) for ie in dedicated_extractors)
    def _real_extract(self, url):
        """Extract any other Boomplay page as a playlist of the media it links to"""
        playlist_id = self._generic_id(url)
        webpage = self._download_webpage(url, playlist_id)

        entries = self._extract_playlist_entries(webpage, self._MEDIA_TYPES)
        page_metadata = self._extract_page_metadata(webpage, playlist_id)
        return self.playlist_result(entries, **page_metadata)
 class BoomplaySearchURLIE(BoomplayBaseIE):
    _TESTS = [{
        'url': 'https://www.boomplay.com/search/default/%20Rise%20of%20the%20Falletesn%20Heroes%20fatbunny',
        'md5': 'c5fb4f23e6aae98064230ef3c39c2178',
        'info_dict': {
            'id': '165481965',
            'ext': 'mp3',
            'title': 'Rise of the Fallen Heroes',
            'duration': 125.0,
            'genres': ['Metal'],
            'artists': ['fatbunny'],
            'thumbnail': 'https://source.boomplaymusic.com/group10/M00/04/29/375ecda38f6f48179a93c72ab909118f_464_464.jpg',
            'channel_url': 'https://www.boomplay.com/artists/52723101',
            'comment_count': int,
            'repost_count': int,
            'album': 'Legendary Battle',
            'release_year': 2024,
            'like_count': int,
        },
    }, {
        'url': 'https://www.boomplay.com/search/video/%20Autumn%20blues',
        'md5': 'd9b67ad333d2292a82922062d065352d',
        'info_dict': {
            'id': '1154892',
            'title': 'Autumn blues',
            'ext': 'mp4',
            'timestamp': 1728599214,
            'view_count': int,
            'thumbnail': 'https://source.boomplaymusic.com/group10/M00/10/10/2171dee9e1f8452e84021560729edb88.jpg',
            'description': 'Autumn blues by Lugo',
            'upload_date': '20241010',
            'duration': 177.0,
        },
        'params': {'playlist_items': '1'},
    }]
    @classproperty
    def _VALID_URL(cls):
        # Groups: `media_type` (the search category) and `query` (the raw, still URL-quoted search term)
        return (
            r'https?://(?:www\.)?boomplay\.com/search/'
            r'(?P<media_type>default|video|episode|podcasts|playlists|artists|albums)/'
            r'(?P<query>[^?&#/]+)')
    def _real_extract(self, url):
        """
        Extract a search result page as a playlist

        The `default` search category lists songs. The fallback-to-links
        warning is only enabled for song searches.
        """
        searched_type, query = self._match_valid_url(url).group('media_type', 'query')
        media_type = 'songs' if searched_type == 'default' else searched_type
        webpage = self._download_webpage(url, query)

        entries = self._extract_playlist_entries(
            webpage, media_type, warn=media_type == 'songs')
        page_metadata = self._extract_page_metadata(webpage, query)
        return self.playlist_result(entries, **page_metadata)
 class BoomplaySearchIE(SearchInfoExtractor):
    _SEARCH_KEY = 'boomplaysearch'
    _RETURN_TYPE = 'url'
    _TESTS = [{
        'url': 'boomplaysearch:rise of the fallen heroes',
        'only_matching': True,
    }]
    def _search_results(self, query):
        """Yield one result: the URL-quoted song search page for `query`, handled by BoomplaySearchURLIE"""
        search_url = f'https://www.boomplay.com/search/default/{urllib.parse.quote(query)}'
        yield self.url_result(search_url, BoomplaySearchURLIE)
--- a/yt_dlp/extractor/chaturbate.py
+++ b/yt_dlp/extractor/chaturbate.py
@ -79,7 +79,7 @@ class ChaturbateIE(InfoExtractor):
            'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True),
        }
-    def _extract_from_webpage(self, video_id, tld):
+    def _extract_from_html(self, video_id, tld):
        webpage = self._download_webpage(
            f'https://chaturbate.{tld}/{video_id}/', video_id,
            headers=self.geo_verification_headers(), impersonate=True)
@ -151,4 +151,4 @@ class ChaturbateIE(InfoExtractor):
    def _real_extract(self, url):
        video_id, tld = self._match_valid_url(url).group('id', 'tld')
-        return self._extract_from_api(video_id, tld) or self._extract_from_webpage(video_id, tld)
+        return self._extract_from_api(video_id, tld) or self._extract_from_html(video_id, tld)
--- a/yt_dlp/extractor/ctvnews.py
+++ b/yt_dlp/extractor/ctvnews.py
@ -5,10 +5,10 @@ from ..utils import orderedSet
 class CTVNewsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
+    _VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)(?:$|[#?&])'
    _TESTS = [{
        'url': 'http://www.ctvnews.ca/video?clipId=901995',
-        'md5': '9b8624ba66351a23e0b6e1391971f9af',
+        'md5': 'b608f466c7fa24b9666c6439d766ab7e',
        'info_dict': {
            'id': '901995',
            'ext': 'flv',
@ -16,6 +16,14 @@ class CTVNewsIE(InfoExtractor):
            'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
            'timestamp': 1467286284,
            'upload_date': '20160630',
            'categories': [],
            'tags': [],
            'season_id': 57981,
            'duration': 764.631,
            'series': 'CTV News National story',
            'thumbnail': r're:^https?://.*\.jpg$',
            'season': 'Season 0',
            'season_number': 0,
        },
    }, {
        'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
@ -31,6 +39,13 @@ class CTVNewsIE(InfoExtractor):
            'id': '1.2876780',
        },
        'playlist_mincount': 100,
    }, {
        'url': 'https://www.ctvnews.ca/it-s-been-23-years-since-toronto-called-in-the-army-after-a-major-snowstorm-1.5736957',
        'info_dict':
        {
            'id': '1.5736957',
        },
        'playlist_mincount': 6,
    }, {
        'url': 'http://www.ctvnews.ca/1.810401',
        'only_matching': True,
--- a/yt_dlp/extractor/kenh14.py
+++ b/yt_dlp/extractor/kenh14.py
@ -0,0 +1,160 @@
 from .common import InfoExtractor
 from ..utils import (
    clean_html,
    extract_attributes,
    get_element_by_class,
    get_element_html_by_attribute,
    get_elements_html_by_class,
    int_or_none,
    parse_duration,
    parse_iso8601,
    remove_start,
    strip_or_none,
    unescapeHTML,
    update_url,
    url_or_none,
 )
 from ..utils.traversal import traverse_obj
 class Kenh14VideoIE(InfoExtractor):
    _VALID_URL = r'https?://video\.kenh14\.vn/(?:video/)?[\w-]+-(?P<id>[0-9]+)\.chn'
    _TESTS = [{
        'url': 'https://video.kenh14.vn/video/mo-hop-iphone-14-pro-max-nguon-unbox-therapy-316173.chn',
        'md5': '1ed67f9c3a1e74acf15db69590cf6210',
        'info_dict': {
            'id': '316173',
            'ext': 'mp4',
            'title': 'Video mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
            'description': 'Video mở hộp iPhone 14 Pro MaxVideo mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
            'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
            'tags': [],
            'uploader': 'Unbox Therapy',
            'upload_date': '20220517',
            'view_count': int,
            'duration': 722.86,
            'timestamp': 1652764468,
        },
    }, {
        'url': 'https://video.kenh14.vn/video-316174.chn',
        'md5': '2b41877d2afaf4a3f487ceda8e5c7cbd',
        'info_dict': {
            'id': '316174',
            'ext': 'mp4',
            'title': 'Khoảnh khắc VĐV nằm gục khóc sau chiến thắng: 7 năm trời Việt Nam mới có HCV kiếm chém nữ, chỉ có 8 tháng để khổ luyện trước khi lên sàn đấu',
            'description': 'md5:de86aa22e143e2b277bce8ec9c6f17dc',
            'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
            'tags': [],
            'upload_date': '20220517',
            'view_count': int,
            'duration': 70.04,
            'timestamp': 1652766021,
        },
    }, {
        'url': 'https://video.kenh14.vn/0-344740.chn',
        'md5': 'b843495d5e728142c8870c09b46df2a9',
        'info_dict': {
            'id': '344740',
            'ext': 'mov',
            'title': 'Kỳ Duyên đầy căng thẳng trong buổi ra quân đi Miss Universe, nghi thức tuyên thuệ lần đầu xuất hiện gây nhiều tranh cãi',
            'description': 'md5:2a2dbb4a7397169fb21ee68f09160497',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.jpg$',
            'tags': ['kỳ duyên', 'Kỳ Duyên tuyên thuệ', 'miss universe'],
            'uploader': 'Quang Vũ',
            'upload_date': '20241024',
            'view_count': int,
            'duration': 198.88,
            'timestamp': 1729741590,
        },
    }]
    def _real_extract(self, url):
        """
        Extract a single video

        The `data-vid` attribute of the element with type="VideoStream" is the
        scheme-less address of the media file. It is offered as a direct http
        format, is looked up in the kinghub API for metadata, and
        `<data-vid>.json` is queried for optional HLS and DASH manifests.
        The API and manifest requests are non-fatal; a missing `data-vid`
        raises KeyError.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        player_html = get_element_html_by_attribute('type', 'VideoStream', webpage)
        attrs = extract_attributes(player_html or '')
        direct_url = attrs['data-vid']

        api_file_name = remove_start(direct_url, 'kenh14cdn.com/')
        metadata = self._download_json(
            f'https://api.kinghub.vn/video/api/v1/detailVideoByGet?FileName={api_file_name}',
            video_id, fatal=False)

        formats = [{'url': f'https://{direct_url}', 'format_id': 'http', 'quality': 1}]
        subtitles = {}

        video_data = self._download_json(
            f'https://{direct_url}.json', video_id, note='Downloading video data', fatal=False)

        hls_url = traverse_obj(video_data, ('hls', {url_or_none}))
        if hls_url:
            hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
                hls_url, video_id, m3u8_id='hls', fatal=False)
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subtitles, target=subtitles)

        dash_url = traverse_obj(video_data, ('mpd', {url_or_none}))
        if dash_url:
            dash_formats, dash_subtitles = self._extract_mpd_formats_and_subtitles(
                dash_url, video_id, mpd_id='dash', fatal=False)
            formats.extend(dash_formats)
            self._merge_subtitles(dash_subtitles, target=subtitles)

        # API metadata first; the explicit keys below take precedence on conflict
        info = traverse_obj(metadata, {
            'duration': ('duration', {parse_duration}),
            'uploader': ('author', {strip_or_none}),
            'timestamp': ('uploadtime', {parse_iso8601(delimiter=' ')}),
            'view_count': ('views', {int_or_none}),
        })
        title = (
            traverse_obj(metadata, ('title', {strip_or_none}))
            or clean_html(self._og_search_title(webpage))
            or clean_html(get_element_by_class('vdbw-title', webpage)))
        description = (
            clean_html(self._og_search_description(webpage))
            or clean_html(get_element_by_class('vdbw-sapo', webpage)))
        thumbnail = self._og_search_thumbnail(webpage) or attrs.get('data-thumb')
        tags = traverse_obj(self._html_search_meta('keywords', webpage), (
            {lambda x: x.split(';')}, ..., filter))

        return {
            **info,
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'description': description,
            'thumbnail': thumbnail,
            'tags': tags,
        }
 class Kenh14PlaylistIE(InfoExtractor):
    _VALID_URL = r'https?://video\.kenh14\.vn/playlist/[\w-]+-(?P<id>[0-9]+)\.chn'
    _TESTS = [{
        'url': 'https://video.kenh14.vn/playlist/tran-tinh-naked-love-mua-2-71.chn',
        'info_dict': {
            'id': '71',
            'title': 'Trần Tình (Naked love) mùa 2',
            'description': 'md5:e9522339304956dea931722dd72eddb2',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
        },
        'playlist_count': 9,
    }, {
        'url': 'https://video.kenh14.vn/playlist/0-72.chn',
        'info_dict': {
            'id': '72',
            'title': 'Lau Lại Đầu Từ',
            'description': 'Cùng xem xưa và nay có gì khác biệt nhé!',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
        },
        'playlist_count': 6,
    }]
    def _real_extract(self, url):
        """
        Extract a playlist page

        Every element with class `video-item` becomes an entry addressed by
        its `data-id`. Title and description come from the `category-detail`
        element, falling back to the `name`/`alternateName` of the first
        JSON-LD object that has both. The thumbnail is the og:image URL with
        its query string removed.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        category_detail = get_element_by_class('category-detail', webpage) or ''
        embed_info = traverse_obj(
            self._yield_json_ld(webpage, playlist_id),
            (lambda _, v: v['name'] and v['alternateName'], any)) or {}

        def video_url(item_html):
            return 'https://video.kenh14.vn/video/video-{}.chn'.format(
                extract_attributes(item_html)['data-id'])

        video_items = get_elements_html_by_class('video-item', webpage)
        playlist_title = (
            clean_html(get_element_by_class('name', category_detail))
            or unescapeHTML(embed_info.get('name')))
        playlist_description = (
            clean_html(get_element_by_class('description', category_detail))
            or unescapeHTML(embed_info.get('alternateName')))
        thumbnail = traverse_obj(
            self._og_search_thumbnail(webpage),
            ({url_or_none}, {update_url(query=None)}))

        return self.playlist_from_matches(
            video_items, playlist_id, playlist_title,
            getter=video_url, ie=Kenh14VideoIE,
            playlist_description=playlist_description, thumbnail=thumbnail)
--- a/yt_dlp/extractor/mildom.py
+++ b/yt_dlp/extractor/mildom.py
@ -1,291 +0,0 @@
 import functools
 import json
 import uuid
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    OnDemandPagedList,
    determine_ext,
    dict_get,
    float_or_none,
    traverse_obj,
 )
 class MildomBaseIE(InfoExtractor):
    """Base class providing the shared Mildom API call helper."""

    # Guest identifier sent with every API request; generated on first use
    _GUEST_ID = None

    def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None):
        """Call a Mildom JSON endpoint and return the ``body`` of the reply.

        ``body``, when truthy, is sent as a JSON-encoded request payload.
        Raises ExtractorError (expected) when the reply's ``code`` is non-zero.
        """
        if not self._GUEST_ID:
            self._GUEST_ID = f'pc-gp-{uuid.uuid4()}'

        if body:
            payload = json.dumps(body).encode()
            request_headers = {'Content-Type': 'application/json'}
        else:
            payload = None
            request_headers = {}
        request_query = {
            '__guest_id': self._GUEST_ID,
            '__platform': 'web',
            **(query or {}),
        }

        response = self._download_json(
            url, video_id, note=note, data=payload,
            headers=request_headers, query=request_query)
        if response['code'] == 0:
            return response['body']
        raise ExtractorError(
            f'Mildom says: {response["message"]} (code {response["code"]})',
            expected=True)
 class MildomIE(MildomBaseIE):
    """Extractor for an ongoing Mildom live stream, addressed by user ID."""

    IE_NAME = 'mildom'
    IE_DESC = 'Record ongoing live by specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/(?P<id>\d+)'

    def _real_extract(self, url):
        """Resolve the user's live session, stream server and token into HLS formats."""
        user_id = self._match_id(url)
        site_root = 'https://www.mildom.com/'
        webpage = self._download_webpage(f'https://www.mildom.com/{user_id}', user_id)

        studio = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', user_id,
            note='Downloading live metadata', query={'user_id': user_id})
        # Prefer the session's log_id as the video ID, falling back to the user ID
        live_id = studio.get('log_id', user_id)

        server_info = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', live_id,
            note='Downloading live server list', query={
                'user_id': user_id,
                'live_server_type': 'hls',
            })

        token_reply = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/token', live_id,
            note='Obtaining live playback token', body={'host_id': user_id, 'type': 'hls'})
        token = traverse_obj(token_reply, ('data', ..., 'token'), get_all=False)
        if not token:
            raise ExtractorError('Failed to obtain live playback token')

        formats = self._extract_m3u8_formats(
            f'{server_info["stream_server"]}/{user_id}_master.m3u8?{token}',
            live_id, 'mp4', headers={
                'Referer': site_root,
                'Origin': 'https://www.mildom.com',
            })
        # Attach the Referer to every format as well
        for stream in formats:
            stream.setdefault('http_headers', {})['Referer'] = site_root

        title = (
            self._html_search_meta('twitter:description', webpage, default=None)
            or traverse_obj(studio, 'anchor_intro'))
        uploader = (
            self._html_search_meta('twitter:title', webpage, default=None)
            or traverse_obj(studio, 'loginname'))
        return {
            'id': live_id,
            'title': title,
            'description': traverse_obj(studio, 'intro', 'live_intro', expected_type=str),
            'timestamp': float_or_none(studio.get('live_start_ms'), scale=1000),
            'uploader': uploader,
            'uploader_id': user_id,
            'formats': formats,
            'is_live': True,
        }
 class MildomVodIE(MildomBaseIE):
    """Extractor for a single Mildom VOD (playback) page."""

    IE_NAME = 'mildom:vod'
    IE_DESC = 'VOD in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+-?[0-9]*)'
    _TESTS = [{
        'url': 'https://www.mildom.com/playback/10882672/10882672-1597662269',
        'info_dict': {
            'id': '10882672-1597662269',
            'ext': 'mp4',
            'title': '始めてのミルダム配信じゃぃ！',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'upload_date': '20200817',
            'duration': 4138.37,
            'description': 'ゲームをしたくて！',
            'timestamp': 1597662269.0,
            'uploader_id': '10882672',
            'uploader': 'kson組長(けいそん)',
        },
    }, {
        'url': 'https://www.mildom.com/playback/10882672/10882672-1597758589870-477',
        'info_dict': {
            'id': '10882672-1597758589870-477',
            'ext': 'mp4',
            'title': '【kson】感染メイズ！麻酔銃で無双する',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'timestamp': 1597759093.0,
            'uploader': 'kson組長(けいそん)',
            'duration': 4302.58,
            'uploader_id': '10882672',
            'description': 'このステージ絶対乗り越えたい',
            'upload_date': '20200818',
        },
    }, {
        'url': 'https://www.mildom.com/playback/10882672/10882672-buha9td2lrn97fk2jme0',
        'info_dict': {
            'id': '10882672-buha9td2lrn97fk2jme0',
            'ext': 'mp4',
            'title': '【kson組長】CART RACER!!!',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'uploader_id': '10882672',
            'uploader': 'kson組長(けいそん)',
            'upload_date': '20201104',
            'timestamp': 1604494797.0,
            'duration': 4657.25,
            'description': 'WTF',
        },
    }]

    def _real_extract(self, url):
        """Fetch playback details and expose the audio stream plus each video rendition."""
        user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
        webpage = self._download_webpage(f'https://www.mildom.com/playback/{user_id}/{video_id}', video_id)
        playback = self._call_api(
            'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id,
            note='Downloading playback metadata', query={
                'v_id': video_id,
            })['playback']

        audio_only = {
            'url': playback['audio_url'],
            'format_id': 'audio',
            'protocol': 'm3u8_native',
            'vcodec': 'none',
            'acodec': 'aac',
            'ext': 'm4a',
        }
        # Width is derived from each rendition's level (used as height) and
        # the aspect ratio reported for the playback
        renditions = [{
            'format_id': 'video-{}'.format(rendition['name']),
            'url': rendition['url'],
            'protocol': 'm3u8_native',
            'width': rendition['level'] * playback['video_width'] // playback['video_height'],
            'height': rendition['level'],
            'vcodec': 'h264',
            'acodec': 'aac',
            'ext': 'mp4',
        } for rendition in playback['video_link']]

        page_title = self._html_search_meta(('og:description', 'description'), webpage, default=None)
        return {
            'id': video_id,
            'title': page_title or playback.get('title'),
            'description': traverse_obj(playback, 'video_intro'),
            'timestamp': float_or_none(playback.get('publish_time'), scale=1000),
            'duration': float_or_none(playback.get('video_length'), scale=1000),
            'thumbnail': dict_get(playback, ('upload_pic', 'video_pic')),
            'uploader': traverse_obj(playback, ('author_info', 'login_name')),
            'uploader_id': user_id,
            'formats': [audio_only, *renditions],
        }
 class MildomClipIE(MildomBaseIE):
    """Extractor for a single Mildom clip."""

    IE_NAME = 'mildom:clip'
    IE_DESC = 'Clip in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/clip/(?P<id>(?P<user_id>\d+)-[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'https://www.mildom.com/clip/10042245-63921673e7b147ebb0806d42b5ba5ce9',
        'info_dict': {
            'id': '10042245-63921673e7b147ebb0806d42b5ba5ce9',
            'title': '全然違ったよ',
            'timestamp': 1619181890,
            'duration': 59,
            'thumbnail': r're:https?://.+',
            'uploader': 'ざきんぽ',
            'uploader_id': '10042245',
        },
    }, {
        'url': 'https://www.mildom.com/clip/10111524-ebf4036e5aa8411c99fb3a1ae0902864',
        'info_dict': {
            'id': '10111524-ebf4036e5aa8411c99fb3a1ae0902864',
            'title': 'かっこいい',
            'timestamp': 1621094003,
            'duration': 59,
            'thumbnail': r're:https?://.+',
            'uploader': '(ルーキー',
            'uploader_id': '10111524',
        },
    }, {
        'url': 'https://www.mildom.com/clip/10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
        'info_dict': {
            'id': '10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
            'title': 'あ',
            'timestamp': 1614769431,
            'duration': 31,
            'thumbnail': r're:https?://.+',
            'uploader': 'ドルゴルスレンギーン＝ダグワドルジ',
            'uploader_id': '10660174',
        },
    }]

    def _real_extract(self, url):
        """Combine the clip page's meta title with the clip detail API reply."""
        user_id, clip_id = self._match_valid_url(url).group('user_id', 'id')
        webpage = self._download_webpage(f'https://www.mildom.com/clip/{clip_id}', clip_id)
        detail = self._call_api(
            'https://cloudac-cf-jp.mildom.com/nonolive/videocontent/clip/detail', clip_id,
            note='Downloading playback metadata', query={
                'clip_id': clip_id,
            })

        page_title = self._html_search_meta(
            ('og:description', 'description'), webpage, default=None)
        return {
            'id': clip_id,
            'title': page_title or detail.get('title'),
            'timestamp': float_or_none(detail.get('create_time')),
            'duration': float_or_none(detail.get('length')),
            'thumbnail': detail.get('cover'),
            'uploader': traverse_obj(detail, ('user_info', 'loginname')),
            'uploader_id': user_id,
            # The media URL is mandatory; a reply without it raises KeyError
            'url': detail['url'],
            'ext': determine_ext(detail.get('url'), 'mp4'),
        }
 class MildomUserVodIE(MildomBaseIE):
    """Playlist extractor listing every VOD on a Mildom user profile."""

    IE_NAME = 'mildom:user:vod'
    IE_DESC = 'Download all VODs from specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/profile/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.mildom.com/profile/10093333',
        'info_dict': {
            'id': '10093333',
            'title': 'Uploads from ねこばたけ',
        },
        'playlist_mincount': 732,
    }, {
        'url': 'https://www.mildom.com/profile/10882672',
        'info_dict': {
            'id': '10882672',
            'title': 'Uploads from kson組長(けいそん)',
        },
        'playlist_mincount': 201,
    }]

    def _fetch_page(self, user_id, page):
        """Yield url results for one page of the user's playback list.

        ``page`` is zero-based; the API is queried with ``page + 1``.
        """
        page_number = page + 1
        listing = self._call_api(
            'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
            user_id, note=f'Downloading page {page_number}', query={
                'user_id': user_id,
                'page': page_number,
                'limit': '30',
            })
        # An empty or missing reply yields nothing; entries lacking a v_id are skipped
        for item in listing or ():
            vod_id = item.get('v_id')
            if vod_id:
                yield self.url_result(f'https://www.mildom.com/playback/{user_id}/{vod_id}')

    def _real_extract(self, url):
        """Return a lazily paged playlist of the user's VODs."""
        user_id = self._match_id(url)
        self.to_screen(f'This will download all VODs belonging to user. To download ongoing live video, use "https://www.mildom.com/{user_id}" instead')

        user_info = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id,
            query={'user_id': user_id}, note='Downloading user profile')['user_info']

        pages = OnDemandPagedList(functools.partial(self._fetch_page, user_id), 30)
        return self.playlist_result(
            pages, user_id, f'Uploads from {user_info["loginname"]}')
--- a/yt_dlp/extractor/pialive.py
+++ b/yt_dlp/extractor/pialive.py
@ -0,0 +1,122 @@
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    clean_html,
    extract_attributes,
    get_element_by_class,
    get_element_html_by_class,
    multipart_encode,
    str_or_none,
    unified_timestamp,
    url_or_none,
 )
 from ..utils.traversal import traverse_obj
 class PiaLiveIE(InfoExtractor):
    """Extractor for PIA LIVE STREAM player pages (player.pia-live.jp)."""

    _VALID_URL = r'https?://player\.pia-live\.jp/stream/(?P<id>[\w-]+)'
    _PLAYER_ROOT_URL = 'https://player.pia-live.jp/'
    _PIA_LIVE_API_URL = 'https://api.pia-live.jp'
    _API_KEY = 'kfds)FKFps-dms9e'
    _TESTS = [{
        'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krUDqGOwN4d61dCWQYOd6CTxl4hjya9dsfEZGsM4uGOUdax60lEI4twsXGXf7crmz8Gk__GhupTrWxA7RFRVt76',
        'info_dict': {
            'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
            'display_id': '2431867_001',
            'title': 'こながめでたい日２０２４の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
            'live_status': 'was_live',
            'comment_count': int,
        },
        'params': {
            'getcomments': True,
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
        'skip': 'The video is no longer available',
    }, {
        'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krJdu0GVBVbVy01IwpJ6J3qBEm3d9TCTt1d0eWpsZGj7DrOjVOmS7GAWGwyscMgiThopJvzgWC4H5b-7XQjAfRZ',
        'info_dict': {
            'id': '9ce8b8ba-f6d1-4d1f-83a0-18c3148ded93',
            'display_id': '2431867_002',
            'title': 'こながめでたい日２０２４の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
            'live_status': 'was_live',
            'comment_count': int,
        },
        'params': {
            'getcomments': True,
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
        'skip': 'The video is no longer available',
    }]

    def _extract_var(self, variable, html):
        """Return the quoted string assigned to a JS ``var``/``const``/``let`` named ``variable``."""
        return self._search_regex(
            rf'(?:var|const|let)\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
            html, f'variable {variable}', group='value')

    def _real_extract(self, url):
        """Resolve the player page to the embedded player URL.

        Raises ExtractorError (expected) when the page shows the ``play-end``
        element. When a parsable start date is shown, an ``is_upcoming``
        result is returned instead. Otherwise the result is a
        url_transparent hand-off to the ``src`` of the API's player tag.
        """
        video_key = self._match_id(url)
        webpage = self._download_webpage(url, video_key)
        program_code = self._extract_var('programCode', webpage)
        article_code = self._extract_var('articleCode', webpage)
        title = self._html_extract_title(webpage)

        if get_element_html_by_class('play-end', webpage):
            raise ExtractorError('The video is no longer available', expected=True, video_id=program_code)

        if start_info := clean_html(get_element_by_class('play-waiting__date', webpage)):
            # With fatal=False a failed match returns None rather than a tuple,
            # so fall back to a pair of Nones before unpacking
            date, time = self._search_regex(
                r'(?P<date>\d{4}/\d{1,2}/\d{1,2})\([月火水木金土日]\)(?P<time>\d{2}:\d{2})',
                start_info, 'start_info', fatal=False, group=('date', 'time')) or (None, None)
            if date and time:
                release_timestamp_str = f'{date} {time} +09:00'
                release_timestamp = unified_timestamp(release_timestamp_str)
                self.raise_no_formats(f'The video will be available after {release_timestamp_str}', expected=True)
                return {
                    'id': program_code,
                    'title': title,
                    'live_status': 'is_upcoming',
                    'release_timestamp': release_timestamp,
                }

        # The same multipart payload and headers are reused for the chat API call
        payload, content_type = multipart_encode({
            'play_url': video_key,
            'api_key': self._API_KEY,
        })
        api_data_and_headers = {
            'data': payload,
            'headers': {'Content-Type': content_type, 'Referer': self._PLAYER_ROOT_URL},
        }

        player_tag_list = self._download_json(
            f'{self._PIA_LIVE_API_URL}/perf/player-tag-list/{program_code}', program_code,
            'Fetching player tag list', 'Unable to fetch player tag list', **api_data_and_headers)

        return self.url_result(
            extract_attributes(player_tag_list['data']['movie_one_tag'])['src'],
            url_transparent=True, title=title, display_id=program_code,
            __post_extractor=self.extract_comments(program_code, article_code, api_data_and_headers))

    def _get_comments(self, program_code, article_code, api_data_and_headers):
        """Yield comments parsed from the chat page's ``_history`` array.

        Every request here is non-fatal; nothing is yielded when the chat URL
        or the comment page cannot be obtained.
        """
        chat_room_url = traverse_obj(self._download_json(
            f'{self._PIA_LIVE_API_URL}/perf/chat-tag-list/{program_code}/{article_code}', program_code,
            'Fetching chat info', 'Unable to fetch chat info', fatal=False, **api_data_and_headers),
            ('data', 'chat_one_tag', {extract_attributes}, 'src', {url_or_none}))
        if not chat_room_url:
            return

        comment_page = self._download_webpage(
            chat_room_url, program_code, 'Fetching comment page', 'Unable to fetch comment page',
            fatal=False, headers={'Referer': self._PLAYER_ROOT_URL})
        if not comment_page:
            return

        # Each history entry is read positionally: indices 0-4 map to the fields below
        yield from traverse_obj(self._search_json(
            r'var\s+_history\s*=', comment_page, 'comment list',
            program_code, contains_pattern=r'\[(?s:.+)\]', fatal=False), (..., {
                'timestamp': (0, {int}),
                'author_is_uploader': (1, {lambda x: x == 2}),
                'author': (2, {str}),
                'text': (3, {str}),
                'id': (4, {str_or_none}),
            }))
--- a/yt_dlp/extractor/piaulizaportal.py
+++ b/yt_dlp/extractor/piaulizaportal.py
@ -1,70 +0,0 @@
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    int_or_none,
    parse_qs,
    time_seconds,
    traverse_obj,
 )
 class PIAULIZAPortalIE(InfoExtractor):
    """Extractor for ulizaportal.jp pages (PIA LIVE STREAM)."""

    IE_DESC = 'ulizaportal.jp - PIA LIVE STREAM'
    _VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
    _TESTS = [{
        'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
        'info_dict': {
            'id': '005f18b7-e810-5618-cb82-0987c5755d44',
            'title': 'プレゼンテーションプレイヤーのサンプル',
            'live_status': 'not_live',
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }, {
        'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
        'info_dict': {
            'id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
            'title': '【確認用】視聴サンプルページ（ULIZA）',
            'live_status': 'not_live',
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }]

    def _real_extract(self, url):
        """Follow the page's player script to the m3u8 manifest and classify its live status.

        Raises ExtractorError (expected) when the URL's ``expires`` query
        value is not in the future.
        """
        video_id = self._match_id(url)

        expiry = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
        if expiry and expiry <= time_seconds():
            raise ExtractorError('The link is expired.', video_id=video_id, expected=True)

        webpage = self._download_webpage(url, video_id)
        player_script_url = self._search_regex(
            r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
            webpage, 'player data url')
        player_data = self._download_webpage(
            player_script_url, video_id, headers={'Referer': 'https://ulizaportal.jp/'},
            note='Fetching player data', errnote='Unable to fetch player data')

        manifest_url = self._search_regex(
            r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data,
            'm3u8 url', default=None)
        formats = self._extract_m3u8_formats(manifest_url, video_id, fatal=False)

        # The path segment of the first format's URL tells the stream kind
        m3u8_type = self._search_regex(
            r'/hls/(dvr|video)/', traverse_obj(formats, (0, 'url')), 'm3u8 type', default=None)
        status_by_type = {
            'video': 'is_live',
            'dvr': 'was_live',  # short-term archives
        }

        return {
            'id': video_id,
            'title': self._html_extract_title(webpage),
            'formats': formats,
            'live_status': status_by_type.get(m3u8_type, 'not_live'),  # VOD or long-term archives
        }
--- a/yt_dlp/extractor/pokemon.py
+++ b/yt_dlp/extractor/pokemon.py
@ -1,136 +0,0 @@
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    extract_attributes,
    int_or_none,
    js_to_json,
    merge_dicts,
 )
 class PokemonIE(InfoExtractor):
    """Extractor for pokemon.com episode pages; playback is delegated to LimelightMedia."""

    _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))'
    _TESTS = [{
        'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/',
        'md5': '2fe8eaec69768b25ef898cda9c43062e',
        'info_dict': {
            'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4',
            'ext': 'mp4',
            'title': 'The Ol’ Raise and Switch!',
            'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
        },
        'add_id': ['LimelightMedia'],
    }, {
        # no data-video-title
        'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008',
        'info_dict': {
            'id': 'dfbaf830d7e54e179837c50c0c6cc0e1',
            'ext': 'mp4',
            'title': "Pokémon : L'ascension de Darkrai",
            'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5',
        },
        'add_id': ['LimelightMedia'],
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
        'only_matching': True,
    }, {
        'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/',
        'only_matching': True,
    }, {
        'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Read the ``data-video-*`` attributes of the page's video element."""
        video_id, display_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, video_id or display_id)

        # Without an ID in the URL, accept any 32-character ID found on the page
        id_pattern = video_id if video_id else '[a-z0-9]{32}'
        element = self._search_regex(
            rf'(<[^>]+data-video-id="{id_pattern}"[^>]*>)',
            webpage, 'video data element')
        attrs = extract_attributes(element)
        video_id = attrs['data-video-id']

        # Title fallbacks: element attribute, then pkm-title meta, then the <h1>
        title = (
            attrs.get('data-video-title')
            or self._html_search_meta('pkm-title', webpage, ' title', default=None)
            or self._search_regex(
                r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title'))

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': f'limelight:media:{video_id}',
            'title': title,
            'description': attrs.get('data-video-summary'),
            'thumbnail': attrs.get('data-video-poster'),
            'series': 'Pokémon',
            'season_number': int_or_none(attrs.get('data-video-season')),
            'episode': title,
            'episode_number': int_or_none(attrs.get('data-video-episode')),
            'ie_key': 'LimelightMedia',
        }
 class PokemonWatchIE(InfoExtractor):
    """Extractor for watch.pokemon.com player URLs; playback is delegated to LimelightMedia."""

    _VALID_URL = r'https?://watch\.pokemon\.com/[a-z]{2}-[a-z]{2}/(?:#/)?player(?:\.html)?\?id=(?P<id>[a-z0-9]{32})'
    _API_URL = 'https://www.pokemon.com/api/pokemontv/v2/channels/{0:}'
    _TESTS = [{
        'url': 'https://watch.pokemon.com/en-us/player.html?id=8309a40969894a8e8d5bc1311e9c5667',
        'md5': '62833938a31e61ab49ada92f524c42ff',
        'info_dict': {
            'id': '8309a40969894a8e8d5bc1311e9c5667',
            'ext': 'mp4',
            'title': 'Lillier and the Staff!',
            'description': 'md5:338841b8c21b283d24bdc9b568849f04',
        },
    }, {
        'url': 'https://watch.pokemon.com/en-us/#/player?id=3fe7752ba09141f0b0f7756d1981c6b2',
        'only_matching': True,
    }, {
        'url': 'https://watch.pokemon.com/de-de/player.html?id=b3c402e111a4459eb47e12160ab0ba07',
        'only_matching': True,
    }]

    def _extract_media(self, channel_array, video_id):
        """Return the media dict whose ``id`` equals ``video_id``, or None if absent."""
        for channel in channel_array:
            # A channel without a 'media' list is skipped instead of raising TypeError
            for media in channel.get('media') or []:
                if media.get('id') == video_id:
                    return media
        return None

    def _real_extract(self, url):
        """Return a LimelightMedia hand-off enriched with metadata from the channels API.

        When only listing formats, the bare url result is returned without
        any network request. Raises ExtractorError (expected) when no
        channel contains the requested video.
        """
        video_id = self._match_id(url)

        info = {
            '_type': 'url',
            'id': video_id,
            'url': f'limelight:media:{video_id}',
            'ie_key': 'LimelightMedia',
        }

        # API call can be avoided entirely if we are listing formats
        if self.get_param('listformats', False):
            return info

        webpage = self._download_webpage(url, video_id)
        build_vars = self._parse_json(self._search_regex(
            r'(?s)buildVars\s*=\s*({.*?})', webpage, 'build vars'),
            video_id, transform_source=js_to_json)
        region = build_vars.get('region')

        channel_array = self._download_json(self._API_URL.format(region), video_id)
        video_data = self._extract_media(channel_array, video_id)

        if video_data is None:
            raise ExtractorError(
                f'Video {video_id} does not exist', expected=True)

        info['_type'] = 'url_transparent'
        # Tolerate entries without images so the thumbnail lookup cannot fail on None
        images = video_data.get('images') or {}

        return merge_dicts(info, {
            'title': video_data.get('title'),
            'description': video_data.get('description'),
            'thumbnail': images.get('medium') or images.get('small'),
            'series': 'Pokémon',
            'season_number': int_or_none(video_data.get('season')),
            'episode': video_data.get('title'),
            'episode_number': int_or_none(video_data.get('episode')),
        })
--- a/yt_dlp/extractor/uliza.py
+++ b/yt_dlp/extractor/uliza.py
@ -0,0 +1,113 @@
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    int_or_none,
    make_archive_id,
    parse_qs,
    time_seconds,
 )
 from ..utils.traversal import traverse_obj
 class UlizaPlayerIE(InfoExtractor):
    """Extractor for ULIZA player API URLs (player-api.p.uliza.jp)."""

    _VALID_URL = r'https://player-api\.p\.uliza\.jp/v1/players/[^?#]+\?(?:[^#]*&)?name=(?P<id>[^#&]+)'
    _TESTS = [{
        'url': 'https://player-api.p.uliza.jp/v1/players/timeshift-disabled/pia/admin?type=normal&playerobjectname=ulizaPlayer&name=livestream01_dvr&repeatable=true',
        'info_dict': {
            'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
            'ext': 'mp4',
            'title': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
            'live_status': 'was_live',
            '_old_archive_ids': ['piaulizaportal 88f3109a-f503-4d0f-a9f7-9f39ac745d84'],
        },
    }, {
        'url': 'https://player-api.p.uliza.jp/v1/players/uliza_jp_gallery_normal/promotion/admin?type=presentation&name=cookings&targetid=player1',
        'info_dict': {
            'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
            'ext': 'mp4',
            'title': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
            'live_status': 'not_live',
            '_old_archive_ids': ['piaulizaportal ae350126-5e22-4a7f-a8ac-8d0fd448b800'],
        },
    }, {
        'url': 'https://player-api.p.uliza.jp/v1/players/default-player/pia/admin?type=normal&name=pia_movie_uliza_fix&targetid=ulizahtml5&repeatable=true',
        'info_dict': {
            'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
            'ext': 'mp4',
            'title': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
            'live_status': 'not_live',
            '_old_archive_ids': ['piaulizaportal 0644ecc8-e354-41b4-b957-3b08a2d63df1'],
        },
    }]

    def _real_extract(self, url):
        """Locate the m3u8 manifest inside the player data and extract its formats.

        The video ID is the manifest URL's ``ss`` query value when present,
        otherwise the ``name`` value matched from the player URL.
        """
        display_id = self._match_id(url)
        player_data = self._download_webpage(
            url, display_id, headers={'Referer': 'https://player-api.p.uliza.jp/'},
            note='Fetching player data', errnote='Unable to fetch player data')

        manifest_url = self._search_regex(
            r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data, 'm3u8 url')
        video_id = parse_qs(manifest_url).get('ss', [display_id])[0]
        formats = self._extract_m3u8_formats(manifest_url, video_id)

        # The path segment of the first format's URL tells the stream kind
        m3u8_type = self._search_regex(
            r'/hls/(dvr|video)/', traverse_obj(formats, (0, 'url')), 'm3u8 type', default=None)
        status_by_type = {
            'video': 'is_live',
            'dvr': 'was_live',  # short-term archives
        }

        return {
            'id': video_id,
            'title': video_id,
            'formats': formats,
            'live_status': status_by_type.get(m3u8_type, 'not_live'),  # VOD or long-term archives
            '_old_archive_ids': [make_archive_id('PIAULIZAPortal', video_id)],
        }
 class UlizaPortalIE(InfoExtractor):
    """Extractor for ulizaportal.jp pages; hands the embedded player off to UlizaPlayerIE."""

    IE_DESC = 'ulizaportal.jp'
    _VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
    _TESTS = [{
        'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
        'info_dict': {
            'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
            'display_id': '005f18b7-e810-5618-cb82-0987c5755d44',
            'title': 'プレゼンテーションプレイヤーのサンプル',
            'live_status': 'not_live',
            '_old_archive_ids': ['piaulizaportal ae350126-5e22-4a7f-a8ac-8d0fd448b800'],
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }, {
        'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
        'info_dict': {
            'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
            'display_id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
            'title': '【確認用】視聴サンプルページ（ULIZA）',
            'live_status': 'not_live',
            '_old_archive_ids': ['piaulizaportal 0644ecc8-e354-41b4-b957-3b08a2d63df1'],
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }]

    def _real_extract(self, url):
        """Find the player script URL on the page and return it as a url_transparent result.

        Raises ExtractorError (expected) when the URL's ``expires`` query
        value is not in the future.
        """
        page_id = self._match_id(url)

        expiry = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
        if expiry and expiry <= time_seconds():
            raise ExtractorError('The link is expired', video_id=page_id, expected=True)

        webpage = self._download_webpage(url, page_id)
        player_script_url = self._search_regex(
            r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
            webpage, 'player data url')
        page_title = self._html_extract_title(webpage)

        # The page's UUID is kept as display_id only
        return self.url_result(
            player_script_url, UlizaPlayerIE, url_transparent=True,
            display_id=page_id, video_title=page_title)
--- a/yt_dlp/extractor/veoh.py
+++ b/yt_dlp/extractor/veoh.py
@ -1,189 +0,0 @@
 import functools
 import json
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    OnDemandPagedList,
    int_or_none,
    parse_duration,
    qualities,
    remove_start,
    strip_or_none,
 )
 class VeohIE(InfoExtractor):
    """Extractor for single veoh.com videos.

    Handles the ``watch``, ``videos``, ``embed`` and ``iphone/#_Watch`` URL
    forms; the captured id starts with ``v``, ``e`` or ``yapi-``.
    """

    _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|videos|embed|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'
    _TESTS = [{
        'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
        'md5': '620e68e6a3cff80086df3348426c9ca3',
        'info_dict': {
            'id': 'v56314296nk7Zdmz3',
            'ext': 'mp4',
            'title': 'Straight Backs Are Stronger',
            'description': 'md5:203f976279939a6dc664d4001e13f5f4',
            'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th56314296\\.jpg(\\?.*)?',
            'uploader': 'LUMOback',
            'duration': 46,
            'view_count': int,
            'average_rating': int,
            'comment_count': int,
            'age_limit': 0,
            'categories': ['technology_and_gaming'],
            'tags': ['posture', 'posture', 'sensor', 'back', 'pain', 'wearable', 'tech', 'lumo'],
        },
    }, {
        'url': 'http://www.veoh.com/embed/v56314296nk7Zdmz3',
        'only_matching': True,
    }, {
        'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
        'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
        'info_dict': {
            'id': '27701988',
            'ext': 'mp4',
            'title': 'Chile workers cover up to avoid skin damage',
            'description': 'md5:2bd151625a60a32822873efc246ba20d',
            'uploader': 'afp-news',
            'duration': 123,
        },
        'skip': 'This video has been deleted.',
    }, {
        'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
        'md5': '4fde7b9e33577bab2f2f8f260e30e979',
        'note': 'Embedded ooyala video',
        'info_dict': {
            'id': '69525809',
            'ext': 'mp4',
            'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
            'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
            'uploader': 'newsy-videos',
        },
        'skip': 'This video has been deleted.',
    }, {
        'url': 'http://www.veoh.com/watch/e152215AJxZktGS',
        'only_matching': True,
    }, {
        'url': 'https://www.veoh.com/videos/v16374379WA437rMH',
        'md5': 'cceb73f3909063d64f4b93d4defca1b3',
        'info_dict': {
            'id': 'v16374379WA437rMH',
            'ext': 'mp4',
            'title': 'Phantasmagoria 2, pt. 1-3',
            'description': 'Phantasmagoria: a Puzzle of Flesh',
            'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th16374379\\.jpg(\\?.*)?',
            'uploader': 'davidspackage',
            'duration': 968,
            'view_count': int,
            'average_rating': int,
            'comment_count': int,
            'age_limit': 18,
            'categories': ['technology_and_gaming', 'gaming'],
            'tags': ['puzzle', 'of', 'flesh'],
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for one video from the ``getVideo`` JSON endpoint.

        The ``video`` object and its ``title`` are mandatory (a missing key
        raises ``KeyError``); every other field is optional and falls back to
        ``None`` or an empty list when absent.
        """
        video_id = self._match_id(url)
        metadata = self._download_json(
            'https://www.veoh.com/watch/getVideo/' + video_id,
            video_id)
        video = metadata['video']
        title = video['title']

        thumbnail_url = None
        # NOTE(review): presumably ranks 'HQ' above 'Regular' - confirm against `qualities`
        q = qualities(['Regular', 'HQ'])
        formats = []
        # `or {}` instead of a `.get()` default: the default only applies when the
        # key is absent, so an explicit JSON null would otherwise raise AttributeError.
        for f_id, f_url in (video.get('src') or {}).items():
            if not f_url:
                continue
            # The 'poster' entry is the thumbnail, every other entry is a media URL
            if f_id == 'poster':
                thumbnail_url = f_url
            else:
                formats.append({
                    'format_id': f_id,
                    'quality': q(f_id),
                    'url': f_url,
                })

        # Prefer the top-level category path; otherwise fall back to the single
        # per-video category with its 'category_' prefix removed.
        categories = metadata.get('categoryPath')
        if not categories:
            category = remove_start(strip_or_none(video.get('category')), 'category_')
            categories = [category] if category else None

        tags = video.get('tags')

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': thumbnail_url,
            # Same null-safety as for 'src' above
            'uploader': (video.get('author') or {}).get('nickname'),
            'duration': int_or_none(video.get('lengthBySec')) or parse_duration(video.get('length')),
            'view_count': int_or_none(video.get('views')),
            'formats': formats,
            'average_rating': int_or_none(video.get('rating')),
            'comment_count': int_or_none(video.get('numOfComments')),
            # contentRatingId 2 is treated as adult content
            'age_limit': 18 if video.get('contentRatingId') == 2 else 0,
            'categories': categories,
            # Tags arrive as one ', '-separated string
            'tags': tags.split(', ') if tags else None,
        }
 class VeohUserIE(VeohIE):  # XXX: Do not subclass from concrete IE
    """Playlist extractor for the published videos of a veoh.com user."""

    IE_NAME = 'veoh:user'
    _VALID_URL = r'https?://(?:www\.)?veoh\.com/users/(?P<id>[\w-]+)'
    # Number of videos requested per page
    _PAGE_SIZE = 16
    _TESTS = [{
        'url': 'https://www.veoh.com/users/valentinazoe',
        'info_dict': {
            'id': 'valentinazoe',
            'title': 'valentinazoe (Uploads)',
        },
        'playlist_mincount': 75,
    }, {
        'url': 'https://www.veoh.com/users/PiensaLibre',
        'info_dict': {
            'id': 'PiensaLibre',
            'title': 'PiensaLibre (Uploads)',
        },
        'playlist_mincount': 2,
    }]

    def _real_initialize(self):
        """Fetch the site's front page and store its CSRF token in ``self._TOKEN``."""
        homepage = self._download_webpage(
            'https://www.veoh.com', None, note='Downloading authorization token')
        # The token is a quoted 40-character alphanumeric string after "csrfToken:"
        self._TOKEN = self._search_regex(
            r'csrfToken:\s*(["\'])(?P<token>[0-9a-zA-Z]{40})\1', homepage,
            'request token', group='token')

    def _fetch_page(self, uploader, page):
        """Yield one url result per video on the given zero-based page of a user's uploads.

        Raises:
            ExtractorError: when the response does not report success; the
                response's ``message`` is used as the error text.
        """
        # `page` is zero-based, while the request and the progress note use one-based numbers
        page_number = page + 1
        payload = {
            'username': uploader,
            'maxResults': self._PAGE_SIZE,
            'page': page_number,
            'requestName': 'userPage',
        }
        request_headers = {
            'x-csrf-token': self._TOKEN,
            'content-type': 'application/json;charset=UTF-8',
        }
        response = self._download_json(
            'https://www.veoh.com/users/published/videos', uploader,
            note=f'Downloading videos page {page_number}',
            headers=request_headers,
            data=json.dumps(payload).encode())

        if not response.get('success'):
            raise ExtractorError(response['message'])

        for video in response['videos']:
            permalink_id = video['permalinkId']
            yield self.url_result(
                f'https://www.veoh.com/watch/{permalink_id}', VeohIE,
                permalink_id, video.get('title'))

    def _real_extract(self, url):
        """Return a paged playlist of the user's uploads, titled ``<user> (Uploads)``."""
        uploader = self._match_id(url)
        # Pages are only fetched as the paged list is consumed
        fetch_page = functools.partial(self._fetch_page, uploader)
        entries = OnDemandPagedList(fetch_page, self._PAGE_SIZE)
        return self.playlist_result(entries, uploader, f'{uploader} (Uploads)')
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@ -5087,7 +5087,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
    def _rich_entries(self, rich_grid_renderer):
        renderer = traverse_obj(
            rich_grid_renderer,
-            ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer', 'shortsLockupViewModel'), any)) or {}
+            ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer', 'shortsLockupViewModel', 'lockupViewModel'), any)) or {}
        video_id = renderer.get('videoId')
        if video_id:
            yield self._extract_video(renderer)
@ -5114,6 +5114,18 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
                })),
                thumbnails=self._extract_thumbnails(renderer, 'thumbnail', final_key='sources'))
            return
        # lockupViewModel extraction
        content_id = renderer.get('contentId')
        if content_id and renderer.get('contentType') == 'LOCKUP_CONTENT_TYPE_PODCAST':
            yield self.url_result(
                f'https://www.youtube.com/playlist?list={content_id}',
                ie=YoutubeTabIE, video_id=content_id,
                **traverse_obj(renderer, {
                    'title': ('metadata', 'lockupMetadataViewModel', 'title', 'content', {str}),
                }),
                thumbnails=self._extract_thumbnails(renderer, (
                    'contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail', 'thumbnailViewModel', 'image'), final_key='sources'))
            return
    def _video_entry(self, video_renderer):
        video_id = video_renderer.get('videoId')
@ -6706,22 +6718,22 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
        },
        'playlist_count': 0,
    }, {
-        # Podcasts tab, with rich entry playlistRenderers
+        # Podcasts tab, with rich entry lockupViewModel
        'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
        'info_dict': {
            'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
            'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
            'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
            'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
-            'title': '99 Percent Invisible - Podcasts',
+            'title': '99% Invisible - Podcasts',
-            'uploader': '99 Percent Invisible',
+            'uploader': '99% Invisible',
            'channel_follower_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
            'tags': [],
-            'channel': '99 Percent Invisible',
+            'channel': '99% Invisible',
            'uploader_id': '@99percentinvisiblepodcast',
        },
-        'playlist_count': 0,
+        'playlist_count': 5,
    }, {
        # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
        'url': 'https://www.youtube.com/@AHimitsu/releases',
Author	SHA1	Message	Date
N/Ame	f069573aed	Merge `c59ce7d6a6` into `f9d98509a8`	2024-11-17 20:50:56 +01:00
qbnu	f9d98509a8	[ie/ctvnews] Fix playlist ID extraction (#8892 ) Authored by: qbnu	2024-11-17 19:35:10 +00:00
sepro	37cd7660ea	[ie/youtube:tab] Fix podcasts tab extraction (#11567 ) Authored by: seproDev	2024-11-17 19:46:04 +01:00
ChocoLZS	d867f99622	[ie/PiaLive] Add extractor (#10811 ) Authored by: ChocoLZS	2024-11-17 19:41:57 +01:00
doe1080	10fc719bc7	[cleanup] Remove dead extractors (#11566 ) - Removes MildomClipIE, MildomIE, MildomUserVodIE, MildomVodIE - Removes PokemonIE, PokemonWatchIE - Removes VeohIE, VeohUserIE Closes #3373, Closes #7059 Authored by: doe1080	2024-11-17 16:22:40 +00:00
krichbanana	eb15fd5a32	[ie/kenh14] Add extractor (#3996 ) Closes #3937 Authored by: krichbanana, pzhlkj6612 Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com>	2024-11-17 14:12:26 +00:00
sepro	7cecd299e4	[ie/chaturbate] Don't break embed detection (#11565 ) Bugfix for `720b3dc453` Authored by: seproDev	2024-11-17 13:32:12 +01:00
grqx_wsl	c59ce7d6a6	[ie/boomplaypodcast] use the base extractor's method to extract title	2024-11-06 01:04:36 +13:00
grqx_wsl	bd857a06a0	fix: do not use classmethod; fix title in the base extractor	2024-11-06 00:10:40 +13:00
grqx_wsl	c58ee488a9	simplify BoomplayGenericPlaylistIE.suitable	2024-11-05 13:31:53 +13:00
grqx_wsl	eacad11a5a	code formatting	2024-11-05 00:18:14 +13:00
grqx_wsl	d69a1be537	_urljoin(): let url_or_none sanitize the url; more classmethods	2024-11-04 23:17:26 +13:00
grqx_wsl	5cbf04763b	Merge remote-tracking branch 'upstream/master' into boomplay	2024-11-04 17:57:49 +13:00
grqx_wsl	901e78af62	improve regex	2024-11-04 14:19:52 +13:00
grqx_wsl	9a6f9843c0	use _extract_from_webpage and _extract_embed_urls - `_extract_playlist_entries` is now a `classmethod` - case insensitive html tag matching Co-authored-by: dirkf <fieldhouse@gmx.net>	2024-11-04 14:09:42 +13:00
grqx_wsl	8ef2294282	case insensitive tag matching	2024-11-02 02:18:16 +13:00
grqx_wsl	0e344b806f	[ie/boomplaypodcast]extract full description	2024-11-02 02:11:49 +13:00
grqx_termux	60b763c50f	`_TEST` -> `_TESTS` actually meant this. working on 2 branches simultanously can lead to results like this...	2024-10-24 15:49:16 +13:00
grqx_termux	195af478f3	Revert "`_TEST` -> `_TESTS`" This reverts commit `aa34d34596`.	2024-10-24 15:41:59 +13:00
dirkf	8a1daf41ab	[ie/BoomplayEpisode] Make title extraction non-fatal Co-authored-by: dirkf <fieldhouse@gmx.net>	2024-10-23 23:58:57 +13:00
grqx_wsl	0f9b09842e	remove `playlist` argument from `BoomplayBaseIE._extract_page_metadata` Will consider `require_title` later if moving title extraction here	2024-10-23 23:52:44 +13:00
grqx_wsl	1066a94acf	Merge remote-tracking branch 'upstream/master' into boomplay	2024-10-23 23:38:20 +13:00
grqx_wsl	aa34d34596	`_TEST` -> `_TESTS`	2024-10-23 22:53:24 +13:00
grqx_wsl	0e1851bc34	Merge remote-tracking branch 'refs/remotes/origin/boomplay' into boomplay	2024-10-18 13:35:10 +13:00
grqx_wsl	a886439396	`_id` -> `item_id` Co-authored-by: dirkf <fieldhouse@gmx.net>	2024-10-18 13:34:40 +13:00
N/Ame	38383ea313	use `re.sub` instead in description extraction Co-authored-by: dirkf <fieldhouse@gmx.net>	2024-10-18 13:34:07 +13:00
grqx_wsl	28a1163010	consistency: BoomplaySearchPageIE => BoomplaySearchURLIE	2024-10-18 13:28:49 +13:00
grqx_wsl	cee1c763e4	fix the docstring of `BoomplayBaseIE.__yield_elements_text_and_html_by_class_and_tag`	2024-10-16 23:48:40 +13:00
grqx_wsl	bbb121c2af	Correct extractor name: `BoomPlay`==>`Boomplay`	2024-10-16 23:47:36 +13:00
grqx_wsl	6beca5eb57	revert	2024-10-15 14:56:37 +13:00
grqx_wsl	82d7e40908	Merge remote-tracking branch 'refs/remotes/origin/boomplay' into boomplay	2024-10-15 14:55:54 +13:00
grqx_wsl	5b1b5bb1b6	updxate _VALID_URL	2024-10-15 14:53:36 +13:00
N/Ame	445531c5a0	Update yt_dlp/extractor/boomplay.py	2024-10-15 13:27:09 +13:00
N/Ame	16d68723dc	Update yt_dlp/extractor/boomplay.py	2024-10-15 13:23:54 +13:00
grqx_wsl	5b962d70de	improve metadata extraction, add extractor for search pages - pass tests&code formatting Co-authored-by: dirkf <fieldhouse@gmx.net> Co-authored-by: grqx_wsl <173253225+grqx@users.noreply.github.com>	2024-10-14 23:49:07 +13:00
grqx_wsl	98d9edf823	Merge branch 'master' into boomplay	2024-10-14 16:41:30 +13:00
grqx_wsl	6d2de79b7a	BoomPlayGenericPlaylistIE, BoomPlaySearchIE	2024-10-13 23:07:33 +13:00
grqx_wsl	a8769f672b	[ie/boomplay] add extractors	2024-10-13 12:46:03 +13:00