Compare commits

...

38 Commits

Author SHA1 Message Date
N/Ame
f069573aed
Merge c59ce7d6a6 into f9d98509a8 2024-11-17 20:50:56 +01:00
qbnu
f9d98509a8
[ie/ctvnews] Fix playlist ID extraction (#8892)
Authored by: qbnu
2024-11-17 19:35:10 +00:00
sepro
37cd7660ea
[ie/youtube:tab] Fix podcasts tab extraction (#11567)
Authored by: seproDev
2024-11-17 19:46:04 +01:00
ChocoLZS
d867f99622
[ie/PiaLive] Add extractor (#10811)
Authored by: ChocoLZS
2024-11-17 19:41:57 +01:00
doe1080
10fc719bc7
[cleanup] Remove dead extractors (#11566)
- Removes MildomClipIE, MildomIE, MildomUserVodIE, MildomVodIE
- Removes PokemonIE, PokemonWatchIE
- Removes VeohIE, VeohUserIE

Closes #3373, Closes #7059
Authored by: doe1080
2024-11-17 16:22:40 +00:00
krichbanana
eb15fd5a32
[ie/kenh14] Add extractor (#3996)
Closes #3937
Authored by: krichbanana, pzhlkj6612

Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
2024-11-17 14:12:26 +00:00
sepro
7cecd299e4
[ie/chaturbate] Don't break embed detection (#11565)
Bugfix for 720b3dc453

Authored by: seproDev
2024-11-17 13:32:12 +01:00
grqx_wsl
c59ce7d6a6 [ie/boomplaypodcast] use the base extractor's method to extract title 2024-11-06 01:04:36 +13:00
grqx_wsl
bd857a06a0 fix: do not use classmethod; fix title in the base extractor 2024-11-06 00:10:40 +13:00
grqx_wsl
c58ee488a9 simplify BoomplayGenericPlaylistIE.suitable 2024-11-05 13:31:53 +13:00
grqx_wsl
eacad11a5a code formatting 2024-11-05 00:18:14 +13:00
grqx_wsl
d69a1be537 _urljoin(): let url_or_none sanitize the url;
more classmethods
2024-11-04 23:17:26 +13:00
grqx_wsl
5cbf04763b Merge remote-tracking branch 'upstream/master' into boomplay 2024-11-04 17:57:49 +13:00
grqx_wsl
901e78af62 improve regex 2024-11-04 14:19:52 +13:00
grqx_wsl
9a6f9843c0 use _extract_from_webpage and _extract_embed_urls
- `_extract_playlist_entries` is now a `classmethod`
- case insensitive html tag matching

Co-authored-by: dirkf <fieldhouse@gmx.net>
2024-11-04 14:09:42 +13:00
grqx_wsl
8ef2294282 case insensitive tag matching 2024-11-02 02:18:16 +13:00
grqx_wsl
0e344b806f [ie/boomplaypodcast]extract full description 2024-11-02 02:11:49 +13:00
grqx_termux
60b763c50f _TEST -> _TESTS
actually meant this. working on 2 branches simultanously can lead to
results like this...
2024-10-24 15:49:16 +13:00
grqx_termux
195af478f3 Revert "_TEST -> _TESTS"
This reverts commit aa34d34596.
2024-10-24 15:41:59 +13:00
dirkf
8a1daf41ab [ie/BoomplayEpisode] Make title extraction non-fatal
Co-authored-by: dirkf <fieldhouse@gmx.net>
2024-10-23 23:58:57 +13:00
grqx_wsl
0f9b09842e remove playlist argument from BoomplayBaseIE._extract_page_metadata
Will consider `require_title` later if moving title extraction here
2024-10-23 23:52:44 +13:00
grqx_wsl
1066a94acf Merge remote-tracking branch 'upstream/master' into boomplay 2024-10-23 23:38:20 +13:00
grqx_wsl
aa34d34596 _TEST -> _TESTS 2024-10-23 22:53:24 +13:00
grqx_wsl
0e1851bc34 Merge remote-tracking branch 'refs/remotes/origin/boomplay' into boomplay 2024-10-18 13:35:10 +13:00
grqx_wsl
a886439396 _id -> item_id
Co-authored-by: dirkf <fieldhouse@gmx.net>
2024-10-18 13:34:40 +13:00
N/Ame
38383ea313
use re.sub instead in description extraction
Co-authored-by: dirkf <fieldhouse@gmx.net>
2024-10-18 13:34:07 +13:00
grqx_wsl
28a1163010 consistency: BoomplaySearchPageIE => BoomplaySearchURLIE 2024-10-18 13:28:49 +13:00
grqx_wsl
cee1c763e4 fix the docstring of BoomplayBaseIE.__yield_elements_text_and_html_by_class_and_tag 2024-10-16 23:48:40 +13:00
grqx_wsl
bbb121c2af Correct extractor name: BoomPlay==>Boomplay 2024-10-16 23:47:36 +13:00
grqx_wsl
6beca5eb57 revert 2024-10-15 14:56:37 +13:00
grqx_wsl
82d7e40908 Merge remote-tracking branch 'refs/remotes/origin/boomplay' into boomplay 2024-10-15 14:55:54 +13:00
grqx_wsl
5b1b5bb1b6 updxate _VALID_URL 2024-10-15 14:53:36 +13:00
N/Ame
445531c5a0
Update yt_dlp/extractor/boomplay.py 2024-10-15 13:27:09 +13:00
N/Ame
16d68723dc
Update yt_dlp/extractor/boomplay.py 2024-10-15 13:23:54 +13:00
grqx_wsl
5b962d70de improve metadata extraction, add extractor for search pages
- pass tests&code formatting

Co-authored-by: dirkf <fieldhouse@gmx.net>
Co-authored-by: grqx_wsl <173253225+grqx@users.noreply.github.com>
2024-10-14 23:49:07 +13:00
grqx_wsl
98d9edf823 Merge branch 'master' into boomplay 2024-10-14 16:41:30 +13:00
grqx_wsl
6d2de79b7a BoomPlayGenericPlaylistIE, BoomPlaySearchIE 2024-10-13 23:07:33 +13:00
grqx_wsl
a8769f672b [ie/boomplay] add extractors 2024-10-13 12:46:03 +13:00
12 changed files with 962 additions and 711 deletions

View File

@ -285,6 +285,16 @@ from .bloomberg import BloombergIE
from .bluesky import BlueskyIE from .bluesky import BlueskyIE
from .bokecc import BokeCCIE from .bokecc import BokeCCIE
from .bongacams import BongaCamsIE from .bongacams import BongaCamsIE
from .boomplay import (
BoomplayEpisodeIE,
BoomplayGenericPlaylistIE,
BoomplayMusicIE,
BoomplayPlaylistIE,
BoomplayPodcastIE,
BoomplaySearchIE,
BoomplaySearchURLIE,
BoomplayVideoIE,
)
from .boosty import BoostyIE from .boosty import BoostyIE
from .bostonglobe import BostonGlobeIE from .bostonglobe import BostonGlobeIE
from .box import BoxIE from .box import BoxIE
@ -946,6 +956,10 @@ from .kaltura import KalturaIE
from .kankanews import KankaNewsIE from .kankanews import KankaNewsIE
from .karaoketv import KaraoketvIE from .karaoketv import KaraoketvIE
from .kelbyone import KelbyOneIE from .kelbyone import KelbyOneIE
from .kenh14 import (
Kenh14PlaylistIE,
Kenh14VideoIE,
)
from .khanacademy import ( from .khanacademy import (
KhanAcademyIE, KhanAcademyIE,
KhanAcademyUnitIE, KhanAcademyUnitIE,
@ -1135,12 +1149,6 @@ from .microsoftembed import (
MicrosoftMediusIE, MicrosoftMediusIE,
) )
from .microsoftstream import MicrosoftStreamIE from .microsoftstream import MicrosoftStreamIE
from .mildom import (
MildomClipIE,
MildomIE,
MildomUserVodIE,
MildomVodIE,
)
from .minds import ( from .minds import (
MindsChannelIE, MindsChannelIE,
MindsGroupIE, MindsGroupIE,
@ -1522,8 +1530,8 @@ from .pgatour import PGATourIE
from .philharmoniedeparis import PhilharmonieDeParisIE from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE from .photobucket import PhotobucketIE
from .pialive import PiaLiveIE
from .piapro import PiaproIE from .piapro import PiaproIE
from .piaulizaportal import PIAULIZAPortalIE
from .picarto import ( from .picarto import (
PicartoIE, PicartoIE,
PicartoVodIE, PicartoVodIE,
@ -1559,10 +1567,6 @@ from .podbayfm import (
) )
from .podchaser import PodchaserIE from .podchaser import PodchaserIE
from .podomatic import PodomaticIE from .podomatic import PodomaticIE
from .pokemon import (
PokemonIE,
PokemonWatchIE,
)
from .pokergo import ( from .pokergo import (
PokerGoCollectionIE, PokerGoCollectionIE,
PokerGoIE, PokerGoIE,
@ -2256,6 +2260,10 @@ from .ufctv import (
) )
from .ukcolumn import UkColumnIE from .ukcolumn import UkColumnIE
from .uktvplay import UKTVPlayIE from .uktvplay import UKTVPlayIE
from .uliza import (
UlizaPlayerIE,
UlizaPortalIE,
)
from .umg import UMGDeIE from .umg import UMGDeIE
from .unistra import UnistraIE from .unistra import UnistraIE
from .unity import UnityIE from .unity import UnityIE
@ -2284,10 +2292,6 @@ from .utreon import UtreonIE
from .varzesh3 import Varzesh3IE from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE from .vbox7 import Vbox7IE
from .veo import VeoIE from .veo import VeoIE
from .veoh import (
VeohIE,
VeohUserIE,
)
from .vesti import VestiIE from .vesti import VestiIE
from .vevo import ( from .vevo import (
VevoIE, VevoIE,

View File

@ -0,0 +1,511 @@
import base64
import functools
import json
import re
import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor
from ..aes import aes_cbc_decrypt_bytes, aes_cbc_encrypt_bytes, unpad_pkcs7
from ..utils import (
ExtractorError,
classproperty,
clean_html,
extract_attributes,
get_elements_text_and_html_by_attribute,
int_or_none,
join_nonempty,
merge_dicts,
parse_count,
parse_duration,
smuggle_url,
strip_or_none,
unified_strdate,
unsmuggle_url,
url_or_none,
urlencode_postdata,
urljoin,
variadic,
)
from ..utils.traversal import traverse_obj
class BoomplayBaseIE(InfoExtractor):
# Calculated from const values, see lhx.AESUtils.encrypt in public.js
# Note that the real key/iv differs from `lhx.AESUtils.key`/`lhx.AESUtils.iv`
_KEY = b'boomplayVr3xopAM'
_IV = b'boomplay8xIsKTn9'
_BASE = 'https://www.boomplay.com'
_MEDIA_TYPES = ('songs', 'video', 'episode', 'podcasts', 'playlists', 'artists', 'albums')
_GEO_COUNTRIES = ['NG']
@staticmethod
def __yield_elements_text_and_html_by_class_and_tag(class_, tag, html):
"""
Yields content of all element matching `tag.class_` in html
class_ must be re escaped
"""
# get_elements_text_and_html_by_attribute returns a generator
return get_elements_text_and_html_by_attribute(
attribute='class', value=rf'''[^'"]*(?<=['"\s]){class_}(?=['"\s])[^'"]*''', html=html,
tag=tag, escape_value=False)
@classmethod
def __yield_elements_by_class_and_tag(cls, *args, **kwargs):
return (content for content, _ in cls.__yield_elements_text_and_html_by_class_and_tag(*args, **kwargs))
@classmethod
def __yield_elements_html_by_class_and_tag(cls, *args, **kwargs):
return (whole for _, whole in cls.__yield_elements_text_and_html_by_class_and_tag(*args, **kwargs))
@classmethod
def _get_elements_by_class_and_tag(cls, class_, tag, html):
return list(cls.__yield_elements_by_class_and_tag(class_, tag, html))
@classmethod
def _get_element_by_class_and_tag(cls, class_, tag, html):
return next(cls.__yield_elements_by_class_and_tag(class_, tag, html), None)
@classmethod
def _urljoin(cls, path):
return url_or_none(urljoin(base=cls._BASE, path=path))
def _get_playurl(self, item_id, item_type):
resp = self._download_json(
'https://www.boomplay.com/getResourceAddr', item_id,
note='Downloading play URL', errnote='Failed to download play URL',
data=urlencode_postdata({
'param': base64.b64encode(aes_cbc_encrypt_bytes(json.dumps({
'itemID': item_id,
'itemType': item_type,
}).encode(), self._KEY, self._IV)).decode(),
}), headers={
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
})
if not (source := resp.get('source')) and (code := resp.get('code')):
if 'unavailable in your country' in (desc := resp.get('desc')) or '':
# since NG must have failed ...
self.raise_geo_restricted(countries=['GH', 'KE', 'TZ', 'CM', 'CI'])
else:
raise ExtractorError(desc or f'Failed to get play url, code: {code}')
return unpad_pkcs7(aes_cbc_decrypt_bytes(
base64.b64decode(source),
self._KEY, self._IV)).decode()
def _extract_formats(self, item_id, item_type='MUSIC', **kwargs):
if url := url_or_none(self._get_playurl(item_id, item_type)):
return [{
'format_id': '0',
'url': url,
'http_headers': {
'Origin': 'https://www.boomplay.com',
'Referer': 'https://www.boomplay.com',
'X-Boomplay-Ref': 'Boomplay_WEBV1',
},
**kwargs,
}]
else:
self.raise_no_formats('No formats found')
def _extract_page_metadata(self, webpage, item_id):
metadata_div = self._get_element_by_class_and_tag('summary', 'div', webpage) or ''
metadata_entries = re.findall(r'(?si)<strong>(?P<entry>.*?)</strong>', metadata_div) or []
description = re.sub(
r'(?i)Listen and download music for free on Boomplay!', '',
clean_html(self._get_element_by_class_and_tag(
'description_content', 'span', webpage)) or '') or None
details_section = self._get_element_by_class_and_tag('songDetailInfo', 'section', webpage) or ''
metadata_entries.extend(re.findall(r'(?si)<li>(?P<entry>.*?)</li>', details_section) or [])
page_metadata = {
'id': item_id,
**self._extract_title_from_webpage(webpage),
'thumbnail': self._html_search_meta(['og:image', 'twitter:image'],
webpage, 'thumbnail', default=None),
'like_count': parse_count(self._get_element_by_class_and_tag('btn_favorite', 'button', metadata_div)),
'repost_count': parse_count(self._get_element_by_class_and_tag('btn_share', 'button', metadata_div)),
'comment_count': parse_count(self._get_element_by_class_and_tag('btn_comment', 'button', metadata_div)),
'duration': parse_duration(self._get_element_by_class_and_tag('btn_duration', 'button', metadata_div)),
'upload_date': unified_strdate(strip_or_none(
self._get_element_by_class_and_tag('btn_pubDate', 'button', metadata_div))),
'description': description,
}
for metadata_entry in metadata_entries:
if ':' not in metadata_entry:
continue
k, v = clean_html(metadata_entry).split(':', 1)
v = v.strip()
if 'artist' in k.lower():
page_metadata['artists'] = [v]
elif 'album' in k.lower():
page_metadata['album'] = v
elif 'genre' in k.lower():
page_metadata['genres'] = [v]
elif 'year of release' in k.lower():
page_metadata['release_year'] = int_or_none(v)
return page_metadata
def _extract_title_from_webpage(self, webpage):
if h1_title := self._html_search_regex(r'(?i)<h1[^>]*>([^<]+)</h1>', webpage, 'title', default=None):
return {'title': h1_title}
else:
return self._fix_title(
self._html_search_meta(['og:title', 'twitter:title'], webpage, 'title', default=None)
or self._html_search_regex(r'(?i)<title[^>]*>([^<]+)</title>', webpage, 'title', default=None))
@staticmethod
def _fix_title(title):
"""
fix various types of titles(og:title, twitter:title, title tag in html head)
"""
if not title:
return {}
title_patterns = (
r'^(?P<title>(?P<artist>.+)) Songs MP3 Download, New Songs \& Albums \| Boomplay$', # artists
r'^(?P<artist>.+?) - (?P<title>.+) MP3\ Download \& Lyrics \| Boomplay$', # music
r'^Download (?P<artist>.+) album songs: (?P<title>.+?) \| Boomplay Music$', # album
r'^Search:(?P<title>.+) \| Boomplay Music$', # search url
r'^(?P<title>.+) \| Podcast \| Boomplay$', # podcast, episode
r'^(?P<title>.+) \| Boomplay(?: Music)?$', # video, playlist, generic playlists
)
for pattern in title_patterns:
if match := re.search(pattern, title):
return {
'title': match.group('title'),
'artists': [match.group('artist')] if 'artist' in match.groupdict() else None,
}
return {'title': title}
@classmethod
def _extract_from_webpage(cls, url, webpage, **kwargs):
if kwargs:
url = smuggle_url(url, kwargs)
return super()._extract_from_webpage(url, webpage)
@classmethod
def _extract_embed_urls(cls, url, webpage):
url, smuggled_data = unsmuggle_url(url)
media_types = variadic(smuggled_data.get('media_types', cls._MEDIA_TYPES))
media_types = join_nonempty(*(
re.escape(v)for v in media_types if v in cls._MEDIA_TYPES),
delim='|')
for mobj in re.finditer(
rf'''(?ix)
<a
(?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
(?<=\s)href\s*=\s*(?P<_q>['"])
(?P<href>/(?:{media_types})/\d+/?[\-\w=?&#:;@]*)
(?P=_q)
(?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
>''', webpage):
if url := cls._urljoin(mobj.group('href')):
yield url
@classmethod
def _extract_playlist_entries(cls, webpage, media_types, warn=True):
song_list = strip_or_none(
cls._get_element_by_class_and_tag('morePart_musics', 'ol', webpage)
or cls._get_element_by_class_and_tag('morePart', 'ol', webpage)
or '')
entries = traverse_obj(cls.__yield_elements_html_by_class_and_tag(
'songName', 'a', song_list),
(..., {extract_attributes}, 'href', {cls._urljoin}, {cls.url_result}))
if not entries:
if warn:
cls.report_warning('Failed to extract playlist entries, finding suitable links instead!')
def strip_ie(entry):
# All our IEs have a _VALID_URL and set a key: don't use it
entry.pop('ie_key', None)
return entry
return (strip_ie(result) for result in
cls._extract_from_webpage(cls._BASE, webpage, media_types=media_types))
return entries
class BoomplayMusicIE(BoomplayBaseIE):
_VALID_URL = r'https?://(?:www\.)?boomplay\.com/songs/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.boomplay.com/songs/165481965',
'md5': 'c5fb4f23e6aae98064230ef3c39c2178',
'info_dict': {
'title': 'Rise of the Fallen Heroes',
'ext': 'mp3',
'id': '165481965',
'artists': ['fatbunny'],
'thumbnail': 'https://source.boomplaymusic.com/group10/M00/04/29/375ecda38f6f48179a93c72ab909118f_464_464.jpg',
'channel_url': 'https://www.boomplay.com/artists/52723101',
'duration': 125.0,
'release_year': 2024,
'comment_count': int,
'like_count': int,
'repost_count': int,
'album': 'Legendary Battle',
'genres': ['Metal'],
},
}]
def _real_extract(self, url):
song_id = self._match_id(url)
webpage = self._download_webpage(url, song_id)
ld_json_meta = next(self._yield_json_ld(webpage, song_id))
# TODO: extract comments(and lyrics? they don't have timestamps)
# example: https://www.boomplay.com/songs/96352673?from=home
return merge_dicts(
self._extract_page_metadata(webpage, song_id),
traverse_obj(ld_json_meta, {
'title': 'name',
'thumbnail': 'image',
'channel_url': ('byArtist', 0, '@id'),
'artists': ('byArtist', ..., 'name'),
'duration': ('duration', {parse_duration}),
}), {
'formats': self._extract_formats(song_id, 'MUSIC', vcodec='none'),
})
class BoomplayVideoIE(BoomplayBaseIE):
_VALID_URL = r'https?://(?:www\.)?boomplay\.com/video/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.boomplay.com/video/1154892',
'md5': 'd9b67ad333d2292a82922062d065352d',
'info_dict': {
'id': '1154892',
'ext': 'mp4',
'title': 'Autumn blues',
'thumbnail': 'https://source.boomplaymusic.com/group10/M00/10/10/2171dee9e1f8452e84021560729edb88.jpg',
'upload_date': '20241010',
'timestamp': 1728599214,
'view_count': int,
'duration': 177.0,
'description': 'Autumn blues by Lugo',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
return merge_dicts(
self._extract_page_metadata(webpage, video_id),
self._search_json_ld(webpage, video_id), {
'formats': self._extract_formats(video_id, 'VIDEO', ext='mp4'),
})
class BoomplayEpisodeIE(BoomplayBaseIE):
_VALID_URL = r'https?://(?:www\.)?boomplay\.com/episode/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.boomplay.com/episode/7132706',
'md5': 'f26e236b764baa53d7a2cbb7e9ce6dc4',
'info_dict': {
'id': '7132706',
'ext': 'mp3',
'title': 'Letting Go',
'repost_count': int,
'thumbnail': 'https://source.boomplaymusic.com/group10/M00/05/06/fc535eaa25714b43a47185a9831887a5_320_320.jpg',
'comment_count': int,
'duration': 921.0,
'upload_date': '20240506',
'description': 'md5:5ec684b281fa0f9e4c31b3ee20c5e57a',
},
}]
def _real_extract(self, url):
ep_id = self._match_id(url)
webpage = self._download_webpage(url, ep_id)
return merge_dicts(
self._extract_page_metadata(webpage, ep_id), {
'description': self._html_search_meta(
['description', 'og:description', 'twitter:description'], webpage),
'formats': self._extract_formats(ep_id, 'EPISODE', vcodec='none'),
})
class BoomplayPodcastIE(BoomplayBaseIE):
_VALID_URL = r'https?://(?:www\.)?boomplay\.com/podcasts/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.boomplay.com/podcasts/5372',
'playlist_count': 200,
'info_dict': {
'id': '5372',
'title': 'TED Talks Daily',
'description': r're:(?s)Every weekday, TED Talks Daily brings you the latest talks .{328} learn something new\.$',
'thumbnail': 'https://source.boomplaymusic.com/group10/M00/12/22/6f9cf97ad6f846a0a7882c98dfcf4f8c_320_320.jpg',
'repost_count': int,
'comment_count': int,
'like_count': int,
},
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
song_list = self._get_element_by_class_and_tag('morePart_musics', 'ol', webpage)
song_list = traverse_obj(re.finditer(
r'''(?ix)
<li
(?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
\sdata-id\s*=\s*
(?P<_q>['"]?)
(?P<id>\d+)
(?P=_q)
(?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
>''',
song_list),
(..., 'id', {
lambda x: self.url_result(
f'https://www.boomplay.com/episode/{x}', BoomplayEpisodeIE, x),
}))
return self.playlist_result(
song_list, playlist_id,
**self._extract_page_metadata(webpage, playlist_id))
class BoomplayPlaylistIE(BoomplayBaseIE):
_VALID_URL = r'https?://(?:www\.)?boomplay\.com/(?:playlists|artists|albums)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.boomplay.com/playlists/33792494',
'info_dict': {
'id': '33792494',
'title': 'Daily Trending Indonesia',
'thumbnail': 'https://source.boomplaymusic.com/group10/M00/08/19/d05d431ee616412caeacd7f78f4f68f5_320_320.jpeg',
'repost_count': int,
'comment_count': int,
'like_count': int,
'description': 'md5:7ebdffc5137c77acb62acb3c89248445',
},
'playlist_count': 10,
}, {
'url': 'https://www.boomplay.com/artists/52723101',
'only_matching': True,
}, {
'url': 'https://www.boomplay.com/albums/89611238?from=home#google_vignette',
'only_matching': True,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
json_ld_metadata = next(self._yield_json_ld(webpage, playlist_id))
# schema `MusicGroup` not supported by self._json_ld()
return self.playlist_result(**merge_dicts(
self._extract_page_metadata(webpage, playlist_id),
traverse_obj(json_ld_metadata, {
'entries': ('track', ..., 'url', {
functools.partial(self.url_result, ie=BoomplayMusicIE),
}),
'playlist_title': 'name',
'thumbnail': 'image',
'artists': ('byArtist', ..., 'name'),
'channel_url': ('byArtist', 0, '@id'),
})))
class BoomplayGenericPlaylistIE(BoomplayBaseIE):
_VALID_URL = r'https?://(?:www\.)?boomplay\.com/.+'
_TESTS = [{
'url': 'https://www.boomplay.com/new-songs',
'playlist_mincount': 20,
'info_dict': {
'id': 'new-songs',
'title': 'New Songs',
'thumbnail': 'http://www.boomplay.com/pc/img/og_default_v3.jpg',
},
}, {
'url': 'https://www.boomplay.com/trending-songs',
'playlist_mincount': 20,
'info_dict': {
'id': 'trending-songs',
'title': 'Trending Songs',
'thumbnail': 'http://www.boomplay.com/pc/img/og_default_v3.jpg',
},
}]
@classmethod
def suitable(cls, url):
return super().suitable(url) and all(not ie.suitable(url) for ie in (
BoomplayEpisodeIE,
BoomplayMusicIE,
BoomplayPlaylistIE,
BoomplayPodcastIE,
BoomplaySearchURLIE,
BoomplayVideoIE,
))
def _real_extract(self, url):
playlist_id = self._generic_id(url)
webpage = self._download_webpage(url, playlist_id)
return self.playlist_result(
self._extract_playlist_entries(webpage, self._MEDIA_TYPES),
**self._extract_page_metadata(webpage, playlist_id))
class BoomplaySearchURLIE(BoomplayBaseIE):
_TESTS = [{
'url': 'https://www.boomplay.com/search/default/%20Rise%20of%20the%20Falletesn%20Heroes%20fatbunny',
'md5': 'c5fb4f23e6aae98064230ef3c39c2178',
'info_dict': {
'id': '165481965',
'ext': 'mp3',
'title': 'Rise of the Fallen Heroes',
'duration': 125.0,
'genres': ['Metal'],
'artists': ['fatbunny'],
'thumbnail': 'https://source.boomplaymusic.com/group10/M00/04/29/375ecda38f6f48179a93c72ab909118f_464_464.jpg',
'channel_url': 'https://www.boomplay.com/artists/52723101',
'comment_count': int,
'repost_count': int,
'album': 'Legendary Battle',
'release_year': 2024,
'like_count': int,
},
}, {
'url': 'https://www.boomplay.com/search/video/%20Autumn%20blues',
'md5': 'd9b67ad333d2292a82922062d065352d',
'info_dict': {
'id': '1154892',
'title': 'Autumn blues',
'ext': 'mp4',
'timestamp': 1728599214,
'view_count': int,
'thumbnail': 'https://source.boomplaymusic.com/group10/M00/10/10/2171dee9e1f8452e84021560729edb88.jpg',
'description': 'Autumn blues by Lugo',
'upload_date': '20241010',
'duration': 177.0,
},
'params': {'playlist_items': '1'},
}]
@classproperty
def _VALID_URL(cls):
return r'https?://(?:www\.)?boomplay\.com/search/(?P<media_type>default|video|episode|podcasts|playlists|artists|albums)/(?P<query>[^?&#/]+)'
def _real_extract(self, url):
media_type, query = self._match_valid_url(url).group('media_type', 'query')
if media_type == 'default':
media_type = 'songs'
webpage = self._download_webpage(url, query)
return self.playlist_result(
self._extract_playlist_entries(webpage, media_type, warn=media_type == 'songs'),
**self._extract_page_metadata(webpage, query))
class BoomplaySearchIE(SearchInfoExtractor):
_SEARCH_KEY = 'boomplaysearch'
_RETURN_TYPE = 'url'
_TESTS = [{
'url': 'boomplaysearch:rise of the fallen heroes',
'only_matching': True,
}]
def _search_results(self, query):
yield self.url_result(
f'https://www.boomplay.com/search/default/{urllib.parse.quote(query)}',
BoomplaySearchURLIE)

View File

@ -79,7 +79,7 @@ class ChaturbateIE(InfoExtractor):
'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True), 'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True),
} }
def _extract_from_webpage(self, video_id, tld): def _extract_from_html(self, video_id, tld):
webpage = self._download_webpage( webpage = self._download_webpage(
f'https://chaturbate.{tld}/{video_id}/', video_id, f'https://chaturbate.{tld}/{video_id}/', video_id,
headers=self.geo_verification_headers(), impersonate=True) headers=self.geo_verification_headers(), impersonate=True)
@ -151,4 +151,4 @@ class ChaturbateIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id, tld = self._match_valid_url(url).group('id', 'tld') video_id, tld = self._match_valid_url(url).group('id', 'tld')
return self._extract_from_api(video_id, tld) or self._extract_from_webpage(video_id, tld) return self._extract_from_api(video_id, tld) or self._extract_from_html(video_id, tld)

View File

@ -5,10 +5,10 @@ from ..utils import orderedSet
class CTVNewsIE(InfoExtractor): class CTVNewsIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)' _VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)(?:$|[#?&])'
_TESTS = [{ _TESTS = [{
'url': 'http://www.ctvnews.ca/video?clipId=901995', 'url': 'http://www.ctvnews.ca/video?clipId=901995',
'md5': '9b8624ba66351a23e0b6e1391971f9af', 'md5': 'b608f466c7fa24b9666c6439d766ab7e',
'info_dict': { 'info_dict': {
'id': '901995', 'id': '901995',
'ext': 'flv', 'ext': 'flv',
@ -16,6 +16,14 @@ class CTVNewsIE(InfoExtractor):
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285', 'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
'timestamp': 1467286284, 'timestamp': 1467286284,
'upload_date': '20160630', 'upload_date': '20160630',
'categories': [],
'tags': [],
'season_id': 57981,
'duration': 764.631,
'series': 'CTV News National story',
'thumbnail': r're:^https?://.*\.jpg$',
'season': 'Season 0',
'season_number': 0,
}, },
}, { }, {
'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224', 'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
@ -31,6 +39,13 @@ class CTVNewsIE(InfoExtractor):
'id': '1.2876780', 'id': '1.2876780',
}, },
'playlist_mincount': 100, 'playlist_mincount': 100,
}, {
'url': 'https://www.ctvnews.ca/it-s-been-23-years-since-toronto-called-in-the-army-after-a-major-snowstorm-1.5736957',
'info_dict':
{
'id': '1.5736957',
},
'playlist_mincount': 6,
}, { }, {
'url': 'http://www.ctvnews.ca/1.810401', 'url': 'http://www.ctvnews.ca/1.810401',
'only_matching': True, 'only_matching': True,

160
yt_dlp/extractor/kenh14.py Normal file
View File

@ -0,0 +1,160 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
extract_attributes,
get_element_by_class,
get_element_html_by_attribute,
get_elements_html_by_class,
int_or_none,
parse_duration,
parse_iso8601,
remove_start,
strip_or_none,
unescapeHTML,
update_url,
url_or_none,
)
from ..utils.traversal import traverse_obj
class Kenh14VideoIE(InfoExtractor):
_VALID_URL = r'https?://video\.kenh14\.vn/(?:video/)?[\w-]+-(?P<id>[0-9]+)\.chn'
_TESTS = [{
'url': 'https://video.kenh14.vn/video/mo-hop-iphone-14-pro-max-nguon-unbox-therapy-316173.chn',
'md5': '1ed67f9c3a1e74acf15db69590cf6210',
'info_dict': {
'id': '316173',
'ext': 'mp4',
'title': 'Video mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
'description': 'Video mở hộp iPhone 14 Pro MaxVideo mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
'tags': [],
'uploader': 'Unbox Therapy',
'upload_date': '20220517',
'view_count': int,
'duration': 722.86,
'timestamp': 1652764468,
},
}, {
'url': 'https://video.kenh14.vn/video-316174.chn',
'md5': '2b41877d2afaf4a3f487ceda8e5c7cbd',
'info_dict': {
'id': '316174',
'ext': 'mp4',
'title': 'Khoảnh khắc VĐV nằm gục khóc sau chiến thắng: 7 năm trời Việt Nam mới có HCV kiếm chém nữ, chỉ có 8 tháng để khổ luyện trước khi lên sàn đấu',
'description': 'md5:de86aa22e143e2b277bce8ec9c6f17dc',
'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
'tags': [],
'upload_date': '20220517',
'view_count': int,
'duration': 70.04,
'timestamp': 1652766021,
},
}, {
'url': 'https://video.kenh14.vn/0-344740.chn',
'md5': 'b843495d5e728142c8870c09b46df2a9',
'info_dict': {
'id': '344740',
'ext': 'mov',
'title': 'Kỳ Duyên đầy căng thẳng trong buổi ra quân đi Miss Universe, nghi thức tuyên thuệ lần đầu xuất hiện gây nhiều tranh cãi',
'description': 'md5:2a2dbb4a7397169fb21ee68f09160497',
'thumbnail': r're:^https?://kenh14cdn\.com/.*\.jpg$',
'tags': ['kỳ duyên', 'Kỳ Duyên tuyên thuệ', 'miss universe'],
'uploader': 'Quang Vũ',
'upload_date': '20241024',
'view_count': int,
'duration': 198.88,
'timestamp': 1729741590,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
attrs = extract_attributes(get_element_html_by_attribute('type', 'VideoStream', webpage) or '')
direct_url = attrs['data-vid']
metadata = self._download_json(
'https://api.kinghub.vn/video/api/v1/detailVideoByGet?FileName={}'.format(
remove_start(direct_url, 'kenh14cdn.com/')), video_id, fatal=False)
formats = [{'url': f'https://{direct_url}', 'format_id': 'http', 'quality': 1}]
subtitles = {}
video_data = self._download_json(
f'https://{direct_url}.json', video_id, note='Downloading video data', fatal=False)
if hls_url := traverse_obj(video_data, ('hls', {url_or_none})):
fmts, subs = self._extract_m3u8_formats_and_subtitles(
hls_url, video_id, m3u8_id='hls', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
if dash_url := traverse_obj(video_data, ('mpd', {url_or_none})):
fmts, subs = self._extract_mpd_formats_and_subtitles(
dash_url, video_id, mpd_id='dash', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
return {
**traverse_obj(metadata, {
'duration': ('duration', {parse_duration}),
'uploader': ('author', {strip_or_none}),
'timestamp': ('uploadtime', {parse_iso8601(delimiter=' ')}),
'view_count': ('views', {int_or_none}),
}),
'id': video_id,
'title': (
traverse_obj(metadata, ('title', {strip_or_none}))
or clean_html(self._og_search_title(webpage))
or clean_html(get_element_by_class('vdbw-title', webpage))),
'formats': formats,
'subtitles': subtitles,
'description': (
clean_html(self._og_search_description(webpage))
or clean_html(get_element_by_class('vdbw-sapo', webpage))),
'thumbnail': (self._og_search_thumbnail(webpage) or attrs.get('data-thumb')),
'tags': traverse_obj(self._html_search_meta('keywords', webpage), (
{lambda x: x.split(';')}, ..., filter)),
}
class Kenh14PlaylistIE(InfoExtractor):
_VALID_URL = r'https?://video\.kenh14\.vn/playlist/[\w-]+-(?P<id>[0-9]+)\.chn'
_TESTS = [{
'url': 'https://video.kenh14.vn/playlist/tran-tinh-naked-love-mua-2-71.chn',
'info_dict': {
'id': '71',
'title': 'Trần Tình (Naked love) mùa 2',
'description': 'md5:e9522339304956dea931722dd72eddb2',
'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
},
'playlist_count': 9,
}, {
'url': 'https://video.kenh14.vn/playlist/0-72.chn',
'info_dict': {
'id': '72',
'title': 'Lau Lại Đầu Từ',
'description': 'Cùng xem xưa và nay có gì khác biệt nhé!',
'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
},
'playlist_count': 6,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
category_detail = get_element_by_class('category-detail', webpage) or ''
embed_info = traverse_obj(
self._yield_json_ld(webpage, playlist_id),
(lambda _, v: v['name'] and v['alternateName'], any)) or {}
return self.playlist_from_matches(
get_elements_html_by_class('video-item', webpage), playlist_id,
(clean_html(get_element_by_class('name', category_detail)) or unescapeHTML(embed_info.get('name'))),
getter=lambda x: 'https://video.kenh14.vn/video/video-{}.chn'.format(extract_attributes(x)['data-id']),
ie=Kenh14VideoIE, playlist_description=(
clean_html(get_element_by_class('description', category_detail))
or unescapeHTML(embed_info.get('alternateName'))),
thumbnail=traverse_obj(
self._og_search_thumbnail(webpage),
({url_or_none}, {update_url(query=None)})))

View File

@ -1,291 +0,0 @@
import functools
import json
import uuid
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
determine_ext,
dict_get,
float_or_none,
traverse_obj,
)
class MildomBaseIE(InfoExtractor):
_GUEST_ID = None
def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None):
if not self._GUEST_ID:
self._GUEST_ID = f'pc-gp-{uuid.uuid4()}'
content = self._download_json(
url, video_id, note=note, data=json.dumps(body).encode() if body else None,
headers={'Content-Type': 'application/json'} if body else {},
query={
'__guest_id': self._GUEST_ID,
'__platform': 'web',
**(query or {}),
})
if content['code'] != 0:
raise ExtractorError(
f'Mildom says: {content["message"]} (code {content["code"]})',
expected=True)
return content['body']
class MildomIE(MildomBaseIE):
IE_NAME = 'mildom'
IE_DESC = 'Record ongoing live by specific user in Mildom'
_VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/(?P<id>\d+)'
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(f'https://www.mildom.com/{video_id}', video_id)
enterstudio = self._call_api(
'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', video_id,
note='Downloading live metadata', query={'user_id': video_id})
result_video_id = enterstudio.get('log_id', video_id)
servers = self._call_api(
'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', result_video_id,
note='Downloading live server list', query={
'user_id': video_id,
'live_server_type': 'hls',
})
playback_token = self._call_api(
'https://cloudac.mildom.com/nonolive/gappserv/live/token', result_video_id,
note='Obtaining live playback token', body={'host_id': video_id, 'type': 'hls'})
playback_token = traverse_obj(playback_token, ('data', ..., 'token'), get_all=False)
if not playback_token:
raise ExtractorError('Failed to obtain live playback token')
formats = self._extract_m3u8_formats(
f'{servers["stream_server"]}/{video_id}_master.m3u8?{playback_token}',
result_video_id, 'mp4', headers={
'Referer': 'https://www.mildom.com/',
'Origin': 'https://www.mildom.com',
})
for fmt in formats:
fmt.setdefault('http_headers', {})['Referer'] = 'https://www.mildom.com/'
return {
'id': result_video_id,
'title': self._html_search_meta('twitter:description', webpage, default=None) or traverse_obj(enterstudio, 'anchor_intro'),
'description': traverse_obj(enterstudio, 'intro', 'live_intro', expected_type=str),
'timestamp': float_or_none(enterstudio.get('live_start_ms'), scale=1000),
'uploader': self._html_search_meta('twitter:title', webpage, default=None) or traverse_obj(enterstudio, 'loginname'),
'uploader_id': video_id,
'formats': formats,
'is_live': True,
}
class MildomVodIE(MildomBaseIE):
IE_NAME = 'mildom:vod'
IE_DESC = 'VOD in Mildom'
_VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+-?[0-9]*)'
_TESTS = [{
'url': 'https://www.mildom.com/playback/10882672/10882672-1597662269',
'info_dict': {
'id': '10882672-1597662269',
'ext': 'mp4',
'title': '始めてのミルダム配信じゃぃ!',
'thumbnail': r're:^https?://.*\.(png|jpg)$',
'upload_date': '20200817',
'duration': 4138.37,
'description': 'ゲームをしたくて!',
'timestamp': 1597662269.0,
'uploader_id': '10882672',
'uploader': 'kson組長(けいそん)',
},
}, {
'url': 'https://www.mildom.com/playback/10882672/10882672-1597758589870-477',
'info_dict': {
'id': '10882672-1597758589870-477',
'ext': 'mp4',
'title': '【kson】感染メイズ麻酔銃で無双する',
'thumbnail': r're:^https?://.*\.(png|jpg)$',
'timestamp': 1597759093.0,
'uploader': 'kson組長(けいそん)',
'duration': 4302.58,
'uploader_id': '10882672',
'description': 'このステージ絶対乗り越えたい',
'upload_date': '20200818',
},
}, {
'url': 'https://www.mildom.com/playback/10882672/10882672-buha9td2lrn97fk2jme0',
'info_dict': {
'id': '10882672-buha9td2lrn97fk2jme0',
'ext': 'mp4',
'title': '【kson組長】CART RACER!!!',
'thumbnail': r're:^https?://.*\.(png|jpg)$',
'uploader_id': '10882672',
'uploader': 'kson組長(けいそん)',
'upload_date': '20201104',
'timestamp': 1604494797.0,
'duration': 4657.25,
'description': 'WTF',
},
}]
def _real_extract(self, url):
user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
webpage = self._download_webpage(f'https://www.mildom.com/playback/{user_id}/{video_id}', video_id)
autoplay = self._call_api(
'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id,
note='Downloading playback metadata', query={
'v_id': video_id,
})['playback']
formats = [{
'url': autoplay['audio_url'],
'format_id': 'audio',
'protocol': 'm3u8_native',
'vcodec': 'none',
'acodec': 'aac',
'ext': 'm4a',
}]
for fmt in autoplay['video_link']:
formats.append({
'format_id': 'video-{}'.format(fmt['name']),
'url': fmt['url'],
'protocol': 'm3u8_native',
'width': fmt['level'] * autoplay['video_width'] // autoplay['video_height'],
'height': fmt['level'],
'vcodec': 'h264',
'acodec': 'aac',
'ext': 'mp4',
})
return {
'id': video_id,
'title': self._html_search_meta(('og:description', 'description'), webpage, default=None) or autoplay.get('title'),
'description': traverse_obj(autoplay, 'video_intro'),
'timestamp': float_or_none(autoplay.get('publish_time'), scale=1000),
'duration': float_or_none(autoplay.get('video_length'), scale=1000),
'thumbnail': dict_get(autoplay, ('upload_pic', 'video_pic')),
'uploader': traverse_obj(autoplay, ('author_info', 'login_name')),
'uploader_id': user_id,
'formats': formats,
}
class MildomClipIE(MildomBaseIE):
IE_NAME = 'mildom:clip'
IE_DESC = 'Clip in Mildom'
_VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/clip/(?P<id>(?P<user_id>\d+)-[a-zA-Z0-9]+)'
_TESTS = [{
'url': 'https://www.mildom.com/clip/10042245-63921673e7b147ebb0806d42b5ba5ce9',
'info_dict': {
'id': '10042245-63921673e7b147ebb0806d42b5ba5ce9',
'title': '全然違ったよ',
'timestamp': 1619181890,
'duration': 59,
'thumbnail': r're:https?://.+',
'uploader': 'ざきんぽ',
'uploader_id': '10042245',
},
}, {
'url': 'https://www.mildom.com/clip/10111524-ebf4036e5aa8411c99fb3a1ae0902864',
'info_dict': {
'id': '10111524-ebf4036e5aa8411c99fb3a1ae0902864',
'title': 'かっこいい',
'timestamp': 1621094003,
'duration': 59,
'thumbnail': r're:https?://.+',
'uploader': '(ルーキー',
'uploader_id': '10111524',
},
}, {
'url': 'https://www.mildom.com/clip/10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
'info_dict': {
'id': '10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
'title': '',
'timestamp': 1614769431,
'duration': 31,
'thumbnail': r're:https?://.+',
'uploader': 'ドルゴルスレンギーン=ダグワドルジ',
'uploader_id': '10660174',
},
}]
def _real_extract(self, url):
user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
webpage = self._download_webpage(f'https://www.mildom.com/clip/{video_id}', video_id)
clip_detail = self._call_api(
'https://cloudac-cf-jp.mildom.com/nonolive/videocontent/clip/detail', video_id,
note='Downloading playback metadata', query={
'clip_id': video_id,
})
return {
'id': video_id,
'title': self._html_search_meta(
('og:description', 'description'), webpage, default=None) or clip_detail.get('title'),
'timestamp': float_or_none(clip_detail.get('create_time')),
'duration': float_or_none(clip_detail.get('length')),
'thumbnail': clip_detail.get('cover'),
'uploader': traverse_obj(clip_detail, ('user_info', 'loginname')),
'uploader_id': user_id,
'url': clip_detail['url'],
'ext': determine_ext(clip_detail.get('url'), 'mp4'),
}
class MildomUserVodIE(MildomBaseIE):
IE_NAME = 'mildom:user:vod'
IE_DESC = 'Download all VODs from specific user in Mildom'
_VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/profile/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.mildom.com/profile/10093333',
'info_dict': {
'id': '10093333',
'title': 'Uploads from ねこばたけ',
},
'playlist_mincount': 732,
}, {
'url': 'https://www.mildom.com/profile/10882672',
'info_dict': {
'id': '10882672',
'title': 'Uploads from kson組長(けいそん)',
},
'playlist_mincount': 201,
}]
def _fetch_page(self, user_id, page):
page += 1
reply = self._call_api(
'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
user_id, note=f'Downloading page {page}', query={
'user_id': user_id,
'page': page,
'limit': '30',
})
if not reply:
return
for x in reply:
v_id = x.get('v_id')
if not v_id:
continue
yield self.url_result(f'https://www.mildom.com/playback/{user_id}/{v_id}')
def _real_extract(self, url):
user_id = self._match_id(url)
self.to_screen(f'This will download all VODs belonging to user. To download ongoing live video, use "https://www.mildom.com/{user_id}" instead')
profile = self._call_api(
'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id,
query={'user_id': user_id}, note='Downloading user profile')['user_info']
return self.playlist_result(
OnDemandPagedList(functools.partial(self._fetch_page, user_id), 30),
user_id, f'Uploads from {profile["loginname"]}')

122
yt_dlp/extractor/pialive.py Normal file
View File

@ -0,0 +1,122 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
extract_attributes,
get_element_by_class,
get_element_html_by_class,
multipart_encode,
str_or_none,
unified_timestamp,
url_or_none,
)
from ..utils.traversal import traverse_obj
class PiaLiveIE(InfoExtractor):
_VALID_URL = r'https?://player\.pia-live\.jp/stream/(?P<id>[\w-]+)'
_PLAYER_ROOT_URL = 'https://player.pia-live.jp/'
_PIA_LIVE_API_URL = 'https://api.pia-live.jp'
_API_KEY = 'kfds)FKFps-dms9e'
_TESTS = [{
'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krUDqGOwN4d61dCWQYOd6CTxl4hjya9dsfEZGsM4uGOUdax60lEI4twsXGXf7crmz8Gk__GhupTrWxA7RFRVt76',
'info_dict': {
'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
'display_id': '2431867_001',
'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
'live_status': 'was_live',
'comment_count': int,
},
'params': {
'getcomments': True,
'skip_download': True,
'ignore_no_formats_error': True,
},
'skip': 'The video is no longer available',
}, {
'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krJdu0GVBVbVy01IwpJ6J3qBEm3d9TCTt1d0eWpsZGj7DrOjVOmS7GAWGwyscMgiThopJvzgWC4H5b-7XQjAfRZ',
'info_dict': {
'id': '9ce8b8ba-f6d1-4d1f-83a0-18c3148ded93',
'display_id': '2431867_002',
'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
'live_status': 'was_live',
'comment_count': int,
},
'params': {
'getcomments': True,
'skip_download': True,
'ignore_no_formats_error': True,
},
'skip': 'The video is no longer available',
}]
def _extract_var(self, variable, html):
return self._search_regex(
rf'(?:var|const|let)\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
html, f'variable {variable}', group='value')
def _real_extract(self, url):
video_key = self._match_id(url)
webpage = self._download_webpage(url, video_key)
program_code = self._extract_var('programCode', webpage)
article_code = self._extract_var('articleCode', webpage)
title = self._html_extract_title(webpage)
if get_element_html_by_class('play-end', webpage):
raise ExtractorError('The video is no longer available', expected=True, video_id=program_code)
if start_info := clean_html(get_element_by_class('play-waiting__date', webpage)):
date, time = self._search_regex(
r'(?P<date>\d{4}/\d{1,2}/\d{1,2})\([月火水木金土日]\)(?P<time>\d{2}:\d{2})',
start_info, 'start_info', fatal=False, group=('date', 'time'))
if date and time:
release_timestamp_str = f'{date} {time} +09:00'
release_timestamp = unified_timestamp(release_timestamp_str)
self.raise_no_formats(f'The video will be available after {release_timestamp_str}', expected=True)
return {
'id': program_code,
'title': title,
'live_status': 'is_upcoming',
'release_timestamp': release_timestamp,
}
payload, content_type = multipart_encode({
'play_url': video_key,
'api_key': self._API_KEY,
})
api_data_and_headers = {
'data': payload,
'headers': {'Content-Type': content_type, 'Referer': self._PLAYER_ROOT_URL},
}
player_tag_list = self._download_json(
f'{self._PIA_LIVE_API_URL}/perf/player-tag-list/{program_code}', program_code,
'Fetching player tag list', 'Unable to fetch player tag list', **api_data_and_headers)
return self.url_result(
extract_attributes(player_tag_list['data']['movie_one_tag'])['src'],
url_transparent=True, title=title, display_id=program_code,
__post_extractor=self.extract_comments(program_code, article_code, api_data_and_headers))
def _get_comments(self, program_code, article_code, api_data_and_headers):
chat_room_url = traverse_obj(self._download_json(
f'{self._PIA_LIVE_API_URL}/perf/chat-tag-list/{program_code}/{article_code}', program_code,
'Fetching chat info', 'Unable to fetch chat info', fatal=False, **api_data_and_headers),
('data', 'chat_one_tag', {extract_attributes}, 'src', {url_or_none}))
if not chat_room_url:
return
comment_page = self._download_webpage(
chat_room_url, program_code, 'Fetching comment page', 'Unable to fetch comment page',
fatal=False, headers={'Referer': self._PLAYER_ROOT_URL})
if not comment_page:
return
yield from traverse_obj(self._search_json(
r'var\s+_history\s*=', comment_page, 'comment list',
program_code, contains_pattern=r'\[(?s:.+)\]', fatal=False), (..., {
'timestamp': (0, {int}),
'author_is_uploader': (1, {lambda x: x == 2}),
'author': (2, {str}),
'text': (3, {str}),
'id': (4, {str_or_none}),
}))

View File

@ -1,70 +0,0 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
parse_qs,
time_seconds,
traverse_obj,
)
class PIAULIZAPortalIE(InfoExtractor):
IE_DESC = 'ulizaportal.jp - PIA LIVE STREAM'
_VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
_TESTS = [{
'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
'info_dict': {
'id': '005f18b7-e810-5618-cb82-0987c5755d44',
'title': 'プレゼンテーションプレイヤーのサンプル',
'live_status': 'not_live',
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
}, {
'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
'info_dict': {
'id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
'title': '【確認用】視聴サンプルページULIZA',
'live_status': 'not_live',
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
expires = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
if expires and expires <= time_seconds():
raise ExtractorError('The link is expired.', video_id=video_id, expected=True)
webpage = self._download_webpage(url, video_id)
player_data = self._download_webpage(
self._search_regex(
r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
webpage, 'player data url'),
video_id, headers={'Referer': 'https://ulizaportal.jp/'},
note='Fetching player data', errnote='Unable to fetch player data')
formats = self._extract_m3u8_formats(
self._search_regex(
r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data,
'm3u8 url', default=None),
video_id, fatal=False)
m3u8_type = self._search_regex(
r'/hls/(dvr|video)/', traverse_obj(formats, (0, 'url')), 'm3u8 type', default=None)
return {
'id': video_id,
'title': self._html_extract_title(webpage),
'formats': formats,
'live_status': {
'video': 'is_live',
'dvr': 'was_live', # short-term archives
}.get(m3u8_type, 'not_live'), # VOD or long-term archives
}

View File

@ -1,136 +0,0 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
extract_attributes,
int_or_none,
js_to_json,
merge_dicts,
)
class PokemonIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))'
_TESTS = [{
'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/',
'md5': '2fe8eaec69768b25ef898cda9c43062e',
'info_dict': {
'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4',
'ext': 'mp4',
'title': 'The Ol Raise and Switch!',
'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
},
'add_id': ['LimelightMedia'],
}, {
# no data-video-title
'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008',
'info_dict': {
'id': 'dfbaf830d7e54e179837c50c0c6cc0e1',
'ext': 'mp4',
'title': "Pokémon : L'ascension de Darkrai",
'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5',
},
'add_id': ['LimelightMedia'],
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
'only_matching': True,
}, {
'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/',
'only_matching': True,
}, {
'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, video_id or display_id)
video_data = extract_attributes(self._search_regex(
r'(<[^>]+data-video-id="{}"[^>]*>)'.format(video_id if video_id else '[a-z0-9]{32}'),
webpage, 'video data element'))
video_id = video_data['data-video-id']
title = video_data.get('data-video-title') or self._html_search_meta(
'pkm-title', webpage, ' title', default=None) or self._search_regex(
r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title')
return {
'_type': 'url_transparent',
'id': video_id,
'url': f'limelight:media:{video_id}',
'title': title,
'description': video_data.get('data-video-summary'),
'thumbnail': video_data.get('data-video-poster'),
'series': 'Pokémon',
'season_number': int_or_none(video_data.get('data-video-season')),
'episode': title,
'episode_number': int_or_none(video_data.get('data-video-episode')),
'ie_key': 'LimelightMedia',
}
class PokemonWatchIE(InfoExtractor):
_VALID_URL = r'https?://watch\.pokemon\.com/[a-z]{2}-[a-z]{2}/(?:#/)?player(?:\.html)?\?id=(?P<id>[a-z0-9]{32})'
_API_URL = 'https://www.pokemon.com/api/pokemontv/v2/channels/{0:}'
_TESTS = [{
'url': 'https://watch.pokemon.com/en-us/player.html?id=8309a40969894a8e8d5bc1311e9c5667',
'md5': '62833938a31e61ab49ada92f524c42ff',
'info_dict': {
'id': '8309a40969894a8e8d5bc1311e9c5667',
'ext': 'mp4',
'title': 'Lillier and the Staff!',
'description': 'md5:338841b8c21b283d24bdc9b568849f04',
},
}, {
'url': 'https://watch.pokemon.com/en-us/#/player?id=3fe7752ba09141f0b0f7756d1981c6b2',
'only_matching': True,
}, {
'url': 'https://watch.pokemon.com/de-de/player.html?id=b3c402e111a4459eb47e12160ab0ba07',
'only_matching': True,
}]
def _extract_media(self, channel_array, video_id):
for channel in channel_array:
for media in channel.get('media'):
if media.get('id') == video_id:
return media
return None
def _real_extract(self, url):
video_id = self._match_id(url)
info = {
'_type': 'url',
'id': video_id,
'url': f'limelight:media:{video_id}',
'ie_key': 'LimelightMedia',
}
# API call can be avoided entirely if we are listing formats
if self.get_param('listformats', False):
return info
webpage = self._download_webpage(url, video_id)
build_vars = self._parse_json(self._search_regex(
r'(?s)buildVars\s*=\s*({.*?})', webpage, 'build vars'),
video_id, transform_source=js_to_json)
region = build_vars.get('region')
channel_array = self._download_json(self._API_URL.format(region), video_id)
video_data = self._extract_media(channel_array, video_id)
if video_data is None:
raise ExtractorError(
f'Video {video_id} does not exist', expected=True)
info['_type'] = 'url_transparent'
images = video_data.get('images')
return merge_dicts(info, {
'title': video_data.get('title'),
'description': video_data.get('description'),
'thumbnail': images.get('medium') or images.get('small'),
'series': 'Pokémon',
'season_number': int_or_none(video_data.get('season')),
'episode': video_data.get('title'),
'episode_number': int_or_none(video_data.get('episode')),
})

113
yt_dlp/extractor/uliza.py Normal file
View File

@ -0,0 +1,113 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
make_archive_id,
parse_qs,
time_seconds,
)
from ..utils.traversal import traverse_obj
class UlizaPlayerIE(InfoExtractor):
_VALID_URL = r'https://player-api\.p\.uliza\.jp/v1/players/[^?#]+\?(?:[^#]*&)?name=(?P<id>[^#&]+)'
_TESTS = [{
'url': 'https://player-api.p.uliza.jp/v1/players/timeshift-disabled/pia/admin?type=normal&playerobjectname=ulizaPlayer&name=livestream01_dvr&repeatable=true',
'info_dict': {
'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
'ext': 'mp4',
'title': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
'live_status': 'was_live',
'_old_archive_ids': ['piaulizaportal 88f3109a-f503-4d0f-a9f7-9f39ac745d84'],
},
}, {
'url': 'https://player-api.p.uliza.jp/v1/players/uliza_jp_gallery_normal/promotion/admin?type=presentation&name=cookings&targetid=player1',
'info_dict': {
'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
'ext': 'mp4',
'title': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
'live_status': 'not_live',
'_old_archive_ids': ['piaulizaportal ae350126-5e22-4a7f-a8ac-8d0fd448b800'],
},
}, {
'url': 'https://player-api.p.uliza.jp/v1/players/default-player/pia/admin?type=normal&name=pia_movie_uliza_fix&targetid=ulizahtml5&repeatable=true',
'info_dict': {
'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
'ext': 'mp4',
'title': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
'live_status': 'not_live',
'_old_archive_ids': ['piaulizaportal 0644ecc8-e354-41b4-b957-3b08a2d63df1'],
},
}]
def _real_extract(self, url):
display_id = self._match_id(url)
player_data = self._download_webpage(
url, display_id, headers={'Referer': 'https://player-api.p.uliza.jp/'},
note='Fetching player data', errnote='Unable to fetch player data')
m3u8_url = self._search_regex(
r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data, 'm3u8 url')
video_id = parse_qs(m3u8_url).get('ss', [display_id])[0]
formats = self._extract_m3u8_formats(m3u8_url, video_id)
m3u8_type = self._search_regex(
r'/hls/(dvr|video)/', traverse_obj(formats, (0, 'url')), 'm3u8 type', default=None)
return {
'id': video_id,
'title': video_id,
'formats': formats,
'live_status': {
'video': 'is_live',
'dvr': 'was_live', # short-term archives
}.get(m3u8_type, 'not_live'), # VOD or long-term archives
'_old_archive_ids': [make_archive_id('PIAULIZAPortal', video_id)],
}
class UlizaPortalIE(InfoExtractor):
IE_DESC = 'ulizaportal.jp'
_VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
_TESTS = [{
'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
'info_dict': {
'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
'display_id': '005f18b7-e810-5618-cb82-0987c5755d44',
'title': 'プレゼンテーションプレイヤーのサンプル',
'live_status': 'not_live',
'_old_archive_ids': ['piaulizaportal ae350126-5e22-4a7f-a8ac-8d0fd448b800'],
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
}, {
'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
'info_dict': {
'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
'display_id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
'title': '【確認用】視聴サンプルページULIZA',
'live_status': 'not_live',
'_old_archive_ids': ['piaulizaportal 0644ecc8-e354-41b4-b957-3b08a2d63df1'],
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
expires = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
if expires and expires <= time_seconds():
raise ExtractorError('The link is expired', video_id=video_id, expected=True)
webpage = self._download_webpage(url, video_id)
player_data_url = self._search_regex(
r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
webpage, 'player data url')
return self.url_result(
player_data_url, UlizaPlayerIE, url_transparent=True,
display_id=video_id, video_title=self._html_extract_title(webpage))

View File

@ -1,189 +0,0 @@
import functools
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
int_or_none,
parse_duration,
qualities,
remove_start,
strip_or_none,
)
class VeohIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|videos|embed|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'
_TESTS = [{
'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
'md5': '620e68e6a3cff80086df3348426c9ca3',
'info_dict': {
'id': 'v56314296nk7Zdmz3',
'ext': 'mp4',
'title': 'Straight Backs Are Stronger',
'description': 'md5:203f976279939a6dc664d4001e13f5f4',
'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th56314296\\.jpg(\\?.*)?',
'uploader': 'LUMOback',
'duration': 46,
'view_count': int,
'average_rating': int,
'comment_count': int,
'age_limit': 0,
'categories': ['technology_and_gaming'],
'tags': ['posture', 'posture', 'sensor', 'back', 'pain', 'wearable', 'tech', 'lumo'],
},
}, {
'url': 'http://www.veoh.com/embed/v56314296nk7Zdmz3',
'only_matching': True,
}, {
'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
'info_dict': {
'id': '27701988',
'ext': 'mp4',
'title': 'Chile workers cover up to avoid skin damage',
'description': 'md5:2bd151625a60a32822873efc246ba20d',
'uploader': 'afp-news',
'duration': 123,
},
'skip': 'This video has been deleted.',
}, {
'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
'md5': '4fde7b9e33577bab2f2f8f260e30e979',
'note': 'Embedded ooyala video',
'info_dict': {
'id': '69525809',
'ext': 'mp4',
'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
'uploader': 'newsy-videos',
},
'skip': 'This video has been deleted.',
}, {
'url': 'http://www.veoh.com/watch/e152215AJxZktGS',
'only_matching': True,
}, {
'url': 'https://www.veoh.com/videos/v16374379WA437rMH',
'md5': 'cceb73f3909063d64f4b93d4defca1b3',
'info_dict': {
'id': 'v16374379WA437rMH',
'ext': 'mp4',
'title': 'Phantasmagoria 2, pt. 1-3',
'description': 'Phantasmagoria: a Puzzle of Flesh',
'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th16374379\\.jpg(\\?.*)?',
'uploader': 'davidspackage',
'duration': 968,
'view_count': int,
'average_rating': int,
'comment_count': int,
'age_limit': 18,
'categories': ['technology_and_gaming', 'gaming'],
'tags': ['puzzle', 'of', 'flesh'],
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
metadata = self._download_json(
'https://www.veoh.com/watch/getVideo/' + video_id,
video_id)
video = metadata['video']
title = video['title']
thumbnail_url = None
q = qualities(['Regular', 'HQ'])
formats = []
for f_id, f_url in video.get('src', {}).items():
if not f_url:
continue
if f_id == 'poster':
thumbnail_url = f_url
else:
formats.append({
'format_id': f_id,
'quality': q(f_id),
'url': f_url,
})
categories = metadata.get('categoryPath')
if not categories:
category = remove_start(strip_or_none(video.get('category')), 'category_')
categories = [category] if category else None
tags = video.get('tags')
return {
'id': video_id,
'title': title,
'description': video.get('description'),
'thumbnail': thumbnail_url,
'uploader': video.get('author', {}).get('nickname'),
'duration': int_or_none(video.get('lengthBySec')) or parse_duration(video.get('length')),
'view_count': int_or_none(video.get('views')),
'formats': formats,
'average_rating': int_or_none(video.get('rating')),
'comment_count': int_or_none(video.get('numOfComments')),
'age_limit': 18 if video.get('contentRatingId') == 2 else 0,
'categories': categories,
'tags': tags.split(', ') if tags else None,
}
class VeohUserIE(VeohIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?veoh\.com/users/(?P<id>[\w-]+)'
IE_NAME = 'veoh:user'
_TESTS = [
{
'url': 'https://www.veoh.com/users/valentinazoe',
'info_dict': {
'id': 'valentinazoe',
'title': 'valentinazoe (Uploads)',
},
'playlist_mincount': 75,
},
{
'url': 'https://www.veoh.com/users/PiensaLibre',
'info_dict': {
'id': 'PiensaLibre',
'title': 'PiensaLibre (Uploads)',
},
'playlist_mincount': 2,
}]
_PAGE_SIZE = 16
def _fetch_page(self, uploader, page):
response = self._download_json(
'https://www.veoh.com/users/published/videos', uploader,
note=f'Downloading videos page {page + 1}',
headers={
'x-csrf-token': self._TOKEN,
'content-type': 'application/json;charset=UTF-8',
},
data=json.dumps({
'username': uploader,
'maxResults': self._PAGE_SIZE,
'page': page + 1,
'requestName': 'userPage',
}).encode())
if not response.get('success'):
raise ExtractorError(response['message'])
for video in response['videos']:
yield self.url_result(f'https://www.veoh.com/watch/{video["permalinkId"]}', VeohIE,
video['permalinkId'], video.get('title'))
def _real_initialize(self):
webpage = self._download_webpage(
'https://www.veoh.com', None, note='Downloading authorization token')
self._TOKEN = self._search_regex(
r'csrfToken:\s*(["\'])(?P<token>[0-9a-zA-Z]{40})\1', webpage,
'request token', group='token')
def _real_extract(self, url):
uploader = self._match_id(url)
return self.playlist_result(OnDemandPagedList(
functools.partial(self._fetch_page, uploader),
self._PAGE_SIZE), uploader, f'{uploader} (Uploads)')

View File

@ -5087,7 +5087,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
def _rich_entries(self, rich_grid_renderer): def _rich_entries(self, rich_grid_renderer):
renderer = traverse_obj( renderer = traverse_obj(
rich_grid_renderer, rich_grid_renderer,
('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer', 'shortsLockupViewModel'), any)) or {} ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer', 'shortsLockupViewModel', 'lockupViewModel'), any)) or {}
video_id = renderer.get('videoId') video_id = renderer.get('videoId')
if video_id: if video_id:
yield self._extract_video(renderer) yield self._extract_video(renderer)
@ -5114,6 +5114,18 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
})), })),
thumbnails=self._extract_thumbnails(renderer, 'thumbnail', final_key='sources')) thumbnails=self._extract_thumbnails(renderer, 'thumbnail', final_key='sources'))
return return
# lockupViewModel extraction
content_id = renderer.get('contentId')
if content_id and renderer.get('contentType') == 'LOCKUP_CONTENT_TYPE_PODCAST':
yield self.url_result(
f'https://www.youtube.com/playlist?list={content_id}',
ie=YoutubeTabIE, video_id=content_id,
**traverse_obj(renderer, {
'title': ('metadata', 'lockupMetadataViewModel', 'title', 'content', {str}),
}),
thumbnails=self._extract_thumbnails(renderer, (
'contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail', 'thumbnailViewModel', 'image'), final_key='sources'))
return
def _video_entry(self, video_renderer): def _video_entry(self, video_renderer):
video_id = video_renderer.get('videoId') video_id = video_renderer.get('videoId')
@ -6706,22 +6718,22 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
'playlist_count': 0, 'playlist_count': 0,
}, { }, {
# Podcasts tab, with rich entry playlistRenderers # Podcasts tab, with rich entry lockupViewModel
'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts', 'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
'info_dict': { 'info_dict': {
'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw', 'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw', 'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast', 'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c', 'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
'title': '99 Percent Invisible - Podcasts', 'title': '99% Invisible - Podcasts',
'uploader': '99 Percent Invisible', 'uploader': '99% Invisible',
'channel_follower_count': int, 'channel_follower_count': int,
'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw', 'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
'tags': [], 'tags': [],
'channel': '99 Percent Invisible', 'channel': '99% Invisible',
'uploader_id': '@99percentinvisiblepodcast', 'uploader_id': '@99percentinvisiblepodcast',
}, },
'playlist_count': 0, 'playlist_count': 5,
}, { }, {
# Releases tab, with rich entry playlistRenderers (same as Podcasts tab) # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
'url': 'https://www.youtube.com/@AHimitsu/releases', 'url': 'https://www.youtube.com/@AHimitsu/releases',