Compare commits

...

34 Commits

Author SHA1 Message Date
ChocoLZS
4d9ff10975
Merge 8f5a765e25 into a9f85670d0 2024-11-12 00:04:06 +01:00
manav_chaudhary
a9f85670d0
[ie/Chaturbate] Support alternate domains (#10595)
Closes #10594
Authored by: manavchaudhary1
2024-11-11 23:41:56 +01:00
Sam
6b43a8d84b
[ie/goplay] Fix extractor (#11466)
Closes #10857
Authored by: SamDecrock, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-11-11 22:03:31 +00:00
Hugo
2db8c2e7d5
[ie/CloudflareStream] Avoid extraction via videodelivery.net (#11478)
Closes #11477
Authored by: hugovdev
2024-11-11 22:00:05 +00:00
bashonly
f9c8deb4e5
[build] Bump PyInstaller version pin to >=6.11.1 (#11507)
Authored by: bashonly
2024-11-11 21:19:03 +00:00
Sakura286
0ec9bfed4d
[ie/MixchMovie] Add extractor (#10897)
Closes #10765
Authored by: Sakura286
2024-11-11 21:40:29 +01:00
Subrat Lima
c673731061
[ie/spreaker] Support podcast and feed pages (#10968)
Closes #10925
Authored by: subrat-lima
2024-11-11 20:08:18 +01:00
sepro
e398217aae
[ie/rutube] Rework extractors (#11480)
Closes #9694, Closes #10104, Closes #11117, Closes #11415, Closes #11476
Authored by: seproDev
2024-11-11 18:44:53 +01:00
Julio Napurí
c39016f66d
[ie/spreaker] Support episode pages and access keys (#11489)
Authored by: julionc
2024-11-11 18:42:05 +01:00
ChocoLZS
8f5a765e25
Update yt_dlp/extractor/piaulizaportal.py
Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
2024-10-28 11:22:18 +08:00
ChocoLZS
75ea808d0a
Apply suggestions from code review
Co-authored-by: sepro <sepro@sepr0.com>
2024-10-05 22:49:09 +08:00
ChocoLZS
b42637c619
Apply suggestions from code review
Co-authored-by: sepro <sepro@sepr0.com>
2024-10-01 16:31:47 +08:00
Mozi
a75a02ad2a
[ie/pialive] Follow your steps (#1)
* [ie/pialive] Support detecting upcoming and ended live events

* Pack API arguments

* fix UnboundLocalError for "chat_room_url"

* extract video_id from query string by "parse_qs()"

* Fix tests
2024-09-25 14:47:15 +08:00
ChocoLZS
d993580e6f
Apply suggestions from code review
Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
2024-09-11 09:15:34 +08:00
chocoie
0f4cdc03d9 fix: suggestions 2024-09-09 21:35:40 +08:00
chocoie
7b94a0000c fix: error 2024-09-09 21:27:29 +08:00
ChocoLZS
aa410c803b
Apply suggestions from code review
Co-authored-by: sepro <sepro@sepr0.com>
2024-09-09 21:13:03 +08:00
ChocoLZS
83f4c5a98e fix: code style 2024-08-28 22:48:42 +08:00
ChocoLZS
841a557c0e fix: code style 2024-08-28 22:35:13 +08:00
ChocoLZS
bca2ca9852 fix: code style 2024-08-27 08:35:10 +00:00
ChocoLZS
a2ed14747b fix: code style 2024-08-25 23:49:08 +08:00
ChocoLZS
04f1bfde50 feat: use extract_comments instead 2024-08-25 23:26:37 +08:00
ChocoLZS
848a923252 doc: add note for json downloader 2024-08-24 23:44:28 +08:00
ChocoLZS
1ff33d1333 fix: remove unnecessary code 2024-08-24 23:34:06 +08:00
ChocoLZS
27b31cc3df chore: correct tests 2024-08-24 17:44:14 +08:00
ChocoLZS
fad7c2a75a chore: remove smuggled_url 2024-08-23 23:12:18 +08:00
ChocoLZS
1a22cc3d3f chore: remove unnecessary smuggled data 2024-08-23 10:01:07 +08:00
ChocoLZS
95dffc0e75 fix: use url_transparent 2024-08-23 09:18:01 +08:00
ChocoLZS
b3ebbaf8dd feat: fetch comments 2024-08-22 23:06:16 +08:00
ChocoLZS
1a16c62638 chore: rename regex and add referer 2024-08-22 19:51:05 +08:00
ChocoLZS
3fa3edc3c4 chore: use url instead of Request and add tests 2024-08-22 11:59:32 +08:00
ChocoLZS
edf1eebbce chore: extract id from m3u8 url 2024-08-21 23:55:59 +08:00
ChocoLZS
0639489bc5 feat: add embed player handler 2024-08-21 14:55:09 +08:00
ChocoLZS
9c2a6fa449 feat: add support for uliza in pia-live 2024-08-21 11:43:45 +08:00
11 changed files with 524 additions and 212 deletions

View File

@ -411,7 +411,7 @@ jobs:
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
python devscripts/install_deps.py -o --include build python devscripts/install_deps.py -o --include build
python devscripts/install_deps.py --include curl-cffi python devscripts/install_deps.py --include curl-cffi
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.10.0-py3-none-any.whl" python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.11.1-py3-none-any.whl"
- name: Prepare - name: Prepare
run: | run: |
@ -460,7 +460,7 @@ jobs:
run: | run: |
python devscripts/install_deps.py -o --include build python devscripts/install_deps.py -o --include build
python devscripts/install_deps.py python devscripts/install_deps.py
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.10.0-py3-none-any.whl" python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.11.1-py3-none-any.whl"
- name: Prepare - name: Prepare
run: | run: |

View File

@ -83,7 +83,7 @@ test = [
"pytest-rerunfailures~=14.0", "pytest-rerunfailures~=14.0",
] ]
pyinstaller = [ pyinstaller = [
"pyinstaller>=6.10.0", # Windows temp cleanup fixed in 6.10.0 "pyinstaller>=6.11.1", # Windows temp cleanup fixed in 6.11.1
] ]
[project.urls] [project.urls]

View File

@ -1156,6 +1156,7 @@ from .mitele import MiTeleIE
from .mixch import ( from .mixch import (
MixchArchiveIE, MixchArchiveIE,
MixchIE, MixchIE,
MixchMovieIE,
) )
from .mixcloud import ( from .mixcloud import (
MixcloudIE, MixcloudIE,
@ -1517,8 +1518,12 @@ from .pgatour import PGATourIE
from .philharmoniedeparis import PhilharmonieDeParisIE from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE from .photobucket import PhotobucketIE
from .pialive import PiaLiveIE
from .piapro import PiaproIE from .piapro import PiaproIE
from .piaulizaportal import PIAULIZAPortalIE from .piaulizaportal import (
PIAULIZAPortalAPIIE,
PIAULIZAPortalIE,
)
from .picarto import ( from .picarto import (
PicartoIE, PicartoIE,
PicartoVodIE, PicartoVodIE,
@ -1939,9 +1944,7 @@ from .spotify import (
) )
from .spreaker import ( from .spreaker import (
SpreakerIE, SpreakerIE,
SpreakerPageIE,
SpreakerShowIE, SpreakerShowIE,
SpreakerShowPageIE,
) )
from .springboardplatform import SpringboardPlatformIE from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE from .sprout import SproutIE

View File

@ -9,7 +9,7 @@ from ..utils import (
class ChaturbateIE(InfoExtractor): class ChaturbateIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)' _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.(?P<tld>com|eu|global)/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.chaturbate.com/siswet19/', 'url': 'https://www.chaturbate.com/siswet19/',
'info_dict': { 'info_dict': {
@ -29,15 +29,24 @@ class ChaturbateIE(InfoExtractor):
}, { }, {
'url': 'https://en.chaturbate.com/siswet19/', 'url': 'https://en.chaturbate.com/siswet19/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://chaturbate.eu/siswet19/',
'only_matching': True,
}, {
'url': 'https://chaturbate.eu/fullvideo/?b=caylin',
'only_matching': True,
}, {
'url': 'https://chaturbate.global/siswet19/',
'only_matching': True,
}] }]
_ROOM_OFFLINE = 'Room is currently offline' _ROOM_OFFLINE = 'Room is currently offline'
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id, tld = self._match_valid_url(url).group('id', 'tld')
webpage = self._download_webpage( webpage = self._download_webpage(
f'https://chaturbate.com/{video_id}/', video_id, f'https://chaturbate.{tld}/{video_id}/', video_id,
headers=self.geo_verification_headers()) headers=self.geo_verification_headers())
found_m3u8_urls = [] found_m3u8_urls = []

View File

@ -8,7 +8,7 @@ class CloudflareStreamIE(InfoExtractor):
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)' _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
_EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video=' _EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video='
_ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+' _ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+'
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})' _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}(?P<domain>{_DOMAIN_RE})/|{_EMBED_RE})(?P<id>{_ID_RE})'
_EMBED_REGEX = [ _EMBED_REGEX = [
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1', rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1',
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})', rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
@ -19,7 +19,7 @@ class CloudflareStreamIE(InfoExtractor):
'id': '31c9291ab41fac05471db4e73aa11717', 'id': '31c9291ab41fac05471db4e73aa11717',
'ext': 'mp4', 'ext': 'mp4',
'title': '31c9291ab41fac05471db4e73aa11717', 'title': '31c9291ab41fac05471db4e73aa11717',
'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg', 'thumbnail': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
}, },
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
@ -30,7 +30,7 @@ class CloudflareStreamIE(InfoExtractor):
'id': '0e8e040aec776862e1d632a699edf59e', 'id': '0e8e040aec776862e1d632a699edf59e',
'ext': 'mp4', 'ext': 'mp4',
'title': '0e8e040aec776862e1d632a699edf59e', 'title': '0e8e040aec776862e1d632a699edf59e',
'thumbnail': 'https://videodelivery.net/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg', 'thumbnail': 'https://cloudflarestream.com/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg',
}, },
}, { }, {
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1', 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
@ -54,7 +54,7 @@ class CloudflareStreamIE(InfoExtractor):
'id': 'eaef9dea5159cf968be84241b5cedfe7', 'id': 'eaef9dea5159cf968be84241b5cedfe7',
'ext': 'mp4', 'ext': 'mp4',
'title': 'eaef9dea5159cf968be84241b5cedfe7', 'title': 'eaef9dea5159cf968be84241b5cedfe7',
'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg', 'thumbnail': 'https://cloudflarestream.com/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
}, },
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
@ -62,8 +62,9 @@ class CloudflareStreamIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id, domain = self._match_valid_url(url).group('id', 'domain')
domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net' if domain != 'bytehighway.net':
domain = 'cloudflarestream.com'
base_url = f'https://{domain}/{video_id}/' base_url = f'https://{domain}/{video_id}/'
if '.' in video_id: if '.' in video_id:
video_id = self._parse_json(base64.urlsafe_b64decode( video_id = self._parse_json(base64.urlsafe_b64decode(

View File

@ -5,56 +5,63 @@ import hashlib
import hmac import hmac
import json import json
import os import os
import re
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none,
js_to_json,
remove_end,
traverse_obj, traverse_obj,
unescapeHTML,
) )
class GoPlayIE(InfoExtractor): class GoPlayIE(InfoExtractor):
_VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/]+/[^/]+/|)(?P<display_id>[^/#]+)' _VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/?#]+/[^/?#]+/|)(?P<id>[^/#]+)'
_NETRC_MACHINE = 'goplay' _NETRC_MACHINE = 'goplay'
_TESTS = [{ _TESTS = [{
'url': 'https://www.goplay.be/video/de-container-cup/de-container-cup-s3/de-container-cup-s3-aflevering-2#autoplay', 'url': 'https://www.goplay.be/video/de-slimste-mens-ter-wereld/de-slimste-mens-ter-wereld-s22/de-slimste-mens-ter-wereld-s22-aflevering-1',
'info_dict': { 'info_dict': {
'id': '9c4214b8-e55d-4e4b-a446-f015f6c6f811', 'id': '2baa4560-87a0-421b-bffc-359914e3c387',
'ext': 'mp4', 'ext': 'mp4',
'title': 'S3 - Aflevering 2', 'title': 'S22 - Aflevering 1',
'series': 'De Container Cup', 'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}',
'season': 'Season 3', 'series': 'De Slimste Mens ter Wereld',
'season_number': 3, 'episode': 'Episode 1',
'episode': 'Episode 2', 'season_number': 22,
'episode_number': 2, 'episode_number': 1,
'season': 'Season 22',
}, },
'params': {'skip_download': True},
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
}, { }, {
'url': 'https://www.goplay.be/video/a-family-for-thr-holidays-s1-aflevering-1#autoplay', 'url': 'https://www.goplay.be/video/1917',
'info_dict': { 'info_dict': {
'id': '74e3ed07-748c-49e4-85a0-393a93337dbf', 'id': '40cac41d-8d29-4ef5-aa11-75047b9f0907',
'ext': 'mp4', 'ext': 'mp4',
'title': 'A Family for the Holidays', 'title': '1917',
'description': r're:Op het hoogtepunt van de Eerste Wereldoorlog krijgen twee jonge .{94}',
}, },
'params': {'skip_download': True},
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
}, { }, {
'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay', 'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
'info_dict': { 'info_dict': {
'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656', 'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee',
'ext': 'mp4', 'ext': 'mp4',
'title': 'S11 - Aflevering 1', 'title': 'S11 - Aflevering 1',
'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten .{102}',
'episode': 'Episode 1', 'episode': 'Episode 1',
'series': 'De Mol', 'series': 'De Mol',
'season_number': 11, 'season_number': 11,
'episode_number': 1, 'episode_number': 1,
'season': 'Season 11', 'season': 'Season 11',
}, },
'params': { 'params': {'skip_download': True},
'skip_download': True,
},
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
}] }]
@ -69,27 +76,42 @@ class GoPlayIE(InfoExtractor):
if not self._id_token: if not self._id_token:
raise self.raise_login_required(method='password') raise self.raise_login_required(method='password')
def _real_extract(self, url): def _find_json(self, s):
url, display_id = self._match_valid_url(url).group(0, 'display_id') return self._search_json(
webpage = self._download_webpage(url, display_id) r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
video_data_json = self._html_search_regex(r'<div\s+data-hero="([^"]+)"', webpage, 'video_data')
video_data = self._parse_json(unescapeHTML(video_data_json), display_id).get('data')
movie = video_data.get('movie') def _real_extract(self, url):
if movie: display_id = self._match_id(url)
video_id = movie['videoUuid'] webpage = self._download_webpage(url, display_id)
info_dict = {
'title': movie.get('title'), nextjs_data = traverse_obj(
} re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
else: (..., {js_to_json}, {json.loads}, ..., {self._find_json}, ...))
episode = traverse_obj(video_data, ('playlists', ..., 'episodes', lambda _, v: v['pageInfo']['url'] == url), get_all=False) meta = traverse_obj(nextjs_data, (
video_id = episode['videoUuid'] ..., lambda _, v: v['meta']['path'] == urllib.parse.urlparse(url).path, 'meta', any))
info_dict = {
'title': episode.get('episodeTitle'), video_id = meta['uuid']
'series': traverse_obj(episode, ('program', 'title')), info_dict = traverse_obj(meta, {
'season_number': episode.get('seasonNumber'), 'title': ('title', {str}),
'episode_number': episode.get('episodeNumber'), 'description': ('description', {str.strip}),
} })
if traverse_obj(meta, ('program', 'subtype')) != 'movie':
for season_data in traverse_obj(nextjs_data, (..., 'children', ..., 'playlists', ...)):
episode_data = traverse_obj(
season_data, ('videos', lambda _, v: v['videoId'] == video_id, any))
if not episode_data:
continue
episode_title = traverse_obj(
episode_data, 'contextualTitle', 'episodeTitle', expected_type=str)
info_dict.update({
'title': episode_title or info_dict.get('title'),
'series': remove_end(info_dict.get('title'), f' - {episode_title}'),
'season_number': traverse_obj(season_data, ('season', {int_or_none})),
'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})),
})
break
api = self._download_json( api = self._download_json(
f'https://api.goplay.be/web/v1/videos/long-form/{video_id}', f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',

View File

@ -12,7 +12,7 @@ from ..utils.traversal import traverse_obj
class MixchIE(InfoExtractor): class MixchIE(InfoExtractor):
IE_NAME = 'mixch' IE_NAME = 'mixch'
_VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P<id>\d+)' _VALID_URL = r'https?://mixch\.tv/u/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://mixch.tv/u/16943797/live', 'url': 'https://mixch.tv/u/16943797/live',
@ -74,7 +74,7 @@ class MixchIE(InfoExtractor):
class MixchArchiveIE(InfoExtractor): class MixchArchiveIE(InfoExtractor):
IE_NAME = 'mixch:archive' IE_NAME = 'mixch:archive'
_VALID_URL = r'https?://(?:www\.)?mixch\.tv/archive/(?P<id>\d+)' _VALID_URL = r'https?://mixch\.tv/archive/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://mixch.tv/archive/421', 'url': 'https://mixch.tv/archive/421',
@ -116,3 +116,56 @@ class MixchArchiveIE(InfoExtractor):
'formats': self._extract_m3u8_formats(info_json['archiveURL'], video_id), 'formats': self._extract_m3u8_formats(info_json['archiveURL'], video_id),
'thumbnail': traverse_obj(info_json, ('thumbnailURL', {url_or_none})), 'thumbnail': traverse_obj(info_json, ('thumbnailURL', {url_or_none})),
} }
class MixchMovieIE(InfoExtractor):
IE_NAME = 'mixch:movie'
_VALID_URL = r'https?://mixch\.tv/m/(?P<id>\w+)'
_TESTS = [{
'url': 'https://mixch.tv/m/Ve8KNkJ5',
'info_dict': {
'id': 'Ve8KNkJ5',
'title': '夏☀️\nムービーへのポイントは本イベントに加算されないので配信にてお願い致します🙇🏻\u200d♀️\n#TGCCAMPUS #ミス東大 #ミス東大2024 ',
'ext': 'mp4',
'uploader': 'ミス東大No.5 松藤百香🍑💫',
'uploader_id': '12299174',
'channel_follower_count': int,
'view_count': int,
'like_count': int,
'comment_count': int,
'timestamp': 1724070828,
'uploader_url': 'https://mixch.tv/u/12299174',
'live_status': 'not_live',
'upload_date': '20240819',
},
}, {
'url': 'https://mixch.tv/m/61DzpIKE',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
f'https://mixch.tv/api-web/movies/{video_id}', video_id)
return {
'id': video_id,
'formats': [{
'format_id': 'mp4',
'url': data['movie']['file'],
'ext': 'mp4',
}],
**traverse_obj(data, {
'title': ('movie', 'title', {str}),
'thumbnail': ('movie', 'thumbnailURL', {url_or_none}),
'uploader': ('ownerInfo', 'name', {str}),
'uploader_id': ('ownerInfo', 'id', {int}, {str_or_none}),
'channel_follower_count': ('ownerInfo', 'fan', {int_or_none}),
'view_count': ('ownerInfo', 'view', {int_or_none}),
'like_count': ('movie', 'favCount', {int_or_none}),
'comment_count': ('movie', 'commentCount', {int_or_none}),
'timestamp': ('movie', 'published', {int_or_none}),
'uploader_url': ('ownerInfo', 'id', {lambda x: x and f'https://mixch.tv/u/{x}'}, filter),
}),
'live_status': 'not_live',
}

126
yt_dlp/extractor/pialive.py Normal file
View File

@ -0,0 +1,126 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
extract_attributes,
get_element_by_class,
get_element_html_by_class,
multipart_encode,
unified_timestamp,
url_or_none,
)
from ..utils.traversal import traverse_obj
class PiaLiveIE(InfoExtractor):
PLAYER_ROOT_URL = 'https://player.pia-live.jp/'
PIA_LIVE_API_URL = 'https://api.pia-live.jp'
API_KEY = 'kfds)FKFps-dms9e'
_VALID_URL = r'https?://player\.pia-live\.jp/stream/(?P<id>[\w-]+)'
_TESTS = [
{
'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krUDqGOwN4d61dCWQYOd6CTxl4hjya9dsfEZGsM4uGOUdax60lEI4twsXGXf7crmz8Gk__GhupTrWxA7RFRVt76',
'info_dict': {
'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
'display_id': '2431867_001',
'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
'live_status': 'was_live',
'comment_count': int,
},
'params': {
'getcomments': True,
'skip_download': True,
'ignore_no_formats_error': True,
},
'skip': 'The video is no longer available',
},
{
'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krJdu0GVBVbVy01IwpJ6J3qBEm3d9TCTt1d0eWpsZGj7DrOjVOmS7GAWGwyscMgiThopJvzgWC4H5b-7XQjAfRZ',
'info_dict': {
'id': '9ce8b8ba-f6d1-4d1f-83a0-18c3148ded93',
'display_id': '2431867_002',
'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
'live_status': 'was_live',
'comment_count': int,
},
'params': {
'getcomments': True,
'skip_download': True,
'ignore_no_formats_error': True,
},
'skip': 'The video is no longer available',
},
]
def _extract_vars(self, variable, html):
return self._search_regex(
rf'(?:var|const|let)\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
html, f'variable {variable}', group='value')
def _real_extract(self, url):
video_key = self._match_id(url)
webpage = self._download_webpage(url, video_key)
program_code = self._extract_vars('programCode', webpage)
article_code = self._extract_vars('articleCode', webpage)
title = self._html_extract_title(webpage)
if get_element_html_by_class('play-end', webpage):
raise ExtractorError('The video is no longer available', expected=True, video_id=program_code)
if start_info := clean_html(get_element_by_class('play-waiting__date', webpage)):
date, time = self._search_regex(
r'(?P<date>\d{4}/\d{1,2}/\d{1,2})\([月火水木金土日]\)(?P<time>\d{2}:\d{2})',
start_info, 'start_info', fatal=False, group=('date', 'time'))
if date and time:
release_timestamp_str = f'{date} {time} +09:00'
release_timestamp = unified_timestamp(release_timestamp_str)
self.raise_no_formats(f'The video will be available after {release_timestamp_str}', expected=True)
return {
'id': program_code,
'title': title,
'live_status': 'is_upcoming',
'release_timestamp': release_timestamp,
}
payload, content_type = multipart_encode({
'play_url': video_key,
'api_key': self.API_KEY,
})
api_kwargs = {
'video_id': program_code,
'data': payload,
'headers': {'Content-Type': content_type, 'Referer': self.PLAYER_ROOT_URL},
}
player_tag_list = self._download_json(
f'{self.PIA_LIVE_API_URL}/perf/player-tag-list/{program_code}', **api_kwargs,
note='Fetching player tag list', errnote='Unable to fetch player tag list')
chat_room_url = None
if self.get_param('getcomments'):
chat_room_url = traverse_obj(self._download_json(
f'{self.PIA_LIVE_API_URL}/perf/chat-tag-list/{program_code}/{article_code}', **api_kwargs,
note='Fetching chat info', errnote='Unable to fetch chat info', fatal=False),
('data', 'chat_one_tag', {extract_attributes}, 'src', {url_or_none}))
return self.url_result(
extract_attributes(player_tag_list['data']['movie_one_tag'])['src'], url_transparent=True,
video_title=title, display_id=program_code, __post_extractor=self.extract_comments(
program_code, chat_room_url))
def _get_comments(self, video_id, chat_room_url):
if not chat_room_url:
return
if comment_page := self._download_webpage(
chat_room_url, video_id, headers={'Referer': self.PLAYER_ROOT_URL},
note='Fetching comment page', errnote='Unable to fetch comment page', fatal=False):
yield from traverse_obj(self._search_json(
r'var\s+_history\s*=', comment_page, 'comment list',
video_id, contains_pattern=r'\[(?s:.+)\]', fatal=False), (..., {
'timestamp': 0,
'author_is_uploader': (1, {lambda x: x == 2}),
'author': 2,
'text': 3,
'id': 4,
}))

View File

@ -1,11 +1,62 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import ExtractorError, int_or_none, parse_qs, time_seconds
ExtractorError, from ..utils.traversal import traverse_obj
int_or_none,
parse_qs,
time_seconds, class PIAULIZAPortalAPIIE(InfoExtractor):
traverse_obj, _VALID_URL = r'https://player-api\.p\.uliza\.jp/v1/players/[^?#]+\?(?:[^#]*&)?name=(?P<id>[^#&]+)'
) _TESTS = [
{
'url': 'https://player-api.p.uliza.jp/v1/players/timeshift-disabled/pia/admin?type=normal&playerobjectname=ulizaPlayer&name=livestream01_dvr&repeatable=true',
'info_dict': {
'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
'ext': 'mp4',
'title': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
'live_status': 'was_live',
},
},
{
'url': 'https://player-api.p.uliza.jp/v1/players/uliza_jp_gallery_normal/promotion/admin?type=presentation&name=cookings&targetid=player1',
'info_dict': {
'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
'ext': 'mp4',
'title': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
'live_status': 'not_live',
},
},
{
'url': 'https://player-api.p.uliza.jp/v1/players/default-player/pia/admin?type=normal&name=pia_movie_uliza_fix&targetid=ulizahtml5&repeatable=true',
'info_dict': {
'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
'ext': 'mp4',
'title': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
'live_status': 'not_live',
},
},
]
def _real_extract(self, url):
display_id = self._match_id(url)
player_data = self._download_webpage(
url, display_id, headers={'Referer': 'https://player-api.p.uliza.jp/'},
note='Fetching player data', errnote='Unable to fetch player data')
m3u8_url = self._search_regex(
r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data, 'm3u8 url')
video_id = parse_qs(m3u8_url).get('ss', [display_id])[0]
formats = self._extract_m3u8_formats(m3u8_url, video_id)
m3u8_type = self._search_regex(
r'/hls/(dvr|video)/', traverse_obj(formats, (0, 'url')), 'm3u8 type', default=None)
return {
'id': video_id,
'title': video_id,
'formats': formats,
'live_status': {
'video': 'is_live',
'dvr': 'was_live', # short-term archives
}.get(m3u8_type, 'not_live'), # VOD or long-term archives
}
class PIAULIZAPortalIE(InfoExtractor): class PIAULIZAPortalIE(InfoExtractor):
@ -14,7 +65,8 @@ class PIAULIZAPortalIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44', 'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
'info_dict': { 'info_dict': {
'id': '005f18b7-e810-5618-cb82-0987c5755d44', 'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
'display_id': '005f18b7-e810-5618-cb82-0987c5755d44',
'title': 'プレゼンテーションプレイヤーのサンプル', 'title': 'プレゼンテーションプレイヤーのサンプル',
'live_status': 'not_live', 'live_status': 'not_live',
}, },
@ -25,7 +77,8 @@ class PIAULIZAPortalIE(InfoExtractor):
}, { }, {
'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1', 'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
'info_dict': { 'info_dict': {
'id': '005e1b23-fe93-5780-19a0-98e917cc4b7d', 'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
'display_id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
'title': '【確認用】視聴サンプルページULIZA', 'title': '【確認用】視聴サンプルページULIZA',
'live_status': 'not_live', 'live_status': 'not_live',
}, },
@ -44,27 +97,9 @@ class PIAULIZAPortalIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
player_data = self._download_webpage( player_data_url = self._search_regex(
self._search_regex(
r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"', r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
webpage, 'player data url'), webpage, 'player data url')
video_id, headers={'Referer': 'https://ulizaportal.jp/'}, return self.url_result(
note='Fetching player data', errnote='Unable to fetch player data') player_data_url, PIAULIZAPortalAPIIE, url_transparent=True,
display_id=video_id, video_title=self._html_extract_title(webpage))
formats = self._extract_m3u8_formats(
self._search_regex(
r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data,
'm3u8 url', default=None),
video_id, fatal=False)
m3u8_type = self._search_regex(
r'/hls/(dvr|video)/', traverse_obj(formats, (0, 'url')), 'm3u8 type', default=None)
return {
'id': video_id,
'title': self._html_extract_title(webpage),
'formats': formats,
'live_status': {
'video': 'is_live',
'dvr': 'was_live', # short-term archives
}.get(m3u8_type, 'not_live'), # VOD or long-term archives
}

View File

@ -2,15 +2,18 @@ import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
UnsupportedError,
bool_or_none, bool_or_none,
determine_ext, determine_ext,
int_or_none, int_or_none,
js_to_json,
parse_qs, parse_qs,
traverse_obj, str_or_none,
try_get, try_get,
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
) )
from ..utils.traversal import traverse_obj
class RutubeBaseIE(InfoExtractor): class RutubeBaseIE(InfoExtractor):
@ -19,7 +22,7 @@ class RutubeBaseIE(InfoExtractor):
query = {} query = {}
query['format'] = 'json' query['format'] = 'json'
return self._download_json( return self._download_json(
f'http://rutube.ru/api/video/{video_id}/', f'https://rutube.ru/api/video/{video_id}/',
video_id, 'Downloading video JSON', video_id, 'Downloading video JSON',
'Unable to download video JSON', query=query) 'Unable to download video JSON', query=query)
@ -61,18 +64,21 @@ class RutubeBaseIE(InfoExtractor):
query = {} query = {}
query['format'] = 'json' query['format'] = 'json'
return self._download_json( return self._download_json(
f'http://rutube.ru/api/play/options/{video_id}/', f'https://rutube.ru/api/play/options/{video_id}/',
video_id, 'Downloading options JSON', video_id, 'Downloading options JSON',
'Unable to download options JSON', 'Unable to download options JSON',
headers=self.geo_verification_headers(), query=query) headers=self.geo_verification_headers(), query=query)
def _extract_formats(self, options, video_id): def _extract_formats_and_subtitles(self, options, video_id):
formats = [] formats = []
subtitles = {}
for format_id, format_url in options['video_balancer'].items(): for format_id, format_url in options['video_balancer'].items():
ext = determine_ext(format_url) ext = determine_ext(format_url)
if ext == 'm3u8': if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( fmts, subs = self._extract_m3u8_formats_and_subtitles(
format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
elif ext == 'f4m': elif ext == 'f4m':
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
format_url, video_id, f4m_id=format_id, fatal=False)) format_url, video_id, f4m_id=format_id, fatal=False))
@ -82,11 +88,19 @@ class RutubeBaseIE(InfoExtractor):
'format_id': format_id, 'format_id': format_id,
}) })
for hls_url in traverse_obj(options, ('live_streams', 'hls', ..., 'url', {url_or_none})): for hls_url in traverse_obj(options, ('live_streams', 'hls', ..., 'url', {url_or_none})):
formats.extend(self._extract_m3u8_formats(hls_url, video_id, ext='mp4', fatal=False)) fmts, subs = self._extract_m3u8_formats_and_subtitles(
return formats hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls')
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
for caption in traverse_obj(options, ('captions', lambda _, v: url_or_none(v['file']))):
subtitles.setdefault(caption.get('code') or 'ru', []).append({
'url': caption['file'],
'name': caption.get('langTitle'),
})
return formats, subtitles
def _download_and_extract_formats(self, video_id, query=None): def _download_and_extract_formats_and_subtitles(self, video_id, query=None):
return self._extract_formats( return self._extract_formats_and_subtitles(
self._download_api_options(video_id, query=query), video_id) self._download_api_options(video_id, query=query), video_id)
@ -97,8 +111,8 @@ class RutubeIE(RutubeBaseIE):
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1'] _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1']
_TESTS = [{ _TESTS = [{
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', 'url': 'https://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
'md5': 'e33ac625efca66aba86cbec9851f2692', 'md5': '3d73fdfe5bb81b9aef139e22ef3de26a',
'info_dict': { 'info_dict': {
'id': '3eac3b4561676c17df9132a9a1e62e3e', 'id': '3eac3b4561676c17df9132a9a1e62e3e',
'ext': 'mp4', 'ext': 'mp4',
@ -111,26 +125,25 @@ class RutubeIE(RutubeBaseIE):
'upload_date': '20131016', 'upload_date': '20131016',
'age_limit': 0, 'age_limit': 0,
'view_count': int, 'view_count': int,
'thumbnail': 'http://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg', 'thumbnail': 'https://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg',
'categories': ['Новости и СМИ'], 'categories': ['Новости и СМИ'],
'chapters': [], 'chapters': [],
}, },
'expected_warnings': ['Unable to download f4m'],
}, { }, {
'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661', 'url': 'https://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661', 'url': 'https://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252', 'url': 'https://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source', 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://rutube.ru/video/private/884fb55f07a97ab673c7d654553e0f48/?p=x2QojCumHTS3rsKHWXN8Lg', 'url': 'https://rutube.ru/video/private/884fb55f07a97ab673c7d654553e0f48/?p=x2QojCumHTS3rsKHWXN8Lg',
'md5': 'd106225f15d625538fe22971158e896f', 'md5': '4fce7b4fcc7b1bcaa3f45eb1e1ad0dd7',
'info_dict': { 'info_dict': {
'id': '884fb55f07a97ab673c7d654553e0f48', 'id': '884fb55f07a97ab673c7d654553e0f48',
'ext': 'mp4', 'ext': 'mp4',
@ -143,11 +156,10 @@ class RutubeIE(RutubeBaseIE):
'upload_date': '20221210', 'upload_date': '20221210',
'age_limit': 0, 'age_limit': 0,
'view_count': int, 'view_count': int,
'thumbnail': 'http://pic.rutubelist.ru/video/f2/d4/f2d42b54be0a6e69c1c22539e3152156.jpg', 'thumbnail': 'https://pic.rutubelist.ru/video/f2/d4/f2d42b54be0a6e69c1c22539e3152156.jpg',
'categories': ['Видеоигры'], 'categories': ['Видеоигры'],
'chapters': [], 'chapters': [],
}, },
'expected_warnings': ['Unable to download f4m'],
}, { }, {
'url': 'https://rutube.ru/video/c65b465ad0c98c89f3b25cb03dcc87c6/', 'url': 'https://rutube.ru/video/c65b465ad0c98c89f3b25cb03dcc87c6/',
'info_dict': { 'info_dict': {
@ -156,17 +168,16 @@ class RutubeIE(RutubeBaseIE):
'chapters': 'count:4', 'chapters': 'count:4',
'categories': ['Бизнес и предпринимательство'], 'categories': ['Бизнес и предпринимательство'],
'description': 'md5:252feac1305257d8c1bab215cedde75d', 'description': 'md5:252feac1305257d8c1bab215cedde75d',
'thumbnail': 'http://pic.rutubelist.ru/video/71/8f/718f27425ea9706073eb80883dd3787b.png', 'thumbnail': 'https://pic.rutubelist.ru/video/71/8f/718f27425ea9706073eb80883dd3787b.png',
'duration': 782, 'duration': 782,
'age_limit': 0, 'age_limit': 0,
'uploader_id': '23491359', 'uploader_id': '23491359',
'timestamp': 1677153329, 'timestamp': 1677153329,
'view_count': int, 'view_count': int,
'upload_date': '20230223', 'upload_date': '20230223',
'title': 'Бизнес с нуля: найм сотрудников. Интервью с директором строительной компании', 'title': 'Бизнес с нуля: найм сотрудников. Интервью с директором строительной компании #1',
'uploader': 'Стас Быков', 'uploader': 'Стас Быков',
}, },
'expected_warnings': ['Unable to download f4m'],
}, { }, {
'url': 'https://rutube.ru/live/video/c58f502c7bb34a8fcdd976b221fca292/', 'url': 'https://rutube.ru/live/video/c58f502c7bb34a8fcdd976b221fca292/',
'info_dict': { 'info_dict': {
@ -174,7 +185,7 @@ class RutubeIE(RutubeBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'categories': ['Телепередачи'], 'categories': ['Телепередачи'],
'description': '', 'description': '',
'thumbnail': 'http://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg', 'thumbnail': 'https://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg',
'live_status': 'is_live', 'live_status': 'is_live',
'age_limit': 0, 'age_limit': 0,
'uploader_id': '23460655', 'uploader_id': '23460655',
@ -184,6 +195,24 @@ class RutubeIE(RutubeBaseIE):
'title': r're:Первый канал. Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'title': r're:Первый канал. Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'uploader': 'Первый канал', 'uploader': 'Первый канал',
}, },
}, {
'url': 'https://rutube.ru/play/embed/03a9cb54bac3376af4c5cb0f18444e01/',
'info_dict': {
'id': '03a9cb54bac3376af4c5cb0f18444e01',
'ext': 'mp4',
'age_limit': 0,
'description': '',
'title': 'Церемония начала торгов акциями ПАО «ЕвроТранс»',
'chapters': [],
'upload_date': '20240829',
'duration': 293,
'uploader': 'MOEX - Московская биржа',
'timestamp': 1724946628,
'thumbnail': 'https://pic.rutubelist.ru/video/2e/24/2e241fddb459baf0fa54acfca44874f4.jpg',
'view_count': int,
'uploader_id': '38420507',
'categories': ['Интервью'],
},
}, { }, {
'url': 'https://rutube.ru/video/5ab908fccfac5bb43ef2b1e4182256b0/', 'url': 'https://rutube.ru/video/5ab908fccfac5bb43ef2b1e4182256b0/',
'only_matching': True, 'only_matching': True,
@ -192,40 +221,46 @@ class RutubeIE(RutubeBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
@classmethod
def suitable(cls, url):
return False if RutubePlaylistIE.suitable(url) else super().suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
query = parse_qs(url) query = parse_qs(url)
info = self._download_and_extract_info(video_id, query) info = self._download_and_extract_info(video_id, query)
info['formats'] = self._download_and_extract_formats(video_id, query) formats, subtitles = self._download_and_extract_formats_and_subtitles(video_id, query)
return info return {
**info,
'formats': formats,
'subtitles': subtitles,
}
class RutubeEmbedIE(RutubeBaseIE): class RutubeEmbedIE(RutubeBaseIE):
IE_NAME = 'rutube:embed' IE_NAME = 'rutube:embed'
IE_DESC = 'Rutube embedded videos' IE_DESC = 'Rutube embedded videos'
_VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)' _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)(?:[?#/]|$)'
_TESTS = [{ _TESTS = [{
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', 'url': 'https://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
'info_dict': { 'info_dict': {
'id': 'a10e53b86e8f349080f718582ce4c661', 'id': 'a10e53b86e8f349080f718582ce4c661',
'ext': 'mp4', 'ext': 'mp4',
'timestamp': 1387830582, 'timestamp': 1387830582,
'upload_date': '20131223', 'upload_date': '20131223',
'uploader_id': '297833', 'uploader_id': '297833',
'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
'uploader': 'subziro89 ILya', 'uploader': 'subziro89 ILya',
'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89', 'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
'age_limit': 0,
'duration': 1395,
'chapters': [],
'description': 'md5:a5acea57bbc3ccdc3cacd1f11a014b5b',
'view_count': int,
'thumbnail': 'https://pic.rutubelist.ru/video/d3/03/d3031f4670a6e6170d88fb3607948418.jpg',
'categories': ['Сериалы'],
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, { }, {
'url': 'http://rutube.ru/play/embed/8083783', 'url': 'https://rutube.ru/play/embed/8083783',
'only_matching': True, 'only_matching': True,
}, { }, {
# private video # private video
@ -240,11 +275,12 @@ class RutubeEmbedIE(RutubeBaseIE):
query = parse_qs(url) query = parse_qs(url)
options = self._download_api_options(embed_id, query) options = self._download_api_options(embed_id, query)
video_id = options['effective_video'] video_id = options['effective_video']
formats = self._extract_formats(options, video_id) formats, subtitles = self._extract_formats_and_subtitles(options, video_id)
info = self._download_and_extract_info(video_id, query) info = self._download_and_extract_info(video_id, query)
info.update({ info.update({
'extractor_key': 'Rutube', 'extractor_key': 'Rutube',
'formats': formats, 'formats': formats,
'subtitles': subtitles,
}) })
return info return info
@ -295,14 +331,14 @@ class RutubeTagsIE(RutubePlaylistBaseIE):
IE_DESC = 'Rutube tags' IE_DESC = 'Rutube tags'
_VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)' _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://rutube.ru/tags/video/1800/', 'url': 'https://rutube.ru/tags/video/1800/',
'info_dict': { 'info_dict': {
'id': '1800', 'id': '1800',
}, },
'playlist_mincount': 68, 'playlist_mincount': 68,
}] }]
_PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json' _PAGE_TEMPLATE = 'https://rutube.ru/api/tags/video/%s/?page=%s&format=json'
class RutubeMovieIE(RutubePlaylistBaseIE): class RutubeMovieIE(RutubePlaylistBaseIE):
@ -310,8 +346,8 @@ class RutubeMovieIE(RutubePlaylistBaseIE):
IE_DESC = 'Rutube movies' IE_DESC = 'Rutube movies'
_VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)' _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
_MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json' _MOVIE_TEMPLATE = 'https://rutube.ru/api/metainfo/tv/%s/?format=json'
_PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json' _PAGE_TEMPLATE = 'https://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
def _real_extract(self, url): def _real_extract(self, url):
movie_id = self._match_id(url) movie_id = self._match_id(url)
@ -327,62 +363,82 @@ class RutubePersonIE(RutubePlaylistBaseIE):
IE_DESC = 'Rutube person videos' IE_DESC = 'Rutube person videos'
_VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)' _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://rutube.ru/video/person/313878/', 'url': 'https://rutube.ru/video/person/313878/',
'info_dict': { 'info_dict': {
'id': '313878', 'id': '313878',
}, },
'playlist_mincount': 37, 'playlist_mincount': 36,
}] }]
_PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' _PAGE_TEMPLATE = 'https://rutube.ru/api/video/person/%s/?page=%s&format=json'
class RutubePlaylistIE(RutubePlaylistBaseIE): class RutubePlaylistIE(RutubePlaylistBaseIE):
IE_NAME = 'rutube:playlist' IE_NAME = 'rutube:playlist'
IE_DESC = 'Rutube playlists' IE_DESC = 'Rutube playlists'
_VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P<id>\d+)' _VALID_URL = r'https?://rutube\.ru/plst/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag', 'url': 'https://rutube.ru/plst/308547/',
'info_dict': { 'info_dict': {
'id': '3097', 'id': '308547',
}, },
'playlist_count': 27, 'playlist_mincount': 22,
}, {
'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source',
'only_matching': True,
}] }]
_PAGE_TEMPLATE = 'https://rutube.ru/api/playlist/custom/%s/videos?page=%s&format=json'
_PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json'
@classmethod
def suitable(cls, url):
from ..utils import int_or_none, parse_qs
if not super().suitable(url):
return False
params = parse_qs(url)
return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0])
def _next_page_url(self, page_num, playlist_id, item_kind):
return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num)
def _real_extract(self, url):
qs = parse_qs(url)
playlist_kind = qs['pl_type'][0]
playlist_id = qs['pl_id'][0]
return self._extract_playlist(playlist_id, item_kind=playlist_kind)
class RutubeChannelIE(RutubePlaylistBaseIE): class RutubeChannelIE(RutubePlaylistBaseIE):
IE_NAME = 'rutube:channel' IE_NAME = 'rutube:channel'
IE_DESC = 'Rutube channel' IE_DESC = 'Rutube channel'
_VALID_URL = r'https?://rutube\.ru/channel/(?P<id>\d+)/videos' _VALID_URL = r'https?://rutube\.ru/(?:channel/(?P<id>\d+)|u/(?P<slug>\w+))(?:/(?P<section>videos|shorts|playlists))?'
_TESTS = [{ _TESTS = [{
'url': 'https://rutube.ru/channel/639184/videos/', 'url': 'https://rutube.ru/channel/639184/videos/',
'info_dict': { 'info_dict': {
'id': '639184', 'id': '639184_videos',
}, },
'playlist_mincount': 133, 'playlist_mincount': 129,
}, {
'url': 'https://rutube.ru/channel/25902603/shorts/',
'info_dict': {
'id': '25902603_shorts',
},
'playlist_mincount': 277,
}, {
'url': 'https://rutube.ru/channel/25902603/',
'info_dict': {
'id': '25902603',
},
'playlist_mincount': 406,
}, {
'url': 'https://rutube.ru/u/rutube/videos/',
'info_dict': {
'id': '23704195_videos',
},
'playlist_mincount': 113,
}] }]
_PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' _PAGE_TEMPLATE = 'https://rutube.ru/api/video/person/%s/?page=%s&format=json&origin__type=%s'
def _next_page_url(self, page_num, playlist_id, section):
origin_type = {
'videos': 'rtb,rst,ifrm,rspa',
'shorts': 'rshorts',
None: '',
}.get(section)
return self._PAGE_TEMPLATE % (playlist_id, page_num, origin_type)
def _real_extract(self, url):
playlist_id, slug, section = self._match_valid_url(url).group('id', 'slug', 'section')
if section == 'playlists':
raise UnsupportedError(url)
if slug:
webpage = self._download_webpage(url, slug)
redux_state = self._search_json(
r'window\.reduxState\s*=', webpage, 'redux state', slug, transform_source=js_to_json)
playlist_id = traverse_obj(redux_state, (
'api', 'queries', lambda k, _: k.startswith('channelIdBySlug'),
'data', 'channel_id', {int}, {str_or_none}, any))
playlist = self._extract_playlist(playlist_id, section=section)
if section:
playlist['id'] = f'{playlist_id}_{section}'
return playlist

View File

@ -2,13 +2,16 @@ import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
filter_dict,
float_or_none, float_or_none,
int_or_none, int_or_none,
parse_qs,
str_or_none, str_or_none,
try_get, try_get,
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
) )
from ..utils.traversal import traverse_obj
def _extract_episode(data, episode_id=None): def _extract_episode(data, episode_id=None):
@ -58,15 +61,10 @@ def _extract_episode(data, episode_id=None):
class SpreakerIE(InfoExtractor): class SpreakerIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = [
https?:// r'https?://api\.spreaker\.com/(?:(?:download/)?episode|v2/episodes)/(?P<id>\d+)',
api\.spreaker\.com/ r'https?://(?:www\.)?spreaker\.com/episode/[^#?/]*?(?P<id>\d+)/?(?:[?#]|$)',
(?: ]
(?:download/)?episode|
v2/episodes
)/
(?P<id>\d+)
'''
_TESTS = [{ _TESTS = [{
'url': 'https://api.spreaker.com/episode/12534508', 'url': 'https://api.spreaker.com/episode/12534508',
'info_dict': { 'info_dict': {
@ -83,7 +81,9 @@ class SpreakerIE(InfoExtractor):
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'comment_count': int, 'comment_count': int,
'series': 'Success With Music (SWM)', 'series': 'Success With Music | SWM',
'thumbnail': 'https://d3wo5wojvuv7l.cloudfront.net/t_square_limited_160/images.spreaker.com/original/777ce4f96b71b0e1b7c09a5e625210e3.jpg',
'creators': ['SWM'],
}, },
}, { }, {
'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3', 'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
@ -91,52 +91,75 @@ class SpreakerIE(InfoExtractor):
}, { }, {
'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments', 'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
'only_matching': True, 'only_matching': True,
}, {
'note': 'episode',
'url': 'https://www.spreaker.com/episode/grunge-music-origins-the-raw-sound-that-defined-a-generation--60269615',
'info_dict': {
'id': '60269615',
'display_id': 'grunge-music-origins-the-raw-sound-that-',
'ext': 'mp3',
'title': 'Grunge Music Origins - The Raw Sound that Defined a Generation',
'description': str,
'timestamp': 1717468905,
'upload_date': '20240604',
'uploader': 'Katie Brown 2',
'uploader_id': '17733249',
'duration': 818.83,
'view_count': int,
'like_count': int,
'comment_count': int,
'series': '90s Grunge',
'thumbnail': 'https://d3wo5wojvuv7l.cloudfront.net/t_square_limited_160/images.spreaker.com/original/bb0d4178f7cf57cc8786dedbd9c5d969.jpg',
'creators': ['Katie Brown 2'],
},
}, {
'url': 'https://www.spreaker.com/episode/60269615',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
episode_id = self._match_id(url) episode_id = self._match_id(url)
data = self._download_json( data = self._download_json(
f'https://api.spreaker.com/v2/episodes/{episode_id}', f'https://api.spreaker.com/v2/episodes/{episode_id}', episode_id,
episode_id)['response']['episode'] query=traverse_obj(parse_qs(url), {'key': ('key', 0)}))['response']['episode']
return _extract_episode(data, episode_id) return _extract_episode(data, episode_id)
class SpreakerPageIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
episode_id = self._search_regex(
(r'data-episode_id=["\'](?P<id>\d+)',
r'episode_id\s*:\s*(?P<id>\d+)'), webpage, 'episode id')
return self.url_result(
f'https://api.spreaker.com/episode/{episode_id}',
ie=SpreakerIE.ie_key(), video_id=episode_id)
class SpreakerShowIE(InfoExtractor): class SpreakerShowIE(InfoExtractor):
_VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)' _VALID_URL = [
r'https?://api\.spreaker\.com/show/(?P<id>\d+)',
r'https?://(?:www\.)?spreaker\.com/podcast/[\w-]+--(?P<id>[\d]+)',
r'https?://(?:www\.)?spreaker\.com/show/(?P<id>\d+)/episodes/feed',
]
_TESTS = [{ _TESTS = [{
'url': 'https://api.spreaker.com/show/4652058', 'url': 'https://api.spreaker.com/show/4652058',
'info_dict': { 'info_dict': {
'id': '4652058', 'id': '4652058',
}, },
'playlist_mincount': 118, 'playlist_mincount': 118,
}, {
'url': 'https://www.spreaker.com/podcast/health-wealth--5918323',
'info_dict': {
'id': '5918323',
},
'playlist_mincount': 60,
}, {
'url': 'https://www.spreaker.com/show/5887186/episodes/feed',
'info_dict': {
'id': '5887186',
},
'playlist_mincount': 290,
}] }]
def _entries(self, show_id): def _entries(self, show_id, key=None):
for page_num in itertools.count(1): for page_num in itertools.count(1):
episodes = self._download_json( episodes = self._download_json(
f'https://api.spreaker.com/show/{show_id}/episodes', f'https://api.spreaker.com/show/{show_id}/episodes',
show_id, note=f'Downloading JSON page {page_num}', query={ show_id, note=f'Downloading JSON page {page_num}', query=filter_dict({
'page': page_num, 'page': page_num,
'max_per_page': 100, 'max_per_page': 100,
}) 'key': key,
}))
pager = try_get(episodes, lambda x: x['response']['pager'], dict) pager = try_get(episodes, lambda x: x['response']['pager'], dict)
if not pager: if not pager:
break break
@ -152,21 +175,5 @@ class SpreakerShowIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
show_id = self._match_id(url) show_id = self._match_id(url)
return self.playlist_result(self._entries(show_id), playlist_id=show_id) key = traverse_obj(parse_qs(url), ('key', 0))
return self.playlist_result(self._entries(show_id, key), playlist_id=show_id)
class SpreakerShowPageIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.spreaker.com/show/success-with-music',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
show_id = self._search_regex(
r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id')
return self.url_result(
f'https://api.spreaker.com/show/{show_id}',
ie=SpreakerShowIE.ie_key(), video_id=show_id)