Compare commits

...

14 Commits

Author SHA1 Message Date
kclauhk
3f3f0e0cea
Merge bf4aa73fb7 into f2a4983df7 2024-11-14 20:34:26 +08:00
Jackson Humphrey
f2a4983df7
[ie/archive.org] Fix comments extraction (#11527)
Closes #11526
Authored by: jshumphrey
2024-11-12 23:26:18 +00:00
bashonly
bacc31b05a
[ie/facebook] Fix formats extraction (#11513)
Closes #11497
Authored by: bashonly
2024-11-12 23:23:10 +00:00
manav_chaudhary
a9f85670d0
[ie/Chaturbate] Support alternate domains (#10595)
Closes #10594
Authored by: manavchaudhary1
2024-11-11 23:41:56 +01:00
Sam
6b43a8d84b
[ie/goplay] Fix extractor (#11466)
Closes #10857
Authored by: SamDecrock, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-11-11 22:03:31 +00:00
Hugo
2db8c2e7d5
[ie/CloudflareStream] Avoid extraction via videodelivery.net (#11478)
Closes #11477
Authored by: hugovdev
2024-11-11 22:00:05 +00:00
bashonly
f9c8deb4e5
[build] Bump PyInstaller version pin to >=6.11.1 (#11507)
Authored by: bashonly
2024-11-11 21:19:03 +00:00
Sakura286
0ec9bfed4d
[ie/MixchMovie] Add extractor (#10897)
Closes #10765
Authored by: Sakura286
2024-11-11 21:40:29 +01:00
Subrat Lima
c673731061
[ie/spreaker] Support podcast and feed pages (#10968)
Closes #10925
Authored by: subrat-lima
2024-11-11 20:08:18 +01:00
sepro
e398217aae
[ie/rutube] Rework extractors (#11480)
Closes #9694, Closes #10104, Closes #11117, Closes #11415, Closes #11476
Authored by: seproDev
2024-11-11 18:44:53 +01:00
Julio Napurí
c39016f66d
[ie/spreaker] Support episode pages and access keys (#11489)
Authored by: julionc
2024-11-11 18:42:05 +01:00
kclauhk
bf4aa73fb7 add ffmpeg argument to avoid keepalive request failure in downloading live video 2024-09-10 15:12:58 +08:00
kclauhk
6a3d3dca93 Update test data 2024-09-07 16:22:12 +08:00
kclauhk
c071dbdc18 [ie/meritplus] Add extractor 2024-08-29 23:24:42 +08:00
12 changed files with 529 additions and 184 deletions

View File

@ -411,7 +411,7 @@ jobs:
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
python devscripts/install_deps.py -o --include build python devscripts/install_deps.py -o --include build
python devscripts/install_deps.py --include curl-cffi python devscripts/install_deps.py --include curl-cffi
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.10.0-py3-none-any.whl" python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.11.1-py3-none-any.whl"
- name: Prepare - name: Prepare
run: | run: |
@ -460,7 +460,7 @@ jobs:
run: | run: |
python devscripts/install_deps.py -o --include build python devscripts/install_deps.py -o --include build
python devscripts/install_deps.py python devscripts/install_deps.py
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.10.0-py3-none-any.whl" python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.11.1-py3-none-any.whl"
- name: Prepare - name: Prepare
run: | run: |

View File

@ -83,7 +83,7 @@ test = [
"pytest-rerunfailures~=14.0", "pytest-rerunfailures~=14.0",
] ]
pyinstaller = [ pyinstaller = [
"pyinstaller>=6.10.0", # Windows temp cleanup fixed in 6.10.0 "pyinstaller>=6.11.1", # Windows temp cleanup fixed in 6.11.1
] ]
[project.urls] [project.urls]

View File

@ -1120,6 +1120,7 @@ from .megatvcom import (
) )
from .meipai import MeipaiIE from .meipai import MeipaiIE
from .melonvod import MelonVODIE from .melonvod import MelonVODIE
from .meritplus import MeritPlusIE
from .metacritic import MetacriticIE from .metacritic import MetacriticIE
from .mgtv import MGTVIE from .mgtv import MGTVIE
from .microsoftembed import ( from .microsoftembed import (
@ -1156,6 +1157,7 @@ from .mitele import MiTeleIE
from .mixch import ( from .mixch import (
MixchArchiveIE, MixchArchiveIE,
MixchIE, MixchIE,
MixchMovieIE,
) )
from .mixcloud import ( from .mixcloud import (
MixcloudIE, MixcloudIE,
@ -1939,9 +1941,7 @@ from .spotify import (
) )
from .spreaker import ( from .spreaker import (
SpreakerIE, SpreakerIE,
SpreakerPageIE,
SpreakerShowIE, SpreakerShowIE,
SpreakerShowPageIE,
) )
from .springboardplatform import SpringboardPlatformIE from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE from .sprout import SproutIE

View File

@ -205,6 +205,26 @@ class ArchiveOrgIE(InfoExtractor):
}, },
}, },
], ],
}, {
# The reviewbody is None for one of the reviews; just need to extract data without crashing
'url': 'https://archive.org/details/gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
'info_dict': {
'id': 'gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
'ext': 'mp3',
'title': 'Stuck Inside of Mobile with the Memphis Blues Again',
'creators': ['Grateful Dead'],
'duration': 338.31,
'track': 'Stuck Inside of Mobile with the Memphis Blues Again',
'description': 'md5:764348a470b986f1217ffd38d6ac7b72',
'display_id': 'gd95-04-02d1t04.shn',
'location': 'Pyramid Arena',
'uploader': 'jon@archive.org',
'album': '1995-04-02 - Pyramid Arena',
'upload_date': '20040519',
'track_number': 4,
'release_date': '19950402',
'timestamp': 1084927901,
},
}] }]
@staticmethod @staticmethod
@ -335,7 +355,7 @@ class ArchiveOrgIE(InfoExtractor):
info['comments'].append({ info['comments'].append({
'id': review.get('review_id'), 'id': review.get('review_id'),
'author': review.get('reviewer'), 'author': review.get('reviewer'),
'text': str_or_none(review.get('reviewtitle'), '') + '\n\n' + review.get('reviewbody'), 'text': join_nonempty('reviewtitle', 'reviewbody', from_dict=review, delim='\n\n'),
'timestamp': unified_timestamp(review.get('createdate')), 'timestamp': unified_timestamp(review.get('createdate')),
'parent': 'root'}) 'parent': 'root'})

View File

@ -9,7 +9,7 @@ from ..utils import (
class ChaturbateIE(InfoExtractor): class ChaturbateIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)' _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.(?P<tld>com|eu|global)/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.chaturbate.com/siswet19/', 'url': 'https://www.chaturbate.com/siswet19/',
'info_dict': { 'info_dict': {
@ -29,15 +29,24 @@ class ChaturbateIE(InfoExtractor):
}, { }, {
'url': 'https://en.chaturbate.com/siswet19/', 'url': 'https://en.chaturbate.com/siswet19/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://chaturbate.eu/siswet19/',
'only_matching': True,
}, {
'url': 'https://chaturbate.eu/fullvideo/?b=caylin',
'only_matching': True,
}, {
'url': 'https://chaturbate.global/siswet19/',
'only_matching': True,
}] }]
_ROOM_OFFLINE = 'Room is currently offline' _ROOM_OFFLINE = 'Room is currently offline'
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id, tld = self._match_valid_url(url).group('id', 'tld')
webpage = self._download_webpage( webpage = self._download_webpage(
f'https://chaturbate.com/{video_id}/', video_id, f'https://chaturbate.{tld}/{video_id}/', video_id,
headers=self.geo_verification_headers()) headers=self.geo_verification_headers())
found_m3u8_urls = [] found_m3u8_urls = []

View File

@ -8,7 +8,7 @@ class CloudflareStreamIE(InfoExtractor):
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)' _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
_EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video=' _EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video='
_ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+' _ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+'
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})' _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}(?P<domain>{_DOMAIN_RE})/|{_EMBED_RE})(?P<id>{_ID_RE})'
_EMBED_REGEX = [ _EMBED_REGEX = [
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1', rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1',
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})', rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
@ -19,7 +19,7 @@ class CloudflareStreamIE(InfoExtractor):
'id': '31c9291ab41fac05471db4e73aa11717', 'id': '31c9291ab41fac05471db4e73aa11717',
'ext': 'mp4', 'ext': 'mp4',
'title': '31c9291ab41fac05471db4e73aa11717', 'title': '31c9291ab41fac05471db4e73aa11717',
'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg', 'thumbnail': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
}, },
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
@ -30,7 +30,7 @@ class CloudflareStreamIE(InfoExtractor):
'id': '0e8e040aec776862e1d632a699edf59e', 'id': '0e8e040aec776862e1d632a699edf59e',
'ext': 'mp4', 'ext': 'mp4',
'title': '0e8e040aec776862e1d632a699edf59e', 'title': '0e8e040aec776862e1d632a699edf59e',
'thumbnail': 'https://videodelivery.net/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg', 'thumbnail': 'https://cloudflarestream.com/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg',
}, },
}, { }, {
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1', 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
@ -54,7 +54,7 @@ class CloudflareStreamIE(InfoExtractor):
'id': 'eaef9dea5159cf968be84241b5cedfe7', 'id': 'eaef9dea5159cf968be84241b5cedfe7',
'ext': 'mp4', 'ext': 'mp4',
'title': 'eaef9dea5159cf968be84241b5cedfe7', 'title': 'eaef9dea5159cf968be84241b5cedfe7',
'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg', 'thumbnail': 'https://cloudflarestream.com/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
}, },
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
@ -62,8 +62,9 @@ class CloudflareStreamIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id, domain = self._match_valid_url(url).group('id', 'domain')
domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net' if domain != 'bytehighway.net':
domain = 'cloudflarestream.com'
base_url = f'https://{domain}/{video_id}/' base_url = f'https://{domain}/{video_id}/'
if '.' in video_id: if '.' in video_id:
video_id = self._parse_json(base64.urlsafe_b64decode( video_id = self._parse_json(base64.urlsafe_b64decode(

View File

@ -563,13 +563,13 @@ class FacebookIE(InfoExtractor):
return extract_video_data(try_get( return extract_video_data(try_get(
js_data, lambda x: x['jsmods']['instances'], list) or []) js_data, lambda x: x['jsmods']['instances'], list) or [])
def extract_dash_manifest(video, formats): def extract_dash_manifest(vid_data, formats, mpd_url=None):
dash_manifest = traverse_obj( dash_manifest = traverse_obj(
video, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', expected_type=str) vid_data, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', 'manifest_xml', expected_type=str)
if dash_manifest: if dash_manifest:
formats.extend(self._parse_mpd_formats( formats.extend(self._parse_mpd_formats(
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)), compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
mpd_url=url_or_none(video.get('dash_manifest_url')))) mpd_url=url_or_none(video.get('dash_manifest_url')) or mpd_url))
def process_formats(info): def process_formats(info):
# Downloads with browser's User-Agent are rate limited. Working around # Downloads with browser's User-Agent are rate limited. Working around
@ -619,9 +619,12 @@ class FacebookIE(InfoExtractor):
video = video['creation_story'] video = video['creation_story']
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner')) video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
video.update(reel_info) video.update(reel_info)
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
formats = [] formats = []
q = qualities(['sd', 'hd']) q = qualities(['sd', 'hd'])
# Legacy formats extraction
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'), for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'), ('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
('browser_native_sd_url', 'sd')): ('browser_native_sd_url', 'sd')):
@ -629,7 +632,7 @@ class FacebookIE(InfoExtractor):
if not playable_url: if not playable_url:
continue continue
if determine_ext(playable_url) == 'mpd': if determine_ext(playable_url) == 'mpd':
formats.extend(self._extract_mpd_formats(playable_url, video_id)) formats.extend(self._extract_mpd_formats(playable_url, video_id, fatal=False))
else: else:
formats.append({ formats.append({
'format_id': format_id, 'format_id': format_id,
@ -638,6 +641,28 @@ class FacebookIE(InfoExtractor):
'url': playable_url, 'url': playable_url,
}) })
extract_dash_manifest(fmt_data, formats) extract_dash_manifest(fmt_data, formats)
# New videoDeliveryResponse formats extraction
fmt_data = traverse_obj(video, ('videoDeliveryResponseFragment', 'videoDeliveryResponseResult'))
mpd_urls = traverse_obj(fmt_data, ('dash_manifest_urls', ..., 'manifest_url', {url_or_none}))
dash_manifests = traverse_obj(fmt_data, ('dash_manifests', lambda _, v: v['manifest_xml']))
for idx, dash_manifest in enumerate(dash_manifests):
extract_dash_manifest(dash_manifest, formats, mpd_url=traverse_obj(mpd_urls, idx))
if not dash_manifests:
# Only extract from MPD URLs if the manifests are not already provided
for mpd_url in mpd_urls:
formats.extend(self._extract_mpd_formats(mpd_url, video_id, fatal=False))
for prog_fmt in traverse_obj(fmt_data, ('progressive_urls', lambda _, v: v['progressive_url'])):
format_id = traverse_obj(prog_fmt, ('metadata', 'quality', {str.lower}))
formats.append({
'format_id': format_id,
# sd, hd formats w/o resolution info should be deprioritized below DASH
'quality': q(format_id) - 3,
'url': prog_fmt['progressive_url'],
})
for m3u8_url in traverse_obj(fmt_data, ('hls_playlist_urls', ..., 'hls_playlist_url', {url_or_none})):
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False, m3u8_id='hls'))
if not formats: if not formats:
# Do not append false positive entry w/o any formats # Do not append false positive entry w/o any formats
return return

View File

@ -5,56 +5,63 @@ import hashlib
import hmac import hmac
import json import json
import os import os
import re
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none,
js_to_json,
remove_end,
traverse_obj, traverse_obj,
unescapeHTML,
) )
class GoPlayIE(InfoExtractor): class GoPlayIE(InfoExtractor):
_VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/]+/[^/]+/|)(?P<display_id>[^/#]+)' _VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/?#]+/[^/?#]+/|)(?P<id>[^/#]+)'
_NETRC_MACHINE = 'goplay' _NETRC_MACHINE = 'goplay'
_TESTS = [{ _TESTS = [{
'url': 'https://www.goplay.be/video/de-container-cup/de-container-cup-s3/de-container-cup-s3-aflevering-2#autoplay', 'url': 'https://www.goplay.be/video/de-slimste-mens-ter-wereld/de-slimste-mens-ter-wereld-s22/de-slimste-mens-ter-wereld-s22-aflevering-1',
'info_dict': { 'info_dict': {
'id': '9c4214b8-e55d-4e4b-a446-f015f6c6f811', 'id': '2baa4560-87a0-421b-bffc-359914e3c387',
'ext': 'mp4', 'ext': 'mp4',
'title': 'S3 - Aflevering 2', 'title': 'S22 - Aflevering 1',
'series': 'De Container Cup', 'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}',
'season': 'Season 3', 'series': 'De Slimste Mens ter Wereld',
'season_number': 3, 'episode': 'Episode 1',
'episode': 'Episode 2', 'season_number': 22,
'episode_number': 2, 'episode_number': 1,
'season': 'Season 22',
}, },
'params': {'skip_download': True},
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
}, { }, {
'url': 'https://www.goplay.be/video/a-family-for-thr-holidays-s1-aflevering-1#autoplay', 'url': 'https://www.goplay.be/video/1917',
'info_dict': { 'info_dict': {
'id': '74e3ed07-748c-49e4-85a0-393a93337dbf', 'id': '40cac41d-8d29-4ef5-aa11-75047b9f0907',
'ext': 'mp4', 'ext': 'mp4',
'title': 'A Family for the Holidays', 'title': '1917',
'description': r're:Op het hoogtepunt van de Eerste Wereldoorlog krijgen twee jonge .{94}',
}, },
'params': {'skip_download': True},
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
}, { }, {
'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay', 'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
'info_dict': { 'info_dict': {
'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656', 'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee',
'ext': 'mp4', 'ext': 'mp4',
'title': 'S11 - Aflevering 1', 'title': 'S11 - Aflevering 1',
'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten .{102}',
'episode': 'Episode 1', 'episode': 'Episode 1',
'series': 'De Mol', 'series': 'De Mol',
'season_number': 11, 'season_number': 11,
'episode_number': 1, 'episode_number': 1,
'season': 'Season 11', 'season': 'Season 11',
}, },
'params': { 'params': {'skip_download': True},
'skip_download': True,
},
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
}] }]
@ -69,27 +76,42 @@ class GoPlayIE(InfoExtractor):
if not self._id_token: if not self._id_token:
raise self.raise_login_required(method='password') raise self.raise_login_required(method='password')
def _real_extract(self, url): def _find_json(self, s):
url, display_id = self._match_valid_url(url).group(0, 'display_id') return self._search_json(
webpage = self._download_webpage(url, display_id) r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
video_data_json = self._html_search_regex(r'<div\s+data-hero="([^"]+)"', webpage, 'video_data')
video_data = self._parse_json(unescapeHTML(video_data_json), display_id).get('data')
movie = video_data.get('movie') def _real_extract(self, url):
if movie: display_id = self._match_id(url)
video_id = movie['videoUuid'] webpage = self._download_webpage(url, display_id)
info_dict = {
'title': movie.get('title'), nextjs_data = traverse_obj(
} re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
else: (..., {js_to_json}, {json.loads}, ..., {self._find_json}, ...))
episode = traverse_obj(video_data, ('playlists', ..., 'episodes', lambda _, v: v['pageInfo']['url'] == url), get_all=False) meta = traverse_obj(nextjs_data, (
video_id = episode['videoUuid'] ..., lambda _, v: v['meta']['path'] == urllib.parse.urlparse(url).path, 'meta', any))
info_dict = {
'title': episode.get('episodeTitle'), video_id = meta['uuid']
'series': traverse_obj(episode, ('program', 'title')), info_dict = traverse_obj(meta, {
'season_number': episode.get('seasonNumber'), 'title': ('title', {str}),
'episode_number': episode.get('episodeNumber'), 'description': ('description', {str.strip}),
} })
if traverse_obj(meta, ('program', 'subtype')) != 'movie':
for season_data in traverse_obj(nextjs_data, (..., 'children', ..., 'playlists', ...)):
episode_data = traverse_obj(
season_data, ('videos', lambda _, v: v['videoId'] == video_id, any))
if not episode_data:
continue
episode_title = traverse_obj(
episode_data, 'contextualTitle', 'episodeTitle', expected_type=str)
info_dict.update({
'title': episode_title or info_dict.get('title'),
'series': remove_end(info_dict.get('title'), f' - {episode_title}'),
'season_number': traverse_obj(season_data, ('season', {int_or_none})),
'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})),
})
break
api = self._download_json( api = self._download_json(
f'https://api.goplay.be/web/v1/videos/long-form/{video_id}', f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',

View File

@ -0,0 +1,152 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
float_or_none,
int_or_none,
join_nonempty,
traverse_obj,
url_or_none,
)
class MeritPlusIE(InfoExtractor):
_VALID_URL = r'https?://(?:[\w-]+\.)?meritplus\.com/(?:c/)?(?P<type>[a-z]+)/(?P<ep>[^/]+\?episodeId=)?(?P<id>[^/&\?]+)'
_TESTS = [{
'url': 'https://www.meritplus.com/c/s/VQ2aB6Sp?episodeId=uNLp2Rgg&play=1',
'info_dict': {
'id': 'uNLp2Rgg',
'ext': 'mp4',
'title': 'Right to Stand Your Ground | Dr. Phil Primetime',
'description': r're:^Employees who fought back against robbers, unruly customers, and',
'thumbnail': 'https://cdn.jwplayer.com/v2/media/uNLp2Rgg/poster.jpg?width=1920',
'duration': 2519.0,
'tags': 'count:14',
'timestamp': 1724716800,
'upload_date': '20240827',
'series': 'Dr. Phil Primetime',
'season': 'Season 1',
'season_number': 1,
'episode': 'Episode 79',
'episode_number': 79,
},
}, {
'url': 'https://www.meritplus.com/c/m/ebVUK1wS?r=ok5bikOE',
'info_dict': {
'id': 'ebVUK1wS',
'ext': 'mp4',
'title': 'PBR Teams 2024: Gambler Days | Day 1',
'description': r're:^Get ready for Gambler Days with the PBR! Watch elite riders battle',
'thumbnail': 'https://cdn.jwplayer.com/v2/media/ebVUK1wS/poster.jpg?width=1920',
'duration': 10876.0,
'tags': 'count:6',
'timestamp': 1724461200,
'upload_date': '20240824',
},
}, {
'url': 'https://www.meritplus.com/m/AzVJ4sEH/bull-rider-najiah-knight-the-cowgirl-way?r=uzD8QNRj',
'info_dict': {
'id': 'AzVJ4sEH',
'ext': 'mp4',
'title': 'Bull Rider Najiah Knight | The Cowgirl Way',
'description': r're:^Najiah Knight is rewriting the rules and inspiring future Cowgirls',
'thumbnail': 'https://cdn.jwplayer.com/v2/media/AzVJ4sEH/poster.jpg?width=1920',
'duration': 300.0,
'tags': ['The Cowgirl Way', 'seriesId_uzD8QNRj', 'PBR'],
'timestamp': 1720195320,
'upload_date': '20240705',
},
}, {
'url': 'https://www.meritplus.com/c/sns/jryHEWXj',
'info_dict': {
'id': 'jryHEWXj',
'title': 'Morning On Merit Street',
'description': r're:^Award winning journalist Dominique Sachse and co-host Fanchon Stinger',
'thumbnail': 'https://assets.mediabackstage.com/merit_prod/thumbnails/moms_thumbnail_1920x1080-1708708358462.jpg',
},
'playlist_count': 5,
}, {
'url': 'https://www.meritplus.com/c/s/eAzd5bqW',
'info_dict': {
'id': 'eAzd5bqW',
'title': 'Crime Stories with Nancy Grace',
'description': r're:^Nancy Grace explores the inside story of true crimes and cold cases',
'thumbnail': 'https://assets.mediabackstage.com/merit_prod%2Fthumbnails%2Fnancygrace_thumbnail_1920x1080-1721658217049.jpg',
},
'playlist_count': 15,
}]
def _real_extract(self, url):
video_id, c_type, is_episode = self._match_valid_url(url).group('id', 'type', 'ep')
if is_episode or c_type == 'm':
json = self._download_json(f'https://cdn.jwplayer.com/v2/media/{video_id}', video_id)
else:
json = self._download_json(f'https://cdn.jwplayer.com/v2/playlists/{video_id}?format=json&page_limit=500', video_id)
entries = []
for video in json.get('playlist', []):
thumbnails, formats, subtitles = [], [], {}
for image in video.get('images', []):
thumbnails.append({
'url': url_or_none(image.get('src')),
'width': image.get('width'),
})
for caption in video.get('tracks', []):
if caption.get('kind') == 'captions':
subtitles.setdefault(caption.get('label', 'und'), []).append({
'url': caption.get('file'),
'name': caption.get('label'),
})
is_live = bool(video.get('is_live'))
for source in video.get('sources', []):
if media_url := url_or_none(source.get('file')):
if determine_ext(media_url) == 'm3u8':
hls_fmts, hls_subs = self._extract_m3u8_formats_and_subtitles(
media_url, video['mediaid'], fatal=None)
if is_live:
for f in hls_fmts:
f['downloader_options'] = {'ffmpeg_args_out': ['-http_persistent', '0']}
formats.extend(hls_fmts)
self._merge_subtitles(hls_subs, target=subtitles)
else:
formats.append(traverse_obj(source, {
'format_id': ('label', {lambda v: 'audio' if 'Audio' in v else v}),
'url': ('file', {str}),
'height': ('height', {int_or_none}),
'width': ('width', {int_or_none}),
'filesize': ('filesize', {int}),
'fps': ('framerate', {float_or_none}),
'tbr': ('bitrate', {lambda v: int_or_none(v, 1000)}),
'acodec': ('label', {lambda v: 'aac' if 'AAC' in v else None}),
'vcodec': ('type', {lambda v: 'none' if 'audio' in v else None}),
}))
entries.append({**traverse_obj(video, {
'id': ('mediaid', {str}),
'title': ('title', {str}),
'description': ('description', {str}),
'timestamp': ('pubdate', {int}),
'tags': ('tags', {lambda v: v.split(',') if v else None}),
'series': ('programName', {lambda v: v or None}),
'season_number': ('seasonNumber', {int_or_none}),
'episode_number': ('episodeNumber', {int_or_none}),
'cast': ('cast', {lambda v: v.split(',') if v else None}),
'duration': ('duration', {float_or_none}),
'webpage_url': ('mediaid', {lambda v: url + (f'?episodeId={v}' if v not in url else '')}),
'thumbnail': ('image', {lambda v: url_or_none(v) if not thumbnails else None}),
}),
'is_live': is_live,
'thumbnails': thumbnails,
'formats': formats,
'subtitles': subtitles,
})
if len(entries) == 1:
return entries[0]
elif len(entries) > 1:
description = join_nonempty('shortDescription', 'description', delim=' ', from_dict=json)
thumbnail = traverse_obj(json, (('imgHomeRailThumb16x9', 'imgFeaturedTvBanner16x9'),
{url_or_none}), get_all=False)
return self.playlist_result(entries, id=json['seriesId'], title=json['title'],
description=description, thumbnail=thumbnail)
else:
self.raise_no_formats('No video formats found!')

View File

@ -12,7 +12,7 @@ from ..utils.traversal import traverse_obj
class MixchIE(InfoExtractor): class MixchIE(InfoExtractor):
IE_NAME = 'mixch' IE_NAME = 'mixch'
_VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P<id>\d+)' _VALID_URL = r'https?://mixch\.tv/u/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://mixch.tv/u/16943797/live', 'url': 'https://mixch.tv/u/16943797/live',
@ -74,7 +74,7 @@ class MixchIE(InfoExtractor):
class MixchArchiveIE(InfoExtractor): class MixchArchiveIE(InfoExtractor):
IE_NAME = 'mixch:archive' IE_NAME = 'mixch:archive'
_VALID_URL = r'https?://(?:www\.)?mixch\.tv/archive/(?P<id>\d+)' _VALID_URL = r'https?://mixch\.tv/archive/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://mixch.tv/archive/421', 'url': 'https://mixch.tv/archive/421',
@ -116,3 +116,56 @@ class MixchArchiveIE(InfoExtractor):
'formats': self._extract_m3u8_formats(info_json['archiveURL'], video_id), 'formats': self._extract_m3u8_formats(info_json['archiveURL'], video_id),
'thumbnail': traverse_obj(info_json, ('thumbnailURL', {url_or_none})), 'thumbnail': traverse_obj(info_json, ('thumbnailURL', {url_or_none})),
} }
class MixchMovieIE(InfoExtractor):
IE_NAME = 'mixch:movie'
_VALID_URL = r'https?://mixch\.tv/m/(?P<id>\w+)'
_TESTS = [{
'url': 'https://mixch.tv/m/Ve8KNkJ5',
'info_dict': {
'id': 'Ve8KNkJ5',
'title': '夏☀️\nムービーへのポイントは本イベントに加算されないので配信にてお願い致します🙇🏻\u200d♀️\n#TGCCAMPUS #ミス東大 #ミス東大2024 ',
'ext': 'mp4',
'uploader': 'ミス東大No.5 松藤百香🍑💫',
'uploader_id': '12299174',
'channel_follower_count': int,
'view_count': int,
'like_count': int,
'comment_count': int,
'timestamp': 1724070828,
'uploader_url': 'https://mixch.tv/u/12299174',
'live_status': 'not_live',
'upload_date': '20240819',
},
}, {
'url': 'https://mixch.tv/m/61DzpIKE',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
f'https://mixch.tv/api-web/movies/{video_id}', video_id)
return {
'id': video_id,
'formats': [{
'format_id': 'mp4',
'url': data['movie']['file'],
'ext': 'mp4',
}],
**traverse_obj(data, {
'title': ('movie', 'title', {str}),
'thumbnail': ('movie', 'thumbnailURL', {url_or_none}),
'uploader': ('ownerInfo', 'name', {str}),
'uploader_id': ('ownerInfo', 'id', {int}, {str_or_none}),
'channel_follower_count': ('ownerInfo', 'fan', {int_or_none}),
'view_count': ('ownerInfo', 'view', {int_or_none}),
'like_count': ('movie', 'favCount', {int_or_none}),
'comment_count': ('movie', 'commentCount', {int_or_none}),
'timestamp': ('movie', 'published', {int_or_none}),
'uploader_url': ('ownerInfo', 'id', {lambda x: x and f'https://mixch.tv/u/{x}'}, filter),
}),
'live_status': 'not_live',
}

View File

@ -2,15 +2,18 @@ import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
UnsupportedError,
bool_or_none, bool_or_none,
determine_ext, determine_ext,
int_or_none, int_or_none,
js_to_json,
parse_qs, parse_qs,
traverse_obj, str_or_none,
try_get, try_get,
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
) )
from ..utils.traversal import traverse_obj
class RutubeBaseIE(InfoExtractor): class RutubeBaseIE(InfoExtractor):
@ -19,7 +22,7 @@ class RutubeBaseIE(InfoExtractor):
query = {} query = {}
query['format'] = 'json' query['format'] = 'json'
return self._download_json( return self._download_json(
f'http://rutube.ru/api/video/{video_id}/', f'https://rutube.ru/api/video/{video_id}/',
video_id, 'Downloading video JSON', video_id, 'Downloading video JSON',
'Unable to download video JSON', query=query) 'Unable to download video JSON', query=query)
@ -61,18 +64,21 @@ class RutubeBaseIE(InfoExtractor):
query = {} query = {}
query['format'] = 'json' query['format'] = 'json'
return self._download_json( return self._download_json(
f'http://rutube.ru/api/play/options/{video_id}/', f'https://rutube.ru/api/play/options/{video_id}/',
video_id, 'Downloading options JSON', video_id, 'Downloading options JSON',
'Unable to download options JSON', 'Unable to download options JSON',
headers=self.geo_verification_headers(), query=query) headers=self.geo_verification_headers(), query=query)
def _extract_formats(self, options, video_id): def _extract_formats_and_subtitles(self, options, video_id):
formats = [] formats = []
subtitles = {}
for format_id, format_url in options['video_balancer'].items(): for format_id, format_url in options['video_balancer'].items():
ext = determine_ext(format_url) ext = determine_ext(format_url)
if ext == 'm3u8': if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( fmts, subs = self._extract_m3u8_formats_and_subtitles(
format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
elif ext == 'f4m': elif ext == 'f4m':
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
format_url, video_id, f4m_id=format_id, fatal=False)) format_url, video_id, f4m_id=format_id, fatal=False))
@ -82,11 +88,19 @@ class RutubeBaseIE(InfoExtractor):
'format_id': format_id, 'format_id': format_id,
}) })
for hls_url in traverse_obj(options, ('live_streams', 'hls', ..., 'url', {url_or_none})): for hls_url in traverse_obj(options, ('live_streams', 'hls', ..., 'url', {url_or_none})):
formats.extend(self._extract_m3u8_formats(hls_url, video_id, ext='mp4', fatal=False)) fmts, subs = self._extract_m3u8_formats_and_subtitles(
return formats hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls')
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
for caption in traverse_obj(options, ('captions', lambda _, v: url_or_none(v['file']))):
subtitles.setdefault(caption.get('code') or 'ru', []).append({
'url': caption['file'],
'name': caption.get('langTitle'),
})
return formats, subtitles
def _download_and_extract_formats(self, video_id, query=None): def _download_and_extract_formats_and_subtitles(self, video_id, query=None):
return self._extract_formats( return self._extract_formats_and_subtitles(
self._download_api_options(video_id, query=query), video_id) self._download_api_options(video_id, query=query), video_id)
@ -97,8 +111,8 @@ class RutubeIE(RutubeBaseIE):
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1'] _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1']
_TESTS = [{ _TESTS = [{
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', 'url': 'https://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
'md5': 'e33ac625efca66aba86cbec9851f2692', 'md5': '3d73fdfe5bb81b9aef139e22ef3de26a',
'info_dict': { 'info_dict': {
'id': '3eac3b4561676c17df9132a9a1e62e3e', 'id': '3eac3b4561676c17df9132a9a1e62e3e',
'ext': 'mp4', 'ext': 'mp4',
@ -111,26 +125,25 @@ class RutubeIE(RutubeBaseIE):
'upload_date': '20131016', 'upload_date': '20131016',
'age_limit': 0, 'age_limit': 0,
'view_count': int, 'view_count': int,
'thumbnail': 'http://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg', 'thumbnail': 'https://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg',
'categories': ['Новости и СМИ'], 'categories': ['Новости и СМИ'],
'chapters': [], 'chapters': [],
}, },
'expected_warnings': ['Unable to download f4m'],
}, { }, {
'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661', 'url': 'https://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661', 'url': 'https://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252', 'url': 'https://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source', 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://rutube.ru/video/private/884fb55f07a97ab673c7d654553e0f48/?p=x2QojCumHTS3rsKHWXN8Lg', 'url': 'https://rutube.ru/video/private/884fb55f07a97ab673c7d654553e0f48/?p=x2QojCumHTS3rsKHWXN8Lg',
'md5': 'd106225f15d625538fe22971158e896f', 'md5': '4fce7b4fcc7b1bcaa3f45eb1e1ad0dd7',
'info_dict': { 'info_dict': {
'id': '884fb55f07a97ab673c7d654553e0f48', 'id': '884fb55f07a97ab673c7d654553e0f48',
'ext': 'mp4', 'ext': 'mp4',
@ -143,11 +156,10 @@ class RutubeIE(RutubeBaseIE):
'upload_date': '20221210', 'upload_date': '20221210',
'age_limit': 0, 'age_limit': 0,
'view_count': int, 'view_count': int,
'thumbnail': 'http://pic.rutubelist.ru/video/f2/d4/f2d42b54be0a6e69c1c22539e3152156.jpg', 'thumbnail': 'https://pic.rutubelist.ru/video/f2/d4/f2d42b54be0a6e69c1c22539e3152156.jpg',
'categories': ['Видеоигры'], 'categories': ['Видеоигры'],
'chapters': [], 'chapters': [],
}, },
'expected_warnings': ['Unable to download f4m'],
}, { }, {
'url': 'https://rutube.ru/video/c65b465ad0c98c89f3b25cb03dcc87c6/', 'url': 'https://rutube.ru/video/c65b465ad0c98c89f3b25cb03dcc87c6/',
'info_dict': { 'info_dict': {
@ -156,17 +168,16 @@ class RutubeIE(RutubeBaseIE):
'chapters': 'count:4', 'chapters': 'count:4',
'categories': ['Бизнес и предпринимательство'], 'categories': ['Бизнес и предпринимательство'],
'description': 'md5:252feac1305257d8c1bab215cedde75d', 'description': 'md5:252feac1305257d8c1bab215cedde75d',
'thumbnail': 'http://pic.rutubelist.ru/video/71/8f/718f27425ea9706073eb80883dd3787b.png', 'thumbnail': 'https://pic.rutubelist.ru/video/71/8f/718f27425ea9706073eb80883dd3787b.png',
'duration': 782, 'duration': 782,
'age_limit': 0, 'age_limit': 0,
'uploader_id': '23491359', 'uploader_id': '23491359',
'timestamp': 1677153329, 'timestamp': 1677153329,
'view_count': int, 'view_count': int,
'upload_date': '20230223', 'upload_date': '20230223',
'title': 'Бизнес с нуля: найм сотрудников. Интервью с директором строительной компании', 'title': 'Бизнес с нуля: найм сотрудников. Интервью с директором строительной компании #1',
'uploader': 'Стас Быков', 'uploader': 'Стас Быков',
}, },
'expected_warnings': ['Unable to download f4m'],
}, { }, {
'url': 'https://rutube.ru/live/video/c58f502c7bb34a8fcdd976b221fca292/', 'url': 'https://rutube.ru/live/video/c58f502c7bb34a8fcdd976b221fca292/',
'info_dict': { 'info_dict': {
@ -174,7 +185,7 @@ class RutubeIE(RutubeBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'categories': ['Телепередачи'], 'categories': ['Телепередачи'],
'description': '', 'description': '',
'thumbnail': 'http://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg', 'thumbnail': 'https://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg',
'live_status': 'is_live', 'live_status': 'is_live',
'age_limit': 0, 'age_limit': 0,
'uploader_id': '23460655', 'uploader_id': '23460655',
@ -184,6 +195,24 @@ class RutubeIE(RutubeBaseIE):
'title': r're:Первый канал. Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'title': r're:Первый канал. Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'uploader': 'Первый канал', 'uploader': 'Первый канал',
}, },
}, {
'url': 'https://rutube.ru/play/embed/03a9cb54bac3376af4c5cb0f18444e01/',
'info_dict': {
'id': '03a9cb54bac3376af4c5cb0f18444e01',
'ext': 'mp4',
'age_limit': 0,
'description': '',
'title': 'Церемония начала торгов акциями ПАО «ЕвроТранс»',
'chapters': [],
'upload_date': '20240829',
'duration': 293,
'uploader': 'MOEX - Московская биржа',
'timestamp': 1724946628,
'thumbnail': 'https://pic.rutubelist.ru/video/2e/24/2e241fddb459baf0fa54acfca44874f4.jpg',
'view_count': int,
'uploader_id': '38420507',
'categories': ['Интервью'],
},
}, { }, {
'url': 'https://rutube.ru/video/5ab908fccfac5bb43ef2b1e4182256b0/', 'url': 'https://rutube.ru/video/5ab908fccfac5bb43ef2b1e4182256b0/',
'only_matching': True, 'only_matching': True,
@ -192,40 +221,46 @@ class RutubeIE(RutubeBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
@classmethod
def suitable(cls, url):
return False if RutubePlaylistIE.suitable(url) else super().suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
query = parse_qs(url) query = parse_qs(url)
info = self._download_and_extract_info(video_id, query) info = self._download_and_extract_info(video_id, query)
info['formats'] = self._download_and_extract_formats(video_id, query) formats, subtitles = self._download_and_extract_formats_and_subtitles(video_id, query)
return info return {
**info,
'formats': formats,
'subtitles': subtitles,
}
class RutubeEmbedIE(RutubeBaseIE): class RutubeEmbedIE(RutubeBaseIE):
IE_NAME = 'rutube:embed' IE_NAME = 'rutube:embed'
IE_DESC = 'Rutube embedded videos' IE_DESC = 'Rutube embedded videos'
_VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)' _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)(?:[?#/]|$)'
_TESTS = [{ _TESTS = [{
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', 'url': 'https://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
'info_dict': { 'info_dict': {
'id': 'a10e53b86e8f349080f718582ce4c661', 'id': 'a10e53b86e8f349080f718582ce4c661',
'ext': 'mp4', 'ext': 'mp4',
'timestamp': 1387830582, 'timestamp': 1387830582,
'upload_date': '20131223', 'upload_date': '20131223',
'uploader_id': '297833', 'uploader_id': '297833',
'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
'uploader': 'subziro89 ILya', 'uploader': 'subziro89 ILya',
'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89', 'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
'age_limit': 0,
'duration': 1395,
'chapters': [],
'description': 'md5:a5acea57bbc3ccdc3cacd1f11a014b5b',
'view_count': int,
'thumbnail': 'https://pic.rutubelist.ru/video/d3/03/d3031f4670a6e6170d88fb3607948418.jpg',
'categories': ['Сериалы'],
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, { }, {
'url': 'http://rutube.ru/play/embed/8083783', 'url': 'https://rutube.ru/play/embed/8083783',
'only_matching': True, 'only_matching': True,
}, { }, {
# private video # private video
@ -240,11 +275,12 @@ class RutubeEmbedIE(RutubeBaseIE):
query = parse_qs(url) query = parse_qs(url)
options = self._download_api_options(embed_id, query) options = self._download_api_options(embed_id, query)
video_id = options['effective_video'] video_id = options['effective_video']
formats = self._extract_formats(options, video_id) formats, subtitles = self._extract_formats_and_subtitles(options, video_id)
info = self._download_and_extract_info(video_id, query) info = self._download_and_extract_info(video_id, query)
info.update({ info.update({
'extractor_key': 'Rutube', 'extractor_key': 'Rutube',
'formats': formats, 'formats': formats,
'subtitles': subtitles,
}) })
return info return info
@ -295,14 +331,14 @@ class RutubeTagsIE(RutubePlaylistBaseIE):
IE_DESC = 'Rutube tags' IE_DESC = 'Rutube tags'
_VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)' _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://rutube.ru/tags/video/1800/', 'url': 'https://rutube.ru/tags/video/1800/',
'info_dict': { 'info_dict': {
'id': '1800', 'id': '1800',
}, },
'playlist_mincount': 68, 'playlist_mincount': 68,
}] }]
_PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json' _PAGE_TEMPLATE = 'https://rutube.ru/api/tags/video/%s/?page=%s&format=json'
class RutubeMovieIE(RutubePlaylistBaseIE): class RutubeMovieIE(RutubePlaylistBaseIE):
@ -310,8 +346,8 @@ class RutubeMovieIE(RutubePlaylistBaseIE):
IE_DESC = 'Rutube movies' IE_DESC = 'Rutube movies'
_VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)' _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
_MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json' _MOVIE_TEMPLATE = 'https://rutube.ru/api/metainfo/tv/%s/?format=json'
_PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json' _PAGE_TEMPLATE = 'https://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
def _real_extract(self, url): def _real_extract(self, url):
movie_id = self._match_id(url) movie_id = self._match_id(url)
@ -327,62 +363,82 @@ class RutubePersonIE(RutubePlaylistBaseIE):
IE_DESC = 'Rutube person videos' IE_DESC = 'Rutube person videos'
_VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)' _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://rutube.ru/video/person/313878/', 'url': 'https://rutube.ru/video/person/313878/',
'info_dict': { 'info_dict': {
'id': '313878', 'id': '313878',
}, },
'playlist_mincount': 37, 'playlist_mincount': 36,
}] }]
_PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' _PAGE_TEMPLATE = 'https://rutube.ru/api/video/person/%s/?page=%s&format=json'
class RutubePlaylistIE(RutubePlaylistBaseIE): class RutubePlaylistIE(RutubePlaylistBaseIE):
IE_NAME = 'rutube:playlist' IE_NAME = 'rutube:playlist'
IE_DESC = 'Rutube playlists' IE_DESC = 'Rutube playlists'
_VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P<id>\d+)' _VALID_URL = r'https?://rutube\.ru/plst/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag', 'url': 'https://rutube.ru/plst/308547/',
'info_dict': { 'info_dict': {
'id': '3097', 'id': '308547',
}, },
'playlist_count': 27, 'playlist_mincount': 22,
}, {
'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source',
'only_matching': True,
}] }]
_PAGE_TEMPLATE = 'https://rutube.ru/api/playlist/custom/%s/videos?page=%s&format=json'
_PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json'
@classmethod
def suitable(cls, url):
from ..utils import int_or_none, parse_qs
if not super().suitable(url):
return False
params = parse_qs(url)
return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0])
def _next_page_url(self, page_num, playlist_id, item_kind):
return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num)
def _real_extract(self, url):
qs = parse_qs(url)
playlist_kind = qs['pl_type'][0]
playlist_id = qs['pl_id'][0]
return self._extract_playlist(playlist_id, item_kind=playlist_kind)
class RutubeChannelIE(RutubePlaylistBaseIE): class RutubeChannelIE(RutubePlaylistBaseIE):
IE_NAME = 'rutube:channel' IE_NAME = 'rutube:channel'
IE_DESC = 'Rutube channel' IE_DESC = 'Rutube channel'
_VALID_URL = r'https?://rutube\.ru/channel/(?P<id>\d+)/videos' _VALID_URL = r'https?://rutube\.ru/(?:channel/(?P<id>\d+)|u/(?P<slug>\w+))(?:/(?P<section>videos|shorts|playlists))?'
_TESTS = [{ _TESTS = [{
'url': 'https://rutube.ru/channel/639184/videos/', 'url': 'https://rutube.ru/channel/639184/videos/',
'info_dict': { 'info_dict': {
'id': '639184', 'id': '639184_videos',
}, },
'playlist_mincount': 133, 'playlist_mincount': 129,
}, {
'url': 'https://rutube.ru/channel/25902603/shorts/',
'info_dict': {
'id': '25902603_shorts',
},
'playlist_mincount': 277,
}, {
'url': 'https://rutube.ru/channel/25902603/',
'info_dict': {
'id': '25902603',
},
'playlist_mincount': 406,
}, {
'url': 'https://rutube.ru/u/rutube/videos/',
'info_dict': {
'id': '23704195_videos',
},
'playlist_mincount': 113,
}] }]
_PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' _PAGE_TEMPLATE = 'https://rutube.ru/api/video/person/%s/?page=%s&format=json&origin__type=%s'
def _next_page_url(self, page_num, playlist_id, section):
origin_type = {
'videos': 'rtb,rst,ifrm,rspa',
'shorts': 'rshorts',
None: '',
}.get(section)
return self._PAGE_TEMPLATE % (playlist_id, page_num, origin_type)
def _real_extract(self, url):
playlist_id, slug, section = self._match_valid_url(url).group('id', 'slug', 'section')
if section == 'playlists':
raise UnsupportedError(url)
if slug:
webpage = self._download_webpage(url, slug)
redux_state = self._search_json(
r'window\.reduxState\s*=', webpage, 'redux state', slug, transform_source=js_to_json)
playlist_id = traverse_obj(redux_state, (
'api', 'queries', lambda k, _: k.startswith('channelIdBySlug'),
'data', 'channel_id', {int}, {str_or_none}, any))
playlist = self._extract_playlist(playlist_id, section=section)
if section:
playlist['id'] = f'{playlist_id}_{section}'
return playlist

View File

@ -2,13 +2,16 @@ import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
filter_dict,
float_or_none, float_or_none,
int_or_none, int_or_none,
parse_qs,
str_or_none, str_or_none,
try_get, try_get,
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
) )
from ..utils.traversal import traverse_obj
def _extract_episode(data, episode_id=None): def _extract_episode(data, episode_id=None):
@ -58,15 +61,10 @@ def _extract_episode(data, episode_id=None):
class SpreakerIE(InfoExtractor): class SpreakerIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = [
https?:// r'https?://api\.spreaker\.com/(?:(?:download/)?episode|v2/episodes)/(?P<id>\d+)',
api\.spreaker\.com/ r'https?://(?:www\.)?spreaker\.com/episode/[^#?/]*?(?P<id>\d+)/?(?:[?#]|$)',
(?: ]
(?:download/)?episode|
v2/episodes
)/
(?P<id>\d+)
'''
_TESTS = [{ _TESTS = [{
'url': 'https://api.spreaker.com/episode/12534508', 'url': 'https://api.spreaker.com/episode/12534508',
'info_dict': { 'info_dict': {
@ -83,7 +81,9 @@ class SpreakerIE(InfoExtractor):
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'comment_count': int, 'comment_count': int,
'series': 'Success With Music (SWM)', 'series': 'Success With Music | SWM',
'thumbnail': 'https://d3wo5wojvuv7l.cloudfront.net/t_square_limited_160/images.spreaker.com/original/777ce4f96b71b0e1b7c09a5e625210e3.jpg',
'creators': ['SWM'],
}, },
}, { }, {
'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3', 'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
@ -91,52 +91,75 @@ class SpreakerIE(InfoExtractor):
}, { }, {
'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments', 'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
'only_matching': True, 'only_matching': True,
}, {
'note': 'episode',
'url': 'https://www.spreaker.com/episode/grunge-music-origins-the-raw-sound-that-defined-a-generation--60269615',
'info_dict': {
'id': '60269615',
'display_id': 'grunge-music-origins-the-raw-sound-that-',
'ext': 'mp3',
'title': 'Grunge Music Origins - The Raw Sound that Defined a Generation',
'description': str,
'timestamp': 1717468905,
'upload_date': '20240604',
'uploader': 'Katie Brown 2',
'uploader_id': '17733249',
'duration': 818.83,
'view_count': int,
'like_count': int,
'comment_count': int,
'series': '90s Grunge',
'thumbnail': 'https://d3wo5wojvuv7l.cloudfront.net/t_square_limited_160/images.spreaker.com/original/bb0d4178f7cf57cc8786dedbd9c5d969.jpg',
'creators': ['Katie Brown 2'],
},
}, {
'url': 'https://www.spreaker.com/episode/60269615',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
episode_id = self._match_id(url) episode_id = self._match_id(url)
data = self._download_json( data = self._download_json(
f'https://api.spreaker.com/v2/episodes/{episode_id}', f'https://api.spreaker.com/v2/episodes/{episode_id}', episode_id,
episode_id)['response']['episode'] query=traverse_obj(parse_qs(url), {'key': ('key', 0)}))['response']['episode']
return _extract_episode(data, episode_id) return _extract_episode(data, episode_id)
class SpreakerPageIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
episode_id = self._search_regex(
(r'data-episode_id=["\'](?P<id>\d+)',
r'episode_id\s*:\s*(?P<id>\d+)'), webpage, 'episode id')
return self.url_result(
f'https://api.spreaker.com/episode/{episode_id}',
ie=SpreakerIE.ie_key(), video_id=episode_id)
class SpreakerShowIE(InfoExtractor): class SpreakerShowIE(InfoExtractor):
_VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)' _VALID_URL = [
r'https?://api\.spreaker\.com/show/(?P<id>\d+)',
r'https?://(?:www\.)?spreaker\.com/podcast/[\w-]+--(?P<id>[\d]+)',
r'https?://(?:www\.)?spreaker\.com/show/(?P<id>\d+)/episodes/feed',
]
_TESTS = [{ _TESTS = [{
'url': 'https://api.spreaker.com/show/4652058', 'url': 'https://api.spreaker.com/show/4652058',
'info_dict': { 'info_dict': {
'id': '4652058', 'id': '4652058',
}, },
'playlist_mincount': 118, 'playlist_mincount': 118,
}, {
'url': 'https://www.spreaker.com/podcast/health-wealth--5918323',
'info_dict': {
'id': '5918323',
},
'playlist_mincount': 60,
}, {
'url': 'https://www.spreaker.com/show/5887186/episodes/feed',
'info_dict': {
'id': '5887186',
},
'playlist_mincount': 290,
}] }]
def _entries(self, show_id): def _entries(self, show_id, key=None):
for page_num in itertools.count(1): for page_num in itertools.count(1):
episodes = self._download_json( episodes = self._download_json(
f'https://api.spreaker.com/show/{show_id}/episodes', f'https://api.spreaker.com/show/{show_id}/episodes',
show_id, note=f'Downloading JSON page {page_num}', query={ show_id, note=f'Downloading JSON page {page_num}', query=filter_dict({
'page': page_num, 'page': page_num,
'max_per_page': 100, 'max_per_page': 100,
}) 'key': key,
}))
pager = try_get(episodes, lambda x: x['response']['pager'], dict) pager = try_get(episodes, lambda x: x['response']['pager'], dict)
if not pager: if not pager:
break break
@ -152,21 +175,5 @@ class SpreakerShowIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
show_id = self._match_id(url) show_id = self._match_id(url)
return self.playlist_result(self._entries(show_id), playlist_id=show_id) key = traverse_obj(parse_qs(url), ('key', 0))
return self.playlist_result(self._entries(show_id, key), playlist_id=show_id)
class SpreakerShowPageIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.spreaker.com/show/success-with-music',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
show_id = self._search_regex(
r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id')
return self.url_result(
f'https://api.spreaker.com/show/{show_id}',
ie=SpreakerShowIE.ie_key(), video_id=show_id)