Compare commits

..

22 Commits

Author SHA1 Message Date
bashonly
887f6f07d1
[ie/slideslive] Update tests
Authored by: bashonly
2024-02-03 19:39:13 -06:00
bashonly
b1adb4e883
[ie/slideslive] Cleanup
Authored by: bashonly
2024-02-03 19:29:13 -06:00
bashonly
19a46f51c2
Merge branch 'yt-dlp:master' into cleanup/2024-01 2024-02-03 19:26:43 -06:00
YoshichikaAAA
e3ce2b385e
[ie/radiko] Extract more metadata (#9115)
Authored by: YoshichikaAAA
2024-02-03 18:44:17 +00:00
sepro
4253e3b7f4
[ie/CCMA] Extract 1080p DASH formats (#9130)
Closes #5755
Authored by: seproDev
2024-02-03 15:59:43 +01:00
bashonly
8e765755f7
[ie/vimeo] Fix API headers (#9125)
Closes #9124
Authored by: bashonly
2024-02-02 21:15:04 +00:00
c-basalt
ffa017cfc5
[ie/BiliBiliSearch] Set cookie to fix extraction (#9119)
Closes #5083
Authored by: c-basalt
2024-02-02 21:08:29 +00:00
HobbyistDev
a0d50aabc5
[ie/orf:on] Add extractor (#9113)
Closes #8903
Authored by: HobbyistDev
2024-02-02 20:57:53 +00:00
HobbyistDev
2f4b575946
[ie/zetland] Add extractor (#9116)
Closes #9024
Authored by: HobbyistDev
2024-02-02 20:56:29 +00:00
garret
fc2cc626f0
[ie/cineverse] Detect when login required (#9081)
Partially addresses #9072
Authored by: garret1317
2024-01-31 20:21:59 +00:00
columndeeply
a2bac6b7ad
[ie/PrankCastPost] Add extractor (#8933)
Authored by: columndeeply
2024-01-31 20:16:07 +00:00
rrgomes
4b8b0dded8
[ie/nfb] Add support for onf.ca and series (#8997)
Closes #8198
Authored by: bashonly, rrgomes

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-01-31 18:00:15 +00:00
jazz1611
4a6ff0b47a
[ie/redtube] Support redtube.com.br URLs (#9103)
Authored by: jazz1611
2024-01-31 17:56:29 +00:00
Radu Manole
62c65bfaf8
[ie/NinaProtocol] Add extractor (#8946)
Closes #8709, Closes #8764
Authored by: RaduManole, seproDev

Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
2024-01-31 18:41:31 +01:00
bashonly
d63eae7e7f
[core] Don't select storyboard formats as fallback
Closes #7715
Authored by: bashonly
2024-01-31 03:17:51 -06:00
Simon Sawicki
2792092afd
[cookies] Improve error message for Windows --cookies-from-browser chrome issue (#9080)
Authored by: Grub4K
2024-01-31 09:56:14 +01:00
Simon Sawicki
cbed249aaa
[cookies] Fix --cookies-from-browser for snap Firefox (#9016)
Authored by: Grub4K
2024-01-31 09:43:52 +01:00
Simon Sawicki
3725b4f0c9
[core] Add --compat-options 2023 (#9084)
Authored by: Grub4K
2024-01-31 09:35:35 +01:00
sepro
67bb70cd70
[ie/Vbox7] Fix extractor (#9100)
Closes #1098, Closes #5661
Authored by: seproDev
2024-01-29 21:16:46 +01:00
kclauhk
9b5efaf86b
[ie/facebook] Support events (#9055)
Closes #5355
Authored by: kclauhk
2024-01-29 19:43:41 +00:00
sepro
999ea80beb
[ie/art19] Add extractors (#9099)
Authored by: seproDev
2024-01-29 20:38:25 +01:00
Nur Mahmud Ul Alam Tasin
41b6cdb419
[ie/viewlift] Add support for chorki.com (#9095)
Closes #3369
Authored by: NurTasin
2024-01-28 22:33:44 +00:00
21 changed files with 1277 additions and 181 deletions

View File

@ -167,7 +167,8 @@ For ease of use, a few more compat options are available:
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx` * `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx`
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx` * `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx`
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date` * `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
* `--compat-options 2022`: Same as `--compat-options playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`. Use this to enable all future compat options * `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress`
* `--compat-options 2023`: Same as `--compat-options prefer-legacy-http-handler,manifest-filesize-approx`. Use this to enable all future compat options
# INSTALLATION # INSTALLATION

View File

@ -2451,7 +2451,7 @@ class YoutubeDL:
# for extractors with incomplete formats (audio only (soundcloud) # for extractors with incomplete formats (audio only (soundcloud)
# or video only (imgur)) best/worst will fallback to # or video only (imgur)) best/worst will fallback to
# best/worst {video,audio}-only format # best/worst {video,audio}-only format
matches = formats matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))
elif seperate_fallback and not ctx['has_merged_format']: elif seperate_fallback and not ctx['has_merged_format']:
# for compatibility with youtube-dl when there is no pre-merged format # for compatibility with youtube-dl when there is no pre-merged format
matches = list(filter(seperate_fallback, formats)) matches = list(filter(seperate_fallback, formats))

View File

@ -1,6 +1,7 @@
import base64 import base64
import collections import collections
import contextlib import contextlib
import glob
import http.cookiejar import http.cookiejar
import http.cookies import http.cookies
import io import io
@ -23,7 +24,8 @@ from .aes import (
aes_gcm_decrypt_and_verify_bytes, aes_gcm_decrypt_and_verify_bytes,
unpad_pkcs7, unpad_pkcs7,
) )
from .compat import functools from .compat import functools # isort: split
from .compat import compat_os_name
from .dependencies import ( from .dependencies import (
_SECRETSTORAGE_UNAVAILABLE_REASON, _SECRETSTORAGE_UNAVAILABLE_REASON,
secretstorage, secretstorage,
@ -31,6 +33,7 @@ from .dependencies import (
) )
from .minicurses import MultilinePrinter, QuietMultilinePrinter from .minicurses import MultilinePrinter, QuietMultilinePrinter
from .utils import ( from .utils import (
DownloadError,
Popen, Popen,
error_to_str, error_to_str,
expand_path, expand_path,
@ -122,13 +125,14 @@ def _extract_firefox_cookies(profile, container, logger):
return YoutubeDLCookieJar() return YoutubeDLCookieJar()
if profile is None: if profile is None:
search_root = _firefox_browser_dir() search_roots = list(_firefox_browser_dirs())
elif _is_path(profile): elif _is_path(profile):
search_root = profile search_roots = [profile]
else: else:
search_root = os.path.join(_firefox_browser_dir(), profile) search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()]
search_root = ', '.join(map(repr, search_roots))
cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger) cookie_database_path = _newest(_firefox_cookie_dbs(search_roots))
if cookie_database_path is None: if cookie_database_path is None:
raise FileNotFoundError(f'could not find firefox cookies database in {search_root}') raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
logger.debug(f'Extracting cookies from: "{cookie_database_path}"') logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
@ -182,12 +186,21 @@ def _extract_firefox_cookies(profile, container, logger):
cursor.connection.close() cursor.connection.close()
def _firefox_browser_dir(): def _firefox_browser_dirs():
if sys.platform in ('cygwin', 'win32'): if sys.platform in ('cygwin', 'win32'):
return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles') yield os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
elif sys.platform == 'darwin': elif sys.platform == 'darwin':
return os.path.expanduser('~/Library/Application Support/Firefox/Profiles') yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
return os.path.expanduser('~/.mozilla/firefox')
else:
yield from map(os.path.expanduser, ('~/.mozilla/firefox', '~/snap/firefox/common/.mozilla/firefox'))
def _firefox_cookie_dbs(roots):
for root in map(os.path.abspath, roots):
for pattern in ('', '*/', 'Profiles/*/'):
yield from glob.iglob(os.path.join(root, pattern, 'cookies.sqlite'))
def _get_chromium_based_browser_settings(browser_name): def _get_chromium_based_browser_settings(browser_name):
@ -268,7 +281,7 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
logger.error(f'{browser_name} does not support profiles') logger.error(f'{browser_name} does not support profiles')
search_root = config['browser_dir'] search_root = config['browser_dir']
cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger) cookie_database_path = _newest(_find_files(search_root, 'Cookies', logger))
if cookie_database_path is None: if cookie_database_path is None:
raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"') raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
logger.debug(f'Extracting cookies from: "{cookie_database_path}"') logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
@ -307,6 +320,12 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
counts['unencrypted'] = unencrypted_cookies counts['unencrypted'] = unencrypted_cookies
logger.debug(f'cookie version breakdown: {counts}') logger.debug(f'cookie version breakdown: {counts}')
return jar return jar
except PermissionError as error:
if compat_os_name == 'nt' and error.errno == 13:
message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
logger.error(message)
raise DownloadError(message) # force exit
raise
finally: finally:
if cursor is not None: if cursor is not None:
cursor.connection.close() cursor.connection.close()
@ -947,7 +966,7 @@ def _get_windows_v10_key(browser_root, logger):
References: References:
- [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
""" """
path = _find_most_recently_used_file(browser_root, 'Local State', logger) path = _newest(_find_files(browser_root, 'Local State', logger))
if path is None: if path is None:
logger.error('could not find local state file') logger.error('could not find local state file')
return None return None
@ -1049,17 +1068,20 @@ def _get_column_names(cursor, table_name):
return [row[1].decode() for row in table_info] return [row[1].decode() for row in table_info]
def _find_most_recently_used_file(root, filename, logger): def _newest(files):
return max(files, key=lambda path: os.lstat(path).st_mtime, default=None)
def _find_files(root, filename, logger):
# if there are multiple browser profiles, take the most recently used one # if there are multiple browser profiles, take the most recently used one
i, paths = 0, [] i = 0
with _create_progress_bar(logger) as progress_bar: with _create_progress_bar(logger) as progress_bar:
for curr_root, dirs, files in os.walk(root): for curr_root, _, files in os.walk(root):
for file in files: for file in files:
i += 1 i += 1
progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched') progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
if file == filename: if file == filename:
paths.append(os.path.join(curr_root, file)) yield os.path.join(curr_root, file)
return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime)
def _merge_cookie_jars(jars): def _merge_cookie_jars(jars):
@ -1073,7 +1095,7 @@ def _merge_cookie_jars(jars):
def _is_path(value): def _is_path(value):
return os.path.sep in value return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep)
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None): def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):

View File

@ -138,6 +138,10 @@ from .ard import (
ARDMediathekCollectionIE, ARDMediathekCollectionIE,
ARDIE, ARDIE,
) )
from .art19 import (
Art19IE,
Art19ShowIE,
)
from .arte import ( from .arte import (
ArteTVIE, ArteTVIE,
ArteTVEmbedIE, ArteTVEmbedIE,
@ -1243,7 +1247,10 @@ from .nexx import (
NexxIE, NexxIE,
NexxEmbedIE, NexxEmbedIE,
) )
from .nfb import NFBIE from .nfb import (
NFBIE,
NFBSeriesIE,
)
from .nfhsnetwork import NFHSNetworkIE from .nfhsnetwork import NFHSNetworkIE
from .nfl import ( from .nfl import (
NFLIE, NFLIE,
@ -1280,6 +1287,7 @@ from .niconico import (
NicovideoTagURLIE, NicovideoTagURLIE,
NiconicoLiveIE, NiconicoLiveIE,
) )
from .ninaprotocol import NinaProtocolIE
from .ninecninemedia import ( from .ninecninemedia import (
NineCNineMediaIE, NineCNineMediaIE,
CPTwentyFourIE, CPTwentyFourIE,
@ -1386,6 +1394,7 @@ from .ora import OraTVIE
from .orf import ( from .orf import (
ORFTVthekIE, ORFTVthekIE,
ORFFM4StoryIE, ORFFM4StoryIE,
ORFONIE,
ORFRadioIE, ORFRadioIE,
ORFPodcastIE, ORFPodcastIE,
ORFIPTVIE, ORFIPTVIE,
@ -1510,7 +1519,7 @@ from .puhutv import (
PuhuTVSerieIE, PuhuTVSerieIE,
) )
from .pr0gramm import Pr0grammIE from .pr0gramm import Pr0grammIE
from .prankcast import PrankCastIE from .prankcast import PrankCastIE, PrankCastPostIE
from .premiershiprugby import PremiershipRugbyIE from .premiershiprugby import PremiershipRugbyIE
from .presstv import PressTVIE from .presstv import PressTVIE
from .projectveritas import ProjectVeritasIE from .projectveritas import ProjectVeritasIE
@ -2488,6 +2497,7 @@ from .zee5 import (
Zee5SeriesIE, Zee5SeriesIE,
) )
from .zeenews import ZeeNewsIE from .zeenews import ZeeNewsIE
from .zetland import ZetlandDKArticleIE
from .zhihu import ZhihuIE from .zhihu import ZhihuIE
from .zingmp3 import ( from .zingmp3 import (
ZingMp3IE, ZingMp3IE,

303
yt_dlp/extractor/art19.py Normal file
View File

@ -0,0 +1,303 @@
import re
from .common import InfoExtractor
from ..utils import float_or_none, int_or_none, parse_iso8601, url_or_none
from ..utils.traversal import traverse_obj
class Art19IE(InfoExtractor):
_UUID_REGEX = r'[\da-f]{8}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{12}'
_VALID_URL = [
rf'https?://(?:www\.)?art19\.com/shows/[^/#?]+/episodes/(?P<id>{_UUID_REGEX})',
rf'https?://rss\.art19\.com/episodes/(?P<id>{_UUID_REGEX})\.mp3',
]
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL[0]})']
_TESTS = [{
'url': 'https://rss.art19.com/episodes/5ba1413c-48b8-472b-9cc3-cfd952340bdb.mp3',
'info_dict': {
'id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
'ext': 'mp3',
'title': 'Why Did DeSantis Drop Out?',
'series': 'The Daily Briefing',
'release_timestamp': 1705941275,
'description': 'md5:da38961da4a3f7e419471365e3c6b49f',
'episode': 'Episode 582',
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'series_id': 'ed52a0ab-08b1-4def-8afc-549e4d93296d',
'upload_date': '20240122',
'timestamp': 1705940815,
'episode_number': 582,
'modified_date': '20240122',
'episode_id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
'modified_timestamp': 1705941275,
'release_date': '20240122',
'duration': 527.4,
},
}, {
'url': 'https://art19.com/shows/scamfluencers/episodes/8319b776-4153-4d22-8630-631f204a03dd',
'info_dict': {
'id': '8319b776-4153-4d22-8630-631f204a03dd',
'ext': 'mp3',
'title': 'Martha Stewart: The Homemaker Hustler Part 2',
'modified_date': '20240116',
'upload_date': '20240105',
'modified_timestamp': 1705435802,
'episode_id': '8319b776-4153-4d22-8630-631f204a03dd',
'series_id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'description': 'md5:4aa7cfd1358dc57e729835bc208d7893',
'release_timestamp': 1705305660,
'release_date': '20240115',
'timestamp': 1704481536,
'episode_number': 88,
'series': 'Scamfluencers',
'duration': 2588.37501,
'episode': 'Episode 88',
},
}]
_WEBPAGE_TESTS = [{
'url': 'https://www.nu.nl/formule-1/6291456/verstappen-wordt-een-synoniem-voor-formule-1.html',
'info_dict': {
'id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
'ext': 'mp3',
'title': "'Verstappen wordt een synoniem voor Formule 1'",
'season': 'Seizoen 6',
'description': 'md5:39a7159a31c4cda312b2e893bdd5c071',
'episode_id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
'duration': 3061.82111,
'series_id': '93f4e113-2a60-4609-a564-755058fa40d8',
'release_date': '20231126',
'modified_timestamp': 1701156004,
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'season_number': 6,
'episode_number': 52,
'modified_date': '20231128',
'upload_date': '20231126',
'timestamp': 1701025981,
'season_id': '36097c1e-7455-490d-a2fe-e2f10b4d5f26',
'series': 'De Boordradio',
'release_timestamp': 1701026308,
'episode': 'Episode 52',
},
}, {
'url': 'https://www.wishtv.com/podcast-episode/larry-bucshon-announces-retirement-from-congress/',
'info_dict': {
'id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
'ext': 'mp3',
'title': 'Larry Bucshon announces retirement from congress',
'upload_date': '20240115',
'episode_number': 148,
'episode': 'Episode 148',
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'release_date': '20240115',
'timestamp': 1705328205,
'release_timestamp': 1705329275,
'series': 'All INdiana Politics',
'modified_date': '20240117',
'modified_timestamp': 1705458901,
'series_id': 'c4af6c27-b10f-4ff2-9f84-0f407df86ff1',
'episode_id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
'description': 'md5:53b5239e4d14973a87125c217c255b2a',
'duration': 1256.18848,
},
}]
@classmethod
def _extract_embed_urls(cls, url, webpage):
yield from super()._extract_embed_urls(url, webpage)
for episode_id in re.findall(
rf'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-episode-id=[\'"]({cls._UUID_REGEX})[\'"]', webpage):
yield f'https://rss.art19.com/episodes/{episode_id}.mp3'
def _real_extract(self, url):
episode_id = self._match_id(url)
player_metadata = self._download_json(
f'https://art19.com/episodes/{episode_id}', episode_id,
note='Downloading player metadata', fatal=False,
headers={'Accept': 'application/vnd.art19.v0+json'})
rss_metadata = self._download_json(
f'https://rss.art19.com/episodes/{episode_id}.json', episode_id, fatal=False,
note='Downloading RSS metadata')
formats = [{
'format_id': 'direct',
'url': f'https://rss.art19.com/episodes/{episode_id}.mp3',
'vcodec': 'none',
'acodec': 'mp3',
}]
for fmt_id, fmt_data in traverse_obj(rss_metadata, ('content', 'media', {dict.items}, ...)):
if fmt_id == 'waveform_bin':
continue
fmt_url = traverse_obj(fmt_data, ('url', {url_or_none}))
if not fmt_url:
continue
formats.append({
'format_id': fmt_id,
'url': fmt_url,
'vcodec': 'none',
'acodec': fmt_id,
'quality': -2 if fmt_id == 'ogg' else -1,
})
return {
'id': episode_id,
'formats': formats,
**traverse_obj(player_metadata, ('episode', {
'title': ('title', {str}),
'description': ('description_plain', {str}),
'episode_id': ('id', {str}),
'episode_number': ('episode_number', {int_or_none}),
'season_id': ('season_id', {str}),
'series_id': ('series_id', {str}),
'timestamp': ('created_at', {parse_iso8601}),
'release_timestamp': ('released_at', {parse_iso8601}),
'modified_timestamp': ('updated_at', {parse_iso8601})
})),
**traverse_obj(rss_metadata, ('content', {
'title': ('episode_title', {str}),
'description': ('episode_description_plain', {str}),
'episode_id': ('episode_id', {str}),
'episode_number': ('episode_number', {int_or_none}),
'season': ('season_title', {str}),
'season_id': ('season_id', {str}),
'season_number': ('season_number', {int_or_none}),
'series': ('series_title', {str}),
'series_id': ('series_id', {str}),
'thumbnail': ('cover_image', {url_or_none}),
'duration': ('duration', {float_or_none}),
})),
}
class Art19ShowIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?art19\.com/shows/(?P<id>[\w-]+)(?:/embed)?/?'
_VALID_URL = [
rf'{_VALID_URL_BASE}(?:$|[#?])',
r'https?://rss\.art19\.com/(?P<id>[\w-]+)/?(?:$|[#?])',
]
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL_BASE}[^\'"])']
_TESTS = [{
'url': 'https://www.art19.com/shows/5898c087-a14f-48dc-b6fc-a2280a1ff6e0/',
'info_dict': {
'_type': 'playlist',
'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
'display_id': 'echt-gebeurd',
'title': 'Echt Gebeurd',
'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
'timestamp': 1492642167,
'upload_date': '20170419',
'modified_timestamp': int,
'modified_date': str,
'tags': 'count:7',
},
'playlist_mincount': 425,
}, {
'url': 'https://www.art19.com/shows/echt-gebeurd',
'info_dict': {
'_type': 'playlist',
'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
'display_id': 'echt-gebeurd',
'title': 'Echt Gebeurd',
'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
'timestamp': 1492642167,
'upload_date': '20170419',
'modified_timestamp': int,
'modified_date': str,
'tags': 'count:7',
},
'playlist_mincount': 425,
}, {
'url': 'https://rss.art19.com/scamfluencers',
'info_dict': {
'_type': 'playlist',
'id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
'display_id': 'scamfluencers',
'title': 'Scamfluencers',
'description': 'md5:7d239d670c0ced6dadbf71c4caf764b7',
'timestamp': 1647368573,
'upload_date': '20220315',
'modified_timestamp': int,
'modified_date': str,
'tags': [],
},
'playlist_mincount': 90,
}, {
'url': 'https://art19.com/shows/enthuellt/embed',
'info_dict': {
'_type': 'playlist',
'id': 'e2cacf57-bb8a-4263-aa81-719bcdd4f80c',
'display_id': 'enthuellt',
'title': 'Enthüllt',
'description': 'md5:17752246643414a2fd51744fc9a1c08e',
'timestamp': 1601645860,
'upload_date': '20201002',
'modified_timestamp': int,
'modified_date': str,
'tags': 'count:10',
},
'playlist_mincount': 10,
}]
_WEBPAGE_TESTS = [{
'url': 'https://deconstructingyourself.com/deconstructing-yourself-podcast',
'info_dict': {
'_type': 'playlist',
'id': 'cfbb9b01-c295-4adb-8726-adde7c03cf21',
'display_id': 'deconstructing-yourself',
'title': 'Deconstructing Yourself',
'description': 'md5:dab5082b28b248a35476abf64768854d',
'timestamp': 1570581181,
'upload_date': '20191009',
'modified_timestamp': int,
'modified_date': str,
'tags': 'count:5',
},
'playlist_mincount': 80,
}, {
'url': 'https://chicagoreader.com/columns-opinion/podcasts/ben-joravsky-show-podcast-episodes/',
'info_dict': {
'_type': 'playlist',
'id': '9dfa2c37-ab87-4c13-8388-4897914313ec',
'display_id': 'the-ben-joravsky-show',
'title': 'The Ben Joravsky Show',
'description': 'md5:c0f3ec0ee0dbea764390e521adc8780a',
'timestamp': 1550875095,
'upload_date': '20190222',
'modified_timestamp': int,
'modified_date': str,
'tags': ['Chicago Politics', 'chicago', 'Ben Joravsky'],
},
'playlist_mincount': 1900,
}]
@classmethod
def _extract_embed_urls(cls, url, webpage):
yield from super()._extract_embed_urls(url, webpage)
for series_id in re.findall(
r'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-series-id=[\'"]([\w-]+)[\'"]', webpage):
yield f'https://art19.com/shows/{series_id}'
def _real_extract(self, url):
series_id = self._match_id(url)
series_metadata = self._download_json(
f'https://art19.com/series/{series_id}', series_id, note='Downloading series metadata',
headers={'Accept': 'application/vnd.art19.v0+json'})
return {
'_type': 'playlist',
'entries': [
self.url_result(f'https://rss.art19.com/episodes/{episode_id}.mp3', Art19IE)
for episode_id in traverse_obj(series_metadata, ('series', 'episode_ids', ..., {str}))
],
**traverse_obj(series_metadata, ('series', {
'id': ('id', {str}),
'display_id': ('slug', {str}),
'title': ('title', {str}),
'description': ('description_plain', {str}),
'timestamp': ('created_at', {parse_iso8601}),
'modified_timestamp': ('updated_at', {parse_iso8601}),
})),
'tags': traverse_obj(series_metadata, ('tags', ..., 'name', {str})),
}

View File

@ -7,6 +7,7 @@ import math
import re import re
import time import time
import urllib.parse import urllib.parse
import uuid
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from ..dependencies import Cryptodome from ..dependencies import Cryptodome
@ -1464,8 +1465,37 @@ class BiliBiliSearchIE(SearchInfoExtractor):
IE_DESC = 'Bilibili video search' IE_DESC = 'Bilibili video search'
_MAX_RESULTS = 100000 _MAX_RESULTS = 100000
_SEARCH_KEY = 'bilisearch' _SEARCH_KEY = 'bilisearch'
_TESTS = [{
'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
'playlist_count': 3,
'info_dict': {
'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
},
'playlist': [{
'info_dict': {
'id': 'BV1n44y1Q7sc',
'ext': 'mp4',
'title': '“出道一年我怎么还在等你单推的女人睡觉后开播啊”【一分钟了解靡烟miya】',
'timestamp': 1669889987,
'upload_date': '20221201',
'description': 'md5:43343c0973defff527b5a4b403b4abf9',
'tags': list,
'uploader': '靡烟miya',
'duration': 123.156,
'uploader_id': '1958703906',
'comment_count': int,
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 988222410_part1'],
},
}],
}]
def _search_results(self, query): def _search_results(self, query):
if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
for page_num in itertools.count(1): for page_num in itertools.count(1):
videos = self._download_json( videos = self._download_json(
'https://api.bilibili.com/x/web-interface/search/type', query, 'https://api.bilibili.com/x/web-interface/search/type', query,

View File

@ -1,6 +1,7 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
determine_ext,
int_or_none, int_or_none,
parse_duration, parse_duration,
parse_resolution, parse_resolution,
@ -60,6 +61,7 @@ class CCMAIE(InfoExtractor):
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={ 'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
'media': media_type, 'media': media_type,
'idint': media_id, 'idint': media_id,
'format': 'dm',
}) })
formats = [] formats = []
@ -69,6 +71,10 @@ class CCMAIE(InfoExtractor):
format_url = url_or_none(format_.get('file')) format_url = url_or_none(format_.get('file'))
if not format_url: if not format_url:
continue continue
if determine_ext(format_url) == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, media_id, mpd_id='dash', fatal=False))
continue
label = format_.get('label') label = format_.get('label')
f = parse_resolution(label) f = parse_resolution(label)
f.update({ f.update({

View File

@ -67,7 +67,10 @@ class CineverseIE(CineverseBaseIE):
html = self._download_webpage(url, video_id) html = self._download_webpage(url, video_id)
idetails = self._search_nextjs_data(html, video_id)['props']['pageProps']['idetails'] idetails = self._search_nextjs_data(html, video_id)['props']['pageProps']['idetails']
if idetails.get('err_code') == 1200: err_code = idetails.get('err_code')
if err_code == 1002:
self.raise_login_required()
elif err_code == 1200:
self.raise_geo_restricted( self.raise_geo_restricted(
'This video is not available from your location due to geo restriction. ' 'This video is not available from your location due to geo restriction. '
'You may be able to bypass it by using the /details/ page instead of the /watch/ page', 'You may be able to bypass it by using the /details/ page instead of the /watch/ page',

View File

@ -54,6 +54,7 @@ class FacebookIE(InfoExtractor):
)\?(?:.*?)(?:v|video_id|story_fbid)=| )\?(?:.*?)(?:v|video_id|story_fbid)=|
[^/]+/videos/(?:[^/]+/)?| [^/]+/videos/(?:[^/]+/)?|
[^/]+/posts/| [^/]+/posts/|
events/(?:[^/]+/)?|
groups/[^/]+/(?:permalink|posts)/| groups/[^/]+/(?:permalink|posts)/|
watchparty/ watchparty/
)| )|
@ -399,6 +400,18 @@ class FacebookIE(InfoExtractor):
}, },
'playlist_count': 1, 'playlist_count': 1,
'skip': 'Requires logging in', 'skip': 'Requires logging in',
}, {
# data.event.cover_media_renderer.cover_video
'url': 'https://m.facebook.com/events/1509582499515440',
'info_dict': {
'id': '637246984455045',
'ext': 'mp4',
'title': 'ANALISI IN CAMPO OSCURO " Coaguli nel sangue dei vaccinati"',
'description': 'Other event by Comitato Liberi Pensatori on Tuesday, October 18 2022',
'thumbnail': r're:^https?://.*',
'uploader': 'Comitato Liberi Pensatori',
'uploader_id': '100065709540881',
},
}] }]
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)' _SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
_api_config = { _api_config = {
@ -473,38 +486,10 @@ class FacebookIE(InfoExtractor):
r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)] r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
post = traverse_obj(post_data, ( post = traverse_obj(post_data, (
..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or [] ..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
automatic_captions, subtitles = {}, {}
subs_data = traverse_obj(post, (..., 'video', ..., 'attachments', ..., lambda k, v: (
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')))
is_video_broadcast = get_first(subs_data, 'is_video_broadcast', expected_type=bool)
captions = get_first(subs_data, 'video_available_captions_locales', 'captions_url')
if url_or_none(captions): # if subs_data only had a 'captions_url'
locale = self._html_search_meta(['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US')
subtitles[locale] = [{'url': captions}]
# or else subs_data had 'video_available_captions_locales', a list of dicts
for caption in traverse_obj(captions, (
{lambda x: sorted(x, key=lambda c: c['locale'])}, lambda _, v: v['captions_url'])
):
lang = caption.get('localized_language') or ''
subs = {
'url': caption['captions_url'],
'name': format_field(caption, 'localized_country', f'{lang} (%s)', default=lang),
}
if caption.get('localized_creation_method') or is_video_broadcast:
automatic_captions.setdefault(caption['locale'], []).append(subs)
else:
subtitles.setdefault(caption['locale'], []).append(subs)
media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: ( media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict) k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
title = get_first(media, ('title', 'text')) title = get_first(media, ('title', 'text'))
description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text')) description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
uploader_data = (
get_first(media, ('owner', {dict}))
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
or get_first(post, ('node', 'actors', ..., {dict})) or {})
page_title = title or self._html_search_regex(( page_title = title or self._html_search_regex((
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>', r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(?P<content>.*?)</span>', r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(?P<content>.*?)</span>',
@ -513,11 +498,15 @@ class FacebookIE(InfoExtractor):
description = description or self._html_search_meta( description = description or self._html_search_meta(
['description', 'og:description', 'twitter:description'], ['description', 'og:description', 'twitter:description'],
webpage, 'description', default=None) webpage, 'description', default=None)
uploader_data = (
get_first(media, ('owner', {dict}))
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
or get_first(post, ('node', 'actors', ..., {dict}))
or get_first(post, ('event', 'event_creator', {dict})) or {})
uploader = uploader_data.get('name') or ( uploader = uploader_data.get('name') or (
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage)) clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
or self._search_regex( or self._search_regex(
(r'ownerName\s*:\s*"([^"]+)"', *self._og_regexes('title')), webpage, 'uploader', fatal=False)) (r'ownerName\s*:\s*"([^"]+)"', *self._og_regexes('title')), webpage, 'uploader', fatal=False))
timestamp = int_or_none(self._search_regex( timestamp = int_or_none(self._search_regex(
r'<abbr[^>]+data-utime=["\'](\d+)', webpage, r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
'timestamp', default=None)) 'timestamp', default=None))
@ -539,8 +528,6 @@ class FacebookIE(InfoExtractor):
webpage, 'view count', default=None)), webpage, 'view count', default=None)),
'concurrent_view_count': get_first(post, ( 'concurrent_view_count': get_first(post, (
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})), ('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
'automatic_captions': automatic_captions,
'subtitles': subtitles,
} }
info_json_ld = self._search_json_ld(webpage, video_id, default={}) info_json_ld = self._search_json_ld(webpage, video_id, default={})
@ -638,6 +625,29 @@ class FacebookIE(InfoExtractor):
'url': playable_url, 'url': playable_url,
}) })
extract_dash_manifest(video, formats) extract_dash_manifest(video, formats)
automatic_captions, subtitles = {}, {}
is_broadcast = traverse_obj(video, ('is_video_broadcast', {bool}))
for caption in traverse_obj(video, (
'video_available_captions_locales',
{lambda x: sorted(x, key=lambda c: c['locale'])},
lambda _, v: url_or_none(v['captions_url'])
)):
lang = caption.get('localized_language') or 'und'
subs = {
'url': caption['captions_url'],
'name': format_field(caption, 'localized_country', f'{lang} (%s)', default=lang),
}
if caption.get('localized_creation_method') or is_broadcast:
automatic_captions.setdefault(caption['locale'], []).append(subs)
else:
subtitles.setdefault(caption['locale'], []).append(subs)
captions_url = traverse_obj(video, ('captions_url', {url_or_none}))
if captions_url and not automatic_captions and not subtitles:
locale = self._html_search_meta(
['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US')
(automatic_captions if is_broadcast else subtitles)[locale] = [{'url': captions_url}]
info = { info = {
'id': v_id, 'id': v_id,
'formats': formats, 'formats': formats,
@ -647,6 +657,8 @@ class FacebookIE(InfoExtractor):
'timestamp': traverse_obj(video, 'publish_time', 'creation_time', expected_type=int_or_none), 'timestamp': traverse_obj(video, 'publish_time', 'creation_time', expected_type=int_or_none),
'duration': (float_or_none(video.get('playable_duration_in_ms'), 1000) 'duration': (float_or_none(video.get('playable_duration_in_ms'), 1000)
or float_or_none(video.get('length_in_second'))), or float_or_none(video.get('length_in_second'))),
'automatic_captions': automatic_captions,
'subtitles': subtitles,
} }
process_formats(info) process_formats(info)
description = try_get(video, lambda x: x['savable_description']['text']) description = try_get(video, lambda x: x['savable_description']['text'])
@ -681,7 +693,8 @@ class FacebookIE(InfoExtractor):
for edge in edges: for edge in edges:
parse_attachment(edge, key='node') parse_attachment(edge, key='node')
video = data.get('video') or {} video = traverse_obj(data, (
'event', 'cover_media_renderer', 'cover_video'), 'video', expected_type=dict) or {}
if video: if video:
attachments = try_get(video, [ attachments = try_get(video, [
lambda x: x['story']['attachments'], lambda x: x['story']['attachments'],

View File

@ -1,10 +1,54 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none from ..utils import (
int_or_none,
join_nonempty,
merge_dicts,
parse_count,
url_or_none,
urljoin,
)
from ..utils.traversal import traverse_obj
class NFBIE(InfoExtractor): class NFBBaseIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nfb\.ca/film/(?P<id>[^/?#&]+)' _VALID_URL_BASE = r'https?://(?:www\.)?(?P<site>nfb|onf)\.ca'
_GEO_COUNTRIES = ['CA']
def _extract_ep_data(self, webpage, video_id, fatal=False):
return self._search_json(
r'const\s+episodesData\s*=', webpage, 'episode data', video_id,
contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal) or []
def _extract_ep_info(self, data, video_id, slug=None):
info = traverse_obj(data, (lambda _, v: video_id in v['embed_url'], {
'description': ('description', {str}),
'thumbnail': ('thumbnail_url', {url_or_none}),
'uploader': ('data_layer', 'episodeMaker', {str}),
'release_year': ('data_layer', 'episodeYear', {int_or_none}),
'episode': ('data_layer', 'episodeTitle', {str}),
'season': ('data_layer', 'seasonTitle', {str}),
'season_number': ('data_layer', 'seasonTitle', {parse_count}),
'series': ('data_layer', 'seriesTitle', {str}),
}), get_all=False)
return {
**info,
'id': video_id,
'title': join_nonempty('series', 'episode', from_dict=info, delim=' - '),
'episode_number': int_or_none(self._search_regex(
r'[/-]e(?:pisode)?-?(\d+)(?:[/-]|$)', slug or video_id, 'episode number', default=None)),
}
class NFBIE(NFBBaseIE):
IE_NAME = 'nfb'
IE_DESC = 'nfb.ca and onf.ca films and episodes'
_VALID_URL = [
rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>film)/(?P<id>[^/?#&]+)',
rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>series?)/(?P<id>[^/?#&]+/s(?:ea|ai)son\d+/episode\d+)',
]
_TESTS = [{ _TESTS = [{
'note': 'NFB film',
'url': 'https://www.nfb.ca/film/trafficopter/', 'url': 'https://www.nfb.ca/film/trafficopter/',
'info_dict': { 'info_dict': {
'id': 'trafficopter', 'id': 'trafficopter',
@ -14,29 +58,192 @@ class NFBIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Barrie Howells', 'uploader': 'Barrie Howells',
'release_year': 1972, 'release_year': 1972,
'duration': 600.0,
}, },
'params': {'skip_download': 'm3u8'},
}, {
'note': 'ONF film',
'url': 'https://www.onf.ca/film/mal-du-siecle/',
'info_dict': {
'id': 'mal-du-siecle',
'ext': 'mp4',
'title': 'Le mal du siècle',
'description': 'md5:1abf774d77569ebe603419f2d344102b',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Catherine Lepage',
'release_year': 2019,
'duration': 300.0,
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'NFB episode with English title',
'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/season1/episode9/',
'info_dict': {
'id': 'true-north-episode9-true-north-finale-making-it',
'ext': 'mp4',
'title': 'True North: Inside the Rise of Toronto Basketball - Finale: Making It',
'description': 'We catch up with each player in the midst of their journey as they reflect on their road ahead.',
'series': 'True North: Inside the Rise of Toronto Basketball',
'release_year': 2018,
'season': 'Season 1',
'season_number': 1,
'episode': 'Finale: Making It',
'episode_number': 9,
'uploader': 'Ryan Sidhoo',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'ONF episode with French title',
'url': 'https://www.onf.ca/serie/direction-nord-la-montee-du-basketball-a-toronto/saison1/episode9/',
'info_dict': {
'id': 'direction-nord-episode-9',
'ext': 'mp4',
'title': 'Direction nord La montée du basketball à Toronto - Finale : Réussir',
'description': 'md5:349a57419b71432b97bf6083d92b029d',
'series': 'Direction nord La montée du basketball à Toronto',
'release_year': 2018,
'season': 'Saison 1',
'season_number': 1,
'episode': 'Finale : Réussir',
'episode_number': 9,
'uploader': 'Ryan Sidhoo',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'NFB episode with French title (needs geo-bypass)',
'url': 'https://www.nfb.ca/series/etoile-du-nord/saison1/episode1/',
'info_dict': {
'id': 'etoile-du-nord-episode-1-lobservation',
'ext': 'mp4',
'title': 'Étoile du Nord - L\'observation',
'description': 'md5:161a4617260dee3de70f509b2c9dd21b',
'series': 'Étoile du Nord',
'release_year': 2023,
'season': 'Saison 1',
'season_number': 1,
'episode': 'L\'observation',
'episode_number': 1,
'uploader': 'Patrick Bossé',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'ONF episode with English title (needs geo-bypass)',
'url': 'https://www.onf.ca/serie/north-star/season1/episode1/',
'info_dict': {
'id': 'north-star-episode-1-observation',
'ext': 'mp4',
'title': 'North Star - Observation',
'description': 'md5:c727f370839d8a817392b9e3f23655c7',
'series': 'North Star',
'release_year': 2023,
'season': 'Season 1',
'season_number': 1,
'episode': 'Observation',
'episode_number': 1,
'uploader': 'Patrick Bossé',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'NFB episode with /film/ URL and English title (needs geo-bypass)',
'url': 'https://www.nfb.ca/film/north-star-episode-1-observation/',
'info_dict': {
'id': 'north-star-episode-1-observation',
'ext': 'mp4',
'title': 'North Star - Observation',
'description': 'md5:c727f370839d8a817392b9e3f23655c7',
'series': 'North Star',
'release_year': 2023,
'season': 'Season 1',
'season_number': 1,
'episode': 'Observation',
'episode_number': 1,
'uploader': 'Patrick Bossé',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'ONF episode with /film/ URL and French title (needs geo-bypass)',
'url': 'https://www.onf.ca/film/etoile-du-nord-episode-1-lobservation/',
'info_dict': {
'id': 'etoile-du-nord-episode-1-lobservation',
'ext': 'mp4',
'title': 'Étoile du Nord - L\'observation',
'description': 'md5:161a4617260dee3de70f509b2c9dd21b',
'series': 'Étoile du Nord',
'release_year': 2023,
'season': 'Saison 1',
'season_number': 1,
'episode': 'L\'observation',
'episode_number': 1,
'uploader': 'Patrick Bossé',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'Season 2 episode w/o episode num in id, extract from json ld',
'url': 'https://www.onf.ca/film/liste-des-choses-qui-existent-saison-2-ours',
'info_dict': {
'id': 'liste-des-choses-qui-existent-saison-2-ours',
'ext': 'mp4',
'title': 'La liste des choses qui existent - L\'ours en peluche',
'description': 'md5:d5e8d8fc5f3a7385a9cf0f509b37e28a',
'series': 'La liste des choses qui existent',
'release_year': 2022,
'season': 'Saison 2',
'season_number': 2,
'episode': 'L\'ours en peluche',
'episode_number': 12,
'uploader': 'Francis Papillon',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'NFB film /embed/player/ page',
'url': 'https://www.nfb.ca/film/afterlife/embed/player/',
'info_dict': {
'id': 'afterlife',
'ext': 'mp4',
'title': 'Afterlife',
'description': 'md5:84951394f594f1fb1e62d9c43242fdf5',
'release_year': 1978,
'duration': 420.0,
'uploader': 'Ishu Patel',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) site, type_, slug = self._match_valid_url(url).group('site', 'type', 'id')
# Need to construct the URL since we match /embed/player/ URLs as well
webpage, urlh = self._download_webpage_handle(f'https://www.{site}.ca/{type_}/{slug}/', slug)
# type_ can change from film to serie(s) after redirect; new slug may have episode number
type_, slug = self._match_valid_url(urlh.url).group('type', 'id')
webpage = self._download_webpage('https://www.nfb.ca/film/%s/' % video_id, video_id) embed_url = urljoin(f'https://www.{site}.ca', self._html_search_regex(
r'<[^>]+\bid=["\']player-iframe["\'][^>]*\bsrc=["\']([^"\']+)', webpage, 'embed url'))
video_id = self._match_id(embed_url) # embed url has unique slug
player = self._download_webpage(embed_url, video_id, 'Downloading player page')
if 'MESSAGE_GEOBLOCKED' in player:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
iframe = self._html_search_regex( formats, subtitles = self._extract_m3u8_formats_and_subtitles(
r'<[^>]+\bid=["\']player-iframe["\'][^>]*src=["\']([^"\']+)', self._html_search_regex(r'source:\s*\'([^\']+)', player, 'm3u8 url'),
webpage, 'iframe', default=None, fatal=True) video_id, 'mp4', m3u8_id='hls')
if iframe.startswith('/'):
iframe = f'https://www.nfb.ca{iframe}'
player = self._download_webpage(iframe, video_id) if dv_source := self._html_search_regex(r'dvSource:\s*\'([^\']+)', player, 'dv', default=None):
fmts, subs = self._extract_m3u8_formats_and_subtitles(
dv_source, video_id, 'mp4', m3u8_id='dv', preference=-2, fatal=False)
for fmt in fmts:
fmt['format_note'] = 'described video'
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
source = self._html_search_regex( info = {
r'source:\s*\'([^\']+)',
player, 'source', default=None, fatal=True)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(source, video_id, ext='mp4')
return {
'id': video_id, 'id': video_id,
'title': self._html_search_regex( 'title': self._html_search_regex(
r'<[^>]+\bid=["\']titleHeader["\'][^>]*>\s*<h1[^>]*>\s*([^<]+?)\s*</h1>', r'<[^>]+\bid=["\']titleHeader["\'][^>]*>\s*<h1[^>]*>\s*([^<]+?)\s*</h1>',
@ -45,14 +252,49 @@ class NFBIE(InfoExtractor):
r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*<p[^>]*>\s*([^<]+)', r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*<p[^>]*>\s*([^<]+)',
webpage, 'description', default=None), webpage, 'description', default=None),
'thumbnail': self._html_search_regex( 'thumbnail': self._html_search_regex(
r'poster:\s*\'([^\']+)', r'poster:\s*\'([^\']+)', player, 'thumbnail', default=None),
player, 'thumbnail', default=None),
'uploader': self._html_search_regex( 'uploader': self._html_search_regex(
r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
webpage, 'uploader', default=None),
'release_year': int_or_none(self._html_search_regex( 'release_year': int_or_none(self._html_search_regex(
r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)', r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)',
webpage, 'release_year', default=None)), webpage, 'release_year', default=None)),
} if type_ == 'film' else self._extract_ep_info(self._extract_ep_data(webpage, video_id, slug), video_id)
return merge_dicts({
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
} }, info, self._search_json_ld(webpage, video_id, default={}))
class NFBSeriesIE(NFBBaseIE):
IE_NAME = 'nfb:series'
IE_DESC = 'nfb.ca and onf.ca series'
_VALID_URL = rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>series?)/(?P<id>[^/?#&]+)/?(?:[?#]|$)'
_TESTS = [{
'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/',
'playlist_mincount': 9,
'info_dict': {
'id': 'true-north-inside-the-rise-of-toronto-basketball',
},
}, {
'url': 'https://www.onf.ca/serie/la-liste-des-choses-qui-existent-serie/',
'playlist_mincount': 26,
'info_dict': {
'id': 'la-liste-des-choses-qui-existent-serie',
},
}]
def _entries(self, episodes):
for episode in traverse_obj(episodes, lambda _, v: NFBIE.suitable(v['embed_url'])):
mobj = NFBIE._match_valid_url(episode['embed_url'])
yield self.url_result(
mobj[0], NFBIE, **self._extract_ep_info([episode], mobj.group('id')))
def _real_extract(self, url):
site, type_, series_id = self._match_valid_url(url).group('site', 'type', 'id')
season_path = 'saison' if type_ == 'serie' else 'season'
webpage = self._download_webpage(
f'https://www.{site}.ca/{type_}/{series_id}/{season_path}1/episode1', series_id)
episodes = self._extract_ep_data(webpage, series_id, fatal=True)
return self.playlist_result(self._entries(episodes), series_id)

View File

@ -0,0 +1,225 @@
from .common import InfoExtractor
from ..utils import int_or_none, mimetype2ext, parse_iso8601, url_or_none
from ..utils.traversal import traverse_obj
class NinaProtocolIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ninaprotocol\.com/releases/(?P<id>[^/#?]+)'
_TESTS = [{
'url': 'https://www.ninaprotocol.com/releases/3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ',
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ',
'title': 'The Spatulas - March Chant',
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
'channel': 'ppm',
'description': 'md5:bb9f9d39d8f786449cd5d0ff7c5772db',
'album': 'The Spatulas - March Chant',
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'timestamp': 1701417610,
'uploader': 'ppmrecs',
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'display_id': 'the-spatulas-march-chant',
'upload_date': '20231201',
'album_artist': 'Post Present Medium ',
},
'playlist': [{
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_1',
'title': 'March Chant In April',
'track': 'March Chant In April',
'ext': 'mp3',
'duration': 152,
'track_number': 1,
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
'uploader': 'ppmrecs',
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'timestamp': 1701417610,
'channel': 'ppm',
'album': 'The Spatulas - March Chant',
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'upload_date': '20231201',
'album_artist': 'Post Present Medium ',
}
}, {
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_2',
'title': 'Rescue Mission',
'track': 'Rescue Mission',
'ext': 'mp3',
'duration': 212,
'track_number': 2,
'album_artist': 'Post Present Medium ',
'uploader': 'ppmrecs',
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'channel': 'ppm',
'upload_date': '20231201',
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'timestamp': 1701417610,
'album': 'The Spatulas - March Chant',
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
}
}, {
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_3',
'title': 'Slinger Style',
'track': 'Slinger Style',
'ext': 'mp3',
'duration': 179,
'track_number': 3,
'timestamp': 1701417610,
'upload_date': '20231201',
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'album_artist': 'Post Present Medium ',
'album': 'The Spatulas - March Chant',
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'uploader': 'ppmrecs',
'channel': 'ppm',
}
}, {
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_4',
'title': 'Psychic Signal',
'track': 'Psychic Signal',
'ext': 'mp3',
'duration': 220,
'track_number': 4,
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'upload_date': '20231201',
'album': 'The Spatulas - March Chant',
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'timestamp': 1701417610,
'album_artist': 'Post Present Medium ',
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'channel': 'ppm',
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
'uploader': 'ppmrecs',
}
}, {
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_5',
'title': 'Curvy Color',
'track': 'Curvy Color',
'ext': 'mp3',
'duration': 148,
'track_number': 5,
'timestamp': 1701417610,
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'album': 'The Spatulas - March Chant',
'album_artist': 'Post Present Medium ',
'channel': 'ppm',
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'uploader': 'ppmrecs',
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'upload_date': '20231201',
}
}, {
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_6',
'title': 'Caveman Star',
'track': 'Caveman Star',
'ext': 'mp3',
'duration': 121,
'track_number': 6,
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'album_artist': 'Post Present Medium ',
'uploader': 'ppmrecs',
'timestamp': 1701417610,
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
'album': 'The Spatulas - March Chant',
'channel': 'ppm',
'upload_date': '20231201',
},
}],
}, {
'url': 'https://www.ninaprotocol.com/releases/f-g-s-american-shield',
'info_dict': {
'id': '76PZnJwaMgViQHYfA4NYJXds7CmW6vHQKAtQUxGene6J',
'description': 'md5:63f08d5db558b4b36e1896f317062721',
'title': 'F.G.S. - American Shield',
'uploader_id': 'Ej3rozs11wYqFk1Gs6oggGCkGLz8GzBhmJfnUxf6gPci',
'channel_id': '6JuksCZPXuP16wJ1BUfwuukJzh42C7guhLrFPPkVJfyE',
'channel': 'tinkscough',
'tags': [],
'album_artist': 'F.G.S.',
'album': 'F.G.S. - American Shield',
'thumbnail': 'https://www.arweave.net/YJpgImkXLT9SbpFb576KuZ5pm6bdvs452LMs3Rx6lm8',
'display_id': 'f-g-s-american-shield',
'uploader': 'flannerysilva',
'timestamp': 1702395858,
'upload_date': '20231212',
},
'playlist_count': 1,
}, {
'url': 'https://www.ninaprotocol.com/releases/time-to-figure-things-out',
'info_dict': {
'id': '6Zi1nC5hj6b13NkpxVYwRhFy6mYA7oLBbe9DMrgGDcYh',
'display_id': 'time-to-figure-things-out',
'description': 'md5:960202ed01c3134bb8958f1008527e35',
'timestamp': 1706283607,
'title': 'DJ STEPDAD - time to figure things out',
'album_artist': 'DJ STEPDAD',
'uploader': 'tddvsss',
'upload_date': '20240126',
'album': 'time to figure things out',
'uploader_id': 'AXQNRgTyYsySyAMFDwxzumuGjfmoXshorCesjpquwCBi',
'thumbnail': 'https://www.arweave.net/O4i8bcKVqJVZvNeHHFp6r8knpFGh9ZwEgbeYacr4nss',
'tags': [],
},
'playlist_count': 4,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
release = self._download_json(
f'https://api.ninaprotocol.com/v1/releases/{video_id}', video_id)['release']
video_id = release.get('publicKey') or video_id
common_info = traverse_obj(release, {
'album': ('metadata', 'properties', 'title', {str}),
'album_artist': ((('hub', 'data'), 'publisherAccount'), 'displayName', {str}),
'timestamp': ('datetime', {parse_iso8601}),
'thumbnail': ('metadata', 'image', {url_or_none}),
'uploader': ('publisherAccount', 'handle', {str}),
'uploader_id': ('publisherAccount', 'publicKey', {str}),
'channel': ('hub', 'handle', {str}),
'channel_id': ('hub', 'publicKey', {str}),
}, get_all=False)
common_info['tags'] = traverse_obj(release, ('metadata', 'properties', 'tags', ..., {str}))
entries = []
for track_num, track in enumerate(traverse_obj(release, (
'metadata', 'properties', 'files', lambda _, v: url_or_none(v['uri']))), 1):
entries.append({
'id': f'{video_id}_{track_num}',
'url': track['uri'],
**traverse_obj(track, {
'title': ('track_title', {str}),
'track': ('track_title', {str}),
'ext': ('type', {mimetype2ext}),
'track_number': ('track', {int_or_none}),
'duration': ('duration', {int_or_none}),
}),
'vcodec': 'none',
**common_info,
})
return {
'_type': 'playlist',
'id': video_id,
'entries': entries,
**traverse_obj(release, {
'display_id': ('slug', {str}),
'title': ('metadata', 'name', {str}),
'description': ('metadata', 'description', {str}),
}),
**common_info,
}

View File

@ -1,3 +1,4 @@
import base64
import functools import functools
import re import re
@ -565,3 +566,66 @@ class ORFFM4StoryIE(InfoExtractor):
}) })
return self.playlist_result(entries) return self.playlist_result(entries)
class ORFONIE(InfoExtractor):
IE_NAME = 'orf:on'
_VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d{8})/(?P<slug>[\w-]+)'
_TESTS = [{
'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
'info_dict': {
'id': '14210000',
'ext': 'mp4',
'duration': 2651.08,
'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0167/98/thumb_16697671_segments_highlight_teaser.jpeg',
'title': 'School of Champions (4/8)',
'description': 'md5:d09ad279fc2e8502611e7648484b6afd',
'media_type': 'episode',
'timestamp': 1706472362,
'upload_date': '20240128',
}
}]
def _extract_video(self, video_id, display_id):
encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()
api_json = self._download_json(
f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id)
formats, subtitles = [], {}
for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):
for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})):
if manifest_type == 'hls':
fmts, subs = self._extract_m3u8_formats_and_subtitles(
manifest_url, display_id, fatal=False, m3u8_id='hls')
elif manifest_type == 'dash':
fmts, subs = self._extract_mpd_formats_and_subtitles(
manifest_url, display_id, fatal=False, mpd_id='dash')
else:
continue
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
return {
'id': video_id,
'formats': formats,
'subtitles': subtitles,
**traverse_obj(api_json, {
'duration': ('duration_second', {float_or_none}),
'title': (('title', 'headline'), {str}),
'description': (('description', 'teaser_text'), {str}),
'media_type': ('video_type', {str}),
}, get_all=False),
}
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'slug')
webpage = self._download_webpage(url, display_id)
return {
'id': video_id,
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
'description': self._html_search_meta(
['description', 'og:description', 'twitter:description'], webpage, default=None),
**self._search_json_ld(webpage, display_id, fatal=False),
**self._extract_video(video_id, display_id),
}

View File

@ -1,5 +1,8 @@
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import parse_iso8601, traverse_obj, try_call from ..utils import float_or_none, parse_iso8601, str_or_none, try_call
from ..utils.traversal import traverse_obj
class PrankCastIE(InfoExtractor): class PrankCastIE(InfoExtractor):
@ -64,3 +67,71 @@ class PrankCastIE(InfoExtractor):
'categories': [json_info.get('broadcast_category')], 'categories': [json_info.get('broadcast_category')],
'tags': try_call(lambda: json_info['broadcast_tags'].split(',')) 'tags': try_call(lambda: json_info['broadcast_tags'].split(','))
} }
class PrankCastPostIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?prankcast\.com/[^/?#]+/posts/(?P<id>\d+)-(?P<display_id>[^/?#]+)'
_TESTS = [{
'url': 'https://prankcast.com/devonanustart/posts/6214-happy-national-rachel-day-',
'info_dict': {
'id': '6214',
'ext': 'mp3',
'title': 'Happy National Rachel Day!',
'display_id': 'happy-national-rachel-day-',
'timestamp': 1704333938,
'uploader': 'Devonanustart',
'channel_id': '4',
'duration': 13175,
'cast': ['Devonanustart'],
'description': '',
'categories': ['prank call'],
'upload_date': '20240104'
}
}, {
'url': 'https://prankcast.com/despicabledogs/posts/6217-jake-the-work-crow-',
'info_dict': {
'id': '6217',
'ext': 'mp3',
'title': 'Jake the Work Crow!',
'display_id': 'jake-the-work-crow-',
'timestamp': 1704346592,
'uploader': 'despicabledogs',
'channel_id': '957',
'duration': 263.287,
'cast': ['despicabledogs'],
'description': 'https://imgur.com/a/vtxLvKU',
'categories': [],
'upload_date': '20240104'
}
}]
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
webpage = self._download_webpage(url, video_id)
post = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['ssr_data_posts']
content = self._parse_json(post['post_contents_json'], video_id)[0]
uploader = post.get('user_name')
guests_json = traverse_obj(content, ('guests_json', {json.loads}, {dict})) or {}
return {
'id': video_id,
'title': post.get('post_title') or self._og_search_title(webpage),
'display_id': display_id,
'url': content.get('url'),
'timestamp': parse_iso8601(content.get('start_date') or content.get('crdate'), ' '),
'uploader': uploader,
'channel_id': str_or_none(post.get('user_id')),
'duration': float_or_none(content.get('duration')),
'cast': list(filter(None, [uploader] + traverse_obj(guests_json, (..., 'name')))),
'description': post.get('post_body'),
'categories': list(filter(None, [content.get('category')])),
'tags': try_call(lambda: list(filter('', post['post_tags'].split(',')))),
'subtitles': {
'live_chat': [{
'url': f'https://prankcast.com/api/private/chat/select-broadcast?id={post["content_id"]}&cache=',
'ext': 'json',
}],
} if post.get('content_id') else None
}

View File

@ -1,5 +1,6 @@
import base64 import base64
import random import random
import re
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
@ -11,6 +12,7 @@ from ..utils import (
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
) )
from ..utils.traversal import traverse_obj
class RadikoBaseIE(InfoExtractor): class RadikoBaseIE(InfoExtractor):
@ -159,6 +161,12 @@ class RadikoBaseIE(InfoExtractor):
return formats return formats
def _extract_performers(self, prog):
performers = traverse_obj(prog, (
'pfm/text()', ..., {lambda x: re.split(r'[//、 ,]', x)}, ..., {str.strip}))
# TODO: change 'artist' fields to 'artists' and return traversal list instead of str
return ', '.join(performers) or None
class RadikoIE(RadikoBaseIE): class RadikoIE(RadikoBaseIE):
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
@ -186,10 +194,12 @@ class RadikoIE(RadikoBaseIE):
return { return {
'id': video_id, 'id': video_id,
'title': try_call(lambda: prog.find('title').text), 'title': try_call(lambda: prog.find('title').text),
'artist': self._extract_performers(prog),
'description': clean_html(try_call(lambda: prog.find('info').text)), 'description': clean_html(try_call(lambda: prog.find('info').text)),
'uploader': try_call(lambda: station_program.find('.//name').text), 'uploader': try_call(lambda: station_program.find('.//name').text),
'uploader_id': station, 'uploader_id': station,
'timestamp': vid_int, 'timestamp': vid_int,
'duration': try_call(lambda: unified_timestamp(radio_end, False) - unified_timestamp(radio_begin, False)),
'is_live': True, 'is_live': True,
'formats': self._extract_formats( 'formats': self._extract_formats(
video_id=video_id, station=station, is_onair=False, video_id=video_id, station=station, is_onair=False,
@ -243,6 +253,7 @@ class RadikoRadioIE(RadikoBaseIE):
return { return {
'id': station, 'id': station,
'title': title, 'title': title,
'artist': self._extract_performers(prog),
'description': description, 'description': description,
'uploader': station_name, 'uploader': station_name,
'uploader_id': station, 'uploader_id': station,

View File

@ -12,7 +12,7 @@ from ..utils import (
class RedTubeIE(InfoExtractor): class RedTubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com(?:\.br)?/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
_EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)'] _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)']
_TESTS = [{ _TESTS = [{
'url': 'https://www.redtube.com/38864951', 'url': 'https://www.redtube.com/38864951',
@ -35,6 +35,9 @@ class RedTubeIE(InfoExtractor):
}, { }, {
'url': 'http://it.redtube.com/66418', 'url': 'http://it.redtube.com/66418',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.redtube.com.br/103224331',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -25,8 +25,8 @@ class SlidesLiveIE(InfoExtractor):
'id': '38902413', 'id': '38902413',
'ext': 'mp4', 'ext': 'mp4',
'title': 'GCC IA16 backend', 'title': 'GCC IA16 backend',
'timestamp': 1648189972, 'timestamp': 1697793372,
'upload_date': '20220325', 'upload_date': '20231020',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': 'count:42', 'thumbnails': 'count:42',
'chapters': 'count:41', 'chapters': 'count:41',
@ -42,8 +42,8 @@ class SlidesLiveIE(InfoExtractor):
'id': '38935785', 'id': '38935785',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges', 'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
'upload_date': '20211115', 'upload_date': '20231020',
'timestamp': 1636996003, 'timestamp': 1697807002,
'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:640', 'thumbnails': 'count:640',
'chapters': 'count:639', 'chapters': 'count:639',
@ -59,9 +59,9 @@ class SlidesLiveIE(InfoExtractor):
'id': '38973182', 'id': '38973182',
'ext': 'mp4', 'ext': 'mp4',
'title': 'How Should a Machine Learning Researcher Think About AI Ethics?', 'title': 'How Should a Machine Learning Researcher Think About AI Ethics?',
'upload_date': '20220201', 'upload_date': '20231020',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1643728135, 'timestamp': 1697822521,
'thumbnails': 'count:3', 'thumbnails': 'count:3',
'chapters': 'count:2', 'chapters': 'count:2',
'duration': 5889, 'duration': 5889,
@ -70,37 +70,22 @@ class SlidesLiveIE(InfoExtractor):
'skip_download': 'm3u8', 'skip_download': 'm3u8',
}, },
}, { }, {
# service_name = youtube, only XML slides info # formerly youtube, converted to native
'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost', 'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost',
'md5': '8a79b5e3d700837f40bd2afca3c8fa01', 'md5': '8a79b5e3d700837f40bd2afca3c8fa01',
'info_dict': { 'info_dict': {
'id': 'jmg02wCJD5M', 'id': '38897546',
'display_id': '38897546',
'ext': 'mp4', 'ext': 'mp4',
'title': 'SPECIÁL: Meta-přednáška Petra Ludwiga - Hodnoty pro lepší společnost', 'title': 'SPECIÁL: Meta-přednáška Petra Ludwiga - Hodnoty pro lepší společnost',
'description': 'Watch full version of this video at https://slideslive.com/38897546.', 'thumbnail': r're:^https?://.*\.jpg',
'channel_url': 'https://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw', 'upload_date': '20231029',
'channel': 'SlidesLive Videos - G1', 'timestamp': 1698588144,
'channel_id': 'UCZWdAkNYFncuX0khyvhqnxw',
'uploader_id': 'UCZWdAkNYFncuX0khyvhqnxw',
'uploader': 'SlidesLive Videos - G1',
'uploader_url': 'http://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
'live_status': 'not_live',
'upload_date': '20160710',
'timestamp': 1618786715,
'duration': 6827,
'like_count': int,
'view_count': int,
'comment_count': int,
'channel_follower_count': int,
'age_limit': 0,
'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
'thumbnails': 'count:169', 'thumbnails': 'count:169',
'playable_in_embed': True,
'availability': 'unlisted',
'tags': [],
'categories': ['People & Blogs'],
'chapters': 'count:168', 'chapters': 'count:168',
'duration': 6827,
},
'params': {
'skip_download': 'm3u8',
}, },
}, { }, {
# embed-only presentation, only XML slides info # embed-only presentation, only XML slides info
@ -111,8 +96,8 @@ class SlidesLiveIE(InfoExtractor):
'title': 'Towards a Deep Network Architecture for Structured Smoothness', 'title': 'Towards a Deep Network Architecture for Structured Smoothness',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': 'count:8', 'thumbnails': 'count:8',
'timestamp': 1629671508, 'timestamp': 1697803109,
'upload_date': '20210822', 'upload_date': '20231020',
'chapters': 'count:7', 'chapters': 'count:7',
'duration': 326, 'duration': 326,
}, },
@ -128,8 +113,8 @@ class SlidesLiveIE(InfoExtractor):
'title': 'MoReL: Multi-omics Relational Learning', 'title': 'MoReL: Multi-omics Relational Learning',
'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:7', 'thumbnails': 'count:7',
'timestamp': 1654714970, 'timestamp': 1697824939,
'upload_date': '20220608', 'upload_date': '20231020',
'chapters': 'count:6', 'chapters': 'count:6',
'duration': 171, 'duration': 171,
}, },
@ -145,8 +130,8 @@ class SlidesLiveIE(InfoExtractor):
'title': 'Decentralized Attribution of Generative Models', 'title': 'Decentralized Attribution of Generative Models',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': 'count:16', 'thumbnails': 'count:16',
'timestamp': 1622806321, 'timestamp': 1697814901,
'upload_date': '20210604', 'upload_date': '20231020',
'chapters': 'count:15', 'chapters': 'count:15',
'duration': 306, 'duration': 306,
}, },
@ -162,8 +147,8 @@ class SlidesLiveIE(InfoExtractor):
'title': 'Efficient Active Search for Combinatorial Optimization Problems', 'title': 'Efficient Active Search for Combinatorial Optimization Problems',
'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:9', 'thumbnails': 'count:9',
'timestamp': 1654714896, 'timestamp': 1697824757,
'upload_date': '20220608', 'upload_date': '20231020',
'chapters': 'count:8', 'chapters': 'count:8',
'duration': 295, 'duration': 295,
}, },
@ -177,10 +162,10 @@ class SlidesLiveIE(InfoExtractor):
'id': '38979880', 'id': '38979880',
'ext': 'mp4', 'ext': 'mp4',
'title': 'The Representation Power of Neural Networks', 'title': 'The Representation Power of Neural Networks',
'timestamp': 1654714962, 'timestamp': 1697824919,
'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:22', 'thumbnails': 'count:22',
'upload_date': '20220608', 'upload_date': '20231020',
'chapters': 'count:21', 'chapters': 'count:21',
'duration': 294, 'duration': 294,
}, },
@ -200,10 +185,10 @@ class SlidesLiveIE(InfoExtractor):
'id': '38979682', 'id': '38979682',
'ext': 'mp4', 'ext': 'mp4',
'title': 'LoRA: Low-Rank Adaptation of Large Language Models', 'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
'timestamp': 1654714920, 'timestamp': 1697824815,
'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:30', 'thumbnails': 'count:30',
'upload_date': '20220608', 'upload_date': '20231020',
'chapters': 'count:31', 'chapters': 'count:31',
'duration': 272, 'duration': 272,
}, },
@ -213,8 +198,8 @@ class SlidesLiveIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 021', 'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 021',
'duration': 3, 'duration': 3,
'timestamp': 1654714920, 'timestamp': 1697824815,
'upload_date': '20220608', 'upload_date': '20231020',
}, },
}, { }, {
'info_dict': { 'info_dict': {
@ -222,8 +207,8 @@ class SlidesLiveIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 024', 'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 024',
'duration': 4, 'duration': 4,
'timestamp': 1654714920, 'timestamp': 1697824815,
'upload_date': '20220608', 'upload_date': '20231020',
}, },
}], }],
'params': { 'params': {
@ -242,10 +227,10 @@ class SlidesLiveIE(InfoExtractor):
'id': '38979481', 'id': '38979481',
'ext': 'mp4', 'ext': 'mp4',
'title': 'How to Train Your MAML to Excel in Few-Shot Classification', 'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
'timestamp': 1654714877, 'timestamp': 1697824716,
'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:43', 'thumbnails': 'count:43',
'upload_date': '20220608', 'upload_date': '20231020',
'chapters': 'count:43', 'chapters': 'count:43',
'duration': 315, 'duration': 315,
}, },
@ -255,8 +240,8 @@ class SlidesLiveIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'How to Train Your MAML to Excel in Few-Shot Classification - Slide 013', 'title': 'How to Train Your MAML to Excel in Few-Shot Classification - Slide 013',
'duration': 3, 'duration': 3,
'timestamp': 1654714877, 'timestamp': 1697824716,
'upload_date': '20220608', 'upload_date': '20231020',
}, },
}], }],
'params': { 'params': {
@ -275,10 +260,10 @@ class SlidesLiveIE(InfoExtractor):
'channel_id': 'UC62SdArr41t_-_fX40QCLRw', 'channel_id': 'UC62SdArr41t_-_fX40QCLRw',
'channel_url': 'https://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw', 'channel_url': 'https://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
'uploader': 'SlidesLive Videos - A', 'uploader': 'SlidesLive Videos - A',
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw', 'uploader_id': '@slideslivevideos-a6075',
'uploader_url': 'http://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw', 'uploader_url': 'https://www.youtube.com/@slideslivevideos-a6075',
'upload_date': '20200903', 'upload_date': '20200903',
'timestamp': 1602599092, 'timestamp': 1697805922,
'duration': 942, 'duration': 942,
'age_limit': 0, 'age_limit': 0,
'live_status': 'not_live', 'live_status': 'not_live',
@ -303,8 +288,8 @@ class SlidesLiveIE(InfoExtractor):
'id': '38983994', 'id': '38983994',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Zero-Shot AutoML with Pretrained Models', 'title': 'Zero-Shot AutoML with Pretrained Models',
'timestamp': 1662384834, 'timestamp': 1697826708,
'upload_date': '20220905', 'upload_date': '20231020',
'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:23', 'thumbnails': 'count:23',
'chapters': 'count:22', 'chapters': 'count:22',
@ -336,8 +321,8 @@ class SlidesLiveIE(InfoExtractor):
'title': 'Towards a Deep Network Architecture for Structured Smoothness', 'title': 'Towards a Deep Network Architecture for Structured Smoothness',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': 'count:8', 'thumbnails': 'count:8',
'timestamp': 1629671508, 'timestamp': 1697803109,
'upload_date': '20210822', 'upload_date': '20231020',
'chapters': 'count:7', 'chapters': 'count:7',
'duration': 326, 'duration': 326,
}, },
@ -386,7 +371,7 @@ class SlidesLiveIE(InfoExtractor):
if not line.startswith('#EXT-SL-'): if not line.startswith('#EXT-SL-'):
continue continue
tag, _, value = line.partition(':') tag, _, value = line.partition(':')
key = lookup.get(tag.lstrip('#EXT-SL-')) key = lookup.get(tag[8:])
if not key: if not key:
continue continue
m3u8_dict[key] = value m3u8_dict[key] = value

View File

@ -1,5 +1,6 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError from ..utils import ExtractorError, base_url, int_or_none, url_basename
from ..utils.traversal import traverse_obj
class Vbox7IE(InfoExtractor): class Vbox7IE(InfoExtractor):
@ -19,7 +20,7 @@ class Vbox7IE(InfoExtractor):
_GEO_COUNTRIES = ['BG'] _GEO_COUNTRIES = ['BG']
_TESTS = [{ _TESTS = [{
'url': 'http://vbox7.com/play:0946fff23c', 'url': 'http://vbox7.com/play:0946fff23c',
'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf', 'md5': '50ca1f78345a9c15391af47d8062d074',
'info_dict': { 'info_dict': {
'id': '0946fff23c', 'id': '0946fff23c',
'ext': 'mp4', 'ext': 'mp4',
@ -29,19 +30,25 @@ class Vbox7IE(InfoExtractor):
'timestamp': 1470982814, 'timestamp': 1470982814,
'upload_date': '20160812', 'upload_date': '20160812',
'uploader': 'zdraveibulgaria', 'uploader': 'zdraveibulgaria',
}, 'view_count': int,
'params': { 'duration': 2640,
'proxy': '127.0.0.1:8118',
}, },
}, { }, {
'url': 'http://vbox7.com/play:249bb972c2', 'url': 'http://vbox7.com/play:249bb972c2',
'md5': '99f65c0c9ef9b682b97313e052734c3f', 'md5': 'da1dd2eb245200cb86e6d09d43232116',
'info_dict': { 'info_dict': {
'id': '249bb972c2', 'id': '249bb972c2',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Смях! Чудо - чист за секунди - Скрита камера', 'title': 'Смях! Чудо - чист за секунди - Скрита камера',
'uploader': 'svideteliat_ot_varshava',
'view_count': int,
'timestamp': 1360215023,
'thumbnail': 'https://i49.vbox7.com/design/iconci/png/noimg6.png',
'description': 'Смях! Чудо - чист за секунди - Скрита камера',
'upload_date': '20130207',
'duration': 83,
}, },
'skip': 'georestricted', 'expected_warnings': ['Failed to download m3u8 information'],
}, { }, {
'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1', 'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
'only_matching': True, 'only_matching': True,
@ -53,41 +60,38 @@ class Vbox7IE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
response = self._download_json( data = self._download_json(
'https://www.vbox7.com/ajax/video/nextvideo.php?vid=%s' % video_id, 'https://www.vbox7.com/aj/player/item/options', video_id,
video_id) query={'vid': video_id})['options']
if 'error' in response: src_url = data.get('src')
raise ExtractorError( if src_url in (None, '', 'blank'):
'%s said: %s' % (self.IE_NAME, response['error']), expected=True) raise ExtractorError('Video is unavailable', expected=True)
video = response['options'] fmt_base = url_basename(src_url).rsplit('.', 1)[0].rsplit('_', 1)[0]
if fmt_base == 'vn':
self.raise_geo_restricted()
title = video['title'] fmt_base = base_url(src_url) + fmt_base
video_url = video['src']
if '/na.mp4' in video_url: formats = self._extract_m3u8_formats(
self.raise_geo_restricted(countries=self._GEO_COUNTRIES) f'{fmt_base}.m3u8', video_id, m3u8_id='hls', fatal=False)
# TODO: Add MPD formats, when dash range support is added
uploader = video.get('uploader') for res in traverse_obj(data, ('resolutions', lambda _, v: v != 0, {int})):
formats.append({
webpage = self._download_webpage( 'url': f'{fmt_base}_{res}.mp4',
'http://vbox7.com/play:%s' % video_id, video_id, fatal=None) 'format_id': f'http-{res}',
'height': res,
info = {}
if webpage:
info = self._search_json_ld(
webpage.replace('"/*@context"', '"@context"'), video_id,
fatal=False)
info.update({
'id': video_id,
'title': title,
'url': video_url,
'uploader': uploader,
'thumbnail': self._proto_relative_url(
info.get('thumbnail') or self._og_search_thumbnail(webpage),
'http:'),
}) })
return info
return {
'id': video_id,
'formats': formats,
**self._search_json_ld(self._download_webpage(
f'https://www.vbox7.com/play:{video_id}', video_id, fatal=False) or '', video_id, fatal=False),
**traverse_obj(data, {
'title': ('title', {str}),
'uploader': ('uploader', {str}),
'duration': ('duration', {int_or_none}),
}),
}

View File

@ -12,7 +12,7 @@ from ..utils import (
class ViewLiftBaseIE(InfoExtractor): class ViewLiftBaseIE(InfoExtractor):
_API_BASE = 'https://prod-api.viewlift.com/' _API_BASE = 'https://prod-api.viewlift.com/'
_DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm|failarmy|ftfnext|lnppass\.legapallacanestro|moviespree|app\.myoutdoortv|neoufitness|pflmma|theidentitytb)\.com|(?:hoichoi|app\.horseandcountry|kronon|marquee|supercrosslive)\.tv' _DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm|failarmy|ftfnext|lnppass\.legapallacanestro|moviespree|app\.myoutdoortv|neoufitness|pflmma|theidentitytb|chorki)\.com|(?:hoichoi|app\.horseandcountry|kronon|marquee|supercrosslive)\.tv'
_SITE_MAP = { _SITE_MAP = {
'ftfnext': 'lax', 'ftfnext': 'lax',
'funnyforfree': 'snagfilms', 'funnyforfree': 'snagfilms',
@ -27,6 +27,7 @@ class ViewLiftBaseIE(InfoExtractor):
'snagxtreme': 'snagfilms', 'snagxtreme': 'snagfilms',
'theidentitytb': 'tampabay', 'theidentitytb': 'tampabay',
'vayafilm': 'snagfilms', 'vayafilm': 'snagfilms',
'chorki': 'prothomalo',
} }
_TOKENS = {} _TOKENS = {}
@ -296,6 +297,33 @@ class ViewLiftIE(ViewLiftBaseIE):
}, { # Premium movie }, { # Premium movie
'url': 'https://www.hoichoi.tv/movies/detective-2020', 'url': 'https://www.hoichoi.tv/movies/detective-2020',
'only_matching': True 'only_matching': True
}, { # Chorki Premium series
'url': 'https://www.chorki.com/bn/series/sinpaat',
'playlist_mincount': 7,
'info_dict': {
'id': 'bn/series/sinpaat',
},
}, { # Chorki free movie
'url': 'https://www.chorki.com/bn/videos/bangla-movie-bikkhov',
'info_dict': {
'id': '564e755b-f5c7-4515-aee6-8959bee18c93',
'title': 'Bikkhov',
'ext': 'mp4',
'upload_date': '20230824',
'timestamp': 1692860553,
'categories': ['Action Movies', 'Salman Special'],
'tags': 'count:14',
'thumbnail': 'https://snagfilms-a.akamaihd.net/dd078ff5-b16e-45e4-9723-501b56b9df0a/images/2023/08/24/1692860450729_1920x1080_16x9Images.jpg',
'display_id': 'bn/videos/bangla-movie-bikkhov',
'description': 'md5:71492b086450625f4374a3eb824f27dc',
'duration': 8002,
},
'params': {
'skip_download': True,
},
}, { # Chorki Premium movie
'url': 'https://www.chorki.com/bn/videos/something-like-an-autobiography',
'only_matching': True,
}] }]
@classmethod @classmethod

View File

@ -269,7 +269,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {} 'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {}
if not jwt_response.get('jwt'): if not jwt_response.get('jwt'):
return return
headers = {'Authorization': 'jwt %s' % jwt_response['jwt']} headers = {'Authorization': 'jwt %s' % jwt_response['jwt'], 'Accept': 'application/json'}
original_response = self._download_json( original_response = self._download_json(
f'https://api.vimeo.com/videos/{video_id}', video_id, f'https://api.vimeo.com/videos/{video_id}', video_id,
headers=headers, fatal=False, expected_status=(403, 404)) or {} headers=headers, fatal=False, expected_status=(403, 404)) or {}
@ -751,6 +751,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
video = self._download_json( video = self._download_json(
api_url, video_id, headers={ api_url, video_id, headers={
'Authorization': 'jwt ' + token, 'Authorization': 'jwt ' + token,
'Accept': 'application/json',
}, query={ }, query={
'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays', 'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
}) })
@ -785,7 +786,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
jwt = viewer['jwt'] jwt = viewer['jwt']
album = self._download_json( album = self._download_json(
'https://api.vimeo.com/albums/' + album_id, 'https://api.vimeo.com/albums/' + album_id,
album_id, headers={'Authorization': 'jwt ' + jwt}, album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
query={'fields': 'description,name,privacy'}) query={'fields': 'description,name,privacy'})
if try_get(album, lambda x: x['privacy']['view']) == 'password': if try_get(album, lambda x: x['privacy']['view']) == 'password':
password = self.get_param('videopassword') password = self.get_param('videopassword')
@ -1147,10 +1148,12 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
'https://api.vimeo.com/albums/%s/videos' % album_id, 'https://api.vimeo.com/albums/%s/videos' % album_id,
album_id, 'Downloading page %d' % api_page, query=query, headers={ album_id, 'Downloading page %d' % api_page, query=query, headers={
'Authorization': 'jwt ' + authorization, 'Authorization': 'jwt ' + authorization,
'Accept': 'application/json',
})['data'] })['data']
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 400: if isinstance(e.cause, HTTPError) and e.cause.status == 400:
return return
raise
for video in videos: for video in videos:
link = video.get('link') link = video.get('link')
if not link: if not link:
@ -1171,7 +1174,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
jwt = viewer['jwt'] jwt = viewer['jwt']
album = self._download_json( album = self._download_json(
'https://api.vimeo.com/albums/' + album_id, 'https://api.vimeo.com/albums/' + album_id,
album_id, headers={'Authorization': 'jwt ' + jwt}, album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
query={'fields': 'description,name,privacy'}) query={'fields': 'description,name,privacy'})
hashed_pass = None hashed_pass = None
if try_get(album, lambda x: x['privacy']['view']) == 'password': if try_get(album, lambda x: x['privacy']['view']) == 'password':

View File

@ -0,0 +1,71 @@
from .common import InfoExtractor
from ..utils import merge_dicts, unified_timestamp, url_or_none
from ..utils.traversal import traverse_obj
class ZetlandDKArticleIE(InfoExtractor):
_VALID_URL = r'https?://www\.zetland\.dk/\w+/(?P<id>(?P<story_id>\w{8})-(?P<uploader_id>\w{8})-(?:\w{5}))'
_TESTS = [{
'url': 'https://www.zetland.dk/historie/sO9aq2MY-a81VP3BY-66e69?utm_source=instagram&utm_medium=linkibio&utm_campaign=artikel',
'info_dict': {
'id': 'sO9aq2MY-a81VP3BY-66e69',
'ext': 'mp3',
'modified_date': '20240118',
'title': 'Afsnit 1: “Det føltes som en kidnapning.” ',
'upload_date': '20240116',
'uploader_id': 'a81VP3BY',
'modified_timestamp': 1705568739,
'release_timestamp': 1705377592,
'uploader_url': 'https://www.zetland.dk/skribent/a81VP3BY',
'uploader': 'Helle Fuusager',
'release_date': '20240116',
'thumbnail': r're:https://zetland\.imgix\.net/2aafe500-b14e-11ee-bf83-65d5e1283a57/Zetland_Image_1\.jpg',
'description': 'md5:9619d426772c133f5abb26db27f26a01',
'timestamp': 1705377592,
'series_id': '62d54630-e87b-4ab1-a255-8de58dbe1b14',
}
}]
def _real_extract(self, url):
display_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
webpage = self._download_webpage(url, display_id)
next_js_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']
story_data = traverse_obj(next_js_data, ('initialState', 'consume', 'story', 'story'))
formats = []
for audio_url in traverse_obj(story_data, ('story_content', 'meta', 'audioFiles', ..., {url_or_none})):
formats.append({
'url': audio_url,
'vcodec': 'none',
})
return merge_dicts({
'id': display_id,
'formats': formats,
'uploader_id': uploader_id
}, traverse_obj(story_data, {
'title': ((('story_content', 'content', 'title'), 'title'), {str}),
'uploader': ('sharer', 'name'),
'uploader_id': ('sharer', 'sharer_id'),
'description': ('story_content', 'content', 'socialDescription'),
'series_id': ('story_content', 'meta', 'seriesId'),
'release_timestamp': ('published_at', {unified_timestamp}),
'modified_timestamp': ('revised_at', {unified_timestamp}),
}, get_all=False), traverse_obj(next_js_data, ('metaInfo', {
'title': ((('meta', 'title'), ('ld', 'headline'), ('og', 'og:title'), ('og', 'twitter:title')), {str}),
'description': ((('meta', 'description'), ('ld', 'description'), ('og', 'og:description'), ('og', 'twitter:description')), {str}),
'uploader': ((('meta', 'author'), ('ld', 'author', 'name')), {str}),
'uploader_url': ('ld', 'author', 'url', {url_or_none}),
'thumbnail': ((('ld', 'image'), ('og', 'og:image'), ('og', 'twitter:image')), {url_or_none}),
'modified_timestamp': ('ld', 'dateModified', {unified_timestamp}),
'release_timestamp': ('ld', 'datePublished', {unified_timestamp}),
'timestamp': ('ld', 'dateCreated', {unified_timestamp}),
}), get_all=False), {
'title': self._html_search_meta(['title', 'og:title', 'twitter:title'], webpage),
'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
'uploader': self._html_search_meta(['author'], webpage),
'release_timestamp': unified_timestamp(self._html_search_meta(['article:published_time'], webpage)),
}, self._search_json_ld(webpage, display_id, fatal=False))

View File

@ -476,7 +476,8 @@ def create_parser():
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'], 'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'], 'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'],
'2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'], '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
'2022': ['no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'], '2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter'],
'2023': ['prefer-legacy-http-handler', 'manifest-filesize-approx'],
} }
}, help=( }, help=(
'Options that can help keep compatibility with youtube-dl or youtube-dlc ' 'Options that can help keep compatibility with youtube-dl or youtube-dlc '