mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-10-03 07:41:27 +02:00
Compare commits
22 Commits
23cb98427b
...
887f6f07d1
Author | SHA1 | Date | |
---|---|---|---|
|
887f6f07d1 | ||
|
b1adb4e883 | ||
|
19a46f51c2 | ||
|
e3ce2b385e | ||
|
4253e3b7f4 | ||
|
8e765755f7 | ||
|
ffa017cfc5 | ||
|
a0d50aabc5 | ||
|
2f4b575946 | ||
|
fc2cc626f0 | ||
|
a2bac6b7ad | ||
|
4b8b0dded8 | ||
|
4a6ff0b47a | ||
|
62c65bfaf8 | ||
|
d63eae7e7f | ||
|
2792092afd | ||
|
cbed249aaa | ||
|
3725b4f0c9 | ||
|
67bb70cd70 | ||
|
9b5efaf86b | ||
|
999ea80beb | ||
|
41b6cdb419 |
|
@ -167,7 +167,8 @@ For ease of use, a few more compat options are available:
|
||||||
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx`
|
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx`
|
||||||
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx`
|
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx`
|
||||||
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
|
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
|
||||||
* `--compat-options 2022`: Same as `--compat-options playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`. Use this to enable all future compat options
|
* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress`
|
||||||
|
* `--compat-options 2023`: Same as `--compat-options prefer-legacy-http-handler,manifest-filesize-approx`. Use this to enable all future compat options
|
||||||
|
|
||||||
|
|
||||||
# INSTALLATION
|
# INSTALLATION
|
||||||
|
|
|
@ -2451,7 +2451,7 @@ class YoutubeDL:
|
||||||
# for extractors with incomplete formats (audio only (soundcloud)
|
# for extractors with incomplete formats (audio only (soundcloud)
|
||||||
# or video only (imgur)) best/worst will fallback to
|
# or video only (imgur)) best/worst will fallback to
|
||||||
# best/worst {video,audio}-only format
|
# best/worst {video,audio}-only format
|
||||||
matches = formats
|
matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))
|
||||||
elif seperate_fallback and not ctx['has_merged_format']:
|
elif seperate_fallback and not ctx['has_merged_format']:
|
||||||
# for compatibility with youtube-dl when there is no pre-merged format
|
# for compatibility with youtube-dl when there is no pre-merged format
|
||||||
matches = list(filter(seperate_fallback, formats))
|
matches = list(filter(seperate_fallback, formats))
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import base64
|
import base64
|
||||||
import collections
|
import collections
|
||||||
import contextlib
|
import contextlib
|
||||||
|
import glob
|
||||||
import http.cookiejar
|
import http.cookiejar
|
||||||
import http.cookies
|
import http.cookies
|
||||||
import io
|
import io
|
||||||
|
@ -23,7 +24,8 @@ from .aes import (
|
||||||
aes_gcm_decrypt_and_verify_bytes,
|
aes_gcm_decrypt_and_verify_bytes,
|
||||||
unpad_pkcs7,
|
unpad_pkcs7,
|
||||||
)
|
)
|
||||||
from .compat import functools
|
from .compat import functools # isort: split
|
||||||
|
from .compat import compat_os_name
|
||||||
from .dependencies import (
|
from .dependencies import (
|
||||||
_SECRETSTORAGE_UNAVAILABLE_REASON,
|
_SECRETSTORAGE_UNAVAILABLE_REASON,
|
||||||
secretstorage,
|
secretstorage,
|
||||||
|
@ -31,6 +33,7 @@ from .dependencies import (
|
||||||
)
|
)
|
||||||
from .minicurses import MultilinePrinter, QuietMultilinePrinter
|
from .minicurses import MultilinePrinter, QuietMultilinePrinter
|
||||||
from .utils import (
|
from .utils import (
|
||||||
|
DownloadError,
|
||||||
Popen,
|
Popen,
|
||||||
error_to_str,
|
error_to_str,
|
||||||
expand_path,
|
expand_path,
|
||||||
|
@ -122,13 +125,14 @@ def _extract_firefox_cookies(profile, container, logger):
|
||||||
return YoutubeDLCookieJar()
|
return YoutubeDLCookieJar()
|
||||||
|
|
||||||
if profile is None:
|
if profile is None:
|
||||||
search_root = _firefox_browser_dir()
|
search_roots = list(_firefox_browser_dirs())
|
||||||
elif _is_path(profile):
|
elif _is_path(profile):
|
||||||
search_root = profile
|
search_roots = [profile]
|
||||||
else:
|
else:
|
||||||
search_root = os.path.join(_firefox_browser_dir(), profile)
|
search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()]
|
||||||
|
search_root = ', '.join(map(repr, search_roots))
|
||||||
|
|
||||||
cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
|
cookie_database_path = _newest(_firefox_cookie_dbs(search_roots))
|
||||||
if cookie_database_path is None:
|
if cookie_database_path is None:
|
||||||
raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
|
raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
|
||||||
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
|
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
|
||||||
|
@ -182,12 +186,21 @@ def _extract_firefox_cookies(profile, container, logger):
|
||||||
cursor.connection.close()
|
cursor.connection.close()
|
||||||
|
|
||||||
|
|
||||||
def _firefox_browser_dir():
|
def _firefox_browser_dirs():
|
||||||
if sys.platform in ('cygwin', 'win32'):
|
if sys.platform in ('cygwin', 'win32'):
|
||||||
return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
|
yield os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
|
||||||
|
|
||||||
elif sys.platform == 'darwin':
|
elif sys.platform == 'darwin':
|
||||||
return os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
|
yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
|
||||||
return os.path.expanduser('~/.mozilla/firefox')
|
|
||||||
|
else:
|
||||||
|
yield from map(os.path.expanduser, ('~/.mozilla/firefox', '~/snap/firefox/common/.mozilla/firefox'))
|
||||||
|
|
||||||
|
|
||||||
|
def _firefox_cookie_dbs(roots):
|
||||||
|
for root in map(os.path.abspath, roots):
|
||||||
|
for pattern in ('', '*/', 'Profiles/*/'):
|
||||||
|
yield from glob.iglob(os.path.join(root, pattern, 'cookies.sqlite'))
|
||||||
|
|
||||||
|
|
||||||
def _get_chromium_based_browser_settings(browser_name):
|
def _get_chromium_based_browser_settings(browser_name):
|
||||||
|
@ -268,7 +281,7 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
|
||||||
logger.error(f'{browser_name} does not support profiles')
|
logger.error(f'{browser_name} does not support profiles')
|
||||||
search_root = config['browser_dir']
|
search_root = config['browser_dir']
|
||||||
|
|
||||||
cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
|
cookie_database_path = _newest(_find_files(search_root, 'Cookies', logger))
|
||||||
if cookie_database_path is None:
|
if cookie_database_path is None:
|
||||||
raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
|
raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
|
||||||
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
|
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
|
||||||
|
@ -307,6 +320,12 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
|
||||||
counts['unencrypted'] = unencrypted_cookies
|
counts['unencrypted'] = unencrypted_cookies
|
||||||
logger.debug(f'cookie version breakdown: {counts}')
|
logger.debug(f'cookie version breakdown: {counts}')
|
||||||
return jar
|
return jar
|
||||||
|
except PermissionError as error:
|
||||||
|
if compat_os_name == 'nt' and error.errno == 13:
|
||||||
|
message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
|
||||||
|
logger.error(message)
|
||||||
|
raise DownloadError(message) # force exit
|
||||||
|
raise
|
||||||
finally:
|
finally:
|
||||||
if cursor is not None:
|
if cursor is not None:
|
||||||
cursor.connection.close()
|
cursor.connection.close()
|
||||||
|
@ -947,7 +966,7 @@ def _get_windows_v10_key(browser_root, logger):
|
||||||
References:
|
References:
|
||||||
- [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
|
- [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
|
||||||
"""
|
"""
|
||||||
path = _find_most_recently_used_file(browser_root, 'Local State', logger)
|
path = _newest(_find_files(browser_root, 'Local State', logger))
|
||||||
if path is None:
|
if path is None:
|
||||||
logger.error('could not find local state file')
|
logger.error('could not find local state file')
|
||||||
return None
|
return None
|
||||||
|
@ -1049,17 +1068,20 @@ def _get_column_names(cursor, table_name):
|
||||||
return [row[1].decode() for row in table_info]
|
return [row[1].decode() for row in table_info]
|
||||||
|
|
||||||
|
|
||||||
def _find_most_recently_used_file(root, filename, logger):
|
def _newest(files):
|
||||||
|
return max(files, key=lambda path: os.lstat(path).st_mtime, default=None)
|
||||||
|
|
||||||
|
|
||||||
|
def _find_files(root, filename, logger):
|
||||||
# if there are multiple browser profiles, take the most recently used one
|
# if there are multiple browser profiles, take the most recently used one
|
||||||
i, paths = 0, []
|
i = 0
|
||||||
with _create_progress_bar(logger) as progress_bar:
|
with _create_progress_bar(logger) as progress_bar:
|
||||||
for curr_root, dirs, files in os.walk(root):
|
for curr_root, _, files in os.walk(root):
|
||||||
for file in files:
|
for file in files:
|
||||||
i += 1
|
i += 1
|
||||||
progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
|
progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
|
||||||
if file == filename:
|
if file == filename:
|
||||||
paths.append(os.path.join(curr_root, file))
|
yield os.path.join(curr_root, file)
|
||||||
return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime)
|
|
||||||
|
|
||||||
|
|
||||||
def _merge_cookie_jars(jars):
|
def _merge_cookie_jars(jars):
|
||||||
|
@ -1073,7 +1095,7 @@ def _merge_cookie_jars(jars):
|
||||||
|
|
||||||
|
|
||||||
def _is_path(value):
|
def _is_path(value):
|
||||||
return os.path.sep in value
|
return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep)
|
||||||
|
|
||||||
|
|
||||||
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
|
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
|
||||||
|
|
|
@ -138,6 +138,10 @@ from .ard import (
|
||||||
ARDMediathekCollectionIE,
|
ARDMediathekCollectionIE,
|
||||||
ARDIE,
|
ARDIE,
|
||||||
)
|
)
|
||||||
|
from .art19 import (
|
||||||
|
Art19IE,
|
||||||
|
Art19ShowIE,
|
||||||
|
)
|
||||||
from .arte import (
|
from .arte import (
|
||||||
ArteTVIE,
|
ArteTVIE,
|
||||||
ArteTVEmbedIE,
|
ArteTVEmbedIE,
|
||||||
|
@ -1243,7 +1247,10 @@ from .nexx import (
|
||||||
NexxIE,
|
NexxIE,
|
||||||
NexxEmbedIE,
|
NexxEmbedIE,
|
||||||
)
|
)
|
||||||
from .nfb import NFBIE
|
from .nfb import (
|
||||||
|
NFBIE,
|
||||||
|
NFBSeriesIE,
|
||||||
|
)
|
||||||
from .nfhsnetwork import NFHSNetworkIE
|
from .nfhsnetwork import NFHSNetworkIE
|
||||||
from .nfl import (
|
from .nfl import (
|
||||||
NFLIE,
|
NFLIE,
|
||||||
|
@ -1280,6 +1287,7 @@ from .niconico import (
|
||||||
NicovideoTagURLIE,
|
NicovideoTagURLIE,
|
||||||
NiconicoLiveIE,
|
NiconicoLiveIE,
|
||||||
)
|
)
|
||||||
|
from .ninaprotocol import NinaProtocolIE
|
||||||
from .ninecninemedia import (
|
from .ninecninemedia import (
|
||||||
NineCNineMediaIE,
|
NineCNineMediaIE,
|
||||||
CPTwentyFourIE,
|
CPTwentyFourIE,
|
||||||
|
@ -1386,6 +1394,7 @@ from .ora import OraTVIE
|
||||||
from .orf import (
|
from .orf import (
|
||||||
ORFTVthekIE,
|
ORFTVthekIE,
|
||||||
ORFFM4StoryIE,
|
ORFFM4StoryIE,
|
||||||
|
ORFONIE,
|
||||||
ORFRadioIE,
|
ORFRadioIE,
|
||||||
ORFPodcastIE,
|
ORFPodcastIE,
|
||||||
ORFIPTVIE,
|
ORFIPTVIE,
|
||||||
|
@ -1510,7 +1519,7 @@ from .puhutv import (
|
||||||
PuhuTVSerieIE,
|
PuhuTVSerieIE,
|
||||||
)
|
)
|
||||||
from .pr0gramm import Pr0grammIE
|
from .pr0gramm import Pr0grammIE
|
||||||
from .prankcast import PrankCastIE
|
from .prankcast import PrankCastIE, PrankCastPostIE
|
||||||
from .premiershiprugby import PremiershipRugbyIE
|
from .premiershiprugby import PremiershipRugbyIE
|
||||||
from .presstv import PressTVIE
|
from .presstv import PressTVIE
|
||||||
from .projectveritas import ProjectVeritasIE
|
from .projectveritas import ProjectVeritasIE
|
||||||
|
@ -2488,6 +2497,7 @@ from .zee5 import (
|
||||||
Zee5SeriesIE,
|
Zee5SeriesIE,
|
||||||
)
|
)
|
||||||
from .zeenews import ZeeNewsIE
|
from .zeenews import ZeeNewsIE
|
||||||
|
from .zetland import ZetlandDKArticleIE
|
||||||
from .zhihu import ZhihuIE
|
from .zhihu import ZhihuIE
|
||||||
from .zingmp3 import (
|
from .zingmp3 import (
|
||||||
ZingMp3IE,
|
ZingMp3IE,
|
||||||
|
|
303
yt_dlp/extractor/art19.py
Normal file
303
yt_dlp/extractor/art19.py
Normal file
|
@ -0,0 +1,303 @@
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import float_or_none, int_or_none, parse_iso8601, url_or_none
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class Art19IE(InfoExtractor):
    """Extractor for a single ART19 episode.

    Accepts episode pages on art19.com and direct ``rss.art19.com`` MP3 links,
    and detects episodes embedded in other pages through an iframe or an
    ``art19-web-player`` div carrying a ``data-episode-id`` attribute.
    """
    _UUID_REGEX = r'[\da-f]{8}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{12}'
    _VALID_URL = [
        rf'https?://(?:www\.)?art19\.com/shows/[^/#?]+/episodes/(?P<id>{_UUID_REGEX})',
        rf'https?://rss\.art19\.com/episodes/(?P<id>{_UUID_REGEX})\.mp3',
    ]
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL[0]})']

    _TESTS = [{
        'url': 'https://rss.art19.com/episodes/5ba1413c-48b8-472b-9cc3-cfd952340bdb.mp3',
        'info_dict': {
            'id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
            'ext': 'mp3',
            'title': 'Why Did DeSantis Drop Out?',
            'series': 'The Daily Briefing',
            'release_timestamp': 1705941275,
            'description': 'md5:da38961da4a3f7e419471365e3c6b49f',
            'episode': 'Episode 582',
            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
            'series_id': 'ed52a0ab-08b1-4def-8afc-549e4d93296d',
            'upload_date': '20240122',
            'timestamp': 1705940815,
            'episode_number': 582,
            'modified_date': '20240122',
            'episode_id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
            'modified_timestamp': 1705941275,
            'release_date': '20240122',
            'duration': 527.4,
        },
    }, {
        'url': 'https://art19.com/shows/scamfluencers/episodes/8319b776-4153-4d22-8630-631f204a03dd',
        'info_dict': {
            'id': '8319b776-4153-4d22-8630-631f204a03dd',
            'ext': 'mp3',
            'title': 'Martha Stewart: The Homemaker Hustler Part 2',
            'modified_date': '20240116',
            'upload_date': '20240105',
            'modified_timestamp': 1705435802,
            'episode_id': '8319b776-4153-4d22-8630-631f204a03dd',
            'series_id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
            'description': 'md5:4aa7cfd1358dc57e729835bc208d7893',
            'release_timestamp': 1705305660,
            'release_date': '20240115',
            'timestamp': 1704481536,
            'episode_number': 88,
            'series': 'Scamfluencers',
            'duration': 2588.37501,
            'episode': 'Episode 88',
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.nu.nl/formule-1/6291456/verstappen-wordt-een-synoniem-voor-formule-1.html',
        'info_dict': {
            'id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
            'ext': 'mp3',
            'title': "'Verstappen wordt een synoniem voor Formule 1'",
            'season': 'Seizoen 6',
            'description': 'md5:39a7159a31c4cda312b2e893bdd5c071',
            'episode_id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
            'duration': 3061.82111,
            'series_id': '93f4e113-2a60-4609-a564-755058fa40d8',
            'release_date': '20231126',
            'modified_timestamp': 1701156004,
            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
            'season_number': 6,
            'episode_number': 52,
            'modified_date': '20231128',
            'upload_date': '20231126',
            'timestamp': 1701025981,
            'season_id': '36097c1e-7455-490d-a2fe-e2f10b4d5f26',
            'series': 'De Boordradio',
            'release_timestamp': 1701026308,
            'episode': 'Episode 52',
        },
    }, {
        'url': 'https://www.wishtv.com/podcast-episode/larry-bucshon-announces-retirement-from-congress/',
        'info_dict': {
            'id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
            'ext': 'mp3',
            'title': 'Larry Bucshon announces retirement from congress',
            'upload_date': '20240115',
            'episode_number': 148,
            'episode': 'Episode 148',
            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
            'release_date': '20240115',
            'timestamp': 1705328205,
            'release_timestamp': 1705329275,
            'series': 'All INdiana Politics',
            'modified_date': '20240117',
            'modified_timestamp': 1705458901,
            'series_id': 'c4af6c27-b10f-4ff2-9f84-0f407df86ff1',
            'episode_id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
            'description': 'md5:53b5239e4d14973a87125c217c255b2a',
            'duration': 1256.18848,
        },
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield embed URLs from *webpage*: inherited matches first, then web-player divs."""
        yield from super()._extract_embed_urls(url, webpage)

        # Player <div>s carry only the episode UUID; turn each one into the
        # direct MP3 URL, which the second _VALID_URL pattern accepts
        player_div_re = rf'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-episode-id=[\'"]({cls._UUID_REGEX})[\'"]'
        yield from (
            f'https://rss.art19.com/episodes/{found_id}.mp3'
            for found_id in re.findall(player_div_re, webpage))

    def _real_extract(self, url):
        """Build the info dict for one episode from the player and RSS JSON documents.

        Both metadata downloads are non-fatal. The direct MP3 URL is always
        offered as a format, so the result has at least one format even when
        neither document could be fetched.
        """
        episode_id = self._match_id(url)

        player_metadata = self._download_json(
            f'https://art19.com/episodes/{episode_id}', episode_id,
            note='Downloading player metadata', fatal=False,
            headers={'Accept': 'application/vnd.art19.v0+json'})
        rss_metadata = self._download_json(
            f'https://rss.art19.com/episodes/{episode_id}.json', episode_id, fatal=False,
            note='Downloading RSS metadata')

        formats = [{
            'format_id': 'direct',
            'url': f'https://rss.art19.com/episodes/{episode_id}.mp3',
            'vcodec': 'none',
            'acodec': 'mp3',
        }]
        media_entries = traverse_obj(rss_metadata, ('content', 'media', {dict.items}, ...))
        for media_name, media_info in media_entries:
            # 'waveform_bin' is skipped outright; other entries need a usable URL
            media_url = None
            if media_name != 'waveform_bin':
                media_url = traverse_obj(media_info, ('url', {url_or_none}))
            if not media_url:
                continue

            if media_name == 'ogg':
                quality = -2
            else:
                quality = -1
            formats.append({
                'format_id': media_name,
                'url': media_url,
                'vcodec': 'none',
                'acodec': media_name,
                'quality': quality,
            })

        player_fields = traverse_obj(player_metadata, ('episode', {
            'title': ('title', {str}),
            'description': ('description_plain', {str}),
            'episode_id': ('id', {str}),
            'episode_number': ('episode_number', {int_or_none}),
            'season_id': ('season_id', {str}),
            'series_id': ('series_id', {str}),
            'timestamp': ('created_at', {parse_iso8601}),
            'release_timestamp': ('released_at', {parse_iso8601}),
            'modified_timestamp': ('updated_at', {parse_iso8601})
        }))
        rss_fields = traverse_obj(rss_metadata, ('content', {
            'title': ('episode_title', {str}),
            'description': ('episode_description_plain', {str}),
            'episode_id': ('episode_id', {str}),
            'episode_number': ('episode_number', {int_or_none}),
            'season': ('season_title', {str}),
            'season_id': ('season_id', {str}),
            'season_number': ('season_number', {int_or_none}),
            'series': ('series_title', {str}),
            'series_id': ('series_id', {str}),
            'thumbnail': ('cover_image', {url_or_none}),
            'duration': ('duration', {float_or_none}),
        }))

        # RSS fields are unpacked last, so they win over player fields on shared keys
        return {
            'id': episode_id,
            'formats': formats,
            **player_fields,
            **rss_fields,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class Art19ShowIE(InfoExtractor):
    """Extractor for a whole ART19 show, returned as a playlist of its episodes.

    Accepts show pages on art19.com (optionally with ``/embed``) and show
    feeds on rss.art19.com, and detects shows embedded in other pages through
    an iframe or an ``art19-web-player`` div carrying a ``data-series-id``
    attribute.
    """
    _VALID_URL_BASE = r'https?://(?:www\.)?art19\.com/shows/(?P<id>[\w-]+)(?:/embed)?/?'
    _VALID_URL = [
        rf'{_VALID_URL_BASE}(?:$|[#?])',
        r'https?://rss\.art19\.com/(?P<id>[\w-]+)/?(?:$|[#?])',
    ]
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL_BASE}[^\'"])']

    _TESTS = [{
        'url': 'https://www.art19.com/shows/5898c087-a14f-48dc-b6fc-a2280a1ff6e0/',
        'info_dict': {
            '_type': 'playlist',
            'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
            'display_id': 'echt-gebeurd',
            'title': 'Echt Gebeurd',
            'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
            'timestamp': 1492642167,
            'upload_date': '20170419',
            'modified_timestamp': int,
            'modified_date': str,
            'tags': 'count:7',
        },
        'playlist_mincount': 425,
    }, {
        'url': 'https://www.art19.com/shows/echt-gebeurd',
        'info_dict': {
            '_type': 'playlist',
            'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
            'display_id': 'echt-gebeurd',
            'title': 'Echt Gebeurd',
            'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
            'timestamp': 1492642167,
            'upload_date': '20170419',
            'modified_timestamp': int,
            'modified_date': str,
            'tags': 'count:7',
        },
        'playlist_mincount': 425,
    }, {
        'url': 'https://rss.art19.com/scamfluencers',
        'info_dict': {
            '_type': 'playlist',
            'id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
            'display_id': 'scamfluencers',
            'title': 'Scamfluencers',
            'description': 'md5:7d239d670c0ced6dadbf71c4caf764b7',
            'timestamp': 1647368573,
            'upload_date': '20220315',
            'modified_timestamp': int,
            'modified_date': str,
            'tags': [],
        },
        'playlist_mincount': 90,
    }, {
        'url': 'https://art19.com/shows/enthuellt/embed',
        'info_dict': {
            '_type': 'playlist',
            'id': 'e2cacf57-bb8a-4263-aa81-719bcdd4f80c',
            'display_id': 'enthuellt',
            'title': 'Enthüllt',
            'description': 'md5:17752246643414a2fd51744fc9a1c08e',
            'timestamp': 1601645860,
            'upload_date': '20201002',
            'modified_timestamp': int,
            'modified_date': str,
            'tags': 'count:10',
        },
        'playlist_mincount': 10,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://deconstructingyourself.com/deconstructing-yourself-podcast',
        'info_dict': {
            '_type': 'playlist',
            'id': 'cfbb9b01-c295-4adb-8726-adde7c03cf21',
            'display_id': 'deconstructing-yourself',
            'title': 'Deconstructing Yourself',
            'description': 'md5:dab5082b28b248a35476abf64768854d',
            'timestamp': 1570581181,
            'upload_date': '20191009',
            'modified_timestamp': int,
            'modified_date': str,
            'tags': 'count:5',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'https://chicagoreader.com/columns-opinion/podcasts/ben-joravsky-show-podcast-episodes/',
        'info_dict': {
            '_type': 'playlist',
            'id': '9dfa2c37-ab87-4c13-8388-4897914313ec',
            'display_id': 'the-ben-joravsky-show',
            'title': 'The Ben Joravsky Show',
            'description': 'md5:c0f3ec0ee0dbea764390e521adc8780a',
            'timestamp': 1550875095,
            'upload_date': '20190222',
            'modified_timestamp': int,
            'modified_date': str,
            'tags': ['Chicago Politics', 'chicago', 'Ben Joravsky'],
        },
        'playlist_mincount': 1900,
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield embed URLs from *webpage*: inherited matches first, then web-player divs."""
        yield from super()._extract_embed_urls(url, webpage)

        # Player <div>s identify the show by slug or UUID; map each to its show page URL
        player_div_re = r'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-series-id=[\'"]([\w-]+)[\'"]'
        yield from (
            f'https://art19.com/shows/{found_id}'
            for found_id in re.findall(player_div_re, webpage))

    def _real_extract(self, url):
        """Download the series metadata and return a playlist of episode URL results."""
        series_id = self._match_id(url)
        series_metadata = self._download_json(
            f'https://art19.com/series/{series_id}', series_id, note='Downloading series metadata',
            headers={'Accept': 'application/vnd.art19.v0+json'})

        # Every listed episode is delegated to Art19IE through its direct MP3 URL
        episode_ids = traverse_obj(series_metadata, ('series', 'episode_ids', ..., {str}))
        entries = [
            self.url_result(f'https://rss.art19.com/episodes/{episode_id}.mp3', Art19IE)
            for episode_id in episode_ids
        ]
        series_fields = traverse_obj(series_metadata, ('series', {
            'id': ('id', {str}),
            'display_id': ('slug', {str}),
            'title': ('title', {str}),
            'description': ('description_plain', {str}),
            'timestamp': ('created_at', {parse_iso8601}),
            'modified_timestamp': ('updated_at', {parse_iso8601}),
        }))
        # Tags are read from the top level of the response, not from 'series'
        tag_names = traverse_obj(series_metadata, ('tags', ..., 'name', {str}))

        return {
            '_type': 'playlist',
            'entries': entries,
            **series_fields,
            'tags': tag_names,
        }
|
|
@ -7,6 +7,7 @@ import math
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
import uuid
|
||||||
|
|
||||||
from .common import InfoExtractor, SearchInfoExtractor
|
from .common import InfoExtractor, SearchInfoExtractor
|
||||||
from ..dependencies import Cryptodome
|
from ..dependencies import Cryptodome
|
||||||
|
@ -1464,8 +1465,37 @@ class BiliBiliSearchIE(SearchInfoExtractor):
|
||||||
IE_DESC = 'Bilibili video search'
|
IE_DESC = 'Bilibili video search'
|
||||||
_MAX_RESULTS = 100000
|
_MAX_RESULTS = 100000
|
||||||
_SEARCH_KEY = 'bilisearch'
|
_SEARCH_KEY = 'bilisearch'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
|
||||||
|
'playlist_count': 3,
|
||||||
|
'info_dict': {
|
||||||
|
'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
|
||||||
|
'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'BV1n44y1Q7sc',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
|
||||||
|
'timestamp': 1669889987,
|
||||||
|
'upload_date': '20221201',
|
||||||
|
'description': 'md5:43343c0973defff527b5a4b403b4abf9',
|
||||||
|
'tags': list,
|
||||||
|
'uploader': '靡烟miya',
|
||||||
|
'duration': 123.156,
|
||||||
|
'uploader_id': '1958703906',
|
||||||
|
'comment_count': int,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
|
'_old_archive_ids': ['bilibili 988222410_part1'],
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
}]
|
||||||
|
|
||||||
def _search_results(self, query):
|
def _search_results(self, query):
|
||||||
|
if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
|
||||||
|
self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
videos = self._download_json(
|
videos = self._download_json(
|
||||||
'https://api.bilibili.com/x/web-interface/search/type', query,
|
'https://api.bilibili.com/x/web-interface/search/type', query,
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_resolution,
|
parse_resolution,
|
||||||
|
@ -60,6 +61,7 @@ class CCMAIE(InfoExtractor):
|
||||||
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
|
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
|
||||||
'media': media_type,
|
'media': media_type,
|
||||||
'idint': media_id,
|
'idint': media_id,
|
||||||
|
'format': 'dm',
|
||||||
})
|
})
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
@ -69,6 +71,10 @@ class CCMAIE(InfoExtractor):
|
||||||
format_url = url_or_none(format_.get('file'))
|
format_url = url_or_none(format_.get('file'))
|
||||||
if not format_url:
|
if not format_url:
|
||||||
continue
|
continue
|
||||||
|
if determine_ext(format_url) == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
format_url, media_id, mpd_id='dash', fatal=False))
|
||||||
|
continue
|
||||||
label = format_.get('label')
|
label = format_.get('label')
|
||||||
f = parse_resolution(label)
|
f = parse_resolution(label)
|
||||||
f.update({
|
f.update({
|
||||||
|
|
|
@ -67,7 +67,10 @@ class CineverseIE(CineverseBaseIE):
|
||||||
html = self._download_webpage(url, video_id)
|
html = self._download_webpage(url, video_id)
|
||||||
idetails = self._search_nextjs_data(html, video_id)['props']['pageProps']['idetails']
|
idetails = self._search_nextjs_data(html, video_id)['props']['pageProps']['idetails']
|
||||||
|
|
||||||
if idetails.get('err_code') == 1200:
|
err_code = idetails.get('err_code')
|
||||||
|
if err_code == 1002:
|
||||||
|
self.raise_login_required()
|
||||||
|
elif err_code == 1200:
|
||||||
self.raise_geo_restricted(
|
self.raise_geo_restricted(
|
||||||
'This video is not available from your location due to geo restriction. '
|
'This video is not available from your location due to geo restriction. '
|
||||||
'You may be able to bypass it by using the /details/ page instead of the /watch/ page',
|
'You may be able to bypass it by using the /details/ page instead of the /watch/ page',
|
||||||
|
|
|
@ -54,6 +54,7 @@ class FacebookIE(InfoExtractor):
|
||||||
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
||||||
[^/]+/videos/(?:[^/]+/)?|
|
[^/]+/videos/(?:[^/]+/)?|
|
||||||
[^/]+/posts/|
|
[^/]+/posts/|
|
||||||
|
events/(?:[^/]+/)?|
|
||||||
groups/[^/]+/(?:permalink|posts)/|
|
groups/[^/]+/(?:permalink|posts)/|
|
||||||
watchparty/
|
watchparty/
|
||||||
)|
|
)|
|
||||||
|
@ -399,6 +400,18 @@ class FacebookIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
'playlist_count': 1,
|
'playlist_count': 1,
|
||||||
'skip': 'Requires logging in',
|
'skip': 'Requires logging in',
|
||||||
|
}, {
|
||||||
|
# data.event.cover_media_renderer.cover_video
|
||||||
|
'url': 'https://m.facebook.com/events/1509582499515440',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '637246984455045',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'ANALISI IN CAMPO OSCURO " Coaguli nel sangue dei vaccinati"',
|
||||||
|
'description': 'Other event by Comitato Liberi Pensatori on Tuesday, October 18 2022',
|
||||||
|
'thumbnail': r're:^https?://.*',
|
||||||
|
'uploader': 'Comitato Liberi Pensatori',
|
||||||
|
'uploader_id': '100065709540881',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
|
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
|
||||||
_api_config = {
|
_api_config = {
|
||||||
|
@ -473,38 +486,10 @@ class FacebookIE(InfoExtractor):
|
||||||
r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
|
r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
|
||||||
post = traverse_obj(post_data, (
|
post = traverse_obj(post_data, (
|
||||||
..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
|
..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
|
||||||
|
|
||||||
automatic_captions, subtitles = {}, {}
|
|
||||||
subs_data = traverse_obj(post, (..., 'video', ..., 'attachments', ..., lambda k, v: (
|
|
||||||
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')))
|
|
||||||
is_video_broadcast = get_first(subs_data, 'is_video_broadcast', expected_type=bool)
|
|
||||||
captions = get_first(subs_data, 'video_available_captions_locales', 'captions_url')
|
|
||||||
if url_or_none(captions): # if subs_data only had a 'captions_url'
|
|
||||||
locale = self._html_search_meta(['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US')
|
|
||||||
subtitles[locale] = [{'url': captions}]
|
|
||||||
# or else subs_data had 'video_available_captions_locales', a list of dicts
|
|
||||||
for caption in traverse_obj(captions, (
|
|
||||||
{lambda x: sorted(x, key=lambda c: c['locale'])}, lambda _, v: v['captions_url'])
|
|
||||||
):
|
|
||||||
lang = caption.get('localized_language') or ''
|
|
||||||
subs = {
|
|
||||||
'url': caption['captions_url'],
|
|
||||||
'name': format_field(caption, 'localized_country', f'{lang} (%s)', default=lang),
|
|
||||||
}
|
|
||||||
if caption.get('localized_creation_method') or is_video_broadcast:
|
|
||||||
automatic_captions.setdefault(caption['locale'], []).append(subs)
|
|
||||||
else:
|
|
||||||
subtitles.setdefault(caption['locale'], []).append(subs)
|
|
||||||
|
|
||||||
media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
|
media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
|
||||||
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
|
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
|
||||||
title = get_first(media, ('title', 'text'))
|
title = get_first(media, ('title', 'text'))
|
||||||
description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
|
description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
|
||||||
uploader_data = (
|
|
||||||
get_first(media, ('owner', {dict}))
|
|
||||||
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
|
|
||||||
or get_first(post, ('node', 'actors', ..., {dict})) or {})
|
|
||||||
|
|
||||||
page_title = title or self._html_search_regex((
|
page_title = title or self._html_search_regex((
|
||||||
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',
|
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',
|
||||||
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(?P<content>.*?)</span>',
|
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(?P<content>.*?)</span>',
|
||||||
|
@ -513,11 +498,15 @@ class FacebookIE(InfoExtractor):
|
||||||
description = description or self._html_search_meta(
|
description = description or self._html_search_meta(
|
||||||
['description', 'og:description', 'twitter:description'],
|
['description', 'og:description', 'twitter:description'],
|
||||||
webpage, 'description', default=None)
|
webpage, 'description', default=None)
|
||||||
|
uploader_data = (
|
||||||
|
get_first(media, ('owner', {dict}))
|
||||||
|
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
|
||||||
|
or get_first(post, ('node', 'actors', ..., {dict}))
|
||||||
|
or get_first(post, ('event', 'event_creator', {dict})) or {})
|
||||||
uploader = uploader_data.get('name') or (
|
uploader = uploader_data.get('name') or (
|
||||||
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||||
or self._search_regex(
|
or self._search_regex(
|
||||||
(r'ownerName\s*:\s*"([^"]+)"', *self._og_regexes('title')), webpage, 'uploader', fatal=False))
|
(r'ownerName\s*:\s*"([^"]+)"', *self._og_regexes('title')), webpage, 'uploader', fatal=False))
|
||||||
|
|
||||||
timestamp = int_or_none(self._search_regex(
|
timestamp = int_or_none(self._search_regex(
|
||||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||||
'timestamp', default=None))
|
'timestamp', default=None))
|
||||||
|
@ -539,8 +528,6 @@ class FacebookIE(InfoExtractor):
|
||||||
webpage, 'view count', default=None)),
|
webpage, 'view count', default=None)),
|
||||||
'concurrent_view_count': get_first(post, (
|
'concurrent_view_count': get_first(post, (
|
||||||
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
|
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
|
||||||
'automatic_captions': automatic_captions,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
info_json_ld = self._search_json_ld(webpage, video_id, default={})
|
info_json_ld = self._search_json_ld(webpage, video_id, default={})
|
||||||
|
@ -638,6 +625,29 @@ class FacebookIE(InfoExtractor):
|
||||||
'url': playable_url,
|
'url': playable_url,
|
||||||
})
|
})
|
||||||
extract_dash_manifest(video, formats)
|
extract_dash_manifest(video, formats)
|
||||||
|
|
||||||
|
automatic_captions, subtitles = {}, {}
|
||||||
|
is_broadcast = traverse_obj(video, ('is_video_broadcast', {bool}))
|
||||||
|
for caption in traverse_obj(video, (
|
||||||
|
'video_available_captions_locales',
|
||||||
|
{lambda x: sorted(x, key=lambda c: c['locale'])},
|
||||||
|
lambda _, v: url_or_none(v['captions_url'])
|
||||||
|
)):
|
||||||
|
lang = caption.get('localized_language') or 'und'
|
||||||
|
subs = {
|
||||||
|
'url': caption['captions_url'],
|
||||||
|
'name': format_field(caption, 'localized_country', f'{lang} (%s)', default=lang),
|
||||||
|
}
|
||||||
|
if caption.get('localized_creation_method') or is_broadcast:
|
||||||
|
automatic_captions.setdefault(caption['locale'], []).append(subs)
|
||||||
|
else:
|
||||||
|
subtitles.setdefault(caption['locale'], []).append(subs)
|
||||||
|
captions_url = traverse_obj(video, ('captions_url', {url_or_none}))
|
||||||
|
if captions_url and not automatic_captions and not subtitles:
|
||||||
|
locale = self._html_search_meta(
|
||||||
|
['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US')
|
||||||
|
(automatic_captions if is_broadcast else subtitles)[locale] = [{'url': captions_url}]
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'id': v_id,
|
'id': v_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
@ -647,6 +657,8 @@ class FacebookIE(InfoExtractor):
|
||||||
'timestamp': traverse_obj(video, 'publish_time', 'creation_time', expected_type=int_or_none),
|
'timestamp': traverse_obj(video, 'publish_time', 'creation_time', expected_type=int_or_none),
|
||||||
'duration': (float_or_none(video.get('playable_duration_in_ms'), 1000)
|
'duration': (float_or_none(video.get('playable_duration_in_ms'), 1000)
|
||||||
or float_or_none(video.get('length_in_second'))),
|
or float_or_none(video.get('length_in_second'))),
|
||||||
|
'automatic_captions': automatic_captions,
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
process_formats(info)
|
process_formats(info)
|
||||||
description = try_get(video, lambda x: x['savable_description']['text'])
|
description = try_get(video, lambda x: x['savable_description']['text'])
|
||||||
|
@ -681,7 +693,8 @@ class FacebookIE(InfoExtractor):
|
||||||
for edge in edges:
|
for edge in edges:
|
||||||
parse_attachment(edge, key='node')
|
parse_attachment(edge, key='node')
|
||||||
|
|
||||||
video = data.get('video') or {}
|
video = traverse_obj(data, (
|
||||||
|
'event', 'cover_media_renderer', 'cover_video'), 'video', expected_type=dict) or {}
|
||||||
if video:
|
if video:
|
||||||
attachments = try_get(video, [
|
attachments = try_get(video, [
|
||||||
lambda x: x['story']['attachments'],
|
lambda x: x['story']['attachments'],
|
||||||
|
|
|
@ -1,10 +1,54 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
|
merge_dicts,
|
||||||
|
parse_count,
|
||||||
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class NFBIE(InfoExtractor):
|
class NFBBaseIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?nfb\.ca/film/(?P<id>[^/?#&]+)'
|
_VALID_URL_BASE = r'https?://(?:www\.)?(?P<site>nfb|onf)\.ca'
|
||||||
|
_GEO_COUNTRIES = ['CA']
|
||||||
|
|
||||||
|
def _extract_ep_data(self, webpage, video_id, fatal=False):
    """Return the ``episodesData`` JSON array embedded in ``webpage``.

    Looks for the array literal assigned to ``const episodesData``.
    ``fatal`` is forwarded unchanged to ``_search_json``; whenever that
    call produces a falsy result, an empty list is returned instead.
    """
    episodes = self._search_json(
        r'const\s+episodesData\s*=', webpage, 'episode data', video_id,
        contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal)
    if not episodes:
        return []
    return episodes
|
||||||
|
|
||||||
|
def _extract_ep_info(self, data, video_id, slug=None):
    """Build the info dict of a single episode from the ``episodesData`` list.

    ``data`` is the list of episode dicts. The first entry whose
    ``embed_url`` contains ``video_id`` supplies the metadata fields.
    The episode number is parsed from ``slug`` when it is given,
    otherwise from ``video_id``.

    The returned dict always has the keys ``id``, ``title`` and
    ``episode_number``, plus whichever metadata fields could be extracted.
    """
    # With get_all=False the traversal yields None when no episode matches
    # (e.g. `data` is empty because the episode data could not be found).
    # Fall back to an empty dict so that the `**info` unpacking and the
    # `from_dict` lookup below do not raise TypeError
    info = traverse_obj(data, (lambda _, v: video_id in v['embed_url'], {
        'description': ('description', {str}),
        'thumbnail': ('thumbnail_url', {url_or_none}),
        'uploader': ('data_layer', 'episodeMaker', {str}),
        'release_year': ('data_layer', 'episodeYear', {int_or_none}),
        'episode': ('data_layer', 'episodeTitle', {str}),
        'season': ('data_layer', 'seasonTitle', {str}),
        'season_number': ('data_layer', 'seasonTitle', {parse_count}),
        'series': ('data_layer', 'seriesTitle', {str}),
    }), get_all=False) or {}

    return {
        **info,
        'id': video_id,
        'title': join_nonempty('series', 'episode', from_dict=info, delim=' - '),
        'episode_number': int_or_none(self._search_regex(
            r'[/-]e(?:pisode)?-?(\d+)(?:[/-]|$)', slug or video_id, 'episode number', default=None)),
    }
|
||||||
|
|
||||||
|
|
||||||
|
class NFBIE(NFBBaseIE):
|
||||||
|
IE_NAME = 'nfb'
|
||||||
|
IE_DESC = 'nfb.ca and onf.ca films and episodes'
|
||||||
|
_VALID_URL = [
|
||||||
|
rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>film)/(?P<id>[^/?#&]+)',
|
||||||
|
rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>series?)/(?P<id>[^/?#&]+/s(?:ea|ai)son\d+/episode\d+)',
|
||||||
|
]
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'note': 'NFB film',
|
||||||
'url': 'https://www.nfb.ca/film/trafficopter/',
|
'url': 'https://www.nfb.ca/film/trafficopter/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'trafficopter',
|
'id': 'trafficopter',
|
||||||
|
@ -14,29 +58,192 @@ class NFBIE(InfoExtractor):
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'Barrie Howells',
|
'uploader': 'Barrie Howells',
|
||||||
'release_year': 1972,
|
'release_year': 1972,
|
||||||
|
'duration': 600.0,
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'note': 'ONF film',
|
||||||
|
'url': 'https://www.onf.ca/film/mal-du-siecle/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'mal-du-siecle',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Le mal du siècle',
|
||||||
|
'description': 'md5:1abf774d77569ebe603419f2d344102b',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'uploader': 'Catherine Lepage',
|
||||||
|
'release_year': 2019,
|
||||||
|
'duration': 300.0,
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'note': 'NFB episode with English title',
|
||||||
|
'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/season1/episode9/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'true-north-episode9-true-north-finale-making-it',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'True North: Inside the Rise of Toronto Basketball - Finale: Making It',
|
||||||
|
'description': 'We catch up with each player in the midst of their journey as they reflect on their road ahead.',
|
||||||
|
'series': 'True North: Inside the Rise of Toronto Basketball',
|
||||||
|
'release_year': 2018,
|
||||||
|
'season': 'Season 1',
|
||||||
|
'season_number': 1,
|
||||||
|
'episode': 'Finale: Making It',
|
||||||
|
'episode_number': 9,
|
||||||
|
'uploader': 'Ryan Sidhoo',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'note': 'ONF episode with French title',
|
||||||
|
'url': 'https://www.onf.ca/serie/direction-nord-la-montee-du-basketball-a-toronto/saison1/episode9/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'direction-nord-episode-9',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Direction nord – La montée du basketball à Toronto - Finale : Réussir',
|
||||||
|
'description': 'md5:349a57419b71432b97bf6083d92b029d',
|
||||||
|
'series': 'Direction nord – La montée du basketball à Toronto',
|
||||||
|
'release_year': 2018,
|
||||||
|
'season': 'Saison 1',
|
||||||
|
'season_number': 1,
|
||||||
|
'episode': 'Finale : Réussir',
|
||||||
|
'episode_number': 9,
|
||||||
|
'uploader': 'Ryan Sidhoo',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'note': 'NFB episode with French title (needs geo-bypass)',
|
||||||
|
'url': 'https://www.nfb.ca/series/etoile-du-nord/saison1/episode1/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'etoile-du-nord-episode-1-lobservation',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Étoile du Nord - L\'observation',
|
||||||
|
'description': 'md5:161a4617260dee3de70f509b2c9dd21b',
|
||||||
|
'series': 'Étoile du Nord',
|
||||||
|
'release_year': 2023,
|
||||||
|
'season': 'Saison 1',
|
||||||
|
'season_number': 1,
|
||||||
|
'episode': 'L\'observation',
|
||||||
|
'episode_number': 1,
|
||||||
|
'uploader': 'Patrick Bossé',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'note': 'ONF episode with English title (needs geo-bypass)',
|
||||||
|
'url': 'https://www.onf.ca/serie/north-star/season1/episode1/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'north-star-episode-1-observation',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'North Star - Observation',
|
||||||
|
'description': 'md5:c727f370839d8a817392b9e3f23655c7',
|
||||||
|
'series': 'North Star',
|
||||||
|
'release_year': 2023,
|
||||||
|
'season': 'Season 1',
|
||||||
|
'season_number': 1,
|
||||||
|
'episode': 'Observation',
|
||||||
|
'episode_number': 1,
|
||||||
|
'uploader': 'Patrick Bossé',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'note': 'NFB episode with /film/ URL and English title (needs geo-bypass)',
|
||||||
|
'url': 'https://www.nfb.ca/film/north-star-episode-1-observation/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'north-star-episode-1-observation',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'North Star - Observation',
|
||||||
|
'description': 'md5:c727f370839d8a817392b9e3f23655c7',
|
||||||
|
'series': 'North Star',
|
||||||
|
'release_year': 2023,
|
||||||
|
'season': 'Season 1',
|
||||||
|
'season_number': 1,
|
||||||
|
'episode': 'Observation',
|
||||||
|
'episode_number': 1,
|
||||||
|
'uploader': 'Patrick Bossé',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'note': 'ONF episode with /film/ URL and French title (needs geo-bypass)',
|
||||||
|
'url': 'https://www.onf.ca/film/etoile-du-nord-episode-1-lobservation/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'etoile-du-nord-episode-1-lobservation',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Étoile du Nord - L\'observation',
|
||||||
|
'description': 'md5:161a4617260dee3de70f509b2c9dd21b',
|
||||||
|
'series': 'Étoile du Nord',
|
||||||
|
'release_year': 2023,
|
||||||
|
'season': 'Saison 1',
|
||||||
|
'season_number': 1,
|
||||||
|
'episode': 'L\'observation',
|
||||||
|
'episode_number': 1,
|
||||||
|
'uploader': 'Patrick Bossé',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'note': 'Season 2 episode w/o episode num in id, extract from json ld',
|
||||||
|
'url': 'https://www.onf.ca/film/liste-des-choses-qui-existent-saison-2-ours',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'liste-des-choses-qui-existent-saison-2-ours',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'La liste des choses qui existent - L\'ours en peluche',
|
||||||
|
'description': 'md5:d5e8d8fc5f3a7385a9cf0f509b37e28a',
|
||||||
|
'series': 'La liste des choses qui existent',
|
||||||
|
'release_year': 2022,
|
||||||
|
'season': 'Saison 2',
|
||||||
|
'season_number': 2,
|
||||||
|
'episode': 'L\'ours en peluche',
|
||||||
|
'episode_number': 12,
|
||||||
|
'uploader': 'Francis Papillon',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'note': 'NFB film /embed/player/ page',
|
||||||
|
'url': 'https://www.nfb.ca/film/afterlife/embed/player/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'afterlife',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Afterlife',
|
||||||
|
'description': 'md5:84951394f594f1fb1e62d9c43242fdf5',
|
||||||
|
'release_year': 1978,
|
||||||
|
'duration': 420.0,
|
||||||
|
'uploader': 'Ishu Patel',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
site, type_, slug = self._match_valid_url(url).group('site', 'type', 'id')
|
||||||
|
# Need to construct the URL since we match /embed/player/ URLs as well
|
||||||
|
webpage, urlh = self._download_webpage_handle(f'https://www.{site}.ca/{type_}/{slug}/', slug)
|
||||||
|
# type_ can change from film to serie(s) after redirect; new slug may have episode number
|
||||||
|
type_, slug = self._match_valid_url(urlh.url).group('type', 'id')
|
||||||
|
|
||||||
webpage = self._download_webpage('https://www.nfb.ca/film/%s/' % video_id, video_id)
|
embed_url = urljoin(f'https://www.{site}.ca', self._html_search_regex(
|
||||||
|
r'<[^>]+\bid=["\']player-iframe["\'][^>]*\bsrc=["\']([^"\']+)', webpage, 'embed url'))
|
||||||
|
video_id = self._match_id(embed_url) # embed url has unique slug
|
||||||
|
player = self._download_webpage(embed_url, video_id, 'Downloading player page')
|
||||||
|
if 'MESSAGE_GEOBLOCKED' in player:
|
||||||
|
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||||
|
|
||||||
iframe = self._html_search_regex(
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||||
r'<[^>]+\bid=["\']player-iframe["\'][^>]*src=["\']([^"\']+)',
|
self._html_search_regex(r'source:\s*\'([^\']+)', player, 'm3u8 url'),
|
||||||
webpage, 'iframe', default=None, fatal=True)
|
video_id, 'mp4', m3u8_id='hls')
|
||||||
if iframe.startswith('/'):
|
|
||||||
iframe = f'https://www.nfb.ca{iframe}'
|
|
||||||
|
|
||||||
player = self._download_webpage(iframe, video_id)
|
if dv_source := self._html_search_regex(r'dvSource:\s*\'([^\']+)', player, 'dv', default=None):
|
||||||
|
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
|
dv_source, video_id, 'mp4', m3u8_id='dv', preference=-2, fatal=False)
|
||||||
|
for fmt in fmts:
|
||||||
|
fmt['format_note'] = 'described video'
|
||||||
|
formats.extend(fmts)
|
||||||
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
|
|
||||||
source = self._html_search_regex(
|
info = {
|
||||||
r'source:\s*\'([^\']+)',
|
|
||||||
player, 'source', default=None, fatal=True)
|
|
||||||
|
|
||||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(source, video_id, ext='mp4')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._html_search_regex(
|
'title': self._html_search_regex(
|
||||||
r'<[^>]+\bid=["\']titleHeader["\'][^>]*>\s*<h1[^>]*>\s*([^<]+?)\s*</h1>',
|
r'<[^>]+\bid=["\']titleHeader["\'][^>]*>\s*<h1[^>]*>\s*([^<]+?)\s*</h1>',
|
||||||
|
@ -45,14 +252,49 @@ class NFBIE(InfoExtractor):
|
||||||
r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*<p[^>]*>\s*([^<]+)',
|
r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*<p[^>]*>\s*([^<]+)',
|
||||||
webpage, 'description', default=None),
|
webpage, 'description', default=None),
|
||||||
'thumbnail': self._html_search_regex(
|
'thumbnail': self._html_search_regex(
|
||||||
r'poster:\s*\'([^\']+)',
|
r'poster:\s*\'([^\']+)', player, 'thumbnail', default=None),
|
||||||
player, 'thumbnail', default=None),
|
|
||||||
'uploader': self._html_search_regex(
|
'uploader': self._html_search_regex(
|
||||||
r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
|
r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
|
||||||
webpage, 'uploader', default=None),
|
|
||||||
'release_year': int_or_none(self._html_search_regex(
|
'release_year': int_or_none(self._html_search_regex(
|
||||||
r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)',
|
r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)',
|
||||||
webpage, 'release_year', default=None)),
|
webpage, 'release_year', default=None)),
|
||||||
|
} if type_ == 'film' else self._extract_ep_info(self._extract_ep_data(webpage, video_id, slug), video_id)
|
||||||
|
|
||||||
|
return merge_dicts({
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}, info, self._search_json_ld(webpage, video_id, default={}))
|
||||||
|
|
||||||
|
|
||||||
|
class NFBSeriesIE(NFBBaseIE):
    # Playlist extractor: turns a series page into one NFBIE result per episode
    IE_NAME = 'nfb:series'
    IE_DESC = 'nfb.ca and onf.ca series'
    _VALID_URL = rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>series?)/(?P<id>[^/?#&]+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/',
        'playlist_mincount': 9,
        'info_dict': {
            'id': 'true-north-inside-the-rise-of-toronto-basketball',
        },
    }, {
        'url': 'https://www.onf.ca/serie/la-liste-des-choses-qui-existent-serie/',
        'playlist_mincount': 26,
        'info_dict': {
            'id': 'la-liste-des-choses-qui-existent-serie',
        },
    }]

    def _entries(self, episodes):
        """Yield an NFBIE url result for every episode whose embed URL NFBIE accepts."""
        def is_supported(_, ep):
            return NFBIE.suitable(ep['embed_url'])

        for ep in traverse_obj(episodes, is_supported):
            match = NFBIE._match_valid_url(ep['embed_url'])
            # The matched part of the embed URL becomes the entry URL;
            # the episode metadata is attached to the result as extra fields
            metadata = self._extract_ep_info([ep], match.group('id'))
            yield self.url_result(match[0], NFBIE, **metadata)

    def _real_extract(self, url):
        """Return a playlist built from the episode data of the series at ``url``."""
        site, type_, series_id = self._match_valid_url(url).group('site', 'type', 'id')

        # The season path segment is 'saison' under /serie/ and 'season' otherwise
        if type_ == 'serie':
            season_path = 'saison'
        else:
            season_path = 'season'

        # The episode data is read from the page of the first episode of season 1
        first_episode_url = f'https://www.{site}.ca/{type_}/{series_id}/{season_path}1/episode1'
        webpage = self._download_webpage(first_episode_url, series_id)
        episodes = self._extract_ep_data(webpage, series_id, fatal=True)

        return self.playlist_result(self._entries(episodes), series_id)
|
||||||
|
|
225
yt_dlp/extractor/ninaprotocol.py
Normal file
225
yt_dlp/extractor/ninaprotocol.py
Normal file
|
@ -0,0 +1,225 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none, mimetype2ext, parse_iso8601, url_or_none
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class NinaProtocolIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?ninaprotocol\.com/releases/(?P<id>[^/#?]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.ninaprotocol.com/releases/3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ',
|
||||||
|
'title': 'The Spatulas - March Chant',
|
||||||
|
'tags': ['punk', 'postpresentmedium', 'cambridge'],
|
||||||
|
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
|
||||||
|
'channel': 'ppm',
|
||||||
|
'description': 'md5:bb9f9d39d8f786449cd5d0ff7c5772db',
|
||||||
|
'album': 'The Spatulas - March Chant',
|
||||||
|
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
|
||||||
|
'timestamp': 1701417610,
|
||||||
|
'uploader': 'ppmrecs',
|
||||||
|
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
|
||||||
|
'display_id': 'the-spatulas-march-chant',
|
||||||
|
'upload_date': '20231201',
|
||||||
|
'album_artist': 'Post Present Medium ',
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_1',
|
||||||
|
'title': 'March Chant In April',
|
||||||
|
'track': 'March Chant In April',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'duration': 152,
|
||||||
|
'track_number': 1,
|
||||||
|
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
|
||||||
|
'uploader': 'ppmrecs',
|
||||||
|
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
|
||||||
|
'timestamp': 1701417610,
|
||||||
|
'channel': 'ppm',
|
||||||
|
'album': 'The Spatulas - March Chant',
|
||||||
|
'tags': ['punk', 'postpresentmedium', 'cambridge'],
|
||||||
|
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
|
||||||
|
'upload_date': '20231201',
|
||||||
|
'album_artist': 'Post Present Medium ',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_2',
|
||||||
|
'title': 'Rescue Mission',
|
||||||
|
'track': 'Rescue Mission',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'duration': 212,
|
||||||
|
'track_number': 2,
|
||||||
|
'album_artist': 'Post Present Medium ',
|
||||||
|
'uploader': 'ppmrecs',
|
||||||
|
'tags': ['punk', 'postpresentmedium', 'cambridge'],
|
||||||
|
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
|
||||||
|
'channel': 'ppm',
|
||||||
|
'upload_date': '20231201',
|
||||||
|
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
|
||||||
|
'timestamp': 1701417610,
|
||||||
|
'album': 'The Spatulas - March Chant',
|
||||||
|
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_3',
|
||||||
|
'title': 'Slinger Style',
|
||||||
|
'track': 'Slinger Style',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'duration': 179,
|
||||||
|
'track_number': 3,
|
||||||
|
'timestamp': 1701417610,
|
||||||
|
'upload_date': '20231201',
|
||||||
|
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
|
||||||
|
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
|
||||||
|
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
|
||||||
|
'album_artist': 'Post Present Medium ',
|
||||||
|
'album': 'The Spatulas - March Chant',
|
||||||
|
'tags': ['punk', 'postpresentmedium', 'cambridge'],
|
||||||
|
'uploader': 'ppmrecs',
|
||||||
|
'channel': 'ppm',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_4',
|
||||||
|
'title': 'Psychic Signal',
|
||||||
|
'track': 'Psychic Signal',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'duration': 220,
|
||||||
|
'track_number': 4,
|
||||||
|
'tags': ['punk', 'postpresentmedium', 'cambridge'],
|
||||||
|
'upload_date': '20231201',
|
||||||
|
'album': 'The Spatulas - March Chant',
|
||||||
|
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
|
||||||
|
'timestamp': 1701417610,
|
||||||
|
'album_artist': 'Post Present Medium ',
|
||||||
|
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
|
||||||
|
'channel': 'ppm',
|
||||||
|
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
|
||||||
|
'uploader': 'ppmrecs',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_5',
|
||||||
|
'title': 'Curvy Color',
|
||||||
|
'track': 'Curvy Color',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'duration': 148,
|
||||||
|
'track_number': 5,
|
||||||
|
'timestamp': 1701417610,
|
||||||
|
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
|
||||||
|
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
|
||||||
|
'album': 'The Spatulas - March Chant',
|
||||||
|
'album_artist': 'Post Present Medium ',
|
||||||
|
'channel': 'ppm',
|
||||||
|
'tags': ['punk', 'postpresentmedium', 'cambridge'],
|
||||||
|
'uploader': 'ppmrecs',
|
||||||
|
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
|
||||||
|
'upload_date': '20231201',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_6',
|
||||||
|
'title': 'Caveman Star',
|
||||||
|
'track': 'Caveman Star',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'duration': 121,
|
||||||
|
'track_number': 6,
|
||||||
|
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
|
||||||
|
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
|
||||||
|
'tags': ['punk', 'postpresentmedium', 'cambridge'],
|
||||||
|
'album_artist': 'Post Present Medium ',
|
||||||
|
'uploader': 'ppmrecs',
|
||||||
|
'timestamp': 1701417610,
|
||||||
|
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
|
||||||
|
'album': 'The Spatulas - March Chant',
|
||||||
|
'channel': 'ppm',
|
||||||
|
'upload_date': '20231201',
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.ninaprotocol.com/releases/f-g-s-american-shield',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '76PZnJwaMgViQHYfA4NYJXds7CmW6vHQKAtQUxGene6J',
|
||||||
|
'description': 'md5:63f08d5db558b4b36e1896f317062721',
|
||||||
|
'title': 'F.G.S. - American Shield',
|
||||||
|
'uploader_id': 'Ej3rozs11wYqFk1Gs6oggGCkGLz8GzBhmJfnUxf6gPci',
|
||||||
|
'channel_id': '6JuksCZPXuP16wJ1BUfwuukJzh42C7guhLrFPPkVJfyE',
|
||||||
|
'channel': 'tinkscough',
|
||||||
|
'tags': [],
|
||||||
|
'album_artist': 'F.G.S.',
|
||||||
|
'album': 'F.G.S. - American Shield',
|
||||||
|
'thumbnail': 'https://www.arweave.net/YJpgImkXLT9SbpFb576KuZ5pm6bdvs452LMs3Rx6lm8',
|
||||||
|
'display_id': 'f-g-s-american-shield',
|
||||||
|
'uploader': 'flannerysilva',
|
||||||
|
'timestamp': 1702395858,
|
||||||
|
'upload_date': '20231212',
|
||||||
|
},
|
||||||
|
'playlist_count': 1,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.ninaprotocol.com/releases/time-to-figure-things-out',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6Zi1nC5hj6b13NkpxVYwRhFy6mYA7oLBbe9DMrgGDcYh',
|
||||||
|
'display_id': 'time-to-figure-things-out',
|
||||||
|
'description': 'md5:960202ed01c3134bb8958f1008527e35',
|
||||||
|
'timestamp': 1706283607,
|
||||||
|
'title': 'DJ STEPDAD - time to figure things out',
|
||||||
|
'album_artist': 'DJ STEPDAD',
|
||||||
|
'uploader': 'tddvsss',
|
||||||
|
'upload_date': '20240126',
|
||||||
|
'album': 'time to figure things out',
|
||||||
|
'uploader_id': 'AXQNRgTyYsySyAMFDwxzumuGjfmoXshorCesjpquwCBi',
|
||||||
|
'thumbnail': 'https://www.arweave.net/O4i8bcKVqJVZvNeHHFp6r8knpFGh9ZwEgbeYacr4nss',
|
||||||
|
'tags': [],
|
||||||
|
},
|
||||||
|
'playlist_count': 4,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Extract a Nina Protocol release as a playlist of its audio files.

    The release JSON is fetched from the public API using the id matched
    from the URL. Release-level metadata is collected once and copied into
    every entry as well as into the playlist result itself.
    """
    url_id = self._match_id(url)
    release = self._download_json(
        f'https://api.ninaprotocol.com/v1/releases/{url_id}', url_id)['release']

    # Prefer the release's public key as the id; keep the URL id as fallback
    video_id = release.get('publicKey') or url_id

    # get_all=False: for the branching 'album_artist' path, keep only the first match
    common_info = traverse_obj(release, {
        'album': ('metadata', 'properties', 'title', {str}),
        'album_artist': ((('hub', 'data'), 'publisherAccount'), 'displayName', {str}),
        'timestamp': ('datetime', {parse_iso8601}),
        'thumbnail': ('metadata', 'image', {url_or_none}),
        'uploader': ('publisherAccount', 'handle', {str}),
        'uploader_id': ('publisherAccount', 'publicKey', {str}),
        'channel': ('hub', 'handle', {str}),
        'channel_id': ('hub', 'publicKey', {str}),
    }, get_all=False)
    common_info['tags'] = traverse_obj(release, ('metadata', 'properties', 'tags', ..., {str}))

    # Only files whose 'uri' passes url_or_none become entries
    tracks = traverse_obj(release, (
        'metadata', 'properties', 'files', lambda _, v: url_or_none(v['uri'])))

    # Entry ids are numbered from 1 in the order the files are listed
    entries = [{
        'id': f'{video_id}_{position}',
        'url': track['uri'],
        **traverse_obj(track, {
            'title': ('track_title', {str}),
            'track': ('track_title', {str}),
            'ext': ('type', {mimetype2ext}),
            'track_number': ('track', {int_or_none}),
            'duration': ('duration', {int_or_none}),
        }),
        'vcodec': 'none',
        **common_info,
    } for position, track in enumerate(tracks, 1)]

    release_info = traverse_obj(release, {
        'display_id': ('slug', {str}),
        'title': ('metadata', 'name', {str}),
        'description': ('metadata', 'description', {str}),
    })

    return {
        '_type': 'playlist',
        'id': video_id,
        'entries': entries,
        **release_info,
        **common_info,
    }
|
|
@ -1,3 +1,4 @@
|
||||||
|
import base64
|
||||||
import functools
|
import functools
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
@ -565,3 +566,66 @@ class ORFFM4StoryIE(InfoExtractor):
|
||||||
})
|
})
|
||||||
|
|
||||||
return self.playlist_result(entries)
|
return self.playlist_result(entries)
|
||||||
|
|
||||||
|
|
||||||
|
class ORFONIE(InfoExtractor):
    """Extractor for single videos on on.orf.at."""

    IE_NAME = 'orf:on'
    _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d{8})/(?P<slug>[\w-]+)'
    _TESTS = [{
        'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
        'info_dict': {
            'id': '14210000',
            'ext': 'mp4',
            'duration': 2651.08,
            'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0167/98/thumb_16697671_segments_highlight_teaser.jpeg',
            'title': 'School of Champions (4/8)',
            'description': 'md5:d09ad279fc2e8502611e7648484b6afd',
            'media_type': 'episode',
            'timestamp': 1706472362,
            'upload_date': '20240128',
        }
    }]

    def _extract_video(self, video_id, display_id):
        """Fetch the episode JSON from the API and build formats, subtitles and metadata.

        The API path segment is the base64 encoding of a fixed prefix string
        followed by the numeric video id.
        """
        raw_token = f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()
        encrypted_id = base64.b64encode(raw_token).decode()
        api_json = self._download_json(
            f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id)

        # One non-fatal manifest extractor per supported source type
        manifest_extractors = {
            'hls': lambda src: self._extract_m3u8_formats_and_subtitles(
                src, display_id, fatal=False, m3u8_id='hls'),
            'dash': lambda src: self._extract_mpd_formats_and_subtitles(
                src, display_id, fatal=False, mpd_id='dash'),
        }

        formats, subtitles = [], {}
        for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):
            extract_manifest = manifest_extractors.get(manifest_type)
            if extract_manifest is None:
                # Source types other than hls/dash are ignored
                continue
            for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})):
                fmts, subs = extract_manifest(manifest_url)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)

        # get_all=False: take the first matching key of each alternative pair
        metadata = traverse_obj(api_json, {
            'duration': ('duration_second', {float_or_none}),
            'title': (('title', 'headline'), {str}),
            'description': (('description', 'teaser_text'), {str}),
            'media_type': ('video_type', {str}),
        }, get_all=False)

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **metadata,
        }

    def _real_extract(self, url):
        """Combine webpage metadata with the API result.

        Later sources override earlier ones: HTML meta tags, then JSON-LD,
        then the values returned by `_extract_video`.
        """
        url_match = self._match_valid_url(url)
        video_id, display_id = url_match.group('id', 'slug')
        webpage = self._download_webpage(url, display_id)

        return {
            'id': video_id,
            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
            'description': self._html_search_meta(
                ['description', 'og:description', 'twitter:description'], webpage, default=None),
            **self._search_json_ld(webpage, display_id, fatal=False),
            **self._extract_video(video_id, display_id),
        }
|
||||||
|
|
|
@ -1,5 +1,8 @@
|
||||||
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import parse_iso8601, traverse_obj, try_call
|
from ..utils import float_or_none, parse_iso8601, str_or_none, try_call
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class PrankCastIE(InfoExtractor):
|
class PrankCastIE(InfoExtractor):
|
||||||
|
@ -64,3 +67,71 @@ class PrankCastIE(InfoExtractor):
|
||||||
'categories': [json_info.get('broadcast_category')],
|
'categories': [json_info.get('broadcast_category')],
|
||||||
'tags': try_call(lambda: json_info['broadcast_tags'].split(','))
|
'tags': try_call(lambda: json_info['broadcast_tags'].split(','))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class PrankCastPostIE(InfoExtractor):
    """Extractor for prankcast.com post pages (`/<user>/posts/<id>-<slug>`)."""

    _VALID_URL = r'https?://(?:www\.)?prankcast\.com/[^/?#]+/posts/(?P<id>\d+)-(?P<display_id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://prankcast.com/devonanustart/posts/6214-happy-national-rachel-day-',
        'info_dict': {
            'id': '6214',
            'ext': 'mp3',
            'title': 'Happy National Rachel Day!',
            'display_id': 'happy-national-rachel-day-',
            'timestamp': 1704333938,
            'uploader': 'Devonanustart',
            'channel_id': '4',
            'duration': 13175,
            'cast': ['Devonanustart'],
            'description': '',
            'categories': ['prank call'],
            'upload_date': '20240104'
        }
    }, {
        'url': 'https://prankcast.com/despicabledogs/posts/6217-jake-the-work-crow-',
        'info_dict': {
            'id': '6217',
            'ext': 'mp3',
            'title': 'Jake the Work Crow!',
            'display_id': 'jake-the-work-crow-',
            'timestamp': 1704346592,
            'uploader': 'despicabledogs',
            'channel_id': '957',
            'duration': 263.287,
            'cast': ['despicabledogs'],
            'description': 'https://imgur.com/a/vtxLvKU',
            'categories': [],
            'upload_date': '20240104'
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for a post from the page's embedded Next.js data.

        The post record is read from `props.pageProps.ssr_data_posts`; the
        media details come from the first element of its JSON-encoded
        `post_contents_json` field.
        """
        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')

        webpage = self._download_webpage(url, video_id)
        post = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['ssr_data_posts']
        content = self._parse_json(post['post_contents_json'], video_id)[0]

        uploader = post.get('user_name')
        # 'guests_json' is itself a JSON string; fall back to {} if absent or not a dict
        guests_json = traverse_obj(content, ('guests_json', {json.loads}, {dict})) or {}

        return {
            'id': video_id,
            'title': post.get('post_title') or self._og_search_title(webpage),
            'display_id': display_id,
            'url': content.get('url'),
            'timestamp': parse_iso8601(content.get('start_date') or content.get('crdate'), ' '),
            'uploader': uploader,
            'channel_id': str_or_none(post.get('user_id')),
            'duration': float_or_none(content.get('duration')),
            'cast': list(filter(None, [uploader] + traverse_obj(guests_json, (..., 'name')))),
            'description': post.get('post_body'),
            'categories': list(filter(None, [content.get('category')])),
            # filter(None, ...) drops empty tags. The previous filter('', ...) passed a
            # string as the predicate, which raises TypeError as soon as it is called.
            'tags': try_call(lambda: list(filter(None, post['post_tags'].split(',')))),
            'subtitles': {
                'live_chat': [{
                    'url': f'https://prankcast.com/api/private/chat/select-broadcast?id={post["content_id"]}&cache=',
                    'ext': 'json',
                }],
            } if post.get('content_id') else None
        }
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import base64
|
import base64
|
||||||
import random
|
import random
|
||||||
|
import re
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
@ -11,6 +12,7 @@ from ..utils import (
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class RadikoBaseIE(InfoExtractor):
|
class RadikoBaseIE(InfoExtractor):
|
||||||
|
@ -159,6 +161,12 @@ class RadikoBaseIE(InfoExtractor):
|
||||||
|
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
|
def _extract_performers(self, prog):
    """Return the performers listed in `prog` as one comma-separated string.

    Each `pfm` text value is split on the separator characters in the
    pattern below and every piece is stripped of surrounding whitespace.
    Returns None when the joined result is empty.
    """
    def split_names(text):
        return re.split(r'[//、 ,,]', text)

    names = traverse_obj(prog, ('pfm/text()', ..., {split_names}, ..., {str.strip}))
    # TODO: change 'artist' fields to 'artists' and return traversal list instead of str
    joined = ', '.join(names)
    return joined or None
|
||||||
|
|
||||||
|
|
||||||
class RadikoIE(RadikoBaseIE):
|
class RadikoIE(RadikoBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
|
||||||
|
@ -186,10 +194,12 @@ class RadikoIE(RadikoBaseIE):
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': try_call(lambda: prog.find('title').text),
|
'title': try_call(lambda: prog.find('title').text),
|
||||||
|
'artist': self._extract_performers(prog),
|
||||||
'description': clean_html(try_call(lambda: prog.find('info').text)),
|
'description': clean_html(try_call(lambda: prog.find('info').text)),
|
||||||
'uploader': try_call(lambda: station_program.find('.//name').text),
|
'uploader': try_call(lambda: station_program.find('.//name').text),
|
||||||
'uploader_id': station,
|
'uploader_id': station,
|
||||||
'timestamp': vid_int,
|
'timestamp': vid_int,
|
||||||
|
'duration': try_call(lambda: unified_timestamp(radio_end, False) - unified_timestamp(radio_begin, False)),
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
'formats': self._extract_formats(
|
'formats': self._extract_formats(
|
||||||
video_id=video_id, station=station, is_onair=False,
|
video_id=video_id, station=station, is_onair=False,
|
||||||
|
@ -243,6 +253,7 @@ class RadikoRadioIE(RadikoBaseIE):
|
||||||
return {
|
return {
|
||||||
'id': station,
|
'id': station,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
'artist': self._extract_performers(prog),
|
||||||
'description': description,
|
'description': description,
|
||||||
'uploader': station_name,
|
'uploader': station_name,
|
||||||
'uploader_id': station,
|
'uploader_id': station,
|
||||||
|
|
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class RedTubeIE(InfoExtractor):
|
class RedTubeIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com(?:\.br)?/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
|
||||||
_EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)']
|
_EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.redtube.com/38864951',
|
'url': 'https://www.redtube.com/38864951',
|
||||||
|
@ -35,6 +35,9 @@ class RedTubeIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://it.redtube.com/66418',
|
'url': 'http://it.redtube.com/66418',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.redtube.com.br/103224331',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -25,8 +25,8 @@ class SlidesLiveIE(InfoExtractor):
|
||||||
'id': '38902413',
|
'id': '38902413',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'GCC IA16 backend',
|
'title': 'GCC IA16 backend',
|
||||||
'timestamp': 1648189972,
|
'timestamp': 1697793372,
|
||||||
'upload_date': '20220325',
|
'upload_date': '20231020',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'thumbnails': 'count:42',
|
'thumbnails': 'count:42',
|
||||||
'chapters': 'count:41',
|
'chapters': 'count:41',
|
||||||
|
@ -42,8 +42,8 @@ class SlidesLiveIE(InfoExtractor):
|
||||||
'id': '38935785',
|
'id': '38935785',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
|
'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
|
||||||
'upload_date': '20211115',
|
'upload_date': '20231020',
|
||||||
'timestamp': 1636996003,
|
'timestamp': 1697807002,
|
||||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||||
'thumbnails': 'count:640',
|
'thumbnails': 'count:640',
|
||||||
'chapters': 'count:639',
|
'chapters': 'count:639',
|
||||||
|
@ -59,9 +59,9 @@ class SlidesLiveIE(InfoExtractor):
|
||||||
'id': '38973182',
|
'id': '38973182',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'How Should a Machine Learning Researcher Think About AI Ethics?',
|
'title': 'How Should a Machine Learning Researcher Think About AI Ethics?',
|
||||||
'upload_date': '20220201',
|
'upload_date': '20231020',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'timestamp': 1643728135,
|
'timestamp': 1697822521,
|
||||||
'thumbnails': 'count:3',
|
'thumbnails': 'count:3',
|
||||||
'chapters': 'count:2',
|
'chapters': 'count:2',
|
||||||
'duration': 5889,
|
'duration': 5889,
|
||||||
|
@ -70,37 +70,22 @@ class SlidesLiveIE(InfoExtractor):
|
||||||
'skip_download': 'm3u8',
|
'skip_download': 'm3u8',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# service_name = youtube, only XML slides info
|
# formerly youtube, converted to native
|
||||||
'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost',
|
'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost',
|
||||||
'md5': '8a79b5e3d700837f40bd2afca3c8fa01',
|
'md5': '8a79b5e3d700837f40bd2afca3c8fa01',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'jmg02wCJD5M',
|
'id': '38897546',
|
||||||
'display_id': '38897546',
|
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'SPECIÁL: Meta-přednáška Petra Ludwiga - Hodnoty pro lepší společnost',
|
'title': 'SPECIÁL: Meta-přednáška Petra Ludwiga - Hodnoty pro lepší společnost',
|
||||||
'description': 'Watch full version of this video at https://slideslive.com/38897546.',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'channel_url': 'https://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
|
'upload_date': '20231029',
|
||||||
'channel': 'SlidesLive Videos - G1',
|
'timestamp': 1698588144,
|
||||||
'channel_id': 'UCZWdAkNYFncuX0khyvhqnxw',
|
|
||||||
'uploader_id': 'UCZWdAkNYFncuX0khyvhqnxw',
|
|
||||||
'uploader': 'SlidesLive Videos - G1',
|
|
||||||
'uploader_url': 'http://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
|
|
||||||
'live_status': 'not_live',
|
|
||||||
'upload_date': '20160710',
|
|
||||||
'timestamp': 1618786715,
|
|
||||||
'duration': 6827,
|
|
||||||
'like_count': int,
|
|
||||||
'view_count': int,
|
|
||||||
'comment_count': int,
|
|
||||||
'channel_follower_count': int,
|
|
||||||
'age_limit': 0,
|
|
||||||
'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
|
|
||||||
'thumbnails': 'count:169',
|
'thumbnails': 'count:169',
|
||||||
'playable_in_embed': True,
|
|
||||||
'availability': 'unlisted',
|
|
||||||
'tags': [],
|
|
||||||
'categories': ['People & Blogs'],
|
|
||||||
'chapters': 'count:168',
|
'chapters': 'count:168',
|
||||||
|
'duration': 6827,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'm3u8',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# embed-only presentation, only XML slides info
|
# embed-only presentation, only XML slides info
|
||||||
|
@ -111,8 +96,8 @@ class SlidesLiveIE(InfoExtractor):
|
||||||
'title': 'Towards a Deep Network Architecture for Structured Smoothness',
|
'title': 'Towards a Deep Network Architecture for Structured Smoothness',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'thumbnails': 'count:8',
|
'thumbnails': 'count:8',
|
||||||
'timestamp': 1629671508,
|
'timestamp': 1697803109,
|
||||||
'upload_date': '20210822',
|
'upload_date': '20231020',
|
||||||
'chapters': 'count:7',
|
'chapters': 'count:7',
|
||||||
'duration': 326,
|
'duration': 326,
|
||||||
},
|
},
|
||||||
|
@ -128,8 +113,8 @@ class SlidesLiveIE(InfoExtractor):
|
||||||
'title': 'MoReL: Multi-omics Relational Learning',
|
'title': 'MoReL: Multi-omics Relational Learning',
|
||||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||||
'thumbnails': 'count:7',
|
'thumbnails': 'count:7',
|
||||||
'timestamp': 1654714970,
|
'timestamp': 1697824939,
|
||||||
'upload_date': '20220608',
|
'upload_date': '20231020',
|
||||||
'chapters': 'count:6',
|
'chapters': 'count:6',
|
||||||
'duration': 171,
|
'duration': 171,
|
||||||
},
|
},
|
||||||
|
@ -145,8 +130,8 @@ class SlidesLiveIE(InfoExtractor):
|
||||||
'title': 'Decentralized Attribution of Generative Models',
|
'title': 'Decentralized Attribution of Generative Models',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'thumbnails': 'count:16',
|
'thumbnails': 'count:16',
|
||||||
'timestamp': 1622806321,
|
'timestamp': 1697814901,
|
||||||
'upload_date': '20210604',
|
'upload_date': '20231020',
|
||||||
'chapters': 'count:15',
|
'chapters': 'count:15',
|
||||||
'duration': 306,
|
'duration': 306,
|
||||||
},
|
},
|
||||||
|
@ -162,8 +147,8 @@ class SlidesLiveIE(InfoExtractor):
|
||||||
'title': 'Efficient Active Search for Combinatorial Optimization Problems',
|
'title': 'Efficient Active Search for Combinatorial Optimization Problems',
|
||||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||||
'thumbnails': 'count:9',
|
'thumbnails': 'count:9',
|
||||||
'timestamp': 1654714896,
|
'timestamp': 1697824757,
|
||||||
'upload_date': '20220608',
|
'upload_date': '20231020',
|
||||||
'chapters': 'count:8',
|
'chapters': 'count:8',
|
||||||
'duration': 295,
|
'duration': 295,
|
||||||
},
|
},
|
||||||
|
@ -177,10 +162,10 @@ class SlidesLiveIE(InfoExtractor):
|
||||||
'id': '38979880',
|
'id': '38979880',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The Representation Power of Neural Networks',
|
'title': 'The Representation Power of Neural Networks',
|
||||||
'timestamp': 1654714962,
|
'timestamp': 1697824919,
|
||||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||||
'thumbnails': 'count:22',
|
'thumbnails': 'count:22',
|
||||||
'upload_date': '20220608',
|
'upload_date': '20231020',
|
||||||
'chapters': 'count:21',
|
'chapters': 'count:21',
|
||||||
'duration': 294,
|
'duration': 294,
|
||||||
},
|
},
|
||||||
|
@ -200,10 +185,10 @@ class SlidesLiveIE(InfoExtractor):
|
||||||
'id': '38979682',
|
'id': '38979682',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
|
'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
|
||||||
'timestamp': 1654714920,
|
'timestamp': 1697824815,
|
||||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||||
'thumbnails': 'count:30',
|
'thumbnails': 'count:30',
|
||||||
'upload_date': '20220608',
|
'upload_date': '20231020',
|
||||||
'chapters': 'count:31',
|
'chapters': 'count:31',
|
||||||
'duration': 272,
|
'duration': 272,
|
||||||
},
|
},
|
||||||
|
@ -213,8 +198,8 @@ class SlidesLiveIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 021',
|
'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 021',
|
||||||
'duration': 3,
|
'duration': 3,
|
||||||
'timestamp': 1654714920,
|
'timestamp': 1697824815,
|
||||||
'upload_date': '20220608',
|
'upload_date': '20231020',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -222,8 +207,8 @@ class SlidesLiveIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 024',
|
'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 024',
|
||||||
'duration': 4,
|
'duration': 4,
|
||||||
'timestamp': 1654714920,
|
'timestamp': 1697824815,
|
||||||
'upload_date': '20220608',
|
'upload_date': '20231020',
|
||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
'params': {
|
'params': {
|
||||||
|
@ -242,10 +227,10 @@ class SlidesLiveIE(InfoExtractor):
|
||||||
'id': '38979481',
|
'id': '38979481',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
|
'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
|
||||||
'timestamp': 1654714877,
|
'timestamp': 1697824716,
|
||||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||||
'thumbnails': 'count:43',
|
'thumbnails': 'count:43',
|
||||||
'upload_date': '20220608',
|
'upload_date': '20231020',
|
||||||
'chapters': 'count:43',
|
'chapters': 'count:43',
|
||||||
'duration': 315,
|
'duration': 315,
|
||||||
},
|
},
|
||||||
|
@ -255,8 +240,8 @@ class SlidesLiveIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'How to Train Your MAML to Excel in Few-Shot Classification - Slide 013',
|
'title': 'How to Train Your MAML to Excel in Few-Shot Classification - Slide 013',
|
||||||
'duration': 3,
|
'duration': 3,
|
||||||
'timestamp': 1654714877,
|
'timestamp': 1697824716,
|
||||||
'upload_date': '20220608',
|
'upload_date': '20231020',
|
||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
'params': {
|
'params': {
|
||||||
|
@ -275,10 +260,10 @@ class SlidesLiveIE(InfoExtractor):
|
||||||
'channel_id': 'UC62SdArr41t_-_fX40QCLRw',
|
'channel_id': 'UC62SdArr41t_-_fX40QCLRw',
|
||||||
'channel_url': 'https://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
|
'channel_url': 'https://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
|
||||||
'uploader': 'SlidesLive Videos - A',
|
'uploader': 'SlidesLive Videos - A',
|
||||||
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
|
'uploader_id': '@slideslivevideos-a6075',
|
||||||
'uploader_url': 'http://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
|
'uploader_url': 'https://www.youtube.com/@slideslivevideos-a6075',
|
||||||
'upload_date': '20200903',
|
'upload_date': '20200903',
|
||||||
'timestamp': 1602599092,
|
'timestamp': 1697805922,
|
||||||
'duration': 942,
|
'duration': 942,
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'live_status': 'not_live',
|
'live_status': 'not_live',
|
||||||
|
@ -303,8 +288,8 @@ class SlidesLiveIE(InfoExtractor):
|
||||||
'id': '38983994',
|
'id': '38983994',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Zero-Shot AutoML with Pretrained Models',
|
'title': 'Zero-Shot AutoML with Pretrained Models',
|
||||||
'timestamp': 1662384834,
|
'timestamp': 1697826708,
|
||||||
'upload_date': '20220905',
|
'upload_date': '20231020',
|
||||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||||
'thumbnails': 'count:23',
|
'thumbnails': 'count:23',
|
||||||
'chapters': 'count:22',
|
'chapters': 'count:22',
|
||||||
|
@ -336,8 +321,8 @@ class SlidesLiveIE(InfoExtractor):
|
||||||
'title': 'Towards a Deep Network Architecture for Structured Smoothness',
|
'title': 'Towards a Deep Network Architecture for Structured Smoothness',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'thumbnails': 'count:8',
|
'thumbnails': 'count:8',
|
||||||
'timestamp': 1629671508,
|
'timestamp': 1697803109,
|
||||||
'upload_date': '20210822',
|
'upload_date': '20231020',
|
||||||
'chapters': 'count:7',
|
'chapters': 'count:7',
|
||||||
'duration': 326,
|
'duration': 326,
|
||||||
},
|
},
|
||||||
|
@ -386,7 +371,7 @@ class SlidesLiveIE(InfoExtractor):
|
||||||
if not line.startswith('#EXT-SL-'):
|
if not line.startswith('#EXT-SL-'):
|
||||||
continue
|
continue
|
||||||
tag, _, value = line.partition(':')
|
tag, _, value = line.partition(':')
|
||||||
key = lookup.get(tag.lstrip('#EXT-SL-'))
|
key = lookup.get(tag[8:])
|
||||||
if not key:
|
if not key:
|
||||||
continue
|
continue
|
||||||
m3u8_dict[key] = value
|
m3u8_dict[key] = value
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import ExtractorError
|
from ..utils import ExtractorError, base_url, int_or_none, url_basename
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class Vbox7IE(InfoExtractor):
|
class Vbox7IE(InfoExtractor):
|
||||||
|
@ -19,7 +20,7 @@ class Vbox7IE(InfoExtractor):
|
||||||
_GEO_COUNTRIES = ['BG']
|
_GEO_COUNTRIES = ['BG']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://vbox7.com/play:0946fff23c',
|
'url': 'http://vbox7.com/play:0946fff23c',
|
||||||
'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
|
'md5': '50ca1f78345a9c15391af47d8062d074',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0946fff23c',
|
'id': '0946fff23c',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -29,19 +30,25 @@ class Vbox7IE(InfoExtractor):
|
||||||
'timestamp': 1470982814,
|
'timestamp': 1470982814,
|
||||||
'upload_date': '20160812',
|
'upload_date': '20160812',
|
||||||
'uploader': 'zdraveibulgaria',
|
'uploader': 'zdraveibulgaria',
|
||||||
},
|
'view_count': int,
|
||||||
'params': {
|
'duration': 2640,
|
||||||
'proxy': '127.0.0.1:8118',
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://vbox7.com/play:249bb972c2',
|
'url': 'http://vbox7.com/play:249bb972c2',
|
||||||
'md5': '99f65c0c9ef9b682b97313e052734c3f',
|
'md5': 'da1dd2eb245200cb86e6d09d43232116',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '249bb972c2',
|
'id': '249bb972c2',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Смях! Чудо - чист за секунди - Скрита камера',
|
'title': 'Смях! Чудо - чист за секунди - Скрита камера',
|
||||||
|
'uploader': 'svideteliat_ot_varshava',
|
||||||
|
'view_count': int,
|
||||||
|
'timestamp': 1360215023,
|
||||||
|
'thumbnail': 'https://i49.vbox7.com/design/iconci/png/noimg6.png',
|
||||||
|
'description': 'Смях! Чудо - чист за секунди - Скрита камера',
|
||||||
|
'upload_date': '20130207',
|
||||||
|
'duration': 83,
|
||||||
},
|
},
|
||||||
'skip': 'georestricted',
|
'expected_warnings': ['Failed to download m3u8 information'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
|
'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -53,41 +60,38 @@ class Vbox7IE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
response = self._download_json(
|
data = self._download_json(
|
||||||
'https://www.vbox7.com/ajax/video/nextvideo.php?vid=%s' % video_id,
|
'https://www.vbox7.com/aj/player/item/options', video_id,
|
||||||
video_id)
|
query={'vid': video_id})['options']
|
||||||
|
|
||||||
if 'error' in response:
|
src_url = data.get('src')
|
||||||
raise ExtractorError(
|
if src_url in (None, '', 'blank'):
|
||||||
'%s said: %s' % (self.IE_NAME, response['error']), expected=True)
|
raise ExtractorError('Video is unavailable', expected=True)
|
||||||
|
|
||||||
video = response['options']
|
fmt_base = url_basename(src_url).rsplit('.', 1)[0].rsplit('_', 1)[0]
|
||||||
|
if fmt_base == 'vn':
|
||||||
|
self.raise_geo_restricted()
|
||||||
|
|
||||||
title = video['title']
|
fmt_base = base_url(src_url) + fmt_base
|
||||||
video_url = video['src']
|
|
||||||
|
|
||||||
if '/na.mp4' in video_url:
|
formats = self._extract_m3u8_formats(
|
||||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
f'{fmt_base}.m3u8', video_id, m3u8_id='hls', fatal=False)
|
||||||
|
# TODO: Add MPD formats, when dash range support is added
|
||||||
|
for res in traverse_obj(data, ('resolutions', lambda _, v: v != 0, {int})):
|
||||||
|
formats.append({
|
||||||
|
'url': f'{fmt_base}_{res}.mp4',
|
||||||
|
'format_id': f'http-{res}',
|
||||||
|
'height': res,
|
||||||
|
})
|
||||||
|
|
||||||
uploader = video.get('uploader')
|
return {
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
|
||||||
'http://vbox7.com/play:%s' % video_id, video_id, fatal=None)
|
|
||||||
|
|
||||||
info = {}
|
|
||||||
|
|
||||||
if webpage:
|
|
||||||
info = self._search_json_ld(
|
|
||||||
webpage.replace('"/*@context"', '"@context"'), video_id,
|
|
||||||
fatal=False)
|
|
||||||
|
|
||||||
info.update({
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'formats': formats,
|
||||||
'url': video_url,
|
**self._search_json_ld(self._download_webpage(
|
||||||
'uploader': uploader,
|
f'https://www.vbox7.com/play:{video_id}', video_id, fatal=False) or '', video_id, fatal=False),
|
||||||
'thumbnail': self._proto_relative_url(
|
**traverse_obj(data, {
|
||||||
info.get('thumbnail') or self._og_search_thumbnail(webpage),
|
'title': ('title', {str}),
|
||||||
'http:'),
|
'uploader': ('uploader', {str}),
|
||||||
})
|
'duration': ('duration', {int_or_none}),
|
||||||
return info
|
}),
|
||||||
|
}
|
||||||
|
|
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||||
|
|
||||||
class ViewLiftBaseIE(InfoExtractor):
|
class ViewLiftBaseIE(InfoExtractor):
|
||||||
_API_BASE = 'https://prod-api.viewlift.com/'
|
_API_BASE = 'https://prod-api.viewlift.com/'
|
||||||
_DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm|failarmy|ftfnext|lnppass\.legapallacanestro|moviespree|app\.myoutdoortv|neoufitness|pflmma|theidentitytb)\.com|(?:hoichoi|app\.horseandcountry|kronon|marquee|supercrosslive)\.tv'
|
_DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm|failarmy|ftfnext|lnppass\.legapallacanestro|moviespree|app\.myoutdoortv|neoufitness|pflmma|theidentitytb|chorki)\.com|(?:hoichoi|app\.horseandcountry|kronon|marquee|supercrosslive)\.tv'
|
||||||
_SITE_MAP = {
|
_SITE_MAP = {
|
||||||
'ftfnext': 'lax',
|
'ftfnext': 'lax',
|
||||||
'funnyforfree': 'snagfilms',
|
'funnyforfree': 'snagfilms',
|
||||||
|
@ -27,6 +27,7 @@ class ViewLiftBaseIE(InfoExtractor):
|
||||||
'snagxtreme': 'snagfilms',
|
'snagxtreme': 'snagfilms',
|
||||||
'theidentitytb': 'tampabay',
|
'theidentitytb': 'tampabay',
|
||||||
'vayafilm': 'snagfilms',
|
'vayafilm': 'snagfilms',
|
||||||
|
'chorki': 'prothomalo',
|
||||||
}
|
}
|
||||||
_TOKENS = {}
|
_TOKENS = {}
|
||||||
|
|
||||||
|
@ -296,6 +297,33 @@ class ViewLiftIE(ViewLiftBaseIE):
|
||||||
}, { # Premium movie
|
}, { # Premium movie
|
||||||
'url': 'https://www.hoichoi.tv/movies/detective-2020',
|
'url': 'https://www.hoichoi.tv/movies/detective-2020',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
|
}, { # Chorki Premium series
|
||||||
|
'url': 'https://www.chorki.com/bn/series/sinpaat',
|
||||||
|
'playlist_mincount': 7,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'bn/series/sinpaat',
|
||||||
|
},
|
||||||
|
}, { # Chorki free movie
|
||||||
|
'url': 'https://www.chorki.com/bn/videos/bangla-movie-bikkhov',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '564e755b-f5c7-4515-aee6-8959bee18c93',
|
||||||
|
'title': 'Bikkhov',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20230824',
|
||||||
|
'timestamp': 1692860553,
|
||||||
|
'categories': ['Action Movies', 'Salman Special'],
|
||||||
|
'tags': 'count:14',
|
||||||
|
'thumbnail': 'https://snagfilms-a.akamaihd.net/dd078ff5-b16e-45e4-9723-501b56b9df0a/images/2023/08/24/1692860450729_1920x1080_16x9Images.jpg',
|
||||||
|
'display_id': 'bn/videos/bangla-movie-bikkhov',
|
||||||
|
'description': 'md5:71492b086450625f4374a3eb824f27dc',
|
||||||
|
'duration': 8002,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, { # Chorki Premium movie
|
||||||
|
'url': 'https://www.chorki.com/bn/videos/something-like-an-autobiography',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
|
@ -269,7 +269,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||||
'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {}
|
'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {}
|
||||||
if not jwt_response.get('jwt'):
|
if not jwt_response.get('jwt'):
|
||||||
return
|
return
|
||||||
headers = {'Authorization': 'jwt %s' % jwt_response['jwt']}
|
headers = {'Authorization': 'jwt %s' % jwt_response['jwt'], 'Accept': 'application/json'}
|
||||||
original_response = self._download_json(
|
original_response = self._download_json(
|
||||||
f'https://api.vimeo.com/videos/{video_id}', video_id,
|
f'https://api.vimeo.com/videos/{video_id}', video_id,
|
||||||
headers=headers, fatal=False, expected_status=(403, 404)) or {}
|
headers=headers, fatal=False, expected_status=(403, 404)) or {}
|
||||||
|
@ -751,6 +751,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||||
video = self._download_json(
|
video = self._download_json(
|
||||||
api_url, video_id, headers={
|
api_url, video_id, headers={
|
||||||
'Authorization': 'jwt ' + token,
|
'Authorization': 'jwt ' + token,
|
||||||
|
'Accept': 'application/json',
|
||||||
}, query={
|
}, query={
|
||||||
'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
|
'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
|
||||||
})
|
})
|
||||||
|
@ -785,7 +786,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||||
jwt = viewer['jwt']
|
jwt = viewer['jwt']
|
||||||
album = self._download_json(
|
album = self._download_json(
|
||||||
'https://api.vimeo.com/albums/' + album_id,
|
'https://api.vimeo.com/albums/' + album_id,
|
||||||
album_id, headers={'Authorization': 'jwt ' + jwt},
|
album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
|
||||||
query={'fields': 'description,name,privacy'})
|
query={'fields': 'description,name,privacy'})
|
||||||
if try_get(album, lambda x: x['privacy']['view']) == 'password':
|
if try_get(album, lambda x: x['privacy']['view']) == 'password':
|
||||||
password = self.get_param('videopassword')
|
password = self.get_param('videopassword')
|
||||||
|
@ -1147,10 +1148,12 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
|
||||||
'https://api.vimeo.com/albums/%s/videos' % album_id,
|
'https://api.vimeo.com/albums/%s/videos' % album_id,
|
||||||
album_id, 'Downloading page %d' % api_page, query=query, headers={
|
album_id, 'Downloading page %d' % api_page, query=query, headers={
|
||||||
'Authorization': 'jwt ' + authorization,
|
'Authorization': 'jwt ' + authorization,
|
||||||
|
'Accept': 'application/json',
|
||||||
})['data']
|
})['data']
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
||||||
return
|
return
|
||||||
|
raise
|
||||||
for video in videos:
|
for video in videos:
|
||||||
link = video.get('link')
|
link = video.get('link')
|
||||||
if not link:
|
if not link:
|
||||||
|
@ -1171,7 +1174,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
|
||||||
jwt = viewer['jwt']
|
jwt = viewer['jwt']
|
||||||
album = self._download_json(
|
album = self._download_json(
|
||||||
'https://api.vimeo.com/albums/' + album_id,
|
'https://api.vimeo.com/albums/' + album_id,
|
||||||
album_id, headers={'Authorization': 'jwt ' + jwt},
|
album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
|
||||||
query={'fields': 'description,name,privacy'})
|
query={'fields': 'description,name,privacy'})
|
||||||
hashed_pass = None
|
hashed_pass = None
|
||||||
if try_get(album, lambda x: x['privacy']['view']) == 'password':
|
if try_get(album, lambda x: x['privacy']['view']) == 'password':
|
||||||
|
|
71
yt_dlp/extractor/zetland.py
Normal file
71
yt_dlp/extractor/zetland.py
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import merge_dicts, unified_timestamp, url_or_none
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class ZetlandDKArticleIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://www\.zetland\.dk/\w+/(?P<id>(?P<story_id>\w{8})-(?P<uploader_id>\w{8})-(?:\w{5}))'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.zetland.dk/historie/sO9aq2MY-a81VP3BY-66e69?utm_source=instagram&utm_medium=linkibio&utm_campaign=artikel',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'sO9aq2MY-a81VP3BY-66e69',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'modified_date': '20240118',
|
||||||
|
'title': 'Afsnit 1: “Det føltes som en kidnapning.” ',
|
||||||
|
'upload_date': '20240116',
|
||||||
|
'uploader_id': 'a81VP3BY',
|
||||||
|
'modified_timestamp': 1705568739,
|
||||||
|
'release_timestamp': 1705377592,
|
||||||
|
'uploader_url': 'https://www.zetland.dk/skribent/a81VP3BY',
|
||||||
|
'uploader': 'Helle Fuusager',
|
||||||
|
'release_date': '20240116',
|
||||||
|
'thumbnail': r're:https://zetland\.imgix\.net/2aafe500-b14e-11ee-bf83-65d5e1283a57/Zetland_Image_1\.jpg',
|
||||||
|
'description': 'md5:9619d426772c133f5abb26db27f26a01',
|
||||||
|
'timestamp': 1705377592,
|
||||||
|
'series_id': '62d54630-e87b-4ab1-a255-8de58dbe1b14',
|
||||||
|
}
|
||||||
|
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
next_js_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']
|
||||||
|
story_data = traverse_obj(next_js_data, ('initialState', 'consume', 'story', 'story'))
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for audio_url in traverse_obj(story_data, ('story_content', 'meta', 'audioFiles', ..., {url_or_none})):
|
||||||
|
formats.append({
|
||||||
|
'url': audio_url,
|
||||||
|
'vcodec': 'none',
|
||||||
|
})
|
||||||
|
|
||||||
|
return merge_dicts({
|
||||||
|
'id': display_id,
|
||||||
|
'formats': formats,
|
||||||
|
'uploader_id': uploader_id
|
||||||
|
}, traverse_obj(story_data, {
|
||||||
|
'title': ((('story_content', 'content', 'title'), 'title'), {str}),
|
||||||
|
'uploader': ('sharer', 'name'),
|
||||||
|
'uploader_id': ('sharer', 'sharer_id'),
|
||||||
|
'description': ('story_content', 'content', 'socialDescription'),
|
||||||
|
'series_id': ('story_content', 'meta', 'seriesId'),
|
||||||
|
'release_timestamp': ('published_at', {unified_timestamp}),
|
||||||
|
'modified_timestamp': ('revised_at', {unified_timestamp}),
|
||||||
|
}, get_all=False), traverse_obj(next_js_data, ('metaInfo', {
|
||||||
|
'title': ((('meta', 'title'), ('ld', 'headline'), ('og', 'og:title'), ('og', 'twitter:title')), {str}),
|
||||||
|
'description': ((('meta', 'description'), ('ld', 'description'), ('og', 'og:description'), ('og', 'twitter:description')), {str}),
|
||||||
|
'uploader': ((('meta', 'author'), ('ld', 'author', 'name')), {str}),
|
||||||
|
'uploader_url': ('ld', 'author', 'url', {url_or_none}),
|
||||||
|
'thumbnail': ((('ld', 'image'), ('og', 'og:image'), ('og', 'twitter:image')), {url_or_none}),
|
||||||
|
'modified_timestamp': ('ld', 'dateModified', {unified_timestamp}),
|
||||||
|
'release_timestamp': ('ld', 'datePublished', {unified_timestamp}),
|
||||||
|
'timestamp': ('ld', 'dateCreated', {unified_timestamp}),
|
||||||
|
}), get_all=False), {
|
||||||
|
'title': self._html_search_meta(['title', 'og:title', 'twitter:title'], webpage),
|
||||||
|
'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
|
||||||
|
'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
|
||||||
|
'uploader': self._html_search_meta(['author'], webpage),
|
||||||
|
'release_timestamp': unified_timestamp(self._html_search_meta(['article:published_time'], webpage)),
|
||||||
|
}, self._search_json_ld(webpage, display_id, fatal=False))
|
|
@ -476,7 +476,8 @@ def create_parser():
|
||||||
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'],
|
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'],
|
||||||
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'],
|
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'],
|
||||||
'2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
|
'2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
|
||||||
'2022': ['no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'],
|
'2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter'],
|
||||||
|
'2023': ['prefer-legacy-http-handler', 'manifest-filesize-approx'],
|
||||||
}
|
}
|
||||||
}, help=(
|
}, help=(
|
||||||
'Options that can help keep compatibility with youtube-dl or youtube-dlc '
|
'Options that can help keep compatibility with youtube-dl or youtube-dlc '
|
||||||
|
|
Loading…
Reference in New Issue
Block a user