Compare commits

..

22 Commits

Author SHA1 Message Date
bashonly
887f6f07d1
[ie/slideslive] Update tests
Authored by: bashonly
2024-02-03 19:39:13 -06:00
bashonly
b1adb4e883
[ie/slideslive] Cleanup
Authored by: bashonly
2024-02-03 19:29:13 -06:00
bashonly
19a46f51c2
Merge branch 'yt-dlp:master' into cleanup/2024-01 2024-02-03 19:26:43 -06:00
YoshichikaAAA
e3ce2b385e
[ie/radiko] Extract more metadata (#9115)
Authored by: YoshichikaAAA
2024-02-03 18:44:17 +00:00
sepro
4253e3b7f4
[ie/CCMA] Extract 1080p DASH formats (#9130)
Closes #5755
Authored by: seproDev
2024-02-03 15:59:43 +01:00
bashonly
8e765755f7
[ie/vimeo] Fix API headers (#9125)
Closes #9124
Authored by: bashonly
2024-02-02 21:15:04 +00:00
c-basalt
ffa017cfc5
[ie/BiliBiliSearch] Set cookie to fix extraction (#9119)
Closes #5083
Authored by: c-basalt
2024-02-02 21:08:29 +00:00
HobbyistDev
a0d50aabc5
[ie/orf:on] Add extractor (#9113)
Closes #8903
Authored by: HobbyistDev
2024-02-02 20:57:53 +00:00
HobbyistDev
2f4b575946
[ie/zetland] Add extractor (#9116)
Closes #9024
Authored by: HobbyistDev
2024-02-02 20:56:29 +00:00
garret
fc2cc626f0
[ie/cineverse] Detect when login required (#9081)
Partially addresses #9072
Authored by: garret1317
2024-01-31 20:21:59 +00:00
columndeeply
a2bac6b7ad
[ie/PrankCastPost] Add extractor (#8933)
Authored by: columndeeply
2024-01-31 20:16:07 +00:00
rrgomes
4b8b0dded8
[ie/nfb] Add support for onf.ca and series (#8997)
Closes #8198
Authored by: bashonly, rrgomes

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-01-31 18:00:15 +00:00
jazz1611
4a6ff0b47a
[ie/redtube] Support redtube.com.br URLs (#9103)
Authored by: jazz1611
2024-01-31 17:56:29 +00:00
Radu Manole
62c65bfaf8
[ie/NinaProtocol] Add extractor (#8946)
Closes #8709, Closes #8764
Authored by: RaduManole, seproDev

Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
2024-01-31 18:41:31 +01:00
bashonly
d63eae7e7f
[core] Don't select storyboard formats as fallback
Closes #7715
Authored by: bashonly
2024-01-31 03:17:51 -06:00
Simon Sawicki
2792092afd
[cookies] Improve error message for Windows --cookies-from-browser chrome issue (#9080)
Authored by: Grub4K
2024-01-31 09:56:14 +01:00
Simon Sawicki
cbed249aaa
[cookies] Fix --cookies-from-browser for snap Firefox (#9016)
Authored by: Grub4K
2024-01-31 09:43:52 +01:00
Simon Sawicki
3725b4f0c9
[core] Add --compat-options 2023 (#9084)
Authored by: Grub4K
2024-01-31 09:35:35 +01:00
sepro
67bb70cd70
[ie/Vbox7] Fix extractor (#9100)
Closes #1098, Closes #5661
Authored by: seproDev
2024-01-29 21:16:46 +01:00
kclauhk
9b5efaf86b
[ie/facebook] Support events (#9055)
Closes #5355
Authored by: kclauhk
2024-01-29 19:43:41 +00:00
sepro
999ea80beb
[ie/art19] Add extractors (#9099)
Authored by: seproDev
2024-01-29 20:38:25 +01:00
Nur Mahmud Ul Alam Tasin
41b6cdb419
[ie/viewlift] Add support for chorki.com (#9095)
Closes #3369
Authored by: NurTasin
2024-01-28 22:33:44 +00:00
21 changed files with 1277 additions and 181 deletions

View File

@ -167,7 +167,8 @@ For ease of use, a few more compat options are available:
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx`
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx`
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
* `--compat-options 2022`: Same as `--compat-options playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`. Use this to enable all future compat options
* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress`
* `--compat-options 2023`: Same as `--compat-options prefer-legacy-http-handler,manifest-filesize-approx`. Use this to enable all future compat options
# INSTALLATION

View File

@ -2451,7 +2451,7 @@ class YoutubeDL:
# for extractors with incomplete formats (audio only (soundcloud)
# or video only (imgur)) best/worst will fallback to
# best/worst {video,audio}-only format
matches = formats
matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))
elif seperate_fallback and not ctx['has_merged_format']:
# for compatibility with youtube-dl when there is no pre-merged format
matches = list(filter(seperate_fallback, formats))

View File

@ -1,6 +1,7 @@
import base64
import collections
import contextlib
import glob
import http.cookiejar
import http.cookies
import io
@ -23,7 +24,8 @@ from .aes import (
aes_gcm_decrypt_and_verify_bytes,
unpad_pkcs7,
)
from .compat import functools
from .compat import functools # isort: split
from .compat import compat_os_name
from .dependencies import (
_SECRETSTORAGE_UNAVAILABLE_REASON,
secretstorage,
@ -31,6 +33,7 @@ from .dependencies import (
)
from .minicurses import MultilinePrinter, QuietMultilinePrinter
from .utils import (
DownloadError,
Popen,
error_to_str,
expand_path,
@ -122,13 +125,14 @@ def _extract_firefox_cookies(profile, container, logger):
return YoutubeDLCookieJar()
if profile is None:
search_root = _firefox_browser_dir()
search_roots = list(_firefox_browser_dirs())
elif _is_path(profile):
search_root = profile
search_roots = [profile]
else:
search_root = os.path.join(_firefox_browser_dir(), profile)
search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()]
search_root = ', '.join(map(repr, search_roots))
cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
cookie_database_path = _newest(_firefox_cookie_dbs(search_roots))
if cookie_database_path is None:
raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
@ -182,12 +186,21 @@ def _extract_firefox_cookies(profile, container, logger):
cursor.connection.close()
def _firefox_browser_dir():
def _firefox_browser_dirs():
if sys.platform in ('cygwin', 'win32'):
return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
yield os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
elif sys.platform == 'darwin':
return os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
return os.path.expanduser('~/.mozilla/firefox')
yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
else:
yield from map(os.path.expanduser, ('~/.mozilla/firefox', '~/snap/firefox/common/.mozilla/firefox'))
def _firefox_cookie_dbs(roots):
for root in map(os.path.abspath, roots):
for pattern in ('', '*/', 'Profiles/*/'):
yield from glob.iglob(os.path.join(root, pattern, 'cookies.sqlite'))
def _get_chromium_based_browser_settings(browser_name):
@ -268,7 +281,7 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
logger.error(f'{browser_name} does not support profiles')
search_root = config['browser_dir']
cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
cookie_database_path = _newest(_find_files(search_root, 'Cookies', logger))
if cookie_database_path is None:
raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
@ -307,6 +320,12 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
counts['unencrypted'] = unencrypted_cookies
logger.debug(f'cookie version breakdown: {counts}')
return jar
except PermissionError as error:
if compat_os_name == 'nt' and error.errno == 13:
message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
logger.error(message)
raise DownloadError(message) # force exit
raise
finally:
if cursor is not None:
cursor.connection.close()
@ -947,7 +966,7 @@ def _get_windows_v10_key(browser_root, logger):
References:
- [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
"""
path = _find_most_recently_used_file(browser_root, 'Local State', logger)
path = _newest(_find_files(browser_root, 'Local State', logger))
if path is None:
logger.error('could not find local state file')
return None
@ -1049,17 +1068,20 @@ def _get_column_names(cursor, table_name):
return [row[1].decode() for row in table_info]
def _find_most_recently_used_file(root, filename, logger):
def _newest(files):
return max(files, key=lambda path: os.lstat(path).st_mtime, default=None)
def _find_files(root, filename, logger):
# if there are multiple browser profiles, take the most recently used one
i, paths = 0, []
i = 0
with _create_progress_bar(logger) as progress_bar:
for curr_root, dirs, files in os.walk(root):
for curr_root, _, files in os.walk(root):
for file in files:
i += 1
progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
if file == filename:
paths.append(os.path.join(curr_root, file))
return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime)
yield os.path.join(curr_root, file)
def _merge_cookie_jars(jars):
@ -1073,7 +1095,7 @@ def _merge_cookie_jars(jars):
def _is_path(value):
return os.path.sep in value
return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep)
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):

View File

@ -138,6 +138,10 @@ from .ard import (
ARDMediathekCollectionIE,
ARDIE,
)
from .art19 import (
Art19IE,
Art19ShowIE,
)
from .arte import (
ArteTVIE,
ArteTVEmbedIE,
@ -1243,7 +1247,10 @@ from .nexx import (
NexxIE,
NexxEmbedIE,
)
from .nfb import NFBIE
from .nfb import (
NFBIE,
NFBSeriesIE,
)
from .nfhsnetwork import NFHSNetworkIE
from .nfl import (
NFLIE,
@ -1280,6 +1287,7 @@ from .niconico import (
NicovideoTagURLIE,
NiconicoLiveIE,
)
from .ninaprotocol import NinaProtocolIE
from .ninecninemedia import (
NineCNineMediaIE,
CPTwentyFourIE,
@ -1386,6 +1394,7 @@ from .ora import OraTVIE
from .orf import (
ORFTVthekIE,
ORFFM4StoryIE,
ORFONIE,
ORFRadioIE,
ORFPodcastIE,
ORFIPTVIE,
@ -1510,7 +1519,7 @@ from .puhutv import (
PuhuTVSerieIE,
)
from .pr0gramm import Pr0grammIE
from .prankcast import PrankCastIE
from .prankcast import PrankCastIE, PrankCastPostIE
from .premiershiprugby import PremiershipRugbyIE
from .presstv import PressTVIE
from .projectveritas import ProjectVeritasIE
@ -2488,6 +2497,7 @@ from .zee5 import (
Zee5SeriesIE,
)
from .zeenews import ZeeNewsIE
from .zetland import ZetlandDKArticleIE
from .zhihu import ZhihuIE
from .zingmp3 import (
ZingMp3IE,

303
yt_dlp/extractor/art19.py Normal file
View File

@ -0,0 +1,303 @@
import re
from .common import InfoExtractor
from ..utils import float_or_none, int_or_none, parse_iso8601, url_or_none
from ..utils.traversal import traverse_obj
class Art19IE(InfoExtractor):
_UUID_REGEX = r'[\da-f]{8}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{12}'
_VALID_URL = [
rf'https?://(?:www\.)?art19\.com/shows/[^/#?]+/episodes/(?P<id>{_UUID_REGEX})',
rf'https?://rss\.art19\.com/episodes/(?P<id>{_UUID_REGEX})\.mp3',
]
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL[0]})']
_TESTS = [{
'url': 'https://rss.art19.com/episodes/5ba1413c-48b8-472b-9cc3-cfd952340bdb.mp3',
'info_dict': {
'id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
'ext': 'mp3',
'title': 'Why Did DeSantis Drop Out?',
'series': 'The Daily Briefing',
'release_timestamp': 1705941275,
'description': 'md5:da38961da4a3f7e419471365e3c6b49f',
'episode': 'Episode 582',
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'series_id': 'ed52a0ab-08b1-4def-8afc-549e4d93296d',
'upload_date': '20240122',
'timestamp': 1705940815,
'episode_number': 582,
'modified_date': '20240122',
'episode_id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
'modified_timestamp': 1705941275,
'release_date': '20240122',
'duration': 527.4,
},
}, {
'url': 'https://art19.com/shows/scamfluencers/episodes/8319b776-4153-4d22-8630-631f204a03dd',
'info_dict': {
'id': '8319b776-4153-4d22-8630-631f204a03dd',
'ext': 'mp3',
'title': 'Martha Stewart: The Homemaker Hustler Part 2',
'modified_date': '20240116',
'upload_date': '20240105',
'modified_timestamp': 1705435802,
'episode_id': '8319b776-4153-4d22-8630-631f204a03dd',
'series_id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'description': 'md5:4aa7cfd1358dc57e729835bc208d7893',
'release_timestamp': 1705305660,
'release_date': '20240115',
'timestamp': 1704481536,
'episode_number': 88,
'series': 'Scamfluencers',
'duration': 2588.37501,
'episode': 'Episode 88',
},
}]
_WEBPAGE_TESTS = [{
'url': 'https://www.nu.nl/formule-1/6291456/verstappen-wordt-een-synoniem-voor-formule-1.html',
'info_dict': {
'id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
'ext': 'mp3',
'title': "'Verstappen wordt een synoniem voor Formule 1'",
'season': 'Seizoen 6',
'description': 'md5:39a7159a31c4cda312b2e893bdd5c071',
'episode_id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
'duration': 3061.82111,
'series_id': '93f4e113-2a60-4609-a564-755058fa40d8',
'release_date': '20231126',
'modified_timestamp': 1701156004,
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'season_number': 6,
'episode_number': 52,
'modified_date': '20231128',
'upload_date': '20231126',
'timestamp': 1701025981,
'season_id': '36097c1e-7455-490d-a2fe-e2f10b4d5f26',
'series': 'De Boordradio',
'release_timestamp': 1701026308,
'episode': 'Episode 52',
},
}, {
'url': 'https://www.wishtv.com/podcast-episode/larry-bucshon-announces-retirement-from-congress/',
'info_dict': {
'id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
'ext': 'mp3',
'title': 'Larry Bucshon announces retirement from congress',
'upload_date': '20240115',
'episode_number': 148,
'episode': 'Episode 148',
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'release_date': '20240115',
'timestamp': 1705328205,
'release_timestamp': 1705329275,
'series': 'All INdiana Politics',
'modified_date': '20240117',
'modified_timestamp': 1705458901,
'series_id': 'c4af6c27-b10f-4ff2-9f84-0f407df86ff1',
'episode_id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
'description': 'md5:53b5239e4d14973a87125c217c255b2a',
'duration': 1256.18848,
},
}]
@classmethod
def _extract_embed_urls(cls, url, webpage):
yield from super()._extract_embed_urls(url, webpage)
for episode_id in re.findall(
rf'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-episode-id=[\'"]({cls._UUID_REGEX})[\'"]', webpage):
yield f'https://rss.art19.com/episodes/{episode_id}.mp3'
def _real_extract(self, url):
episode_id = self._match_id(url)
player_metadata = self._download_json(
f'https://art19.com/episodes/{episode_id}', episode_id,
note='Downloading player metadata', fatal=False,
headers={'Accept': 'application/vnd.art19.v0+json'})
rss_metadata = self._download_json(
f'https://rss.art19.com/episodes/{episode_id}.json', episode_id, fatal=False,
note='Downloading RSS metadata')
formats = [{
'format_id': 'direct',
'url': f'https://rss.art19.com/episodes/{episode_id}.mp3',
'vcodec': 'none',
'acodec': 'mp3',
}]
for fmt_id, fmt_data in traverse_obj(rss_metadata, ('content', 'media', {dict.items}, ...)):
if fmt_id == 'waveform_bin':
continue
fmt_url = traverse_obj(fmt_data, ('url', {url_or_none}))
if not fmt_url:
continue
formats.append({
'format_id': fmt_id,
'url': fmt_url,
'vcodec': 'none',
'acodec': fmt_id,
'quality': -2 if fmt_id == 'ogg' else -1,
})
return {
'id': episode_id,
'formats': formats,
**traverse_obj(player_metadata, ('episode', {
'title': ('title', {str}),
'description': ('description_plain', {str}),
'episode_id': ('id', {str}),
'episode_number': ('episode_number', {int_or_none}),
'season_id': ('season_id', {str}),
'series_id': ('series_id', {str}),
'timestamp': ('created_at', {parse_iso8601}),
'release_timestamp': ('released_at', {parse_iso8601}),
'modified_timestamp': ('updated_at', {parse_iso8601})
})),
**traverse_obj(rss_metadata, ('content', {
'title': ('episode_title', {str}),
'description': ('episode_description_plain', {str}),
'episode_id': ('episode_id', {str}),
'episode_number': ('episode_number', {int_or_none}),
'season': ('season_title', {str}),
'season_id': ('season_id', {str}),
'season_number': ('season_number', {int_or_none}),
'series': ('series_title', {str}),
'series_id': ('series_id', {str}),
'thumbnail': ('cover_image', {url_or_none}),
'duration': ('duration', {float_or_none}),
})),
}
class Art19ShowIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?art19\.com/shows/(?P<id>[\w-]+)(?:/embed)?/?'
_VALID_URL = [
rf'{_VALID_URL_BASE}(?:$|[#?])',
r'https?://rss\.art19\.com/(?P<id>[\w-]+)/?(?:$|[#?])',
]
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL_BASE}[^\'"])']
_TESTS = [{
'url': 'https://www.art19.com/shows/5898c087-a14f-48dc-b6fc-a2280a1ff6e0/',
'info_dict': {
'_type': 'playlist',
'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
'display_id': 'echt-gebeurd',
'title': 'Echt Gebeurd',
'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
'timestamp': 1492642167,
'upload_date': '20170419',
'modified_timestamp': int,
'modified_date': str,
'tags': 'count:7',
},
'playlist_mincount': 425,
}, {
'url': 'https://www.art19.com/shows/echt-gebeurd',
'info_dict': {
'_type': 'playlist',
'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
'display_id': 'echt-gebeurd',
'title': 'Echt Gebeurd',
'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
'timestamp': 1492642167,
'upload_date': '20170419',
'modified_timestamp': int,
'modified_date': str,
'tags': 'count:7',
},
'playlist_mincount': 425,
}, {
'url': 'https://rss.art19.com/scamfluencers',
'info_dict': {
'_type': 'playlist',
'id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
'display_id': 'scamfluencers',
'title': 'Scamfluencers',
'description': 'md5:7d239d670c0ced6dadbf71c4caf764b7',
'timestamp': 1647368573,
'upload_date': '20220315',
'modified_timestamp': int,
'modified_date': str,
'tags': [],
},
'playlist_mincount': 90,
}, {
'url': 'https://art19.com/shows/enthuellt/embed',
'info_dict': {
'_type': 'playlist',
'id': 'e2cacf57-bb8a-4263-aa81-719bcdd4f80c',
'display_id': 'enthuellt',
'title': 'Enthüllt',
'description': 'md5:17752246643414a2fd51744fc9a1c08e',
'timestamp': 1601645860,
'upload_date': '20201002',
'modified_timestamp': int,
'modified_date': str,
'tags': 'count:10',
},
'playlist_mincount': 10,
}]
_WEBPAGE_TESTS = [{
'url': 'https://deconstructingyourself.com/deconstructing-yourself-podcast',
'info_dict': {
'_type': 'playlist',
'id': 'cfbb9b01-c295-4adb-8726-adde7c03cf21',
'display_id': 'deconstructing-yourself',
'title': 'Deconstructing Yourself',
'description': 'md5:dab5082b28b248a35476abf64768854d',
'timestamp': 1570581181,
'upload_date': '20191009',
'modified_timestamp': int,
'modified_date': str,
'tags': 'count:5',
},
'playlist_mincount': 80,
}, {
'url': 'https://chicagoreader.com/columns-opinion/podcasts/ben-joravsky-show-podcast-episodes/',
'info_dict': {
'_type': 'playlist',
'id': '9dfa2c37-ab87-4c13-8388-4897914313ec',
'display_id': 'the-ben-joravsky-show',
'title': 'The Ben Joravsky Show',
'description': 'md5:c0f3ec0ee0dbea764390e521adc8780a',
'timestamp': 1550875095,
'upload_date': '20190222',
'modified_timestamp': int,
'modified_date': str,
'tags': ['Chicago Politics', 'chicago', 'Ben Joravsky'],
},
'playlist_mincount': 1900,
}]
@classmethod
def _extract_embed_urls(cls, url, webpage):
yield from super()._extract_embed_urls(url, webpage)
for series_id in re.findall(
r'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-series-id=[\'"]([\w-]+)[\'"]', webpage):
yield f'https://art19.com/shows/{series_id}'
def _real_extract(self, url):
series_id = self._match_id(url)
series_metadata = self._download_json(
f'https://art19.com/series/{series_id}', series_id, note='Downloading series metadata',
headers={'Accept': 'application/vnd.art19.v0+json'})
return {
'_type': 'playlist',
'entries': [
self.url_result(f'https://rss.art19.com/episodes/{episode_id}.mp3', Art19IE)
for episode_id in traverse_obj(series_metadata, ('series', 'episode_ids', ..., {str}))
],
**traverse_obj(series_metadata, ('series', {
'id': ('id', {str}),
'display_id': ('slug', {str}),
'title': ('title', {str}),
'description': ('description_plain', {str}),
'timestamp': ('created_at', {parse_iso8601}),
'modified_timestamp': ('updated_at', {parse_iso8601}),
})),
'tags': traverse_obj(series_metadata, ('tags', ..., 'name', {str})),
}

View File

@ -7,6 +7,7 @@ import math
import re
import time
import urllib.parse
import uuid
from .common import InfoExtractor, SearchInfoExtractor
from ..dependencies import Cryptodome
@ -1464,8 +1465,37 @@ class BiliBiliSearchIE(SearchInfoExtractor):
IE_DESC = 'Bilibili video search'
_MAX_RESULTS = 100000
_SEARCH_KEY = 'bilisearch'
_TESTS = [{
'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
'playlist_count': 3,
'info_dict': {
'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
},
'playlist': [{
'info_dict': {
'id': 'BV1n44y1Q7sc',
'ext': 'mp4',
'title': '“出道一年我怎么还在等你单推的女人睡觉后开播啊”【一分钟了解靡烟miya】',
'timestamp': 1669889987,
'upload_date': '20221201',
'description': 'md5:43343c0973defff527b5a4b403b4abf9',
'tags': list,
'uploader': '靡烟miya',
'duration': 123.156,
'uploader_id': '1958703906',
'comment_count': int,
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 988222410_part1'],
},
}],
}]
def _search_results(self, query):
if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
for page_num in itertools.count(1):
videos = self._download_json(
'https://api.bilibili.com/x/web-interface/search/type', query,

View File

@ -1,6 +1,7 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
determine_ext,
int_or_none,
parse_duration,
parse_resolution,
@ -60,6 +61,7 @@ class CCMAIE(InfoExtractor):
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
'media': media_type,
'idint': media_id,
'format': 'dm',
})
formats = []
@ -69,6 +71,10 @@ class CCMAIE(InfoExtractor):
format_url = url_or_none(format_.get('file'))
if not format_url:
continue
if determine_ext(format_url) == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, media_id, mpd_id='dash', fatal=False))
continue
label = format_.get('label')
f = parse_resolution(label)
f.update({

View File

@ -67,7 +67,10 @@ class CineverseIE(CineverseBaseIE):
html = self._download_webpage(url, video_id)
idetails = self._search_nextjs_data(html, video_id)['props']['pageProps']['idetails']
if idetails.get('err_code') == 1200:
err_code = idetails.get('err_code')
if err_code == 1002:
self.raise_login_required()
elif err_code == 1200:
self.raise_geo_restricted(
'This video is not available from your location due to geo restriction. '
'You may be able to bypass it by using the /details/ page instead of the /watch/ page',

View File

@ -54,6 +54,7 @@ class FacebookIE(InfoExtractor):
)\?(?:.*?)(?:v|video_id|story_fbid)=|
[^/]+/videos/(?:[^/]+/)?|
[^/]+/posts/|
events/(?:[^/]+/)?|
groups/[^/]+/(?:permalink|posts)/|
watchparty/
)|
@ -399,6 +400,18 @@ class FacebookIE(InfoExtractor):
},
'playlist_count': 1,
'skip': 'Requires logging in',
}, {
# data.event.cover_media_renderer.cover_video
'url': 'https://m.facebook.com/events/1509582499515440',
'info_dict': {
'id': '637246984455045',
'ext': 'mp4',
'title': 'ANALISI IN CAMPO OSCURO " Coaguli nel sangue dei vaccinati"',
'description': 'Other event by Comitato Liberi Pensatori on Tuesday, October 18 2022',
'thumbnail': r're:^https?://.*',
'uploader': 'Comitato Liberi Pensatori',
'uploader_id': '100065709540881',
},
}]
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
_api_config = {
@ -473,38 +486,10 @@ class FacebookIE(InfoExtractor):
r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
post = traverse_obj(post_data, (
..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
automatic_captions, subtitles = {}, {}
subs_data = traverse_obj(post, (..., 'video', ..., 'attachments', ..., lambda k, v: (
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')))
is_video_broadcast = get_first(subs_data, 'is_video_broadcast', expected_type=bool)
captions = get_first(subs_data, 'video_available_captions_locales', 'captions_url')
if url_or_none(captions): # if subs_data only had a 'captions_url'
locale = self._html_search_meta(['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US')
subtitles[locale] = [{'url': captions}]
# or else subs_data had 'video_available_captions_locales', a list of dicts
for caption in traverse_obj(captions, (
{lambda x: sorted(x, key=lambda c: c['locale'])}, lambda _, v: v['captions_url'])
):
lang = caption.get('localized_language') or ''
subs = {
'url': caption['captions_url'],
'name': format_field(caption, 'localized_country', f'{lang} (%s)', default=lang),
}
if caption.get('localized_creation_method') or is_video_broadcast:
automatic_captions.setdefault(caption['locale'], []).append(subs)
else:
subtitles.setdefault(caption['locale'], []).append(subs)
media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
title = get_first(media, ('title', 'text'))
description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
uploader_data = (
get_first(media, ('owner', {dict}))
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
or get_first(post, ('node', 'actors', ..., {dict})) or {})
page_title = title or self._html_search_regex((
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(?P<content>.*?)</span>',
@ -513,11 +498,15 @@ class FacebookIE(InfoExtractor):
description = description or self._html_search_meta(
['description', 'og:description', 'twitter:description'],
webpage, 'description', default=None)
uploader_data = (
get_first(media, ('owner', {dict}))
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
or get_first(post, ('node', 'actors', ..., {dict}))
or get_first(post, ('event', 'event_creator', {dict})) or {})
uploader = uploader_data.get('name') or (
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
or self._search_regex(
(r'ownerName\s*:\s*"([^"]+)"', *self._og_regexes('title')), webpage, 'uploader', fatal=False))
timestamp = int_or_none(self._search_regex(
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
'timestamp', default=None))
@ -539,8 +528,6 @@ class FacebookIE(InfoExtractor):
webpage, 'view count', default=None)),
'concurrent_view_count': get_first(post, (
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
'automatic_captions': automatic_captions,
'subtitles': subtitles,
}
info_json_ld = self._search_json_ld(webpage, video_id, default={})
@ -638,6 +625,29 @@ class FacebookIE(InfoExtractor):
'url': playable_url,
})
extract_dash_manifest(video, formats)
automatic_captions, subtitles = {}, {}
is_broadcast = traverse_obj(video, ('is_video_broadcast', {bool}))
for caption in traverse_obj(video, (
'video_available_captions_locales',
{lambda x: sorted(x, key=lambda c: c['locale'])},
lambda _, v: url_or_none(v['captions_url'])
)):
lang = caption.get('localized_language') or 'und'
subs = {
'url': caption['captions_url'],
'name': format_field(caption, 'localized_country', f'{lang} (%s)', default=lang),
}
if caption.get('localized_creation_method') or is_broadcast:
automatic_captions.setdefault(caption['locale'], []).append(subs)
else:
subtitles.setdefault(caption['locale'], []).append(subs)
captions_url = traverse_obj(video, ('captions_url', {url_or_none}))
if captions_url and not automatic_captions and not subtitles:
locale = self._html_search_meta(
['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US')
(automatic_captions if is_broadcast else subtitles)[locale] = [{'url': captions_url}]
info = {
'id': v_id,
'formats': formats,
@ -647,6 +657,8 @@ class FacebookIE(InfoExtractor):
'timestamp': traverse_obj(video, 'publish_time', 'creation_time', expected_type=int_or_none),
'duration': (float_or_none(video.get('playable_duration_in_ms'), 1000)
or float_or_none(video.get('length_in_second'))),
'automatic_captions': automatic_captions,
'subtitles': subtitles,
}
process_formats(info)
description = try_get(video, lambda x: x['savable_description']['text'])
@ -681,7 +693,8 @@ class FacebookIE(InfoExtractor):
for edge in edges:
parse_attachment(edge, key='node')
video = data.get('video') or {}
video = traverse_obj(data, (
'event', 'cover_media_renderer', 'cover_video'), 'video', expected_type=dict) or {}
if video:
attachments = try_get(video, [
lambda x: x['story']['attachments'],

View File

@ -1,10 +1,54 @@
from .common import InfoExtractor
from ..utils import int_or_none
from ..utils import (
int_or_none,
join_nonempty,
merge_dicts,
parse_count,
url_or_none,
urljoin,
)
from ..utils.traversal import traverse_obj
class NFBIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nfb\.ca/film/(?P<id>[^/?#&]+)'
class NFBBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?(?P<site>nfb|onf)\.ca'
_GEO_COUNTRIES = ['CA']
def _extract_ep_data(self, webpage, video_id, fatal=False):
return self._search_json(
r'const\s+episodesData\s*=', webpage, 'episode data', video_id,
contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal) or []
def _extract_ep_info(self, data, video_id, slug=None):
info = traverse_obj(data, (lambda _, v: video_id in v['embed_url'], {
'description': ('description', {str}),
'thumbnail': ('thumbnail_url', {url_or_none}),
'uploader': ('data_layer', 'episodeMaker', {str}),
'release_year': ('data_layer', 'episodeYear', {int_or_none}),
'episode': ('data_layer', 'episodeTitle', {str}),
'season': ('data_layer', 'seasonTitle', {str}),
'season_number': ('data_layer', 'seasonTitle', {parse_count}),
'series': ('data_layer', 'seriesTitle', {str}),
}), get_all=False)
return {
**info,
'id': video_id,
'title': join_nonempty('series', 'episode', from_dict=info, delim=' - '),
'episode_number': int_or_none(self._search_regex(
r'[/-]e(?:pisode)?-?(\d+)(?:[/-]|$)', slug or video_id, 'episode number', default=None)),
}
class NFBIE(NFBBaseIE):
IE_NAME = 'nfb'
IE_DESC = 'nfb.ca and onf.ca films and episodes'
_VALID_URL = [
rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>film)/(?P<id>[^/?#&]+)',
rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>series?)/(?P<id>[^/?#&]+/s(?:ea|ai)son\d+/episode\d+)',
]
_TESTS = [{
'note': 'NFB film',
'url': 'https://www.nfb.ca/film/trafficopter/',
'info_dict': {
'id': 'trafficopter',
@ -14,29 +58,192 @@ class NFBIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Barrie Howells',
'release_year': 1972,
'duration': 600.0,
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'ONF film',
'url': 'https://www.onf.ca/film/mal-du-siecle/',
'info_dict': {
'id': 'mal-du-siecle',
'ext': 'mp4',
'title': 'Le mal du siècle',
'description': 'md5:1abf774d77569ebe603419f2d344102b',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Catherine Lepage',
'release_year': 2019,
'duration': 300.0,
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'NFB episode with English title',
'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/season1/episode9/',
'info_dict': {
'id': 'true-north-episode9-true-north-finale-making-it',
'ext': 'mp4',
'title': 'True North: Inside the Rise of Toronto Basketball - Finale: Making It',
'description': 'We catch up with each player in the midst of their journey as they reflect on their road ahead.',
'series': 'True North: Inside the Rise of Toronto Basketball',
'release_year': 2018,
'season': 'Season 1',
'season_number': 1,
'episode': 'Finale: Making It',
'episode_number': 9,
'uploader': 'Ryan Sidhoo',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'ONF episode with French title',
'url': 'https://www.onf.ca/serie/direction-nord-la-montee-du-basketball-a-toronto/saison1/episode9/',
'info_dict': {
'id': 'direction-nord-episode-9',
'ext': 'mp4',
'title': 'Direction nord La montée du basketball à Toronto - Finale : Réussir',
'description': 'md5:349a57419b71432b97bf6083d92b029d',
'series': 'Direction nord La montée du basketball à Toronto',
'release_year': 2018,
'season': 'Saison 1',
'season_number': 1,
'episode': 'Finale : Réussir',
'episode_number': 9,
'uploader': 'Ryan Sidhoo',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'NFB episode with French title (needs geo-bypass)',
'url': 'https://www.nfb.ca/series/etoile-du-nord/saison1/episode1/',
'info_dict': {
'id': 'etoile-du-nord-episode-1-lobservation',
'ext': 'mp4',
'title': 'Étoile du Nord - L\'observation',
'description': 'md5:161a4617260dee3de70f509b2c9dd21b',
'series': 'Étoile du Nord',
'release_year': 2023,
'season': 'Saison 1',
'season_number': 1,
'episode': 'L\'observation',
'episode_number': 1,
'uploader': 'Patrick Bossé',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'ONF episode with English title (needs geo-bypass)',
'url': 'https://www.onf.ca/serie/north-star/season1/episode1/',
'info_dict': {
'id': 'north-star-episode-1-observation',
'ext': 'mp4',
'title': 'North Star - Observation',
'description': 'md5:c727f370839d8a817392b9e3f23655c7',
'series': 'North Star',
'release_year': 2023,
'season': 'Season 1',
'season_number': 1,
'episode': 'Observation',
'episode_number': 1,
'uploader': 'Patrick Bossé',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'NFB episode with /film/ URL and English title (needs geo-bypass)',
'url': 'https://www.nfb.ca/film/north-star-episode-1-observation/',
'info_dict': {
'id': 'north-star-episode-1-observation',
'ext': 'mp4',
'title': 'North Star - Observation',
'description': 'md5:c727f370839d8a817392b9e3f23655c7',
'series': 'North Star',
'release_year': 2023,
'season': 'Season 1',
'season_number': 1,
'episode': 'Observation',
'episode_number': 1,
'uploader': 'Patrick Bossé',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'ONF episode with /film/ URL and French title (needs geo-bypass)',
'url': 'https://www.onf.ca/film/etoile-du-nord-episode-1-lobservation/',
'info_dict': {
'id': 'etoile-du-nord-episode-1-lobservation',
'ext': 'mp4',
'title': 'Étoile du Nord - L\'observation',
'description': 'md5:161a4617260dee3de70f509b2c9dd21b',
'series': 'Étoile du Nord',
'release_year': 2023,
'season': 'Saison 1',
'season_number': 1,
'episode': 'L\'observation',
'episode_number': 1,
'uploader': 'Patrick Bossé',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'Season 2 episode w/o episode num in id, extract from json ld',
'url': 'https://www.onf.ca/film/liste-des-choses-qui-existent-saison-2-ours',
'info_dict': {
'id': 'liste-des-choses-qui-existent-saison-2-ours',
'ext': 'mp4',
'title': 'La liste des choses qui existent - L\'ours en peluche',
'description': 'md5:d5e8d8fc5f3a7385a9cf0f509b37e28a',
'series': 'La liste des choses qui existent',
'release_year': 2022,
'season': 'Saison 2',
'season_number': 2,
'episode': 'L\'ours en peluche',
'episode_number': 12,
'uploader': 'Francis Papillon',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'NFB film /embed/player/ page',
'url': 'https://www.nfb.ca/film/afterlife/embed/player/',
'info_dict': {
'id': 'afterlife',
'ext': 'mp4',
'title': 'Afterlife',
'description': 'md5:84951394f594f1fb1e62d9c43242fdf5',
'release_year': 1978,
'duration': 420.0,
'uploader': 'Ishu Patel',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
site, type_, slug = self._match_valid_url(url).group('site', 'type', 'id')
# Need to construct the URL since we match /embed/player/ URLs as well
webpage, urlh = self._download_webpage_handle(f'https://www.{site}.ca/{type_}/{slug}/', slug)
# type_ can change from film to serie(s) after redirect; new slug may have episode number
type_, slug = self._match_valid_url(urlh.url).group('type', 'id')
webpage = self._download_webpage('https://www.nfb.ca/film/%s/' % video_id, video_id)
embed_url = urljoin(f'https://www.{site}.ca', self._html_search_regex(
r'<[^>]+\bid=["\']player-iframe["\'][^>]*\bsrc=["\']([^"\']+)', webpage, 'embed url'))
video_id = self._match_id(embed_url) # embed url has unique slug
player = self._download_webpage(embed_url, video_id, 'Downloading player page')
if 'MESSAGE_GEOBLOCKED' in player:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
iframe = self._html_search_regex(
r'<[^>]+\bid=["\']player-iframe["\'][^>]*src=["\']([^"\']+)',
webpage, 'iframe', default=None, fatal=True)
if iframe.startswith('/'):
iframe = f'https://www.nfb.ca{iframe}'
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
self._html_search_regex(r'source:\s*\'([^\']+)', player, 'm3u8 url'),
video_id, 'mp4', m3u8_id='hls')
player = self._download_webpage(iframe, video_id)
if dv_source := self._html_search_regex(r'dvSource:\s*\'([^\']+)', player, 'dv', default=None):
fmts, subs = self._extract_m3u8_formats_and_subtitles(
dv_source, video_id, 'mp4', m3u8_id='dv', preference=-2, fatal=False)
for fmt in fmts:
fmt['format_note'] = 'described video'
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
source = self._html_search_regex(
r'source:\s*\'([^\']+)',
player, 'source', default=None, fatal=True)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(source, video_id, ext='mp4')
return {
info = {
'id': video_id,
'title': self._html_search_regex(
r'<[^>]+\bid=["\']titleHeader["\'][^>]*>\s*<h1[^>]*>\s*([^<]+?)\s*</h1>',
@ -45,14 +252,49 @@ class NFBIE(InfoExtractor):
r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*<p[^>]*>\s*([^<]+)',
webpage, 'description', default=None),
'thumbnail': self._html_search_regex(
r'poster:\s*\'([^\']+)',
player, 'thumbnail', default=None),
r'poster:\s*\'([^\']+)', player, 'thumbnail', default=None),
'uploader': self._html_search_regex(
r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
webpage, 'uploader', default=None),
r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
'release_year': int_or_none(self._html_search_regex(
r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)',
webpage, 'release_year', default=None)),
} if type_ == 'film' else self._extract_ep_info(self._extract_ep_data(webpage, video_id, slug), video_id)
return merge_dicts({
'formats': formats,
'subtitles': subtitles,
}
}, info, self._search_json_ld(webpage, video_id, default={}))
class NFBSeriesIE(NFBBaseIE):
IE_NAME = 'nfb:series'
IE_DESC = 'nfb.ca and onf.ca series'
_VALID_URL = rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>series?)/(?P<id>[^/?#&]+)/?(?:[?#]|$)'
_TESTS = [{
'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/',
'playlist_mincount': 9,
'info_dict': {
'id': 'true-north-inside-the-rise-of-toronto-basketball',
},
}, {
'url': 'https://www.onf.ca/serie/la-liste-des-choses-qui-existent-serie/',
'playlist_mincount': 26,
'info_dict': {
'id': 'la-liste-des-choses-qui-existent-serie',
},
}]
def _entries(self, episodes):
for episode in traverse_obj(episodes, lambda _, v: NFBIE.suitable(v['embed_url'])):
mobj = NFBIE._match_valid_url(episode['embed_url'])
yield self.url_result(
mobj[0], NFBIE, **self._extract_ep_info([episode], mobj.group('id')))
def _real_extract(self, url):
site, type_, series_id = self._match_valid_url(url).group('site', 'type', 'id')
season_path = 'saison' if type_ == 'serie' else 'season'
webpage = self._download_webpage(
f'https://www.{site}.ca/{type_}/{series_id}/{season_path}1/episode1', series_id)
episodes = self._extract_ep_data(webpage, series_id, fatal=True)
return self.playlist_result(self._entries(episodes), series_id)

View File

@ -0,0 +1,225 @@
from .common import InfoExtractor
from ..utils import int_or_none, mimetype2ext, parse_iso8601, url_or_none
from ..utils.traversal import traverse_obj
class NinaProtocolIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ninaprotocol\.com/releases/(?P<id>[^/#?]+)'
_TESTS = [{
'url': 'https://www.ninaprotocol.com/releases/3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ',
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ',
'title': 'The Spatulas - March Chant',
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
'channel': 'ppm',
'description': 'md5:bb9f9d39d8f786449cd5d0ff7c5772db',
'album': 'The Spatulas - March Chant',
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'timestamp': 1701417610,
'uploader': 'ppmrecs',
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'display_id': 'the-spatulas-march-chant',
'upload_date': '20231201',
'album_artist': 'Post Present Medium ',
},
'playlist': [{
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_1',
'title': 'March Chant In April',
'track': 'March Chant In April',
'ext': 'mp3',
'duration': 152,
'track_number': 1,
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
'uploader': 'ppmrecs',
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'timestamp': 1701417610,
'channel': 'ppm',
'album': 'The Spatulas - March Chant',
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'upload_date': '20231201',
'album_artist': 'Post Present Medium ',
}
}, {
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_2',
'title': 'Rescue Mission',
'track': 'Rescue Mission',
'ext': 'mp3',
'duration': 212,
'track_number': 2,
'album_artist': 'Post Present Medium ',
'uploader': 'ppmrecs',
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'channel': 'ppm',
'upload_date': '20231201',
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'timestamp': 1701417610,
'album': 'The Spatulas - March Chant',
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
}
}, {
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_3',
'title': 'Slinger Style',
'track': 'Slinger Style',
'ext': 'mp3',
'duration': 179,
'track_number': 3,
'timestamp': 1701417610,
'upload_date': '20231201',
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'album_artist': 'Post Present Medium ',
'album': 'The Spatulas - March Chant',
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'uploader': 'ppmrecs',
'channel': 'ppm',
}
}, {
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_4',
'title': 'Psychic Signal',
'track': 'Psychic Signal',
'ext': 'mp3',
'duration': 220,
'track_number': 4,
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'upload_date': '20231201',
'album': 'The Spatulas - March Chant',
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'timestamp': 1701417610,
'album_artist': 'Post Present Medium ',
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'channel': 'ppm',
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
'uploader': 'ppmrecs',
}
}, {
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_5',
'title': 'Curvy Color',
'track': 'Curvy Color',
'ext': 'mp3',
'duration': 148,
'track_number': 5,
'timestamp': 1701417610,
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'album': 'The Spatulas - March Chant',
'album_artist': 'Post Present Medium ',
'channel': 'ppm',
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'uploader': 'ppmrecs',
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'upload_date': '20231201',
}
}, {
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_6',
'title': 'Caveman Star',
'track': 'Caveman Star',
'ext': 'mp3',
'duration': 121,
'track_number': 6,
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'album_artist': 'Post Present Medium ',
'uploader': 'ppmrecs',
'timestamp': 1701417610,
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
'album': 'The Spatulas - March Chant',
'channel': 'ppm',
'upload_date': '20231201',
},
}],
}, {
'url': 'https://www.ninaprotocol.com/releases/f-g-s-american-shield',
'info_dict': {
'id': '76PZnJwaMgViQHYfA4NYJXds7CmW6vHQKAtQUxGene6J',
'description': 'md5:63f08d5db558b4b36e1896f317062721',
'title': 'F.G.S. - American Shield',
'uploader_id': 'Ej3rozs11wYqFk1Gs6oggGCkGLz8GzBhmJfnUxf6gPci',
'channel_id': '6JuksCZPXuP16wJ1BUfwuukJzh42C7guhLrFPPkVJfyE',
'channel': 'tinkscough',
'tags': [],
'album_artist': 'F.G.S.',
'album': 'F.G.S. - American Shield',
'thumbnail': 'https://www.arweave.net/YJpgImkXLT9SbpFb576KuZ5pm6bdvs452LMs3Rx6lm8',
'display_id': 'f-g-s-american-shield',
'uploader': 'flannerysilva',
'timestamp': 1702395858,
'upload_date': '20231212',
},
'playlist_count': 1,
}, {
'url': 'https://www.ninaprotocol.com/releases/time-to-figure-things-out',
'info_dict': {
'id': '6Zi1nC5hj6b13NkpxVYwRhFy6mYA7oLBbe9DMrgGDcYh',
'display_id': 'time-to-figure-things-out',
'description': 'md5:960202ed01c3134bb8958f1008527e35',
'timestamp': 1706283607,
'title': 'DJ STEPDAD - time to figure things out',
'album_artist': 'DJ STEPDAD',
'uploader': 'tddvsss',
'upload_date': '20240126',
'album': 'time to figure things out',
'uploader_id': 'AXQNRgTyYsySyAMFDwxzumuGjfmoXshorCesjpquwCBi',
'thumbnail': 'https://www.arweave.net/O4i8bcKVqJVZvNeHHFp6r8knpFGh9ZwEgbeYacr4nss',
'tags': [],
},
'playlist_count': 4,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
release = self._download_json(
f'https://api.ninaprotocol.com/v1/releases/{video_id}', video_id)['release']
video_id = release.get('publicKey') or video_id
common_info = traverse_obj(release, {
'album': ('metadata', 'properties', 'title', {str}),
'album_artist': ((('hub', 'data'), 'publisherAccount'), 'displayName', {str}),
'timestamp': ('datetime', {parse_iso8601}),
'thumbnail': ('metadata', 'image', {url_or_none}),
'uploader': ('publisherAccount', 'handle', {str}),
'uploader_id': ('publisherAccount', 'publicKey', {str}),
'channel': ('hub', 'handle', {str}),
'channel_id': ('hub', 'publicKey', {str}),
}, get_all=False)
common_info['tags'] = traverse_obj(release, ('metadata', 'properties', 'tags', ..., {str}))
entries = []
for track_num, track in enumerate(traverse_obj(release, (
'metadata', 'properties', 'files', lambda _, v: url_or_none(v['uri']))), 1):
entries.append({
'id': f'{video_id}_{track_num}',
'url': track['uri'],
**traverse_obj(track, {
'title': ('track_title', {str}),
'track': ('track_title', {str}),
'ext': ('type', {mimetype2ext}),
'track_number': ('track', {int_or_none}),
'duration': ('duration', {int_or_none}),
}),
'vcodec': 'none',
**common_info,
})
return {
'_type': 'playlist',
'id': video_id,
'entries': entries,
**traverse_obj(release, {
'display_id': ('slug', {str}),
'title': ('metadata', 'name', {str}),
'description': ('metadata', 'description', {str}),
}),
**common_info,
}

View File

@ -1,3 +1,4 @@
import base64
import functools
import re
@ -565,3 +566,66 @@ class ORFFM4StoryIE(InfoExtractor):
})
return self.playlist_result(entries)
class ORFONIE(InfoExtractor):
IE_NAME = 'orf:on'
_VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d{8})/(?P<slug>[\w-]+)'
_TESTS = [{
'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
'info_dict': {
'id': '14210000',
'ext': 'mp4',
'duration': 2651.08,
'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0167/98/thumb_16697671_segments_highlight_teaser.jpeg',
'title': 'School of Champions (4/8)',
'description': 'md5:d09ad279fc2e8502611e7648484b6afd',
'media_type': 'episode',
'timestamp': 1706472362,
'upload_date': '20240128',
}
}]
def _extract_video(self, video_id, display_id):
encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()
api_json = self._download_json(
f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id)
formats, subtitles = [], {}
for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):
for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})):
if manifest_type == 'hls':
fmts, subs = self._extract_m3u8_formats_and_subtitles(
manifest_url, display_id, fatal=False, m3u8_id='hls')
elif manifest_type == 'dash':
fmts, subs = self._extract_mpd_formats_and_subtitles(
manifest_url, display_id, fatal=False, mpd_id='dash')
else:
continue
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
return {
'id': video_id,
'formats': formats,
'subtitles': subtitles,
**traverse_obj(api_json, {
'duration': ('duration_second', {float_or_none}),
'title': (('title', 'headline'), {str}),
'description': (('description', 'teaser_text'), {str}),
'media_type': ('video_type', {str}),
}, get_all=False),
}
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'slug')
webpage = self._download_webpage(url, display_id)
return {
'id': video_id,
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
'description': self._html_search_meta(
['description', 'og:description', 'twitter:description'], webpage, default=None),
**self._search_json_ld(webpage, display_id, fatal=False),
**self._extract_video(video_id, display_id),
}

View File

@ -1,5 +1,8 @@
import json
from .common import InfoExtractor
from ..utils import parse_iso8601, traverse_obj, try_call
from ..utils import float_or_none, parse_iso8601, str_or_none, try_call
from ..utils.traversal import traverse_obj
class PrankCastIE(InfoExtractor):
@ -64,3 +67,71 @@ class PrankCastIE(InfoExtractor):
'categories': [json_info.get('broadcast_category')],
'tags': try_call(lambda: json_info['broadcast_tags'].split(','))
}
class PrankCastPostIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?prankcast\.com/[^/?#]+/posts/(?P<id>\d+)-(?P<display_id>[^/?#]+)'
_TESTS = [{
'url': 'https://prankcast.com/devonanustart/posts/6214-happy-national-rachel-day-',
'info_dict': {
'id': '6214',
'ext': 'mp3',
'title': 'Happy National Rachel Day!',
'display_id': 'happy-national-rachel-day-',
'timestamp': 1704333938,
'uploader': 'Devonanustart',
'channel_id': '4',
'duration': 13175,
'cast': ['Devonanustart'],
'description': '',
'categories': ['prank call'],
'upload_date': '20240104'
}
}, {
'url': 'https://prankcast.com/despicabledogs/posts/6217-jake-the-work-crow-',
'info_dict': {
'id': '6217',
'ext': 'mp3',
'title': 'Jake the Work Crow!',
'display_id': 'jake-the-work-crow-',
'timestamp': 1704346592,
'uploader': 'despicabledogs',
'channel_id': '957',
'duration': 263.287,
'cast': ['despicabledogs'],
'description': 'https://imgur.com/a/vtxLvKU',
'categories': [],
'upload_date': '20240104'
}
}]
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
webpage = self._download_webpage(url, video_id)
post = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['ssr_data_posts']
content = self._parse_json(post['post_contents_json'], video_id)[0]
uploader = post.get('user_name')
guests_json = traverse_obj(content, ('guests_json', {json.loads}, {dict})) or {}
return {
'id': video_id,
'title': post.get('post_title') or self._og_search_title(webpage),
'display_id': display_id,
'url': content.get('url'),
'timestamp': parse_iso8601(content.get('start_date') or content.get('crdate'), ' '),
'uploader': uploader,
'channel_id': str_or_none(post.get('user_id')),
'duration': float_or_none(content.get('duration')),
'cast': list(filter(None, [uploader] + traverse_obj(guests_json, (..., 'name')))),
'description': post.get('post_body'),
'categories': list(filter(None, [content.get('category')])),
'tags': try_call(lambda: list(filter('', post['post_tags'].split(',')))),
'subtitles': {
'live_chat': [{
'url': f'https://prankcast.com/api/private/chat/select-broadcast?id={post["content_id"]}&cache=',
'ext': 'json',
}],
} if post.get('content_id') else None
}

View File

@ -1,5 +1,6 @@
import base64
import random
import re
import urllib.parse
from .common import InfoExtractor
@ -11,6 +12,7 @@ from ..utils import (
unified_timestamp,
update_url_query,
)
from ..utils.traversal import traverse_obj
class RadikoBaseIE(InfoExtractor):
@ -159,6 +161,12 @@ class RadikoBaseIE(InfoExtractor):
return formats
def _extract_performers(self, prog):
performers = traverse_obj(prog, (
'pfm/text()', ..., {lambda x: re.split(r'[//、 ,]', x)}, ..., {str.strip}))
# TODO: change 'artist' fields to 'artists' and return traversal list instead of str
return ', '.join(performers) or None
class RadikoIE(RadikoBaseIE):
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
@ -186,10 +194,12 @@ class RadikoIE(RadikoBaseIE):
return {
'id': video_id,
'title': try_call(lambda: prog.find('title').text),
'artist': self._extract_performers(prog),
'description': clean_html(try_call(lambda: prog.find('info').text)),
'uploader': try_call(lambda: station_program.find('.//name').text),
'uploader_id': station,
'timestamp': vid_int,
'duration': try_call(lambda: unified_timestamp(radio_end, False) - unified_timestamp(radio_begin, False)),
'is_live': True,
'formats': self._extract_formats(
video_id=video_id, station=station, is_onair=False,
@ -243,6 +253,7 @@ class RadikoRadioIE(RadikoBaseIE):
return {
'id': station,
'title': title,
'artist': self._extract_performers(prog),
'description': description,
'uploader': station_name,
'uploader_id': station,

View File

@ -12,7 +12,7 @@ from ..utils import (
class RedTubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com(?:\.br)?/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
_EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)']
_TESTS = [{
'url': 'https://www.redtube.com/38864951',
@ -35,6 +35,9 @@ class RedTubeIE(InfoExtractor):
}, {
'url': 'http://it.redtube.com/66418',
'only_matching': True,
}, {
'url': 'https://www.redtube.com.br/103224331',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -25,8 +25,8 @@ class SlidesLiveIE(InfoExtractor):
'id': '38902413',
'ext': 'mp4',
'title': 'GCC IA16 backend',
'timestamp': 1648189972,
'upload_date': '20220325',
'timestamp': 1697793372,
'upload_date': '20231020',
'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': 'count:42',
'chapters': 'count:41',
@ -42,8 +42,8 @@ class SlidesLiveIE(InfoExtractor):
'id': '38935785',
'ext': 'mp4',
'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
'upload_date': '20211115',
'timestamp': 1636996003,
'upload_date': '20231020',
'timestamp': 1697807002,
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:640',
'chapters': 'count:639',
@ -59,9 +59,9 @@ class SlidesLiveIE(InfoExtractor):
'id': '38973182',
'ext': 'mp4',
'title': 'How Should a Machine Learning Researcher Think About AI Ethics?',
'upload_date': '20220201',
'upload_date': '20231020',
'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1643728135,
'timestamp': 1697822521,
'thumbnails': 'count:3',
'chapters': 'count:2',
'duration': 5889,
@ -70,37 +70,22 @@ class SlidesLiveIE(InfoExtractor):
'skip_download': 'm3u8',
},
}, {
# service_name = youtube, only XML slides info
# formerly youtube, converted to native
'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost',
'md5': '8a79b5e3d700837f40bd2afca3c8fa01',
'info_dict': {
'id': 'jmg02wCJD5M',
'display_id': '38897546',
'id': '38897546',
'ext': 'mp4',
'title': 'SPECIÁL: Meta-přednáška Petra Ludwiga - Hodnoty pro lepší společnost',
'description': 'Watch full version of this video at https://slideslive.com/38897546.',
'channel_url': 'https://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
'channel': 'SlidesLive Videos - G1',
'channel_id': 'UCZWdAkNYFncuX0khyvhqnxw',
'uploader_id': 'UCZWdAkNYFncuX0khyvhqnxw',
'uploader': 'SlidesLive Videos - G1',
'uploader_url': 'http://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
'live_status': 'not_live',
'upload_date': '20160710',
'timestamp': 1618786715,
'duration': 6827,
'like_count': int,
'view_count': int,
'comment_count': int,
'channel_follower_count': int,
'age_limit': 0,
'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
'thumbnail': r're:^https?://.*\.jpg',
'upload_date': '20231029',
'timestamp': 1698588144,
'thumbnails': 'count:169',
'playable_in_embed': True,
'availability': 'unlisted',
'tags': [],
'categories': ['People & Blogs'],
'chapters': 'count:168',
'duration': 6827,
},
'params': {
'skip_download': 'm3u8',
},
}, {
# embed-only presentation, only XML slides info
@ -111,8 +96,8 @@ class SlidesLiveIE(InfoExtractor):
'title': 'Towards a Deep Network Architecture for Structured Smoothness',
'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': 'count:8',
'timestamp': 1629671508,
'upload_date': '20210822',
'timestamp': 1697803109,
'upload_date': '20231020',
'chapters': 'count:7',
'duration': 326,
},
@ -128,8 +113,8 @@ class SlidesLiveIE(InfoExtractor):
'title': 'MoReL: Multi-omics Relational Learning',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:7',
'timestamp': 1654714970,
'upload_date': '20220608',
'timestamp': 1697824939,
'upload_date': '20231020',
'chapters': 'count:6',
'duration': 171,
},
@ -145,8 +130,8 @@ class SlidesLiveIE(InfoExtractor):
'title': 'Decentralized Attribution of Generative Models',
'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': 'count:16',
'timestamp': 1622806321,
'upload_date': '20210604',
'timestamp': 1697814901,
'upload_date': '20231020',
'chapters': 'count:15',
'duration': 306,
},
@ -162,8 +147,8 @@ class SlidesLiveIE(InfoExtractor):
'title': 'Efficient Active Search for Combinatorial Optimization Problems',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:9',
'timestamp': 1654714896,
'upload_date': '20220608',
'timestamp': 1697824757,
'upload_date': '20231020',
'chapters': 'count:8',
'duration': 295,
},
@ -177,10 +162,10 @@ class SlidesLiveIE(InfoExtractor):
'id': '38979880',
'ext': 'mp4',
'title': 'The Representation Power of Neural Networks',
'timestamp': 1654714962,
'timestamp': 1697824919,
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:22',
'upload_date': '20220608',
'upload_date': '20231020',
'chapters': 'count:21',
'duration': 294,
},
@ -200,10 +185,10 @@ class SlidesLiveIE(InfoExtractor):
'id': '38979682',
'ext': 'mp4',
'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
'timestamp': 1654714920,
'timestamp': 1697824815,
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:30',
'upload_date': '20220608',
'upload_date': '20231020',
'chapters': 'count:31',
'duration': 272,
},
@ -213,8 +198,8 @@ class SlidesLiveIE(InfoExtractor):
'ext': 'mp4',
'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 021',
'duration': 3,
'timestamp': 1654714920,
'upload_date': '20220608',
'timestamp': 1697824815,
'upload_date': '20231020',
},
}, {
'info_dict': {
@ -222,8 +207,8 @@ class SlidesLiveIE(InfoExtractor):
'ext': 'mp4',
'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 024',
'duration': 4,
'timestamp': 1654714920,
'upload_date': '20220608',
'timestamp': 1697824815,
'upload_date': '20231020',
},
}],
'params': {
@ -242,10 +227,10 @@ class SlidesLiveIE(InfoExtractor):
'id': '38979481',
'ext': 'mp4',
'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
'timestamp': 1654714877,
'timestamp': 1697824716,
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:43',
'upload_date': '20220608',
'upload_date': '20231020',
'chapters': 'count:43',
'duration': 315,
},
@ -255,8 +240,8 @@ class SlidesLiveIE(InfoExtractor):
'ext': 'mp4',
'title': 'How to Train Your MAML to Excel in Few-Shot Classification - Slide 013',
'duration': 3,
'timestamp': 1654714877,
'upload_date': '20220608',
'timestamp': 1697824716,
'upload_date': '20231020',
},
}],
'params': {
@ -275,10 +260,10 @@ class SlidesLiveIE(InfoExtractor):
'channel_id': 'UC62SdArr41t_-_fX40QCLRw',
'channel_url': 'https://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
'uploader': 'SlidesLive Videos - A',
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
'uploader_url': 'http://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
'uploader_id': '@slideslivevideos-a6075',
'uploader_url': 'https://www.youtube.com/@slideslivevideos-a6075',
'upload_date': '20200903',
'timestamp': 1602599092,
'timestamp': 1697805922,
'duration': 942,
'age_limit': 0,
'live_status': 'not_live',
@ -303,8 +288,8 @@ class SlidesLiveIE(InfoExtractor):
'id': '38983994',
'ext': 'mp4',
'title': 'Zero-Shot AutoML with Pretrained Models',
'timestamp': 1662384834,
'upload_date': '20220905',
'timestamp': 1697826708,
'upload_date': '20231020',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:23',
'chapters': 'count:22',
@ -336,8 +321,8 @@ class SlidesLiveIE(InfoExtractor):
'title': 'Towards a Deep Network Architecture for Structured Smoothness',
'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': 'count:8',
'timestamp': 1629671508,
'upload_date': '20210822',
'timestamp': 1697803109,
'upload_date': '20231020',
'chapters': 'count:7',
'duration': 326,
},
@ -386,7 +371,7 @@ class SlidesLiveIE(InfoExtractor):
if not line.startswith('#EXT-SL-'):
continue
tag, _, value = line.partition(':')
key = lookup.get(tag.lstrip('#EXT-SL-'))
key = lookup.get(tag[8:])
if not key:
continue
m3u8_dict[key] = value

View File

@ -1,5 +1,6 @@
from .common import InfoExtractor
from ..utils import ExtractorError
from ..utils import ExtractorError, base_url, int_or_none, url_basename
from ..utils.traversal import traverse_obj
class Vbox7IE(InfoExtractor):
@ -19,7 +20,7 @@ class Vbox7IE(InfoExtractor):
_GEO_COUNTRIES = ['BG']
_TESTS = [{
'url': 'http://vbox7.com/play:0946fff23c',
'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
'md5': '50ca1f78345a9c15391af47d8062d074',
'info_dict': {
'id': '0946fff23c',
'ext': 'mp4',
@ -29,19 +30,25 @@ class Vbox7IE(InfoExtractor):
'timestamp': 1470982814,
'upload_date': '20160812',
'uploader': 'zdraveibulgaria',
},
'params': {
'proxy': '127.0.0.1:8118',
'view_count': int,
'duration': 2640,
},
}, {
'url': 'http://vbox7.com/play:249bb972c2',
'md5': '99f65c0c9ef9b682b97313e052734c3f',
'md5': 'da1dd2eb245200cb86e6d09d43232116',
'info_dict': {
'id': '249bb972c2',
'ext': 'mp4',
'title': 'Смях! Чудо - чист за секунди - Скрита камера',
'uploader': 'svideteliat_ot_varshava',
'view_count': int,
'timestamp': 1360215023,
'thumbnail': 'https://i49.vbox7.com/design/iconci/png/noimg6.png',
'description': 'Смях! Чудо - чист за секунди - Скрита камера',
'upload_date': '20130207',
'duration': 83,
},
'skip': 'georestricted',
'expected_warnings': ['Failed to download m3u8 information'],
}, {
'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
'only_matching': True,
@ -53,41 +60,38 @@ class Vbox7IE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
response = self._download_json(
'https://www.vbox7.com/ajax/video/nextvideo.php?vid=%s' % video_id,
video_id)
data = self._download_json(
'https://www.vbox7.com/aj/player/item/options', video_id,
query={'vid': video_id})['options']
if 'error' in response:
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, response['error']), expected=True)
src_url = data.get('src')
if src_url in (None, '', 'blank'):
raise ExtractorError('Video is unavailable', expected=True)
video = response['options']
fmt_base = url_basename(src_url).rsplit('.', 1)[0].rsplit('_', 1)[0]
if fmt_base == 'vn':
self.raise_geo_restricted()
title = video['title']
video_url = video['src']
fmt_base = base_url(src_url) + fmt_base
if '/na.mp4' in video_url:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
uploader = video.get('uploader')
webpage = self._download_webpage(
'http://vbox7.com/play:%s' % video_id, video_id, fatal=None)
info = {}
if webpage:
info = self._search_json_ld(
webpage.replace('"/*@context"', '"@context"'), video_id,
fatal=False)
info.update({
'id': video_id,
'title': title,
'url': video_url,
'uploader': uploader,
'thumbnail': self._proto_relative_url(
info.get('thumbnail') or self._og_search_thumbnail(webpage),
'http:'),
formats = self._extract_m3u8_formats(
f'{fmt_base}.m3u8', video_id, m3u8_id='hls', fatal=False)
# TODO: Add MPD formats, when dash range support is added
for res in traverse_obj(data, ('resolutions', lambda _, v: v != 0, {int})):
formats.append({
'url': f'{fmt_base}_{res}.mp4',
'format_id': f'http-{res}',
'height': res,
})
return info
return {
'id': video_id,
'formats': formats,
**self._search_json_ld(self._download_webpage(
f'https://www.vbox7.com/play:{video_id}', video_id, fatal=False) or '', video_id, fatal=False),
**traverse_obj(data, {
'title': ('title', {str}),
'uploader': ('uploader', {str}),
'duration': ('duration', {int_or_none}),
}),
}

View File

@ -12,7 +12,7 @@ from ..utils import (
class ViewLiftBaseIE(InfoExtractor):
_API_BASE = 'https://prod-api.viewlift.com/'
_DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm|failarmy|ftfnext|lnppass\.legapallacanestro|moviespree|app\.myoutdoortv|neoufitness|pflmma|theidentitytb)\.com|(?:hoichoi|app\.horseandcountry|kronon|marquee|supercrosslive)\.tv'
_DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm|failarmy|ftfnext|lnppass\.legapallacanestro|moviespree|app\.myoutdoortv|neoufitness|pflmma|theidentitytb|chorki)\.com|(?:hoichoi|app\.horseandcountry|kronon|marquee|supercrosslive)\.tv'
_SITE_MAP = {
'ftfnext': 'lax',
'funnyforfree': 'snagfilms',
@ -27,6 +27,7 @@ class ViewLiftBaseIE(InfoExtractor):
'snagxtreme': 'snagfilms',
'theidentitytb': 'tampabay',
'vayafilm': 'snagfilms',
'chorki': 'prothomalo',
}
_TOKENS = {}
@ -296,6 +297,33 @@ class ViewLiftIE(ViewLiftBaseIE):
}, { # Premium movie
'url': 'https://www.hoichoi.tv/movies/detective-2020',
'only_matching': True
}, { # Chorki Premium series
'url': 'https://www.chorki.com/bn/series/sinpaat',
'playlist_mincount': 7,
'info_dict': {
'id': 'bn/series/sinpaat',
},
}, { # Chorki free movie
'url': 'https://www.chorki.com/bn/videos/bangla-movie-bikkhov',
'info_dict': {
'id': '564e755b-f5c7-4515-aee6-8959bee18c93',
'title': 'Bikkhov',
'ext': 'mp4',
'upload_date': '20230824',
'timestamp': 1692860553,
'categories': ['Action Movies', 'Salman Special'],
'tags': 'count:14',
'thumbnail': 'https://snagfilms-a.akamaihd.net/dd078ff5-b16e-45e4-9723-501b56b9df0a/images/2023/08/24/1692860450729_1920x1080_16x9Images.jpg',
'display_id': 'bn/videos/bangla-movie-bikkhov',
'description': 'md5:71492b086450625f4374a3eb824f27dc',
'duration': 8002,
},
'params': {
'skip_download': True,
},
}, { # Chorki Premium movie
'url': 'https://www.chorki.com/bn/videos/something-like-an-autobiography',
'only_matching': True,
}]
@classmethod

View File

@ -269,7 +269,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {}
if not jwt_response.get('jwt'):
return
headers = {'Authorization': 'jwt %s' % jwt_response['jwt']}
headers = {'Authorization': 'jwt %s' % jwt_response['jwt'], 'Accept': 'application/json'}
original_response = self._download_json(
f'https://api.vimeo.com/videos/{video_id}', video_id,
headers=headers, fatal=False, expected_status=(403, 404)) or {}
@ -751,6 +751,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
video = self._download_json(
api_url, video_id, headers={
'Authorization': 'jwt ' + token,
'Accept': 'application/json',
}, query={
'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
})
@ -785,7 +786,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
jwt = viewer['jwt']
album = self._download_json(
'https://api.vimeo.com/albums/' + album_id,
album_id, headers={'Authorization': 'jwt ' + jwt},
album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
query={'fields': 'description,name,privacy'})
if try_get(album, lambda x: x['privacy']['view']) == 'password':
password = self.get_param('videopassword')
@ -1147,10 +1148,12 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
'https://api.vimeo.com/albums/%s/videos' % album_id,
album_id, 'Downloading page %d' % api_page, query=query, headers={
'Authorization': 'jwt ' + authorization,
'Accept': 'application/json',
})['data']
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
return
raise
for video in videos:
link = video.get('link')
if not link:
@ -1171,7 +1174,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
jwt = viewer['jwt']
album = self._download_json(
'https://api.vimeo.com/albums/' + album_id,
album_id, headers={'Authorization': 'jwt ' + jwt},
album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
query={'fields': 'description,name,privacy'})
hashed_pass = None
if try_get(album, lambda x: x['privacy']['view']) == 'password':

View File

@ -0,0 +1,71 @@
from .common import InfoExtractor
from ..utils import merge_dicts, unified_timestamp, url_or_none
from ..utils.traversal import traverse_obj
class ZetlandDKArticleIE(InfoExtractor):
_VALID_URL = r'https?://www\.zetland\.dk/\w+/(?P<id>(?P<story_id>\w{8})-(?P<uploader_id>\w{8})-(?:\w{5}))'
_TESTS = [{
'url': 'https://www.zetland.dk/historie/sO9aq2MY-a81VP3BY-66e69?utm_source=instagram&utm_medium=linkibio&utm_campaign=artikel',
'info_dict': {
'id': 'sO9aq2MY-a81VP3BY-66e69',
'ext': 'mp3',
'modified_date': '20240118',
'title': 'Afsnit 1: “Det føltes som en kidnapning.” ',
'upload_date': '20240116',
'uploader_id': 'a81VP3BY',
'modified_timestamp': 1705568739,
'release_timestamp': 1705377592,
'uploader_url': 'https://www.zetland.dk/skribent/a81VP3BY',
'uploader': 'Helle Fuusager',
'release_date': '20240116',
'thumbnail': r're:https://zetland\.imgix\.net/2aafe500-b14e-11ee-bf83-65d5e1283a57/Zetland_Image_1\.jpg',
'description': 'md5:9619d426772c133f5abb26db27f26a01',
'timestamp': 1705377592,
'series_id': '62d54630-e87b-4ab1-a255-8de58dbe1b14',
}
}]
def _real_extract(self, url):
display_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
webpage = self._download_webpage(url, display_id)
next_js_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']
story_data = traverse_obj(next_js_data, ('initialState', 'consume', 'story', 'story'))
formats = []
for audio_url in traverse_obj(story_data, ('story_content', 'meta', 'audioFiles', ..., {url_or_none})):
formats.append({
'url': audio_url,
'vcodec': 'none',
})
return merge_dicts({
'id': display_id,
'formats': formats,
'uploader_id': uploader_id
}, traverse_obj(story_data, {
'title': ((('story_content', 'content', 'title'), 'title'), {str}),
'uploader': ('sharer', 'name'),
'uploader_id': ('sharer', 'sharer_id'),
'description': ('story_content', 'content', 'socialDescription'),
'series_id': ('story_content', 'meta', 'seriesId'),
'release_timestamp': ('published_at', {unified_timestamp}),
'modified_timestamp': ('revised_at', {unified_timestamp}),
}, get_all=False), traverse_obj(next_js_data, ('metaInfo', {
'title': ((('meta', 'title'), ('ld', 'headline'), ('og', 'og:title'), ('og', 'twitter:title')), {str}),
'description': ((('meta', 'description'), ('ld', 'description'), ('og', 'og:description'), ('og', 'twitter:description')), {str}),
'uploader': ((('meta', 'author'), ('ld', 'author', 'name')), {str}),
'uploader_url': ('ld', 'author', 'url', {url_or_none}),
'thumbnail': ((('ld', 'image'), ('og', 'og:image'), ('og', 'twitter:image')), {url_or_none}),
'modified_timestamp': ('ld', 'dateModified', {unified_timestamp}),
'release_timestamp': ('ld', 'datePublished', {unified_timestamp}),
'timestamp': ('ld', 'dateCreated', {unified_timestamp}),
}), get_all=False), {
'title': self._html_search_meta(['title', 'og:title', 'twitter:title'], webpage),
'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
'uploader': self._html_search_meta(['author'], webpage),
'release_timestamp': unified_timestamp(self._html_search_meta(['article:published_time'], webpage)),
}, self._search_json_ld(webpage, display_id, fatal=False))

View File

@ -476,7 +476,8 @@ def create_parser():
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'],
'2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
'2022': ['no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'],
'2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter'],
'2023': ['prefer-legacy-http-handler', 'manifest-filesize-approx'],
}
}, help=(
'Options that can help keep compatibility with youtube-dl or youtube-dlc '