Mirror of https://github.com/yt-dlp/yt-dlp.git
Synced 2024-11-14 03:11:24 +01:00

Compare commits: 69bb4555c4...04fd7a6b88 (24 commits)

04fd7a6b88
2bf76cce91
ed30718ab7
56e0a3a3da
823181217e
a74b2aceba
ad02f40dd3
b12de3b802
cc5f9ea447
933be9a6b3
c1d71d0d9f
661c9a1d02
568f080518
904a19ee93
52414d64ca
2269065ad6
a5e264d74b
b84fda7388
5fccabac27
21f40e75df
b3febedbeb
295fbb3ae3
35f9a306e6
9d6254069c
.github/workflows/core.yml (vendored, 2 changed lines)

@@ -33,7 +33,7 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
- name: Install pytest
run: pip install pytest
run: pip install pytest -r requirements.txt
- name: Run tests
continue-on-error: False
run: |

@@ -76,7 +76,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t

# NEW FEATURES

* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@42f2d4**](https://github.com/ytdl-org/youtube-dl/commit/07af47960f3bb262ead02490ce65c8c45c01741e) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@66ab08**](https://github.com/ytdl-org/youtube-dl/commit/66ab0814c4baa2dc79c2dd5287bc0ad61a37c5b9) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))

* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API

@@ -158,6 +158,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu

* yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
* yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: [~~aria2c~~](https://github.com/yt-dlp/yt-dlp/issues/5931)). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is
* yt-dlp versions between 2021.09.01 and 2023.01.02 applies `--match-filter` to nested playlists. This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this
* yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) to be used for standard http requests.

For ease of use, a few more compat options are available:

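For reference, the compat option described above can also be supplied through the embedding API rather than the CLI; this is a minimal sketch (the target URL is only a placeholder):

import yt_dlp

# Equivalent of passing `--compat-options prefer-legacy-http-handler` on the command line
params = {'compat_opts': ['prefer-legacy-http-handler']}
with yt_dlp.YoutubeDL(params) as ydl:
    ydl.download(['https://example.com/some-video'])
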
@@ -31,6 +31,7 @@ from yt_dlp.utils import (
DownloadError,
ExtractorError,
UnavailableVideoError,
YoutubeDLError,
format_bytes,
join_nonempty,
)

@@ -100,6 +101,8 @@ def generator(test_case, tname):
print_skipping('IE marked as not _WORKING')

for tc in test_cases:
if tc.get('expected_exception'):
continue
info_dict = tc.get('info_dict', {})
params = tc.get('params', {})
if not info_dict.get('id'):

@@ -139,6 +142,17 @@ def generator(test_case, tname):
res_dict = None

def match_exception(err):
expected_exception = test_case.get('expected_exception')
if not expected_exception:
return False
if err.__class__.__name__ == expected_exception:
return True
for exc in err.exc_info:
if exc.__class__.__name__ == expected_exception:
return True
return False

def try_rm_tcs_files(tcs=None):
if tcs is None:
tcs = test_cases

@@ -161,6 +175,8 @@ def generator(test_case, tname):
except (DownloadError, ExtractorError) as err:
# Check if the exception is not a network related one
if not isinstance(err.exc_info[1], (TransportError, UnavailableVideoError)) or (isinstance(err.exc_info[1], HTTPError) and err.exc_info[1].status == 503):
if match_exception(err):
return
err.msg = f'{getattr(err, "msg", err)} ({tname})'
raise

@@ -171,6 +187,10 @@ def generator(test_case, tname):
print(f'Retrying: {try_num} failed tries\n\n##########\n\n')

try_num += 1
except YoutubeDLError as err:
if match_exception(err):
return
raise
else:
break

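The `expected_exception` field introduced above lets an extractor test assert that extraction fails with a specific error class instead of producing an info dict. The TwitCasting change later in this compare uses it; a test entry of that shape looks like this (URL taken from that change):

_TESTS = [{
    'url': 'https://twitcasting.tv/c:unusedlive',
    # Class name matched against the raised error or one of err.exc_info, per match_exception()
    'expected_exception': 'UserNotLive',
}]
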
@@ -28,7 +28,7 @@ from http.cookiejar import CookieJar

from test.helper import FakeYDL, http_server_port
from yt_dlp.cookies import YoutubeDLCookieJar
from yt_dlp.dependencies import brotli, urllib3, requests
from yt_dlp.dependencies import brotli, requests, urllib3
from yt_dlp.networking import (
HEADRequest,
PUTRequest,

@@ -43,10 +43,11 @@ from yt_dlp.networking.exceptions import (
HTTPError,
IncompleteRead,
NoSupportingHandlers,
ProxyError,
RequestError,
SSLError,
TransportError,
UnsupportedRequest, ProxyError,
UnsupportedRequest,
)
from yt_dlp.utils._utils import _YDLLogger as FakeLogger
from yt_dlp.utils.networking import HTTPHeaderDict

@@ -821,24 +822,20 @@ class TestUrllibRequestHandler(TestRequestHandlerBase):

@pytest.mark.parametrize('handler', ['Requests'], indirect=True)
class TestRequestsRequestHandler(TestRequestHandlerBase):
# TODO: Test poolsize adjustments
# TODO: Test verbose output
# TODO: go through these for all supported urllib3 versions and request
@pytest.mark.parametrize('raised,expected', [
(lambda: requests.exceptions.SSLError(
'something [CERTIFICATE_VERIFY_FAILED] something'), CertificateVerifyError),
(lambda: requests.exceptions.SSLError(), SSLError),
(lambda: requests.exceptions.ProxyError(), ProxyError),
# TODO: Exceptions wrapped by ConnectionError
(lambda: requests.exceptions.ConnectionError(), TransportError),
(lambda: requests.exceptions.ReadTimeout(), TransportError),
(lambda: requests.exceptions.ConnectTimeout(), TransportError),
(lambda: requests.exceptions.ContentDecodingError(), TransportError),
(lambda: requests.exceptions.ReadTimeout(), TransportError),
(lambda: requests.exceptions.Timeout(), TransportError),
(lambda: requests.exceptions.ConnectionError(), TransportError),
(lambda: requests.exceptions.ProxyError(), ProxyError),
(lambda: requests.exceptions.SSLError('12[CERTIFICATE_VERIFY_FAILED]34'), CertificateVerifyError),
(lambda: requests.exceptions.SSLError(), SSLError),
(lambda: requests.exceptions.InvalidURL(), RequestError),
(lambda: requests.exceptions.InvalidHeader(), RequestError),
# catch-all: https://github.com/psf/requests/blob/main/src/requests/adapters.py#L535
(lambda: urllib3.exceptions.HTTPError(), TransportError),
(lambda: requests.exceptions.RequestException(), RequestError)
# (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
])
def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
with handler() as rh:

@@ -852,22 +849,23 @@ class TestRequestsRequestHandler(TestRequestHandlerBase):

with pytest.raises(expected) as exc_info:
rh.send(Request('http://fake'))
assert type(exc_info.value) == expected
assert exc_info.type is expected

@pytest.mark.parametrize('raised,expected', [
(lambda: urllib3.exceptions.SSLError(), SSLError),
(lambda: urllib3.exceptions.TimeoutError(), TransportError),
(lambda: urllib3.exceptions.ReadTimeoutError(None,None,None), TransportError),
(lambda: urllib3.exceptions.ReadTimeoutError(None, None, None), TransportError),
(lambda: urllib3.exceptions.ProtocolError(), TransportError),
(lambda: urllib3.exceptions.ProtocolError(
'error', http.client.IncompleteRead(partial=b'')), IncompleteRead),
(lambda: urllib3.exceptions.IncompleteRead(partial=b'', expected=5), IncompleteRead),
(lambda: urllib3.exceptions.IncompleteRead(partial=3, expected=5), IncompleteRead),
(lambda: urllib3.exceptions.DecodeError(), TransportError),
(lambda: urllib3.exceptions.HTTPError(), TransportError) # catch-all
])
# FIXME: monkey patch a fake response
def test_response_error_mapping(self, handler, monkeypatch, raised, expected):
with handler() as rh:
# FIXME: monkey patch a fake response
res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))

def mock_read(*args, **kwargs):

@@ -877,7 +875,7 @@ class TestRequestsRequestHandler(TestRequestHandlerBase):
with pytest.raises(expected) as exc_info:
res.read()

assert type(exc_info.value) == expected
assert exc_info.type is expected


def run_validation(handler, error, req, **handler_kwargs):

@@ -1326,15 +1324,12 @@ class TestYoutubeDLNetworking:
rh = self.build_handler(ydl, UrllibRH)
assert rh.enable_file_urls is True

@pytest.mark.parametrize('handler', ['Requests'], indirect=True)
def test_requests_concurrent_conns(self, handler):
handler_class = type(handler())
with FakeYDL({'concurrent_fragment_downloads': 4}) as ydl:
rh = self.build_handler(ydl, handler_class)
assert rh.conn_pool_maxsize == 10
with FakeYDL({'concurrent_fragment_downloads': 11}) as ydl:
rh = self.build_handler(ydl, handler_class)
assert rh.conn_pool_maxsize == 11
def test_compat_opt_prefer_urllib(self):
# This assumes urllib only has a preference when this compat opt is given
with FakeYDL({'compat_opts': ['prefer-legacy-http-handler']}) as ydl:
director = ydl.build_request_director([UrllibRH])
assert len(director.preferences) == 1
assert director.preferences.pop()(UrllibRH, None)


class TestRequest:

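The test_requests_concurrent_conns test above pins down the connection-pool sizing rule for the requests handler: the pool stays at its default size unless more concurrent fragment downloads are requested. A standalone sketch of that rule, assuming a default pool size of 10 as the asserted values imply:

DEFAULT_POOLSIZE = 10  # assumption: matches the handler's default pool size

def conn_pool_maxsize(params):
    n = params.get('concurrent_fragment_downloads', 1)
    return n if n > DEFAULT_POOLSIZE else DEFAULT_POOLSIZE

assert conn_pool_maxsize({'concurrent_fragment_downloads': 4}) == 10
assert conn_pool_maxsize({'concurrent_fragment_downloads': 11}) == 11
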
@@ -1218,6 +1218,12 @@ class TestUtil(unittest.TestCase):
self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
self.assertEqual(js_to_json('`${name}`', {}), '"name"')

def test_js_to_json_map_array_constructors(self):
self.assertEqual(json.loads(js_to_json('new Map([["a", 5]])')), {'a': 5})
self.assertEqual(json.loads(js_to_json('Array(5, 10)')), [5, 10])
self.assertEqual(json.loads(js_to_json('new Array(15,5)')), [15, 5])
self.assertEqual(json.loads(js_to_json('new Map([Array(5, 10),new Array(15,5)])')), {'5': 10, '15': 5})

def test_extract_attributes(self):
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})

@@ -4065,6 +4065,9 @@ class YoutubeDL:
raise RequestError(
'file:// URLs are disabled by default in yt-dlp for security reasons. '
'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
if 'unsupported proxy type: "https"' in ue.msg.lower():
raise RequestError(
'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests')
raise
except SSLError as e:
if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):

@@ -4086,7 +4089,7 @@ class YoutubeDL:

director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
for handler in handlers:
params = dict(
director.add_handler(handler(
logger=logger,
headers=headers,
cookiejar=self.cookiejar,

@@ -4104,15 +4107,10 @@ class YoutubeDL:
'client_certificate_key': 'client_certificate_key',
'client_certificate_password': 'client_certificate_password',
},
}))
if handler.RH_KEY == 'Requests':
# Increase the requests connection pool size if the number of concurrent downloads is high.
# Otherwise, since the pool size is limited to 10 by default, requests will not reuse some connections.
n = self.params.get('concurrent_fragment_downloads', 1)
if n > handler.DEFAULT_POOLSIZE:
params['conn_pool_maxsize'] = n
director.add_handler(handler(**params))
})))
director.preferences.update(preferences or [])
if 'prefer-legacy-http-handler' in self.params['compat_opts']:
director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
return director

def encode(self, s):

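The error message in the first hunk above points to the opt-in for file:// URLs. Through the embedding API the same switch is the `enable_file_urls` parameter; a minimal sketch (the local path is a placeholder):

import yt_dlp

# Without this flag, requesting a file:// URL raises the RequestError shown above
with yt_dlp.YoutubeDL({'enable_file_urls': True}) as ydl:
    response = ydl.urlopen('file:///tmp/sample.txt')
    print(response.read())
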
@@ -15,7 +15,7 @@ def get_package_info(module):
name=getattr(module, '_yt_dlp__identifier', module.__name__),
version=str(next(filter(None, (
getattr(module, attr, None)
for attr in ('__version__', 'version_string', 'version')
for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version')
)), None)))

@@ -43,6 +43,8 @@ except Exception as _err:

try:
import sqlite3
# We need to get the underlying `sqlite` version, see https://github.com/yt-dlp/yt-dlp/issues/8152
sqlite3._yt_dlp__version = sqlite3.sqlite_version
except ImportError:
# although sqlite3 is part of the standard library, it is possible to compile python without
# sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544

@@ -81,10 +83,6 @@ all_dependencies = {k: v for k, v in globals().items() if not k.startswith('_')}
available_dependencies = {k: v for k, v in all_dependencies.items() if v}


class OptionalDependencyWarning(Warning):
pass


# Deprecated
Cryptodome_AES = Cryptodome.AES

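The `_yt_dlp__version` attribute set on sqlite3 above works together with the get_package_info() change in the previous hunk: the stashed value is checked first, so the reported version is the SQLite library version rather than the stdlib module's own version string. A small sketch of that lookup order:

import sqlite3

sqlite3._yt_dlp__version = sqlite3.sqlite_version  # as in the hunk above

version = next(
    filter(None, (
        getattr(sqlite3, attr, None)
        for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version')
    )), None)
print(version)  # the underlying SQLite version, e.g. '3.42.0', instead of sqlite3.version
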
@@ -296,9 +296,11 @@ from .cammodels import CamModelsIE
from .camsoda import CamsodaIE
from .camtasia import CamtasiaEmbedIE
from .camwithher import CamWithHerIE
from .canal1 import Canal1IE
from .canalalpha import CanalAlphaIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .caracoltv import CaracolTvPlayIE
from .carambatv import (
CarambaTVIE,
CarambaTVPageIE,

@@ -565,6 +567,7 @@ from .epicon import (
EpiconIE,
EpiconSeriesIE,
)
from .eplus import EplusIbIE
from .epoch import EpochIE
from .eporner import EpornerIE
from .eroprofile import (

@@ -1598,6 +1601,7 @@ from .rbmaradio import RBMARadioIE
from .rbgtum import (
RbgTumIE,
RbgTumCourseIE,
RbgTumNewCourseIE,
)
from .rcs import (
RCSIE,

@@ -49,14 +49,14 @@ class BilibiliBaseIE(InfoExtractor):
for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
}

audios = traverse_obj(play_info, ('dash', 'audio', ...))
audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
if flac_audio:
audios.append(flac_audio)
formats = [{
'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
'acodec': audio.get('codecs'),
'acodec': traverse_obj(audio, ('codecs', {str.lower})),
'vcodec': 'none',
'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
'filesize': int_or_none(audio.get('size')),

@@ -71,6 +71,7 @@ class BilibiliBaseIE(InfoExtractor):
'height': int_or_none(video.get('height')),
'vcodec': video.get('codecs'),
'acodec': 'none' if audios else None,
'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
'tbr': float_or_none(video.get('bandwidth'), scale=1000),
'filesize': int_or_none(video.get('size')),
'quality': int_or_none(video.get('id')),

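The reworked `audios` lookup above uses traverse_obj branching: `(None, 'dolby')` visits both the current `dash` object and its `dolby` child, `...` iterates the audio lists, and `{dict}` keeps only dict entries, so Dolby tracks are collected alongside the regular ones. A self-contained illustration with an invented play_info:

from yt_dlp.utils import traverse_obj

play_info = {
    'dash': {
        'audio': [{'id': 30280, 'codecs': 'MP4A.40.2'}],
        'dolby': {'audio': [{'id': 30250, 'codecs': 'EC-3'}]},
    },
}

audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
print([a['id'] for a in audios])  # [30280, 30250]
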
yt_dlp/extractor/canal1.py (new file, 39 lines)

@@ -0,0 +1,39 @@
from .common import InfoExtractor


class Canal1IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|noticias\.)?canal1\.com\.co/(?:[^?#&])+/(?P<id>[\w-]+)'

_TESTS = [{
'url': 'https://canal1.com.co/noticias/napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco/',
'info_dict': {
'id': '63b39f6b354977084b85ab54',
'display_id': 'napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco',
'title': 'Ñapa I Una cadena de producción de arroz que se quedó en veremos y abandonada en el departamento del Chocó',
'description': 'md5:bc49c6d64d20610ea1e7daf079a0d013',
'thumbnail': r're:^https?://[^?#]+63b39f6b354977084b85ab54',
'ext': 'mp4',
},
}, {
'url': 'https://noticias.canal1.com.co/noticias/tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter/',
'info_dict': {
'id': '63b39e93f5fd223aa32250fb',
'display_id': 'tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter',
'title': 'Tres I El triste récord que impuso Elon Musk, el dueño de Tesla y de Twitter',
'description': 'md5:d9f691f131a21ce6767ca6c05d17d791',
'thumbnail': r're:^https?://[^?#]+63b39e93f5fd223aa32250fb',
'ext': 'mp4',
},
}, {
# Geo-restricted to Colombia
'url': 'https://canal1.com.co/programas/guerreros-canal-1/video-inedito-guerreros-despedida-kewin-zarate/',
'only_matching': True,
}]

def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)

return self.url_result(
self._search_regex(r'"embedUrl"\s*:\s*"([^"]+)', webpage, 'embed url'),
display_id=display_id, url_transparent=True)

yt_dlp/extractor/caracoltv.py (new file, 136 lines)

@@ -0,0 +1,136 @@
import base64
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
traverse_obj,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class CaracolTvPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://play\.caracoltv\.com/videoDetails/(?P<id>[^/?#]+)'
|
||||
_NETRC_MACHINE = 'caracoltv-play'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://play.caracoltv.com/videoDetails/OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
|
||||
'info_dict': {
|
||||
'id': 'OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
|
||||
'title': 'La teoría del promedio',
|
||||
'description': 'md5:1cdd6d2c13f19ef0d9649ab81a023ac3',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}, {
|
||||
'url': 'https://play.caracoltv.com/videoDetails/OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==/ella?season=0',
|
||||
'info_dict': {
|
||||
'id': 'OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==',
|
||||
'title': 'Ella',
|
||||
'description': 'md5:a639b1feb5ddcc0cff92a489b4e544b8',
|
||||
},
|
||||
'playlist_count': 10,
|
||||
}, {
|
||||
'url': 'https://play.caracoltv.com/videoDetails/OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==/la-vuelta-al-mundo-en-80-risas-2022?season=0',
|
||||
'info_dict': {
|
||||
'id': 'OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==',
|
||||
'title': 'La vuelta al mundo en 80 risas 2022',
|
||||
'description': 'md5:e97aac36106e5c37ebf947b3350106a4',
|
||||
},
|
||||
'playlist_count': 17,
|
||||
}, {
|
||||
'url': 'https://play.caracoltv.com/videoDetails/MzoxX3BwbjRmNjB1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USER_TOKEN = None
|
||||
|
||||
def _extract_app_token(self, webpage):
|
||||
config_js_path = self._search_regex(
|
||||
r'<script[^>]+src\s*=\s*"([^"]+coreConfig.js[^"]+)', webpage, 'config js url', fatal=False)
|
||||
|
||||
mediation_config = {} if not config_js_path else self._search_json(
|
||||
r'mediation\s*:', self._download_webpage(
|
||||
urljoin('https://play.caracoltv.com/', config_js_path), None, fatal=False, note='Extracting JS config'),
|
||||
'mediation_config', None, transform_source=js_to_json, fatal=False)
|
||||
|
||||
key = traverse_obj(
|
||||
mediation_config, ('live', 'key')) or '795cd9c089a1fc48094524a5eba85a3fca1331817c802f601735907c8bbb4f50'
|
||||
secret = traverse_obj(
|
||||
mediation_config, ('live', 'secret')) or '64dec00a6989ba83d087621465b5e5d38bdac22033b0613b659c442c78976fa0'
|
||||
|
||||
return base64.b64encode(f'{key}:{secret}'.encode()).decode()
|
||||
|
||||
def _perform_login(self, email, password):
|
||||
webpage = self._download_webpage('https://play.caracoltv.com/', None, fatal=False)
|
||||
app_token = self._extract_app_token(webpage)
|
||||
|
||||
bearer_token = self._download_json(
|
||||
'https://eu-gateway.inmobly.com/applications/oauth', None, data=b'', note='Retrieving bearer token',
|
||||
headers={'Authorization': f'Basic {app_token}'})['token']
|
||||
|
||||
self._USER_TOKEN = self._download_json(
|
||||
'https://eu-gateway.inmobly.com/user/login', None, note='Performing login', headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': f'Bearer {bearer_token}',
|
||||
}, data=json.dumps({
|
||||
'device_data': {
|
||||
'device_id': str(uuid.uuid4()),
|
||||
'device_token': '',
|
||||
'device_type': 'web'
|
||||
},
|
||||
'login_data': {
|
||||
'enabled': True,
|
||||
'email': email,
|
||||
'password': password,
|
||||
}
|
||||
}).encode())['user_token']
|
||||
|
||||
def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None):
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_data['stream_url'], series_id, 'mp4')
|
||||
|
||||
return {
|
||||
'id': video_data['id'],
|
||||
'title': video_data.get('name'),
|
||||
'description': video_data.get('description'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnails': traverse_obj(
|
||||
video_data, ('extra_thumbs', ..., {'url': 'thumb_url', 'height': 'height', 'width': 'width'})),
|
||||
'series_id': series_id,
|
||||
'season_id': season_id,
|
||||
'season_number': int_or_none(season_number),
|
||||
'episode_number': int_or_none(video_data.get('item_order')),
|
||||
'is_live': video_data.get('entry_type') == 3,
|
||||
}
|
||||
|
||||
def _extract_series_seasons(self, seasons, series_id):
|
||||
for season in seasons:
|
||||
api_response = self._download_json(
|
||||
'https://eu-gateway.inmobly.com/feed', series_id, query={'season_id': season['id']},
|
||||
headers={'Authorization': f'Bearer {self._USER_TOKEN}'})
|
||||
|
||||
season_number = season.get('order')
|
||||
for episode in api_response['items']:
|
||||
yield self._extract_video(episode, series_id, season['id'], season_number)
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
|
||||
if self._USER_TOKEN is None:
|
||||
self._perform_login('guest@inmobly.com', 'Test@gus1')
|
||||
|
||||
api_response = self._download_json(
|
||||
'https://eu-gateway.inmobly.com/feed', series_id, query={'include_ids': series_id},
|
||||
headers={'Authorization': f'Bearer {self._USER_TOKEN}'})['items'][0]
|
||||
|
||||
if not api_response.get('seasons'):
|
||||
return self._extract_video(api_response)
|
||||
|
||||
return self.playlist_result(
|
||||
self._extract_series_seasons(api_response['seasons'], series_id),
|
||||
series_id, **traverse_obj(api_response, {
|
||||
'title': 'name',
|
||||
'description': 'description',
|
||||
}))
|
|
@@ -1687,7 +1687,7 @@ class InfoExtractor:
def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
rectx = re.escape(context_name)
FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){(?:.*?)return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
js, arg_keys, arg_vals = self._search_regex(
(rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),

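The added `(?:.*?)` in the new FUNCTION_RE lets _search_nuxt_data cope with pages where the __NUXT__ factory runs a few statements before its `return`. A standalone check of the pattern against an invented payload:

import re

FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){(?:.*?)return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
sample = 'window.__NUXT__=(function(a,b){var c=1;return {data:[{id:a,val:b}]};}("x","y"));'
m = re.search(FUNCTION_RE, sample)
print(m.group('arg_keys'), m.group('js'), m.group('arg_vals'))
# a,b {data:[{id:a,val:b}]} "x","y"
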
@ -1,31 +1,72 @@
|
|||
import time
|
||||
import hashlib
|
||||
import re
|
||||
import urllib
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .openload import PhantomJSwrapper
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
UserNotLive,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_resolution,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class DouyuTVIE(InfoExtractor):
|
||||
IE_DESC = '斗鱼'
|
||||
class DouyuBaseIE(InfoExtractor):
|
||||
def _download_cryptojs_md5(self, video_id):
|
||||
for url in [
|
||||
'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
|
||||
'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
|
||||
]:
|
||||
js_code = self._download_webpage(
|
||||
url, video_id, note='Downloading signing dependency', fatal=False)
|
||||
if js_code:
|
||||
self.cache.store('douyu', 'crypto-js-md5', js_code)
|
||||
return js_code
|
||||
raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')
|
||||
|
||||
def _get_cryptojs_md5(self, video_id):
|
||||
return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id)
|
||||
|
||||
def _calc_sign(self, sign_func, video_id, a):
|
||||
b = uuid.uuid4().hex
|
||||
c = round(time.time())
|
||||
js_script = f'{self._get_cryptojs_md5(video_id)};{sign_func};console.log(ub98484234("{a}","{b}","{c}"))'
|
||||
phantom = PhantomJSwrapper(self)
|
||||
result = phantom.execute(js_script, video_id,
|
||||
note='Executing JS signing script').strip()
|
||||
return {i: v[0] for i, v in urllib.parse.parse_qs(result).items()}
|
||||
|
||||
def _search_js_sign_func(self, webpage, fatal=True):
|
||||
# The greedy look-behind ensures last possible script tag is matched
|
||||
return self._search_regex(
|
||||
r'(?:<script.*)?<script[^>]*>(.*?ub98484234.*?)</script>', webpage, 'JS sign func', fatal=fatal)
|
||||
|
||||
|
||||
class DouyuTVIE(DouyuBaseIE):
|
||||
IE_DESC = '斗鱼直播'
|
||||
_VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.douyutv.com/iseven',
|
||||
'url': 'https://www.douyu.com/pigff',
|
||||
'info_dict': {
|
||||
'id': '17732',
|
||||
'display_id': 'iseven',
|
||||
'ext': 'flv',
|
||||
'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': r're:.*m7show@163\.com.*',
|
||||
'thumbnail': r're:^https?://.*\.png',
|
||||
'uploader': '7师傅',
|
||||
'id': '24422',
|
||||
'display_id': 'pigff',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': r'≥15级牌子看鱼吧置顶帖进粉丝vx群',
|
||||
'thumbnail': str,
|
||||
'uploader': 'pigff',
|
||||
'is_live': True,
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
@ -85,15 +126,43 @@ class DouyuTVIE(InfoExtractor):
|
|||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _get_sign_func(self, room_id, video_id):
|
||||
return self._download_json(
|
||||
f'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id,
|
||||
note='Getting signing script')['data'][f'room{room_id}']
|
||||
|
||||
def _extract_stream_formats(self, stream_formats):
|
||||
formats = []
|
||||
for stream_info in traverse_obj(stream_formats, (..., 'data')):
|
||||
stream_url = urljoin(
|
||||
traverse_obj(stream_info, 'rtmp_url'), traverse_obj(stream_info, 'rtmp_live'))
|
||||
if stream_url:
|
||||
rate_id = traverse_obj(stream_info, ('rate', {int_or_none}))
|
||||
rate_info = traverse_obj(stream_info, ('multirates', lambda _, v: v['rate'] == rate_id), get_all=False)
|
||||
ext = determine_ext(stream_url)
|
||||
formats.append({
|
||||
'url': stream_url,
|
||||
'format_id': str_or_none(rate_id),
|
||||
'ext': 'mp4' if ext == 'm3u8' else ext,
|
||||
'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
|
||||
'quality': rate_id % -10000 if rate_id is not None else None,
|
||||
**traverse_obj(rate_info, {
|
||||
'format': ('name', {str_or_none}),
|
||||
'tbr': ('bit', {int_or_none}),
|
||||
}),
|
||||
})
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
if video_id.isdigit():
|
||||
room_id = video_id
|
||||
else:
|
||||
page = self._download_webpage(url, video_id)
|
||||
room_id = self._html_search_regex(
|
||||
r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
room_id = self._search_regex(r'\$ROOM\.room_id\s*=\s*(\d+)', webpage, 'room id')
|
||||
|
||||
if self._search_regex(r'"videoLoop"\s*:\s*(\d+)', webpage, 'loop', default='') == '1':
|
||||
raise UserNotLive('The channel is auto-playing VODs', video_id=video_id)
|
||||
if self._search_regex(r'\$ROOM\.show_status\s*=\s*(\d+)', webpage, 'status', default='') == '2':
|
||||
raise UserNotLive(video_id=video_id)
|
||||
|
||||
# Grab metadata from API
|
||||
params = {
|
||||
|
@ -102,110 +171,136 @@ class DouyuTVIE(InfoExtractor):
|
|||
'time': int(time.time()),
|
||||
}
|
||||
params['auth'] = hashlib.md5(
|
||||
f'room/{video_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
|
||||
room = self._download_json(
|
||||
f'room/{room_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
|
||||
room = traverse_obj(self._download_json(
|
||||
f'http://www.douyutv.com/api/v1/room/{room_id}', video_id,
|
||||
note='Downloading room info', query=params)['data']
|
||||
note='Downloading room info', query=params, fatal=False), 'data')
|
||||
|
||||
# 1 = live, 2 = offline
|
||||
if room.get('show_status') == '2':
|
||||
raise ExtractorError('Live stream is offline', expected=True)
|
||||
if traverse_obj(room, 'show_status') == '2':
|
||||
raise UserNotLive(video_id=video_id)
|
||||
|
||||
video_url = urljoin('https://hls3-akm.douyucdn.cn/', self._search_regex(r'(live/.*)', room['hls_url'], 'URL'))
|
||||
formats, subs = self._extract_m3u8_formats_and_subtitles(video_url, room_id)
|
||||
js_sign_func = self._search_js_sign_func(webpage, fatal=False) or self._get_sign_func(room_id, video_id)
|
||||
form_data = {
|
||||
'rate': 0,
|
||||
**self._calc_sign(js_sign_func, video_id, room_id),
|
||||
}
|
||||
stream_formats = [self._download_json(
|
||||
f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
|
||||
video_id, note="Downloading livestream format",
|
||||
data=urlencode_postdata(form_data))]
|
||||
|
||||
title = unescapeHTML(room['room_name'])
|
||||
description = room.get('show_details')
|
||||
thumbnail = room.get('room_src')
|
||||
uploader = room.get('nickname')
|
||||
for rate_id in traverse_obj(stream_formats[0], ('data', 'multirates', ..., 'rate')):
|
||||
if rate_id != traverse_obj(stream_formats[0], ('data', 'rate')):
|
||||
form_data['rate'] = rate_id
|
||||
stream_formats.append(self._download_json(
|
||||
f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
|
||||
video_id, note=f'Downloading livestream format {rate_id}',
|
||||
data=urlencode_postdata(form_data)))
|
||||
|
||||
return {
|
||||
'id': room_id,
|
||||
'display_id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'formats': self._extract_stream_formats(stream_formats),
|
||||
'is_live': True,
|
||||
'subtitles': subs,
|
||||
'formats': formats,
|
||||
**traverse_obj(room, {
|
||||
'display_id': ('url', {str}, {lambda i: i[1:]}),
|
||||
'title': ('room_name', {unescapeHTML}),
|
||||
'description': ('show_details', {str}),
|
||||
'uploader': ('nickname', {str}),
|
||||
'thumbnail': ('room_src', {url_or_none}),
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
class DouyuShowIE(InfoExtractor):
|
||||
class DouyuShowIE(DouyuBaseIE):
|
||||
_VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw',
|
||||
'md5': '0c2cfd068ee2afe657801269b2d86214',
|
||||
'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY',
|
||||
'info_dict': {
|
||||
'id': 'rjNBdvnVXNzvE2yw',
|
||||
'id': 'mPyq7oVNe5Yv1gLY',
|
||||
'ext': 'mp4',
|
||||
'title': '陈一发儿:砒霜 我有个室友系列!04-01 22点场',
|
||||
'duration': 7150.08,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': '陈一发儿',
|
||||
'uploader_id': 'XrZwYelr5wbK',
|
||||
'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK',
|
||||
'upload_date': '20170402',
|
||||
'title': '四川人小时候的味道“蒜苗回锅肉”,传统菜不能丢,要常做来吃',
|
||||
'duration': 633,
|
||||
'thumbnail': str,
|
||||
'uploader': '美食作家王刚V',
|
||||
'uploader_id': 'OVAO4NVx1m7Q',
|
||||
'timestamp': 1661850002,
|
||||
'upload_date': '20220830',
|
||||
'view_count': int,
|
||||
'tags': ['美食', '美食综合'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMATS = {
|
||||
'super': '原画',
|
||||
'high': '超清',
|
||||
'normal': '高清',
|
||||
}
|
||||
|
||||
_QUALITIES = {
|
||||
'super': -1,
|
||||
'high': -2,
|
||||
'normal': -3,
|
||||
}
|
||||
|
||||
_RESOLUTIONS = {
|
||||
'super': '1920x1080',
|
||||
'high': '1280x720',
|
||||
'normal': '852x480',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
url = url.replace('vmobile.', 'v.')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
room_info = self._parse_json(self._search_regex(
|
||||
r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id)
|
||||
video_info = self._search_json(
|
||||
r'<script>\s*window\.\$DATA\s*=', webpage,
|
||||
'video info', video_id, transform_source=js_to_json)
|
||||
|
||||
video_info = None
|
||||
js_sign_func = self._search_js_sign_func(webpage)
|
||||
form_data = {
|
||||
'vid': video_id,
|
||||
**self._calc_sign(js_sign_func, video_id, video_info['ROOM']['point_id']),
|
||||
}
|
||||
url_info = self._download_json(
|
||||
'https://v.douyu.com/api/stream/getStreamUrl', video_id,
|
||||
data=urlencode_postdata(form_data), note="Downloading video formats")
|
||||
|
||||
for trial in range(5):
|
||||
# Sometimes Douyu rejects our request. Let's try it more times
|
||||
try:
|
||||
video_info = self._download_json(
|
||||
'https://vmobile.douyu.com/video/getInfo', video_id,
|
||||
query={'vid': video_id},
|
||||
headers={
|
||||
'Referer': url,
|
||||
'x-requested-with': 'XMLHttpRequest',
|
||||
})
|
||||
break
|
||||
except ExtractorError:
|
||||
self._sleep(1, video_id)
|
||||
|
||||
if not video_info:
|
||||
raise ExtractorError('Can\'t fetch video info')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_info['data']['video_url'], video_id,
|
||||
entry_protocol='m3u8_native', ext='mp4')
|
||||
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'<em>上传时间:</em><span>([^<]+)</span>', webpage,
|
||||
'upload date', fatal=False))
|
||||
|
||||
uploader = uploader_id = uploader_url = None
|
||||
mobj = re.search(
|
||||
r'(?m)<a[^>]+href="/author/([0-9a-zA-Z]+)".+?<strong[^>]+title="([^"]+)"',
|
||||
webpage)
|
||||
if mobj:
|
||||
uploader_id, uploader = mobj.groups()
|
||||
uploader_url = urljoin(url, '/author/' + uploader_id)
|
||||
formats = []
|
||||
for name, url in traverse_obj(url_info, ('data', 'thumb_video', {dict.items}, ...)):
|
||||
video_url = traverse_obj(url, ('url', {url_or_none}))
|
||||
if video_url:
|
||||
ext = determine_ext(video_url)
|
||||
formats.append({
|
||||
'format': self._FORMATS.get(name),
|
||||
'format_id': name,
|
||||
'url': video_url,
|
||||
'quality': self._QUALITIES.get(name),
|
||||
'ext': 'mp4' if ext == 'm3u8' else ext,
|
||||
'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
|
||||
**parse_resolution(self._RESOLUTIONS.get(name))
|
||||
})
|
||||
else:
|
||||
self.to_screen(
|
||||
f'"{self._FORMATS.get(name, name)}" format may require logging in. {self._login_hint()}')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': room_info['name'],
|
||||
'formats': formats,
|
||||
'duration': room_info.get('duration'),
|
||||
'thumbnail': room_info.get('pic'),
|
||||
'upload_date': upload_date,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'uploader_url': uploader_url,
|
||||
**traverse_obj(video_info, ('DATA', {
|
||||
'title': ('content', 'title', {str}),
|
||||
'uploader': ('content', 'author', {str}),
|
||||
'uploader_id': ('content', 'up_id', {str_or_none}),
|
||||
'duration': ('content', 'video_duration', {int_or_none}),
|
||||
'thumbnail': ('content', 'video_pic', {url_or_none}),
|
||||
'timestamp': ('content', 'create_time', {int_or_none}),
|
||||
'view_count': ('content', 'view_num', {int_or_none}),
|
||||
'tags': ('videoTag', ..., 'tagName', {str}),
|
||||
}))
|
||||
}
|
||||
|
|
yt_dlp/extractor/eplus.py (new file, 96 lines)

@@ -0,0 +1,96 @@
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
try_call,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class EplusIbIE(InfoExtractor):
|
||||
IE_NAME = 'eplus:inbound'
|
||||
IE_DESC = 'e+ (イープラス) overseas'
|
||||
_VALID_URL = r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)'
|
||||
_TESTS = [{
|
||||
'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
|
||||
'info_dict': {
|
||||
'id': '354502-0001-002',
|
||||
'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022~LIVE with a smile!~【Streaming+(配信)】',
|
||||
'live_status': 'was_live',
|
||||
'release_date': '20211231',
|
||||
'release_timestamp': 1640952000,
|
||||
'description': str,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'ignore_no_formats_error': True,
|
||||
},
|
||||
'expected_warnings': [
|
||||
'Could not find the playlist URL. This event may not be accessible',
|
||||
'No video formats found!',
|
||||
'Requested format is not available',
|
||||
],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
data_json = self._search_json(r'<script>\s*var app\s*=', webpage, 'data json', video_id)
|
||||
|
||||
delivery_status = data_json.get('delivery_status')
|
||||
archive_mode = data_json.get('archive_mode')
|
||||
release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
|
||||
release_timestamp_str = data_json.get('event_datetime_text') # JST
|
||||
|
||||
self.write_debug(f'delivery_status = {delivery_status}, archive_mode = {archive_mode}')
|
||||
|
||||
if delivery_status == 'PREPARING':
|
||||
live_status = 'is_upcoming'
|
||||
elif delivery_status == 'STARTED':
|
||||
live_status = 'is_live'
|
||||
elif delivery_status == 'STOPPED':
|
||||
if archive_mode != 'ON':
|
||||
raise ExtractorError(
|
||||
'This event has ended and there is no archive for this event', expected=True)
|
||||
live_status = 'post_live'
|
||||
elif delivery_status == 'WAIT_CONFIRM_ARCHIVED':
|
||||
live_status = 'post_live'
|
||||
elif delivery_status == 'CONFIRMED_ARCHIVE':
|
||||
live_status = 'was_live'
|
||||
else:
|
||||
self.report_warning(f'Unknown delivery_status {delivery_status}, treat it as a live')
|
||||
live_status = 'is_live'
|
||||
|
||||
formats = []
|
||||
|
||||
m3u8_playlist_urls = self._search_json(
|
||||
r'var listChannels\s*=', webpage, 'hls URLs', video_id, contains_pattern=r'\[.+\]', default=[])
|
||||
if not m3u8_playlist_urls:
|
||||
if live_status == 'is_upcoming':
|
||||
self.raise_no_formats(
|
||||
f'Could not find the playlist URL. This live event will begin at {release_timestamp_str} JST', expected=True)
|
||||
else:
|
||||
self.raise_no_formats(
|
||||
'Could not find the playlist URL. This event may not be accessible', expected=True)
|
||||
elif live_status == 'is_upcoming':
|
||||
self.raise_no_formats(f'This live event will begin at {release_timestamp_str} JST', expected=True)
|
||||
elif live_status == 'post_live':
|
||||
self.raise_no_formats('This event has ended, and the archive will be available shortly', expected=True)
|
||||
else:
|
||||
for m3u8_playlist_url in m3u8_playlist_urls:
|
||||
formats.extend(self._extract_m3u8_formats(m3u8_playlist_url, video_id))
|
||||
# FIXME: HTTP request headers need to be updated to continue download
|
||||
warning = 'Due to technical limitations, the download will be interrupted after one hour'
|
||||
if live_status == 'is_live':
|
||||
self.report_warning(warning)
|
||||
elif live_status == 'was_live':
|
||||
self.report_warning(f'{warning}. You can restart to continue the download')
|
||||
|
||||
return {
|
||||
'id': data_json['app_id'],
|
||||
'title': data_json.get('app_name'),
|
||||
'formats': formats,
|
||||
'live_status': live_status,
|
||||
'description': data_json.get('content'),
|
||||
'release_timestamp': release_timestamp,
|
||||
}
|
|
@@ -11,8 +11,8 @@ class ExpressenIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:www\.)?(?:expressen|di)\.se/
(?:(?:tvspelare/video|videoplayer/embed)/)?
tv/(?:[^/]+/)*
(?:(?:tvspelare/video|video-?player/embed)/)?
(?:tv|nyheter)/(?:[^/?#]+/)*
(?P<id>[^/?#&]+)
'''
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1']

@@ -42,6 +42,12 @@ class ExpressenIE(InfoExtractor):
}, {
'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
'only_matching': True,
}, {
'url': 'https://www.expressen.se/video-player/embed/tv/nyheter/ekero-fodda-olof-gustafsson-forvaltar-knarkbaronen-pablo-escobars-namn',
'only_matching': True,
}, {
'url': 'https://www.expressen.se/nyheter/efter-egna-telefonbluffen-escobar-stammer-klarna/',
'only_matching': True,
}]

def _real_extract(self, url):

@@ -134,10 +134,17 @@ class IPrimaIE(InfoExtractor):
), webpage, 'real id', group='id', default=None)

if not video_id:
nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data')
nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data', fatal=False)
video_id = traverse_obj(
nuxt_data, (..., 'content', 'additionals', 'videoPlayId', {str}), get_all=False)

if not video_id:
nuxt_data = self._search_json(
r'<script[^>]+\bid=["\']__NUXT_DATA__["\'][^>]*>',
webpage, 'nuxt data', None, end_pattern=r'</script>', contains_pattern=r'\[(?s:.+)\]')

video_id = traverse_obj(nuxt_data, lambda _, v: re.fullmatch(r'p\d+', v), get_all=False)

if not video_id:
self.raise_no_formats('Unable to extract video ID from webpage')

@@ -106,8 +106,12 @@ class MediaStreamIE(MediaStreamBaseIE):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)

if 'Debido a tu ubicación no puedes ver el contenido' in webpage:
self.raise_geo_restricted()
for message in [
'Debido a tu ubicación no puedes ver el contenido',
'You are not allowed to watch this video: Geo Fencing Restriction'
]:
if message in webpage:
self.raise_geo_restricted()

player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)

@ -6,7 +6,6 @@ from ..utils import (
|
|||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
qualities,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
|
@ -49,77 +48,52 @@ class NovaEmbedIE(InfoExtractor):
|
|||
duration = None
|
||||
formats = []
|
||||
|
||||
player = self._parse_json(
|
||||
self._search_regex(
|
||||
(r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P<json>{.*?})\s*\)(?:\s*\))?\s*,',
|
||||
r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'),
|
||||
webpage, 'player', default='{}', group='json'), video_id, fatal=False)
|
||||
if player:
|
||||
for format_id, format_list in player['tracks'].items():
|
||||
if not isinstance(format_list, list):
|
||||
format_list = [format_list]
|
||||
for format_dict in format_list:
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
if (not self.get_param('allow_unplayable_formats')
|
||||
and traverse_obj(format_dict, ('drm', 'keySystem'))):
|
||||
has_drm = True
|
||||
continue
|
||||
format_url = url_or_none(format_dict.get('src'))
|
||||
format_type = format_dict.get('type')
|
||||
ext = determine_ext(format_url)
|
||||
if (format_type == 'application/x-mpegURL'
|
||||
or format_id == 'HLS' or ext == 'm3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
elif (format_type == 'application/dash+xml'
|
||||
or format_id == 'DASH' or ext == 'mpd'):
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
})
|
||||
duration = int_or_none(player.get('duration'))
|
||||
else:
|
||||
# Old path, not actual as of 08.04.2020
|
||||
bitrates = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
||||
quality_key = qualities(QUALITIES)
|
||||
|
||||
for format_id, format_list in bitrates.items():
|
||||
if not isinstance(format_list, list):
|
||||
format_list = [format_list]
|
||||
for format_url in format_list:
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url:
|
||||
continue
|
||||
if format_id == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
continue
|
||||
f = {
|
||||
def process_format_list(format_list, format_id=""):
|
||||
nonlocal formats, has_drm
|
||||
if not isinstance(format_list, list):
|
||||
format_list = [format_list]
|
||||
for format_dict in format_list:
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
if (not self.get_param('allow_unplayable_formats')
|
||||
and traverse_obj(format_dict, ('drm', 'keySystem'))):
|
||||
has_drm = True
|
||||
continue
|
||||
format_url = url_or_none(format_dict.get('src'))
|
||||
format_type = format_dict.get('type')
|
||||
ext = determine_ext(format_url)
|
||||
if (format_type == 'application/x-mpegURL'
|
||||
or format_id == 'HLS' or ext == 'm3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
elif (format_type == 'application/dash+xml'
|
||||
or format_id == 'DASH' or ext == 'mpd'):
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
}
|
||||
f_id = format_id
|
||||
for quality in QUALITIES:
|
||||
if '%s.mp4' % quality in format_url:
|
||||
f_id += '-%s' % quality
|
||||
f.update({
|
||||
'quality': quality_key(quality),
|
||||
'format_note': quality.upper(),
|
||||
})
|
||||
break
|
||||
f['format_id'] = f_id
|
||||
formats.append(f)
|
||||
})
|
||||
|
||||
player = self._search_json(
|
||||
r'player:', webpage, 'player', video_id, fatal=False, end_pattern=r';\s*</script>')
|
||||
if player:
|
||||
for src in traverse_obj(player, ('lib', 'source', 'sources', ...)):
|
||||
process_format_list(src)
|
||||
duration = traverse_obj(player, ('sourceInfo', 'duration', {int_or_none}))
|
||||
if not formats and not has_drm:
|
||||
# older code path, in use before August 2023
|
||||
player = self._parse_json(
|
||||
self._search_regex(
|
||||
(r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P<json>{.*?})\s*\)(?:\s*\))?\s*,',
|
||||
r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'),
|
||||
webpage, 'player', group='json'), video_id)
|
||||
if player:
|
||||
for format_id, format_list in player['tracks'].items():
|
||||
process_format_list(format_list, format_id)
|
||||
duration = int_or_none(player.get('duration'))
|
||||
|
||||
if not formats and has_drm:
|
||||
self.report_drm(video_id)
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_qs, remove_start, traverse_obj, ExtractorError
|
||||
|
||||
|
||||
class RbgTumIE(InfoExtractor):
|
||||
_VALID_URL = r'https://live\.rbg\.tum\.de/w/(?P<id>.+)'
|
||||
_VALID_URL = r'https://(?:live\.rbg\.tum\.de|tum\.live)/w/(?P<id>[^?#]+)'
|
||||
_TESTS = [{
|
||||
# Combined view
|
||||
'url': 'https://live.rbg.tum.de/w/cpp/22128',
|
||||
|
@ -35,16 +36,18 @@ class RbgTumIE(InfoExtractor):
|
|||
'title': 'Fachschaftsvollversammlung',
|
||||
'series': 'Fachschaftsvollversammlung Informatik',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://tum.live/w/linalginfo/27102',
|
||||
'only_matching': True,
|
||||
}, ]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
m3u8 = self._html_search_regex(r'(https://.+?\.m3u8)', webpage, 'm3u8')
|
||||
lecture_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title')
|
||||
lecture_series_title = self._html_search_regex(
|
||||
r'(?s)<title\b[^>]*>\s*(?:TUM-Live\s\|\s?)?([^:]+):?.*?</title>', webpage, 'series')
|
||||
m3u8 = self._html_search_regex(r'"(https://[^"]+\.m3u8[^"]*)', webpage, 'm3u8')
|
||||
lecture_title = self._html_search_regex(r'<h1[^>]*>([^<]+)</h1>', webpage, 'title', fatal=False)
|
||||
lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ')
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
|
||||
|
@ -57,9 +60,9 @@ class RbgTumIE(InfoExtractor):
|
|||
|
||||
|
||||
class RbgTumCourseIE(InfoExtractor):
|
||||
_VALID_URL = r'https://live\.rbg\.tum\.de/course/(?P<id>.+)'
|
||||
_VALID_URL = r'https://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/old/course/(?P<id>(?P<year>\d+)/(?P<term>\w+)/(?P<slug>[^/?#]+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://live.rbg.tum.de/course/2022/S/fpv',
|
||||
'url': 'https://live.rbg.tum.de/old/course/2022/S/fpv',
|
||||
'info_dict': {
|
||||
'title': 'Funktionale Programmierung und Verifikation (IN0003)',
|
||||
'id': '2022/S/fpv',
|
||||
|
@ -69,7 +72,7 @@ class RbgTumCourseIE(InfoExtractor):
|
|||
},
|
||||
'playlist_count': 13,
|
||||
}, {
|
||||
'url': 'https://live.rbg.tum.de/course/2022/W/set',
|
||||
'url': 'https://live.rbg.tum.de/old/course/2022/W/set',
|
||||
'info_dict': {
|
||||
'title': 'SET FSMPIC',
|
||||
'id': '2022/W/set',
|
||||
|
@ -78,16 +81,62 @@ class RbgTumCourseIE(InfoExtractor):
|
|||
'noplaylist': False,
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}, {
|
||||
'url': 'https://tum.live/old/course/2023/S/linalginfo',
|
||||
'only_matching': True,
|
||||
}, ]
|
||||
|
||||
def _real_extract(self, url):
|
||||
course_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, course_id)
|
||||
course_id, hostname, year, term, slug = self._match_valid_url(url).group('id', 'hostname', 'year', 'term', 'slug')
|
||||
meta = self._download_json(
|
||||
f'https://{hostname}/api/courses/{slug}/', course_id, fatal=False,
|
||||
query={'year': year, 'term': term}) or {}
|
||||
lecture_series_title = meta.get('Name')
|
||||
lectures = [self.url_result(f'https://{hostname}/w/{slug}/{stream_id}', RbgTumIE)
|
||||
for stream_id in traverse_obj(meta, ('Streams', ..., 'ID'))]
|
||||
|
||||
lecture_series_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title')
|
||||
if not lectures:
|
||||
webpage = self._download_webpage(url, course_id)
|
||||
lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ')
|
||||
lectures = [self.url_result(f'https://{hostname}{lecture_path}', RbgTumIE)
|
||||
for lecture_path in re.findall(r'href="(/w/[^/"]+/[^/"]+)"', webpage)]
|
||||
|
||||
lecture_urls = []
|
||||
for lecture_url in re.findall(r'(?i)href="/w/(.+)(?<!/cam)(?<!/pres)(?<!/chat)"', webpage):
|
||||
lecture_urls.append(self.url_result('https://live.rbg.tum.de/w/' + lecture_url, ie=RbgTumIE.ie_key()))
|
||||
return self.playlist_result(lectures, course_id, lecture_series_title)
|
||||
|
||||
return self.playlist_result(lecture_urls, course_id, lecture_series_title)
|
||||
|
||||
class RbgTumNewCourseIE(InfoExtractor):
|
||||
_VALID_URL = r'https://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/\?'
|
||||
_TESTS = [{
|
||||
'url': 'https://live.rbg.tum.de/?year=2022&term=S&slug=fpv&view=3',
|
||||
'info_dict': {
|
||||
'title': 'Funktionale Programmierung und Verifikation (IN0003)',
|
||||
'id': '2022/S/fpv',
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': False,
|
||||
},
|
||||
'playlist_count': 13,
|
||||
}, {
|
||||
'url': 'https://live.rbg.tum.de/?year=2022&term=W&slug=set&view=3',
|
||||
'info_dict': {
|
||||
'title': 'SET FSMPIC',
|
||||
'id': '2022/W/set',
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': False,
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}, {
|
||||
'url': 'https://tum.live/?year=2023&term=S&slug=linalginfo&view=3',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
query = parse_qs(url)
|
||||
errors = [key for key in ('year', 'term', 'slug') if not query.get(key)]
|
||||
if errors:
|
||||
raise ExtractorError(f'Input URL is missing query parameters: {", ".join(errors)}')
|
||||
year, term, slug = query['year'][0], query['term'][0], query['slug'][0]
|
||||
hostname = self._match_valid_url(url).group('hostname')
|
||||
|
||||
return self.url_result(f'https://{hostname}/old/course/{year}/{term}/{slug}', RbgTumCourseIE)
|
||||
|
|
|
@ -5,8 +5,9 @@ import re
|
|||
from .common import InfoExtractor
|
||||
from ..dependencies import websockets
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
UserNotLive,
|
||||
clean_html,
|
||||
float_or_none,
|
||||
get_element_by_class,
|
||||
get_element_by_id,
|
||||
|
@ -235,6 +236,9 @@ class TwitCastingLiveIE(InfoExtractor):
|
|||
_TESTS = [{
|
||||
'url': 'https://twitcasting.tv/ivetesangalo',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://twitcasting.tv/c:unusedlive',
|
||||
'expected_exception': 'UserNotLive',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -260,7 +264,7 @@ class TwitCastingLiveIE(InfoExtractor):
|
|||
r'(?s)<a\s+class="tw-movie-thumbnail"\s*href="/[^/]+/movie/(?P<video_id>\d+)"\s*>.+?</a>',
|
||||
webpage, 'current live ID 2', default=None, group='video_id')
|
||||
if not current_live:
|
||||
raise ExtractorError('The user is not currently live')
|
||||
raise UserNotLive(video_id=uploader_id)
|
||||
return self.url_result('https://twitcasting.tv/%s/movie/%s' % (uploader_id, current_live))
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
# flake8: noqa: 401
|
||||
import warnings
|
||||
|
||||
from .common import (
|
||||
HEADRequest,
|
||||
PUTRequest,
|
||||
|
@ -11,8 +13,11 @@ from .common import (
|
|||
# isort: split
|
||||
# TODO: all request handlers should be safely imported
|
||||
from . import _urllib
|
||||
from ..utils import bug_reports_message
|
||||
|
||||
try:
|
||||
from . import _requests
|
||||
except ImportError:
|
||||
pass
|
||||
except Exception as e:
|
||||
warnings.warn(f'Unable to import requests handler: {e}' + bug_reports_message())

@ -5,8 +5,8 @@ import re
import socket
import warnings

from ..dependencies import OptionalDependencyWarning, brotli, requests, urllib3
from ..utils import int_or_none
from ..dependencies import brotli, requests, urllib3
from ..utils import int_or_none, variadic

if requests is None:
raise ImportError('requests module is not installed')

@ -21,9 +21,11 @@ if len(urllib3_version) == 2:
urllib3_version = tuple(map(functools.partial(int_or_none, default=0), urllib3_version[:3]))

if urllib3_version < (1, 26, 0):
warnings.warn('Unsupported version of `urllib3` installed. urllib3 >= 1.26.0 is required for `requests` support.')
raise ImportError('Only urllib3 >= 1.26.0 is supported')

if requests.__build__ < 0x023100:
warnings.warn('Unsupported version of `requests` installed. requests >= 2.31.0 is required for `requests` support.')
raise ImportError('Only requests >= 2.31.0 is supported')

from http.client import HTTPConnection

@ -67,8 +69,10 @@ SUPPORTED_ENCODINGS = [

# urllib3 does not support brotlicffi on versions < 1.26.9 [1]
# 1: https://github.com/urllib3/urllib3/blob/1.26.x/CHANGES.rst#1269-2022-03-16
if (brotli is not None
and not (brotli.__name__ == 'brotlicffi' and urllib3_version < (1, 26, 9))):
if (
brotli is not None
and not (brotli.__name__ == 'brotlicffi' and urllib3_version < (1, 26, 9))
):
SUPPORTED_ENCODINGS.append('br')

"""

@ -82,7 +86,7 @@ However, some sites may have an incorrect implementation where they provide
a percent-encoded url that is then compared case-sensitively.[2]
While this is a very rare case, since urllib does not do this normalization step, it
is best to avoid it here too for compatibility reasons.
is best to avoid it in requests too for compatibility reasons.

1: https://tools.ietf.org/html/rfc3986#section-2.1
2: https://github.com/streamlink/streamlink/pull/4003

@ -110,16 +114,16 @@ if hasattr(urllib3.util.url, 'PERCENT_RE'):
elif hasattr(urllib3.util.url, '_PERCENT_RE'): # urllib3 >= 2.0.0
urllib3.util.url._PERCENT_RE = _Urllib3PercentREOverride(urllib3.util.url._PERCENT_RE)
else:
warnings.warn('Failed to patch PERCENT_RE in urllib3 (does the attribute exist?)', OptionalDependencyWarning)
warnings.warn('Failed to patch PERCENT_RE in urllib3 (does the attribute exist?)')
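
For context, a standalone illustration of the urllib3 normalization that the override above disables; this is a hedged sketch (it does not import yt-dlp, so the patch is not active here) and the exact behaviour depends on the installed urllib3 version:

import urllib3.util.url as u3url

# On affected urllib3 versions, re-encoding a URL normalizes existing
# percent-escapes to uppercase; some broken servers then reject the path
# because they compare it case-sensitively.
print(u3url.parse_url('https://example.com/path%e2%80%8b').url)
# may print 'https://example.com/path%E2%80%8B' rather than the lowercase original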

"""
Workaround for issue in urllib.util.ssl_.py. ssl_wrap_context does not pass
Workaround for issue in urllib.util.ssl_.py: ssl_wrap_context does not pass
server_hostname to SSLContext.wrap_socket if server_hostname is an IP,
however this is an issue because we set check_hostname to True in our SSLContext.

Monkey-patching IS_SECURETRANSPORT forces ssl_wrap_context to pass server_hostname regardless.

This has been fixed in urllib3 2.0.
This has been fixed in urllib3 2.0+.
See: https://github.com/urllib3/urllib3/issues/517
"""

@ -130,7 +134,8 @@ if urllib3_version < (2, 0, 0):
pass

# Requests will not automatically handle no_proxy by default due to buggy no_proxy handling with proxy dict [1].
# Requests will not automatically handle no_proxy by default
# due to buggy no_proxy handling with proxy dict [1].
# 1. https://github.com/psf/requests/issues/5000
requests.adapters.select_proxy = select_proxy

@ -141,35 +146,34 @@ class RequestsResponseAdapter(Response):
fp=res.raw, headers=res.headers, url=res.url,
status=res.status_code, reason=res.reason)

self.requests_response = res
self._requests_response = res

def read(self, amt: int = None):
try:
# Interact with urllib3 response directly.
return self.fp.read(amt, decode_content=True)
# raw is an urllib3 HTTPResponse, so exceptions will be from urllib3
# See raised error in urllib3.response.HTTPResponse.read()
except urllib3.exceptions.HTTPError as e:
handle_urllib3_read_exceptions(e)

# See urllib3.response.HTTPResponse.read() for exceptions raised on read
except urllib3.exceptions.SSLError as e:
raise SSLError(cause=e) from e

except urllib3.exceptions.IncompleteRead as e:
# urllib3 IncompleteRead.partial is always an integer
raise IncompleteRead(partial=e.partial, expected=e.expected) from e

except urllib3.exceptions.ProtocolError as e:
# http.client.IncompleteRead may be contained within ProtocolError
# See urllib3.response.HTTPResponse._error_catcher()
ir_err = next(
(err for err in (e.__context__, e.__cause__, *variadic(e.args))
if isinstance(err, http.client.IncompleteRead)), None)
if ir_err is not None:
raise IncompleteRead(partial=len(ir_err.partial), expected=ir_err.expected) from e
raise TransportError(cause=e) from e
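
For context, a minimal self-contained sketch (synthetic exception objects, not taken from the diff) of how an http.client.IncompleteRead hidden inside a urllib3 ProtocolError is recovered by the logic above:

import http.client
import urllib3

# urllib3 can wrap an http.client.IncompleteRead inside a ProtocolError
# (see urllib3.response.HTTPResponse._error_catcher), so the adapter checks
# __context__, __cause__ and args before falling back to TransportError.
inner = http.client.IncompleteRead(partial=b'abc', expected=10)
outer = urllib3.exceptions.ProtocolError('Connection broken', inner)

ir_err = next(
    (err for err in (outer.__context__, outer.__cause__, *outer.args)
     if isinstance(err, http.client.IncompleteRead)), None)
assert ir_err is inner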

def find_original_error(e, err_types):
if not isinstance(e, Exception):
return
return next(
(err for err in (e, e.__cause__, *(e.args or [])) if
isinstance(err, err_types)), None)

def handle_urllib3_read_exceptions(e):
# Sometimes IncompleteRead is wrapped by urllib3.exceptions.ProtocolError, so we have to check the args
# TODO: check the above statement and what versions of urllib3 it impacts
ic_read_err = find_original_error(e, (http.client.IncompleteRead, urllib3.exceptions.IncompleteRead))
if ic_read_err is not None:
raise IncompleteRead(partial=ic_read_err.partial, expected=ic_read_err.expected)
if isinstance(e, urllib3.exceptions.SSLError):
raise SSLError(cause=e) from e
except urllib3.exceptions.HTTPError as e:
# catch-all for any other urllib3 response exceptions
raise TransportError(cause=e) from e

class RequestsHTTPAdapter(requests.adapters.HTTPAdapter):

@ -250,10 +254,7 @@ class Urllib3LoggingHandler(logging.Handler):
class RequestsRH(RequestHandler, InstanceStoreMixin):

"""Requests RequestHandler
Params:

@param max_conn_pools: Max number of urllib3 connection pools to cache.
@param conn_pool_maxsize: Max number of connections per pool.
https://github.com/psf/requests
"""
_SUPPORTED_URL_SCHEMES = ('http', 'https')
_SUPPORTED_ENCODINGS = tuple(SUPPORTED_ENCODINGS)

@ -261,14 +262,9 @@ class RequestsRH(RequestHandler, InstanceStoreMixin):
_SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
RH_NAME = 'requests'

DEFAULT_POOLSIZE = requests.adapters.DEFAULT_POOLSIZE

def __init__(self, max_conn_pools: int = None, conn_pool_maxsize: int = None, *args, **kwargs):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

self.max_conn_pools = max_conn_pools or self.DEFAULT_POOLSIZE # default from urllib3
self.conn_pool_maxsize = conn_pool_maxsize or self.DEFAULT_POOLSIZE

# Forward urllib3 debug messages to our logger
logger = logging.getLogger('urllib3')
handler = Urllib3LoggingHandler(logger=self._logger)

@ -300,9 +296,6 @@ class RequestsRH(RequestHandler, InstanceStoreMixin):
ssl_context=self._make_sslcontext(),
source_address=self.source_address,
max_retries=urllib3.util.retry.Retry(False),
pool_maxsize=self.conn_pool_maxsize,
pool_connections=self.max_conn_pools,
pool_block=False,
)
session.adapters.clear()
session.headers = requests.models.CaseInsensitiveDict({'Connection': 'keep-alive'})

@ -320,11 +313,10 @@ class RequestsRH(RequestHandler, InstanceStoreMixin):
max_redirects_exceeded = False

session = self._get_instance(
cookiejar=request.extensions.get('cookiejar') or self.cookiejar
)
cookiejar=request.extensions.get('cookiejar') or self.cookiejar)

try:
res = session.request(
requests_res = session.request(
method=request.method,
url=request.url,
data=request.data,

@ -337,42 +329,41 @@ class RequestsRH(RequestHandler, InstanceStoreMixin):

except requests.exceptions.TooManyRedirects as e:
max_redirects_exceeded = True
res = e.response
requests_res = e.response

except requests.exceptions.SSLError as e:
if 'CERTIFICATE_VERIFY_FAILED' in str(e):
raise CertificateVerifyError(cause=e) from e
raise SSLError(cause=e) from e

except requests.exceptions.ProxyError as e:
raise ProxyError(cause=e) from e

except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
# Some urllib3 exceptions such as IncompleteRead are wrapped by ConnectionError on request
# TODO: check the above
handle_urllib3_read_exceptions(find_original_error(e, (urllib3.exceptions.HTTPError,)))
raise TransportError(cause=e) from e

except urllib3.exceptions.HTTPError as e:
# Catch any urllib3 exceptions that may leak through
# TODO: check this
handle_urllib3_read_exceptions(e)
raise TransportError(cause=e) from e
# Any misc Requests exception. May not necessarily be network related e.g. InvalidURL

except requests.exceptions.RequestException as e:
# Miscellaneous Requests exceptions. May not necessarily be network related e.g. InvalidURL
raise RequestError(cause=e) from e

requests_res = RequestsResponseAdapter(res)
res = RequestsResponseAdapter(requests_res)

if not 200 <= requests_res.status < 300:
raise HTTPError(requests_res, redirect_loop=max_redirects_exceeded)
if not 200 <= res.status < 300:
raise HTTPError(res, redirect_loop=max_redirects_exceeded)

return requests_res
return res

@register_preference(RequestsRH)
def requests_preference(rh, request):
return 500
return 100

# Since we already have a socks proxy implementation,
# we can use that with urllib3 instead of requiring an extra dependency.
# Use our socks proxy implementation with requests to avoid an extra dependency.
class SocksHTTPConnection(urllib3.connection.HTTPConnection):
def __init__(self, _socks_options, *args, **kwargs): # must use _socks_options to pass PoolKey checks
self._proxy_args = _socks_options

@ -387,11 +378,13 @@ class SocksHTTPConnection(urllib3.connection.HTTPConnection):
_create_socket_func=functools.partial(
create_socks_proxy_socket, (self.host, self.port), self._proxy_args))
except (socket.timeout, TimeoutError) as e:
raise urllib3.exceptions.ConnectTimeoutError(self, f'Connection to {self.host} timed out. (connect timeout={self.timeout})') from e
raise urllib3.exceptions.ConnectTimeoutError(
self, f'Connection to {self.host} timed out. (connect timeout={self.timeout})') from e
except SocksProxyError as e:
raise urllib3.exceptions.ProxyError(str(e), e) from e
except (OSError, socket.error) as e:
raise urllib3.exceptions.NewConnectionError(self, f'Failed to establish a new connection: {e}') from e
raise urllib3.exceptions.NewConnectionError(
self, f'Failed to establish a new connection: {e}') from e

class SocksHTTPSConnection(SocksHTTPConnection, urllib3.connection.HTTPSConnection):

@ -471,6 +471,7 @@ def create_parser():
'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
'prefer-legacy-http-handler'
}, 'aliases': {
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter'],
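
A short usage sketch for the compat option registered above; the 'compat_opts' key is assumed to be the programmatic equivalent of --compat-options, and the URL is a placeholder:

import yt_dlp

# Prefer the legacy urllib handler over the requests-based one for standard
# HTTP(S) requests (equivalent to --compat-options prefer-legacy-http-handler).
with yt_dlp.YoutubeDL({'compat_opts': ['prefer-legacy-http-handler']}) as ydl:
    ydl.download(['https://example.com/some-video'])  # placeholder URL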

@ -2727,6 +2727,7 @@ def js_to_json(code, vars={}, *, strict=False):
def create_map(mobj):
return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))

code = re.sub(r'(?:new\s+)?Array\((.*?)\)', r'[\g<1>]', code)
code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
if not strict:
code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
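
A hedged expectation sketch for the substitutions above (exact output whitespace may vary between versions):

from yt_dlp.utils import js_to_json

print(js_to_json('Array(1, 2, 3)'))                 # -> [1, 2, 3]
print(js_to_json('new Map([["a", 1], ["b", 2]])'))  # -> {"a": 1, "b": 2}
print(js_to_json('new Date("2023-01-01")'))         # -> "2023-01-01" (strict=False only)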