mirror of https://github.com/yt-dlp/yt-dlp.git
Compare commits
30 Commits
2aff675ea5
...
389aa54583
Author | SHA1 | Date |
---|---|---|
TAHRI Ahmed R | 389aa54583 | |
TAHRI Ahmed R | 5aab3229e9 | |
Alexandre Huot | 6b54cccdcb | |
src-tinkerer | c4b87dd885 | |
fireattack | 2338827072 | |
fireattack | 06d52c8731 | |
sepro | df5c9e733a | |
Mozi | b38018b781 | |
Rasmus Antons | 145dc6f656 | |
bashonly | 5904853ae5 | |
Chris Caruso | c8bf48f3a8 | |
The-MAGI | 351368cb9a | |
sepro | 96da952504 | |
bashonly | bec9a59e8e | |
bashonly | 036e0d92c6 | |
bashonly | cb2fb4a643 | |
bashonly | 231c2eacc4 | |
bashonly | c4853655cb | |
Simon Sawicki | ac817bc83e | |
bashonly | 1a366403d9 | |
Simon Sawicki | 7e26bd53f9 | |
Simon Sawicki | 64766459e3 | |
bashonly | 89f535e265 | |
bashonly | ff38a011d5 | |
bashonly | 8056a3026e | |
Simon Sawicki | 3ee1194288 | |
bashonly | e3b42d8b1b | |
bashonly | c9ce57d9bf | |
bashonly | 02483bea1c | |
Ahmed TAHRI | a045605461 |
|
@ -12,6 +12,9 @@ on:
|
|||
unix:
|
||||
default: true
|
||||
type: boolean
|
||||
linux_static:
|
||||
default: true
|
||||
type: boolean
|
||||
linux_arm:
|
||||
default: true
|
||||
type: boolean
|
||||
|
@ -27,9 +30,6 @@ on:
|
|||
windows32:
|
||||
default: true
|
||||
type: boolean
|
||||
meta_files:
|
||||
default: true
|
||||
type: boolean
|
||||
origin:
|
||||
required: false
|
||||
default: ''
|
||||
|
@ -52,7 +52,11 @@ on:
|
|||
default: stable
|
||||
type: string
|
||||
unix:
|
||||
description: yt-dlp, yt-dlp.tar.gz, yt-dlp_linux, yt-dlp_linux.zip
|
||||
description: yt-dlp, yt-dlp.tar.gz
|
||||
default: true
|
||||
type: boolean
|
||||
linux_static:
|
||||
description: yt-dlp_linux
|
||||
default: true
|
||||
type: boolean
|
||||
linux_arm:
|
||||
|
@ -75,10 +79,6 @@ on:
|
|||
description: yt-dlp_x86.exe
|
||||
default: true
|
||||
type: boolean
|
||||
meta_files:
|
||||
description: SHA2-256SUMS, SHA2-512SUMS, _update_spec
|
||||
default: true
|
||||
type: boolean
|
||||
origin:
|
||||
description: Origin
|
||||
required: false
|
||||
|
@ -112,27 +112,9 @@ jobs:
|
|||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
- uses: conda-incubator/setup-miniconda@v3
|
||||
with:
|
||||
miniforge-variant: Mambaforge
|
||||
use-mamba: true
|
||||
channels: conda-forge
|
||||
auto-update-conda: true
|
||||
activate-environment: ""
|
||||
auto-activate-base: false
|
||||
- name: Install Requirements
|
||||
run: |
|
||||
sudo apt -y install zip pandoc man sed
|
||||
cat > ./requirements.txt << EOF
|
||||
python=3.10.*
|
||||
pyinstaller
|
||||
brotli-python
|
||||
EOF
|
||||
python devscripts/install_deps.py --print \
|
||||
--exclude brotli --exclude brotlicffi \
|
||||
--include secretstorage >> ./requirements.txt
|
||||
mamba create -n build --file ./requirements.txt
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
python devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
|
||||
|
@ -141,30 +123,15 @@ jobs:
|
|||
- name: Build Unix platform-independent binary
|
||||
run: |
|
||||
make all tar
|
||||
- name: Build Unix standalone binary
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
unset LD_LIBRARY_PATH # Harmful; set by setup-python
|
||||
conda activate build
|
||||
python -m bundle.pyinstaller --onedir
|
||||
(cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip .)
|
||||
python -m bundle.pyinstaller
|
||||
mv ./dist/yt-dlp_linux ./yt-dlp_linux
|
||||
mv ./dist/yt-dlp_linux.zip ./yt-dlp_linux.zip
|
||||
|
||||
- name: Verify --update-to
|
||||
if: vars.UPDATE_TO_VERIFICATION
|
||||
run: |
|
||||
binaries=("yt-dlp" "yt-dlp_linux")
|
||||
for binary in "${binaries[@]}"; do
|
||||
chmod +x ./${binary}
|
||||
cp ./${binary} ./${binary}_downgraded
|
||||
version="$(./${binary} --version)"
|
||||
./${binary}_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
|
||||
downgraded_version="$(./${binary}_downgraded --version)"
|
||||
[[ "$version" != "$downgraded_version" ]]
|
||||
done
|
||||
|
||||
chmod +x ./yt-dlp
|
||||
cp ./yt-dlp ./yt-dlp_downgraded
|
||||
version="$(./yt-dlp --version)"
|
||||
./yt-dlp_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
|
||||
downgraded_version="$(./yt-dlp_downgraded --version)"
|
||||
[[ "$version" != "$downgraded_version" ]]
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
|
@ -172,8 +139,39 @@ jobs:
|
|||
path: |
|
||||
yt-dlp
|
||||
yt-dlp.tar.gz
|
||||
yt-dlp_linux
|
||||
yt-dlp_linux.zip
|
||||
compression-level: 0
|
||||
|
||||
linux_static:
|
||||
needs: process
|
||||
if: inputs.linux_static
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Build static executable
|
||||
env:
|
||||
channel: ${{ inputs.channel }}
|
||||
origin: ${{ needs.process.outputs.origin }}
|
||||
version: ${{ inputs.version }}
|
||||
run: |
|
||||
mkdir ~/build
|
||||
cd bundle/docker
|
||||
docker compose up --build static
|
||||
sudo chown "${USER}:docker" ~/build/yt-dlp_linux
|
||||
- name: Verify --update-to
|
||||
if: vars.UPDATE_TO_VERIFICATION
|
||||
run: |
|
||||
chmod +x ~/build/yt-dlp_linux
|
||||
cp ~/build/yt-dlp_linux ~/build/yt-dlp_linux_downgraded
|
||||
version="$(~/build/yt-dlp_linux --version)"
|
||||
~/build/yt-dlp_linux_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
|
||||
downgraded_version="$(~/build/yt-dlp_linux_downgraded --version)"
|
||||
[[ "$version" != "$downgraded_version" ]]
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-bin-${{ github.job }}
|
||||
path: |
|
||||
~/build/yt-dlp_linux
|
||||
compression-level: 0
|
||||
|
||||
linux_arm:
|
||||
|
@ -254,7 +252,7 @@ jobs:
|
|||
# We need to fuse our own universal2 wheels for curl_cffi
|
||||
python3 -m pip install -U --user delocate
|
||||
mkdir curl_cffi_whls curl_cffi_universal2
|
||||
python3 devscripts/install_deps.py --print -o --include curl_cffi > requirements.txt
|
||||
python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt
|
||||
for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do
|
||||
python3 -m pip download \
|
||||
--only-binary=:all: \
|
||||
|
@ -300,7 +298,7 @@ jobs:
|
|||
macos_legacy:
|
||||
needs: process
|
||||
if: inputs.macos_legacy
|
||||
runs-on: macos-latest
|
||||
runs-on: macos-12
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
@ -362,7 +360,7 @@ jobs:
|
|||
- name: Install Requirements
|
||||
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
|
||||
python devscripts/install_deps.py -o --include build
|
||||
python devscripts/install_deps.py --include py2exe --include curl_cffi
|
||||
python devscripts/install_deps.py --include py2exe --include curl-cffi
|
||||
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl"
|
||||
|
||||
- name: Prepare
|
||||
|
@ -447,10 +445,11 @@ jobs:
|
|||
compression-level: 0
|
||||
|
||||
meta_files:
|
||||
if: inputs.meta_files && always() && !cancelled()
|
||||
if: always() && !cancelled()
|
||||
needs:
|
||||
- process
|
||||
- unix
|
||||
- linux_static
|
||||
- linux_arm
|
||||
- macos
|
||||
- macos_legacy
|
||||
|
|
|
@ -53,7 +53,7 @@ jobs:
|
|||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install test requirements
|
||||
run: python3 ./devscripts/install_deps.py --include dev --include curl_cffi
|
||||
run: python3 ./devscripts/install_deps.py --include dev --include curl-cffi --include niquests
|
||||
- name: Run tests
|
||||
continue-on-error: False
|
||||
run: |
|
||||
|
|
|
@ -196,13 +196,15 @@ ### Networking
|
|||
* [**brotli**](https://github.com/google/brotli)\* or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT <sup>[1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) </sup>
|
||||
* [**websockets**](https://github.com/aaugustin/websockets)\* - For downloading over websocket. Licensed under [BSD-3-Clause](https://github.com/aaugustin/websockets/blob/main/LICENSE)
|
||||
* [**requests**](https://github.com/psf/requests)\* - HTTP library. For HTTPS proxy and persistent connections support. Licensed under [Apache-2.0](https://github.com/psf/requests/blob/main/LICENSE)
|
||||
* [**niquests**](https://github.com/jawah/niquests) - HTTP library as a replacement for Requests with native HTTP/2, and HTTP/3 support along with Happy Eyeballs, resiliency against TLS fingerprinting (not as effective as curl_cffi). Licensed under [Apache-2.0](https://github.com/jawah/niquests/blob/main/LICENSE)
|
||||
* Can be installed with the `niquests` group, e.g. `pip install yt-dlp[default,niquests]`
|
||||
|
||||
#### Impersonation
|
||||
|
||||
The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting.
|
||||
|
||||
* [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE)
|
||||
* Can be installed with the `curl_cffi` group, e.g. `pip install yt-dlp[default,curl_cffi]`
|
||||
* Can be installed with the `curl-cffi` group, e.g. `pip install yt-dlp[default,curl-cffi]`
|
||||
* Currently only included in `yt-dlp.exe` and `yt-dlp_macos` builds
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
services:
|
||||
static:
|
||||
build: static
|
||||
environment:
|
||||
channel: ${channel}
|
||||
origin: ${origin}
|
||||
version: ${version}
|
||||
volumes:
|
||||
- ~/build:/build
|
||||
- ../..:/yt-dlp
|
|
@ -0,0 +1,21 @@
|
|||
FROM alpine:3.19 as base
|
||||
|
||||
RUN apk --update add --no-cache \
|
||||
build-base \
|
||||
python3 \
|
||||
pipx \
|
||||
;
|
||||
|
||||
RUN pipx install pyinstaller
|
||||
# Requires above step to prepare the shared venv
|
||||
RUN ~/.local/share/pipx/shared/bin/python -m pip install -U wheel
|
||||
RUN apk --update add --no-cache \
|
||||
scons \
|
||||
patchelf \
|
||||
binutils \
|
||||
;
|
||||
RUN pipx install staticx
|
||||
|
||||
WORKDIR /yt-dlp
|
||||
COPY entrypoint.sh /entrypoint.sh
|
||||
ENTRYPOINT /entrypoint.sh
|
|
@ -0,0 +1,13 @@
|
|||
#!/bin/ash
|
||||
set -e
|
||||
|
||||
source ~/.local/share/pipx/venvs/pyinstaller/bin/activate
|
||||
python -m devscripts.install_deps --include secretstorage
|
||||
python -m devscripts.make_lazy_extractors
|
||||
python devscripts/update-version.py -c "${channel}" -r "${origin}" "${version}"
|
||||
python -m bundle.pyinstaller
|
||||
deactivate
|
||||
|
||||
source ~/.local/share/pipx/venvs/staticx/bin/activate
|
||||
staticx /yt-dlp/dist/yt-dlp_linux /build/yt-dlp_linux
|
||||
deactivate
|
|
@ -66,7 +66,31 @@ def main():
|
|||
pip_args.append('--user')
|
||||
pip_args.extend(targets)
|
||||
|
||||
return subprocess.call(pip_args)
|
||||
exit_code = subprocess.call(pip_args)
|
||||
|
||||
if exit_code:
|
||||
return exit_code
|
||||
|
||||
# if both Requests, and Niquests are there, make sure the original
|
||||
# urllib3 is present. Requests support urllib3.future as a replacement to former urllib3.
|
||||
# all of this can be safely removed once Requests is in an optional group, then install it after Niquests.
|
||||
# see https://niquests.readthedocs.io/en/latest/community/faq.html#what-is-urllib3-future to learn more.
|
||||
if ["niquests" in _ or "requests" in _ for _ in targets].count(True) == 2:
|
||||
exit_code = subprocess.call(
|
||||
[sys.executable, '-m', 'pip', 'uninstall', '-y', 'urllib3', 'urllib3.future']
|
||||
)
|
||||
if exit_code:
|
||||
return exit_code
|
||||
exit_code = subprocess.call(
|
||||
[sys.executable, '-m', 'pip', 'install', '-U', 'urllib3.future']
|
||||
)
|
||||
if exit_code:
|
||||
return exit_code
|
||||
exit_code = subprocess.call(
|
||||
[sys.executable, '-m', 'pip', 'install', '-U', 'urllib3']
|
||||
)
|
||||
|
||||
return exit_code
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -53,7 +53,10 @@ dependencies = [
|
|||
|
||||
[project.optional-dependencies]
|
||||
default = []
|
||||
curl_cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
|
||||
curl-cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
|
||||
niquests = [
|
||||
"niquests>=3,<4"
|
||||
]
|
||||
secretstorage = [
|
||||
"cffi",
|
||||
"secretstorage",
|
||||
|
|
|
@ -1906,6 +1906,15 @@ def test_response_with_expected_status_returns_content(self):
|
|||
expected_status=TEAPOT_RESPONSE_STATUS)
|
||||
self.assertEqual(content, TEAPOT_RESPONSE_BODY)
|
||||
|
||||
def test_search_nextjs_data(self):
|
||||
data = '<script id="__NEXT_DATA__" type="application/json">{"props":{}}</script>'
|
||||
self.assertEqual(self.ie._search_nextjs_data(data, None), {'props': {}})
|
||||
self.assertEqual(self.ie._search_nextjs_data('', None, fatal=False), {})
|
||||
self.assertEqual(self.ie._search_nextjs_data('', None, default=None), None)
|
||||
self.assertEqual(self.ie._search_nextjs_data('', None, default={}), {})
|
||||
with self.assertRaises(DeprecationWarning):
|
||||
self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {})
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
from test.conftest import validate_and_send
|
||||
from test.helper import FakeYDL, http_server_port, verify_address_availability
|
||||
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||
from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3
|
||||
from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3, niquests, niquests_urllib3
|
||||
from yt_dlp.networking import (
|
||||
HEADRequest,
|
||||
PUTRequest,
|
||||
|
@ -318,7 +318,7 @@ def setup_class(cls):
|
|||
|
||||
|
||||
class TestHTTPRequestHandler(TestRequestHandlerBase):
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_verify_cert(self, handler):
|
||||
with handler() as rh:
|
||||
with pytest.raises(CertificateVerifyError):
|
||||
|
@ -329,7 +329,7 @@ def test_verify_cert(self, handler):
|
|||
assert r.status == 200
|
||||
r.close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_ssl_error(self, handler):
|
||||
# HTTPS server with too old TLS version
|
||||
# XXX: is there a better way to test this than to create a new server?
|
||||
|
@ -347,7 +347,7 @@ def test_ssl_error(self, handler):
|
|||
validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
|
||||
assert not issubclass(exc_info.type, CertificateVerifyError)
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_percent_encode(self, handler):
|
||||
with handler() as rh:
|
||||
# Unicode characters should be encoded with uppercase percent-encoding
|
||||
|
@ -359,7 +359,7 @@ def test_percent_encode(self, handler):
|
|||
assert res.status == 200
|
||||
res.close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
@pytest.mark.parametrize('path', [
|
||||
'/a/b/./../../headers',
|
||||
'/redirect_dotsegments',
|
||||
|
@ -376,14 +376,14 @@ def test_remove_dot_segments(self, handler, path):
|
|||
res.close()
|
||||
|
||||
# Not supported by CurlCFFI (non-standard)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'Niquests'], indirect=True)
|
||||
def test_unicode_path_redirection(self, handler):
|
||||
with handler() as rh:
|
||||
r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
|
||||
assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
|
||||
r.close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_raise_http_error(self, handler):
|
||||
with handler() as rh:
|
||||
for bad_status in (400, 500, 599, 302):
|
||||
|
@ -393,7 +393,7 @@ def test_raise_http_error(self, handler):
|
|||
# Should not raise an error
|
||||
validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_response_url(self, handler):
|
||||
with handler() as rh:
|
||||
# Response url should be that of the last url in redirect chain
|
||||
|
@ -405,7 +405,7 @@ def test_response_url(self, handler):
|
|||
res2.close()
|
||||
|
||||
# Covers some basic cases we expect some level of consistency between request handlers for
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
@pytest.mark.parametrize('redirect_status,method,expected', [
|
||||
# A 303 must either use GET or HEAD for subsequent request
|
||||
(303, 'POST', ('', 'GET', False)),
|
||||
|
@ -447,7 +447,7 @@ def test_redirect(self, handler, redirect_status, method, expected):
|
|||
assert expected[1] == res.headers.get('method')
|
||||
assert expected[2] == ('content-length' in headers.decode().lower())
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_request_cookie_header(self, handler):
|
||||
# We should accept a Cookie header being passed as in normal headers and handle it appropriately.
|
||||
with handler() as rh:
|
||||
|
@ -480,19 +480,19 @@ def test_request_cookie_header(self, handler):
|
|||
assert b'cookie: test=ytdlp' not in data.lower()
|
||||
assert b'cookie: test=test3' in data.lower()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_redirect_loop(self, handler):
|
||||
with handler() as rh:
|
||||
with pytest.raises(HTTPError, match='redirect loop'):
|
||||
validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_incompleteread(self, handler):
|
||||
with handler(timeout=2) as rh:
|
||||
with pytest.raises(IncompleteRead, match='13 bytes read, 234221 more expected'):
|
||||
with pytest.raises(IncompleteRead, match=r'^(13 bytes read, 234221 more expected|received 13 bytes, expected 234234)$'):
|
||||
validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_cookies(self, handler):
|
||||
cookiejar = YoutubeDLCookieJar()
|
||||
cookiejar.set_cookie(http.cookiejar.Cookie(
|
||||
|
@ -509,7 +509,7 @@ def test_cookies(self, handler):
|
|||
rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
|
||||
assert b'cookie: test=ytdlp' in data.lower()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_headers(self, handler):
|
||||
|
||||
with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
|
||||
|
@ -525,7 +525,7 @@ def test_headers(self, handler):
|
|||
assert b'test2: test2' not in data
|
||||
assert b'test3: test3' in data
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_read_timeout(self, handler):
|
||||
with handler() as rh:
|
||||
# Default timeout is 20 seconds, so this should go through
|
||||
|
@ -541,7 +541,7 @@ def test_read_timeout(self, handler):
|
|||
validate_and_send(
|
||||
rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_connect_timeout(self, handler):
|
||||
# nothing should be listening on this port
|
||||
connect_timeout_url = 'http://10.255.255.255'
|
||||
|
@ -560,7 +560,7 @@ def test_connect_timeout(self, handler):
|
|||
rh, Request(connect_timeout_url, extensions={'timeout': 0.01}))
|
||||
assert 0.01 <= time.time() - now < 20
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_source_address(self, handler):
|
||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||
# on some systems these loopback addresses we need for testing may not be available
|
||||
|
@ -572,13 +572,13 @@ def test_source_address(self, handler):
|
|||
assert source_address == data
|
||||
|
||||
# Not supported by CurlCFFI
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'Niquests'], indirect=True)
|
||||
def test_gzip_trailing_garbage(self, handler):
|
||||
with handler() as rh:
|
||||
data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
|
||||
assert data == '<html><video src="/vid.mp4" /></html>'
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'Niquests'], indirect=True)
|
||||
@pytest.mark.skipif(not brotli, reason='brotli support is not installed')
|
||||
def test_brotli(self, handler):
|
||||
with handler() as rh:
|
||||
|
@ -589,7 +589,7 @@ def test_brotli(self, handler):
|
|||
assert res.headers.get('Content-Encoding') == 'br'
|
||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_deflate(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(
|
||||
|
@ -599,7 +599,7 @@ def test_deflate(self, handler):
|
|||
assert res.headers.get('Content-Encoding') == 'deflate'
|
||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_gzip(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(
|
||||
|
@ -609,7 +609,7 @@ def test_gzip(self, handler):
|
|||
assert res.headers.get('Content-Encoding') == 'gzip'
|
||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_multiple_encodings(self, handler):
|
||||
with handler() as rh:
|
||||
for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
|
||||
|
@ -621,7 +621,7 @@ def test_multiple_encodings(self, handler):
|
|||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||
|
||||
# Not supported by curl_cffi
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'Niquests'], indirect=True)
|
||||
def test_unsupported_encoding(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(
|
||||
|
@ -631,7 +631,7 @@ def test_unsupported_encoding(self, handler):
|
|||
assert res.headers.get('Content-Encoding') == 'unsupported'
|
||||
assert res.read() == b'raw'
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_read(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(
|
||||
|
@ -664,7 +664,7 @@ def setup_class(cls):
|
|||
cls.geo_proxy_thread.daemon = True
|
||||
cls.geo_proxy_thread.start()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_http_proxy(self, handler):
|
||||
http_proxy = f'http://127.0.0.1:{self.proxy_port}'
|
||||
geo_proxy = f'http://127.0.0.1:{self.geo_port}'
|
||||
|
@ -690,7 +690,7 @@ def test_http_proxy(self, handler):
|
|||
assert res != f'normal: {real_url}'
|
||||
assert 'Accept' in res
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_noproxy(self, handler):
|
||||
with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
|
||||
# NO_PROXY
|
||||
|
@ -700,7 +700,7 @@ def test_noproxy(self, handler):
|
|||
'utf-8')
|
||||
assert 'Accept' in nop_response
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_allproxy(self, handler):
|
||||
url = 'http://foo.com/bar'
|
||||
with handler() as rh:
|
||||
|
@ -708,7 +708,7 @@ def test_allproxy(self, handler):
|
|||
'utf-8')
|
||||
assert response == f'normal: {url}'
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_http_proxy_with_idn(self, handler):
|
||||
with handler(proxies={
|
||||
'http': f'http://127.0.0.1:{self.proxy_port}',
|
||||
|
@ -745,27 +745,27 @@ def _run_test(self, handler, **handler_kwargs):
|
|||
) as rh:
|
||||
validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html')).read().decode()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_certificate_combined_nopass(self, handler):
|
||||
self._run_test(handler, client_cert={
|
||||
'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
|
||||
})
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_certificate_nocombined_nopass(self, handler):
|
||||
self._run_test(handler, client_cert={
|
||||
'client_certificate': os.path.join(self.certdir, 'client.crt'),
|
||||
'client_certificate_key': os.path.join(self.certdir, 'client.key'),
|
||||
})
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_certificate_combined_pass(self, handler):
|
||||
self._run_test(handler, client_cert={
|
||||
'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
|
||||
'client_certificate_password': 'foobar',
|
||||
})
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI', 'Niquests'], indirect=True)
|
||||
def test_certificate_nocombined_pass(self, handler):
|
||||
self._run_test(handler, client_cert={
|
||||
'client_certificate': os.path.join(self.certdir, 'client.crt'),
|
||||
|
@ -785,6 +785,25 @@ def test_supported_impersonate_targets(self, handler):
|
|||
assert res.status == 200
|
||||
assert std_headers['user-agent'].lower() not in res.read().decode().lower()
|
||||
|
||||
def test_response_extensions(self, handler):
|
||||
with handler() as rh:
|
||||
for target in rh.supported_targets:
|
||||
request = Request(
|
||||
f'http://127.0.0.1:{self.http_port}/gen_200', extensions={'impersonate': target})
|
||||
res = validate_and_send(rh, request)
|
||||
assert res.extensions['impersonate'] == rh._get_request_target(request)
|
||||
|
||||
def test_http_error_response_extensions(self, handler):
|
||||
with handler() as rh:
|
||||
for target in rh.supported_targets:
|
||||
request = Request(
|
||||
f'http://127.0.0.1:{self.http_port}/gen_404', extensions={'impersonate': target})
|
||||
try:
|
||||
validate_and_send(rh, request)
|
||||
except HTTPError as e:
|
||||
res = e.response
|
||||
assert res.extensions['impersonate'] == rh._get_request_target(request)
|
||||
|
||||
|
||||
class TestRequestHandlerMisc:
|
||||
"""Misc generic tests for request handlers, not related to request or validation testing"""
|
||||
|
@ -1130,6 +1149,89 @@ def close(self):
|
|||
assert res4._buffer == b''
|
||||
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Niquests'], indirect=True)
|
||||
class TestNiquestsRequestHandler(TestRequestHandlerBase):
|
||||
@pytest.mark.parametrize('raised,expected', [
|
||||
(lambda: niquests.exceptions.ConnectTimeout(), TransportError),
|
||||
(lambda: niquests.exceptions.ReadTimeout(), TransportError),
|
||||
(lambda: niquests.exceptions.Timeout(), TransportError),
|
||||
(lambda: niquests.exceptions.ConnectionError(), TransportError),
|
||||
(lambda: niquests.exceptions.ProxyError(), ProxyError),
|
||||
(lambda: niquests.exceptions.SSLError('12[CERTIFICATE_VERIFY_FAILED]34'), CertificateVerifyError),
|
||||
(lambda: niquests.exceptions.SSLError(), SSLError),
|
||||
(lambda: niquests.exceptions.InvalidURL(), RequestError),
|
||||
(lambda: niquests.exceptions.InvalidHeader(), RequestError),
|
||||
# catch-all: https://github.com/psf/requests/blob/main/src/requests/adapters.py#L535
|
||||
(lambda: niquests_urllib3.exceptions.HTTPError(), TransportError),
|
||||
(lambda: niquests.exceptions.RequestException(), RequestError)
|
||||
# (lambda: niquests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
|
||||
])
|
||||
def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
|
||||
with handler() as rh:
|
||||
def mock_get_instance(*args, **kwargs):
|
||||
class MockSession:
|
||||
def request(self, *args, **kwargs):
|
||||
raise raised()
|
||||
return MockSession()
|
||||
|
||||
monkeypatch.setattr(rh, '_get_instance', mock_get_instance)
|
||||
|
||||
with pytest.raises(expected) as exc_info:
|
||||
rh.send(Request('http://fake'))
|
||||
|
||||
assert exc_info.type is expected
|
||||
|
||||
@pytest.mark.parametrize('raised,expected,match', [
|
||||
(lambda: niquests_urllib3.exceptions.SSLError(), SSLError, None),
|
||||
(lambda: niquests_urllib3.exceptions.TimeoutError(), TransportError, None),
|
||||
(lambda: niquests_urllib3.exceptions.ReadTimeoutError(None, None, None), TransportError, None),
|
||||
(lambda: niquests_urllib3.exceptions.ProtocolError(), TransportError, None),
|
||||
(lambda: niquests_urllib3.exceptions.DecodeError(), TransportError, None),
|
||||
(lambda: niquests_urllib3.exceptions.HTTPError(), TransportError, None), # catch-all
|
||||
(
|
||||
lambda: niquests_urllib3.exceptions.ProtocolError('error', niquests_urllib3.exceptions.IncompleteRead(partial="abc", expected=4)),
|
||||
IncompleteRead,
|
||||
'3 bytes read, 4 more expected'
|
||||
),
|
||||
(
|
||||
lambda: niquests_urllib3.exceptions.ProtocolError('error', niquests_urllib3.exceptions.IncompleteRead(partial=3, expected=5)),
|
||||
IncompleteRead,
|
||||
'3 bytes read, 5 more expected'
|
||||
),
|
||||
])
|
||||
def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
|
||||
from niquests.models import Response as RequestsResponse
|
||||
|
||||
from yt_dlp.networking._niquests import RequestsResponseAdapter
|
||||
requests_res = RequestsResponse()
|
||||
requests_res.raw = niquests_urllib3.response.HTTPResponse(body=b'', status=200)
|
||||
res = RequestsResponseAdapter(requests_res)
|
||||
|
||||
def mock_read(*args, **kwargs):
|
||||
raise raised()
|
||||
monkeypatch.setattr(res.fp, 'read', mock_read)
|
||||
|
||||
with pytest.raises(expected, match=match) as exc_info:
|
||||
res.read()
|
||||
|
||||
assert exc_info.type is expected
|
||||
|
||||
def test_close(self, handler, monkeypatch):
|
||||
rh = handler()
|
||||
session = rh._get_instance(cookiejar=rh.cookiejar)
|
||||
called = False
|
||||
original_close = session.close
|
||||
|
||||
def mock_close(*args, **kwargs):
|
||||
nonlocal called
|
||||
called = True
|
||||
return original_close(*args, **kwargs)
|
||||
|
||||
monkeypatch.setattr(session, 'close', mock_close)
|
||||
rh.close()
|
||||
assert called
|
||||
|
||||
|
||||
def run_validation(handler, error, req, **handler_kwargs):
|
||||
with handler(**handler_kwargs) as rh:
|
||||
if error:
|
||||
|
@ -1170,6 +1272,10 @@ class HTTPSupportedRH(ValidationRH):
|
|||
('http', False, {}),
|
||||
('https', False, {}),
|
||||
]),
|
||||
('Niquests', [
|
||||
('http', False, {}),
|
||||
('https', False, {}),
|
||||
]),
|
||||
('Websockets', [
|
||||
('ws', False, {}),
|
||||
('wss', False, {}),
|
||||
|
@ -1201,6 +1307,14 @@ class HTTPSupportedRH(ValidationRH):
|
|||
('socks5', False),
|
||||
('socks5h', False),
|
||||
]),
|
||||
('Niquests', 'http', [
|
||||
('http', False),
|
||||
('https', False),
|
||||
('socks4', False),
|
||||
('socks4a', False),
|
||||
('socks5', False),
|
||||
('socks5h', False),
|
||||
]),
|
||||
('CurlCFFI', 'http', [
|
||||
('http', False),
|
||||
('https', False),
|
||||
|
@ -1226,6 +1340,10 @@ class HTTPSupportedRH(ValidationRH):
|
|||
('all', False),
|
||||
('unrelated', False),
|
||||
]),
|
||||
('Niquests', [
|
||||
('all', False),
|
||||
('unrelated', False),
|
||||
]),
|
||||
('CurlCFFI', [
|
||||
('all', False),
|
||||
('unrelated', False),
|
||||
|
@ -1251,6 +1369,13 @@ class HTTPSupportedRH(ValidationRH):
|
|||
({'timeout': 'notatimeout'}, AssertionError),
|
||||
({'unsupported': 'value'}, UnsupportedRequest),
|
||||
]),
|
||||
('Niquests', 'http', [
|
||||
({'cookiejar': 'notacookiejar'}, AssertionError),
|
||||
({'cookiejar': YoutubeDLCookieJar()}, False),
|
||||
({'timeout': 1}, False),
|
||||
({'timeout': 'notatimeout'}, AssertionError),
|
||||
({'unsupported': 'value'}, UnsupportedRequest),
|
||||
]),
|
||||
('CurlCFFI', 'http', [
|
||||
({'cookiejar': 'notacookiejar'}, AssertionError),
|
||||
({'cookiejar': YoutubeDLCookieJar()}, False),
|
||||
|
@ -1283,7 +1408,7 @@ class HTTPSupportedRH(ValidationRH):
|
|||
def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
|
||||
run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))
|
||||
|
||||
@pytest.mark.parametrize('handler,fail', [('Urllib', False), ('Requests', False), ('CurlCFFI', False)], indirect=['handler'])
|
||||
@pytest.mark.parametrize('handler,fail', [('Urllib', False), ('Requests', False), ('Niquests', False), ('CurlCFFI', False)], indirect=['handler'])
|
||||
def test_no_proxy(self, handler, fail):
|
||||
run_validation(handler, fail, Request('http://', proxies={'no': '127.0.0.1,github.com'}))
|
||||
run_validation(handler, fail, Request('http://'), proxies={'no': '127.0.0.1,github.com'})
|
||||
|
@ -1306,13 +1431,13 @@ def test_proxy_scheme(self, handler, req_scheme, scheme, fail):
|
|||
run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'}))
|
||||
run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'})
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests', 'Niquests', 'CurlCFFI'], indirect=True)
|
||||
def test_empty_proxy(self, handler):
|
||||
run_validation(handler, False, Request('http://', proxies={'http': None}))
|
||||
run_validation(handler, False, Request('http://'), proxies={'http': None})
|
||||
|
||||
@pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'Niquests', 'CurlCFFI'], indirect=True)
|
||||
def test_invalid_proxy_url(self, handler, proxy_url):
|
||||
run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))
|
||||
|
||||
|
|
|
@ -142,6 +142,7 @@ def handle(self):
|
|||
'!BBBBIH', SOCKS5_VERSION, self.socks_kwargs.get('reply', Socks5Reply.SUCCEEDED), 0x0, 0x1, 0x7f000001, 40000))
|
||||
|
||||
self.request_handler_class(self.request, self.client_address, self.server, socks_info=socks_info)
|
||||
self.server.shutdown()
|
||||
|
||||
|
||||
class Socks4ProxyHandler(StreamRequestHandler, SocksProxyHandler):
|
||||
|
@ -193,6 +194,7 @@ def handle(self):
|
|||
self.socks_kwargs.get('cd_reply', Socks4CD.REQUEST_GRANTED), 40000, 0x7f000001))
|
||||
|
||||
self.request_handler_class(self.request, self.client_address, self.server, socks_info=socks_info)
|
||||
self.server.shutdown()
|
||||
|
||||
|
||||
class IPv6ThreadingTCPServer(ThreadingTCPServer):
|
||||
|
@ -290,6 +292,7 @@ def ctx(request):
|
|||
'handler,ctx', [
|
||||
('Urllib', 'http'),
|
||||
('Requests', 'http'),
|
||||
('Niquests', 'http'),
|
||||
('Websockets', 'ws'),
|
||||
('CurlCFFI', 'http')
|
||||
], indirect=True)
|
||||
|
@ -365,6 +368,7 @@ def test_timeout(self, handler, ctx):
|
|||
'handler,ctx', [
|
||||
('Urllib', 'http'),
|
||||
('Requests', 'http'),
|
||||
('Niquests', 'http'),
|
||||
('Websockets', 'ws'),
|
||||
('CurlCFFI', 'http')
|
||||
], indirect=True)
|
||||
|
|
|
@ -2059,7 +2059,22 @@ def test_extract_basic_auth(self):
|
|||
assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')
|
||||
|
||||
@unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows')
|
||||
def test_Popen_windows_escaping(self):
|
||||
def test_windows_escaping(self):
|
||||
tests = [
|
||||
'test"&',
|
||||
'%CMDCMDLINE:~-1%&',
|
||||
'a\nb',
|
||||
'"',
|
||||
'\\',
|
||||
'!',
|
||||
'^!',
|
||||
'a \\ b',
|
||||
'a \\" b',
|
||||
'a \\ b\\',
|
||||
# We replace \r with \n
|
||||
('a\r\ra', 'a\n\na'),
|
||||
]
|
||||
|
||||
def run_shell(args):
|
||||
stdout, stderr, error = Popen.run(
|
||||
args, text=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
|
@ -2067,15 +2082,15 @@ def run_shell(args):
|
|||
assert not error
|
||||
return stdout
|
||||
|
||||
# Test escaping
|
||||
assert run_shell(['echo', 'test"&']) == '"test""&"\n'
|
||||
assert run_shell(['echo', '%CMDCMDLINE:~-1%&']) == '"%CMDCMDLINE:~-1%&"\n'
|
||||
assert run_shell(['echo', 'a\nb']) == '"a"\n"b"\n'
|
||||
assert run_shell(['echo', '"']) == '""""\n'
|
||||
assert run_shell(['echo', '\\']) == '\\\n'
|
||||
# Test if delayed expansion is disabled
|
||||
assert run_shell(['echo', '^!']) == '"^!"\n'
|
||||
assert run_shell('echo "^!"') == '"^!"\n'
|
||||
for argument in tests:
|
||||
if isinstance(argument, str):
|
||||
expected = argument
|
||||
else:
|
||||
argument, expected = argument
|
||||
|
||||
args = [sys.executable, '-c', 'import sys; print(end=sys.argv[1])', argument, 'end']
|
||||
assert run_shell(args) == expected
|
||||
assert run_shell(shell_quote(args, shell=True)) == expected
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -2136,6 +2136,11 @@ def _filter(f):
|
|||
|
||||
def _check_formats(self, formats):
|
||||
for f in formats:
|
||||
working = f.get('__working')
|
||||
if working is not None:
|
||||
if working:
|
||||
yield f
|
||||
continue
|
||||
self.to_screen('[info] Testing format %s' % f['format_id'])
|
||||
path = self.get_output_path('temp')
|
||||
if not self._ensure_dir_exists(f'{path}/'):
|
||||
|
@ -2152,33 +2157,44 @@ def _check_formats(self, formats):
|
|||
os.remove(temp_file.name)
|
||||
except OSError:
|
||||
self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
|
||||
f['__working'] = success
|
||||
if success:
|
||||
yield f
|
||||
else:
|
||||
self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
|
||||
|
||||
def _select_formats(self, formats, selector):
|
||||
return list(selector({
|
||||
'formats': formats,
|
||||
'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
|
||||
'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
|
||||
or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
|
||||
}))
|
||||
|
||||
def _default_format_spec(self, info_dict, download=True):
|
||||
download = download and not self.params.get('simulate')
|
||||
prefer_best = download and (
|
||||
self.params['outtmpl']['default'] == '-'
|
||||
or info_dict.get('is_live') and not self.params.get('live_from_start'))
|
||||
|
||||
def can_merge():
|
||||
merger = FFmpegMergerPP(self)
|
||||
return merger.available and merger.can_merge()
|
||||
|
||||
prefer_best = (
|
||||
not self.params.get('simulate')
|
||||
and download
|
||||
and (
|
||||
not can_merge()
|
||||
or info_dict.get('is_live') and not self.params.get('live_from_start')
|
||||
or self.params['outtmpl']['default'] == '-'))
|
||||
compat = (
|
||||
prefer_best
|
||||
or self.params.get('allow_multiple_audio_streams', False)
|
||||
or 'format-spec' in self.params['compat_opts'])
|
||||
if not prefer_best and download and not can_merge():
|
||||
prefer_best = True
|
||||
formats = self._get_formats(info_dict)
|
||||
evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
|
||||
if evaluate_formats('b/bv+ba') != evaluate_formats('bv*+ba/b'):
|
||||
self.report_warning('ffmpeg not found. The downloaded format may not be the best available. '
|
||||
'Installing ffmpeg is strongly recommended: https://github.com/yt-dlp/yt-dlp#dependencies')
|
||||
|
||||
return (
|
||||
'best/bestvideo+bestaudio' if prefer_best
|
||||
else 'bestvideo*+bestaudio/best' if not compat
|
||||
else 'bestvideo+bestaudio/best')
|
||||
compat = (self.params.get('allow_multiple_audio_streams')
|
||||
or 'format-spec' in self.params['compat_opts'])
|
||||
|
||||
return ('best/bestvideo+bestaudio' if prefer_best
|
||||
else 'bestvideo+bestaudio/best' if compat
|
||||
else 'bestvideo*+bestaudio/best')
|
||||
|
||||
def build_format_selector(self, format_spec):
|
||||
def syntax_error(note, start):
|
||||
|
@ -2928,12 +2944,7 @@ def is_wellformed(f):
|
|||
self.write_debug(f'Default format spec: {req_format}')
|
||||
format_selector = self.build_format_selector(req_format)
|
||||
|
||||
formats_to_download = list(format_selector({
|
||||
'formats': formats,
|
||||
'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
|
||||
'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
|
||||
or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
|
||||
}))
|
||||
formats_to_download = self._select_formats(formats, format_selector)
|
||||
if interactive_format_selection and not formats_to_download:
|
||||
self.report_error('Requested format is not available', tb=False, is_error=False)
|
||||
continue
|
||||
|
@ -4141,10 +4152,11 @@ def urlopen(self, req):
|
|||
if (
|
||||
'unsupported proxy type: "https"' in ue.msg.lower()
|
||||
and 'requests' not in self._request_director.handlers
|
||||
and 'niquests' not in self._request_director.handlers
|
||||
and 'curl_cffi' not in self._request_director.handlers
|
||||
):
|
||||
raise RequestError(
|
||||
'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests, curl_cffi')
|
||||
'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests, curl_cffi or niquests')
|
||||
|
||||
elif (
|
||||
re.match(r'unsupported url scheme: "wss?"', ue.msg.lower())
|
||||
|
|
|
@ -25,7 +25,7 @@ def get_hidden_imports():
|
|||
for module in ('websockets', 'requests', 'urllib3'):
|
||||
yield from collect_submodules(module)
|
||||
# These are auto-detected, but explicitly add them just in case
|
||||
yield from ('mutagen', 'brotli', 'certifi', 'secretstorage', 'curl_cffi')
|
||||
yield from ('mutagen', 'brotli', 'certifi', 'secretstorage', 'curl_cffi', 'niquests', 'urllib3-future')
|
||||
|
||||
|
||||
hiddenimports = list(get_hidden_imports())
|
||||
|
|
|
@ -79,6 +79,16 @@
|
|||
except ImportError:
|
||||
curl_cffi = None
|
||||
|
||||
try:
|
||||
import niquests
|
||||
if niquests._compat.HAS_LEGACY_URLLIB3:
|
||||
import urllib3_future as niquests_urllib3
|
||||
else:
|
||||
niquests_urllib3 = urllib3
|
||||
except ImportError:
|
||||
niquests = None
|
||||
niquests_urllib3 = None
|
||||
|
||||
from . import Cryptodome
|
||||
|
||||
all_dependencies = {k: v for k, v in globals().items() if not k.startswith('_')}
|
||||
|
|
|
@ -105,7 +105,7 @@ def _real_extract(self, url):
|
|||
video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
event_data = traverse_obj(
|
||||
self._search_nextjs_data(webpage, video_id, default='{}'),
|
||||
self._search_nextjs_data(webpage, video_id, default={}),
|
||||
('props', 'pageProps', 'eventCMSData', {
|
||||
'title': ('event_name', {str}),
|
||||
'thumbnail': ('event_thumbnail_image', {url_or_none}),
|
||||
|
|
|
@ -93,11 +93,11 @@ def extract_formats(self, play_info):
|
|||
|
||||
return formats
|
||||
|
||||
def _download_playinfo(self, video_id, cid):
|
||||
def _download_playinfo(self, video_id, cid, headers=None):
|
||||
return self._download_json(
|
||||
'https://api.bilibili.com/x/player/playurl', video_id,
|
||||
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
|
||||
note=f'Downloading video formats for cid {cid}')['data']
|
||||
note=f'Downloading video formats for cid {cid}', headers=headers)['data']
|
||||
|
||||
def json2srt(self, json_data):
|
||||
srt_data = ''
|
||||
|
@ -493,7 +493,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
|||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||
headers = self.geo_verification_headers()
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
|
||||
if not self._match_valid_url(urlh.url):
|
||||
return self.url_result(urlh.url)
|
||||
|
||||
|
@ -531,7 +532,7 @@ def _real_extract(self, url):
|
|||
self._download_json(
|
||||
'https://api.bilibili.com/x/player/pagelist', video_id,
|
||||
fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
|
||||
note='Extracting videos in anthology'),
|
||||
note='Extracting videos in anthology', headers=headers),
|
||||
'data', expected_type=list) or []
|
||||
is_anthology = len(page_list_json) > 1
|
||||
|
||||
|
@ -552,7 +553,7 @@ def _real_extract(self, url):
|
|||
|
||||
festival_info = {}
|
||||
if is_festival:
|
||||
play_info = self._download_playinfo(video_id, cid)
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers)
|
||||
|
||||
festival_info = traverse_obj(initial_state, {
|
||||
'uploader': ('videoInfo', 'upName'),
|
||||
|
@ -666,14 +667,15 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
|||
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode_id)
|
||||
headers = self.geo_verification_headers()
|
||||
webpage = self._download_webpage(url, episode_id, headers=headers)
|
||||
|
||||
if '您所在的地区无法观看本片' in webpage:
|
||||
raise GeoRestrictedError('This video is restricted')
|
||||
elif '正在观看预览,大会员免费看全片' in webpage:
|
||||
self.raise_login_required('This video is for premium members only')
|
||||
|
||||
headers = {'Referer': url, **self.geo_verification_headers()}
|
||||
headers['Referer'] = url
|
||||
play_info = self._download_json(
|
||||
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
|
||||
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
|
||||
|
@ -724,7 +726,7 @@ def _real_extract(self, url):
|
|||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
'http_headers': headers,
|
||||
'http_headers': {'Referer': url},
|
||||
}
|
||||
|
||||
|
||||
|
@ -1049,9 +1051,10 @@ def fetch_page(page_idx):
|
|||
raise ExtractorError(
|
||||
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
|
||||
raise
|
||||
if response['code'] == -401:
|
||||
if response['code'] in (-352, -401):
|
||||
raise ExtractorError(
|
||||
'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
|
||||
f'Request is blocked by server ({-response["code"]}), '
|
||||
'please add cookies, wait and try later.', expected=True)
|
||||
return response['data']
|
||||
|
||||
def get_metadata(page_data):
|
||||
|
|
|
@ -1,7 +1,11 @@
|
|||
import json
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
bug_reports_message,
|
||||
int_or_none,
|
||||
qualities,
|
||||
str_or_none,
|
||||
|
@ -162,9 +166,19 @@ def _extract_formats(self, player_urls, video_id):
|
|||
|
||||
def _real_extract(self, url):
|
||||
user, post_id = self._match_valid_url(url).group('user', 'post_id')
|
||||
|
||||
auth_headers = {}
|
||||
auth_cookie = self._get_cookies('https://boosty.to/').get('auth')
|
||||
if auth_cookie is not None:
|
||||
try:
|
||||
auth_data = json.loads(urllib.parse.unquote(auth_cookie.value))
|
||||
auth_headers['Authorization'] = f'Bearer {auth_data["accessToken"]}'
|
||||
except (json.JSONDecodeError, KeyError):
|
||||
self.report_warning(f'Failed to extract token from auth cookie{bug_reports_message()}')
|
||||
|
||||
post = self._download_json(
|
||||
f'https://api.boosty.to/v1/blog/{user}/post/{post_id}', post_id,
|
||||
note='Downloading post data', errnote='Unable to download post data')
|
||||
note='Downloading post data', errnote='Unable to download post data', headers=auth_headers)
|
||||
|
||||
post_title = post.get('title')
|
||||
if not post_title:
|
||||
|
@ -202,7 +216,9 @@ def _real_extract(self, url):
|
|||
'thumbnail': (('previewUrl', 'defaultPreview'), {url_or_none}),
|
||||
}, get_all=False)})
|
||||
|
||||
if not entries:
|
||||
if not entries and not post.get('hasAccess'):
|
||||
self.raise_login_required('This post requires a subscription', metadata_available=True)
|
||||
elif not entries:
|
||||
raise ExtractorError('No videos found', expected=True)
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
|
|
|
@ -151,7 +151,7 @@ def _real_extract(self, url):
|
|||
|
||||
class CBCPlayerIE(InfoExtractor):
|
||||
IE_NAME = 'cbc.ca:player'
|
||||
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
|
||||
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbc.ca/player/play/2683190193',
|
||||
'md5': '64d25f841ddf4ddb28a235338af32e2c',
|
||||
|
@ -277,6 +277,28 @@ class CBCPlayerIE(InfoExtractor):
|
|||
'location': 'Canada',
|
||||
'media_type': 'Full Program',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.cbc.ca/player/play/video/1.7194274',
|
||||
'md5': '188b96cf6bdcb2540e178a6caa957128',
|
||||
'info_dict': {
|
||||
'id': '2334524995812',
|
||||
'ext': 'mp4',
|
||||
'title': '#TheMoment a rare white spirit moose was spotted in Alberta',
|
||||
'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
|
||||
'timestamp': 1714788791,
|
||||
'duration': 77.678,
|
||||
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/201/543/THE_MOMENT.jpg',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'chapters': 'count:0',
|
||||
'upload_date': '20240504',
|
||||
'categories': 'count:3',
|
||||
'series': 'The National',
|
||||
'tags': 'count:15',
|
||||
'creators': ['encoder'],
|
||||
'location': 'Canada',
|
||||
'media_type': 'Excerpt',
|
||||
},
|
||||
}, {
|
||||
'url': 'cbcplayer:1.7159484',
|
||||
'only_matching': True,
|
||||
|
|
|
@ -1738,12 +1738,16 @@ def traverse_json_ld(json_ld, at_top_level=True):
|
|||
traverse_json_ld(json_ld)
|
||||
return filter_dict(info)
|
||||
|
||||
def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal=True, **kw):
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
|
||||
webpage, 'next.js data', fatal=fatal, **kw),
|
||||
video_id, transform_source=transform_source, fatal=fatal)
|
||||
def _search_nextjs_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT, **kw):
|
||||
if default == '{}':
|
||||
self._downloader.deprecation_warning('using `default=\'{}\'` is deprecated, use `default={}` instead')
|
||||
default = {}
|
||||
if default is not NO_DEFAULT:
|
||||
fatal = False
|
||||
|
||||
return self._search_json(
|
||||
r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data',
|
||||
video_id, end_pattern='</script>', fatal=fatal, default=default, **kw)
|
||||
|
||||
def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
|
||||
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
|
||||
|
|
|
@ -24,11 +24,15 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||
_BASE_URL = 'https://www.crunchyroll.com'
|
||||
_API_BASE = 'https://api.crunchyroll.com'
|
||||
_NETRC_MACHINE = 'crunchyroll'
|
||||
_REFRESH_TOKEN = None
|
||||
_AUTH_HEADERS = None
|
||||
_AUTH_EXPIRY = None
|
||||
_API_ENDPOINT = None
|
||||
_BASIC_AUTH = None
|
||||
_BASIC_AUTH = 'Basic ' + base64.b64encode(':'.join((
|
||||
't-kdgp2h8c3jub8fn0fq',
|
||||
'yfLDfMfrYvKXh4JXS1LEI2cCqu1v5Wan',
|
||||
)).encode()).decode()
|
||||
_IS_PREMIUM = None
|
||||
_CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
|
||||
_LOCALE_LOOKUP = {
|
||||
'ar': 'ar-SA',
|
||||
'de': 'de-DE',
|
||||
|
@ -43,69 +47,78 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||
'hi': 'hi-IN',
|
||||
}
|
||||
|
||||
@property
|
||||
def is_logged_in(self):
|
||||
return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))
|
||||
def _set_auth_info(self, response):
|
||||
CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
|
||||
CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': response['token_type'] + ' ' + response['access_token']}
|
||||
CrunchyrollBaseIE._AUTH_EXPIRY = time_seconds(seconds=traverse_obj(response, ('expires_in', {float_or_none}), default=300) - 10)
|
||||
|
||||
def _request_token(self, headers, data, note='Requesting token', errnote='Failed to request token'):
|
||||
try:
|
||||
return self._download_json(
|
||||
f'{self._BASE_URL}/auth/v1/token', None, note=note, errnote=errnote,
|
||||
headers=headers, data=urlencode_postdata(data), impersonate=True)
|
||||
except ExtractorError as error:
|
||||
if not isinstance(error.cause, HTTPError) or error.cause.status != 403:
|
||||
raise
|
||||
if target := error.cause.response.extensions.get('impersonate'):
|
||||
raise ExtractorError(f'Got HTTP Error 403 when using impersonate target "{target}"')
|
||||
raise ExtractorError(
|
||||
'Request blocked by Cloudflare. '
|
||||
'Install the required impersonation dependency if possible, '
|
||||
'or else navigate to Crunchyroll in your browser, '
|
||||
'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
|
||||
'and your browser\'s User-Agent (with --user-agent)', expected=True)
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if self.is_logged_in:
|
||||
if not CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
CrunchyrollBaseIE._REFRESH_TOKEN = self.cache.load(self._NETRC_MACHINE, username)
|
||||
if CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
return
|
||||
|
||||
upsell_response = self._download_json(
|
||||
f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
|
||||
query={
|
||||
'sess_id': 1,
|
||||
'device_id': 'whatvalueshouldbeforweb',
|
||||
'device_type': 'com.crunchyroll.static',
|
||||
'access_token': 'giKq5eY27ny3cqz',
|
||||
'referer': f'{self._BASE_URL}/welcome/login'
|
||||
})
|
||||
if upsell_response['code'] != 'ok':
|
||||
raise ExtractorError('Could not get session id')
|
||||
session_id = upsell_response['data']['session_id']
|
||||
|
||||
login_response = self._download_json(
|
||||
f'{self._API_BASE}/login.1.json', None, 'Logging in',
|
||||
data=urlencode_postdata({
|
||||
'account': username,
|
||||
'password': password,
|
||||
'session_id': session_id
|
||||
}))
|
||||
if login_response['code'] != 'ok':
|
||||
raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
|
||||
if not self.is_logged_in:
|
||||
raise ExtractorError('Login succeeded but did not set etp_rt cookie')
|
||||
|
||||
def _update_auth(self):
|
||||
if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
|
||||
return
|
||||
|
||||
if not CrunchyrollBaseIE._BASIC_AUTH:
|
||||
cx_api_param = self._CLIENT_ID[self.is_logged_in]
|
||||
self.write_debug(f'Using cxApiParam={cx_api_param}')
|
||||
CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
|
||||
|
||||
auth_headers = {'Authorization': CrunchyrollBaseIE._BASIC_AUTH}
|
||||
if self.is_logged_in:
|
||||
grant_type = 'etp_rt_cookie'
|
||||
else:
|
||||
grant_type = 'client_id'
|
||||
auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
|
||||
try:
|
||||
auth_response = self._download_json(
|
||||
f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
|
||||
headers=auth_headers, data=f'grant_type={grant_type}'.encode())
|
||||
login_response = self._request_token(
|
||||
headers={'Authorization': self._BASIC_AUTH}, data={
|
||||
'username': username,
|
||||
'password': password,
|
||||
'grant_type': 'password',
|
||||
'scope': 'offline_access',
|
||||
}, note='Logging in', errnote='Failed to log in')
|
||||
except ExtractorError as error:
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 403:
|
||||
raise ExtractorError(
|
||||
'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
|
||||
'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
|
||||
'and your browser\'s User-Agent (with --user-agent)', expected=True)
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 401:
|
||||
raise ExtractorError('Invalid username and/or password', expected=True)
|
||||
raise
|
||||
|
||||
CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(auth_response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
|
||||
CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
|
||||
CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)
|
||||
CrunchyrollBaseIE._REFRESH_TOKEN = login_response['refresh_token']
|
||||
self.cache.store(self._NETRC_MACHINE, username, CrunchyrollBaseIE._REFRESH_TOKEN)
|
||||
self._set_auth_info(login_response)
|
||||
|
||||
def _update_auth(self):
|
||||
if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_EXPIRY > time_seconds():
|
||||
return
|
||||
|
||||
auth_headers = {'Authorization': self._BASIC_AUTH}
|
||||
if CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
data = {
|
||||
'refresh_token': CrunchyrollBaseIE._REFRESH_TOKEN,
|
||||
'grant_type': 'refresh_token',
|
||||
'scope': 'offline_access',
|
||||
}
|
||||
else:
|
||||
data = {'grant_type': 'client_id'}
|
||||
auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
|
||||
try:
|
||||
auth_response = self._request_token(auth_headers, data)
|
||||
except ExtractorError as error:
|
||||
username, password = self._get_login_info()
|
||||
if not username or not isinstance(error.cause, HTTPError) or error.cause.status != 400:
|
||||
raise
|
||||
self.to_screen('Refresh token has expired. Re-logging in')
|
||||
CrunchyrollBaseIE._REFRESH_TOKEN = None
|
||||
self.cache.store(self._NETRC_MACHINE, username, None)
|
||||
self._perform_login(username, password)
|
||||
return
|
||||
|
||||
self._set_auth_info(auth_response)
|
||||
|
||||
def _locale_from_language(self, language):
|
||||
config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
|
||||
|
@ -168,7 +181,8 @@ def _extract_stream(self, identifier, display_id=None):
|
|||
self._update_auth()
|
||||
stream_response = self._download_json(
|
||||
f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
|
||||
display_id, note='Downloading stream info', headers=CrunchyrollBaseIE._AUTH_HEADERS)
|
||||
display_id, note='Downloading stream info', errnote='Failed to download stream info',
|
||||
headers=CrunchyrollBaseIE._AUTH_HEADERS)
|
||||
|
||||
available_formats = {'': ('', '', stream_response['url'])}
|
||||
for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])):
|
||||
|
@ -383,11 +397,12 @@ def entries():
|
|||
|
||||
if not self._IS_PREMIUM and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
|
||||
message = f'This {object_type} is for premium members only'
|
||||
if self.is_logged_in:
|
||||
raise ExtractorError(message, expected=True)
|
||||
self.raise_login_required(message)
|
||||
|
||||
result['formats'], result['subtitles'] = self._extract_stream(internal_id)
|
||||
if CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
self.raise_no_formats(message, expected=True, video_id=internal_id)
|
||||
else:
|
||||
self.raise_login_required(message, method='password', metadata_available=True)
|
||||
else:
|
||||
result['formats'], result['subtitles'] = self._extract_stream(internal_id)
|
||||
|
||||
result['chapters'] = self._extract_chapters(internal_id)
|
||||
|
||||
|
@ -573,14 +588,16 @@ def _real_extract(self, url):
|
|||
if not response:
|
||||
raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)
|
||||
|
||||
result = self._transform_music_response(response)
|
||||
|
||||
if not self._IS_PREMIUM and response.get('isPremiumOnly'):
|
||||
message = f'This {response.get("type") or "media"} is for premium members only'
|
||||
if self.is_logged_in:
|
||||
raise ExtractorError(message, expected=True)
|
||||
self.raise_login_required(message)
|
||||
|
||||
result = self._transform_music_response(response)
|
||||
result['formats'], _ = self._extract_stream(f'music/{internal_id}', internal_id)
|
||||
if CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
self.raise_no_formats(message, expected=True, video_id=internal_id)
|
||||
else:
|
||||
self.raise_login_required(message, method='password', metadata_available=True)
|
||||
else:
|
||||
result['formats'], _ = self._extract_stream(f'music/{internal_id}', internal_id)
|
||||
|
||||
return result
|
||||
|
||||
|
|
|
@ -560,7 +560,7 @@ def extract_from_jsmods_instances(js_data):
|
|||
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
||||
|
||||
def extract_dash_manifest(video, formats):
|
||||
dash_manifest = video.get('dash_manifest')
|
||||
dash_manifest = traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str)
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
|
||||
|
|
|
@ -1,6 +1,12 @@
|
|||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import ExtractorError, UserNotLive, int_or_none, url_or_none
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
UserNotLive,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
|
@ -9,17 +15,20 @@ class MixchIE(InfoExtractor):
|
|||
_VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://mixch.tv/u/16236849/live',
|
||||
'url': 'https://mixch.tv/u/16943797/live',
|
||||
'skip': 'don\'t know if this live persists',
|
||||
'info_dict': {
|
||||
'id': '16236849',
|
||||
'title': '24配信シェア⭕️投票🙏💦',
|
||||
'comment_count': 13145,
|
||||
'view_count': 28348,
|
||||
'timestamp': 1636189377,
|
||||
'uploader': '🦥伊咲👶🏻#フレアワ',
|
||||
'uploader_id': '16236849',
|
||||
}
|
||||
'id': '16943797',
|
||||
'ext': 'mp4',
|
||||
'title': '#EntView #カリナ #セブチ 2024-05-05 06:58',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'timestamp': 1714726805,
|
||||
'uploader': 'Ent.View K-news🎶💕',
|
||||
'uploader_id': '16943797',
|
||||
'live_status': 'is_live',
|
||||
'upload_date': '20240503',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://mixch.tv/u/16137876/live',
|
||||
'only_matching': True,
|
||||
|
@ -48,8 +57,20 @@ def _real_extract(self, url):
|
|||
'protocol': 'm3u8',
|
||||
}],
|
||||
'is_live': True,
|
||||
'__post_extractor': self.extract_comments(video_id),
|
||||
}
|
||||
|
||||
def _get_comments(self, video_id):
|
||||
yield from traverse_obj(self._download_json(
|
||||
f'https://mixch.tv/api-web/lives/{video_id}/messages', video_id,
|
||||
note='Downloading comments', errnote='Failed to download comments'), (..., {
|
||||
'author': ('name', {str}),
|
||||
'author_id': ('user_id', {str_or_none}),
|
||||
'id': ('message_id', {str}, {lambda x: x or None}),
|
||||
'text': ('body', {str}),
|
||||
'timestamp': ('created', {int}),
|
||||
}))
|
||||
|
||||
|
||||
class MixchArchiveIE(InfoExtractor):
|
||||
IE_NAME = 'mixch:archive'
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import itertools
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
|
@ -14,7 +14,6 @@
|
|||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
@ -199,7 +198,50 @@ class PatreonIE(PatreonBaseIE):
|
|||
'channel_id': '2147162',
|
||||
'uploader_url': 'https://www.patreon.com/yaboyroshi',
|
||||
},
|
||||
}, {
|
||||
# NSFW vimeo embed URL
|
||||
'url': 'https://www.patreon.com/posts/4k-spiderman-4k-96414599',
|
||||
'info_dict': {
|
||||
'id': '902250943',
|
||||
'ext': 'mp4',
|
||||
'title': '❤️(4K) Spiderman Girl Yeonhwa’s Gift ❤️(4K) 스파이더맨걸 연화의 선물',
|
||||
'description': '❤️(4K) Spiderman Girl Yeonhwa’s Gift \n❤️(4K) 스파이더맨걸 연화의 선물',
|
||||
'uploader': 'Npickyeonhwa',
|
||||
'uploader_id': '90574422',
|
||||
'uploader_url': 'https://www.patreon.com/Yeonhwa726',
|
||||
'channel_id': '10237902',
|
||||
'channel_url': 'https://www.patreon.com/Yeonhwa726',
|
||||
'duration': 70,
|
||||
'timestamp': 1705150153,
|
||||
'upload_date': '20240113',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.+',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# multiple attachments/embeds
|
||||
'url': 'https://www.patreon.com/posts/holy-wars-solos-100601977',
|
||||
'playlist_count': 3,
|
||||
'info_dict': {
|
||||
'id': '100601977',
|
||||
'title': '"Holy Wars" (Megadeth) Solos Transcription & Lesson/Analysis',
|
||||
'description': 'md5:d099ab976edfce6de2a65c2b169a88d3',
|
||||
'uploader': 'Bradley Hall',
|
||||
'uploader_id': '24401883',
|
||||
'uploader_url': 'https://www.patreon.com/bradleyhallguitar',
|
||||
'channel_id': '3193932',
|
||||
'channel_url': 'https://www.patreon.com/bradleyhallguitar',
|
||||
'channel_follower_count': int,
|
||||
'timestamp': 1710777855,
|
||||
'upload_date': '20240318',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': r're:^https?://.+',
|
||||
},
|
||||
'skip': 'Patron-only content',
|
||||
}]
|
||||
_RETURN_TYPE = 'video'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@ -214,95 +256,108 @@ def _real_extract(self, url):
|
|||
'include': 'audio,user,user_defined_tags,campaign,attachments_media',
|
||||
})
|
||||
attributes = post['data']['attributes']
|
||||
title = attributes['title'].strip()
|
||||
image = attributes.get('image') or {}
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': clean_html(attributes.get('content')),
|
||||
'thumbnail': image.get('large_url') or image.get('url'),
|
||||
'timestamp': parse_iso8601(attributes.get('published_at')),
|
||||
'like_count': int_or_none(attributes.get('like_count')),
|
||||
'comment_count': int_or_none(attributes.get('comment_count')),
|
||||
}
|
||||
can_view_post = traverse_obj(attributes, 'current_user_can_view')
|
||||
if can_view_post and info['comment_count']:
|
||||
info['__post_extractor'] = self.extract_comments(video_id)
|
||||
info = traverse_obj(attributes, {
|
||||
'title': ('title', {str.strip}),
|
||||
'description': ('content', {clean_html}),
|
||||
'thumbnail': ('image', ('large_url', 'url'), {url_or_none}, any),
|
||||
'timestamp': ('published_at', {parse_iso8601}),
|
||||
'like_count': ('like_count', {int_or_none}),
|
||||
'comment_count': ('comment_count', {int_or_none}),
|
||||
})
|
||||
|
||||
for i in post.get('included', []):
|
||||
i_type = i.get('type')
|
||||
if i_type == 'media':
|
||||
media_attributes = i.get('attributes') or {}
|
||||
download_url = media_attributes.get('download_url')
|
||||
entries = []
|
||||
idx = 0
|
||||
for include in traverse_obj(post, ('included', lambda _, v: v['type'])):
|
||||
include_type = include['type']
|
||||
if include_type == 'media':
|
||||
media_attributes = traverse_obj(include, ('attributes', {dict})) or {}
|
||||
download_url = url_or_none(media_attributes.get('download_url'))
|
||||
ext = mimetype2ext(media_attributes.get('mimetype'))
|
||||
|
||||
# if size_bytes is None, this media file is likely unavailable
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/4608
|
||||
size_bytes = int_or_none(media_attributes.get('size_bytes'))
|
||||
if download_url and ext in KNOWN_EXTENSIONS and size_bytes is not None:
|
||||
# XXX: what happens if there are multiple attachments?
|
||||
return {
|
||||
**info,
|
||||
idx += 1
|
||||
entries.append({
|
||||
'id': f'{video_id}-{idx}',
|
||||
'ext': ext,
|
||||
'filesize': size_bytes,
|
||||
'url': download_url,
|
||||
}
|
||||
elif i_type == 'user':
|
||||
user_attributes = i.get('attributes')
|
||||
if user_attributes:
|
||||
info.update({
|
||||
'uploader': user_attributes.get('full_name'),
|
||||
'uploader_id': str_or_none(i.get('id')),
|
||||
'uploader_url': user_attributes.get('url'),
|
||||
})
|
||||
|
||||
elif i_type == 'post_tag':
|
||||
info.setdefault('tags', []).append(traverse_obj(i, ('attributes', 'value')))
|
||||
elif include_type == 'user':
|
||||
info.update(traverse_obj(include, {
|
||||
'uploader': ('attributes', 'full_name', {str}),
|
||||
'uploader_id': ('id', {str_or_none}),
|
||||
'uploader_url': ('attributes', 'url', {url_or_none}),
|
||||
}))
|
||||
|
||||
elif i_type == 'campaign':
|
||||
info.update({
|
||||
'channel': traverse_obj(i, ('attributes', 'title')),
|
||||
'channel_id': str_or_none(i.get('id')),
|
||||
'channel_url': traverse_obj(i, ('attributes', 'url')),
|
||||
'channel_follower_count': int_or_none(traverse_obj(i, ('attributes', 'patron_count'))),
|
||||
})
|
||||
elif include_type == 'post_tag':
|
||||
if post_tag := traverse_obj(include, ('attributes', 'value', {str})):
|
||||
info.setdefault('tags', []).append(post_tag)
|
||||
|
||||
elif include_type == 'campaign':
|
||||
info.update(traverse_obj(include, {
|
||||
'channel': ('attributes', 'title', {str}),
|
||||
'channel_id': ('id', {str_or_none}),
|
||||
'channel_url': ('attributes', 'url', {url_or_none}),
|
||||
'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
|
||||
}))
|
||||
|
||||
# handle Vimeo embeds
|
||||
if try_get(attributes, lambda x: x['embed']['provider']) == 'Vimeo':
|
||||
embed_html = try_get(attributes, lambda x: x['embed']['html'])
|
||||
v_url = url_or_none(compat_urllib_parse_unquote(
|
||||
self._search_regex(r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', embed_html, 'vimeo url', fatal=False)))
|
||||
if v_url:
|
||||
v_url = VimeoIE._smuggle_referrer(v_url, 'https://patreon.com')
|
||||
if self._request_webpage(v_url, video_id, 'Checking Vimeo embed URL', fatal=False, errnote=False):
|
||||
return self.url_result(v_url, VimeoIE, url_transparent=True, **info)
|
||||
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
|
||||
v_url = urllib.parse.unquote(self._html_search_regex(
|
||||
r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
|
||||
traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
|
||||
if url_or_none(v_url) and self._request_webpage(
|
||||
v_url, video_id, 'Checking Vimeo embed URL',
|
||||
headers={'Referer': 'https://patreon.com/'},
|
||||
fatal=False, errnote=False):
|
||||
entries.append(self.url_result(
|
||||
VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
|
||||
VimeoIE, url_transparent=True))
|
||||
|
||||
embed_url = try_get(attributes, lambda x: x['embed']['url'])
|
||||
embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
|
||||
if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False):
|
||||
return self.url_result(embed_url, **info)
|
||||
entries.append(self.url_result(embed_url))
|
||||
|
||||
post_file = traverse_obj(attributes, 'post_file')
|
||||
post_file = traverse_obj(attributes, ('post_file', {dict}))
|
||||
if post_file:
|
||||
name = post_file.get('name')
|
||||
ext = determine_ext(name)
|
||||
if ext in KNOWN_EXTENSIONS:
|
||||
return {
|
||||
**info,
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'ext': ext,
|
||||
'url': post_file['url'],
|
||||
}
|
||||
})
|
||||
elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
|
||||
return {
|
||||
**info,
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
})
|
||||
|
||||
if can_view_post is False:
|
||||
can_view_post = traverse_obj(attributes, 'current_user_can_view')
|
||||
comments = None
|
||||
if can_view_post and info.get('comment_count'):
|
||||
comments = self.extract_comments(video_id)
|
||||
|
||||
if not entries and can_view_post is False:
|
||||
self.raise_no_formats('You do not have access to this post', video_id=video_id, expected=True)
|
||||
else:
|
||||
elif not entries:
|
||||
self.raise_no_formats('No supported media found in this post', video_id=video_id, expected=True)
|
||||
elif len(entries) == 1:
|
||||
info.update(entries[0])
|
||||
else:
|
||||
for entry in entries:
|
||||
entry.update(info)
|
||||
return self.playlist_result(entries, video_id, **info, __post_extractor=comments)
|
||||
|
||||
info['id'] = video_id
|
||||
info['__post_extractor'] = comments
|
||||
return info
|
||||
|
||||
def _get_comments(self, post_id):
|
||||
|
|
|
@ -361,7 +361,7 @@ def extract_count(key):
|
|||
'like_count': extract_count('favoritings') or extract_count('likes'),
|
||||
'comment_count': extract_count('comment'),
|
||||
'repost_count': extract_count('reposts'),
|
||||
'genre': info.get('genre'),
|
||||
'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)),
|
||||
'formats': formats if not extract_flat else None
|
||||
}
|
||||
|
||||
|
@ -395,10 +395,10 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
_TESTS = [
|
||||
{
|
||||
'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
|
||||
'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
|
||||
'md5': 'de9bac153e7427a7333b4b0c1b6a18d2',
|
||||
'info_dict': {
|
||||
'id': '62986583',
|
||||
'ext': 'mp3',
|
||||
'ext': 'opus',
|
||||
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
|
||||
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
|
||||
'uploader': 'E.T. ExTerrestrial Music',
|
||||
|
@ -411,6 +411,9 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
|
||||
'uploader_url': 'https://soundcloud.com/ethmusic',
|
||||
'genres': [],
|
||||
}
|
||||
},
|
||||
# geo-restricted
|
||||
|
@ -418,7 +421,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
|
||||
'info_dict': {
|
||||
'id': '47127627',
|
||||
'ext': 'mp3',
|
||||
'ext': 'opus',
|
||||
'title': 'Goldrushed',
|
||||
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
|
||||
'uploader': 'The Royal Concept',
|
||||
|
@ -431,6 +434,9 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'uploader_url': 'https://soundcloud.com/the-concept-band',
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
|
||||
'genres': ['Alternative'],
|
||||
},
|
||||
},
|
||||
# private link
|
||||
|
@ -452,6 +458,9 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'uploader_url': 'https://soundcloud.com/jaimemf',
|
||||
'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
|
||||
'genres': ['youtubedl'],
|
||||
},
|
||||
},
|
||||
# private link (alt format)
|
||||
|
@ -473,6 +482,9 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'uploader_url': 'https://soundcloud.com/jaimemf',
|
||||
'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
|
||||
'genres': ['youtubedl'],
|
||||
},
|
||||
},
|
||||
# downloadable song
|
||||
|
@ -482,6 +494,21 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'info_dict': {
|
||||
'id': '343609555',
|
||||
'ext': 'wav',
|
||||
'title': 'The Following',
|
||||
'description': '',
|
||||
'uploader': '80M',
|
||||
'uploader_id': '312384765',
|
||||
'uploader_url': 'https://soundcloud.com/the80m',
|
||||
'upload_date': '20170922',
|
||||
'timestamp': 1506120436,
|
||||
'duration': 397.228,
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-000243916348-ktoo7d-original.jpg',
|
||||
'license': 'all-rights-reserved',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'view_count': int,
|
||||
'genres': ['Dance & EDM'],
|
||||
},
|
||||
},
|
||||
# private link, downloadable format
|
||||
|
@ -503,6 +530,9 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
|
||||
'uploader_url': 'https://soundcloud.com/oriuplift',
|
||||
'genres': ['Trance'],
|
||||
},
|
||||
},
|
||||
# no album art, use avatar pic for thumbnail
|
||||
|
@ -525,6 +555,8 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'uploader_url': 'https://soundcloud.com/garyvee',
|
||||
'genres': [],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
@ -532,13 +564,13 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
},
|
||||
{
|
||||
'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
|
||||
'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
|
||||
'md5': '8227c3473a4264df6b02ad7e5b7527ac',
|
||||
'info_dict': {
|
||||
'id': '583011102',
|
||||
'ext': 'mp3',
|
||||
'ext': 'opus',
|
||||
'title': 'Mezzo Valzer',
|
||||
'description': 'md5:4138d582f81866a530317bae316e8b61',
|
||||
'uploader': 'Micronie',
|
||||
'description': 'md5:f4d5f39d52e0ccc2b4f665326428901a',
|
||||
'uploader': 'Giovanni Sarani',
|
||||
'uploader_id': '3352531',
|
||||
'timestamp': 1551394171,
|
||||
'upload_date': '20190228',
|
||||
|
@ -549,6 +581,8 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'genres': ['Piano'],
|
||||
'uploader_url': 'https://soundcloud.com/giovannisarani',
|
||||
},
|
||||
},
|
||||
{
|
||||
|
|
|
@ -174,7 +174,7 @@ class TheaterComplexTownBaseIE(StacommuBaseIE):
|
|||
|
||||
|
||||
class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?videos/episodes/(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?videos/episodes/(?P<id>\w+)'
|
||||
IE_NAME = 'theatercomplextown:vod'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.theater-complex.town/videos/episodes/hoxqidYNoAn7bP92DN6p78',
|
||||
|
@ -195,6 +195,9 @@ class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
|
|||
}, {
|
||||
'url': 'https://www.theater-complex.town/en/videos/episodes/6QT7XYwM9dJz5Gf9VB6K5y',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.theater-complex.town/ja/videos/episodes/hoxqidYNoAn7bP92DN6p78',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_API_PATH = 'videoEpisodes'
|
||||
|
@ -204,7 +207,7 @@ def _real_extract(self, url):
|
|||
|
||||
|
||||
class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?ppv/(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?ppv/(?P<id>\w+)'
|
||||
IE_NAME = 'theatercomplextown:ppv'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.theater-complex.town/ppv/wytW3X7khrjJBUpKuV3jen',
|
||||
|
@ -223,6 +226,9 @@ class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
|
|||
}, {
|
||||
'url': 'https://www.theater-complex.town/en/ppv/wytW3X7khrjJBUpKuV3jen',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.theater-complex.town/ja/ppv/qwUVmLmGEiZ3ZW6it9uGys',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_API_PATH = 'events'
|
||||
|
|
|
@ -41,7 +41,7 @@ def _real_extract(self, url):
|
|||
ptype, video_id = self._match_valid_url(url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, video_id, fatal=False) or ''
|
||||
props = self._search_nextjs_data(webpage, video_id, default='{}').get('props') or {}
|
||||
props = self._search_nextjs_data(webpage, video_id, default={}).get('props') or {}
|
||||
player_api_cache = try_get(
|
||||
props, lambda x: x['initialReduxState']['playerApiCache']) or {}
|
||||
|
||||
|
|
|
@ -776,7 +776,7 @@ def _real_extract(self, url):
|
|||
status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0
|
||||
video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict}))
|
||||
|
||||
elif next_data := self._search_nextjs_data(webpage, video_id, default='{}'):
|
||||
elif next_data := self._search_nextjs_data(webpage, video_id, default={}):
|
||||
self.write_debug('Found next.js data')
|
||||
status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0
|
||||
video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict}))
|
||||
|
|
|
@ -1,10 +1,9 @@
|
|||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
strip_or_none,
|
||||
)
|
||||
from ..utils import float_or_none, int_or_none, smuggle_url, strip_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class TVAIE(InfoExtractor):
|
||||
|
@ -49,11 +48,20 @@ class QubIE(InfoExtractor):
|
|||
'info_dict': {
|
||||
'id': '6084352463001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Épisode 01',
|
||||
'title': 'Ép 01. Mon dernier jour',
|
||||
'uploader_id': '5481942443001',
|
||||
'upload_date': '20190907',
|
||||
'timestamp': 1567899756,
|
||||
'description': 'md5:9c0d7fbb90939420c651fd977df90145',
|
||||
'thumbnail': r're:https://.+\.jpg',
|
||||
'episode': 'Ép 01. Mon dernier jour',
|
||||
'episode_number': 1,
|
||||
'tags': ['alerte amber', 'alerte amber saison 1', 'surdemande'],
|
||||
'duration': 2625.963,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'series': 'Alerte Amber',
|
||||
'channel': 'TVA',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943',
|
||||
|
@ -64,22 +72,24 @@ class QubIE(InfoExtractor):
|
|||
|
||||
def _real_extract(self, url):
|
||||
entity_id = self._match_id(url)
|
||||
entity = self._download_json(
|
||||
'https://www.qub.ca/proxy/pfu/content-delivery-service/v1/entities',
|
||||
entity_id, query={'id': entity_id})
|
||||
webpage = self._download_webpage(url, entity_id)
|
||||
entity = self._search_nextjs_data(webpage, entity_id)['props']['initialProps']['pageProps']['fallbackData']
|
||||
video_id = entity['videoId']
|
||||
episode = strip_or_none(entity.get('name'))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': f'https://videos.tva.ca/details/_{video_id}',
|
||||
'ie_key': TVAIE.ie_key(),
|
||||
'id': video_id,
|
||||
'title': episode,
|
||||
# 'url': self.BRIGHTCOVE_URL_TEMPLATE % entity['referenceId'],
|
||||
'url': 'https://videos.tva.ca/details/_' + video_id,
|
||||
'description': entity.get('longDescription'),
|
||||
'duration': float_or_none(entity.get('durationMillis'), 1000),
|
||||
'episode': episode,
|
||||
'episode_number': int_or_none(entity.get('episodeNumber')),
|
||||
# 'ie_key': 'BrightcoveNew',
|
||||
'ie_key': TVAIE.ie_key(),
|
||||
**traverse_obj(entity, {
|
||||
'description': ('longDescription', {str}),
|
||||
'duration': ('durationMillis', {functools.partial(float_or_none, scale=1000)}),
|
||||
'channel': ('knownEntities', 'channel', 'name', {str}),
|
||||
'series': ('knownEntities', 'videoShow', 'name', {str}),
|
||||
'season_number': ('slug', {lambda x: re.search(r'/s(?:ai|ea)son-(\d+)/', x)}, 1, {int_or_none}),
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
|
|
@ -451,6 +451,7 @@ def _real_extract(self, url):
|
|||
info_page, 'view count', default=None))
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for format_id, format_url in data.items():
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
|
||||
|
@ -462,12 +463,21 @@ def _real_extract(self, url):
|
|||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': format_url,
|
||||
'ext': 'mp4',
|
||||
'source_preference': 1,
|
||||
'height': height,
|
||||
})
|
||||
elif format_id == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
format_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=format_id, fatal=False, live=is_live))
|
||||
m3u8_id=format_id, fatal=False, live=is_live)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
elif format_id.startswith('dash_'):
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
format_url, video_id, mpd_id=format_id, fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
elif format_id == 'rtmp':
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
|
@ -475,7 +485,6 @@ def _real_extract(self, url):
|
|||
'ext': 'flv',
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
for sub in data.get('subs') or {}:
|
||||
subtitles.setdefault(sub.get('lang', 'en'), []).append({
|
||||
'ext': sub.get('title', '.srt').split('.')[-1],
|
||||
|
@ -496,6 +505,7 @@ def _real_extract(self, url):
|
|||
'comment_count': int_or_none(mv_data.get('commcount')),
|
||||
'is_live': is_live,
|
||||
'subtitles': subtitles,
|
||||
'_format_sort_fields': ('res', 'source'),
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
jwt_decode_hs256,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
variadic,
|
||||
|
@ -147,7 +148,7 @@ def _download_metadata(self, url, video_id, lang, props_keys):
|
|||
metadata = self._call_api(video_id, msg='metadata', query={'al': lang or 'ja'}, auth=False, fatal=False)
|
||||
if not metadata:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
nextjs_data = self._search_nextjs_data(webpage, video_id)
|
||||
nextjs_data = self._search_nextjs_data(webpage, video_id, fatal=False)
|
||||
metadata = traverse_obj(nextjs_data, (
|
||||
'props', 'pageProps', *variadic(props_keys, (str, bytes, dict, set)), {dict})) or {}
|
||||
return metadata
|
||||
|
@ -194,8 +195,7 @@ def _real_extract(self, url):
|
|||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': self._get_formats(video_data, (
|
||||
(('protocolHls', 'url'), ('chromecastUrls', ...)), {url_or_none}), video_id),
|
||||
'formats': self._get_formats(video_data, ('protocolHls', 'url', {url_or_none}), video_id),
|
||||
**traverse_obj(metadata, {
|
||||
'title': ('displayName', {str}),
|
||||
'description': ('description', {str}),
|
||||
|
@ -259,6 +259,10 @@ class WrestleUniversePPVIE(WrestleUniverseBaseIE):
|
|||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'note': 'manifest provides live-a (partial) and live-b (full) streams',
|
||||
'url': 'https://www.wrestle-universe.com/en/lives/umc99R9XsexXrxr9VjTo9g',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_API_PATH = 'events'
|
||||
|
@ -285,12 +289,16 @@ def _real_extract(self, url):
|
|||
|
||||
video_data, decrypt = self._call_encrypted_api(
|
||||
video_id, ':watchArchive', 'watch archive', data={'method': 1})
|
||||
info['formats'] = self._get_formats(video_data, (
|
||||
('hls', None), ('urls', 'chromecastUrls'), ..., {url_or_none}), video_id)
|
||||
# 'chromecastUrls' can be only partial videos, avoid
|
||||
info['formats'] = self._get_formats(video_data, ('hls', (('urls', ...), 'url'), {url_or_none}), video_id)
|
||||
for f in info['formats']:
|
||||
# bitrates are exaggerated in PPV playlists, so avoid wrong/huge filesize_approx values
|
||||
if f.get('tbr'):
|
||||
f['tbr'] = int(f['tbr'] / 2.5)
|
||||
# prefer variants with the same basename as the master playlist to avoid partial streams
|
||||
f['format_id'] = url_basename(f['url']).partition('.')[0]
|
||||
if not f['format_id'].startswith(url_basename(f['manifest_url']).partition('.')[0]):
|
||||
f['preference'] = -10
|
||||
|
||||
hls_aes_key = traverse_obj(video_data, ('hls', 'key', {decrypt}))
|
||||
if hls_aes_key:
|
||||
|
|
|
@ -259,15 +259,15 @@ def _real_extract(self, url):
|
|||
webpage = self._download_webpage(redirect, video_id, note='Redirecting')
|
||||
data_json = self._search_json(
|
||||
r'("data"\s*:|data\s*=)', webpage, 'metadata', video_id, contains_pattern=r'{["\']_*serverState_*video.+}')
|
||||
serverstate = self._search_regex(r'(_+serverState_+video-site_[^_]+_+)',
|
||||
webpage, 'server state').replace('State', 'Settings')
|
||||
serverstate = self._search_regex(r'(_+serverState_+video-site_[^_]+_+)', webpage, 'server state')
|
||||
uploader = self._search_regex(r'(<a\s*class=["\']card-channel-link[^"\']+["\'][^>]+>)',
|
||||
webpage, 'uploader', default='<a>')
|
||||
uploader_name = extract_attributes(uploader).get('aria-label')
|
||||
video_json = try_get(data_json, lambda x: x[serverstate]['exportData']['video'], dict)
|
||||
stream_urls = try_get(video_json, lambda x: x['video']['streams'])
|
||||
item_id = traverse_obj(data_json, (serverstate, 'videoViewer', 'openedItemId', {str}))
|
||||
video_json = traverse_obj(data_json, (serverstate, 'videoViewer', 'items', item_id, {dict})) or {}
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for s_url in stream_urls:
|
||||
for s_url in traverse_obj(video_json, ('video', 'streams', ..., {url_or_none})):
|
||||
ext = determine_ext(s_url)
|
||||
if ext == 'mpd':
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(s_url, video_id, mpd_id='dash')
|
||||
|
|
|
@ -72,15 +72,15 @@ class YouPornIE(InfoExtractor):
|
|||
'id': '16290308',
|
||||
'age_limit': 18,
|
||||
'categories': [],
|
||||
'description': 'md5:00ea70f642f431c379763c17c2f396bc',
|
||||
'description': str, # TODO: detect/remove SEO spam description in ytdl backport
|
||||
'display_id': 'tinderspecial-trailer1',
|
||||
'duration': 298.0,
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20201123',
|
||||
'uploader': 'Ersties',
|
||||
'tags': [],
|
||||
'thumbnail': 'https://fi1.ypncdn.com/202011/23/16290308/original/8/tinderspecial-trailer1-8(m=eaAaaEPbaaaa).jpg',
|
||||
'timestamp': 1606089600,
|
||||
'thumbnail': r're:https://.+\.jpg',
|
||||
'timestamp': 1606147564,
|
||||
'title': 'Tinder In Real Life',
|
||||
'view_count': int,
|
||||
}
|
||||
|
@ -88,11 +88,17 @@ class YouPornIE(InfoExtractor):
|
|||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
|
||||
definitions = self._download_json(
|
||||
f'https://www.youporn.com/api/video/media_definitions/{video_id}/', display_id or video_id)
|
||||
self._set_cookie('.youporn.com', 'age_verified', '1')
|
||||
webpage = self._download_webpage(f'https://www.youporn.com/watch/{video_id}', video_id)
|
||||
definitions = self._search_json(r'\bplayervars\s*:', webpage, 'player vars', video_id)['mediaDefinitions']
|
||||
|
||||
def get_format_data(data, f):
|
||||
return traverse_obj(data, lambda _, v: v['format'] == f and url_or_none(v['videoUrl']))
|
||||
def get_format_data(data, stream_type):
|
||||
info_url = traverse_obj(data, (lambda _, v: v['format'] == stream_type, 'videoUrl', {url_or_none}, any))
|
||||
if not info_url:
|
||||
return []
|
||||
return traverse_obj(
|
||||
self._download_json(info_url, video_id, f'Downloading {stream_type} info JSON', fatal=False),
|
||||
lambda _, v: v['format'] == stream_type and url_or_none(v['videoUrl']))
|
||||
|
||||
formats = []
|
||||
# Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
|
||||
|
@ -123,10 +129,6 @@ def get_format_data(data, f):
|
|||
f['height'] = height
|
||||
formats.append(f)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://www.youporn.com/watch/%s' % video_id, display_id,
|
||||
headers={'Cookie': 'age_verified=1'})
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
|
||||
webpage, 'title', default=None) or self._og_search_title(
|
||||
|
|
|
@ -35,3 +35,10 @@
|
|||
pass
|
||||
except Exception as e:
|
||||
warnings.warn(f'Failed to import "curl_cffi" request handler: {e}' + bug_reports_message())
|
||||
|
||||
try:
|
||||
from . import _niquests
|
||||
except ImportError:
|
||||
pass
|
||||
except Exception as e:
|
||||
warnings.warn(f'Failed to import "niquests" request handler: {e}' + bug_reports_message())
|
||||
|
|
|
@ -132,6 +132,16 @@ def _check_extensions(self, extensions):
|
|||
extensions.pop('cookiejar', None)
|
||||
extensions.pop('timeout', None)
|
||||
|
||||
def send(self, request: Request) -> Response:
|
||||
target = self._get_request_target(request)
|
||||
try:
|
||||
response = super().send(request)
|
||||
except HTTPError as e:
|
||||
e.response.extensions['impersonate'] = target
|
||||
raise
|
||||
response.extensions['impersonate'] = target
|
||||
return response
|
||||
|
||||
def _send(self, request: Request):
|
||||
max_redirects_exceeded = False
|
||||
session: curl_cffi.requests.Session = self._get_instance(
|
||||
|
|
|
@ -0,0 +1,401 @@
|
|||
import functools
|
||||
import logging
|
||||
import re
|
||||
import socket
|
||||
import warnings
|
||||
|
||||
from ..dependencies import brotli, niquests, niquests_urllib3 as urllib3, certifi
|
||||
from ..utils import bug_reports_message, int_or_none, variadic
|
||||
from ..utils.networking import normalize_url
|
||||
|
||||
if niquests is None:
|
||||
raise ImportError('niquests module is not installed')
|
||||
|
||||
if urllib3 is None:
|
||||
raise ImportError('urllib3-future module is not installed')
|
||||
|
||||
urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.split('.'))
|
||||
|
||||
if urllib3_version < (2, 1, 900):
|
||||
raise ImportError('Only urllib3-future >= 2.1.900 is supported')
|
||||
|
||||
if niquests.__build__ < 0x030000:
|
||||
raise ImportError('Only niquests >= 3.0.0 is supported')
|
||||
|
||||
from ._helper import (
|
||||
InstanceStoreMixin,
|
||||
add_accept_encoding_header,
|
||||
create_connection,
|
||||
create_socks_proxy_socket,
|
||||
get_redirect_method,
|
||||
make_socks_proxy_opts,
|
||||
)
|
||||
from .common import (
|
||||
Features,
|
||||
RequestHandler,
|
||||
Response,
|
||||
register_preference,
|
||||
register_rh,
|
||||
)
|
||||
from .exceptions import (
|
||||
CertificateVerifyError,
|
||||
HTTPError,
|
||||
IncompleteRead,
|
||||
ProxyError,
|
||||
RequestError,
|
||||
SSLError,
|
||||
TransportError,
|
||||
)
|
||||
from ..socks import ProxyError as SocksProxyError
|
||||
|
||||
SUPPORTED_ENCODINGS = [
|
||||
'gzip', 'deflate'
|
||||
]
|
||||
|
||||
if brotli is not None:
|
||||
SUPPORTED_ENCODINGS.append('br')
|
||||
|
||||
"""
|
||||
Override urllib3's behavior to not convert lower-case percent-encoded characters
|
||||
to upper-case during url normalization process.
|
||||
|
||||
RFC3986 defines that the lower or upper case percent-encoded hexidecimal characters are equivalent
|
||||
and normalizers should convert them to uppercase for consistency [1].
|
||||
|
||||
However, some sites may have an incorrect implementation where they provide
|
||||
a percent-encoded url that is then compared case-sensitively.[2]
|
||||
|
||||
While this is a very rare case, since urllib does not do this normalization step, it
|
||||
is best to avoid it in requests too for compatability reasons.
|
||||
|
||||
1: https://tools.ietf.org/html/rfc3986#section-2.1
|
||||
2: https://github.com/streamlink/streamlink/pull/4003
|
||||
"""
|
||||
|
||||
|
||||
class Urllib3PercentREOverride:
|
||||
def __init__(self, r: re.Pattern):
|
||||
self.re = r
|
||||
|
||||
# pass through all other attribute calls to the original re
|
||||
def __getattr__(self, item):
|
||||
return self.re.__getattribute__(item)
|
||||
|
||||
def subn(self, repl, string, *args, **kwargs):
|
||||
return string, self.re.subn(repl, string, *args, **kwargs)[1]
|
||||
|
||||
|
||||
if hasattr(urllib3.util.url, '_PERCENT_RE'): # urllib3 >= 2.0.0
|
||||
urllib3.util.url._PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url._PERCENT_RE)
|
||||
else:
|
||||
warnings.warn('Failed to patch PERCENT_RE in urllib3 (does the attribute exist?)' + bug_reports_message())
|
||||
|
||||
|
||||
class RequestsResponseAdapter(Response):
|
||||
def __init__(self, res: niquests.models.Response):
|
||||
super().__init__(
|
||||
fp=res.raw, headers=res.headers, url=res.url,
|
||||
status=res.status_code, reason=res.reason)
|
||||
|
||||
self._requests_response = res
|
||||
|
||||
def read(self, amt: int = None):
|
||||
try:
|
||||
# Interact with urllib3 response directly.
|
||||
return self.fp.read(amt, decode_content=True)
|
||||
|
||||
# See urllib3.response.HTTPResponse.read() for exceptions raised on read
|
||||
except urllib3.exceptions.SSLError as e:
|
||||
raise SSLError(cause=e) from e
|
||||
|
||||
except urllib3.exceptions.ProtocolError as e:
|
||||
# IncompleteRead is always contained within ProtocolError
|
||||
# See urllib3.response.HTTPResponse._error_catcher()
|
||||
if isinstance(e, urllib3.exceptions.IncompleteRead):
|
||||
ir_err = e
|
||||
else:
|
||||
ir_err = next(
|
||||
(err for err in (e.__context__, e.__cause__, *variadic(e.args))
|
||||
if isinstance(err, urllib3.exceptions.IncompleteRead)), None)
|
||||
if ir_err is not None:
|
||||
# `urllib3.exceptions.IncompleteRead` uses an `int` for its `partial` property.
|
||||
partial = ir_err.partial if isinstance(ir_err.partial, int) else len(ir_err.partial)
|
||||
raise IncompleteRead(partial=partial, expected=ir_err.expected) from e
|
||||
|
||||
raise TransportError(cause=e) from e
|
||||
|
||||
except urllib3.exceptions.HTTPError as e:
|
||||
# catch-all for any other urllib3 response exceptions
|
||||
raise TransportError(cause=e) from e
|
||||
|
||||
|
||||
class RequestsHTTPAdapter(niquests.adapters.HTTPAdapter):
|
||||
def __init__(self, *args, **kwargs):
|
||||
self._pm_args = {}
|
||||
|
||||
if "source_address" in kwargs:
|
||||
if kwargs["source_address"] is not None:
|
||||
kwargs["source_address"] = (kwargs["source_address"], 0)
|
||||
|
||||
self._proxy_ssl_context = None
|
||||
|
||||
if "proxy_ssl_context" in kwargs:
|
||||
self._proxy_ssl_context = kwargs["proxy_ssl_context"]
|
||||
kwargs.pop("proxy_ssl_context")
|
||||
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def init_poolmanager(self, *args, **kwargs):
|
||||
return super().init_poolmanager(*args, **kwargs, **self._pm_args)
|
||||
|
||||
def proxy_manager_for(self, proxy, **proxy_kwargs):
|
||||
extra_kwargs = {}
|
||||
if not proxy.lower().startswith('socks') and self._proxy_ssl_context:
|
||||
extra_kwargs['proxy_ssl_context'] = self._proxy_ssl_context
|
||||
# hmm... the dev-socks server isn't happy with... Happy Eyeballs.
|
||||
# It makes the server unstable due to the concurrent conn attempts.
|
||||
if self._happy_eyeballs:
|
||||
self._happy_eyeballs = False
|
||||
return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs)
|
||||
|
||||
|
||||
class RequestsSession(niquests.sessions.Session):
|
||||
"""
|
||||
Ensure unified redirect method handling with our urllib redirect handler.
|
||||
"""
|
||||
|
||||
def rebuild_method(self, prepared_request, response):
|
||||
new_method = get_redirect_method(prepared_request.method, response.status_code)
|
||||
|
||||
# HACK: requests removes headers/body on redirect unless code was a 307/308.
|
||||
if new_method == prepared_request.method:
|
||||
response._real_status_code = response.status_code
|
||||
response.status_code = 308
|
||||
|
||||
prepared_request.method = new_method
|
||||
|
||||
# Requests fails to resolve dot segments on absolute redirect locations
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/9020
|
||||
prepared_request.url = normalize_url(prepared_request.url)
|
||||
|
||||
def rebuild_auth(self, prepared_request, response):
|
||||
# HACK: undo status code change from rebuild_method, if applicable.
|
||||
# rebuild_auth runs after requests would remove headers/body based on status code
|
||||
if hasattr(response, '_real_status_code'):
|
||||
response.status_code = response._real_status_code
|
||||
del response._real_status_code
|
||||
return super().rebuild_auth(prepared_request, response)
|
||||
|
||||
|
||||
class Urllib3LoggingFilter(logging.Filter):
|
||||
|
||||
def filter(self, record):
|
||||
# Ignore HTTP request messages since HTTPConnection prints those
|
||||
if record.msg == '%s://%s:%s "%s %s %s" %s %s':
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
class Urllib3LoggingHandler(logging.Handler):
|
||||
"""Redirect urllib3 logs to our logger"""
|
||||
|
||||
def __init__(self, logger, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self._logger = logger
|
||||
|
||||
def emit(self, record):
|
||||
try:
|
||||
msg = self.format(record)
|
||||
if record.levelno >= logging.ERROR:
|
||||
self._logger.error(msg)
|
||||
else:
|
||||
self._logger.stdout(msg)
|
||||
|
||||
except Exception:
|
||||
self.handleError(record)
|
||||
|
||||
|
||||
@register_rh
|
||||
class NiquestsRH(RequestHandler, InstanceStoreMixin):
|
||||
|
||||
"""Niquests RequestHandler
|
||||
https://github.com/jawah/niquests
|
||||
"""
|
||||
_SUPPORTED_URL_SCHEMES = ('http', 'https')
|
||||
_SUPPORTED_ENCODINGS = tuple(SUPPORTED_ENCODINGS)
|
||||
_SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
|
||||
_SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
|
||||
RH_NAME = 'niquests'
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
# Forward urllib3 debug messages to our logger
|
||||
logger = logging.getLogger('urllib3')
|
||||
self.__logging_handler = Urllib3LoggingHandler(logger=self._logger)
|
||||
self.__logging_handler.setFormatter(logging.Formatter('requests: %(message)s'))
|
||||
self.__logging_handler.addFilter(Urllib3LoggingFilter())
|
||||
logger.addHandler(self.__logging_handler)
|
||||
# TODO: Use a logger filter to suppress pool reuse warning instead
|
||||
logger.setLevel(logging.ERROR)
|
||||
|
||||
if self.verbose:
|
||||
# Setting this globally is not ideal, but is easier than hacking with urllib3.
|
||||
# It could technically be problematic for scripts embedding yt-dlp.
|
||||
# However, it is unlikely debug traffic is used in that context in a way this will cause problems.
|
||||
urllib3.connection.HTTPConnection.debuglevel = 1
|
||||
logger.setLevel(logging.DEBUG)
|
||||
# this is expected if we are using --no-check-certificate
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
def close(self):
|
||||
self._clear_instances()
|
||||
# Remove the logging handler that contains a reference to our logger
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/8922
|
||||
logging.getLogger('urllib3').removeHandler(self.__logging_handler)
|
||||
|
||||
def _check_extensions(self, extensions):
|
||||
super()._check_extensions(extensions)
|
||||
extensions.pop('cookiejar', None)
|
||||
extensions.pop('timeout', None)
|
||||
|
||||
def _create_instance(self, cookiejar):
|
||||
certs = self._client_cert
|
||||
session = RequestsSession()
|
||||
|
||||
session.verify = self.verify
|
||||
|
||||
if self.verify and not self.prefer_system_certs and certifi is not None:
|
||||
session.verify = certifi.where()
|
||||
|
||||
session.trust_env = False # no need, we already load proxies from env
|
||||
session.cookies = cookiejar
|
||||
session.headers.setdefault("Connection", "keep-alive")
|
||||
|
||||
has_client_cert = "client_certificate" in certs
|
||||
has_client_key = "client_certificate_key" in certs
|
||||
has_password = "client_certificate_password" in certs
|
||||
|
||||
if len(list(filter(lambda _: _ is True, [has_client_cert, has_client_key, has_password]))) > 1:
|
||||
session.cert = (
|
||||
certs["client_certificate"],
|
||||
certs["client_certificate_key"] if has_client_key else None,
|
||||
certs["client_certificate_password"] if has_password else None,
|
||||
)
|
||||
elif has_client_cert:
|
||||
session.cert = certs["client_certificate"]
|
||||
|
||||
session.adapters.clear()
|
||||
|
||||
# we want to override the default adapter in order to leverage our proxy implt
|
||||
http_adapter = RequestsHTTPAdapter(source_address=self.source_address, max_retries=False, happy_eyeballs=True)
|
||||
|
||||
session.mount("http://", http_adapter)
|
||||
session.mount("https://", http_adapter)
|
||||
|
||||
return session
|
||||
|
||||
def _send(self, request):
|
||||
headers = self._merge_headers(request.headers)
|
||||
add_accept_encoding_header(headers, SUPPORTED_ENCODINGS)
|
||||
|
||||
max_redirects_exceeded = False
|
||||
|
||||
session = self._get_instance(cookiejar=self._get_cookiejar(request))
|
||||
|
||||
try:
|
||||
requests_res = session.request(
|
||||
method=request.method,
|
||||
url=request.url,
|
||||
data=request.data,
|
||||
headers=headers,
|
||||
timeout=self._calculate_timeout(request),
|
||||
proxies=self._get_proxies(request),
|
||||
allow_redirects=True,
|
||||
stream=True
|
||||
)
|
||||
|
||||
except niquests.exceptions.TooManyRedirects as e:
|
||||
max_redirects_exceeded = True
|
||||
requests_res = e.response
|
||||
|
||||
except niquests.exceptions.SSLError as e:
|
||||
if 'CERTIFICATE_VERIFY_FAILED' in str(e):
|
||||
raise CertificateVerifyError(cause=e) from e
|
||||
raise SSLError(cause=e) from e
|
||||
|
||||
except niquests.exceptions.ProxyError as e:
|
||||
raise ProxyError(cause=e) from e
|
||||
|
||||
except (niquests.exceptions.ConnectionError, niquests.exceptions.Timeout) as e:
|
||||
raise TransportError(cause=e) from e
|
||||
|
||||
except urllib3.exceptions.HTTPError as e:
|
||||
# Catch any urllib3 exceptions that may leak through
|
||||
raise TransportError(cause=e) from e
|
||||
|
||||
except niquests.exceptions.RequestException as e:
|
||||
# Miscellaneous Requests exceptions. May not necessary be network related e.g. InvalidURL
|
||||
raise RequestError(cause=e) from e
|
||||
|
||||
res = RequestsResponseAdapter(requests_res)
|
||||
|
||||
if not 200 <= res.status < 300:
|
||||
raise HTTPError(res, redirect_loop=max_redirects_exceeded)
|
||||
|
||||
return res
|
||||
|
||||
|
||||
@register_preference(NiquestsRH)
|
||||
def requests_preference(rh, request):
|
||||
return 200
|
||||
|
||||
|
||||
# Use our socks proxy implementation with requests to avoid an extra dependency.
|
||||
class SocksHTTPConnection(urllib3.connection.HTTPConnection):
|
||||
def __init__(self, _socks_options, *args, **kwargs): # must use _socks_options to pass PoolKey checks
|
||||
self._proxy_args = _socks_options
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def _new_conn(self):
|
||||
try:
|
||||
return create_connection(
|
||||
address=(self._proxy_args['addr'], self._proxy_args['port']),
|
||||
timeout=self.timeout,
|
||||
source_address=self.source_address,
|
||||
_create_socket_func=functools.partial(
|
||||
create_socks_proxy_socket, (self.host, self.port), self._proxy_args))
|
||||
except (socket.timeout, TimeoutError) as e:
|
||||
raise urllib3.exceptions.ConnectTimeoutError(
|
||||
self, f'Connection to {self.host} timed out. (connect timeout={self.timeout})') from e
|
||||
except SocksProxyError as e:
|
||||
raise urllib3.exceptions.ProxyError(str(e), e) from e
|
||||
except OSError as e:
|
||||
raise urllib3.exceptions.NewConnectionError(
|
||||
self, f'Failed to establish a new connection: {e}') from e
|
||||
|
||||
|
||||
class SocksHTTPSConnection(SocksHTTPConnection, urllib3.connection.HTTPSConnection):
|
||||
pass
|
||||
|
||||
|
||||
class SocksHTTPConnectionPool(urllib3.HTTPConnectionPool):
|
||||
ConnectionCls = SocksHTTPConnection
|
||||
|
||||
|
||||
class SocksHTTPSConnectionPool(urllib3.HTTPSConnectionPool):
|
||||
ConnectionCls = SocksHTTPSConnection
|
||||
|
||||
|
||||
class SocksProxyManager(urllib3.PoolManager):
|
||||
|
||||
def __init__(self, socks_proxy, username=None, password=None, num_pools=10, headers=None, **connection_pool_kw):
|
||||
connection_pool_kw['_socks_options'] = make_socks_proxy_opts(socks_proxy)
|
||||
super().__init__(num_pools, headers, **connection_pool_kw)
|
||||
self.pool_classes_by_scheme = {
|
||||
'http': SocksHTTPConnectionPool,
|
||||
'https': SocksHTTPSConnectionPool
|
||||
}
|
||||
|
||||
|
||||
niquests.adapters.SOCKSProxyManager = SocksProxyManager
|
|
@ -497,6 +497,7 @@ class Response(io.IOBase):
|
|||
@param headers: response headers.
|
||||
@param status: Response HTTP status code. Default is 200 OK.
|
||||
@param reason: HTTP status reason. Will use built-in reasons based on status code if not provided.
|
||||
@param extensions: Dictionary of handler-specific response extensions.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
|
@ -505,7 +506,9 @@ def __init__(
|
|||
url: str,
|
||||
headers: Mapping[str, str],
|
||||
status: int = 200,
|
||||
reason: str = None):
|
||||
reason: str = None,
|
||||
extensions: dict = None
|
||||
):
|
||||
|
||||
self.fp = fp
|
||||
self.headers = Message()
|
||||
|
@ -517,6 +520,7 @@ def __init__(
|
|||
self.reason = reason or HTTPStatus(status).phrase
|
||||
except ValueError:
|
||||
self.reason = None
|
||||
self.extensions = extensions or {}
|
||||
|
||||
def readable(self):
|
||||
return self.fp.readable()
|
||||
|
|
|
@ -69,6 +69,10 @@ def _get_variant_and_executable_path():
|
|||
# Ref: https://en.wikipedia.org/wiki/Uname#Examples
|
||||
if machine[1:] in ('x86', 'x86_64', 'amd64', 'i386', 'i686'):
|
||||
machine = '_x86' if platform.architecture()[0][:2] == '32' else ''
|
||||
# sys.executable returns a /tmp/ path for staticx builds (linux_static)
|
||||
# Ref: https://staticx.readthedocs.io/en/latest/usage.html#run-time-information
|
||||
if static_exe_path := os.getenv('STATICX_PROG_PATH'):
|
||||
path = static_exe_path
|
||||
return f'{remove_end(sys.platform, "32")}{machine}_exe', path
|
||||
|
||||
path = os.path.dirname(__file__)
|
||||
|
|
|
@ -1638,16 +1638,14 @@ def get_filesystem_encoding():
|
|||
return encoding if encoding is not None else 'utf-8'
|
||||
|
||||
|
||||
_WINDOWS_QUOTE_TRANS = str.maketrans({'"': '\\"', '\\': '\\\\'})
|
||||
_WINDOWS_QUOTE_TRANS = str.maketrans({'"': R'\"'})
|
||||
_CMD_QUOTE_TRANS = str.maketrans({
|
||||
# Keep quotes balanced by replacing them with `""` instead of `\\"`
|
||||
'"': '""',
|
||||
# Requires a variable `=` containing `"^\n\n"` (set in `utils.Popen`)
|
||||
# These require an env-variable `=` containing `"^\n\n"` (set in `utils.Popen`)
|
||||
# `=` should be unique since variables containing `=` cannot be set using cmd
|
||||
'\n': '%=%',
|
||||
# While we are only required to escape backslashes immediately before quotes,
|
||||
# we instead escape all of 'em anyways to be consistent
|
||||
'\\': '\\\\',
|
||||
'\r': '%=%',
|
||||
# Use zero length variable replacement so `%` doesn't get expanded
|
||||
# `cd` is always set as long as extensions are enabled (`/E:ON` in `utils.Popen`)
|
||||
'%': '%%cd:~,%',
|
||||
|
@ -1656,19 +1654,14 @@ def get_filesystem_encoding():
|
|||
|
||||
def shell_quote(args, *, shell=False):
|
||||
args = list(variadic(args))
|
||||
if any(isinstance(item, bytes) for item in args):
|
||||
deprecation_warning('Passing bytes to utils.shell_quote is deprecated')
|
||||
encoding = get_filesystem_encoding()
|
||||
for index, item in enumerate(args):
|
||||
if isinstance(item, bytes):
|
||||
args[index] = item.decode(encoding)
|
||||
|
||||
if compat_os_name != 'nt':
|
||||
return shlex.join(args)
|
||||
|
||||
trans = _CMD_QUOTE_TRANS if shell else _WINDOWS_QUOTE_TRANS
|
||||
return ' '.join(
|
||||
s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII) else s.translate(trans).join('""')
|
||||
s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII)
|
||||
else re.sub(r'(\\+)("|$)', r'\1\1\2', s).translate(trans).join('""')
|
||||
for s in args)
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue