Compare commits


No commits in common. "23d93556717785603e24430742fc5149ed88da1a" and "4cd23b99dbfc6952898254672633d71ee2a76c79" have entirely different histories.

49 changed files with 736 additions and 1165 deletions

View File

@@ -18,7 +18,7 @@ body:
       options:
         - label: I'm reporting that yt-dlp is broken on a **supported** site
           required: true
-        - label: I've verified that I'm running yt-dlp version **2023.10.07** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+        - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
           required: true
         - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
           required: true
@@ -64,7 +64,7 @@ body:
         [debug] Command-line config: ['-vU', 'test:youtube']
         [debug] Portable config "yt-dlp.conf": ['-i']
         [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-        [debug] yt-dlp version 2023.10.07 [9d339c4] (win32_exe)
+        [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe)
         [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
         [debug] Checking exe version: ffmpeg -bsfs
         [debug] Checking exe version: ffprobe -bsfs
@@ -72,8 +72,8 @@ body:
         [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
         [debug] Proxy map: {}
         [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-        Latest version: 2023.10.07, Current version: 2023.10.07
-        yt-dlp is up to date (2023.10.07)
+        Latest version: 2023.09.24, Current version: 2023.09.24
+        yt-dlp is up to date (2023.09.24)
         <more lines>
       render: shell
     validations:

View File

@@ -18,7 +18,7 @@ body:
       options:
         - label: I'm reporting a new site support request
           required: true
-        - label: I've verified that I'm running yt-dlp version **2023.10.07** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+        - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
           required: true
         - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
           required: true
@@ -76,7 +76,7 @@ body:
         [debug] Command-line config: ['-vU', 'test:youtube']
         [debug] Portable config "yt-dlp.conf": ['-i']
         [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-        [debug] yt-dlp version 2023.10.07 [9d339c4] (win32_exe)
+        [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe)
         [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
         [debug] Checking exe version: ffmpeg -bsfs
         [debug] Checking exe version: ffprobe -bsfs
@@ -84,8 +84,8 @@ body:
         [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
         [debug] Proxy map: {}
         [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-        Latest version: 2023.10.07, Current version: 2023.10.07
-        yt-dlp is up to date (2023.10.07)
+        Latest version: 2023.09.24, Current version: 2023.09.24
+        yt-dlp is up to date (2023.09.24)
         <more lines>
       render: shell
     validations:

View File

@@ -18,7 +18,7 @@ body:
       options:
         - label: I'm requesting a site-specific feature
           required: true
-        - label: I've verified that I'm running yt-dlp version **2023.10.07** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+        - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
           required: true
         - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
           required: true
@@ -72,7 +72,7 @@ body:
         [debug] Command-line config: ['-vU', 'test:youtube']
         [debug] Portable config "yt-dlp.conf": ['-i']
         [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-        [debug] yt-dlp version 2023.10.07 [9d339c4] (win32_exe)
+        [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe)
         [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
         [debug] Checking exe version: ffmpeg -bsfs
         [debug] Checking exe version: ffprobe -bsfs
@@ -80,8 +80,8 @@ body:
         [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
         [debug] Proxy map: {}
         [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-        Latest version: 2023.10.07, Current version: 2023.10.07
-        yt-dlp is up to date (2023.10.07)
+        Latest version: 2023.09.24, Current version: 2023.09.24
+        yt-dlp is up to date (2023.09.24)
         <more lines>
       render: shell
     validations:

View File

@@ -18,7 +18,7 @@ body:
       options:
         - label: I'm reporting a bug unrelated to a specific site
           required: true
-        - label: I've verified that I'm running yt-dlp version **2023.10.07** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+        - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
           required: true
         - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
           required: true
@@ -57,7 +57,7 @@ body:
         [debug] Command-line config: ['-vU', 'test:youtube']
         [debug] Portable config "yt-dlp.conf": ['-i']
         [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-        [debug] yt-dlp version 2023.10.07 [9d339c4] (win32_exe)
+        [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe)
         [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
         [debug] Checking exe version: ffmpeg -bsfs
         [debug] Checking exe version: ffprobe -bsfs
@@ -65,8 +65,8 @@ body:
         [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
         [debug] Proxy map: {}
         [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-        Latest version: 2023.10.07, Current version: 2023.10.07
-        yt-dlp is up to date (2023.10.07)
+        Latest version: 2023.09.24, Current version: 2023.09.24
+        yt-dlp is up to date (2023.09.24)
         <more lines>
       render: shell
     validations:

View File

@@ -20,7 +20,7 @@ body:
           required: true
         - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
           required: true
-        - label: I've verified that I'm running yt-dlp version **2023.10.07** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+        - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
           required: true
         - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
           required: true
@@ -53,7 +53,7 @@ body:
         [debug] Command-line config: ['-vU', 'test:youtube']
         [debug] Portable config "yt-dlp.conf": ['-i']
         [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-        [debug] yt-dlp version 2023.10.07 [9d339c4] (win32_exe)
+        [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe)
         [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
         [debug] Checking exe version: ffmpeg -bsfs
         [debug] Checking exe version: ffprobe -bsfs
@@ -61,7 +61,7 @@ body:
         [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
         [debug] Proxy map: {}
         [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-        Latest version: 2023.10.07, Current version: 2023.10.07
-        yt-dlp is up to date (2023.10.07)
+        Latest version: 2023.09.24, Current version: 2023.09.24
+        yt-dlp is up to date (2023.09.24)
         <more lines>
       render: shell

View File

@@ -26,7 +26,7 @@ body:
           required: true
         - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
           required: true
-        - label: I've verified that I'm running yt-dlp version **2023.10.07** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+        - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
           required: true
         - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates
           required: true
@@ -59,7 +59,7 @@ body:
         [debug] Command-line config: ['-vU', 'test:youtube']
         [debug] Portable config "yt-dlp.conf": ['-i']
         [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-        [debug] yt-dlp version 2023.10.07 [9d339c4] (win32_exe)
+        [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe)
         [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
         [debug] Checking exe version: ffmpeg -bsfs
         [debug] Checking exe version: ffprobe -bsfs
@@ -67,7 +67,7 @@ body:
         [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
         [debug] Proxy map: {}
         [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-        Latest version: 2023.10.07, Current version: 2023.10.07
-        yt-dlp is up to date (2023.10.07)
+        Latest version: 2023.09.24, Current version: 2023.09.24
+        yt-dlp is up to date (2023.09.24)
         <more lines>
       render: shell

View File

@@ -13,7 +13,7 @@ jobs:
       matrix:
         os: [ubuntu-latest]
         # CPython 3.11 is in quick-test
-        python-version: ['3.8', '3.9', '3.10', '3.12', pypy-3.7, pypy-3.8, pypy-3.10]
+        python-version: ['3.8', '3.9', '3.10', '3.12-dev', pypy-3.7, pypy-3.8, pypy-3.10]
         run-tests-ext: [sh]
         include:
         # atleast one of each CPython/PyPy tests must be in windows
@@ -21,7 +21,7 @@ jobs:
           python-version: '3.7'
           run-tests-ext: bat
         - os: windows-latest
-          python-version: '3.12'
+          python-version: '3.12-dev'
           run-tests-ext: bat
         - os: windows-latest
           python-version: pypy-3.9

View File

@@ -28,7 +28,7 @@ jobs:
       fail-fast: true
       matrix:
         os: [ubuntu-latest]
-        python-version: ['3.7', '3.10', '3.12', pypy-3.7, pypy-3.8, pypy-3.10]
+        python-version: ['3.7', '3.10', 3.11-dev, pypy-3.7, pypy-3.8]
         run-tests-ext: [sh]
         include:
         # atleast one of each CPython/PyPy tests must be in windows

View File

@@ -503,9 +503,3 @@ Yalab7
 zhallgato
 zhong-yiyu
 Zprokkel
-AS6939
-drzraf
-handlerug
-jiru
-madewokherd
-xofe

View File

@@ -4,35 +4,6 @@
 # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
 -->
-
-### 2023.10.07
-
-#### Extractor changes
-- **abc.net.au**: iview: [Improve `episode` extraction](https://github.com/yt-dlp/yt-dlp/commit/a9efb4b8d74f3583450ffda0ee57259a47d39c70) ([#8201](https://github.com/yt-dlp/yt-dlp/issues/8201)) by [xofe](https://github.com/xofe)
-- **erocast**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/47c598783c98c179e04dd12c2a3fee0f3dc53087) ([#8264](https://github.com/yt-dlp/yt-dlp/issues/8264)) by [madewokherd](https://github.com/madewokherd)
-- **gofile**: [Fix token cookie bug](https://github.com/yt-dlp/yt-dlp/commit/0730d5a966fa8a937d84bfb7f68be5198acb039b) by [bashonly](https://github.com/bashonly)
-- **iq.com**: [Fix extraction and subtitles](https://github.com/yt-dlp/yt-dlp/commit/35d9cbaf9638ccc9daf8a863063b2e7c135bc664) ([#8260](https://github.com/yt-dlp/yt-dlp/issues/8260)) by [AS6939](https://github.com/AS6939)
-- **lbry**
-    - [Add playlist support](https://github.com/yt-dlp/yt-dlp/commit/48cceec1ddb8649b5e771df8df79eb9c39c82b90) ([#8213](https://github.com/yt-dlp/yt-dlp/issues/8213)) by [bashonly](https://github.com/bashonly), [drzraf](https://github.com/drzraf), [Grub4K](https://github.com/Grub4K)
-    - [Extract `uploader_id`](https://github.com/yt-dlp/yt-dlp/commit/0e722f2f3ca42e634fd7b06ee70b16bf833ce132) ([#8244](https://github.com/yt-dlp/yt-dlp/issues/8244)) by [drzraf](https://github.com/drzraf)
-- **litv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/91a670a4f7babe9c8aa2018f57d8c8952a6f49d8) ([#7785](https://github.com/yt-dlp/yt-dlp/issues/7785)) by [jiru](https://github.com/jiru)
-- **neteasemusic**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/f980df734cf5c0eaded2f7b38c6c60bccfeebb48) ([#8181](https://github.com/yt-dlp/yt-dlp/issues/8181)) by [c-basalt](https://github.com/c-basalt)
-- **nhk**: [Fix VOD extraction](https://github.com/yt-dlp/yt-dlp/commit/e831c80e8b2fc025b3b67d82974cc59e3526fdc8) ([#8249](https://github.com/yt-dlp/yt-dlp/issues/8249)) by [garret1317](https://github.com/garret1317)
-- **radiko**: [Improve extraction](https://github.com/yt-dlp/yt-dlp/commit/2ad3873f0dfa9285c91d2160e36c039e69d597c7) ([#8221](https://github.com/yt-dlp/yt-dlp/issues/8221)) by [garret1317](https://github.com/garret1317)
-- **substack**
-    - [Fix download cookies bug](https://github.com/yt-dlp/yt-dlp/commit/2f2dda3a7e85148773da3cdbc03ac9949ec1bc45) ([#8219](https://github.com/yt-dlp/yt-dlp/issues/8219)) by [handlerug](https://github.com/handlerug)
-    - [Fix embed extraction](https://github.com/yt-dlp/yt-dlp/commit/fbcc299bd8a19cf8b3c8805d6c268a9110230973) ([#8218](https://github.com/yt-dlp/yt-dlp/issues/8218)) by [handlerug](https://github.com/handlerug)
-- **theta**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/792f1e64f6a2beac51e85408d142b3118115c4fd) ([#8251](https://github.com/yt-dlp/yt-dlp/issues/8251)) by [alerikaisattera](https://github.com/alerikaisattera)
-- **wrestleuniversevod**: [Call API with device ID](https://github.com/yt-dlp/yt-dlp/commit/b095fd3fa9d58a65dc9b830bd63b9d909422aa86) ([#8272](https://github.com/yt-dlp/yt-dlp/issues/8272)) by [bashonly](https://github.com/bashonly)
-- **xhamster**: user: [Support creator urls](https://github.com/yt-dlp/yt-dlp/commit/cc8d8441524ec3442d7c0d3f8f33f15b66aa06f3) ([#8232](https://github.com/yt-dlp/yt-dlp/issues/8232)) by [Grub4K](https://github.com/Grub4K)
-- **youtube**
-    - [Fix `heatmap` extraction](https://github.com/yt-dlp/yt-dlp/commit/03e85ea99db76a2fddb65bf46f8819bda780aaf3) ([#8299](https://github.com/yt-dlp/yt-dlp/issues/8299)) by [bashonly](https://github.com/bashonly)
-    - [Raise a warning for `Incomplete Data` instead of an error](https://github.com/yt-dlp/yt-dlp/commit/eb5bdbfa70126c7d5355cc0954b63720522e462c) ([#8238](https://github.com/yt-dlp/yt-dlp/issues/8238)) by [coletdjnz](https://github.com/coletdjnz)
-
-#### Misc. changes
-- **cleanup**
-    - [Update extractor tests](https://github.com/yt-dlp/yt-dlp/commit/19c90e405b4137c06dfe6f9aaa02396df0da93e5) ([#7718](https://github.com/yt-dlp/yt-dlp/issues/7718)) by [trainman261](https://github.com/trainman261)
-    - Miscellaneous: [377e85a](https://github.com/yt-dlp/yt-dlp/commit/377e85a1797db9e98b78b38203ed9d4ded229991) by [dirkf](https://github.com/dirkf), [gamer191](https://github.com/gamer191), [Grub4K](https://github.com/Grub4K)
-
 ### 2023.09.24
 #### Important changes

View File

@@ -1809,7 +1809,6 @@ The following extractors use this feature:
 * `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8)
 * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
 * `innertube_key`: Innertube API key to use for all API requests
-* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning

 #### youtubetab (YouTube playlists, channels, feeds, etc.)
 * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)

View File

@@ -422,7 +422,6 @@
 - **eplus:inbound**: e+ (イープラス) overseas
 - **Epoch**
 - **Eporner**
-- **Erocast**
 - **EroProfile**: [*eroprofile*](## "netrc machine")
 - **EroProfile:album**
 - **ertflix**: ERTFLIX videos
@@ -700,7 +699,6 @@
 - **LastFMUser**
 - **lbry**
 - **lbry:channel**
-- **lbry:playlist**
 - **LCI**
 - **Lcp**
 - **LcpPlay**
@@ -1476,6 +1474,8 @@
 - **ThePlatformFeed**
 - **TheStar**
 - **TheSun**
+- **ThetaStream**
+- **ThetaVideo**
 - **TheWeatherChannel**
 - **ThisAmericanLife**
 - **ThisAV**

View File

@@ -1209,9 +1209,6 @@ class TestUtil(unittest.TestCase):
         on = js_to_json('\'"\\""\'')
         self.assertEqual(json.loads(on), '"""', msg='Unnecessary quote escape should be escaped')

-        on = js_to_json('[new Date("spam"), \'("eggs")\']')
-        self.assertEqual(json.loads(on), ['spam', '("eggs")'], msg='Date regex should match a single string')
-
     def test_js_to_json_malformed(self):
         self.assertEqual(js_to_json('42a1'), '42"a1"')
         self.assertEqual(js_to_json('42a-1'), '42"a"-1')
@@ -1223,13 +1220,11 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
         self.assertEqual(js_to_json('`${name}`', {}), '"name"')

-    def test_js_to_json_common_constructors(self):
+    def test_js_to_json_map_array_constructors(self):
         self.assertEqual(json.loads(js_to_json('new Map([["a", 5]])')), {'a': 5})
         self.assertEqual(json.loads(js_to_json('Array(5, 10)')), [5, 10])
         self.assertEqual(json.loads(js_to_json('new Array(15,5)')), [15, 5])
         self.assertEqual(json.loads(js_to_json('new Map([Array(5, 10),new Array(15,5)])')), {'5': 10, '15': 5})
-        self.assertEqual(json.loads(js_to_json('new Date("123")')), "123")
-        self.assertEqual(json.loads(js_to_json('new Date(\'2023-10-19\')')), "2023-10-19")

     def test_extract_attributes(self):
         self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})

View File

@@ -14,7 +14,6 @@ from ..networking import Request
 from ..networking.exceptions import HTTPError, IncompleteRead
 from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj
 from ..utils.networking import HTTPHeaderDict
-from ..utils.progress import ProgressCalculator


 class HttpQuietDownloader(HttpFD):
@@ -227,7 +226,8 @@ class FragmentFD(FileDownloader):
         resume_len = ctx['complete_frags_downloaded_bytes']
         total_frags = ctx['total_frags']
         ctx_id = ctx.get('ctx_id')
-        # Stores the download progress, updated by the progress hook
+        # This dict stores the download progress, it's updated by the progress
+        # hook
         state = {
             'status': 'downloading',
             'downloaded_bytes': resume_len,
@@ -237,8 +237,14 @@ class FragmentFD(FileDownloader):
             'tmpfilename': ctx['tmpfilename'],
         }

-        ctx['started'] = time.time()
-        progress = ProgressCalculator(resume_len)
+        start = time.time()
+        ctx.update({
+            'started': start,
+            'fragment_started': start,
+            # Amount of fragment's bytes downloaded by the time of the previous
+            # frag progress hook invocation
+            'prev_frag_downloaded_bytes': 0,
+        })

         def frag_progress_hook(s):
             if s['status'] not in ('downloading', 'finished'):
@@ -253,35 +259,38 @@ class FragmentFD(FileDownloader):
             state['max_progress'] = ctx.get('max_progress')
             state['progress_idx'] = ctx.get('progress_idx')

-            state['elapsed'] = progress.elapsed
+            time_now = time.time()
+            state['elapsed'] = time_now - start
             frag_total_bytes = s.get('total_bytes') or 0
             s['fragment_info_dict'] = s.pop('info_dict', {})

-            # XXX: Fragment resume is not accounted for here
             if not ctx['live']:
                 estimated_size = (
                     (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
                     / (state['fragment_index'] + 1) * total_frags)
-                progress.total = estimated_size
-                progress.update(s.get('downloaded_bytes'))
-                state['total_bytes_estimate'] = progress.total
-            else:
-                progress.update(s.get('downloaded_bytes'))
+                state['total_bytes_estimate'] = estimated_size

             if s['status'] == 'finished':
                 state['fragment_index'] += 1
                 ctx['fragment_index'] = state['fragment_index']
-                progress.thread_reset()
-
-            state['downloaded_bytes'] = ctx['complete_frags_downloaded_bytes'] = progress.downloaded
-            state['speed'] = ctx['speed'] = progress.speed.smooth
-            state['eta'] = progress.eta.smooth
+                state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
+                ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
+                ctx['speed'] = state['speed'] = self.calc_speed(
+                    ctx['fragment_started'], time_now, frag_total_bytes)
+                ctx['fragment_started'] = time.time()
+                ctx['prev_frag_downloaded_bytes'] = 0
+            else:
+                frag_downloaded_bytes = s['downloaded_bytes']
+                state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
+                ctx['speed'] = state['speed'] = self.calc_speed(
+                    ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx.get('frag_resume_len', 0))
+                if not ctx['live']:
+                    state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
+                ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes

             self._hook_progress(state, info_dict)

         ctx['dl'].add_progress_hook(frag_progress_hook)
-        return ctx['started']
+        return start

     def _finish_frag_download(self, ctx, info_dict):
         ctx['dest_stream'].close()
@@ -491,6 +500,7 @@ class FragmentFD(FileDownloader):
             download_fragment(fragment, ctx_copy)
             return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')

+        self.report_warning('The download speed shown is only of one thread. This is a known issue')
         with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
             try:
                 for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):

View File

@@ -574,7 +574,6 @@ from .epicon import (
 from .eplus import EplusIbIE
 from .epoch import EpochIE
 from .eporner import EpornerIE
-from .erocast import ErocastIE
 from .eroprofile import (
     EroProfileIE,
     EroProfileAlbumIE,
@@ -952,7 +951,6 @@ from .lastfm import (
 from .lbry import (
     LBRYIE,
     LBRYChannelIE,
-    LBRYPlaylistIE,
 )
 from .lci import LCIIE
 from .lcp import (
@@ -2004,6 +2002,10 @@ from .theplatform import (
 )
 from .thestar import TheStarIE
 from .thesun import TheSunIE
+from .theta import (
+    ThetaVideoIE,
+    ThetaStreamIE,
+)
 from .theweatherchannel import TheWeatherChannelIE
 from .thisamericanlife import ThisAmericanLifeIE
 from .thisav import ThisAVIE

View File

@@ -181,102 +181,18 @@ class ABCIViewIE(InfoExtractor):
     _GEO_COUNTRIES = ['AU']
     _TESTS = [{
-        'url': 'https://iview.abc.net.au/show/utopia/series/1/video/CO1211V001S00',
-        'md5': '52a942bfd7a0b79a6bfe9b4ce6c9d0ed',
-        'info_dict': {
-            'id': 'CO1211V001S00',
-            'ext': 'mp4',
-            'title': 'Series 1 Ep 1 Wood For The Trees',
-            'series': 'Utopia',
-            'description': 'md5:0cfb2c183c1b952d1548fd65c8a95c00',
-            'upload_date': '20230726',
-            'uploader_id': 'abc1',
-            'series_id': 'CO1211V',
-            'episode_id': 'CO1211V001S00',
-            'season_number': 1,
-            'season': 'Season 1',
-            'episode_number': 1,
-            'episode': 'Wood For The Trees',
-            'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/co/CO1211V001S00_5ad8353f4df09_1280.jpg',
-            'timestamp': 1690403700,
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }, {
-        'note': 'No episode name',
         'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
         'md5': '67715ce3c78426b11ba167d875ac6abf',
         'info_dict': {
             'id': 'LE1927H001S00',
             'ext': 'mp4',
-            'title': 'Series 11 Ep 1',
-            'series': 'Gruen',
+            'title': "Series 11 Ep 1",
+            'series': "Gruen",
             'description': 'md5:52cc744ad35045baf6aded2ce7287f67',
             'upload_date': '20190925',
             'uploader_id': 'abc1',
-            'series_id': 'LE1927H',
-            'episode_id': 'LE1927H001S00',
-            'season_number': 11,
-            'season': 'Season 11',
-            'episode_number': 1,
-            'episode': 'Episode 1',
-            'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/le/LE1927H001S00_5d954fbd79e25_1280.jpg',
             'timestamp': 1569445289,
         },
-        'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
-        'params': {
-            'skip_download': True,
-        },
-    }, {
-        'note': 'No episode number',
-        'url': 'https://iview.abc.net.au/show/four-corners/series/2022/video/NC2203H039S00',
-        'md5': '77cb7d8434440e3b28fbebe331c2456a',
-        'info_dict': {
-            'id': 'NC2203H039S00',
-            'ext': 'mp4',
-            'title': 'Series 2022 Locking Up Kids',
-            'series': 'Four Corners',
-            'description': 'md5:54829ca108846d1a70e1fcce2853e720',
-            'upload_date': '20221114',
-            'uploader_id': 'abc1',
-            'series_id': 'NC2203H',
-            'episode_id': 'NC2203H039S00',
-            'season_number': 2022,
-            'season': 'Season 2022',
-            'episode_number': None,
-            'episode': 'Locking Up Kids',
-            'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/nc/NC2203H039S00_636d8a0944a22_1920.jpg',
-            'timestamp': 1668460497,
-        },
-        'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
-        'params': {
-            'skip_download': True,
-        },
-    }, {
-        'note': 'No episode name or number',
-        'url': 'https://iview.abc.net.au/show/landline/series/2021/video/RF2004Q043S00',
-        'md5': '2e17dec06b13cc81dc119d2565289396',
-        'info_dict': {
-            'id': 'RF2004Q043S00',
-            'ext': 'mp4',
-            'title': 'Series 2021',
-            'series': 'Landline',
-            'description': 'md5:c9f30d9c0c914a7fd23842f6240be014',
-            'upload_date': '20211205',
-            'uploader_id': 'abc1',
-            'series_id': 'RF2004Q',
-            'episode_id': 'RF2004Q043S00',
-            'season_number': 2021,
-            'season': 'Season 2021',
-            'episode_number': None,
-            'episode': None,
-            'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/rf/RF2004Q043S00_61a950639dbc0_1920.jpg',
-            'timestamp': 1638710705,
-        },
-        'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
         'params': {
             'skip_download': True,
         },
@@ -338,8 +254,6 @@ class ABCIViewIE(InfoExtractor):
             'episode_number': int_or_none(self._search_regex(
                 r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
             'episode_id': house_number,
-            'episode': self._search_regex(
-                r'^(?:Series\s+\d+)?\s*(?:Ep\s+\d+)?\s*(.*)$', title, 'episode', default='') or None,
             'uploader_id': video_params.get('channel'),
             'formats': formats,
             'subtitles': subtitles,

View File

@@ -338,7 +338,6 @@ class BiographyIE(AENetworksBaseIE):
             'skip_download': True,
         },
         'add_ie': ['ThePlatform'],
-        'skip': '404 Not Found',
     }]

     def _real_extract(self, url):

View File

@@ -26,7 +26,6 @@ class AMCNetworksIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
             # m3u8 download
             'skip_download': True,
         },
-        'skip': '404 Not Found',
     }, {
         'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
         'only_matching': True,

View File

@@ -66,7 +66,6 @@ class CBCIE(InfoExtractor):
             'uploader': 'CBCC-NEW',
             'timestamp': 255977160,
         },
-        'skip': '404 Not Found',
     }, {
         # multiple iframes
         'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
@@ -98,7 +97,7 @@ class CBCIE(InfoExtractor):
         # multiple CBC.APP.Caffeine.initInstance(...)
         'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238',
         'info_dict': {
-            'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',  # FIXME
+            'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',
             'id': 'dog-indoor-exercise-winter-1.3928238',
             'description': 'md5:c18552e41726ee95bd75210d1ca9194c',
         },
@@ -477,10 +476,6 @@ class CBCGemPlaylistIE(InfoExtractor):
             'id': 'schitts-creek/s06',
             'title': 'Season 6',
             'description': 'md5:6a92104a56cbeb5818cc47884d4326a2',
-            'series': 'Schitt\'s Creek',
-            'season_number': 6,
-            'season': 'Season 6',
-            'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/season/perso/cbc_schitts_creek_season_06_carousel_v03.jpg?impolicy=ott&im=Resize=(_Size_)&quality=75',
         },
     }, {
         'url': 'https://gem.cbc.ca/schitts-creek/s06',

View File

@@ -101,7 +101,6 @@ class CBSIE(CBSBaseIE):
             # m3u8 download
             'skip_download': True,
         },
-        'skip': 'Subscription required',
     }, {
         'url': 'https://www.cbs.com/shows/video/sZH1MGgomIosZgxGJ1l263MFq16oMtW1/',
         'info_dict': {
@@ -118,7 +117,6 @@ class CBSIE(CBSBaseIE):
         },
         'expected_warnings': [
             'This content expired on', 'No video formats found', 'Requested format is not available'],
-        'skip': '404 Not Found',
     }, {
         'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
         'only_matching': True,

View File

@@ -19,7 +19,6 @@ class CNBCIE(InfoExtractor):
             # m3u8 download
             'skip_download': True,
         },
-        'skip': 'Dead link',
     }

     def _real_extract(self, url):
@@ -50,7 +49,6 @@ class CNBCVideoIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
-        'skip': 'Dead link',
     }

     def _real_extract(self, url):

View File

@@ -1687,7 +1687,7 @@ class InfoExtractor:
     def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
         """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
         rectx = re.escape(context_name)
-        FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){.*?\breturn\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
+        FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){(?:.*?)return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
         js, arg_keys, arg_vals = self._search_regex(
             (rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
             webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),

View File

@@ -41,7 +41,7 @@ class CorusIE(ThePlatformFeedIE):  # XXX: Do not subclass from concrete IE
                         )
                     '''
     _TESTS = [{
-        'url': 'https://www.hgtv.ca/video/bryan-inc/movie-night-popcorn-with-bryan/870923331648/',
+        'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
         'info_dict': {
             'id': '870923331648',
             'ext': 'mp4',
@@ -54,7 +54,6 @@ class CorusIE(ThePlatformFeedIE):  # XXX: Do not subclass from concrete IE
             'skip_download': True,
         },
         'expected_warnings': ['Failed to parse JSON'],
-        # FIXME: yt-dlp wrongly raises for geo restriction
     }, {
         'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
         'only_matching': True,

View File

@@ -1,63 +0,0 @@
-from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    parse_iso8601,
-    str_or_none,
-    traverse_obj,
-    url_or_none,
-)
-
-
-class ErocastIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?erocast\.me/track/(?P<id>[0-9]+)'
-    _TESTS = [{
-        'url': 'https://erocast.me/track/9787/f',
-        'md5': 'af63b91f5f231096aba54dd682abea3b',
-        'info_dict': {
-            'id': '9787',
-            'title': '[F4M] Your roommate, who is definitely not possessed by an alien, suddenly wants to fuck you',
-            'url': 'https://erocast.s3.us-east-2.wasabisys.com/1220419/track.m3u8',
-            'ext': 'm4a',
-            'age_limit': 18,
-            'release_timestamp': 1696178652,
-            'release_date': '20231001',
-            'modified_timestamp': int,
-            'modified_date': str,
-            'description': 'ExtraTerrestrial Tuesday!',
-            'uploader': 'clarissaisshy',
-            'uploader_id': '8113',
-            'uploader_url': 'https://erocast.me/clarissaisshy',
-            'thumbnail': 'https://erocast.s3.us-east-2.wasabisys.com/1220418/conversions/1696179247-lg.jpg',
-            'duration': 2307,
-            'view_count': int,
-            'comment_count': int,
-            'webpage_url': 'https://erocast.me/track/9787/f4m-your-roommate-who-is-definitely-not-possessed-by-an-alien-suddenly-wants-to-fuck-you',
-        }
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        data = self._search_json(
-            rf'<script>\s*var song_data_{video_id}\s*=', webpage, 'data', video_id, end_pattern=r'</script>')
-
-        return {
-            'id': video_id,
-            'formats': self._extract_m3u8_formats(
-                data.get('file_url') or data['stream_url'], video_id, 'm4a', m3u8_id='hls'),
-            'age_limit': 18,
-            **traverse_obj(data, {
-                'title': ('title', {str}),
-                'description': ('description', {str}),
-                'release_timestamp': ('created_at', {parse_iso8601}),
-                'modified_timestamp': ('updated_at', {parse_iso8601}),
-                'uploader': ('user', 'name', {str}),
-                'uploader_id': ('user', 'id', {str_or_none}),
-                'uploader_url': ('user', 'permalink_url', {url_or_none}),
-                'thumbnail': ('artwork_url', {url_or_none}),
-                'duration': ('duration', {int_or_none}),
-                'view_count': ('plays', {int_or_none}),
-                'comment_count': ('comment_count', {int_or_none}),
-                'webpage_url': ('permalink_url', {url_or_none}),
-            }),
-        }

View File

@@ -58,8 +58,6 @@ class GenericIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'trailer',
                 'upload_date': '20100513',
-                'direct': True,
-                'timestamp': 1273772943.0,
             }
         },
         # Direct link to media delivered compressed (until Accept-Encoding is *)
@@ -103,8 +101,6 @@ class GenericIE(InfoExtractor):
                 'ext': 'webm',
                 'title': '5_Lennart_Poettering_-_Systemd',
                 'upload_date': '20141120',
-                'direct': True,
-                'timestamp': 1416498816.0,
             },
             'expected_warnings': [
                 'URL could be a direct video link, returning it as such.'
@@ -137,7 +133,6 @@ class GenericIE(InfoExtractor):
                     'upload_date': '20201204',
                 },
             }],
-            'skip': 'Dead link',
         },
         # RSS feed with item with description and thumbnails
         {
@@ -150,12 +145,12 @@ class GenericIE(InfoExtractor):
             'playlist': [{
                 'info_dict': {
                     'ext': 'm4a',
-                    'id': '818a5d38-01cd-152f-2231-ee479677fa82',
+                    'id': 'c1c879525ce2cb640b344507e682c36d',
                     'title': 're:Hydrogen!',
                     'description': 're:.*In this episode we are going.*',
                     'timestamp': 1567977776,
                     'upload_date': '20190908',
-                    'duration': 423,
+                    'duration': 459,
                     'thumbnail': r're:^https?://.*\.jpg$',
                     'episode_number': 1,
                     'season_number': 1,
@@ -272,7 +267,6 @@ class GenericIE(InfoExtractor):
             'params': {
                 'skip_download': True,
             },
-            'skip': '404 Not Found',
         },
         # MPD from http://dash-mse-test.appspot.com/media.html
         {
@@ -284,7 +278,6 @@ class GenericIE(InfoExtractor):
                 'title': 'car-20120827-manifest',
                 'formats': 'mincount:9',
                 'upload_date': '20130904',
-                'timestamp': 1378272859.0,
             },
         },
         # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
@@ -325,7 +318,7 @@ class GenericIE(InfoExtractor):
                 'id': 'cmQHVoWB5FY',
                 'ext': 'mp4',
                 'upload_date': '20130224',
-                'uploader_id': '@TheVerge',
+                'uploader_id': 'TheVerge',
                 'description': r're:^Chris Ziegler takes a look at the\.*',
                 'uploader': 'The Verge',
                 'title': 'First Firefox OS phones side-by-side',

View File

@@ -60,7 +60,7 @@ class GofileIE(InfoExtractor):
         account_data = self._download_json(
             'https://api.gofile.io/createAccount', None, note='Getting a new guest account')
         self._TOKEN = account_data['data']['token']
-        self._set_cookie('.gofile.io', 'accountToken', self._TOKEN)
+        self._set_cookie('gofile.io', 'accountToken', self._TOKEN)

     def _entries(self, file_id):
         query_params = {
View File

@@ -499,10 +499,9 @@ class IqIE(InfoExtractor):
             'tm': tm,
             'qdy': 'a',
             'qds': 0,
-            'k_ft1': '143486267424900',
-            'k_ft4': '1572868',
-            'k_ft7': '4',
-            'k_ft5': '1',
+            'k_ft1': 141287244169348,
+            'k_ft4': 34359746564,
+            'k_ft5': 1,
             'bop': JSON.stringify({
                 'version': '10.0',
                 'dfp': dfp
@@ -530,22 +529,14 @@ class IqIE(InfoExtractor):
         webpack_js_url = self._proto_relative_url(self._search_regex(
             r'<script src="((?:https?:)?//stc\.iqiyipic\.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL'))
         webpack_js = self._download_webpage(webpack_js_url, video_id, note='Downloading webpack JS', errnote='Unable to download webpack JS')

         webpack_map = self._search_json(
             r'["\']\s*\+\s*', webpack_js, 'JS locations', video_id,
             contains_pattern=r'{\s*(?:\d+\s*:\s*["\'][\da-f]+["\']\s*,?\s*)+}',
             end_pattern=r'\[\w+\]\+["\']\.js', transform_source=js_to_json)
-        replacement_map = self._search_json(
-            r'["\']\s*\+\(\s*', webpack_js, 'replacement map', video_id,
-            contains_pattern=r'{\s*(?:\d+\s*:\s*["\'][\w.-]+["\']\s*,?\s*)+}',
-            end_pattern=r'\[\w+\]\|\|\w+\)\+["\']\.', transform_source=js_to_json,
-            fatal=False) or {}

         for module_index in reversed(webpack_map):
-            real_module = replacement_map.get(module_index) or module_index
             module_js = self._download_webpage(
-                f'https://stc.iqiyipic.com/_next/static/chunks/{real_module}.{webpack_map[module_index]}.js',
+                f'https://stc.iqiyipic.com/_next/static/chunks/{module_index}.{webpack_map[module_index]}.js',
                 video_id, note=f'Downloading #{module_index} module JS', errnote='Unable to download module JS', fatal=False) or ''
             if 'vms request' in module_js:
                 self.cache.store('iq', 'player_js', module_js)

View File

@ -22,11 +22,10 @@ from ..utils import (
class LBRYBaseIE(InfoExtractor): class LBRYBaseIE(InfoExtractor):
_BASE_URL_REGEX = r'(?x)(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)' _BASE_URL_REGEX = r'(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)'
_CLAIM_ID_REGEX = r'[0-9a-f]{1,40}' _CLAIM_ID_REGEX = r'[0-9a-f]{1,40}'
_OPT_CLAIM_ID = '[^$@:/?#&]+(?:[:#]%s)?' % _CLAIM_ID_REGEX _OPT_CLAIM_ID = '[^:/?#&]+(?:[:#]%s)?' % _CLAIM_ID_REGEX
_SUPPORTED_STREAM_TYPES = ['video', 'audio'] _SUPPORTED_STREAM_TYPES = ['video', 'audio']
_PAGE_SIZE = 50
def _call_api_proxy(self, method, display_id, params, resource): def _call_api_proxy(self, method, display_id, params, resource):
headers = {'Content-Type': 'application/json-rpc'} headers = {'Content-Type': 'application/json-rpc'}
@ -70,78 +69,18 @@ class LBRYBaseIE(InfoExtractor):
'duration': ('value', stream_type, 'duration', {int_or_none}), 'duration': ('value', stream_type, 'duration', {int_or_none}),
'channel': ('signing_channel', 'value', 'title', {str}), 'channel': ('signing_channel', 'value', 'title', {str}),
'channel_id': ('signing_channel', 'claim_id', {str}), 'channel_id': ('signing_channel', 'claim_id', {str}),
'uploader_id': ('signing_channel', 'name', {str}),
}) })
if info.get('uploader_id') and info.get('channel_id'): channel_name = traverse_obj(stream, ('signing_channel', 'name', {str}))
info['channel_url'] = self._permanent_url(url, info['uploader_id'], info['channel_id']) if channel_name and info.get('channel_id'):
info['channel_url'] = self._permanent_url(url, channel_name, info['channel_id'])
return info return info
def _fetch_page(self, display_id, url, params, page):
page += 1
page_params = {
'no_totals': True,
'page': page,
'page_size': self._PAGE_SIZE,
**params,
}
result = self._call_api_proxy(
'claim_search', display_id, page_params, f'page {page}')
for item in traverse_obj(result, ('items', lambda _, v: v['name'] and v['claim_id'])):
yield {
**self._parse_stream(item, url),
'_type': 'url',
'id': item['claim_id'],
'url': self._permanent_url(url, item['name'], item['claim_id']),
}
def _playlist_entries(self, url, display_id, claim_param, metadata):
qs = parse_qs(url)
content = qs.get('content', [None])[0]
params = {
'fee_amount': qs.get('fee_amount', ['>=0'])[0],
'order_by': {
'new': ['release_time'],
'top': ['effective_amount'],
'trending': ['trending_group', 'trending_mixed'],
}[qs.get('order', ['new'])[0]],
'claim_type': 'stream',
'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
**claim_param,
}
duration = qs.get('duration', [None])[0]
if duration:
params['duration'] = {
'long': '>=1200',
'short': '<=240',
}[duration]
language = qs.get('language', ['all'])[0]
if language != 'all':
languages = [language]
if language == 'en':
languages.append('none')
params['any_languages'] = languages
entries = OnDemandPagedList(
functools.partial(self._fetch_page, display_id, url, params),
self._PAGE_SIZE)
return self.playlist_result(
entries, display_id, **traverse_obj(metadata, ('value', {
'title': 'title',
'description': 'description',
})))
class LBRYIE(LBRYBaseIE): class LBRYIE(LBRYBaseIE):
IE_NAME = 'lbry' IE_NAME = 'lbry'
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf''' _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>\$/[^/]+/[^/]+/{1}|@{0}/{0}|(?!@){0})'.format(LBRYBaseIE._OPT_CLAIM_ID, LBRYBaseIE._CLAIM_ID_REGEX)
(?:\$/(?:download|embed)/)?
(?P<id>
[^$@:/?#]+/{LBRYBaseIE._CLAIM_ID_REGEX}
|(?:@{LBRYBaseIE._OPT_CLAIM_ID}/)?{LBRYBaseIE._OPT_CLAIM_ID}
)'''
_TESTS = [{ _TESTS = [{
# Video # Video
'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1', 'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
@ -159,7 +98,6 @@ class LBRYIE(LBRYBaseIE):
'height': 720, 'height': 720,
'thumbnail': 'https://spee.ch/7/67f2d809c263288c.png', 'thumbnail': 'https://spee.ch/7/67f2d809c263288c.png',
'license': 'None', 'license': 'None',
'uploader_id': '@Mantega',
'duration': 346, 'duration': 346,
'channel': 'LBRY/Odysee rats united!!!', 'channel': 'LBRY/Odysee rats united!!!',
'channel_id': '1c8ad6a2ab4e889a71146ae4deeb23bb92dab627', 'channel_id': '1c8ad6a2ab4e889a71146ae4deeb23bb92dab627',
@ -193,7 +131,6 @@ class LBRYIE(LBRYBaseIE):
'vcodec': 'none', 'vcodec': 'none',
'thumbnail': 'https://spee.ch/d/0bc63b0e6bf1492d.png', 'thumbnail': 'https://spee.ch/d/0bc63b0e6bf1492d.png',
'license': 'None', 'license': 'None',
'uploader_id': '@LBRYFoundation',
} }
}, { }, {
'url': 'https://odysee.com/@gardeningincanada:b/plants-i-will-never-grow-again.-the:e', 'url': 'https://odysee.com/@gardeningincanada:b/plants-i-will-never-grow-again.-the:e',
@ -212,7 +149,6 @@ class LBRYIE(LBRYBaseIE):
'channel': 'Gardening In Canada', 'channel': 'Gardening In Canada',
'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc', 'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc',
'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc', 'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc',
'uploader_id': '@gardeningincanada',
'formats': 'mincount:3', 'formats': 'mincount:3',
'thumbnail': 'https://thumbnails.lbry.com/AgHSc_HzrrE', 'thumbnail': 'https://thumbnails.lbry.com/AgHSc_HzrrE',
'license': 'Copyrighted (contact publisher)', 'license': 'Copyrighted (contact publisher)',
@ -238,7 +174,6 @@ class LBRYIE(LBRYBaseIE):
'formats': 'mincount:1', 'formats': 'mincount:1',
'thumbnail': 'startswith:https://thumb', 'thumbnail': 'startswith:https://thumb',
'license': 'None', 'license': 'None',
'uploader_id': '@RT',
}, },
'params': {'skip_download': True} 'params': {'skip_download': True}
}, { }, {
@@ -249,13 +184,12 @@ class LBRYIE(LBRYBaseIE):
            'id': '41fbfe805eb73c8d3012c0c49faa0f563274f634',
            'ext': 'mp4',
            'title': 'Biotechnological Invasion of Skin (April 2023)',
-            'description': 'md5:fe28689db2cb7ba3436d819ac3ffc378',
+            'description': 'md5:709a2f4c07bd8891cda3a7cc2d6fcf5c',
            'channel': 'Wicked Truths',
            'channel_id': '23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0',
            'channel_url': 'https://odysee.com/@wickedtruths:23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0',
-            'uploader_id': '@wickedtruths',
-            'timestamp': 1695114347,
-            'upload_date': '20230919',
+            'timestamp': 1685790036,
+            'upload_date': '20230603',
            'release_timestamp': 1685617473,
            'release_date': '20230601',
            'duration': 1063,
@@ -295,10 +229,10 @@ class LBRYIE(LBRYBaseIE):
    def _real_extract(self, url):
        display_id = self._match_id(url)
-        if display_id.startswith('@'):
-            display_id = display_id.replace(':', '#')
+        if display_id.startswith('$/'):
+            display_id = display_id.split('/', 2)[-1].replace('/', ':')
        else:
-            display_id = display_id.replace('/', ':')
+            display_id = display_id.replace(':', '#')
        display_id = urllib.parse.unquote(display_id)
        uri = 'lbry://' + display_id
        result = self._resolve_url(uri, display_id, 'stream')
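Editor's note: both branches above normalize a web claim path into an lbry:// URI. A minimal standalone sketch of the non-`$/` case (the sample path is illustrative):

import urllib.parse

def path_to_lbry_uri(display_id):
    # Web paths separate claim name and claim ID with ':', lbry:// URIs use '#'
    return 'lbry://' + urllib.parse.unquote(display_id.replace(':', '#'))

print(path_to_lbry_uri('@Mantega:1/First-day-LBRY:1'))
# lbry://@Mantega#1/First-day-LBRY#1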
@@ -365,7 +299,7 @@ class LBRYIE(LBRYBaseIE):
class LBRYChannelIE(LBRYBaseIE):
    IE_NAME = 'lbry:channel'
-    _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'(?P<id>@{LBRYBaseIE._OPT_CLAIM_ID})/?(?:[?&]|$)'
+    _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>@%s)/?(?:[?&]|$)' % LBRYBaseIE._OPT_CLAIM_ID
    _TESTS = [{
        'url': 'https://lbry.tv/@LBRYFoundation:0',
        'info_dict': {
@@ -381,50 +315,65 @@ class LBRYChannelIE(LBRYBaseIE):
        'url': 'lbry://@lbry#3f',
        'only_matching': True,
    }]
+    _PAGE_SIZE = 50
+
+    def _fetch_page(self, claim_id, url, params, page):
+        page += 1
+        page_params = {
+            'channel_ids': [claim_id],
+            'claim_type': 'stream',
+            'no_totals': True,
+            'page': page,
+            'page_size': self._PAGE_SIZE,
+        }
+        page_params.update(params)
+        result = self._call_api_proxy(
+            'claim_search', claim_id, page_params, 'page %d' % page)
+        for item in (result.get('items') or []):
+            stream_claim_name = item.get('name')
+            stream_claim_id = item.get('claim_id')
+            if not (stream_claim_name and stream_claim_id):
+                continue
+
+            yield {
+                **self._parse_stream(item, url),
+                '_type': 'url',
+                'id': stream_claim_id,
+                'url': self._permanent_url(url, stream_claim_name, stream_claim_id),
+            }
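Editor's note: the `_fetch_page` generator plugs into yt-dlp's OnDemandPagedList, which only invokes the page function when its entries are actually consumed. A toy sketch, assuming yt-dlp is installed and with a dummy fetcher standing in for the claim_search call:

import functools

from yt_dlp.utils import OnDemandPagedList

PAGE_SIZE = 50

def fetch_page(label, page):
    # Called lazily with a zero-based page number
    print(f'fetching page {page + 1} of {label}')
    start = page * PAGE_SIZE
    yield from (f'{label}-item-{i}' for i in range(start, start + PAGE_SIZE))

entries = OnDemandPagedList(functools.partial(fetch_page, 'demo'), PAGE_SIZE)
print(entries.getslice(0, 3))  # touches only the first page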
    def _real_extract(self, url):
        display_id = self._match_id(url).replace(':', '#')
-        result = self._resolve_url(f'lbry://{display_id}', display_id, 'channel')
+        result = self._resolve_url(
+            'lbry://' + display_id, display_id, 'channel')
        claim_id = result['claim_id']
-
-        return self._playlist_entries(url, claim_id, {'channel_ids': [claim_id]}, result)
-
-
-class LBRYPlaylistIE(LBRYBaseIE):
-    IE_NAME = 'lbry:playlist'
-    _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'\$/(?:play)?list/(?P<id>[0-9a-f-]+)'
-    _TESTS = [{
-        'url': 'https://odysee.com/$/playlist/ffef782f27486f0ac138bde8777f72ebdd0548c2',
-        'info_dict': {
-            'id': 'ffef782f27486f0ac138bde8777f72ebdd0548c2',
-            'title': 'Théâtre Classique',
-            'description': 'Théâtre Classique',
-        },
-        'playlist_mincount': 4,
-    }, {
-        'url': 'https://odysee.com/$/list/9c6658b3dd21e4f2a0602d523a13150e2b48b770',
-        'info_dict': {
-            'id': '9c6658b3dd21e4f2a0602d523a13150e2b48b770',
-            'title': 'Social Media Exposed',
-            'description': 'md5:98af97317aacd5b85d595775ea37d80e',
-        },
-        'playlist_mincount': 34,
-    }, {
-        'url': 'https://odysee.com/$/playlist/938fb11d-215f-4d1c-ad64-723954df2184',
-        'info_dict': {
-            'id': '938fb11d-215f-4d1c-ad64-723954df2184',
-        },
-        'playlist_mincount': 1000,
-    }]
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        result = traverse_obj(self._call_api_proxy('claim_search', display_id, {
-            'claim_ids': [display_id],
-            'no_totals': True,
-            'page': 1,
-            'page_size': self._PAGE_SIZE,
-        }, 'playlist'), ('items', 0))
-        claim_param = {'claim_ids': traverse_obj(result, ('value', 'claims', ..., {str}))}
-        return self._playlist_entries(url, display_id, claim_param, result)
+        qs = parse_qs(url)
+        content = qs.get('content', [None])[0]
+        params = {
+            'fee_amount': qs.get('fee_amount', ['>=0'])[0],
+            'order_by': {
+                'new': ['release_time'],
+                'top': ['effective_amount'],
+                'trending': ['trending_group', 'trending_mixed'],
+            }[qs.get('order', ['new'])[0]],
+            'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
+        }
+        duration = qs.get('duration', [None])[0]
+        if duration:
+            params['duration'] = {
+                'long': '>=1200',
+                'short': '<=240',
+            }[duration]
+        language = qs.get('language', ['all'])[0]
+        if language != 'all':
+            languages = [language]
+            if language == 'en':
+                languages.append('none')
+            params['any_languages'] = languages
+        entries = OnDemandPagedList(
+            functools.partial(self._fetch_page, claim_id, url, params),
+            self._PAGE_SIZE)
+        result_value = result.get('value') or {}
+        return self.playlist_result(
+            entries, claim_id, result_value.get('title'),
+            result_value.get('description'))
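Editor's note: to make the re-added query handling concrete, the channel URL's parameters are translated into claim_search arguments. A stripped-down sketch using the standard library in place of yt-dlp's parse_qs helper:

import urllib.parse

def order_by_param(url):
    qs = urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
    return {
        'new': ['release_time'],
        'top': ['effective_amount'],
        'trending': ['trending_group', 'trending_mixed'],
    }[qs.get('order', ['new'])[0]]

print(order_by_param('https://odysee.com/@lbry:3f?order=top'))
# ['effective_amount']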

View File

@@ -13,7 +13,7 @@ from ..utils import (
class LiTVIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)'
-    _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?content_id=%s'
+    _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s'
    _TESTS = [{
        'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
@@ -21,18 +21,16 @@ class LiTVIE(InfoExtractor):
            'id': 'VOD00041606',
            'title': '花千骨',
        },
-        'playlist_count': 51,  # 50 episodes + 1 trailer
+        'playlist_count': 50,
    }, {
        'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
-        'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a',
+        'md5': '969e343d9244778cb29acec608e53640',
        'info_dict': {
            'id': 'VOD00041610',
            'ext': 'mp4',
            'title': '花千骨第1集',
            'thumbnail': r're:https?://.*\.jpg$',
-            'description': '《花千骨》陸劇線上看。十六年前,平靜的村莊內,一名女嬰隨異相出生,途徑此地的蜀山掌門清虛道長算出此女命運非同一般,她體內散發的異香易招惹妖魔。一念慈悲下,他在村莊周邊設下結界阻擋妖魔入侵,讓其年滿十六後去蜀山,並賜名花千骨。',
-            'categories': ['奇幻', '愛情', '中國', '仙俠'],
-            'episode': 'Episode 1',
+            'description': 'md5:c7017aa144c87467c4fb2909c4b05d6f',
            'episode_number': 1,
        },
        'params': {
@@ -48,17 +46,20 @@ class LiTVIE(InfoExtractor):
            'title': '芈月傳第1集 霸星芈月降世楚國',
            'description': '楚威王二年,太史令唐昧夜觀星象,發現霸星即將現世。王后得知霸星的預言後,想盡辦法不讓孩子順利出生,幸得莒姬相護化解危機。沒想到眾人期待下出生的霸星卻是位公主,楚威王對此失望至極。楚王后命人將女嬰丟棄河中,居然奇蹟似的被少司命像攔下,楚威王認為此女非同凡響,為她取名芈月。',
        },
-        'skip': 'No longer exists',
+        'skip': 'Georestricted to Taiwan',
    }]

-    def _extract_playlist(self, playlist_data, content_type):
+    def _extract_playlist(self, season_list, video_id, program_info, prompt=True):
+        episode_title = program_info['title']
+        content_id = season_list['contentId']
        all_episodes = [
            self.url_result(smuggle_url(
-                self._URL_TEMPLATE % (content_type, episode['contentId']),
+                self._URL_TEMPLATE % (program_info['contentType'], episode['contentId']),
                {'force_noplaylist': True}))  # To prevent infinite recursion
-            for episode in traverse_obj(playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))]
-        return self.playlist_result(all_episodes, playlist_data['contentId'], playlist_data.get('title'))
+            for episode in season_list['episode']]
+        return self.playlist_result(all_episodes, content_id, episode_title)

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
@@ -67,31 +68,24 @@ class LiTVIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)

-        if self._search_regex(
-                r'(?i)<meta\s[^>]*http-equiv="refresh"\s[^>]*content="[0-9]+;\s*url=https://www\.litv\.tv/"',
-                webpage, 'meta refresh redirect', default=False, group=0):
-            raise ExtractorError('No such content found', expected=True)
-
        program_info = self._parse_json(self._search_regex(
            r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
            video_id)

+        season_list = list(program_info.get('seasonList', {}).values())
+        playlist_id = traverse_obj(season_list, 0, 'contentId')
+        if self._yes_playlist(playlist_id, video_id, smuggled_data):
+            return self._extract_playlist(season_list[0], video_id, program_info)
+
-        # In browsers `getProgramInfo` request is always issued. Usually this
+        # In browsers `getMainUrl` request is always issued. Usually this
        # endpoint gives the same result as the data embedded in the webpage.
-        # If, for some reason, there are no embedded data, we do an extra request.
+        # If georestricted, there are no embedded data, so an extra request is
+        # necessary to get the error code
        if 'assetId' not in program_info:
            program_info = self._download_json(
                'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
                query={'contentId': video_id},
                headers={'Accept': 'application/json'})

-        series_id = program_info['seriesId']
-        if self._yes_playlist(series_id, video_id, smuggled_data):
-            playlist_data = self._download_json(
-                'https://www.litv.tv/vod/ajax/getSeriesTree', video_id,
-                query={'seriesId': series_id}, headers={'Accept': 'application/json'})
-            return self._extract_playlist(playlist_data, program_info['contentType'])
-
        video_data = self._parse_json(self._search_regex(
            r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
            webpage, 'video data', default='{}'), video_id)
@@ -102,7 +96,7 @@ class LiTVIE(InfoExtractor):
            'contentType': program_info['contentType'],
        }
        video_data = self._download_json(
-            'https://www.litv.tv/vod/ajax/getMainUrlNoAuth', video_id,
+            'https://www.litv.tv/vod/getMainUrl', video_id,
            data=json.dumps(payload).encode('utf-8'),
            headers={'Content-Type': 'application/json'})

View File

@@ -127,8 +127,7 @@ class MediasetIE(ThePlatformBaseIE):
        },
        'params': {
            'skip_download': True,
-        },
-        'skip': 'Dead link',
+        }
    }, {
        # WittyTV embed
        'url': 'https://www.wittytv.it/mauriziocostanzoshow/ultima-puntata-venerdi-25-novembre/',

View File

@@ -23,7 +23,6 @@ class MovieClipsIE(InfoExtractor):
            'uploader': 'Movieclips',
        },
        'add_ie': ['ThePlatform'],
-        'skip': 'redirects to YouTube',
    }

    def _real_extract(self, url):

View File

@@ -24,7 +24,6 @@ class NationalGeographicVideoIE(InfoExtractor):
            'uploader': 'NAGS',
        },
        'add_ie': ['ThePlatform'],
-        'skip': 'Redirects to main page',
    },
    {
        'url': 'http://video.nationalgeographic.com/wild/when-sharks-attack/the-real-jaws',
@@ -39,7 +38,6 @@ class NationalGeographicVideoIE(InfoExtractor):
            'uploader': 'NAGS',
        },
        'add_ie': ['ThePlatform'],
-        'skip': 'Redirects to main page',
    },
]
@@ -77,7 +75,6 @@ class NationalGeographicTVIE(FOXIE):  # XXX: Do not subclass from concrete IE
        'params': {
            'skip_download': True,
        },
-        'skip': 'Content not available',
    }]
    _HOME_PAGE_URL = 'https://www.nationalgeographic.com/tv/'
    _API_KEY = '238bb0a0c2aba67922c48709ce0c06fd'

View File

@@ -284,7 +284,7 @@ class NBCSportsIE(InfoExtractor):
    _TESTS = [{
        # iframe src
-        'url': 'https://www.nbcsports.com/watch/nfl/profootballtalk/pft-pm/unpacking-addisons-reckless-driving-citation',
+        'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
        'info_dict': {
            'id': 'PHJSaFWbrTY9',
            'ext': 'mp4',
@@ -379,7 +379,7 @@ class NBCNewsIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
    _TESTS = [
        {
            'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
-            'md5': 'fb3dcd2d7b1dd9804305fa2fc95ab610',  # md5 tends to fluctuate
+            'md5': 'cf4bc9e6ce0130f00f545d80ecedd4bf',
            'info_dict': {
                'id': '269389891880',
                'ext': 'mp4',
@@ -387,8 +387,6 @@ class NBCNewsIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
                'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
                'timestamp': 1401363060,
                'upload_date': '20140529',
-                'duration': 46.0,
-                'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/140529/p_tweet_snow_140529.jpg',
            },
        },
        {
@@ -404,7 +402,7 @@ class NBCNewsIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
        },
        {
            'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
-            'md5': '40d0e48c68896359c80372306ece0fc3',
+            'md5': '8eb831eca25bfa7d25ddd83e85946548',
            'info_dict': {
                'id': '394064451844',
                'ext': 'mp4',
@@ -412,13 +410,11 @@ class NBCNewsIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
                'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
                'timestamp': 1423104900,
                'upload_date': '20150205',
-                'duration': 1236.0,
-                'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/__NEW/nn_netcast_150204.jpg',
            },
        },
        {
            'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
-            'md5': 'ffb59bcf0733dc3c7f0ace907f5e3939',
+            'md5': '4a8c4cec9e1ded51060bdda36ff0a5c0',
            'info_dict': {
                'id': 'n431456',
                'ext': 'mp4',
@@ -426,13 +422,11 @@ class NBCNewsIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
                'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
                'upload_date': '20150922',
                'timestamp': 1442917800,
-                'duration': 37.0,
-                'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/__NEW/x_lon_vwhorn_150922.jpg',
            },
        },
        {
            'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
-            'md5': '693d1fa21d23afcc9b04c66b227ed9ff',
+            'md5': '118d7ca3f0bea6534f119c68ef539f71',
            'info_dict': {
                'id': '669831235788',
                'ext': 'mp4',
@@ -440,8 +434,6 @@ class NBCNewsIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
                'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
                'upload_date': '20160420',
                'timestamp': 1461152093,
-                'duration': 69.0,
-                'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/201604/2016-04-20T11-35-09-133Z--1280x720.jpg',
            },
        },
        {
@@ -455,7 +447,6 @@ class NBCNewsIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
                'thumbnail': r're:^https?://.*\.jpg$',
                'timestamp': 1406937606,
                'upload_date': '20140802',
-                'duration': 940.0,
            },
        },
        {
@@ -544,7 +535,6 @@ class NBCOlympicsIE(InfoExtractor):
            'upload_date': '20160815',
            'uploader': 'NBCU-SPORTS',
        },
-        'skip': '404 Not Found',
    }

    def _real_extract(self, url):
@@ -588,7 +578,6 @@ class NBCOlympicsStreamIE(AdobePassIE):
        'params': {
            'skip_download': 'm3u8',
        },
-        'skip': 'Livestream',
    }, {
        'note': 'Plain m3u8 source URL',
        'url': 'https://stream.nbcolympics.com/gymnastics-event-finals-mens-floor-pommel-horse-womens-vault-bars',
@@ -600,7 +589,6 @@ class NBCOlympicsStreamIE(AdobePassIE):
        'params': {
            'skip_download': 'm3u8',
        },
-        'skip': 'Livestream',
    },
]

View File

@@ -2,74 +2,105 @@ import itertools
import json
import re
import time
+from base64 import b64encode
+from binascii import hexlify
+from datetime import datetime
from hashlib import md5
from random import randint

from .common import InfoExtractor
from ..aes import aes_ecb_encrypt, pkcs7_padding
+from ..compat import compat_urllib_parse_urlencode
+from ..networking import Request
from ..utils import (
    ExtractorError,
+    bytes_to_intlist,
+    error_to_compat_str,
+    float_or_none,
    int_or_none,
-    join_nonempty,
-    str_or_none,
-    strftime_or_none,
-    traverse_obj,
-    unified_strdate,
-    url_or_none,
-    urljoin,
-    variadic,
+    intlist_to_bytes,
+    try_get,
)
class NetEaseMusicBaseIE(InfoExtractor):
    _FORMATS = ['bMusic', 'mMusic', 'hMusic']
+    _NETEASE_SALT = '3go8&$8*3*3h0k(2)2'
    _API_BASE = 'http://music.163.com/api/'
-    _GEO_BYPASS = False

-    @staticmethod
-    def kilo_or_none(value):
-        return int_or_none(value, scale=1000)
+    @classmethod
+    def _encrypt(cls, dfsid):
+        salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
+        string_bytes = bytearray(str(dfsid).encode('ascii'))
+        salt_len = len(salt_bytes)
+        for i in range(len(string_bytes)):
+            string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len]
+        m = md5()
+        m.update(bytes(string_bytes))
+        result = b64encode(m.digest()).decode('ascii')
+        return result.replace('/', '_').replace('+', '-')
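Editor's note: a self-contained sketch of this legacy dfsId obfuscation (repeating-key XOR with a fixed salt, MD5, then URL-safe base64); the sample ID is made up:

from base64 import b64encode
from hashlib import md5

NETEASE_SALT = '3go8&$8*3*3h0k(2)2'

def encrypt_dfsid(dfsid):
    salt = NETEASE_SALT.encode('utf-8')
    data = bytearray(str(dfsid).encode('ascii'))
    for i in range(len(data)):
        data[i] ^= salt[i % len(salt)]  # repeating-key XOR
    digest = md5(bytes(data)).digest()
    # base64, made URL-safe the way the media URLs expect
    return b64encode(digest).decode('ascii').replace('/', '_').replace('+', '-')

print(encrypt_dfsid(7001515))  # illustrative dfsId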
-    def _create_eapi_cipher(self, api_path, query_body, cookies):
-        request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':'))
-
-        message = f'nobody{api_path}use{request_text}md5forencrypt'.encode('latin1')
-        msg_digest = md5(message).hexdigest()
-
-        data = pkcs7_padding(list(str.encode(
-            f'{api_path}-36cd479b6b5-{request_text}-36cd479b6b5-{msg_digest}')))
-        encrypted = bytes(aes_ecb_encrypt(data, list(b'e82ckenh8dichen8')))
-        return f'params={encrypted.hex().upper()}'.encode()
-
-    def _download_eapi_json(self, path, video_id, query_body, headers={}, **kwargs):
-        cookies = {
-            'osver': 'undefined',
-            'deviceId': 'undefined',
-            'appver': '8.0.0',
-            'versioncode': '140',
-            'mobilename': 'undefined',
-            'buildver': '1623435496',
-            'resolution': '1920x1080',
-            '__csrf': '',
-            'os': 'pc',
-            'channel': 'undefined',
-            'requestId': f'{int(time.time() * 1000)}_{randint(0, 1000):04}',
-            **traverse_obj(self._get_cookies(self._API_BASE), {
-                'MUSIC_U': ('MUSIC_U', {lambda i: i.value}),
-            })
-        }
-        return self._download_json(
-            urljoin('https://interface3.music.163.com/', f'/eapi{path}'), video_id,
-            data=self._create_eapi_cipher(f'/api{path}', query_body, cookies), headers={
-                'Referer': 'https://music.163.com',
-                'Cookie': '; '.join([f'{k}={v}' for k, v in cookies.items()]),
-                **headers,
-            }, **kwargs)
+    def make_player_api_request_data_and_headers(self, song_id, bitrate):
+        KEY = b'e82ckenh8dichen8'
+        URL = '/api/song/enhance/player/url'
+        now = int(time.time() * 1000)
+        rand = randint(0, 1000)
+        cookie = {
+            'osver': None,
+            'deviceId': None,
+            'appver': '8.0.0',
+            'versioncode': '140',
+            'mobilename': None,
+            'buildver': '1623435496',
+            'resolution': '1920x1080',
+            '__csrf': '',
+            'os': 'pc',
+            'channel': None,
+            'requestId': '{0}_{1:04}'.format(now, rand),
+        }
+        request_text = json.dumps(
+            {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie},
+            separators=(',', ':'))
+        message = 'nobody{0}use{1}md5forencrypt'.format(
+            URL, request_text).encode('latin1')
+        msg_digest = md5(message).hexdigest()
+
+        data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
+            URL, request_text, msg_digest)
+        data = pkcs7_padding(bytes_to_intlist(data))
+        encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY)))
+        encrypted_params = hexlify(encrypted).decode('ascii').upper()
+
+        cookie = '; '.join(
+            ['{0}={1}'.format(k, v if v is not None else 'undefined')
+             for [k, v] in cookie.items()])
+
+        headers = {
+            'User-Agent': self.extractor.get_param('http_headers')['User-Agent'],
+            'Content-Type': 'application/x-www-form-urlencoded',
+            'Referer': 'https://music.163.com',
+            'Cookie': cookie,
+        }
+        return ('params={0}'.format(encrypted_params), headers)

    def _call_player_api(self, song_id, bitrate):
-        return self._download_eapi_json(
-            '/song/enhance/player/url', song_id, {'ids': f'[{song_id}]', 'br': bitrate},
-            note=f'Downloading song URL info: bitrate {bitrate}')
+        url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
+        data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
+        try:
+            msg = 'empty result'
+            result = self._download_json(
+                url, song_id, data=data.encode('ascii'), headers=headers)
+            if result:
+                return result
+        except ExtractorError as e:
+            if type(e.cause) in (ValueError, TypeError):
+                # JSON load failure
+                raise
+        except Exception as e:
+            msg = error_to_compat_str(e)
+        self.report_warning('%s API call (%s) failed: %s' % (
+            song_id, bitrate, msg))
+        return {}
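Editor's note: for intuition, both variants build the same `params=` POST body: digest the request with a fixed sentinel string, then AES-ECB-encrypt and upper-case hex-encode it. A standalone sketch using pycryptodome instead of yt-dlp's in-tree AES helpers:

import json
from hashlib import md5

from Crypto.Cipher import AES          # pycryptodome
from Crypto.Util.Padding import pad

EAPI_KEY = b'e82ckenh8dichen8'

def eapi_params(api_path, payload):
    request_text = json.dumps(payload, separators=(',', ':'))
    digest = md5(f'nobody{api_path}use{request_text}md5forencrypt'
                 .encode('latin1')).hexdigest()
    plaintext = f'{api_path}-36cd479b6b5-{request_text}-36cd479b6b5-{digest}'
    encrypted = AES.new(EAPI_KEY, AES.MODE_ECB).encrypt(pad(plaintext.encode(), 16))
    return 'params=' + encrypted.hex().upper()

# The POST body sent to https://interface3.music.163.com/eapi/... (song ID illustrative)
print(eapi_params('/api/song/enhance/player/url', {'ids': '[33894312]', 'br': 320000}))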
    def extract_formats(self, info):
        err = 0
@@ -79,50 +110,45 @@ class NetEaseMusicBaseIE(InfoExtractor):
            details = info.get(song_format)
            if not details:
                continue

            bitrate = int_or_none(details.get('bitrate')) or 999000
-            for song in traverse_obj(self._call_player_api(song_id, bitrate), ('data', lambda _, v: url_or_none(v['url']))):
-                song_url = song['url']
+            data = self._call_player_api(song_id, bitrate)
+            for song in try_get(data, lambda x: x['data'], list) or []:
+                song_url = try_get(song, lambda x: x['url'])
+                if not song_url:
+                    continue
                if self._is_valid_url(song_url, info['id'], 'song'):
                    formats.append({
                        'url': song_url,
+                        'ext': details.get('extension'),
+                        'abr': float_or_none(song.get('br'), scale=1000),
                        'format_id': song_format,
-                        'asr': traverse_obj(details, ('sr', {int_or_none})),
-                        **traverse_obj(song, {
-                            'ext': ('type', {str}),
-                            'abr': ('br', {self.kilo_or_none}),
-                            'filesize': ('size', {int_or_none}),
-                        }),
+                        'filesize': int_or_none(song.get('size')),
+                        'asr': int_or_none(details.get('sr')),
                    })
                elif err == 0:
-                    err = traverse_obj(song, ('code', {int})) or 0
+                    err = try_get(song, lambda x: x['code'], int)

        if not formats:
+            msg = 'No media links found'
            if err != 0 and (err < 200 or err >= 400):
-                raise ExtractorError(f'No media links found (site code {err})', expected=True)
+                raise ExtractorError(
+                    '%s (site code %d)' % (msg, err, ), expected=True)
            else:
                self.raise_geo_restricted(
-                    'No media links found: probably due to geo restriction.', countries=['CN'])
+                    msg + ': probably this video is not available from your location due to geo restriction.',
+                    countries=['CN'])
        return formats

-    def query_api(self, endpoint, video_id, note):
-        result = self._download_json(
-            f'{self._API_BASE}{endpoint}', video_id, note, headers={'Referer': self._API_BASE})
-        code = traverse_obj(result, ('code', {int}))
-        message = traverse_obj(result, ('message', {str})) or ''
-        if code == -462:
-            self.raise_login_required(f'Login required to download: {message}')
-        elif code != 200:
-            raise ExtractorError(f'Failed to get meta info: {code} {message}')
-        return result
-
-    def _get_entries(self, songs_data, entry_keys=None, id_key='id', name_key='name'):
-        for song in traverse_obj(songs_data, (
-                *variadic(entry_keys, (str, bytes, dict, set)),
-                lambda _, v: int_or_none(v[id_key]) is not None)):
-            song_id = str(song[id_key])
-            yield self.url_result(
-                f'http://music.163.com/#/song?id={song_id}', NetEaseMusicIE,
-                song_id, traverse_obj(song, (name_key, {str})))
+    @classmethod
+    def convert_milliseconds(cls, ms):
+        return int(round(ms / 1000.0))
+
+    def query_api(self, endpoint, video_id, note):
+        req = Request('%s%s' % (self._API_BASE, endpoint))
+        req.headers['Referer'] = self._API_BASE
+        return self._download_json(req, video_id, note)
class NetEaseMusicIE(NetEaseMusicBaseIE):

@@ -130,18 +156,16 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
    IE_DESC = '网易云音乐'
    _VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
    _TESTS = [{
-        'url': 'https://music.163.com/#/song?id=548648087',
-        'md5': '3e909614ce09b1ccef4a3eb205441190',
+        'url': 'http://music.163.com/#/song?id=32102397',
        'info_dict': {
-            'id': '548648087',
+            'id': '32102397',
            'ext': 'mp3',
-            'title': '戒烟 (Live)',
-            'creator': '李荣浩 / 朱正廷 / 陈立农 / 尤长靖 / ONER灵超 / ONER木子洋 / 杨非同 / 陆定昊',
-            'timestamp': 1522944000,
-            'upload_date': '20180405',
-            'description': 'md5:3650af9ee22c87e8637cb2dde22a765c',
-            'subtitles': {'lyrics': [{'ext': 'lrc'}]},
-            "duration": 256,
-            'thumbnail': r're:^http.*\.jpg',
+            'title': 'Bad Blood',
+            'creator': 'Taylor Swift / Kendrick Lamar',
+            'upload_date': '20150516',
+            'timestamp': 1431792000,
+            'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
        },
    }, {
        'note': 'No lyrics.',
@@ -152,9 +176,21 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
            'title': 'Opus 28',
            'creator': 'Dustin O\'Halloran',
            'upload_date': '20080211',
+            'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
            'timestamp': 1202745600,
-            'duration': 263,
-            'thumbnail': r're:^http.*\.jpg',
        },
    }, {
+        'note': 'Has translated name.',
+        'url': 'http://music.163.com/#/song?id=22735043',
+        'info_dict': {
+            'id': '22735043',
+            'ext': 'mp3',
+            'title': '소원을 말해봐 (Genie)',
+            'creator': '少女时代',
+            'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184',
+            'upload_date': '20100127',
+            'timestamp': 1264608000,
+            'alt_title': '说出愿望吧(Genie)',
+        },
+    }, {
        'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
@@ -167,99 +203,59 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
            'upload_date': '19911130',
            'timestamp': 691516800,
            'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
-            'subtitles': {'lyrics': [{'ext': 'lrc'}]},
-            'duration': 268,
-            'alt_title': '伴唱:现代人乐队 合唱:总政歌舞团',
-            'thumbnail': r're:^http.*\.jpg',
        },
-    }, {
-        'url': 'http://music.163.com/#/song?id=32102397',
-        'md5': '3e909614ce09b1ccef4a3eb205441190',
-        'info_dict': {
-            'id': '32102397',
-            'ext': 'mp3',
-            'title': 'Bad Blood',
-            'creator': 'Taylor Swift / Kendrick Lamar',
-            'upload_date': '20150516',
-            'timestamp': 1431792000,
-            'description': 'md5:21535156efb73d6d1c355f95616e285a',
-            'subtitles': {'lyrics': [{'ext': 'lrc'}]},
-            'duration': 199,
-            'thumbnail': r're:^http.*\.jpg',
-        },
-        'skip': 'Blocked outside Mainland China',
-    }, {
-        'note': 'Has translated name.',
-        'url': 'http://music.163.com/#/song?id=22735043',
-        'info_dict': {
-            'id': '22735043',
-            'ext': 'mp3',
-            'title': '소원을 말해봐 (Genie)',
-            'creator': '少女时代',
-            'upload_date': '20100127',
-            'timestamp': 1264608000,
-            'description': 'md5:03d1ffebec3139aa4bafe302369269c5',
-            'subtitles': {'lyrics': [{'ext': 'lrc'}]},
-            'duration': 229,
-            'alt_title': '说出愿望吧(Genie)',
-            'thumbnail': r're:^http.*\.jpg',
-        },
-        'skip': 'Blocked outside Mainland China',
    }]
    def _process_lyrics(self, lyrics_info):
-        original = traverse_obj(lyrics_info, ('lrc', 'lyric', {str}))
-        translated = traverse_obj(lyrics_info, ('tlyric', 'lyric', {str}))
-
-        if not original or original == '[99:00.00]纯音乐,请欣赏\n':
-            return None
+        original = lyrics_info.get('lrc', {}).get('lyric')
+        translated = lyrics_info.get('tlyric', {}).get('lyric')

        if not translated:
-            return {
-                'lyrics': [{'data': original, 'ext': 'lrc'}],
-            }
+            return original

        lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
        original_ts_texts = re.findall(lyrics_expr, original)
-        translation_ts_dict = dict(re.findall(lyrics_expr, translated))
-
-        merged = '\n'.join(
-            join_nonempty(f'{timestamp}{text}', translation_ts_dict.get(timestamp, ''), delim=' / ')
-            for timestamp, text in original_ts_texts)
-
-        return {
-            'lyrics_merged': [{'data': merged, 'ext': 'lrc'}],
-            'lyrics': [{'data': original, 'ext': 'lrc'}],
-            'lyrics_translated': [{'data': translated, 'ext': 'lrc'}],
-        }
+        translation_ts_dict = dict(
+            (time_stamp, text) for time_stamp, text in re.findall(lyrics_expr, translated)
+        )
+        lyrics = '\n'.join([
+            '%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, ''))
+            for time_stamp, text in original_ts_texts
+        ])
+        return lyrics
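Editor's note: both implementations merge the original and translated LRC lines on matching timestamps. A toy model of the idea, with made-up lyrics:

import re

LYRICS_EXPR = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'

original = '[00:01.00]第一行\n[00:05.00]第二行'
translated = '[00:01.00]first line\n[00:05.00]second line'

translation = dict(re.findall(LYRICS_EXPR, translated))
print('\n'.join(
    '%s%s / %s' % (ts, text, translation.get(ts, ''))
    for ts, text in re.findall(LYRICS_EXPR, original)))
# [00:01.00]第一行 / first line
# [00:05.00]第二行 / second line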
    def _real_extract(self, url):
        song_id = self._match_id(url)

+        params = {
+            'id': song_id,
+            'ids': '[%s]' % song_id
+        }
        info = self.query_api(
-            f'song/detail?id={song_id}&ids=%5B{song_id}%5D', song_id, 'Downloading song info')['songs'][0]
+            'song/detail?' + compat_urllib_parse_urlencode(params),
+            song_id, 'Downloading song info')['songs'][0]

        formats = self.extract_formats(info)

-        lyrics = self._process_lyrics(self.query_api(
-            f'song/lyric?id={song_id}&lv=-1&tv=-1', song_id, 'Downloading lyrics data'))
-        lyric_data = {
-            'description': traverse_obj(lyrics, (('lyrics_merged', 'lyrics'), 0, 'data'), get_all=False),
-            'subtitles': lyrics,
-        } if lyrics else {}
+        lyrics_info = self.query_api(
+            'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
+            song_id, 'Downloading lyrics data')
+        lyrics = self._process_lyrics(lyrics_info)
+
+        alt_title = None
+        if info.get('transNames'):
+            alt_title = '/'.join(info.get('transNames'))

        return {
            'id': song_id,
+            'title': info['name'],
+            'alt_title': alt_title,
+            'creator': ' / '.join([artist['name'] for artist in info.get('artists', [])]),
+            'timestamp': self.convert_milliseconds(info.get('album', {}).get('publishTime')),
+            'thumbnail': info.get('album', {}).get('picUrl'),
+            'duration': self.convert_milliseconds(info.get('duration', 0)),
+            'description': lyrics,
            'formats': formats,
-            'alt_title': '/'.join(traverse_obj(info, (('transNames', 'alias'), ...))) or None,
-            'creator': ' / '.join(traverse_obj(info, ('artists', ..., 'name'))) or None,
-            **lyric_data,
-            **traverse_obj(info, {
-                'title': ('name', {str}),
-                'timestamp': ('album', 'publishTime', {self.kilo_or_none}),
-                'thumbnail': ('album', 'picUrl', {url_or_none}),
-                'duration': ('duration', {self.kilo_or_none}),
-            }),
        }
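Editor's note: the removed branch leans on yt-dlp's traverse_obj dict-mapping form to assemble several fields at once. A small illustration, assuming yt-dlp is importable (the info dict is a fabricated sample):

from yt_dlp.utils import int_or_none, traverse_obj

info = {'name': 'Opus 28', 'album': {'publishTime': 1202745600000}, 'duration': 263000}

print(traverse_obj(info, {
    'title': ('name', {str}),
    # a one-element set holding a callable transforms the matched value
    'timestamp': ('album', 'publishTime', {lambda v: int_or_none(v, scale=1000)}),
    'duration': ('duration', {lambda v: int_or_none(v, scale=1000)}),
}))
# {'title': 'Opus 28', 'timestamp': 1202745600, 'duration': 263}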
@@ -267,44 +263,31 @@ class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
    IE_NAME = 'netease:album'
    IE_DESC = '网易云音乐 - 专辑'
    _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
-    _TESTS = [{
-        'url': 'https://music.163.com/#/album?id=133153666',
-        'info_dict': {
-            'id': '133153666',
-            'title': '桃几的翻唱',
-            'upload_date': '20210913',
-            'description': '桃几2021年翻唱合集',
-            'thumbnail': r're:^http.*\.jpg',
-        },
-        'playlist_mincount': 13,
-    }, {
+    _TEST = {
        'url': 'http://music.163.com/#/album?id=220780',
        'info_dict': {
            'id': '220780',
-            'title': 'B\'Day',
-            'upload_date': '20060904',
-            'description': 'md5:71a74e1d8f392d88cf1bbe48879ad0b0',
-            'thumbnail': r're:^http.*\.jpg',
+            'title': 'B\'day',
        },
        'playlist_count': 23,
-    }]
+        'skip': 'Blocked outside Mainland China',
+    }

    def _real_extract(self, url):
        album_id = self._match_id(url)
-        webpage = self._download_webpage(f'https://music.163.com/album?id={album_id}', album_id)

-        songs = self._search_json(
-            r'<textarea[^>]+\bid="song-list-pre-data"[^>]*>', webpage, 'metainfo', album_id,
-            end_pattern=r'</textarea>', contains_pattern=r'\[(?s:.+)\]')
-        metainfo = {
-            'title': self._og_search_property('title', webpage, 'title', fatal=False),
-            'description': self._html_search_regex(
-                (rf'<div[^>]+\bid="album-desc-{suffix}"[^>]*>(.*?)</div>' for suffix in ('more', 'dot')),
-                webpage, 'description', flags=re.S, fatal=False),
-            'thumbnail': self._og_search_property('image', webpage, 'thumbnail', fatal=False),
-            'upload_date': unified_strdate(self._html_search_meta('music:release_date', webpage, 'date', fatal=False)),
-        }
-        return self.playlist_result(self._get_entries(songs), album_id, **metainfo)
+        info = self.query_api(
+            'album/%s?id=%s' % (album_id, album_id),
+            album_id, 'Downloading album data')['album']
+
+        name = info['name']
+        desc = info.get('description')
+        entries = [
+            self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
+                            'NetEaseMusic', song['id'])
+            for song in info['songs']
+        ]
+        return self.playlist_result(entries, album_id, name, desc)
class NetEaseMusicSingerIE(NetEaseMusicBaseIE):

@@ -316,9 +299,10 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
        'url': 'http://music.163.com/#/artist?id=10559',
        'info_dict': {
            'id': '10559',
-            'title': '张惠妹 - aMEI;阿妹;阿密特',
+            'title': '张惠妹 - aMEI;阿密特',
        },
        'playlist_count': 50,
+        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'Singer has translated name.',
        'url': 'http://music.163.com/#/artist?id=124098',
@@ -327,28 +311,28 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
            'title': '李昇基 - 이승기',
        },
        'playlist_count': 50,
-    }, {
-        'note': 'Singer with both translated and alias',
-        'url': 'https://music.163.com/#/artist?id=159692',
-        'info_dict': {
-            'id': '159692',
-            'title': '初音ミク - 初音未来;Hatsune Miku',
-        },
-        'playlist_count': 50,
+        'skip': 'Blocked outside Mainland China',
    }]

    def _real_extract(self, url):
        singer_id = self._match_id(url)

        info = self.query_api(
-            f'artist/{singer_id}?id={singer_id}', singer_id, note='Downloading singer data')
+            'artist/%s?id=%s' % (singer_id, singer_id),
+            singer_id, 'Downloading singer data')

-        name = join_nonempty(
-            traverse_obj(info, ('artist', 'name', {str})),
-            join_nonempty(*traverse_obj(info, ('artist', ('trans', ('alias', ...)), {str})), delim=';'),
-            delim=' - ')
-
-        return self.playlist_result(self._get_entries(info, 'hotSongs'), singer_id, name)
+        name = info['artist']['name']
+        if info['artist']['trans']:
+            name = '%s - %s' % (name, info['artist']['trans'])
+        if info['artist']['alias']:
+            name = '%s - %s' % (name, ';'.join(info['artist']['alias']))
+
+        entries = [
+            self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
+                            'NetEaseMusic', song['id'])
+            for song in info['hotSongs']
+        ]
+        return self.playlist_result(entries, singer_id, name)
class NetEaseMusicListIE(NetEaseMusicBaseIE):

@@ -360,28 +344,10 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
        'info_dict': {
            'id': '79177352',
            'title': 'Billboard 2007 Top 100',
-            'description': 'md5:12fd0819cab2965b9583ace0f8b7b022',
-            'tags': ['欧美'],
-            'uploader': '浑然破灭',
-            'uploader_id': '67549805',
-            'timestamp': int,
-            'upload_date': r're:\d{8}',
+            'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
        },
-        'playlist_mincount': 95,
-    }, {
-        'note': 'Toplist/Charts sample',
-        'url': 'https://music.163.com/#/discover/toplist?id=60198',
-        'info_dict': {
-            'id': '60198',
-            'title': 're:美国Billboard榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
-            'description': '美国Billboard排行榜',
-            'tags': ['流行', '欧美', '榜单'],
-            'uploader': 'Billboard公告牌',
-            'uploader_id': '48171',
-            'timestamp': int,
-            'upload_date': r're:\d{8}',
-        },
-        'playlist_count': 100,
+        'playlist_count': 99,
+        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'Toplist/Charts sample',
        'url': 'http://music.163.com/#/discover/toplist?id=3733003',
@@ -397,86 +363,64 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
    def _real_extract(self, url):
        list_id = self._match_id(url)

-        info = self._download_eapi_json(
-            '/v3/playlist/detail', list_id,
-            {'id': list_id, 't': '-1', 'n': '500', 's': '0'},
-            note="Downloading playlist info")
-
-        metainfo = traverse_obj(info, ('playlist', {
-            'title': ('name', {str}),
-            'description': ('description', {str}),
-            'tags': ('tags', ..., {str}),
-            'uploader': ('creator', 'nickname', {str}),
-            'uploader_id': ('creator', 'userId', {str_or_none}),
-            'timestamp': ('updateTime', {self.kilo_or_none}),
-        }))
-        if traverse_obj(info, ('playlist', 'specialType')) == 10:
-            metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}'
-
-        return self.playlist_result(self._get_entries(info, ('playlist', 'tracks')), list_id, **metainfo)
+        info = self.query_api(
+            'playlist/detail?id=%s&lv=-1&tv=-1' % list_id,
+            list_id, 'Downloading playlist data')['result']
+
+        name = info['name']
+        desc = info.get('description')
+
+        if info.get('specialType') == 10:  # is a chart/toplist
+            datestamp = datetime.fromtimestamp(
+                self.convert_milliseconds(info['updateTime'])).strftime('%Y-%m-%d')
+            name = '%s %s' % (name, datestamp)
+
+        entries = [
+            self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
+                            'NetEaseMusic', song['id'])
+            for song in info['tracks']
+        ]
+        return self.playlist_result(entries, list_id, name, desc)
class NetEaseMusicMvIE(NetEaseMusicBaseIE):
    IE_NAME = 'netease:mv'
    IE_DESC = '网易云音乐 - MV'
    _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
-    _TESTS = [{
-        'url': 'https://music.163.com/#/mv?id=10958064',
-        'info_dict': {
-            'id': '10958064',
-            'ext': 'mp4',
-            'title': '交换余生',
-            'description': 'md5:e845872cff28820642a2b02eda428fea',
-            'creator': '林俊杰',
-            'upload_date': '20200916',
-            'thumbnail': r're:http.*\.jpg',
-            'duration': 364,
-            'view_count': int,
-            'like_count': int,
-            'comment_count': int,
-        },
-    }, {
+    _TEST = {
        'url': 'http://music.163.com/#/mv?id=415350',
        'info_dict': {
            'id': '415350',
            'ext': 'mp4',
            'title': '이럴거면 그러지말지',
            'description': '白雅言自作曲唱甜蜜爱情',
-            'creator': '娥娟',
+            'creator': '白雅言',
            'upload_date': '20150520',
-            'thumbnail': r're:http.*\.jpg',
-            'duration': 216,
-            'view_count': int,
-            'like_count': int,
-            'comment_count': int,
        },
-    }]
+        'skip': 'Blocked outside Mainland China',
+    }
    def _real_extract(self, url):
        mv_id = self._match_id(url)

        info = self.query_api(
-            f'mv/detail?id={mv_id}&type=mp4', mv_id, 'Downloading mv info')['data']
+            'mv/detail?id=%s&type=mp4' % mv_id,
+            mv_id, 'Downloading mv info')['data']

        formats = [
-            {'url': mv_url, 'ext': 'mp4', 'format_id': f'{brs}p', 'height': int_or_none(brs)}
+            {'url': mv_url, 'ext': 'mp4', 'format_id': '%sp' % brs, 'height': int(brs)}
            for brs, mv_url in info['brs'].items()
        ]

        return {
            'id': mv_id,
+            'title': info['name'],
+            'description': info.get('desc') or info.get('briefDesc'),
+            'creator': info['artistName'],
+            'upload_date': info['publishTime'].replace('-', ''),
            'formats': formats,
-            **traverse_obj(info, {
-                'title': ('name', {str}),
-                'description': (('desc', 'briefDesc'), {str}, {lambda x: x or None}),
-                'creator': ('artistName', {str}),
-                'upload_date': ('publishTime', {unified_strdate}),
-                'thumbnail': ('cover', {url_or_none}),
-                'duration': ('duration', {self.kilo_or_none}),
-                'view_count': ('playCount', {int_or_none}),
-                'like_count': ('likeCount', {int_or_none}),
-                'comment_count': ('commentCount', {int_or_none}),
-            }, get_all=False),
+            'thumbnail': info.get('cover'),
+            'duration': self.convert_milliseconds(info.get('duration', 0)),
        }
@@ -487,74 +431,75 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
    _TESTS = [{
        'url': 'http://music.163.com/#/program?id=10109055',
        'info_dict': {
-            'id': '32593346',
+            'id': '10109055',
            'ext': 'mp3',
            'title': '不丹足球背后的故事',
            'description': '喜马拉雅人的足球梦 ...',
            'creator': '大话西藏',
-            'timestamp': 1434179287,
+            'timestamp': 1434179342,
            'upload_date': '20150613',
-            'thumbnail': r're:http.*\.jpg',
            'duration': 900,
        },
+        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'This program has accompanying songs.',
        'url': 'http://music.163.com/#/program?id=10141022',
        'info_dict': {
            'id': '10141022',
-            'title': '滚滚电台的有声节目',
+            'title': '25岁你是自在如风的少年<27°C>',
            'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
-            'creator': '滚滚电台ORZ',
-            'timestamp': 1434450733,
-            'upload_date': '20150616',
-            'thumbnail': r're:http.*\.jpg',
        },
        'playlist_count': 4,
+        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'This program has accompanying songs.',
        'url': 'http://music.163.com/#/program?id=10141022',
        'info_dict': {
-            'id': '32647209',
+            'id': '10141022',
            'ext': 'mp3',
-            'title': '滚滚电台的有声节目',
+            'title': '25岁你是自在如风的少年<27°C>',
            'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
-            'creator': '滚滚电台ORZ',
-            'timestamp': 1434450733,
+            'timestamp': 1434450841,
            'upload_date': '20150616',
-            'thumbnail': r're:http.*\.jpg',
-            'duration': 1104,
        },
        'params': {
            'noplaylist': True
        },
+        'skip': 'Blocked outside Mainland China',
    }]

    def _real_extract(self, url):
        program_id = self._match_id(url)

        info = self.query_api(
-            f'dj/program/detail?id={program_id}', program_id, note='Downloading program info')['program']
+            'dj/program/detail?id=%s' % program_id,
+            program_id, 'Downloading program info')['program']

-        metainfo = traverse_obj(info, {
-            'title': ('name', {str}),
-            'description': ('description', {str}),
-            'creator': ('dj', 'brand', {str}),
-            'thumbnail': ('coverUrl', {url_or_none}),
-            'timestamp': ('createTime', {self.kilo_or_none}),
-        })
+        name = info['name']
+        description = info['description']

        if not self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']):
            formats = self.extract_formats(info['mainSong'])

            return {
-                'id': str(info['mainSong']['id']),
+                'id': info['mainSong']['id'],
+                'title': name,
+                'description': description,
+                'creator': info['dj']['brand'],
+                'timestamp': self.convert_milliseconds(info['createTime']),
+                'thumbnail': info['coverUrl'],
+                'duration': self.convert_milliseconds(info.get('duration', 0)),
                'formats': formats,
-                'duration': traverse_obj(info, ('mainSong', 'duration', {self.kilo_or_none})),
-                **metainfo,
            }

-        songs = traverse_obj(info, (('mainSong', ('songs', ...)),))
-        return self.playlist_result(self._get_entries(songs), program_id, **metainfo)
+        song_ids = [info['mainSong']['id']]
+        song_ids.extend([song['id'] for song in info['songs']])
+        entries = [
+            self.url_result('http://music.163.com/#/song?id=%s' % song_id,
+                            'NetEaseMusic', song_id)
+            for song_id in song_ids
+        ]
+        return self.playlist_result(entries, program_id, name, description)
class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):

@@ -566,32 +511,38 @@ class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
        'info_dict': {
            'id': '42',
            'title': '声音蔓延',
-            'description': 'md5:c7381ebd7989f9f367668a5aee7d5f08'
+            'description': 'md5:766220985cbd16fdd552f64c578a6b15'
        },
        'playlist_mincount': 40,
+        'skip': 'Blocked outside Mainland China',
    }
    _PAGE_SIZE = 1000

    def _real_extract(self, url):
        dj_id = self._match_id(url)

-        metainfo = {}
+        name = None
+        desc = None
        entries = []
        for offset in itertools.count(start=0, step=self._PAGE_SIZE):
            info = self.query_api(
-                f'dj/program/byradio?asc=false&limit={self._PAGE_SIZE}&radioId={dj_id}&offset={offset}',
-                dj_id, note=f'Downloading dj programs - {offset}')
-
-            entries.extend(self.url_result(
-                f'http://music.163.com/#/program?id={program["id"]}', NetEaseMusicProgramIE,
-                program['id'], program.get('name')) for program in info['programs'])
-            if not metainfo:
-                metainfo = traverse_obj(info, ('programs', 0, 'radio', {
-                    'title': ('name', {str}),
-                    'description': ('desc', {str}),
-                }))
+                'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
+                % (self._PAGE_SIZE, dj_id, offset),
+                dj_id, 'Downloading dj programs - %d' % offset)

+            entries.extend([
+                self.url_result(
+                    'http://music.163.com/#/program?id=%s' % program['id'],
+                    'NetEaseMusicProgram', program['id'])
+                for program in info['programs']
+            ])

+            if name is None:
+                radio = info['programs'][0]['radio']
+                name = radio['name']
+                desc = radio['desc']

            if not info['more']:
                break

-        return self.playlist_result(entries, dj_id, **metainfo)
+        return self.playlist_result(entries, dj_id, name, desc)
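Editor's note: the loop above is the usual offset pagination, stepping by the page size until the API stops reporting more. A toy model with a fake fetcher (the total of 2500 programs is invented):

import itertools

PAGE_SIZE = 1000
TOTAL = 2500

def fetch(offset):
    items = list(range(offset, min(offset + PAGE_SIZE, TOTAL)))
    return {'programs': items, 'more': offset + PAGE_SIZE < TOTAL}

entries = []
for offset in itertools.count(start=0, step=PAGE_SIZE):
    page = fetch(offset)
    entries.extend(page['programs'])
    if not page['more']:
        break

print(len(entries))  # 2500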

View File

@@ -28,44 +28,6 @@ class NhkBaseIE(InfoExtractor):
                m_id, lang, '/all' if is_video else ''),
            m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or []

-    def _get_api_info(self, refresh=True):
-        if not refresh:
-            return self.cache.load('nhk', 'api_info')
-
-        self.cache.store('nhk', 'api_info', {})
-        movie_player_js = self._download_webpage(
-            'https://movie-a.nhk.or.jp/world/player/js/movie-player.js', None,
-            note='Downloading stream API information')
-        api_info = {
-            'url': self._search_regex(
-                r'prod:[^;]+\bapiUrl:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API url'),
-            'token': self._search_regex(
-                r'prod:[^;]+\btoken:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API token'),
-        }
-        self.cache.store('nhk', 'api_info', api_info)
-        return api_info
-
-    def _extract_formats_and_subtitles(self, vod_id):
-        for refresh in (False, True):
-            api_info = self._get_api_info(refresh)
-            if not api_info:
-                continue
-
-            api_url = api_info.pop('url')
-            stream_url = traverse_obj(
-                self._download_json(
-                    api_url, vod_id, 'Downloading stream url info', fatal=False, query={
-                        **api_info,
-                        'type': 'json',
-                        'optional_id': vod_id,
-                        'active_flg': 1,
-                    }),
-                ('meta', 0, 'movie_url', ('mb_auto', 'auto_sp', 'auto_pc'), {url_or_none}), get_all=False)
-            if stream_url:
-                return self._extract_m3u8_formats_and_subtitles(stream_url, vod_id)
-
-        raise ExtractorError('Unable to extract stream url')
    def _extract_episode_info(self, url, episode=None):
        fetch_episode = episode is None
        lang, m_type, episode_id = NhkVodIE._match_valid_url(url).groups()
@@ -105,14 +67,12 @@ class NhkBaseIE(InfoExtractor):
        }

        if is_video:
            vod_id = episode['vod_id']
-            formats, subs = self._extract_formats_and_subtitles(vod_id)
-
            info.update({
+                '_type': 'url_transparent',
+                'ie_key': 'Piksel',
+                'url': 'https://movie-s.nhk.or.jp/v/refid/nhkworld/prefid/' + vod_id,
                'id': vod_id,
-                'formats': formats,
-                'subtitles': subs,
            })
        else:
            if fetch_episode:
                audio_path = episode['audio']['audio']

View File

@@ -1,5 +1,4 @@
import base64
-import random
import urllib.parse

from .common import InfoExtractor
@@ -14,7 +13,6 @@ from ..utils import (
class RadikoBaseIE(InfoExtractor):
-    _GEO_BYPASS = False
    _FULL_KEY = None
    _HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED = (
        'https://c-rpaa.smartstream.ne.jp',
@@ -34,7 +32,7 @@ class RadikoBaseIE(InfoExtractor):
        'https://c-radiko.smartstream.ne.jp',
    )

-    def _negotiate_token(self):
+    def _auth_client(self):
        _, auth1_handle = self._download_webpage_handle(
            'https://radiko.jp/v2/api/auth1', None, 'Downloading authentication page',
            headers={
@@ -60,23 +58,10 @@ class RadikoBaseIE(InfoExtractor):
            'x-radiko-partialkey': partial_key,
        }).split(',')[0]

-        if area_id == 'OUT':
-            self.raise_geo_restricted(countries=['JP'])
-
        auth_data = (auth_token, area_id)
        self.cache.store('radiko', 'auth_data', auth_data)
        return auth_data

-    def _auth_client(self):
-        cachedata = self.cache.load('radiko', 'auth_data')
-        if cachedata is not None:
-            response = self._download_webpage(
-                'https://radiko.jp/v2/api/auth_check', None, 'Checking cached token', expected_status=401,
-                headers={'X-Radiko-AuthToken': cachedata[0], 'X-Radiko-AreaId': cachedata[1]})
-            if response == 'OK':
-                return cachedata
-        return self._negotiate_token()
    def _extract_full_key(self):
        if self._FULL_KEY:
            return self._FULL_KEY
@@ -90,7 +75,7 @@ class RadikoBaseIE(InfoExtractor):
        if full_key:
            full_key = full_key.encode()
-        else:  # use only full key ever known
+        else:  # use full key ever known
            full_key = b'bcd151073c03b352e1ef2fd66c32209da9ca0afa'

        self._FULL_KEY = full_key
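Editor's note: for orientation, auth1 hands back a token plus a key offset and length, and auth2 expects the base64 of that slice of the shared player key as the partial key header seen above. A sketch of that step (the offset and length here are made up; the fallback key is the one from the code):

import base64

FULL_KEY = b'bcd151073c03b352e1ef2fd66c32209da9ca0afa'

def partial_key(full_key, key_offset, key_length):
    # sent as the x-radiko-partialkey header in the auth2 request
    return base64.b64encode(full_key[key_offset:key_offset + key_length]).decode()

print(partial_key(FULL_KEY, 8, 16))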
@@ -118,24 +103,24 @@ class RadikoBaseIE(InfoExtractor):
        m3u8_playlist_data = self._download_xml(
            f'https://radiko.jp/v3/station/stream/pc_html5/{station}.xml', video_id,
            note='Downloading stream information')
+        m3u8_urls = m3u8_playlist_data.findall('.//url')

        formats = []
        found = set()
-        timefree_int = 0 if is_onair else 1
-        for element in m3u8_playlist_data.findall(f'.//url[@timefree="{timefree_int}"]/playlist_create_url'):
-            pcu = element.text
-            if pcu in found:
-                continue
-            found.add(pcu)
+        for url_tag in m3u8_urls:
+            pcu = url_tag.find('playlist_create_url').text
+            url_attrib = url_tag.attrib
            playlist_url = update_url_query(pcu, {
                'station_id': station,
                **query,
                'l': '15',
-                'lsid': ''.join(random.choices('0123456789abcdef', k=32)),
+                'lsid': '88ecea37e968c1f17d5413312d9f8003',
                'type': 'b',
            })
+            if playlist_url in found:
+                continue
+            else:
+                found.add(playlist_url)

            time_to_skip = None if is_onair else cursor - ft
@@ -153,7 +138,7 @@
                    not is_onair and pcu.startswith(self._HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED)):
                sf['preference'] = -100
                sf['format_note'] = 'not preferred'
-            if not is_onair and timefree_int == 1 and time_to_skip:
+            if not is_onair and url_attrib['timefree'] == '1' and time_to_skip:
                sf['downloader_options'] = {'ffmpeg_args': ['-ss', time_to_skip]}
            formats.extend(subformats)
@@ -181,7 +166,21 @@ class RadikoIE(RadikoBaseIE):
        vid_int = unified_timestamp(video_id, False)
        prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int)

-        auth_token, area_id = self._auth_client()
+        auth_cache = self.cache.load('radiko', 'auth_data')
+        for attempt in range(2):
+            auth_token, area_id = (not attempt and auth_cache) or self._auth_client()
+            formats = self._extract_formats(
+                video_id=video_id, station=station, is_onair=False,
+                ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id,
+                query={
+                    'start_at': radio_begin,
+                    'ft': radio_begin,
+                    'end_at': radio_end,
+                    'to': radio_end,
+                    'seek': video_id,
+                })
+            if formats:
+                break
        return {
            'id': video_id,
@@ -190,18 +189,8 @@
            'uploader': try_call(lambda: station_program.find('.//name').text),
            'uploader_id': station,
            'timestamp': vid_int,
+            'formats': formats,
            'is_live': True,
-            'formats': self._extract_formats(
-                video_id=video_id, station=station, is_onair=False,
-                ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id,
-                query={
-                    'start_at': radio_begin,
-                    'ft': radio_begin,
-                    'end_at': radio_end,
-                    'to': radio_end,
-                    'seek': video_id
-                }
-            ),
        }

View File

@@ -39,7 +39,6 @@ class ScrippsNetworksWatchIE(AWSIE):
            'skip_download': True,
        },
        'add_ie': [AnvatoIE.ie_key()],
-        'skip': '404 Not Found',
    }]
    _SNI_TABLE = {

@@ -114,9 +113,6 @@ class ScrippsNetworksIE(InfoExtractor):
            'timestamp': 1475678834,
            'upload_date': '20161005',
            'uploader': 'SCNI-SCND',
-            'duration': 29.995,
-            'chapters': [{'start_time': 0.0, 'end_time': 29.995, 'title': '<Untitled Chapter 1>'}],
-            'thumbnail': 'https://images.dds.discovery.com/up/tp/Scripps_-_Food_Category_Prod/122/987/0260338_630x355.jpg',
        },
        'add_ie': ['ThePlatform'],
        'expected_warnings': ['No HLS formats found'],

View File

@@ -50,16 +50,16 @@ class SubstackIE(InfoExtractor):
        if not re.search(r'<script[^>]+src=["\']https://substackcdn.com/[^"\']+\.js', webpage):
            return

-        mobj = re.search(r'{[^}]*\\?["\']subdomain\\?["\']\s*:\s*\\?["\'](?P<subdomain>[^\\"\']+)', webpage)
+        mobj = re.search(r'{[^}]*["\']subdomain["\']\s*:\s*["\'](?P<subdomain>[^"]+)', webpage)
        if mobj:
            parsed = urllib.parse.urlparse(url)
            yield parsed._replace(netloc=f'{mobj.group("subdomain")}.substack.com').geturl()
            raise cls.StopExtraction()

-    def _extract_video_formats(self, video_id, url):
+    def _extract_video_formats(self, video_id, username):
        formats, subtitles = [], {}
        for video_format in ('hls', 'mp4'):
-            video_url = urllib.parse.urljoin(url, f'/api/v1/video/upload/{video_id}/src?type={video_format}')
+            video_url = f'https://{username}.substack.com/api/v1/video/upload/{video_id}/src?type={video_format}'

            if video_format == 'hls':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', fatal=False)
@@ -81,17 +81,12 @@ class SubstackIE(InfoExtractor):
r'window\._preloads\s*=\s*JSON\.parse\(', webpage, 'json string', r'window\._preloads\s*=\s*JSON\.parse\(', webpage, 'json string',
display_id, transform_source=js_to_json, contains_pattern=r'"{(?s:.+)}"'), display_id) display_id, transform_source=js_to_json, contains_pattern=r'"{(?s:.+)}"'), display_id)
canonical_url = url
domain = traverse_obj(webpage_info, ('domainInfo', 'customDomain', {str}))
if domain:
canonical_url = urllib.parse.urlparse(url)._replace(netloc=domain).geturl()
post_type = webpage_info['post']['type'] post_type = webpage_info['post']['type']
formats, subtitles = [], {} formats, subtitles = [], {}
if post_type == 'podcast': if post_type == 'podcast':
formats, subtitles = [{'url': webpage_info['post']['podcast_url']}], {} formats, subtitles = [{'url': webpage_info['post']['podcast_url']}], {}
elif post_type == 'video': elif post_type == 'video':
formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], canonical_url) formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], username)
else: else:
self.raise_no_formats(f'Page type "{post_type}" is not supported') self.raise_no_formats(f'Page type "{post_type}" is not supported')
@@ -104,5 +99,4 @@ class SubstackIE(InfoExtractor):
'thumbnail': traverse_obj(webpage_info, ('post', 'cover_image')), 'thumbnail': traverse_obj(webpage_info, ('post', 'cover_image')),
'uploader': traverse_obj(webpage_info, ('pub', 'name')), 'uploader': traverse_obj(webpage_info, ('pub', 'name')),
'uploader_id': str_or_none(traverse_obj(webpage_info, ('post', 'publication_id'))), 'uploader_id': str_or_none(traverse_obj(webpage_info, ('post', 'publication_id'))),
'webpage_url': canonical_url,
} }
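
The removed canonical_url logic swaps only the host of the request URL when the publication reports a custom domain. A small self-contained sketch of that netloc replacement (example URLs are made up):

    import urllib.parse

    def canonicalize(url, custom_domain=None):
        # Keep scheme, path and query; replace only the network location.
        if not custom_domain:
            return url
        return urllib.parse.urlparse(url)._replace(netloc=custom_domain).geturl()

    print(canonicalize('https://example.substack.com/p/some-post', 'blog.example.com'))
    # https://blog.example.com/p/some-post
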


@@ -23,7 +23,6 @@ class SyfyIE(AdobePassIE):
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['ThePlatform'], 'add_ie': ['ThePlatform'],
'skip': 'Redirects to main page',
}] }]
def _real_extract(self, url): def _real_extract(self, url):


@@ -167,7 +167,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'CNet no longer uses ThePlatform', 'skip': '404 Not Found',
}, { }, {
'url': 'https://player.theplatform.com/p/D6x-PC/pulse_preview/embed/select/media/yMBg9E8KFxZD', 'url': 'https://player.theplatform.com/p/D6x-PC/pulse_preview/embed/select/media/yMBg9E8KFxZD',
'info_dict': { 'info_dict': {
@@ -177,7 +177,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
'title': 'HIGHLIGHTS: USA bag first ever series Cup win', 'title': 'HIGHLIGHTS: USA bag first ever series Cup win',
'uploader': 'EGSM', 'uploader': 'EGSM',
}, },
'skip': 'Dead link', 'skip': '404 Not Found',
}, { }, {
'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7', 'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
'only_matching': True, 'only_matching': True,
@@ -195,7 +195,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
'upload_date': '20150701', 'upload_date': '20150701',
'uploader': 'NBCU-NEWS', 'uploader': 'NBCU-NEWS',
}, },
'skip': 'Error: Player PID "nbcNewsOffsite" is disabled', 'skip': '404 Not Found',
}, { }, {
# From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1 # From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1
# geo-restricted (US), HLS encrypted with AES-128 # geo-restricted (US), HLS encrypted with AES-128

yt_dlp/extractor/theta.py (new file)

@@ -0,0 +1,90 @@
from .common import InfoExtractor
from ..utils import try_get
class ThetaStreamIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?theta\.tv/(?!video/)(?P<id>[a-z0-9-]+)'
_TESTS = [{
'url': 'https://www.theta.tv/davirus',
'skip': 'The live may have ended',
'info_dict': {
'id': 'DaVirus',
'ext': 'mp4',
'title': 'I choose you - My Community is King -👀 - YO HABLO ESPANOL - CODE DAVIRUS',
'thumbnail': r're:https://live-thumbnails-prod-theta-tv\.imgix\.net/thumbnail/.+\.jpg',
}
}, {
'url': 'https://www.theta.tv/mst3k',
'note': 'This channel is live 24/7',
'info_dict': {
'id': 'MST3K',
'ext': 'mp4',
'title': 'Mystery Science Theatre 3000 24/7 Powered by the THETA Network.',
'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+\.jpg',
}
}, {
'url': 'https://www.theta.tv/contv-anime',
'info_dict': {
'id': 'ConTVAnime',
'ext': 'mp4',
'title': 'CONTV ANIME 24/7. Powered by THETA Network.',
'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+\.jpg',
}
}]
def _real_extract(self, url):
channel_id = self._match_id(url)
info = self._download_json(f'https://api.theta.tv/v1/channel?alias={channel_id}', channel_id)['body']
m3u8_playlist = next(
data['url'] for data in info['live_stream']['video_urls']
if data.get('type') != 'embed' and data.get('resolution') in ('master', 'source'))
formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True)
channel = try_get(info, lambda x: x['user']['username']) # using this field instead of channel_id due to capitalization
return {
'id': channel,
'title': try_get(info, lambda x: x['live_stream']['title']),
'channel': channel,
'view_count': try_get(info, lambda x: x['live_stream']['view_count']),
'is_live': True,
'formats': formats,
'thumbnail': try_get(info, lambda x: x['live_stream']['thumbnail_url']),
}
class ThetaVideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?theta\.tv/video/(?P<id>vid[a-z0-9]+)'
_TEST = {
'url': 'https://www.theta.tv/video/vidiq6aaet3kzf799p0',
'md5': '633d8c29eb276bb38a111dbd591c677f',
'info_dict': {
'id': 'vidiq6aaet3kzf799p0',
'ext': 'mp4',
'title': 'Theta EdgeCast Tutorial',
'uploader': 'Pixiekittie',
'description': 'md5:e316253f5bdced8b5a46bb50ae60a09f',
'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+/vod_thumb/.+.jpg',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
info = self._download_json(f'https://api.theta.tv/v1/video/{video_id}/raw', video_id)['body']
m3u8_playlist = try_get(info, lambda x: x['video_urls'][0]['url'])
formats = self._extract_m3u8_formats(m3u8_playlist, video_id, 'mp4', m3u8_id='hls')
return {
'id': video_id,
'title': info.get('title'),
'uploader': try_get(info, lambda x: x['user']['username']),
'description': info.get('description'),
'view_count': info.get('view_count'),
'like_count': info.get('like_count'),
'formats': formats,
'thumbnail': info.get('thumbnail_url'),
}
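
For reference, the playlist selection in ThetaStreamIE above boils down to taking the first non-embed entry whose resolution is the master/source rendition; next() raises StopIteration when nothing matches, so a real caller may want a fallback. A sketch with made-up API data:

    video_urls = [  # shape mirrors info['live_stream']['video_urls'] above
        {'type': 'embed', 'url': 'https://player.example/embed'},
        {'type': 'hls', 'resolution': 'source', 'url': 'https://cdn.example/master.m3u8'},
    ]
    m3u8_playlist = next(
        data['url'] for data in video_urls
        if data.get('type') != 'embed' and data.get('resolution') in ('master', 'source'))
    print(m3u8_playlist)  # https://cdn.example/master.m3u8
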


@@ -11,19 +11,17 @@ from ..utils import (
class TheWeatherChannelIE(ThePlatformIE): # XXX: Do not subclass from concrete IE class TheWeatherChannelIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?weather\.com(?P<asset_name>(?:/(?P<locale>[a-z]{2}-[A-Z]{2}))?/(?:[^/]+/)*video/(?P<id>[^/?#]+))' _VALID_URL = r'https?://(?:www\.)?weather\.com(?P<asset_name>(?:/(?P<locale>[a-z]{2}-[A-Z]{2}))?/(?:[^/]+/)*video/(?P<id>[^/?#]+))'
_TESTS = [{ _TESTS = [{
'url': 'https://weather.com/storms/hurricane/video/invest-95l-in-atlantic-has-a-medium-chance-of-development', 'url': 'https://weather.com/series/great-outdoors/video/ice-climber-is-in-for-a-shock',
'md5': '68f0cf616435683f27ce36bd9c927394', 'md5': 'c4cbe74c9c17c5676b704b950b73dd92',
'info_dict': { 'info_dict': {
'id': '81acef2d-ee8c-4545-ba83-bff3cc80db97', 'id': 'cc82397e-cc3f-4d11-9390-a785add090e8',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Invest 95L In Atlantic Has A Medium Chance Of Development', 'title': 'Ice Climber Is In For A Shock',
'description': 'md5:0de720fd5f0d0e32207bd4c270fff824', 'description': 'md5:55606ce1378d4c72e6545e160c9d9695',
'uploader': 'TWC - Digital', 'uploader': 'TWC - Digital (No Distro)',
'uploader_id': 'b5a999e0-9e04-11e1-9ee2-001d092f5a10', 'uploader_id': '6ccd5455-16bb-46f2-9c57-ff858bb9f62c',
'upload_date': '20230721', 'upload_date': '20160720',
'timestamp': 1689967343, 'timestamp': 1469018835,
'display_id': 'invest-95l-in-atlantic-has-a-medium-chance-of-development',
'duration': 34.0,
} }
}, { }, {
'url': 'https://weather.com/en-CA/international/videos/video/unidentified-object-falls-from-sky-in-india', 'url': 'https://weather.com/en-CA/international/videos/video/unidentified-object-falls-from-sky-in-india',


@@ -8,160 +8,158 @@ from ..utils import (
class TMZIE(InfoExtractor): class TMZIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tmz\.com/.*' _VALID_URL = r"https?://(?:www\.)?tmz\.com/.*"
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.tmz.com/videos/0-cegprt2p/', "url": "http://www.tmz.com/videos/0-cegprt2p/",
'info_dict': { "info_dict": {
'id': 'http://www.tmz.com/videos/0-cegprt2p/', "id": "http://www.tmz.com/videos/0-cegprt2p/",
'ext': 'mp4', "ext": "mp4",
'title': 'No Charges Against Hillary Clinton? Harvey Says It Ain\'t Over Yet', "title": "No Charges Against Hillary Clinton? Harvey Says It Ain't Over Yet",
'description': 'Harvey talks about Director Comeys decision not to prosecute Hillary Clinton.', "description": "Harvey talks about Director Comeys decision not to prosecute Hillary Clinton.",
'timestamp': 1467831837, "timestamp": 1467831837,
'uploader': 'TMZ Staff', "uploader": "TMZ Staff",
'upload_date': '20160706', "upload_date": "20160706",
'thumbnail': 'https://imagez.tmz.com/image/5e/4by3/2016/07/06/5eea7dc01baa5c2e83eb06930c170e46_xl.jpg', "thumbnail": "https://imagez.tmz.com/image/5e/4by3/2016/07/06/5eea7dc01baa5c2e83eb06930c170e46_xl.jpg",
'duration': 772.0, "duration": 772.0,
}, },
}, },
{ {
'url': 'https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/', "url": "https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/",
'info_dict': { "info_dict": {
'id': 'https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/', "id": "https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/",
'ext': 'mp4', "ext": "mp4",
'title': 'Angry Bagel Shop Guy Says He Doesn\'t Trust Women', "title": "Angry Bagel Shop Guy Says He Doesn't Trust Women",
'description': 'The enraged man who went viral for ranting about women on dating sites before getting ragdolled in a bagel shop is defending his misogyny ... he says it\'s women\'s fault in the first place.', "description": "The enraged man who went viral for ranting about women on dating sites before getting ragdolled in a bagel shop is defending his misogyny ... he says it's women's fault in the first place.",
'timestamp': 1562889485, "timestamp": 1562889485,
'uploader': 'TMZ Staff', "uploader": "TMZ Staff",
'upload_date': '20190711', "upload_date": "20190711",
'thumbnail': 'https://imagez.tmz.com/image/a8/4by3/2019/07/12/a85480d27b2f50a7bfea2322151d67a5_xl.jpg', "thumbnail": "https://imagez.tmz.com/image/a8/4by3/2019/07/12/a85480d27b2f50a7bfea2322151d67a5_xl.jpg",
'duration': 123.0, "duration": 123.0,
}, },
}, },
{ {
'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert', "url": "http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert",
'md5': '5429c85db8bde39a473a56ca8c4c5602', "md5": "5429c85db8bde39a473a56ca8c4c5602",
'info_dict': { "info_dict": {
'id': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert', "id": "http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert",
'ext': 'mp4', "ext": "mp4",
'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake', "title": "Bobby Brown Tells Crowd ... Bobbi Kristina is Awake",
'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."', "description": 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
'timestamp': 1429467813, "timestamp": 1429467813,
'uploader': 'TMZ Staff', "uploader": "TMZ Staff",
'upload_date': '20150419', "upload_date": "20150419",
'duration': 29.0, "duration": 29.0,
'thumbnail': 'https://imagez.tmz.com/image/15/4by3/2015/04/20/1539c7ae136359fc979236fa6a9449dd_xl.jpg', "thumbnail": "https://imagez.tmz.com/image/15/4by3/2015/04/20/1539c7ae136359fc979236fa6a9449dd_xl.jpg",
}, },
}, },
{ {
'url': 'http://www.tmz.com/2015/09/19/patti-labelle-concert-fan-stripping-kicked-out-nicki-minaj/', "url": "http://www.tmz.com/2015/09/19/patti-labelle-concert-fan-stripping-kicked-out-nicki-minaj/",
'info_dict': { "info_dict": {
'id': 'http://www.tmz.com/2015/09/19/patti-labelle-concert-fan-stripping-kicked-out-nicki-minaj/', "id": "http://www.tmz.com/2015/09/19/patti-labelle-concert-fan-stripping-kicked-out-nicki-minaj/",
'ext': 'mp4', "ext": "mp4",
'title': 'Patti LaBelle -- Goes Nuclear On Stripping Fan', "title": "Patti LaBelle -- Goes Nuclear On Stripping Fan",
'description': 'Patti LaBelle made it known loud and clear last night ... NO ' "description": "Patti LaBelle made it known loud and clear last night ... NO "
'ONE gets on her stage and strips down.', "ONE gets on her stage and strips down.",
'timestamp': 1442683746, "timestamp": 1442683746,
'uploader': 'TMZ Staff', "uploader": "TMZ Staff",
'upload_date': '20150919', "upload_date": "20150919",
'duration': 104.0, "duration": 104.0,
'thumbnail': 'https://imagez.tmz.com/image/5e/4by3/2015/09/20/5e57d7575062528082994e18ac3f0f48_xl.jpg', "thumbnail": "https://imagez.tmz.com/image/5e/4by3/2015/09/20/5e57d7575062528082994e18ac3f0f48_xl.jpg",
}, },
}, },
{ {
'url': 'http://www.tmz.com/2016/01/28/adam-silver-sting-drake-blake-griffin/', "url": "http://www.tmz.com/2016/01/28/adam-silver-sting-drake-blake-griffin/",
'info_dict': { "info_dict": {
'id': 'http://www.tmz.com/2016/01/28/adam-silver-sting-drake-blake-griffin/', "id": "http://www.tmz.com/2016/01/28/adam-silver-sting-drake-blake-griffin/",
'ext': 'mp4', "ext": "mp4",
'title': 'NBA\'s Adam Silver -- Blake Griffin\'s a Great Guy ... He\'ll Learn from This', "title": "NBA's Adam Silver -- Blake Griffin's a Great Guy ... He'll Learn from This",
'description': 'Two pretty parts of this video with NBA Commish Adam Silver.', "description": "Two pretty parts of this video with NBA Commish Adam Silver.",
'timestamp': 1454010989, "timestamp": 1454010989,
'uploader': 'TMZ Staff', "uploader": "TMZ Staff",
'upload_date': '20160128', "upload_date": "20160128",
'duration': 59.0, "duration": 59.0,
'thumbnail': 'https://imagez.tmz.com/image/38/4by3/2016/01/29/3856e83e0beb57059ec412122b842fb1_xl.jpg', "thumbnail": "https://imagez.tmz.com/image/38/4by3/2016/01/29/3856e83e0beb57059ec412122b842fb1_xl.jpg",
}, },
}, },
{ {
'url': 'http://www.tmz.com/2016/10/27/donald-trump-star-vandal-arrested-james-otis/', "url": "http://www.tmz.com/2016/10/27/donald-trump-star-vandal-arrested-james-otis/",
'info_dict': { "info_dict": {
'id': 'http://www.tmz.com/2016/10/27/donald-trump-star-vandal-arrested-james-otis/', "id": "http://www.tmz.com/2016/10/27/donald-trump-star-vandal-arrested-james-otis/",
'ext': 'mp4', "ext": "mp4",
'title': 'Trump Star Vandal -- I\'m Not Afraid of Donald or the Cops!', "title": "Trump Star Vandal -- I'm Not Afraid of Donald or the Cops!",
'description': 'James Otis is the the guy who took a pickaxe to Donald Trump\'s star on the Walk of Fame, and he tells TMZ .. he\'s ready and willing to go to jail for the crime.', "description": "James Otis is the the guy who took a pickaxe to Donald Trump's star on the Walk of Fame, and he tells TMZ .. he's ready and willing to go to jail for the crime.",
'timestamp': 1477500095, "timestamp": 1477500095,
'uploader': 'TMZ Staff', "uploader": "TMZ Staff",
'upload_date': '20161026', "upload_date": "20161026",
'thumbnail': 'https://imagez.tmz.com/image/0d/4by3/2016/10/27/0d904814d4a75dcf9cc3b8cfd1edc1a3_xl.jpg', "thumbnail": "https://imagez.tmz.com/image/0d/4by3/2016/10/27/0d904814d4a75dcf9cc3b8cfd1edc1a3_xl.jpg",
'duration': 128.0, "duration": 128.0,
}, },
}, },
{ {
'url': 'https://www.tmz.com/videos/2020-10-31-103120-beverly-hills-protest-4878209/', "url": "https://www.tmz.com/videos/2020-10-31-103120-beverly-hills-protest-4878209/",
'info_dict': { "info_dict": {
'id': 'https://www.tmz.com/videos/2020-10-31-103120-beverly-hills-protest-4878209/', "id": "https://www.tmz.com/videos/2020-10-31-103120-beverly-hills-protest-4878209/",
'ext': 'mp4', "ext": "mp4",
'title': 'Cops Use Billy Clubs Against Pro-Trump and Anti-Fascist ' "title": "Cops Use Billy Clubs Against Pro-Trump and Anti-Fascist "
'Demonstrators', "Demonstrators",
'description': 'Beverly Hills may be an omen of what\'s coming next week, ' "description": "Beverly Hills may be an omen of what's coming next week, "
'because things got crazy on the streets and cops started ' "because things got crazy on the streets and cops started "
'swinging their billy clubs at both Anti-Fascist and Pro-Trump ' "swinging their billy clubs at both Anti-Fascist and Pro-Trump "
'demonstrators.', "demonstrators.",
'timestamp': 1604182772, "timestamp": 1604182772,
'uploader': 'TMZ Staff', "uploader": "TMZ Staff",
'upload_date': '20201031', "upload_date": "20201031",
'duration': 96.0, "duration": 96.0,
'thumbnail': 'https://imagez.tmz.com/image/f3/4by3/2020/10/31/f37bd5a8aef84497866f425130c58be3_xl.jpg', "thumbnail": "https://imagez.tmz.com/image/f3/4by3/2020/10/31/f37bd5a8aef84497866f425130c58be3_xl.jpg",
}, },
}, },
{ {
'url': 'https://www.tmz.com/2020/11/05/gervonta-davis-car-crash-hit-and-run-police/', "url": "https://www.tmz.com/2020/11/05/gervonta-davis-car-crash-hit-and-run-police/",
'info_dict': { "info_dict": {
'id': 'Dddb6IGe-ws', "id": "Dddb6IGe-ws",
'ext': 'mp4', "ext": "mp4",
'title': 'SICK LAMBO GERVONTA DAVIS IN HIS NEW RIDE RIGHT AFTER KO AFTER LEO EsNews Boxing', "title": "SICK LAMBO GERVONTA DAVIS IN HIS NEW RIDE RIGHT AFTER KO AFTER LEO EsNews Boxing",
'uploader': 'ESNEWS', "uploader": "ESNEWS",
'description': 'md5:49675bc58883ccf80474b8aa701e1064', "description": "md5:49675bc58883ccf80474b8aa701e1064",
'upload_date': '20201102', "upload_date": "20201102",
'uploader_id': '@ESNEWS', "uploader_id": "ESNEWS",
'uploader_url': 'https://www.youtube.com/@ESNEWS', "uploader_url": "http://www.youtube.com/user/ESNEWS",
'like_count': int, "like_count": int,
'channel_id': 'UCI-Oq7oFGakzSzHFlTtsUsQ', "channel_id": "UCI-Oq7oFGakzSzHFlTtsUsQ",
'channel': 'ESNEWS', "channel": "ESNEWS",
'view_count': int, "view_count": int,
'duration': 225, "duration": 225,
'live_status': 'not_live', "live_status": "not_live",
'thumbnail': 'https://i.ytimg.com/vi_webp/Dddb6IGe-ws/maxresdefault.webp', "thumbnail": "https://i.ytimg.com/vi_webp/Dddb6IGe-ws/maxresdefault.webp",
'channel_url': 'https://www.youtube.com/channel/UCI-Oq7oFGakzSzHFlTtsUsQ', "channel_url": "https://www.youtube.com/channel/UCI-Oq7oFGakzSzHFlTtsUsQ",
'channel_follower_count': int, "channel_follower_count": int,
'playable_in_embed': True, "playable_in_embed": True,
'categories': ['Sports'], "categories": ["Sports"],
'age_limit': 0, "age_limit": 0,
'tags': 'count:10', "tags": "count:10",
'availability': 'public', "availability": "public",
'comment_count': int,
}, },
}, },
{ {
'url': 'https://www.tmz.com/2020/11/19/conor-mcgregor-dustin-poirier-contract-fight-ufc-257-fight-island/', "url": "https://www.tmz.com/2020/11/19/conor-mcgregor-dustin-poirier-contract-fight-ufc-257-fight-island/",
'info_dict': { "info_dict": {
'id': '1329448013937471491', "id": "1329450007125225473",
'ext': 'mp4', "ext": "mp4",
'title': 'The Mac Life - BREAKING: Conor McGregor (@thenotoriousmma) has signed his bout agreement for his rematch with Dustin Poirier for January 23.', "title": "The Mac Life - BREAKING: Conor McGregor (@thenotoriousmma) has signed his bout agreement for his rematch with Dustin Poirier for January 23.",
'uploader': 'The Mac Life', "uploader": "The Mac Life",
'description': 'md5:56e6009bbc3d12498e10d08a8e1f1c69', "description": "md5:56e6009bbc3d12498e10d08a8e1f1c69",
'upload_date': '20201119', "upload_date": "20201119",
'display_id': '1329450007125225473', "uploader_id": "TheMacLife",
'uploader_id': 'TheMacLife', "timestamp": 1605800556,
'timestamp': 1605800556, "thumbnail": "https://pbs.twimg.com/media/EnMmfT8XYAExgxJ.jpg?name=small",
'thumbnail': 'https://pbs.twimg.com/media/EnMmfT8XYAExgxJ.jpg?name=small', "like_count": int,
'like_count': int, "duration": 11.812,
'duration': 11.812, "uploader_url": "https://twitter.com/TheMacLife",
'uploader_url': 'https://twitter.com/TheMacLife', "age_limit": 0,
'age_limit': 0, "repost_count": int,
'repost_count': int, "tags": [],
'tags': [], "comment_count": int,
'comment_count': int,
}, },
}, },
] ]
@@ -169,25 +167,25 @@ class TMZIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
webpage = self._download_webpage(url, url) webpage = self._download_webpage(url, url)
jsonld = self._search_json_ld(webpage, url) jsonld = self._search_json_ld(webpage, url)
if not jsonld or 'url' not in jsonld: if not jsonld or "url" not in jsonld:
# try to extract from YouTube Player API # try to extract from YouTube Player API
# see https://developers.google.com/youtube/iframe_api_reference#Video_Queueing_Functions # see https://developers.google.com/youtube/iframe_api_reference#Video_Queueing_Functions
match_obj = re.search(r'\.cueVideoById\(\s*(?P<quote>[\'"])(?P<id>.*?)(?P=quote)', webpage) match_obj = re.search(r'\.cueVideoById\(\s*(?P<quote>[\'"])(?P<id>.*?)(?P=quote)', webpage)
if match_obj: if match_obj:
res = self.url_result(match_obj.group('id')) res = self.url_result(match_obj.group("id"))
return res return res
# try to extract from twitter # try to extract from twitter
blockquote_el = get_element_by_attribute('class', 'twitter-tweet', webpage) blockquote_el = get_element_by_attribute("class", "twitter-tweet", webpage)
if blockquote_el: if blockquote_el:
matches = re.findall( matches = re.findall(
r'<a[^>]+href=\s*(?P<quote>[\'"])(?P<link>.*?)(?P=quote)', r'<a[^>]+href=\s*(?P<quote>[\'"])(?P<link>.*?)(?P=quote)',
blockquote_el) blockquote_el)
if matches: if matches:
for _, match in matches: for _, match in matches:
if '/status/' in match: if "/status/" in match:
res = self.url_result(match) res = self.url_result(match)
return res return res
raise ExtractorError('No video found!') raise ExtractorError("No video found!")
if id not in jsonld: if id not in jsonld:
jsonld['id'] = url jsonld["id"] = url
return jsonld return jsonld
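
The Twitter fallback in this hunk scans an embedded blockquote for the first anchor pointing at a tweet. A self-contained sketch using the same regex (the HTML snippet is illustrative):

    import re

    blockquote = ('<blockquote class="twitter-tweet">'
                  '<a href="https://twitter.com/TMZ/status/123">tweet</a></blockquote>')
    # findall returns (quote, link) tuples; the backreference is not a capture group
    for _, link in re.findall(r'<a[^>]+href=\s*(?P<quote>[\'"])(?P<link>.*?)(?P=quote)', blockquote):
        if '/status/' in link:
            print(link)  # https://twitter.com/TMZ/status/123
            break
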


@@ -190,7 +190,10 @@ class WrestleUniverseVODIE(WrestleUniverseBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
lang, video_id = self._match_valid_url(url).group('lang', 'id') lang, video_id = self._match_valid_url(url).group('lang', 'id')
metadata = self._download_metadata(url, video_id, lang, 'videoEpisodeFallbackData') metadata = self._download_metadata(url, video_id, lang, 'videoEpisodeFallbackData')
video_data = self._call_api(video_id, ':watch', 'watch', data={'deviceId': self._DEVICE_ID}) video_data = self._call_api(video_id, ':watch', 'watch', data={
# 'deviceId' is required if ignoreDeviceRestriction is False
'ignoreDeviceRestriction': True,
})
return { return {
'id': video_id, 'id': video_id,


@@ -407,7 +407,7 @@ class XHamsterEmbedIE(InfoExtractor):
class XHamsterUserIE(InfoExtractor): class XHamsterUserIE(InfoExtractor):
_VALID_URL = rf'https?://(?:[^/?#]+\.)?{XHamsterIE._DOMAINS}/(?:(?P<user>users)|creators)/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:.+?\.)?%s/users/(?P<id>[^/?#&]+)' % XHamsterIE._DOMAINS
_TESTS = [{ _TESTS = [{
# Paginated user profile # Paginated user profile
'url': 'https://xhamster.com/users/netvideogirls/videos', 'url': 'https://xhamster.com/users/netvideogirls/videos',
@@ -422,12 +422,6 @@ class XHamsterUserIE(InfoExtractor):
'id': 'firatkaan', 'id': 'firatkaan',
}, },
'playlist_mincount': 1, 'playlist_mincount': 1,
}, {
'url': 'https://xhamster.com/creators/squirt-orgasm-69',
'info_dict': {
'id': 'squirt-orgasm-69',
},
'playlist_mincount': 150,
}, { }, {
'url': 'https://xhday.com/users/mobhunter', 'url': 'https://xhday.com/users/mobhunter',
'only_matching': True, 'only_matching': True,
@@ -436,9 +430,8 @@ class XHamsterUserIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
def _entries(self, user_id, is_user): def _entries(self, user_id):
prefix, suffix = ('users', 'videos') if is_user else ('creators', 'exclusive') next_page_url = 'https://xhamster.com/users/%s/videos/1' % user_id
next_page_url = f'https://xhamster.com/{prefix}/{user_id}/{suffix}/1'
for pagenum in itertools.count(1): for pagenum in itertools.count(1):
page = self._download_webpage( page = self._download_webpage(
next_page_url, user_id, 'Downloading page %s' % pagenum) next_page_url, user_id, 'Downloading page %s' % pagenum)
@@ -461,5 +454,5 @@ class XHamsterUserIE(InfoExtractor):
break break
def _real_extract(self, url): def _real_extract(self, url):
user, user_id = self._match_valid_url(url).group('user', 'id') user_id = self._match_id(url)
return self.playlist_result(self._entries(user_id, bool(user)), user_id) return self.playlist_result(self._entries(user_id), user_id)
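
Both versions of _entries walk a paginated profile the same way: fetch page N, yield its video links, follow the "next" link until it disappears. A generic sketch with hypothetical helpers (fetch_page, parse_links and next_url stand in for the real download/regex code):

    import itertools

    def entries(first_url, fetch_page, parse_links, next_url):
        page_url = first_url
        for pagenum in itertools.count(1):
            page = fetch_page(page_url, pagenum)
            yield from parse_links(page)
            page_url = next_url(page)  # None when there is no next page
            if not page_url:
                break
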


@@ -941,13 +941,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None, def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None, ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
default_client='web'): default_client='web'):
raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE)) for retry in self.RetryManager():
# Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
icd_rm = next(icd_retries)
main_retries = iter(self.RetryManager())
main_rm = next(main_retries)
for _ in range(main_rm.retries + icd_rm.retries + 1):
try: try:
response = self._call_api( response = self._call_api(
ep=ep, fatal=True, headers=headers, ep=ep, fatal=True, headers=headers,
@@ -959,8 +953,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if not isinstance(e.cause, network_exceptions): if not isinstance(e.cause, network_exceptions):
return self._error_or_warning(e, fatal=fatal) return self._error_or_warning(e, fatal=fatal)
elif not isinstance(e.cause, HTTPError): elif not isinstance(e.cause, HTTPError):
main_rm.error = e retry.error = e
next(main_retries)
continue continue
first_bytes = e.cause.response.read(512) first_bytes = e.cause.response.read(512)
@@ -972,32 +965,27 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if yt_error: if yt_error:
self._report_alerts([('ERROR', yt_error)], fatal=False) self._report_alerts([('ERROR', yt_error)], fatal=False)
# Downloading page may result in intermittent 5xx HTTP error # Downloading page may result in intermittent 5xx HTTP error
# Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289 # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
# We also want to catch all other network exceptions since errors in later pages can be troublesome # We also want to catch all other network exceptions since errors in later pages can be troublesome
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210 # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
if e.cause.status not in (403, 429): if e.cause.status not in (403, 429):
main_rm.error = e retry.error = e
next(main_retries)
continue continue
return self._error_or_warning(e, fatal=fatal) return self._error_or_warning(e, fatal=fatal)
try: try:
self._extract_and_report_alerts(response, only_once=True) self._extract_and_report_alerts(response, only_once=True)
except ExtractorError as e: except ExtractorError as e:
# YouTube's servers may return errors we want to retry on in a 200 OK response # YouTube servers may return errors we want to retry on in a 200 OK response
# See: https://github.com/yt-dlp/yt-dlp/issues/839 # See: https://github.com/yt-dlp/yt-dlp/issues/839
if 'unknown error' in e.msg.lower(): if 'unknown error' in e.msg.lower():
main_rm.error = e retry.error = e
next(main_retries)
continue continue
return self._error_or_warning(e, fatal=fatal) return self._error_or_warning(e, fatal=fatal)
# Youtube sometimes sends incomplete data # Youtube sometimes sends incomplete data
# See: https://github.com/ytdl-org/youtube-dl/issues/28194 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
if not traverse_obj(response, *variadic(check_get_keys)): if not traverse_obj(response, *variadic(check_get_keys)):
icd_rm.error = ExtractorError('Incomplete data received', expected=True) retry.error = ExtractorError('Incomplete data received', expected=True)
should_retry = next(icd_retries, None)
if not should_retry:
return None
continue continue
return response return response
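
The left-hand version of _extract_response keeps two independent retry budgets: one for network errors and one for YouTube's "incomplete data" responses, so exhausting one cannot consume the other. A minimal sketch of that idea (plain counters, not yt-dlp's RetryManager API):

    def fetch_with_budgets(call, is_incomplete, net_retries=3, icd_retries=3):
        while True:
            try:
                response = call()
            except OSError:  # stand-in for the real network exceptions
                if net_retries <= 0:
                    raise
                net_retries -= 1
                continue
            if is_incomplete(response):
                if icd_retries <= 0:
                    return None  # caller degrades this to a warning
                icd_retries -= 1
                continue
            return response
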
@@ -3292,15 +3280,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
chapter_time, chapter_title, duration) chapter_time, chapter_title, duration)
for contents in content_list)), []) for contents in content_list)), [])
def _extract_heatmap(self, data): def _extract_heatmap_from_player_overlay(self, data):
return traverse_obj(data, ( content_list = traverse_obj(data, (
'frameworkUpdates', 'entityBatchUpdate', 'mutations', 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', 'decoratedPlayerBarRenderer', 'playerBar',
lambda _, v: v['payload']['macroMarkersListEntity']['markersList']['markerType'] == 'MARKER_TYPE_HEATMAP', 'multiMarkersPlayerBarRenderer', 'markersMap', ..., 'value', 'heatmap', 'heatmapRenderer', 'heatMarkers', {list}))
'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., { return next(filter(None, (
'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}), traverse_obj(contents, (..., 'heatMarkerRenderer', {
'end_time': {lambda x: (int(x['startMillis']) + int(x['durationMillis'])) / 1000}, 'start_time': ('timeRangeStartMillis', {functools.partial(float_or_none, scale=1000)}),
'value': ('intensityScoreNormalized', {float_or_none}), 'end_time': {lambda x: (x['timeRangeStartMillis'] + x['markerDurationMillis']) / 1000},
})) or None 'value': ('heatMarkerIntensityScoreNormalized', {float_or_none}),
})) for contents in content_list)), None)
def _extract_comment(self, comment_renderer, parent=None): def _extract_comment(self, comment_renderer, parent=None):
comment_id = comment_renderer.get('commentId') comment_id = comment_renderer.get('commentId')
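
Whichever side of the hunk does the digging, the heatmap ends up normalized to {'start_time', 'end_time', 'value'} dicts in seconds. A sketch of that normalization for the newer macroMarkersListEntity shape on the left (the sample marker is made up):

    def normalize_markers(markers):
        return [{
            'start_time': int(m['startMillis']) / 1000,
            'end_time': (int(m['startMillis']) + int(m['durationMillis'])) / 1000,
            'value': float(m['intensityScoreNormalized']),
        } for m in markers]

    print(normalize_markers([
        {'startMillis': '0', 'durationMillis': '5000', 'intensityScoreNormalized': 0.5}]))
    # [{'start_time': 0.0, 'end_time': 5.0, 'value': 0.5}]
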
@@ -4434,7 +4423,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
or self._extract_chapters_from_description(video_description, duration) or self._extract_chapters_from_description(video_description, duration)
or None) or None)
info['heatmap'] = self._extract_heatmap(initial_data) info['heatmap'] = self._extract_heatmap_from_player_overlay(initial_data)
contents = traverse_obj( contents = traverse_obj(
initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'), initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),


@@ -2744,7 +2744,7 @@ def js_to_json(code, vars={}, *, strict=False):
code = re.sub(r'(?:new\s+)?Array\((.*?)\)', r'[\g<1>]', code) code = re.sub(r'(?:new\s+)?Array\((.*?)\)', r'[\g<1>]', code)
code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code) code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
if not strict: if not strict:
code = re.sub(rf'new Date\(({STRING_RE})\)', r'\g<1>', code) code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code) code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
code = re.sub(r'parseInt\([^\d]+(\d+)[^\d]+\)', r'\1', code) code = re.sub(r'parseInt\([^\d]+(\d+)[^\d]+\)', r'\1', code)
code = re.sub(r'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^)]*["\'])\s*\)', r'\1', code) code = re.sub(r'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^)]*["\'])\s*\)', r'\1', code)
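
The non-strict branch above unwraps new Date("...") to its string argument before JSON parsing; the left-hand STRING_RE variant additionally covers single-quoted strings. A runnable sketch of the double-quoted case:

    import json
    import re

    code = '{"ts": new Date("2023-10-07T00:00:00Z")}'
    code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
    print(json.loads(code))  # {'ts': '2023-10-07T00:00:00Z'}
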


@@ -1,109 +0,0 @@
from __future__ import annotations
import bisect
import threading
import time
class ProgressCalculator:
# Time to calculate the speed over (seconds)
SAMPLING_WINDOW = 3
# Minimum timeframe before to sample next downloaded bytes (seconds)
SAMPLING_RATE = 0.05
# Time before showing eta (seconds)
GRACE_PERIOD = 1
def __init__(self, initial: int):
self._initial = initial or 0
self.downloaded = self._initial
self.elapsed: float = 0
self.speed = SmoothValue(0, smoothing=0.7)
self.eta = SmoothValue(None, smoothing=0.9)
self._total = 0
self._start_time = time.monotonic()
self._last_update = self._start_time
self._lock = threading.Lock()
self._thread_sizes: dict[int, int] = {}
self._times = [self._start_time]
self._downloaded = [self.downloaded]
@property
def total(self):
return self._total
@total.setter
def total(self, value: int | None):
with self._lock:
if value is not None and value < self.downloaded:
value = self.downloaded
self._total = value
def thread_reset(self):
current_thread = threading.get_ident()
with self._lock:
self._thread_sizes[current_thread] = 0
def update(self, size: int | None):
if not size:
return
current_thread = threading.get_ident()
with self._lock:
last_size = self._thread_sizes.get(current_thread, 0)
self._thread_sizes[current_thread] = size
self._update(size - last_size)
def _update(self, size: int):
current_time = time.monotonic()
self.downloaded += size
self.elapsed = current_time - self._start_time
if self.total is not None and self.downloaded > self.total:
self._total = self.downloaded
if self._last_update + self.SAMPLING_RATE > current_time:
return
self._last_update = current_time
self._times.append(current_time)
self._downloaded.append(self.downloaded)
offset = bisect.bisect_left(self._times, current_time - self.SAMPLING_WINDOW)
del self._times[:offset]
del self._downloaded[:offset]
if len(self._times) < 2:
self.speed.reset()
self.eta.reset()
return
download_time = current_time - self._times[0]
if not download_time:
return
self.speed.set((self.downloaded - self._downloaded[0]) / download_time)
if self.total and self.speed.value and self.elapsed > self.GRACE_PERIOD:
self.eta.set((self.total - self.downloaded) / self.speed.value)
else:
self.eta.reset()
class SmoothValue:
def __init__(self, initial: float | None, smoothing: float):
self.value = self.smooth = self._initial = initial
self._smoothing = smoothing
def set(self, value: float):
self.value = value
if self.smooth is None:
self.smooth = self.value
else:
self.smooth = (1 - self._smoothing) * value + self._smoothing * self.smooth
def reset(self):
self.value = self.smooth = self._initial
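
Usage sketch for the SmoothValue class deleted above: it is an exponential moving average in which a higher smoothing factor weights history more heavily (the sample values are arbitrary; this relies on the class definition shown in the hunk):

    ema = SmoothValue(None, smoothing=0.7)
    for sample in (100.0, 200.0, 50.0):
        ema.set(sample)
    print(round(ema.smooth, 6))  # 106.0 == 0.3*50 + 0.7*(0.3*200 + 0.7*100)
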


@@ -1,8 +1,8 @@
# Autogenerated by devscripts/update-version.py # Autogenerated by devscripts/update-version.py
__version__ = '2023.10.07' __version__ = '2023.09.24'
RELEASE_GIT_HEAD = '377e85a1797db9e98b78b38203ed9d4ded229991' RELEASE_GIT_HEAD = '088add9567d39b758737e4299a0e619fd89d2e8f'
VARIANT = None VARIANT = None