Compare commits

..

No commits in common. "7b8670327a579d5ba13b20caafab9924a6f0f4b2" and "7f17aea499cb49b14c3eb20b288b3793dc764015" have entirely different histories.

25 changed files with 432 additions and 689 deletions

View File

@ -419,16 +419,22 @@ jobs:
run: | run: |
python -m bundle.pyinstaller python -m bundle.pyinstaller
python -m bundle.pyinstaller --onedir python -m bundle.pyinstaller --onedir
Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_real.exe
Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip
- name: Add migration executable for py2exe - name: Install Requirements (py2exe)
run: | run: |
Copy-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe python devscripts/install_deps.py --include py2exe
- name: Build (py2exe)
run: |
python -m bundle.py2exe
Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe
Move-Item ./dist/yt-dlp_real.exe ./dist/yt-dlp.exe
- name: Verify --update-to - name: Verify --update-to
if: vars.UPDATE_TO_VERIFICATION if: vars.UPDATE_TO_VERIFICATION
run: | run: |
foreach ($name in @("yt-dlp")) { foreach ($name in @("yt-dlp","yt-dlp_min")) {
Copy-Item "./dist/${name}.exe" "./dist/${name}_downgraded.exe" Copy-Item "./dist/${name}.exe" "./dist/${name}_downgraded.exe"
$version = & "./dist/${name}.exe" --version $version = & "./dist/${name}.exe" --version
& "./dist/${name}_downgraded.exe" -v --update-to yt-dlp/yt-dlp@2023.03.04 & "./dist/${name}_downgraded.exe" -v --update-to yt-dlp/yt-dlp@2023.03.04

View File

@ -304,7 +304,7 @@ For extraction to work yt-dlp relies on metadata your extractor extracts and pro
- `id` (media identifier) - `id` (media identifier)
- `url` (media download URL) or `formats` - `url` (media download URL) or `formats`
The aforementioned metadata fields are the critical data without which extraction does not make any sense. If any of them fail to be extracted, then the extractor is considered broken. All other metadata extraction should be completely non-fatal. The aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken. All extractors must make all other metadata extraction non-fatal.
For pornographic sites, appropriate `age_limit` must also be returned. For pornographic sites, appropriate `age_limit` must also be returned.

View File

@ -106,6 +106,7 @@ File|Description
File|Description File|Description
:---|:--- :---|:---
[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win7 SP1+) standalone x86 (32-bit) binary [yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win7 SP1+) standalone x86 (32-bit) binary
[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`<br/> ([Not recommended](#standalone-py2exe-builds-windows))
[yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary [yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary
[yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary [yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary
[yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary [yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary
@ -259,6 +260,18 @@ After installing these, simply run `make`.
You can also run `make yt-dlp` instead to compile only the binary without updating any of the additional files. (The build tools marked with **\*** are not needed for this) You can also run `make yt-dlp` instead to compile only the binary without updating any of the additional files. (The build tools marked with **\*** are not needed for this)
### Standalone Py2Exe Builds (Windows)
While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi`/`requests` and need VC++14** on the target computer to run.
If you wish to build it anyway, install Python (if it is not already installed) and run the following commands:
```
py devscripts/install_deps.py --include py2exe
py devscripts/make_lazy_extractors.py
py -m bundle.py2exe
```
### Related scripts ### Related scripts
* **`devscripts/install_deps.py`** - Install dependencies for yt-dlp. * **`devscripts/install_deps.py`** - Install dependencies for yt-dlp.
@ -335,13 +348,6 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
containing directory ("-" for stdin). Can be containing directory ("-" for stdin). Can be
used multiple times and inside other used multiple times and inside other
configuration files configuration files
--plugin-dirs PATH Path to an additional directory to search
for plugins. This option can be used
multiple times to add multiple directories.
Note that this currently only works for
extractor plugins; postprocessor plugins can
only be loaded from the default plugin
directories
--flat-playlist Do not extract the videos of a playlist, --flat-playlist Do not extract the videos of a playlist,
only list them only list them
--no-flat-playlist Fully extract the videos of a playlist --no-flat-playlist Fully extract the videos of a playlist
@ -1922,7 +1928,7 @@ Plugins can be installed using various methods and locations.
* Plugin packages can be installed and managed using `pip`. See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example. * Plugin packages can be installed and managed using `pip`. See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example.
* Note: plugin files between plugin packages installed with pip must have unique filenames. * Note: plugin files between plugin packages installed with pip must have unique filenames.
* Any path in `PYTHONPATH` is searched in for the `yt_dlp_plugins` namespace folder. * Any path in `PYTHONPATH` is searched in for the `yt_dlp_plugins` namespace folder.
* Note: This does not apply for Pyinstaller builds. * Note: This does not apply for Pyinstaller/py2exe builds.
`.zip`, `.egg` and `.whl` archives containing a `yt_dlp_plugins` namespace folder in their root are also supported as plugin packages. `.zip`, `.egg` and `.whl` archives containing a `yt_dlp_plugins` namespace folder in their root are also supported as plugin packages.

59
bundle/py2exe.py Executable file
View File

@ -0,0 +1,59 @@
#!/usr/bin/env python3
# Allow execution from anywhere
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import warnings
from py2exe import freeze
from devscripts.utils import read_version
VERSION = read_version()
def main():
    """Build the standalone ``yt-dlp`` executable with py2exe.

    Emits a warning that recommends the PyInstaller build instead, then hands
    the build description to py2exe's ``freeze``. The script and icon paths
    are relative, so they resolve against the current working directory.
    """
    # The message names the module-based PyInstaller entry point and lists the
    # packages that the `excludes` option below leaves out of the bundle.
    warnings.warn(
        'py2exe builds do not support pycryptodomex or requests and need VC++14 to run. '
        'It is recommended to run "python -m bundle.pyinstaller" to build using PyInstaller instead')

    freeze(
        console=[{
            'script': './yt_dlp/__main__.py',
            'dest_base': 'yt-dlp',
            'icon_resources': [(1, 'devscripts/logo.ico')],
        }],
        version_info={
            'version': VERSION,
            'description': 'A feature-rich command-line audio/video downloader',
            'comments': 'Official repository: <https://github.com/yt-dlp/yt-dlp>',
            'product_name': 'yt-dlp',
            'product_version': VERSION,
        },
        options={
            'bundle_files': 0,
            'compressed': 1,
            'optimize': 2,
            'dist_dir': './dist',
            'excludes': [
                # py2exe cannot import Crypto
                'Crypto',
                'Cryptodome',
                # requests >=2.32.0 breaks py2exe builds due to certifi dependency
                'requests',
                'urllib3',
            ],
            'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
            # Modules that are only imported dynamically must be added here
            'includes': ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated',
                         'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated'],
        },
        zipfile=None,
    )
if __name__ == '__main__':
main()

View File

@ -199,12 +199,12 @@
}, },
{ {
"action": "add", "action": "add",
"when": "a886cf3e900f4a2ec00af705f883539269545609", "when": "64d84d75ca8c19ec06558cc7c511f5f4f7a822bc",
"short": "[priority] **py2exe is no longer supported**\nThis release's `yt-dlp_min.exe` will be the last, and it's actually a PyInstaller-bundled executable so that yt-dlp users updating their py2exe build with `-U` will be automatically migrated. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10087)" "short": "[priority] **py2exe is no longer supported**\nThis release's `yt-dlp_min.exe` will be the last, and it's actually a PyInstaller-bundled executable so that yt-dlp users updating their py2exe build with `-U` will be automatically migrated. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10087)"
}, },
{ {
"action": "add", "action": "add",
"when": "a886cf3e900f4a2ec00af705f883539269545609", "when": "64d84d75ca8c19ec06558cc7c511f5f4f7a822bc",
"short": "[priority] **Following this release, yt-dlp's Python dependencies *must* be installed using the `default` group**\nIf you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's optional dependencies (which were previously included by default). [Read more](https://github.com/yt-dlp/yt-dlp/pull/11255)" "short": "[priority] **Following this release, yt-dlp's Python dependencies *must* be installed using the `default` group**\nIf you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's optional dependencies (which were previously included by default). [Read more](https://github.com/yt-dlp/yt-dlp/pull/11255)"
} }
] ]

View File

@ -86,6 +86,9 @@ test = [
pyinstaller = [ pyinstaller = [
"pyinstaller>=6.10.0", # Windows temp cleanup fixed in 6.10.0 "pyinstaller>=6.10.0", # Windows temp cleanup fixed in 6.10.0
] ]
py2exe = [
"py2exe>=0.12",
]
[project.urls] [project.urls]
Documentation = "https://github.com/yt-dlp/yt-dlp#readme" Documentation = "https://github.com/yt-dlp/yt-dlp#readme"

View File

@ -10,7 +10,6 @@ TEST_DATA_DIR = Path(os.path.dirname(os.path.abspath(__file__)), 'testdata')
sys.path.append(str(TEST_DATA_DIR)) sys.path.append(str(TEST_DATA_DIR))
importlib.invalidate_caches() importlib.invalidate_caches()
from yt_dlp.utils import Config
from yt_dlp.plugins import PACKAGE_NAME, directories, load_plugins from yt_dlp.plugins import PACKAGE_NAME, directories, load_plugins
@ -69,24 +68,6 @@ class TestPlugins(unittest.TestCase):
os.remove(zip_path) os.remove(zip_path)
importlib.invalidate_caches() # reset the import caches importlib.invalidate_caches() # reset the import caches
def test_plugin_dirs(self):
# Internal plugin dirs hack for CLI --plugin-dirs
# To be replaced with proper system later
custom_plugin_dir = TEST_DATA_DIR / 'plugin_packages'
Config._plugin_dirs = [str(custom_plugin_dir)]
importlib.invalidate_caches() # reset the import caches
try:
package = importlib.import_module(f'{PACKAGE_NAME}.extractor')
self.assertIn(custom_plugin_dir / 'testpackage' / PACKAGE_NAME / 'extractor', map(Path, package.__path__))
plugins_ie = load_plugins('extractor', 'IE')
self.assertIn('PackagePluginIE', plugins_ie.keys())
finally:
Config._plugin_dirs = []
importlib.invalidate_caches() # reset the import caches
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -1,5 +0,0 @@
from yt_dlp.extractor.common import InfoExtractor
class PackagePluginIE(InfoExtractor):
pass

View File

@ -34,7 +34,6 @@ from .postprocessor import (
) )
from .update import Updater from .update import Updater
from .utils import ( from .utils import (
Config,
NO_DEFAULT, NO_DEFAULT,
POSTPROCESS_WHEN, POSTPROCESS_WHEN,
DateRange, DateRange,
@ -968,10 +967,6 @@ def _real_main(argv=None):
parser, opts, all_urls, ydl_opts = parse_options(argv) parser, opts, all_urls, ydl_opts = parse_options(argv)
# HACK: Set the plugin dirs early on
# TODO(coletdjnz): remove when plugin globals system is implemented
Config._plugin_dirs = opts.plugin_dirs
# Dump user agent # Dump user agent
if opts.dump_user_agent: if opts.dump_user_agent:
ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent']) ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent'])

View File

@ -363,10 +363,7 @@ from .ccc import (
) )
from .ccma import CCMAIE from .ccma import CCMAIE
from .cctv import CCTVIE from .cctv import CCTVIE
from .cda import ( from .cda import CDAIE
CDAIE,
CDAFolderIE,
)
from .cellebrite import CellebriteIE from .cellebrite import CellebriteIE
from .ceskatelevize import CeskaTelevizeIE from .ceskatelevize import CeskaTelevizeIE
from .cgtn import CGTNIE from .cgtn import CGTNIE

View File

@ -33,21 +33,21 @@ class AfreecaTVBaseIE(InfoExtractor):
} }
response = self._download_json( response = self._download_json(
'https://login.sooplive.co.kr/app/LoginAction.php', None, 'https://login.afreecatv.com/app/LoginAction.php', None,
'Logging in', data=urlencode_postdata(login_form)) 'Logging in', data=urlencode_postdata(login_form))
_ERRORS = { _ERRORS = {
-4: 'Your account has been suspended due to a violation of our terms and policies.', -4: 'Your account has been suspended due to a violation of our terms and policies.',
-5: 'https://member.sooplive.co.kr/app/user_delete_progress.php', -5: 'https://member.afreecatv.com/app/user_delete_progress.php',
-6: 'https://login.sooplive.co.kr/membership/changeMember.php', -6: 'https://login.afreecatv.com/membership/changeMember.php',
-8: "Hello! Soop here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.", -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
-9: 'https://member.sooplive.co.kr/app/pop_login_block.php', -9: 'https://member.afreecatv.com/app/pop_login_block.php',
-11: 'https://login.sooplive.co.kr/afreeca/second_login.php', -11: 'https://login.afreecatv.com/afreeca/second_login.php',
-12: 'https://member.sooplive.co.kr/app/user_security.php', -12: 'https://member.afreecatv.com/app/user_security.php',
0: 'The username does not exist or you have entered the wrong password.', 0: 'The username does not exist or you have entered the wrong password.',
-1: 'The username does not exist or you have entered the wrong password.', -1: 'The username does not exist or you have entered the wrong password.',
-3: 'You have entered your username/password incorrectly.', -3: 'You have entered your username/password incorrectly.',
-7: 'You cannot use your Global Soop account to access Korean Soop.', -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
-10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.', -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
-32008: 'You have failed to log in. Please contact our Help Center.', -32008: 'You have failed to log in. Please contact our Help Center.',
} }
@ -61,40 +61,76 @@ class AfreecaTVBaseIE(InfoExtractor):
def _call_api(self, endpoint, display_id, data=None, headers=None, query=None): def _call_api(self, endpoint, display_id, data=None, headers=None, query=None):
return self._download_json(Request( return self._download_json(Request(
f'https://api.m.sooplive.co.kr/{endpoint}', f'https://api.m.afreecatv.com/{endpoint}',
data=data, headers=headers, query=query, data=data, headers=headers, query=query,
extensions={'legacy_ssl': True}), display_id, extensions={'legacy_ssl': True}), display_id,
'Downloading API JSON', 'Unable to download API JSON') 'Downloading API JSON', 'Unable to download API JSON')
class AfreecaTVIE(AfreecaTVBaseIE): class AfreecaTVIE(AfreecaTVBaseIE):
IE_NAME = 'soop' IE_NAME = 'afreecatv'
IE_DESC = 'sooplive.co.kr' IE_DESC = 'afreecatv.com'
_VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/(?:PLAYER/STATION|player)/(?P<id>\d+)/?(?:$|[?#&])' _VALID_URL = r'''(?x)
https?://
(?:
(?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
(?:
/app/(?:index|read_ucc_bbs)\.cgi|
/player/[Pp]layer\.(?:swf|html)
)\?.*?\bnTitleNo=|
vod\.afreecatv\.com/(PLAYER/STATION|player)/
)
(?P<id>\d+)/?(?:$|[?#&])
'''
_TESTS = [{ _TESTS = [{
'url': 'https://vod.sooplive.co.kr/player/96753363', 'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
'info_dict': { 'info_dict': {
'id': '20230108_9FF5BEE1_244432674_1', 'id': '36164052',
'ext': 'mp4', 'ext': 'mp4',
'uploader_id': 'rlantnghks', 'title': '데일리 에이프릴 요정들의 시상식!',
'uploader': '페이즈으', 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'duration': 10840, 'uploader': 'dailyapril',
'thumbnail': r're:https?://videoimg\.sooplive\.co/.kr/.+', 'uploader_id': 'dailyapril',
'upload_date': '20230108', 'upload_date': '20160503',
'timestamp': 1673218805,
'title': '젠지 페이즈',
}, },
'params': { 'skip': 'Video is gone',
'skip_download': True, }, {
'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
'info_dict': {
'id': '36153164',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
},
'playlist_count': 2,
'playlist': [{
'md5': 'd8b7c174568da61d774ef0203159bf97',
'info_dict': {
'id': '36153164_1',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
'upload_date': '20160502',
}, },
}, {
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
'info_dict': {
'id': '36153164_2',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
'upload_date': '20160502',
},
}],
'skip': 'Video is gone',
}, { }, {
# non standard key # non standard key
'url': 'http://vod.sooplive.co.kr/PLAYER/STATION/20515605', 'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
'info_dict': { 'info_dict': {
'id': '20170411_BE689A0E_190960999_1_2_h', 'id': '20170411_BE689A0E_190960999_1_2_h',
'ext': 'mp4', 'ext': 'mp4',
'title': '혼자사는여자집', 'title': '혼자사는여자집',
'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+', 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': '♥이슬이', 'uploader': '♥이슬이',
'uploader_id': 'dasl8121', 'uploader_id': 'dasl8121',
'upload_date': '20170411', 'upload_date': '20170411',
@ -106,12 +142,12 @@ class AfreecaTVIE(AfreecaTVBaseIE):
}, },
}, { }, {
# adult content # adult content
'url': 'https://vod.sooplive.co.kr/player/97267690', 'url': 'https://vod.afreecatv.com/player/97267690',
'info_dict': { 'info_dict': {
'id': '20180327_27901457_202289533_1', 'id': '20180327_27901457_202289533_1',
'ext': 'mp4', 'ext': 'mp4',
'title': '[생]빨개요♥ (part 1)', 'title': '[생]빨개요♥ (part 1)',
'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+', 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': '[SA]서아', 'uploader': '[SA]서아',
'uploader_id': 'bjdyrksu', 'uploader_id': 'bjdyrksu',
'upload_date': '20180327', 'upload_date': '20180327',
@ -121,17 +157,36 @@ class AfreecaTVIE(AfreecaTVBaseIE):
'skip_download': True, 'skip_download': True,
}, },
'skip': 'The VOD does not exist', 'skip': 'The VOD does not exist',
}, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
'only_matching': True,
}, {
'url': 'https://vod.afreecatv.com/player/96753363',
'info_dict': {
'id': '20230108_9FF5BEE1_244432674_1',
'ext': 'mp4',
'uploader_id': 'rlantnghks',
'uploader': '페이즈으',
'duration': 10840,
'thumbnail': r're:https?://videoimg\.afreecatv\.com/.+',
'upload_date': '20230108',
'timestamp': 1673218805,
'title': '젠지 페이즈',
},
'params': {
'skip_download': True,
},
}, { }, {
# adult content # adult content
'url': 'https://vod.sooplive.co.kr/player/70395877', 'url': 'https://vod.afreecatv.com/player/70395877',
'only_matching': True, 'only_matching': True,
}, { }, {
# subscribers only # subscribers only
'url': 'https://vod.sooplive.co.kr/player/104647403', 'url': 'https://vod.afreecatv.com/player/104647403',
'only_matching': True, 'only_matching': True,
}, { }, {
# private # private
'url': 'https://vod.sooplive.co.kr/player/81669846', 'url': 'https://vod.afreecatv.com/player/81669846',
'only_matching': True, 'only_matching': True,
}] }]
@ -207,11 +262,11 @@ class AfreecaTVIE(AfreecaTVBaseIE):
class AfreecaTVCatchStoryIE(AfreecaTVBaseIE): class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
IE_NAME = 'soop:catchstory' IE_NAME = 'afreecatv:catchstory'
IE_DESC = 'sooplive.co.kr catch story' IE_DESC = 'afreecatv.com catch story'
_VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/player/(?P<id>\d+)/catchstory' _VALID_URL = r'https?://vod\.afreecatv\.com/player/(?P<id>\d+)/catchstory'
_TESTS = [{ _TESTS = [{
'url': 'https://vod.sooplive.co.kr/player/103247/catchstory', 'url': 'https://vod.afreecatv.com/player/103247/catchstory',
'info_dict': { 'info_dict': {
'id': '103247', 'id': '103247',
}, },
@ -244,11 +299,11 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
class AfreecaTVLiveIE(AfreecaTVBaseIE): class AfreecaTVLiveIE(AfreecaTVBaseIE):
IE_NAME = 'soop:live' IE_NAME = 'afreecatv:live'
IE_DESC = 'sooplive.co.kr livestreams' IE_DESC = 'afreecatv.com livestreams'
_VALID_URL = r'https?://play\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)(?:/(?P<bno>\d+))?' _VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
_TESTS = [{ _TESTS = [{
'url': 'https://play.sooplive.co.kr/pyh3646/237852185', 'url': 'https://play.afreecatv.com/pyh3646/237852185',
'info_dict': { 'info_dict': {
'id': '237852185', 'id': '237852185',
'ext': 'mp4', 'ext': 'mp4',
@ -260,30 +315,30 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
}, },
'skip': 'Livestream has ended', 'skip': 'Livestream has ended',
}, { }, {
'url': 'https://play.sooplive.co.kr/pyh3646/237852185', 'url': 'https://play.afreecatv.com/pyh3646/237852185',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://play.sooplive.co.kr/pyh3646', 'url': 'https://play.afreecatv.com/pyh3646',
'only_matching': True, 'only_matching': True,
}] }]
_LIVE_API_URL = 'https://live.sooplive.co.kr/afreeca/player_live_api.php' _LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php'
_WORKING_CDNS = [ _WORKING_CDNS = [
'gcp_cdn', # live-global-cdn-v02.sooplive.co.kr 'gcp_cdn', # live-global-cdn-v02.afreecatv.com
'gs_cdn_pc_app', # pc-app.stream.sooplive.co.kr 'gs_cdn_pc_app', # pc-app.stream.afreecatv.com
'gs_cdn_mobile_web', # mobile-web.stream.sooplive.co.kr 'gs_cdn_mobile_web', # mobile-web.stream.afreecatv.com
'gs_cdn_pc_web', # pc-web.stream.sooplive.co.kr 'gs_cdn_pc_web', # pc-web.stream.afreecatv.com
] ]
_BAD_CDNS = [ _BAD_CDNS = [
'gs_cdn', # chromecast.afreeca.gscdn.com (cannot resolve) 'gs_cdn', # chromecast.afreeca.gscdn.com (cannot resolve)
'gs_cdn_chromecast', # chromecast.stream.sooplive.co.kr (HTTP Error 400) 'gs_cdn_chromecast', # chromecast.stream.afreecatv.com (HTTP Error 400)
'azure_cdn', # live-global-cdn-v01.sooplive.co.kr (cannot resolve) 'azure_cdn', # live-global-cdn-v01.afreecatv.com (cannot resolve)
'aws_cf', # live-global-cdn-v03.sooplive.co.kr (cannot resolve) 'aws_cf', # live-global-cdn-v03.afreecatv.com (cannot resolve)
'kt_cdn', # kt.stream.sooplive.co.kr (HTTP Error 400) 'kt_cdn', # kt.stream.afreecatv.com (HTTP Error 400)
] ]
def _extract_formats(self, channel_info, broadcast_no, aid): def _extract_formats(self, channel_info, broadcast_no, aid):
stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.sooplive.co.kr' stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
# If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs # If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs
default_cdn_ids = orderedSet([ default_cdn_ids = orderedSet([
@ -303,7 +358,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
try: try:
return self._extract_m3u8_formats( return self._extract_m3u8_formats(
m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid}, m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid},
headers={'Referer': 'https://play.sooplive.co.kr/'}) headers={'Referer': 'https://play.afreecatv.com/'})
except ExtractorError as e: except ExtractorError as e:
if attempt == len(cdn_ids): if attempt == len(cdn_ids):
raise raise
@ -319,13 +374,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
broadcaster_id = channel_info.get('BJID') or broadcaster_id broadcaster_id = channel_info.get('BJID') or broadcaster_id
broadcast_no = channel_info.get('BNO') or broadcast_no broadcast_no = channel_info.get('BNO') or broadcast_no
if not broadcast_no: if not broadcast_no:
result = channel_info.get('RESULT')
if result == 0:
raise UserNotLive(video_id=broadcaster_id) raise UserNotLive(video_id=broadcaster_id)
elif result == -6:
self.raise_login_required(
'This channel is streaming for subscribers only', method='password')
raise ExtractorError('Unable to extract broadcast number')
password = self.get_param('videopassword') password = self.get_param('videopassword')
if channel_info.get('BPWD') == 'Y' and password is None: if channel_info.get('BPWD') == 'Y' and password is None:
@ -354,7 +403,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
formats = self._extract_formats(channel_info, broadcast_no, aid) formats = self._extract_formats(channel_info, broadcast_no, aid)
station_info = traverse_obj(self._download_json( station_info = traverse_obj(self._download_json(
'https://st.sooplive.co.kr/api/get_station_status.php', broadcast_no, 'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
'Downloading channel metadata', 'Unable to download channel metadata', 'Downloading channel metadata', 'Unable to download channel metadata',
query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {} query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {}
@ -370,11 +419,11 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
} }
class AfreecaTVUserIE(AfreecaTVBaseIE): class AfreecaTVUserIE(InfoExtractor):
IE_NAME = 'soop:user' IE_NAME = 'afreecatv:user'
_VALID_URL = r'https?://ch\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)/vods/?(?P<slug_type>[^/?#]+)?' _VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?'
_TESTS = [{ _TESTS = [{
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/review', 'url': 'https://bj.afreecatv.com/ryuryu24/vods/review',
'info_dict': { 'info_dict': {
'_type': 'playlist', '_type': 'playlist',
'id': 'ryuryu24', 'id': 'ryuryu24',
@ -382,7 +431,7 @@ class AfreecaTVUserIE(AfreecaTVBaseIE):
}, },
'playlist_count': 218, 'playlist_count': 218,
}, { }, {
'url': 'https://ch.sooplive.co.kr/parang1995/vods/highlight', 'url': 'https://bj.afreecatv.com/parang1995/vods/highlight',
'info_dict': { 'info_dict': {
'_type': 'playlist', '_type': 'playlist',
'id': 'parang1995', 'id': 'parang1995',
@ -390,7 +439,7 @@ class AfreecaTVUserIE(AfreecaTVBaseIE):
}, },
'playlist_count': 997, 'playlist_count': 997,
}, { }, {
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods', 'url': 'https://bj.afreecatv.com/ryuryu24/vods',
'info_dict': { 'info_dict': {
'_type': 'playlist', '_type': 'playlist',
'id': 'ryuryu24', 'id': 'ryuryu24',
@ -398,7 +447,7 @@ class AfreecaTVUserIE(AfreecaTVBaseIE):
}, },
'playlist_count': 221, 'playlist_count': 221,
}, { }, {
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/balloonclip', 'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip',
'info_dict': { 'info_dict': {
'_type': 'playlist', '_type': 'playlist',
'id': 'ryuryu24', 'id': 'ryuryu24',
@ -410,12 +459,12 @@ class AfreecaTVUserIE(AfreecaTVBaseIE):
def _fetch_page(self, user_id, user_type, page): def _fetch_page(self, user_id, user_type, page):
page += 1 page += 1
info = self._download_json(f'https://chapi.sooplive.co.kr/api/{user_id}/vods/{user_type}', user_id, info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id,
query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'}, query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'},
note=f'Downloading {user_type} video page {page}') note=f'Downloading {user_type} video page {page}')
for item in info['data']: for item in info['data']:
yield self.url_result( yield self.url_result(
f'https://vod.sooplive.co.kr/player/{item["title_no"]}/', AfreecaTVIE, item['title_no']) f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
def _real_extract(self, url): def _real_extract(self, url):
user_id, user_type = self._match_valid_url(url).group('id', 'slug_type') user_id, user_type = self._match_valid_url(url).group('id', 'slug_type')

View File

@ -4,6 +4,7 @@ import json
import re import re
import time import time
import urllib.parse import urllib.parse
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest from ..networking import HEADRequest
@ -11,6 +12,7 @@ from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
int_or_none, int_or_none,
join_nonempty,
js_to_json, js_to_json,
mimetype2ext, mimetype2ext,
orderedSet, orderedSet,
@ -522,13 +524,14 @@ class CBCGemIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
# This is a normal, public, TV show video # This is a normal, public, TV show video
'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01', 'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
'md5': '93dbb31c74a8e45b378cf13bd3f6f11e',
'info_dict': { 'info_dict': {
'id': 'schitts-creek/s06e01', 'id': 'schitts-creek/s06e01',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Smoke Signals', 'title': 'Smoke Signals',
'description': 'md5:929868d20021c924020641769eb3e7f1', 'description': 'md5:929868d20021c924020641769eb3e7f1',
'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_06e01_thumbnail_v01\.jpg', 'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_06e01_thumbnail_v01.jpg?im=Resize=(Size)',
'duration': 1324, 'duration': 1314,
'categories': ['comedy'], 'categories': ['comedy'],
'series': 'Schitt\'s Creek', 'series': 'Schitt\'s Creek',
'season': 'Season 6', 'season': 'Season 6',
@ -536,21 +539,19 @@ class CBCGemIE(InfoExtractor):
'episode': 'Smoke Signals', 'episode': 'Smoke Signals',
'episode_number': 1, 'episode_number': 1,
'episode_id': 'schitts-creek/s06e01', 'episode_id': 'schitts-creek/s06e01',
'upload_date': '20210618',
'timestamp': 1623988800,
'release_date': '20200107',
'release_timestamp': 1578427200,
}, },
'params': {'format': 'bv'}, 'params': {'format': 'bv'},
'skip': 'Geo-restricted to Canada',
}, { }, {
# This video requires an account in the browser, but works fine in yt-dlp # This video requires an account in the browser, but works fine in yt-dlp
'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01', 'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01',
'md5': '297a9600f554f2258aed01514226a697',
'info_dict': { 'info_dict': {
'id': 'schitts-creek/s01e01', 'id': 'schitts-creek/s01e01',
'ext': 'mp4', 'ext': 'mp4',
'title': 'The Cup Runneth Over', 'title': 'The Cup Runneth Over',
'description': 'md5:9bca14ea49ab808097530eb05a29e797', 'description': 'md5:9bca14ea49ab808097530eb05a29e797',
'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_01e01_thumbnail_v01\.jpg', 'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_01e01_thumbnail_v01.jpg?im=Resize=(Size)',
'series': 'Schitt\'s Creek', 'series': 'Schitt\'s Creek',
'season_number': 1, 'season_number': 1,
'season': 'Season 1', 'season': 'Season 1',
@ -559,12 +560,9 @@ class CBCGemIE(InfoExtractor):
'episode_id': 'schitts-creek/s01e01', 'episode_id': 'schitts-creek/s01e01',
'duration': 1309, 'duration': 1309,
'categories': ['comedy'], 'categories': ['comedy'],
'upload_date': '20210617',
'timestamp': 1623902400,
'release_date': '20151124',
'release_timestamp': 1448323200,
}, },
'params': {'format': 'bv'}, 'params': {'format': 'bv'},
'skip': 'Geo-restricted to Canada',
}, { }, {
'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01', 'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01',
'only_matching': True, 'only_matching': True,
@ -633,6 +631,38 @@ class CBCGemIE(InfoExtractor):
return return
self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token') self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token')
def _find_secret_formats(self, formats, video_id):
    """Yield "secret" variants of the first format that carries video.

    The `filter=` argument is stripped from the `Manifest(...)` part of that
    format's URL; the same URL with `format=` stripped as well is downloaded
    as XML (non-fatally). For every quality level found under an element whose
    `Type` attribute is `video`, a copy of the base format is yielded with the
    quality-specific URL, dimensions and bitrate substituted.

    Nothing is yielded when no format has video or when the XML download
    does not produce an element tree.
    """
    base_format = next((fmt for fmt in formats if fmt.get('vcodec') != 'none'), None)
    if not base_format:
        return

    # URL template for the yielded formats: same manifest, without the `filter=` argument
    base_url = re.sub(r'(Manifest\(.*?),filter=[\w-]+(.*?\))', r'\1\2', base_format['url'])
    # URL that is actually fetched as XML: additionally without the `format=` argument
    manifest_url = re.sub(r'(Manifest\(.*?),format=[\w-]+(.*?\))', r'\1\2', base_url)

    secret_xml = self._download_xml(
        manifest_url, video_id, note='Downloading secret XML', fatal=False)
    if not isinstance(secret_xml, xml.etree.ElementTree.Element):
        return

    for stream in secret_xml:
        if stream.attrib.get('Type') == 'video':
            for quality in stream:
                attrs = quality.attrib
                bitrate = int_or_none(attrs.get('Bitrate'))
                # Entries without a usable bitrate or without an `Index` attribute are ignored
                if bitrate and 'Index' in attrs:
                    height = int_or_none(attrs.get('MaxHeight'))
                    yield {
                        **base_format,
                        'format_id': join_nonempty('sec', height),
                        # Note: \g<1> is necessary instead of \1 since bitrate is a number
                        'url': re.sub(r'(QualityLevels\()\d+(\))', fr'\g<1>{bitrate}\2', base_url),
                        'width': int_or_none(attrs.get('MaxWidth')),
                        'tbr': bitrate / 1000.0,
                        'height': height,
                    }
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_info = self._download_json( video_info = self._download_json(
@ -646,6 +676,7 @@ class CBCGemIE(InfoExtractor):
else: else:
headers = {} headers = {}
m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers) m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers)
m3u8_url = m3u8_info.get('url')
if m3u8_info.get('errorCode') == 1: if m3u8_info.get('errorCode') == 1:
self.raise_geo_restricted(countries=['CA']) self.raise_geo_restricted(countries=['CA'])
@ -654,9 +685,9 @@ class CBCGemIE(InfoExtractor):
elif m3u8_info.get('errorCode') != 0: elif m3u8_info.get('errorCode') != 0:
raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}') raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}')
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls')
m3u8_info['url'], video_id, 'mp4', m3u8_id='hls', query={'manifestType': ''})
self._remove_duplicate_formats(formats) self._remove_duplicate_formats(formats)
formats.extend(self._find_secret_formats(formats, video_id))
for fmt in formats: for fmt in formats:
if fmt.get('vcodec') == 'none': if fmt.get('vcodec') == 'none':
@ -672,21 +703,20 @@ class CBCGemIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': video_info['title'],
'description': video_info.get('description'),
'thumbnail': video_info.get('image'),
'series': video_info.get('series'),
'season_number': video_info.get('season'),
'season': f'Season {video_info.get("season")}',
'episode_number': video_info.get('episode'),
'episode': video_info.get('title'),
'episode_id': video_id, 'episode_id': video_id,
'duration': video_info.get('duration'),
'categories': [video_info.get('category')],
'formats': formats, 'formats': formats,
**traverse_obj(video_info, { 'release_timestamp': video_info.get('airDate'),
'title': ('title', {str}), 'timestamp': video_info.get('availableDate'),
'episode': ('title', {str}),
'description': ('description', {str}),
'thumbnail': ('image', {url_or_none}),
'series': ('series', {str}),
'season_number': ('season', {int_or_none}),
'episode_number': ('episode', {int_or_none}),
'duration': ('duration', {int_or_none}),
'categories': ('category', {str}, all),
'release_timestamp': ('airDate', {int_or_none(scale=1000)}),
'timestamp': ('availableDate', {int_or_none(scale=1000)}),
}),
} }

View File

@ -12,7 +12,6 @@ from .common import InfoExtractor
from ..compat import compat_ord from ..compat import compat_ord
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
OnDemandPagedList,
float_or_none, float_or_none,
int_or_none, int_or_none,
merge_dicts, merge_dicts,
@ -352,50 +351,3 @@ class CDAIE(InfoExtractor):
extract_format(webpage, resolution) extract_format(webpage, resolution)
return merge_dicts(info_dict, info) return merge_dicts(info_dict, info)
class CDAFolderIE(InfoExtractor):
    # Playlist extractor for cda.pl folder URLs; every video link found on a
    # folder page is delegated to CDAIE.
    _MAX_PAGE_SIZE = 36  # page size handed to OnDemandPagedList
    _VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>\w+)/folder/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.cda.pl/domino264/folder/31188385',
        'info_dict': {
            'id': '31188385',
            'title': 'SERIA DRUGA',
        },
        'playlist_mincount': 13,
    }, {
        'url': 'https://www.cda.pl/smiechawaTV/folder/2664592/vfilm',
        'info_dict': {
            'id': '2664592',
            'title': 'VideoDowcipy - wszystkie odcinki',
        },
        'playlist_mincount': 71,
    }, {
        'url': 'https://www.cda.pl/DeliciousBeauty/folder/19129979/vfilm',
        'info_dict': {
            'id': '19129979',
            'title': 'TESTY KOSMETYKÓW',
        },
        'playlist_mincount': 139,
    }]

    def _real_extract(self, url):
        """Return a lazily paged playlist of the videos linked from a folder."""
        mobj = self._match_valid_url(url)
        folder_id = mobj.group('id')
        channel = mobj.group('channel')
        # The originally requested page is only used for the playlist title
        webpage = self._download_webpage(url, folder_id)

        def fetch_folder_page(page):
            # `page` is zero-based, the site's page numbers start at 1.
            # A 404 response is passed as an expected status, so its body is
            # still scanned for links like any other page.
            page_html = self._download_webpage(
                f'https://www.cda.pl/{channel}/folder/{folder_id}/vfilm/{page + 1}', folder_id,
                f'Downloading page {page + 1}', expected_status=404)
            for video_id in re.findall(r'<a[^>]+href="/video/([0-9a-z]+)"', page_html):
                yield self.url_result(f'https://www.cda.pl/video/{video_id}', CDAIE, video_id)

        entries = OnDemandPagedList(fetch_folder_page, self._MAX_PAGE_SIZE)
        return self.playlist_result(entries, folder_id, self._og_search_title(webpage))

View File

@ -3,7 +3,7 @@ from .nexx import NexxIE
class FunkIE(InfoExtractor): class FunkIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|origin|play)\.)?funk\.net/(?:channel|playlist)/[^/?#]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821', 'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821',
'md5': '8610449476156f338761a75391b0017d', 'md5': '8610449476156f338761a75391b0017d',
@ -27,9 +27,6 @@ class FunkIE(InfoExtractor):
}, { }, {
'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699', 'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://play.funk.net/playlist/neuesteVideos/george-floyd-wenn-die-polizei-toetet-der-fall-2004391',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -37,7 +37,7 @@ class ImgurBaseIE(InfoExtractor):
class ImgurIE(ImgurBaseIE): class ImgurIE(ImgurBaseIE):
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)' _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://imgur.com/A61SaA1', 'url': 'https://imgur.com/A61SaA1',
@ -54,22 +54,6 @@ class ImgurIE(ImgurBaseIE):
'like_count': int, 'like_count': int,
'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg', 'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
}, },
}, {
# Test with URL slug
'url': 'https://imgur.com/mrw-gifv-is-up-running-without-any-bugs-A61SaA1',
'info_dict': {
'id': 'A61SaA1',
'ext': 'mp4',
'title': 'MRW gifv is up and running without any bugs',
'timestamp': 1416446068,
'upload_date': '20141120',
'dislike_count': int,
'comment_count': int,
'release_timestamp': 1416446068,
'release_date': '20141120',
'like_count': int,
'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
},
}, { }, {
'url': 'https://i.imgur.com/A61SaA1.gifv', 'url': 'https://i.imgur.com/A61SaA1.gifv',
'only_matching': True, 'only_matching': True,
@ -108,7 +92,6 @@ class ImgurIE(ImgurBaseIE):
'comment_count': int, 'comment_count': int,
'release_timestamp': 1710491255, 'release_timestamp': 1710491255,
'release_date': '20240315', 'release_date': '20240315',
'thumbnail': 'https://i.imgur.com/zV03bd5h.jpg',
}, },
}] }]
@ -225,10 +208,7 @@ class ImgurIE(ImgurBaseIE):
}), get_all=False), }), get_all=False),
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'thumbnails': [{ 'thumbnail': url_or_none(search('thumbnailUrl')),
'url': thumbnail_url,
'http_headers': {'Accept': '*/*'},
}] if (thumbnail_url := search(['thumbnailUrl', 'twitter:image', 'og:image'])) else None,
'http_headers': {'Accept': '*/*'}, 'http_headers': {'Accept': '*/*'},
} }
@ -272,9 +252,17 @@ class ImgurGalleryBaseIE(ImgurBaseIE):
class ImgurGalleryIE(ImgurGalleryBaseIE): class ImgurGalleryIE(ImgurGalleryBaseIE):
IE_NAME = 'imgur:gallery' IE_NAME = 'imgur:gallery'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)' _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://imgur.com/gallery/Q95ko',
'info_dict': {
'id': 'Q95ko',
'title': 'Adding faces make every GIF better',
},
'playlist_count': 25,
'skip': 'Zoinks! You\'ve taken a wrong turn.',
}, {
# TODO: static images - replace with animated/video gallery # TODO: static images - replace with animated/video gallery
'url': 'http://imgur.com/topic/Aww/ll5Vk', 'url': 'http://imgur.com/topic/Aww/ll5Vk',
'only_matching': True, 'only_matching': True,
@ -292,27 +280,7 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
'release_timestamp': 1358554297, 'release_timestamp': 1358554297,
'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg', 'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
'release_date': '20130119', 'release_date': '20130119',
'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand', 'uploader_url': 'https://i.imgur.com/u3R4I2S_d.png?maxwidth=290&fidelity=grand',
'comment_count': int,
'dislike_count': int,
'like_count': int,
},
}, {
# Test with slug
'url': 'https://imgur.com/gallery/classic-steve-carell-gif-cracks-me-up-everytime-repost-downvotes-YcAQlkx',
'add_ies': ['Imgur'],
'info_dict': {
'id': 'YcAQlkx',
'ext': 'mp4',
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
'timestamp': 1358554297,
'upload_date': '20130119',
'uploader_id': '1648642',
'uploader': 'wittyusernamehere',
'release_timestamp': 1358554297,
'release_date': '20130119',
'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand',
'comment_count': int, 'comment_count': int,
'dislike_count': int, 'dislike_count': int,
'like_count': int, 'like_count': int,
@ -349,13 +317,6 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
'title': 'Penguins !', 'title': 'Penguins !',
}, },
'playlist_count': 3, 'playlist_count': 3,
}, {
'url': 'https://imgur.com/t/unmuted/penguins-penguins-6lAn9VQ',
'info_dict': {
'id': '6lAn9VQ',
'title': 'Penguins !',
},
'playlist_count': 3,
}, { }, {
'url': 'https://imgur.com/t/unmuted/kx2uD3C', 'url': 'https://imgur.com/t/unmuted/kx2uD3C',
'add_ies': ['Imgur'], 'add_ies': ['Imgur'],
@ -396,7 +357,7 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
class ImgurAlbumIE(ImgurGalleryBaseIE): class ImgurAlbumIE(ImgurGalleryBaseIE):
IE_NAME = 'imgur:album' IE_NAME = 'imgur:album'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)' _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
_GALLERY = False _GALLERY = False
_TESTS = [{ _TESTS = [{
# TODO: only static images - replace with animated/video gallery # TODO: only static images - replace with animated/video gallery
@ -411,14 +372,6 @@ class ImgurAlbumIE(ImgurGalleryBaseIE):
'title': 'enen-no-shouboutai', 'title': 'enen-no-shouboutai',
}, },
'playlist_count': 2, 'playlist_count': 2,
}, {
# Test with URL slug
'url': 'https://imgur.com/a/enen-no-shouboutai-iX265HX',
'info_dict': {
'id': 'iX265HX',
'title': 'enen-no-shouboutai',
},
'playlist_count': 2,
}, { }, {
'url': 'https://imgur.com/a/8pih2Ed', 'url': 'https://imgur.com/a/8pih2Ed',
'info_dict': { 'info_dict': {

View File

@ -1,13 +1,14 @@
from .telecinco import TelecincoBaseIE from .telecinco import TelecincoIE
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
) )
class MiTeleIE(TelecincoBaseIE): class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE
IE_DESC = 'mitele.es' IE_DESC = 'mitele.es'
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player' _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
_TESTS = [{ _TESTS = [{
'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player', 'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
'info_dict': { 'info_dict': {
@ -26,7 +27,6 @@ class MiTeleIE(TelecincoBaseIE):
'timestamp': 1471209401, 'timestamp': 1471209401,
'upload_date': '20160814', 'upload_date': '20160814',
}, },
'skip': 'HTTP Error 404 Not Found',
}, { }, {
# no explicit title # no explicit title
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player', 'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
@ -49,26 +49,6 @@ class MiTeleIE(TelecincoBaseIE):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'skip': 'HTTP Error 404 Not Found',
}, {
'url': 'https://www.mitele.es/programas-tv/horizonte/temporada-5/programa-171-40_013480051/player/',
'info_dict': {
'id': '7adbe22e-cd41-4787-afa4-36f3da7c2c6f',
'ext': 'mp4',
'title': 'Horizonte Temporada 5 Programa 171',
'description': 'md5:97f1fb712c5ac27e5693a8b3c5c0c6e3',
'episode': 'Las Zonas de Bajas Emisiones, a debate',
'episode_number': 171,
'season': 'Season 5',
'season_number': 5,
'series': 'Horizonte',
'duration': 7012,
'upload_date': '20240927',
'timestamp': 1727416450,
'thumbnail': 'https://album.mediaset.es/eimg/2024/09/27/horizonte-171_9f02.jpg',
'age_limit': 12,
},
'params': {'geo_bypass_country': 'ES'},
}, { }, {
'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player', 'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
'only_matching': True, 'only_matching': True,

View File

@ -2,13 +2,7 @@ import re
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest from ..utils import js_to_json, str_or_none, traverse_obj
from ..utils import (
determine_ext,
js_to_json,
str_or_none,
)
from ..utils.traversal import traverse_obj
class SubstackIE(InfoExtractor): class SubstackIE(InfoExtractor):
@ -49,19 +43,6 @@ class SubstackIE(InfoExtractor):
'uploader': "Andrew Zimmern's Spilled Milk ", 'uploader': "Andrew Zimmern's Spilled Milk ",
'uploader_id': '577659', 'uploader_id': '577659',
}, },
}, {
# Podcast that needs its file extension resolved to mp3
'url': 'https://persuasion1.substack.com/p/summers',
'md5': '1456a755d46084744facdfac9edf900f',
'info_dict': {
'id': '141970405',
'ext': 'mp3',
'title': 'Larry Summers on What Went Wrong on Campus',
'description': 'Yascha Mounk and Larry Summers also discuss the promise and perils of artificial intelligence.',
'thumbnail': r're:https://substackcdn\.com/image/.+\.jpeg',
'uploader': 'Persuasion',
'uploader_id': '61579',
},
}] }]
@classmethod @classmethod
@ -108,15 +89,7 @@ class SubstackIE(InfoExtractor):
post_type = webpage_info['post']['type'] post_type = webpage_info['post']['type']
formats, subtitles = [], {} formats, subtitles = [], {}
if post_type == 'podcast': if post_type == 'podcast':
fmt = {'url': webpage_info['post']['podcast_url']} formats, subtitles = [{'url': webpage_info['post']['podcast_url']}], {}
if not determine_ext(fmt['url'], default_ext=None):
# The redirected format URL expires but the original URL doesn't,
# so we only want to extract the extension from this request
fmt['ext'] = determine_ext(self._request_webpage(
HEADRequest(fmt['url']), display_id,
'Resolving podcast file extension',
'Podcast URL is invalid').url)
formats.append(fmt)
elif post_type == 'video': elif post_type == 'video':
formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], canonical_url) formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], canonical_url)
else: else:

View File

@ -2,69 +2,15 @@ import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError,
clean_html, clean_html,
int_or_none, int_or_none,
join_nonempty,
str_or_none, str_or_none,
traverse_obj, try_get,
update_url,
url_or_none,
) )
class TelecincoBaseIE(InfoExtractor): class TelecincoIE(InfoExtractor):
def _parse_content(self, content, url):
    """Build the info dict for a single video described by `content`.

    `content` must provide `dataMediaId` and `dataConfig`; `dataPoster` and
    `dataDuration` are optional. `url` is the page URL and is used for the
    `Referer`/`Origin` headers.

    Raises a geo-restriction error (ES) when the token endpoint answers
    HTTP 403 with error code 4038; any other ExtractorError raised while
    requesting the token is re-raised unchanged.
    """
    video_id = content['dataMediaId']
    config = self._download_json(
        content['dataConfig'], video_id, 'Downloading config JSON')
    services = config['services']
    caronte = self._download_json(services['caronte'], video_id)
    if traverse_obj(caronte, ('dls', 0, 'drm', {bool})):
        self.report_drm(video_id)
    stream = caronte['dls'][0]['stream']

    headers = {
        'Referer': url,
        'Origin': re.match(r'https?://[^/]+', url).group(0),
    }
    # Geo verification headers are only sent with the token and manifest
    # requests; the plain `headers` are what is returned as `http_headers`
    geo_headers = {**headers, **self.geo_verification_headers()}

    try:
        cerbero_url = caronte['cerbero']
        token_request = json.dumps({
            'bbx': caronte['bbx'],
            'gbx': self._download_json(services['gbx'], video_id)['gbx'],
        }).encode()
        cdn = self._download_json(
            cerbero_url, video_id, data=token_request, headers={
                'Content-Type': 'application/json',
                **geo_headers,
            })['tokens']['1']['cdn']
    except ExtractorError as error:
        if isinstance(error.cause, HTTPError) and error.cause.status == 403:
            error_code = traverse_obj(
                self._webpage_read_content(error.cause.response, caronte['cerbero'], video_id, fatal=False),
                ({json.loads}, 'code', {int}))
            if error_code == 4038:
                self.raise_geo_restricted(countries=['ES'])
        raise

    formats = self._extract_m3u8_formats(
        update_url(stream, query=cdn), video_id, 'mp4', m3u8_id='hls', headers=geo_headers)

    return {
        'id': video_id,
        'title': traverse_obj(config, ('info', 'title', {str})),
        'formats': formats,
        # Fix: separate positional paths are alternatives, so the nested
        # config['poster']['imageUrl'] was never looked up. Try it first and
        # keep the previous top-level alternatives as further fallbacks.
        'thumbnail': (traverse_obj(content, ('dataPoster', {url_or_none}))
                      or traverse_obj(
                          config, ('poster', 'imageUrl'), 'poster', 'imageUrl',
                          expected_type=url_or_none)),
        'duration': traverse_obj(content, ('dataDuration', {int_or_none})),
        'http_headers': headers,
    }
class TelecincoIE(TelecincoBaseIE):
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es' IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
_VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html' _VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
@ -84,7 +30,6 @@ class TelecincoIE(TelecincoBaseIE):
'duration': 662, 'duration': 662,
}, },
}], }],
'skip': 'HTTP Error 410 Gone',
}, { }, {
'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html', 'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
'md5': 'c86fe0d99e3bdb46b7950d38bf6ef12a', 'md5': 'c86fe0d99e3bdb46b7950d38bf6ef12a',
@ -95,24 +40,23 @@ class TelecincoIE(TelecincoBaseIE):
'description': 'md5:a62ecb5f1934fc787107d7b9a2262805', 'description': 'md5:a62ecb5f1934fc787107d7b9a2262805',
'duration': 79, 'duration': 79,
}, },
'skip': 'Redirects to main page',
}, { }, {
'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html', 'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
'md5': '5ce057f43f30b634fbaf0f18c71a140a', 'md5': 'eddb50291df704ce23c74821b995bcac',
'info_dict': { 'info_dict': {
'id': 'aywerkD2Sv1vGNqq9b85Q2', 'id': 'aywerkD2Sv1vGNqq9b85Q2',
'ext': 'mp4', 'ext': 'mp4',
'title': '#DOYLACARA. Con la trata no hay trato', 'title': '#DOYLACARA. Con la trata no hay trato',
'description': 'md5:2771356ff7bfad9179c5f5cd954f1477',
'duration': 50, 'duration': 50,
'thumbnail': 'https://album.mediaset.es/eimg/2017/11/02/1tlQLO5Q3mtKT24f3EaC24.jpg',
}, },
}, { }, {
# video in opening's content # video in opening's content
'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html', 'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html',
'info_dict': { 'info_dict': {
'id': '1691427', 'id': '2907195140',
'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"', 'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
'description': r're:Fiorella, la sobrina de Edmundo Arrocet, concedió .{727}', 'description': 'md5:73f340a7320143d37ab895375b2bf13a',
}, },
'playlist': [{ 'playlist': [{
'md5': 'adb28c37238b675dad0f042292f209a7', 'md5': 'adb28c37238b675dad0f042292f209a7',
@ -121,7 +65,6 @@ class TelecincoIE(TelecincoBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"', 'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
'duration': 1015, 'duration': 1015,
'thumbnail': 'https://album.mediaset.es/eimg/2020/02/29/5opaC37lUhKlZ7FoDhiVC.jpg',
}, },
}], }],
'params': { 'params': {
@ -138,29 +81,66 @@ class TelecincoIE(TelecincoBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
def _parse_content(self, content, url):
    """Build the info dict for a single video described by `content`.

    `content` must provide `dataMediaId` and `dataConfig`; `dataPoster` and
    `dataDuration` are optional. `url` is the page URL; its scheme and host
    are sent as the `Origin` header of the token request.
    """
    video_id = content['dataMediaId']
    config = self._download_json(
        content['dataConfig'], video_id, 'Downloading config JSON')
    title = config['info']['title']
    services = config['services']
    caronte = self._download_json(services['caronte'], video_id)
    stream = caronte['dls'][0]['stream']

    headers = self.geo_verification_headers()
    headers.update({
        'Content-Type': 'application/json;charset=UTF-8',
        'Origin': re.match(r'https?://[^/]+', url).group(0),
    })
    # The token endpoint returns the query string that authorizes the stream URL
    cdn = self._download_json(
        caronte['cerbero'], video_id, data=json.dumps({
            'bbx': caronte['bbx'],
            'gbx': self._download_json(services['gbx'], video_id)['gbx'],
        }).encode(), headers=headers)['tokens']['1']['cdn']
    formats = self._extract_m3u8_formats(
        stream + '?' + cdn, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')

    # Fix: `config.get('poster', {})` only falls back to {} when the key is
    # missing; a null or non-dict `poster` raised AttributeError. The
    # thumbnail is optional metadata and must not abort extraction.
    poster = config.get('poster')
    poster_url = poster.get('imageUrl') if isinstance(poster, dict) else None

    return {
        'id': video_id,
        'title': title,
        'formats': formats,
        'thumbnail': content.get('dataPoster') or poster_url,
        'duration': int_or_none(content.get('dataDuration')),
    }
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
article = self._search_json( article = self._parse_json(self._search_regex(
r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=', r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=\s*({.+})',
webpage, 'article', display_id)['article'] webpage, 'article'), display_id)['article']
description = traverse_obj(article, ('leadParagraph', {clean_html}, filter)) title = article.get('title')
description = clean_html(article.get('leadParagraph')) or ''
if article.get('editorialType') != 'VID': if article.get('editorialType') != 'VID':
entries = [] entries = []
body = [article.get('opening')]
for p in traverse_obj(article, ((('opening', all), 'body'), lambda _, v: v['content'])): body.extend(try_get(article, lambda x: x['body'], list) or [])
content = p['content'] for p in body:
if not isinstance(p, dict):
continue
content = p.get('content')
if not content:
continue
type_ = p.get('type') type_ = p.get('type')
if type_ == 'paragraph' and isinstance(content, str): if type_ == 'paragraph':
description = join_nonempty(description, content, delim='') content_str = str_or_none(content)
elif type_ == 'video' and isinstance(content, dict): if content_str:
description += content_str
continue
if type_ == 'video' and isinstance(content, dict):
entries.append(self._parse_content(content, url)) entries.append(self._parse_content(content, url))
return self.playlist_result( return self.playlist_result(
entries, str_or_none(article.get('id')), entries, str_or_none(article.get('id')), title, description)
traverse_obj(article, ('title', {str})), clean_html(description)) content = article['opening']['content']
info = self._parse_content(content, url)
info = self._parse_content(article['opening']['content'], url) info.update({
info['description'] = description 'description': description,
})
return info return info

View File

@ -6,7 +6,6 @@ from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
js_to_json, js_to_json,
strip_or_none,
traverse_obj, traverse_obj,
url_or_none, url_or_none,
urlencode_postdata, urlencode_postdata,
@ -133,12 +132,12 @@ class TubiTvIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': strip_or_none(title), 'title': title,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'season_number': int_or_none(season_number), 'season_number': int_or_none(season_number),
'episode_number': int_or_none(episode_number), 'episode_number': int_or_none(episode_number),
'episode': strip_or_none(episode_title), 'episode': episode_title,
**traverse_obj(video_data, { **traverse_obj(video_data, {
'description': ('description', {str}), 'description': ('description', {str}),
'duration': ('duration', {int_or_none}), 'duration': ('duration', {int_or_none}),

View File

@ -934,13 +934,14 @@ class TwitterIE(TwitterBaseIE):
'uploader_id': 'MoniqueCamarra', 'uploader_id': 'MoniqueCamarra',
'live_status': 'was_live', 'live_status': 'was_live',
'release_timestamp': 1658417414, 'release_timestamp': 1658417414,
'description': r're:Twitter Space participated by Sergej Sumlenny.+', 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
'timestamp': 1658407771, 'timestamp': 1658407771,
'release_date': '20220721', 'release_date': '20220721',
'upload_date': '20220721', 'upload_date': '20220721',
}, },
'add_ie': ['TwitterSpaces'], 'add_ie': ['TwitterSpaces'],
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'skip': 'Requires authentication',
}, { }, {
# URL specifies video number but --yes-playlist # URL specifies video number but --yes-playlist
'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1', 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
@ -1855,6 +1856,8 @@ class TwitterSpacesIE(TwitterBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
space_id = self._match_id(url) space_id = self._match_id(url)
if not self.is_logged_in:
self.raise_login_required('Twitter Spaces require authentication')
space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace'] space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
if not space_data: if not space_data:
raise ExtractorError('Twitter Space not found', expected=True) raise ExtractorError('Twitter Space not found', expected=True)

View File

@ -22,7 +22,7 @@ import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from .openload import PhantomJSwrapper from .openload import PhantomJSwrapper
from ..jsinterp import JSInterpreter from ..jsinterp import JSInterpreter
from ..networking.exceptions import HTTPError, TransportError, network_exceptions from ..networking.exceptions import HTTPError, network_exceptions
from ..utils import ( from ..utils import (
NO_DEFAULT, NO_DEFAULT,
ExtractorError, ExtractorError,
@ -55,7 +55,6 @@ from ..utils import (
str_or_none, str_or_none,
str_to_int, str_to_int,
strftime_or_none, strftime_or_none,
time_seconds,
traverse_obj, traverse_obj,
try_call, try_call,
try_get, try_get,
@ -115,7 +114,6 @@ INNERTUBE_CLIENTS = {
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 67, 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
}, },
# This client now requires sign-in for every video
'web_creator': { 'web_creator': {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
@ -155,7 +153,6 @@ INNERTUBE_CLIENTS = {
'REQUIRE_JS_PLAYER': False, 'REQUIRE_JS_PLAYER': False,
'REQUIRE_PO_TOKEN': True, 'REQUIRE_PO_TOKEN': True,
}, },
# This client now requires sign-in for every video
'android_creator': { 'android_creator': {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
@ -203,6 +200,21 @@ INNERTUBE_CLIENTS = {
'REQUIRE_JS_PLAYER': False, 'REQUIRE_JS_PLAYER': False,
'PLAYER_PARAMS': '2AMB', 'PLAYER_PARAMS': '2AMB',
}, },
# This client only has legacy formats and storyboards
'android_producer': {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_PRODUCER',
'clientVersion': '0.111.1',
'androidSdkVersion': 30,
'userAgent': 'com.google.android.apps.youtube.producer/0.111.1 (Linux; U; Android 11) gzip',
'osName': 'Android',
'osVersion': '11',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 91,
'REQUIRE_JS_PLAYER': False,
},
# iOS clients have HLS live streams. Setting device model to get 60fps formats. # iOS clients have HLS live streams. Setting device model to get 60fps formats.
# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
'ios': { 'ios': {
@ -235,7 +247,6 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT_CLIENT_NAME': 26, 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
'REQUIRE_JS_PLAYER': False, 'REQUIRE_JS_PLAYER': False,
}, },
# This client now requires sign-in for every video
'ios_creator': { 'ios_creator': {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
@ -271,9 +282,8 @@ INNERTUBE_CLIENTS = {
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 7, 'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
}, },
# This client now requires sign-in for every video # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
# It was previously an age-gate workaround for videos that were `playable_in_embed` # See: https://github.com/zerodytrash/YouTube-Internal-Clients
# It may still be useful if signed into an EU account that is not age-verified
'tv_embedded': { 'tv_embedded': {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
@ -516,8 +526,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
_YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en
_YT_CHANNEL_UCID_RE = r'UC[\w-]{22}' _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'
_NETRC_MACHINE = 'youtube'
def ucid_or_none(self, ucid): def ucid_or_none(self, ucid):
return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None) return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None)
@ -576,213 +584,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
self._initialize_consent() self._initialize_consent()
self._check_login_required() self._check_login_required()
def _perform_login(self, username, password):
    """Log in through OAuth when the username has the form ``oauth[+PROFILE]``.

    The text after the first ``+`` is passed on as the profile name and
    ``password`` as the refresh token argument of ``_initialize_oauth``.
    Any other username is rejected with the YouTube login hint.
    """
    scheme, _sep, profile = (username or '').partition('+')
    if scheme == 'oauth':
        self._initialize_oauth(profile, password)
    else:
        raise ExtractorError(self._youtube_login_hint, expected=True)
'''
OAuth 2.0 Device Authorization Grant flow, used by the YouTube TV client (youtube.com/tv).
For more information regarding OAuth 2.0 and the Device Authorization Grant flow in general, see:
- https://developers.google.com/identity/protocols/oauth2/limited-input-device
- https://accounts.google.com/.well-known/openid-configuration
- https://www.rfc-editor.org/rfc/rfc8628
- https://www.rfc-editor.org/rfc/rfc6749
Note: The official client appears to use a proxied version of the oauth2 endpoints on youtube.com/o/oauth2,
which applies some modifications to the response (such as returning errors as 200 OK).
Since the client works with the standard API, we will use that as it is well-documented.
'''
# Name of the OAuth profile in use; assigned by _initialize_oauth and None until then
_OAUTH_PROFILE = None
# Token data keyed by profile name. It is always accessed through
# YoutubeBaseInfoExtractor, so presumably one dict is shared by all extractor instances.
_OAUTH_ACCESS_TOKEN_CACHE = {}
# Passed as video_id to the OAuth requests and used as the prefix of OAuth log messages
_OAUTH_DISPLAY_ID = 'oauth'
# YouTube TV (TVHTML5) client. You can find these at youtube.com/tv
_OAUTH_CLIENT_ID = '861556708454-d6dlm3lh05idd8npek18k6be8ba3oc68.apps.googleusercontent.com'
_OAUTH_CLIENT_SECRET = 'SboVhoG9s0rNafixCSGGKXAT'
_OAUTH_SCOPE = 'http://gdata.youtube.com https://www.googleapis.com/auth/youtube-paid-content'
# From https://accounts.google.com/.well-known/openid-configuration
# Technically, these should be fetched dynamically and not hard-coded.
# However, as these endpoints rarely change, we can risk saving an extra request for every invocation.
_OAUTH_DEVICE_AUTHORIZATION_ENDPOINT = 'https://oauth2.googleapis.com/device/code'
_OAUTH_TOKEN_ENDPOINT = 'https://oauth2.googleapis.com/token'
@property
def _oauth_cache_key(self):
    """Key under which the refresh token of the active OAuth profile is cached."""
    return 'oauth_refresh_token_{}'.format(self._OAUTH_PROFILE)
def _read_oauth_error_response(self, response):
    """Return the ``error`` string found in the JSON body of ``response``.

    The body is read non-fatally; the traversal yields its default (None)
    when the body is unavailable or carries no string ``error`` field.
    """
    body = self._webpage_read_content(
        response, self._OAUTH_TOKEN_ENDPOINT, self._OAUTH_DISPLAY_ID, fatal=False)
    return traverse_obj(body, ({json.loads}, 'error', {str}))
# Record the access token details from `token_response` in the cache entry of the
# active profile, and keep the refresh token when the response carries one.
def _set_oauth_info(self, token_response):
# setdefault creates the profile entry if missing; update keeps keys not listed below
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.setdefault(self._OAUTH_PROFILE, {}).update({
'access_token': token_response['access_token'],
'token_type': token_response['token_type'],
# Derived from `expires_in` (300 when absent) minus 10 seconds of slack;
# presumably an absolute timestamp, since _update_oauth compares it with time.time()
'expiry': time_seconds(
seconds=traverse_obj(token_response, ('expires_in', {float_or_none}), default=300) - 10),
})
refresh_token = traverse_obj(token_response, ('refresh_token', {str}))
if refresh_token:
# Saved through self.cache under the profile's key and mirrored in the in-memory entry
self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token)
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token
# Prepare OAuth for profile `user` ('default' when empty): reuse an already cached
# profile, otherwise refresh with a known refresh token or run the authorization flow.
# `refresh_token` may be supplied by the caller to seed the stored token.
def _initialize_oauth(self, user, refresh_token):
self._OAUTH_PROFILE = user or 'default'
# A profile that already has a cache entry is not initialised a second time
if self._OAUTH_PROFILE in YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE:
self.write_debug(f'{self._OAUTH_DISPLAY_ID}: Using cached access token for profile "{self._OAUTH_PROFILE}"')
return
# The profile is registered with an empty entry before any request is made.
# NOTE(review): if the steps below raise, this empty entry appears to remain and
# later calls would take the "cached" path above - confirm this is intended.
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE] = {}
if refresh_token:
# Surrounding single quotes are stripped; a value made only of quotes counts as not given
refresh_token = refresh_token.strip('\'') or None
# Allow refresh token passed to initialize cache
if refresh_token:
self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token)
# Otherwise fall back to the refresh token stored for this profile, if any
refresh_token = refresh_token or self.cache.load(self._NETRC_MACHINE, self._oauth_cache_key)
if refresh_token:
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token
try:
token_response = self._refresh_token(refresh_token)
except ExtractorError as e:
# The prefix is removed from the original message because the warning adds it again
error_msg = str(e.orig_msg).replace('Failed to refresh access token: ', '')
self.report_warning(f'{self._OAUTH_DISPLAY_ID}: Failed to refresh access token: {error_msg}')
# _oauth_authorize is a property: reading it runs the device authorization flow
token_response = self._oauth_authorize
else:
token_response = self._oauth_authorize
self._set_oauth_info(token_response)
self.write_debug(f'{self._OAUTH_DISPLAY_ID}: Logged in using profile "{self._OAUTH_PROFILE}"')
# Exchange `refresh_token` for a new token response at the token endpoint and return
# the parsed JSON. HTTP errors are re-raised as ExtractorError with a message starting
# with 'Failed to refresh access token: '; other ExtractorErrors propagate unchanged.
def _refresh_token(self, refresh_token):
try:
token_response = self._download_json(
self._OAUTH_TOKEN_ENDPOINT,
video_id=self._OAUTH_DISPLAY_ID,
note='Refreshing access token',
data=json.dumps({
'client_id': self._OAUTH_CLIENT_ID,
'client_secret': self._OAUTH_CLIENT_SECRET,
'refresh_token': refresh_token,
'grant_type': 'refresh_token',
}).encode(),
headers={'Content-Type': 'application/json'})
except ExtractorError as e:
if isinstance(e.cause, HTTPError):
# The error code is taken from the JSON body of the failed response (may be None)
error = self._read_oauth_error_response(e.cause.response)
if error == 'invalid_grant':
# RFC6749 § 5.2
# Marked expected=True, unlike the generic server error below
raise ExtractorError(
'Failed to refresh access token: Refresh token is invalid, revoked, or expired (invalid_grant)',
expected=True, video_id=self._OAUTH_DISPLAY_ID)
raise ExtractorError(
f'Failed to refresh access token: Authorization server returned error {error}',
video_id=self._OAUTH_DISPLAY_ID)
raise
return token_response
# Run the device authorization flow: request a device/user code, print the verification
# URL and user code, then poll the token endpoint until a token response is obtained.
# Declared as a property, so merely reading `self._oauth_authorize` performs the requests.
# Raises ExtractorError when the server omits the verification data, the authorization
# expires or is denied, or the token endpoint reports an unrecognised error.
# NOTE(review): indentation is lost in this view; the trailing `raise` presumably re-raises
# ExtractorErrors with other causes and `return token_response` presumably sits inside
# the polling loop - confirm against the original file.
@property
def _oauth_authorize(self):
code_response = self._download_json(
self._OAUTH_DEVICE_AUTHORIZATION_ENDPOINT,
video_id=self._OAUTH_DISPLAY_ID,
note='Initializing authorization flow',
data=json.dumps({
'client_id': self._OAUTH_CLIENT_ID,
'scope': self._OAUTH_SCOPE,
}).encode(),
headers={'Content-Type': 'application/json'})
verification_url = traverse_obj(code_response, ('verification_url', {str}))
user_code = traverse_obj(code_response, ('user_code', {str}))
if not verification_url or not user_code:
raise ExtractorError(
'Authorization server did not provide verification_url or user_code', video_id=self._OAUTH_DISPLAY_ID)
# note: The whitespace is intentional
self.to_screen(
f'{self._OAUTH_DISPLAY_ID}: To give yt-dlp access to your account, '
f'go to {verification_url} and enter code {user_code}')
# RFC8628 § 3.5: default poll interval is 5 seconds if not provided
poll_interval = traverse_obj(code_response, ('interval', {int}), default=5)
for retry in self.RetryManager():
# Poll until the request succeeds, a terminal error is raised, or a retryable
# failure is recorded on `retry` and the loop is left with `break`
while True:
try:
token_response = self._download_json(
self._OAUTH_TOKEN_ENDPOINT,
video_id=self._OAUTH_DISPLAY_ID,
note=False,
errnote='Failed to request access token',
data=json.dumps({
'client_id': self._OAUTH_CLIENT_ID,
'client_secret': self._OAUTH_CLIENT_SECRET,
'device_code': code_response['device_code'],
'grant_type': 'urn:ietf:params:oauth:grant-type:device_code',
}).encode(),
headers={'Content-Type': 'application/json'})
except ExtractorError as e:
if isinstance(e.cause, TransportError):
# Transport failures are recorded on the retry object
retry.error = e
break
elif isinstance(e.cause, HTTPError):
error = self._read_oauth_error_response(e.cause.response)
if not error:
# No error code could be read from the response: treated like a transport failure
retry.error = e
break
if error == 'authorization_pending':
# The user has not completed the verification step yet: wait and poll again
time.sleep(poll_interval)
continue
elif error == 'expired_token':
raise ExtractorError(
'Authorization timed out', expected=True, video_id=self._OAUTH_DISPLAY_ID)
elif error == 'access_denied':
raise ExtractorError(
'You denied access to an account', expected=True, video_id=self._OAUTH_DISPLAY_ID)
elif error == 'slow_down':
# RFC8628 § 3.5: add 5 seconds to the poll interval
poll_interval += 5
time.sleep(poll_interval)
continue
else:
raise ExtractorError(
f'Authorization server returned an error when fetching access token: {error}',
video_id=self._OAUTH_DISPLAY_ID)
raise
return token_response
def _update_oauth(self):
    """Refresh the cached access token of the active profile once it has expired.

    Returns without doing anything when the profile has no usable cache entry,
    when the cached access token has not expired yet, or when there is no
    refresh token to refresh it with. Otherwise the refresh token is exchanged
    and the new token data is stored through ``_set_oauth_info``.

    Errors raised by ``_refresh_token`` propagate to the caller.
    """
    token = YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.get(self._OAUTH_PROFILE)
    if not token:
        # Covers both an unknown profile (None) and the empty entry that
        # _initialize_oauth registers before it has obtained any token
        return

    # An entry without 'expiry' has never held an access token; treat it as
    # expired instead of failing with KeyError
    expiry = token.get('expiry')
    if expiry is not None and expiry > time.time():
        return

    refresh_token = token.get('refresh_token')
    if not refresh_token:
        # Nothing to refresh with; leave the entry untouched
        return
    self._set_oauth_info(self._refresh_token(refresh_token))
@property
def _youtube_login_hint(self):
    """Login help text covering both the OAuth and the cookies method.

    The cookies part reuses ``_login_hint(method="cookies")`` with its first
    character replaced by a lowercase ``u`` so it reads on mid-sentence.
    """
    cookies_hint = self._login_hint(method="cookies")
    return ''.join((
        'Use --username=oauth[+PROFILE] --password="" to log in using oauth, ',
        f'or else u{cookies_hint[1:]}. ',
        'See https://github.com/yt-dlp/yt-dlp/wiki/Extractors#logging-in-with-oauth for more on how to use oauth. ',
        'See https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies for help with cookies',
    ))
def _check_login_required(self): def _check_login_required(self):
if self._LOGIN_REQUIRED and not self.is_authenticated: if self._LOGIN_REQUIRED and not self._cookies_passed:
self.raise_login_required( self.raise_login_required('Login details are needed to download this content', method='cookies')
f'Login details are needed to download this content. {self._youtube_login_hint}', method=None)
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=' _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=' _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
@ -881,6 +685,17 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if session_index is not None: if session_index is not None:
return session_index return session_index
# Deprecated?
# Look up the identity token: a truthy `ytcfg['ID_TOKEN']` is returned when available,
# otherwise an `ID_TOKEN: "..."` entry is searched for in `webpage`.
# Nothing is returned explicitly when neither source yields a token.
# NOTE(review): indentation is lost in this view, so whether the webpage lookup is
# nested under `if ytcfg:` cannot be confirmed here.
def _extract_identity_token(self, ytcfg=None, webpage=None):
if ytcfg:
# try_get presumably returns the value only when it is a str - verify against the helper
token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
if token:
return token
if webpage:
# Non-fatal search with a None default, so a missing token does not raise
return self._search_regex(
r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
'identity token', default=None, fatal=False)
def _data_sync_id_to_delegated_session_id(self, data_sync_id): def _data_sync_id_to_delegated_session_id(self, data_sync_id):
if not data_sync_id: if not data_sync_id:
return return
@ -927,7 +742,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
@functools.cached_property @functools.cached_property
def is_authenticated(self): def is_authenticated(self):
return self._OAUTH_PROFILE or bool(self._generate_sapisidhash_header()) return bool(self._generate_sapisidhash_header())
def extract_ytcfg(self, video_id, webpage): def extract_ytcfg(self, video_id, webpage):
if not webpage: if not webpage:
@ -937,21 +752,21 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
default='{}'), video_id, fatal=False) or {} default='{}'), video_id, fatal=False) or {}
def _generate_oauth_headers(self): def generate_api_headers(
self._update_oauth() self, *, ytcfg=None, account_syncid=None, session_index=None,
oauth_token = YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.get(self._OAUTH_PROFILE) visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
if not oauth_token:
return {}
return { origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
'Authorization': f'{oauth_token["token_type"]} {oauth_token["access_token"]}', headers = {
'X-YouTube-Client-Name': str(
self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
'Origin': origin,
'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
} }
def _generate_cookie_auth_headers(self, *, ytcfg=None, account_syncid=None, session_index=None, origin=None, **kwargs):
headers = {}
account_syncid = account_syncid or self._extract_account_syncid(ytcfg)
if account_syncid:
headers['X-Goog-PageId'] = account_syncid
if session_index is None: if session_index is None:
session_index = self._extract_session_index(ytcfg) session_index = self._extract_session_index(ytcfg)
if account_syncid or session_index is not None: if account_syncid or session_index is not None:
@ -961,29 +776,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if auth is not None: if auth is not None:
headers['Authorization'] = auth headers['Authorization'] = auth
headers['X-Origin'] = origin headers['X-Origin'] = origin
return headers
def generate_api_headers(
self, *, ytcfg=None, account_syncid=None, session_index=None,
visitor_data=None, api_hostname=None, default_client='web', **kwargs):
origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
headers = {
'X-YouTube-Client-Name': str(
self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
'Origin': origin,
'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
**self._generate_oauth_headers(),
**self._generate_cookie_auth_headers(ytcfg=ytcfg, account_syncid=account_syncid, session_index=session_index, origin=origin),
}
return filter_dict(headers) return filter_dict(headers)
def _generate_webpage_headers(self):
return self._generate_oauth_headers()
def _download_ytcfg(self, client, video_id): def _download_ytcfg(self, client, video_id):
url = { url = {
'web': 'https://www.youtube.com', 'web': 'https://www.youtube.com',
@ -993,8 +787,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if not url: if not url:
return {} return {}
webpage = self._download_webpage( webpage = self._download_webpage(
url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config', url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
headers=self._generate_webpage_headers())
return self.extract_ytcfg(video_id, webpage) or {} return self.extract_ytcfg(video_id, webpage) or {}
@staticmethod @staticmethod
@ -1732,7 +1525,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'heatmap': 'count:100', 'heatmap': 'count:100',
'timestamp': 1401991663, 'timestamp': 1401991663,
}, },
'skip': 'Age-restricted; requires authentication',
}, },
{ {
'note': 'Age-gate video with embed allowed in public site', 'note': 'Age-gate video with embed allowed in public site',
@ -1763,7 +1555,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'comment_count': int, 'comment_count': int,
'channel_is_verified': True, 'channel_is_verified': True,
}, },
'skip': 'Age-restricted; requires authentication',
}, },
{ {
'note': 'Age-gate video embedable only with clientScreen=EMBED', 'note': 'Age-gate video embedable only with clientScreen=EMBED',
@ -1794,7 +1585,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@ProjektMelody', 'uploader_id': '@ProjektMelody',
'timestamp': 1577508724, 'timestamp': 1577508724,
}, },
'skip': 'Age-restricted; requires authentication',
}, },
{ {
'note': 'Non-Agegated non-embeddable video', 'note': 'Non-Agegated non-embeddable video',
@ -2566,7 +2356,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True, 'channel_is_verified': True,
'timestamp': 1405513526, 'timestamp': 1405513526,
}, },
'skip': 'Age-restricted; requires authentication',
}, },
{ {
# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
@ -2937,7 +2726,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'timestamp': 1577508724, 'timestamp': 1577508724,
}, },
'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'}, 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
'skip': 'Age-restricted; requires authentication',
}, },
{ {
'url': 'https://www.youtube.com/live/qVv6vCqciTM', 'url': 'https://www.youtube.com/live/qVv6vCqciTM',
@ -3259,8 +3047,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
code = self._download_webpage( code = self._download_webpage(
player_url, video_id, fatal=fatal, player_url, video_id, fatal=fatal,
note='Downloading player ' + player_id, note='Downloading player ' + player_id,
errnote=f'Download of {player_url} failed', errnote=f'Download of {player_url} failed')
headers=self._generate_webpage_headers())
if code: if code:
self._code_cache[player_id] = code self._code_cache[player_id] = code
return self._code_cache.get(player_id) return self._code_cache.get(player_id)
@ -3543,8 +3330,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self._download_webpage( self._download_webpage(
url, video_id, f'Marking {label}watched', url, video_id, f'Marking {label}watched',
'Unable to mark watched', fatal=False, 'Unable to mark watched', fatal=False)
headers=self._generate_webpage_headers())
@classmethod @classmethod
def _extract_from_webpage(cls, url, webpage): def _extract_from_webpage(cls, url, webpage):
@ -4167,15 +3953,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else: else:
prs.append(pr) prs.append(pr)
# tv_embedded can work around age-gate and age-verification IF the video is embeddable
if self._is_agegated(pr) and variant != 'tv_embedded':
append_client(f'tv_embedded.{base_client}')
# Unauthenticated users will only get tv_embedded client formats if age-gated
if self._is_agegated(pr) and not self.is_authenticated:
self.to_screen(
f'{video_id}: This video is age-restricted; some formats may be missing '
f'without authentication. {self._login_hint()}', only_once=True)
# EU countries require age-verification for accounts to access age-restricted videos # EU countries require age-verification for accounts to access age-restricted videos
# If account is not age-verified, _is_agegated() will be truthy for non-embedded clients # If account is not age-verified, _is_agegated() will be truthy for non-embedded clients
if self.is_authenticated and self._is_agegated(pr): # If embedding is disabled for the video, _is_unplayable() will be truthy for tv_embedded
embedding_is_disabled = variant == 'tv_embedded' and self._is_unplayable(pr)
if self.is_authenticated and (self._is_agegated(pr) or embedding_is_disabled):
self.to_screen( self.to_screen(
f'{video_id}: This video is age-restricted and YouTube is requiring ' f'{video_id}: This video is age-restricted and YouTube is requiring '
'account age-verification; some formats may be missing', only_once=True) 'account age-verification; some formats may be missing', only_once=True)
# web_creator and mediaconnect can work around the age-verification requirement # web_creator and mediaconnect can work around the age-verification requirement
# _testsuite & _vr variants can also work around age-verification # _producer, _testsuite, & _vr variants can also work around age-verification
# tv_embedded may(?) still work around age-verification if the video is embeddable
append_client('web_creator', 'mediaconnect') append_client('web_creator', 'mediaconnect')
prs.extend(deprioritized_prs) prs.extend(deprioritized_prs)
@ -4525,7 +4322,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if pp: if pp:
query['pp'] = pp query['pp'] = pp
webpage = self._download_webpage( webpage = self._download_webpage(
webpage_url, video_id, fatal=False, query=query, headers=self._generate_webpage_headers()) webpage_url, video_id, fatal=False, query=query)
master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg() master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
@ -4904,12 +4701,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
headers=self.generate_api_headers(ytcfg=master_ytcfg), headers=self.generate_api_headers(ytcfg=master_ytcfg),
note='Downloading initial data API JSON') note='Downloading initial data API JSON')
COMMENTS_SECTION_IDS = ('comment-item-section', 'engagement-panel-comments-section')
info['comment_count'] = traverse_obj(initial_data, ( info['comment_count'] = traverse_obj(initial_data, (
'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer', 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount',
), ( ), (
'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] in COMMENTS_SECTION_IDS, 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo',
), expected_type=self._get_count, get_all=False) ), expected_type=self._get_count, get_all=False)
@ -5813,7 +5609,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
webpage, data = None, None webpage, data = None, None
for retry in self.RetryManager(fatal=fatal): for retry in self.RetryManager(fatal=fatal):
try: try:
webpage = self._download_webpage(url, item_id, note='Downloading webpage', headers=self._generate_webpage_headers()) webpage = self._download_webpage(url, item_id, note='Downloading webpage')
data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {} data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, network_exceptions): if isinstance(e.cause, network_exceptions):
@ -7187,7 +6983,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
raise ExtractorError('Unable to recognize tab page') raise ExtractorError('Unable to recognize tab page')
class YoutubePlaylistIE(YoutubeBaseInfoExtractor): class YoutubePlaylistIE(InfoExtractor):
IE_DESC = 'YouTube playlists' IE_DESC = 'YouTube playlists'
_VALID_URL = r'''(?x)(?: _VALID_URL = r'''(?x)(?:
(?:https?://)? (?:https?://)?
@ -7301,7 +7097,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id) return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
class YoutubeYtBeIE(YoutubeBaseInfoExtractor): class YoutubeYtBeIE(InfoExtractor):
IE_DESC = 'youtu.be' IE_DESC = 'youtu.be'
_VALID_URL = rf'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{{11}})/*?.*?\blist=(?P<playlist_id>{YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})' _VALID_URL = rf'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{{11}})/*?.*?\blist=(?P<playlist_id>{YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})'
_TESTS = [{ _TESTS = [{
@ -7352,7 +7148,7 @@ class YoutubeYtBeIE(YoutubeBaseInfoExtractor):
}), ie=YoutubeTabIE.ie_key(), video_id=playlist_id) }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
class YoutubeLivestreamEmbedIE(YoutubeBaseInfoExtractor): class YoutubeLivestreamEmbedIE(InfoExtractor):
IE_DESC = 'YouTube livestream embeds' IE_DESC = 'YouTube livestream embeds'
_VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)' _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
_TESTS = [{ _TESTS = [{
@ -7367,7 +7163,7 @@ class YoutubeLivestreamEmbedIE(YoutubeBaseInfoExtractor):
ie=YoutubeTabIE.ie_key(), video_id=channel_id) ie=YoutubeTabIE.ie_key(), video_id=channel_id)
class YoutubeYtUserIE(YoutubeBaseInfoExtractor): class YoutubeYtUserIE(InfoExtractor):
IE_DESC = 'YouTube user videos; "ytuser:" prefix' IE_DESC = 'YouTube user videos; "ytuser:" prefix'
IE_NAME = 'youtube:user' IE_NAME = 'youtube:user'
_VALID_URL = r'ytuser:(?P<id>.+)' _VALID_URL = r'ytuser:(?P<id>.+)'
@ -7654,7 +7450,7 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
return self.playlist_result(self._search_results(query, params, default_client='web_music'), title, title) return self.playlist_result(self._search_results(query, params, default_client='web_music'), title, title)
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): class YoutubeFeedsInfoExtractor(InfoExtractor):
""" """
Base class for feed extractors Base class for feed extractors
Subclasses must re-define the _FEED_NAME property. Subclasses must re-define the _FEED_NAME property.
@ -7662,6 +7458,9 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
_LOGIN_REQUIRED = True _LOGIN_REQUIRED = True
_FEED_NAME = 'feeds' _FEED_NAME = 'feeds'
def _real_initialize(self):
    """Run the YouTube base extractor's login-required check for this extractor.

    The check is invoked through ``YoutubeBaseInfoExtractor`` explicitly,
    passing this instance as ``self``.
    """
    YoutubeBaseInfoExtractor._check_login_required(self)
@classproperty @classproperty
def IE_NAME(cls): def IE_NAME(cls):
return f'youtube:{cls._FEED_NAME}' return f'youtube:{cls._FEED_NAME}'
@ -7671,7 +7470,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
f'https://www.youtube.com/feed/{self._FEED_NAME}', ie=YoutubeTabIE.ie_key()) f'https://www.youtube.com/feed/{self._FEED_NAME}', ie=YoutubeTabIE.ie_key())
class YoutubeWatchLaterIE(YoutubeBaseInfoExtractor): class YoutubeWatchLaterIE(InfoExtractor):
IE_NAME = 'youtube:watchlater' IE_NAME = 'youtube:watchlater'
IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)' IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
_VALID_URL = r':ytwatchlater' _VALID_URL = r':ytwatchlater'
@ -7725,7 +7524,7 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
}] }]
class YoutubeShortsAudioPivotIE(YoutubeBaseInfoExtractor): class YoutubeShortsAudioPivotIE(InfoExtractor):
IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)' IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
IE_NAME = 'youtube:shorts:pivot:audio' IE_NAME = 'youtube:shorts:pivot:audio'
_VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts' _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
@ -7749,7 +7548,7 @@ class YoutubeShortsAudioPivotIE(YoutubeBaseInfoExtractor):
ie=YoutubeTabIE) ie=YoutubeTabIE)
class YoutubeTruncatedURLIE(YoutubeBaseInfoExtractor): class YoutubeTruncatedURLIE(InfoExtractor):
IE_NAME = 'youtube:truncated_url' IE_NAME = 'youtube:truncated_url'
IE_DESC = False # Do not list IE_DESC = False # Do not list
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
@ -7908,7 +7707,7 @@ class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
return self.url_result(redirect_url) return self.url_result(redirect_url)
class YoutubeTruncatedIDIE(YoutubeBaseInfoExtractor): class YoutubeTruncatedIDIE(InfoExtractor):
IE_NAME = 'youtube:truncated_id' IE_NAME = 'youtube:truncated_id'
IE_DESC = False # Do not list IE_DESC = False # Do not list
_VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$' _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

View File

@ -408,14 +408,6 @@ def create_parser():
help=( help=(
'Location of the main configuration file; either the path to the config or its containing directory ' 'Location of the main configuration file; either the path to the config or its containing directory '
'("-" for stdin). Can be used multiple times and inside other configuration files')) '("-" for stdin). Can be used multiple times and inside other configuration files'))
general.add_option(
'--plugin-dirs',
dest='plugin_dirs', metavar='PATH', action='append',
help=(
'Path to an additional directory to search for plugins. '
'This option can be used multiple times to add multiple directories. '
'Note that this currently only works for extractor plugins; '
'postprocessor plugins can only be loaded from the default plugin directories'))
general.add_option( general.add_option(
'--flat-playlist', '--flat-playlist',
action='store_const', dest='extract_flat', const='in_playlist', default=False, action='store_const', dest='extract_flat', const='in_playlist', default=False,

View File

@ -15,7 +15,6 @@ from zipfile import ZipFile
from .compat import functools # isort: split from .compat import functools # isort: split
from .utils import ( from .utils import (
Config,
get_executable_path, get_executable_path,
get_system_config_dirs, get_system_config_dirs,
get_user_config_dirs, get_user_config_dirs,
@ -85,12 +84,6 @@ class PluginFinder(importlib.abc.MetaPathFinder):
with contextlib.suppress(ValueError): # Added when running __main__.py directly with contextlib.suppress(ValueError): # Added when running __main__.py directly
candidate_locations.remove(Path(__file__).parent) candidate_locations.remove(Path(__file__).parent)
# TODO(coletdjnz): remove when plugin globals system is implemented
if Config._plugin_dirs:
candidate_locations.extend(_get_package_paths(
*Config._plugin_dirs,
containing_folder=''))
parts = Path(*fullname.split('.')) parts = Path(*fullname.split('.'))
for path in orderedSet(candidate_locations, lazy=True): for path in orderedSet(candidate_locations, lazy=True):
candidate = path / parts candidate = path / parts

View File

@ -103,6 +103,7 @@ def current_git_head():
_FILE_SUFFIXES = { _FILE_SUFFIXES = {
'zip': '', 'zip': '',
'py2exe': '_min.exe',
'win_exe': '.exe', 'win_exe': '.exe',
'win_x86_exe': '_x86.exe', 'win_x86_exe': '_x86.exe',
'darwin_exe': '_macos', 'darwin_exe': '_macos',
@ -116,7 +117,6 @@ _NON_UPDATEABLE_REASONS = {
**{variant: None for variant in _FILE_SUFFIXES}, # Updatable **{variant: None for variant in _FILE_SUFFIXES}, # Updatable
**{variant: f'Auto-update is not supported for unpackaged {name} executable; Re-download the latest release' **{variant: f'Auto-update is not supported for unpackaged {name} executable; Re-download the latest release'
for variant, name in {'win32_dir': 'Windows', 'darwin_dir': 'MacOS', 'linux_dir': 'Linux'}.items()}, for variant, name in {'win32_dir': 'Windows', 'darwin_dir': 'MacOS', 'linux_dir': 'Linux'}.items()},
'py2exe': 'py2exe is no longer supported by yt-dlp; This executable cannot be updated',
'source': 'You cannot update when running from source code; Use git to pull the latest changes', 'source': 'You cannot update when running from source code; Use git to pull the latest changes',
'unknown': 'You installed yt-dlp from a manual build or with a package manager; Use that to update', 'unknown': 'You installed yt-dlp from a manual build or with a package manager; Use that to update',
'other': 'You are using an unofficial build of yt-dlp; Build the executable again', 'other': 'You are using an unofficial build of yt-dlp; Build the executable again',
@ -152,10 +152,15 @@ def _get_system_deprecation():
variant = detect_variant() variant = detect_variant()
# Temporary until Windows builds use 3.9, which will drop support for Win7 and 2008ServerR2 # Temporary until Windows builds use 3.9, which will drop support for Win7 and 2008ServerR2
if variant in ('win_exe', 'win_x86_exe'): if variant in ('win_exe', 'win_x86_exe', 'py2exe'):
platform_name = platform.platform() platform_name = platform.platform()
if any(platform_name.startswith(f'Windows-{name}') for name in ('7', '2008ServerR2')): if any(platform_name.startswith(f'Windows-{name}') for name in ('7', '2008ServerR2')):
return EXE_MSG_TMPL.format('Windows 7/Server 2008 R2', 'issues/10086', STOP_MSG) return EXE_MSG_TMPL.format('Windows 7/Server 2008 R2', 'issues/10086', STOP_MSG)
elif variant == 'py2exe':
return EXE_MSG_TMPL.format(
'py2exe builds (yt-dlp_min.exe)', 'issues/10087',
'In a future update you will be migrated to the PyInstaller-bundled executable. '
'This will be done automatically; no action is required on your part')
return None return None
# Temporary until aarch64/armv7l build flow is bumped to Ubuntu 20.04 and Python 3.9 # Temporary until aarch64/armv7l build flow is bumped to Ubuntu 20.04 and Python 3.9
@ -520,7 +525,7 @@ class Updater:
return os.rename(old_filename, self.filename) return os.rename(old_filename, self.filename)
variant = detect_variant() variant = detect_variant()
if variant.startswith('win'): if variant.startswith('win') or variant == 'py2exe':
atexit.register(Popen, f'ping 127.0.0.1 -n 5 -w 1000 & del /F "{old_filename}"', atexit.register(Popen, f'ping 127.0.0.1 -n 5 -w 1000 & del /F "{old_filename}"',
shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
elif old_filename: elif old_filename:

View File

@ -4897,10 +4897,6 @@ class Config:
filename = None filename = None
__initialized = False __initialized = False
# Internal only, do not use! Hack to enable --plugin-dirs
# TODO(coletdjnz): remove when plugin globals system is implemented
_plugin_dirs = None
def __init__(self, parser, label=None): def __init__(self, parser, label=None):
self.parser, self.label = parser, label self.parser, self.label = parser, label
self._loaded_paths, self.configs = set(), [] self._loaded_paths, self.configs = set(), []