Compare commits

...

7 Commits

Author SHA1 Message Date
bashonly
4ffb926b2c
Merge 8cbf4659bd into 52c0ffe40a 2024-11-17 09:53:51 +05:30
bashonly
52c0ffe40a
[ie/youtube] Remove broken OAuth support (#11558)
Closes #11462
Authored by: bashonly
2024-11-16 23:40:21 +00:00
sepro
637d62a3a9
[ie/youtube] Player client maintenance (#11528)
Authored by: bashonly, seproDev

Co-authored-by: bashonly <bashonly@protonmail.com>
2024-11-17 00:31:04 +01:00
bashonly
8cbf4659bd
Merge branch 'yt-dlp:master' into fix/foxsports 2024-07-01 22:38:34 -05:00
bashonly
0bfcbc79b6
Merge branch 'yt-dlp:master' into fix/foxsports 2024-06-12 01:07:42 -05:00
bashonly
d07eb72f6f
fun
Authored by: bashonly
2024-04-05 22:18:44 -05:00
bashonly
734d7bd4a3
[ie/fox] Support more FOX Sports URLs
Authored by: bashonly
2024-04-05 22:05:58 -05:00
4 changed files with 101 additions and 278 deletions

View File

@ -1768,7 +1768,7 @@ The following extractors use this feature:
#### youtube #### youtube
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mweb`, `mediaconnect`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, and `web_creator,mediaconnect` is added as needed for age-gated videos when account age verification is required. Similarly, the `_music` variants are added for `music.youtube.com` URLs. Some clients, such as `web` and `android`, require a `po_token` for their formats to be downloadable. Some clients, such as the `_creator` variants, will only work with authentication. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web` * `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mweb`, `mediaconnect`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, and `web_creator` is added as needed for age-gated videos when account age verification is required. Similarly, the `_music` variants are added for `music.youtube.com` URLs. Some clients, such as `web` and `android`, require a `po_token` for their formats to be downloadable. Some clients, such as the `_creator` variants, will only work with authentication. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web`
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)

View File

@ -7,17 +7,19 @@ from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
join_nonempty,
parse_age_limit, parse_age_limit,
parse_duration, parse_duration,
traverse_obj, traverse_obj,
try_get, try_get,
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
urljoin,
) )
class FOXIE(InfoExtractor): class FOXIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?fox(?:sports)?\.com/(?:watch|replay)/(?P<id>[\da-fA-F]+)' _VALID_URL = r'https?://(?:www\.)?fox(?:sports)?\.com/(?:watch|replay)/(?!play-)(?P<id>[\w-]+)'
_TESTS = [{ _TESTS = [{
# clip # clip
'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/', 'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
@ -30,8 +32,9 @@ class FOXIE(InfoExtractor):
'duration': 102, 'duration': 102,
'timestamp': 1504291893, 'timestamp': 1504291893,
'upload_date': '20170901', 'upload_date': '20170901',
'creator': 'FOX', 'creators': ['FOX'],
'series': 'Gotham', # actual series name 'Gotham' is no longer returned by the API
'series': 'Aftermath: Bruce Wayne Develops Into The Dark Knight',
'age_limit': 14, 'age_limit': 14,
'episode': 'Aftermath: Bruce Wayne Develops Into The Dark Knight', 'episode': 'Aftermath: Bruce Wayne Develops Into The Dark Knight',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
@ -39,6 +42,24 @@ class FOXIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
# XML endpoint
'url': 'https://www.foxsports.com/watch/fmc-m2du80v5ewz11pbw',
'md5': '5451a633a5ca87b582a4d025df6852e6',
'info_dict': {
'id': 'fmc-m2du80v5ewz11pbw',
'ext': 'mp4',
'title': 'WWE FRIDAY NIGHT SMACKDOWN',
'description': 'From Fiserv Forum in Milwaukee, WI',
'duration': 5367,
'timestamp': 1698176671,
'upload_date': '20231024',
'creators': ['fox-digital'],
'series': 'WWE FRIDAY NIGHT SMACKDOWN',
'age_limit': 0,
'episode': 'WWE FRIDAY NIGHT SMACKDOWN',
'thumbnail': r're:^https?://.*\.jpg$',
},
}, { }, {
# episode, geo-restricted # episode, geo-restricted
'url': 'https://www.fox.com/watch/087036ca7f33c8eb79b08152b4dd75c1/', 'url': 'https://www.fox.com/watch/087036ca7f33c8eb79b08152b4dd75c1/',
@ -54,9 +75,13 @@ class FOXIE(InfoExtractor):
}] }]
_GEO_BYPASS = False _GEO_BYPASS = False
_HOME_PAGE_URL = 'https://www.fox.com/' _HOME_PAGE_URL = 'https://www.fox.com/'
_API_KEY = '6E9S4bmcoNnZwVLOHywOv8PJEdu76cM9' _API_KEY = '6E9S4bmcoNnZwVLOHywOv8PJEdu76cM9' # sports: 'cf289e299efdfa39fb6316f259d1de93'
_access_token = None _access_token = None
_device_id = str(uuid.uuid4()) _device_id = str(uuid.uuid4())
_XML_NS = {
'vmap': 'http://www.iab.net/videosuite/vmap',
'yospacenet': 'http://www.yospace.com/extension',
}
def _call_api(self, path, video_id, data=None): def _call_api(self, path, video_id, data=None):
headers = { headers = {
@ -66,7 +91,7 @@ class FOXIE(InfoExtractor):
headers['Authorization'] = 'Bearer ' + self._access_token headers['Authorization'] = 'Bearer ' + self._access_token
try: try:
return self._download_json( return self._download_json(
'https://api3.fox.com/v2.0/' + path, urljoin('https://api3.fox.com/v2.0/', path),
video_id, data=data, headers=headers) video_id, data=data, headers=headers)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 403: if isinstance(e.cause, HTTPError) and e.cause.status == 403:
@ -100,8 +125,8 @@ class FOXIE(InfoExtractor):
f'previewpassmvpd?device_id={self._device_id}&mvpd_id=TempPass_fbcfox_60min', f'previewpassmvpd?device_id={self._device_id}&mvpd_id=TempPass_fbcfox_60min',
video_id)['accessToken'] video_id)['accessToken']
video = self._call_api('watch', video_id, data=json.dumps({ video = self._call_api('https://prod.api.video.fox/v2.0/watch', video_id, data=json.dumps({
'capabilities': ['drm/widevine', 'fsdk/yo'], 'capabilities': ['fsdk/yo/v3'],
'deviceWidth': 1280, 'deviceWidth': 1280,
'deviceHeight': 720, 'deviceHeight': 720,
'maxRes': '720p', 'maxRes': '720p',
@ -116,13 +141,16 @@ class FOXIE(InfoExtractor):
'privacy': {'us': '1---'}, 'privacy': {'us': '1---'},
'siteSection': '', 'siteSection': '',
'streamType': 'vod', 'streamType': 'vod',
'streamId': video_id}).encode()) 'streamId': video_id,
}).encode())
title = video['name']
release_url = video['url']
try: try:
m3u8_url = self._download_json(release_url, video_id)['playURL'] if playback_url := traverse_obj(video, ('playbackUrl', {url_or_none})):
xml_data = self._download_xml(playback_url, video_id)
stream = xml_data.find('vmap:Extensions/vmap:Extension/yospacenet:Stream', self._XML_NS)
m3u8_url = join_nonempty('https://', stream.get('urlDomain'), stream.get('urlSuffix'), delim='')
else:
m3u8_url = self._download_json(video['url'], video_id)['playURL']
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 403: if isinstance(e.cause, HTTPError) and e.cause.status == 403:
error = self._parse_json(e.cause.response.read().decode(), video_id) error = self._parse_json(e.cause.response.read().decode(), video_id)
@ -130,9 +158,11 @@ class FOXIE(InfoExtractor):
self.raise_geo_restricted(countries=['US']) self.raise_geo_restricted(countries=['US'])
raise ExtractorError(error['description'], expected=True) raise ExtractorError(error['description'], expected=True)
raise raise
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', if not m3u8_url or m3u8_url == 'https://':
entry_protocol='m3u8_native', m3u8_id='hls') raise ExtractorError('Unable to extract m3u8 url')
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls')
data = try_get( data = try_get(
video, lambda x: x['trackingData']['properties'], dict) or {} video, lambda x: x['trackingData']['properties'], dict) or {}
@ -157,7 +187,7 @@ class FOXIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': video.get('name'),
'formats': formats, 'formats': formats,
'description': video.get('description'), 'description': video.get('description'),
'duration': duration, 'duration': duration,

View File

@ -5,7 +5,7 @@ from ..utils import float_or_none, make_archive_id, smuggle_url
class FoxSportsIE(InfoExtractor): class FoxSportsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/watch/(?P<id>[\w-]+)' _VALID_URL = r'https?://(?:www\.)?foxsports\.com/watch/(?P<id>play-[\w-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.foxsports.com/watch/play-612168c6700004b', 'url': 'https://www.foxsports.com/watch/play-612168c6700004b',
'info_dict': { 'info_dict': {

View File

@ -22,7 +22,7 @@ import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from .openload import PhantomJSwrapper from .openload import PhantomJSwrapper
from ..jsinterp import JSInterpreter from ..jsinterp import JSInterpreter
from ..networking.exceptions import HTTPError, TransportError, network_exceptions from ..networking.exceptions import HTTPError, network_exceptions
from ..utils import ( from ..utils import (
NO_DEFAULT, NO_DEFAULT,
ExtractorError, ExtractorError,
@ -50,12 +50,12 @@ from ..utils import (
parse_iso8601, parse_iso8601,
parse_qs, parse_qs,
qualities, qualities,
remove_end,
remove_start, remove_start,
smuggle_url, smuggle_url,
str_or_none, str_or_none,
str_to_int, str_to_int,
strftime_or_none, strftime_or_none,
time_seconds,
traverse_obj, traverse_obj,
try_call, try_call,
try_get, try_get,
@ -124,14 +124,15 @@ INNERTUBE_CLIENTS = {
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 62, 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
'REQUIRE_AUTH': True,
}, },
'android': { 'android': {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'ANDROID', 'clientName': 'ANDROID',
'clientVersion': '19.29.37', 'clientVersion': '19.44.38',
'androidSdkVersion': 30, 'androidSdkVersion': 30,
'userAgent': 'com.google.android.youtube/19.29.37 (Linux; U; Android 11) gzip', 'userAgent': 'com.google.android.youtube/19.44.38 (Linux; U; Android 11) gzip',
'osName': 'Android', 'osName': 'Android',
'osVersion': '11', 'osVersion': '11',
}, },
@ -140,13 +141,14 @@ INNERTUBE_CLIENTS = {
'REQUIRE_JS_PLAYER': False, 'REQUIRE_JS_PLAYER': False,
'REQUIRE_PO_TOKEN': True, 'REQUIRE_PO_TOKEN': True,
}, },
# This client now requires sign-in for every video
'android_music': { 'android_music': {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'ANDROID_MUSIC', 'clientName': 'ANDROID_MUSIC',
'clientVersion': '7.11.50', 'clientVersion': '7.27.52',
'androidSdkVersion': 30, 'androidSdkVersion': 30,
'userAgent': 'com.google.android.apps.youtube.music/7.11.50 (Linux; U; Android 11) gzip', 'userAgent': 'com.google.android.apps.youtube.music/7.27.52 (Linux; U; Android 11) gzip',
'osName': 'Android', 'osName': 'Android',
'osVersion': '11', 'osVersion': '11',
}, },
@ -154,15 +156,16 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT_CLIENT_NAME': 21, 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
'REQUIRE_JS_PLAYER': False, 'REQUIRE_JS_PLAYER': False,
'REQUIRE_PO_TOKEN': True, 'REQUIRE_PO_TOKEN': True,
'REQUIRE_AUTH': True,
}, },
# This client now requires sign-in for every video # This client now requires sign-in for every video
'android_creator': { 'android_creator': {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'ANDROID_CREATOR', 'clientName': 'ANDROID_CREATOR',
'clientVersion': '24.30.100', 'clientVersion': '24.45.100',
'androidSdkVersion': 30, 'androidSdkVersion': 30,
'userAgent': 'com.google.android.apps.youtube.creator/24.30.100 (Linux; U; Android 11) gzip', 'userAgent': 'com.google.android.apps.youtube.creator/24.45.100 (Linux; U; Android 11) gzip',
'osName': 'Android', 'osName': 'Android',
'osVersion': '11', 'osVersion': '11',
}, },
@ -170,17 +173,18 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT_CLIENT_NAME': 14, 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
'REQUIRE_JS_PLAYER': False, 'REQUIRE_JS_PLAYER': False,
'REQUIRE_PO_TOKEN': True, 'REQUIRE_PO_TOKEN': True,
'REQUIRE_AUTH': True,
}, },
# YouTube Kids videos aren't returned on this client for some reason # YouTube Kids videos aren't returned on this client for some reason
'android_vr': { 'android_vr': {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'ANDROID_VR', 'clientName': 'ANDROID_VR',
'clientVersion': '1.57.29', 'clientVersion': '1.60.19',
'deviceMake': 'Oculus', 'deviceMake': 'Oculus',
'deviceModel': 'Quest 3', 'deviceModel': 'Quest 3',
'androidSdkVersion': 32, 'androidSdkVersion': 32,
'userAgent': 'com.google.android.apps.youtube.vr.oculus/1.57.29 (Linux; U; Android 12L; eureka-user Build/SQ3A.220605.009.A1) gzip', 'userAgent': 'com.google.android.apps.youtube.vr.oculus/1.60.19 (Linux; U; Android 12L; eureka-user Build/SQ3A.220605.009.A1) gzip',
'osName': 'Android', 'osName': 'Android',
'osVersion': '12L', 'osVersion': '12L',
}, },
@ -188,68 +192,56 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT_CLIENT_NAME': 28, 'INNERTUBE_CONTEXT_CLIENT_NAME': 28,
'REQUIRE_JS_PLAYER': False, 'REQUIRE_JS_PLAYER': False,
}, },
'android_testsuite': {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_TESTSUITE',
'clientVersion': '1.9',
'androidSdkVersion': 30,
'userAgent': 'com.google.android.youtube/1.9 (Linux; U; Android 11) gzip',
'osName': 'Android',
'osVersion': '11',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 30,
'REQUIRE_JS_PLAYER': False,
'PLAYER_PARAMS': '2AMB',
},
# iOS clients have HLS live streams. Setting device model to get 60fps formats. # iOS clients have HLS live streams. Setting device model to get 60fps formats.
# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
'ios': { 'ios': {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'IOS', 'clientName': 'IOS',
'clientVersion': '19.29.1', 'clientVersion': '19.45.4',
'deviceMake': 'Apple', 'deviceMake': 'Apple',
'deviceModel': 'iPhone16,2', 'deviceModel': 'iPhone16,2',
'userAgent': 'com.google.ios.youtube/19.29.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)', 'userAgent': 'com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)',
'osName': 'iPhone', 'osName': 'iPhone',
'osVersion': '17.5.1.21F90', 'osVersion': '18.1.0.22B83',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 5, 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
'REQUIRE_JS_PLAYER': False, 'REQUIRE_JS_PLAYER': False,
}, },
# This client now requires sign-in for every video
'ios_music': { 'ios_music': {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'IOS_MUSIC', 'clientName': 'IOS_MUSIC',
'clientVersion': '7.08.2', 'clientVersion': '7.27.0',
'deviceMake': 'Apple', 'deviceMake': 'Apple',
'deviceModel': 'iPhone16,2', 'deviceModel': 'iPhone16,2',
'userAgent': 'com.google.ios.youtubemusic/7.08.2 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)', 'userAgent': 'com.google.ios.youtubemusic/7.27.0 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)',
'osName': 'iPhone', 'osName': 'iPhone',
'osVersion': '17.5.1.21F90', 'osVersion': '18.1.0.22B83',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 26, 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
'REQUIRE_JS_PLAYER': False, 'REQUIRE_JS_PLAYER': False,
'REQUIRE_AUTH': True,
}, },
# This client now requires sign-in for every video # This client now requires sign-in for every video
'ios_creator': { 'ios_creator': {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'IOS_CREATOR', 'clientName': 'IOS_CREATOR',
'clientVersion': '24.30.100', 'clientVersion': '24.45.100',
'deviceMake': 'Apple', 'deviceMake': 'Apple',
'deviceModel': 'iPhone16,2', 'deviceModel': 'iPhone16,2',
'userAgent': 'com.google.ios.ytcreator/24.30.100 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)', 'userAgent': 'com.google.ios.ytcreator/24.45.100 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)',
'osName': 'iPhone', 'osName': 'iPhone',
'osVersion': '17.5.1.21F90', 'osVersion': '18.1.0.22B83',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 15, 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
'REQUIRE_JS_PLAYER': False, 'REQUIRE_JS_PLAYER': False,
'REQUIRE_AUTH': True,
}, },
# mweb has 'ultralow' formats # mweb has 'ultralow' formats
# See: https://github.com/yt-dlp/yt-dlp/pull/557 # See: https://github.com/yt-dlp/yt-dlp/pull/557
@ -282,8 +274,10 @@ INNERTUBE_CLIENTS = {
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 85, 'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
'REQUIRE_AUTH': True,
}, },
# This client has pre-merged video+audio 720p/1080p streams # This client now requires sign-in for every video
# It may be able to receive pre-merged video+audio 720p/1080p streams
'mediaconnect': { 'mediaconnect': {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
@ -293,6 +287,7 @@ INNERTUBE_CLIENTS = {
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 95, 'INNERTUBE_CONTEXT_CLIENT_NAME': 95,
'REQUIRE_JS_PLAYER': False, 'REQUIRE_JS_PLAYER': False,
'REQUIRE_AUTH': True,
}, },
} }
@ -321,6 +316,7 @@ def build_innertube_clients():
ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com') ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
ytcfg.setdefault('REQUIRE_JS_PLAYER', True) ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
ytcfg.setdefault('REQUIRE_PO_TOKEN', False) ytcfg.setdefault('REQUIRE_PO_TOKEN', False)
ytcfg.setdefault('REQUIRE_AUTH', False)
ytcfg.setdefault('PLAYER_PARAMS', None) ytcfg.setdefault('PLAYER_PARAMS', None)
ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en') ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
@ -577,208 +573,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
self._check_login_required() self._check_login_required()
def _perform_login(self, username, password): def _perform_login(self, username, password):
auth_type, _, user = (username or '').partition('+') if username.startswith('oauth'):
if auth_type != 'oauth':
raise ExtractorError(self._youtube_login_hint, expected=True)
self._initialize_oauth(user, password)
'''
OAuth 2.0 Device Authorization Grant flow, used by the YouTube TV client (youtube.com/tv).
For more information regarding OAuth 2.0 and the Device Authorization Grant flow in general, see:
- https://developers.google.com/identity/protocols/oauth2/limited-input-device
- https://accounts.google.com/.well-known/openid-configuration
- https://www.rfc-editor.org/rfc/rfc8628
- https://www.rfc-editor.org/rfc/rfc6749
Note: The official client appears to use a proxied version of the oauth2 endpoints on youtube.com/o/oauth2,
which applies some modifications to the response (such as returning errors as 200 OK).
Since the client works with the standard API, we will use that as it is well-documented.
'''
_OAUTH_PROFILE = None
_OAUTH_ACCESS_TOKEN_CACHE = {}
_OAUTH_DISPLAY_ID = 'oauth'
# YouTube TV (TVHTML5) client. You can find these at youtube.com/tv
_OAUTH_CLIENT_ID = '861556708454-d6dlm3lh05idd8npek18k6be8ba3oc68.apps.googleusercontent.com'
_OAUTH_CLIENT_SECRET = 'SboVhoG9s0rNafixCSGGKXAT'
_OAUTH_SCOPE = 'http://gdata.youtube.com https://www.googleapis.com/auth/youtube-paid-content'
# From https://accounts.google.com/.well-known/openid-configuration
# Technically, these should be fetched dynamically and not hard-coded.
# However, as these endpoints rarely change, we can risk saving an extra request for every invocation.
_OAUTH_DEVICE_AUTHORIZATION_ENDPOINT = 'https://oauth2.googleapis.com/device/code'
_OAUTH_TOKEN_ENDPOINT = 'https://oauth2.googleapis.com/token'
@property
def _oauth_cache_key(self):
return f'oauth_refresh_token_{self._OAUTH_PROFILE}'
def _read_oauth_error_response(self, response):
return traverse_obj(
self._webpage_read_content(response, self._OAUTH_TOKEN_ENDPOINT, self._OAUTH_DISPLAY_ID, fatal=False),
({json.loads}, 'error', {str}))
def _set_oauth_info(self, token_response):
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.setdefault(self._OAUTH_PROFILE, {}).update({
'access_token': token_response['access_token'],
'token_type': token_response['token_type'],
'expiry': time_seconds(
seconds=traverse_obj(token_response, ('expires_in', {float_or_none}), default=300) - 10),
})
refresh_token = traverse_obj(token_response, ('refresh_token', {str}))
if refresh_token:
self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token)
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token
def _initialize_oauth(self, user, refresh_token):
self._OAUTH_PROFILE = user or 'default'
if self._OAUTH_PROFILE in YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE:
self.write_debug(f'{self._OAUTH_DISPLAY_ID}: Using cached access token for profile "{self._OAUTH_PROFILE}"')
return
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE] = {}
if refresh_token:
msg = f'{self._OAUTH_DISPLAY_ID}: Using password input as refresh token'
if self.get_param('cachedir') is not False:
msg += ' and caching token to disk; you should supply an empty password next time'
self.to_screen(msg)
self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token)
else:
refresh_token = self.cache.load(self._NETRC_MACHINE, self._oauth_cache_key)
if refresh_token:
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token
try:
token_response = self._refresh_token(refresh_token)
except ExtractorError as e:
error_msg = str(e.orig_msg).replace('Failed to refresh access token: ', '')
self.report_warning(f'{self._OAUTH_DISPLAY_ID}: Failed to refresh access token: {error_msg}')
token_response = self._oauth_authorize
else:
token_response = self._oauth_authorize
self._set_oauth_info(token_response)
self.write_debug(f'{self._OAUTH_DISPLAY_ID}: Logged in using profile "{self._OAUTH_PROFILE}"')
def _refresh_token(self, refresh_token):
try:
token_response = self._download_json(
self._OAUTH_TOKEN_ENDPOINT,
video_id=self._OAUTH_DISPLAY_ID,
note='Refreshing access token',
data=json.dumps({
'client_id': self._OAUTH_CLIENT_ID,
'client_secret': self._OAUTH_CLIENT_SECRET,
'refresh_token': refresh_token,
'grant_type': 'refresh_token',
}).encode(),
headers={'Content-Type': 'application/json'})
except ExtractorError as e:
if isinstance(e.cause, HTTPError):
error = self._read_oauth_error_response(e.cause.response)
if error == 'invalid_grant':
# RFC6749 § 5.2
raise ExtractorError( raise ExtractorError(
'Failed to refresh access token: Refresh token is invalid, revoked, or expired (invalid_grant)', f'Login with OAuth is no longer supported. {self._youtube_login_hint}', expected=True)
expected=True, video_id=self._OAUTH_DISPLAY_ID)
raise ExtractorError(
f'Failed to refresh access token: Authorization server returned error {error}',
video_id=self._OAUTH_DISPLAY_ID)
raise
return token_response
@property self.report_warning(
def _oauth_authorize(self): f'Login with password is not supported for YouTube. {self._youtube_login_hint}')
code_response = self._download_json(
self._OAUTH_DEVICE_AUTHORIZATION_ENDPOINT,
video_id=self._OAUTH_DISPLAY_ID,
note='Initializing authorization flow',
data=json.dumps({
'client_id': self._OAUTH_CLIENT_ID,
'scope': self._OAUTH_SCOPE,
}).encode(),
headers={'Content-Type': 'application/json'})
verification_url = traverse_obj(code_response, ('verification_url', {str}))
user_code = traverse_obj(code_response, ('user_code', {str}))
if not verification_url or not user_code:
raise ExtractorError(
'Authorization server did not provide verification_url or user_code', video_id=self._OAUTH_DISPLAY_ID)
# note: The whitespace is intentional
self.to_screen(
f'{self._OAUTH_DISPLAY_ID}: To give yt-dlp access to your account, '
f'go to {verification_url} and enter code {user_code}')
# RFC8628 § 3.5: default poll interval is 5 seconds if not provided
poll_interval = traverse_obj(code_response, ('interval', {int}), default=5)
for retry in self.RetryManager():
while True:
try:
token_response = self._download_json(
self._OAUTH_TOKEN_ENDPOINT,
video_id=self._OAUTH_DISPLAY_ID,
note=False,
errnote='Failed to request access token',
data=json.dumps({
'client_id': self._OAUTH_CLIENT_ID,
'client_secret': self._OAUTH_CLIENT_SECRET,
'device_code': code_response['device_code'],
'grant_type': 'urn:ietf:params:oauth:grant-type:device_code',
}).encode(),
headers={'Content-Type': 'application/json'})
except ExtractorError as e:
if isinstance(e.cause, TransportError):
retry.error = e
break
elif isinstance(e.cause, HTTPError):
error = self._read_oauth_error_response(e.cause.response)
if not error:
retry.error = e
break
if error == 'authorization_pending':
time.sleep(poll_interval)
continue
elif error == 'expired_token':
raise ExtractorError(
'Authorization timed out', expected=True, video_id=self._OAUTH_DISPLAY_ID)
elif error == 'access_denied':
raise ExtractorError(
'You denied access to an account', expected=True, video_id=self._OAUTH_DISPLAY_ID)
elif error == 'slow_down':
# RFC8628 § 3.5: add 5 seconds to the poll interval
poll_interval += 5
time.sleep(poll_interval)
continue
else:
raise ExtractorError(
f'Authorization server returned an error when fetching access token: {error}',
video_id=self._OAUTH_DISPLAY_ID)
raise
return token_response
def _update_oauth(self):
token = YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.get(self._OAUTH_PROFILE)
if token is None or token['expiry'] > time.time():
return
self._set_oauth_info(self._refresh_token(token['refresh_token']))
@property @property
def _youtube_login_hint(self): def _youtube_login_hint(self):
return ('Use --username=oauth[+PROFILE] --password="" to log in using oauth, ' return (f'{self._login_hint(method="cookies")}. Also see '
f'or else u{self._login_hint(method="cookies")[1:]}. ' 'https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies '
'See https://github.com/yt-dlp/yt-dlp/wiki/Extractors#logging-in-with-oauth for more on how to use oauth. ' 'for tips on effectively exporting YouTube cookies')
'See https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies for help with cookies')
def _check_login_required(self): def _check_login_required(self):
if self._LOGIN_REQUIRED and not self.is_authenticated: if self._LOGIN_REQUIRED and not self.is_authenticated:
@ -928,7 +734,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
@functools.cached_property @functools.cached_property
def is_authenticated(self): def is_authenticated(self):
return self._OAUTH_PROFILE or bool(self._generate_sapisidhash_header()) return bool(self._generate_sapisidhash_header())
def extract_ytcfg(self, video_id, webpage): def extract_ytcfg(self, video_id, webpage):
if not webpage: if not webpage:
@ -938,16 +744,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
default='{}'), video_id, fatal=False) or {} default='{}'), video_id, fatal=False) or {}
def _generate_oauth_headers(self):
self._update_oauth()
oauth_token = YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.get(self._OAUTH_PROFILE)
if not oauth_token:
return {}
return {
'Authorization': f'{oauth_token["token_type"]} {oauth_token["access_token"]}',
}
def _generate_cookie_auth_headers(self, *, ytcfg=None, account_syncid=None, session_index=None, origin=None, **kwargs): def _generate_cookie_auth_headers(self, *, ytcfg=None, account_syncid=None, session_index=None, origin=None, **kwargs):
headers = {} headers = {}
account_syncid = account_syncid or self._extract_account_syncid(ytcfg) account_syncid = account_syncid or self._extract_account_syncid(ytcfg)
@ -977,14 +773,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'Origin': origin, 'Origin': origin,
'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg), 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client), 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
**self._generate_oauth_headers(),
**self._generate_cookie_auth_headers(ytcfg=ytcfg, account_syncid=account_syncid, session_index=session_index, origin=origin), **self._generate_cookie_auth_headers(ytcfg=ytcfg, account_syncid=account_syncid, session_index=session_index, origin=origin),
} }
return filter_dict(headers) return filter_dict(headers)
def _generate_webpage_headers(self):
return self._generate_oauth_headers()
def _download_ytcfg(self, client, video_id): def _download_ytcfg(self, client, video_id):
url = { url = {
'web': 'https://www.youtube.com', 'web': 'https://www.youtube.com',
@ -994,8 +786,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if not url: if not url:
return {} return {}
webpage = self._download_webpage( webpage = self._download_webpage(
url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config', url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
headers=self._generate_webpage_headers())
return self.extract_ytcfg(video_id, webpage) or {} return self.extract_ytcfg(video_id, webpage) or {}
@staticmethod @staticmethod
@ -3260,8 +3051,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
code = self._download_webpage( code = self._download_webpage(
player_url, video_id, fatal=fatal, player_url, video_id, fatal=fatal,
note='Downloading player ' + player_id, note='Downloading player ' + player_id,
errnote=f'Download of {player_url} failed', errnote=f'Download of {player_url} failed')
headers=self._generate_webpage_headers())
if code: if code:
self._code_cache[player_id] = code self._code_cache[player_id] = code
return self._code_cache.get(player_id) return self._code_cache.get(player_id)
@ -3544,8 +3334,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self._download_webpage( self._download_webpage(
url, video_id, f'Marking {label}watched', url, video_id, f'Marking {label}watched',
'Unable to mark watched', fatal=False, 'Unable to mark watched', fatal=False)
headers=self._generate_webpage_headers())
@classmethod @classmethod
def _extract_from_webpage(cls, url, webpage): def _extract_from_webpage(cls, url, webpage):
@ -4059,8 +3848,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if smuggled_data.get('is_music_url') or self.is_music_url(url): if smuggled_data.get('is_music_url') or self.is_music_url(url):
for requested_client in requested_clients: for requested_client in requested_clients:
_, base_client, variant = _split_innertube_client(requested_client) _, base_client, variant = _split_innertube_client(requested_client)
music_client = f'{base_client}_music' music_client = f'{base_client}_music' if base_client != 'mweb' else 'web_music'
if variant != 'music' and music_client in INNERTUBE_CLIENTS: if variant != 'music' and music_client in INNERTUBE_CLIENTS:
if not INNERTUBE_CLIENTS[music_client]['REQUIRE_AUTH'] or self.is_authenticated:
requested_clients.append(music_client) requested_clients.append(music_client)
return orderedSet(requested_clients) return orderedSet(requested_clients)
@ -4174,10 +3964,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.to_screen( self.to_screen(
f'{video_id}: This video is age-restricted and YouTube is requiring ' f'{video_id}: This video is age-restricted and YouTube is requiring '
'account age-verification; some formats may be missing', only_once=True) 'account age-verification; some formats may be missing', only_once=True)
# web_creator and mediaconnect can work around the age-verification requirement # web_creator can work around the age-verification requirement
# _testsuite & _vr variants can also work around age-verification # android_vr and mediaconnect may also be able to work around age-verification
# tv_embedded may(?) still work around age-verification if the video is embeddable # tv_embedded may(?) still work around age-verification if the video is embeddable
append_client('web_creator', 'mediaconnect') append_client('web_creator')
prs.extend(deprioritized_prs) prs.extend(deprioritized_prs)
@ -4526,7 +4316,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if pp: if pp:
query['pp'] = pp query['pp'] = pp
webpage = self._download_webpage( webpage = self._download_webpage(
webpage_url, video_id, fatal=False, query=query, headers=self._generate_webpage_headers()) webpage_url, video_id, fatal=False, query=query)
master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg() master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
@ -4669,6 +4459,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.raise_geo_restricted(subreason, countries, metadata_available=True) self.raise_geo_restricted(subreason, countries, metadata_available=True)
reason += f'. {subreason}' reason += f'. {subreason}'
if reason: if reason:
if 'sign in' in reason.lower():
reason = remove_end(reason, 'This helps protect our community. Learn more')
reason = f'{remove_end(reason.strip(), ".")}. {self._youtube_login_hint}'
self.raise_no_formats(reason, expected=True) self.raise_no_formats(reason, expected=True)
keywords = get_first(video_details, 'keywords', expected_type=list) or [] keywords = get_first(video_details, 'keywords', expected_type=list) or []
@ -5814,7 +5607,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
webpage, data = None, None webpage, data = None, None
for retry in self.RetryManager(fatal=fatal): for retry in self.RetryManager(fatal=fatal):
try: try:
webpage = self._download_webpage(url, item_id, note='Downloading webpage', headers=self._generate_webpage_headers()) webpage = self._download_webpage(url, item_id, note='Downloading webpage')
data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {} data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, network_exceptions): if isinstance(e.cause, network_exceptions):