Compare commits

..

5 Commits

Author SHA1 Message Date
Elyse
9feef0b976
Merge 9dd8574b68 into 0b7ec08816 2024-10-21 17:22:00 -06:00
DarkZeros
0b7ec08816
[ie/telecinco] Fix extractors (#11142)
Closes #10986, Closes #11106
Authored by: DarkZeros, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-10-21 21:18:12 +00:00
David Skrundz
40054cb4a7
[ie/gem.cbc.ca] Fix formats extraction (#11196)
Also extracts `timestamp` and `release_timestamp` as seconds instead of milliseconds

Authored by: DavidSkrundz
2024-10-21 18:56:43 +00:00
bashonly
fed53d70bd [ie/youtube] Remove broken android_producer client (#11297)
Authored by: bashonly
2024-10-21 18:39:58 +00:00
bashonly
ec2f4bf082 [ie/youtube] Remove broken age-restriction workaround (#11297)
Closes #11296
Authored by: bashonly
2024-10-21 18:39:58 +00:00
4 changed files with 143 additions and 150 deletions

View File

@ -4,7 +4,6 @@ import json
import re import re
import time import time
import urllib.parse import urllib.parse
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest from ..networking import HEADRequest
@ -12,7 +11,6 @@ from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
int_or_none, int_or_none,
join_nonempty,
js_to_json, js_to_json,
mimetype2ext, mimetype2ext,
orderedSet, orderedSet,
@ -524,14 +522,13 @@ class CBCGemIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
# This is a normal, public, TV show video # This is a normal, public, TV show video
'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01', 'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
'md5': '93dbb31c74a8e45b378cf13bd3f6f11e',
'info_dict': { 'info_dict': {
'id': 'schitts-creek/s06e01', 'id': 'schitts-creek/s06e01',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Smoke Signals', 'title': 'Smoke Signals',
'description': 'md5:929868d20021c924020641769eb3e7f1', 'description': 'md5:929868d20021c924020641769eb3e7f1',
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_06e01_thumbnail_v01.jpg?im=Resize=(Size)', 'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_06e01_thumbnail_v01\.jpg',
'duration': 1314, 'duration': 1324,
'categories': ['comedy'], 'categories': ['comedy'],
'series': 'Schitt\'s Creek', 'series': 'Schitt\'s Creek',
'season': 'Season 6', 'season': 'Season 6',
@ -539,19 +536,21 @@ class CBCGemIE(InfoExtractor):
'episode': 'Smoke Signals', 'episode': 'Smoke Signals',
'episode_number': 1, 'episode_number': 1,
'episode_id': 'schitts-creek/s06e01', 'episode_id': 'schitts-creek/s06e01',
'upload_date': '20210618',
'timestamp': 1623988800,
'release_date': '20200107',
'release_timestamp': 1578427200,
}, },
'params': {'format': 'bv'}, 'params': {'format': 'bv'},
'skip': 'Geo-restricted to Canada',
}, { }, {
# This video requires an account in the browser, but works fine in yt-dlp # This video requires an account in the browser, but works fine in yt-dlp
'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01', 'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01',
'md5': '297a9600f554f2258aed01514226a697',
'info_dict': { 'info_dict': {
'id': 'schitts-creek/s01e01', 'id': 'schitts-creek/s01e01',
'ext': 'mp4', 'ext': 'mp4',
'title': 'The Cup Runneth Over', 'title': 'The Cup Runneth Over',
'description': 'md5:9bca14ea49ab808097530eb05a29e797', 'description': 'md5:9bca14ea49ab808097530eb05a29e797',
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_01e01_thumbnail_v01.jpg?im=Resize=(Size)', 'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_01e01_thumbnail_v01\.jpg',
'series': 'Schitt\'s Creek', 'series': 'Schitt\'s Creek',
'season_number': 1, 'season_number': 1,
'season': 'Season 1', 'season': 'Season 1',
@ -560,9 +559,12 @@ class CBCGemIE(InfoExtractor):
'episode_id': 'schitts-creek/s01e01', 'episode_id': 'schitts-creek/s01e01',
'duration': 1309, 'duration': 1309,
'categories': ['comedy'], 'categories': ['comedy'],
'upload_date': '20210617',
'timestamp': 1623902400,
'release_date': '20151124',
'release_timestamp': 1448323200,
}, },
'params': {'format': 'bv'}, 'params': {'format': 'bv'},
'skip': 'Geo-restricted to Canada',
}, { }, {
'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01', 'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01',
'only_matching': True, 'only_matching': True,
@ -631,38 +633,6 @@ class CBCGemIE(InfoExtractor):
return return
self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token') self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token')
def _find_secret_formats(self, formats, video_id):
    """Yield "secret" variants of the first video-bearing entry in *formats*.

    The first format whose ``vcodec`` is not ``'none'`` is used as a template.
    Its URL, with the ``filter=`` and ``format=`` manifest arguments removed,
    is downloaded as XML; every quality level listed under a ``video`` element
    that has both a bitrate and an ``Index`` attribute yields a copy of the
    template pointing at that bitrate.

    Nothing is yielded when no such format exists or when the download does
    not return an XML element.
    """
    template = None
    for candidate in formats:
        if candidate.get('vcodec') != 'none':
            template = candidate
            break
    if not template:
        return

    # The URL without `filter=` is also the base for the per-quality URLs below
    stripped_url = re.sub(r'(Manifest\(.*?),filter=[\w-]+(.*?\))', r'\1\2', template['url'])
    manifest_url = re.sub(r'(Manifest\(.*?),format=[\w-]+(.*?\))', r'\1\2', stripped_url)

    manifest = self._download_xml(manifest_url, video_id, note='Downloading secret XML', fatal=False)
    if not isinstance(manifest, xml.etree.ElementTree.Element):
        return

    video_streams = (node for node in manifest if node.attrib.get('Type') == 'video')
    for stream_node in video_streams:
        for level in stream_node:
            attrs = level.attrib
            bitrate = int_or_none(attrs.get('Bitrate'))
            if bitrate and 'Index' in attrs:
                height = int_or_none(attrs.get('MaxHeight'))
                secret = dict(template)
                secret.update({
                    'format_id': join_nonempty('sec', height),
                    # Note: \g<1> is necessary instead of \1 since bitrate is a number
                    'url': re.sub(r'(QualityLevels\()\d+(\))', fr'\g<1>{bitrate}\2', stripped_url),
                    'width': int_or_none(attrs.get('MaxWidth')),
                    'tbr': bitrate / 1000.0,
                    'height': height,
                })
                yield secret
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_info = self._download_json( video_info = self._download_json(
@ -676,7 +646,6 @@ class CBCGemIE(InfoExtractor):
else: else:
headers = {} headers = {}
m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers) m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers)
m3u8_url = m3u8_info.get('url')
if m3u8_info.get('errorCode') == 1: if m3u8_info.get('errorCode') == 1:
self.raise_geo_restricted(countries=['CA']) self.raise_geo_restricted(countries=['CA'])
@ -685,9 +654,9 @@ class CBCGemIE(InfoExtractor):
elif m3u8_info.get('errorCode') != 0: elif m3u8_info.get('errorCode') != 0:
raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}') raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}')
formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls') formats = self._extract_m3u8_formats(
m3u8_info['url'], video_id, 'mp4', m3u8_id='hls', query={'manifestType': ''})
self._remove_duplicate_formats(formats) self._remove_duplicate_formats(formats)
formats.extend(self._find_secret_formats(formats, video_id))
for fmt in formats: for fmt in formats:
if fmt.get('vcodec') == 'none': if fmt.get('vcodec') == 'none':
@ -703,20 +672,21 @@ class CBCGemIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': video_info['title'],
'description': video_info.get('description'),
'thumbnail': video_info.get('image'),
'series': video_info.get('series'),
'season_number': video_info.get('season'),
'season': f'Season {video_info.get("season")}',
'episode_number': video_info.get('episode'),
'episode': video_info.get('title'),
'episode_id': video_id, 'episode_id': video_id,
'duration': video_info.get('duration'),
'categories': [video_info.get('category')],
'formats': formats, 'formats': formats,
'release_timestamp': video_info.get('airDate'), **traverse_obj(video_info, {
'timestamp': video_info.get('availableDate'), 'title': ('title', {str}),
'episode': ('title', {str}),
'description': ('description', {str}),
'thumbnail': ('image', {url_or_none}),
'series': ('series', {str}),
'season_number': ('season', {int_or_none}),
'episode_number': ('episode', {int_or_none}),
'duration': ('duration', {int_or_none}),
'categories': ('category', {str}, all),
'release_timestamp': ('airDate', {int_or_none(scale=1000)}),
'timestamp': ('availableDate', {int_or_none(scale=1000)}),
}),
} }

View File

@ -1,14 +1,13 @@
from .telecinco import TelecincoIE from .telecinco import TelecincoBaseIE
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
) )
class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE class MiTeleIE(TelecincoBaseIE):
IE_DESC = 'mitele.es' IE_DESC = 'mitele.es'
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player' _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
_TESTS = [{ _TESTS = [{
'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player', 'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
'info_dict': { 'info_dict': {
@ -27,6 +26,7 @@ class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE
'timestamp': 1471209401, 'timestamp': 1471209401,
'upload_date': '20160814', 'upload_date': '20160814',
}, },
'skip': 'HTTP Error 404 Not Found',
}, { }, {
# no explicit title # no explicit title
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player', 'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
@ -49,6 +49,26 @@ class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'skip': 'HTTP Error 404 Not Found',
}, {
'url': 'https://www.mitele.es/programas-tv/horizonte/temporada-5/programa-171-40_013480051/player/',
'info_dict': {
'id': '7adbe22e-cd41-4787-afa4-36f3da7c2c6f',
'ext': 'mp4',
'title': 'Horizonte Temporada 5 Programa 171',
'description': 'md5:97f1fb712c5ac27e5693a8b3c5c0c6e3',
'episode': 'Las Zonas de Bajas Emisiones, a debate',
'episode_number': 171,
'season': 'Season 5',
'season_number': 5,
'series': 'Horizonte',
'duration': 7012,
'upload_date': '20240927',
'timestamp': 1727416450,
'thumbnail': 'https://album.mediaset.es/eimg/2024/09/27/horizonte-171_9f02.jpg',
'age_limit': 12,
},
'params': {'geo_bypass_country': 'ES'},
}, { }, {
'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player', 'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
'only_matching': True, 'only_matching': True,

View File

@ -2,15 +2,69 @@ import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError,
clean_html, clean_html,
int_or_none, int_or_none,
join_nonempty,
str_or_none, str_or_none,
try_get, traverse_obj,
update_url,
url_or_none,
) )
class TelecincoIE(InfoExtractor): class TelecincoBaseIE(InfoExtractor):
def _parse_content(self, content, url):
    """Build the info dict for one embedded video.

    *content* is a mapping describing the player; its ``dataMediaId`` and
    ``dataConfig`` keys are required, ``dataPoster`` and ``dataDuration`` are
    optional. *url* is the page URL: it is sent as ``Referer`` and its
    scheme and host form the ``Origin`` header.

    Returns a dict with ``id``, ``title``, ``formats``, ``thumbnail``,
    ``duration`` and ``http_headers``.

    A geo-restriction error is raised when the token endpoint answers
    HTTP 403 with error code 4038; any other ``ExtractorError`` from that
    request is re-raised unchanged.
    """
    video_id = content['dataMediaId']
    config = self._download_json(
        content['dataConfig'], video_id, 'Downloading config JSON')
    services = config['services']
    caronte = self._download_json(services['caronte'], video_id)
    # Report DRM when the first delivery entry carries a boolean-true `drm` flag
    if traverse_obj(caronte, ('dls', 0, 'drm', {bool})):
        self.report_drm(video_id)

    stream = caronte['dls'][0]['stream']
    headers = {
        'Referer': url,
        'Origin': re.match(r'https?://[^/]+', url).group(0),
    }
    # Geo-verification headers go on the API/manifest requests only;
    # the plain `headers` are what the result exposes as `http_headers`
    geo_headers = {**headers, **self.geo_verification_headers()}

    try:
        cdn = self._download_json(
            caronte['cerbero'], video_id, data=json.dumps({
                'bbx': caronte['bbx'],
                'gbx': self._download_json(services['gbx'], video_id)['gbx'],
            }).encode(), headers={
                'Content-Type': 'application/json',
                **geo_headers,
            })['tokens']['1']['cdn']
    except ExtractorError as error:
        if isinstance(error.cause, HTTPError) and error.cause.status == 403:
            # The 403 body is parsed as JSON to look for the specific error code
            error_code = traverse_obj(
                self._webpage_read_content(error.cause.response, caronte['cerbero'], video_id, fatal=False),
                ({json.loads}, 'code', {int}))
            if error_code == 4038:
                self.raise_geo_restricted(countries=['ES'])
        raise

    formats = self._extract_m3u8_formats(
        update_url(stream, query=cdn), video_id, 'mp4', m3u8_id='hls', headers=geo_headers)

    return {
        'id': video_id,
        'title': traverse_obj(config, ('info', 'title', {str})),
        'formats': formats,
        # 'poster' -> 'imageUrl' must be ONE nested path: separate positional
        # arguments to traverse_obj are alternative paths, which would never
        # reach config['poster']['imageUrl']
        'thumbnail': (traverse_obj(content, ('dataPoster', {url_or_none}))
                      or traverse_obj(config, ('poster', 'imageUrl', {url_or_none}))),
        'duration': traverse_obj(content, ('dataDuration', {int_or_none})),
        'http_headers': headers,
    }
class TelecincoIE(TelecincoBaseIE):
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es' IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
_VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html' _VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
@ -30,6 +84,7 @@ class TelecincoIE(InfoExtractor):
'duration': 662, 'duration': 662,
}, },
}], }],
'skip': 'HTTP Error 410 Gone',
}, { }, {
'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html', 'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
'md5': 'c86fe0d99e3bdb46b7950d38bf6ef12a', 'md5': 'c86fe0d99e3bdb46b7950d38bf6ef12a',
@ -40,23 +95,24 @@ class TelecincoIE(InfoExtractor):
'description': 'md5:a62ecb5f1934fc787107d7b9a2262805', 'description': 'md5:a62ecb5f1934fc787107d7b9a2262805',
'duration': 79, 'duration': 79,
}, },
'skip': 'Redirects to main page',
}, { }, {
'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html', 'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
'md5': 'eddb50291df704ce23c74821b995bcac', 'md5': '5ce057f43f30b634fbaf0f18c71a140a',
'info_dict': { 'info_dict': {
'id': 'aywerkD2Sv1vGNqq9b85Q2', 'id': 'aywerkD2Sv1vGNqq9b85Q2',
'ext': 'mp4', 'ext': 'mp4',
'title': '#DOYLACARA. Con la trata no hay trato', 'title': '#DOYLACARA. Con la trata no hay trato',
'description': 'md5:2771356ff7bfad9179c5f5cd954f1477',
'duration': 50, 'duration': 50,
'thumbnail': 'https://album.mediaset.es/eimg/2017/11/02/1tlQLO5Q3mtKT24f3EaC24.jpg',
}, },
}, { }, {
# video in opening's content # video in opening's content
'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html', 'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html',
'info_dict': { 'info_dict': {
'id': '2907195140', 'id': '1691427',
'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"', 'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
'description': 'md5:73f340a7320143d37ab895375b2bf13a', 'description': r're:Fiorella, la sobrina de Edmundo Arrocet, concedió .{727}',
}, },
'playlist': [{ 'playlist': [{
'md5': 'adb28c37238b675dad0f042292f209a7', 'md5': 'adb28c37238b675dad0f042292f209a7',
@ -65,6 +121,7 @@ class TelecincoIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"', 'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
'duration': 1015, 'duration': 1015,
'thumbnail': 'https://album.mediaset.es/eimg/2020/02/29/5opaC37lUhKlZ7FoDhiVC.jpg',
}, },
}], }],
'params': { 'params': {
@ -81,66 +138,29 @@ class TelecincoIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
def _parse_content(self, content, url):
    """Build the info dict for one embedded video.

    *content* must provide ``dataMediaId`` and ``dataConfig``; ``dataPoster``
    and ``dataDuration`` are read when present. The scheme and host of *url*
    are sent as the ``Origin`` header of the token request.

    Returns a dict with ``id``, ``title``, ``formats``, ``thumbnail`` and
    ``duration``.
    """
    media_id = content['dataMediaId']
    player_config = self._download_json(
        content['dataConfig'], media_id, 'Downloading config JSON')
    video_title = player_config['info']['title']
    services = player_config['services']
    caronte = self._download_json(services['caronte'], media_id)
    stream = caronte['dls'][0]['stream']

    request_headers = self.geo_verification_headers()
    origin = re.match(r'https?://[^/]+', url).group(0)
    request_headers['Content-Type'] = 'application/json;charset=UTF-8'
    request_headers['Origin'] = origin

    # Look up the endpoint before building the payload, as the payload itself
    # triggers another download
    token_endpoint = caronte['cerbero']
    payload = json.dumps({
        'bbx': caronte['bbx'],
        'gbx': self._download_json(services['gbx'], media_id)['gbx'],
    }).encode()
    token_response = self._download_json(
        token_endpoint, media_id, data=payload, headers=request_headers)
    cdn = token_response['tokens']['1']['cdn']

    formats = self._extract_m3u8_formats(
        stream + '?' + cdn, media_id, 'mp4', 'm3u8_native', m3u8_id='hls')

    poster = content.get('dataPoster')
    if not poster:
        poster = player_config.get('poster', {}).get('imageUrl')

    return {
        'id': media_id,
        'title': video_title,
        'formats': formats,
        'thumbnail': poster,
        'duration': int_or_none(content.get('dataDuration')),
    }
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
article = self._parse_json(self._search_regex( article = self._search_json(
r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=\s*({.+})', r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=',
webpage, 'article'), display_id)['article'] webpage, 'article', display_id)['article']
title = article.get('title') description = traverse_obj(article, ('leadParagraph', {clean_html}, filter))
description = clean_html(article.get('leadParagraph')) or ''
if article.get('editorialType') != 'VID': if article.get('editorialType') != 'VID':
entries = [] entries = []
body = [article.get('opening')]
body.extend(try_get(article, lambda x: x['body'], list) or []) for p in traverse_obj(article, ((('opening', all), 'body'), lambda _, v: v['content'])):
for p in body: content = p['content']
if not isinstance(p, dict):
continue
content = p.get('content')
if not content:
continue
type_ = p.get('type') type_ = p.get('type')
if type_ == 'paragraph': if type_ == 'paragraph' and isinstance(content, str):
content_str = str_or_none(content) description = join_nonempty(description, content, delim='')
if content_str: elif type_ == 'video' and isinstance(content, dict):
description += content_str
continue
if type_ == 'video' and isinstance(content, dict):
entries.append(self._parse_content(content, url)) entries.append(self._parse_content(content, url))
return self.playlist_result( return self.playlist_result(
entries, str_or_none(article.get('id')), title, description) entries, str_or_none(article.get('id')),
content = article['opening']['content'] traverse_obj(article, ('title', {str})), clean_html(description))
info = self._parse_content(content, url)
info.update({ info = self._parse_content(article['opening']['content'], url)
'description': description, info['description'] = description
})
return info return info

View File

@ -114,6 +114,7 @@ INNERTUBE_CLIENTS = {
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 67, 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
}, },
# This client now requires sign-in for every video
'web_creator': { 'web_creator': {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
@ -153,6 +154,7 @@ INNERTUBE_CLIENTS = {
'REQUIRE_JS_PLAYER': False, 'REQUIRE_JS_PLAYER': False,
'REQUIRE_PO_TOKEN': True, 'REQUIRE_PO_TOKEN': True,
}, },
# This client now requires sign-in for every video
'android_creator': { 'android_creator': {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
@ -200,21 +202,6 @@ INNERTUBE_CLIENTS = {
'REQUIRE_JS_PLAYER': False, 'REQUIRE_JS_PLAYER': False,
'PLAYER_PARAMS': '2AMB', 'PLAYER_PARAMS': '2AMB',
}, },
# This client only has legacy formats and storyboards
'android_producer': {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_PRODUCER',
'clientVersion': '0.111.1',
'androidSdkVersion': 30,
'userAgent': 'com.google.android.apps.youtube.producer/0.111.1 (Linux; U; Android 11) gzip',
'osName': 'Android',
'osVersion': '11',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 91,
'REQUIRE_JS_PLAYER': False,
},
# iOS clients have HLS live streams. Setting device model to get 60fps formats. # iOS clients have HLS live streams. Setting device model to get 60fps formats.
# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
'ios': { 'ios': {
@ -247,6 +234,7 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT_CLIENT_NAME': 26, 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
'REQUIRE_JS_PLAYER': False, 'REQUIRE_JS_PLAYER': False,
}, },
# This client now requires sign-in for every video
'ios_creator': { 'ios_creator': {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
@ -282,8 +270,9 @@ INNERTUBE_CLIENTS = {
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 7, 'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
}, },
# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option) # This client now requires sign-in for every video
# See: https://github.com/zerodytrash/YouTube-Internal-Clients # It was previously an age-gate workaround for videos that were `playable_in_embed`
# It may still be useful if signed into an EU account that is not age-verified
'tv_embedded': { 'tv_embedded': {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
@ -1525,6 +1514,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'heatmap': 'count:100', 'heatmap': 'count:100',
'timestamp': 1401991663, 'timestamp': 1401991663,
}, },
'skip': 'Age-restricted; requires authentication',
}, },
{ {
'note': 'Age-gate video with embed allowed in public site', 'note': 'Age-gate video with embed allowed in public site',
@ -1555,6 +1545,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'comment_count': int, 'comment_count': int,
'channel_is_verified': True, 'channel_is_verified': True,
}, },
'skip': 'Age-restricted; requires authentication',
}, },
{ {
'note': 'Age-gate video embedable only with clientScreen=EMBED', 'note': 'Age-gate video embedable only with clientScreen=EMBED',
@ -1585,6 +1576,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@ProjektMelody', 'uploader_id': '@ProjektMelody',
'timestamp': 1577508724, 'timestamp': 1577508724,
}, },
'skip': 'Age-restricted; requires authentication',
}, },
{ {
'note': 'Non-Agegated non-embeddable video', 'note': 'Non-Agegated non-embeddable video',
@ -2356,6 +2348,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True, 'channel_is_verified': True,
'timestamp': 1405513526, 'timestamp': 1405513526,
}, },
'skip': 'Age-restricted; requires authentication',
}, },
{ {
# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
@ -2726,6 +2719,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'timestamp': 1577508724, 'timestamp': 1577508724,
}, },
'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'}, 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
'skip': 'Age-restricted; requires authentication',
}, },
{ {
'url': 'https://www.youtube.com/live/qVv6vCqciTM', 'url': 'https://www.youtube.com/live/qVv6vCqciTM',
@ -3982,26 +3976,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else: else:
prs.append(pr) prs.append(pr)
# tv_embedded can work around age-gate and age-verification IF the video is embeddable
if self._is_agegated(pr) and variant != 'tv_embedded':
append_client(f'tv_embedded.{base_client}')
# Unauthenticated users will only get tv_embedded client formats if age-gated
if self._is_agegated(pr) and not self.is_authenticated:
self.to_screen(
f'{video_id}: This video is age-restricted; some formats may be missing '
f'without authentication. {self._login_hint()}', only_once=True)
# EU countries require age-verification for accounts to access age-restricted videos # EU countries require age-verification for accounts to access age-restricted videos
# If account is not age-verified, _is_agegated() will be truthy for non-embedded clients # If account is not age-verified, _is_agegated() will be truthy for non-embedded clients
# If embedding is disabled for the video, _is_unplayable() will be truthy for tv_embedded if self.is_authenticated and self._is_agegated(pr):
embedding_is_disabled = variant == 'tv_embedded' and self._is_unplayable(pr)
if self.is_authenticated and (self._is_agegated(pr) or embedding_is_disabled):
self.to_screen( self.to_screen(
f'{video_id}: This video is age-restricted and YouTube is requiring ' f'{video_id}: This video is age-restricted and YouTube is requiring '
'account age-verification; some formats may be missing', only_once=True) 'account age-verification; some formats may be missing', only_once=True)
# web_creator and mediaconnect can work around the age-verification requirement # web_creator and mediaconnect can work around the age-verification requirement
# _producer, _testsuite, & _vr variants can also work around age-verification # _testsuite & _vr variants can also work around age-verification
# tv_embedded may(?) still work around age-verification if the video is embeddable
append_client('web_creator', 'mediaconnect') append_client('web_creator', 'mediaconnect')
prs.extend(deprioritized_prs) prs.extend(deprioritized_prs)