Compare commits

...

5 Commits

Author SHA1 Message Date
Rajeshwaran 3edf8d066f
Merge bbda789bec into 5904853ae5 2024-05-06 14:18:04 +01:00
bashonly 5904853ae5
[ie/crunchyroll] Support browser impersonation (#9857)
Closes #7442
Authored by: bashonly
2024-05-05 23:15:32 +00:00
Chris Caruso c8bf48f3a8
[ie/cbc.ca:player] Improve `_VALID_URL` (#9866)
Closes #9825
Authored by: carusocr
2024-05-05 23:02:24 +00:00
The-MAGI 351368cb9a
[ie/youporn] Fix extractor (#8827)
Closes #7967
Authored by: The-MAGI
2024-05-05 22:57:38 +00:00
Rajeshwaran bbda789bec jiocinema extractor added 2024-03-03 18:18:13 +05:30
5 changed files with 405 additions and 15 deletions

View File

@ -873,6 +873,10 @@
SangiinIE,
)
from .jeuxvideo import JeuxVideoIE
from .jiocinema import (
JioCinemaIE,
JioVootSeriesIE,
)
from .jiosaavn import (
JioSaavnSongIE,
JioSaavnAlbumIE,

View File

@ -151,7 +151,7 @@ def _real_extract(self, url):
class CBCPlayerIE(InfoExtractor):
IE_NAME = 'cbc.ca:player'
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
_TESTS = [{
'url': 'http://www.cbc.ca/player/play/2683190193',
'md5': '64d25f841ddf4ddb28a235338af32e2c',
@ -277,6 +277,28 @@ class CBCPlayerIE(InfoExtractor):
'location': 'Canada',
'media_type': 'Full Program',
},
}, {
'url': 'https://www.cbc.ca/player/play/video/1.7194274',
'md5': '188b96cf6bdcb2540e178a6caa957128',
'info_dict': {
'id': '2334524995812',
'ext': 'mp4',
'title': '#TheMoment a rare white spirit moose was spotted in Alberta',
'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
'timestamp': 1714788791,
'duration': 77.678,
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/201/543/THE_MOMENT.jpg',
'uploader': 'CBCC-NEW',
'chapters': 'count:0',
'upload_date': '20240504',
'categories': 'count:3',
'series': 'The National',
'tags': 'count:15',
'creators': ['encoder'],
'location': 'Canada',
'media_type': 'Excerpt',
},
}, {
'url': 'cbcplayer:1.7159484',
'only_matching': True,

View File

@ -53,15 +53,19 @@ def _set_auth_info(self, response):
CrunchyrollBaseIE._AUTH_EXPIRY = time_seconds(seconds=traverse_obj(response, ('expires_in', {float_or_none}), default=300) - 10)
def _request_token(self, headers, data, note='Requesting token', errnote='Failed to request token'):
try: # TODO: Add impersonation support here
try:
return self._download_json(
f'{self._BASE_URL}/auth/v1/token', None, note=note, errnote=errnote,
headers=headers, data=urlencode_postdata(data))
headers=headers, data=urlencode_postdata(data), impersonate=True)
except ExtractorError as error:
if not isinstance(error.cause, HTTPError) or error.cause.status != 403:
raise
if target := error.cause.response.extensions.get('impersonate'):
raise ExtractorError(f'Got HTTP Error 403 when using impersonate target "{target}"')
raise ExtractorError(
'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
'Request blocked by Cloudflare. '
'Install the required impersonation dependency if possible, '
'or else navigate to Crunchyroll in your browser, '
'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
'and your browser\'s User-Agent (with --user-agent)', expected=True)

View File

@ -0,0 +1,358 @@
import base64
import itertools
import json
import random
import re
import string
import time
from .common import InfoExtractor
from ..utils import (
str_or_none,
ExtractorError,
float_or_none,
int_or_none,
jwt_decode_hs256,
parse_age_limit,
traverse_obj,
try_call,
url_or_none,
)
class JioBaseIE(InfoExtractor):
    """Shared authentication and API helpers for the JioCinema extractors.

    Login tokens and the user/device IDs obtained by `_perform_login` and
    `_refresh_token` are stored on `JioBaseIE` itself, so every subclass
    instance reads the same values. Guest credentials fetched by
    `_fetch_guest_token` are stored on the calling instance instead.
    """

    _NETRC_MACHINE = 'JioCinema'
    _GEO_BYPASS = False
    _LOGIN_HINT = 'Log in with "-u <phone>" to authenticate with OTP, or use "-u token -p <accessToken>" to login with token.'
    _ACCESS_TOKEN = None
    _REFRESH_TOKEN = None
    _GUEST_TOKEN = None
    _USER_ID = None
    _DEVICE_ID = None
    _API_HEADERS = {'Origin': 'https://www.jiocinema.com', 'Referer': 'https://www.jiocinema.com/'}
    _APP_NAME = {'appName': 'RJIL_JioCinema'}
    _APP_VERSION = {'appVersion': '5.0.0'}
    _API_SIGNATURES = 'o668nxgzwff'
    _TAG_FIELDS = {
        'language': 'language',
        'acodec': 'audio_codec',
        'vcodec': 'video_codec',
    }

    def _cache_token(self, token_type):
        """Store the access and/or refresh token in the cache, keyed by device ID.

        `token_type` is one of 'access', 'refresh' or 'all'.
        """
        if token_type in ('access', 'all'):
            self.cache.store(
                JioBaseIE._NETRC_MACHINE, f'{JioBaseIE._DEVICE_ID}-access', JioBaseIE._ACCESS_TOKEN)
        if token_type in ('refresh', 'all'):
            self.cache.store(
                JioBaseIE._NETRC_MACHINE, f'{JioBaseIE._DEVICE_ID}-refresh', JioBaseIE._REFRESH_TOKEN)

    def _call_api(self, url, video_id, note='Downloading API JSON', headers={}, data={}):
        """Send `data` as a compact JSON body to `url` and return the parsed JSON.

        `_API_HEADERS` are always sent; entries in `headers` override them.
        HTTP 400, 403 and 474 are passed as `expected_status`, so callers
        receive the decoded body for those statuses as well.
        """
        return self._download_json(
            url, video_id, note, data=json.dumps(data, separators=(',', ':')).encode(), headers={
                'Content-Type': 'application/json',
                'Accept': 'application/json',
                **self._API_HEADERS,
                **headers,
            }, expected_status=(400, 403, 474))

    def _call_auth_api(self, service, endpoint, note, headers={}, data={}):
        """Call `endpoint` of the given auth `service` ('token' or 'user' in this file)."""
        return self._call_api(
            f'https://auth-jiocinema.voot.com/{service}service/apis/v4/{endpoint}',
            None, note=note, headers=headers, data=data)

    def _refresh_token(self):
        """Exchange the refresh token for a new access token and cache the result.

        Raises ExtractorError when no refresh token or device ID is available.
        """
        if not JioBaseIE._REFRESH_TOKEN or not JioBaseIE._DEVICE_ID:
            raise ExtractorError('User token has expired', expected=True)
        response = self._call_auth_api(
            'token', 'refreshtoken', 'Refreshing token',
            headers={'accesstoken': self._ACCESS_TOKEN}, data={
                **self._APP_NAME,
                'deviceId': self._DEVICE_ID,
                'refreshToken': self._REFRESH_TOKEN,
                **self._APP_VERSION,
            })
        refresh_token = response.get('refreshTokenId')
        # Only re-cache the refresh token when the server rotated it
        if refresh_token and refresh_token != JioBaseIE._REFRESH_TOKEN:
            JioBaseIE._REFRESH_TOKEN = refresh_token
            self._cache_token('refresh')
        JioBaseIE._ACCESS_TOKEN = response['authToken']
        self._cache_token('access')

    def _fetch_guest_token(self):
        """Generate a random 10-digit device ID and request a guest token for it."""
        JioBaseIE._DEVICE_ID = ''.join(random.choices(string.digits, k=10))
        guest_token = self._call_auth_api(
            'token', 'guest', 'Downloading guest token', data={
                **self._APP_NAME,
                'deviceType': 'phone',
                'os': 'ios',
                'deviceId': self._DEVICE_ID,
                'freshLaunch': False,
                'adId': self._DEVICE_ID,
                **self._APP_VERSION,
            })
        self._GUEST_TOKEN = guest_token['authToken']
        self._USER_ID = guest_token['userId']

    def _call_login_api(self, endpoint, guest_token, data, note):
        """Call the `loginotp/<endpoint>` user API, authenticated with the guest token.

        `guest_token` is the decoded guest JWT payload.
        """
        # NOTE(review): 'data' and the dict are passed to traverse_obj as two
        # separate (alternative) paths rather than as one tuple path - confirm
        # whether ('data', {...}) was intended
        return self._call_auth_api(
            'user', f'loginotp/{endpoint}', note, headers={
                **self.geo_verification_headers(),
                'accesstoken': self._GUEST_TOKEN,
                **self._APP_NAME,
                **traverse_obj(guest_token, 'data', {
                    'deviceType': ('deviceType', {str}),
                    'os': ('os', {str}),
                })}, data=data)

    def _is_token_expired(self, token):
        """Return True if the JWT's `exp` claim is missing/unreadable or at least 180s in the past."""
        return (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 180)

    def _perform_login(self, username, password):
        """Authenticate using one of three username schemes.

        * `token`: `password` is an access token (JWT); an optional
          `refresh_token` extractor argument may supply the refresh token
        * `device`: `password` is a numeric device ID whose tokens were cached
        * phone number: an OTP is requested and then prompted for

        Raises ExtractorError for invalid credentials or an unsupported scheme.
        """
        if self._ACCESS_TOKEN and not self._is_token_expired(self._ACCESS_TOKEN):
            return

        login_mode = username.lower()
        if login_mode == 'token':
            if not try_call(lambda: jwt_decode_hs256(password)):
                # Without this guard an undecodable token would only fail later
                # with an unrelated error when the (unset) access token is decoded
                raise ExtractorError(
                    f'The supplied password is not a valid accessToken. {self._LOGIN_HINT}', expected=True)
            JioBaseIE._ACCESS_TOKEN = password
            refresh_token = self._configuration_arg('refresh_token', [''], ie_key=JioCinemaIE)[0]
            if re.fullmatch(r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}', refresh_token):
                JioBaseIE._REFRESH_TOKEN = refresh_token
            elif refresh_token:
                self.report_warning(
                    'Invalid refresh_token value. Use the "refreshToken" UUID from browser local storage')

        elif login_mode == 'device' and password.isdigit():
            JioBaseIE._REFRESH_TOKEN = self.cache.load(JioBaseIE._NETRC_MACHINE, f'{password}-refresh')
            JioBaseIE._ACCESS_TOKEN = self.cache.load(JioBaseIE._NETRC_MACHINE, f'{password}-access')
            if not JioBaseIE._REFRESH_TOKEN or not JioBaseIE._ACCESS_TOKEN:
                raise ExtractorError(f'Failed to load cached tokens for ID "{password}"', expected=True)

        elif re.fullmatch(r'\+?\d+', username):
            self._fetch_guest_token()
            guest_token = jwt_decode_hs256(self._GUEST_TOKEN)
            initial_data = {
                # b64encode() returns bytes; decode it so the base64 text itself
                # is sent instead of its "b'...'" repr
                'number': base64.b64encode(username.encode()).decode(),
                **self._APP_VERSION,
            }
            response = self._call_login_api('send', guest_token, initial_data, 'Requesting OTP')
            if not traverse_obj(response, ('OTPInfo', {dict})):
                raise ExtractorError('There was a problem with the phone number login attempt')

            is_iphone = guest_token.get('os') == 'ios'
            response = self._call_login_api('verify', guest_token, {
                'deviceInfo': {
                    'consumptionDeviceName': 'iPhone' if is_iphone else 'Android',
                    'info': {
                        'platform': {'name': 'iPhone OS' if is_iphone else 'Android'},
                        'androidId': self._DEVICE_ID,
                        'type': 'iOS' if is_iphone else 'Android',
                    },
                },
                **initial_data,
                'otp': self._get_tfa_info('the one-time password sent to your phone'),
            }, 'Submitting OTP')
            if traverse_obj(response, 'code') == 1043:
                raise ExtractorError('Wrong OTP', expected=True)
            JioBaseIE._REFRESH_TOKEN = response['refreshToken']
            JioBaseIE._ACCESS_TOKEN = response['authToken']

        else:
            raise ExtractorError(self._LOGIN_HINT, expected=True)

        user_token = jwt_decode_hs256(JioBaseIE._ACCESS_TOKEN)['data']
        JioBaseIE._USER_ID = user_token['userId']
        JioBaseIE._DEVICE_ID = user_token['deviceId']
        # Tokens that were just loaded from the cache do not need to be stored again;
        # compare case-insensitively to match the mode detection above
        if JioBaseIE._REFRESH_TOKEN and login_mode != 'device':
            self._cache_token('all')
        elif not JioBaseIE._REFRESH_TOKEN:
            JioBaseIE._REFRESH_TOKEN = self.cache.load(
                JioBaseIE._NETRC_MACHINE, f'{JioBaseIE._DEVICE_ID}-refresh')
            if JioBaseIE._REFRESH_TOKEN:
                self._cache_token('access')
        self.to_screen(f'Logging in as device {JioBaseIE._DEVICE_ID}')
        if self._is_token_expired(JioBaseIE._ACCESS_TOKEN):
            self._refresh_token()

    def _extract_formats_and_subtitles(self, m3u8_url, video_id):
        """Extract HLS formats and subtitles from `m3u8_url`.

        Returns a dict with 'formats', 'subtitles' and 'http_headers' keys.
        """
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, m3u8_id='hls')
        self._remove_duplicate_formats(formats)

        return {
            # '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p
            'formats': traverse_obj(formats, (
                lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)),
            'subtitles': subtitles,
            'http_headers': self._API_HEADERS,
        }
class JioCinemaIE(JioBaseIE):
    """Extractor for single JioCinema movie and TV episode pages."""

    _VALID_URL = r'''(?x)
        (?:
            https?://(?:www\.)?jiocinema\.com/?
            (?:
                movies?/[^/]+/|
                tv-shows/(?:[^/]+/){3}
            )
        )
        (?P<id>\d{3,})
    '''
    _TESTS = [{
        'url': 'https://www.jiocinema.com/tv-shows/agnisakshi-ek-samjhauta/1/pradeep-to-stop-the-wedding/3759931',
        'info_dict': {
            'id': '3759931',
            'ext': 'mp4',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.jiocinema.com/movies/bhediya/3754021/watch',
        'info_dict': {
            'id': '3754021',
            'ext': 'mp4',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Fetch metadata and playback URLs for the video and build its info dict."""
        video_id = self._match_id(url)

        # Use a guest token when not logged in; otherwise keep the user token fresh
        if not self._ACCESS_TOKEN and self._is_token_expired(self._GUEST_TOKEN):
            self._fetch_guest_token()
        elif self._ACCESS_TOKEN and self._is_token_expired(self._ACCESS_TOKEN):
            self._refresh_token()

        video_data = self._download_json(
            'https://content-jiovoot.voot.com/psapi/voot/v1/voot-web/content/query/asset-details'
            f'?&ids=include:{video_id}&responseType=common&devicePlatformType=desktop',
            None, 'Fetching Metadata')

        playback = self._call_api(
            f'https://apis-jiovoot.voot.com/playbackjv/v5/{video_id}', video_id,
            'Downloading playback JSON', headers={
                **self.geo_verification_headers(),
                'accesstoken': self._ACCESS_TOKEN or self._GUEST_TOKEN,
                **self._APP_NAME,
                'deviceid': self._DEVICE_ID,
                'uniqueid': self._USER_ID,
                'x-apisignatures': self._API_SIGNATURES,
                'x-platform': 'androidweb',
                'x-platform-token': 'web',
            }, data={
                '4k': True,
                'ageGroup': '18+',
                'appVersion': '3.4.0',
                'bitrateProfile': 'xhdpi',
                'capability': {
                    'drmCapability': {
                        'aesSupport': 'yes',
                        'fairPlayDrmSupport': 'none',
                        'playreadyDrmSupport': 'none',
                        'widevineDRMSupport': 'none',
                    },
                    'frameRateCapability': [{
                        'frameRateSupport': '30fps',
                        'videoQuality': '1440p',
                    }],
                },
                'continueWatchingRequired': False,
                'dolby': True,
                'downloadRequest': False,
                'hevc': True,
                'kidsSafe': False,
                'manufacturer': 'Windows',
                'model': 'Windows',
                'multiAudioRequired': True,
                'osVersion': '10',
                'parentalPinValid': True,
            })['data']['playbackUrls']

        formats, subs = [], {}
        for url_data in playback:
            if not self.get_param('allow_unplayable_formats') and url_data.get('encryption'):
                self.report_drm(video_id)
            format_url = url_or_none(url_data.get('url'))
            if not format_url:
                continue
            stream_type = url_data.get('streamtype')
            if stream_type == 'dash':
                current_formats, current_subs = self._extract_mpd_formats_and_subtitles(
                    format_url, video_id, headers=self._API_HEADERS)
            elif stream_type == 'hls':
                current_formats, current_subs = self._extract_m3u8_formats_and_subtitles(
                    format_url, video_id, ext='mp4', m3u8_id='hls', headers=self._API_HEADERS)
            else:
                # Skip unknown stream types so that the previous entry's
                # formats are not added a second time
                continue
            formats.extend(current_formats)
            subs = self._merge_subtitles(subs, current_subs)

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subs,
            # NOTE(review): 'parentalRating' and 'genre' do not look like standard
            # info dict field names - confirm against the InfoExtractor field list
            **traverse_obj(video_data, ('result', 0, {
                'title': ('name', {str}),
                'description': ('fullSynopsis', {str}),
                'series': ('showName', {str}),
                'season': ('seasonName', {str}),
                'season_number': ('season', {int_or_none}),
                'season_id': ('seasonId', {int_or_none}),
                'episode': ('fullTitle', {str}),
                'episode_number': ('episode', {int_or_none}),
                'timestamp': ('uploadTime', {int_or_none}),
                'release_date': ('telecastDate', {int_or_none}),
                'release_year': ('releaseYear', {int_or_none}),
                'age_limit': ('ageNemonic', {parse_age_limit}),
                'duration': ('totalDuration', {float_or_none}),
                'parentalRating': ('ageNumeric', {int_or_none}),
                'languages': ('languages'),
                'genre': ('genres', {str_or_none}),
                'thumbnail': ('seo', 'ogImage', {str}),
            })),
        }
class JioVootSeriesBaseIE(JioBaseIE):
    """Base class for series playlists.

    Subclasses provide `_SERIES_API_BASE`, `_RESULT_IE` and a `_VALID_URL`
    with `slug` and `id` groups.
    """

    def _entries(self, series_id):
        """Yield a URL result for every episode of every season of the series."""
        content_api = f'{self._SERIES_API_BASE}/voot/v1/voot-web/content/generic'
        season_list = self._download_json(
            f'{content_api}/season-by-show', series_id,
            'Downloading series metadata JSON', query={
                'sort': 'season:asc',
                'id': series_id,
                'responseType': 'common',
            })
        valid_seasons = traverse_obj(season_list, ('result', lambda _, v: v['id']))

        for index, season_info in enumerate(valid_seasons, 1):
            season_id = season_info['id']
            # Fall back to the running index when the season carries no number
            season_label = season_info.get('season') or index

            for page in itertools.count(1):
                page_json = self._download_json(
                    f'{content_api}/series-wise-episode',
                    season_id, f'Downloading season {season_label} page {page} JSON', query={
                        'sort': 'episode:asc',
                        'id': season_id,
                        'responseType': 'common',
                        'page': page,
                    })
                page_episodes = traverse_obj(
                    page_json, ('result', lambda _, v: v['id'] and url_or_none(v['slug'])))
                # An empty page marks the end of the season
                if not page_episodes:
                    break

                for item in page_episodes:
                    extra = traverse_obj(item, {
                        'video_id': 'id',
                        'video_title': ('fullTitle', {str}),
                        'season_number': ('season', {int_or_none}),
                        'episode_number': ('episode', {int_or_none}),
                    })
                    yield self.url_result(item['slug'], self._RESULT_IE, **extra)

    def _real_extract(self, url):
        """Return a playlist with the series ID as its ID and the URL slug as its title."""
        mobj = self._match_valid_url(url)
        slug = mobj.group('slug')
        series_id = mobj.group('id')
        return self.playlist_result(self._entries(series_id), series_id, slug)
class JioVootSeriesIE(JioVootSeriesBaseIE):
    """Playlist extractor for jiocinema.com TV show pages."""

    # API root used by the series base class for season and episode listings
    _SERIES_API_BASE = 'https://content-jiovoot.voot.com/psapi'
    # Extractor that handles each episode URL yielded by the playlist
    _RESULT_IE = JioCinemaIE
    _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/tv-shows/(?P<slug>[\w-]+)/(?P<id>\d{3,})'
    _TESTS = [
        {
            'url': 'https://www.jiocinema.com/tv-shows/naagin/3499917',
            'info_dict': {
                'id': '3499917',
                'title': 'naagin',
            },
            'playlist_mincount': 120,
        },
    ]

View File

@ -72,15 +72,15 @@ class YouPornIE(InfoExtractor):
'id': '16290308',
'age_limit': 18,
'categories': [],
'description': 'md5:00ea70f642f431c379763c17c2f396bc',
'description': str, # TODO: detect/remove SEO spam description in ytdl backport
'display_id': 'tinderspecial-trailer1',
'duration': 298.0,
'ext': 'mp4',
'upload_date': '20201123',
'uploader': 'Ersties',
'tags': [],
'thumbnail': 'https://fi1.ypncdn.com/202011/23/16290308/original/8/tinderspecial-trailer1-8(m=eaAaaEPbaaaa).jpg',
'timestamp': 1606089600,
'thumbnail': r're:https://.+\.jpg',
'timestamp': 1606147564,
'title': 'Tinder In Real Life',
'view_count': int,
}
@ -88,11 +88,17 @@ class YouPornIE(InfoExtractor):
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
definitions = self._download_json(
f'https://www.youporn.com/api/video/media_definitions/{video_id}/', display_id or video_id)
self._set_cookie('.youporn.com', 'age_verified', '1')
webpage = self._download_webpage(f'https://www.youporn.com/watch/{video_id}', video_id)
definitions = self._search_json(r'\bplayervars\s*:', webpage, 'player vars', video_id)['mediaDefinitions']
def get_format_data(data, f):
return traverse_obj(data, lambda _, v: v['format'] == f and url_or_none(v['videoUrl']))
def get_format_data(data, stream_type):
info_url = traverse_obj(data, (lambda _, v: v['format'] == stream_type, 'videoUrl', {url_or_none}, any))
if not info_url:
return []
return traverse_obj(
self._download_json(info_url, video_id, f'Downloading {stream_type} info JSON', fatal=False),
lambda _, v: v['format'] == stream_type and url_or_none(v['videoUrl']))
formats = []
# Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
@ -123,10 +129,6 @@ def get_format_data(data, f):
f['height'] = height
formats.append(f)
webpage = self._download_webpage(
'http://www.youporn.com/watch/%s' % video_id, display_id,
headers={'Cookie': 'age_verified=1'})
title = self._html_search_regex(
r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
webpage, 'title', default=None) or self._og_search_title(