mirror of https://github.com/yt-dlp/yt-dlp.git
Compare commits
9 Commits
cdf6a0e1c8
...
c816b405f1
Author | SHA1 | Date |
---|---|---|
Tahasanul Abraham | c816b405f1 | |
bashonly | 89f535e265 | |
bashonly | ff38a011d5 | |
bashonly | 8056a3026e | |
Simon Sawicki | 3ee1194288 | |
bashonly | e3b42d8b1b | |
bashonly | c9ce57d9bf | |
bashonly | 02483bea1c | |
Tahasanul Abraham | da07bbeb12 |
|
@ -254,7 +254,7 @@ jobs:
|
|||
# We need to fuse our own universal2 wheels for curl_cffi
|
||||
python3 -m pip install -U --user delocate
|
||||
mkdir curl_cffi_whls curl_cffi_universal2
|
||||
python3 devscripts/install_deps.py --print -o --include curl_cffi > requirements.txt
|
||||
python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt
|
||||
for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do
|
||||
python3 -m pip download \
|
||||
--only-binary=:all: \
|
||||
|
@ -362,7 +362,7 @@ jobs:
|
|||
- name: Install Requirements
|
||||
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
|
||||
python devscripts/install_deps.py -o --include build
|
||||
python devscripts/install_deps.py --include py2exe --include curl_cffi
|
||||
python devscripts/install_deps.py --include py2exe --include curl-cffi
|
||||
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl"
|
||||
|
||||
- name: Prepare
|
||||
|
|
|
@ -53,7 +53,7 @@ jobs:
|
|||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install test requirements
|
||||
run: python3 ./devscripts/install_deps.py --include dev --include curl_cffi
|
||||
run: python3 ./devscripts/install_deps.py --include dev --include curl-cffi
|
||||
- name: Run tests
|
||||
continue-on-error: False
|
||||
run: |
|
||||
|
|
|
@ -202,7 +202,7 @@ #### Impersonation
|
|||
The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting.
|
||||
|
||||
* [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE)
|
||||
* Can be installed with the `curl_cffi` group, e.g. `pip install yt-dlp[default,curl_cffi]`
|
||||
* Can be installed with the `curl-cffi` group, e.g. `pip install yt-dlp[default,curl-cffi]`
|
||||
* Currently only included in `yt-dlp.exe` and `yt-dlp_macos` builds
|
||||
|
||||
|
||||
|
|
|
@ -53,7 +53,7 @@ dependencies = [
|
|||
|
||||
[project.optional-dependencies]
|
||||
default = []
|
||||
curl_cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
|
||||
curl-cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
|
||||
secretstorage = [
|
||||
"cffi",
|
||||
"secretstorage",
|
||||
|
|
|
@ -1906,6 +1906,15 @@ def test_response_with_expected_status_returns_content(self):
|
|||
expected_status=TEAPOT_RESPONSE_STATUS)
|
||||
self.assertEqual(content, TEAPOT_RESPONSE_BODY)
|
||||
|
||||
def test_search_nextjs_data(self):
    """Check `_search_nextjs_data` on a valid payload and on empty input with `fatal`/`default`."""
    search = self.ie._search_nextjs_data
    webpage = '<script id="__NEXT_DATA__" type="application/json">{"props":{}}</script>'

    # A well-formed __NEXT_DATA__ script tag is parsed into a dict
    self.assertEqual(search(webpage, None), {'props': {}})

    # With nothing to find, the result is driven by `fatal` / `default`
    self.assertEqual(search('', None, fatal=False), {})
    self.assertEqual(search('', None, default=None), None)
    self.assertEqual(search('', None, default={}), {})

    # The legacy string default '{}' is expected to surface as a DeprecationWarning here
    with self.assertRaises(DeprecationWarning):
        self.assertEqual(search('', None, default='{}'), {})
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -2389,7 +2389,10 @@
|
|||
from .xstream import XstreamIE
|
||||
from .xvideos import (
|
||||
XVideosIE,
|
||||
XVideosQuickiesIE
|
||||
XVideosChannelIE,
|
||||
XVideosPlaylistIE,
|
||||
XVideosRelatedIE,
|
||||
XVideosSearchIE,
|
||||
)
|
||||
from .xxxymovies import XXXYMoviesIE
|
||||
from .yahoo import (
|
||||
|
|
|
@ -105,7 +105,7 @@ def _real_extract(self, url):
|
|||
video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
event_data = traverse_obj(
|
||||
self._search_nextjs_data(webpage, video_id, default='{}'),
|
||||
self._search_nextjs_data(webpage, video_id, default={}),
|
||||
('props', 'pageProps', 'eventCMSData', {
|
||||
'title': ('event_name', {str}),
|
||||
'thumbnail': ('event_thumbnail_image', {url_or_none}),
|
||||
|
|
|
@ -1738,12 +1738,16 @@ def traverse_json_ld(json_ld, at_top_level=True):
|
|||
traverse_json_ld(json_ld)
|
||||
return filter_dict(info)
|
||||
|
||||
def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal=True, **kw):
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
|
||||
webpage, 'next.js data', fatal=fatal, **kw),
|
||||
video_id, transform_source=transform_source, fatal=fatal)
|
||||
def _search_nextjs_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT, **kw):
    """Locate the `__NEXT_DATA__` script tag in `webpage` and return its JSON via `_search_json`.

    Passing the string `'{}'` as `default` is deprecated: a deprecation warning is
    emitted and an empty dict is used instead. Supplying any `default` makes the
    search non-fatal. Extra keyword arguments are forwarded to `_search_json`.
    """
    if default == '{}':
        self._downloader.deprecation_warning('using `default=\'{}\'` is deprecated, use `default={}` instead')
        default = {}

    # An explicit default always wins over `fatal`
    fatal = fatal if default is NO_DEFAULT else False

    start_pattern = r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>'
    return self._search_json(
        start_pattern, webpage, 'next.js data', video_id,
        end_pattern='</script>', fatal=fatal, default=default, **kw)
|
||||
|
||||
def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
|
||||
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
|
||||
|
|
|
@ -24,11 +24,15 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||
_BASE_URL = 'https://www.crunchyroll.com'
|
||||
_API_BASE = 'https://api.crunchyroll.com'
|
||||
_NETRC_MACHINE = 'crunchyroll'
|
||||
_REFRESH_TOKEN = None
|
||||
_AUTH_HEADERS = None
|
||||
_AUTH_EXPIRY = None
|
||||
_API_ENDPOINT = None
|
||||
_BASIC_AUTH = None
|
||||
_BASIC_AUTH = 'Basic ' + base64.b64encode(':'.join((
|
||||
't-kdgp2h8c3jub8fn0fq',
|
||||
'yfLDfMfrYvKXh4JXS1LEI2cCqu1v5Wan',
|
||||
)).encode()).decode()
|
||||
_IS_PREMIUM = None
|
||||
_CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
|
||||
_LOCALE_LOOKUP = {
|
||||
'ar': 'ar-SA',
|
||||
'de': 'de-DE',
|
||||
|
@ -43,69 +47,74 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||
'hi': 'hi-IN',
|
||||
}
|
||||
|
||||
@property
|
||||
def is_logged_in(self):
|
||||
return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))
|
||||
def _set_auth_info(self, response):
    """Record premium status, auth headers and token expiry from a token `response`.

    All three values are stored on `CrunchyrollBaseIE` itself, not on the instance.
    """
    benefits = traverse_obj(response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
    CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in benefits

    CrunchyrollBaseIE._AUTH_HEADERS = {
        'Authorization': response['token_type'] + ' ' + response['access_token'],
    }

    # Treat the token as expired 10 seconds early; fall back to 300 when `expires_in` is unusable
    lifetime = traverse_obj(response, ('expires_in', {float_or_none}), default=300)
    CrunchyrollBaseIE._AUTH_EXPIRY = time_seconds(seconds=lifetime - 10)
|
||||
|
||||
def _request_token(self, headers, data, note='Requesting token', errnote='Failed to request token'):
    """POST `data` (form-encoded) to the `/auth/v1/token` endpoint and return the JSON response.

    An `ExtractorError` caused by an HTTP 403 is replaced with an expected error
    explaining the Cloudflare block; any other `ExtractorError` propagates unchanged.
    """
    token_url = f'{self._BASE_URL}/auth/v1/token'
    try:  # TODO: Add impersonation support here
        return self._download_json(
            token_url, None, note=note, errnote=errnote,
            headers=headers, data=urlencode_postdata(data))
    except ExtractorError as error:
        if isinstance(error.cause, HTTPError) and error.cause.status == 403:
            raise ExtractorError(
                'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
                'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
                'and your browser\'s User-Agent (with --user-agent)', expected=True)
        raise
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if self.is_logged_in:
|
||||
if not CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
CrunchyrollBaseIE._REFRESH_TOKEN = self.cache.load(self._NETRC_MACHINE, username)
|
||||
if CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
return
|
||||
|
||||
upsell_response = self._download_json(
|
||||
f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
|
||||
query={
|
||||
'sess_id': 1,
|
||||
'device_id': 'whatvalueshouldbeforweb',
|
||||
'device_type': 'com.crunchyroll.static',
|
||||
'access_token': 'giKq5eY27ny3cqz',
|
||||
'referer': f'{self._BASE_URL}/welcome/login'
|
||||
})
|
||||
if upsell_response['code'] != 'ok':
|
||||
raise ExtractorError('Could not get session id')
|
||||
session_id = upsell_response['data']['session_id']
|
||||
|
||||
login_response = self._download_json(
|
||||
f'{self._API_BASE}/login.1.json', None, 'Logging in',
|
||||
data=urlencode_postdata({
|
||||
'account': username,
|
||||
'password': password,
|
||||
'session_id': session_id
|
||||
}))
|
||||
if login_response['code'] != 'ok':
|
||||
raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
|
||||
if not self.is_logged_in:
|
||||
raise ExtractorError('Login succeeded but did not set etp_rt cookie')
|
||||
|
||||
def _update_auth(self):
|
||||
if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
|
||||
return
|
||||
|
||||
if not CrunchyrollBaseIE._BASIC_AUTH:
|
||||
cx_api_param = self._CLIENT_ID[self.is_logged_in]
|
||||
self.write_debug(f'Using cxApiParam={cx_api_param}')
|
||||
CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
|
||||
|
||||
auth_headers = {'Authorization': CrunchyrollBaseIE._BASIC_AUTH}
|
||||
if self.is_logged_in:
|
||||
grant_type = 'etp_rt_cookie'
|
||||
else:
|
||||
grant_type = 'client_id'
|
||||
auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
|
||||
try:
|
||||
auth_response = self._download_json(
|
||||
f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
|
||||
headers=auth_headers, data=f'grant_type={grant_type}'.encode())
|
||||
login_response = self._request_token(
|
||||
headers={'Authorization': self._BASIC_AUTH}, data={
|
||||
'username': username,
|
||||
'password': password,
|
||||
'grant_type': 'password',
|
||||
'scope': 'offline_access',
|
||||
}, note='Logging in', errnote='Failed to log in')
|
||||
except ExtractorError as error:
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 403:
|
||||
raise ExtractorError(
|
||||
'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
|
||||
'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
|
||||
'and your browser\'s User-Agent (with --user-agent)', expected=True)
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 401:
|
||||
raise ExtractorError('Invalid username and/or password', expected=True)
|
||||
raise
|
||||
|
||||
CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(auth_response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
|
||||
CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
|
||||
CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)
|
||||
CrunchyrollBaseIE._REFRESH_TOKEN = login_response['refresh_token']
|
||||
self.cache.store(self._NETRC_MACHINE, username, CrunchyrollBaseIE._REFRESH_TOKEN)
|
||||
self._set_auth_info(login_response)
|
||||
|
||||
def _update_auth(self):
    """Ensure `CrunchyrollBaseIE` holds unexpired auth headers, requesting a token if needed.

    A stored refresh token is used when available; otherwise an anonymous
    `client_id` grant is requested. If the token request fails with HTTP 400 and
    login credentials are available, the cached refresh token is cleared and a
    fresh login is performed instead.
    """
    if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_EXPIRY > time_seconds():
        return

    auth_headers = {'Authorization': self._BASIC_AUTH}
    if not CrunchyrollBaseIE._REFRESH_TOKEN:
        data = {'grant_type': 'client_id'}
        auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
    else:
        data = {
            'refresh_token': CrunchyrollBaseIE._REFRESH_TOKEN,
            'grant_type': 'refresh_token',
            'scope': 'offline_access',
        }

    try:
        auth_response = self._request_token(auth_headers, data)
    except ExtractorError as error:
        username, password = self._get_login_info()
        # Only an HTTP 400 with known credentials is handled as an expired refresh token
        if not (username and isinstance(error.cause, HTTPError) and error.cause.status == 400):
            raise
        self.to_screen('Refresh token has expired. Re-logging in')
        CrunchyrollBaseIE._REFRESH_TOKEN = None
        self.cache.store(self._NETRC_MACHINE, username, None)
        self._perform_login(username, password)
    else:
        self._set_auth_info(auth_response)
|
||||
|
||||
def _locale_from_language(self, language):
|
||||
config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
|
||||
|
@ -168,7 +177,8 @@ def _extract_stream(self, identifier, display_id=None):
|
|||
self._update_auth()
|
||||
stream_response = self._download_json(
|
||||
f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
|
||||
display_id, note='Downloading stream info', headers=CrunchyrollBaseIE._AUTH_HEADERS)
|
||||
display_id, note='Downloading stream info', errnote='Failed to download stream info',
|
||||
headers=CrunchyrollBaseIE._AUTH_HEADERS)
|
||||
|
||||
available_formats = {'': ('', '', stream_response['url'])}
|
||||
for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])):
|
||||
|
@ -383,9 +393,9 @@ def entries():
|
|||
|
||||
if not self._IS_PREMIUM and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
|
||||
message = f'This {object_type} is for premium members only'
|
||||
if self.is_logged_in:
|
||||
if CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
raise ExtractorError(message, expected=True)
|
||||
self.raise_login_required(message)
|
||||
self.raise_login_required(message, method='password')
|
||||
|
||||
result['formats'], result['subtitles'] = self._extract_stream(internal_id)
|
||||
|
||||
|
@ -575,9 +585,9 @@ def _real_extract(self, url):
|
|||
|
||||
if not self._IS_PREMIUM and response.get('isPremiumOnly'):
|
||||
message = f'This {response.get("type") or "media"} is for premium members only'
|
||||
if self.is_logged_in:
|
||||
if CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
raise ExtractorError(message, expected=True)
|
||||
self.raise_login_required(message)
|
||||
self.raise_login_required(message, method='password')
|
||||
|
||||
result = self._transform_music_response(response)
|
||||
result['formats'], _ = self._extract_stream(f'music/{internal_id}', internal_id)
|
||||
|
|
|
@ -560,7 +560,7 @@ def extract_from_jsmods_instances(js_data):
|
|||
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
||||
|
||||
def extract_dash_manifest(video, formats):
|
||||
dash_manifest = video.get('dash_manifest')
|
||||
dash_manifest = traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str)
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import itertools
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
|
@ -14,7 +14,6 @@
|
|||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
@ -199,6 +198,27 @@ class PatreonIE(PatreonBaseIE):
|
|||
'channel_id': '2147162',
|
||||
'uploader_url': 'https://www.patreon.com/yaboyroshi',
|
||||
},
|
||||
}, {
|
||||
# NSFW vimeo embed URL
|
||||
'url': 'https://www.patreon.com/posts/4k-spiderman-4k-96414599',
|
||||
'info_dict': {
|
||||
'id': '902250943',
|
||||
'ext': 'mp4',
|
||||
'title': '❤️(4K) Spiderman Girl Yeonhwa’s Gift ❤️(4K) 스파이더맨걸 연화의 선물',
|
||||
'description': '❤️(4K) Spiderman Girl Yeonhwa’s Gift \n❤️(4K) 스파이더맨걸 연화의 선물',
|
||||
'uploader': 'Npickyeonhwa',
|
||||
'uploader_id': '90574422',
|
||||
'uploader_url': 'https://www.patreon.com/Yeonhwa726',
|
||||
'channel_id': '10237902',
|
||||
'channel_url': 'https://www.patreon.com/Yeonhwa726',
|
||||
'duration': 70,
|
||||
'timestamp': 1705150153,
|
||||
'upload_date': '20240113',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.+',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -268,16 +288,19 @@ def _real_extract(self, url):
|
|||
})
|
||||
|
||||
# handle Vimeo embeds
|
||||
if try_get(attributes, lambda x: x['embed']['provider']) == 'Vimeo':
|
||||
embed_html = try_get(attributes, lambda x: x['embed']['html'])
|
||||
v_url = url_or_none(compat_urllib_parse_unquote(
|
||||
self._search_regex(r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', embed_html, 'vimeo url', fatal=False)))
|
||||
if v_url:
|
||||
v_url = VimeoIE._smuggle_referrer(v_url, 'https://patreon.com')
|
||||
if self._request_webpage(v_url, video_id, 'Checking Vimeo embed URL', fatal=False, errnote=False):
|
||||
return self.url_result(v_url, VimeoIE, url_transparent=True, **info)
|
||||
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
|
||||
v_url = urllib.parse.unquote(self._html_search_regex(
|
||||
r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
|
||||
traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
|
||||
if url_or_none(v_url) and self._request_webpage(
|
||||
v_url, video_id, 'Checking Vimeo embed URL',
|
||||
headers={'Referer': 'https://patreon.com/'},
|
||||
fatal=False, errnote=False):
|
||||
return self.url_result(
|
||||
VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
|
||||
VimeoIE, url_transparent=True, **info)
|
||||
|
||||
embed_url = try_get(attributes, lambda x: x['embed']['url'])
|
||||
embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
|
||||
if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False):
|
||||
return self.url_result(embed_url, **info)
|
||||
|
||||
|
|
|
@ -174,7 +174,7 @@ class TheaterComplexTownBaseIE(StacommuBaseIE):
|
|||
|
||||
|
||||
class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?videos/episodes/(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?videos/episodes/(?P<id>\w+)'
|
||||
IE_NAME = 'theatercomplextown:vod'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.theater-complex.town/videos/episodes/hoxqidYNoAn7bP92DN6p78',
|
||||
|
@ -195,6 +195,9 @@ class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
|
|||
}, {
|
||||
'url': 'https://www.theater-complex.town/en/videos/episodes/6QT7XYwM9dJz5Gf9VB6K5y',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.theater-complex.town/ja/videos/episodes/hoxqidYNoAn7bP92DN6p78',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_API_PATH = 'videoEpisodes'
|
||||
|
@ -204,7 +207,7 @@ def _real_extract(self, url):
|
|||
|
||||
|
||||
class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?ppv/(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?ppv/(?P<id>\w+)'
|
||||
IE_NAME = 'theatercomplextown:ppv'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.theater-complex.town/ppv/wytW3X7khrjJBUpKuV3jen',
|
||||
|
@ -223,6 +226,9 @@ class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
|
|||
}, {
|
||||
'url': 'https://www.theater-complex.town/en/ppv/wytW3X7khrjJBUpKuV3jen',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.theater-complex.town/ja/ppv/qwUVmLmGEiZ3ZW6it9uGys',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_API_PATH = 'events'
|
||||
|
|
|
@ -41,7 +41,7 @@ def _real_extract(self, url):
|
|||
ptype, video_id = self._match_valid_url(url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, video_id, fatal=False) or ''
|
||||
props = self._search_nextjs_data(webpage, video_id, default='{}').get('props') or {}
|
||||
props = self._search_nextjs_data(webpage, video_id, default={}).get('props') or {}
|
||||
player_api_cache = try_get(
|
||||
props, lambda x: x['initialReduxState']['playerApiCache']) or {}
|
||||
|
||||
|
|
|
@ -776,7 +776,7 @@ def _real_extract(self, url):
|
|||
status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0
|
||||
video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict}))
|
||||
|
||||
elif next_data := self._search_nextjs_data(webpage, video_id, default='{}'):
|
||||
elif next_data := self._search_nextjs_data(webpage, video_id, default={}):
|
||||
self.write_debug('Found next.js data')
|
||||
status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0
|
||||
video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict}))
|
||||
|
|
|
@ -147,7 +147,7 @@ def _download_metadata(self, url, video_id, lang, props_keys):
|
|||
metadata = self._call_api(video_id, msg='metadata', query={'al': lang or 'ja'}, auth=False, fatal=False)
|
||||
if not metadata:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
nextjs_data = self._search_nextjs_data(webpage, video_id)
|
||||
nextjs_data = self._search_nextjs_data(webpage, video_id, fatal=False)
|
||||
metadata = traverse_obj(nextjs_data, (
|
||||
'props', 'pageProps', *variadic(props_keys, (str, bytes, dict, set)), {dict})) or {}
|
||||
return metadata
|
||||
|
|
|
@ -1,13 +1,24 @@
|
|||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
try_get,
|
||||
url_basename,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
|
@ -171,22 +182,239 @@ def _real_extract(self, url):
|
|||
}
|
||||
|
||||
|
||||
class XVideosQuickiesIE(InfoExtractor):
|
||||
IE_NAME = 'xvideos:quickies'
|
||||
_VALID_URL = r'https?://(?P<domain>(?:[^/]+\.)?xvideos2?\.com)/amateur-channels/[^#]+#quickies/a/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.xvideos.com/amateur-channels/wifeluna#quickies/a/47258683',
|
||||
'md5': '16e322a93282667f1963915568f782c1',
|
||||
'info_dict': {
|
||||
'id': '47258683',
|
||||
'ext': 'mp4',
|
||||
'title': 'Verification video',
|
||||
'age_limit': 18,
|
||||
'duration': 16,
|
||||
'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg',
|
||||
}
|
||||
}]
|
||||
class XVideosPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:[^/]+\.)?xvideos2?\.com/
|
||||
(?:c(?:/[sm]:[^/]+)*|
|
||||
profiles|
|
||||
favorite)/
|
||||
(?P<id>[^#?/]+)
|
||||
'''
|
||||
_TESTS = []
|
||||
|
||||
def _extract_videos_from_json_list(self, json_list, path='video'):
|
||||
return (
|
||||
'https://www.xvideos.com/%s%d/%s' % (path, x.get('id'), str(x.get('u')).split('/')[-1])
|
||||
for x in json_list if isinstance(x, dict))
|
||||
|
||||
def _get_playlist_url(self, url, playlist_id):
|
||||
"""URL of first playlist page"""
|
||||
id_match = re.match(self._VALID_URL, url).groupdict()
|
||||
video_sort = id_match.get('sort')
|
||||
if video_sort:
|
||||
url, _ = compat_urlparse.urldefrag(url)
|
||||
if url.endswith('/'):
|
||||
url = url[:-1]
|
||||
url = '%s/%s' % (url, video_sort.replace('-', '/'))
|
||||
return url
|
||||
|
||||
def _get_next_page(self, url, num, page):
|
||||
'''URL of num'th continuation page of url'''
|
||||
if page.startswith('{'):
|
||||
url, sub = re.subn(r'(/)(\d{1,7})($|[#?/])', r'\g<1>%d\3' % (num, ), url)
|
||||
if sub == 0:
|
||||
url += '/%d' % (num, )
|
||||
return url
|
||||
next_page = self._search_regex(
|
||||
r'''(?s)(<a\s[^>]*?\bclass\s*=\s*(?P<q>'|").*?\bnext-page\b.*?(?P=q)[^>]*?>)''',
|
||||
page, 'next page', default=None)
|
||||
if next_page:
|
||||
next_page = extract_attributes(next_page)
|
||||
next_page = next_page.get('href')
|
||||
if next_page:
|
||||
return urljoin(url, next_page)
|
||||
return False
|
||||
|
||||
def _extract_videos(self, url, playlist_id, num, page):
|
||||
"""Get iterable videos plus stop flag"""
|
||||
return ((
|
||||
'https://www.xvideos.com/video' + x.group('video_id')
|
||||
for x in re.finditer(r'''class\s*=\s*"title"\s*>\s*<\s*a\s*href\s*=\s*(\'|")\/video(?P<video_id>(.*?))\1''', page)),
|
||||
None)
|
||||
|
||||
def _real_extract(self, url):
    """Collect every video URL of a playlist-like page and return it as a playlist.

    The first page URL comes from `_get_playlist_url`; pages are then downloaded
    one by one, `_extract_videos` supplying the entries plus a stop flag and
    `_get_next_page` supplying the following URL (falsy when there is none).
    URLs that carry a `quickiesid` group are delegated directly to `XVideosIE`.
    """
    id_match = re.match(self._VALID_URL, url).groupdict()
    playlist_id = id_match['id']
    if 'video' in playlist_id and url.endswith(playlist_id):
        url += '/0'

    next_page = self._get_playlist_url(url, playlist_id)

    # Not every subclass' _VALID_URL defines `quickiesid`, so a plain
    # subscript would raise KeyError for those patterns
    if id_match.get('quickiesid'):
        return self.url_result(next_page, XVideosIE)

    matches = []
    for count in itertools.count(0):
        webpage = self._download_webpage(
            next_page,
            '%s (+%d)' % (playlist_id, count) if count > 0 else playlist_id)

        vids, stop = self._extract_videos(next_page, playlist_id, count, webpage)

        if vids:
            matches.append(vids)

        if stop:
            break
        next_page = self._get_next_page(next_page, count + 1, webpage)
        if not next_page:
            break

    return self.playlist_from_matches(
        itertools.chain.from_iterable(matches), playlist_id)
|
||||
|
||||
|
||||
class XVideosRelatedIE(XVideosPlaylistIE):
|
||||
_VALID_URL = XVideosIE._VALID_URL + r'(?:/[^/]+)*?\#_related-(?P<related>videos|playlists)'
|
||||
|
||||
_TESTS = []
|
||||
|
||||
def _extract_videos(self, url, playlist_id, num, page):
    """Return (iterable of related URLs, stop flag) for a `#_related-...` URL.

    `videos` entries are read from the `videos_related` array embedded in `page`;
    `playlists` entries are fetched from the `video-playlists` JSON endpoint.
    Both cases return True as the stop flag. URLs without a `related` group are
    handled by the parent class.
    """
    kind = re.match(self._VALID_URL, url).groupdict().get('related')
    if not kind:
        return super(XVideosRelatedIE, self)._extract_videos(url, playlist_id, num, page)

    if kind == 'videos':
        raw_list = self._search_regex(
            r'(?s)videos_related\s*=\s*(\[.*?])\s*;',
            page, 'related', default='[]')
        video_items = self._parse_json(raw_list, playlist_id, fatal=False) or []
        return self._extract_videos_from_json_list(video_items), True

    # playlists
    playlists_json = self._download_json(
        'https://www.xvideos.com/video-playlists/' + playlist_id, playlist_id, fatal=False)
    playlist_items = try_get(playlists_json, lambda x: x['playlists'], list) or []
    return self._extract_videos_from_json_list(playlist_items, path='favorite/'), True
|
||||
|
||||
|
||||
class XVideosChannelIE(XVideosPlaylistIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:[^/]+\.)?xvideos2?\.com/
|
||||
(?:
|
||||
(?:amateur-|pornstar-|model-)?channel|profile|
|
||||
pornstar|model|amateur
|
||||
)s/
|
||||
(?P<id>[^#?/]+)
|
||||
(?:\#_tab(?P<tab>Videos|Favorites|Playlists|AboutMe)(?:,(?P<sort>[^,]+))?)?
|
||||
(?:\#quickies/a/(?P<quickiesid>.*))?
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.xvideos.com/pornstar-channels/sienna-west',
|
||||
'playlist_mincount': 5,
|
||||
}, {
|
||||
'url': 'https://www.xvideos.com/pornstars/silvia-jons#_tabVideos',
|
||||
'playlist_mincount': 5,
|
||||
}, {
|
||||
'url': 'https://www.xvideos.com/channels/miss_floyd#_tabVideos',
|
||||
'playlist_mincount': 5,
|
||||
}, {
|
||||
'url': 'https://www.xvideos.com/models/migurt-1',
|
||||
'playlist_mincount': 5,
|
||||
}, {
|
||||
'url': 'https://www.xvideos.com/amateur-channels/wifeluna#quickies/a/47258683',
|
||||
'playlist_mincount': 5,
|
||||
}, ]
|
||||
|
||||
def _get_playlist_url(self, url, playlist_id):
    """Return the first URL to fetch for a channel/profile `url`.

    - a `quickiesid` group yields the corresponding single-video URL;
    - the `videos` / `favorites` tabs yield the tab listing URL ending in `/0`
      (for `videos`, the sort key taken from the fragment, `best` by default);
    - anything else yields `<url>/activity/<category>`, where the category is
      read from the `xv.conf` object embedded in the page (`straight` if absent).
    """
    webpage = self._download_webpage(url, playlist_id)
    groups = re.match(self._VALID_URL, url).groupdict()
    tab = (groups.get('tab') or '').lower()
    quickiesid = (groups.get('quickiesid') or '').lower()

    if quickiesid:
        return f'https://www.xvideos.com/video{quickiesid}/_'

    if tab in ('videos', 'favorites'):
        tab_url, fragment = compat_urlparse.urldefrag(url)
        if not tab_url.endswith('/'):
            tab_url += '/'
        fragment_parts = fragment.split(',')
        tab_url += tab
        if tab == 'videos':
            tab_url += '/' + (fragment_parts[1] if len(fragment_parts) > 1 else 'best')
        return tab_url + '/0'

    if not tab:
        url += '#_tabVideos'

    # activity
    conf = self._parse_json(
        self._search_regex(
            r'(?s)\.\s*xv\s*\.\s*conf\s*=\s*(\{.*?})[\s;]*</script',
            webpage, 'XV conf'),
        playlist_id)
    category_getters = (
        lambda x: x['dyn']['page_main_cat'],
        lambda x: x['dyn']['user_main_cat'],
    )
    act = try_get(conf, category_getters, compat_str) or 'straight'

    activity_base = compat_urlparse.urldefrag(url)[0]
    if activity_base.endswith('/'):
        activity_base = activity_base[:-1]

    return '%s/activity/%s' % (activity_base, act, )
|
||||
|
||||
def _get_next_page(self, url, num, page):
    """Return the next page URL for a channel listing, or False when there is none.

    JSON pages and `#_tab` URLs are paged by the parent class. Activity pages
    are paged by the 10-digit number of the last `activity-event-` id found in
    `page`, which replaces (or is appended as) the final path component of `url`.
    """
    if page.startswith('{') or '#_tab' in url:
        return super(XVideosChannelIE, self)._get_next_page(url, num, page)

    act_time = int_or_none(url_basename(url)) or 0
    last_act = int(self._search_regex(
        r'(?s)id\s*=\s*"?activity-event-(\d{10})(?!.*id\s*=\s*"?activity-event-\d+.*).+$',
        page, 'last activity', default=act_time))

    # No newer activity marker than the one already in the URL: nothing more to fetch
    if last_act == act_time:
        return False
    if not act_time:
        return url + ('/%d' % (last_act, ))
    return url.replace('/%d' % (act_time, ), '/%d' % (last_act, ))
|
||||
|
||||
def _extract_videos(self, url, playlist_id, num, page):
    """Return (iterable of URLs, stop flag) for one page of a channel listing.

    URLs containing `/videos/` are parsed as JSON; URLs containing
    `/favorites/` are scanned for `/favorite/<id>/...` links (stop flag None);
    everything else is handled by the parent class.
    """
    if '/videos/' in url:
        tab_json = self._parse_json(page, playlist_id, fatal=False) or {}
        pages_seen = try_get(tab_json, lambda x: x['current_page'] + 1, int)
        # NOTE(review): presumably pages_seen * nb_per_page // nb_videos - confirm against int_or_none
        progress = int_or_none(
            pages_seen, scale=tab_json.get('nb_videos'),
            invscale=tab_json.get('nb_per_page'), default=0)
        video_items = try_get(tab_json, lambda x: x['videos'], list) or []
        return self._extract_videos_from_json_list(video_items), progress > 0

    if '/favorites/' in url:
        playlist_links = re.finditer(
            r'''<a\s[^>]*?href\s*=\s*('|")(?P<playlist>/favorite/\d+/[^#?]+?)\1''', page)
        return ('https://www.xvideos.com' + link.group('playlist') for link in playlist_links), None

    return super(XVideosChannelIE, self)._extract_videos(url, playlist_id, num, page)
|
||||
|
||||
|
||||
class XVideosSearchIE(XVideosPlaylistIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:[^/]+\.)?xvideos2?\.com/
|
||||
\?k=(?P<id>[^#?/&]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
# uninteresting search with probably at least two pages of results,
|
||||
# but not too many more
|
||||
'url': 'http://www.xvideos.com/?k=libya&sort=length',
|
||||
'playlist_mincount': 30,
|
||||
}, ]
|
||||
|
||||
def _get_next_page(self, url, num, page):
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
qs = compat_parse_qs(parsed_url.query)
|
||||
qs['p'] = [num]
|
||||
parsed_url = (
|
||||
list(parsed_url[:4])
|
||||
+ [compat_urllib_parse_urlencode(qs, True), None])
|
||||
return compat_urlparse.urlunparse(parsed_url), False
|
||||
|
|
Loading…
Reference in New Issue