Compare commits

...

5 Commits

Author SHA1 Message Date
sepro
cf8ae1df10 Use correct netrc in error message 2024-01-18 21:48:32 +01:00
sepro
07c7431951 Inline xdgetId and requestSimpleSign 2024-01-18 21:30:06 +01:00
sepro
e488d1040a
Apply suggestions from code review
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-01-18 21:28:22 +01:00
sepro
624158e0cc typo 2024-01-18 17:46:45 +01:00
sepro
21c6dfaf13 Use separate netrc machine per hostname 2024-01-18 17:23:40 +01:00

View File

@@ -3,7 +3,7 @@ import time
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none, url_or_none, urlencode_postdata from ..utils import ExtractorError, int_or_none, url_or_none, urlencode_postdata
from ..utils.traversal import traverse_obj from ..utils.traversal import traverse_obj
@@ -20,7 +20,7 @@ class GetCourseRuPlayerIE(InfoExtractor):
}, },
'skip': 'JWT expired', 'skip': 'JWT expired',
}] }]
_EMBED_REGEX = [rf'(?x)<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)'] _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)']
def _real_extract(self, url): def _real_extract(self, url):
webpage = self._download_webpage(url, None, 'Downloading player page') webpage = self._download_webpage(url, None, 'Downloading player page')
@@ -127,26 +127,19 @@ class GetCourseRuIE(InfoExtractor):
rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)', rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
] ]
def _login(self, url, username, password): def _login(self, hostname, username, password):
if self._get_cookies(url).get('PHPSESSID5'): if self._get_cookies(f'https://{hostname}').get('PHPSESSID5'):
return return
domain = urllib.parse.urlparse(url).netloc login_url = f'https://{hostname}{self._LOGIN_URL_PATH}'
login_url = f'https://{domain}{self._LOGIN_URL_PATH}'
webpage = self._download_webpage(login_url, None) webpage = self._download_webpage(login_url, None)
xdget_id = self._html_search_regex(
r'<form[^>]*class="[^"]*state-login[^"]*"[^>]*data-xdget-id="([^"]+)"',
webpage, 'xdgetId')
simple_sign = self._html_search_regex(
r'window.requestSimpleSign\s*=\s*"([\da-f]+)"',
webpage, 'simple sign')
self._request_webpage( self._request_webpage(
login_url, None, 'Logging in', 'Failed to log in', login_url, None, 'Logging in', 'Failed to log in',
data=urlencode_postdata({ data=urlencode_postdata({
'action': 'processXdget', 'action': 'processXdget',
'xdgetId': xdget_id, 'xdgetId': self._html_search_regex(
r'<form[^>]+\bclass="[^"]*\bstate-login[^"]*"[^>]+\bdata-xdget-id="([^"]+)"',
webpage, 'xdgetId'),
'params[action]': 'login', 'params[action]': 'login',
'params[url]': login_url, 'params[url]': login_url,
'params[object_type]': 'cms_page', 'params[object_type]': 'cms_page',
@@ -154,19 +147,23 @@ class GetCourseRuIE(InfoExtractor):
'params[email]': username, 'params[email]': username,
'params[password]': password, 'params[password]': password,
'requestTime': int(time.time()), 'requestTime': int(time.time()),
'requestSimpleSign': simple_sign, 'requestSimpleSign': self._html_search_regex(
r'window.requestSimpleSign\s*=\s*"([\da-f]+)"', webpage, 'simple sign'),
})) }))
def _real_extract(self, url): def _real_extract(self, url):
username, password = self._get_login_info() hostname = urllib.parse.urlparse(url).hostname
username, password = self._get_login_info(netrc_machine=hostname)
if username: if username:
self._login(url, username, password) self._login(hostname, username, password)
display_id = self._match_id(url) display_id = self._match_id(url)
# NB: 404 is returned due to yt-dlp not properly following redirects #9020 # NB: 404 is returned due to yt-dlp not properly following redirects #9020
webpage, urlh = self._download_webpage_handle(url, display_id, expected_status=404) webpage, urlh = self._download_webpage_handle(url, display_id, expected_status=404)
if self._LOGIN_URL_PATH in urlh.url or urlh.status == 404: if self._LOGIN_URL_PATH in urlh.url or urlh.status == 404:
self.raise_login_required() raise ExtractorError(
f'This video is only available for registered users. {self._login_hint("any", netrc=hostname)}',
expected=True)
playlist_id = self._search_regex( playlist_id = self._search_regex(
r'window\.(?:lessonId|gcsObjectId)\s*=\s*(\d+)', webpage, 'playlist id', default=display_id) r'window\.(?:lessonId|gcsObjectId)\s*=\s*(\d+)', webpage, 'playlist id', default=display_id)