Compare commits

..

No commits in common. "cf8ae1df1077e52a2bbbfa3fb7965ca309da74a1" and "c765ee8f4881550671c00d5d26c6b4406ed4c077" have entirely different histories.

View File

@ -3,7 +3,7 @@ import time
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError, int_or_none, url_or_none, urlencode_postdata from ..utils import int_or_none, url_or_none, urlencode_postdata
from ..utils.traversal import traverse_obj from ..utils.traversal import traverse_obj
@ -20,7 +20,7 @@ class GetCourseRuPlayerIE(InfoExtractor):
}, },
'skip': 'JWT expired', 'skip': 'JWT expired',
}] }]
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)'] _EMBED_REGEX = [rf'(?x)<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)']
def _real_extract(self, url): def _real_extract(self, url):
webpage = self._download_webpage(url, None, 'Downloading player page') webpage = self._download_webpage(url, None, 'Downloading player page')
@ -127,19 +127,26 @@ class GetCourseRuIE(InfoExtractor):
rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)', rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
] ]
def _login(self, hostname, username, password): def _login(self, url, username, password):
if self._get_cookies(f'https://{hostname}').get('PHPSESSID5'): if self._get_cookies(url).get('PHPSESSID5'):
return return
login_url = f'https://{hostname}{self._LOGIN_URL_PATH}' domain = urllib.parse.urlparse(url).netloc
login_url = f'https://{domain}{self._LOGIN_URL_PATH}'
webpage = self._download_webpage(login_url, None) webpage = self._download_webpage(login_url, None)
xdget_id = self._html_search_regex(
r'<form[^>]*class="[^"]*state-login[^"]*"[^>]*data-xdget-id="([^"]+)"',
webpage, 'xdgetId')
simple_sign = self._html_search_regex(
r'window.requestSimpleSign\s*=\s*"([\da-f]+)"',
webpage, 'simple sign')
self._request_webpage( self._request_webpage(
login_url, None, 'Logging in', 'Failed to log in', login_url, None, 'Logging in', 'Failed to log in',
data=urlencode_postdata({ data=urlencode_postdata({
'action': 'processXdget', 'action': 'processXdget',
'xdgetId': self._html_search_regex( 'xdgetId': xdget_id,
r'<form[^>]+\bclass="[^"]*\bstate-login[^"]*"[^>]+\bdata-xdget-id="([^"]+)"',
webpage, 'xdgetId'),
'params[action]': 'login', 'params[action]': 'login',
'params[url]': login_url, 'params[url]': login_url,
'params[object_type]': 'cms_page', 'params[object_type]': 'cms_page',
@ -147,23 +154,19 @@ class GetCourseRuIE(InfoExtractor):
'params[email]': username, 'params[email]': username,
'params[password]': password, 'params[password]': password,
'requestTime': int(time.time()), 'requestTime': int(time.time()),
'requestSimpleSign': self._html_search_regex( 'requestSimpleSign': simple_sign,
r'window.requestSimpleSign\s*=\s*"([\da-f]+)"', webpage, 'simple sign'),
})) }))
def _real_extract(self, url): def _real_extract(self, url):
hostname = urllib.parse.urlparse(url).hostname username, password = self._get_login_info()
username, password = self._get_login_info(netrc_machine=hostname)
if username: if username:
self._login(hostname, username, password) self._login(url, username, password)
display_id = self._match_id(url) display_id = self._match_id(url)
# NB: 404 is returned due to yt-dlp not properly following redirects #9020 # NB: 404 is returned due to yt-dlp not properly following redirects #9020
webpage, urlh = self._download_webpage_handle(url, display_id, expected_status=404) webpage, urlh = self._download_webpage_handle(url, display_id, expected_status=404)
if self._LOGIN_URL_PATH in urlh.url or urlh.status == 404: if self._LOGIN_URL_PATH in urlh.url or urlh.status == 404:
raise ExtractorError( self.raise_login_required()
f'This video is only available for registered users. {self._login_hint("any", netrc=hostname)}',
expected=True)
playlist_id = self._search_regex( playlist_id = self._search_regex(
r'window\.(?:lessonId|gcsObjectId)\s*=\s*(\d+)', webpage, 'playlist id', default=display_id) r'window\.(?:lessonId|gcsObjectId)\s*=\s*(\d+)', webpage, 'playlist id', default=display_id)