2024-11-29 18:51:24 +01:00
1 changed file with 19 additions and 16 deletions
--- a/yt_dlp/extractor/getcourseru.py
+++ b/yt_dlp/extractor/getcourseru.py
@@ -3,7 +3,7 @@ import time
 import urllib.parse

 from .common import InfoExtractor
-from ..utils import ExtractorError, int_or_none, url_or_none, urlencode_postdata
+from ..utils import int_or_none, url_or_none, urlencode_postdata
 from ..utils.traversal import traverse_obj


@@ -20,7 +20,7 @@ class GetCourseRuPlayerIE(InfoExtractor):
        },
        'skip': 'JWT expired',
    }]
-    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)']
+    _EMBED_REGEX = [rf'(?x)<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)']

    def _real_extract(self, url):
        webpage = self._download_webpage(url, None, 'Downloading player page')
@@ -127,19 +127,26 @@ class GetCourseRuIE(InfoExtractor):
        rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
    ]

-    def _login(self, hostname, username, password):
-        if self._get_cookies(f'https://{hostname}').get('PHPSESSID5'):
+    def _login(self, url, username, password):
+        if self._get_cookies(url).get('PHPSESSID5'):
            return
-        login_url = f'https://{hostname}{self._LOGIN_URL_PATH}'
+        domain = urllib.parse.urlparse(url).netloc
+        login_url = f'https://{domain}{self._LOGIN_URL_PATH}'
+
        webpage = self._download_webpage(login_url, None)
+        xdget_id = self._html_search_regex(
+            r'<form[^>]*class="[^"]*state-login[^"]*"[^>]*data-xdget-id="([^"]+)"',
+            webpage, 'xdgetId')
+
+        simple_sign = self._html_search_regex(
+            r'window.requestSimpleSign\s*=\s*"([\da-f]+)"',
+            webpage, 'simple sign')

        self._request_webpage(
            login_url, None, 'Logging in', 'Failed to log in',
            data=urlencode_postdata({
                'action': 'processXdget',
-                'xdgetId': self._html_search_regex(
-                    r'<form[^>]+\bclass="[^"]*\bstate-login[^"]*"[^>]+\bdata-xdget-id="([^"]+)"',
-                    webpage, 'xdgetId'),
+                'xdgetId': xdget_id,
                'params[action]': 'login',
                'params[url]': login_url,
                'params[object_type]': 'cms_page',
@@ -147,23 +154,19 @@ class GetCourseRuIE(InfoExtractor):
                'params[email]': username,
                'params[password]': password,
                'requestTime': int(time.time()),
-                'requestSimpleSign': self._html_search_regex(
-                    r'window.requestSimpleSign\s*=\s*"([\da-f]+)"', webpage, 'simple sign'),
+                'requestSimpleSign': simple_sign,
            }))

    def _real_extract(self, url):
-        hostname = urllib.parse.urlparse(url).hostname
-        username, password = self._get_login_info(netrc_machine=hostname)
+        username, password = self._get_login_info()
        if username:
-            self._login(hostname, username, password)
+            self._login(url, username, password)

        display_id = self._match_id(url)
        # NB: 404 is returned due to yt-dlp not properly following redirects #9020
        webpage, urlh = self._download_webpage_handle(url, display_id, expected_status=404)
        if self._LOGIN_URL_PATH in urlh.url or urlh.status == 404:
-            raise ExtractorError(
-                f'This video is only available for registered users. {self._login_hint("any", netrc=hostname)}',
-                expected=True)
+            self.raise_login_required()

        playlist_id = self._search_regex(
            r'window\.(?:lessonId|gcsObjectId)\s*=\s*(\d+)', webpage, 'playlist id', default=display_id)