[AcademyMelIE]&[GetCourseRuIE] removed unused variables and to_screen output

[AcademyMelIE]&[GetCourseRuIE] extractor fixes
2025-02-21 12:21:26 +01:00 · 2024-01-14 02:14:00 +01:00 · 2024-01-14 01:54:09 +01:00
2 changed files with 45 additions and 62 deletions
--- a/yt_dlp/extractor/academymel.py
+++ b/yt_dlp/extractor/academymel.py
@ -3,7 +3,6 @@ import time

 from datetime import datetime
 from .common import InfoExtractor
-from ..cookies import LenientSimpleCookie
 from ..utils import urlencode_postdata, ExtractorError


@ -11,9 +10,6 @@ class AcademyMelIE(InfoExtractor):
    _TEST_EMAIL = 'meriat@jaga.email'  # use this as username in the test/local_parameters.json if running the test
    _TEST_PASSWORD = 'bBY-ccbp$8'  # use this as password in the test/local_parameters.json if running the test

-    _CACHE_KEY = 'academymel'
-    _CACHE_SUBKEY = 'login-cookie-header'
-
    _NETRC_MACHINE = 'academymel'
    _LOGIN_URL = 'https://academymel.online/cms/system/login'
    _VALID_URL = r'^https?:\/\/academymel\.online\/(?P<url>.*)$'
@ -21,19 +17,11 @@ class AcademyMelIE(InfoExtractor):
    _TESTS = [{
        'url': 'http://academymel.online/3video_1',
        'info_dict': {
-            'id': 'master.m3u8?user-cdn=cdnvideo&acc-id=714517&user-id=359525183&loc-mode=ru&version=10:2:1:0:2:cdnvideo&consumer=vod&jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VyLWlkIjozNTk1MjUxODN9',
-            'title': 'master',
+            'id': '4885302',
+            'title': 'Промоуроки Академии МЕЛ',
            'ext': 'mp4',
            'duration': 1693
        }
-    }, {
-        'url': 'http://academymel.online/3video_2',
-        'info_dict': {
-            'id': 'master.m3u8?user-cdn=cdnvideo&acc-id=714517&user-id=359525183&loc-mode=ru&version=10:2:1:0:2:cdnvideo&consumer=vod&jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VyLWlkIjozNTk1MjUxODN9',
-            'title': 'master',
-            'ext': 'mp4',
-            'duration': 1871
-        }
    }]

    def _perform_login(self, username, password):
@ -49,22 +37,12 @@ class AcademyMelIE(InfoExtractor):
            'requestTime': int(time.time())
        })

-        try:
-            webpage = self._request_webpage(self._LOGIN_URL,
+        self._request_webpage(self._LOGIN_URL,
                              None,
                              data=login_body,
                              note='Logging into the academymel.online',
                              errnote='Failed to log in into academymel.online',
                              fatal=True)
-        except ExtractorError:
-            raise ExtractorError('Could not log in into academymel.online (login URL: "%s")' % self._LOGIN_URL,
-                                 expected=True)
-
-        # The response itself is a JSON, but it is not needed - only the Set-Cookie value(s) are
-        cookie_header = webpage.get_header('Set-Cookie')
-        set_cookie_header = LenientSimpleCookie(cookie_header)
-        set_cookie_header.load(cookie_header)
-        self.cache.store(self._CACHE_KEY, self._CACHE_SUBKEY, set_cookie_header)

    def playlist_from_entries(self, entries, valid_url):
        current_timestamp = int(time.time())
@ -82,26 +60,24 @@ class AcademyMelIE(InfoExtractor):
        if not valid_url:
            raise ExtractorError('Invalid URL found', expected=True)

-        set_cookie_header = self.cache.load(self._CACHE_KEY, self._CACHE_SUBKEY)
-
-        if not set_cookie_header:
-            raise ExtractorError('The set-cookie has not been loaded', expected=True)
-
-        try:
        webpage = self._download_webpage(url,
                                         None,
-                                             headers=set_cookie_header,
                                         fatal=True,
                                         note='Downloading video website',
                                         errnote='Failed to download video website')
-        except ExtractorError:
-            raise ExtractorError('Could not download the video website at "%s"' % url, expected=True)
+
+        title = self._search_regex(r'<title>(?P<title>.*)</title>', webpage, 'title')

        entries = []
+        processed_urls = set()  # Set to keep track of processed URLs
+
        for video_url in re.findall(
-            r'<iframe[^>]+src=\"(?P<url>https?://[^/]+\.getcourse\.ru/sign-player/\?.*)\"',
-                webpage):
-            self.to_screen('AcademyMel video URL found: %s' % video_url)
-            entries.append(self.url_result(video_url, 'GetCourseRu'))
+            r'data-iframe-src=\"(?P<url>https?://[^/]+\.getcourse\.ru/sign-player/\?.*?)\"',
+            webpage,
+                re.DOTALL + re.VERBOSE):
+            # Check if the URL has not been processed before
+            if video_url not in processed_urls:
+                entries.append(self.url_result(video_url, 'GetCourseRu', url_transparent=True, title=title))
+                processed_urls.add(video_url)  # Add the URL to the set of processed URLs

        return self.playlist_from_entries(entries, valid_url)
--- a/yt_dlp/extractor/getcourseru.py
+++ b/yt_dlp/extractor/getcourseru.py
@ -25,21 +25,28 @@ class GetCourseRuIE(InfoExtractor):
        if not valid_url:
            raise ExtractorError('Invalid URL found', expected=True)

-        try:
        webpage = self._download_webpage(url,
                                         None,
                                         fatal=True,
-                                             note='Retrieving masterPlaylist URL...',
-                                             errnote='Failed to retrieve the masterPlaylist URL')
-        except ExtractorError:
-            raise ExtractorError('Failed to retrieve the masterPlaylist URL', expected=True)
+                                         note='Retrieving metadata...',
+                                         errnote='Failed to retrieve metadata')

-        try:
-            m3u8_url = (self._search_regex(r'\"masterPlaylistUrl\":\"(?P<m3u8>.*?)\"', webpage, 'm3u8', fatal=True)
-                        .replace('\\', ''))
-        except ExtractorError:
-            raise ExtractorError('Could not extract the masterPlaylist URL from the GetCourse.ru response', expected=True)
+        window_configs = self._search_json(
+            r'window\.configs\s*=\s*',
+            webpage,
+            'config',
+            video_id=None,
+            fatal=True)

-        self.to_screen('masterPlaylistUrl is "%s"' % m3u8_url)
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            window_configs.get('masterPlaylistUrl'),
+            window_configs.get('videoId'))

-        return self.url_result(m3u8_url, 'Generic')
+        return {
+            'id': str(window_configs.get('videoId')),
+            'title': window_configs.get('videoHash'),
+            'thumbnail': window_configs.get('thumbnailUrl'),
+            'duration': int(window_configs.get('videoDuration')),
+            'formats': formats,
+            'subtitles': subtitles
+        }
Author	SHA1	Message	Date
Dr. Steven Strange	e90283edac	[AcademyMelIE]&[GetCourseRuIE] removed unused variables and to_screen output	2024-01-14 02:14:00 +01:00
Dr. Steven Strange	c3af3773ee	[AcademyMelIE]&[GetCourseRuIE] extractor fixes	2024-01-14 01:54:09 +01:00