Compare commits

...

2 Commits

Author SHA1 Message Date
Dr. Steven Strange
e90283edac [AcademyMelIE]&[GetCourseRuIE] removed unused variables and to_screen output 2024-01-14 02:14:00 +01:00
Dr. Steven Strange
c3af3773ee [AcademyMelIE]&[GetCourseRuIE] extractor fixes 2024-01-14 01:54:09 +01:00
2 changed files with 45 additions and 62 deletions

View File

@ -3,7 +3,6 @@ import time
from datetime import datetime from datetime import datetime
from .common import InfoExtractor from .common import InfoExtractor
from ..cookies import LenientSimpleCookie
from ..utils import urlencode_postdata, ExtractorError from ..utils import urlencode_postdata, ExtractorError
@ -11,9 +10,6 @@ class AcademyMelIE(InfoExtractor):
_TEST_EMAIL = 'meriat@jaga.email' # use this as username in the test/local_parameters.json if running the test _TEST_EMAIL = 'meriat@jaga.email' # use this as username in the test/local_parameters.json if running the test
_TEST_PASSWORD = 'bBY-ccbp$8' # use this as password in the test/local_parameters.json if running the test _TEST_PASSWORD = 'bBY-ccbp$8' # use this as password in the test/local_parameters.json if running the test
_CACHE_KEY = 'academymel'
_CACHE_SUBKEY = 'login-cookie-header'
_NETRC_MACHINE = 'academymel' _NETRC_MACHINE = 'academymel'
_LOGIN_URL = 'https://academymel.online/cms/system/login' _LOGIN_URL = 'https://academymel.online/cms/system/login'
_VALID_URL = r'^https?:\/\/academymel\.online\/(?P<url>.*)$' _VALID_URL = r'^https?:\/\/academymel\.online\/(?P<url>.*)$'
@ -21,19 +17,11 @@ class AcademyMelIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'http://academymel.online/3video_1', 'url': 'http://academymel.online/3video_1',
'info_dict': { 'info_dict': {
'id': 'master.m3u8?user-cdn=cdnvideo&acc-id=714517&user-id=359525183&loc-mode=ru&version=10:2:1:0:2:cdnvideo&consumer=vod&jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VyLWlkIjozNTk1MjUxODN9', 'id': '4885302',
'title': 'master', 'title': 'Промоуроки Академии МЕЛ',
'ext': 'mp4', 'ext': 'mp4',
'duration': 1693 'duration': 1693
} }
}, {
'url': 'http://academymel.online/3video_2',
'info_dict': {
'id': 'master.m3u8?user-cdn=cdnvideo&acc-id=714517&user-id=359525183&loc-mode=ru&version=10:2:1:0:2:cdnvideo&consumer=vod&jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VyLWlkIjozNTk1MjUxODN9',
'title': 'master',
'ext': 'mp4',
'duration': 1871
}
}] }]
def _perform_login(self, username, password): def _perform_login(self, username, password):
@ -49,22 +37,12 @@ class AcademyMelIE(InfoExtractor):
'requestTime': int(time.time()) 'requestTime': int(time.time())
}) })
try: self._request_webpage(self._LOGIN_URL,
webpage = self._request_webpage(self._LOGIN_URL,
None, None,
data=login_body, data=login_body,
note='Logging into the academymel.online', note='Logging into the academymel.online',
errnote='Failed to log in into academymel.online', errnote='Failed to log in into academymel.online',
fatal=True) fatal=True)
except ExtractorError:
raise ExtractorError('Could not log in into academymel.online (login URL: "%s")' % self._LOGIN_URL,
expected=True)
# The response itself is a JSON, but it is not needed - only the Set-Cookie value(s) are
cookie_header = webpage.get_header('Set-Cookie')
set_cookie_header = LenientSimpleCookie(cookie_header)
set_cookie_header.load(cookie_header)
self.cache.store(self._CACHE_KEY, self._CACHE_SUBKEY, set_cookie_header)
def playlist_from_entries(self, entries, valid_url): def playlist_from_entries(self, entries, valid_url):
current_timestamp = int(time.time()) current_timestamp = int(time.time())
@ -82,26 +60,24 @@ class AcademyMelIE(InfoExtractor):
if not valid_url: if not valid_url:
raise ExtractorError('Invalid URL found', expected=True) raise ExtractorError('Invalid URL found', expected=True)
set_cookie_header = self.cache.load(self._CACHE_KEY, self._CACHE_SUBKEY)
if not set_cookie_header:
raise ExtractorError('The set-cookie has not been loaded', expected=True)
try:
webpage = self._download_webpage(url, webpage = self._download_webpage(url,
None, None,
headers=set_cookie_header,
fatal=True, fatal=True,
note='Downloading video website', note='Downloading video website',
errnote='Failed to download video website') errnote='Failed to download video website')
except ExtractorError:
raise ExtractorError('Could not download the video website at "%s"' % url, expected=True) title = self._search_regex(r'<title>(?P<title>.*)</title>', webpage, 'title')
entries = [] entries = []
processed_urls = set() # Set to keep track of processed URLs
for video_url in re.findall( for video_url in re.findall(
r'<iframe[^>]+src=\"(?P<url>https?://[^/]+\.getcourse\.ru/sign-player/\?.*)\"', r'data-iframe-src=\"(?P<url>https?://[^/]+\.getcourse\.ru/sign-player/\?.*?)\"',
webpage): webpage,
self.to_screen('AcademyMel video URL found: %s' % video_url) re.DOTALL + re.VERBOSE):
entries.append(self.url_result(video_url, 'GetCourseRu')) # Check if the URL has not been processed before
if video_url not in processed_urls:
entries.append(self.url_result(video_url, 'GetCourseRu', url_transparent=True, title=title))
processed_urls.add(video_url) # Add the URL to the set of processed URLs
return self.playlist_from_entries(entries, valid_url) return self.playlist_from_entries(entries, valid_url)

View File

@ -25,21 +25,28 @@ class GetCourseRuIE(InfoExtractor):
if not valid_url: if not valid_url:
raise ExtractorError('Invalid URL found', expected=True) raise ExtractorError('Invalid URL found', expected=True)
try:
webpage = self._download_webpage(url, webpage = self._download_webpage(url,
None, None,
fatal=True, fatal=True,
note='Retrieving masterPlaylist URL...', note='Retrieving metadata...',
errnote='Failed to retrieve the masterPlaylist URL') errnote='Failed to retrieve metadata')
except ExtractorError:
raise ExtractorError('Failed to retrieve the masterPlaylist URL', expected=True)
try: window_configs = self._search_json(
m3u8_url = (self._search_regex(r'\"masterPlaylistUrl\":\"(?P<m3u8>.*?)\"', webpage, 'm3u8', fatal=True) r'window\.configs\s*=\s*',
.replace('\\', '')) webpage,
except ExtractorError: 'config',
raise ExtractorError('Could not extract the masterPlaylist URL from the GetCourse.ru response', expected=True) video_id=None,
fatal=True)
self.to_screen('masterPlaylistUrl is "%s"' % m3u8_url) formats, subtitles = self._extract_m3u8_formats_and_subtitles(
window_configs.get('masterPlaylistUrl'),
window_configs.get('videoId'))
return self.url_result(m3u8_url, 'Generic') return {
'id': str(window_configs.get('videoId')),
'title': window_configs.get('videoHash'),
'thumbnail': window_configs.get('thumbnailUrl'),
'duration': int(window_configs.get('videoDuration')),
'formats': formats,
'subtitles': subtitles
}