Compare commits

...

2 Commits

Author SHA1 Message Date
Dr. Steven Strange
e90283edac [AcademyMelIE]&[GetCourseRuIE] removed unused variables and to_screen output 2024-01-14 02:14:00 +01:00
Dr. Steven Strange
c3af3773ee [AcademyMelIE]&[GetCourseRuIE] extractor fixes 2024-01-14 01:54:09 +01:00
2 changed files with 45 additions and 62 deletions

View File

@ -3,7 +3,6 @@ import time
from datetime import datetime
from .common import InfoExtractor
from ..cookies import LenientSimpleCookie
from ..utils import urlencode_postdata, ExtractorError
@ -11,9 +10,6 @@ class AcademyMelIE(InfoExtractor):
_TEST_EMAIL = 'meriat@jaga.email' # use this as username in the test/local_parameters.json if running the test
_TEST_PASSWORD = 'bBY-ccbp$8' # use this as password in the test/local_parameters.json if running the test
_CACHE_KEY = 'academymel'
_CACHE_SUBKEY = 'login-cookie-header'
_NETRC_MACHINE = 'academymel'
_LOGIN_URL = 'https://academymel.online/cms/system/login'
_VALID_URL = r'^https?:\/\/academymel\.online\/(?P<url>.*)$'
@ -21,19 +17,11 @@ class AcademyMelIE(InfoExtractor):
_TESTS = [{
'url': 'http://academymel.online/3video_1',
'info_dict': {
'id': 'master.m3u8?user-cdn=cdnvideo&acc-id=714517&user-id=359525183&loc-mode=ru&version=10:2:1:0:2:cdnvideo&consumer=vod&jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VyLWlkIjozNTk1MjUxODN9',
'title': 'master',
'id': '4885302',
'title': 'Промоуроки Академии МЕЛ',
'ext': 'mp4',
'duration': 1693
}
}, {
'url': 'http://academymel.online/3video_2',
'info_dict': {
'id': 'master.m3u8?user-cdn=cdnvideo&acc-id=714517&user-id=359525183&loc-mode=ru&version=10:2:1:0:2:cdnvideo&consumer=vod&jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VyLWlkIjozNTk1MjUxODN9',
'title': 'master',
'ext': 'mp4',
'duration': 1871
}
}]
def _perform_login(self, username, password):
@ -49,22 +37,12 @@ class AcademyMelIE(InfoExtractor):
'requestTime': int(time.time())
})
try:
webpage = self._request_webpage(self._LOGIN_URL,
None,
data=login_body,
note='Logging into the academymel.online',
errnote='Failed to log in into academymel.online',
fatal=True)
except ExtractorError:
raise ExtractorError('Could not log in into academymel.online (login URL: "%s")' % self._LOGIN_URL,
expected=True)
# The response itself is a JSON, but it is not needed - only the Set-Cookie value(s) are
cookie_header = webpage.get_header('Set-Cookie')
set_cookie_header = LenientSimpleCookie(cookie_header)
set_cookie_header.load(cookie_header)
self.cache.store(self._CACHE_KEY, self._CACHE_SUBKEY, set_cookie_header)
self._request_webpage(self._LOGIN_URL,
None,
data=login_body,
note='Logging into the academymel.online',
errnote='Failed to log in into academymel.online',
fatal=True)
def playlist_from_entries(self, entries, valid_url):
current_timestamp = int(time.time())
@ -82,26 +60,24 @@ class AcademyMelIE(InfoExtractor):
if not valid_url:
raise ExtractorError('Invalid URL found', expected=True)
set_cookie_header = self.cache.load(self._CACHE_KEY, self._CACHE_SUBKEY)
webpage = self._download_webpage(url,
None,
fatal=True,
note='Downloading video website',
errnote='Failed to download video website')
if not set_cookie_header:
raise ExtractorError('The set-cookie has not been loaded', expected=True)
try:
webpage = self._download_webpage(url,
None,
headers=set_cookie_header,
fatal=True,
note='Downloading video website',
errnote='Failed to download video website')
except ExtractorError:
raise ExtractorError('Could not download the video website at "%s"' % url, expected=True)
title = self._search_regex(r'<title>(?P<title>.*)</title>', webpage, 'title')
entries = []
processed_urls = set() # Set to keep track of processed URLs
for video_url in re.findall(
r'<iframe[^>]+src=\"(?P<url>https?://[^/]+\.getcourse\.ru/sign-player/\?.*)\"',
webpage):
self.to_screen('AcademyMel video URL found: %s' % video_url)
entries.append(self.url_result(video_url, 'GetCourseRu'))
r'data-iframe-src=\"(?P<url>https?://[^/]+\.getcourse\.ru/sign-player/\?.*?)\"',
webpage,
re.DOTALL + re.VERBOSE):
# Check if the URL has not been processed before
if video_url not in processed_urls:
entries.append(self.url_result(video_url, 'GetCourseRu', url_transparent=True, title=title))
processed_urls.add(video_url) # Add the URL to the set of processed URLs
return self.playlist_from_entries(entries, valid_url)

View File

@ -25,21 +25,28 @@ class GetCourseRuIE(InfoExtractor):
if not valid_url:
raise ExtractorError('Invalid URL found', expected=True)
try:
webpage = self._download_webpage(url,
None,
fatal=True,
note='Retrieving masterPlaylist URL...',
errnote='Failed to retrieve the masterPlaylist URL')
except ExtractorError:
raise ExtractorError('Failed to retrieve the masterPlaylist URL', expected=True)
webpage = self._download_webpage(url,
None,
fatal=True,
note='Retrieving metadata...',
errnote='Failed to retrieve metadata')
try:
m3u8_url = (self._search_regex(r'\"masterPlaylistUrl\":\"(?P<m3u8>.*?)\"', webpage, 'm3u8', fatal=True)
.replace('\\', ''))
except ExtractorError:
raise ExtractorError('Could not extract the masterPlaylist URL from the GetCourse.ru response', expected=True)
window_configs = self._search_json(
r'window\.configs\s*=\s*',
webpage,
'config',
video_id=None,
fatal=True)
self.to_screen('masterPlaylistUrl is "%s"' % m3u8_url)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
window_configs.get('masterPlaylistUrl'),
window_configs.get('videoId'))
return self.url_result(m3u8_url, 'Generic')
return {
'id': str(window_configs.get('videoId')),
'title': window_configs.get('videoHash'),
'thumbnail': window_configs.get('thumbnailUrl'),
'duration': int(window_configs.get('videoDuration')),
'formats': formats,
'subtitles': subtitles
}