Compare commits

...

6 Commits

Author SHA1 Message Date
shloop
f894afcc09
Merge 38e08a1147 into be3579aaf0 2024-11-09 21:54:32 +05:30
Steve Ovens
be3579aaf0
[ie/GameDevTV] Add extractor (#11368)
Authored by: stratus-ss, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-11-06 21:58:44 +00:00
bashonly
85fdc66b6e
[ie/adobepass] Fix provider requests (#11472)
Fix bug in dcfeea4dd5

Closes #11469
Authored by: bashonly
2024-11-06 21:26:05 +00:00
shloop
38e08a1147 -Updates URL scheme
-Includes suggested changes
-Updates test cases
2024-08-17 13:45:46 -07:00
shloop
2aa9e75975 merges _extractors.py 2024-08-17 12:59:44 -07:00
shloop
a2f4163e25 Adds xumo extractor. 2023-02-20 17:42:13 -08:00
4 changed files with 322 additions and 1 deletions

View File

@ -708,6 +708,7 @@ from .gab import (
GabTVIE, GabTVIE,
) )
from .gaia import GaiaIE from .gaia import GaiaIE
from .gamedevtv import GameDevTVDashboardIE
from .gamejolt import ( from .gamejolt import (
GameJoltCommunityIE, GameJoltCommunityIE,
GameJoltGameIE, GameJoltGameIE,
@ -2490,6 +2491,7 @@ from .xinpianchang import XinpianchangIE
from .xminus import XMinusIE from .xminus import XMinusIE
from .xnxx import XNXXIE from .xnxx import XNXXIE
from .xstream import XstreamIE from .xstream import XstreamIE
from .xumo import XumoIE
from .xvideos import ( from .xvideos import (
XVideosIE, XVideosIE,
XVideosQuickiesIE, XVideosQuickiesIE,

View File

@ -1362,7 +1362,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
def _download_webpage_handle(self, *args, **kwargs):
    """Delegate to the parent ``_download_webpage_handle`` with geo-verification headers.

    The headers returned by ``self.geo_verification_headers()`` are used as
    the base; any caller-supplied ``headers`` keyword is merged on top, so
    caller values win on key clashes. All other positional and keyword
    arguments are forwarded unchanged.
    """
    headers = self.geo_verification_headers()
    # ``or {}`` rather than a ``.get`` default: a caller may pass
    # ``headers=None`` explicitly, which ``dict.update`` would reject.
    headers.update(kwargs.get('headers') or {})
    kwargs['headers'] = headers
    return super()._download_webpage_handle(*args, **kwargs)

View File

@ -0,0 +1,141 @@
import json
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
clean_html,
int_or_none,
join_nonempty,
parse_iso8601,
str_or_none,
url_or_none,
)
from ..utils.traversal import traverse_obj
class GameDevTVDashboardIE(InfoExtractor):
    """Extractor for course pages on the gamedev.tv dashboard.

    A URL carrying only a course id produces a playlist of the course's
    lectures; a URL that also carries a lecture id produces that single
    lecture. Every request to the course API is sent with ``_API_HEADERS``,
    whose ``Authorization`` entry is filled in by ``_perform_login``.
    """

    _VALID_URL = r'https?://(?:www\.)?gamedev\.tv/dashboard/courses/(?P<course_id>\d+)(?:/(?P<lecture_id>\d+))?'
    _NETRC_MACHINE = 'gamedevtv'
    _TESTS = [{
        'url': 'https://www.gamedev.tv/dashboard/courses/25',
        'info_dict': {
            'id': '25',
            'title': 'Complete Blender Creator 3: Learn 3D Modelling for Beginners',
            'tags': ['blender', 'course', 'all', 'box modelling', 'sculpting'],
            'categories': ['Blender', '3D Art'],
            'thumbnail': 'https://gamedev-files.b-cdn.net/courses/qisc9pmu1jdc.jpg',
            'upload_date': '20220516',
            'timestamp': 1652694420,
            'modified_date': '20241027',
            'modified_timestamp': 1730049658,
        },
        'playlist_count': 100,
    }, {
        'url': 'https://www.gamedev.tv/dashboard/courses/63/2279',
        'info_dict': {
            'id': 'df04f4d8-68a4-4756-a71b-9ca9446c3a01',
            'ext': 'mp4',
            'modified_timestamp': 1701695752,
            'upload_date': '20230504',
            'episode': 'MagicaVoxel Community Course Introduction',
            'series_id': '63',
            'title': 'MagicaVoxel Community Course Introduction',
            'timestamp': 1683195397,
            'modified_date': '20231204',
            'categories': ['3D Art', 'MagicaVoxel'],
            'season': 'MagicaVoxel Community Course',
            'tags': ['MagicaVoxel', 'all', 'course'],
            'series': 'MagicaVoxel 3D Art Mini Course',
            'duration': 1405,
            'episode_number': 1,
            'season_number': 1,
            'season_id': '219',
            'description': 'md5:a378738c5bbec1c785d76c067652d650',
            'display_id': '63-219-2279',
            'alt_title': '1_CC_MVX MagicaVoxel Community Course Introduction.mp4',
            'thumbnail': 'https://vz-23691c65-6fa.b-cdn.net/df04f4d8-68a4-4756-a71b-9ca9446c3a01/thumbnail.jpg',
        },
    }]
    # Headers sent with course API requests. This is a class-level dict that
    # ``_perform_login`` mutates in place to add the ``Authorization`` entry.
    _API_HEADERS = {}

    def _perform_login(self, username, password):
        """Log in with e-mail and password and store the returned token.

        Raises an expected ``ExtractorError`` when the login request fails
        with HTTP 401; any other failure is re-raised untouched.
        """
        payload = json.dumps({
            'email': username,
            'password': password,
            'cart_items': [],
        }).encode()
        try:
            response = self._download_json(
                'https://api.gamedev.tv/api/students/login', None, 'Logging in',
                headers={'Content-Type': 'application/json'}, data=payload)
        except ExtractorError as error:
            cause = error.cause
            rejected = isinstance(cause, HTTPError) and cause.status == 401
            if not rejected:
                raise
            raise ExtractorError('Invalid username/password', expected=True)

        token_type, access_token = response['token_type'], response['access_token']
        self._API_HEADERS['Authorization'] = f'{token_type} {access_token}'

    def _real_initialize(self):
        """Require a stored ``Authorization`` header before extracting."""
        if self._API_HEADERS.get('Authorization'):
            return
        self.raise_login_required(
            'This content is only available with purchase', method='password')

    def _entries(self, data, course_id, course_info, selected_lecture):
        """Yield one info dict per lecture that has a valid playlist URL.

        ``data`` is the course object from the API and ``course_info`` the
        course-level metadata merged into every entry. When
        ``selected_lecture`` is truthy, only lectures whose stringified
        ``id`` equals it are yielded.
        """
        def has_playlist_url(_, candidate):
            # Used as a traverse_obj filter: keeps lectures with a usable URL
            return url_or_none(candidate['video']['playListUrl'])

        for section in traverse_obj(data, ('sections', ..., {dict})):
            section_info = traverse_obj(section, {
                'season_id': ('id', {str_or_none}),
                'season': ('title', {str}),
                'season_number': ('order', {int_or_none}),
            })
            season_id = section_info.get('season_id')

            for lecture in traverse_obj(section, ('lectures', has_playlist_url)):
                lecture_id = lecture.get('id')
                wanted = not selected_lecture or str(lecture_id) == selected_lecture
                if not wanted:
                    continue

                display_id = join_nonempty(course_id, season_id, lecture_id)
                formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                    lecture['video']['playListUrl'], display_id, 'mp4', m3u8_id='hls')

                # Later updates override earlier ones: course < section <
                # generic defaults < lecture-specific values.
                info = {**course_info, **section_info}
                info.update({
                    'id': display_id,  # fallback
                    'display_id': display_id,
                    'formats': formats,
                    'subtitles': subtitles,
                    'series': course_info.get('title'),
                    'series_id': course_id,
                })
                info.update(traverse_obj(lecture, {
                    'id': ('video', 'guid', {str}),
                    'title': ('title', {str}),
                    'alt_title': ('video', 'title', {str}),
                    'description': ('description', {clean_html}),
                    'episode': ('title', {str}),
                    'episode_number': ('order', {int_or_none}),
                    'duration': ('video', 'duration_in_sec', {int_or_none}),
                    'timestamp': ('video', 'created_at', {parse_iso8601}),
                    'modified_timestamp': ('video', 'updated_at', {parse_iso8601}),
                    'thumbnail': ('video', 'thumbnailUrl', {url_or_none}),
                }))
                yield info

    def _real_extract(self, url):
        """Return a single lecture or a playlist for the whole course.

        Raises ``ExtractorError`` when a lecture id is given in the URL but
        no matching lecture is produced by ``_entries``.
        """
        match = self._match_valid_url(url)
        course_id, lecture_id = match.group('course_id', 'lecture_id')

        response = self._download_json(
            f'https://api.gamedev.tv/api/courses/my/{course_id}', course_id,
            headers=self._API_HEADERS)
        data = response['data']

        course_info = traverse_obj(data, {
            'title': ('title', {str}),
            'tags': ('tags', ..., 'name', {str}),
            'categories': ('categories', ..., 'title', {str}),
            'timestamp': ('created_at', {parse_iso8601}),
            'modified_timestamp': ('updated_at', {parse_iso8601}),
            'thumbnail': ('image', {url_or_none}),
        })

        entries = self._entries(data, course_id, course_info, lecture_id)
        if not lecture_id:
            return self.playlist_result(entries, course_id, **course_info)

        # The generator is lazy, so only the first matching lecture is processed
        lecture = next(entries, None)
        if lecture:
            return lecture
        raise ExtractorError('Lecture not found')

178
yt_dlp/extractor/xumo.py Normal file
View File

@ -0,0 +1,178 @@
from .common import InfoExtractor
from ..utils import determine_ext, int_or_none, smuggle_url, traverse_obj, unsmuggle_url, url_or_none
class XumoIE(InfoExtractor):
    """Extractor for play.xumo.com movies, single episodes and whole series.

    Asset metadata is fetched from ``_INFO_URL`` using the asset id taken
    from the URL. Assets whose ``contentType`` is ``SERIES`` become a
    playlist of per-episode URLs; everything else is treated as one video.
    """

    _VALID_URL = r'https?://play\.?xumo\.com/[^?#]+/(?P<id>XM[A-Z0-9]{12})'
    _TESTS = [{
        # movie
        'url': 'https://play.xumo.com/free-movies/a-circus-tale-and-a-love-song/XM041I5U497VD3',
        'params': {
            'check_formats': True,
        },
        'md5': 'eaac858a8db4ee5a67d6d16920c24e15',
        'info_dict': {
            'id': 'XM041I5U497VD3',
            'title': 'A Circus Tale & A Love Song',
            'ext': 'mp4',
            'description': 'md5:aa6372f4785c528ff04c94a275f63446',
            'duration': 6887,
            'release_year': 2016,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        # entire series
        'url': 'https://play.xumo.com/tv-shows/super-mario-world/XM0AN69OG47PRN',
        'params': {
            'skip_download': True,
        },
        'playlist_count': 10,
        'info_dict': {
            'id': 'XM0AN69OG47PRN',
            'title': 'Super Mario World',
        },
    }, {
        # episode of series
        'url': 'https://play.xumo.com/tv-shows/99991299/XM02D369HADFRR',
        'md5': 'ed2f396272b39f2e0fe47f02b9ae34cc',
        'info_dict': {
            'id': 'XM02D369HADFRR',
            'title': 'Fire Sale // Misadventures In Robin Hood Woods',
            'ext': 'mp4',
            'series': 'Super Mario World',
            'season_number': 1,
            'episode_number': 2,
            'episode': 'Fire Sale // Misadventures In Robin Hood Woods',
            'description': 'md5:48134d36781cf4b225ec0ee4f05356d3',
            'thumbnail': r're:^https?://.*\.jpg$',
            'season': 'Season 1',
            'duration': 1368,
        },
    }, {
        # video from network-based alternate URL scheme
        'url': 'https://play.xumo.com/networks/fakenetworkname/99991299/XM02D369HADFRR',
        'md5': 'ed2f396272b39f2e0fe47f02b9ae34cc',
        'info_dict': {
            'id': 'XM02D369HADFRR',
            'title': 'Fire Sale // Misadventures In Robin Hood Woods',
            'ext': 'mp4',
            'series': 'Super Mario World',
            'season_number': 1,
            'episode_number': 2,
            'episode': 'Fire Sale // Misadventures In Robin Hood Woods',
            'description': 'md5:48134d36781cf4b225ec0ee4f05356d3',
            'thumbnail': r're:^https?://.*\.jpg$',
            'season': 'Season 1',
            'duration': 1368,
        },
    }]

    # Base URL of the asset metadata endpoint; '<asset id>.json' is appended
    _INFO_URL = 'https://valencia-app-mds.xumo.com/v2/assets/asset/'
    # Query sent with every metadata request (list of values for the 'f' key)
    _INFO_QUERY_PARAMS = {
        'f': [
            'connectorId',
            'title',
            'providers',
            'descriptions',
            'runtime',
            'originalReleaseYear',
            'cuePoints',
            'ratings',
            'hasCaptions',
            'availableSince',
            'genres',
            'season',
            'episode',
            'seasons',
            'season:all',
            'episodes.episodeTitle',
            'episodes.runtime',
            'episodes.descriptions',
            'episodes.hasCaptions',
            'episodes.ratings',
        ],
    }

    def _get_video_links(self, video_id, info_json):
        """Collect formats and subtitles from the ``providers`` of an asset.

        Sources are dispatched on their URL: HLS (``.m3u8``), DASH (``.mpd``)
        and Smooth Streaming (``.ism/Manifest``) manifests are expanded;
        sources of any other kind contribute no formats. Formats coming from
        a source with a truthy ``drm`` value are flagged ``has_drm``.

        Returns a ``(formats, subtitles)`` tuple.
        """
        formats, subtitles = [], {}

        for source in traverse_obj(info_json, ('providers', ..., 'sources', ...)) or []:
            if not isinstance(source, dict):
                continue
            format_url = url_or_none(source.get('uri'))
            if not format_url:
                continue

            fmts, subs = [], {}
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    format_url, video_id, m3u8_id='hls', fatal=False)
            elif ext == 'mpd':
                fmts, subs = self._extract_mpd_formats_and_subtitles(
                    format_url, video_id, mpd_id='dash', fatal=False)
            elif format_url.endswith('.ism/Manifest'):
                fmts, subs = self._extract_ism_formats_and_subtitles(
                    format_url, video_id, ism_id='mss', fatal=False)

            if source.get('drm'):
                for f in fmts:
                    f['has_drm'] = True

            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)

        for caption in traverse_obj(info_json, ('providers', ..., 'captions', ...)) or []:
            if not isinstance(caption, dict):
                continue
            # A subtitle entry without a usable URL cannot be downloaded, so
            # skip it instead of emitting {'url': None}
            caption_url = url_or_none(caption.get('url'))
            if not caption_url:
                continue
            subtitles.setdefault(caption.get('lang') or 'und', []).append({
                'url': caption_url,
            })

        return formats, subtitles

    def _real_extract(self, url):
        """Return a playlist for a series, or an info dict for a single video.

        For an episode reached through a series playlist, the series title
        arrives as smuggled data. Otherwise it is looked up with one extra,
        non-fatal metadata request keyed on the asset's ``connectorId``.
        """
        url, smuggled_data = unsmuggle_url(url)
        media_id = self._match_valid_url(url).group('id')
        media_metadata = self._download_json(
            f'{self._INFO_URL}{media_id}.json', media_id, query=self._INFO_QUERY_PARAMS)

        title = media_metadata.get('title')
        content_type = media_metadata.get('contentType')

        if content_type == 'SERIES':
            # series => return set of URLs pointing to individual episodes and
            # smuggle series title to avoid extra API call for each episode.
            # Episodes without an id cannot be addressed and are skipped.
            return self.playlist_result([
                self.url_result(
                    smuggle_url(f'https://play.xumo.com/tv-shows/x/{episode["id"]}', {'series': title}),
                    XumoIE, episode['id'], episode.get('episodeTitle'))
                for episode in traverse_obj(
                    media_metadata, ('seasons', ..., 'episodes', lambda _, v: v['id']))
            ], media_id, title)

        # video => return video info
        season_number = None
        series_title = None
        is_episode = content_type == 'EPISODIC'
        if is_episode:
            season_number = int_or_none(media_metadata.get('season'))
            if smuggled_data:
                series_title = traverse_obj(smuggled_data, 'series')
            else:
                # The series title is optional metadata: neither a missing
                # connectorId nor a failed request may abort the extraction
                connector_id = media_metadata.get('connectorId')
                if connector_id:
                    series_data = self._download_json(
                        f'{self._INFO_URL}{connector_id}.json', media_id,
                        note='Downloading series JSON', query=self._INFO_QUERY_PARAMS,
                        fatal=False)
                    series_title = traverse_obj(series_data or {}, 'title')

        formats, subtitles = self._get_video_links(media_id, media_metadata)

        return {
            'id': media_id,
            'title': title,
            'description': traverse_obj(
                media_metadata, ('descriptions', ('large', 'medium', 'small', 'tiny')), get_all=False),
            'release_year': int_or_none(media_metadata.get('originalReleaseYear')),
            'duration': int_or_none(media_metadata.get('runtime')),
            'thumbnail': f'https://image.xumo.com/v1/assets/asset/{media_id}/1024x576.jpg',
            'formats': formats,
            'subtitles': subtitles,
            'episode_number': int_or_none(media_metadata.get('episode')),
            'season_number': season_number,
            'episode': title if is_episode else None,
            'series': series_title,
        }