Compare commits

...

2 Commits

Author SHA1 Message Date
Max
95e82347b3
[ie/Viously] Add extractor (#8927)
Replaces Turbo extractor

Authored by: nbr23, seproDev

Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
2024-01-09 04:11:52 +01:00
DmitryScaletta
5b8c69ae04
[ie/twitch] Fix m3u8 extraction (#8960)
Closes #8958
Authored by: DmitryScaletta
2024-01-09 02:47:13 +00:00
4 changed files with 81 additions and 95 deletions

View File

@ -2019,7 +2019,6 @@ from .tunein import (
TuneInPodcastEpisodeIE, TuneInPodcastEpisodeIE,
TuneInShortenerIE, TuneInShortenerIE,
) )
from .turbo import TurboIE
from .tv2 import ( from .tv2 import (
TV2IE, TV2IE,
TV2ArticleIE, TV2ArticleIE,
@ -2223,6 +2222,7 @@ from .viki import (
VikiIE, VikiIE,
VikiChannelIE, VikiChannelIE,
) )
from .viously import ViouslyIE
from .viqeo import ViqeoIE from .viqeo import ViqeoIE
from .viu import ( from .viu import (
ViuIE, ViuIE,

View File

@ -1,64 +0,0 @@
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
qualities,
xpath_text,
)
class TurboIE(InfoExtractor):
    """Extractor for turbo.fr ``videos-voiture`` pages.

    Metadata and media URLs come from the XML document served at
    ``_API_URL``; the description is read from the page's meta tags.
    """

    _VALID_URL = r'https?://(?:www\.)?turbo\.fr/videos-voiture/(?P<id>[0-9]+)-'
    _API_URL = 'http://www.turbo.fr/api/tv/xml.php?player_generique=player_generique&id={0:}'
    _TEST = {
        'url': 'http://www.turbo.fr/videos-voiture/454443-turbo-du-07-09-2014-renault-twingo-3-bentley-continental-gt-speed-ces-guide-achat-dacia.html',
        'md5': '33f4b91099b36b5d5a91f84b5bcba600',
        'info_dict': {
            'id': '454443',
            'ext': 'mp4',
            'duration': 3715,
            'title': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
            'description': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``.

        Raises ExtractorError (expected) when the XML playlist contains no
        ``./channel/item`` element.
        """
        video_id = self._match_valid_url(url).group('id')

        # The page itself is fetched first, then the XML playlist.
        webpage = self._download_webpage(url, video_id)
        playlist = self._download_xml(self._API_URL.format(video_id), video_id)

        item = playlist.find('./channel/item')
        if item is None:
            raise ExtractorError('Playlist item was not found', expected=True)

        info = {
            'id': video_id,
            'title': xpath_text(item, './title', 'title'),
            'duration': int_or_none(xpath_text(item, './durate', 'duration')),
            'thumbnail': xpath_text(item, './visuel_clip', 'thumbnail'),
            'description': self._html_search_meta('description', webpage),
        }

        # Every child element tagged ``url_video_<quality>`` is one format;
        # the tag suffix doubles as the format id.
        quality_tag = re.compile(r'url_video_(?P<quality>.+)')
        rank_quality = qualities(['3g', 'sd', 'hq'])
        formats = []
        for node in item:
            match = quality_tag.search(node.tag)
            if not match:
                continue
            format_id = compat_str(match.group('quality'))
            formats.append({
                'format_id': format_id,
                'url': node.text,
                'quality': rank_quality(format_id),
            })

        info['formats'] = formats
        return info

View File

@ -8,7 +8,6 @@ from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_parse_qs, compat_parse_qs,
compat_str, compat_str,
compat_urllib_parse_urlencode,
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
) )
from ..utils import ( from ..utils import (
@ -191,6 +190,20 @@ class TwitchBaseIE(InfoExtractor):
'url': thumbnail, 'url': thumbnail,
}] if thumbnail else None }] if thumbnail else None
def _extract_twitch_m3u8_formats(self, video_id, token, signature):
    """Extract m3u8 formats for ``video_id`` using an access token and its signature.

    Subclasses must define _M3U8_PATH
    """
    # The playlist lives at <_USHER_BASE>/<_M3U8_PATH>/<video_id>.m3u8.
    playlist_url = f'{self._USHER_BASE}/{self._M3U8_PATH}/{video_id}.m3u8'
    playlist_query = {
        'allow_source': 'true',
        'allow_audio_only': 'true',
        'allow_spectre': 'true',
        # A fresh random integer is sent as ``p`` on every request.
        'p': random.randint(1000000, 10000000),
        'player': 'twitchweb',
        'playlist_include_framerate': 'true',
        'sig': signature,
        'token': token,
    }
    return self._extract_m3u8_formats(
        playlist_url, video_id, 'mp4', query=playlist_query)
class TwitchVodIE(TwitchBaseIE): class TwitchVodIE(TwitchBaseIE):
IE_NAME = 'twitch:vod' IE_NAME = 'twitch:vod'
@ -203,6 +216,7 @@ class TwitchVodIE(TwitchBaseIE):
) )
(?P<id>\d+) (?P<id>\d+)
''' '''
_M3U8_PATH = 'vod'
_TESTS = [{ _TESTS = [{
'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s', 'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
@ -532,20 +546,8 @@ class TwitchVodIE(TwitchBaseIE):
info = self._extract_info_gql(video, vod_id) info = self._extract_info_gql(video, vod_id)
access_token = self._download_access_token(vod_id, 'video', 'id') access_token = self._download_access_token(vod_id, 'video', 'id')
formats = self._extract_m3u8_formats( formats = self._extract_twitch_m3u8_formats(
'%s/vod/%s.m3u8?%s' % ( vod_id, access_token['value'], access_token['signature'])
self._USHER_BASE, vod_id,
compat_urllib_parse_urlencode({
'allow_source': 'true',
'allow_audio_only': 'true',
'allow_spectre': 'true',
'player': 'twitchweb',
'playlist_include_framerate': 'true',
'nauth': access_token['value'],
'nauthsig': access_token['signature'],
})),
vod_id, 'mp4', entry_protocol='m3u8_native')
formats.extend(self._extract_storyboard(vod_id, video.get('storyboard'), info.get('duration'))) formats.extend(self._extract_storyboard(vod_id, video.get('storyboard'), info.get('duration')))
self._prefer_source(formats) self._prefer_source(formats)
@ -924,6 +926,7 @@ class TwitchStreamIE(TwitchBaseIE):
) )
(?P<id>[^/#?]+) (?P<id>[^/#?]+)
''' '''
_M3U8_PATH = 'api/channel/hls'
_TESTS = [{ _TESTS = [{
'url': 'http://www.twitch.tv/shroomztv', 'url': 'http://www.twitch.tv/shroomztv',
@ -1026,23 +1029,10 @@ class TwitchStreamIE(TwitchBaseIE):
access_token = self._download_access_token( access_token = self._download_access_token(
channel_name, 'stream', 'channelName') channel_name, 'stream', 'channelName')
token = access_token['value']
stream_id = stream.get('id') or channel_name stream_id = stream.get('id') or channel_name
query = { formats = self._extract_twitch_m3u8_formats(
'allow_source': 'true', channel_name, access_token['value'], access_token['signature'])
'allow_audio_only': 'true',
'allow_spectre': 'true',
'p': random.randint(1000000, 10000000),
'player': 'twitchweb',
'playlist_include_framerate': 'true',
'segment_preference': '4',
'sig': access_token['signature'].encode('utf-8'),
'token': token.encode('utf-8'),
}
formats = self._extract_m3u8_formats(
'%s/api/channel/hls/%s.m3u8' % (self._USHER_BASE, channel_name),
stream_id, 'mp4', query=query)
self._prefer_source(formats) self._prefer_source(formats)
view_count = stream.get('viewers') view_count = stream.get('viewers')

View File

@ -0,0 +1,60 @@
import base64
import re
from .common import InfoExtractor
from ..utils import (
extract_attributes,
int_or_none,
parse_iso8601,
)
from ..utils.traversal import traverse_obj
class ViouslyIE(InfoExtractor):
    """Embed extractor for Viously players found inside other sites' pages.

    ``_VALID_URL`` is False, so the class defines no URL pattern of its
    own; extraction happens in ``_extract_from_webpage``.
    """

    _VALID_URL = False
    _WEBPAGE_TESTS = [{
        'url': 'http://www.turbo.fr/videos-voiture/454443-turbo-du-07-09-2014-renault-twingo-3-bentley-continental-gt-speed-ces-guide-achat-dacia.html',
        'md5': '37a6c3381599381ff53a7e1e0575c0bc',
        'info_dict': {
            'id': 'F_xQzS2jwb3',
            'ext': 'mp4',
            'title': 'Turbo du 07/09/2014\xa0: Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
            'description': 'Turbo du 07/09/2014\xa0: Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
            'age_limit': 0,
            'upload_date': '20230328',
            'timestamp': 1680037507,
            'duration': 3716,
            'categories': ['motors'],
        }
    }]

    def _extract_from_webpage(self, url, webpage):
        """Yield one info dict per Viously player ``<div>`` in ``webpage``.

        Players whose HLS manifest yields no formats are skipped.
        """
        player_tags = re.findall(
            r'<div[^>]*class="(?:[^"]*\s)?v(?:iou)?sly-player(?:\s[^"]*)?"[^>]*>', webpage)
        if not player_tags:
            return

        # The JSON export is base64 text written with a shuffled alphabet;
        # map it back onto the standard alphabet before decoding.
        STANDARD_ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='
        CUSTOM_ALPHABET = 'VIOUSLYABCDEFGHJKMNPQRTWXZviouslyabcdefghjkmnpqrtwxz9876543210+/='
        to_standard = str.maketrans(CUSTOM_ALPHABET, STANDARD_ALPHABET)

        def custom_decode(text):
            raw = base64.b64decode(text.translate(to_standard))
            # NUL bytes at either end of the decoded text are stripped.
            return raw.decode('utf-8').strip('\x00')

        for video_id in traverse_obj(player_tags, (..., {extract_attributes}, 'id')):
            hls_url = f'https://www.viously.com/video/hls/{video_id}/index.m3u8'
            formats = self._extract_m3u8_formats(hls_url, video_id, fatal=False)
            if not formats:
                continue

            export = self._download_json(
                f'https://www.viously.com/export/json/{video_id}', video_id,
                transform_source=custom_decode, fatal=False)
            metadata = traverse_obj(export, ('video', {
                'title': ('title', {str}),
                'description': ('description', {str}),
                'duration': ('duration', {int_or_none}),
                'timestamp': ('iso_date', {parse_iso8601}),
                'categories': ('category', 'name', {str}, {lambda x: [x] if x else None}),
            }))

            yield {
                'id': video_id,
                'formats': formats,
                **metadata,
            }