Compare commits

...

3 Commits

Author SHA1 Message Date
sepro
04a5e06350
[ie/ondemandkorea] Fix upgraded format extraction (#8677)
Closes #8675
Authored by: seproDev
2023-12-06 18:58:00 +01:00
Nicolas Cisco
b03c89309e
[ie/mediastream] Fix authenticated format extraction (#8657)
Authored by: NickCis
2023-12-06 18:55:38 +01:00
Pierrick Guillaume
71f28097fe
[ie/francetv] Improve metadata extraction (#8409)
Authored by: Fymyte
2023-12-06 16:10:11 +01:00
3 changed files with 57 additions and 19 deletions

View File

@@ -1,12 +1,14 @@
from .common import InfoExtractor from .common import InfoExtractor
from .dailymotion import DailymotionIE
from ..utils import ( from ..utils import (
determine_ext,
ExtractorError, ExtractorError,
determine_ext,
format_field, format_field,
int_or_none,
join_nonempty,
parse_iso8601, parse_iso8601,
parse_qs, parse_qs,
) )
from .dailymotion import DailymotionIE
class FranceTVBaseInfoExtractor(InfoExtractor): class FranceTVBaseInfoExtractor(InfoExtractor):
@@ -82,6 +84,8 @@ class FranceTVIE(InfoExtractor):
videos = [] videos = []
title = None title = None
subtitle = None subtitle = None
episode_number = None
season_number = None
image = None image = None
duration = None duration = None
timestamp = None timestamp = None
@@ -112,7 +116,9 @@ class FranceTVIE(InfoExtractor):
if meta: if meta:
if title is None: if title is None:
title = meta.get('title') title = meta.get('title')
# XXX: what is meta['pre_title']? # meta['pre_title'] contains season and episode number for series in format "S<ID> E<ID>"
season_number, episode_number = self._search_regex(
r'S(\d+)\s*E(\d+)', meta.get('pre_title'), 'episode info', group=(1, 2), default=(None, None))
if subtitle is None: if subtitle is None:
subtitle = meta.get('additional_title') subtitle = meta.get('additional_title')
if image is None: if image is None:
@@ -191,19 +197,19 @@ class FranceTVIE(InfoExtractor):
} for sheet in spritesheets] } for sheet in spritesheets]
}) })
if subtitle:
title += ' - %s' % subtitle
title = title.strip()
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': join_nonempty(title, subtitle, delim=' - ').strip(),
'thumbnail': image, 'thumbnail': image,
'duration': duration, 'duration': duration,
'timestamp': timestamp, 'timestamp': timestamp,
'is_live': is_live, 'is_live': is_live,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'episode': subtitle if episode_number else None,
'series': title if episode_number else None,
'episode_number': int_or_none(episode_number),
'season_number': int_or_none(season_number),
} }
def _real_extract(self, url): def _real_extract(self, url):
@@ -230,14 +236,31 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
'ext': 'mp4', 'ext': 'mp4',
'title': '13h15, le dimanche... - Les mystères de Jésus', 'title': '13h15, le dimanche... - Les mystères de Jésus',
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
'timestamp': 1502623500, 'timestamp': 1502623500,
'duration': 2580,
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20170813', 'upload_date': '20170813',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'add_ie': [FranceTVIE.ie_key()], 'add_ie': [FranceTVIE.ie_key()],
}, {
'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html',
'info_dict': {
'id': 'a9050959-eedd-4b4a-9b0d-de6eeaa73e44',
'ext': 'mp4',
'title': 'Foot2Rue - Duel au vieux port',
'episode': 'Duel au vieux port',
'series': 'Foot2Rue',
'episode_number': 1,
'season_number': 1,
'timestamp': 1642761360,
'upload_date': '20220121',
'season': 'Season 1',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1441,
},
}, { }, {
# france3 # france3
'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html', 'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',

View File

@@ -3,8 +3,11 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
filter_dict,
parse_qs,
remove_end, remove_end,
traverse_obj, traverse_obj,
update_url_query,
urljoin, urljoin,
) )
@@ -108,7 +111,9 @@ class MediaStreamIE(MediaStreamBaseIE):
for message in [ for message in [
'Debido a tu ubicación no puedes ver el contenido', 'Debido a tu ubicación no puedes ver el contenido',
'You are not allowed to watch this video: Geo Fencing Restriction' 'You are not allowed to watch this video: Geo Fencing Restriction',
'Este contenido no está disponible en tu zona geográfica.',
'El contenido sólo está disponible dentro de',
]: ]:
if message in webpage: if message in webpage:
self.raise_geo_restricted() self.raise_geo_restricted()
@@ -118,7 +123,16 @@ class MediaStreamIE(MediaStreamBaseIE):
formats, subtitles = [], {} formats, subtitles = [], {}
for video_format in player_config['src']: for video_format in player_config['src']:
if video_format == 'hls': if video_format == 'hls':
fmts, subs = self._extract_m3u8_formats_and_subtitles(player_config['src'][video_format], video_id) params = {
'at': 'web-app',
'access_token': traverse_obj(parse_qs(url), ('access_token', 0)),
}
for name, key in (('MDSTRMUID', 'uid'), ('MDSTRMSID', 'sid'), ('MDSTRMPID', 'pid'), ('VERSION', 'av')):
params[key] = self._search_regex(
rf'window\.{name}\s*=\s*["\']([^"\']+)["\'];', webpage, key, default=None)
fmts, subs = self._extract_m3u8_formats_and_subtitles(
update_url_query(player_config['src'][video_format], filter_dict(params)), video_id)
formats.extend(fmts) formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles) self._merge_subtitles(subs, target=subtitles)
elif video_format == 'mpd': elif video_format == 'mpd':

View File

@@ -3,7 +3,6 @@ import re
import uuid import uuid
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
OnDemandPagedList, OnDemandPagedList,
@@ -84,15 +83,17 @@ class OnDemandKoreaIE(InfoExtractor):
def try_geo_bypass(url): def try_geo_bypass(url):
return traverse_obj(url, ({parse_qs}, 'stream_url', 0, {url_or_none})) or url return traverse_obj(url, ({parse_qs}, 'stream_url', 0, {url_or_none})) or url
def try_upgrade_quality(url):
mod_url = re.sub(r'_720(p?)\.m3u8', r'_1080\1.m3u8', url)
return mod_url if mod_url != url and self._request_webpage(
HEADRequest(mod_url), video_id, note='Checking for higher quality format',
errnote='No higher quality format found', fatal=False) else url
formats = [] formats = []
for m3u8_url in traverse_obj(data, (('sources', 'manifest'), ..., 'url', {url_or_none}, {try_geo_bypass})): for m3u8_url in traverse_obj(data, (('sources', 'manifest'), ..., 'url', {url_or_none}, {try_geo_bypass})):
formats.extend(self._extract_m3u8_formats(try_upgrade_quality(m3u8_url), video_id, fatal=False)) mod_url = re.sub(r'_720(p?)\.m3u8', r'_1080\1.m3u8', m3u8_url)
if mod_url != m3u8_url:
mod_format = self._extract_m3u8_formats(
mod_url, video_id, note='Checking for higher quality format',
errnote='No higher quality format found', fatal=False)
if mod_format:
formats.extend(mod_format)
continue
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, fatal=False))
subtitles = {} subtitles = {}
for track in traverse_obj(data, ('text_tracks', lambda _, v: url_or_none(v['url']))): for track in traverse_obj(data, ('text_tracks', lambda _, v: url_or_none(v['url']))):