Compare commits

...

8 Commits

Author         SHA1        Message                                                                                Date
Mozi           9ec0846c03  Merge a9fc46dc7c into c699bafc50                                                       2024-11-16 07:05:39 +00:00
Mozi           a9fc46dc7c  use 'filter' in traversal                                                              2024-11-16 07:04:10 +00:00
Mozi           a9aa5500a5  merge 'master'                                                                         2024-11-16 06:59:12 +00:00
bashonly       c699bafc50  [ie/soop] Fix thumbnail extraction (#11545) (Closes #11537; Authored by: bashonly)    2024-11-15 22:51:55 +00:00
bashonly       eb64ae7d5d  [ie] Allow ext override for thumbnails (#11545) (Authored by: bashonly)               2024-11-15 22:51:55 +00:00
Simon Sawicki  c014fbcddc  [utils] subs_list_to_dict: Add lang default parameter (#11508) (Authored by: Grub4K)  2024-11-15 23:25:52 +01:00
Simon Sawicki  39d79c9b9c  [utils] Fix join_nonempty, add **kwargs to unpack (#11559) (Authored by: Grub4K)      2024-11-15 22:06:15 +01:00
Mozi           513d4c358e  [ie/RTS] Support new URLs; fix tests; drop old useless formats                        2024-09-16 19:07:46 +00:00
9 changed files with 298 additions and 163 deletions

test/test_traversal.py

@@ -481,7 +481,7 @@ class TestTraversalHelpers:
             'id': 'name',
             'data': 'content',
             'url': 'url',
-        }, all, {subs_list_to_dict}]) == {
+        }, all, {subs_list_to_dict(lang=None)}]) == {
             'de': [{'url': 'https://example.com/subs/de.ass'}],
             'en': [{'data': 'content'}],
         }, 'subs with mandatory items missing should be filtered'
@@ -507,6 +507,54 @@ class TestTraversalHelpers:
             {'url': 'https://example.com/subs/en1', 'ext': 'ext'},
             {'url': 'https://example.com/subs/en2', 'ext': 'ext'},
         ]}, '`quality` key should sort subtitle list accordingly'
+        assert traverse_obj([
+            {'name': 'de', 'url': 'https://example.com/subs/de.ass'},
+            {'name': 'de'},
+            {'name': 'en', 'content': 'content'},
+            {'url': 'https://example.com/subs/en'},
+        ], [..., {
+            'id': 'name',
+            'url': 'url',
+            'data': 'content',
+        }, all, {subs_list_to_dict(lang='en')}]) == {
+            'de': [{'url': 'https://example.com/subs/de.ass'}],
+            'en': [
+                {'data': 'content'},
+                {'url': 'https://example.com/subs/en'},
+            ],
+        }, 'optionally provided lang should be used if no id available'
+        assert traverse_obj([
+            {'name': 1, 'url': 'https://example.com/subs/de1'},
+            {'name': {}, 'url': 'https://example.com/subs/de2'},
+            {'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
+            {'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
+        ], [..., {
+            'id': 'name',
+            'url': 'url',
+            'ext': 'ext',
+        }, all, {subs_list_to_dict(lang=None)}]) == {
+            'de': [
+                {'url': 'https://example.com/subs/de3'},
+                {'url': 'https://example.com/subs/de4'},
+            ],
+        }, 'non str types should be ignored for id and ext'
+        assert traverse_obj([
+            {'name': 1, 'url': 'https://example.com/subs/de1'},
+            {'name': {}, 'url': 'https://example.com/subs/de2'},
+            {'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
+            {'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
+        ], [..., {
+            'id': 'name',
+            'url': 'url',
+            'ext': 'ext',
+        }, all, {subs_list_to_dict(lang='de')}]) == {
+            'de': [
+                {'url': 'https://example.com/subs/de1'},
+                {'url': 'https://example.com/subs/de2'},
+                {'url': 'https://example.com/subs/de3'},
+                {'url': 'https://example.com/subs/de4'},
+            ],
+        }, 'non str types should be replaced by default id'
 
     def test_trim_str(self):
         with pytest.raises(TypeError):
@@ -525,7 +573,7 @@ class TestTraversalHelpers:
     def test_unpack(self):
         assert unpack(lambda *x: ''.join(map(str, x)))([1, 2, 3]) == '123'
         assert unpack(join_nonempty)([1, 2, 3]) == '1-2-3'
-        assert unpack(join_nonempty(delim=' '))([1, 2, 3]) == '1 2 3'
+        assert unpack(join_nonempty, delim=' ')([1, 2, 3]) == '1 2 3'
         with pytest.raises(TypeError):
             unpack(join_nonempty)()
         with pytest.raises(TypeError):
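
A minimal sketch of what the new assertions pin down, using made-up subtitle entries and assuming a checkout of this branch: `subs_list_to_dict` now buckets entries whose `id` is missing or not a string under the `lang` fallback (default 'und'), while `lang=None` keeps the old behaviour of dropping them.

    from yt_dlp.utils.traversal import subs_list_to_dict

    def make_subs():  # hypothetical traversal output
        return [
            {'id': 'en', 'url': 'https://example.com/subs/en.vtt'},
            {'url': 'https://example.com/subs/unknown.vtt'},  # no id
        ]

    assert 'und' in subs_list_to_dict(make_subs())                 # default lang='und'
    assert 'und' not in subs_list_to_dict(make_subs(), lang=None)  # old behaviour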

test/test_utils.py

@@ -72,7 +72,6 @@ from yt_dlp.utils import (
     intlist_to_bytes,
     iri_to_uri,
     is_html,
-    join_nonempty,
     js_to_json,
     limit_length,
     locked_file,
@@ -2158,10 +2157,6 @@ Line 1
         assert int_or_none(v=10) == 10, 'keyword passed positional should call function'
         assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function'
 
-        assert callable(join_nonempty(delim=', ')), 'varargs positional should apply partially'
-        assert callable(join_nonempty()), 'varargs positional should apply partially'
-        assert join_nonempty(None, delim=', ') == '', 'passed varargs should call the function'
-
 
 if __name__ == '__main__':
     unittest.main()
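
For context, the deleted assertions covered exactly the behaviour that 39d79c9b9c removes: `join_nonempty` no longer applies partially. A quick sketch of the behaviour on this branch:

    from yt_dlp.utils import join_nonempty

    # previously join_nonempty(delim=', ') returned a partial;
    # it now joins zero values and returns the empty string
    assert join_nonempty(delim=', ') == ''
    assert join_nonempty('a', None, 'b', delim=', ') == 'a, b'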

yt_dlp/YoutubeDL.py

@@ -4381,7 +4381,9 @@ class YoutubeDL:
             return None
 
         for idx, t in list(enumerate(thumbnails))[::-1]:
-            thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
+            thumb_ext = t.get('ext') or determine_ext(t['url'], 'jpg')
+            if multiple:
+                thumb_ext = f'{t["id"]}.{thumb_ext}'
             thumb_display_id = f'{label} thumbnail {t["id"]}'
             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
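
The effect of the new lines, sketched with made-up values: an explicit thumbnail `ext` now wins over the extension guessed from the URL.

    from yt_dlp.utils import determine_ext

    t = {'id': '0', 'url': 'https://example.com/thumb.php', 'ext': 'jpg'}  # hypothetical
    multiple = False

    thumb_ext = t.get('ext') or determine_ext(t['url'], 'jpg')
    if multiple:
        thumb_ext = f'{t["id"]}.{thumb_ext}'
    assert thumb_ext == 'jpg'  # previously 'php' would have been used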

yt_dlp/extractor/afreecatv.py

@@ -66,6 +66,14 @@ class AfreecaTVBaseIE(InfoExtractor):
                 extensions={'legacy_ssl': True}), display_id,
             'Downloading API JSON', 'Unable to download API JSON')
 
+    @staticmethod
+    def _fixup_thumb(thumb_url):
+        if not url_or_none(thumb_url):
+            return None
+        # Core would determine_ext as 'php' from the url, so we need to provide the real ext
+        # See: https://github.com/yt-dlp/yt-dlp/issues/11537
+        return [{'url': thumb_url, 'ext': 'jpg'}]
+
 
 class AfreecaTVIE(AfreecaTVBaseIE):
     IE_NAME = 'soop'
@@ -155,7 +163,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
             'uploader': ('writer_nick', {str}),
             'uploader_id': ('bj_id', {str}),
             'duration': ('total_file_duration', {int_or_none(scale=1000)}),
-            'thumbnail': ('thumb', {url_or_none}),
+            'thumbnails': ('thumb', {self._fixup_thumb}),
         })
 
         entries = []
@@ -226,8 +234,7 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
         return self.playlist_result(self._entries(data), video_id)
 
-    @staticmethod
-    def _entries(data):
+    def _entries(self, data):
         # 'files' is always a list with 1 element
         yield from traverse_obj(data, (
             'data', lambda _, v: v['story_type'] == 'catch',
@@ -238,7 +245,7 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
             'title': ('title', {str}),
             'uploader': ('writer_nick', {str}),
             'uploader_id': ('writer_id', {str}),
-            'thumbnail': ('thumb', {url_or_none}),
+            'thumbnails': ('thumb', {self._fixup_thumb}),
             'timestamp': ('write_timestamp', {int_or_none}),
         }))
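
Why the fixup is needed, sketched with a hypothetical URL (see #11537 for real examples): the thumbnail endpoint is a PHP script, so guessing the extension from the URL yields 'php'.

    from yt_dlp.utils import determine_ext

    thumb_url = 'https://example.sooplive.co.kr/snapshot.php?key=123'  # hypothetical
    assert determine_ext(thumb_url) == 'php'
    # _fixup_thumb instead returns [{'url': thumb_url, 'ext': 'jpg'}],
    # which the core now honours when naming the downloaded file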

yt_dlp/extractor/common.py

@@ -279,6 +279,7 @@ class InfoExtractor:
     thumbnails:     A list of dictionaries, with the following entries:
                         * "id" (optional, string) - Thumbnail format ID
                         * "url"
+                        * "ext" (optional, string) - actual image extension if not given in URL
                         * "preference" (optional, int) - quality of the image
                         * "width" (optional, int)
                         * "height" (optional, int)

yt_dlp/extractor/rts.py

@@ -1,182 +1,290 @@
-import re
+import functools
 
 from .srgssr import SRGSSRIE
 from ..utils import (
     determine_ext,
     int_or_none,
-    parse_duration,
+    orderedSet,
     parse_iso8601,
-    unescapeHTML,
-    urljoin,
+    parse_resolution,
 )
+from ..utils.traversal import traverse_obj
 
 
-class RTSIE(SRGSSRIE):  # XXX: Do not subclass from concrete IE
-    _WORKING = False
+class RTSIE(SRGSSRIE):
+    _GEO_COUNTRIES = ['CH']
     IE_DESC = 'RTS.ch'
-    _VALID_URL = r'rts:(?P<rts_id>\d+)|https?://(?:.+?\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html'
+    _VALID_URL = [
+        r'rts:(?P<id>\d+)',
+        r'https?://(?:.+?\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html',
+        r'https?://(?:.+?\.)?rts\.ch/(?:[^/]+/){2,}(?P<display_id>.+?)-(?P<id>[0-9]+)\.html',
+    ]
 
     _TESTS = [
         {
+            # article with videos
             'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
-            'md5': '753b877968ad8afaeddccc374d4256a5',
             'info_dict': {
                 'id': '3449373',
-                'display_id': 'les-enfants-terribles',
-                'ext': 'mp4',
-                'duration': 1488,
                 'title': 'Les Enfants Terribles',
                 'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
-                'uploader': 'Divers',
-                'upload_date': '19680921',
-                'timestamp': -40280400,
-                'thumbnail': r're:^https?://.*\.image',
-                'view_count': int,
+                'display_id': 'les-enfants-terribles',
+                'tags': ['Divers', 'Archives TV', 'Culture et Arts', 'Les archives', 'Personnalités', 'RTS Archives', 'Années 1960', 'Autres arts', 'Décennies', 'Société'],
             },
-            'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
-        },
-        {
-            'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
-            'info_dict': {
-                'id': '5624065',
-                'title': 'Passe-moi les jumelles',
-            },
-            'playlist_mincount': 4,
+            'playlist': [{
+                'info_dict': {
+                    'id': '3449373',
+                    'ext': 'mp4',
+                    'title': 'Les Enfants Terribles',
+                    'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
+                    'thumbnail': r're:^https?://.*\.image',
+                    'upload_date': '19680921',
+                    'timestamp': -40280400,
+                    'duration': 1488,
+                    'categories': ['Divers'],
+                },
+            }],
+            'params': {'skip_download': 'm3u8'},  # 700-byte first fragment
         },
         {
+            # video without text content
             'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html',
             'info_dict': {
                 'id': '5745975',
                 'display_id': '1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski',
-                'ext': 'mp4',
-                'duration': 48,
                 'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski',
                 'description': 'Hockey - Playoff',
-                'uploader': 'Hockey',
-                'upload_date': '20140403',
-                'timestamp': 1396556882,
-                'thumbnail': r're:^https?://.*\.image',
-                'view_count': int,
+                'tags': ['Hockey', 'Sport', 'RTS Sport'],
             },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
-            'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
+            'playlist': [{
+                'info_dict': {
+                    'id': '5745975',
+                    'ext': 'mp4',
+                    'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski',
+                    'description': 'Hockey - Playoff',
+                    'thumbnail': r're:^https?://.*\.image',
+                    'upload_date': '20140403',
+                    'timestamp': 1396556882,
+                    'duration': 48,
+                    'categories': ['Hockey sur glace'],
+                },
+            }],
+            'params': {'skip_download': 'm3u8'},  # 700-byte first fragment
             'skip': 'Blocked outside Switzerland',
         },
         {
+            # video player; redirection: https://www.rts.ch/play/tv/lactu-en-video/video/londres-cachee-par-un-epais-smog?urn=urn:rts:video:5745356
             'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html',
-            'md5': '9bb06503773c07ce83d3cbd793cebb91',
             'info_dict': {
                 'id': '5745356',
-                'display_id': 'londres-cachee-par-un-epais-smog',
                 'ext': 'mp4',
-                'duration': 33,
+                'duration': 33.76,
                 'title': 'Londres cachée par un épais smog',
                 'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.',
-                'uploader': 'L\'actu en vidéo',
                 'upload_date': '20140403',
                 'timestamp': 1396537322,
                 'thumbnail': r're:^https?://.*\.image',
-                'view_count': int,
+                'webpage_url': 'srgssr:rts:video:5745356',
             },
-            'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
+            'params': {'skip_download': 'm3u8'},  # 700-byte first fragment
         },
         {
+            # audio & podcast
             'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html',
-            'md5': 'dd8ef6a22dff163d063e2a52bc8adcae',
             'info_dict': {
                 'id': '5706148',
-                'display_id': 'urban-hippie-de-damien-krisl-03-04-2014',
-                'ext': 'mp3',
-                'duration': 123,
                 'title': '"Urban Hippie", de Damien Krisl',
                 'description': 'Des Hippies super glam.',
-                'upload_date': '20140403',
-                'timestamp': 1396551600,
+                'display_id': 'urban-hippie-de-damien-krisl',
+                'tags': ['Media Radio', 'Couleur3'],
             },
+            'playlist': [{
+                'info_dict': {
+                    'id': '5706148',
+                    'ext': 'mp3',
+                    'title': '"Urban Hippie", de Damien Krisl',
+                    'description': 'Des Hippies super glam.',
+                    'thumbnail': r're:^https?://.*\.image',
+                    'upload_date': '20140403',
+                    'timestamp': 1396546481,
+                    'duration': 123,
+                    'categories': ['La belle vidéo de Stéphane Laurenceau'],
+                },
+            }, {
+                'info_dict': {
+                    'id': '5747185',
+                    'ext': 'mp3',
+                    'title': 'Le musée du psychédélisme',
+                    'description': 'md5:72f8662f48c32050ae817e3bde7e0acc',
+                    'thumbnail': r're:^https?://.*\.image',
+                    'upload_date': '20140402',
+                    'timestamp': 1396476000,
+                    'duration': 274,
+                    'categories': ['Happy Culture'],
+                },
+            }, {
+                'info_dict': {
+                    'id': '5706149',
+                    'ext': 'mp3',
+                    'title': 'Silk Art Hippie Culture',
+                    'description': 'md5:8e3b9d8d84d85ca8a1905cf50b39bba4',
+                    'thumbnail': r're:^https?://.*\.image',
+                    'upload_date': '20140403',
+                    'timestamp': 1396545649,
+                    'duration': 161,
+                    'categories': ['Happy Pics'],
+                },
+            }, {
+                'info_dict': {
+                    'id': '5706148',
+                    'ext': 'mp3',
+                    'title': '"Urban Hippie", de Damien Krisl',
+                    'description': 'Des Hippies super glam.',
+                    'thumbnail': r're:^https?://.*\.image',
+                    'upload_date': '20140403',
+                    'timestamp': 1396546481,
+                    'duration': 123,
+                    'categories': ['La belle vidéo de Stéphane Laurenceau'],
+                },
+            }],
         },
         {
             # article with videos on rhs
             'url': 'http://www.rts.ch/sport/hockey/6693917-hockey-davos-decroche-son-31e-titre-de-champion-de-suisse.html',
             'info_dict': {
                 'id': '6693917',
-                'title': 'Hockey: Davos décroche son 31e titre de champion de Suisse',
+                'title': 'Davos décroche le 31e titre de son histoire',
+                'description': 'md5:3c9a767b2a332413eda33c526024578c',
+                'display_id': 'hockey-davos-decroche-son-31e-titre-de-champion-de-suisse',
+                'tags': ['Hockey', 'Tout le sport', 'RTS Info', 'LNA', "Toute l'info", 'RTS Sport'],
             },
             'playlist_mincount': 5,
+            'skip': 'Blocked outside Switzerland',
         },
+        {
+            # articles containing recordings of TV shows
+            'url': 'https://www.rts.ch/info/regions/valais/12865814-un-bouquetin-emporte-par-un-aigle-royal-sur-les-hauts-de-fully-vs.html',
+            'info_dict': {
+                'id': '12865814',
+                'title': 'Un bouquetin emporté par un aigle royal sur les hauts de Fully (VS)',
+                'description': 'md5:9b511f89075e2730bd2dd59915c25574',
+                'display_id': 'un-bouquetin-emporte-par-un-aigle-royal-sur-les-hauts-de-fully-vs',
+                'tags': ['Régions', 'RTS Info', 'Valais', "Toute l'info"],
+            },
+            'playlist': [{
+                'info_dict': {
+                    'id': '12861415',
+                    'ext': 'mp4',
+                    'title': 'En Valais, un bouquetin emporté dans les airs par un aigle royal. Décryptage d\'une image rare.',
+                    'thumbnail': r're:^https?://.*\.image',
+                    'timestamp': 1644690600,
+                    'upload_date': '20220212',
+                    'duration': 107,
+                    'categories': ['19h30'],
+                },
+            }],
+            'params': {'skip_download': 'm3u8'},  # 700-byte first fragment
+            'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
+        },
+        {
+            # new URL format; article with videos
+            'url': 'https://www.rts.ch/info/suisse/2024/article/doris-leuthard-il-y-a-des-alternatives-au-nucleaire-qui-sont-moins-risquees-28631869.html',
+            'info_dict': {
+                'id': '28631869',
+                'title': 'Doris Leuthard: "Il y a des alternatives au nucléaire qui sont moins risquées"',
+                'description': 'md5:ba9930e218dcd177801a34b89a16b86e',
+                'display_id': 'doris-leuthard-il-y-a-des-alternatives-au-nucleaire-qui-sont-moins-risquees',
+                'tags': 'count:13',
+            },
+            'playlist': [{
+                'info_dict': {
+                    'id': '15162786',
+                    'ext': 'mp4',
+                    'title': 'L\'invitée de La Matinale (vidéo) - Doris Leuthard, co-présidente du projet d\'exposition nationale Svizra27',
+                    'thumbnail': r're:^https?://.*\.image',
+                    'upload_date': '20240916',
+                    'timestamp': 1726462800,
+                    'duration': 860,
+                    'categories': ['La Matinale'],
+                },
+            }, {
+                'info_dict': {
+                    'id': '15164848',
+                    'ext': 'mp4',
+                    'title': 'Le Centre pourrait faire pencher la balance en faveur de la construction de nouvelles centrales nucléaires',
+                    'thumbnail': r're:^https?://.*\.image',
+                    'upload_date': '20240916',
+                    'timestamp': 1726502400,
+                    'duration': 227,
+                    'categories': ['Forum'],
+                },
+            }],
+            'params': {'skip_download': 'm3u8'},  # 700-byte first fragment
+            'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
+        },
         {
             'url': 'http://pages.rts.ch/emissions/passe-moi-les-jumelles/5624065-entre-ciel-et-mer.html',
             'only_matching': True,
         },
+        {
+            'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):
-        m = self._match_valid_url(url)
-        media_id = m.group('rts_id') or m.group('id')
-        display_id = m.group('display_id') or media_id
-
-        def download_json(internal_id):
-            return self._download_json(
-                f'http://www.rts.ch/a/{internal_id}.html?f=json/article',
-                display_id)
-
-        all_info = download_json(media_id)
-
-        # media_id extracted out of URL is not always a real id
-        if 'video' not in all_info and 'audio' not in all_info:
-            entries = []
-
-            for item in all_info.get('items', []):
-                item_url = item.get('url')
-                if not item_url:
-                    continue
-                entries.append(self.url_result(item_url, 'RTS'))
-
-            if not entries:
-                page, urlh = self._download_webpage_handle(url, display_id)
-                if re.match(self._VALID_URL, urlh.url).group('id') != media_id:
-                    return self.url_result(urlh.url, 'RTS')
-
-                # article with videos on rhs
-                videos = re.findall(
-                    r'<article[^>]+class="content-item"[^>]*>\s*<a[^>]+data-video-urn="urn:([^"]+)"',
-                    page)
-                if not videos:
-                    videos = re.findall(
-                        r'(?s)<iframe[^>]+class="srg-player"[^>]+src="[^"]+urn:([^"]+)"',
-                        page)
-                if videos:
-                    entries = [self.url_result(f'srgssr:{video_urn}', 'SRGSSR') for video_urn in videos]
-
-            if entries:
-                return self.playlist_result(entries, media_id, all_info.get('title'))
-
-            internal_id = self._html_search_regex(
-                r'<(?:video|audio) data-id="([0-9]+)"', page,
-                'internal video id')
-            all_info = download_json(internal_id)
-
-        media_type = 'video' if 'video' in all_info else 'audio'
-
-        # check for errors
-        self._get_media_data('rts', media_type, media_id)
-
-        info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']
-
-        title = info['title']
+        webpage, urlh = self._download_webpage_handle(url, self._match_id(url))
+        if urlh.url != url:
+            return self.url_result(urlh.url)
+
+        mobj = self._match_valid_url(url)
+        display_id = traverse_obj(mobj, 'display_id', default=mobj.group('id')) or mobj.group('id')
+
+        media_list = []
+        article_details = self._search_json(r'articleDetails\s*=\s*', webpage, 'article details', display_id)
+        traverse_obj(article_details, ('mainMedia', filter, {media_list.append}))
+        traverse_obj(article_details, ('innerMediaElements', filter, {media_list.extend}))
+        traverse_obj(article_details, ('mediaElements', filter, {media_list.extend}))
+        media_list = orderedSet(media_list)
+
+        entries = []
+        for media in media_list:
+            media_id = media['oid']
+            media_info = self._get_media_data('rts', media['type'], media_id)
+            if fmts := self._extract_formats(media_info, media_id):
+                entries.append({
+                    'id': media_info['id'],
+                    'title': media_info['title'],
+                    'formats': fmts,
+                    'description': media_info.get('description'),
+                    'thumbnails': [traverse_obj(media_info, ('imageUrl', {lambda x: {
+                        'url': x,
+                        **parse_resolution(x),
+                    }}))],
+                    'timestamp': parse_iso8601(media_info.get('date')),
+                    'duration': traverse_obj(media_info, ('duration', {functools.partial(int_or_none, scale=1000)})),
+                    'categories': [media.get('category')],
+                })
+
+        return self.playlist_result(
+            entries, article_details.get('oid'), article_details.get('title'),
+            article_details.get('lead'), display_id=display_id,
+            tags=traverse_obj(article_details, ('tags', ..., 'name')))
 
+    def _extract_formats(self, media_info, media_id):
         def extract_bitrate(url):
             return int_or_none(self._search_regex(
                 r'-([0-9]+)k\.', url, 'bitrate', default=None))
 
         formats = []
-        streams = info.get('streams', {})
-        for format_id, format_url in streams.items():
-            if format_id == 'hds_sd' and 'hds' in streams:
+        for idx, stream in enumerate(traverse_obj(
+                media_info, ('resourceList', lambda _, v: v['url']))):
+            format_id = stream.get('protocol') or str(idx)
+            format_url = stream['url']
+            if format_id == 'hds_sd' and 'hds' in stream:
                 continue
-            if format_id == 'hls_sd' and 'hls' in streams:
+            if format_id == 'hls_sd' and 'hls' in stream:
                 continue
             ext = determine_ext(format_url)
             if ext in ('m3u8', 'f4m'):
@@ -195,37 +303,5 @@ class RTSIE(SRGSSRIE):  # XXX: Do not subclass from concrete IE
                 'tbr': extract_bitrate(format_url),
             })
 
-        download_base = 'http://rtsww{}-d.rts.ch/'.format('-a' if media_type == 'audio' else '')
-        for media in info.get('media', []):
-            media_url = media.get('url')
-            if not media_url or re.match(r'https?://', media_url):
-                continue
-            rate = media.get('rate')
-            ext = media.get('ext') or determine_ext(media_url, 'mp4')
-            format_id = ext
-            if rate:
-                format_id += '-%dk' % rate
-            formats.append({
-                'format_id': format_id,
-                'url': urljoin(download_base, media_url),
-                'tbr': rate or extract_bitrate(media_url),
-            })
-
         self._check_formats(formats, media_id)
-
-        duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
-        if isinstance(duration, str):
-            duration = parse_duration(duration)
-
-        return {
-            'id': media_id,
-            'display_id': display_id,
-            'formats': formats,
-            'title': title,
-            'description': info.get('intro'),
-            'duration': duration,
-            'view_count': int_or_none(info.get('plays')),
-            'uploader': info.get('programName'),
-            'timestamp': parse_iso8601(info.get('broadcast_date')),
-            'thumbnail': unescapeHTML(info.get('preview_image_url')),
-        }
+        return formats
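
A sketch of the traversal pattern introduced by the head commit ("use 'filter' in traversal"), with a hypothetical articleDetails payload: `filter` drops falsy values, and a one-element set applies its callable for the side effect.

    from yt_dlp.utils.traversal import traverse_obj

    article = {  # hypothetical
        'mainMedia': {'oid': '1', 'type': 'video'},
        'innerMediaElements': None,
        'mediaElements': [{'oid': '2', 'type': 'audio'}],
    }
    media_list = []
    traverse_obj(article, ('mainMedia', filter, {media_list.append}))
    traverse_obj(article, ('innerMediaElements', filter, {media_list.extend}))
    traverse_obj(article, ('mediaElements', filter, {media_list.extend}))
    assert media_list == [{'oid': '1', 'type': 'video'}, {'oid': '2', 'type': 'audio'}]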

yt_dlp/extractor/srgssr.py

@@ -57,7 +57,7 @@ class SRGSSRIE(InfoExtractor):
     def _get_media_data(self, bu, media_type, media_id):
         query = {'onlyChapters': True} if media_type == 'video' else {}
         full_media_data = self._download_json(
-            f'https://il.srgssr.ch/integrationlayer/2.0/{bu}/mediaComposition/{media_type}/{media_id}.json',
+            f'https://il.srgssr.ch/integrationlayer/2.0/mediaComposition/byUrn/urn:{bu}:{media_type}:{media_id}.json',
             media_id, query=query)['chapterList']
         try:
             media_data = next(
@@ -165,7 +165,7 @@ class SRGSSRPlayIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
-        'md5': '6db2226ba97f62ad42ce09783680046c',
+        'md5': '81c6ad90d774c46e3c54ea2f01a94db3',
        'info_dict': {
             'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
             'ext': 'mp4',
@@ -173,7 +173,7 @@ class SRGSSRPlayIE(InfoExtractor):
             'title': 'Snowden beantragt Asyl in Russland',
             'timestamp': 1372708215,
             'duration': 113.827,
-            'thumbnail': r're:^https?://.*1383719781\.png$',
+            'thumbnail': r're:^https?://download-media\.srf\.ch/.*\.(?:png|jpg)$',
         },
         'expected_warnings': ['Unable to download f4m manifest'],
     }, {
@@ -185,6 +185,7 @@ class SRGSSRPlayIE(InfoExtractor):
             'title': 'Saira: Tujetsch - tuttina cuntinuar cun Sedrun Mustér Turissem',
             'timestamp': 1444709160,
             'duration': 336.816,
+            'thumbnail': r're:^https?://download-media\.srf\.ch/.*\.(?:png|jpg)$',
         },
         'params': {
             # rtmp download
@@ -217,7 +218,7 @@ class SRGSSRPlayIE(InfoExtractor):
             'duration': 94.0,
             'upload_date': '20170215',
             'timestamp': 1487173560,
-            'thumbnail': r're:https?://www\.swissinfo\.ch/srgscalableimage/42961964',
+            'thumbnail': r're:https?://cdn\.prod\.swi-services\.ch/.+',
             'subtitles': 'count:9',
         },
         'params': {
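
The media-data endpoint moves from per-BU paths to URN addressing. With the values from the redirection test above:

    bu, media_type, media_id = 'rts', 'video', '5745356'

    old_url = f'https://il.srgssr.ch/integrationlayer/2.0/{bu}/mediaComposition/{media_type}/{media_id}.json'
    new_url = f'https://il.srgssr.ch/integrationlayer/2.0/mediaComposition/byUrn/urn:{bu}:{media_type}:{media_id}.json'
    print(new_url)  # ...mediaComposition/byUrn/urn:rts:video:5745356.json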

yt_dlp/utils/_utils.py

@@ -216,7 +216,7 @@ def partial_application(func):
     sig = inspect.signature(func)
     required_args = [
         param.name for param in sig.parameters.values()
-        if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.VAR_POSITIONAL)
+        if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
         if param.default is inspect.Parameter.empty
     ]
 
@@ -4837,7 +4837,6 @@ def number_of_digits(number):
     return len('%d' % number)
 
 
-@partial_application
 def join_nonempty(*values, delim='-', from_dict=None):
     if from_dict is not None:
         values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)
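
With VAR_POSITIONAL no longer counted as a required argument, and the decorator dropped from `join_nonempty`, only functions that are missing a real positional argument still apply partially. A sketch assuming this branch:

    from yt_dlp.utils import int_or_none, join_nonempty

    assert callable(int_or_none(scale=1000))   # still a partial: `v` is missing
    assert int_or_none(1500, scale=1000) == 1  # ms -> s
    assert join_nonempty(delim=', ') == ''     # plain call, no partial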

yt_dlp/utils/traversal.py

@@ -332,14 +332,14 @@ class _RequiredError(ExtractorError):
 
 
 @typing.overload
-def subs_list_to_dict(*, ext: str | None = None) -> collections.abc.Callable[[list[dict]], dict[str, list[dict]]]: ...
+def subs_list_to_dict(*, lang: str | None = 'und', ext: str | None = None) -> collections.abc.Callable[[list[dict]], dict[str, list[dict]]]: ...
 
 
 @typing.overload
-def subs_list_to_dict(subs: list[dict] | None, /, *, ext: str | None = None) -> dict[str, list[dict]]: ...
+def subs_list_to_dict(subs: list[dict] | None, /, *, lang: str | None = 'und', ext: str | None = None) -> dict[str, list[dict]]: ...
 
 
-def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
+def subs_list_to_dict(subs: list[dict] | None = None, /, *, lang='und', ext=None):
     """
     Convert subtitles from a traversal into a subtitle dict.
     The path should have an `all` immediately before this function.
@@ -352,7 +352,7 @@ def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
         `quality`   The sort order for each subtitle
     """
     if subs is None:
-        return functools.partial(subs_list_to_dict, ext=ext)
+        return functools.partial(subs_list_to_dict, lang=lang, ext=ext)
 
     result = collections.defaultdict(list)
 
@@ -360,10 +360,16 @@ def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
         if not url_or_none(sub.get('url')) and not sub.get('data'):
             continue
         sub_id = sub.pop('id', None)
-        if sub_id is None:
-            continue
-        if ext is not None and not sub.get('ext'):
-            sub['ext'] = ext
+        if not isinstance(sub_id, str):
+            if not lang:
+                continue
+            sub_id = lang
+        sub_ext = sub.get('ext')
+        if not isinstance(sub_ext, str):
+            if not ext:
+                sub.pop('ext', None)
+            else:
+                sub['ext'] = ext
         result[sub_id].append(sub)
     result = dict(result)
 
@@ -452,9 +458,9 @@ def trim_str(*, start=None, end=None):
     return trim
 
 
-def unpack(func):
+def unpack(func, **kwargs):
     @functools.wraps(func)
-    def inner(items, **kwargs):
+    def inner(items):
         return func(*items, **kwargs)
 
     return inner
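
`unpack` now binds keyword arguments when the wrapper is created instead of accepting them on the inner call, which is what the updated `test_unpack` exercises. A minimal sketch:

    from yt_dlp.utils import join_nonempty
    from yt_dlp.utils.traversal import unpack

    joiner = unpack(join_nonempty, delim=' ')
    assert joiner([1, 2, 3]) == '1 2 3'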