Compare commits

...

7 Commits

Author SHA1 Message Date
bibiak
f5dd875a02 moved txt_or_none outside if statement 2023-10-02 14:13:50 +00:00
bibiak
efee229d66
Merge branch 'ytdl-org:master' into master 2023-10-02 15:51:03 +02:00
dirkf
00ef748cc0 [downloader] Fix baa6c5e: show ETA of http download as ETA instead of total d/l time 2023-09-24 22:07:47 +01:00
dirkf
66ab0814c4 [utils] Revert bbd3e7e, updating docstring, test instead 2023-09-03 23:15:19 +01:00
dirkf
bbd3e7e999 [utils] Properly handle list values in update_url()
An actual list value in a query update could have been treated
as a list of values because of the key:list parse_qs format.
2023-09-03 01:18:22 +01:00
dirkf
21caaf2380 [test] Remove redundancy from lambda expected value regex 2023-09-03 01:13:40 +01:00
dirkf
31f50c8194 [S4C] Add thumbnail extraction, extract series as playlist
Based on https://github.com/yt-dlp/yt-dlp/pull/7776: thx ifan-t, bashonly
2023-08-31 23:16:50 +01:00
8 changed files with 88 additions and 38 deletions

View File

@ -142,7 +142,7 @@ def expect_value(self, got, expected, field):
self.assertTrue( self.assertTrue(
contains_str in got, contains_str in got,
'field %s (value: %r) should contain %r' % (field, got, contains_str)) 'field %s (value: %r) should contain %r' % (field, got, contains_str))
elif isinstance(expected, compat_str) and re.match(r'^lambda \w+:', expected): elif isinstance(expected, compat_str) and re.match(r'lambda \w+:', expected):
fn = eval(expected) fn = eval(expected)
suite = expected.split(':', 1)[1].strip() suite = expected.split(':', 1)[1].strip()
self.assertTrue( self.assertTrue(

View File

@ -62,13 +62,14 @@ from youtube_dl.utils import (
OnDemandPagedList, OnDemandPagedList,
orderedSet, orderedSet,
parse_age_limit, parse_age_limit,
parse_bitrate,
parse_duration, parse_duration,
parse_filesize, parse_filesize,
parse_codecs, parse_codecs,
parse_count, parse_count,
parse_iso8601, parse_iso8601,
parse_resolution, parse_resolution,
parse_bitrate, parse_qs,
pkcs1pad, pkcs1pad,
prepend_extension, prepend_extension,
read_batch_urls, read_batch_urls,
@ -125,7 +126,6 @@ from youtube_dl.compat import (
compat_setenv, compat_setenv,
compat_str, compat_str,
compat_urlparse, compat_urlparse,
compat_parse_qs,
) )
@ -683,38 +683,36 @@ class TestUtil(unittest.TestCase):
self.assertTrue(isinstance(data, bytes)) self.assertTrue(isinstance(data, bytes))
def test_update_url_query(self): def test_update_url_query(self):
def query_dict(url): self.assertEqual(parse_qs(update_url_query(
return compat_parse_qs(compat_urlparse.urlparse(url).query)
self.assertEqual(query_dict(update_url_query(
'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})), 'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})),
query_dict('http://example.com/path?quality=HD&format=mp4')) parse_qs('http://example.com/path?quality=HD&format=mp4'))
self.assertEqual(query_dict(update_url_query( self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})), 'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})),
query_dict('http://example.com/path?system=LINUX&system=WINDOWS')) parse_qs('http://example.com/path?system=LINUX&system=WINDOWS'))
self.assertEqual(query_dict(update_url_query( self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'fields': 'id,formats,subtitles'})), 'http://example.com/path', {'fields': 'id,formats,subtitles'})),
query_dict('http://example.com/path?fields=id,formats,subtitles')) parse_qs('http://example.com/path?fields=id,formats,subtitles'))
self.assertEqual(query_dict(update_url_query( self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})), 'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})),
query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails')) parse_qs('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
self.assertEqual(query_dict(update_url_query( self.assertEqual(parse_qs(update_url_query(
'http://example.com/path?manifest=f4m', {'manifest': []})), 'http://example.com/path?manifest=f4m', {'manifest': []})),
query_dict('http://example.com/path')) parse_qs('http://example.com/path'))
self.assertEqual(query_dict(update_url_query( self.assertEqual(parse_qs(update_url_query(
'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})), 'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})),
query_dict('http://example.com/path?system=LINUX')) parse_qs('http://example.com/path?system=LINUX'))
self.assertEqual(query_dict(update_url_query( self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'fields': b'id,formats,subtitles'})), 'http://example.com/path', {'fields': b'id,formats,subtitles'})),
query_dict('http://example.com/path?fields=id,formats,subtitles')) parse_qs('http://example.com/path?fields=id,formats,subtitles'))
self.assertEqual(query_dict(update_url_query( self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'width': 1080, 'height': 720})), 'http://example.com/path', {'width': 1080, 'height': 720})),
query_dict('http://example.com/path?width=1080&height=720')) parse_qs('http://example.com/path?width=1080&height=720'))
self.assertEqual(query_dict(update_url_query( self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'bitrate': 5020.43})), 'http://example.com/path', {'bitrate': 5020.43})),
query_dict('http://example.com/path?bitrate=5020.43')) parse_qs('http://example.com/path?bitrate=5020.43'))
self.assertEqual(query_dict(update_url_query( self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'test': '第二行тест'})), 'http://example.com/path', {'test': '第二行тест'})),
query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82')) parse_qs('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
def test_multipart_encode(self): def test_multipart_encode(self):
self.assertEqual( self.assertEqual(

View File

@ -96,7 +96,7 @@ class FileDownloader(object):
return None return None
return int(float(remaining) / rate) return int(float(remaining) / rate)
start, now = (start_or_rate, now_or_remaining) start, now = (start_or_rate, now_or_remaining)
total, current = args total, current = args[:2]
if total is None: if total is None:
return None return None
if now is None: if now is None:

View File

@ -294,7 +294,7 @@ class HttpFD(FileDownloader):
# Progress message # Progress message
speed = self.calc_speed(start, now, byte_counter - ctx.resume_len) speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
eta = self.calc_eta(speed, ctx.data_len and (ctx.data_len - ctx.resume_len)) eta = self.calc_eta(speed, ctx.data_len and (ctx.data_len - byte_counter))
self._hook_progress({ self._hook_progress({
'status': 'downloading', 'status': 'downloading',

View File

@ -1087,7 +1087,10 @@ from .rutube import (
from .rutv import RUTVIE from .rutv import RUTVIE
from .ruutu import RuutuIE from .ruutu import RuutuIE
from .ruv import RuvIE from .ruv import RuvIE
from .s4c import S4CIE from .s4c import (
S4CIE,
S4CSeriesIE,
)
from .safari import ( from .safari import (
SafariIE, SafariIE,
SafariApiIE, SafariApiIE,

View File

@ -2,6 +2,8 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from functools import partial as partial_f
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
float_or_none, float_or_none,
@ -9,6 +11,7 @@ from ..utils import (
T, T,
traverse_obj, traverse_obj,
txt_or_none, txt_or_none,
url_or_none,
) )
@ -21,7 +24,8 @@ class S4CIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Y Swn', 'title': 'Y Swn',
'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0', 'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0',
'duration': 5340 'duration': 5340,
'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg',
}, },
}, { }, {
'url': 'https://www.s4c.cymru/clic/programme/856636948', 'url': 'https://www.s4c.cymru/clic/programme/856636948',
@ -31,6 +35,7 @@ class S4CIE(InfoExtractor):
'title': 'Am Dro', 'title': 'Am Dro',
'duration': 2880, 'duration': 2880,
'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe', 'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe',
'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg',
}, },
}] }]
@ -43,7 +48,7 @@ class S4CIE(InfoExtractor):
'programme_id': video_id, 'programme_id': video_id,
}, fatal=False) }, fatal=False)
filename = self._download_json( player_config = self._download_json(
'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={ 'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={
'programme_id': video_id, 'programme_id': video_id,
'signed': '0', 'signed': '0',
@ -51,7 +56,8 @@ class S4CIE(InfoExtractor):
'mode': 'od', 'mode': 'od',
'appId': 'clic', 'appId': 'clic',
'streamName': '', 'streamName': '',
}, note='Downloading player config JSON')['filename'] }, note='Downloading player config JSON')
m3u8_url = self._download_json( m3u8_url = self._download_json(
'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={ 'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={
'mode': 'od', 'mode': 'od',
@ -59,18 +65,60 @@ class S4CIE(InfoExtractor):
'region': 'WW', 'region': 'WW',
'extra': 'false', 'extra': 'false',
'thirdParty': 'false', 'thirdParty': 'false',
'filename': filename, 'filename': player_config['filename'],
}, note='Downloading streaming urls JSON')['hls'] }, note='Downloading streaming urls JSON')['hls']
# ... self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls') formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native')
formats, subtitles = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native'), {} self._sort_formats(formats)
subtitles = {}
for sub in traverse_obj(player_config, ('subtitles', lambda _, v: url_or_none(v['0']))):
subtitles.setdefault(sub.get('3', 'en'), []).append({
'url': sub['0'],
'name': sub.get('1'),
})
return merge_dicts({ return merge_dicts({
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'thumbnail': url_or_none(player_config.get('poster')),
}, traverse_obj(details, ('full_prog_details', 0, { }, traverse_obj(details, ('full_prog_details', 0, {
'title': (('programme_title', 'series_title'), T(txt_or_none)), 'title': (('programme_title', 'series_title'), T(txt_or_none)),
'description': ('full_billing', T(txt_or_none)), 'description': ('full_billing', T(txt_or_none)),
'duration': ('duration', T(lambda x: float_or_none(x, invscale=60))), 'duration': ('duration', T(partial_f(float_or_none, invscale=60))),
}), get_all=False), }), get_all=False),
rev=True) rev=True)
class S4CSeriesIE(InfoExtractor):
    # Handles S4C Clic "series" URLs and returns the series as a playlist whose
    # entries are url results pointing at the individual programme pages
    # (each delegated to S4CIE).
    _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/series/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://www.s4c.cymru/clic/series/864982911',
            'playlist_mincount': 6,
            'info_dict': {
                'id': '864982911',
                'title': 'Iaith ar Daith',
            },
        },
        {
            'url': 'https://www.s4c.cymru/clic/series/866852587',
            'playlist_mincount': 8,
            'info_dict': {
                'id': '866852587',
                'title': 'FFIT Cymru',
            },
        },
    ]

    def _real_extract(self, url):
        # The numeric series id is the `id` group captured by _VALID_URL.
        series_id = self._match_id(url)

        # Query the series_details endpoint, asking it to include the
        # programmes that belong to the series.
        details_query = {
            'lang': 'e',
            'series_id': series_id,
            'show_prog_in_series': 'Y'
        }
        series_details = self._download_json(
            'https://www.s4c.cymru/df/series_details', series_id,
            query=details_query, note='Downloading series details JSON')

        # Every `id` found under `other_progs_in_series` becomes one entry.
        episode_ids = traverse_obj(
            series_details, ('other_progs_in_series', Ellipsis, 'id'))
        # Kept as a generator so the url results are only built on demand.
        entries = (
            self.url_result(
                'https://www.s4c.cymru/clic/programme/' + episode_id,
                S4CIE, episode_id)
            for episode_id in episode_ids)

        # The playlist title is the first programme's series_title, cleaned up
        # by txt_or_none.
        series_title = traverse_obj(
            series_details,
            ('full_prog_details', 0, 'series_title', T(txt_or_none)))

        return self.playlist_result(
            entries, playlist_id=series_id, playlist_title=series_title)

View File

@ -21,6 +21,9 @@ from ..utils import (
) )
def txt_or_none(v, default=None):
    # Convert `v` to stripped text; fall back to `default` when `v` is None
    # or when nothing is left after stripping whitespace.
    if v is None:
        return default
    text = compat_str(v).strip()
    return text if text else default
if not hasattr(InfoExtractor, '_match_valid_url'): if not hasattr(InfoExtractor, '_match_valid_url'):
import sys import sys
@ -35,9 +38,6 @@ if not hasattr(InfoExtractor, '_match_valid_url'):
RegexNotFoundError, RegexNotFoundError,
) )
def txt_or_none(v, default=None):
return default if v is None else (compat_str(v).strip() or default)
BaseIE = InfoExtractor BaseIE = InfoExtractor
class InfoExtractor(BaseIE): class InfoExtractor(BaseIE):

View File

@ -4248,6 +4248,7 @@ def update_url(url, **kwargs):
url: compat_str or parsed URL tuple url: compat_str or parsed URL tuple
if query_update is in kwargs, update query with if query_update is in kwargs, update query with
its value instead of replacing (overrides any `query`) its value instead of replacing (overrides any `query`)
NB: query_update expects parse_qs() format: [key: value_list, ...]
returns: compat_str returns: compat_str
""" """
if not kwargs: if not kwargs: