Compare commits

...

7 Commits

Author SHA1 Message Date
bashonly
eaa7c5cd1c
[utils] add another trim_str test case
Authored by: bashonly
2024-10-29 22:47:54 -05:00
bashonly
756e9c16d1
[utils] Add trim_str traversal helper
Authored by: bashonly
2024-10-29 22:39:18 -05:00
bashonly
35310f8dfa
[utils] Fix bug in remove_end with empty string end args
Authored by: bashonly
2024-10-29 22:22:10 -05:00
bashonly
abf569e194
no partial_application for remove_start, remove_end
Authored by: bashonly
2024-10-29 22:16:17 -05:00
bashonly
4e1cb8e0c5
Merge branch 'yt-dlp:master' into feat/more-papply 2024-10-29 22:10:50 -05:00
bashonly
5bc5fb2835
Allow thumbnails with .jpe extension (#11408)
Fix 5ce582448e

Closes #11407
Authored by: bashonly
2024-10-29 23:25:46 +00:00
bashonly
f93c16395c
[utils] Fix find_element by class (#11402)
Fix d710a6ca7c

Authored by: bashonly
2024-10-29 23:24:17 +00:00
4 changed files with 37 additions and 8 deletions

View File

@ -12,9 +12,10 @@ from yt_dlp.utils import (
str_or_none, str_or_none,
) )
from yt_dlp.utils.traversal import ( from yt_dlp.utils.traversal import (
traverse_obj,
require, require,
subs_list_to_dict, subs_list_to_dict,
traverse_obj,
trim_str,
) )
_TEST_DATA = { _TEST_DATA = {
@ -495,6 +496,20 @@ class TestTraversalHelpers:
{'url': 'https://example.com/subs/en2', 'ext': 'ext'}, {'url': 'https://example.com/subs/en2', 'ext': 'ext'},
]}, '`quality` key should sort subtitle list accordingly' ]}, '`quality` key should sort subtitle list accordingly'
def test_trim_str(self):
    """`trim_str` takes keyword-only affixes and returns a trimming callable."""
    # Affixes must be given by keyword; a positional argument is rejected
    with pytest.raises(TypeError):
        trim_str('positional')

    assert callable(trim_str(start='a'))

    # (keyword arguments for trim_str, expected result of trimming 'abc')
    cases = [
        ({'start': 'ab'}, 'c'),
        ({'end': 'bc'}, 'a'),
        ({'start': 'a', 'end': 'c'}, 'b'),
        ({'start': 'ab', 'end': 'c'}, ''),
        ({'start': 'a', 'end': 'bc'}, ''),
        # Prefix and suffix overlapping inside the input leave nothing behind
        ({'start': 'ab', 'end': 'bc'}, ''),
        ({'start': 'abc', 'end': 'abc'}, ''),
        # Empty affixes leave the input untouched
        ({'start': '', 'end': ''}, 'abc'),
    ]
    for kwargs, expected in cases:
        assert trim_str(**kwargs)('abc') == expected
class TestDictGet: class TestDictGet:
def test_dict_get(self): def test_dict_get(self):

View File

@ -345,11 +345,13 @@ class TestUtil(unittest.TestCase):
self.assertEqual(remove_start(None, 'A - '), None) self.assertEqual(remove_start(None, 'A - '), None)
self.assertEqual(remove_start('A - B', 'A - '), 'B') self.assertEqual(remove_start('A - B', 'A - '), 'B')
self.assertEqual(remove_start('B - A', 'A - '), 'B - A') self.assertEqual(remove_start('B - A', 'A - '), 'B - A')
self.assertEqual(remove_start('non-empty', ''), 'non-empty')
def test_remove_end(self): def test_remove_end(self):
self.assertEqual(remove_end(None, ' - B'), None) self.assertEqual(remove_end(None, ' - B'), None)
self.assertEqual(remove_end('A - B', ' - B'), 'A') self.assertEqual(remove_end('A - B', ' - B'), 'A')
self.assertEqual(remove_end('B - A', ' - B'), 'B - A') self.assertEqual(remove_end('B - A', ' - B'), 'B - A')
self.assertEqual(remove_end('non-empty', ''), 'non-empty')
def test_remove_quotes(self): def test_remove_quotes(self):
self.assertEqual(remove_quotes(None), None) self.assertEqual(remove_quotes(None), None)

View File

@ -1958,14 +1958,12 @@ def setproctitle(title):
return # Strange libc, just skip this return # Strange libc, just skip this
@partial_application
def remove_start(s, start): def remove_start(s, start):
return s[len(start):] if s is not None and s.startswith(start) else s return s[len(start):] if s is not None and s.startswith(start) else s
@partial_application
def remove_end(s, end): def remove_end(s, end):
return s[:-len(end)] if s is not None and s.endswith(end) else s return s[:-len(end)] if s is not None and end and s.endswith(end) else s
def remove_quotes(s): def remove_quotes(s):
@ -5177,6 +5175,7 @@ class _UnsafeExtensionError(Exception):
'ico', 'ico',
'image', 'image',
'jng', 'jng',
'jpe',
'jpeg', 'jpeg',
'jxl', 'jxl',
'svg', 'svg',

View File

@ -391,14 +391,13 @@ def find_element(*, tag: str, html=False): ...
def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False): def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False):
# deliberately using `id=` and `cls=` for ease of readability # deliberately using `id=` and `cls=` for ease of readability
assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required' assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required'
if not tag: ANY_TAG = r'[\w:.-]+'
tag = r'[\w:.-]+'
if attr and value: if attr and value:
assert not cls, 'Cannot match both attr and cls' assert not cls, 'Cannot match both attr and cls'
assert not id, 'Cannot match both attr and id' assert not id, 'Cannot match both attr and id'
func = get_element_html_by_attribute if html else get_element_by_attribute func = get_element_html_by_attribute if html else get_element_by_attribute
return functools.partial(func, attr, value, tag=tag) return functools.partial(func, attr, value, tag=tag or ANY_TAG)
elif cls: elif cls:
assert not id, 'Cannot match both cls and id' assert not id, 'Cannot match both cls and id'
@ -408,7 +407,7 @@ def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=Fal
elif id: elif id:
func = get_element_html_by_id if html else get_element_by_id func = get_element_html_by_id if html else get_element_by_id
return functools.partial(func, id, tag=tag) return functools.partial(func, id, tag=tag or ANY_TAG)
index = int(bool(html)) index = int(bool(html))
return lambda html: get_element_text_and_html_by_tag(tag, html)[index] return lambda html: get_element_text_and_html_by_tag(tag, html)[index]
@ -436,6 +435,20 @@ def find_elements(*, tag=None, cls=None, attr=None, value=None, html=False):
return functools.partial(func, cls) return functools.partial(func, cls)
def trim_str(*, start=None, end=None):
    """Create a callable that strips a fixed prefix and/or suffix from a string.

    Both affixes are keyword-only and optional. The returned callable takes a
    single value `s`:
      - `None` is passed through unchanged
      - `start` is removed only if it is non-empty and `s` starts with it
      - `end` is removed only if it is non-empty and `s` ends with it

    Both checks are made against the original `s`, so when the prefix and
    suffix overlap inside `s` the result is the empty string.
    """
    def trim(s):
        if s is None:
            return None

        # Offset of the first character to keep
        begin = len(start) if start and s.startswith(start) else 0

        if not (end and s.endswith(end)):
            return s[begin:]
        # `end` is non-empty here, so the negative stop index is never `-0`
        return s[begin:-len(end)]

    return trim
def get_first(obj, *paths, **kwargs): def get_first(obj, *paths, **kwargs):
return traverse_obj(obj, *((..., *variadic(keys)) for keys in paths), **kwargs, get_all=False) return traverse_obj(obj, *((..., *variadic(keys)) for keys in paths), **kwargs, get_all=False)