[utils] add another trim_str test case

Authored by: bashonly
[utils] Add trim_str traversal helper
2025-02-18 19:01:25 +01:00 · 2024-10-29 22:47:54 -05:00 · 2024-10-29 22:39:18 -05:00 · 2024-10-29 22:22:10 -05:00 · 2024-10-29 22:16:17 -05:00 · 2024-10-29 22:10:50 -05:00
4 changed files with 37 additions and 8 deletions
--- a/test/test_traversal.py
+++ b/test/test_traversal.py
@ -12,9 +12,10 @@ from yt_dlp.utils import (
    str_or_none,
 )
 from yt_dlp.utils.traversal import (
    traverse_obj,
    require,
    subs_list_to_dict,
    traverse_obj,
    trim_str,
 )
 _TEST_DATA = {
@ -495,6 +496,20 @@ class TestTraversalHelpers:
            {'url': 'https://example.com/subs/en2', 'ext': 'ext'},
        ]}, '`quality` key should sort subtitle list accordingly'
    def test_trim_str(self):
        # `trim_str` only accepts keyword arguments
        with pytest.raises(TypeError):
            trim_str('positional')

        # It is a factory: calling it yields the actual trimming callable
        assert callable(trim_str(start='a'))

        # (factory kwargs, expected result when the trimmer is applied to 'abc')
        cases = (
            ({'start': 'ab'}, 'c'),
            ({'end': 'bc'}, 'a'),
            ({'start': 'a', 'end': 'c'}, 'b'),
            # prefix and suffix together cover (or overlap on) the whole string
            ({'start': 'ab', 'end': 'c'}, ''),
            ({'start': 'a', 'end': 'bc'}, ''),
            ({'start': 'ab', 'end': 'bc'}, ''),
            ({'start': 'abc', 'end': 'abc'}, ''),
            # empty affixes must leave the input untouched
            ({'start': '', 'end': ''}, 'abc'),
        )
        for kwargs, expected in cases:
            assert trim_str(**kwargs)('abc') == expected
 class TestDictGet:
    def test_dict_get(self):
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -345,11 +345,13 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(remove_start(None, 'A - '), None)
        self.assertEqual(remove_start('A - B', 'A - '), 'B')
        self.assertEqual(remove_start('B - A', 'A - '), 'B - A')
        self.assertEqual(remove_start('non-empty', ''), 'non-empty')
    def test_remove_end(self):
        # (input, suffix, expected) triples; the last one checks that an
        # empty suffix leaves the input untouched
        cases = (
            (None, ' - B', None),
            ('A - B', ' - B', 'A'),
            ('B - A', ' - B', 'B - A'),
            ('non-empty', '', 'non-empty'),
        )
        for value, suffix, expected in cases:
            self.assertEqual(remove_end(value, suffix), expected)
    def test_remove_quotes(self):
        self.assertEqual(remove_quotes(None), None)
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@ -1958,14 +1958,12 @@ def setproctitle(title):
        return  # Strange libc, just skip this
@partial_application
 def remove_start(s, start):
    """Return `s` with the leading `start` removed.

    `s` is returned unchanged when it is None or does not begin with `start`.
    """
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
@partial_application
 def remove_end(s, end):
-    return s[:-len(end)] if s is not None and s.endswith(end) else s
+    return s[:-len(end)] if s is not None and end and s.endswith(end) else s
 def remove_quotes(s):
@ -5177,6 +5175,7 @@ class _UnsafeExtensionError(Exception):
        'ico',
        'image',
        'jng',
        'jpe',
        'jpeg',
        'jxl',
        'svg',
--- a/yt_dlp/utils/traversal.py
+++ b/yt_dlp/utils/traversal.py
@ -391,14 +391,13 @@ def find_element(*, tag: str, html=False): ...
 def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False):
    # deliberately using `id=` and `cls=` for ease of readability
    assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required'
-    if not tag:
+    ANY_TAG = r'[\w:.-]+'
        tag = r'[\w:.-]+'
    if attr and value:
        assert not cls, 'Cannot match both attr and cls'
        assert not id, 'Cannot match both attr and id'
        func = get_element_html_by_attribute if html else get_element_by_attribute
-        return functools.partial(func, attr, value, tag=tag)
+        return functools.partial(func, attr, value, tag=tag or ANY_TAG)
    elif cls:
        assert not id, 'Cannot match both cls and id'
@ -408,7 +407,7 @@ def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=Fal
    elif id:
        func = get_element_html_by_id if html else get_element_by_id
-        return functools.partial(func, id, tag=tag)
+        return functools.partial(func, id, tag=tag or ANY_TAG)
    index = int(bool(html))
    return lambda html: get_element_text_and_html_by_tag(tag, html)[index]
@ -436,6 +435,20 @@ def find_elements(*, tag=None, cls=None, attr=None, value=None, html=False):
    return functools.partial(func, cls)
 def trim_str(*, start=None, end=None):
    """Build a callable that strips a fixed prefix and/or suffix from a string.

    Both arguments are keyword-only. The returned callable:
      - returns None when given None
      - drops `start` from the front if the input begins with it
      - drops `end` from the back if the input ends with it
      - ignores falsy `start`/`end` values (None, empty string)

    When the matched prefix and suffix overlap, the result is an empty string.
    """
    def trim(s):
        if s is None:
            return None
        # Both bounds are decided against the untrimmed input
        begin = len(start) if start and s.startswith(start) else 0
        stop = -len(end) if end and s.endswith(end) else None
        return s[begin:stop]
    return trim
 def get_first(obj, *paths, **kwargs):
    """Call `traverse_obj` on `obj` with each path prefixed by `...` and `get_all=False`.

    Every entry of `paths` is passed through `variadic` and unpacked after a
    leading `...`. Remaining keyword arguments are forwarded unchanged.
    """
    # NOTE(review): presumably `...` branches over the items of `obj` and
    # `get_all=False` selects the first match - confirm against `traverse_obj`
    branching_paths = [(..., *variadic(path)) for path in paths]
    return traverse_obj(obj, *branching_paths, **kwargs, get_all=False)
Author	SHA1	Message	Date
bashonly	eaa7c5cd1c	[utils] add another `trim_str` test case Authored by: bashonly	2024-10-29 22:47:54 -05:00
bashonly	756e9c16d1	[utils] Add `trim_str` traversal helper Authored by: bashonly	2024-10-29 22:39:18 -05:00
bashonly	35310f8dfa	[utils] Fix bug in `remove_end` with empty string `end` args Authored by: bashonly	2024-10-29 22:22:10 -05:00
bashonly	abf569e194	no `partial_application` for `remove_start`, `remove_end` Authored by: bashonly	2024-10-29 22:16:17 -05:00
bashonly	4e1cb8e0c5	Merge branch 'yt-dlp:master' into feat/more-papply	2024-10-29 22:10:50 -05:00
bashonly	5bc5fb2835	Allow thumbnails with `.jpe` extension (#11408 ) Fix `5ce582448e` Closes #11407 Authored by: bashonly	2024-10-29 23:25:46 +00:00
bashonly	f93c16395c	[utils] Fix `find_element` by class (#11402 ) Fix `d710a6ca7c` Authored by: bashonly	2024-10-29 23:24:17 +00:00