Merge branch 'yt-dlp:master' into misc-cleanup-another-one

Allow thumbnails with .jpe extension (#11408)

Fix 5ce582448e
Closes #11407
Authored by: bashonly
2024-11-29 18:51:24 +01:00 · 2024-10-29 22:10:03 -05:00 · 2024-10-29 23:25:46 +00:00 · 2024-10-29 23:24:17 +00:00 · 2024-10-28 12:08:46 +01:00 · 2024-10-27 23:18:25 +00:00
6 changed files with 97 additions and 46 deletions
--- a/devscripts/changelog_override.json
+++ b/devscripts/changelog_override.json
@@ -216,5 +216,23 @@
        "action": "add",
        "when": "d784464399b600ba9516bbcec6286f11d68974dd",
        "short": "[priority] **The minimum *required* Python version has been raised to 3.9**\nPython 3.8 reached its end-of-life on 2024.10.07, and yt-dlp has now removed support for it. As an unfortunate side effect, the official `yt-dlp.exe` and `yt-dlp_x86.exe` binaries are no longer supported on Windows 7. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)"
    },
    {
        "action": "change",
        "when": "914af9a0cf51c9a3f74aa88d952bee8334c67511",
        "short": "Expand paths in `--plugin-dirs` (#11334)",
        "authors": ["bashonly"]
    },
    {
        "action": "change",
        "when": "c29f5a7fae93a08f3cfbb6127b2faa75145b06a0",
        "short": "[ie/generic] Do not impersonate by default (#11336)",
        "authors": ["bashonly"]
    },
    {
        "action": "change",
        "when": "57212a5f97ce367590aaa5c3e9a135eead8f81f7",
        "short": "[ie/vimeo] Fix API retries (#11351)",
        "authors": ["bashonly"]
    }
 ]
--- a/devscripts/make_changelog.py
+++ b/devscripts/make_changelog.py
@@ -71,14 +71,13 @@ class CommitGroup(enum.Enum):
    def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
        group, _, subgroup = (group.strip().lower() for group in value.partition('/'))
-        result = cls.group_lookup().get(group)
+        if result := cls.group_lookup().get(group):
-        if not result:
+            return result, subgroup or None
        if subgroup:
            return None, value
            subgroup = group
            result = cls.subgroup_lookup().get(subgroup)
-        return result, subgroup or None
+        return cls.subgroup_lookup().get(group), group or None
@dataclass
@@ -136,8 +135,7 @@ class Changelog:
                first = False
                yield '\n<details><summary><h3>Changelog</h3></summary>\n'
-            group = groups[item]
+            if group := groups[item]:
            if group:
                yield self.format_module(item.value, group)
        if self._collapsible:
@@ -253,7 +251,7 @@ class CommitRange:
        ''', re.VERBOSE | re.DOTALL)
    EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
    REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
-    FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert|Improve)\s+([\da-f]{40})')
+    FIXES_RE = re.compile(r'(?i:(?:bug\s*)?fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Improve)\s+([\da-f]{40})')
    UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')
    def __init__(self, start, end, default_author=None):
@@ -287,11 +285,16 @@ class CommitRange:
            short = next(lines)
            skip = short.startswith('Release ') or short == '[version] update'
            fix_commitish = None
            if match := self.FIXES_RE.search(short):
                fix_commitish = match.group(1)
            authors = [default_author] if default_author else []
            for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
-                match = self.AUTHOR_INDICATOR_RE.match(line)
+                if match := self.AUTHOR_INDICATOR_RE.match(line):
                if match:
                    authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)
                if not fix_commitish and (match := self.FIXES_RE.fullmatch(line)):
                    fix_commitish = match.group(1)
            commit = Commit(commit_hash, short, authors)
            if skip and (self._start or not i):
@@ -301,21 +304,17 @@ class CommitRange:
                logger.debug(f'Reached Release commit, breaking: {commit}')
                break
-            revert_match = self.REVERT_RE.fullmatch(commit.short)
+            if match := self.REVERT_RE.fullmatch(commit.short):
-            if revert_match:
+                reverts[match.group(1)] = commit
                reverts[revert_match.group(1)] = commit
                continue
-            fix_match = self.FIXES_RE.search(commit.short)
+            if fix_commitish:
-            if fix_match:
+                fixes[fix_commitish].append(commit)
                commitish = fix_match.group(1)
                fixes[commitish].append(commit)
            commits[commit.hash] = commit
        for commitish, revert_commit in reverts.items():
-            reverted = commits.pop(commitish, None)
+            if reverted := commits.pop(commitish, None):
            if reverted:
                logger.debug(f'{commitish} fully reverted {reverted}')
            else:
                commits[revert_commit.hash] = revert_commit
@@ -461,8 +460,7 @@ def create_changelog(args):
    logger.info(f'Loaded {len(commits)} commits')
-    new_contributors = get_new_contributors(args.contributors_path, commits)
+    if new_contributors := get_new_contributors(args.contributors_path, commits):
    if new_contributors:
        if args.contributors:
            write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a')
        logger.info(f'New contributors: {", ".join(new_contributors)}')
--- a/yt_dlp/extractor/ccma.py
+++ b/yt_dlp/extractor/ccma.py
@@ -12,53 +12,86 @@ from ..utils import (
 class CCMAIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
+    IE_DESC = '3Cat, TV3 and Catalunya Ràdio'
    _VALID_URL = r'https?://(?:www\.)?3cat\.cat/(?:3cat|tv3/sx3)/[^/?#]+/(?P<type>video|audio)/(?P<id>\d+)'
    _TESTS = [{
-        'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
+        # ccma.cat/tv3/alacarta/ URLs redirect to 3cat.cat/3cat/
        'url': 'https://www.3cat.cat/3cat/lespot-de-la-marato-de-tv3/video/5630208/',
        'md5': '7296ca43977c8ea4469e719c609b0871',
        'info_dict': {
            'id': '5630208',
            'ext': 'mp4',
-            'title': 'L\'espot de La Marató de TV3',
+            'title': 'L\'espot de La Marató 2016: Ictus i les lesions medul·lars i cerebrals traumàtiques',
            'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
            'timestamp': 1478608140,
            'upload_date': '20161108',
            'age_limit': 0,
            'alt_title': 'EsportMarató2016WEB_PerPublicar',
            'duration': 79,
            'thumbnail': 'https://img.3cat.cat/multimedia/jpg/4/6/1478536106664.jpg',
            'series': 'Dedicada a l\'ictus i les lesions medul·lars i cerebrals traumàtiques',
            'categories': ['Divulgació'],
        },
    }, {
-        'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
+        # ccma.cat/catradio/alacarta/ URLs redirect to 3cat.cat/3cat/
        'url': 'https://www.3cat.cat/3cat/el-consell-de-savis-analitza-el-derbi/audio/943685/',
        'md5': 'fa3e38f269329a278271276330261425',
        'info_dict': {
            'id': '943685',
            'ext': 'mp3',
            'title': 'El Consell de Savis analitza el derbi',
            'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
-            'upload_date': '20170512',
+            'upload_date': '20161217',
-            'timestamp': 1494622500,
+            'timestamp': 1482011700,
            'vcodec': 'none',
            'categories': ['Esports'],
            'series': 'Tot gira',
            'duration': 821,
            'thumbnail': 'https://img.3cat.cat/multimedia/jpg/8/9/1482002602598.jpg',
        },
    }, {
-        'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
+        'url': 'https://www.3cat.cat/3cat/crims-josep-tallada-lespereu-me-part-1/video/6031387/',
-        'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
+        'md5': '27493513d08a3e5605814aee9bb778d2',
        'info_dict': {
            'id': '6031387',
            'ext': 'mp4',
-            'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
+            'title': 'T1xC5 - Josep Talleda, l\'"Espereu-me" (part 1)',
            'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
-            'timestamp': 1582577700,
+            'timestamp': 1582577919,
            'upload_date': '20200224',
-            'subtitles': 'mincount:4',
+            'subtitles': 'mincount:1',
-            'age_limit': 16,
+            'age_limit': 13,
            'series': 'Crims',
            'thumbnail': 'https://img.3cat.cat/multimedia/jpg/1/9/1582564376991.jpg',
            'duration': 3203,
            'categories': ['Divulgació'],
            'alt_title': 'Crims - 5 - Josep Talleda, l\'"Espereu-me" (1a part) - Josep Talleda, l\'"Espereu-me" (part 1)',
            'episode_number': 5,
            'episode': 'Episode 5',
        },
    }, {
        'url': 'https://www.3cat.cat/tv3/sx3/una-mosca-volava-per-la-llum/video/5759227/',
        'info_dict': {
            'id': '5759227',
            'ext': 'mp4',
            'title': 'Una mosca volava per la llum',
            'alt_title': '17Z004Ç UNA MOSCA VOLAVA PER LA LLUM',
            'description': 'md5:9ab64276944b0825336f4147f13f7854',
            'series': 'Mic',
            'upload_date': '20180411',
            'timestamp': 1523440105,
            'duration': 160,
            'age_limit': 0,
            'thumbnail': 'https://img.3cat.cat/multimedia/jpg/6/1/1524071667216.jpg',
            'categories': ['Música'],
        },
    }]
    def _real_extract(self, url):
-        media_type, media_id = self._match_valid_url(url).groups()
+        media_type, media_id = self._match_valid_url(url).group('type', 'id')
        media = self._download_json(
-            'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
+            'http://api-media.3cat.cat/pvideo/media.jsp', media_id, query={
                'media': media_type,
                'idint': media_id,
                'format': 'dm',
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -208,7 +208,6 @@ class SoundcloudBaseIE(InfoExtractor):
    def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False):
        track_id = str(info['id'])
        title = info['title']
        format_urls = set()
        formats = []
@@ -367,7 +366,7 @@ class SoundcloudBaseIE(InfoExtractor):
            'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
            'uploader_url': user.get('permalink_url'),
            'timestamp': unified_timestamp(info.get('created_at')),
-            'title': title,
+            'title': info.get('title'),
            'description': info.get('description'),
            'thumbnails': thumbnails,
            'duration': float_or_none(info.get('duration'), 1000),
@@ -377,7 +376,8 @@ class SoundcloudBaseIE(InfoExtractor):
            'like_count': extract_count('favoritings') or extract_count('likes'),
            'comment_count': extract_count('comment'),
            'repost_count': extract_count('reposts'),
-            'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)),
+            'genres': traverse_obj(info, ('genre', {str}, filter, all, filter)),
            'artists': traverse_obj(info, ('publisher_metadata', 'artist', {str}, filter, all, filter)),
            'formats': formats if not extract_flat else None,
        }
@@ -429,7 +429,6 @@ class SoundcloudIE(SoundcloudBaseIE):
                'repost_count': int,
                'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
                'uploader_url': 'https://soundcloud.com/ethmusic',
                'genres': [],
            },
        },
        # geo-restricted
@@ -453,6 +452,7 @@ class SoundcloudIE(SoundcloudBaseIE):
                'uploader_url': 'https://soundcloud.com/the-concept-band',
                'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
                'genres': ['Alternative'],
                'artists': ['The Royal Concept'],
            },
        },
        # private link
@@ -525,6 +525,7 @@ class SoundcloudIE(SoundcloudBaseIE):
                'repost_count': int,
                'view_count': int,
                'genres': ['Dance & EDM'],
                'artists': ['80M'],
            },
        },
        # private link, downloadable format
@@ -549,6 +550,7 @@ class SoundcloudIE(SoundcloudBaseIE):
                'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
                'uploader_url': 'https://soundcloud.com/oriuplift',
                'genres': ['Trance'],
                'artists': ['Ori Uplift'],
            },
        },
        # no album art, use avatar pic for thumbnail
@@ -572,7 +574,7 @@ class SoundcloudIE(SoundcloudBaseIE):
                'comment_count': int,
                'repost_count': int,
                'uploader_url': 'https://soundcloud.com/garyvee',
-                'genres': [],
+                'artists': ['MadReal'],
            },
            'params': {
                'skip_download': True,
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -5165,6 +5165,7 @@ class _UnsafeExtensionError(Exception):
        'ico',
        'image',
        'jng',
        'jpe',
        'jpeg',
        'jxl',
        'svg',
--- a/yt_dlp/utils/traversal.py
+++ b/yt_dlp/utils/traversal.py
@@ -391,14 +391,13 @@ def find_element(*, tag: str, html=False): ...
 def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False):
    # deliberately using `id=` and `cls=` for ease of readability
    assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required'
-    if not tag:
+    ANY_TAG = r'[\w:.-]+'
        tag = r'[\w:.-]+'
    if attr and value:
        assert not cls, 'Cannot match both attr and cls'
        assert not id, 'Cannot match both attr and id'
        func = get_element_html_by_attribute if html else get_element_by_attribute
-        return functools.partial(func, attr, value, tag=tag)
+        return functools.partial(func, attr, value, tag=tag or ANY_TAG)
    elif cls:
        assert not id, 'Cannot match both cls and id'
@@ -408,7 +407,7 @@ def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=Fal
    elif id:
        func = get_element_html_by_id if html else get_element_by_id
-        return functools.partial(func, id, tag=tag)
+        return functools.partial(func, id, tag=tag or ANY_TAG)
    index = int(bool(html))
    return lambda html: get_element_text_and_html_by_tag(tag, html)[index]
Author	SHA1	Message	Date
bashonly	261b2be36e	Merge branch 'yt-dlp:master' into misc-cleanup-another-one	2024-10-29 22:10:03 -05:00
bashonly	5bc5fb2835	Allow thumbnails with `.jpe` extension (#11408) Fix `5ce582448e` Closes #11407 Authored by: bashonly	2024-10-29 23:25:46 +00:00
bashonly	f93c16395c	[utils] Fix `find_element` by class (#11402) Fix `d710a6ca7c` Authored by: bashonly	2024-10-29 23:24:17 +00:00
sepro	f101e5d34c	[ie/Soundcloud] Extract artists (#11377) Closes #11375 Authored by: seproDev	2024-10-28 12:08:46 +01:00
JAB	330335386d	[ie/ccma] Support new 3cat.cat domain (#11222) Closes #11163 Authored by: JoseAngelB	2024-10-27 23:18:25 +00:00
bashonly	0a3991edae	[devscripts] `make_changelog`: Parse full commit message for fixes (#11366) Authored by: Grub4K, bashonly Co-authored-by: Simon Sawicki <contact@grub4k.xyz>	2024-10-27 23:00:02 +00:00