[docs] Make `--break-on-existing` help string more clear

Authored by: bashonly
[ie] additional `partial_application`-related cleanup
2024-11-29 02:31:25 +01:00 · 2024-10-31 23:17:35 -05:00 · 2024-10-31 23:17:05 -05:00 · 2024-10-31 22:34:06 -05:00 · 2024-10-31 22:32:56 -05:00 · 2024-10-31 22:30:24 -05:00
28 changed files with 141 additions and 106 deletions
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@ -282,6 +282,7 @@ jobs:
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          verbose: true
+          attestations: false  # Currently doesn't work w/ reusable workflows (breaks nightly)

  publish:
    needs: [prepare, build]
--- a/README.md
+++ b/README.md
@ -479,7 +479,8 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
    --no-download-archive           Do not use archive file (default)
    --max-downloads NUMBER          Abort after downloading NUMBER files
    --break-on-existing             Stop the download process when encountering
-                                    a file that is in the archive
+                                    a file that is in the archive supplied with
+                                    the --download-archive option
    --no-break-on-existing          Do not stop the download process when
                                    encountering a file that is in the archive
                                    (default)
--- a/test/test_traversal.py
+++ b/test/test_traversal.py
@ -478,7 +478,7 @@ class TestTraversalHelpers:
            {'url': 'https://example.com/subs/en', 'name': 'en'},
        ], [..., {
            'id': 'name',
-            'ext': ['url', {lambda x: determine_ext(x, default_ext=None)}],
+            'ext': ['url', {determine_ext(default_ext=None)}],
            'url': 'url',
        }, all, {subs_list_to_dict(ext='ext')}]) == {
            'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}],
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -2156,7 +2156,7 @@ Line 1
        assert callable(int_or_none(scale=10)), 'missing positional parameter should apply partially'
        assert int_or_none(10, scale=0.1) == 100, 'positionally passed argument should call function'
        assert int_or_none(v=10) == 10, 'keyword passed positional should call function'
-        assert int_or_none(scale=0.1)(10) == 100, 'call after partial applicatino should call the function'
+        assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function'

        assert callable(join_nonempty(delim=', ')), 'varargs positional should apply partially'
        assert callable(join_nonempty()), 'varargs positional should apply partially'
--- a/yt_dlp/extractor/bandcamp.py
+++ b/yt_dlp/extractor/bandcamp.py
@ -520,4 +520,4 @@ class BandcampUserIE(InfoExtractor):

        return self.playlist_from_matches(
            self._yield_items(webpage), uploader, f'Discography of {uploader}',
-            getter=functools.partial(urljoin, url))
+            getter=urljoin(url))
--- a/yt_dlp/extractor/bpb.py
+++ b/yt_dlp/extractor/bpb.py
@ -165,6 +165,6 @@ class BpbIE(InfoExtractor):
            'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
            **traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
                'formats': (':sources', ..., {self._process_source}),
-                'thumbnail': ('poster', {lambda x: urljoin(url, x)}),
+                'thumbnail': ('poster', {urljoin(url)}),
            }),
        }
--- a/yt_dlp/extractor/bravotv.py
+++ b/yt_dlp/extractor/bravotv.py
@ -145,10 +145,9 @@ class BravoTVIE(AdobePassIE):
        tp_metadata = self._download_json(
            update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)

-        seconds_or_none = lambda x: float_or_none(x, 1000)
        chapters = traverse_obj(tp_metadata, ('chapters', ..., {
-            'start_time': ('startTime', {seconds_or_none}),
-            'end_time': ('endTime', {seconds_or_none}),
+            'start_time': ('startTime', {float_or_none(scale=1000)}),
+            'end_time': ('endTime', {float_or_none(scale=1000)}),
        }))
        # prune pointless single chapters that span the entire duration from short videos
        if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
@ -168,8 +167,8 @@ class BravoTVIE(AdobePassIE):
            **merge_dicts(traverse_obj(tp_metadata, {
                'title': 'title',
                'description': 'description',
-                'duration': ('duration', {seconds_or_none}),
-                'timestamp': ('pubDate', {seconds_or_none}),
+                'duration': ('duration', {float_or_none(scale=1000)}),
+                'timestamp': ('pubDate', {float_or_none(scale=1000)}),
                'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
                'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
                'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),
--- a/yt_dlp/extractor/caffeinetv.py
+++ b/yt_dlp/extractor/caffeinetv.py
@ -62,7 +62,7 @@ class CaffeineTVIE(InfoExtractor):
                'title': ('broadcast_title', {str}),
                'duration': ('content_duration', {int_or_none}),
                'timestamp': ('broadcast_start_time', {parse_iso8601}),
-                'thumbnail': ('preview_image_path', {lambda x: urljoin(url, x)}),
+                'thumbnail': ('preview_image_path', {urljoin(url)}),
            }),
            'age_limit': {
                # assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@ -465,7 +465,7 @@ class CBCPlayerIE(InfoExtractor):
            **traverse_obj(data, {
                'title': ('title', {str}),
                'description': ('description', {str.strip}),
-                'thumbnail': ('image', 'url', {url_or_none}, {functools.partial(update_url, query=None)}),
+                'thumbnail': ('image', 'url', {url_or_none}, {update_url(query=None)}),
                'timestamp': ('publishedAt', {float_or_none(scale=1000)}),
                'media_type': ('media', 'clipType', {str}),
                'series': ('showName', {str}),
--- a/yt_dlp/extractor/chzzk.py
+++ b/yt_dlp/extractor/chzzk.py
@ -1,5 +1,3 @@
-import functools
-
 from .common import InfoExtractor
 from ..utils import (
    UserNotLive,
@ -77,7 +75,7 @@ class CHZZKLiveIE(InfoExtractor):
            'thumbnails': thumbnails,
            **traverse_obj(live_detail, {
                'title': ('liveTitle', {str}),
-                'timestamp': ('openDate', {functools.partial(parse_iso8601, delimiter=' ')}),
+                'timestamp': ('openDate', {parse_iso8601(delimiter=' ')}),
                'concurrent_view_count': ('concurrentUserCount', {int_or_none}),
                'view_count': ('accumulateCount', {int_or_none}),
                'channel': ('channel', 'channelName', {str}),
--- a/yt_dlp/extractor/cnn.py
+++ b/yt_dlp/extractor/cnn.py
@ -1,4 +1,3 @@
-import functools
 import json
 import re

@ -199,7 +198,7 @@ class CNNIE(InfoExtractor):
                    'timestamp': ('data-publish-date', {parse_iso8601}),
                    'thumbnail': (
                        'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none},
-                        {functools.partial(update_url, query='c=original')}),
+                        {update_url(query='c=original')}),
                    'display_id': 'data-video-slug',
                }),
                **traverse_obj(video_data, {
--- a/yt_dlp/extractor/condenast.py
+++ b/yt_dlp/extractor/condenast.py
@ -12,6 +12,7 @@ from ..utils import (
    parse_iso8601,
    strip_or_none,
    try_get,
+    urljoin,
 )


@ -112,8 +113,7 @@ class CondeNastIE(InfoExtractor):
        m_paths = re.finditer(
            r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage)
        paths = orderedSet(m.group(1) for m in m_paths)
-        build_url = lambda path: urllib.parse.urljoin(base_url, path)
-        entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
+        entries = [self.url_result(urljoin(base_url, path), 'CondeNast') for path in paths]
        return self.playlist_result(entries, playlist_title=title)

    def _extract_video_params(self, webpage, display_id):
--- a/yt_dlp/extractor/laracasts.py
+++ b/yt_dlp/extractor/laracasts.py
@ -32,7 +32,7 @@ class LaracastsBaseIE(InfoExtractor):
            VimeoIE, url_transparent=True,
            **traverse_obj(episode, {
                'id': ('id', {int}, {str_or_none}),
-                'webpage_url': ('path', {lambda x: urljoin('https://laracasts.com', x)}),
+                'webpage_url': ('path', {urljoin('https://laracasts.com')}),
                'title': ('title', {clean_html}),
                'season_number': ('chapter', {int_or_none}),
                'episode_number': ('position', {int_or_none}),
--- a/yt_dlp/extractor/nebula.py
+++ b/yt_dlp/extractor/nebula.py
@ -86,7 +86,7 @@ class NebulaBaseIE(InfoExtractor):

    def _extract_video_metadata(self, episode):
        channel_url = traverse_obj(
-            episode, (('channel_slug', 'class_slug'), {lambda x: urljoin('https://nebula.tv/', x)}), get_all=False)
+            episode, (('channel_slug', 'class_slug'), {urljoin('https://nebula.tv/')}), get_all=False)
        return {
            'id': episode['id'].partition(':')[2],
            **traverse_obj(episode, {
--- a/yt_dlp/extractor/neteasemusic.py
+++ b/yt_dlp/extractor/neteasemusic.py
@ -36,10 +36,6 @@ class NetEaseMusicBaseIE(InfoExtractor):
    _API_BASE = 'http://music.163.com/api/'
    _GEO_BYPASS = False

-    @staticmethod
-    def _kilo_or_none(value):
-        return int_or_none(value, scale=1000)
-
    def _create_eapi_cipher(self, api_path, query_body, cookies):
        request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':'))

@ -101,7 +97,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
                'vcodec': 'none',
                **traverse_obj(song, {
                    'ext': ('type', {str}),
-                    'abr': ('br', {self._kilo_or_none}),
+                    'abr': ('br', {int_or_none(scale=1000)}),
                    'filesize': ('size', {int_or_none}),
                }),
            })
@ -282,9 +278,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
            **lyric_data,
            **traverse_obj(info, {
                'title': ('name', {str}),
-                'timestamp': ('album', 'publishTime', {self._kilo_or_none}),
+                'timestamp': ('album', 'publishTime', {int_or_none(scale=1000)}),
                'thumbnail': ('album', 'picUrl', {url_or_none}),
-                'duration': ('duration', {self._kilo_or_none}),
+                'duration': ('duration', {int_or_none(scale=1000)}),
                'album': ('album', 'name', {str}),
                'average_rating': ('score', {int_or_none}),
            }),
@ -440,7 +436,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
            'tags': ('tags', ..., {str}),
            'uploader': ('creator', 'nickname', {str}),
            'uploader_id': ('creator', 'userId', {str_or_none}),
-            'timestamp': ('updateTime', {self._kilo_or_none}),
+            'timestamp': ('updateTime', {int_or_none(scale=1000)}),
        }))
        if traverse_obj(info, ('playlist', 'specialType')) == 10:
            metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}'
@ -520,7 +516,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
                'description': (('desc', 'briefDesc'), {str}, filter),
                'upload_date': ('publishTime', {unified_strdate}),
                'thumbnail': ('cover', {url_or_none}),
-                'duration': ('duration', {self._kilo_or_none}),
+                'duration': ('duration', {int_or_none(scale=1000)}),
                'view_count': ('playCount', {int_or_none}),
                'like_count': ('likeCount', {int_or_none}),
                'comment_count': ('commentCount', {int_or_none}),
@ -588,7 +584,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
            'description': ('description', {str}),
            'creator': ('dj', 'brand', {str}),
            'thumbnail': ('coverUrl', {url_or_none}),
-            'timestamp': ('createTime', {self._kilo_or_none}),
+            'timestamp': ('createTime', {int_or_none(scale=1000)}),
        })

        if not self._yes_playlist(
@ -598,7 +594,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
            return {
                'id': str(info['mainSong']['id']),
                'formats': formats,
-                'duration': traverse_obj(info, ('mainSong', 'duration', {self._kilo_or_none})),
+                'duration': traverse_obj(info, ('mainSong', 'duration', {int_or_none(scale=1000)})),
                **metainfo,
            }

--- a/yt_dlp/extractor/parler.py
+++ b/yt_dlp/extractor/parler.py
@ -1,5 +1,3 @@
-import functools
-
 from .common import InfoExtractor
 from .youtube import YoutubeIE
 from ..utils import (
@ -83,7 +81,7 @@ class ParlerIE(InfoExtractor):
                'timestamp': ('date_created', {unified_timestamp}),
                'uploader': ('user', 'name', {strip_or_none}),
                'uploader_id': ('user', 'username', {str}),
-                'uploader_url': ('user', 'username', {functools.partial(urljoin, 'https://parler.com/')}),
+                'uploader_url': ('user', 'username', {urljoin('https://parler.com/')}),
                'view_count': ('views', {int_or_none}),
                'comment_count': ('total_comments', {int_or_none}),
                'repost_count': ('echos', {int_or_none}),
--- a/yt_dlp/extractor/pr0gramm.py
+++ b/yt_dlp/extractor/pr0gramm.py
@ -198,6 +198,6 @@ class Pr0grammIE(InfoExtractor):
                'dislike_count': ('down', {int}),
                'timestamp': ('created', {int}),
                'upload_date': ('created', {int}, {dt.date.fromtimestamp}, {lambda x: x.strftime('%Y%m%d')}),
-                'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)}),
+                'thumbnail': ('thumb', {urljoin('https://thumb.pr0gramm.com')}),
            }),
        }
--- a/yt_dlp/extractor/rtvslo.py
+++ b/yt_dlp/extractor/rtvslo.py
@ -187,4 +187,4 @@ class RTVSLOShowIE(InfoExtractor):
        return self.playlist_from_matches(
            re.findall(r'<a [^>]*\bhref="(/arhiv/[^"]+)"', webpage),
            playlist_id, self._html_extract_title(webpage),
-            getter=lambda x: urljoin('https://365.rtvslo.si', x), ie=RTVSLOIE)
+            getter=urljoin('https://365.rtvslo.si'), ie=RTVSLOIE)
--- a/yt_dlp/extractor/teamcoco.py
+++ b/yt_dlp/extractor/teamcoco.py
@ -136,7 +136,7 @@ class TeamcocoIE(TeamcocoBaseIE):
            'blocks', lambda _, v: v['name'] in ('meta-tags', 'video-player', 'video-info'), 'props', {dict})))

        thumbnail = traverse_obj(
-            info, (('image', 'poster'), {lambda x: urljoin('https://teamcoco.com/', x)}), get_all=False)
+            info, (('image', 'poster'), {urljoin('https://teamcoco.com/')}), get_all=False)
        video_id = traverse_obj(parse_qs(thumbnail), ('id', 0)) or display_id

        formats, subtitles = self._get_formats_and_subtitles(info, video_id)
--- a/yt_dlp/extractor/telewebion.py
+++ b/yt_dlp/extractor/telewebion.py
@ -10,7 +10,7 @@ from ..utils.traversal import traverse_obj


 def _fmt_url(url):
-    return functools.partial(format_field, template=url, default=None)
+    return format_field(template=url, default=None)


 class TelewebionIE(InfoExtractor):
--- a/yt_dlp/extractor/tencent.py
+++ b/yt_dlp/extractor/tencent.py
@ -1,4 +1,3 @@
-import functools
 import random
 import re
 import string
@ -278,7 +277,7 @@ class VQQSeriesIE(VQQBaseIE):
            webpage)]

        return self.playlist_from_matches(
-            episode_paths, series_id, ie=VQQVideoIE, getter=functools.partial(urljoin, url),
+            episode_paths, series_id, ie=VQQVideoIE, getter=urljoin(url),
            title=self._get_clean_title(traverse_obj(webpage_metadata, ('coverInfo', 'title'))
                                        or self._og_search_title(webpage)),
            description=(traverse_obj(webpage_metadata, ('coverInfo', 'description'))
@ -328,7 +327,7 @@ class WeTvBaseIE(TencentBaseIE):
                         or re.findall(r'<a[^>]+class="play-video__link"[^>]+href="(?P<path>[^"]+)', webpage))

        return self.playlist_from_matches(
-            episode_paths, series_id, ie=ie, getter=functools.partial(urljoin, url),
+            episode_paths, series_id, ie=ie, getter=urljoin(url),
            title=self._get_clean_title(traverse_obj(webpage_metadata, ('coverInfo', 'title'))
                                        or self._og_search_title(webpage)),
            description=(traverse_obj(webpage_metadata, ('coverInfo', 'description'))
--- a/yt_dlp/extractor/tenplay.py
+++ b/yt_dlp/extractor/tenplay.py
@ -1,4 +1,3 @@
-import functools
 import itertools

 from .common import InfoExtractor
@ -161,4 +160,4 @@ class TenPlaySeasonIE(InfoExtractor):
        return self.playlist_from_matches(
            self._entries(urljoin(url, episodes_carousel['loadMoreUrl']), playlist_id),
            playlist_id, traverse_obj(season_info, ('content', 0, 'title', {str})),
-            getter=functools.partial(urljoin, url))
+            getter=urljoin(url))
--- a/yt_dlp/extractor/theguardian.py
+++ b/yt_dlp/extractor/theguardian.py
@ -131,4 +131,4 @@ class TheGuardianPodcastPlaylistIE(InfoExtractor):

        return self.playlist_from_matches(
            self._entries(url, podcast_id), podcast_id, title, description=description,
-            ie=TheGuardianPodcastIE, getter=lambda x: urljoin('https://www.theguardian.com', x))
+            ie=TheGuardianPodcastIE, getter=urljoin('https://www.theguardian.com'))
--- a/yt_dlp/extractor/weibo.py
+++ b/yt_dlp/extractor/weibo.py
@ -114,7 +114,7 @@ class WeiboBaseIE(InfoExtractor):
                'thumbnail': ('page_info', 'page_pic', {url_or_none}),
                'uploader': ('user', 'screen_name', {str}),
                'uploader_id': ('user', ('id', 'id_str'), {str_or_none}),
-                'uploader_url': ('user', 'profile_url', {lambda x: urljoin('https://weibo.com/', x)}),
+                'uploader_url': ('user', 'profile_url', {urljoin('https://weibo.com/')}),
                'view_count': ('page_info', 'media_info', 'online_users_number', {int_or_none}),
                'like_count': ('attitudes_count', {int_or_none}),
                'repost_count': ('reposts_count', {int_or_none}),
--- a/yt_dlp/extractor/yle_areena.py
+++ b/yt_dlp/extractor/yle_areena.py
@ -1,12 +1,13 @@
 from .common import InfoExtractor
 from .kaltura import KalturaIE
 from ..utils import (
+    ExtractorError,
    int_or_none,
+    parse_iso8601,
    smuggle_url,
-    traverse_obj,
-    unified_strdate,
    url_or_none,
 )
+from ..utils.traversal import traverse_obj


 class YleAreenaIE(InfoExtractor):
@ -15,9 +16,9 @@ class YleAreenaIE(InfoExtractor):
    _TESTS = [
        {
            'url': 'https://areena.yle.fi/1-4371942',
-            'md5': '932edda0ecf5dfd6423804182d32f8ac',
+            'md5': 'd87e9a1e74e67e009990ddd413e426b4',
            'info_dict': {
-                'id': '0_a3tjk92c',
+                'id': '1-4371942',
                'ext': 'mp4',
                'title': 'Pouchit',
                'description': 'md5:01071d7056ceec375f63960f90c35366',
@ -26,37 +27,27 @@ class YleAreenaIE(InfoExtractor):
                'season_number': 1,
                'episode': 'Episode 2',
                'episode_number': 2,
-                'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/0_a3tjk92c/version/100061',
-                'uploader_id': 'ovp@yle.fi',
-                'duration': 1435,
-                'view_count': int,
-                'upload_date': '20181204',
-                'release_date': '20190106',
-                'timestamp': 1543916210,
-                'subtitles': {'fin': [{'url': r're:^https?://', 'ext': 'srt'}]},
+                'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
                'age_limit': 7,
-                'webpage_url': 'https://areena.yle.fi/1-4371942',
+                'release_date': '20190105',
+                'release_timestamp': 1546725660,
+                'duration': 1435,
            },
        },
        {
            'url': 'https://areena.yle.fi/1-2158940',
-            'md5': 'cecb603661004e36af8c5188b5212b12',
+            'md5': '6369ddc5e07b5fdaeda27a495184143c',
            'info_dict': {
-                'id': '1_l38iz9ur',
+                'id': '1-2158940',
                'ext': 'mp4',
                'title': 'Albi haluaa vessan',
-                'description': 'md5:15236d810c837bed861fae0e88663c33',
+                'description': 'Albi haluaa vessan.',
                'series': 'Albi Lumiukko',
-                'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/1_l38iz9ur/version/100021',
-                'uploader_id': 'ovp@yle.fi',
-                'duration': 319,
-                'view_count': int,
-                'upload_date': '20211202',
-                'release_date': '20211215',
-                'timestamp': 1638448202,
-                'subtitles': {},
+                'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
                'age_limit': 0,
-                'webpage_url': 'https://areena.yle.fi/1-2158940',
+                'release_date': '20211215',
+                'release_timestamp': 1639555200,
+                'duration': 319,
            },
        },
        {
@ -67,72 +58,125 @@ class YleAreenaIE(InfoExtractor):
                'title': 'HKO & Mälkki & Tanner',
                'description': 'md5:b4f1b1af2c6569b33f75179a86eea156',
                'series': 'Helsingin kaupunginorkesterin konsertteja',
-                'thumbnail': r're:^https?://.+\.jpg$',
+                'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
                'release_date': '20230120',
+                'release_timestamp': 1674242079,
+                'duration': 8004,
            },
            'params': {
                'skip_download': 'm3u8',
            },
        },
+        {
+            'url': 'https://areena.yle.fi/1-72251830',
+            'info_dict': {
+                'id': '1-72251830',
+                'ext': 'mp4',
+                'title': r're:Pentulive 2024 | Pentulive \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+                'description': 'md5:1f118707d9093bf894a34fbbc865397b',
+                'series': 'Pentulive',
+                'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
+                'live_status': 'is_live',
+                'release_date': '20241025',
+                'release_timestamp': 1729875600,
+            },
+            'params': {
+                'skip_download': 'livestream',
+            },
+        },
+        {
+            'url': 'https://areena.yle.fi/podcastit/1-71022852',
+            'info_dict': {
+                'id': '1-71022852',
+                'ext': 'mp3',
+                'title': 'Värityspäivä',
+                'description': 'md5:c3a02b0455ec71d32cbe09d32ec161e2',
+                'series': 'Murun ja Paukun ikioma kaupunki',
+                'episode': 'Episode 1',
+                'episode_number': 1,
+                'release_date': '20240607',
+                'release_timestamp': 1717736400,
+                'duration': 442,
+            },
+        },
    ]

    def _real_extract(self, url):
        video_id, is_podcast = self._match_valid_url(url).group('id', 'podcast')
-        info = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={})
+        json_ld = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={})
        video_data = self._download_json(
            f'https://player.api.yle.fi/v1/preview/{video_id}.json?app_id=player_static_prod&app_key=8930d72170e48303cf5f3867780d549b',
            video_id, headers={
                'origin': 'https://areena.yle.fi',
                'referer': 'https://areena.yle.fi/',
                'content-type': 'application/json',
-            })
+            })['data']

        # Example title: 'K1, J2: Pouchit | Modernit miehet'
        season_number, episode_number, episode, series = self._search_regex(
            r'K(?P<season_no>\d+),\s*J(?P<episode_no>\d+):?\s*\b(?P<episode>[^|]+)\s*|\s*(?P<series>.+)',
-            info.get('title') or '', 'episode metadata', group=('season_no', 'episode_no', 'episode', 'series'),
+            json_ld.get('title') or '', 'episode metadata', group=('season_no', 'episode_no', 'episode', 'series'),
            default=(None, None, None, None))
-        description = traverse_obj(video_data, ('data', 'ongoing_ondemand', 'description', 'fin'), expected_type=str)
+        description = traverse_obj(video_data, ('ongoing_ondemand', 'description', 'fin', {str}))

        subtitles = {}
-        for sub in traverse_obj(video_data, ('data', 'ongoing_ondemand', 'subtitles', ...)):
-            if url_or_none(sub.get('uri')):
+        for sub in traverse_obj(video_data, ('ongoing_ondemand', 'subtitles', lambda _, v: url_or_none(v['uri']))):
            subtitles.setdefault(sub.get('language') or 'und', []).append({
                'url': sub['uri'],
                'ext': 'srt',
                'name': sub.get('kind'),
            })

-        if is_podcast:
-            info_dict = {
-                'url': video_data['data']['ongoing_ondemand']['media_url'],
-            }
-        elif kaltura_id := traverse_obj(video_data, ('data', 'ongoing_ondemand', 'kaltura', 'id', {str})):
-            info_dict = {
+        info_dict, metadata = {}, {}
+        if is_podcast and traverse_obj(video_data, ('ongoing_ondemand', 'media_url', {url_or_none})):
+            metadata = video_data['ongoing_ondemand']
+            info_dict['url'] = metadata['media_url']
+        elif traverse_obj(video_data, ('ongoing_event', 'manifest_url', {url_or_none})):
+            metadata = video_data['ongoing_event']
+            metadata.pop('duration', None)  # Duration is not accurate for livestreams
+            info_dict['live_status'] = 'is_live'
+        elif traverse_obj(video_data, ('ongoing_ondemand', 'manifest_url', {url_or_none})):
+            metadata = video_data['ongoing_ondemand']
+        # XXX: Has all externally-hosted Kaltura content been moved to native hosting?
+        elif kaltura_id := traverse_obj(video_data, ('ongoing_ondemand', 'kaltura', 'id', {str})):
+            metadata = video_data['ongoing_ondemand']
+            info_dict.update({
                '_type': 'url_transparent',
                'url': smuggle_url(f'kaltura:1955031:{kaltura_id}', {'source_url': url}),
                'ie_key': KalturaIE.ie_key(),
-            }
+            })
+        elif traverse_obj(video_data, ('gone', {dict})):
+            self.raise_no_formats('The content is no longer available', expected=True, video_id=video_id)
+            metadata = video_data['gone']
        else:
-            formats, subs = self._extract_m3u8_formats_and_subtitles(
-                video_data['data']['ongoing_ondemand']['manifest_url'], video_id, 'mp4', m3u8_id='hls')
+            raise ExtractorError('Unable to extract content')
+
+        if not info_dict.get('url') and metadata.get('manifest_url'):
+            info_dict['formats'], subs = self._extract_m3u8_formats_and_subtitles(
+                metadata['manifest_url'], video_id, 'mp4', m3u8_id='hls')
            self._merge_subtitles(subs, target=subtitles)
-            info_dict = {'formats': formats}

        return {
-            **info_dict,
+            **traverse_obj(json_ld, {
+                'title': 'title',
+                'thumbnails': ('thumbnails', ..., {'url': 'url'}),
+            }),
            'id': video_id,
-            'title': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'title', 'fin'), expected_type=str)
-                      or episode or info.get('title')),
+            'title': episode,
            'description': description,
-            'series': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'series', 'title', 'fin'), expected_type=str)
-                       or series),
+            'series': series,
            'season_number': (int_or_none(self._search_regex(r'Kausi (\d+)', description, 'season number', default=None))
                              or int_or_none(season_number)),
-            'episode_number': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'episode_number'), expected_type=int_or_none)
-                               or int_or_none(episode_number)),
-            'thumbnails': traverse_obj(info, ('thumbnails', ..., {'url': 'url'})),
-            'age_limit': traverse_obj(video_data, ('data', 'ongoing_ondemand', 'content_rating', 'age_restriction'), expected_type=int_or_none),
+            'episode_number': int_or_none(episode_number),
            'subtitles': subtitles or None,
-            'release_date': unified_strdate(traverse_obj(video_data, ('data', 'ongoing_ondemand', 'start_time'), expected_type=str)),
+            **traverse_obj(metadata, {
+                'title': ('title', 'fin', {str}),
+                'description': ('description', 'fin', {str}),
+                'series': ('series', 'title', 'fin', {str}),
+                'episode_number': ('episode_number', {int_or_none}),
+                'age_limit': ('content_rating', 'age_restriction', {int_or_none}),
+                'release_timestamp': ('start_time', {parse_iso8601}),
+                'duration': ('duration', 'duration_in_seconds', {int_or_none}),
+            }),
+            **info_dict,
        }
--- a/yt_dlp/extractor/youporn.py
+++ b/yt_dlp/extractor/youporn.py
@ -247,7 +247,7 @@ class YouPornListBase(InfoExtractor):
            if not html:
                return
            for element in get_elements_html_by_class('video-title', html):
-                if video_url := traverse_obj(element, ({extract_attributes}, 'href', {lambda x: urljoin(url, x)})):
+                if video_url := traverse_obj(element, ({extract_attributes}, 'href', {urljoin(url)})):
                    yield self.url_result(video_url)

            if page_num is not None:
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@ -3637,7 +3637,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'author_is_verified': ('author', 'isVerified', {bool}),
                'author_url': ('author', 'channelCommand', 'innertubeCommand', (
                    ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'),
-                ), {lambda x: urljoin('https://www.youtube.com', x)}),
+                ), {urljoin('https://www.youtube.com')}),
            }, get_all=False),
            'is_favorited': (None if toolbar_entity_payload is None else
                             toolbar_entity_payload.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'),
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@ -700,7 +700,8 @@ def create_parser():
    selection.add_option(
        '--break-on-existing',
        action='store_true', dest='break_on_existing', default=False,
-        help='Stop the download process when encountering a file that is in the archive')
+        help='Stop the download process when encountering a file that is in the archive '
+             'supplied with the --download-archive option')
    selection.add_option(
        '--no-break-on-existing',
        action='store_false', dest='break_on_existing',
Author	SHA1	Message	Date
bashonly	d5219cfea3	[docs] Make `--break-on-existing` help string more clear Authored by: bashonly	2024-10-31 23:17:35 -05:00
bashonly	091805bb11	[ie] additional `partial_application`-related cleanup Authored by: bashonly	2024-10-31 23:17:05 -05:00
N/Ame	61dcaf74ba	[ie] `lambda`=>`partial_application` cleanup Authored by: grqz	2024-10-31 22:34:06 -05:00
N/Ame	44518dcbda	[test:utils] Fix typo in `partial_application` test message Authored by: grqz	2024-10-31 22:32:56 -05:00
bashonly	28cb5e6c42	Merge branch 'yt-dlp:master' into misc-cleanup-another-one	2024-10-31 22:30:24 -05:00
Nicolas F.	a6783a3b99	[ie/yle_areena] Support live events (#11358) Authored by: CounterPillow, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2024-10-31 23:23:42 +00:00
bashonly	428ffb75aa	[build] Disable attestations for trusted publishing (#11418) Currently does not work with reusable workflows, e.g. release-nightly.yml calling release.yml Ref: https://github.com/pypa/gh-action-pypi-publish/releases/tag/v1.11.0 https://github.com/pypa/gh-action-pypi-publish/discussions/255 https://github.com/pypi/warehouse/issues/11096 Authored by: bashonly	2024-10-31 09:00:08 +00:00