Compare commits

..

7 Commits

Author SHA1 Message Date
bashonly
d5219cfea3
[docs] Make --break-on-existing help string more clear
Authored by: bashonly
2024-10-31 23:17:35 -05:00
bashonly
091805bb11
[ie] additional partial_application-related cleanup
Authored by: bashonly
2024-10-31 23:17:05 -05:00
N/Ame
61dcaf74ba
[ie] lambda=>partial_application cleanup
Authored by: grqz
2024-10-31 22:34:06 -05:00
N/Ame
44518dcbda
[test:utils] Fix typo in partial_application test message
Authored by: grqz
2024-10-31 22:32:56 -05:00
bashonly
28cb5e6c42
Merge branch 'yt-dlp:master' into misc-cleanup-another-one 2024-10-31 22:30:24 -05:00
Nicolas F.
a6783a3b99
[ie/yle_areena] Support live events (#11358)
Authored by: CounterPillow, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-10-31 23:23:42 +00:00
bashonly
428ffb75aa
[build] Disable attestations for trusted publishing (#11418)
Currently does not work with reusable workflows, e.g. release-nightly.yml calling release.yml

Ref: https://github.com/pypa/gh-action-pypi-publish/releases/tag/v1.11.0
     https://github.com/pypa/gh-action-pypi-publish/discussions/255
     https://github.com/pypi/warehouse/issues/11096

Authored by: bashonly
2024-10-31 09:00:08 +00:00
28 changed files with 141 additions and 106 deletions

View File

@ -282,6 +282,7 @@ jobs:
uses: pypa/gh-action-pypi-publish@release/v1 uses: pypa/gh-action-pypi-publish@release/v1
with: with:
verbose: true verbose: true
attestations: false # Currently doesn't work w/ reusable workflows (breaks nightly)
publish: publish:
needs: [prepare, build] needs: [prepare, build]

View File

@ -479,7 +479,8 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
--no-download-archive Do not use archive file (default) --no-download-archive Do not use archive file (default)
--max-downloads NUMBER Abort after downloading NUMBER files --max-downloads NUMBER Abort after downloading NUMBER files
--break-on-existing Stop the download process when encountering --break-on-existing Stop the download process when encountering
a file that is in the archive a file that is in the archive supplied with
the --download-archive option
--no-break-on-existing Do not stop the download process when --no-break-on-existing Do not stop the download process when
encountering a file that is in the archive encountering a file that is in the archive
(default) (default)

View File

@ -478,7 +478,7 @@ class TestTraversalHelpers:
{'url': 'https://example.com/subs/en', 'name': 'en'}, {'url': 'https://example.com/subs/en', 'name': 'en'},
], [..., { ], [..., {
'id': 'name', 'id': 'name',
'ext': ['url', {lambda x: determine_ext(x, default_ext=None)}], 'ext': ['url', {determine_ext(default_ext=None)}],
'url': 'url', 'url': 'url',
}, all, {subs_list_to_dict(ext='ext')}]) == { }, all, {subs_list_to_dict(ext='ext')}]) == {
'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}], 'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}],

View File

@ -2156,7 +2156,7 @@ Line 1
assert callable(int_or_none(scale=10)), 'missing positional parameter should apply partially' assert callable(int_or_none(scale=10)), 'missing positional parameter should apply partially'
assert int_or_none(10, scale=0.1) == 100, 'positionally passed argument should call function' assert int_or_none(10, scale=0.1) == 100, 'positionally passed argument should call function'
assert int_or_none(v=10) == 10, 'keyword passed positional should call function' assert int_or_none(v=10) == 10, 'keyword passed positional should call function'
assert int_or_none(scale=0.1)(10) == 100, 'call after partial applicatino should call the function' assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function'
assert callable(join_nonempty(delim=', ')), 'varargs positional should apply partially' assert callable(join_nonempty(delim=', ')), 'varargs positional should apply partially'
assert callable(join_nonempty()), 'varargs positional should apply partially' assert callable(join_nonempty()), 'varargs positional should apply partially'

View File

@ -520,4 +520,4 @@ class BandcampUserIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
self._yield_items(webpage), uploader, f'Discography of {uploader}', self._yield_items(webpage), uploader, f'Discography of {uploader}',
getter=functools.partial(urljoin, url)) getter=urljoin(url))

View File

@ -165,6 +165,6 @@ class BpbIE(InfoExtractor):
'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)), 'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
**traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), { **traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
'formats': (':sources', ..., {self._process_source}), 'formats': (':sources', ..., {self._process_source}),
'thumbnail': ('poster', {lambda x: urljoin(url, x)}), 'thumbnail': ('poster', {urljoin(url)}),
}), }),
} }

View File

@ -145,10 +145,9 @@ class BravoTVIE(AdobePassIE):
tp_metadata = self._download_json( tp_metadata = self._download_json(
update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False) update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)
seconds_or_none = lambda x: float_or_none(x, 1000)
chapters = traverse_obj(tp_metadata, ('chapters', ..., { chapters = traverse_obj(tp_metadata, ('chapters', ..., {
'start_time': ('startTime', {seconds_or_none}), 'start_time': ('startTime', {float_or_none(scale=1000)}),
'end_time': ('endTime', {seconds_or_none}), 'end_time': ('endTime', {float_or_none(scale=1000)}),
})) }))
# prune pointless single chapters that span the entire duration from short videos # prune pointless single chapters that span the entire duration from short videos
if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')): if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
@ -168,8 +167,8 @@ class BravoTVIE(AdobePassIE):
**merge_dicts(traverse_obj(tp_metadata, { **merge_dicts(traverse_obj(tp_metadata, {
'title': 'title', 'title': 'title',
'description': 'description', 'description': 'description',
'duration': ('duration', {seconds_or_none}), 'duration': ('duration', {float_or_none(scale=1000)}),
'timestamp': ('pubDate', {seconds_or_none}), 'timestamp': ('pubDate', {float_or_none(scale=1000)}),
'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}), 'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}), 'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}), 'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),

View File

@ -62,7 +62,7 @@ class CaffeineTVIE(InfoExtractor):
'title': ('broadcast_title', {str}), 'title': ('broadcast_title', {str}),
'duration': ('content_duration', {int_or_none}), 'duration': ('content_duration', {int_or_none}),
'timestamp': ('broadcast_start_time', {parse_iso8601}), 'timestamp': ('broadcast_start_time', {parse_iso8601}),
'thumbnail': ('preview_image_path', {lambda x: urljoin(url, x)}), 'thumbnail': ('preview_image_path', {urljoin(url)}),
}), }),
'age_limit': { 'age_limit': {
# assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system # assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system

View File

@ -465,7 +465,7 @@ class CBCPlayerIE(InfoExtractor):
**traverse_obj(data, { **traverse_obj(data, {
'title': ('title', {str}), 'title': ('title', {str}),
'description': ('description', {str.strip}), 'description': ('description', {str.strip}),
'thumbnail': ('image', 'url', {url_or_none}, {functools.partial(update_url, query=None)}), 'thumbnail': ('image', 'url', {url_or_none}, {update_url(query=None)}),
'timestamp': ('publishedAt', {float_or_none(scale=1000)}), 'timestamp': ('publishedAt', {float_or_none(scale=1000)}),
'media_type': ('media', 'clipType', {str}), 'media_type': ('media', 'clipType', {str}),
'series': ('showName', {str}), 'series': ('showName', {str}),

View File

@ -1,5 +1,3 @@
import functools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
UserNotLive, UserNotLive,
@ -77,7 +75,7 @@ class CHZZKLiveIE(InfoExtractor):
'thumbnails': thumbnails, 'thumbnails': thumbnails,
**traverse_obj(live_detail, { **traverse_obj(live_detail, {
'title': ('liveTitle', {str}), 'title': ('liveTitle', {str}),
'timestamp': ('openDate', {functools.partial(parse_iso8601, delimiter=' ')}), 'timestamp': ('openDate', {parse_iso8601(delimiter=' ')}),
'concurrent_view_count': ('concurrentUserCount', {int_or_none}), 'concurrent_view_count': ('concurrentUserCount', {int_or_none}),
'view_count': ('accumulateCount', {int_or_none}), 'view_count': ('accumulateCount', {int_or_none}),
'channel': ('channel', 'channelName', {str}), 'channel': ('channel', 'channelName', {str}),

View File

@ -1,4 +1,3 @@
import functools
import json import json
import re import re
@ -199,7 +198,7 @@ class CNNIE(InfoExtractor):
'timestamp': ('data-publish-date', {parse_iso8601}), 'timestamp': ('data-publish-date', {parse_iso8601}),
'thumbnail': ( 'thumbnail': (
'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none}, 'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none},
{functools.partial(update_url, query='c=original')}), {update_url(query='c=original')}),
'display_id': 'data-video-slug', 'display_id': 'data-video-slug',
}), }),
**traverse_obj(video_data, { **traverse_obj(video_data, {

View File

@ -12,6 +12,7 @@ from ..utils import (
parse_iso8601, parse_iso8601,
strip_or_none, strip_or_none,
try_get, try_get,
urljoin,
) )
@ -112,8 +113,7 @@ class CondeNastIE(InfoExtractor):
m_paths = re.finditer( m_paths = re.finditer(
r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage) r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage)
paths = orderedSet(m.group(1) for m in m_paths) paths = orderedSet(m.group(1) for m in m_paths)
build_url = lambda path: urllib.parse.urljoin(base_url, path) entries = [self.url_result(urljoin(base_url, path), 'CondeNast') for path in paths]
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
return self.playlist_result(entries, playlist_title=title) return self.playlist_result(entries, playlist_title=title)
def _extract_video_params(self, webpage, display_id): def _extract_video_params(self, webpage, display_id):

View File

@ -32,7 +32,7 @@ class LaracastsBaseIE(InfoExtractor):
VimeoIE, url_transparent=True, VimeoIE, url_transparent=True,
**traverse_obj(episode, { **traverse_obj(episode, {
'id': ('id', {int}, {str_or_none}), 'id': ('id', {int}, {str_or_none}),
'webpage_url': ('path', {lambda x: urljoin('https://laracasts.com', x)}), 'webpage_url': ('path', {urljoin('https://laracasts.com')}),
'title': ('title', {clean_html}), 'title': ('title', {clean_html}),
'season_number': ('chapter', {int_or_none}), 'season_number': ('chapter', {int_or_none}),
'episode_number': ('position', {int_or_none}), 'episode_number': ('position', {int_or_none}),

View File

@ -86,7 +86,7 @@ class NebulaBaseIE(InfoExtractor):
def _extract_video_metadata(self, episode): def _extract_video_metadata(self, episode):
channel_url = traverse_obj( channel_url = traverse_obj(
episode, (('channel_slug', 'class_slug'), {lambda x: urljoin('https://nebula.tv/', x)}), get_all=False) episode, (('channel_slug', 'class_slug'), {urljoin('https://nebula.tv/')}), get_all=False)
return { return {
'id': episode['id'].partition(':')[2], 'id': episode['id'].partition(':')[2],
**traverse_obj(episode, { **traverse_obj(episode, {

View File

@ -36,10 +36,6 @@ class NetEaseMusicBaseIE(InfoExtractor):
_API_BASE = 'http://music.163.com/api/' _API_BASE = 'http://music.163.com/api/'
_GEO_BYPASS = False _GEO_BYPASS = False
@staticmethod
def _kilo_or_none(value):
return int_or_none(value, scale=1000)
def _create_eapi_cipher(self, api_path, query_body, cookies): def _create_eapi_cipher(self, api_path, query_body, cookies):
request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':')) request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':'))
@ -101,7 +97,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
'vcodec': 'none', 'vcodec': 'none',
**traverse_obj(song, { **traverse_obj(song, {
'ext': ('type', {str}), 'ext': ('type', {str}),
'abr': ('br', {self._kilo_or_none}), 'abr': ('br', {int_or_none(scale=1000)}),
'filesize': ('size', {int_or_none}), 'filesize': ('size', {int_or_none}),
}), }),
}) })
@ -282,9 +278,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
**lyric_data, **lyric_data,
**traverse_obj(info, { **traverse_obj(info, {
'title': ('name', {str}), 'title': ('name', {str}),
'timestamp': ('album', 'publishTime', {self._kilo_or_none}), 'timestamp': ('album', 'publishTime', {int_or_none(scale=1000)}),
'thumbnail': ('album', 'picUrl', {url_or_none}), 'thumbnail': ('album', 'picUrl', {url_or_none}),
'duration': ('duration', {self._kilo_or_none}), 'duration': ('duration', {int_or_none(scale=1000)}),
'album': ('album', 'name', {str}), 'album': ('album', 'name', {str}),
'average_rating': ('score', {int_or_none}), 'average_rating': ('score', {int_or_none}),
}), }),
@ -440,7 +436,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
'tags': ('tags', ..., {str}), 'tags': ('tags', ..., {str}),
'uploader': ('creator', 'nickname', {str}), 'uploader': ('creator', 'nickname', {str}),
'uploader_id': ('creator', 'userId', {str_or_none}), 'uploader_id': ('creator', 'userId', {str_or_none}),
'timestamp': ('updateTime', {self._kilo_or_none}), 'timestamp': ('updateTime', {int_or_none(scale=1000)}),
})) }))
if traverse_obj(info, ('playlist', 'specialType')) == 10: if traverse_obj(info, ('playlist', 'specialType')) == 10:
metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}' metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}'
@ -520,7 +516,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
'description': (('desc', 'briefDesc'), {str}, filter), 'description': (('desc', 'briefDesc'), {str}, filter),
'upload_date': ('publishTime', {unified_strdate}), 'upload_date': ('publishTime', {unified_strdate}),
'thumbnail': ('cover', {url_or_none}), 'thumbnail': ('cover', {url_or_none}),
'duration': ('duration', {self._kilo_or_none}), 'duration': ('duration', {int_or_none(scale=1000)}),
'view_count': ('playCount', {int_or_none}), 'view_count': ('playCount', {int_or_none}),
'like_count': ('likeCount', {int_or_none}), 'like_count': ('likeCount', {int_or_none}),
'comment_count': ('commentCount', {int_or_none}), 'comment_count': ('commentCount', {int_or_none}),
@ -588,7 +584,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
'description': ('description', {str}), 'description': ('description', {str}),
'creator': ('dj', 'brand', {str}), 'creator': ('dj', 'brand', {str}),
'thumbnail': ('coverUrl', {url_or_none}), 'thumbnail': ('coverUrl', {url_or_none}),
'timestamp': ('createTime', {self._kilo_or_none}), 'timestamp': ('createTime', {int_or_none(scale=1000)}),
}) })
if not self._yes_playlist( if not self._yes_playlist(
@ -598,7 +594,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
return { return {
'id': str(info['mainSong']['id']), 'id': str(info['mainSong']['id']),
'formats': formats, 'formats': formats,
'duration': traverse_obj(info, ('mainSong', 'duration', {self._kilo_or_none})), 'duration': traverse_obj(info, ('mainSong', 'duration', {int_or_none(scale=1000)})),
**metainfo, **metainfo,
} }

View File

@ -1,5 +1,3 @@
import functools
from .common import InfoExtractor from .common import InfoExtractor
from .youtube import YoutubeIE from .youtube import YoutubeIE
from ..utils import ( from ..utils import (
@ -83,7 +81,7 @@ class ParlerIE(InfoExtractor):
'timestamp': ('date_created', {unified_timestamp}), 'timestamp': ('date_created', {unified_timestamp}),
'uploader': ('user', 'name', {strip_or_none}), 'uploader': ('user', 'name', {strip_or_none}),
'uploader_id': ('user', 'username', {str}), 'uploader_id': ('user', 'username', {str}),
'uploader_url': ('user', 'username', {functools.partial(urljoin, 'https://parler.com/')}), 'uploader_url': ('user', 'username', {urljoin('https://parler.com/')}),
'view_count': ('views', {int_or_none}), 'view_count': ('views', {int_or_none}),
'comment_count': ('total_comments', {int_or_none}), 'comment_count': ('total_comments', {int_or_none}),
'repost_count': ('echos', {int_or_none}), 'repost_count': ('echos', {int_or_none}),

View File

@ -198,6 +198,6 @@ class Pr0grammIE(InfoExtractor):
'dislike_count': ('down', {int}), 'dislike_count': ('down', {int}),
'timestamp': ('created', {int}), 'timestamp': ('created', {int}),
'upload_date': ('created', {int}, {dt.date.fromtimestamp}, {lambda x: x.strftime('%Y%m%d')}), 'upload_date': ('created', {int}, {dt.date.fromtimestamp}, {lambda x: x.strftime('%Y%m%d')}),
'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)}), 'thumbnail': ('thumb', {urljoin('https://thumb.pr0gramm.com')}),
}), }),
} }

View File

@ -187,4 +187,4 @@ class RTVSLOShowIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
re.findall(r'<a [^>]*\bhref="(/arhiv/[^"]+)"', webpage), re.findall(r'<a [^>]*\bhref="(/arhiv/[^"]+)"', webpage),
playlist_id, self._html_extract_title(webpage), playlist_id, self._html_extract_title(webpage),
getter=lambda x: urljoin('https://365.rtvslo.si', x), ie=RTVSLOIE) getter=urljoin('https://365.rtvslo.si'), ie=RTVSLOIE)

View File

@ -136,7 +136,7 @@ class TeamcocoIE(TeamcocoBaseIE):
'blocks', lambda _, v: v['name'] in ('meta-tags', 'video-player', 'video-info'), 'props', {dict}))) 'blocks', lambda _, v: v['name'] in ('meta-tags', 'video-player', 'video-info'), 'props', {dict})))
thumbnail = traverse_obj( thumbnail = traverse_obj(
info, (('image', 'poster'), {lambda x: urljoin('https://teamcoco.com/', x)}), get_all=False) info, (('image', 'poster'), {urljoin('https://teamcoco.com/')}), get_all=False)
video_id = traverse_obj(parse_qs(thumbnail), ('id', 0)) or display_id video_id = traverse_obj(parse_qs(thumbnail), ('id', 0)) or display_id
formats, subtitles = self._get_formats_and_subtitles(info, video_id) formats, subtitles = self._get_formats_and_subtitles(info, video_id)

View File

@ -10,7 +10,7 @@ from ..utils.traversal import traverse_obj
def _fmt_url(url): def _fmt_url(url):
return functools.partial(format_field, template=url, default=None) return format_field(template=url, default=None)
class TelewebionIE(InfoExtractor): class TelewebionIE(InfoExtractor):

View File

@ -1,4 +1,3 @@
import functools
import random import random
import re import re
import string import string
@ -278,7 +277,7 @@ class VQQSeriesIE(VQQBaseIE):
webpage)] webpage)]
return self.playlist_from_matches( return self.playlist_from_matches(
episode_paths, series_id, ie=VQQVideoIE, getter=functools.partial(urljoin, url), episode_paths, series_id, ie=VQQVideoIE, getter=urljoin(url),
title=self._get_clean_title(traverse_obj(webpage_metadata, ('coverInfo', 'title')) title=self._get_clean_title(traverse_obj(webpage_metadata, ('coverInfo', 'title'))
or self._og_search_title(webpage)), or self._og_search_title(webpage)),
description=(traverse_obj(webpage_metadata, ('coverInfo', 'description')) description=(traverse_obj(webpage_metadata, ('coverInfo', 'description'))
@ -328,7 +327,7 @@ class WeTvBaseIE(TencentBaseIE):
or re.findall(r'<a[^>]+class="play-video__link"[^>]+href="(?P<path>[^"]+)', webpage)) or re.findall(r'<a[^>]+class="play-video__link"[^>]+href="(?P<path>[^"]+)', webpage))
return self.playlist_from_matches( return self.playlist_from_matches(
episode_paths, series_id, ie=ie, getter=functools.partial(urljoin, url), episode_paths, series_id, ie=ie, getter=urljoin(url),
title=self._get_clean_title(traverse_obj(webpage_metadata, ('coverInfo', 'title')) title=self._get_clean_title(traverse_obj(webpage_metadata, ('coverInfo', 'title'))
or self._og_search_title(webpage)), or self._og_search_title(webpage)),
description=(traverse_obj(webpage_metadata, ('coverInfo', 'description')) description=(traverse_obj(webpage_metadata, ('coverInfo', 'description'))

View File

@ -1,4 +1,3 @@
import functools
import itertools import itertools
from .common import InfoExtractor from .common import InfoExtractor
@ -161,4 +160,4 @@ class TenPlaySeasonIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
self._entries(urljoin(url, episodes_carousel['loadMoreUrl']), playlist_id), self._entries(urljoin(url, episodes_carousel['loadMoreUrl']), playlist_id),
playlist_id, traverse_obj(season_info, ('content', 0, 'title', {str})), playlist_id, traverse_obj(season_info, ('content', 0, 'title', {str})),
getter=functools.partial(urljoin, url)) getter=urljoin(url))

View File

@ -131,4 +131,4 @@ class TheGuardianPodcastPlaylistIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
self._entries(url, podcast_id), podcast_id, title, description=description, self._entries(url, podcast_id), podcast_id, title, description=description,
ie=TheGuardianPodcastIE, getter=lambda x: urljoin('https://www.theguardian.com', x)) ie=TheGuardianPodcastIE, getter=urljoin('https://www.theguardian.com'))

View File

@ -114,7 +114,7 @@ class WeiboBaseIE(InfoExtractor):
'thumbnail': ('page_info', 'page_pic', {url_or_none}), 'thumbnail': ('page_info', 'page_pic', {url_or_none}),
'uploader': ('user', 'screen_name', {str}), 'uploader': ('user', 'screen_name', {str}),
'uploader_id': ('user', ('id', 'id_str'), {str_or_none}), 'uploader_id': ('user', ('id', 'id_str'), {str_or_none}),
'uploader_url': ('user', 'profile_url', {lambda x: urljoin('https://weibo.com/', x)}), 'uploader_url': ('user', 'profile_url', {urljoin('https://weibo.com/')}),
'view_count': ('page_info', 'media_info', 'online_users_number', {int_or_none}), 'view_count': ('page_info', 'media_info', 'online_users_number', {int_or_none}),
'like_count': ('attitudes_count', {int_or_none}), 'like_count': ('attitudes_count', {int_or_none}),
'repost_count': ('reposts_count', {int_or_none}), 'repost_count': ('reposts_count', {int_or_none}),

View File

@ -1,12 +1,13 @@
from .common import InfoExtractor from .common import InfoExtractor
from .kaltura import KalturaIE from .kaltura import KalturaIE
from ..utils import ( from ..utils import (
ExtractorError,
int_or_none, int_or_none,
parse_iso8601,
smuggle_url, smuggle_url,
traverse_obj,
unified_strdate,
url_or_none, url_or_none,
) )
from ..utils.traversal import traverse_obj
class YleAreenaIE(InfoExtractor): class YleAreenaIE(InfoExtractor):
@ -15,9 +16,9 @@ class YleAreenaIE(InfoExtractor):
_TESTS = [ _TESTS = [
{ {
'url': 'https://areena.yle.fi/1-4371942', 'url': 'https://areena.yle.fi/1-4371942',
'md5': '932edda0ecf5dfd6423804182d32f8ac', 'md5': 'd87e9a1e74e67e009990ddd413e426b4',
'info_dict': { 'info_dict': {
'id': '0_a3tjk92c', 'id': '1-4371942',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Pouchit', 'title': 'Pouchit',
'description': 'md5:01071d7056ceec375f63960f90c35366', 'description': 'md5:01071d7056ceec375f63960f90c35366',
@ -26,37 +27,27 @@ class YleAreenaIE(InfoExtractor):
'season_number': 1, 'season_number': 1,
'episode': 'Episode 2', 'episode': 'Episode 2',
'episode_number': 2, 'episode_number': 2,
'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/0_a3tjk92c/version/100061', 'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
'uploader_id': 'ovp@yle.fi',
'duration': 1435,
'view_count': int,
'upload_date': '20181204',
'release_date': '20190106',
'timestamp': 1543916210,
'subtitles': {'fin': [{'url': r're:^https?://', 'ext': 'srt'}]},
'age_limit': 7, 'age_limit': 7,
'webpage_url': 'https://areena.yle.fi/1-4371942', 'release_date': '20190105',
'release_timestamp': 1546725660,
'duration': 1435,
}, },
}, },
{ {
'url': 'https://areena.yle.fi/1-2158940', 'url': 'https://areena.yle.fi/1-2158940',
'md5': 'cecb603661004e36af8c5188b5212b12', 'md5': '6369ddc5e07b5fdaeda27a495184143c',
'info_dict': { 'info_dict': {
'id': '1_l38iz9ur', 'id': '1-2158940',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Albi haluaa vessan', 'title': 'Albi haluaa vessan',
'description': 'md5:15236d810c837bed861fae0e88663c33', 'description': 'Albi haluaa vessan.',
'series': 'Albi Lumiukko', 'series': 'Albi Lumiukko',
'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/1_l38iz9ur/version/100021', 'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
'uploader_id': 'ovp@yle.fi',
'duration': 319,
'view_count': int,
'upload_date': '20211202',
'release_date': '20211215',
'timestamp': 1638448202,
'subtitles': {},
'age_limit': 0, 'age_limit': 0,
'webpage_url': 'https://areena.yle.fi/1-2158940', 'release_date': '20211215',
'release_timestamp': 1639555200,
'duration': 319,
}, },
}, },
{ {
@ -67,72 +58,125 @@ class YleAreenaIE(InfoExtractor):
'title': 'HKO & Mälkki & Tanner', 'title': 'HKO & Mälkki & Tanner',
'description': 'md5:b4f1b1af2c6569b33f75179a86eea156', 'description': 'md5:b4f1b1af2c6569b33f75179a86eea156',
'series': 'Helsingin kaupunginorkesterin konsertteja', 'series': 'Helsingin kaupunginorkesterin konsertteja',
'thumbnail': r're:^https?://.+\.jpg$', 'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
'release_date': '20230120', 'release_date': '20230120',
'release_timestamp': 1674242079,
'duration': 8004,
}, },
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
}, },
}, },
{
'url': 'https://areena.yle.fi/1-72251830',
'info_dict': {
'id': '1-72251830',
'ext': 'mp4',
'title': r're:Pentulive 2024 | Pentulive \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
'description': 'md5:1f118707d9093bf894a34fbbc865397b',
'series': 'Pentulive',
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
'live_status': 'is_live',
'release_date': '20241025',
'release_timestamp': 1729875600,
},
'params': {
'skip_download': 'livestream',
},
},
{
'url': 'https://areena.yle.fi/podcastit/1-71022852',
'info_dict': {
'id': '1-71022852',
'ext': 'mp3',
'title': 'Värityspäivä',
'description': 'md5:c3a02b0455ec71d32cbe09d32ec161e2',
'series': 'Murun ja Paukun ikioma kaupunki',
'episode': 'Episode 1',
'episode_number': 1,
'release_date': '20240607',
'release_timestamp': 1717736400,
'duration': 442,
},
},
] ]
def _real_extract(self, url): def _real_extract(self, url):
video_id, is_podcast = self._match_valid_url(url).group('id', 'podcast') video_id, is_podcast = self._match_valid_url(url).group('id', 'podcast')
info = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={}) json_ld = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={})
video_data = self._download_json( video_data = self._download_json(
f'https://player.api.yle.fi/v1/preview/{video_id}.json?app_id=player_static_prod&app_key=8930d72170e48303cf5f3867780d549b', f'https://player.api.yle.fi/v1/preview/{video_id}.json?app_id=player_static_prod&app_key=8930d72170e48303cf5f3867780d549b',
video_id, headers={ video_id, headers={
'origin': 'https://areena.yle.fi', 'origin': 'https://areena.yle.fi',
'referer': 'https://areena.yle.fi/', 'referer': 'https://areena.yle.fi/',
'content-type': 'application/json', 'content-type': 'application/json',
}) })['data']
# Example title: 'K1, J2: Pouchit | Modernit miehet' # Example title: 'K1, J2: Pouchit | Modernit miehet'
season_number, episode_number, episode, series = self._search_regex( season_number, episode_number, episode, series = self._search_regex(
r'K(?P<season_no>\d+),\s*J(?P<episode_no>\d+):?\s*\b(?P<episode>[^|]+)\s*|\s*(?P<series>.+)', r'K(?P<season_no>\d+),\s*J(?P<episode_no>\d+):?\s*\b(?P<episode>[^|]+)\s*|\s*(?P<series>.+)',
info.get('title') or '', 'episode metadata', group=('season_no', 'episode_no', 'episode', 'series'), json_ld.get('title') or '', 'episode metadata', group=('season_no', 'episode_no', 'episode', 'series'),
default=(None, None, None, None)) default=(None, None, None, None))
description = traverse_obj(video_data, ('data', 'ongoing_ondemand', 'description', 'fin'), expected_type=str) description = traverse_obj(video_data, ('ongoing_ondemand', 'description', 'fin', {str}))
subtitles = {} subtitles = {}
for sub in traverse_obj(video_data, ('data', 'ongoing_ondemand', 'subtitles', ...)): for sub in traverse_obj(video_data, ('ongoing_ondemand', 'subtitles', lambda _, v: url_or_none(v['uri']))):
if url_or_none(sub.get('uri')):
subtitles.setdefault(sub.get('language') or 'und', []).append({ subtitles.setdefault(sub.get('language') or 'und', []).append({
'url': sub['uri'], 'url': sub['uri'],
'ext': 'srt', 'ext': 'srt',
'name': sub.get('kind'), 'name': sub.get('kind'),
}) })
if is_podcast: info_dict, metadata = {}, {}
info_dict = { if is_podcast and traverse_obj(video_data, ('ongoing_ondemand', 'media_url', {url_or_none})):
'url': video_data['data']['ongoing_ondemand']['media_url'], metadata = video_data['ongoing_ondemand']
} info_dict['url'] = metadata['media_url']
elif kaltura_id := traverse_obj(video_data, ('data', 'ongoing_ondemand', 'kaltura', 'id', {str})): elif traverse_obj(video_data, ('ongoing_event', 'manifest_url', {url_or_none})):
info_dict = { metadata = video_data['ongoing_event']
metadata.pop('duration', None) # Duration is not accurate for livestreams
info_dict['live_status'] = 'is_live'
elif traverse_obj(video_data, ('ongoing_ondemand', 'manifest_url', {url_or_none})):
metadata = video_data['ongoing_ondemand']
# XXX: Has all externally-hosted Kaltura content been moved to native hosting?
elif kaltura_id := traverse_obj(video_data, ('ongoing_ondemand', 'kaltura', 'id', {str})):
metadata = video_data['ongoing_ondemand']
info_dict.update({
'_type': 'url_transparent', '_type': 'url_transparent',
'url': smuggle_url(f'kaltura:1955031:{kaltura_id}', {'source_url': url}), 'url': smuggle_url(f'kaltura:1955031:{kaltura_id}', {'source_url': url}),
'ie_key': KalturaIE.ie_key(), 'ie_key': KalturaIE.ie_key(),
} })
elif traverse_obj(video_data, ('gone', {dict})):
self.raise_no_formats('The content is no longer available', expected=True, video_id=video_id)
metadata = video_data['gone']
else: else:
formats, subs = self._extract_m3u8_formats_and_subtitles( raise ExtractorError('Unable to extract content')
video_data['data']['ongoing_ondemand']['manifest_url'], video_id, 'mp4', m3u8_id='hls')
if not info_dict.get('url') and metadata.get('manifest_url'):
info_dict['formats'], subs = self._extract_m3u8_formats_and_subtitles(
metadata['manifest_url'], video_id, 'mp4', m3u8_id='hls')
self._merge_subtitles(subs, target=subtitles) self._merge_subtitles(subs, target=subtitles)
info_dict = {'formats': formats}
return { return {
**info_dict, **traverse_obj(json_ld, {
'title': 'title',
'thumbnails': ('thumbnails', ..., {'url': 'url'}),
}),
'id': video_id, 'id': video_id,
'title': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'title', 'fin'), expected_type=str) 'title': episode,
or episode or info.get('title')),
'description': description, 'description': description,
'series': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'series', 'title', 'fin'), expected_type=str) 'series': series,
or series),
'season_number': (int_or_none(self._search_regex(r'Kausi (\d+)', description, 'season number', default=None)) 'season_number': (int_or_none(self._search_regex(r'Kausi (\d+)', description, 'season number', default=None))
or int_or_none(season_number)), or int_or_none(season_number)),
'episode_number': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'episode_number'), expected_type=int_or_none) 'episode_number': int_or_none(episode_number),
or int_or_none(episode_number)),
'thumbnails': traverse_obj(info, ('thumbnails', ..., {'url': 'url'})),
'age_limit': traverse_obj(video_data, ('data', 'ongoing_ondemand', 'content_rating', 'age_restriction'), expected_type=int_or_none),
'subtitles': subtitles or None, 'subtitles': subtitles or None,
'release_date': unified_strdate(traverse_obj(video_data, ('data', 'ongoing_ondemand', 'start_time'), expected_type=str)), **traverse_obj(metadata, {
'title': ('title', 'fin', {str}),
'description': ('description', 'fin', {str}),
'series': ('series', 'title', 'fin', {str}),
'episode_number': ('episode_number', {int_or_none}),
'age_limit': ('content_rating', 'age_restriction', {int_or_none}),
'release_timestamp': ('start_time', {parse_iso8601}),
'duration': ('duration', 'duration_in_seconds', {int_or_none}),
}),
**info_dict,
} }

View File

@ -247,7 +247,7 @@ class YouPornListBase(InfoExtractor):
if not html: if not html:
return return
for element in get_elements_html_by_class('video-title', html): for element in get_elements_html_by_class('video-title', html):
if video_url := traverse_obj(element, ({extract_attributes}, 'href', {lambda x: urljoin(url, x)})): if video_url := traverse_obj(element, ({extract_attributes}, 'href', {urljoin(url)})):
yield self.url_result(video_url) yield self.url_result(video_url)
if page_num is not None: if page_num is not None:

View File

@ -3637,7 +3637,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'author_is_verified': ('author', 'isVerified', {bool}), 'author_is_verified': ('author', 'isVerified', {bool}),
'author_url': ('author', 'channelCommand', 'innertubeCommand', ( 'author_url': ('author', 'channelCommand', 'innertubeCommand', (
('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'),
), {lambda x: urljoin('https://www.youtube.com', x)}), ), {urljoin('https://www.youtube.com')}),
}, get_all=False), }, get_all=False),
'is_favorited': (None if toolbar_entity_payload is None else 'is_favorited': (None if toolbar_entity_payload is None else
toolbar_entity_payload.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'), toolbar_entity_payload.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'),

View File

@ -700,7 +700,8 @@ def create_parser():
selection.add_option( selection.add_option(
'--break-on-existing', '--break-on-existing',
action='store_true', dest='break_on_existing', default=False, action='store_true', dest='break_on_existing', default=False,
help='Stop the download process when encountering a file that is in the archive') help='Stop the download process when encountering a file that is in the archive '
'supplied with the --download-archive option')
selection.add_option( selection.add_option(
'--no-break-on-existing', '--no-break-on-existing',
action='store_false', dest='break_on_existing', action='store_false', dest='break_on_existing',