mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-29 02:31:25 +01:00
Compare commits
7 Commits
90e62715b6
...
d5219cfea3
Author | SHA1 | Date | |
---|---|---|---|
|
d5219cfea3 | ||
|
091805bb11 | ||
|
61dcaf74ba | ||
|
44518dcbda | ||
|
28cb5e6c42 | ||
|
a6783a3b99 | ||
|
428ffb75aa |
1
.github/workflows/release.yml
vendored
1
.github/workflows/release.yml
vendored
|
@ -282,6 +282,7 @@ jobs:
|
|||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
verbose: true
|
||||
attestations: false # Currently doesn't work w/ reusable workflows (breaks nightly)
|
||||
|
||||
publish:
|
||||
needs: [prepare, build]
|
||||
|
|
|
@ -479,7 +479,8 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
|
|||
--no-download-archive Do not use archive file (default)
|
||||
--max-downloads NUMBER Abort after downloading NUMBER files
|
||||
--break-on-existing Stop the download process when encountering
|
||||
a file that is in the archive
|
||||
a file that is in the archive supplied with
|
||||
the --download-archive option
|
||||
--no-break-on-existing Do not stop the download process when
|
||||
encountering a file that is in the archive
|
||||
(default)
|
||||
|
|
|
@ -478,7 +478,7 @@ class TestTraversalHelpers:
|
|||
{'url': 'https://example.com/subs/en', 'name': 'en'},
|
||||
], [..., {
|
||||
'id': 'name',
|
||||
'ext': ['url', {lambda x: determine_ext(x, default_ext=None)}],
|
||||
'ext': ['url', {determine_ext(default_ext=None)}],
|
||||
'url': 'url',
|
||||
}, all, {subs_list_to_dict(ext='ext')}]) == {
|
||||
'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}],
|
||||
|
|
|
@ -2156,7 +2156,7 @@ Line 1
|
|||
assert callable(int_or_none(scale=10)), 'missing positional parameter should apply partially'
|
||||
assert int_or_none(10, scale=0.1) == 100, 'positionally passed argument should call function'
|
||||
assert int_or_none(v=10) == 10, 'keyword passed positional should call function'
|
||||
assert int_or_none(scale=0.1)(10) == 100, 'call after partial applicatino should call the function'
|
||||
assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function'
|
||||
|
||||
assert callable(join_nonempty(delim=', ')), 'varargs positional should apply partially'
|
||||
assert callable(join_nonempty()), 'varargs positional should apply partially'
|
||||
|
|
|
@ -520,4 +520,4 @@ class BandcampUserIE(InfoExtractor):
|
|||
|
||||
return self.playlist_from_matches(
|
||||
self._yield_items(webpage), uploader, f'Discography of {uploader}',
|
||||
getter=functools.partial(urljoin, url))
|
||||
getter=urljoin(url))
|
||||
|
|
|
@ -165,6 +165,6 @@ class BpbIE(InfoExtractor):
|
|||
'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
|
||||
**traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
|
||||
'formats': (':sources', ..., {self._process_source}),
|
||||
'thumbnail': ('poster', {lambda x: urljoin(url, x)}),
|
||||
'thumbnail': ('poster', {urljoin(url)}),
|
||||
}),
|
||||
}
|
||||
|
|
|
@ -145,10 +145,9 @@ class BravoTVIE(AdobePassIE):
|
|||
tp_metadata = self._download_json(
|
||||
update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)
|
||||
|
||||
seconds_or_none = lambda x: float_or_none(x, 1000)
|
||||
chapters = traverse_obj(tp_metadata, ('chapters', ..., {
|
||||
'start_time': ('startTime', {seconds_or_none}),
|
||||
'end_time': ('endTime', {seconds_or_none}),
|
||||
'start_time': ('startTime', {float_or_none(scale=1000)}),
|
||||
'end_time': ('endTime', {float_or_none(scale=1000)}),
|
||||
}))
|
||||
# prune pointless single chapters that span the entire duration from short videos
|
||||
if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
|
||||
|
@ -168,8 +167,8 @@ class BravoTVIE(AdobePassIE):
|
|||
**merge_dicts(traverse_obj(tp_metadata, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'duration': ('duration', {seconds_or_none}),
|
||||
'timestamp': ('pubDate', {seconds_or_none}),
|
||||
'duration': ('duration', {float_or_none(scale=1000)}),
|
||||
'timestamp': ('pubDate', {float_or_none(scale=1000)}),
|
||||
'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
|
||||
'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
|
||||
'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),
|
||||
|
|
|
@ -62,7 +62,7 @@ class CaffeineTVIE(InfoExtractor):
|
|||
'title': ('broadcast_title', {str}),
|
||||
'duration': ('content_duration', {int_or_none}),
|
||||
'timestamp': ('broadcast_start_time', {parse_iso8601}),
|
||||
'thumbnail': ('preview_image_path', {lambda x: urljoin(url, x)}),
|
||||
'thumbnail': ('preview_image_path', {urljoin(url)}),
|
||||
}),
|
||||
'age_limit': {
|
||||
# assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system
|
||||
|
|
|
@ -465,7 +465,7 @@ class CBCPlayerIE(InfoExtractor):
|
|||
**traverse_obj(data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str.strip}),
|
||||
'thumbnail': ('image', 'url', {url_or_none}, {functools.partial(update_url, query=None)}),
|
||||
'thumbnail': ('image', 'url', {url_or_none}, {update_url(query=None)}),
|
||||
'timestamp': ('publishedAt', {float_or_none(scale=1000)}),
|
||||
'media_type': ('media', 'clipType', {str}),
|
||||
'series': ('showName', {str}),
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
UserNotLive,
|
||||
|
@ -77,7 +75,7 @@ class CHZZKLiveIE(InfoExtractor):
|
|||
'thumbnails': thumbnails,
|
||||
**traverse_obj(live_detail, {
|
||||
'title': ('liveTitle', {str}),
|
||||
'timestamp': ('openDate', {functools.partial(parse_iso8601, delimiter=' ')}),
|
||||
'timestamp': ('openDate', {parse_iso8601(delimiter=' ')}),
|
||||
'concurrent_view_count': ('concurrentUserCount', {int_or_none}),
|
||||
'view_count': ('accumulateCount', {int_or_none}),
|
||||
'channel': ('channel', 'channelName', {str}),
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
import functools
|
||||
import json
|
||||
import re
|
||||
|
||||
|
@ -199,7 +198,7 @@ class CNNIE(InfoExtractor):
|
|||
'timestamp': ('data-publish-date', {parse_iso8601}),
|
||||
'thumbnail': (
|
||||
'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none},
|
||||
{functools.partial(update_url, query='c=original')}),
|
||||
{update_url(query='c=original')}),
|
||||
'display_id': 'data-video-slug',
|
||||
}),
|
||||
**traverse_obj(video_data, {
|
||||
|
|
|
@ -12,6 +12,7 @@ from ..utils import (
|
|||
parse_iso8601,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
|
@ -112,8 +113,7 @@ class CondeNastIE(InfoExtractor):
|
|||
m_paths = re.finditer(
|
||||
r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage)
|
||||
paths = orderedSet(m.group(1) for m in m_paths)
|
||||
build_url = lambda path: urllib.parse.urljoin(base_url, path)
|
||||
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
|
||||
entries = [self.url_result(urljoin(base_url, path), 'CondeNast') for path in paths]
|
||||
return self.playlist_result(entries, playlist_title=title)
|
||||
|
||||
def _extract_video_params(self, webpage, display_id):
|
||||
|
|
|
@ -32,7 +32,7 @@ class LaracastsBaseIE(InfoExtractor):
|
|||
VimeoIE, url_transparent=True,
|
||||
**traverse_obj(episode, {
|
||||
'id': ('id', {int}, {str_or_none}),
|
||||
'webpage_url': ('path', {lambda x: urljoin('https://laracasts.com', x)}),
|
||||
'webpage_url': ('path', {urljoin('https://laracasts.com')}),
|
||||
'title': ('title', {clean_html}),
|
||||
'season_number': ('chapter', {int_or_none}),
|
||||
'episode_number': ('position', {int_or_none}),
|
||||
|
|
|
@ -86,7 +86,7 @@ class NebulaBaseIE(InfoExtractor):
|
|||
|
||||
def _extract_video_metadata(self, episode):
|
||||
channel_url = traverse_obj(
|
||||
episode, (('channel_slug', 'class_slug'), {lambda x: urljoin('https://nebula.tv/', x)}), get_all=False)
|
||||
episode, (('channel_slug', 'class_slug'), {urljoin('https://nebula.tv/')}), get_all=False)
|
||||
return {
|
||||
'id': episode['id'].partition(':')[2],
|
||||
**traverse_obj(episode, {
|
||||
|
|
|
@ -36,10 +36,6 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
|||
_API_BASE = 'http://music.163.com/api/'
|
||||
_GEO_BYPASS = False
|
||||
|
||||
@staticmethod
|
||||
def _kilo_or_none(value):
|
||||
return int_or_none(value, scale=1000)
|
||||
|
||||
def _create_eapi_cipher(self, api_path, query_body, cookies):
|
||||
request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':'))
|
||||
|
||||
|
@ -101,7 +97,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
|||
'vcodec': 'none',
|
||||
**traverse_obj(song, {
|
||||
'ext': ('type', {str}),
|
||||
'abr': ('br', {self._kilo_or_none}),
|
||||
'abr': ('br', {int_or_none(scale=1000)}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
}),
|
||||
})
|
||||
|
@ -282,9 +278,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
|||
**lyric_data,
|
||||
**traverse_obj(info, {
|
||||
'title': ('name', {str}),
|
||||
'timestamp': ('album', 'publishTime', {self._kilo_or_none}),
|
||||
'timestamp': ('album', 'publishTime', {int_or_none(scale=1000)}),
|
||||
'thumbnail': ('album', 'picUrl', {url_or_none}),
|
||||
'duration': ('duration', {self._kilo_or_none}),
|
||||
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||
'album': ('album', 'name', {str}),
|
||||
'average_rating': ('score', {int_or_none}),
|
||||
}),
|
||||
|
@ -440,7 +436,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
|
|||
'tags': ('tags', ..., {str}),
|
||||
'uploader': ('creator', 'nickname', {str}),
|
||||
'uploader_id': ('creator', 'userId', {str_or_none}),
|
||||
'timestamp': ('updateTime', {self._kilo_or_none}),
|
||||
'timestamp': ('updateTime', {int_or_none(scale=1000)}),
|
||||
}))
|
||||
if traverse_obj(info, ('playlist', 'specialType')) == 10:
|
||||
metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}'
|
||||
|
@ -520,7 +516,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
|||
'description': (('desc', 'briefDesc'), {str}, filter),
|
||||
'upload_date': ('publishTime', {unified_strdate}),
|
||||
'thumbnail': ('cover', {url_or_none}),
|
||||
'duration': ('duration', {self._kilo_or_none}),
|
||||
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||
'view_count': ('playCount', {int_or_none}),
|
||||
'like_count': ('likeCount', {int_or_none}),
|
||||
'comment_count': ('commentCount', {int_or_none}),
|
||||
|
@ -588,7 +584,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
|||
'description': ('description', {str}),
|
||||
'creator': ('dj', 'brand', {str}),
|
||||
'thumbnail': ('coverUrl', {url_or_none}),
|
||||
'timestamp': ('createTime', {self._kilo_or_none}),
|
||||
'timestamp': ('createTime', {int_or_none(scale=1000)}),
|
||||
})
|
||||
|
||||
if not self._yes_playlist(
|
||||
|
@ -598,7 +594,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
|||
return {
|
||||
'id': str(info['mainSong']['id']),
|
||||
'formats': formats,
|
||||
'duration': traverse_obj(info, ('mainSong', 'duration', {self._kilo_or_none})),
|
||||
'duration': traverse_obj(info, ('mainSong', 'duration', {int_or_none(scale=1000)})),
|
||||
**metainfo,
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
|
@ -83,7 +81,7 @@ class ParlerIE(InfoExtractor):
|
|||
'timestamp': ('date_created', {unified_timestamp}),
|
||||
'uploader': ('user', 'name', {strip_or_none}),
|
||||
'uploader_id': ('user', 'username', {str}),
|
||||
'uploader_url': ('user', 'username', {functools.partial(urljoin, 'https://parler.com/')}),
|
||||
'uploader_url': ('user', 'username', {urljoin('https://parler.com/')}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
'comment_count': ('total_comments', {int_or_none}),
|
||||
'repost_count': ('echos', {int_or_none}),
|
||||
|
|
|
@ -198,6 +198,6 @@ class Pr0grammIE(InfoExtractor):
|
|||
'dislike_count': ('down', {int}),
|
||||
'timestamp': ('created', {int}),
|
||||
'upload_date': ('created', {int}, {dt.date.fromtimestamp}, {lambda x: x.strftime('%Y%m%d')}),
|
||||
'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)}),
|
||||
'thumbnail': ('thumb', {urljoin('https://thumb.pr0gramm.com')}),
|
||||
}),
|
||||
}
|
||||
|
|
|
@ -187,4 +187,4 @@ class RTVSLOShowIE(InfoExtractor):
|
|||
return self.playlist_from_matches(
|
||||
re.findall(r'<a [^>]*\bhref="(/arhiv/[^"]+)"', webpage),
|
||||
playlist_id, self._html_extract_title(webpage),
|
||||
getter=lambda x: urljoin('https://365.rtvslo.si', x), ie=RTVSLOIE)
|
||||
getter=urljoin('https://365.rtvslo.si'), ie=RTVSLOIE)
|
||||
|
|
|
@ -136,7 +136,7 @@ class TeamcocoIE(TeamcocoBaseIE):
|
|||
'blocks', lambda _, v: v['name'] in ('meta-tags', 'video-player', 'video-info'), 'props', {dict})))
|
||||
|
||||
thumbnail = traverse_obj(
|
||||
info, (('image', 'poster'), {lambda x: urljoin('https://teamcoco.com/', x)}), get_all=False)
|
||||
info, (('image', 'poster'), {urljoin('https://teamcoco.com/')}), get_all=False)
|
||||
video_id = traverse_obj(parse_qs(thumbnail), ('id', 0)) or display_id
|
||||
|
||||
formats, subtitles = self._get_formats_and_subtitles(info, video_id)
|
||||
|
|
|
@ -10,7 +10,7 @@ from ..utils.traversal import traverse_obj
|
|||
|
||||
|
||||
def _fmt_url(url):
|
||||
return functools.partial(format_field, template=url, default=None)
|
||||
return format_field(template=url, default=None)
|
||||
|
||||
|
||||
class TelewebionIE(InfoExtractor):
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
import functools
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
|
@ -278,7 +277,7 @@ class VQQSeriesIE(VQQBaseIE):
|
|||
webpage)]
|
||||
|
||||
return self.playlist_from_matches(
|
||||
episode_paths, series_id, ie=VQQVideoIE, getter=functools.partial(urljoin, url),
|
||||
episode_paths, series_id, ie=VQQVideoIE, getter=urljoin(url),
|
||||
title=self._get_clean_title(traverse_obj(webpage_metadata, ('coverInfo', 'title'))
|
||||
or self._og_search_title(webpage)),
|
||||
description=(traverse_obj(webpage_metadata, ('coverInfo', 'description'))
|
||||
|
@ -328,7 +327,7 @@ class WeTvBaseIE(TencentBaseIE):
|
|||
or re.findall(r'<a[^>]+class="play-video__link"[^>]+href="(?P<path>[^"]+)', webpage))
|
||||
|
||||
return self.playlist_from_matches(
|
||||
episode_paths, series_id, ie=ie, getter=functools.partial(urljoin, url),
|
||||
episode_paths, series_id, ie=ie, getter=urljoin(url),
|
||||
title=self._get_clean_title(traverse_obj(webpage_metadata, ('coverInfo', 'title'))
|
||||
or self._og_search_title(webpage)),
|
||||
description=(traverse_obj(webpage_metadata, ('coverInfo', 'description'))
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
import functools
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@ -161,4 +160,4 @@ class TenPlaySeasonIE(InfoExtractor):
|
|||
return self.playlist_from_matches(
|
||||
self._entries(urljoin(url, episodes_carousel['loadMoreUrl']), playlist_id),
|
||||
playlist_id, traverse_obj(season_info, ('content', 0, 'title', {str})),
|
||||
getter=functools.partial(urljoin, url))
|
||||
getter=urljoin(url))
|
||||
|
|
|
@ -131,4 +131,4 @@ class TheGuardianPodcastPlaylistIE(InfoExtractor):
|
|||
|
||||
return self.playlist_from_matches(
|
||||
self._entries(url, podcast_id), podcast_id, title, description=description,
|
||||
ie=TheGuardianPodcastIE, getter=lambda x: urljoin('https://www.theguardian.com', x))
|
||||
ie=TheGuardianPodcastIE, getter=urljoin('https://www.theguardian.com'))
|
||||
|
|
|
@ -114,7 +114,7 @@ class WeiboBaseIE(InfoExtractor):
|
|||
'thumbnail': ('page_info', 'page_pic', {url_or_none}),
|
||||
'uploader': ('user', 'screen_name', {str}),
|
||||
'uploader_id': ('user', ('id', 'id_str'), {str_or_none}),
|
||||
'uploader_url': ('user', 'profile_url', {lambda x: urljoin('https://weibo.com/', x)}),
|
||||
'uploader_url': ('user', 'profile_url', {urljoin('https://weibo.com/')}),
|
||||
'view_count': ('page_info', 'media_info', 'online_users_number', {int_or_none}),
|
||||
'like_count': ('attitudes_count', {int_or_none}),
|
||||
'repost_count': ('reposts_count', {int_or_none}),
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class YleAreenaIE(InfoExtractor):
|
||||
|
@ -15,9 +16,9 @@ class YleAreenaIE(InfoExtractor):
|
|||
_TESTS = [
|
||||
{
|
||||
'url': 'https://areena.yle.fi/1-4371942',
|
||||
'md5': '932edda0ecf5dfd6423804182d32f8ac',
|
||||
'md5': 'd87e9a1e74e67e009990ddd413e426b4',
|
||||
'info_dict': {
|
||||
'id': '0_a3tjk92c',
|
||||
'id': '1-4371942',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pouchit',
|
||||
'description': 'md5:01071d7056ceec375f63960f90c35366',
|
||||
|
@ -26,37 +27,27 @@ class YleAreenaIE(InfoExtractor):
|
|||
'season_number': 1,
|
||||
'episode': 'Episode 2',
|
||||
'episode_number': 2,
|
||||
'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/0_a3tjk92c/version/100061',
|
||||
'uploader_id': 'ovp@yle.fi',
|
||||
'duration': 1435,
|
||||
'view_count': int,
|
||||
'upload_date': '20181204',
|
||||
'release_date': '20190106',
|
||||
'timestamp': 1543916210,
|
||||
'subtitles': {'fin': [{'url': r're:^https?://', 'ext': 'srt'}]},
|
||||
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
|
||||
'age_limit': 7,
|
||||
'webpage_url': 'https://areena.yle.fi/1-4371942',
|
||||
'release_date': '20190105',
|
||||
'release_timestamp': 1546725660,
|
||||
'duration': 1435,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://areena.yle.fi/1-2158940',
|
||||
'md5': 'cecb603661004e36af8c5188b5212b12',
|
||||
'md5': '6369ddc5e07b5fdaeda27a495184143c',
|
||||
'info_dict': {
|
||||
'id': '1_l38iz9ur',
|
||||
'id': '1-2158940',
|
||||
'ext': 'mp4',
|
||||
'title': 'Albi haluaa vessan',
|
||||
'description': 'md5:15236d810c837bed861fae0e88663c33',
|
||||
'description': 'Albi haluaa vessan.',
|
||||
'series': 'Albi Lumiukko',
|
||||
'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/1_l38iz9ur/version/100021',
|
||||
'uploader_id': 'ovp@yle.fi',
|
||||
'duration': 319,
|
||||
'view_count': int,
|
||||
'upload_date': '20211202',
|
||||
'release_date': '20211215',
|
||||
'timestamp': 1638448202,
|
||||
'subtitles': {},
|
||||
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
|
||||
'age_limit': 0,
|
||||
'webpage_url': 'https://areena.yle.fi/1-2158940',
|
||||
'release_date': '20211215',
|
||||
'release_timestamp': 1639555200,
|
||||
'duration': 319,
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -67,72 +58,125 @@ class YleAreenaIE(InfoExtractor):
|
|||
'title': 'HKO & Mälkki & Tanner',
|
||||
'description': 'md5:b4f1b1af2c6569b33f75179a86eea156',
|
||||
'series': 'Helsingin kaupunginorkesterin konsertteja',
|
||||
'thumbnail': r're:^https?://.+\.jpg$',
|
||||
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
|
||||
'release_date': '20230120',
|
||||
'release_timestamp': 1674242079,
|
||||
'duration': 8004,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://areena.yle.fi/1-72251830',
|
||||
'info_dict': {
|
||||
'id': '1-72251830',
|
||||
'ext': 'mp4',
|
||||
'title': r're:Pentulive 2024 | Pentulive \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
|
||||
'description': 'md5:1f118707d9093bf894a34fbbc865397b',
|
||||
'series': 'Pentulive',
|
||||
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
|
||||
'live_status': 'is_live',
|
||||
'release_date': '20241025',
|
||||
'release_timestamp': 1729875600,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'livestream',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://areena.yle.fi/podcastit/1-71022852',
|
||||
'info_dict': {
|
||||
'id': '1-71022852',
|
||||
'ext': 'mp3',
|
||||
'title': 'Värityspäivä',
|
||||
'description': 'md5:c3a02b0455ec71d32cbe09d32ec161e2',
|
||||
'series': 'Murun ja Paukun ikioma kaupunki',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'release_date': '20240607',
|
||||
'release_timestamp': 1717736400,
|
||||
'duration': 442,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, is_podcast = self._match_valid_url(url).group('id', 'podcast')
|
||||
info = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={})
|
||||
json_ld = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={})
|
||||
video_data = self._download_json(
|
||||
f'https://player.api.yle.fi/v1/preview/{video_id}.json?app_id=player_static_prod&app_key=8930d72170e48303cf5f3867780d549b',
|
||||
video_id, headers={
|
||||
'origin': 'https://areena.yle.fi',
|
||||
'referer': 'https://areena.yle.fi/',
|
||||
'content-type': 'application/json',
|
||||
})
|
||||
})['data']
|
||||
|
||||
# Example title: 'K1, J2: Pouchit | Modernit miehet'
|
||||
season_number, episode_number, episode, series = self._search_regex(
|
||||
r'K(?P<season_no>\d+),\s*J(?P<episode_no>\d+):?\s*\b(?P<episode>[^|]+)\s*|\s*(?P<series>.+)',
|
||||
info.get('title') or '', 'episode metadata', group=('season_no', 'episode_no', 'episode', 'series'),
|
||||
json_ld.get('title') or '', 'episode metadata', group=('season_no', 'episode_no', 'episode', 'series'),
|
||||
default=(None, None, None, None))
|
||||
description = traverse_obj(video_data, ('data', 'ongoing_ondemand', 'description', 'fin'), expected_type=str)
|
||||
description = traverse_obj(video_data, ('ongoing_ondemand', 'description', 'fin', {str}))
|
||||
|
||||
subtitles = {}
|
||||
for sub in traverse_obj(video_data, ('data', 'ongoing_ondemand', 'subtitles', ...)):
|
||||
if url_or_none(sub.get('uri')):
|
||||
for sub in traverse_obj(video_data, ('ongoing_ondemand', 'subtitles', lambda _, v: url_or_none(v['uri']))):
|
||||
subtitles.setdefault(sub.get('language') or 'und', []).append({
|
||||
'url': sub['uri'],
|
||||
'ext': 'srt',
|
||||
'name': sub.get('kind'),
|
||||
})
|
||||
|
||||
if is_podcast:
|
||||
info_dict = {
|
||||
'url': video_data['data']['ongoing_ondemand']['media_url'],
|
||||
}
|
||||
elif kaltura_id := traverse_obj(video_data, ('data', 'ongoing_ondemand', 'kaltura', 'id', {str})):
|
||||
info_dict = {
|
||||
info_dict, metadata = {}, {}
|
||||
if is_podcast and traverse_obj(video_data, ('ongoing_ondemand', 'media_url', {url_or_none})):
|
||||
metadata = video_data['ongoing_ondemand']
|
||||
info_dict['url'] = metadata['media_url']
|
||||
elif traverse_obj(video_data, ('ongoing_event', 'manifest_url', {url_or_none})):
|
||||
metadata = video_data['ongoing_event']
|
||||
metadata.pop('duration', None) # Duration is not accurate for livestreams
|
||||
info_dict['live_status'] = 'is_live'
|
||||
elif traverse_obj(video_data, ('ongoing_ondemand', 'manifest_url', {url_or_none})):
|
||||
metadata = video_data['ongoing_ondemand']
|
||||
# XXX: Has all externally-hosted Kaltura content been moved to native hosting?
|
||||
elif kaltura_id := traverse_obj(video_data, ('ongoing_ondemand', 'kaltura', 'id', {str})):
|
||||
metadata = video_data['ongoing_ondemand']
|
||||
info_dict.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(f'kaltura:1955031:{kaltura_id}', {'source_url': url}),
|
||||
'ie_key': KalturaIE.ie_key(),
|
||||
}
|
||||
})
|
||||
elif traverse_obj(video_data, ('gone', {dict})):
|
||||
self.raise_no_formats('The content is no longer available', expected=True, video_id=video_id)
|
||||
metadata = video_data['gone']
|
||||
else:
|
||||
formats, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
video_data['data']['ongoing_ondemand']['manifest_url'], video_id, 'mp4', m3u8_id='hls')
|
||||
raise ExtractorError('Unable to extract content')
|
||||
|
||||
if not info_dict.get('url') and metadata.get('manifest_url'):
|
||||
info_dict['formats'], subs = self._extract_m3u8_formats_and_subtitles(
|
||||
metadata['manifest_url'], video_id, 'mp4', m3u8_id='hls')
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
info_dict = {'formats': formats}
|
||||
|
||||
return {
|
||||
**info_dict,
|
||||
**traverse_obj(json_ld, {
|
||||
'title': 'title',
|
||||
'thumbnails': ('thumbnails', ..., {'url': 'url'}),
|
||||
}),
|
||||
'id': video_id,
|
||||
'title': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'title', 'fin'), expected_type=str)
|
||||
or episode or info.get('title')),
|
||||
'title': episode,
|
||||
'description': description,
|
||||
'series': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'series', 'title', 'fin'), expected_type=str)
|
||||
or series),
|
||||
'series': series,
|
||||
'season_number': (int_or_none(self._search_regex(r'Kausi (\d+)', description, 'season number', default=None))
|
||||
or int_or_none(season_number)),
|
||||
'episode_number': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'episode_number'), expected_type=int_or_none)
|
||||
or int_or_none(episode_number)),
|
||||
'thumbnails': traverse_obj(info, ('thumbnails', ..., {'url': 'url'})),
|
||||
'age_limit': traverse_obj(video_data, ('data', 'ongoing_ondemand', 'content_rating', 'age_restriction'), expected_type=int_or_none),
|
||||
'episode_number': int_or_none(episode_number),
|
||||
'subtitles': subtitles or None,
|
||||
'release_date': unified_strdate(traverse_obj(video_data, ('data', 'ongoing_ondemand', 'start_time'), expected_type=str)),
|
||||
**traverse_obj(metadata, {
|
||||
'title': ('title', 'fin', {str}),
|
||||
'description': ('description', 'fin', {str}),
|
||||
'series': ('series', 'title', 'fin', {str}),
|
||||
'episode_number': ('episode_number', {int_or_none}),
|
||||
'age_limit': ('content_rating', 'age_restriction', {int_or_none}),
|
||||
'release_timestamp': ('start_time', {parse_iso8601}),
|
||||
'duration': ('duration', 'duration_in_seconds', {int_or_none}),
|
||||
}),
|
||||
**info_dict,
|
||||
}
|
||||
|
|
|
@ -247,7 +247,7 @@ class YouPornListBase(InfoExtractor):
|
|||
if not html:
|
||||
return
|
||||
for element in get_elements_html_by_class('video-title', html):
|
||||
if video_url := traverse_obj(element, ({extract_attributes}, 'href', {lambda x: urljoin(url, x)})):
|
||||
if video_url := traverse_obj(element, ({extract_attributes}, 'href', {urljoin(url)})):
|
||||
yield self.url_result(video_url)
|
||||
|
||||
if page_num is not None:
|
||||
|
|
|
@ -3637,7 +3637,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
'author_is_verified': ('author', 'isVerified', {bool}),
|
||||
'author_url': ('author', 'channelCommand', 'innertubeCommand', (
|
||||
('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'),
|
||||
), {lambda x: urljoin('https://www.youtube.com', x)}),
|
||||
), {urljoin('https://www.youtube.com')}),
|
||||
}, get_all=False),
|
||||
'is_favorited': (None if toolbar_entity_payload is None else
|
||||
toolbar_entity_payload.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'),
|
||||
|
|
|
@ -700,7 +700,8 @@ def create_parser():
|
|||
selection.add_option(
|
||||
'--break-on-existing',
|
||||
action='store_true', dest='break_on_existing', default=False,
|
||||
help='Stop the download process when encountering a file that is in the archive')
|
||||
help='Stop the download process when encountering a file that is in the archive '
|
||||
'supplied with the --download-archive option')
|
||||
selection.add_option(
|
||||
'--no-break-on-existing',
|
||||
action='store_false', dest='break_on_existing',
|
||||
|
|
Loading…
Reference in New Issue
Block a user