Compare commits

..

13 Commits

Author SHA1 Message Date
Mozi
77debf548f FifaContent: no positive redirection detection 2024-10-30 17:42:19 +00:00
Mozi
7f94f2fceb merge 'master' 2024-10-30 17:37:36 +00:00
Mozi
5f6a442047 Rewrite code; DRM content is being only matching 2024-10-30 17:37:09 +00:00
bashonly
5bc5fb2835
Allow thumbnails with .jpe extension (#11408)
Fix 5ce582448e

Closes #11407
Authored by: bashonly
2024-10-29 23:25:46 +00:00
bashonly
f93c16395c
[utils] Fix find_element by class (#11402)
Fix d710a6ca7c

Authored by: bashonly
2024-10-29 23:24:17 +00:00
sepro
f101e5d34c
[ie/Soundcloud] Extract artists (#11377)
Closes #11375
Authored by: seproDev
2024-10-28 12:08:46 +01:00
JAB
330335386d
[ie/ccma] Support new 3cat.cat domain (#11222)
Closes #11163
Authored by: JoseAngelB
2024-10-27 23:18:25 +00:00
bashonly
0a3991edae
[devscripts] make_changelog: Parse full commit message for fixes (#11366)
Authored by: Grub4K, bashonly

Co-authored-by: Simon Sawicki <contact@grub4k.xyz>
2024-10-27 23:00:02 +00:00
Simon Sawicki
5c880ef42e
[core] Populate format sorting fields before dependent fields (#11353)
Authored by: Grub4K
2024-10-27 00:17:26 +02:00
bashonly
21cdcf03a2
[ie] Resolve language to ISO639-2 for ISM formats (#11359)
Closes #11356
Authored by: bashonly
2024-10-26 18:02:21 +00:00
Wehzuri
6abef74232
[ie/NiconicoUser] Fix extractor (#11324)
Partially addresses #10084

Authored by: Wesley107772
2024-10-26 13:52:09 +00:00
kylegustavo
9acf79c91a
[ie/CNN] Fix extractor (#10185)
Closes #2640, Closes #9719
Authored by: kylegustavo, seproDev

Co-authored-by: sepro <sepro@sepr0.com>
2024-10-26 06:06:28 +02:00
bashonly
57212a5f97
[ie/vimeo] Fix API retries (fix c1c9bb4adb) (#11351)
Authored by: bashonly
2024-10-25 18:37:18 +00:00
13 changed files with 491 additions and 432 deletions

View File

@@ -216,5 +216,23 @@
"action": "add", "action": "add",
"when": "d784464399b600ba9516bbcec6286f11d68974dd", "when": "d784464399b600ba9516bbcec6286f11d68974dd",
"short": "[priority] **The minimum *required* Python version has been raised to 3.9**\nPython 3.8 reached its end-of-life on 2024.10.07, and yt-dlp has now removed support for it. As an unfortunate side effect, the official `yt-dlp.exe` and `yt-dlp_x86.exe` binaries are no longer supported on Windows 7. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)" "short": "[priority] **The minimum *required* Python version has been raised to 3.9**\nPython 3.8 reached its end-of-life on 2024.10.07, and yt-dlp has now removed support for it. As an unfortunate side effect, the official `yt-dlp.exe` and `yt-dlp_x86.exe` binaries are no longer supported on Windows 7. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)"
},
{
"action": "change",
"when": "914af9a0cf51c9a3f74aa88d952bee8334c67511",
"short": "Expand paths in `--plugin-dirs` (#11334)",
"authors": ["bashonly"]
},
{
"action": "change",
"when": "c29f5a7fae93a08f3cfbb6127b2faa75145b06a0",
"short": "[ie/generic] Do not impersonate by default (#11336)",
"authors": ["bashonly"]
},
{
"action": "change",
"when": "57212a5f97ce367590aaa5c3e9a135eead8f81f7",
"short": "[ie/vimeo] Fix API retries (#11351)",
"authors": ["bashonly"]
} }
] ]

View File

@@ -71,14 +71,13 @@ class CommitGroup(enum.Enum):
def get(cls, value: str) -> tuple[CommitGroup | None, str | None]: def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
group, _, subgroup = (group.strip().lower() for group in value.partition('/')) group, _, subgroup = (group.strip().lower() for group in value.partition('/'))
result = cls.group_lookup().get(group) if result := cls.group_lookup().get(group):
if not result: return result, subgroup or None
if subgroup:
return None, value
subgroup = group
result = cls.subgroup_lookup().get(subgroup)
return result, subgroup or None if subgroup:
return None, value
return cls.subgroup_lookup().get(group), group or None
@dataclass @dataclass
@@ -136,8 +135,7 @@ class Changelog:
first = False first = False
yield '\n<details><summary><h3>Changelog</h3></summary>\n' yield '\n<details><summary><h3>Changelog</h3></summary>\n'
group = groups[item] if group := groups[item]:
if group:
yield self.format_module(item.value, group) yield self.format_module(item.value, group)
if self._collapsible: if self._collapsible:
@@ -253,7 +251,7 @@ class CommitRange:
''', re.VERBOSE | re.DOTALL) ''', re.VERBOSE | re.DOTALL)
EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE) EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})') REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert|Improve)\s+([\da-f]{40})') FIXES_RE = re.compile(r'(?i:(?:bug\s*)?fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Improve)\s+([\da-f]{40})')
UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)') UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')
def __init__(self, start, end, default_author=None): def __init__(self, start, end, default_author=None):
@@ -287,11 +285,16 @@ class CommitRange:
short = next(lines) short = next(lines)
skip = short.startswith('Release ') or short == '[version] update' skip = short.startswith('Release ') or short == '[version] update'
fix_commitish = None
if match := self.FIXES_RE.search(short):
fix_commitish = match.group(1)
authors = [default_author] if default_author else [] authors = [default_author] if default_author else []
for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR): for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
match = self.AUTHOR_INDICATOR_RE.match(line) if match := self.AUTHOR_INDICATOR_RE.match(line):
if match:
authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold) authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)
if not fix_commitish and (match := self.FIXES_RE.fullmatch(line)):
fix_commitish = match.group(1)
commit = Commit(commit_hash, short, authors) commit = Commit(commit_hash, short, authors)
if skip and (self._start or not i): if skip and (self._start or not i):
@@ -301,21 +304,17 @@ class CommitRange:
logger.debug(f'Reached Release commit, breaking: {commit}') logger.debug(f'Reached Release commit, breaking: {commit}')
break break
revert_match = self.REVERT_RE.fullmatch(commit.short) if match := self.REVERT_RE.fullmatch(commit.short):
if revert_match: reverts[match.group(1)] = commit
reverts[revert_match.group(1)] = commit
continue continue
fix_match = self.FIXES_RE.search(commit.short) if fix_commitish:
if fix_match: fixes[fix_commitish].append(commit)
commitish = fix_match.group(1)
fixes[commitish].append(commit)
commits[commit.hash] = commit commits[commit.hash] = commit
for commitish, revert_commit in reverts.items(): for commitish, revert_commit in reverts.items():
reverted = commits.pop(commitish, None) if reverted := commits.pop(commitish, None):
if reverted:
logger.debug(f'{commitish} fully reverted {reverted}') logger.debug(f'{commitish} fully reverted {reverted}')
else: else:
commits[revert_commit.hash] = revert_commit commits[revert_commit.hash] = revert_commit
@@ -461,8 +460,7 @@ def create_changelog(args):
logger.info(f'Loaded {len(commits)} commits') logger.info(f'Loaded {len(commits)} commits')
new_contributors = get_new_contributors(args.contributors_path, commits) if new_contributors := get_new_contributors(args.contributors_path, commits):
if new_contributors:
if args.contributors: if args.contributors:
write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a') write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a')
logger.info(f'New contributors: {", ".join(new_contributors)}') logger.info(f'New contributors: {", ".join(new_contributors)}')

View File

@@ -2849,13 +2849,10 @@ class YoutubeDL:
sanitize_string_field(fmt, 'format_id') sanitize_string_field(fmt, 'format_id')
sanitize_numeric_fields(fmt) sanitize_numeric_fields(fmt)
fmt['url'] = sanitize_url(fmt['url']) fmt['url'] = sanitize_url(fmt['url'])
if fmt.get('ext') is None: FormatSorter._fill_sorting_fields(fmt)
fmt['ext'] = determine_ext(fmt['url']).lower()
if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'): if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
if fmt.get('acodec') is None: if fmt.get('acodec') is None:
fmt['acodec'] = fmt['ext'] fmt['acodec'] = fmt['ext']
if fmt.get('protocol') is None:
fmt['protocol'] = determine_protocol(fmt)
if fmt.get('resolution') is None: if fmt.get('resolution') is None:
fmt['resolution'] = self.format_resolution(fmt, default=None) fmt['resolution'] = self.format_resolution(fmt, default=None)
if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none': if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none':

View File

@@ -401,8 +401,6 @@ from .cmt import CMTIE
from .cnbc import CNBCVideoIE from .cnbc import CNBCVideoIE
from .cnn import ( from .cnn import (
CNNIE, CNNIE,
CNNArticleIE,
CNNBlogsIE,
CNNIndonesiaIE, CNNIndonesiaIE,
) )
from .comedycentral import ( from .comedycentral import (
@@ -646,9 +644,7 @@ from .fczenit import FczenitIE
from .fifa import ( from .fifa import (
FifaArticleIE, FifaArticleIE,
FifaContentIE, FifaContentIE,
FifaIE, FifaPlayerIE,
FifaMovieIE,
FifaSeriesIE,
) )
from .filmon import ( from .filmon import (
FilmOnChannelIE, FilmOnChannelIE,

View File

@@ -12,53 +12,86 @@ from ..utils import (
class CCMAIE(InfoExtractor): class CCMAIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)' IE_DESC = '3Cat, TV3 and Catalunya Ràdio'
_VALID_URL = r'https?://(?:www\.)?3cat\.cat/(?:3cat|tv3/sx3)/[^/?#]+/(?P<type>video|audio)/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/', # ccma.cat/tv3/alacarta/ URLs redirect to 3cat.cat/3cat/
'url': 'https://www.3cat.cat/3cat/lespot-de-la-marato-de-tv3/video/5630208/',
'md5': '7296ca43977c8ea4469e719c609b0871', 'md5': '7296ca43977c8ea4469e719c609b0871',
'info_dict': { 'info_dict': {
'id': '5630208', 'id': '5630208',
'ext': 'mp4', 'ext': 'mp4',
'title': 'L\'espot de La Marató de TV3', 'title': 'L\'espot de La Marató 2016: Ictus i les lesions medul·lars i cerebrals traumàtiques',
'description': 'md5:f12987f320e2f6e988e9908e4fe97765', 'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
'timestamp': 1478608140, 'timestamp': 1478608140,
'upload_date': '20161108', 'upload_date': '20161108',
'age_limit': 0, 'age_limit': 0,
'alt_title': 'EsportMarató2016WEB_PerPublicar',
'duration': 79,
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/4/6/1478536106664.jpg',
'series': 'Dedicada a l\'ictus i les lesions medul·lars i cerebrals traumàtiques',
'categories': ['Divulgació'],
}, },
}, { }, {
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/', # ccma.cat/catradio/alacarta/ URLs redirect to 3cat.cat/3cat/
'url': 'https://www.3cat.cat/3cat/el-consell-de-savis-analitza-el-derbi/audio/943685/',
'md5': 'fa3e38f269329a278271276330261425', 'md5': 'fa3e38f269329a278271276330261425',
'info_dict': { 'info_dict': {
'id': '943685', 'id': '943685',
'ext': 'mp3', 'ext': 'mp3',
'title': 'El Consell de Savis analitza el derbi', 'title': 'El Consell de Savis analitza el derbi',
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53', 'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
'upload_date': '20170512', 'upload_date': '20161217',
'timestamp': 1494622500, 'timestamp': 1482011700,
'vcodec': 'none', 'vcodec': 'none',
'categories': ['Esports'], 'categories': ['Esports'],
'series': 'Tot gira',
'duration': 821,
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/8/9/1482002602598.jpg',
}, },
}, { }, {
'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/', 'url': 'https://www.3cat.cat/3cat/crims-josep-tallada-lespereu-me-part-1/video/6031387/',
'md5': 'b43c3d3486f430f3032b5b160d80cbc3', 'md5': '27493513d08a3e5605814aee9bb778d2',
'info_dict': { 'info_dict': {
'id': '6031387', 'id': '6031387',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)', 'title': 'T1xC5 - Josep Talleda, l\'"Espereu-me" (part 1)',
'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60', 'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
'timestamp': 1582577700, 'timestamp': 1582577919,
'upload_date': '20200224', 'upload_date': '20200224',
'subtitles': 'mincount:4', 'subtitles': 'mincount:1',
'age_limit': 16, 'age_limit': 13,
'series': 'Crims', 'series': 'Crims',
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/1/9/1582564376991.jpg',
'duration': 3203,
'categories': ['Divulgació'],
'alt_title': 'Crims - 5 - Josep Talleda, l\'"Espereu-me" (1a part) - Josep Talleda, l\'"Espereu-me" (part 1)',
'episode_number': 5,
'episode': 'Episode 5',
},
}, {
'url': 'https://www.3cat.cat/tv3/sx3/una-mosca-volava-per-la-llum/video/5759227/',
'info_dict': {
'id': '5759227',
'ext': 'mp4',
'title': 'Una mosca volava per la llum',
'alt_title': '17Z004Ç UNA MOSCA VOLAVA PER LA LLUM',
'description': 'md5:9ab64276944b0825336f4147f13f7854',
'series': 'Mic',
'upload_date': '20180411',
'timestamp': 1523440105,
'duration': 160,
'age_limit': 0,
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/6/1/1524071667216.jpg',
'categories': ['Música'],
}, },
}] }]
def _real_extract(self, url): def _real_extract(self, url):
media_type, media_id = self._match_valid_url(url).groups() media_type, media_id = self._match_valid_url(url).group('type', 'id')
media = self._download_json( media = self._download_json(
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={ 'http://api-media.3cat.cat/pvideo/media.jsp', media_id, query={
'media': media_type, 'media': media_type,
'idint': media_id, 'idint': media_id,
'format': 'dm', 'format': 'dm',

View File

@@ -1,146 +1,226 @@
import functools
import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from .turner import TurnerBaseIE from ..utils import (
from ..utils import merge_dicts, try_call, url_basename clean_html,
extract_attributes,
int_or_none,
merge_dicts,
parse_duration,
parse_iso8601,
parse_resolution,
try_call,
update_url,
url_or_none,
)
from ..utils.traversal import find_elements, traverse_obj
class CNNIE(TurnerBaseIE): class CNNIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/ _VALID_URL = r'https?://(?:(?:edition|www|money|cnnespanol)\.)?cnn\.com/(?!audio/)(?P<display_id>[^?#]+?)(?:[?#]|$|/index\.html)'
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
_TESTS = [{ _TESTS = [{
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', 'url': 'https://www.cnn.com/2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
'md5': '3e6121ea48df7e2259fe73a0628605c4',
'info_dict': { 'info_dict': {
'id': 'sports/2013/06/09/nadal-1-on-1.cnn', 'id': 'med0e97ad0d154f56e29aa96e57192a14226734b6b',
'display_id': '2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Nadal wins 8th French Open title', 'upload_date': '20240531',
'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', 'description': 'md5:844bcdb0629e1877a7a466c913f4c19c',
'duration': 135, 'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/gettyimages-2151936122.jpg?c=original',
'upload_date': '20130609', 'duration': 373.0,
'timestamp': 1717148586,
'title': 'Borussia Dortmund star Jadon Sancho seeks Wembley redemption after 2020 Euros hurt',
'modified_date': '20240531',
'modified_timestamp': 1717150140,
}, },
'expected_warnings': ['Failed to download m3u8 information'],
}, { }, {
'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29', 'url': 'https://edition.cnn.com/2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
'info_dict': { 'info_dict': {
'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology', 'id': 'me522945c4709b299e5cb8657900a7a21ad3b559f9',
'display_id': '2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
'ext': 'mp4', 'ext': 'mp4',
'title': "Student's epic speech stuns new freshmen", 'description': 'md5:e0120fe5da9ad8259fd707c1cbb64a60',
'description': 'A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from "2001: A Space Odyssey."', 'title': 'Heres how some inmates in closely divided state are now able to vote from jail',
'upload_date': '20130821', 'timestamp': 1718158269,
'upload_date': '20240612',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701554-13565-571-still.jpg?c=original',
'duration': 202.0,
'modified_date': '20240612',
'modified_timestamp': 1718158509,
}, },
'expected_warnings': ['Failed to download m3u8 information'],
}, { }, {
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html', 'url': 'https://edition.cnn.com/2024/06/11/style/king-charles-portrait-vandalized/index.html',
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
'info_dict': { 'info_dict': {
'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln', 'id': 'mef5f52b9e1fe28b1ad192afcbc9206ae984894b68',
'display_id': '2024/06/11/style/king-charles-portrait-vandalized',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Nashville Ep. 1: Hand crafted skateboards', 'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701257-8846-816-still.jpg?c=original',
'description': 'md5:e7223a503315c9f150acac52e76de086', 'description': 'md5:19f78338ccec533db0fa8a4511012dae',
'upload_date': '20141222', 'title': 'Video shows King Charles\' portrait being vandalized by activists',
'timestamp': 1718113852,
'upload_date': '20240611',
'duration': 51.0,
'modified_timestamp': 1718116193,
'modified_date': '20240611',
}, },
'expected_warnings': ['Failed to download m3u8 information'],
}, { }, {
'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html', 'url': 'https://edition.cnn.com/videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
'info_dict': { 'info_dict': {
'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney', 'id': 'mefba13799201b084ea3b1d0f7ca820ae94d4bb5b2',
'display_id': 'videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
'ext': 'mp4', 'ext': 'mp4',
'title': '5 stunning stats about Netflix', 'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/221205163510-robin-meade-sign-off.jpg?c=original',
'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.', 'duration': 158.0,
'upload_date': '20160819', 'title': 'Robin Meade signs off after HLN\'s last broadcast',
'description': 'md5:cff3c62d18d2fbc6c5c75cb029b7353b',
'upload_date': '20221205',
'timestamp': 1670284296,
'modified_timestamp': 1670332404,
'modified_date': '20221206',
}, },
'params': { 'params': {'format': 'direct'},
# m3u8 download }, {
'skip_download': True, 'url': 'https://cnnespanol.cnn.com/video/ataque-misil-israel-beirut-libano-octubre-trax',
'info_dict': {
'id': 'me484a43722642aa00627b812fe928f2e99c6e2997',
'ext': 'mp4',
'display_id': 'video/ataque-misil-israel-beirut-libano-octubre-trax',
'timestamp': 1729501452,
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/ataqeubeirut-1.jpg?c=original',
'description': 'md5:256ee7137d161f776cda429654135e52',
'upload_date': '20241021',
'duration': 31.0,
'title': 'VIDEO | Israel lanza un nuevo ataque sobre Beirut',
'modified_date': '20241021',
'modified_timestamp': 1729501530,
}, },
}, { }, {
'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk', 'url': 'https://edition.cnn.com/2024/10/16/politics/kamala-harris-fox-news-interview/index.html',
'only_matching': True, 'info_dict': {
}, { 'id': '2024/10/16/politics/kamala-harris-fox-news-interview',
'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg', },
'only_matching': True, 'playlist_count': 2,
}, { 'playlist': [{
'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn', 'md5': '073ffab87b8bef97c9913e71cc18ef9e',
'only_matching': True, 'info_dict': {
'id': 'me19d548fdd54df0924087039283128ef473ab397d',
'ext': 'mp4',
'title': '\'I\'m not finished\': Harris interview with Fox News gets heated',
'display_id': 'kamala-harris-fox-news-interview-ebof-digvid',
'description': 'md5:e7dd3d1a04df916062230b60ca419a0a',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/harris-20241016234916617.jpg?c=original',
'duration': 173.0,
'timestamp': 1729122182,
'upload_date': '20241016',
'modified_timestamp': 1729194706,
'modified_date': '20241017',
},
'params': {'format': 'direct'},
}, {
'md5': '11604ab4af83b650826753f1ccb8ecff',
'info_dict': {
'id': 'med04507d8ca3da827001f63d22af321ec29c7d97b',
'ext': 'mp4',
'title': '\'Wise\': Buttigieg on Harris\' handling of interview question about gender transition surgery',
'display_id': 'pete-buttigieg-harris-fox-newssrc-digvid',
'description': 'md5:602a8a7e853ed5e574acd3159428c98e',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/buttigieg-20241017040412074.jpg?c=original',
'duration': 145.0,
'timestamp': 1729137765,
'upload_date': '20241017',
'modified_timestamp': 1729138184,
'modified_date': '20241017',
},
'params': {'format': 'direct'},
}],
}] }]
_CONFIG = {
# http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml
'edition': {
'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml',
'media_src': 'http://pmd.cdn.turner.com/cnn/big',
},
# http://money.cnn.com/.element/apps/cvp2/cfg/config.xml
'money': {
'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml',
'media_src': 'http://ht3.cdn.turner.com/money/big',
},
}
def _extract_timestamp(self, video_data):
# TODO: fix timestamp extraction
return None
def _real_extract(self, url): def _real_extract(self, url):
sub_domain, path, page_title = self._match_valid_url(url).groups() display_id = self._match_valid_url(url).group('display_id')
if sub_domain not in ('money', 'edition'): webpage = self._download_webpage(url, display_id)
sub_domain = 'edition' app_id = traverse_obj(
config = self._CONFIG[sub_domain] self._search_json(r'window\.env\s*=', webpage, 'window env', display_id, default={}),
return self._extract_cvp_info( ('TOP_AUTH_SERVICE_APP_ID', {str}))
config['data_src'] % path, page_title, {
'default': { entries = []
'media_src': config['media_src'], for player_data in traverse_obj(webpage, (
}, {find_elements(tag='div', attr='data-component-name', value='video-player', html=True)},
'f4m': { ..., {extract_attributes}, all, lambda _, v: v['data-media-id'])):
'host': 'cnn-vh.akamaihd.net', media_id = player_data['data-media-id']
}, parent_uri = player_data.get('data-video-resource-parent-uri')
formats, subtitles = [], {}
video_data = {}
if parent_uri:
video_data = self._download_json(
'https://fave.api.cnn.io/v1/video', media_id, fatal=False,
query={
'id': media_id,
'stellarUri': parent_uri,
})
for direct_url in traverse_obj(video_data, ('files', ..., 'fileUri', {url_or_none})):
resolution, bitrate = None, None
if mobj := re.search(r'-(?P<res>\d+x\d+)_(?P<tbr>\d+)k\.mp4', direct_url):
resolution, bitrate = mobj.group('res', 'tbr')
formats.append({
'url': direct_url,
'format_id': 'direct',
'quality': 1,
'tbr': int_or_none(bitrate),
**parse_resolution(resolution),
})
for sub_data in traverse_obj(video_data, (
'closedCaptions', 'types', lambda _, v: url_or_none(v['track']['url']), 'track')):
subtitles.setdefault(sub_data.get('lang') or 'en', []).append({
'url': sub_data['url'],
'name': sub_data.get('label'),
})
if app_id:
media_data = self._download_json(
f'https://medium.ngtv.io/v2/media/{media_id}/desktop', media_id, fatal=False,
query={'appId': app_id})
m3u8_url = traverse_obj(media_data, (
'media', 'desktop', 'unprotected', 'unencrypted', 'url', {url_or_none}))
if m3u8_url:
fmts, subs = self._extract_m3u8_formats_and_subtitles(
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
entries.append({
**traverse_obj(player_data, {
'title': ('data-headline', {clean_html}),
'description': ('data-description', {clean_html}),
'duration': ('data-duration', {parse_duration}),
'timestamp': ('data-publish-date', {parse_iso8601}),
'thumbnail': (
'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none},
{functools.partial(update_url, query='c=original')}),
'display_id': 'data-video-slug',
}),
**traverse_obj(video_data, {
'timestamp': ('dateCreated', 'uts', {int_or_none(scale=1000)}),
'description': ('description', {clean_html}),
'title': ('headline', {str}),
'modified_timestamp': ('lastModified', 'uts', {int_or_none(scale=1000)}),
'duration': ('trt', {int_or_none}),
}),
'id': media_id,
'formats': formats,
'subtitles': subtitles,
}) })
if len(entries) == 1:
return {
**entries[0],
'display_id': display_id,
}
class CNNBlogsIE(InfoExtractor): return self.playlist_result(entries, display_id)
_VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+'
_TEST = {
'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/',
'md5': '3e56f97b0b6ffb4b79f4ea0749551084',
'info_dict': {
'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn',
'ext': 'mp4',
'title': 'Criminalizing journalism?',
'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
'upload_date': '20140209',
},
'expected_warnings': ['Failed to download m3u8 information'],
'add_ie': ['CNN'],
}
def _real_extract(self, url):
webpage = self._download_webpage(url, url_basename(url))
cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
return self.url_result(cnn_url, CNNIE.ie_key())
class CNNArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)'
_TEST = {
'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
'info_dict': {
'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
'ext': 'mp4',
'title': 'Obama: Cyberattack not an act of war',
'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
'upload_date': '20141221',
},
'expected_warnings': ['Failed to download m3u8 information'],
'add_ie': ['CNN'],
}
def _real_extract(self, url):
webpage = self._download_webpage(url, url_basename(url))
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
class CNNIndonesiaIE(InfoExtractor): class CNNIndonesiaIE(InfoExtractor):

View File

@@ -47,6 +47,7 @@ from ..utils import (
FormatSorter, FormatSorter,
GeoRestrictedError, GeoRestrictedError,
GeoUtils, GeoUtils,
ISO639Utils,
LenientJSONDecoder, LenientJSONDecoder,
Popen, Popen,
RegexNotFoundError, RegexNotFoundError,
@@ -3071,7 +3072,11 @@ class InfoExtractor:
url_pattern = stream.attrib['Url'] url_pattern = stream.attrib['Url']
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
stream_name = stream.get('Name') stream_name = stream.get('Name')
stream_language = stream.get('Language', 'und') # IsmFD expects ISO 639 Set 2 language codes (3-character length)
# See: https://github.com/yt-dlp/yt-dlp/issues/11356
stream_language = stream.get('Language') or 'und'
if len(stream_language) != 3:
stream_language = ISO639Utils.short2long(stream_language) or 'und'
for track in stream.findall('QualityLevel'): for track in stream.findall('QualityLevel'):
KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'} KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'}
fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag')) fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))

View File

@@ -5,6 +5,7 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
determine_ext, determine_ext,
float_or_none,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
strip_or_none, strip_or_none,
@@ -14,9 +15,7 @@ from ..utils import (
from ..utils.traversal import traverse_obj from ..utils.traversal import traverse_obj
class FifaContentIE(InfoExtractor): class FifaBaseIE(InfoExtractor):
_VALID_URL = r'https?://(www\.)?plus\.fifa\.com/(?P<locale>\w{2})/content/(?P<display_id>[\w-]+)/(?P<id>[\w-]+)/?(?:[#?]|$)'
def _real_initialize(self): def _real_initialize(self):
self._HEADERS = { self._HEADERS = {
'content-type': 'application/json; charset=UTF-8', 'content-type': 'application/json; charset=UTF-8',
@@ -54,18 +53,12 @@ class FifaContentIE(InfoExtractor):
def _call_api(self, path, video_id, note=None, headers=None, query=None, data=None): def _call_api(self, path, video_id, note=None, headers=None, query=None, data=None):
return self._download_json( return self._download_json(
f'https://www.plus.fifa.com/flux-capacitor/api/v1//{path}', video_id, note, headers={ f'https://www.plus.fifa.com/{path}', video_id, note, headers={
**self._HEADERS, **self._HEADERS,
**(headers or {}), **(headers or {}),
}, query=query, data=data) }, query=query, data=data)
def _real_extract(self, url): def _extract_video(self, video_info, video_id):
urlh = self._request_webpage(url, self._match_id(url))
video_id, display_id, locale = self._match_valid_url(urlh.url).group('id', 'display_id', 'locale')
video_info = self._call_api(
'videoasset', video_id, 'Downloading video asset', query={'catalog': video_id})[0]
formats = [] formats = []
subtitles = {} subtitles = {}
@@ -74,12 +67,11 @@ class FifaContentIE(InfoExtractor):
'mpd/cenc+h264;q=0.9, mpd/clear+h264;q=0.7, mp4/;q=0.1', 'mpd/cenc+h264;q=0.9, mpd/clear+h264;q=0.7, mp4/;q=0.1',
]: ]:
session_info = self._call_api( session_info = self._call_api(
'streaming/session', video_id, 'Getting streaming session', 'flux-capacitor/api/v1/streaming/session', video_id, 'Getting streaming session',
headers={'x-chili-accept-stream': stream_type}, headers={'x-chili-accept-stream': stream_type},
data=json.dumps({'videoAssetId': video_info['id'], 'autoPlay': False}).encode()) data=json.dumps({'videoAssetId': video_info['id'], 'autoPlay': False}).encode())
streams_info = self._call_api( streams_info = self._call_api(
'streaming/urls', video_id, 'Getting streaming urls', 'flux-capacitor/api/v1/streaming/urls', video_id, 'Getting streaming urls',
headers={'x-chili-streaming-session': session_info['id']}) headers={'x-chili-streaming-session': session_info['id']})
for playlist_url in traverse_obj(streams_info, (..., 'url')): for playlist_url in traverse_obj(streams_info, (..., 'url')):
@@ -99,10 +91,11 @@ class FifaContentIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': video_info['title'], 'title': strip_or_none(video_info['title']),
'display_id': display_id, 'duration': float_or_none(video_info.get('duration'), scale=1000),
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'age_limit': traverse_obj(video_info, ('parental', 'age', {int_or_none})),
'thumbnails': [{ 'thumbnails': [{
'url': update_url_query(x, {'width': 1408}), 'url': update_url_query(x, {'width': 1408}),
'width': 1408, 'width': 1408,
@ -110,161 +103,154 @@ class FifaContentIE(InfoExtractor):
} }
class FifaBaseIE(InfoExtractor): class FifaPlayerIE(FifaBaseIE):
@functools.cached_property _VALID_URL = r'https?://(www\.)?plus\.fifa\.com/(?:\w{2})/player/(?P<id>[\w-]+)/?\?(?:[^#]+&)?catalogId=(?P<display_id>[\w-]+)'
def _preconnect_link(self):
return self._search_regex(
r'<link\b[^>]+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"',
self._download_webpage('https://fifa.com/', None), 'Preconnect Link')
def _call_api(self, path, video_id, note=None, query=None, fatal=True):
return self._download_json(
f'{self._preconnect_link}/{path}', video_id, note, query=query, fatal=fatal)
class FifaIE(FifaBaseIE):
_VALID_URL = r'https?://(www\.)?fifa\.com/(fifaplus/)?(?P<locale>\w{2})/watch/(?P<id>[-\w]+)/?(?:[#?]|$)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y', 'url': 'https://www.plus.fifa.com/en/player/f67b9d46-38c3-4e38-bbf3-89cf14cbcc1a?catalogId=b9c32230-1426-46d0-8448-ca824ae48603&entryPoint=Slider',
'info_dict': { 'info_dict': {
'id': 'fee2f7e8-92fa-42c5-805c-a2c949015eae', 'id': 'f67b9d46-38c3-4e38-bbf3-89cf14cbcc1a',
'title': 'Italy v France | Final | 2006 FIFA World Cup Germany™ | Full Match Replay',
'display_id': 'italy-v-france-final-2006-fifa-world-cup-germany-full-match-replay',
'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408',
},
'params': {
'skip_download': 'm3u8',
'ignore_no_formats_error': True,
},
'expected_warnings': [
'Requested format is not available',
'This video is DRM protected',
],
}, {
'url': 'https://www.fifa.com/fifaplus/pt/watch/1cg5r5Qt6Qt12ilkDgb1sV',
'info_dict': {
'id': 'd4f4a2cb-5966-4af7-8a05-98ef4732af2b',
'title': 'Brazil v Germany | Semi-finals | 2014 FIFA World Cup Brazil™ | Extended Highlights',
'display_id': 'brasil-x-alemanha-semifinais-copa-do-mundo-fifa-brasil-2014-compacto',
'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408',
},
'params': {
'skip_download': 'm3u8',
'ignore_no_formats_error': True,
},
'expected_warnings': [
'Requested format is not available',
'This video is DRM protected',
],
}, {
'url': 'https://www.fifa.com/fifaplus/fr/watch/3C6gQH9C2DLwzNx7BMRQdp',
'info_dict': {
'id': '3C6gQH9C2DLwzNx7BMRQdp',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Josimar goal against Northern Ireland | Classic Goals', 'title': 'Trailer | HD Cutz',
'age_limit': 0,
'duration': 195.84,
'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408',
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'skip': 'HTTP Error 403: Forbidden',
}, { }, {
'url': 'https://www.fifa.com/fifaplus/en/watch/2KhLLn6aiGW3nr8sNm8Hkv', 'url': 'https://www.plus.fifa.com/en/player/af65939f-bbce-4b8f-8462-5140af533c5f?catalogId=fac6685c-a900-4e78-b5cd-192af5131ffe&entryPoint=Slider',
'md5': '2c4f5c591448d372f6ba85b8f3be37df',
'info_dict': { 'info_dict': {
'id': '2KhLLn6aiGW3nr8sNm8Hkv', 'id': 'af65939f-bbce-4b8f-8462-5140af533c5f',
'ext': 'mp4', 'ext': 'mp4',
'title': "Le Sommer: Lyon-Barcelona a beautiful final for women's football", 'title': 'Trailer | Bravas de Juárez',
}, 'age_limit': 0,
'params': {'skip_download': 'm3u8'}, 'duration': 73.984,
'skip': 'HTTP Error 403: Forbidden',
}, {
'url': 'https://www.fifa.com/fifaplus/en/watch/4V8H8qv7QM1LNVk5gUwYFa',
'info_dict': {
'id': '709abaec-5eef-4ad8-a02d-19a8932f42a2',
'title': "Christine Sinclair at 19 | FIFA U-19 Women's World Championship Canada 2002™",
'display_id': 'christine-sinclair-at-19-fifa-u-19-womens-world-championship-canada-2002',
'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408', 'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408',
}, },
'params': {
'skip_download': 'm3u8',
'ignore_no_formats_error': True,
},
'expected_warnings': [
'Requested format is not available',
'This video is DRM protected',
],
}, { }, {
'url': 'https://www.fifa.com/fifaplus/en/watch/d85632f9-7009-4ea0-aaf1-8d6847e4a148', 'url': 'https://plus.fifa.com/en/player/eeebdd38-5d51-4891-8307-ab5dd62c2c32?catalogId=ed3b2dcb-6886-4b34-8ba7-c8800027f7dd',
'info_dict': { 'only_matching': True,
'id': 'bbe5d2a3-3dfd-4283-a1af-3a66022e8254',
'title': 'Croatia v Australia | Group F | 2006 FIFA World Cup Germany™ | Full Match Replay',
'display_id': 'croatia-v-australia-or-group-f-or-2006-fifa-world-cup',
'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408',
},
'params': {
'skip_download': 'm3u8',
'ignore_no_formats_error': True,
},
'expected_warnings': [
'Requested format is not available',
'This video is DRM protected',
],
}, {
'url': 'https://www.fifa.com/fifaplus/pt/watch/Ny88zzqsVnxCBUJ6fZzPy',
'info_dict': {
'id': '3d2612ff-c06f-4a7e-a2d7-ec73504515b5',
'title': 'The Happiest Man in the World',
'display_id': 'o-homem-mais-feliz-do-mundo',
'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408',
},
'params': {
'skip_download': 'm3u8',
'ignore_no_formats_error': True,
},
'expected_warnings': [
'Requested format is not available',
'This video is DRM protected',
],
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id, locale = self._match_valid_url(url).group('id', 'locale') video_id, catelog_id = self._match_valid_url(url).group('id', 'display_id')
video_asset = self._call_api(
if redirect_url := traverse_obj(self._call_api( 'flux-capacitor/api/v1/videoasset', video_id,
f'pages/{locale}/watch/{video_id}', video_id, 'Downloading redirection info'), 'redirectUrl'): 'Downloading video asset', query={'catalog': catelog_id})
return self.url_result(redirect_url) video_info = traverse_obj(video_asset, (lambda _, v: v['id'] == video_id), get_all=False)
urlh = self._request_webpage(url, self._match_id(url)) if not video_info:
if urlh.url != url: raise ExtractorError('Unable to extract video info')
return self.url_result(urlh.url) return self._extract_video(video_info, video_id)
video_details = self._call_api(
f'sections/videoDetails/{video_id}', video_id, 'Downloading Video Details', fatal=False)
preplay_parameters = self._call_api(
f'videoPlayerData/{video_id}', video_id, 'Downloading Preplay Parameters')['preplayParameters']
content_data = self._download_json(
'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters),
video_id, 'Downloading Content Data')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id)
return {
'id': video_id,
'title': video_details.get('title'),
'description': video_details.get('description'),
'duration': int_or_none(video_details.get('duration')),
'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')),
'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)),
'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')),
'formats': formats,
'subtitles': subtitles,
}
class FifaArticleIE(FifaBaseIE): class FifaContentIE(FifaBaseIE):
_VALID_URL = r'https?://(www\.)?plus\.fifa\.com/(?:\w{2})/content/(?P<display_id>[\w-]+)/(?P<id>[\w-]+)/?(?:[#?]|$)'
_TESTS = [{
# from https://www.fifa.com/fifaplus/en/watch/series/48PQFX2J4TiDJcxWOxUPho/2ka5yomq8MBvfxe205zdQ9/6H72309PLWXafBIavvPzPQ#ReadMore
'url': 'https://www.plus.fifa.com/en/content/kariobangi/6f3be63f-76d9-4290-9e60-fd62afa95ed7',
'info_dict': {
'id': '6f3be63f-76d9-4290-9e60-fd62afa95ed7',
'title': 'Kariobangi',
'description': 'md5:b57eb012db2b84d482adedda82faf1c8',
'display_id': 'kariobangi',
'thumbnails': 'count:2',
},
'playlist_count': 0,
}, {
# from https://www.fifa.com/fifaplus/en/watch/series/5Ja1dDLuudkFF95OVHcYBG/5epcWav73zMbjTJh2RxIOt/1NIHdDxPlYodbNobjS1iX5
'url': 'https://www.plus.fifa.com/en/content/hd-cutz/b9c32230-1426-46d0-8448-ca824ae48603',
'info_dict': {
'id': 'b9c32230-1426-46d0-8448-ca824ae48603',
'title': 'HD Cutz',
'description': 'md5:86dd1e6d9b4463b3ccc2063ab3180c44',
'display_id': 'hd-cutz',
'thumbnails': 'count:2',
},
'playlist': [{
'info_dict': {
'id': 'b9c32230-1426-46d0-8448-ca824ae48603',
'ext': 'mp4',
'title': 'Trailer | HD Cutz',
'age_limit': 0,
'duration': 195.840,
'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408',
},
}],
'params': {'skip_download': 'm3u8'},
}, {
# from https://www.fifa.com/fifaplus/en/watch/movie/2OFuZ9TGyPH6x7nZsgnVBN
'url': 'https://www.plus.fifa.com/en/content/bravas-de-juarez/fac6685c-a900-4e78-b5cd-192af5131ffe',
'info_dict': {
'id': 'fac6685c-a900-4e78-b5cd-192af5131ffe',
'title': 'Bravas de Juárez',
'description': 'md5:e48e0f56fb27ac334e616976e0e62362',
'display_id': 'bravas-de-juarez',
},
'playlist': [{
'info_dict': {
'id': 'fac6685c-a900-4e78-b5cd-192af5131ffe',
'ext': 'mp4',
'title': 'Trailer | Bravas de Juárez',
'age_limit': 0,
'duration': 73.984,
'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408',
},
}],
}]
_WEBPAGE_TESTS = [{
# https://www.plus.fifa.com/en/content/le-moment-the-official-film-of-the-2019-fifa-womens-world-cup/68a89002-0182-4cc7-b858-e548de0fb9cc
'url': 'https://www.fifa.com/fifaplus/en/watch/movie/01ioUo8QHiajSisrvP3ES2',
'info_dict': {
'id': '68a89002-0182-4cc7-b858-e548de0fb9cc',
'title': 'Le Moment',
'description': 'md5:155f0c28ea9de733668d7eb1f7dbcb52',
'display_id': 'le-moment-the-official-film-of-the-2019-fifa-womens-world-cup',
},
'playlist_count': 0,
}, {
# https://www.plus.fifa.com/en/content/dreams-2018-fifa-world-cup-official-film/ebdce1da-ab82-4c0b-a7d3-b4fc71030339
'url': 'https://www.fifa.com/fifaplus/en/watch/movie/69GbI9lVcwhOeBvea5eKUB',
'info_dict': {
'id': 'ebdce1da-ab82-4c0b-a7d3-b4fc71030339',
'title': 'Dreams',
'description': 'md5:b795d218d5c2b88bff3c1569cb617acb',
'display_id': 'dreams-2018-fifa-world-cup-official-film',
},
'playlist_count': 0,
}]
def _entries(self, video_asset, video_id):
    """Lazily yield one extracted info dict per trailer in ``video_asset``.

    Items are selected with a ``traverse_obj`` filter that keeps dict entries
    whose ``type`` equals ``'TRAILER'``; each one is passed to
    ``self._extract_video`` together with ``video_id``.
    """
    trailers = traverse_obj(
        video_asset, (lambda _, v: v['type'] == 'TRAILER', {dict}))
    yield from (self._extract_video(trailer, video_id) for trailer in trailers)
def _real_extract(self, url):
    """Return a playlist result for a content URL.

    Fetches the content metadata and the video-asset list through
    ``self._call_api``, collects the cover images that are present as
    thumbnails, and delegates per-video extraction to ``self._entries``.
    """
    mobj = self._match_valid_url(url)
    video_id = mobj.group('id')
    display_id = mobj.group('display_id')

    video_content = self._call_api(
        f'entertainment/api/v1/contents/{video_id}', video_id, 'Downloading video content')
    video_asset = self._call_api(
        'flux-capacitor/api/v1/videoasset', video_id,
        'Downloading video asset', query={'catalog': video_id})

    # Each cover key is requested at a fixed width; keys with no value are skipped
    cover_widths = {'coverUrl': 330, 'wideCoverUrl': 1408}
    thumbnails = [{
        'url': update_url_query(video_content[key], {'width': width}),
        'width': width,
    } for key, width in cover_widths.items() if video_content.get(key)]

    title = strip_or_none(video_content['title'])
    description = strip_or_none(video_content.get('storyLine'))
    return self.playlist_result(
        self._entries(video_asset, video_id), video_id, title, description,
        display_id=display_id, thumbnails=thumbnails)
class FifaArticleIE(InfoExtractor):
_VALID_URL = r'https?://(www\.)?fifa\.com/(fifaplus/)?(?P<locale>\w{2})/articles/(?P<id>[\w-]+)' _VALID_URL = r'https?://(www\.)?fifa\.com/(fifaplus/)?(?P<locale>\w{2})/articles/(?P<id>[\w-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.fifa.com/en/articles/foord-talks-2023-and-battling-kerr-for-the-wsl-title', 'url': 'https://www.fifa.com/en/articles/foord-talks-2023-and-battling-kerr-for-the-wsl-title',
'info_dict': { 'info_dict': {
'_type': 'multi_video',
'id': 'foord-talks-2023-and-battling-kerr-for-the-wsl-title', 'id': 'foord-talks-2023-and-battling-kerr-for-the-wsl-title',
'title': 'Foord talks 2023 and battling Kerr for the WSL title', 'title': 'Foord talks 2023 and battling Kerr for the WSL title',
'timestamp': 1651136400, 'timestamp': 1651136400,
@ -293,18 +279,47 @@ class FifaArticleIE(FifaBaseIE):
}], }],
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
# https://www.fifa.com/en/articles/stars-set-to-collide-in-uwcl-final
'url': 'https://www.fifa.com/fifaplus/en/articles/stars-set-to-collide-in-uwcl-final', 'url': 'https://www.fifa.com/fifaplus/en/articles/stars-set-to-collide-in-uwcl-final',
'info_dict': { 'only_matching': True,
'_type': 'multi_video',
'id': 'stars-set-to-collide-in-uwcl-final',
'title': 'Stars set to collide in Womens Champions League final ',
'timestamp': 1652950800,
'upload_date': '20220519',
},
'playlist_count': 3,
'params': {'skip_download': 'm3u8'},
}] }]
@functools.cached_property
def _preconnect_link(self):
    """Return the ``href`` of a ``<link rel="preconnect">`` tag on the fifa.com homepage.

    The homepage is downloaded on first access; ``functools.cached_property``
    stores the result on the instance afterwards.
    """
    homepage = self._download_webpage('https://fifa.com/', None)
    return self._search_regex(
        r'<link\b[^>]+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"',
        homepage, 'Preconnect Link')
def _call_api(self, path, video_id, note=None, query=None, fatal=True):
    """Download JSON from ``path`` relative to ``self._preconnect_link``.

    ``video_id``, ``note``, ``query`` and ``fatal`` are forwarded unchanged to
    ``self._download_json``.
    """
    api_url = f'{self._preconnect_link}/{path}'
    return self._download_json(api_url, video_id, note, query=query, fatal=fatal)
def _entries(self, video_ids, article_id):
    """Yield one info dict for each video ID embedded in an article.

    For every ID this requests the (optional) video details, the (required)
    preplay parameters, and the uplynk preplay JSON whose ``playURL`` is
    handed to the m3u8 extractor. ``article_id`` is the ID passed to every
    download helper.
    """
    for video_id in video_ids:
        # The details request is non-fatal, so it may not return a dict on
        # failure; fall back to an empty dict so the metadata lookups below
        # yield None instead of raising AttributeError.
        video_details = self._call_api(
            f'sections/videoDetails/{video_id}', article_id,
            'Downloading Video Details', fatal=False) or {}

        preplay_parameters = self._call_api(
            f'videoPlayerData/{video_id}', article_id,
            'Downloading Preplay Parameters')['preplayParameters']

        content_data = self._download_json(
            'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(
                **preplay_parameters), article_id, 'Downloading Content Data')

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], article_id)

        yield {
            'id': video_id,
            'title': video_details.get('title'),
            'description': video_details.get('description'),
            'duration': int_or_none(video_details.get('duration')),
            'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')),
            'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)),
            'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')),
            'formats': formats,
            'subtitles': subtitles,
        }
def _real_extract(self, url): def _real_extract(self, url):
article_id, locale = self._match_valid_url(url).group('id', 'locale') article_id, locale = self._match_valid_url(url).group('id', 'locale')
@ -319,96 +334,6 @@ class FifaArticleIE(FifaBaseIE):
'richtext', 'content', lambda _, v: v['data']['target']['contentTypesCheckboxValue'] == 'Video', 'richtext', 'content', lambda _, v: v['data']['target']['contentTypesCheckboxValue'] == 'Video',
'data', 'target', 'sys', 'id'))) 'data', 'target', 'sys', 'id')))
return self.playlist_from_matches( return self.playlist_result(
video_ids, article_id, page_info.get('articleTitle'), self._entries(video_ids, article_id), article_id, page_info.get('articleTitle'),
getter=lambda x: f'https://www.fifa.com/fifaplus/{locale}/watch/{x}', timestamp=parse_iso8601(page_info.get('articlePublishedDate')))
ie=FifaIE, multi_video=True, timestamp=parse_iso8601(page_info.get('articlePublishedDate')))
class FifaMovieIE(FifaBaseIE):
    """Extractor for fifa.com/fifaplus movie pages.

    Produces a ``multi_video`` playlist of watch URLs (the movie's trailers
    plus its main video entry, when present) that are resolved by ``FifaIE``.
    """

    _VALID_URL = r'https?://(www\.)?fifa\.com/fifaplus/(?P<locale>\w{2})/watch/movie/(?P<id>\w+)[/?\?\#]?'
    _TESTS = [{
        'url': 'https://www.fifa.com/fifaplus/en/watch/movie/2OFuZ9TGyPH6x7nZsgnVBN',
        'info_dict': {
            '_type': 'multi_video',
            'id': '2OFuZ9TGyPH6x7nZsgnVBN',
            'title': 'Bravas de Juárez',
            'description': 'md5:1c36885f34d1c142f66ddd5acd5226b2',
        },
        'playlist_count': 2,
    }, {
        'url': 'https://www.fifa.com/fifaplus/en/watch/movie/01ioUo8QHiajSisrvP3ES2',
        'info_dict': {
            '_type': 'multi_video',
            'id': '01ioUo8QHiajSisrvP3ES2',
            'title': 'Le Moment | The Official Film of the 2019 FIFA Womens World Cup™',
            'description': 'md5:fbc803feb6fcbc82d2a73e914244484c',
        },
        'playlist_count': 1,
    }, {
        'url': 'https://www.fifa.com/fifaplus/en/watch/movie/69GbI9lVcwhOeBvea5eKUB',
        'info_dict': {
            '_type': 'multi_video',
            'id': '69GbI9lVcwhOeBvea5eKUB',
            'title': 'Dreams | The Official Film of the 2018 FIFA World Cup™',
            'description': 'md5:e79dd17af4dcab1dd446ef6e22a79330',
        },
        'playlist_count': 1,
    }]

    def _real_extract(self, url):
        """Collect trailer and main-video entry IDs and return them as a playlist."""
        mobj = self._match_valid_url(url)
        movie_id = mobj.group('id')
        locale = mobj.group('locale')

        movie_details = self._call_api(
            f'sections/movieDetails/{movie_id}', movie_id, 'Downloading Movie Details', query={'locale': locale})

        # Trailers first, then the main video entry when one is listed
        video_ids = traverse_obj(movie_details, ('trailers', ..., 'entryId'))
        main_entry_id = traverse_obj(movie_details, ('video', 'videoEntryId'))
        if main_entry_id:
            video_ids.append(main_entry_id)

        def watch_url(entry_id):
            return f'https://www.fifa.com/fifaplus/{locale}/watch/{entry_id}'

        playlist_title = traverse_obj(movie_details, ('video', 'title'))
        playlist_description = traverse_obj(movie_details, ('video', 'description'))
        return self.playlist_from_matches(
            video_ids, movie_id, playlist_title,
            getter=watch_url, ie=FifaIE, multi_video=True,
            playlist_description=playlist_description)
class FifaSeriesIE(FifaBaseIE):
    """Extractor for fifa.com/fifaplus series pages.

    Produces a ``multi_video`` playlist of watch URLs (every episode entry of
    every season, followed by the trailers) that are resolved by ``FifaIE``.
    """

    _VALID_URL = r'https?://(www\.)?fifa\.com/fifaplus/(?P<locale>\w{2})/watch/series/(?P<serie_id>\w+)/(?P<season_id>\w+)/(?P<episode_id>\w+)[/?\?\#]?'
    _TESTS = [{
        'url': 'https://www.fifa.com/fifaplus/en/watch/series/48PQFX2J4TiDJcxWOxUPho/2ka5yomq8MBvfxe205zdQ9/6H72309PLWXafBIavvPzPQ#ReadMore',
        'info_dict': {
            '_type': 'multi_video',
            'id': '48PQFX2J4TiDJcxWOxUPho',
            'title': 'Episode 1 | Kariobangi',
            'description': 'md5:ecbc8668f828d3cc2c0d00edcc0af04f',
        },
        'playlist_count': 4,
    }, {
        'url': 'https://www.fifa.com/fifaplus/en/watch/series/5Ja1dDLuudkFF95OVHcYBG/5epcWav73zMbjTJh2RxIOt/1NIHdDxPlYodbNobjS1iX5',
        'info_dict': {
            '_type': 'multi_video',
            'id': '5Ja1dDLuudkFF95OVHcYBG',
            'title': 'Paul Pogba and Aaron Wan Bissaka | HD Cutz',
            'description': 'md5:16dc373774f503ef91f4489ca17c3f49',
        },
        'playlist_count': 10,
    }]

    def _real_extract(self, url):
        """Collect episode and trailer entry IDs for the series and return them as a playlist."""
        mobj = self._match_valid_url(url)
        series_id = mobj.group('serie_id')
        locale = mobj.group('locale')
        season_id = mobj.group('season_id')
        episode_id = mobj.group('episode_id')

        api_query = {
            'locale': locale,
            'seriesId': series_id,
            'seasonId': season_id,
            'episodeId': episode_id,
        }
        serie_details = self._call_api(
            'sections/videoEpisodeDetails', series_id, 'Downloading Serie Details', query=api_query)

        # Episodes of all seasons first, trailers appended afterwards
        video_ids = traverse_obj(serie_details, ('seasons', ..., 'episodes', ..., 'entryId'))
        video_ids += traverse_obj(serie_details, ('trailers', ..., 'entryId'))

        def watch_url(entry_id):
            return f'https://www.fifa.com/fifaplus/{locale}/watch/{entry_id}'

        playlist_title = strip_or_none(serie_details.get('title'))
        playlist_description = strip_or_none(serie_details.get('description'))
        return self.playlist_from_matches(
            video_ids, series_id, playlist_title,
            getter=watch_url, ie=FifaIE, multi_video=True,
            playlist_description=playlist_description)

View File

@ -869,7 +869,7 @@ class NicovideoTagURLIE(NicovideoSearchBaseIE):
class NiconicoUserIE(InfoExtractor): class NiconicoUserIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])' _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)(?:/video)?/?(?:$|[#?])'
_TEST = { _TEST = {
'url': 'https://www.nicovideo.jp/user/419948', 'url': 'https://www.nicovideo.jp/user/419948',
'info_dict': { 'info_dict': {
@ -877,7 +877,7 @@ class NiconicoUserIE(InfoExtractor):
}, },
'playlist_mincount': 101, 'playlist_mincount': 101,
} }
_API_URL = 'https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s' _API_URL = 'https://nvapi.nicovideo.jp/v2/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s'
_PAGE_SIZE = 100 _PAGE_SIZE = 100
_API_HEADERS = { _API_HEADERS = {
@ -897,12 +897,13 @@ class NiconicoUserIE(InfoExtractor):
total_count = int_or_none(json_parsed['data'].get('totalCount')) total_count = int_or_none(json_parsed['data'].get('totalCount'))
for entry in json_parsed['data']['items']: for entry in json_parsed['data']['items']:
count += 1 count += 1
yield self.url_result('https://www.nicovideo.jp/watch/{}'.format(entry['id'])) yield self.url_result(
f'https://www.nicovideo.jp/watch/{entry["essential"]["id"]}', ie=NiconicoIE)
page_num += 1 page_num += 1
def _real_extract(self, url): def _real_extract(self, url):
list_id = self._match_id(url) list_id = self._match_id(url)
return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key()) return self.playlist_result(self._entries(list_id), list_id)
class NiconicoLiveIE(InfoExtractor): class NiconicoLiveIE(InfoExtractor):

View File

@ -208,7 +208,6 @@ class SoundcloudBaseIE(InfoExtractor):
def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False): def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False):
track_id = str(info['id']) track_id = str(info['id'])
title = info['title']
format_urls = set() format_urls = set()
formats = [] formats = []
@ -367,7 +366,7 @@ class SoundcloudBaseIE(InfoExtractor):
'uploader_id': str_or_none(user.get('id')) or user.get('permalink'), 'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
'uploader_url': user.get('permalink_url'), 'uploader_url': user.get('permalink_url'),
'timestamp': unified_timestamp(info.get('created_at')), 'timestamp': unified_timestamp(info.get('created_at')),
'title': title, 'title': info.get('title'),
'description': info.get('description'), 'description': info.get('description'),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'duration': float_or_none(info.get('duration'), 1000), 'duration': float_or_none(info.get('duration'), 1000),
@ -377,7 +376,8 @@ class SoundcloudBaseIE(InfoExtractor):
'like_count': extract_count('favoritings') or extract_count('likes'), 'like_count': extract_count('favoritings') or extract_count('likes'),
'comment_count': extract_count('comment'), 'comment_count': extract_count('comment'),
'repost_count': extract_count('reposts'), 'repost_count': extract_count('reposts'),
'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)), 'genres': traverse_obj(info, ('genre', {str}, filter, all, filter)),
'artists': traverse_obj(info, ('publisher_metadata', 'artist', {str}, filter, all, filter)),
'formats': formats if not extract_flat else None, 'formats': formats if not extract_flat else None,
} }
@ -429,7 +429,6 @@ class SoundcloudIE(SoundcloudBaseIE):
'repost_count': int, 'repost_count': int,
'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg', 'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
'uploader_url': 'https://soundcloud.com/ethmusic', 'uploader_url': 'https://soundcloud.com/ethmusic',
'genres': [],
}, },
}, },
# geo-restricted # geo-restricted
@ -453,6 +452,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'uploader_url': 'https://soundcloud.com/the-concept-band', 'uploader_url': 'https://soundcloud.com/the-concept-band',
'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg', 'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
'genres': ['Alternative'], 'genres': ['Alternative'],
'artists': ['The Royal Concept'],
}, },
}, },
# private link # private link
@ -525,6 +525,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'repost_count': int, 'repost_count': int,
'view_count': int, 'view_count': int,
'genres': ['Dance & EDM'], 'genres': ['Dance & EDM'],
'artists': ['80M'],
}, },
}, },
# private link, downloadable format # private link, downloadable format
@ -549,6 +550,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg', 'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
'uploader_url': 'https://soundcloud.com/oriuplift', 'uploader_url': 'https://soundcloud.com/oriuplift',
'genres': ['Trance'], 'genres': ['Trance'],
'artists': ['Ori Uplift'],
}, },
}, },
# no album art, use avatar pic for thumbnail # no album art, use avatar pic for thumbnail
@ -572,7 +574,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
'uploader_url': 'https://soundcloud.com/garyvee', 'uploader_url': 'https://soundcloud.com/garyvee',
'genres': [], 'artists': ['MadReal'],
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,

View File

@ -869,11 +869,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
for retry in (False, True): for retry in (False, True):
try: try:
video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash) video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash)
break
except ExtractorError as e: except ExtractorError as e:
if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400 if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400
and 'password' in traverse_obj( and 'password' in traverse_obj(
e.cause.response.read(), self._webpage_read_content(e.cause.response, e.cause.response.url, video_id, fatal=False),
({bytes.decode}, {json.loads}, 'invalid_parameters', ..., 'field'), ({json.loads}, 'invalid_parameters', ..., 'field'),
)): )):
self._verify_video_password( self._verify_video_password(
video_id, self._get_video_password(), viewer['xsrft']) video_id, self._get_video_password(), viewer['xsrft'])

View File

@ -5165,6 +5165,7 @@ class _UnsafeExtensionError(Exception):
'ico', 'ico',
'image', 'image',
'jng', 'jng',
'jpe',
'jpeg', 'jpeg',
'jxl', 'jxl',
'svg', 'svg',
@ -5578,14 +5579,15 @@ class FormatSorter:
value = get_value(field) value = get_value(field)
return self._calculate_field_preference_from_value(format_, field, type_, value) return self._calculate_field_preference_from_value(format_, field, type_, value)
def calculate_preference(self, format): @staticmethod
def _fill_sorting_fields(format):
# Determine missing protocol # Determine missing protocol
if not format.get('protocol'): if not format.get('protocol'):
format['protocol'] = determine_protocol(format) format['protocol'] = determine_protocol(format)
# Determine missing ext # Determine missing ext
if not format.get('ext') and 'url' in format: if not format.get('ext') and 'url' in format:
format['ext'] = determine_ext(format['url']) format['ext'] = determine_ext(format['url']).lower()
if format.get('vcodec') == 'none': if format.get('vcodec') == 'none':
format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none' format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
format['video_ext'] = 'none' format['video_ext'] = 'none'
@ -5613,6 +5615,8 @@ class FormatSorter:
if not format.get('tbr'): if not format.get('tbr'):
format['tbr'] = try_call(lambda: format['vbr'] + format['abr']) or None format['tbr'] = try_call(lambda: format['vbr'] + format['abr']) or None
def calculate_preference(self, format):
self._fill_sorting_fields(format)
return tuple(self._calculate_field_preference(format, field) for field in self._order) return tuple(self._calculate_field_preference(format, field) for field in self._order)

View File

@ -391,14 +391,13 @@ def find_element(*, tag: str, html=False): ...
def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False): def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False):
# deliberately using `id=` and `cls=` for ease of readability # deliberately using `id=` and `cls=` for ease of readability
assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required' assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required'
if not tag: ANY_TAG = r'[\w:.-]+'
tag = r'[\w:.-]+'
if attr and value: if attr and value:
assert not cls, 'Cannot match both attr and cls' assert not cls, 'Cannot match both attr and cls'
assert not id, 'Cannot match both attr and id' assert not id, 'Cannot match both attr and id'
func = get_element_html_by_attribute if html else get_element_by_attribute func = get_element_html_by_attribute if html else get_element_by_attribute
return functools.partial(func, attr, value, tag=tag) return functools.partial(func, attr, value, tag=tag or ANY_TAG)
elif cls: elif cls:
assert not id, 'Cannot match both cls and id' assert not id, 'Cannot match both cls and id'
@ -408,7 +407,7 @@ def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=Fal
elif id: elif id:
func = get_element_html_by_id if html else get_element_by_id func = get_element_html_by_id if html else get_element_by_id
return functools.partial(func, id, tag=tag) return functools.partial(func, id, tag=tag or ANY_TAG)
index = int(bool(html)) index = int(bool(html))
return lambda html: get_element_text_and_html_by_tag(tag, html)[index] return lambda html: get_element_text_and_html_by_tag(tag, html)[index]