Compare commits

..

No commits in common. "77debf548f1aac95265ebe51e31a71522442d31e" and "76111ea6257f94a23bf388b99d721bdf5135bd42" have entirely different histories.

13 changed files with 443 additions and 502 deletions

View File

@ -216,23 +216,5 @@
"action": "add", "action": "add",
"when": "d784464399b600ba9516bbcec6286f11d68974dd", "when": "d784464399b600ba9516bbcec6286f11d68974dd",
"short": "[priority] **The minimum *required* Python version has been raised to 3.9**\nPython 3.8 reached its end-of-life on 2024.10.07, and yt-dlp has now removed support for it. As an unfortunate side effect, the official `yt-dlp.exe` and `yt-dlp_x86.exe` binaries are no longer supported on Windows 7. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)" "short": "[priority] **The minimum *required* Python version has been raised to 3.9**\nPython 3.8 reached its end-of-life on 2024.10.07, and yt-dlp has now removed support for it. As an unfortunate side effect, the official `yt-dlp.exe` and `yt-dlp_x86.exe` binaries are no longer supported on Windows 7. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)"
},
{
"action": "change",
"when": "914af9a0cf51c9a3f74aa88d952bee8334c67511",
"short": "Expand paths in `--plugin-dirs` (#11334)",
"authors": ["bashonly"]
},
{
"action": "change",
"when": "c29f5a7fae93a08f3cfbb6127b2faa75145b06a0",
"short": "[ie/generic] Do not impersonate by default (#11336)",
"authors": ["bashonly"]
},
{
"action": "change",
"when": "57212a5f97ce367590aaa5c3e9a135eead8f81f7",
"short": "[ie/vimeo] Fix API retries (#11351)",
"authors": ["bashonly"]
} }
] ]

View File

@ -71,13 +71,14 @@ class CommitGroup(enum.Enum):
def get(cls, value: str) -> tuple[CommitGroup | None, str | None]: def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
group, _, subgroup = (group.strip().lower() for group in value.partition('/')) group, _, subgroup = (group.strip().lower() for group in value.partition('/'))
if result := cls.group_lookup().get(group): result = cls.group_lookup().get(group)
return result, subgroup or None if not result:
if subgroup: if subgroup:
return None, value return None, value
subgroup = group
result = cls.subgroup_lookup().get(subgroup)
return cls.subgroup_lookup().get(group), group or None return result, subgroup or None
@dataclass @dataclass
@ -135,7 +136,8 @@ class Changelog:
first = False first = False
yield '\n<details><summary><h3>Changelog</h3></summary>\n' yield '\n<details><summary><h3>Changelog</h3></summary>\n'
if group := groups[item]: group = groups[item]
if group:
yield self.format_module(item.value, group) yield self.format_module(item.value, group)
if self._collapsible: if self._collapsible:
@ -251,7 +253,7 @@ class CommitRange:
''', re.VERBOSE | re.DOTALL) ''', re.VERBOSE | re.DOTALL)
EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE) EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})') REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
FIXES_RE = re.compile(r'(?i:(?:bug\s*)?fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Improve)\s+([\da-f]{40})') FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert|Improve)\s+([\da-f]{40})')
UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)') UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')
def __init__(self, start, end, default_author=None): def __init__(self, start, end, default_author=None):
@ -285,16 +287,11 @@ class CommitRange:
short = next(lines) short = next(lines)
skip = short.startswith('Release ') or short == '[version] update' skip = short.startswith('Release ') or short == '[version] update'
fix_commitish = None
if match := self.FIXES_RE.search(short):
fix_commitish = match.group(1)
authors = [default_author] if default_author else [] authors = [default_author] if default_author else []
for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR): for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
if match := self.AUTHOR_INDICATOR_RE.match(line): match = self.AUTHOR_INDICATOR_RE.match(line)
if match:
authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold) authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)
if not fix_commitish and (match := self.FIXES_RE.fullmatch(line)):
fix_commitish = match.group(1)
commit = Commit(commit_hash, short, authors) commit = Commit(commit_hash, short, authors)
if skip and (self._start or not i): if skip and (self._start or not i):
@ -304,17 +301,21 @@ class CommitRange:
logger.debug(f'Reached Release commit, breaking: {commit}') logger.debug(f'Reached Release commit, breaking: {commit}')
break break
if match := self.REVERT_RE.fullmatch(commit.short): revert_match = self.REVERT_RE.fullmatch(commit.short)
reverts[match.group(1)] = commit if revert_match:
reverts[revert_match.group(1)] = commit
continue continue
if fix_commitish: fix_match = self.FIXES_RE.search(commit.short)
fixes[fix_commitish].append(commit) if fix_match:
commitish = fix_match.group(1)
fixes[commitish].append(commit)
commits[commit.hash] = commit commits[commit.hash] = commit
for commitish, revert_commit in reverts.items(): for commitish, revert_commit in reverts.items():
if reverted := commits.pop(commitish, None): reverted = commits.pop(commitish, None)
if reverted:
logger.debug(f'{commitish} fully reverted {reverted}') logger.debug(f'{commitish} fully reverted {reverted}')
else: else:
commits[revert_commit.hash] = revert_commit commits[revert_commit.hash] = revert_commit
@ -460,7 +461,8 @@ def create_changelog(args):
logger.info(f'Loaded {len(commits)} commits') logger.info(f'Loaded {len(commits)} commits')
if new_contributors := get_new_contributors(args.contributors_path, commits): new_contributors = get_new_contributors(args.contributors_path, commits)
if new_contributors:
if args.contributors: if args.contributors:
write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a') write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a')
logger.info(f'New contributors: {", ".join(new_contributors)}') logger.info(f'New contributors: {", ".join(new_contributors)}')

View File

@ -2849,10 +2849,13 @@ class YoutubeDL:
sanitize_string_field(fmt, 'format_id') sanitize_string_field(fmt, 'format_id')
sanitize_numeric_fields(fmt) sanitize_numeric_fields(fmt)
fmt['url'] = sanitize_url(fmt['url']) fmt['url'] = sanitize_url(fmt['url'])
FormatSorter._fill_sorting_fields(fmt) if fmt.get('ext') is None:
fmt['ext'] = determine_ext(fmt['url']).lower()
if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'): if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
if fmt.get('acodec') is None: if fmt.get('acodec') is None:
fmt['acodec'] = fmt['ext'] fmt['acodec'] = fmt['ext']
if fmt.get('protocol') is None:
fmt['protocol'] = determine_protocol(fmt)
if fmt.get('resolution') is None: if fmt.get('resolution') is None:
fmt['resolution'] = self.format_resolution(fmt, default=None) fmt['resolution'] = self.format_resolution(fmt, default=None)
if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none': if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none':

View File

@ -401,6 +401,8 @@ from .cmt import CMTIE
from .cnbc import CNBCVideoIE from .cnbc import CNBCVideoIE
from .cnn import ( from .cnn import (
CNNIE, CNNIE,
CNNArticleIE,
CNNBlogsIE,
CNNIndonesiaIE, CNNIndonesiaIE,
) )
from .comedycentral import ( from .comedycentral import (
@ -644,7 +646,9 @@ from .fczenit import FczenitIE
from .fifa import ( from .fifa import (
FifaArticleIE, FifaArticleIE,
FifaContentIE, FifaContentIE,
FifaPlayerIE, FifaIE,
FifaMovieIE,
FifaSeriesIE,
) )
from .filmon import ( from .filmon import (
FilmOnChannelIE, FilmOnChannelIE,

View File

@ -12,86 +12,53 @@ from ..utils import (
class CCMAIE(InfoExtractor): class CCMAIE(InfoExtractor):
IE_DESC = '3Cat, TV3 and Catalunya Ràdio' _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?3cat\.cat/(?:3cat|tv3/sx3)/[^/?#]+/(?P<type>video|audio)/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
# ccma.cat/tv3/alacarta/ URLs redirect to 3cat.cat/3cat/ 'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
'url': 'https://www.3cat.cat/3cat/lespot-de-la-marato-de-tv3/video/5630208/',
'md5': '7296ca43977c8ea4469e719c609b0871', 'md5': '7296ca43977c8ea4469e719c609b0871',
'info_dict': { 'info_dict': {
'id': '5630208', 'id': '5630208',
'ext': 'mp4', 'ext': 'mp4',
'title': 'L\'espot de La Marató 2016: Ictus i les lesions medul·lars i cerebrals traumàtiques', 'title': 'L\'espot de La Marató de TV3',
'description': 'md5:f12987f320e2f6e988e9908e4fe97765', 'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
'timestamp': 1478608140, 'timestamp': 1478608140,
'upload_date': '20161108', 'upload_date': '20161108',
'age_limit': 0, 'age_limit': 0,
'alt_title': 'EsportMarató2016WEB_PerPublicar',
'duration': 79,
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/4/6/1478536106664.jpg',
'series': 'Dedicada a l\'ictus i les lesions medul·lars i cerebrals traumàtiques',
'categories': ['Divulgació'],
}, },
}, { }, {
# ccma.cat/catradio/alacarta/ URLs redirect to 3cat.cat/3cat/ 'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
'url': 'https://www.3cat.cat/3cat/el-consell-de-savis-analitza-el-derbi/audio/943685/',
'md5': 'fa3e38f269329a278271276330261425', 'md5': 'fa3e38f269329a278271276330261425',
'info_dict': { 'info_dict': {
'id': '943685', 'id': '943685',
'ext': 'mp3', 'ext': 'mp3',
'title': 'El Consell de Savis analitza el derbi', 'title': 'El Consell de Savis analitza el derbi',
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53', 'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
'upload_date': '20161217', 'upload_date': '20170512',
'timestamp': 1482011700, 'timestamp': 1494622500,
'vcodec': 'none', 'vcodec': 'none',
'categories': ['Esports'], 'categories': ['Esports'],
'series': 'Tot gira',
'duration': 821,
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/8/9/1482002602598.jpg',
}, },
}, { }, {
'url': 'https://www.3cat.cat/3cat/crims-josep-tallada-lespereu-me-part-1/video/6031387/', 'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
'md5': '27493513d08a3e5605814aee9bb778d2', 'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
'info_dict': { 'info_dict': {
'id': '6031387', 'id': '6031387',
'ext': 'mp4', 'ext': 'mp4',
'title': 'T1xC5 - Josep Talleda, l\'"Espereu-me" (part 1)', 'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60', 'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
'timestamp': 1582577919, 'timestamp': 1582577700,
'upload_date': '20200224', 'upload_date': '20200224',
'subtitles': 'mincount:1', 'subtitles': 'mincount:4',
'age_limit': 13, 'age_limit': 16,
'series': 'Crims', 'series': 'Crims',
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/1/9/1582564376991.jpg',
'duration': 3203,
'categories': ['Divulgació'],
'alt_title': 'Crims - 5 - Josep Talleda, l\'"Espereu-me" (1a part) - Josep Talleda, l\'"Espereu-me" (part 1)',
'episode_number': 5,
'episode': 'Episode 5',
},
}, {
'url': 'https://www.3cat.cat/tv3/sx3/una-mosca-volava-per-la-llum/video/5759227/',
'info_dict': {
'id': '5759227',
'ext': 'mp4',
'title': 'Una mosca volava per la llum',
'alt_title': '17Z004Ç UNA MOSCA VOLAVA PER LA LLUM',
'description': 'md5:9ab64276944b0825336f4147f13f7854',
'series': 'Mic',
'upload_date': '20180411',
'timestamp': 1523440105,
'duration': 160,
'age_limit': 0,
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/6/1/1524071667216.jpg',
'categories': ['Música'],
}, },
}] }]
def _real_extract(self, url): def _real_extract(self, url):
media_type, media_id = self._match_valid_url(url).group('type', 'id') media_type, media_id = self._match_valid_url(url).groups()
media = self._download_json( media = self._download_json(
'http://api-media.3cat.cat/pvideo/media.jsp', media_id, query={ 'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
'media': media_type, 'media': media_type,
'idint': media_id, 'idint': media_id,
'format': 'dm', 'format': 'dm',

View File

@ -1,226 +1,146 @@
import functools
import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from .turner import TurnerBaseIE
clean_html, from ..utils import merge_dicts, try_call, url_basename
extract_attributes,
int_or_none,
merge_dicts,
parse_duration,
parse_iso8601,
parse_resolution,
try_call,
update_url,
url_or_none,
)
from ..utils.traversal import find_elements, traverse_obj
class CNNIE(InfoExtractor): class CNNIE(TurnerBaseIE):
_VALID_URL = r'https?://(?:(?:edition|www|money|cnnespanol)\.)?cnn\.com/(?!audio/)(?P<display_id>[^?#]+?)(?:[?#]|$|/index\.html)' _VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.cnn.com/2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl', 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
'md5': '3e6121ea48df7e2259fe73a0628605c4',
'info_dict': { 'info_dict': {
'id': 'med0e97ad0d154f56e29aa96e57192a14226734b6b', 'id': 'sports/2013/06/09/nadal-1-on-1.cnn',
'display_id': '2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20240531', 'title': 'Nadal wins 8th French Open title',
'description': 'md5:844bcdb0629e1877a7a466c913f4c19c', 'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/gettyimages-2151936122.jpg?c=original', 'duration': 135,
'duration': 373.0, 'upload_date': '20130609',
'timestamp': 1717148586, },
'title': 'Borussia Dortmund star Jadon Sancho seeks Wembley redemption after 2020 Euros hurt', 'expected_warnings': ['Failed to download m3u8 information'],
'modified_date': '20240531', }, {
'modified_timestamp': 1717150140, 'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
'info_dict': {
'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology',
'ext': 'mp4',
'title': "Student's epic speech stuns new freshmen",
'description': 'A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from "2001: A Space Odyssey."',
'upload_date': '20130821',
},
'expected_warnings': ['Failed to download m3u8 information'],
}, {
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
'info_dict': {
'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln',
'ext': 'mp4',
'title': 'Nashville Ep. 1: Hand crafted skateboards',
'description': 'md5:e7223a503315c9f150acac52e76de086',
'upload_date': '20141222',
},
'expected_warnings': ['Failed to download m3u8 information'],
}, {
'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html',
'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
'info_dict': {
'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney',
'ext': 'mp4',
'title': '5 stunning stats about Netflix',
'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.',
'upload_date': '20160819',
},
'params': {
# m3u8 download
'skip_download': True,
}, },
}, { }, {
'url': 'https://edition.cnn.com/2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid', 'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
'info_dict': { 'only_matching': True,
'id': 'me522945c4709b299e5cb8657900a7a21ad3b559f9',
'display_id': '2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
'ext': 'mp4',
'description': 'md5:e0120fe5da9ad8259fd707c1cbb64a60',
'title': 'Heres how some inmates in closely divided state are now able to vote from jail',
'timestamp': 1718158269,
'upload_date': '20240612',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701554-13565-571-still.jpg?c=original',
'duration': 202.0,
'modified_date': '20240612',
'modified_timestamp': 1718158509,
},
}, { }, {
'url': 'https://edition.cnn.com/2024/06/11/style/king-charles-portrait-vandalized/index.html', 'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
'info_dict': { 'only_matching': True,
'id': 'mef5f52b9e1fe28b1ad192afcbc9206ae984894b68',
'display_id': '2024/06/11/style/king-charles-portrait-vandalized',
'ext': 'mp4',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701257-8846-816-still.jpg?c=original',
'description': 'md5:19f78338ccec533db0fa8a4511012dae',
'title': 'Video shows King Charles\' portrait being vandalized by activists',
'timestamp': 1718113852,
'upload_date': '20240611',
'duration': 51.0,
'modified_timestamp': 1718116193,
'modified_date': '20240611',
},
}, { }, {
'url': 'https://edition.cnn.com/videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln', 'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn',
'info_dict': { 'only_matching': True,
'id': 'mefba13799201b084ea3b1d0f7ca820ae94d4bb5b2',
'display_id': 'videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
'ext': 'mp4',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/221205163510-robin-meade-sign-off.jpg?c=original',
'duration': 158.0,
'title': 'Robin Meade signs off after HLN\'s last broadcast',
'description': 'md5:cff3c62d18d2fbc6c5c75cb029b7353b',
'upload_date': '20221205',
'timestamp': 1670284296,
'modified_timestamp': 1670332404,
'modified_date': '20221206',
},
'params': {'format': 'direct'},
}, {
'url': 'https://cnnespanol.cnn.com/video/ataque-misil-israel-beirut-libano-octubre-trax',
'info_dict': {
'id': 'me484a43722642aa00627b812fe928f2e99c6e2997',
'ext': 'mp4',
'display_id': 'video/ataque-misil-israel-beirut-libano-octubre-trax',
'timestamp': 1729501452,
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/ataqeubeirut-1.jpg?c=original',
'description': 'md5:256ee7137d161f776cda429654135e52',
'upload_date': '20241021',
'duration': 31.0,
'title': 'VIDEO | Israel lanza un nuevo ataque sobre Beirut',
'modified_date': '20241021',
'modified_timestamp': 1729501530,
},
}, {
'url': 'https://edition.cnn.com/2024/10/16/politics/kamala-harris-fox-news-interview/index.html',
'info_dict': {
'id': '2024/10/16/politics/kamala-harris-fox-news-interview',
},
'playlist_count': 2,
'playlist': [{
'md5': '073ffab87b8bef97c9913e71cc18ef9e',
'info_dict': {
'id': 'me19d548fdd54df0924087039283128ef473ab397d',
'ext': 'mp4',
'title': '\'I\'m not finished\': Harris interview with Fox News gets heated',
'display_id': 'kamala-harris-fox-news-interview-ebof-digvid',
'description': 'md5:e7dd3d1a04df916062230b60ca419a0a',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/harris-20241016234916617.jpg?c=original',
'duration': 173.0,
'timestamp': 1729122182,
'upload_date': '20241016',
'modified_timestamp': 1729194706,
'modified_date': '20241017',
},
'params': {'format': 'direct'},
}, {
'md5': '11604ab4af83b650826753f1ccb8ecff',
'info_dict': {
'id': 'med04507d8ca3da827001f63d22af321ec29c7d97b',
'ext': 'mp4',
'title': '\'Wise\': Buttigieg on Harris\' handling of interview question about gender transition surgery',
'display_id': 'pete-buttigieg-harris-fox-newssrc-digvid',
'description': 'md5:602a8a7e853ed5e574acd3159428c98e',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/buttigieg-20241017040412074.jpg?c=original',
'duration': 145.0,
'timestamp': 1729137765,
'upload_date': '20241017',
'modified_timestamp': 1729138184,
'modified_date': '20241017',
},
'params': {'format': 'direct'},
}],
}] }]
def _real_extract(self, url): _CONFIG = {
display_id = self._match_valid_url(url).group('display_id') # http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml
webpage = self._download_webpage(url, display_id) 'edition': {
app_id = traverse_obj( 'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml',
self._search_json(r'window\.env\s*=', webpage, 'window env', display_id, default={}), 'media_src': 'http://pmd.cdn.turner.com/cnn/big',
('TOP_AUTH_SERVICE_APP_ID', {str})) },
# http://money.cnn.com/.element/apps/cvp2/cfg/config.xml
entries = [] 'money': {
for player_data in traverse_obj(webpage, ( 'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml',
{find_elements(tag='div', attr='data-component-name', value='video-player', html=True)}, 'media_src': 'http://ht3.cdn.turner.com/money/big',
..., {extract_attributes}, all, lambda _, v: v['data-media-id'])): },
media_id = player_data['data-media-id']
parent_uri = player_data.get('data-video-resource-parent-uri')
formats, subtitles = [], {}
video_data = {}
if parent_uri:
video_data = self._download_json(
'https://fave.api.cnn.io/v1/video', media_id, fatal=False,
query={
'id': media_id,
'stellarUri': parent_uri,
})
for direct_url in traverse_obj(video_data, ('files', ..., 'fileUri', {url_or_none})):
resolution, bitrate = None, None
if mobj := re.search(r'-(?P<res>\d+x\d+)_(?P<tbr>\d+)k\.mp4', direct_url):
resolution, bitrate = mobj.group('res', 'tbr')
formats.append({
'url': direct_url,
'format_id': 'direct',
'quality': 1,
'tbr': int_or_none(bitrate),
**parse_resolution(resolution),
})
for sub_data in traverse_obj(video_data, (
'closedCaptions', 'types', lambda _, v: url_or_none(v['track']['url']), 'track')):
subtitles.setdefault(sub_data.get('lang') or 'en', []).append({
'url': sub_data['url'],
'name': sub_data.get('label'),
})
if app_id:
media_data = self._download_json(
f'https://medium.ngtv.io/v2/media/{media_id}/desktop', media_id, fatal=False,
query={'appId': app_id})
m3u8_url = traverse_obj(media_data, (
'media', 'desktop', 'unprotected', 'unencrypted', 'url', {url_or_none}))
if m3u8_url:
fmts, subs = self._extract_m3u8_formats_and_subtitles(
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
entries.append({
**traverse_obj(player_data, {
'title': ('data-headline', {clean_html}),
'description': ('data-description', {clean_html}),
'duration': ('data-duration', {parse_duration}),
'timestamp': ('data-publish-date', {parse_iso8601}),
'thumbnail': (
'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none},
{functools.partial(update_url, query='c=original')}),
'display_id': 'data-video-slug',
}),
**traverse_obj(video_data, {
'timestamp': ('dateCreated', 'uts', {int_or_none(scale=1000)}),
'description': ('description', {clean_html}),
'title': ('headline', {str}),
'modified_timestamp': ('lastModified', 'uts', {int_or_none(scale=1000)}),
'duration': ('trt', {int_or_none}),
}),
'id': media_id,
'formats': formats,
'subtitles': subtitles,
})
if len(entries) == 1:
return {
**entries[0],
'display_id': display_id,
} }
return self.playlist_result(entries, display_id) def _extract_timestamp(self, video_data):
# TODO: fix timestamp extraction
return None
def _real_extract(self, url):
sub_domain, path, page_title = self._match_valid_url(url).groups()
if sub_domain not in ('money', 'edition'):
sub_domain = 'edition'
config = self._CONFIG[sub_domain]
return self._extract_cvp_info(
config['data_src'] % path, page_title, {
'default': {
'media_src': config['media_src'],
},
'f4m': {
'host': 'cnn-vh.akamaihd.net',
},
})
class CNNBlogsIE(InfoExtractor):
_VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+'
_TEST = {
'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/',
'md5': '3e56f97b0b6ffb4b79f4ea0749551084',
'info_dict': {
'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn',
'ext': 'mp4',
'title': 'Criminalizing journalism?',
'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
'upload_date': '20140209',
},
'expected_warnings': ['Failed to download m3u8 information'],
'add_ie': ['CNN'],
}
def _real_extract(self, url):
webpage = self._download_webpage(url, url_basename(url))
cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
return self.url_result(cnn_url, CNNIE.ie_key())
class CNNArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)'
_TEST = {
'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
'info_dict': {
'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
'ext': 'mp4',
'title': 'Obama: Cyberattack not an act of war',
'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
'upload_date': '20141221',
},
'expected_warnings': ['Failed to download m3u8 information'],
'add_ie': ['CNN'],
}
def _real_extract(self, url):
webpage = self._download_webpage(url, url_basename(url))
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
class CNNIndonesiaIE(InfoExtractor): class CNNIndonesiaIE(InfoExtractor):

View File

@ -47,7 +47,6 @@ from ..utils import (
FormatSorter, FormatSorter,
GeoRestrictedError, GeoRestrictedError,
GeoUtils, GeoUtils,
ISO639Utils,
LenientJSONDecoder, LenientJSONDecoder,
Popen, Popen,
RegexNotFoundError, RegexNotFoundError,
@ -3072,11 +3071,7 @@ class InfoExtractor:
url_pattern = stream.attrib['Url'] url_pattern = stream.attrib['Url']
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
stream_name = stream.get('Name') stream_name = stream.get('Name')
# IsmFD expects ISO 639 Set 2 language codes (3-character length) stream_language = stream.get('Language', 'und')
# See: https://github.com/yt-dlp/yt-dlp/issues/11356
stream_language = stream.get('Language') or 'und'
if len(stream_language) != 3:
stream_language = ISO639Utils.short2long(stream_language) or 'und'
for track in stream.findall('QualityLevel'): for track in stream.findall('QualityLevel'):
KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'} KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'}
fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag')) fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))

View File

@ -5,7 +5,6 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
determine_ext, determine_ext,
float_or_none,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
strip_or_none, strip_or_none,
@ -15,7 +14,9 @@ from ..utils import (
from ..utils.traversal import traverse_obj from ..utils.traversal import traverse_obj
class FifaBaseIE(InfoExtractor): class FifaContentIE(InfoExtractor):
_VALID_URL = r'https?://(www\.)?plus\.fifa\.com/(?P<locale>\w{2})/content/(?P<display_id>[\w-]+)/(?P<id>[\w-]+)/?(?:[#?]|$)'
def _real_initialize(self): def _real_initialize(self):
self._HEADERS = { self._HEADERS = {
'content-type': 'application/json; charset=UTF-8', 'content-type': 'application/json; charset=UTF-8',
@ -53,12 +54,18 @@ class FifaBaseIE(InfoExtractor):
def _call_api(self, path, video_id, note=None, headers=None, query=None, data=None): def _call_api(self, path, video_id, note=None, headers=None, query=None, data=None):
return self._download_json( return self._download_json(
f'https://www.plus.fifa.com/{path}', video_id, note, headers={ f'https://www.plus.fifa.com/flux-capacitor/api/v1//{path}', video_id, note, headers={
**self._HEADERS, **self._HEADERS,
**(headers or {}), **(headers or {}),
}, query=query, data=data) }, query=query, data=data)
def _extract_video(self, video_info, video_id): def _real_extract(self, url):
urlh = self._request_webpage(url, self._match_id(url))
video_id, display_id, locale = self._match_valid_url(urlh.url).group('id', 'display_id', 'locale')
video_info = self._call_api(
'videoasset', video_id, 'Downloading video asset', query={'catalog': video_id})[0]
formats = [] formats = []
subtitles = {} subtitles = {}
@ -67,11 +74,12 @@ class FifaBaseIE(InfoExtractor):
'mpd/cenc+h264;q=0.9, mpd/clear+h264;q=0.7, mp4/;q=0.1', 'mpd/cenc+h264;q=0.9, mpd/clear+h264;q=0.7, mp4/;q=0.1',
]: ]:
session_info = self._call_api( session_info = self._call_api(
'flux-capacitor/api/v1/streaming/session', video_id, 'Getting streaming session', 'streaming/session', video_id, 'Getting streaming session',
headers={'x-chili-accept-stream': stream_type}, headers={'x-chili-accept-stream': stream_type},
data=json.dumps({'videoAssetId': video_info['id'], 'autoPlay': False}).encode()) data=json.dumps({'videoAssetId': video_info['id'], 'autoPlay': False}).encode())
streams_info = self._call_api( streams_info = self._call_api(
'flux-capacitor/api/v1/streaming/urls', video_id, 'Getting streaming urls', 'streaming/urls', video_id, 'Getting streaming urls',
headers={'x-chili-streaming-session': session_info['id']}) headers={'x-chili-streaming-session': session_info['id']})
for playlist_url in traverse_obj(streams_info, (..., 'url')): for playlist_url in traverse_obj(streams_info, (..., 'url')):
@ -91,11 +99,10 @@ class FifaBaseIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': strip_or_none(video_info['title']), 'title': video_info['title'],
'duration': float_or_none(video_info.get('duration'), scale=1000), 'display_id': display_id,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'age_limit': traverse_obj(video_info, ('parental', 'age', {int_or_none})),
'thumbnails': [{ 'thumbnails': [{
'url': update_url_query(x, {'width': 1408}), 'url': update_url_query(x, {'width': 1408}),
'width': 1408, 'width': 1408,
@ -103,154 +110,161 @@ class FifaBaseIE(InfoExtractor):
} }
class FifaPlayerIE(FifaBaseIE): class FifaBaseIE(InfoExtractor):
_VALID_URL = r'https?://(www\.)?plus\.fifa\.com/(?:\w{2})/player/(?P<id>[\w-]+)/?\?(?:[^#]+&)?catalogId=(?P<display_id>[\w-]+)' @functools.cached_property
def _preconnect_link(self):
return self._search_regex(
r'<link\b[^>]+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"',
self._download_webpage('https://fifa.com/', None), 'Preconnect Link')
def _call_api(self, path, video_id, note=None, query=None, fatal=True):
return self._download_json(
f'{self._preconnect_link}/{path}', video_id, note, query=query, fatal=fatal)
class FifaIE(FifaBaseIE):
_VALID_URL = r'https?://(www\.)?fifa\.com/(fifaplus/)?(?P<locale>\w{2})/watch/(?P<id>[-\w]+)/?(?:[#?]|$)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.plus.fifa.com/en/player/f67b9d46-38c3-4e38-bbf3-89cf14cbcc1a?catalogId=b9c32230-1426-46d0-8448-ca824ae48603&entryPoint=Slider', 'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y',
'info_dict': { 'info_dict': {
'id': 'f67b9d46-38c3-4e38-bbf3-89cf14cbcc1a', 'id': 'fee2f7e8-92fa-42c5-805c-a2c949015eae',
'ext': 'mp4', 'title': 'Italy v France | Final | 2006 FIFA World Cup Germany™ | Full Match Replay',
'title': 'Trailer | HD Cutz', 'display_id': 'italy-v-france-final-2006-fifa-world-cup-germany-full-match-replay',
'age_limit': 0,
'duration': 195.84,
'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408', 'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408',
}, },
'params': {
'skip_download': 'm3u8',
'ignore_no_formats_error': True,
},
'expected_warnings': [
'Requested format is not available',
'This video is DRM protected',
],
}, {
'url': 'https://www.fifa.com/fifaplus/pt/watch/1cg5r5Qt6Qt12ilkDgb1sV',
'info_dict': {
'id': 'd4f4a2cb-5966-4af7-8a05-98ef4732af2b',
'title': 'Brazil v Germany | Semi-finals | 2014 FIFA World Cup Brazil™ | Extended Highlights',
'display_id': 'brasil-x-alemanha-semifinais-copa-do-mundo-fifa-brasil-2014-compacto',
'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408',
},
'params': {
'skip_download': 'm3u8',
'ignore_no_formats_error': True,
},
'expected_warnings': [
'Requested format is not available',
'This video is DRM protected',
],
}, {
'url': 'https://www.fifa.com/fifaplus/fr/watch/3C6gQH9C2DLwzNx7BMRQdp',
'info_dict': {
'id': '3C6gQH9C2DLwzNx7BMRQdp',
'ext': 'mp4',
'title': 'Josimar goal against Northern Ireland | Classic Goals',
},
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'skip': 'HTTP Error 403: Forbidden',
}, { }, {
'url': 'https://www.plus.fifa.com/en/player/af65939f-bbce-4b8f-8462-5140af533c5f?catalogId=fac6685c-a900-4e78-b5cd-192af5131ffe&entryPoint=Slider', 'url': 'https://www.fifa.com/fifaplus/en/watch/2KhLLn6aiGW3nr8sNm8Hkv',
'md5': '2c4f5c591448d372f6ba85b8f3be37df',
'info_dict': { 'info_dict': {
'id': 'af65939f-bbce-4b8f-8462-5140af533c5f', 'id': '2KhLLn6aiGW3nr8sNm8Hkv',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Trailer | Bravas de Juárez', 'title': "Le Sommer: Lyon-Barcelona a beautiful final for women's football",
'age_limit': 0, },
'duration': 73.984, 'params': {'skip_download': 'm3u8'},
'skip': 'HTTP Error 403: Forbidden',
}, {
'url': 'https://www.fifa.com/fifaplus/en/watch/4V8H8qv7QM1LNVk5gUwYFa',
'info_dict': {
'id': '709abaec-5eef-4ad8-a02d-19a8932f42a2',
'title': "Christine Sinclair at 19 | FIFA U-19 Women's World Championship Canada 2002™",
'display_id': 'christine-sinclair-at-19-fifa-u-19-womens-world-championship-canada-2002',
'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408', 'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408',
}, },
'params': {
'skip_download': 'm3u8',
'ignore_no_formats_error': True,
},
'expected_warnings': [
'Requested format is not available',
'This video is DRM protected',
],
}, { }, {
'url': 'https://plus.fifa.com/en/player/eeebdd38-5d51-4891-8307-ab5dd62c2c32?catalogId=ed3b2dcb-6886-4b34-8ba7-c8800027f7dd', 'url': 'https://www.fifa.com/fifaplus/en/watch/d85632f9-7009-4ea0-aaf1-8d6847e4a148',
'only_matching': True, 'info_dict': {
'id': 'bbe5d2a3-3dfd-4283-a1af-3a66022e8254',
'title': 'Croatia v Australia | Group F | 2006 FIFA World Cup Germany™ | Full Match Replay',
'display_id': 'croatia-v-australia-or-group-f-or-2006-fifa-world-cup',
'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408',
},
'params': {
'skip_download': 'm3u8',
'ignore_no_formats_error': True,
},
'expected_warnings': [
'Requested format is not available',
'This video is DRM protected',
],
}, {
'url': 'https://www.fifa.com/fifaplus/pt/watch/Ny88zzqsVnxCBUJ6fZzPy',
'info_dict': {
'id': '3d2612ff-c06f-4a7e-a2d7-ec73504515b5',
'title': 'The Happiest Man in the World',
'display_id': 'o-homem-mais-feliz-do-mundo',
'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408',
},
'params': {
'skip_download': 'm3u8',
'ignore_no_formats_error': True,
},
'expected_warnings': [
'Requested format is not available',
'This video is DRM protected',
],
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id, catelog_id = self._match_valid_url(url).group('id', 'display_id') video_id, locale = self._match_valid_url(url).group('id', 'locale')
video_asset = self._call_api(
'flux-capacitor/api/v1/videoasset', video_id, if redirect_url := traverse_obj(self._call_api(
'Downloading video asset', query={'catalog': catelog_id}) f'pages/{locale}/watch/{video_id}', video_id, 'Downloading redirection info'), 'redirectUrl'):
video_info = traverse_obj(video_asset, (lambda _, v: v['id'] == video_id), get_all=False) return self.url_result(redirect_url)
if not video_info: urlh = self._request_webpage(url, self._match_id(url))
raise ExtractorError('Unable to extract video info') if urlh.url != url:
return self._extract_video(video_info, video_id) return self.url_result(urlh.url)
video_details = self._call_api(
f'sections/videoDetails/{video_id}', video_id, 'Downloading Video Details', fatal=False)
preplay_parameters = self._call_api(
f'videoPlayerData/{video_id}', video_id, 'Downloading Preplay Parameters')['preplayParameters']
content_data = self._download_json(
'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters),
video_id, 'Downloading Content Data')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id)
return {
'id': video_id,
'title': video_details.get('title'),
'description': video_details.get('description'),
'duration': int_or_none(video_details.get('duration')),
'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')),
'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)),
'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')),
'formats': formats,
'subtitles': subtitles,
}
class FifaContentIE(FifaBaseIE): class FifaArticleIE(FifaBaseIE):
_VALID_URL = r'https?://(www\.)?plus\.fifa\.com/(?:\w{2})/content/(?P<display_id>[\w-]+)/(?P<id>[\w-]+)/?(?:[#?]|$)'
_TESTS = [{
# from https://www.fifa.com/fifaplus/en/watch/series/48PQFX2J4TiDJcxWOxUPho/2ka5yomq8MBvfxe205zdQ9/6H72309PLWXafBIavvPzPQ#ReadMore
'url': 'https://www.plus.fifa.com/en/content/kariobangi/6f3be63f-76d9-4290-9e60-fd62afa95ed7',
'info_dict': {
'id': '6f3be63f-76d9-4290-9e60-fd62afa95ed7',
'title': 'Kariobangi',
'description': 'md5:b57eb012db2b84d482adedda82faf1c8',
'display_id': 'kariobangi',
'thumbnails': 'count:2',
},
'playlist_count': 0,
}, {
# from https://www.fifa.com/fifaplus/en/watch/series/5Ja1dDLuudkFF95OVHcYBG/5epcWav73zMbjTJh2RxIOt/1NIHdDxPlYodbNobjS1iX5
'url': 'https://www.plus.fifa.com/en/content/hd-cutz/b9c32230-1426-46d0-8448-ca824ae48603',
'info_dict': {
'id': 'b9c32230-1426-46d0-8448-ca824ae48603',
'title': 'HD Cutz',
'description': 'md5:86dd1e6d9b4463b3ccc2063ab3180c44',
'display_id': 'hd-cutz',
'thumbnails': 'count:2',
},
'playlist': [{
'info_dict': {
'id': 'b9c32230-1426-46d0-8448-ca824ae48603',
'ext': 'mp4',
'title': 'Trailer | HD Cutz',
'age_limit': 0,
'duration': 195.840,
'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408',
},
}],
'params': {'skip_download': 'm3u8'},
}, {
# from https://www.fifa.com/fifaplus/en/watch/movie/2OFuZ9TGyPH6x7nZsgnVBN
'url': 'https://www.plus.fifa.com/en/content/bravas-de-juarez/fac6685c-a900-4e78-b5cd-192af5131ffe',
'info_dict': {
'id': 'fac6685c-a900-4e78-b5cd-192af5131ffe',
'title': 'Bravas de Juárez',
'description': 'md5:e48e0f56fb27ac334e616976e0e62362',
'display_id': 'bravas-de-juarez',
},
'playlist': [{
'info_dict': {
'id': 'fac6685c-a900-4e78-b5cd-192af5131ffe',
'ext': 'mp4',
'title': 'Trailer | Bravas de Juárez',
'age_limit': 0,
'duration': 73.984,
'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408',
},
}],
}]
_WEBPAGE_TESTS = [{
# https://www.plus.fifa.com/en/content/le-moment-the-official-film-of-the-2019-fifa-womens-world-cup/68a89002-0182-4cc7-b858-e548de0fb9cc
'url': 'https://www.fifa.com/fifaplus/en/watch/movie/01ioUo8QHiajSisrvP3ES2',
'info_dict': {
'id': '68a89002-0182-4cc7-b858-e548de0fb9cc',
'title': 'Le Moment',
'description': 'md5:155f0c28ea9de733668d7eb1f7dbcb52',
'display_id': 'le-moment-the-official-film-of-the-2019-fifa-womens-world-cup',
},
'playlist_count': 0,
}, {
# https://www.plus.fifa.com/en/content/dreams-2018-fifa-world-cup-official-film/ebdce1da-ab82-4c0b-a7d3-b4fc71030339
'url': 'https://www.fifa.com/fifaplus/en/watch/movie/69GbI9lVcwhOeBvea5eKUB',
'info_dict': {
'id': 'ebdce1da-ab82-4c0b-a7d3-b4fc71030339',
'title': 'Dreams',
'description': 'md5:b795d218d5c2b88bff3c1569cb617acb',
'display_id': 'dreams-2018-fifa-world-cup-official-film',
},
'playlist_count': 0,
}]
def _entries(self, video_asset, video_id):
for video_info in traverse_obj(video_asset, (lambda _, v: v['type'] == 'TRAILER', {dict})):
yield self._extract_video(video_info, video_id)
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
video_content = self._call_api(
f'entertainment/api/v1/contents/{video_id}', video_id, 'Downloading video content')
video_asset = self._call_api(
'flux-capacitor/api/v1/videoasset', video_id,
'Downloading video asset', query={'catalog': video_id})
thumbnails = []
for key, width in [('coverUrl', 330), ('wideCoverUrl', 1408)]:
if thumbnail_url := video_content.get(key):
thumbnails.append({
'url': update_url_query(thumbnail_url, {'width': width}),
'width': width,
})
return self.playlist_result(
self._entries(video_asset, video_id), video_id,
strip_or_none(video_content['title']), strip_or_none(video_content.get('storyLine')),
display_id=display_id, thumbnails=thumbnails)
class FifaArticleIE(InfoExtractor):
_VALID_URL = r'https?://(www\.)?fifa\.com/(fifaplus/)?(?P<locale>\w{2})/articles/(?P<id>[\w-]+)' _VALID_URL = r'https?://(www\.)?fifa\.com/(fifaplus/)?(?P<locale>\w{2})/articles/(?P<id>[\w-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.fifa.com/en/articles/foord-talks-2023-and-battling-kerr-for-the-wsl-title', 'url': 'https://www.fifa.com/en/articles/foord-talks-2023-and-battling-kerr-for-the-wsl-title',
'info_dict': { 'info_dict': {
'_type': 'multi_video',
'id': 'foord-talks-2023-and-battling-kerr-for-the-wsl-title', 'id': 'foord-talks-2023-and-battling-kerr-for-the-wsl-title',
'title': 'Foord talks 2023 and battling Kerr for the WSL title', 'title': 'Foord talks 2023 and battling Kerr for the WSL title',
'timestamp': 1651136400, 'timestamp': 1651136400,
@ -279,47 +293,18 @@ class FifaArticleIE(InfoExtractor):
}], }],
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
# https://www.fifa.com/en/articles/stars-set-to-collide-in-uwcl-final
'url': 'https://www.fifa.com/fifaplus/en/articles/stars-set-to-collide-in-uwcl-final', 'url': 'https://www.fifa.com/fifaplus/en/articles/stars-set-to-collide-in-uwcl-final',
'only_matching': True, 'info_dict': {
'_type': 'multi_video',
'id': 'stars-set-to-collide-in-uwcl-final',
'title': 'Stars set to collide in Womens Champions League final ',
'timestamp': 1652950800,
'upload_date': '20220519',
},
'playlist_count': 3,
'params': {'skip_download': 'm3u8'},
}] }]
@functools.cached_property
def _preconnect_link(self):
return self._search_regex(
r'<link\b[^>]+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"',
self._download_webpage('https://fifa.com/', None), 'Preconnect Link')
def _call_api(self, path, video_id, note=None, query=None, fatal=True):
return self._download_json(
f'{self._preconnect_link}/{path}', video_id, note, query=query, fatal=fatal)
def _entries(self, video_ids, article_id):
for video_id in video_ids:
video_details = self._call_api(
f'sections/videoDetails/{video_id}', article_id,
'Downloading Video Details', fatal=False)
preplay_parameters = self._call_api(
f'videoPlayerData/{video_id}', article_id,
'Downloading Preplay Parameters')['preplayParameters']
content_data = self._download_json(
'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(
**preplay_parameters), article_id, 'Downloading Content Data')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], article_id)
yield {
'id': video_id,
'title': video_details.get('title'),
'description': video_details.get('description'),
'duration': int_or_none(video_details.get('duration')),
'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')),
'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)),
'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')),
'formats': formats,
'subtitles': subtitles,
}
def _real_extract(self, url): def _real_extract(self, url):
article_id, locale = self._match_valid_url(url).group('id', 'locale') article_id, locale = self._match_valid_url(url).group('id', 'locale')
@ -334,6 +319,96 @@ class FifaArticleIE(InfoExtractor):
'richtext', 'content', lambda _, v: v['data']['target']['contentTypesCheckboxValue'] == 'Video', 'richtext', 'content', lambda _, v: v['data']['target']['contentTypesCheckboxValue'] == 'Video',
'data', 'target', 'sys', 'id'))) 'data', 'target', 'sys', 'id')))
return self.playlist_result( return self.playlist_from_matches(
self._entries(video_ids, article_id), article_id, page_info.get('articleTitle'), video_ids, article_id, page_info.get('articleTitle'),
timestamp=parse_iso8601(page_info.get('articlePublishedDate'))) getter=lambda x: f'https://www.fifa.com/fifaplus/{locale}/watch/{x}',
ie=FifaIE, multi_video=True, timestamp=parse_iso8601(page_info.get('articlePublishedDate')))
class FifaMovieIE(FifaBaseIE):
_VALID_URL = r'https?://(www\.)?fifa\.com/fifaplus/(?P<locale>\w{2})/watch/movie/(?P<id>\w+)[/?\?\#]?'
_TESTS = [{
'url': 'https://www.fifa.com/fifaplus/en/watch/movie/2OFuZ9TGyPH6x7nZsgnVBN',
'info_dict': {
'_type': 'multi_video',
'id': '2OFuZ9TGyPH6x7nZsgnVBN',
'title': 'Bravas de Juárez',
'description': 'md5:1c36885f34d1c142f66ddd5acd5226b2',
},
'playlist_count': 2,
}, {
'url': 'https://www.fifa.com/fifaplus/en/watch/movie/01ioUo8QHiajSisrvP3ES2',
'info_dict': {
'_type': 'multi_video',
'id': '01ioUo8QHiajSisrvP3ES2',
'title': 'Le Moment | The Official Film of the 2019 FIFA Womens World Cup™',
'description': 'md5:fbc803feb6fcbc82d2a73e914244484c',
},
'playlist_count': 1,
}, {
'url': 'https://www.fifa.com/fifaplus/en/watch/movie/69GbI9lVcwhOeBvea5eKUB',
'info_dict': {
'_type': 'multi_video',
'id': '69GbI9lVcwhOeBvea5eKUB',
'title': 'Dreams | The Official Film of the 2018 FIFA World Cup™',
'description': 'md5:e79dd17af4dcab1dd446ef6e22a79330',
},
'playlist_count': 1,
}]
def _real_extract(self, url):
movie_id, locale = self._match_valid_url(url).group('id', 'locale')
movie_details = self._call_api(
f'sections/movieDetails/{movie_id}', movie_id, 'Downloading Movie Details', query={'locale': locale})
video_ids = traverse_obj(movie_details, ('trailers', ..., 'entryId'))
if video_entry_id := traverse_obj(movie_details, ('video', 'videoEntryId')):
video_ids.append(video_entry_id)
return self.playlist_from_matches(
video_ids, movie_id, traverse_obj(movie_details, ('video', 'title')),
getter=lambda x: f'https://www.fifa.com/fifaplus/{locale}/watch/{x}',
ie=FifaIE, multi_video=True, playlist_description=traverse_obj(movie_details, ('video', 'description')))
class FifaSeriesIE(FifaBaseIE):
_VALID_URL = r'https?://(www\.)?fifa\.com/fifaplus/(?P<locale>\w{2})/watch/series/(?P<serie_id>\w+)/(?P<season_id>\w+)/(?P<episode_id>\w+)[/?\?\#]?'
_TESTS = [{
'url': 'https://www.fifa.com/fifaplus/en/watch/series/48PQFX2J4TiDJcxWOxUPho/2ka5yomq8MBvfxe205zdQ9/6H72309PLWXafBIavvPzPQ#ReadMore',
'info_dict': {
'_type': 'multi_video',
'id': '48PQFX2J4TiDJcxWOxUPho',
'title': 'Episode 1 | Kariobangi',
'description': 'md5:ecbc8668f828d3cc2c0d00edcc0af04f',
},
'playlist_count': 4,
}, {
'url': 'https://www.fifa.com/fifaplus/en/watch/series/5Ja1dDLuudkFF95OVHcYBG/5epcWav73zMbjTJh2RxIOt/1NIHdDxPlYodbNobjS1iX5',
'info_dict': {
'_type': 'multi_video',
'id': '5Ja1dDLuudkFF95OVHcYBG',
'title': 'Paul Pogba and Aaron Wan Bissaka | HD Cutz',
'description': 'md5:16dc373774f503ef91f4489ca17c3f49',
},
'playlist_count': 10,
}]
def _real_extract(self, url):
series_id, locale, season_id, episode_id = self._match_valid_url(url).group('serie_id', 'locale', 'season_id', 'episode_id')
serie_details = self._call_api(
'sections/videoEpisodeDetails', series_id, 'Downloading Serie Details', query={
'locale': locale,
'seriesId': series_id,
'seasonId': season_id,
'episodeId': episode_id,
})
video_ids = traverse_obj(serie_details, ('seasons', ..., 'episodes', ..., 'entryId'))
video_ids.extend(traverse_obj(serie_details, ('trailers', ..., 'entryId')))
return self.playlist_from_matches(
video_ids, series_id, strip_or_none(serie_details.get('title')),
getter=lambda x: f'https://www.fifa.com/fifaplus/{locale}/watch/{x}',
ie=FifaIE, multi_video=True, playlist_description=strip_or_none(serie_details.get('description')))

View File

@ -869,7 +869,7 @@ class NicovideoTagURLIE(NicovideoSearchBaseIE):
class NiconicoUserIE(InfoExtractor): class NiconicoUserIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)(?:/video)?/?(?:$|[#?])' _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
_TEST = { _TEST = {
'url': 'https://www.nicovideo.jp/user/419948', 'url': 'https://www.nicovideo.jp/user/419948',
'info_dict': { 'info_dict': {
@ -877,7 +877,7 @@ class NiconicoUserIE(InfoExtractor):
}, },
'playlist_mincount': 101, 'playlist_mincount': 101,
} }
_API_URL = 'https://nvapi.nicovideo.jp/v2/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s' _API_URL = 'https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s'
_PAGE_SIZE = 100 _PAGE_SIZE = 100
_API_HEADERS = { _API_HEADERS = {
@ -897,13 +897,12 @@ class NiconicoUserIE(InfoExtractor):
total_count = int_or_none(json_parsed['data'].get('totalCount')) total_count = int_or_none(json_parsed['data'].get('totalCount'))
for entry in json_parsed['data']['items']: for entry in json_parsed['data']['items']:
count += 1 count += 1
yield self.url_result( yield self.url_result('https://www.nicovideo.jp/watch/{}'.format(entry['id']))
f'https://www.nicovideo.jp/watch/{entry["essential"]["id"]}', ie=NiconicoIE)
page_num += 1 page_num += 1
def _real_extract(self, url): def _real_extract(self, url):
list_id = self._match_id(url) list_id = self._match_id(url)
return self.playlist_result(self._entries(list_id), list_id) return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())
class NiconicoLiveIE(InfoExtractor): class NiconicoLiveIE(InfoExtractor):

View File

@ -208,6 +208,7 @@ class SoundcloudBaseIE(InfoExtractor):
def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False): def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False):
track_id = str(info['id']) track_id = str(info['id'])
title = info['title']
format_urls = set() format_urls = set()
formats = [] formats = []
@ -366,7 +367,7 @@ class SoundcloudBaseIE(InfoExtractor):
'uploader_id': str_or_none(user.get('id')) or user.get('permalink'), 'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
'uploader_url': user.get('permalink_url'), 'uploader_url': user.get('permalink_url'),
'timestamp': unified_timestamp(info.get('created_at')), 'timestamp': unified_timestamp(info.get('created_at')),
'title': info.get('title'), 'title': title,
'description': info.get('description'), 'description': info.get('description'),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'duration': float_or_none(info.get('duration'), 1000), 'duration': float_or_none(info.get('duration'), 1000),
@ -376,8 +377,7 @@ class SoundcloudBaseIE(InfoExtractor):
'like_count': extract_count('favoritings') or extract_count('likes'), 'like_count': extract_count('favoritings') or extract_count('likes'),
'comment_count': extract_count('comment'), 'comment_count': extract_count('comment'),
'repost_count': extract_count('reposts'), 'repost_count': extract_count('reposts'),
'genres': traverse_obj(info, ('genre', {str}, filter, all, filter)), 'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)),
'artists': traverse_obj(info, ('publisher_metadata', 'artist', {str}, filter, all, filter)),
'formats': formats if not extract_flat else None, 'formats': formats if not extract_flat else None,
} }
@ -429,6 +429,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'repost_count': int, 'repost_count': int,
'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg', 'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
'uploader_url': 'https://soundcloud.com/ethmusic', 'uploader_url': 'https://soundcloud.com/ethmusic',
'genres': [],
}, },
}, },
# geo-restricted # geo-restricted
@ -452,7 +453,6 @@ class SoundcloudIE(SoundcloudBaseIE):
'uploader_url': 'https://soundcloud.com/the-concept-band', 'uploader_url': 'https://soundcloud.com/the-concept-band',
'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg', 'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
'genres': ['Alternative'], 'genres': ['Alternative'],
'artists': ['The Royal Concept'],
}, },
}, },
# private link # private link
@ -525,7 +525,6 @@ class SoundcloudIE(SoundcloudBaseIE):
'repost_count': int, 'repost_count': int,
'view_count': int, 'view_count': int,
'genres': ['Dance & EDM'], 'genres': ['Dance & EDM'],
'artists': ['80M'],
}, },
}, },
# private link, downloadable format # private link, downloadable format
@ -550,7 +549,6 @@ class SoundcloudIE(SoundcloudBaseIE):
'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg', 'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
'uploader_url': 'https://soundcloud.com/oriuplift', 'uploader_url': 'https://soundcloud.com/oriuplift',
'genres': ['Trance'], 'genres': ['Trance'],
'artists': ['Ori Uplift'],
}, },
}, },
# no album art, use avatar pic for thumbnail # no album art, use avatar pic for thumbnail
@ -574,7 +572,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
'uploader_url': 'https://soundcloud.com/garyvee', 'uploader_url': 'https://soundcloud.com/garyvee',
'artists': ['MadReal'], 'genres': [],
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,

View File

@ -869,12 +869,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
for retry in (False, True): for retry in (False, True):
try: try:
video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash) video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash)
break
except ExtractorError as e: except ExtractorError as e:
if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400 if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400
and 'password' in traverse_obj( and 'password' in traverse_obj(
self._webpage_read_content(e.cause.response, e.cause.response.url, video_id, fatal=False), e.cause.response.read(),
({json.loads}, 'invalid_parameters', ..., 'field'), ({bytes.decode}, {json.loads}, 'invalid_parameters', ..., 'field'),
)): )):
self._verify_video_password( self._verify_video_password(
video_id, self._get_video_password(), viewer['xsrft']) video_id, self._get_video_password(), viewer['xsrft'])

View File

@ -5165,7 +5165,6 @@ class _UnsafeExtensionError(Exception):
'ico', 'ico',
'image', 'image',
'jng', 'jng',
'jpe',
'jpeg', 'jpeg',
'jxl', 'jxl',
'svg', 'svg',
@ -5579,15 +5578,14 @@ class FormatSorter:
value = get_value(field) value = get_value(field)
return self._calculate_field_preference_from_value(format_, field, type_, value) return self._calculate_field_preference_from_value(format_, field, type_, value)
@staticmethod def calculate_preference(self, format):
def _fill_sorting_fields(format):
# Determine missing protocol # Determine missing protocol
if not format.get('protocol'): if not format.get('protocol'):
format['protocol'] = determine_protocol(format) format['protocol'] = determine_protocol(format)
# Determine missing ext # Determine missing ext
if not format.get('ext') and 'url' in format: if not format.get('ext') and 'url' in format:
format['ext'] = determine_ext(format['url']).lower() format['ext'] = determine_ext(format['url'])
if format.get('vcodec') == 'none': if format.get('vcodec') == 'none':
format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none' format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
format['video_ext'] = 'none' format['video_ext'] = 'none'
@ -5615,8 +5613,6 @@ class FormatSorter:
if not format.get('tbr'): if not format.get('tbr'):
format['tbr'] = try_call(lambda: format['vbr'] + format['abr']) or None format['tbr'] = try_call(lambda: format['vbr'] + format['abr']) or None
def calculate_preference(self, format):
self._fill_sorting_fields(format)
return tuple(self._calculate_field_preference(format, field) for field in self._order) return tuple(self._calculate_field_preference(format, field) for field in self._order)

View File

@ -391,13 +391,14 @@ def find_element(*, tag: str, html=False): ...
def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False): def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False):
# deliberately using `id=` and `cls=` for ease of readability # deliberately using `id=` and `cls=` for ease of readability
assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required' assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required'
ANY_TAG = r'[\w:.-]+' if not tag:
tag = r'[\w:.-]+'
if attr and value: if attr and value:
assert not cls, 'Cannot match both attr and cls' assert not cls, 'Cannot match both attr and cls'
assert not id, 'Cannot match both attr and id' assert not id, 'Cannot match both attr and id'
func = get_element_html_by_attribute if html else get_element_by_attribute func = get_element_html_by_attribute if html else get_element_by_attribute
return functools.partial(func, attr, value, tag=tag or ANY_TAG) return functools.partial(func, attr, value, tag=tag)
elif cls: elif cls:
assert not id, 'Cannot match both cls and id' assert not id, 'Cannot match both cls and id'
@ -407,7 +408,7 @@ def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=Fal
elif id: elif id:
func = get_element_html_by_id if html else get_element_by_id func = get_element_html_by_id if html else get_element_by_id
return functools.partial(func, id, tag=tag or ANY_TAG) return functools.partial(func, id, tag=tag)
index = int(bool(html)) index = int(bool(html))
return lambda html: get_element_text_and_html_by_tag(tag, html)[index] return lambda html: get_element_text_and_html_by_tag(tag, html)[index]