Compare commits

..

No commits in common. "50b4b87a3d6f87ef26b86581ba65054693138ace" and "6311170b6adb4391d2ec844639c08ecdd3cc3d67" have entirely different histories.

24 changed files with 312 additions and 635 deletions

View File

@ -282,7 +282,6 @@ jobs:
uses: pypa/gh-action-pypi-publish@release/v1 uses: pypa/gh-action-pypi-publish@release/v1
with: with:
verbose: true verbose: true
attestations: false # Currently doesn't work w/ reusable workflows (breaks nightly)
publish: publish:
needs: [prepare, build] needs: [prepare, build]

View File

@ -1791,7 +1791,7 @@ The following extractors use this feature:
* `key_query`: Passthrough the master m3u8 URL query to its HLS AES-128 decryption key URI if no value is provided, or else apply the query string given as `key_query=VALUE`. Note that this will have no effect if the key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg * `key_query`: Passthrough the master m3u8 URL query to its HLS AES-128 decryption key URI if no value is provided, or else apply the query string given as `key_query=VALUE`. Note that this will have no effect if the key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg
* `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist * `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist
* `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live` * `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live`
* `impersonate`: Target(s) to try and impersonate with the initial webpage request; e.g. `generic:impersonate=safari,chrome-110`. Use `generic:impersonate` to impersonate any available target, and use `generic:impersonate=false` to disable impersonation (default) * `impersonate`: Target(s) to try and impersonate with the initial webpage request; e.g. `safari,chrome-110`. By default any available target will be used. Use `false` to disable impersonation
#### funimation #### funimation
* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese` * `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`

View File

@ -216,23 +216,5 @@
"action": "add", "action": "add",
"when": "d784464399b600ba9516bbcec6286f11d68974dd", "when": "d784464399b600ba9516bbcec6286f11d68974dd",
"short": "[priority] **The minimum *required* Python version has been raised to 3.9**\nPython 3.8 reached its end-of-life on 2024.10.07, and yt-dlp has now removed support for it. As an unfortunate side effect, the official `yt-dlp.exe` and `yt-dlp_x86.exe` binaries are no longer supported on Windows 7. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)" "short": "[priority] **The minimum *required* Python version has been raised to 3.9**\nPython 3.8 reached its end-of-life on 2024.10.07, and yt-dlp has now removed support for it. As an unfortunate side effect, the official `yt-dlp.exe` and `yt-dlp_x86.exe` binaries are no longer supported on Windows 7. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)"
},
{
"action": "change",
"when": "914af9a0cf51c9a3f74aa88d952bee8334c67511",
"short": "Expand paths in `--plugin-dirs` (#11334)",
"authors": ["bashonly"]
},
{
"action": "change",
"when": "c29f5a7fae93a08f3cfbb6127b2faa75145b06a0",
"short": "[ie/generic] Do not impersonate by default (#11336)",
"authors": ["bashonly"]
},
{
"action": "change",
"when": "57212a5f97ce367590aaa5c3e9a135eead8f81f7",
"short": "[ie/vimeo] Fix API retries (#11351)",
"authors": ["bashonly"]
} }
] ]

View File

@ -71,13 +71,14 @@ class CommitGroup(enum.Enum):
def get(cls, value: str) -> tuple[CommitGroup | None, str | None]: def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
group, _, subgroup = (group.strip().lower() for group in value.partition('/')) group, _, subgroup = (group.strip().lower() for group in value.partition('/'))
if result := cls.group_lookup().get(group): result = cls.group_lookup().get(group)
return result, subgroup or None if not result:
if subgroup:
return None, value
subgroup = group
result = cls.subgroup_lookup().get(subgroup)
if subgroup: return result, subgroup or None
return None, value
return cls.subgroup_lookup().get(group), group or None
@dataclass @dataclass
@ -135,7 +136,8 @@ class Changelog:
first = False first = False
yield '\n<details><summary><h3>Changelog</h3></summary>\n' yield '\n<details><summary><h3>Changelog</h3></summary>\n'
if group := groups[item]: group = groups[item]
if group:
yield self.format_module(item.value, group) yield self.format_module(item.value, group)
if self._collapsible: if self._collapsible:
@ -251,7 +253,7 @@ class CommitRange:
''', re.VERBOSE | re.DOTALL) ''', re.VERBOSE | re.DOTALL)
EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE) EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})') REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
FIXES_RE = re.compile(r'(?i:(?:bug\s*)?fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Improve)\s+([\da-f]{40})') FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert|Improve)\s+([\da-f]{40})')
UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)') UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')
def __init__(self, start, end, default_author=None): def __init__(self, start, end, default_author=None):
@ -285,16 +287,11 @@ class CommitRange:
short = next(lines) short = next(lines)
skip = short.startswith('Release ') or short == '[version] update' skip = short.startswith('Release ') or short == '[version] update'
fix_commitish = None
if match := self.FIXES_RE.search(short):
fix_commitish = match.group(1)
authors = [default_author] if default_author else [] authors = [default_author] if default_author else []
for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR): for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
if match := self.AUTHOR_INDICATOR_RE.match(line): match = self.AUTHOR_INDICATOR_RE.match(line)
if match:
authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold) authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)
if not fix_commitish and (match := self.FIXES_RE.fullmatch(line)):
fix_commitish = match.group(1)
commit = Commit(commit_hash, short, authors) commit = Commit(commit_hash, short, authors)
if skip and (self._start or not i): if skip and (self._start or not i):
@ -304,17 +301,21 @@ class CommitRange:
logger.debug(f'Reached Release commit, breaking: {commit}') logger.debug(f'Reached Release commit, breaking: {commit}')
break break
if match := self.REVERT_RE.fullmatch(commit.short): revert_match = self.REVERT_RE.fullmatch(commit.short)
reverts[match.group(1)] = commit if revert_match:
reverts[revert_match.group(1)] = commit
continue continue
if fix_commitish: fix_match = self.FIXES_RE.search(commit.short)
fixes[fix_commitish].append(commit) if fix_match:
commitish = fix_match.group(1)
fixes[commitish].append(commit)
commits[commit.hash] = commit commits[commit.hash] = commit
for commitish, revert_commit in reverts.items(): for commitish, revert_commit in reverts.items():
if reverted := commits.pop(commitish, None): reverted = commits.pop(commitish, None)
if reverted:
logger.debug(f'{commitish} fully reverted {reverted}') logger.debug(f'{commitish} fully reverted {reverted}')
else: else:
commits[revert_commit.hash] = revert_commit commits[revert_commit.hash] = revert_commit
@ -460,7 +461,8 @@ def create_changelog(args):
logger.info(f'Loaded {len(commits)} commits') logger.info(f'Loaded {len(commits)} commits')
if new_contributors := get_new_contributors(args.contributors_path, commits): new_contributors = get_new_contributors(args.contributors_path, commits)
if new_contributors:
if args.contributors: if args.contributors:
write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a') write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a')
logger.info(f'New contributors: {", ".join(new_contributors)}') logger.info(f'New contributors: {", ".join(new_contributors)}')

View File

@ -53,18 +53,6 @@ class TestInfoExtractor(unittest.TestCase):
def test_ie_key(self): def test_ie_key(self):
self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE) self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
def test_get_netrc_login_info(self):
    """Verify `_get_netrc_login_info` for a netrc file and for a netrc command."""
    # Expected (login, password) tuple for each machine name that is looked up
    expected_by_machine = (
        ('normal_use', ('user', 'pass')),
        ('empty_user', ('', 'pass')),
        ('empty_pass', ('user', '')),
        ('both_empty', ('', '')),
        ('nonexistent', (None, None)),
    )
    netrc_file_params = {'usenetrc': True, 'netrc_location': './test/testdata/netrc/netrc'}
    netrc_cmd_params = {'netrc_cmd': f'{sys.executable} ./test/testdata/netrc/print_netrc.py'}

    for params in (netrc_file_params, netrc_cmd_params):
        # One extractor per parameter set; every lookup below reuses it
        ie = DummyIE(FakeYDL(params))
        for machine, credentials in expected_by_machine:
            self.assertEqual(ie._get_netrc_login_info(netrc_machine=machine), credentials)
def test_html_search_regex(self): def test_html_search_regex(self):
html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>' html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'
search = lambda re, *args: self.ie._html_search_regex(re, html, *args) search = lambda re, *args: self.ie._html_search_regex(re, html, *args)

View File

@ -12,10 +12,9 @@ from yt_dlp.utils import (
str_or_none, str_or_none,
) )
from yt_dlp.utils.traversal import ( from yt_dlp.utils.traversal import (
traverse_obj,
require, require,
subs_list_to_dict, subs_list_to_dict,
traverse_obj,
trim_str,
) )
_TEST_DATA = { _TEST_DATA = {
@ -496,20 +495,6 @@ class TestTraversalHelpers:
{'url': 'https://example.com/subs/en2', 'ext': 'ext'}, {'url': 'https://example.com/subs/en2', 'ext': 'ext'},
]}, '`quality` key should sort subtitle list accordingly' ]}, '`quality` key should sort subtitle list accordingly'
def test_trim_str(self):
    """Check that `trim_str` rejects a positional argument and returns a callable that trims 'abc' as expected."""
    # Passing the value positionally must raise
    with pytest.raises(TypeError):
        trim_str('positional')

    assert callable(trim_str(start='a'))

    # (keyword arguments, expected result of applying the returned callable to 'abc')
    cases = [
        ({'start': 'ab'}, 'c'),
        ({'end': 'bc'}, 'a'),
        ({'start': 'a', 'end': 'c'}, 'b'),
        ({'start': 'ab', 'end': 'c'}, ''),
        ({'start': 'a', 'end': 'bc'}, ''),
        ({'start': 'ab', 'end': 'bc'}, ''),
        ({'start': 'abc', 'end': 'abc'}, ''),
        ({'start': '', 'end': ''}, 'abc'),
    ]
    for kwargs, expected in cases:
        assert trim_str(**kwargs)('abc') == expected
class TestDictGet: class TestDictGet:
def test_dict_get(self): def test_dict_get(self):

View File

@ -4,7 +4,6 @@
import os import os
import sys import sys
import unittest import unittest
import unittest.mock
import warnings import warnings
import datetime as dt import datetime as dt
@ -72,7 +71,6 @@ from yt_dlp.utils import (
intlist_to_bytes, intlist_to_bytes,
iri_to_uri, iri_to_uri,
is_html, is_html,
join_nonempty,
js_to_json, js_to_json,
limit_length, limit_length,
locked_file, locked_file,
@ -345,13 +343,11 @@ class TestUtil(unittest.TestCase):
self.assertEqual(remove_start(None, 'A - '), None) self.assertEqual(remove_start(None, 'A - '), None)
self.assertEqual(remove_start('A - B', 'A - '), 'B') self.assertEqual(remove_start('A - B', 'A - '), 'B')
self.assertEqual(remove_start('B - A', 'A - '), 'B - A') self.assertEqual(remove_start('B - A', 'A - '), 'B - A')
self.assertEqual(remove_start('non-empty', ''), 'non-empty')
def test_remove_end(self): def test_remove_end(self):
self.assertEqual(remove_end(None, ' - B'), None) self.assertEqual(remove_end(None, ' - B'), None)
self.assertEqual(remove_end('A - B', ' - B'), 'A') self.assertEqual(remove_end('A - B', ' - B'), 'A')
self.assertEqual(remove_end('B - A', ' - B'), 'B - A') self.assertEqual(remove_end('B - A', ' - B'), 'B - A')
self.assertEqual(remove_end('non-empty', ''), 'non-empty')
def test_remove_quotes(self): def test_remove_quotes(self):
self.assertEqual(remove_quotes(None), None) self.assertEqual(remove_quotes(None), None)
@ -2152,16 +2148,6 @@ Line 1
assert run_shell(args) == expected assert run_shell(args) == expected
assert run_shell(shell_quote(args, shell=True)) == expected assert run_shell(shell_quote(args, shell=True)) == expected
def test_partial_application(self):
    """Check partial application of `int_or_none` and `join_nonempty`.

    Calling either function without its positional argument(s) must return
    a callable; supplying the positional argument (positionally or by
    keyword) must run the function immediately.
    """
    # Single positional parameter
    assert callable(int_or_none(scale=10)), 'missing positional parameter should apply partially'
    assert int_or_none(10, scale=0.1) == 100, 'positionally passed argument should call function'
    assert int_or_none(v=10) == 10, 'keyword passed positional should call function'
    assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function'

    # Varargs positional parameter
    assert callable(join_nonempty(delim=', ')), 'varargs positional should apply partially'
    assert callable(join_nonempty()), 'varargs positional should apply partially'
    assert join_nonempty(None, delim=', ') == '', 'passed varargs should call the function'
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -1,4 +0,0 @@
machine normal_use login user password pass
machine empty_user login "" password pass
machine empty_pass login user password ""
machine both_empty login "" password ""

View File

@ -1,2 +0,0 @@
# Print the contents of the test netrc file to stdout.
with open('./test/testdata/netrc/netrc', encoding='utf-8') as netrc_file:
    netrc_contents = netrc_file.read()
print(netrc_contents)

View File

@ -2849,10 +2849,13 @@ class YoutubeDL:
sanitize_string_field(fmt, 'format_id') sanitize_string_field(fmt, 'format_id')
sanitize_numeric_fields(fmt) sanitize_numeric_fields(fmt)
fmt['url'] = sanitize_url(fmt['url']) fmt['url'] = sanitize_url(fmt['url'])
FormatSorter._fill_sorting_fields(fmt) if fmt.get('ext') is None:
fmt['ext'] = determine_ext(fmt['url']).lower()
if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'): if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
if fmt.get('acodec') is None: if fmt.get('acodec') is None:
fmt['acodec'] = fmt['ext'] fmt['acodec'] = fmt['ext']
if fmt.get('protocol') is None:
fmt['protocol'] = determine_protocol(fmt)
if fmt.get('resolution') is None: if fmt.get('resolution') is None:
fmt['resolution'] = self.format_resolution(fmt, default=None) fmt['resolution'] = self.format_resolution(fmt, default=None)
if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none': if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none':

View File

@ -401,6 +401,8 @@ from .cmt import CMTIE
from .cnbc import CNBCVideoIE from .cnbc import CNBCVideoIE
from .cnn import ( from .cnn import (
CNNIE, CNNIE,
CNNArticleIE,
CNNBlogsIE,
CNNIndonesiaIE, CNNIndonesiaIE,
) )
from .comedycentral import ( from .comedycentral import (

View File

@ -12,86 +12,53 @@ from ..utils import (
class CCMAIE(InfoExtractor): class CCMAIE(InfoExtractor):
IE_DESC = '3Cat, TV3 and Catalunya Ràdio' _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?3cat\.cat/(?:3cat|tv3/sx3)/[^/?#]+/(?P<type>video|audio)/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
# ccma.cat/tv3/alacarta/ URLs redirect to 3cat.cat/3cat/ 'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
'url': 'https://www.3cat.cat/3cat/lespot-de-la-marato-de-tv3/video/5630208/',
'md5': '7296ca43977c8ea4469e719c609b0871', 'md5': '7296ca43977c8ea4469e719c609b0871',
'info_dict': { 'info_dict': {
'id': '5630208', 'id': '5630208',
'ext': 'mp4', 'ext': 'mp4',
'title': 'L\'espot de La Marató 2016: Ictus i les lesions medul·lars i cerebrals traumàtiques', 'title': 'L\'espot de La Marató de TV3',
'description': 'md5:f12987f320e2f6e988e9908e4fe97765', 'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
'timestamp': 1478608140, 'timestamp': 1478608140,
'upload_date': '20161108', 'upload_date': '20161108',
'age_limit': 0, 'age_limit': 0,
'alt_title': 'EsportMarató2016WEB_PerPublicar',
'duration': 79,
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/4/6/1478536106664.jpg',
'series': 'Dedicada a l\'ictus i les lesions medul·lars i cerebrals traumàtiques',
'categories': ['Divulgació'],
}, },
}, { }, {
# ccma.cat/catradio/alacarta/ URLs redirect to 3cat.cat/3cat/ 'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
'url': 'https://www.3cat.cat/3cat/el-consell-de-savis-analitza-el-derbi/audio/943685/',
'md5': 'fa3e38f269329a278271276330261425', 'md5': 'fa3e38f269329a278271276330261425',
'info_dict': { 'info_dict': {
'id': '943685', 'id': '943685',
'ext': 'mp3', 'ext': 'mp3',
'title': 'El Consell de Savis analitza el derbi', 'title': 'El Consell de Savis analitza el derbi',
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53', 'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
'upload_date': '20161217', 'upload_date': '20170512',
'timestamp': 1482011700, 'timestamp': 1494622500,
'vcodec': 'none', 'vcodec': 'none',
'categories': ['Esports'], 'categories': ['Esports'],
'series': 'Tot gira',
'duration': 821,
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/8/9/1482002602598.jpg',
}, },
}, { }, {
'url': 'https://www.3cat.cat/3cat/crims-josep-tallada-lespereu-me-part-1/video/6031387/', 'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
'md5': '27493513d08a3e5605814aee9bb778d2', 'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
'info_dict': { 'info_dict': {
'id': '6031387', 'id': '6031387',
'ext': 'mp4', 'ext': 'mp4',
'title': 'T1xC5 - Josep Talleda, l\'"Espereu-me" (part 1)', 'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60', 'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
'timestamp': 1582577919, 'timestamp': 1582577700,
'upload_date': '20200224', 'upload_date': '20200224',
'subtitles': 'mincount:1', 'subtitles': 'mincount:4',
'age_limit': 13, 'age_limit': 16,
'series': 'Crims', 'series': 'Crims',
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/1/9/1582564376991.jpg',
'duration': 3203,
'categories': ['Divulgació'],
'alt_title': 'Crims - 5 - Josep Talleda, l\'"Espereu-me" (1a part) - Josep Talleda, l\'"Espereu-me" (part 1)',
'episode_number': 5,
'episode': 'Episode 5',
},
}, {
'url': 'https://www.3cat.cat/tv3/sx3/una-mosca-volava-per-la-llum/video/5759227/',
'info_dict': {
'id': '5759227',
'ext': 'mp4',
'title': 'Una mosca volava per la llum',
'alt_title': '17Z004Ç UNA MOSCA VOLAVA PER LA LLUM',
'description': 'md5:9ab64276944b0825336f4147f13f7854',
'series': 'Mic',
'upload_date': '20180411',
'timestamp': 1523440105,
'duration': 160,
'age_limit': 0,
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/6/1/1524071667216.jpg',
'categories': ['Música'],
}, },
}] }]
def _real_extract(self, url): def _real_extract(self, url):
media_type, media_id = self._match_valid_url(url).group('type', 'id') media_type, media_id = self._match_valid_url(url).groups()
media = self._download_json( media = self._download_json(
'http://api-media.3cat.cat/pvideo/media.jsp', media_id, query={ 'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
'media': media_type, 'media': media_type,
'idint': media_id, 'idint': media_id,
'format': 'dm', 'format': 'dm',

View File

@ -1,226 +1,146 @@
import functools
import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from .turner import TurnerBaseIE
clean_html, from ..utils import merge_dicts, try_call, url_basename
extract_attributes,
int_or_none,
merge_dicts,
parse_duration,
parse_iso8601,
parse_resolution,
try_call,
update_url,
url_or_none,
)
from ..utils.traversal import find_elements, traverse_obj
class CNNIE(InfoExtractor): class CNNIE(TurnerBaseIE):
_VALID_URL = r'https?://(?:(?:edition|www|money|cnnespanol)\.)?cnn\.com/(?!audio/)(?P<display_id>[^?#]+?)(?:[?#]|$|/index\.html)' _VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.cnn.com/2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl', 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
'md5': '3e6121ea48df7e2259fe73a0628605c4',
'info_dict': { 'info_dict': {
'id': 'med0e97ad0d154f56e29aa96e57192a14226734b6b', 'id': 'sports/2013/06/09/nadal-1-on-1.cnn',
'display_id': '2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20240531', 'title': 'Nadal wins 8th French Open title',
'description': 'md5:844bcdb0629e1877a7a466c913f4c19c', 'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/gettyimages-2151936122.jpg?c=original', 'duration': 135,
'duration': 373.0, 'upload_date': '20130609',
'timestamp': 1717148586, },
'title': 'Borussia Dortmund star Jadon Sancho seeks Wembley redemption after 2020 Euros hurt', 'expected_warnings': ['Failed to download m3u8 information'],
'modified_date': '20240531', }, {
'modified_timestamp': 1717150140, 'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
'info_dict': {
'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology',
'ext': 'mp4',
'title': "Student's epic speech stuns new freshmen",
'description': 'A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from "2001: A Space Odyssey."',
'upload_date': '20130821',
},
'expected_warnings': ['Failed to download m3u8 information'],
}, {
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
'info_dict': {
'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln',
'ext': 'mp4',
'title': 'Nashville Ep. 1: Hand crafted skateboards',
'description': 'md5:e7223a503315c9f150acac52e76de086',
'upload_date': '20141222',
},
'expected_warnings': ['Failed to download m3u8 information'],
}, {
'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html',
'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
'info_dict': {
'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney',
'ext': 'mp4',
'title': '5 stunning stats about Netflix',
'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.',
'upload_date': '20160819',
},
'params': {
# m3u8 download
'skip_download': True,
}, },
}, { }, {
'url': 'https://edition.cnn.com/2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid', 'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
'info_dict': { 'only_matching': True,
'id': 'me522945c4709b299e5cb8657900a7a21ad3b559f9',
'display_id': '2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
'ext': 'mp4',
'description': 'md5:e0120fe5da9ad8259fd707c1cbb64a60',
'title': 'Heres how some inmates in closely divided state are now able to vote from jail',
'timestamp': 1718158269,
'upload_date': '20240612',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701554-13565-571-still.jpg?c=original',
'duration': 202.0,
'modified_date': '20240612',
'modified_timestamp': 1718158509,
},
}, { }, {
'url': 'https://edition.cnn.com/2024/06/11/style/king-charles-portrait-vandalized/index.html', 'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
'info_dict': { 'only_matching': True,
'id': 'mef5f52b9e1fe28b1ad192afcbc9206ae984894b68',
'display_id': '2024/06/11/style/king-charles-portrait-vandalized',
'ext': 'mp4',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701257-8846-816-still.jpg?c=original',
'description': 'md5:19f78338ccec533db0fa8a4511012dae',
'title': 'Video shows King Charles\' portrait being vandalized by activists',
'timestamp': 1718113852,
'upload_date': '20240611',
'duration': 51.0,
'modified_timestamp': 1718116193,
'modified_date': '20240611',
},
}, { }, {
'url': 'https://edition.cnn.com/videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln', 'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn',
'info_dict': { 'only_matching': True,
'id': 'mefba13799201b084ea3b1d0f7ca820ae94d4bb5b2',
'display_id': 'videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
'ext': 'mp4',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/221205163510-robin-meade-sign-off.jpg?c=original',
'duration': 158.0,
'title': 'Robin Meade signs off after HLN\'s last broadcast',
'description': 'md5:cff3c62d18d2fbc6c5c75cb029b7353b',
'upload_date': '20221205',
'timestamp': 1670284296,
'modified_timestamp': 1670332404,
'modified_date': '20221206',
},
'params': {'format': 'direct'},
}, {
'url': 'https://cnnespanol.cnn.com/video/ataque-misil-israel-beirut-libano-octubre-trax',
'info_dict': {
'id': 'me484a43722642aa00627b812fe928f2e99c6e2997',
'ext': 'mp4',
'display_id': 'video/ataque-misil-israel-beirut-libano-octubre-trax',
'timestamp': 1729501452,
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/ataqeubeirut-1.jpg?c=original',
'description': 'md5:256ee7137d161f776cda429654135e52',
'upload_date': '20241021',
'duration': 31.0,
'title': 'VIDEO | Israel lanza un nuevo ataque sobre Beirut',
'modified_date': '20241021',
'modified_timestamp': 1729501530,
},
}, {
'url': 'https://edition.cnn.com/2024/10/16/politics/kamala-harris-fox-news-interview/index.html',
'info_dict': {
'id': '2024/10/16/politics/kamala-harris-fox-news-interview',
},
'playlist_count': 2,
'playlist': [{
'md5': '073ffab87b8bef97c9913e71cc18ef9e',
'info_dict': {
'id': 'me19d548fdd54df0924087039283128ef473ab397d',
'ext': 'mp4',
'title': '\'I\'m not finished\': Harris interview with Fox News gets heated',
'display_id': 'kamala-harris-fox-news-interview-ebof-digvid',
'description': 'md5:e7dd3d1a04df916062230b60ca419a0a',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/harris-20241016234916617.jpg?c=original',
'duration': 173.0,
'timestamp': 1729122182,
'upload_date': '20241016',
'modified_timestamp': 1729194706,
'modified_date': '20241017',
},
'params': {'format': 'direct'},
}, {
'md5': '11604ab4af83b650826753f1ccb8ecff',
'info_dict': {
'id': 'med04507d8ca3da827001f63d22af321ec29c7d97b',
'ext': 'mp4',
'title': '\'Wise\': Buttigieg on Harris\' handling of interview question about gender transition surgery',
'display_id': 'pete-buttigieg-harris-fox-newssrc-digvid',
'description': 'md5:602a8a7e853ed5e574acd3159428c98e',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/buttigieg-20241017040412074.jpg?c=original',
'duration': 145.0,
'timestamp': 1729137765,
'upload_date': '20241017',
'modified_timestamp': 1729138184,
'modified_date': '20241017',
},
'params': {'format': 'direct'},
}],
}] }]
_CONFIG = {
# http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml
'edition': {
'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml',
'media_src': 'http://pmd.cdn.turner.com/cnn/big',
},
# http://money.cnn.com/.element/apps/cvp2/cfg/config.xml
'money': {
'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml',
'media_src': 'http://ht3.cdn.turner.com/money/big',
},
}
def _extract_timestamp(self, video_data):
# TODO: fix timestamp extraction
return None
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_valid_url(url).group('display_id') sub_domain, path, page_title = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, display_id) if sub_domain not in ('money', 'edition'):
app_id = traverse_obj( sub_domain = 'edition'
self._search_json(r'window\.env\s*=', webpage, 'window env', display_id, default={}), config = self._CONFIG[sub_domain]
('TOP_AUTH_SERVICE_APP_ID', {str})) return self._extract_cvp_info(
config['data_src'] % path, page_title, {
entries = [] 'default': {
for player_data in traverse_obj(webpage, ( 'media_src': config['media_src'],
{find_elements(tag='div', attr='data-component-name', value='video-player', html=True)}, },
..., {extract_attributes}, all, lambda _, v: v['data-media-id'])): 'f4m': {
media_id = player_data['data-media-id'] 'host': 'cnn-vh.akamaihd.net',
parent_uri = player_data.get('data-video-resource-parent-uri') },
formats, subtitles = [], {}
video_data = {}
if parent_uri:
video_data = self._download_json(
'https://fave.api.cnn.io/v1/video', media_id, fatal=False,
query={
'id': media_id,
'stellarUri': parent_uri,
})
for direct_url in traverse_obj(video_data, ('files', ..., 'fileUri', {url_or_none})):
resolution, bitrate = None, None
if mobj := re.search(r'-(?P<res>\d+x\d+)_(?P<tbr>\d+)k\.mp4', direct_url):
resolution, bitrate = mobj.group('res', 'tbr')
formats.append({
'url': direct_url,
'format_id': 'direct',
'quality': 1,
'tbr': int_or_none(bitrate),
**parse_resolution(resolution),
})
for sub_data in traverse_obj(video_data, (
'closedCaptions', 'types', lambda _, v: url_or_none(v['track']['url']), 'track')):
subtitles.setdefault(sub_data.get('lang') or 'en', []).append({
'url': sub_data['url'],
'name': sub_data.get('label'),
})
if app_id:
media_data = self._download_json(
f'https://medium.ngtv.io/v2/media/{media_id}/desktop', media_id, fatal=False,
query={'appId': app_id})
m3u8_url = traverse_obj(media_data, (
'media', 'desktop', 'unprotected', 'unencrypted', 'url', {url_or_none}))
if m3u8_url:
fmts, subs = self._extract_m3u8_formats_and_subtitles(
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
entries.append({
**traverse_obj(player_data, {
'title': ('data-headline', {clean_html}),
'description': ('data-description', {clean_html}),
'duration': ('data-duration', {parse_duration}),
'timestamp': ('data-publish-date', {parse_iso8601}),
'thumbnail': (
'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none},
{functools.partial(update_url, query='c=original')}),
'display_id': 'data-video-slug',
}),
**traverse_obj(video_data, {
'timestamp': ('dateCreated', 'uts', {int_or_none(scale=1000)}),
'description': ('description', {clean_html}),
'title': ('headline', {str}),
'modified_timestamp': ('lastModified', 'uts', {int_or_none(scale=1000)}),
'duration': ('trt', {int_or_none}),
}),
'id': media_id,
'formats': formats,
'subtitles': subtitles,
}) })
if len(entries) == 1:
return {
**entries[0],
'display_id': display_id,
}
return self.playlist_result(entries, display_id) class CNNBlogsIE(InfoExtractor):
_VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+'
_TEST = {
'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/',
'md5': '3e56f97b0b6ffb4b79f4ea0749551084',
'info_dict': {
'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn',
'ext': 'mp4',
'title': 'Criminalizing journalism?',
'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
'upload_date': '20140209',
},
'expected_warnings': ['Failed to download m3u8 information'],
'add_ie': ['CNN'],
}
def _real_extract(self, url):
    """Hand a CNN blog post over to the CNN extractor.

    Downloads the blog page, takes the value captured from its
    ``data-url="..."`` attribute and returns a URL result for that value,
    keyed to ``CNNIE``.
    """
    page_html = self._download_webpage(url, url_basename(url))
    embedded_url = self._html_search_regex(
        r'data-url="(.+?)"', page_html, 'cnn url')
    # Delegate the actual extraction to the main CNN extractor
    return self.url_result(embedded_url, CNNIE.ie_key())
class CNNArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)'
_TEST = {
'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
'info_dict': {
'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
'ext': 'mp4',
'title': 'Obama: Cyberattack not an act of war',
'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
'upload_date': '20141221',
},
'expected_warnings': ['Failed to download m3u8 information'],
'add_ie': ['CNN'],
}
def _real_extract(self, url):
    """Hand a CNN article page over to the CNN extractor.

    Downloads the article, captures the path given in its ``video: '...'``
    snippet and appends it to the ``http://cnn.com/video/?/video/`` prefix.
    The resulting URL is returned as a URL result keyed to ``CNNIE``.
    """
    article_html = self._download_webpage(url, url_basename(url))
    video_path = self._html_search_regex(
        r"video:\s*'([^']+)'", article_html, 'cnn url')
    # The captured value is only a path; prefix it to form the video URL
    video_url = 'http://cnn.com/video/?/video/' + video_path
    return self.url_result(video_url, CNNIE.ie_key())
class CNNIndonesiaIE(InfoExtractor): class CNNIndonesiaIE(InfoExtractor):

View File

@ -47,7 +47,6 @@ from ..utils import (
FormatSorter, FormatSorter,
GeoRestrictedError, GeoRestrictedError,
GeoUtils, GeoUtils,
ISO639Utils,
LenientJSONDecoder, LenientJSONDecoder,
Popen, Popen,
RegexNotFoundError, RegexNotFoundError,
@ -1409,13 +1408,6 @@ class InfoExtractor:
return None, None return None, None
self.write_debug(f'Using netrc for {netrc_machine} authentication') self.write_debug(f'Using netrc for {netrc_machine} authentication')
# compat: <=py3.10: netrc cannot parse tokens as empty strings, will return `""` instead
# Ref: https://github.com/yt-dlp/yt-dlp/issues/11413
# https://github.com/python/cpython/commit/15409c720be0503131713e3d3abc1acd0da07378
if sys.version_info < (3, 11):
return tuple(x if x != '""' else '' for x in info[::2])
return info[0], info[2] return info[0], info[2]
def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None): def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
@ -3079,11 +3071,7 @@ class InfoExtractor:
url_pattern = stream.attrib['Url'] url_pattern = stream.attrib['Url']
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
stream_name = stream.get('Name') stream_name = stream.get('Name')
# IsmFD expects ISO 639 Set 2 language codes (3-character length) stream_language = stream.get('Language', 'und')
# See: https://github.com/yt-dlp/yt-dlp/issues/11356
stream_language = stream.get('Language') or 'und'
if len(stream_language) != 3:
stream_language = ISO639Utils.short2long(stream_language) or 'und'
for track in stream.findall('QualityLevel'): for track in stream.findall('QualityLevel'):
KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'} KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'}
fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag')) fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))

View File

@ -8,8 +8,6 @@ from .common import InfoExtractor
from .commonprotocols import RtmpIE from .commonprotocols import RtmpIE
from .youtube import YoutubeIE from .youtube import YoutubeIE
from ..compat import compat_etree_fromstring from ..compat import compat_etree_fromstring
from ..cookies import LenientSimpleCookie
from ..networking.exceptions import HTTPError
from ..networking.impersonate import ImpersonateTarget from ..networking.impersonate import ImpersonateTarget
from ..utils import ( from ..utils import (
KNOWN_EXTENSIONS, KNOWN_EXTENSIONS,
@ -2376,9 +2374,10 @@ class GenericIE(InfoExtractor):
else: else:
video_id = self._generic_id(url) video_id = self._generic_id(url)
# Do not impersonate by default; see https://github.com/yt-dlp/yt-dlp/issues/11335 # Try to impersonate a web-browser by default if possible
impersonate = self._configuration_arg('impersonate', ['false']) # Skip impersonation if not available to omit the warning
if 'false' in impersonate: impersonate = self._configuration_arg('impersonate', [''])
if 'false' in impersonate or not self._downloader._impersonate_target_available(ImpersonateTarget()):
impersonate = None impersonate = None
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac) # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
@ -2389,29 +2388,10 @@ class GenericIE(InfoExtractor):
# to accept raw bytes and being able to download only a chunk. # to accept raw bytes and being able to download only a chunk.
# It may probably better to solve this by checking Content-Type for application/octet-stream # It may probably better to solve this by checking Content-Type for application/octet-stream
# after a HEAD request, but not sure if we can rely on this. # after a HEAD request, but not sure if we can rely on this.
try: full_response = self._request_webpage(url, video_id, headers=filter_dict({
full_response = self._request_webpage(url, video_id, headers=filter_dict({ 'Accept-Encoding': 'identity',
'Accept-Encoding': 'identity', 'Referer': smuggled_data.get('referer'),
'Referer': smuggled_data.get('referer'), }), impersonate=impersonate)
}), impersonate=impersonate)
except ExtractorError as e:
if not (isinstance(e.cause, HTTPError) and e.cause.status == 403
and e.cause.response.get_header('cf-mitigated') == 'challenge'
and e.cause.response.extensions.get('impersonate') is None):
raise
cf_cookie_domain = traverse_obj(
LenientSimpleCookie(e.cause.response.get_header('set-cookie')),
('__cf_bm', 'domain'))
if cf_cookie_domain:
self.write_debug(f'Clearing __cf_bm cookie for {cf_cookie_domain}')
self.cookiejar.clear(domain=cf_cookie_domain, path='/', name='__cf_bm')
msg = 'Got HTTP Error 403 caused by Cloudflare anti-bot challenge; '
if not self._downloader._impersonate_target_available(ImpersonateTarget()):
msg += ('see https://github.com/yt-dlp/yt-dlp#impersonation for '
'how to install the required impersonation dependency, and ')
raise ExtractorError(
f'{msg}try again with --extractor-args "generic:impersonate"', expected=True)
new_url = full_response.url new_url = full_response.url
if new_url != extract_basic_auth(url)[0]: if new_url != extract_basic_auth(url)[0]:
self.report_following_redirect(new_url) self.report_following_redirect(new_url)

View File

@ -869,7 +869,7 @@ class NicovideoTagURLIE(NicovideoSearchBaseIE):
class NiconicoUserIE(InfoExtractor): class NiconicoUserIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)(?:/video)?/?(?:$|[#?])' _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
_TEST = { _TEST = {
'url': 'https://www.nicovideo.jp/user/419948', 'url': 'https://www.nicovideo.jp/user/419948',
'info_dict': { 'info_dict': {
@ -877,7 +877,7 @@ class NiconicoUserIE(InfoExtractor):
}, },
'playlist_mincount': 101, 'playlist_mincount': 101,
} }
_API_URL = 'https://nvapi.nicovideo.jp/v2/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s' _API_URL = 'https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s'
_PAGE_SIZE = 100 _PAGE_SIZE = 100
_API_HEADERS = { _API_HEADERS = {
@ -897,13 +897,12 @@ class NiconicoUserIE(InfoExtractor):
total_count = int_or_none(json_parsed['data'].get('totalCount')) total_count = int_or_none(json_parsed['data'].get('totalCount'))
for entry in json_parsed['data']['items']: for entry in json_parsed['data']['items']:
count += 1 count += 1
yield self.url_result( yield self.url_result('https://www.nicovideo.jp/watch/{}'.format(entry['id']))
f'https://www.nicovideo.jp/watch/{entry["essential"]["id"]}', ie=NiconicoIE)
page_num += 1 page_num += 1
def _real_extract(self, url): def _real_extract(self, url):
list_id = self._match_id(url) list_id = self._match_id(url)
return self.playlist_result(self._entries(list_id), list_id) return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())
class NiconicoLiveIE(InfoExtractor): class NiconicoLiveIE(InfoExtractor):

View File

@ -7,99 +7,49 @@ class RadioRadicaleIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?radioradicale\.it/scheda/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?radioradicale\.it/scheda/(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.radioradicale.it/scheda/471591', 'url': 'https://www.radioradicale.it/scheda/471591',
'md5': 'eb0fbe43a601f1a361cbd00f3c45af4a',
'info_dict': { 'info_dict': {
'id': '471591', 'id': '471591',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:e8fbb8de57011a3255db0beca69af73d', 'title': 'md5:e8fbb8de57011a3255db0beca69af73d',
'description': 'md5:5e15a789a2fe4d67da8d1366996e89ef',
'location': 'Napoli', 'location': 'Napoli',
'duration': 2852.0,
'timestamp': 1459987200, 'timestamp': 1459987200,
'upload_date': '20160407', 'upload_date': '20160407',
'description': 'md5:5e15a789a2fe4d67da8d1366996e89ef',
'thumbnail': 'https://www.radioradicale.it/photo400/0/0/9/0/1/00901768.jpg', 'thumbnail': 'https://www.radioradicale.it/photo400/0/0/9/0/1/00901768.jpg',
}, },
}, { 'params': {
'url': 'https://www.radioradicale.it/scheda/742783/parlamento-riunito-in-seduta-comune-11a-della-xix-legislatura', 'skip_download': True,
'info_dict': {
'id': '742783',
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
'description': '-) Votazione per l\'elezione di un giudice della Corte Costituzionale (nono scrutinio)',
'location': 'CAMERA',
'duration': 5868.0,
'timestamp': 1730246400,
'upload_date': '20241030',
}, },
'playlist': [{
'md5': 'aa48de55dcc45478e4cd200f299aab7d',
'info_dict': {
'id': '742783-0',
'ext': 'mp4',
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
},
}, {
'md5': 'be915c189c70ad2920e5810f32260ff5',
'info_dict': {
'id': '742783-1',
'ext': 'mp4',
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
},
}, {
'md5': 'f0ee4047342baf8ed3128a8417ac5e0a',
'info_dict': {
'id': '742783-2',
'ext': 'mp4',
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
},
}],
}] }]
def _entries(self, videos_info, page_id):
    """Yield one info dict per usable member of ``videos_info['playlist']``.

    Members are selected with the ``v['sources']`` filter. Each yielded dict
    carries the id ``<page_id>-<index>``, the member's ``title``, the formats
    collected from every ``sources[*].src`` value accepted by ``url_or_none``,
    and the merged subtitles (those returned alongside the m3u8 formats plus
    the ones built from the member's own ``subtitles`` data).
    """
    playable = traverse_obj(videos_info, ('playlist', lambda _, v: v['sources']))
    for index, item in enumerate(playable):
        entry_id = f'{page_id}-{index}'
        collected_formats, collected_subs = [], {}

        for manifest_url in traverse_obj(item, ('sources', ..., 'src', {url_or_none})):
            manifest_formats, manifest_subs = self._extract_m3u8_formats_and_subtitles(
                manifest_url, entry_id)
            collected_formats.extend(manifest_formats)
            self._merge_subtitles(manifest_subs, target=collected_subs)

        # NOTE(review): this path applies both `...` and a filter below 'subtitles',
        # while the playlist lookup above filters directly - confirm that this
        # matches how the subtitles payload is nested.
        for track in traverse_obj(item, ('subtitles', ..., lambda _, v: url_or_none(v['src']))):
            language = track.get('srclang') or 'und'  # 'und' when no language is given
            self._merge_subtitles({language: [{
                'url': track['src'],
                'name': track.get('label'),
            }]}, target=collected_subs)

        yield dict(
            id=entry_id,
            title=item.get('title'),
            formats=collected_formats,
            subtitles=collected_subs,
        )
def _real_extract(self, url): def _real_extract(self, url):
page_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, page_id) webpage = self._download_webpage(url, video_id)
videos_info = self._search_json( video_info = self._search_json(
r'jQuery\.extend\(Drupal\.settings\s*,', r'jQuery\.extend\(Drupal\.settings\s*,',
webpage, 'videos_info', page_id)['RRscheda'] webpage, 'video_info', video_id)['RRscheda']
json_ld = self._search_json_ld(webpage, video_id)
entries = list(self._entries(videos_info, page_id)) formats = []
for source in traverse_obj(video_info, ('playlist', 0, 'sources')):
formats.extend(
self._extract_m3u8_formats(source.get('src'), video_id))
common_info = { subtitles = {}
'id': page_id, for sub in traverse_obj(video_info, ('playlist', 0, 'subtitles')):
'title': self._og_search_title(webpage), self._merge_subtitles({sub.get('srclang') or 'und': [{
'description': self._og_search_description(webpage), 'url': sub.get('src'),
'location': videos_info.get('luogo'), 'name': sub.get('label'),
**self._search_json_ld(webpage, page_id), }]}, target=subtitles)
return {
'id': video_id,
'formats': formats,
'subtitles': subtitles,
'title': json_ld.get('title') or self._og_search_title(webpage),
'location': video_info.get('luogo'),
'timestamp': json_ld.get('timestamp'),
'thumbnail': traverse_obj(json_ld, ('thumbnails', 0, 'url', {url_or_none})),
'description': json_ld.get('description') or self._og_search_description(webpage),
} }
if len(entries) == 1:
return {
**entries[0],
**common_info,
}
return self.playlist_result(entries, multi_video=True, **common_info)

View File

@ -208,6 +208,7 @@ class SoundcloudBaseIE(InfoExtractor):
def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False): def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False):
track_id = str(info['id']) track_id = str(info['id'])
title = info['title']
format_urls = set() format_urls = set()
formats = [] formats = []
@ -366,7 +367,7 @@ class SoundcloudBaseIE(InfoExtractor):
'uploader_id': str_or_none(user.get('id')) or user.get('permalink'), 'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
'uploader_url': user.get('permalink_url'), 'uploader_url': user.get('permalink_url'),
'timestamp': unified_timestamp(info.get('created_at')), 'timestamp': unified_timestamp(info.get('created_at')),
'title': info.get('title'), 'title': title,
'description': info.get('description'), 'description': info.get('description'),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'duration': float_or_none(info.get('duration'), 1000), 'duration': float_or_none(info.get('duration'), 1000),
@ -376,8 +377,7 @@ class SoundcloudBaseIE(InfoExtractor):
'like_count': extract_count('favoritings') or extract_count('likes'), 'like_count': extract_count('favoritings') or extract_count('likes'),
'comment_count': extract_count('comment'), 'comment_count': extract_count('comment'),
'repost_count': extract_count('reposts'), 'repost_count': extract_count('reposts'),
'genres': traverse_obj(info, ('genre', {str}, filter, all, filter)), 'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)),
'artists': traverse_obj(info, ('publisher_metadata', 'artist', {str}, filter, all, filter)),
'formats': formats if not extract_flat else None, 'formats': formats if not extract_flat else None,
} }
@ -429,6 +429,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'repost_count': int, 'repost_count': int,
'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg', 'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
'uploader_url': 'https://soundcloud.com/ethmusic', 'uploader_url': 'https://soundcloud.com/ethmusic',
'genres': [],
}, },
}, },
# geo-restricted # geo-restricted
@ -452,7 +453,6 @@ class SoundcloudIE(SoundcloudBaseIE):
'uploader_url': 'https://soundcloud.com/the-concept-band', 'uploader_url': 'https://soundcloud.com/the-concept-band',
'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg', 'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
'genres': ['Alternative'], 'genres': ['Alternative'],
'artists': ['The Royal Concept'],
}, },
}, },
# private link # private link
@ -525,7 +525,6 @@ class SoundcloudIE(SoundcloudBaseIE):
'repost_count': int, 'repost_count': int,
'view_count': int, 'view_count': int,
'genres': ['Dance & EDM'], 'genres': ['Dance & EDM'],
'artists': ['80M'],
}, },
}, },
# private link, downloadable format # private link, downloadable format
@ -550,7 +549,6 @@ class SoundcloudIE(SoundcloudBaseIE):
'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg', 'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
'uploader_url': 'https://soundcloud.com/oriuplift', 'uploader_url': 'https://soundcloud.com/oriuplift',
'genres': ['Trance'], 'genres': ['Trance'],
'artists': ['Ori Uplift'],
}, },
}, },
# no album art, use avatar pic for thumbnail # no album art, use avatar pic for thumbnail
@ -574,7 +572,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
'uploader_url': 'https://soundcloud.com/garyvee', 'uploader_url': 'https://soundcloud.com/garyvee',
'artists': ['MadReal'], 'genres': [],
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,

View File

@ -150,6 +150,14 @@ class TwitterBaseIE(InfoExtractor):
def is_logged_in(self): def is_logged_in(self):
return bool(self._get_cookies(self._API_BASE).get('auth_token')) return bool(self._get_cookies(self._API_BASE).get('auth_token'))
# XXX: Temporary workaround until twitter.com => x.com migration is completed
def _real_initialize(self):
    """Switch to the twitter.com API endpoints when only twitter.com cookies exist.

    Nothing is changed when ``is_logged_in`` is already truthy, or when the
    cookies for https://twitter.com/ carry no ``auth_token``. Otherwise both
    API base URLs are overwritten on ``TwitterBaseIE`` itself, i.e. on the
    class rather than on this instance.
    """
    if not self.is_logged_in and self._get_cookies('https://twitter.com/').get('auth_token'):
        # User has not yet been migrated to x.com and has passed twitter.com cookies
        TwitterBaseIE._API_BASE = 'https://api.twitter.com/1.1/'
        TwitterBaseIE._GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
@functools.cached_property @functools.cached_property
def _selected_api(self): def _selected_api(self):
return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0] return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]

View File

@ -869,12 +869,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
for retry in (False, True): for retry in (False, True):
try: try:
video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash) video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash)
break
except ExtractorError as e: except ExtractorError as e:
if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400 if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400
and 'password' in traverse_obj( and 'password' in traverse_obj(
self._webpage_read_content(e.cause.response, e.cause.response.url, video_id, fatal=False), e.cause.response.read(),
({json.loads}, 'invalid_parameters', ..., 'field'), ({bytes.decode}, {json.loads}, 'invalid_parameters', ..., 'field'),
)): )):
self._verify_video_password( self._verify_video_password(
video_id, self._get_video_password(), viewer['xsrft']) video_id, self._get_video_password(), viewer['xsrft'])

View File

@ -1,13 +1,12 @@
from .common import InfoExtractor from .common import InfoExtractor
from .kaltura import KalturaIE from .kaltura import KalturaIE
from ..utils import ( from ..utils import (
ExtractorError,
int_or_none, int_or_none,
parse_iso8601,
smuggle_url, smuggle_url,
traverse_obj,
unified_strdate,
url_or_none, url_or_none,
) )
from ..utils.traversal import traverse_obj
class YleAreenaIE(InfoExtractor): class YleAreenaIE(InfoExtractor):
@ -16,9 +15,9 @@ class YleAreenaIE(InfoExtractor):
_TESTS = [ _TESTS = [
{ {
'url': 'https://areena.yle.fi/1-4371942', 'url': 'https://areena.yle.fi/1-4371942',
'md5': 'd87e9a1e74e67e009990ddd413e426b4', 'md5': '932edda0ecf5dfd6423804182d32f8ac',
'info_dict': { 'info_dict': {
'id': '1-4371942', 'id': '0_a3tjk92c',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Pouchit', 'title': 'Pouchit',
'description': 'md5:01071d7056ceec375f63960f90c35366', 'description': 'md5:01071d7056ceec375f63960f90c35366',
@ -27,27 +26,37 @@ class YleAreenaIE(InfoExtractor):
'season_number': 1, 'season_number': 1,
'episode': 'Episode 2', 'episode': 'Episode 2',
'episode_number': 2, 'episode_number': 2,
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg', 'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/0_a3tjk92c/version/100061',
'age_limit': 7, 'uploader_id': 'ovp@yle.fi',
'release_date': '20190105',
'release_timestamp': 1546725660,
'duration': 1435, 'duration': 1435,
'view_count': int,
'upload_date': '20181204',
'release_date': '20190106',
'timestamp': 1543916210,
'subtitles': {'fin': [{'url': r're:^https?://', 'ext': 'srt'}]},
'age_limit': 7,
'webpage_url': 'https://areena.yle.fi/1-4371942',
}, },
}, },
{ {
'url': 'https://areena.yle.fi/1-2158940', 'url': 'https://areena.yle.fi/1-2158940',
'md5': '6369ddc5e07b5fdaeda27a495184143c', 'md5': 'cecb603661004e36af8c5188b5212b12',
'info_dict': { 'info_dict': {
'id': '1-2158940', 'id': '1_l38iz9ur',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Albi haluaa vessan', 'title': 'Albi haluaa vessan',
'description': 'Albi haluaa vessan.', 'description': 'md5:15236d810c837bed861fae0e88663c33',
'series': 'Albi Lumiukko', 'series': 'Albi Lumiukko',
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg', 'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/1_l38iz9ur/version/100021',
'age_limit': 0, 'uploader_id': 'ovp@yle.fi',
'release_date': '20211215',
'release_timestamp': 1639555200,
'duration': 319, 'duration': 319,
'view_count': int,
'upload_date': '20211202',
'release_date': '20211215',
'timestamp': 1638448202,
'subtitles': {},
'age_limit': 0,
'webpage_url': 'https://areena.yle.fi/1-2158940',
}, },
}, },
{ {
@ -58,125 +67,72 @@ class YleAreenaIE(InfoExtractor):
'title': 'HKO & Mälkki & Tanner', 'title': 'HKO & Mälkki & Tanner',
'description': 'md5:b4f1b1af2c6569b33f75179a86eea156', 'description': 'md5:b4f1b1af2c6569b33f75179a86eea156',
'series': 'Helsingin kaupunginorkesterin konsertteja', 'series': 'Helsingin kaupunginorkesterin konsertteja',
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg', 'thumbnail': r're:^https?://.+\.jpg$',
'release_date': '20230120', 'release_date': '20230120',
'release_timestamp': 1674242079,
'duration': 8004,
}, },
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
}, },
}, },
{
'url': 'https://areena.yle.fi/1-72251830',
'info_dict': {
'id': '1-72251830',
'ext': 'mp4',
'title': r're:Pentulive 2024 | Pentulive \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
'description': 'md5:1f118707d9093bf894a34fbbc865397b',
'series': 'Pentulive',
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
'live_status': 'is_live',
'release_date': '20241025',
'release_timestamp': 1729875600,
},
'params': {
'skip_download': 'livestream',
},
},
{
'url': 'https://areena.yle.fi/podcastit/1-71022852',
'info_dict': {
'id': '1-71022852',
'ext': 'mp3',
'title': 'Värityspäivä',
'description': 'md5:c3a02b0455ec71d32cbe09d32ec161e2',
'series': 'Murun ja Paukun ikioma kaupunki',
'episode': 'Episode 1',
'episode_number': 1,
'release_date': '20240607',
'release_timestamp': 1717736400,
'duration': 442,
},
},
] ]
def _real_extract(self, url): def _real_extract(self, url):
video_id, is_podcast = self._match_valid_url(url).group('id', 'podcast') video_id, is_podcast = self._match_valid_url(url).group('id', 'podcast')
json_ld = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={}) info = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={})
video_data = self._download_json( video_data = self._download_json(
f'https://player.api.yle.fi/v1/preview/{video_id}.json?app_id=player_static_prod&app_key=8930d72170e48303cf5f3867780d549b', f'https://player.api.yle.fi/v1/preview/{video_id}.json?app_id=player_static_prod&app_key=8930d72170e48303cf5f3867780d549b',
video_id, headers={ video_id, headers={
'origin': 'https://areena.yle.fi', 'origin': 'https://areena.yle.fi',
'referer': 'https://areena.yle.fi/', 'referer': 'https://areena.yle.fi/',
'content-type': 'application/json', 'content-type': 'application/json',
})['data'] })
# Example title: 'K1, J2: Pouchit | Modernit miehet' # Example title: 'K1, J2: Pouchit | Modernit miehet'
season_number, episode_number, episode, series = self._search_regex( season_number, episode_number, episode, series = self._search_regex(
r'K(?P<season_no>\d+),\s*J(?P<episode_no>\d+):?\s*\b(?P<episode>[^|]+)\s*|\s*(?P<series>.+)', r'K(?P<season_no>\d+),\s*J(?P<episode_no>\d+):?\s*\b(?P<episode>[^|]+)\s*|\s*(?P<series>.+)',
json_ld.get('title') or '', 'episode metadata', group=('season_no', 'episode_no', 'episode', 'series'), info.get('title') or '', 'episode metadata', group=('season_no', 'episode_no', 'episode', 'series'),
default=(None, None, None, None)) default=(None, None, None, None))
description = traverse_obj(video_data, ('ongoing_ondemand', 'description', 'fin', {str})) description = traverse_obj(video_data, ('data', 'ongoing_ondemand', 'description', 'fin'), expected_type=str)
subtitles = {} subtitles = {}
for sub in traverse_obj(video_data, ('ongoing_ondemand', 'subtitles', lambda _, v: url_or_none(v['uri']))): for sub in traverse_obj(video_data, ('data', 'ongoing_ondemand', 'subtitles', ...)):
subtitles.setdefault(sub.get('language') or 'und', []).append({ if url_or_none(sub.get('uri')):
'url': sub['uri'], subtitles.setdefault(sub.get('language') or 'und', []).append({
'ext': 'srt', 'url': sub['uri'],
'name': sub.get('kind'), 'ext': 'srt',
}) 'name': sub.get('kind'),
})
info_dict, metadata = {}, {} if is_podcast:
if is_podcast and traverse_obj(video_data, ('ongoing_ondemand', 'media_url', {url_or_none})): info_dict = {
metadata = video_data['ongoing_ondemand'] 'url': video_data['data']['ongoing_ondemand']['media_url'],
info_dict['url'] = metadata['media_url'] }
elif traverse_obj(video_data, ('ongoing_event', 'manifest_url', {url_or_none})): elif kaltura_id := traverse_obj(video_data, ('data', 'ongoing_ondemand', 'kaltura', 'id', {str})):
metadata = video_data['ongoing_event'] info_dict = {
metadata.pop('duration', None) # Duration is not accurate for livestreams
info_dict['live_status'] = 'is_live'
elif traverse_obj(video_data, ('ongoing_ondemand', 'manifest_url', {url_or_none})):
metadata = video_data['ongoing_ondemand']
# XXX: Has all externally-hosted Kaltura content been moved to native hosting?
elif kaltura_id := traverse_obj(video_data, ('ongoing_ondemand', 'kaltura', 'id', {str})):
metadata = video_data['ongoing_ondemand']
info_dict.update({
'_type': 'url_transparent', '_type': 'url_transparent',
'url': smuggle_url(f'kaltura:1955031:{kaltura_id}', {'source_url': url}), 'url': smuggle_url(f'kaltura:1955031:{kaltura_id}', {'source_url': url}),
'ie_key': KalturaIE.ie_key(), 'ie_key': KalturaIE.ie_key(),
}) }
elif traverse_obj(video_data, ('gone', {dict})):
self.raise_no_formats('The content is no longer available', expected=True, video_id=video_id)
metadata = video_data['gone']
else: else:
raise ExtractorError('Unable to extract content') formats, subs = self._extract_m3u8_formats_and_subtitles(
video_data['data']['ongoing_ondemand']['manifest_url'], video_id, 'mp4', m3u8_id='hls')
if not info_dict.get('url') and metadata.get('manifest_url'):
info_dict['formats'], subs = self._extract_m3u8_formats_and_subtitles(
metadata['manifest_url'], video_id, 'mp4', m3u8_id='hls')
self._merge_subtitles(subs, target=subtitles) self._merge_subtitles(subs, target=subtitles)
info_dict = {'formats': formats}
return { return {
**traverse_obj(json_ld, { **info_dict,
'title': 'title',
'thumbnails': ('thumbnails', ..., {'url': 'url'}),
}),
'id': video_id, 'id': video_id,
'title': episode, 'title': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'title', 'fin'), expected_type=str)
or episode or info.get('title')),
'description': description, 'description': description,
'series': series, 'series': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'series', 'title', 'fin'), expected_type=str)
or series),
'season_number': (int_or_none(self._search_regex(r'Kausi (\d+)', description, 'season number', default=None)) 'season_number': (int_or_none(self._search_regex(r'Kausi (\d+)', description, 'season number', default=None))
or int_or_none(season_number)), or int_or_none(season_number)),
'episode_number': int_or_none(episode_number), 'episode_number': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'episode_number'), expected_type=int_or_none)
or int_or_none(episode_number)),
'thumbnails': traverse_obj(info, ('thumbnails', ..., {'url': 'url'})),
'age_limit': traverse_obj(video_data, ('data', 'ongoing_ondemand', 'content_rating', 'age_restriction'), expected_type=int_or_none),
'subtitles': subtitles or None, 'subtitles': subtitles or None,
**traverse_obj(metadata, { 'release_date': unified_strdate(traverse_obj(video_data, ('data', 'ongoing_ondemand', 'start_time'), expected_type=str)),
'title': ('title', 'fin', {str}),
'description': ('description', 'fin', {str}),
'series': ('series', 'title', 'fin', {str}),
'episode_number': ('episode_number', {int_or_none}),
'age_limit': ('content_rating', 'age_restriction', {int_or_none}),
'release_timestamp': ('start_time', {parse_iso8601}),
'duration': ('duration', 'duration_in_seconds', {int_or_none}),
}),
**info_dict,
} }

View File

@ -644,14 +644,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE] = {} YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE] = {}
if refresh_token: if refresh_token:
msg = f'{self._OAUTH_DISPLAY_ID}: Using password input as refresh token' refresh_token = refresh_token.strip('\'') or None
if self.get_param('cachedir') is not False:
msg += ' and caching token to disk; you should supply an empty password next time'
self.to_screen(msg)
self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token)
else:
refresh_token = self.cache.load(self._NETRC_MACHINE, self._oauth_cache_key)
# Allow refresh token passed to initialize cache
if refresh_token:
self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token)
refresh_token = refresh_token or self.cache.load(self._NETRC_MACHINE, self._oauth_cache_key)
if refresh_token: if refresh_token:
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token
try: try:

View File

@ -212,23 +212,6 @@ def write_json_file(obj, fn):
raise raise
def partial_application(func):
    """Decorate *func* so that an incomplete call returns a partial.

    The names of all positional parameters without a default are collected
    once, at decoration time; a ``*args`` parameter is part of that list.
    On each call the first ``len(args)`` names count as satisfied by the
    positional arguments. If any remaining name is not passed as a keyword,
    ``functools.partial(func, *args, **kwargs)`` is returned instead of
    calling *func*; otherwise *func* is called and its result returned.
    """
    positional_kinds = (
        inspect.Parameter.POSITIONAL_ONLY,
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.VAR_POSITIONAL,
    )
    needed = []
    for parameter in inspect.signature(func).parameters.values():
        if parameter.kind not in positional_kinds:
            continue
        if parameter.default is inspect.Parameter.empty:
            needed.append(parameter.name)

    @functools.wraps(func)
    def wrapped(*args, **kwargs):
        # Names not covered positionally must all be present as keywords
        outstanding = set(needed[len(args):]).difference(kwargs)
        if not outstanding:
            return func(*args, **kwargs)
        return functools.partial(func, *args, **kwargs)

    return wrapped
def find_xpath_attr(node, xpath, key, val=None): def find_xpath_attr(node, xpath, key, val=None):
""" Find the xpath xpath[@key=val] """ """ Find the xpath xpath[@key=val] """
assert re.match(r'^[a-zA-Z_-]+$', key) assert re.match(r'^[a-zA-Z_-]+$', key)
@ -1209,7 +1192,6 @@ def extract_timezone(date_str, default=None):
return timezone, date_str return timezone, date_str
@partial_application
def parse_iso8601(date_str, delimiter='T', timezone=None): def parse_iso8601(date_str, delimiter='T', timezone=None):
""" Return a UNIX timestamp from the given date """ """ Return a UNIX timestamp from the given date """
@ -1287,7 +1269,6 @@ def unified_timestamp(date_str, day_first=True):
return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds() return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
@partial_application
def determine_ext(url, default_ext='unknown_video'): def determine_ext(url, default_ext='unknown_video'):
if url is None or '.' not in url: if url is None or '.' not in url:
return default_ext return default_ext
@ -1963,7 +1944,7 @@ def remove_start(s, start):
def remove_end(s, end): def remove_end(s, end):
return s[:-len(end)] if s is not None and end and s.endswith(end) else s return s[:-len(end)] if s is not None and s.endswith(end) else s
def remove_quotes(s): def remove_quotes(s):
@ -1992,7 +1973,6 @@ def base_url(url):
return re.match(r'https?://[^?#]+/', url).group() return re.match(r'https?://[^?#]+/', url).group()
@partial_application
def urljoin(base, path): def urljoin(base, path):
if isinstance(path, bytes): if isinstance(path, bytes):
path = path.decode() path = path.decode()
@ -2008,6 +1988,21 @@ def urljoin(base, path):
return urllib.parse.urljoin(base, path) return urllib.parse.urljoin(base, path)
def partial_application(func):
    """ Decorate func so that calling it without its required arguments returns a partial

    If any required positional parameter (one without a default) is still
    missing, a `functools.partial` holding the supplied arguments is returned
    so the call can be completed later, e.g. `int_or_none(scale=1000)`.
    Otherwise func is called directly, so invalid calls (too many positional
    arguments, unknown keywords) raise TypeError right away instead of being
    silently turned into a partial.
    """
    sig = inspect.signature(func)
    required_args = [
        param.name for param in sig.parameters.values()
        if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
        and param.default is inspect.Parameter.empty
    ]

    @functools.wraps(func)
    def wrapped(*args, **kwargs):
        # Positional arguments fill the leading required names;
        # whatever remains must be supplied by keyword
        if set(required_args[len(args):]).difference(kwargs):
            return functools.partial(func, *args, **kwargs)
        return func(*args, **kwargs)

    return wrapped
@partial_application @partial_application
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1, base=None): def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1, base=None):
if get_attr and v is not None: if get_attr and v is not None:
@ -2588,7 +2583,6 @@ def urlencode_postdata(*args, **kargs):
return urllib.parse.urlencode(*args, **kargs).encode('ascii') return urllib.parse.urlencode(*args, **kargs).encode('ascii')
@partial_application
def update_url(url, *, query_update=None, **kwargs): def update_url(url, *, query_update=None, **kwargs):
"""Replace URL components specified by kwargs """Replace URL components specified by kwargs
@param url str or parse url tuple @param url str or parse url tuple
@ -2609,7 +2603,6 @@ def update_url(url, *, query_update=None, **kwargs):
return urllib.parse.urlunparse(url._replace(**kwargs)) return urllib.parse.urlunparse(url._replace(**kwargs))
@partial_application
def update_url_query(url, query): def update_url_query(url, query):
return update_url(url, query_update=query) return update_url(url, query_update=query)
@ -2931,7 +2924,6 @@ def error_to_str(err):
return f'{type(err).__name__}: {err}' return f'{type(err).__name__}: {err}'
@partial_application
def mimetype2ext(mt, default=NO_DEFAULT): def mimetype2ext(mt, default=NO_DEFAULT):
if not isinstance(mt, str): if not isinstance(mt, str):
if default is not NO_DEFAULT: if default is not NO_DEFAULT:
@ -4672,7 +4664,6 @@ def to_high_limit_path(path):
return path return path
@partial_application
def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY): def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
val = traversal.traverse_obj(obj, *variadic(field)) val = traversal.traverse_obj(obj, *variadic(field))
if not val if ignore is NO_DEFAULT else val in variadic(ignore): if not val if ignore is NO_DEFAULT else val in variadic(ignore):
@ -4837,7 +4828,6 @@ def number_of_digits(number):
return len('%d' % number) return len('%d' % number)
@partial_application
def join_nonempty(*values, delim='-', from_dict=None): def join_nonempty(*values, delim='-', from_dict=None):
if from_dict is not None: if from_dict is not None:
values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values) values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)
@ -5175,7 +5165,6 @@ class _UnsafeExtensionError(Exception):
'ico', 'ico',
'image', 'image',
'jng', 'jng',
'jpe',
'jpeg', 'jpeg',
'jxl', 'jxl',
'svg', 'svg',
@ -5288,7 +5277,6 @@ class RetryManager:
time.sleep(delay) time.sleep(delay)
@partial_application
def make_archive_id(ie, video_id): def make_archive_id(ie, video_id):
ie_key = ie if isinstance(ie, str) else ie.ie_key() ie_key = ie if isinstance(ie, str) else ie.ie_key()
return f'{ie_key.lower()} {video_id}' return f'{ie_key.lower()} {video_id}'
@ -5590,15 +5578,14 @@ class FormatSorter:
value = get_value(field) value = get_value(field)
return self._calculate_field_preference_from_value(format_, field, type_, value) return self._calculate_field_preference_from_value(format_, field, type_, value)
@staticmethod def calculate_preference(self, format):
def _fill_sorting_fields(format):
# Determine missing protocol # Determine missing protocol
if not format.get('protocol'): if not format.get('protocol'):
format['protocol'] = determine_protocol(format) format['protocol'] = determine_protocol(format)
# Determine missing ext # Determine missing ext
if not format.get('ext') and 'url' in format: if not format.get('ext') and 'url' in format:
format['ext'] = determine_ext(format['url']).lower() format['ext'] = determine_ext(format['url'])
if format.get('vcodec') == 'none': if format.get('vcodec') == 'none':
format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none' format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
format['video_ext'] = 'none' format['video_ext'] = 'none'
@ -5626,8 +5613,6 @@ class FormatSorter:
if not format.get('tbr'): if not format.get('tbr'):
format['tbr'] = try_call(lambda: format['vbr'] + format['abr']) or None format['tbr'] = try_call(lambda: format['vbr'] + format['abr']) or None
def calculate_preference(self, format):
self._fill_sorting_fields(format)
return tuple(self._calculate_field_preference(format, field) for field in self._order) return tuple(self._calculate_field_preference(format, field) for field in self._order)

View File

@ -391,13 +391,14 @@ def find_element(*, tag: str, html=False): ...
def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False): def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False):
# deliberately using `id=` and `cls=` for ease of readability # deliberately using `id=` and `cls=` for ease of readability
assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required' assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required'
ANY_TAG = r'[\w:.-]+' if not tag:
tag = r'[\w:.-]+'
if attr and value: if attr and value:
assert not cls, 'Cannot match both attr and cls' assert not cls, 'Cannot match both attr and cls'
assert not id, 'Cannot match both attr and id' assert not id, 'Cannot match both attr and id'
func = get_element_html_by_attribute if html else get_element_by_attribute func = get_element_html_by_attribute if html else get_element_by_attribute
return functools.partial(func, attr, value, tag=tag or ANY_TAG) return functools.partial(func, attr, value, tag=tag)
elif cls: elif cls:
assert not id, 'Cannot match both cls and id' assert not id, 'Cannot match both cls and id'
@ -407,7 +408,7 @@ def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=Fal
elif id: elif id:
func = get_element_html_by_id if html else get_element_by_id func = get_element_html_by_id if html else get_element_by_id
return functools.partial(func, id, tag=tag or ANY_TAG) return functools.partial(func, id, tag=tag)
index = int(bool(html)) index = int(bool(html))
return lambda html: get_element_text_and_html_by_tag(tag, html)[index] return lambda html: get_element_text_and_html_by_tag(tag, html)[index]
@ -435,20 +436,6 @@ def find_elements(*, tag=None, cls=None, attr=None, value=None, html=False):
return functools.partial(func, cls) return functools.partial(func, cls)
def trim_str(*, start=None, end=None):
    """ Build a function that strips an optional prefix and/or suffix from a string

    @param start    prefix to drop if the string begins with it (ignored when falsy)
    @param end      suffix to drop if the string ends with it (ignored when falsy)
    @returns        a callable taking s and returning the trimmed string;
                    None is passed through unchanged
    """
    def trim(s):
        if s is None:
            return None
        # Both tests look at the untouched input, so the suffix check
        # does not depend on whether a prefix was removed
        has_prefix = bool(start) and s.startswith(start)
        has_suffix = bool(end) and s.endswith(end)
        lower = len(start) if has_prefix else 0
        upper = -len(end) if has_suffix else None
        return s[lower:upper]

    return trim
def get_first(obj, *paths, **kwargs): def get_first(obj, *paths, **kwargs):
return traverse_obj(obj, *((..., *variadic(keys)) for keys in paths), **kwargs, get_all=False) return traverse_obj(obj, *((..., *variadic(keys)) for keys in paths), **kwargs, get_all=False)