mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-28 18:21:26 +01:00
Compare commits
22 Commits
6311170b6a
...
50b4b87a3d
Author | SHA1 | Date | |
---|---|---|---|
|
50b4b87a3d | ||
|
915da295cb | ||
|
e26e4e0113 | ||
|
2e4fa64c83 | ||
|
9f278cd4e2 | ||
|
a6783a3b99 | ||
|
428ffb75aa | ||
|
b6dc2c49e8 | ||
|
76802f4613 | ||
|
d569a88452 | ||
|
88402b714e | ||
|
5bc5fb2835 | ||
|
f93c16395c | ||
|
f101e5d34c | ||
|
330335386d | ||
|
0a3991edae | ||
|
5c880ef42e | ||
|
21cdcf03a2 | ||
|
6abef74232 | ||
|
9acf79c91a | ||
|
57212a5f97 | ||
|
c29f5a7fae |
1
.github/workflows/release.yml
vendored
1
.github/workflows/release.yml
vendored
|
@ -282,6 +282,7 @@ jobs:
|
|||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
verbose: true
|
||||
attestations: false # Currently doesn't work w/ reusable workflows (breaks nightly)
|
||||
|
||||
publish:
|
||||
needs: [prepare, build]
|
||||
|
|
|
@ -1791,7 +1791,7 @@ The following extractors use this feature:
|
|||
* `key_query`: Pass through the master m3u8 URL query to its HLS AES-128 decryption key URI if no value is provided, or else apply the query string given as `key_query=VALUE`. Note that this will have no effect if the key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg
|
||||
* `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist
|
||||
* `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live`
|
||||
* `impersonate`: Target(s) to try and impersonate with the initial webpage request; e.g. `safari,chrome-110`. By default any available target will be used. Use `false` to disable impersonation
|
||||
* `impersonate`: Target(s) to try and impersonate with the initial webpage request; e.g. `generic:impersonate=safari,chrome-110`. Use `generic:impersonate` to impersonate any available target, and use `generic:impersonate=false` to disable impersonation (default)
|
||||
|
||||
#### funimation
|
||||
* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
|
||||
|
|
|
@ -216,5 +216,23 @@
|
|||
"action": "add",
|
||||
"when": "d784464399b600ba9516bbcec6286f11d68974dd",
|
||||
"short": "[priority] **The minimum *required* Python version has been raised to 3.9**\nPython 3.8 reached its end-of-life on 2024.10.07, and yt-dlp has now removed support for it. As an unfortunate side effect, the official `yt-dlp.exe` and `yt-dlp_x86.exe` binaries are no longer supported on Windows 7. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)"
|
||||
},
|
||||
{
|
||||
"action": "change",
|
||||
"when": "914af9a0cf51c9a3f74aa88d952bee8334c67511",
|
||||
"short": "Expand paths in `--plugin-dirs` (#11334)",
|
||||
"authors": ["bashonly"]
|
||||
},
|
||||
{
|
||||
"action": "change",
|
||||
"when": "c29f5a7fae93a08f3cfbb6127b2faa75145b06a0",
|
||||
"short": "[ie/generic] Do not impersonate by default (#11336)",
|
||||
"authors": ["bashonly"]
|
||||
},
|
||||
{
|
||||
"action": "change",
|
||||
"when": "57212a5f97ce367590aaa5c3e9a135eead8f81f7",
|
||||
"short": "[ie/vimeo] Fix API retries (#11351)",
|
||||
"authors": ["bashonly"]
|
||||
}
|
||||
]
|
||||
|
|
|
@ -71,14 +71,13 @@ class CommitGroup(enum.Enum):
|
|||
def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
|
||||
group, _, subgroup = (group.strip().lower() for group in value.partition('/'))
|
||||
|
||||
result = cls.group_lookup().get(group)
|
||||
if not result:
|
||||
if subgroup:
|
||||
return None, value
|
||||
subgroup = group
|
||||
result = cls.subgroup_lookup().get(subgroup)
|
||||
if result := cls.group_lookup().get(group):
|
||||
return result, subgroup or None
|
||||
|
||||
return result, subgroup or None
|
||||
if subgroup:
|
||||
return None, value
|
||||
|
||||
return cls.subgroup_lookup().get(group), group or None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
@ -136,8 +135,7 @@ class Changelog:
|
|||
first = False
|
||||
yield '\n<details><summary><h3>Changelog</h3></summary>\n'
|
||||
|
||||
group = groups[item]
|
||||
if group:
|
||||
if group := groups[item]:
|
||||
yield self.format_module(item.value, group)
|
||||
|
||||
if self._collapsible:
|
||||
|
@ -253,7 +251,7 @@ class CommitRange:
|
|||
''', re.VERBOSE | re.DOTALL)
|
||||
EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
|
||||
REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
|
||||
FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert|Improve)\s+([\da-f]{40})')
|
||||
FIXES_RE = re.compile(r'(?i:(?:bug\s*)?fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Improve)\s+([\da-f]{40})')
|
||||
UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')
|
||||
|
||||
def __init__(self, start, end, default_author=None):
|
||||
|
@ -287,11 +285,16 @@ class CommitRange:
|
|||
short = next(lines)
|
||||
skip = short.startswith('Release ') or short == '[version] update'
|
||||
|
||||
fix_commitish = None
|
||||
if match := self.FIXES_RE.search(short):
|
||||
fix_commitish = match.group(1)
|
||||
|
||||
authors = [default_author] if default_author else []
|
||||
for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
|
||||
match = self.AUTHOR_INDICATOR_RE.match(line)
|
||||
if match:
|
||||
if match := self.AUTHOR_INDICATOR_RE.match(line):
|
||||
authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)
|
||||
if not fix_commitish and (match := self.FIXES_RE.fullmatch(line)):
|
||||
fix_commitish = match.group(1)
|
||||
|
||||
commit = Commit(commit_hash, short, authors)
|
||||
if skip and (self._start or not i):
|
||||
|
@ -301,21 +304,17 @@ class CommitRange:
|
|||
logger.debug(f'Reached Release commit, breaking: {commit}')
|
||||
break
|
||||
|
||||
revert_match = self.REVERT_RE.fullmatch(commit.short)
|
||||
if revert_match:
|
||||
reverts[revert_match.group(1)] = commit
|
||||
if match := self.REVERT_RE.fullmatch(commit.short):
|
||||
reverts[match.group(1)] = commit
|
||||
continue
|
||||
|
||||
fix_match = self.FIXES_RE.search(commit.short)
|
||||
if fix_match:
|
||||
commitish = fix_match.group(1)
|
||||
fixes[commitish].append(commit)
|
||||
if fix_commitish:
|
||||
fixes[fix_commitish].append(commit)
|
||||
|
||||
commits[commit.hash] = commit
|
||||
|
||||
for commitish, revert_commit in reverts.items():
|
||||
reverted = commits.pop(commitish, None)
|
||||
if reverted:
|
||||
if reverted := commits.pop(commitish, None):
|
||||
logger.debug(f'{commitish} fully reverted {reverted}')
|
||||
else:
|
||||
commits[revert_commit.hash] = revert_commit
|
||||
|
@ -461,8 +460,7 @@ def create_changelog(args):
|
|||
|
||||
logger.info(f'Loaded {len(commits)} commits')
|
||||
|
||||
new_contributors = get_new_contributors(args.contributors_path, commits)
|
||||
if new_contributors:
|
||||
if new_contributors := get_new_contributors(args.contributors_path, commits):
|
||||
if args.contributors:
|
||||
write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a')
|
||||
logger.info(f'New contributors: {", ".join(new_contributors)}')
|
||||
|
|
|
@ -53,6 +53,18 @@ class TestInfoExtractor(unittest.TestCase):
|
|||
def test_ie_key(self):
|
||||
self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
|
||||
|
||||
def test_get_netrc_login_info(self):
|
||||
for params in [
|
||||
{'usenetrc': True, 'netrc_location': './test/testdata/netrc/netrc'},
|
||||
{'netrc_cmd': f'{sys.executable} ./test/testdata/netrc/print_netrc.py'},
|
||||
]:
|
||||
ie = DummyIE(FakeYDL(params))
|
||||
self.assertEqual(ie._get_netrc_login_info(netrc_machine='normal_use'), ('user', 'pass'))
|
||||
self.assertEqual(ie._get_netrc_login_info(netrc_machine='empty_user'), ('', 'pass'))
|
||||
self.assertEqual(ie._get_netrc_login_info(netrc_machine='empty_pass'), ('user', ''))
|
||||
self.assertEqual(ie._get_netrc_login_info(netrc_machine='both_empty'), ('', ''))
|
||||
self.assertEqual(ie._get_netrc_login_info(netrc_machine='nonexistent'), (None, None))
|
||||
|
||||
def test_html_search_regex(self):
|
||||
html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'
|
||||
search = lambda re, *args: self.ie._html_search_regex(re, html, *args)
|
||||
|
|
|
@ -12,9 +12,10 @@ from yt_dlp.utils import (
|
|||
str_or_none,
|
||||
)
|
||||
from yt_dlp.utils.traversal import (
|
||||
traverse_obj,
|
||||
require,
|
||||
subs_list_to_dict,
|
||||
traverse_obj,
|
||||
trim_str,
|
||||
)
|
||||
|
||||
_TEST_DATA = {
|
||||
|
@ -495,6 +496,20 @@ class TestTraversalHelpers:
|
|||
{'url': 'https://example.com/subs/en2', 'ext': 'ext'},
|
||||
]}, '`quality` key should sort subtitle list accordingly'
|
||||
|
||||
def test_trim_str(self):
|
||||
with pytest.raises(TypeError):
|
||||
trim_str('positional')
|
||||
|
||||
assert callable(trim_str(start='a'))
|
||||
assert trim_str(start='ab')('abc') == 'c'
|
||||
assert trim_str(end='bc')('abc') == 'a'
|
||||
assert trim_str(start='a', end='c')('abc') == 'b'
|
||||
assert trim_str(start='ab', end='c')('abc') == ''
|
||||
assert trim_str(start='a', end='bc')('abc') == ''
|
||||
assert trim_str(start='ab', end='bc')('abc') == ''
|
||||
assert trim_str(start='abc', end='abc')('abc') == ''
|
||||
assert trim_str(start='', end='')('abc') == 'abc'
|
||||
|
||||
|
||||
class TestDictGet:
|
||||
def test_dict_get(self):
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
import os
|
||||
import sys
|
||||
import unittest
|
||||
import unittest.mock
|
||||
import warnings
|
||||
import datetime as dt
|
||||
|
||||
|
@ -71,6 +72,7 @@ from yt_dlp.utils import (
|
|||
intlist_to_bytes,
|
||||
iri_to_uri,
|
||||
is_html,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
limit_length,
|
||||
locked_file,
|
||||
|
@ -343,11 +345,13 @@ class TestUtil(unittest.TestCase):
|
|||
self.assertEqual(remove_start(None, 'A - '), None)
|
||||
self.assertEqual(remove_start('A - B', 'A - '), 'B')
|
||||
self.assertEqual(remove_start('B - A', 'A - '), 'B - A')
|
||||
self.assertEqual(remove_start('non-empty', ''), 'non-empty')
|
||||
|
||||
def test_remove_end(self):
|
||||
self.assertEqual(remove_end(None, ' - B'), None)
|
||||
self.assertEqual(remove_end('A - B', ' - B'), 'A')
|
||||
self.assertEqual(remove_end('B - A', ' - B'), 'B - A')
|
||||
self.assertEqual(remove_end('non-empty', ''), 'non-empty')
|
||||
|
||||
def test_remove_quotes(self):
|
||||
self.assertEqual(remove_quotes(None), None)
|
||||
|
@ -2148,6 +2152,16 @@ Line 1
|
|||
assert run_shell(args) == expected
|
||||
assert run_shell(shell_quote(args, shell=True)) == expected
|
||||
|
||||
def test_partial_application(self):
|
||||
assert callable(int_or_none(scale=10)), 'missing positional parameter should apply partially'
|
||||
assert int_or_none(10, scale=0.1) == 100, 'positionally passed argument should call function'
|
||||
assert int_or_none(v=10) == 10, 'keyword passed positional should call function'
|
||||
assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function'
|
||||
|
||||
assert callable(join_nonempty(delim=', ')), 'varargs positional should apply partially'
|
||||
assert callable(join_nonempty()), 'varargs positional should apply partially'
|
||||
assert join_nonempty(None, delim=', ') == '', 'passed varargs should call the function'
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
4
test/testdata/netrc/netrc
vendored
Normal file
4
test/testdata/netrc/netrc
vendored
Normal file
|
@ -0,0 +1,4 @@
|
|||
machine normal_use login user password pass
|
||||
machine empty_user login "" password pass
|
||||
machine empty_pass login user password ""
|
||||
machine both_empty login "" password ""
|
2
test/testdata/netrc/print_netrc.py
vendored
Normal file
2
test/testdata/netrc/print_netrc.py
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
with open('./test/testdata/netrc/netrc', encoding='utf-8') as fp:
|
||||
print(fp.read())
|
|
@ -2849,13 +2849,10 @@ class YoutubeDL:
|
|||
sanitize_string_field(fmt, 'format_id')
|
||||
sanitize_numeric_fields(fmt)
|
||||
fmt['url'] = sanitize_url(fmt['url'])
|
||||
if fmt.get('ext') is None:
|
||||
fmt['ext'] = determine_ext(fmt['url']).lower()
|
||||
FormatSorter._fill_sorting_fields(fmt)
|
||||
if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
|
||||
if fmt.get('acodec') is None:
|
||||
fmt['acodec'] = fmt['ext']
|
||||
if fmt.get('protocol') is None:
|
||||
fmt['protocol'] = determine_protocol(fmt)
|
||||
if fmt.get('resolution') is None:
|
||||
fmt['resolution'] = self.format_resolution(fmt, default=None)
|
||||
if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none':
|
||||
|
|
|
@ -401,8 +401,6 @@ from .cmt import CMTIE
|
|||
from .cnbc import CNBCVideoIE
|
||||
from .cnn import (
|
||||
CNNIE,
|
||||
CNNArticleIE,
|
||||
CNNBlogsIE,
|
||||
CNNIndonesiaIE,
|
||||
)
|
||||
from .comedycentral import (
|
||||
|
|
|
@ -12,53 +12,86 @@ from ..utils import (
|
|||
|
||||
|
||||
class CCMAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
|
||||
IE_DESC = '3Cat, TV3 and Catalunya Ràdio'
|
||||
_VALID_URL = r'https?://(?:www\.)?3cat\.cat/(?:3cat|tv3/sx3)/[^/?#]+/(?P<type>video|audio)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
|
||||
# ccma.cat/tv3/alacarta/ URLs redirect to 3cat.cat/3cat/
|
||||
'url': 'https://www.3cat.cat/3cat/lespot-de-la-marato-de-tv3/video/5630208/',
|
||||
'md5': '7296ca43977c8ea4469e719c609b0871',
|
||||
'info_dict': {
|
||||
'id': '5630208',
|
||||
'ext': 'mp4',
|
||||
'title': 'L\'espot de La Marató de TV3',
|
||||
'title': 'L\'espot de La Marató 2016: Ictus i les lesions medul·lars i cerebrals traumàtiques',
|
||||
'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
|
||||
'timestamp': 1478608140,
|
||||
'upload_date': '20161108',
|
||||
'age_limit': 0,
|
||||
'alt_title': 'EsportMarató2016WEB_PerPublicar',
|
||||
'duration': 79,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/4/6/1478536106664.jpg',
|
||||
'series': 'Dedicada a l\'ictus i les lesions medul·lars i cerebrals traumàtiques',
|
||||
'categories': ['Divulgació'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
||||
# ccma.cat/catradio/alacarta/ URLs redirect to 3cat.cat/3cat/
|
||||
'url': 'https://www.3cat.cat/3cat/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
||||
'md5': 'fa3e38f269329a278271276330261425',
|
||||
'info_dict': {
|
||||
'id': '943685',
|
||||
'ext': 'mp3',
|
||||
'title': 'El Consell de Savis analitza el derbi',
|
||||
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
|
||||
'upload_date': '20170512',
|
||||
'timestamp': 1494622500,
|
||||
'upload_date': '20161217',
|
||||
'timestamp': 1482011700,
|
||||
'vcodec': 'none',
|
||||
'categories': ['Esports'],
|
||||
'series': 'Tot gira',
|
||||
'duration': 821,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/8/9/1482002602598.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
|
||||
'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
|
||||
'url': 'https://www.3cat.cat/3cat/crims-josep-tallada-lespereu-me-part-1/video/6031387/',
|
||||
'md5': '27493513d08a3e5605814aee9bb778d2',
|
||||
'info_dict': {
|
||||
'id': '6031387',
|
||||
'ext': 'mp4',
|
||||
'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
|
||||
'title': 'T1xC5 - Josep Talleda, l\'"Espereu-me" (part 1)',
|
||||
'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
|
||||
'timestamp': 1582577700,
|
||||
'timestamp': 1582577919,
|
||||
'upload_date': '20200224',
|
||||
'subtitles': 'mincount:4',
|
||||
'age_limit': 16,
|
||||
'subtitles': 'mincount:1',
|
||||
'age_limit': 13,
|
||||
'series': 'Crims',
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/1/9/1582564376991.jpg',
|
||||
'duration': 3203,
|
||||
'categories': ['Divulgació'],
|
||||
'alt_title': 'Crims - 5 - Josep Talleda, l\'"Espereu-me" (1a part) - Josep Talleda, l\'"Espereu-me" (part 1)',
|
||||
'episode_number': 5,
|
||||
'episode': 'Episode 5',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.3cat.cat/tv3/sx3/una-mosca-volava-per-la-llum/video/5759227/',
|
||||
'info_dict': {
|
||||
'id': '5759227',
|
||||
'ext': 'mp4',
|
||||
'title': 'Una mosca volava per la llum',
|
||||
'alt_title': '17Z004Ç UNA MOSCA VOLAVA PER LA LLUM',
|
||||
'description': 'md5:9ab64276944b0825336f4147f13f7854',
|
||||
'series': 'Mic',
|
||||
'upload_date': '20180411',
|
||||
'timestamp': 1523440105,
|
||||
'duration': 160,
|
||||
'age_limit': 0,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/6/1/1524071667216.jpg',
|
||||
'categories': ['Música'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
media_type, media_id = self._match_valid_url(url).groups()
|
||||
media_type, media_id = self._match_valid_url(url).group('type', 'id')
|
||||
|
||||
media = self._download_json(
|
||||
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
|
||||
'http://api-media.3cat.cat/pvideo/media.jsp', media_id, query={
|
||||
'media': media_type,
|
||||
'idint': media_id,
|
||||
'format': 'dm',
|
||||
|
|
|
@ -1,146 +1,226 @@
|
|||
import functools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import merge_dicts, try_call, url_basename
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
try_call,
|
||||
update_url,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import find_elements, traverse_obj
|
||||
|
||||
|
||||
class CNNIE(TurnerBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/
|
||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
|
||||
class CNNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:edition|www|money|cnnespanol)\.)?cnn\.com/(?!audio/)(?P<display_id>[^?#]+?)(?:[?#]|$|/index\.html)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
|
||||
'md5': '3e6121ea48df7e2259fe73a0628605c4',
|
||||
'url': 'https://www.cnn.com/2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
|
||||
'info_dict': {
|
||||
'id': 'sports/2013/06/09/nadal-1-on-1.cnn',
|
||||
'id': 'med0e97ad0d154f56e29aa96e57192a14226734b6b',
|
||||
'display_id': '2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nadal wins 8th French Open title',
|
||||
'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
|
||||
'duration': 135,
|
||||
'upload_date': '20130609',
|
||||
'upload_date': '20240531',
|
||||
'description': 'md5:844bcdb0629e1877a7a466c913f4c19c',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/gettyimages-2151936122.jpg?c=original',
|
||||
'duration': 373.0,
|
||||
'timestamp': 1717148586,
|
||||
'title': 'Borussia Dortmund star Jadon Sancho seeks Wembley redemption after 2020 Euros hurt',
|
||||
'modified_date': '20240531',
|
||||
'modified_timestamp': 1717150140,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
|
||||
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
|
||||
'url': 'https://edition.cnn.com/2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
|
||||
'info_dict': {
|
||||
'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology',
|
||||
'id': 'me522945c4709b299e5cb8657900a7a21ad3b559f9',
|
||||
'display_id': '2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
|
||||
'ext': 'mp4',
|
||||
'title': "Student's epic speech stuns new freshmen",
|
||||
'description': 'A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from "2001: A Space Odyssey."',
|
||||
'upload_date': '20130821',
|
||||
'description': 'md5:e0120fe5da9ad8259fd707c1cbb64a60',
|
||||
'title': 'Here’s how some inmates in closely divided state are now able to vote from jail',
|
||||
'timestamp': 1718158269,
|
||||
'upload_date': '20240612',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701554-13565-571-still.jpg?c=original',
|
||||
'duration': 202.0,
|
||||
'modified_date': '20240612',
|
||||
'modified_timestamp': 1718158509,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
|
||||
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
|
||||
'url': 'https://edition.cnn.com/2024/06/11/style/king-charles-portrait-vandalized/index.html',
|
||||
'info_dict': {
|
||||
'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln',
|
||||
'id': 'mef5f52b9e1fe28b1ad192afcbc9206ae984894b68',
|
||||
'display_id': '2024/06/11/style/king-charles-portrait-vandalized',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nashville Ep. 1: Hand crafted skateboards',
|
||||
'description': 'md5:e7223a503315c9f150acac52e76de086',
|
||||
'upload_date': '20141222',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701257-8846-816-still.jpg?c=original',
|
||||
'description': 'md5:19f78338ccec533db0fa8a4511012dae',
|
||||
'title': 'Video shows King Charles\' portrait being vandalized by activists',
|
||||
'timestamp': 1718113852,
|
||||
'upload_date': '20240611',
|
||||
'duration': 51.0,
|
||||
'modified_timestamp': 1718116193,
|
||||
'modified_date': '20240611',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html',
|
||||
'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
|
||||
'url': 'https://edition.cnn.com/videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
|
||||
'info_dict': {
|
||||
'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney',
|
||||
'id': 'mefba13799201b084ea3b1d0f7ca820ae94d4bb5b2',
|
||||
'display_id': 'videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
|
||||
'ext': 'mp4',
|
||||
'title': '5 stunning stats about Netflix',
|
||||
'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.',
|
||||
'upload_date': '20160819',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/221205163510-robin-meade-sign-off.jpg?c=original',
|
||||
'duration': 158.0,
|
||||
'title': 'Robin Meade signs off after HLN\'s last broadcast',
|
||||
'description': 'md5:cff3c62d18d2fbc6c5c75cb029b7353b',
|
||||
'upload_date': '20221205',
|
||||
'timestamp': 1670284296,
|
||||
'modified_timestamp': 1670332404,
|
||||
'modified_date': '20221206',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'params': {'format': 'direct'},
|
||||
}, {
|
||||
'url': 'https://cnnespanol.cnn.com/video/ataque-misil-israel-beirut-libano-octubre-trax',
|
||||
'info_dict': {
|
||||
'id': 'me484a43722642aa00627b812fe928f2e99c6e2997',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'video/ataque-misil-israel-beirut-libano-octubre-trax',
|
||||
'timestamp': 1729501452,
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/ataqeubeirut-1.jpg?c=original',
|
||||
'description': 'md5:256ee7137d161f776cda429654135e52',
|
||||
'upload_date': '20241021',
|
||||
'duration': 31.0,
|
||||
'title': 'VIDEO | Israel lanza un nuevo ataque sobre Beirut',
|
||||
'modified_date': '20241021',
|
||||
'modified_timestamp': 1729501530,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn',
|
||||
'only_matching': True,
|
||||
'url': 'https://edition.cnn.com/2024/10/16/politics/kamala-harris-fox-news-interview/index.html',
|
||||
'info_dict': {
|
||||
'id': '2024/10/16/politics/kamala-harris-fox-news-interview',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': '073ffab87b8bef97c9913e71cc18ef9e',
|
||||
'info_dict': {
|
||||
'id': 'me19d548fdd54df0924087039283128ef473ab397d',
|
||||
'ext': 'mp4',
|
||||
'title': '\'I\'m not finished\': Harris interview with Fox News gets heated',
|
||||
'display_id': 'kamala-harris-fox-news-interview-ebof-digvid',
|
||||
'description': 'md5:e7dd3d1a04df916062230b60ca419a0a',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/harris-20241016234916617.jpg?c=original',
|
||||
'duration': 173.0,
|
||||
'timestamp': 1729122182,
|
||||
'upload_date': '20241016',
|
||||
'modified_timestamp': 1729194706,
|
||||
'modified_date': '20241017',
|
||||
},
|
||||
'params': {'format': 'direct'},
|
||||
}, {
|
||||
'md5': '11604ab4af83b650826753f1ccb8ecff',
|
||||
'info_dict': {
|
||||
'id': 'med04507d8ca3da827001f63d22af321ec29c7d97b',
|
||||
'ext': 'mp4',
|
||||
'title': '\'Wise\': Buttigieg on Harris\' handling of interview question about gender transition surgery',
|
||||
'display_id': 'pete-buttigieg-harris-fox-newssrc-digvid',
|
||||
'description': 'md5:602a8a7e853ed5e574acd3159428c98e',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/buttigieg-20241017040412074.jpg?c=original',
|
||||
'duration': 145.0,
|
||||
'timestamp': 1729137765,
|
||||
'upload_date': '20241017',
|
||||
'modified_timestamp': 1729138184,
|
||||
'modified_date': '20241017',
|
||||
},
|
||||
'params': {'format': 'direct'},
|
||||
}],
|
||||
}]
|
||||
|
||||
_CONFIG = {
|
||||
# http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml
|
||||
'edition': {
|
||||
'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml',
|
||||
'media_src': 'http://pmd.cdn.turner.com/cnn/big',
|
||||
},
|
||||
# http://money.cnn.com/.element/apps/cvp2/cfg/config.xml
|
||||
'money': {
|
||||
'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml',
|
||||
'media_src': 'http://ht3.cdn.turner.com/money/big',
|
||||
},
|
||||
}
|
||||
|
||||
def _extract_timestamp(self, video_data):
|
||||
# TODO: fix timestamp extraction
|
||||
return None
|
||||
|
||||
def _real_extract(self, url):
|
||||
sub_domain, path, page_title = self._match_valid_url(url).groups()
|
||||
if sub_domain not in ('money', 'edition'):
|
||||
sub_domain = 'edition'
|
||||
config = self._CONFIG[sub_domain]
|
||||
return self._extract_cvp_info(
|
||||
config['data_src'] % path, page_title, {
|
||||
'default': {
|
||||
'media_src': config['media_src'],
|
||||
},
|
||||
'f4m': {
|
||||
'host': 'cnn-vh.akamaihd.net',
|
||||
},
|
||||
display_id = self._match_valid_url(url).group('display_id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
app_id = traverse_obj(
|
||||
self._search_json(r'window\.env\s*=', webpage, 'window env', display_id, default={}),
|
||||
('TOP_AUTH_SERVICE_APP_ID', {str}))
|
||||
|
||||
entries = []
|
||||
for player_data in traverse_obj(webpage, (
|
||||
{find_elements(tag='div', attr='data-component-name', value='video-player', html=True)},
|
||||
..., {extract_attributes}, all, lambda _, v: v['data-media-id'])):
|
||||
media_id = player_data['data-media-id']
|
||||
parent_uri = player_data.get('data-video-resource-parent-uri')
|
||||
formats, subtitles = [], {}
|
||||
|
||||
video_data = {}
|
||||
if parent_uri:
|
||||
video_data = self._download_json(
|
||||
'https://fave.api.cnn.io/v1/video', media_id, fatal=False,
|
||||
query={
|
||||
'id': media_id,
|
||||
'stellarUri': parent_uri,
|
||||
})
|
||||
for direct_url in traverse_obj(video_data, ('files', ..., 'fileUri', {url_or_none})):
|
||||
resolution, bitrate = None, None
|
||||
if mobj := re.search(r'-(?P<res>\d+x\d+)_(?P<tbr>\d+)k\.mp4', direct_url):
|
||||
resolution, bitrate = mobj.group('res', 'tbr')
|
||||
formats.append({
|
||||
'url': direct_url,
|
||||
'format_id': 'direct',
|
||||
'quality': 1,
|
||||
'tbr': int_or_none(bitrate),
|
||||
**parse_resolution(resolution),
|
||||
})
|
||||
for sub_data in traverse_obj(video_data, (
|
||||
'closedCaptions', 'types', lambda _, v: url_or_none(v['track']['url']), 'track')):
|
||||
subtitles.setdefault(sub_data.get('lang') or 'en', []).append({
|
||||
'url': sub_data['url'],
|
||||
'name': sub_data.get('label'),
|
||||
})
|
||||
|
||||
if app_id:
|
||||
media_data = self._download_json(
|
||||
f'https://medium.ngtv.io/v2/media/{media_id}/desktop', media_id, fatal=False,
|
||||
query={'appId': app_id})
|
||||
m3u8_url = traverse_obj(media_data, (
|
||||
'media', 'desktop', 'unprotected', 'unencrypted', 'url', {url_or_none}))
|
||||
if m3u8_url:
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
entries.append({
|
||||
**traverse_obj(player_data, {
|
||||
'title': ('data-headline', {clean_html}),
|
||||
'description': ('data-description', {clean_html}),
|
||||
'duration': ('data-duration', {parse_duration}),
|
||||
'timestamp': ('data-publish-date', {parse_iso8601}),
|
||||
'thumbnail': (
|
||||
'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none},
|
||||
{functools.partial(update_url, query='c=original')}),
|
||||
'display_id': 'data-video-slug',
|
||||
}),
|
||||
**traverse_obj(video_data, {
|
||||
'timestamp': ('dateCreated', 'uts', {int_or_none(scale=1000)}),
|
||||
'description': ('description', {clean_html}),
|
||||
'title': ('headline', {str}),
|
||||
'modified_timestamp': ('lastModified', 'uts', {int_or_none(scale=1000)}),
|
||||
'duration': ('trt', {int_or_none}),
|
||||
}),
|
||||
'id': media_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
if len(entries) == 1:
|
||||
return {
|
||||
**entries[0],
|
||||
'display_id': display_id,
|
||||
}
|
||||
|
||||
class CNNBlogsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+'
|
||||
_TEST = {
|
||||
'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/',
|
||||
'md5': '3e56f97b0b6ffb4b79f4ea0749551084',
|
||||
'info_dict': {
|
||||
'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Criminalizing journalism?',
|
||||
'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
|
||||
'upload_date': '20140209',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, url_basename(url))
|
||||
cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
|
||||
return self.url_result(cnn_url, CNNIE.ie_key())
|
||||
|
||||
|
||||
class CNNArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)'
|
||||
_TEST = {
|
||||
'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
|
||||
'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
|
||||
'info_dict': {
|
||||
'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Obama: Cyberattack not an act of war',
|
||||
'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
|
||||
'upload_date': '20141221',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, url_basename(url))
|
||||
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
|
||||
return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
|
||||
return self.playlist_result(entries, display_id)
|
||||
|
||||
|
||||
class CNNIndonesiaIE(InfoExtractor):
|
||||
|
|
|
@ -47,6 +47,7 @@ from ..utils import (
|
|||
FormatSorter,
|
||||
GeoRestrictedError,
|
||||
GeoUtils,
|
||||
ISO639Utils,
|
||||
LenientJSONDecoder,
|
||||
Popen,
|
||||
RegexNotFoundError,
|
||||
|
@ -1408,6 +1409,13 @@ class InfoExtractor:
|
|||
return None, None
|
||||
|
||||
self.write_debug(f'Using netrc for {netrc_machine} authentication')
|
||||
|
||||
# compat: <=py3.10: netrc cannot parse tokens as empty strings, will return `""` instead
|
||||
# Ref: https://github.com/yt-dlp/yt-dlp/issues/11413
|
||||
# https://github.com/python/cpython/commit/15409c720be0503131713e3d3abc1acd0da07378
|
||||
if sys.version_info < (3, 11):
|
||||
return tuple(x if x != '""' else '' for x in info[::2])
|
||||
|
||||
return info[0], info[2]
|
||||
|
||||
def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
|
||||
|
@ -3071,7 +3079,11 @@ class InfoExtractor:
|
|||
url_pattern = stream.attrib['Url']
|
||||
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
|
||||
stream_name = stream.get('Name')
|
||||
stream_language = stream.get('Language', 'und')
|
||||
# IsmFD expects ISO 639 Set 2 language codes (3-character length)
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/11356
|
||||
stream_language = stream.get('Language') or 'und'
|
||||
if len(stream_language) != 3:
|
||||
stream_language = ISO639Utils.short2long(stream_language) or 'und'
|
||||
for track in stream.findall('QualityLevel'):
|
||||
KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'}
|
||||
fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))
|
||||
|
|
|
@ -8,6 +8,8 @@ from .common import InfoExtractor
|
|||
from .commonprotocols import RtmpIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_etree_fromstring
|
||||
from ..cookies import LenientSimpleCookie
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..networking.impersonate import ImpersonateTarget
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
|
@ -2374,10 +2376,9 @@ class GenericIE(InfoExtractor):
|
|||
else:
|
||||
video_id = self._generic_id(url)
|
||||
|
||||
# Try to impersonate a web-browser by default if possible
|
||||
# Skip impersonation if not available to omit the warning
|
||||
impersonate = self._configuration_arg('impersonate', [''])
|
||||
if 'false' in impersonate or not self._downloader._impersonate_target_available(ImpersonateTarget()):
|
||||
# Do not impersonate by default; see https://github.com/yt-dlp/yt-dlp/issues/11335
|
||||
impersonate = self._configuration_arg('impersonate', ['false'])
|
||||
if 'false' in impersonate:
|
||||
impersonate = None
|
||||
|
||||
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
|
||||
|
@ -2388,10 +2389,29 @@ class GenericIE(InfoExtractor):
|
|||
# to accept raw bytes and being able to download only a chunk.
|
||||
# It may probably better to solve this by checking Content-Type for application/octet-stream
|
||||
# after a HEAD request, but not sure if we can rely on this.
|
||||
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
||||
'Accept-Encoding': 'identity',
|
||||
'Referer': smuggled_data.get('referer'),
|
||||
}), impersonate=impersonate)
|
||||
try:
|
||||
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
||||
'Accept-Encoding': 'identity',
|
||||
'Referer': smuggled_data.get('referer'),
|
||||
}), impersonate=impersonate)
|
||||
except ExtractorError as e:
|
||||
if not (isinstance(e.cause, HTTPError) and e.cause.status == 403
|
||||
and e.cause.response.get_header('cf-mitigated') == 'challenge'
|
||||
and e.cause.response.extensions.get('impersonate') is None):
|
||||
raise
|
||||
cf_cookie_domain = traverse_obj(
|
||||
LenientSimpleCookie(e.cause.response.get_header('set-cookie')),
|
||||
('__cf_bm', 'domain'))
|
||||
if cf_cookie_domain:
|
||||
self.write_debug(f'Clearing __cf_bm cookie for {cf_cookie_domain}')
|
||||
self.cookiejar.clear(domain=cf_cookie_domain, path='/', name='__cf_bm')
|
||||
msg = 'Got HTTP Error 403 caused by Cloudflare anti-bot challenge; '
|
||||
if not self._downloader._impersonate_target_available(ImpersonateTarget()):
|
||||
msg += ('see https://github.com/yt-dlp/yt-dlp#impersonation for '
|
||||
'how to install the required impersonation dependency, and ')
|
||||
raise ExtractorError(
|
||||
f'{msg}try again with --extractor-args "generic:impersonate"', expected=True)
|
||||
|
||||
new_url = full_response.url
|
||||
if new_url != extract_basic_auth(url)[0]:
|
||||
self.report_following_redirect(new_url)
|
||||
|
|
|
@ -869,7 +869,7 @@ class NicovideoTagURLIE(NicovideoSearchBaseIE):
|
|||
|
||||
|
||||
class NiconicoUserIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)(?:/video)?/?(?:$|[#?])'
|
||||
_TEST = {
|
||||
'url': 'https://www.nicovideo.jp/user/419948',
|
||||
'info_dict': {
|
||||
|
@ -877,7 +877,7 @@ class NiconicoUserIE(InfoExtractor):
|
|||
},
|
||||
'playlist_mincount': 101,
|
||||
}
|
||||
_API_URL = 'https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s'
|
||||
_API_URL = 'https://nvapi.nicovideo.jp/v2/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s'
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
_API_HEADERS = {
|
||||
|
@ -897,12 +897,13 @@ class NiconicoUserIE(InfoExtractor):
|
|||
total_count = int_or_none(json_parsed['data'].get('totalCount'))
|
||||
for entry in json_parsed['data']['items']:
|
||||
count += 1
|
||||
yield self.url_result('https://www.nicovideo.jp/watch/{}'.format(entry['id']))
|
||||
yield self.url_result(
|
||||
f'https://www.nicovideo.jp/watch/{entry["essential"]["id"]}', ie=NiconicoIE)
|
||||
page_num += 1
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())
|
||||
return self.playlist_result(self._entries(list_id), list_id)
|
||||
|
||||
|
||||
class NiconicoLiveIE(InfoExtractor):
|
||||
|
|
|
@ -7,49 +7,99 @@ class RadioRadicaleIE(InfoExtractor):
|
|||
_VALID_URL = r'https?://(?:www\.)?radioradicale\.it/scheda/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.radioradicale.it/scheda/471591',
|
||||
'md5': 'eb0fbe43a601f1a361cbd00f3c45af4a',
|
||||
'info_dict': {
|
||||
'id': '471591',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:e8fbb8de57011a3255db0beca69af73d',
|
||||
'description': 'md5:5e15a789a2fe4d67da8d1366996e89ef',
|
||||
'location': 'Napoli',
|
||||
'duration': 2852.0,
|
||||
'timestamp': 1459987200,
|
||||
'upload_date': '20160407',
|
||||
'description': 'md5:5e15a789a2fe4d67da8d1366996e89ef',
|
||||
'thumbnail': 'https://www.radioradicale.it/photo400/0/0/9/0/1/00901768.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}, {
|
||||
'url': 'https://www.radioradicale.it/scheda/742783/parlamento-riunito-in-seduta-comune-11a-della-xix-legislatura',
|
||||
'info_dict': {
|
||||
'id': '742783',
|
||||
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
|
||||
'description': '-) Votazione per l\'elezione di un giudice della Corte Costituzionale (nono scrutinio)',
|
||||
'location': 'CAMERA',
|
||||
'duration': 5868.0,
|
||||
'timestamp': 1730246400,
|
||||
'upload_date': '20241030',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'aa48de55dcc45478e4cd200f299aab7d',
|
||||
'info_dict': {
|
||||
'id': '742783-0',
|
||||
'ext': 'mp4',
|
||||
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
|
||||
},
|
||||
}, {
|
||||
'md5': 'be915c189c70ad2920e5810f32260ff5',
|
||||
'info_dict': {
|
||||
'id': '742783-1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
|
||||
},
|
||||
}, {
|
||||
'md5': 'f0ee4047342baf8ed3128a8417ac5e0a',
|
||||
'info_dict': {
|
||||
'id': '742783-2',
|
||||
'ext': 'mp4',
|
||||
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
|
||||
},
|
||||
}],
|
||||
}]
|
||||
|
||||
def _entries(self, videos_info, page_id):
|
||||
for idx, video in enumerate(traverse_obj(
|
||||
videos_info, ('playlist', lambda _, v: v['sources']))):
|
||||
video_id = f'{page_id}-{idx}'
|
||||
formats = []
|
||||
subtitles = {}
|
||||
|
||||
for m3u8_url in traverse_obj(video, ('sources', ..., 'src', {url_or_none})):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
for sub in traverse_obj(video, ('subtitles', ..., lambda _, v: url_or_none(v['src']))):
|
||||
self._merge_subtitles({sub.get('srclang') or 'und': [{
|
||||
'url': sub['src'],
|
||||
'name': sub.get('label'),
|
||||
}]}, target=subtitles)
|
||||
|
||||
yield {
|
||||
'id': video_id,
|
||||
'title': video.get('title'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
page_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
|
||||
video_info = self._search_json(
|
||||
videos_info = self._search_json(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,',
|
||||
webpage, 'video_info', video_id)['RRscheda']
|
||||
json_ld = self._search_json_ld(webpage, video_id)
|
||||
webpage, 'videos_info', page_id)['RRscheda']
|
||||
|
||||
formats = []
|
||||
for source in traverse_obj(video_info, ('playlist', 0, 'sources')):
|
||||
formats.extend(
|
||||
self._extract_m3u8_formats(source.get('src'), video_id))
|
||||
entries = list(self._entries(videos_info, page_id))
|
||||
|
||||
subtitles = {}
|
||||
for sub in traverse_obj(video_info, ('playlist', 0, 'subtitles')):
|
||||
self._merge_subtitles({sub.get('srclang') or 'und': [{
|
||||
'url': sub.get('src'),
|
||||
'name': sub.get('label'),
|
||||
}]}, target=subtitles)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'title': json_ld.get('title') or self._og_search_title(webpage),
|
||||
'location': video_info.get('luogo'),
|
||||
'timestamp': json_ld.get('timestamp'),
|
||||
'thumbnail': traverse_obj(json_ld, ('thumbnails', 0, 'url', {url_or_none})),
|
||||
'description': json_ld.get('description') or self._og_search_description(webpage),
|
||||
common_info = {
|
||||
'id': page_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'location': videos_info.get('luogo'),
|
||||
**self._search_json_ld(webpage, page_id),
|
||||
}
|
||||
|
||||
if len(entries) == 1:
|
||||
return {
|
||||
**entries[0],
|
||||
**common_info,
|
||||
}
|
||||
|
||||
return self.playlist_result(entries, multi_video=True, **common_info)
|
||||
|
|
|
@ -208,7 +208,6 @@ class SoundcloudBaseIE(InfoExtractor):
|
|||
|
||||
def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False):
|
||||
track_id = str(info['id'])
|
||||
title = info['title']
|
||||
|
||||
format_urls = set()
|
||||
formats = []
|
||||
|
@ -367,7 +366,7 @@ class SoundcloudBaseIE(InfoExtractor):
|
|||
'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
|
||||
'uploader_url': user.get('permalink_url'),
|
||||
'timestamp': unified_timestamp(info.get('created_at')),
|
||||
'title': title,
|
||||
'title': info.get('title'),
|
||||
'description': info.get('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': float_or_none(info.get('duration'), 1000),
|
||||
|
@ -377,7 +376,8 @@ class SoundcloudBaseIE(InfoExtractor):
|
|||
'like_count': extract_count('favoritings') or extract_count('likes'),
|
||||
'comment_count': extract_count('comment'),
|
||||
'repost_count': extract_count('reposts'),
|
||||
'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)),
|
||||
'genres': traverse_obj(info, ('genre', {str}, filter, all, filter)),
|
||||
'artists': traverse_obj(info, ('publisher_metadata', 'artist', {str}, filter, all, filter)),
|
||||
'formats': formats if not extract_flat else None,
|
||||
}
|
||||
|
||||
|
@ -429,7 +429,6 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'repost_count': int,
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
|
||||
'uploader_url': 'https://soundcloud.com/ethmusic',
|
||||
'genres': [],
|
||||
},
|
||||
},
|
||||
# geo-restricted
|
||||
|
@ -453,6 +452,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'uploader_url': 'https://soundcloud.com/the-concept-band',
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
|
||||
'genres': ['Alternative'],
|
||||
'artists': ['The Royal Concept'],
|
||||
},
|
||||
},
|
||||
# private link
|
||||
|
@ -525,6 +525,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'repost_count': int,
|
||||
'view_count': int,
|
||||
'genres': ['Dance & EDM'],
|
||||
'artists': ['80M'],
|
||||
},
|
||||
},
|
||||
# private link, downloadable format
|
||||
|
@ -549,6 +550,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
|
||||
'uploader_url': 'https://soundcloud.com/oriuplift',
|
||||
'genres': ['Trance'],
|
||||
'artists': ['Ori Uplift'],
|
||||
},
|
||||
},
|
||||
# no album art, use avatar pic for thumbnail
|
||||
|
@ -572,7 +574,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'uploader_url': 'https://soundcloud.com/garyvee',
|
||||
'genres': [],
|
||||
'artists': ['MadReal'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
|
|
@ -150,14 +150,6 @@ class TwitterBaseIE(InfoExtractor):
|
|||
def is_logged_in(self):
|
||||
return bool(self._get_cookies(self._API_BASE).get('auth_token'))
|
||||
|
||||
# XXX: Temporary workaround until twitter.com => x.com migration is completed
|
||||
def _real_initialize(self):
|
||||
if self.is_logged_in or not self._get_cookies('https://twitter.com/').get('auth_token'):
|
||||
return
|
||||
# User has not yet been migrated to x.com and has passed twitter.com cookies
|
||||
TwitterBaseIE._API_BASE = 'https://api.twitter.com/1.1/'
|
||||
TwitterBaseIE._GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
|
||||
|
||||
@functools.cached_property
|
||||
def _selected_api(self):
|
||||
return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]
|
||||
|
|
|
@ -869,11 +869,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||
for retry in (False, True):
|
||||
try:
|
||||
video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash)
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400
|
||||
and 'password' in traverse_obj(
|
||||
e.cause.response.read(),
|
||||
({bytes.decode}, {json.loads}, 'invalid_parameters', ..., 'field'),
|
||||
self._webpage_read_content(e.cause.response, e.cause.response.url, video_id, fatal=False),
|
||||
({json.loads}, 'invalid_parameters', ..., 'field'),
|
||||
)):
|
||||
self._verify_video_password(
|
||||
video_id, self._get_video_password(), viewer['xsrft'])
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class YleAreenaIE(InfoExtractor):
|
||||
|
@ -15,9 +16,9 @@ class YleAreenaIE(InfoExtractor):
|
|||
_TESTS = [
|
||||
{
|
||||
'url': 'https://areena.yle.fi/1-4371942',
|
||||
'md5': '932edda0ecf5dfd6423804182d32f8ac',
|
||||
'md5': 'd87e9a1e74e67e009990ddd413e426b4',
|
||||
'info_dict': {
|
||||
'id': '0_a3tjk92c',
|
||||
'id': '1-4371942',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pouchit',
|
||||
'description': 'md5:01071d7056ceec375f63960f90c35366',
|
||||
|
@ -26,37 +27,27 @@ class YleAreenaIE(InfoExtractor):
|
|||
'season_number': 1,
|
||||
'episode': 'Episode 2',
|
||||
'episode_number': 2,
|
||||
'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/0_a3tjk92c/version/100061',
|
||||
'uploader_id': 'ovp@yle.fi',
|
||||
'duration': 1435,
|
||||
'view_count': int,
|
||||
'upload_date': '20181204',
|
||||
'release_date': '20190106',
|
||||
'timestamp': 1543916210,
|
||||
'subtitles': {'fin': [{'url': r're:^https?://', 'ext': 'srt'}]},
|
||||
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
|
||||
'age_limit': 7,
|
||||
'webpage_url': 'https://areena.yle.fi/1-4371942',
|
||||
'release_date': '20190105',
|
||||
'release_timestamp': 1546725660,
|
||||
'duration': 1435,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://areena.yle.fi/1-2158940',
|
||||
'md5': 'cecb603661004e36af8c5188b5212b12',
|
||||
'md5': '6369ddc5e07b5fdaeda27a495184143c',
|
||||
'info_dict': {
|
||||
'id': '1_l38iz9ur',
|
||||
'id': '1-2158940',
|
||||
'ext': 'mp4',
|
||||
'title': 'Albi haluaa vessan',
|
||||
'description': 'md5:15236d810c837bed861fae0e88663c33',
|
||||
'description': 'Albi haluaa vessan.',
|
||||
'series': 'Albi Lumiukko',
|
||||
'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/1_l38iz9ur/version/100021',
|
||||
'uploader_id': 'ovp@yle.fi',
|
||||
'duration': 319,
|
||||
'view_count': int,
|
||||
'upload_date': '20211202',
|
||||
'release_date': '20211215',
|
||||
'timestamp': 1638448202,
|
||||
'subtitles': {},
|
||||
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
|
||||
'age_limit': 0,
|
||||
'webpage_url': 'https://areena.yle.fi/1-2158940',
|
||||
'release_date': '20211215',
|
||||
'release_timestamp': 1639555200,
|
||||
'duration': 319,
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -67,72 +58,125 @@ class YleAreenaIE(InfoExtractor):
|
|||
'title': 'HKO & Mälkki & Tanner',
|
||||
'description': 'md5:b4f1b1af2c6569b33f75179a86eea156',
|
||||
'series': 'Helsingin kaupunginorkesterin konsertteja',
|
||||
'thumbnail': r're:^https?://.+\.jpg$',
|
||||
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
|
||||
'release_date': '20230120',
|
||||
'release_timestamp': 1674242079,
|
||||
'duration': 8004,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://areena.yle.fi/1-72251830',
|
||||
'info_dict': {
|
||||
'id': '1-72251830',
|
||||
'ext': 'mp4',
|
||||
'title': r're:Pentulive 2024 | Pentulive \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
|
||||
'description': 'md5:1f118707d9093bf894a34fbbc865397b',
|
||||
'series': 'Pentulive',
|
||||
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
|
||||
'live_status': 'is_live',
|
||||
'release_date': '20241025',
|
||||
'release_timestamp': 1729875600,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'livestream',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://areena.yle.fi/podcastit/1-71022852',
|
||||
'info_dict': {
|
||||
'id': '1-71022852',
|
||||
'ext': 'mp3',
|
||||
'title': 'Värityspäivä',
|
||||
'description': 'md5:c3a02b0455ec71d32cbe09d32ec161e2',
|
||||
'series': 'Murun ja Paukun ikioma kaupunki',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'release_date': '20240607',
|
||||
'release_timestamp': 1717736400,
|
||||
'duration': 442,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, is_podcast = self._match_valid_url(url).group('id', 'podcast')
|
||||
info = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={})
|
||||
json_ld = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={})
|
||||
video_data = self._download_json(
|
||||
f'https://player.api.yle.fi/v1/preview/{video_id}.json?app_id=player_static_prod&app_key=8930d72170e48303cf5f3867780d549b',
|
||||
video_id, headers={
|
||||
'origin': 'https://areena.yle.fi',
|
||||
'referer': 'https://areena.yle.fi/',
|
||||
'content-type': 'application/json',
|
||||
})
|
||||
})['data']
|
||||
|
||||
# Example title: 'K1, J2: Pouchit | Modernit miehet'
|
||||
season_number, episode_number, episode, series = self._search_regex(
|
||||
r'K(?P<season_no>\d+),\s*J(?P<episode_no>\d+):?\s*\b(?P<episode>[^|]+)\s*|\s*(?P<series>.+)',
|
||||
info.get('title') or '', 'episode metadata', group=('season_no', 'episode_no', 'episode', 'series'),
|
||||
json_ld.get('title') or '', 'episode metadata', group=('season_no', 'episode_no', 'episode', 'series'),
|
||||
default=(None, None, None, None))
|
||||
description = traverse_obj(video_data, ('data', 'ongoing_ondemand', 'description', 'fin'), expected_type=str)
|
||||
description = traverse_obj(video_data, ('ongoing_ondemand', 'description', 'fin', {str}))
|
||||
|
||||
subtitles = {}
|
||||
for sub in traverse_obj(video_data, ('data', 'ongoing_ondemand', 'subtitles', ...)):
|
||||
if url_or_none(sub.get('uri')):
|
||||
subtitles.setdefault(sub.get('language') or 'und', []).append({
|
||||
'url': sub['uri'],
|
||||
'ext': 'srt',
|
||||
'name': sub.get('kind'),
|
||||
})
|
||||
for sub in traverse_obj(video_data, ('ongoing_ondemand', 'subtitles', lambda _, v: url_or_none(v['uri']))):
|
||||
subtitles.setdefault(sub.get('language') or 'und', []).append({
|
||||
'url': sub['uri'],
|
||||
'ext': 'srt',
|
||||
'name': sub.get('kind'),
|
||||
})
|
||||
|
||||
if is_podcast:
|
||||
info_dict = {
|
||||
'url': video_data['data']['ongoing_ondemand']['media_url'],
|
||||
}
|
||||
elif kaltura_id := traverse_obj(video_data, ('data', 'ongoing_ondemand', 'kaltura', 'id', {str})):
|
||||
info_dict = {
|
||||
info_dict, metadata = {}, {}
|
||||
if is_podcast and traverse_obj(video_data, ('ongoing_ondemand', 'media_url', {url_or_none})):
|
||||
metadata = video_data['ongoing_ondemand']
|
||||
info_dict['url'] = metadata['media_url']
|
||||
elif traverse_obj(video_data, ('ongoing_event', 'manifest_url', {url_or_none})):
|
||||
metadata = video_data['ongoing_event']
|
||||
metadata.pop('duration', None) # Duration is not accurate for livestreams
|
||||
info_dict['live_status'] = 'is_live'
|
||||
elif traverse_obj(video_data, ('ongoing_ondemand', 'manifest_url', {url_or_none})):
|
||||
metadata = video_data['ongoing_ondemand']
|
||||
# XXX: Has all externally-hosted Kaltura content been moved to native hosting?
|
||||
elif kaltura_id := traverse_obj(video_data, ('ongoing_ondemand', 'kaltura', 'id', {str})):
|
||||
metadata = video_data['ongoing_ondemand']
|
||||
info_dict.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(f'kaltura:1955031:{kaltura_id}', {'source_url': url}),
|
||||
'ie_key': KalturaIE.ie_key(),
|
||||
}
|
||||
})
|
||||
elif traverse_obj(video_data, ('gone', {dict})):
|
||||
self.raise_no_formats('The content is no longer available', expected=True, video_id=video_id)
|
||||
metadata = video_data['gone']
|
||||
else:
|
||||
formats, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
video_data['data']['ongoing_ondemand']['manifest_url'], video_id, 'mp4', m3u8_id='hls')
|
||||
raise ExtractorError('Unable to extract content')
|
||||
|
||||
if not info_dict.get('url') and metadata.get('manifest_url'):
|
||||
info_dict['formats'], subs = self._extract_m3u8_formats_and_subtitles(
|
||||
metadata['manifest_url'], video_id, 'mp4', m3u8_id='hls')
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
info_dict = {'formats': formats}
|
||||
|
||||
return {
|
||||
**info_dict,
|
||||
**traverse_obj(json_ld, {
|
||||
'title': 'title',
|
||||
'thumbnails': ('thumbnails', ..., {'url': 'url'}),
|
||||
}),
|
||||
'id': video_id,
|
||||
'title': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'title', 'fin'), expected_type=str)
|
||||
or episode or info.get('title')),
|
||||
'title': episode,
|
||||
'description': description,
|
||||
'series': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'series', 'title', 'fin'), expected_type=str)
|
||||
or series),
|
||||
'series': series,
|
||||
'season_number': (int_or_none(self._search_regex(r'Kausi (\d+)', description, 'season number', default=None))
|
||||
or int_or_none(season_number)),
|
||||
'episode_number': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'episode_number'), expected_type=int_or_none)
|
||||
or int_or_none(episode_number)),
|
||||
'thumbnails': traverse_obj(info, ('thumbnails', ..., {'url': 'url'})),
|
||||
'age_limit': traverse_obj(video_data, ('data', 'ongoing_ondemand', 'content_rating', 'age_restriction'), expected_type=int_or_none),
|
||||
'episode_number': int_or_none(episode_number),
|
||||
'subtitles': subtitles or None,
|
||||
'release_date': unified_strdate(traverse_obj(video_data, ('data', 'ongoing_ondemand', 'start_time'), expected_type=str)),
|
||||
**traverse_obj(metadata, {
|
||||
'title': ('title', 'fin', {str}),
|
||||
'description': ('description', 'fin', {str}),
|
||||
'series': ('series', 'title', 'fin', {str}),
|
||||
'episode_number': ('episode_number', {int_or_none}),
|
||||
'age_limit': ('content_rating', 'age_restriction', {int_or_none}),
|
||||
'release_timestamp': ('start_time', {parse_iso8601}),
|
||||
'duration': ('duration', 'duration_in_seconds', {int_or_none}),
|
||||
}),
|
||||
**info_dict,
|
||||
}
|
||||
|
|
|
@ -644,13 +644,14 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE] = {}
|
||||
|
||||
if refresh_token:
|
||||
refresh_token = refresh_token.strip('\'') or None
|
||||
|
||||
# Allow refresh token passed to initialize cache
|
||||
if refresh_token:
|
||||
msg = f'{self._OAUTH_DISPLAY_ID}: Using password input as refresh token'
|
||||
if self.get_param('cachedir') is not False:
|
||||
msg += ' and caching token to disk; you should supply an empty password next time'
|
||||
self.to_screen(msg)
|
||||
self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token)
|
||||
else:
|
||||
refresh_token = self.cache.load(self._NETRC_MACHINE, self._oauth_cache_key)
|
||||
|
||||
refresh_token = refresh_token or self.cache.load(self._NETRC_MACHINE, self._oauth_cache_key)
|
||||
if refresh_token:
|
||||
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token
|
||||
try:
|
||||
|
|
|
@ -212,6 +212,23 @@ def write_json_file(obj, fn):
|
|||
raise
|
||||
|
||||
|
||||
def partial_application(func):
|
||||
sig = inspect.signature(func)
|
||||
required_args = [
|
||||
param.name for param in sig.parameters.values()
|
||||
if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.VAR_POSITIONAL)
|
||||
if param.default is inspect.Parameter.empty
|
||||
]
|
||||
|
||||
@functools.wraps(func)
|
||||
def wrapped(*args, **kwargs):
|
||||
if set(required_args[len(args):]).difference(kwargs):
|
||||
return functools.partial(func, *args, **kwargs)
|
||||
return func(*args, **kwargs)
|
||||
|
||||
return wrapped
|
||||
|
||||
|
||||
def find_xpath_attr(node, xpath, key, val=None):
|
||||
""" Find the xpath xpath[@key=val] """
|
||||
assert re.match(r'^[a-zA-Z_-]+$', key)
|
||||
|
@ -1192,6 +1209,7 @@ def extract_timezone(date_str, default=None):
|
|||
return timezone, date_str
|
||||
|
||||
|
||||
@partial_application
|
||||
def parse_iso8601(date_str, delimiter='T', timezone=None):
|
||||
""" Return a UNIX timestamp from the given date """
|
||||
|
||||
|
@ -1269,6 +1287,7 @@ def unified_timestamp(date_str, day_first=True):
|
|||
return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
|
||||
|
||||
|
||||
@partial_application
|
||||
def determine_ext(url, default_ext='unknown_video'):
|
||||
if url is None or '.' not in url:
|
||||
return default_ext
|
||||
|
@ -1944,7 +1963,7 @@ def remove_start(s, start):
|
|||
|
||||
|
||||
def remove_end(s, end):
|
||||
return s[:-len(end)] if s is not None and s.endswith(end) else s
|
||||
return s[:-len(end)] if s is not None and end and s.endswith(end) else s
|
||||
|
||||
|
||||
def remove_quotes(s):
|
||||
|
@ -1973,6 +1992,7 @@ def base_url(url):
|
|||
return re.match(r'https?://[^?#]+/', url).group()
|
||||
|
||||
|
||||
@partial_application
|
||||
def urljoin(base, path):
|
||||
if isinstance(path, bytes):
|
||||
path = path.decode()
|
||||
|
@ -1988,21 +2008,6 @@ def urljoin(base, path):
|
|||
return urllib.parse.urljoin(base, path)
|
||||
|
||||
|
||||
def partial_application(func):
|
||||
sig = inspect.signature(func)
|
||||
|
||||
@functools.wraps(func)
|
||||
def wrapped(*args, **kwargs):
|
||||
try:
|
||||
sig.bind(*args, **kwargs)
|
||||
except TypeError:
|
||||
return functools.partial(func, *args, **kwargs)
|
||||
else:
|
||||
return func(*args, **kwargs)
|
||||
|
||||
return wrapped
|
||||
|
||||
|
||||
@partial_application
|
||||
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1, base=None):
|
||||
if get_attr and v is not None:
|
||||
|
@ -2583,6 +2588,7 @@ def urlencode_postdata(*args, **kargs):
|
|||
return urllib.parse.urlencode(*args, **kargs).encode('ascii')
|
||||
|
||||
|
||||
@partial_application
|
||||
def update_url(url, *, query_update=None, **kwargs):
|
||||
"""Replace URL components specified by kwargs
|
||||
@param url str or parse url tuple
|
||||
|
@ -2603,6 +2609,7 @@ def update_url(url, *, query_update=None, **kwargs):
|
|||
return urllib.parse.urlunparse(url._replace(**kwargs))
|
||||
|
||||
|
||||
@partial_application
|
||||
def update_url_query(url, query):
|
||||
return update_url(url, query_update=query)
|
||||
|
||||
|
@ -2924,6 +2931,7 @@ def error_to_str(err):
|
|||
return f'{type(err).__name__}: {err}'
|
||||
|
||||
|
||||
@partial_application
|
||||
def mimetype2ext(mt, default=NO_DEFAULT):
|
||||
if not isinstance(mt, str):
|
||||
if default is not NO_DEFAULT:
|
||||
|
@ -4664,6 +4672,7 @@ def to_high_limit_path(path):
|
|||
return path
|
||||
|
||||
|
||||
@partial_application
|
||||
def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
|
||||
val = traversal.traverse_obj(obj, *variadic(field))
|
||||
if not val if ignore is NO_DEFAULT else val in variadic(ignore):
|
||||
|
@ -4828,6 +4837,7 @@ def number_of_digits(number):
|
|||
return len('%d' % number)
|
||||
|
||||
|
||||
@partial_application
|
||||
def join_nonempty(*values, delim='-', from_dict=None):
|
||||
if from_dict is not None:
|
||||
values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)
|
||||
|
@ -5165,6 +5175,7 @@ class _UnsafeExtensionError(Exception):
|
|||
'ico',
|
||||
'image',
|
||||
'jng',
|
||||
'jpe',
|
||||
'jpeg',
|
||||
'jxl',
|
||||
'svg',
|
||||
|
@ -5277,6 +5288,7 @@ class RetryManager:
|
|||
time.sleep(delay)
|
||||
|
||||
|
||||
@partial_application
def make_archive_id(ie, video_id):
    """Build the archive id string '<lowercased extractor key> <video_id>'

    @param ie          either the extractor key itself (str),
                       or an object exposing an `ie_key()` method
    @param video_id    id that is appended after a single space
    """
    if isinstance(ie, str):
        key = ie
    else:
        key = ie.ie_key()
    return f'{key.lower()} {video_id}'
|
||||
|
@ -5578,14 +5590,15 @@ class FormatSorter:
|
|||
value = get_value(field)
|
||||
return self._calculate_field_preference_from_value(format_, field, type_, value)
|
||||
|
||||
def calculate_preference(self, format):
|
||||
@staticmethod
|
||||
def _fill_sorting_fields(format):
|
||||
# Determine missing protocol
|
||||
if not format.get('protocol'):
|
||||
format['protocol'] = determine_protocol(format)
|
||||
|
||||
# Determine missing ext
|
||||
if not format.get('ext') and 'url' in format:
|
||||
format['ext'] = determine_ext(format['url'])
|
||||
format['ext'] = determine_ext(format['url']).lower()
|
||||
if format.get('vcodec') == 'none':
|
||||
format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
|
||||
format['video_ext'] = 'none'
|
||||
|
@ -5613,6 +5626,8 @@ class FormatSorter:
|
|||
if not format.get('tbr'):
|
||||
format['tbr'] = try_call(lambda: format['vbr'] + format['abr']) or None
|
||||
|
||||
def calculate_preference(self, format):
    """Return the preference tuple of `format`, one entry per field in `self._order`

    `format` is first passed through `self._fill_sorting_fields`,
    then each field is evaluated with `self._calculate_field_preference`.
    """
    self._fill_sorting_fields(format)
    preferences = [
        self._calculate_field_preference(format, field)
        for field in self._order
    ]
    return tuple(preferences)
|
||||
|
||||
|
||||
|
|
|
@ -391,14 +391,13 @@ def find_element(*, tag: str, html=False): ...
|
|||
def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False):
|
||||
# deliberately using `id=` and `cls=` for ease of readability
|
||||
assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required'
|
||||
if not tag:
|
||||
tag = r'[\w:.-]+'
|
||||
ANY_TAG = r'[\w:.-]+'
|
||||
|
||||
if attr and value:
|
||||
assert not cls, 'Cannot match both attr and cls'
|
||||
assert not id, 'Cannot match both attr and id'
|
||||
func = get_element_html_by_attribute if html else get_element_by_attribute
|
||||
return functools.partial(func, attr, value, tag=tag)
|
||||
return functools.partial(func, attr, value, tag=tag or ANY_TAG)
|
||||
|
||||
elif cls:
|
||||
assert not id, 'Cannot match both cls and id'
|
||||
|
@ -408,7 +407,7 @@ def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=Fal
|
|||
|
||||
elif id:
|
||||
func = get_element_html_by_id if html else get_element_by_id
|
||||
return functools.partial(func, id, tag=tag)
|
||||
return functools.partial(func, id, tag=tag or ANY_TAG)
|
||||
|
||||
index = int(bool(html))
|
||||
return lambda html: get_element_text_and_html_by_tag(tag, html)[index]
|
||||
|
@ -436,6 +435,20 @@ def find_elements(*, tag=None, cls=None, attr=None, value=None, html=False):
|
|||
return functools.partial(func, cls)
|
||||
|
||||
|
||||
def trim_str(*, start=None, end=None):
    """Create a function that strips a fixed prefix and/or suffix from a string

    @param start    prefix to drop when the input starts with it
    @param end      suffix to drop when the input ends with it
    @returns        a one-argument function; it returns None for a None input,
                    otherwise the input without the matching prefix/suffix

    Both checks are made against the untrimmed input, so a prefix and suffix
    that overlap within the input result in an empty string.
    """
    def trim(s):
        if s is None:
            return None
        begin = len(start) if start and s.startswith(start) else 0
        # A stop of None slices through to the end of the string
        stop = -len(end) if end and s.endswith(end) else None
        return s[begin:stop]

    return trim
|
||||
|
||||
|
||||
def get_first(obj, *paths, **kwargs):
    """Call `traverse_obj` with every path prefixed by `...` and `get_all=False`

    @param obj       object handed to `traverse_obj`
    @param paths     each one is expanded with `variadic` and prefixed with `...`
    @param kwargs    passed through to `traverse_obj`
                     (`get_all` is always set here and must not be supplied)
    """
    branched_paths = [(..., *variadic(path)) for path in paths]
    return traverse_obj(obj, *branched_paths, **kwargs, get_all=False)
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user