mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-28 02:01:25 +01:00
Compare commits
30 Commits
99ac02f85b
...
06d0d6c769
Author | SHA1 | Date | |
---|---|---|---|
|
06d0d6c769 | ||
|
88606c86b9 | ||
|
06b18d79ef | ||
|
beec97f8fd | ||
|
d8c0145408 | ||
|
50e71793c7 | ||
|
b136f4dd8f | ||
|
4613096f2e | ||
|
838f4385de | ||
|
d135823137 | ||
|
59f8dd8239 | ||
|
9c6534da81 | ||
|
a403dcf9be | ||
|
754940e9a5 | ||
|
beae2db127 | ||
|
3945677a75 | ||
|
b103aca24d | ||
|
5c7a5aaab2 | ||
|
422195ec70 | ||
|
a6783a3b99 | ||
|
428ffb75aa | ||
|
b6dc2c49e8 | ||
|
76802f4613 | ||
|
d569a88452 | ||
|
88402b714e | ||
|
5bc5fb2835 | ||
|
f93c16395c | ||
|
f101e5d34c | ||
|
330335386d | ||
|
0a3991edae |
1
.github/workflows/release.yml
vendored
1
.github/workflows/release.yml
vendored
|
@ -282,6 +282,7 @@ jobs:
|
||||||
uses: pypa/gh-action-pypi-publish@release/v1
|
uses: pypa/gh-action-pypi-publish@release/v1
|
||||||
with:
|
with:
|
||||||
verbose: true
|
verbose: true
|
||||||
|
attestations: false # Currently doesn't work w/ reusable workflows (breaks nightly)
|
||||||
|
|
||||||
publish:
|
publish:
|
||||||
needs: [prepare, build]
|
needs: [prepare, build]
|
||||||
|
|
12
README.md
12
README.md
|
@ -1553,9 +1553,9 @@ The available fields are:
|
||||||
|
|
||||||
All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. E.g. `+res` prefers format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. E.g. `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. E.g. `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. E.g. `filesize~1G` prefers the format with filesize closest to 1 GiB.
|
All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. E.g. `+res` prefers format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. E.g. `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. E.g. `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. E.g. `filesize~1G` prefers the format with filesize closest to 1 GiB.
|
||||||
|
|
||||||
The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behavior can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,vcodec:vp9.2,channels,acodec,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order.
|
The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behavior can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,vcodec,channels,acodec,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order.
|
||||||
|
|
||||||
Note that the default has `vcodec:vp9.2`; i.e. `av1` is not preferred. Similarly, the default for hdr is `hdr:12`; i.e. Dolby Vision is not preferred. These choices are made since DV and AV1 formats are not yet fully compatible with most devices. This may be changed in the future as more devices become capable of smoothly playing back these formats.
|
Note that the default for hdr is `hdr:12`; i.e. Dolby Vision is not preferred. This choice was made since DV formats are not yet fully compatible with most devices. This may be changed in the future.
|
||||||
|
|
||||||
If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`.
|
If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`.
|
||||||
|
|
||||||
|
@ -2205,7 +2205,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu
|
||||||
* `avconv` is not supported as an alternative to `ffmpeg`
|
* `avconv` is not supported as an alternative to `ffmpeg`
|
||||||
* yt-dlp stores config files in slightly different locations to youtube-dl. See [CONFIGURATION](#configuration) for a list of correct locations
|
* yt-dlp stores config files in slightly different locations to youtube-dl. See [CONFIGURATION](#configuration) for a list of correct locations
|
||||||
* The default [output template](#output-template) is `%(title)s [%(id)s].%(ext)s`. There is no real reason for this change. This was changed before yt-dlp was ever made public and now there are no plans to change it back to `%(title)s-%(id)s.%(ext)s`. Instead, you may use `--compat-options filename`
|
* The default [output template](#output-template) is `%(title)s [%(id)s].%(ext)s`. There is no real reason for this change. This was changed before yt-dlp was ever made public and now there are no plans to change it back to `%(title)s-%(id)s.%(ext)s`. Instead, you may use `--compat-options filename`
|
||||||
* The default [format sorting](#sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order
|
* The default [format sorting](#sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order. Older versions of yt-dlp preferred VP9 due to its broader compatibility; you can use `--compat-options prefer-vp9-sort` to revert to that format sorting preference. These two compat options cannot be used together
|
||||||
* The default format selector is `bv*+ba/b`. This means that if a combined video + audio format that is better than the best video-only format is found, the former will be preferred. Use `-f bv+ba/b` or `--compat-options format-spec` to revert this
|
* The default format selector is `bv*+ba/b`. This means that if a combined video + audio format that is better than the best video-only format is found, the former will be preferred. Use `-f bv+ba/b` or `--compat-options format-spec` to revert this
|
||||||
* Unlike youtube-dlc, yt-dlp does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. You can also use `--compat-options multistreams` to enable both
|
* Unlike youtube-dlc, yt-dlp does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. You can also use `--compat-options multistreams` to enable both
|
||||||
* `--no-abort-on-error` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead
|
* `--no-abort-on-error` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead
|
||||||
|
@ -2234,11 +2234,11 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu
|
||||||
For ease of use, a few more compat options are available:
|
For ease of use, a few more compat options are available:
|
||||||
|
|
||||||
* `--compat-options all`: Use all compat options (**Do NOT use this!**)
|
* `--compat-options all`: Use all compat options (**Do NOT use this!**)
|
||||||
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx,-allow-unsafe-ext`
|
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx,-allow-unsafe-ext,-prefer-vp9-sort`
|
||||||
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx,-allow-unsafe-ext`
|
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx,-allow-unsafe-ext,-prefer-vp9-sort`
|
||||||
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
|
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
|
||||||
* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
|
* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
|
||||||
* `--compat-options 2023`: Currently does nothing. Use this to enable all future compat options
|
* `--compat-options 2023`: Same as `--compat-options prefer-vp9-sort`. Use this to enable all future compat options
|
||||||
|
|
||||||
The following compat options restore vulnerable behavior from before security patches:
|
The following compat options restore vulnerable behavior from before security patches:
|
||||||
|
|
||||||
|
|
|
@ -216,5 +216,23 @@
|
||||||
"action": "add",
|
"action": "add",
|
||||||
"when": "d784464399b600ba9516bbcec6286f11d68974dd",
|
"when": "d784464399b600ba9516bbcec6286f11d68974dd",
|
||||||
"short": "[priority] **The minimum *required* Python version has been raised to 3.9**\nPython 3.8 reached its end-of-life on 2024.10.07, and yt-dlp has now removed support for it. As an unfortunate side effect, the official `yt-dlp.exe` and `yt-dlp_x86.exe` binaries are no longer supported on Windows 7. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)"
|
"short": "[priority] **The minimum *required* Python version has been raised to 3.9**\nPython 3.8 reached its end-of-life on 2024.10.07, and yt-dlp has now removed support for it. As an unfortunate side effect, the official `yt-dlp.exe` and `yt-dlp_x86.exe` binaries are no longer supported on Windows 7. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": "change",
|
||||||
|
"when": "914af9a0cf51c9a3f74aa88d952bee8334c67511",
|
||||||
|
"short": "Expand paths in `--plugin-dirs` (#11334)",
|
||||||
|
"authors": ["bashonly"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": "change",
|
||||||
|
"when": "c29f5a7fae93a08f3cfbb6127b2faa75145b06a0",
|
||||||
|
"short": "[ie/generic] Do not impersonate by default (#11336)",
|
||||||
|
"authors": ["bashonly"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": "change",
|
||||||
|
"when": "57212a5f97ce367590aaa5c3e9a135eead8f81f7",
|
||||||
|
"short": "[ie/vimeo] Fix API retries (#11351)",
|
||||||
|
"authors": ["bashonly"]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
@ -71,14 +71,13 @@ class CommitGroup(enum.Enum):
|
||||||
def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
|
def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
|
||||||
group, _, subgroup = (group.strip().lower() for group in value.partition('/'))
|
group, _, subgroup = (group.strip().lower() for group in value.partition('/'))
|
||||||
|
|
||||||
result = cls.group_lookup().get(group)
|
if result := cls.group_lookup().get(group):
|
||||||
if not result:
|
return result, subgroup or None
|
||||||
|
|
||||||
if subgroup:
|
if subgroup:
|
||||||
return None, value
|
return None, value
|
||||||
subgroup = group
|
|
||||||
result = cls.subgroup_lookup().get(subgroup)
|
|
||||||
|
|
||||||
return result, subgroup or None
|
return cls.subgroup_lookup().get(group), group or None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
@ -136,8 +135,7 @@ class Changelog:
|
||||||
first = False
|
first = False
|
||||||
yield '\n<details><summary><h3>Changelog</h3></summary>\n'
|
yield '\n<details><summary><h3>Changelog</h3></summary>\n'
|
||||||
|
|
||||||
group = groups[item]
|
if group := groups[item]:
|
||||||
if group:
|
|
||||||
yield self.format_module(item.value, group)
|
yield self.format_module(item.value, group)
|
||||||
|
|
||||||
if self._collapsible:
|
if self._collapsible:
|
||||||
|
@ -253,7 +251,7 @@ class CommitRange:
|
||||||
''', re.VERBOSE | re.DOTALL)
|
''', re.VERBOSE | re.DOTALL)
|
||||||
EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
|
EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
|
||||||
REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
|
REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
|
||||||
FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert|Improve)\s+([\da-f]{40})')
|
FIXES_RE = re.compile(r'(?i:(?:bug\s*)?fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Improve)\s+([\da-f]{40})')
|
||||||
UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')
|
UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')
|
||||||
|
|
||||||
def __init__(self, start, end, default_author=None):
|
def __init__(self, start, end, default_author=None):
|
||||||
|
@ -287,11 +285,16 @@ class CommitRange:
|
||||||
short = next(lines)
|
short = next(lines)
|
||||||
skip = short.startswith('Release ') or short == '[version] update'
|
skip = short.startswith('Release ') or short == '[version] update'
|
||||||
|
|
||||||
|
fix_commitish = None
|
||||||
|
if match := self.FIXES_RE.search(short):
|
||||||
|
fix_commitish = match.group(1)
|
||||||
|
|
||||||
authors = [default_author] if default_author else []
|
authors = [default_author] if default_author else []
|
||||||
for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
|
for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
|
||||||
match = self.AUTHOR_INDICATOR_RE.match(line)
|
if match := self.AUTHOR_INDICATOR_RE.match(line):
|
||||||
if match:
|
|
||||||
authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)
|
authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)
|
||||||
|
if not fix_commitish and (match := self.FIXES_RE.fullmatch(line)):
|
||||||
|
fix_commitish = match.group(1)
|
||||||
|
|
||||||
commit = Commit(commit_hash, short, authors)
|
commit = Commit(commit_hash, short, authors)
|
||||||
if skip and (self._start or not i):
|
if skip and (self._start or not i):
|
||||||
|
@ -301,21 +304,17 @@ class CommitRange:
|
||||||
logger.debug(f'Reached Release commit, breaking: {commit}')
|
logger.debug(f'Reached Release commit, breaking: {commit}')
|
||||||
break
|
break
|
||||||
|
|
||||||
revert_match = self.REVERT_RE.fullmatch(commit.short)
|
if match := self.REVERT_RE.fullmatch(commit.short):
|
||||||
if revert_match:
|
reverts[match.group(1)] = commit
|
||||||
reverts[revert_match.group(1)] = commit
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
fix_match = self.FIXES_RE.search(commit.short)
|
if fix_commitish:
|
||||||
if fix_match:
|
fixes[fix_commitish].append(commit)
|
||||||
commitish = fix_match.group(1)
|
|
||||||
fixes[commitish].append(commit)
|
|
||||||
|
|
||||||
commits[commit.hash] = commit
|
commits[commit.hash] = commit
|
||||||
|
|
||||||
for commitish, revert_commit in reverts.items():
|
for commitish, revert_commit in reverts.items():
|
||||||
reverted = commits.pop(commitish, None)
|
if reverted := commits.pop(commitish, None):
|
||||||
if reverted:
|
|
||||||
logger.debug(f'{commitish} fully reverted {reverted}')
|
logger.debug(f'{commitish} fully reverted {reverted}')
|
||||||
else:
|
else:
|
||||||
commits[revert_commit.hash] = revert_commit
|
commits[revert_commit.hash] = revert_commit
|
||||||
|
@ -461,8 +460,7 @@ def create_changelog(args):
|
||||||
|
|
||||||
logger.info(f'Loaded {len(commits)} commits')
|
logger.info(f'Loaded {len(commits)} commits')
|
||||||
|
|
||||||
new_contributors = get_new_contributors(args.contributors_path, commits)
|
if new_contributors := get_new_contributors(args.contributors_path, commits):
|
||||||
if new_contributors:
|
|
||||||
if args.contributors:
|
if args.contributors:
|
||||||
write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a')
|
write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a')
|
||||||
logger.info(f'New contributors: {", ".join(new_contributors)}')
|
logger.info(f'New contributors: {", ".join(new_contributors)}')
|
||||||
|
|
|
@ -53,6 +53,18 @@ class TestInfoExtractor(unittest.TestCase):
|
||||||
def test_ie_key(self):
|
def test_ie_key(self):
|
||||||
self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
|
self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
|
||||||
|
|
||||||
|
def test_get_netrc_login_info(self):
|
||||||
|
for params in [
|
||||||
|
{'usenetrc': True, 'netrc_location': './test/testdata/netrc/netrc'},
|
||||||
|
{'netrc_cmd': f'{sys.executable} ./test/testdata/netrc/print_netrc.py'},
|
||||||
|
]:
|
||||||
|
ie = DummyIE(FakeYDL(params))
|
||||||
|
self.assertEqual(ie._get_netrc_login_info(netrc_machine='normal_use'), ('user', 'pass'))
|
||||||
|
self.assertEqual(ie._get_netrc_login_info(netrc_machine='empty_user'), ('', 'pass'))
|
||||||
|
self.assertEqual(ie._get_netrc_login_info(netrc_machine='empty_pass'), ('user', ''))
|
||||||
|
self.assertEqual(ie._get_netrc_login_info(netrc_machine='both_empty'), ('', ''))
|
||||||
|
self.assertEqual(ie._get_netrc_login_info(netrc_machine='nonexistent'), (None, None))
|
||||||
|
|
||||||
def test_html_search_regex(self):
|
def test_html_search_regex(self):
|
||||||
html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'
|
html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'
|
||||||
search = lambda re, *args: self.ie._html_search_regex(re, html, *args)
|
search = lambda re, *args: self.ie._html_search_regex(re, html, *args)
|
||||||
|
|
|
@ -83,6 +83,18 @@ class TestAES(unittest.TestCase):
|
||||||
data, intlist_to_bytes(self.key), authentication_tag, intlist_to_bytes(self.iv[:12]))
|
data, intlist_to_bytes(self.key), authentication_tag, intlist_to_bytes(self.iv[:12]))
|
||||||
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
|
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
|
||||||
|
|
||||||
|
def test_gcm_aligned_decrypt(self):
|
||||||
|
data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f'
|
||||||
|
authentication_tag = b'\x08\xb1\x9d!&\x98\xd0\xeaRq\x90\xe6;\xb5]\xd8'
|
||||||
|
|
||||||
|
decrypted = intlist_to_bytes(aes_gcm_decrypt_and_verify(
|
||||||
|
list(data), self.key, list(authentication_tag), self.iv[:12]))
|
||||||
|
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg[:16])
|
||||||
|
if Cryptodome.AES:
|
||||||
|
decrypted = aes_gcm_decrypt_and_verify_bytes(
|
||||||
|
data, bytes(self.key), authentication_tag, bytes(self.iv[:12]))
|
||||||
|
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg[:16])
|
||||||
|
|
||||||
def test_decrypt_text(self):
|
def test_decrypt_text(self):
|
||||||
password = intlist_to_bytes(self.key).decode()
|
password = intlist_to_bytes(self.key).decode()
|
||||||
encrypted = base64.b64encode(
|
encrypted = base64.b64encode(
|
||||||
|
|
|
@ -105,6 +105,13 @@ class TestCookies(unittest.TestCase):
|
||||||
decryptor = LinuxChromeCookieDecryptor('Chrome', Logger())
|
decryptor = LinuxChromeCookieDecryptor('Chrome', Logger())
|
||||||
self.assertEqual(decryptor.decrypt(encrypted_value), value)
|
self.assertEqual(decryptor.decrypt(encrypted_value), value)
|
||||||
|
|
||||||
|
def test_chrome_cookie_decryptor_linux_v10_meta24(self):
|
||||||
|
with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b''}):
|
||||||
|
encrypted_value = b'v10\x1f\xe4\x0e[\x83\x0c\xcc*kPi \xce\x8d\x1d\xbb\x80\r\x11\t\xbb\x9e^Hy\x94\xf4\x963\x9f\x82\xba\xfe\xa1\xed\xb9\xf1)\x00710\x92\xc8/<\x96B'
|
||||||
|
value = 'DE'
|
||||||
|
decryptor = LinuxChromeCookieDecryptor('Chrome', Logger(), meta_version=24)
|
||||||
|
self.assertEqual(decryptor.decrypt(encrypted_value), value)
|
||||||
|
|
||||||
def test_chrome_cookie_decryptor_windows_v10(self):
|
def test_chrome_cookie_decryptor_windows_v10(self):
|
||||||
with MonkeyPatch(cookies, {
|
with MonkeyPatch(cookies, {
|
||||||
'_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2<z\x16]\n\xbb\xb8\xcb\xd7\x9bA\xc3\x14e\x99{\xd6\xf4&',
|
'_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2<z\x16]\n\xbb\xb8\xcb\xd7\x9bA\xc3\x14e\x99{\xd6\xf4&',
|
||||||
|
@ -114,6 +121,15 @@ class TestCookies(unittest.TestCase):
|
||||||
decryptor = WindowsChromeCookieDecryptor('', Logger())
|
decryptor = WindowsChromeCookieDecryptor('', Logger())
|
||||||
self.assertEqual(decryptor.decrypt(encrypted_value), value)
|
self.assertEqual(decryptor.decrypt(encrypted_value), value)
|
||||||
|
|
||||||
|
def test_chrome_cookie_decryptor_windows_v10_meta24(self):
|
||||||
|
with MonkeyPatch(cookies, {
|
||||||
|
'_get_windows_v10_key': lambda *args, **kwargs: b'\xea\x8b\x02\xc3\xc6\xc5\x99\xc3\xa3[ j\xfa\xf6\xfcU\xac\x13u\xdc\x0c\x0e\xf1\x03\x90\xb6\xdf\xbb\x8fL\xb1\xb2',
|
||||||
|
}):
|
||||||
|
encrypted_value = b'v10dN\xe1\xacy\x84^\xe1I\xact\x03r\xfb\xe2\xce{^\x0e<(\xb0y\xeb\x01\xfb@"\x9e\x8c\xa53~\xdb*\x8f\xac\x8b\xe3\xfd3\x06\xe5\x93\x19OyOG\xb2\xfb\x1d$\xc0\xda\x13j\x9e\xfe\xc5\xa3\xa8\xfe\xd9'
|
||||||
|
value = '1234'
|
||||||
|
decryptor = WindowsChromeCookieDecryptor('', Logger(), meta_version=24)
|
||||||
|
self.assertEqual(decryptor.decrypt(encrypted_value), value)
|
||||||
|
|
||||||
def test_chrome_cookie_decryptor_mac_v10(self):
|
def test_chrome_cookie_decryptor_mac_v10(self):
|
||||||
with MonkeyPatch(cookies, {'_get_mac_keyring_password': lambda *args, **kwargs: b'6eIDUdtKAacvlHwBVwvg/Q=='}):
|
with MonkeyPatch(cookies, {'_get_mac_keyring_password': lambda *args, **kwargs: b'6eIDUdtKAacvlHwBVwvg/Q=='}):
|
||||||
encrypted_value = b'v10\xb3\xbe\xad\xa1[\x9fC\xa1\x98\xe0\x9a\x01\xd9\xcf\xbfc'
|
encrypted_value = b'v10\xb3\xbe\xad\xa1[\x9fC\xa1\x98\xe0\x9a\x01\xd9\xcf\xbfc'
|
||||||
|
|
|
@ -9,12 +9,17 @@ from yt_dlp.utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
dict_get,
|
dict_get,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
)
|
)
|
||||||
from yt_dlp.utils.traversal import (
|
from yt_dlp.utils.traversal import (
|
||||||
traverse_obj,
|
find_element,
|
||||||
|
find_elements,
|
||||||
require,
|
require,
|
||||||
subs_list_to_dict,
|
subs_list_to_dict,
|
||||||
|
traverse_obj,
|
||||||
|
trim_str,
|
||||||
|
unpack,
|
||||||
)
|
)
|
||||||
|
|
||||||
_TEST_DATA = {
|
_TEST_DATA = {
|
||||||
|
@ -34,6 +39,14 @@ _TEST_DATA = {
|
||||||
'dict': {},
|
'dict': {},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_TEST_HTML = '''<html><body>
|
||||||
|
<div class="a">1</div>
|
||||||
|
<div class="a" id="x" custom="z">2</div>
|
||||||
|
<div class="b" data-id="y" custom="z">3</div>
|
||||||
|
<p class="a">4</p>
|
||||||
|
<p id="d" custom="e">5</p>
|
||||||
|
</body></html>'''
|
||||||
|
|
||||||
|
|
||||||
class TestTraversal:
|
class TestTraversal:
|
||||||
def test_traversal_base(self):
|
def test_traversal_base(self):
|
||||||
|
@ -495,6 +508,73 @@ class TestTraversalHelpers:
|
||||||
{'url': 'https://example.com/subs/en2', 'ext': 'ext'},
|
{'url': 'https://example.com/subs/en2', 'ext': 'ext'},
|
||||||
]}, '`quality` key should sort subtitle list accordingly'
|
]}, '`quality` key should sort subtitle list accordingly'
|
||||||
|
|
||||||
|
def test_trim_str(self):
|
||||||
|
with pytest.raises(TypeError):
|
||||||
|
trim_str('positional')
|
||||||
|
|
||||||
|
assert callable(trim_str(start='a'))
|
||||||
|
assert trim_str(start='ab')('abc') == 'c'
|
||||||
|
assert trim_str(end='bc')('abc') == 'a'
|
||||||
|
assert trim_str(start='a', end='c')('abc') == 'b'
|
||||||
|
assert trim_str(start='ab', end='c')('abc') == ''
|
||||||
|
assert trim_str(start='a', end='bc')('abc') == ''
|
||||||
|
assert trim_str(start='ab', end='bc')('abc') == ''
|
||||||
|
assert trim_str(start='abc', end='abc')('abc') == ''
|
||||||
|
assert trim_str(start='', end='')('abc') == 'abc'
|
||||||
|
|
||||||
|
def test_unpack(self):
|
||||||
|
assert unpack(lambda *x: ''.join(map(str, x)))([1, 2, 3]) == '123'
|
||||||
|
assert unpack(join_nonempty)([1, 2, 3]) == '1-2-3'
|
||||||
|
assert unpack(join_nonempty(delim=' '))([1, 2, 3]) == '1 2 3'
|
||||||
|
with pytest.raises(TypeError):
|
||||||
|
unpack(join_nonempty)()
|
||||||
|
with pytest.raises(TypeError):
|
||||||
|
unpack()
|
||||||
|
|
||||||
|
def test_find_element(self):
|
||||||
|
for improper_kwargs in [
|
||||||
|
dict(attr='data-id'),
|
||||||
|
dict(value='y'),
|
||||||
|
dict(attr='data-id', value='y', cls='a'),
|
||||||
|
dict(attr='data-id', value='y', id='x'),
|
||||||
|
dict(cls='a', id='x'),
|
||||||
|
dict(cls='a', tag='p'),
|
||||||
|
dict(cls='[ab]', regex=True),
|
||||||
|
]:
|
||||||
|
with pytest.raises(AssertionError):
|
||||||
|
find_element(**improper_kwargs)(_TEST_HTML)
|
||||||
|
|
||||||
|
assert find_element(cls='a')(_TEST_HTML) == '1'
|
||||||
|
assert find_element(cls='a', html=True)(_TEST_HTML) == '<div class="a">1</div>'
|
||||||
|
assert find_element(id='x')(_TEST_HTML) == '2'
|
||||||
|
assert find_element(id='[ex]')(_TEST_HTML) is None
|
||||||
|
assert find_element(id='[ex]', regex=True)(_TEST_HTML) == '2'
|
||||||
|
assert find_element(id='x', html=True)(_TEST_HTML) == '<div class="a" id="x" custom="z">2</div>'
|
||||||
|
assert find_element(attr='data-id', value='y')(_TEST_HTML) == '3'
|
||||||
|
assert find_element(attr='data-id', value='y(?:es)?')(_TEST_HTML) is None
|
||||||
|
assert find_element(attr='data-id', value='y(?:es)?', regex=True)(_TEST_HTML) == '3'
|
||||||
|
assert find_element(
|
||||||
|
attr='data-id', value='y', html=True)(_TEST_HTML) == '<div class="b" data-id="y" custom="z">3</div>'
|
||||||
|
|
||||||
|
def test_find_elements(self):
|
||||||
|
for improper_kwargs in [
|
||||||
|
dict(tag='p'),
|
||||||
|
dict(attr='data-id'),
|
||||||
|
dict(value='y'),
|
||||||
|
dict(attr='data-id', value='y', cls='a'),
|
||||||
|
dict(cls='a', tag='div'),
|
||||||
|
dict(cls='[ab]', regex=True),
|
||||||
|
]:
|
||||||
|
with pytest.raises(AssertionError):
|
||||||
|
find_elements(**improper_kwargs)(_TEST_HTML)
|
||||||
|
|
||||||
|
assert find_elements(cls='a')(_TEST_HTML) == ['1', '2', '4']
|
||||||
|
assert find_elements(cls='a', html=True)(_TEST_HTML) == [
|
||||||
|
'<div class="a">1</div>', '<div class="a" id="x" custom="z">2</div>', '<p class="a">4</p>']
|
||||||
|
assert find_elements(attr='custom', value='z')(_TEST_HTML) == ['2', '3']
|
||||||
|
assert find_elements(attr='custom', value='[ez]')(_TEST_HTML) == []
|
||||||
|
assert find_elements(attr='custom', value='[ez]', regex=True)(_TEST_HTML) == ['2', '3', '5']
|
||||||
|
|
||||||
|
|
||||||
class TestDictGet:
|
class TestDictGet:
|
||||||
def test_dict_get(self):
|
def test_dict_get(self):
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
|
import unittest.mock
|
||||||
import warnings
|
import warnings
|
||||||
import datetime as dt
|
import datetime as dt
|
||||||
|
|
||||||
|
@ -71,6 +72,7 @@ from yt_dlp.utils import (
|
||||||
intlist_to_bytes,
|
intlist_to_bytes,
|
||||||
iri_to_uri,
|
iri_to_uri,
|
||||||
is_html,
|
is_html,
|
||||||
|
join_nonempty,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
limit_length,
|
limit_length,
|
||||||
locked_file,
|
locked_file,
|
||||||
|
@ -343,11 +345,13 @@ class TestUtil(unittest.TestCase):
|
||||||
self.assertEqual(remove_start(None, 'A - '), None)
|
self.assertEqual(remove_start(None, 'A - '), None)
|
||||||
self.assertEqual(remove_start('A - B', 'A - '), 'B')
|
self.assertEqual(remove_start('A - B', 'A - '), 'B')
|
||||||
self.assertEqual(remove_start('B - A', 'A - '), 'B - A')
|
self.assertEqual(remove_start('B - A', 'A - '), 'B - A')
|
||||||
|
self.assertEqual(remove_start('non-empty', ''), 'non-empty')
|
||||||
|
|
||||||
def test_remove_end(self):
|
def test_remove_end(self):
|
||||||
self.assertEqual(remove_end(None, ' - B'), None)
|
self.assertEqual(remove_end(None, ' - B'), None)
|
||||||
self.assertEqual(remove_end('A - B', ' - B'), 'A')
|
self.assertEqual(remove_end('A - B', ' - B'), 'A')
|
||||||
self.assertEqual(remove_end('B - A', ' - B'), 'B - A')
|
self.assertEqual(remove_end('B - A', ' - B'), 'B - A')
|
||||||
|
self.assertEqual(remove_end('non-empty', ''), 'non-empty')
|
||||||
|
|
||||||
def test_remove_quotes(self):
|
def test_remove_quotes(self):
|
||||||
self.assertEqual(remove_quotes(None), None)
|
self.assertEqual(remove_quotes(None), None)
|
||||||
|
@ -2148,6 +2152,16 @@ Line 1
|
||||||
assert run_shell(args) == expected
|
assert run_shell(args) == expected
|
||||||
assert run_shell(shell_quote(args, shell=True)) == expected
|
assert run_shell(shell_quote(args, shell=True)) == expected
|
||||||
|
|
||||||
|
def test_partial_application(self):
|
||||||
|
assert callable(int_or_none(scale=10)), 'missing positional parameter should apply partially'
|
||||||
|
assert int_or_none(10, scale=0.1) == 100, 'positionally passed argument should call function'
|
||||||
|
assert int_or_none(v=10) == 10, 'keyword passed positional should call function'
|
||||||
|
assert int_or_none(scale=0.1)(10) == 100, 'call after partial applicatino should call the function'
|
||||||
|
|
||||||
|
assert callable(join_nonempty(delim=', ')), 'varargs positional should apply partially'
|
||||||
|
assert callable(join_nonempty()), 'varargs positional should apply partially'
|
||||||
|
assert join_nonempty(None, delim=', ') == '', 'passed varargs should call the function'
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
4
test/testdata/netrc/netrc
vendored
Normal file
4
test/testdata/netrc/netrc
vendored
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
machine normal_use login user password pass
|
||||||
|
machine empty_user login "" password pass
|
||||||
|
machine empty_pass login user password ""
|
||||||
|
machine both_empty login "" password ""
|
2
test/testdata/netrc/print_netrc.py
vendored
Normal file
2
test/testdata/netrc/print_netrc.py
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
with open('./test/testdata/netrc/netrc', encoding='utf-8') as fp:
|
||||||
|
print(fp.read())
|
|
@ -470,7 +470,7 @@ class YoutubeDL:
|
||||||
The following options do not work when used through the API:
|
The following options do not work when used through the API:
|
||||||
filename, abort-on-error, multistreams, no-live-chat,
|
filename, abort-on-error, multistreams, no-live-chat,
|
||||||
format-sort, no-clean-infojson, no-playlist-metafiles,
|
format-sort, no-clean-infojson, no-playlist-metafiles,
|
||||||
no-keep-subs, no-attach-info-json, allow-unsafe-ext.
|
no-keep-subs, no-attach-info-json, allow-unsafe-ext, prefer-vp9-sort.
|
||||||
Refer __init__.py for their implementation
|
Refer __init__.py for their implementation
|
||||||
progress_template: Dictionary of templates for progress outputs.
|
progress_template: Dictionary of templates for progress outputs.
|
||||||
Allowed keys are 'download', 'postprocess',
|
Allowed keys are 'download', 'postprocess',
|
||||||
|
|
|
@ -159,6 +159,9 @@ def set_compat_opts(opts):
|
||||||
opts.embed_infojson = False
|
opts.embed_infojson = False
|
||||||
if 'format-sort' in opts.compat_opts:
|
if 'format-sort' in opts.compat_opts:
|
||||||
opts.format_sort.extend(FormatSorter.ytdl_default)
|
opts.format_sort.extend(FormatSorter.ytdl_default)
|
||||||
|
elif 'prefer-vp9-sort' in opts.compat_opts:
|
||||||
|
opts.format_sort.extend(FormatSorter._prefer_vp9_sort)
|
||||||
|
|
||||||
_video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False)
|
_video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False)
|
||||||
_audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False)
|
_audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False)
|
||||||
if _video_multistreams_set is False and _audio_multistreams_set is False:
|
if _video_multistreams_set is False and _audio_multistreams_set is False:
|
||||||
|
|
|
@ -230,11 +230,11 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
|
||||||
iv_ctr = inc(j0)
|
iv_ctr = inc(j0)
|
||||||
|
|
||||||
decrypted_data = aes_ctr_decrypt(data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr)))
|
decrypted_data = aes_ctr_decrypt(data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr)))
|
||||||
pad_len = len(data) // 16 * 16
|
pad_len = (BLOCK_SIZE_BYTES - (len(data) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES
|
||||||
s_tag = ghash(
|
s_tag = ghash(
|
||||||
hash_subkey,
|
hash_subkey,
|
||||||
data
|
data
|
||||||
+ [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) # pad
|
+ [0] * pad_len # pad
|
||||||
+ bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data
|
+ bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data
|
||||||
+ ((len(data) * 8).to_bytes(8, 'big'))), # length of data
|
+ ((len(data) * 8).to_bytes(8, 'big'))), # length of data
|
||||||
)
|
)
|
||||||
|
|
|
@ -302,12 +302,18 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
|
||||||
raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
|
raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
|
||||||
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
|
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
|
||||||
|
|
||||||
decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)
|
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
|
with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
|
||||||
cursor = None
|
cursor = None
|
||||||
try:
|
try:
|
||||||
cursor = _open_database_copy(cookie_database_path, tmpdir)
|
cursor = _open_database_copy(cookie_database_path, tmpdir)
|
||||||
|
|
||||||
|
# meta_version is necessary to determine if we need to trim the hash prefix from the cookies
|
||||||
|
# Ref: https://chromium.googlesource.com/chromium/src/+/b02dcebd7cafab92770734dc2bc317bd07f1d891/net/extras/sqlite/sqlite_persistent_cookie_store.cc#223
|
||||||
|
meta_version = int(cursor.execute('SELECT value FROM meta WHERE key = "version"').fetchone()[0])
|
||||||
|
decryptor = get_cookie_decryptor(
|
||||||
|
config['browser_dir'], config['keyring_name'], logger,
|
||||||
|
keyring=keyring, meta_version=meta_version)
|
||||||
|
|
||||||
cursor.connection.text_factory = bytes
|
cursor.connection.text_factory = bytes
|
||||||
column_names = _get_column_names(cursor, 'cookies')
|
column_names = _get_column_names(cursor, 'cookies')
|
||||||
secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
|
secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
|
||||||
|
@ -405,22 +411,23 @@ class ChromeCookieDecryptor:
|
||||||
raise NotImplementedError('Must be implemented by sub classes')
|
raise NotImplementedError('Must be implemented by sub classes')
|
||||||
|
|
||||||
|
|
||||||
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
|
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None, meta_version=None):
|
||||||
if sys.platform == 'darwin':
|
if sys.platform == 'darwin':
|
||||||
return MacChromeCookieDecryptor(browser_keyring_name, logger)
|
return MacChromeCookieDecryptor(browser_keyring_name, logger, meta_version=meta_version)
|
||||||
elif sys.platform in ('win32', 'cygwin'):
|
elif sys.platform in ('win32', 'cygwin'):
|
||||||
return WindowsChromeCookieDecryptor(browser_root, logger)
|
return WindowsChromeCookieDecryptor(browser_root, logger, meta_version=meta_version)
|
||||||
return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
|
return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring, meta_version=meta_version)
|
||||||
|
|
||||||
|
|
||||||
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
|
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||||
def __init__(self, browser_keyring_name, logger, *, keyring=None):
|
def __init__(self, browser_keyring_name, logger, *, keyring=None, meta_version=None):
|
||||||
self._logger = logger
|
self._logger = logger
|
||||||
self._v10_key = self.derive_key(b'peanuts')
|
self._v10_key = self.derive_key(b'peanuts')
|
||||||
self._empty_key = self.derive_key(b'')
|
self._empty_key = self.derive_key(b'')
|
||||||
self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
|
self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
|
||||||
self._browser_keyring_name = browser_keyring_name
|
self._browser_keyring_name = browser_keyring_name
|
||||||
self._keyring = keyring
|
self._keyring = keyring
|
||||||
|
self._meta_version = meta_version or 0
|
||||||
|
|
||||||
@functools.cached_property
|
@functools.cached_property
|
||||||
def _v11_key(self):
|
def _v11_key(self):
|
||||||
|
@ -449,14 +456,18 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||||
|
|
||||||
if version == b'v10':
|
if version == b'v10':
|
||||||
self._cookie_counts['v10'] += 1
|
self._cookie_counts['v10'] += 1
|
||||||
return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger)
|
return _decrypt_aes_cbc_multi(
|
||||||
|
ciphertext, (self._v10_key, self._empty_key), self._logger,
|
||||||
|
hash_prefix=self._meta_version >= 24)
|
||||||
|
|
||||||
elif version == b'v11':
|
elif version == b'v11':
|
||||||
self._cookie_counts['v11'] += 1
|
self._cookie_counts['v11'] += 1
|
||||||
if self._v11_key is None:
|
if self._v11_key is None:
|
||||||
self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
|
self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
|
||||||
return None
|
return None
|
||||||
return _decrypt_aes_cbc_multi(ciphertext, (self._v11_key, self._empty_key), self._logger)
|
return _decrypt_aes_cbc_multi(
|
||||||
|
ciphertext, (self._v11_key, self._empty_key), self._logger,
|
||||||
|
hash_prefix=self._meta_version >= 24)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
|
self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
|
||||||
|
@ -465,11 +476,12 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||||
|
|
||||||
|
|
||||||
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
|
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||||
def __init__(self, browser_keyring_name, logger):
|
def __init__(self, browser_keyring_name, logger, meta_version=None):
|
||||||
self._logger = logger
|
self._logger = logger
|
||||||
password = _get_mac_keyring_password(browser_keyring_name, logger)
|
password = _get_mac_keyring_password(browser_keyring_name, logger)
|
||||||
self._v10_key = None if password is None else self.derive_key(password)
|
self._v10_key = None if password is None else self.derive_key(password)
|
||||||
self._cookie_counts = {'v10': 0, 'other': 0}
|
self._cookie_counts = {'v10': 0, 'other': 0}
|
||||||
|
self._meta_version = meta_version or 0
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def derive_key(password):
|
def derive_key(password):
|
||||||
|
@ -487,7 +499,8 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||||
self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
|
self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger)
|
return _decrypt_aes_cbc_multi(
|
||||||
|
ciphertext, (self._v10_key,), self._logger, hash_prefix=self._meta_version >= 24)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self._cookie_counts['other'] += 1
|
self._cookie_counts['other'] += 1
|
||||||
|
@ -497,10 +510,11 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||||
|
|
||||||
|
|
||||||
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
|
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||||
def __init__(self, browser_root, logger):
|
def __init__(self, browser_root, logger, meta_version=None):
|
||||||
self._logger = logger
|
self._logger = logger
|
||||||
self._v10_key = _get_windows_v10_key(browser_root, logger)
|
self._v10_key = _get_windows_v10_key(browser_root, logger)
|
||||||
self._cookie_counts = {'v10': 0, 'other': 0}
|
self._cookie_counts = {'v10': 0, 'other': 0}
|
||||||
|
self._meta_version = meta_version or 0
|
||||||
|
|
||||||
def decrypt(self, encrypted_value):
|
def decrypt(self, encrypted_value):
|
||||||
version = encrypted_value[:3]
|
version = encrypted_value[:3]
|
||||||
|
@ -524,7 +538,9 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||||
ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
|
ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
|
||||||
authentication_tag = raw_ciphertext[-authentication_tag_length:]
|
authentication_tag = raw_ciphertext[-authentication_tag_length:]
|
||||||
|
|
||||||
return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)
|
return _decrypt_aes_gcm(
|
||||||
|
ciphertext, self._v10_key, nonce, authentication_tag, self._logger,
|
||||||
|
hash_prefix=self._meta_version >= 24)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self._cookie_counts['other'] += 1
|
self._cookie_counts['other'] += 1
|
||||||
|
@ -1010,10 +1026,12 @@ def pbkdf2_sha1(password, salt, iterations, key_length):
|
||||||
return hashlib.pbkdf2_hmac('sha1', password, salt, iterations, key_length)
|
return hashlib.pbkdf2_hmac('sha1', password, salt, iterations, key_length)
|
||||||
|
|
||||||
|
|
||||||
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
|
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16, hash_prefix=False):
|
||||||
for key in keys:
|
for key in keys:
|
||||||
plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
|
plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
|
||||||
try:
|
try:
|
||||||
|
if hash_prefix:
|
||||||
|
return plaintext[32:].decode()
|
||||||
return plaintext.decode()
|
return plaintext.decode()
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
pass
|
pass
|
||||||
|
@ -1021,7 +1039,7 @@ def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' '
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
|
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger, hash_prefix=False):
|
||||||
try:
|
try:
|
||||||
plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
|
plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
|
@ -1029,6 +1047,8 @@ def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
if hash_prefix:
|
||||||
|
return plaintext[32:].decode()
|
||||||
return plaintext.decode()
|
return plaintext.decode()
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
|
logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
|
||||||
|
|
|
@ -278,6 +278,7 @@ from .bleacherreport import (
|
||||||
from .blerp import BlerpIE
|
from .blerp import BlerpIE
|
||||||
from .blogger import BloggerIE
|
from .blogger import BloggerIE
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
|
from .bluesky import BlueskyIE
|
||||||
from .bokecc import BokeCCIE
|
from .bokecc import BokeCCIE
|
||||||
from .bongacams import BongaCamsIE
|
from .bongacams import BongaCamsIE
|
||||||
from .boosty import BoostyIE
|
from .boosty import BoostyIE
|
||||||
|
|
|
@ -33,24 +33,6 @@ class AnvatoIE(InfoExtractor):
|
||||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
|
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14
|
|
||||||
'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441',
|
|
||||||
'md5': '921919dab3cd0b849ff3d624831ae3e2',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '899441',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14',
|
|
||||||
'description': 'md5:85e05a3cc163f8c344340f220521136d',
|
|
||||||
'upload_date': '20201215',
|
|
||||||
'timestamp': 1608009755,
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
|
||||||
'uploader': 'NFL',
|
|
||||||
'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights',
|
|
||||||
'Player Highlights', 'Cleveland Browns', 'league'],
|
|
||||||
'duration': 157,
|
|
||||||
'categories': ['Entertainment', 'Game', 'Highlights'],
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
# from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
|
# from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
|
||||||
'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
|
'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
|
||||||
'md5': '837718bcfb3a7778d022f857f7a9b19e',
|
'md5': '837718bcfb3a7778d022f857f7a9b19e',
|
||||||
|
@ -241,31 +223,6 @@ class AnvatoIE(InfoExtractor):
|
||||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
|
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _generate_nfl_token(self, anvack, mcp_id):
|
|
||||||
reroute = self._download_json(
|
|
||||||
'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials',
|
|
||||||
headers={'X-Domain-Id': 100}, note='Fetching token info')
|
|
||||||
token_type = reroute.get('token_type') or 'Bearer'
|
|
||||||
auth_token = f'{token_type} {reroute["access_token"]}'
|
|
||||||
response = self._download_json(
|
|
||||||
'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
|
|
||||||
'query': '''{
|
|
||||||
viewer {
|
|
||||||
mediaToken(anvack: "%s", id: %s) {
|
|
||||||
token
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}''' % (anvack, mcp_id), # noqa: UP031
|
|
||||||
}).encode(), headers={
|
|
||||||
'Authorization': auth_token,
|
|
||||||
'Content-Type': 'application/json',
|
|
||||||
}, note='Fetching NFL API token')
|
|
||||||
return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token'))
|
|
||||||
|
|
||||||
_TOKEN_GENERATORS = {
|
|
||||||
'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _server_time(self, access_key, video_id):
|
def _server_time(self, access_key, video_id):
|
||||||
return int_or_none(traverse_obj(self._download_json(
|
return int_or_none(traverse_obj(self._download_json(
|
||||||
f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key},
|
f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key},
|
||||||
|
@ -290,8 +247,6 @@ class AnvatoIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
if extracted_token is not None:
|
if extracted_token is not None:
|
||||||
api['anvstk2'] = extracted_token
|
api['anvstk2'] = extracted_token
|
||||||
elif self._TOKEN_GENERATORS.get(access_key) is not None:
|
|
||||||
api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id)
|
|
||||||
elif self._ANVACK_TABLE.get(access_key) is not None:
|
elif self._ANVACK_TABLE.get(access_key) is not None:
|
||||||
api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
|
api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -299,7 +299,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '94834686',
|
'id': '94834686',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'duration': 2700,
|
'duration': 2670,
|
||||||
'episode': '7 Tage ... unter harten Jungs',
|
'episode': '7 Tage ... unter harten Jungs',
|
||||||
'description': 'md5:0f215470dcd2b02f59f4bd10c963f072',
|
'description': 'md5:0f215470dcd2b02f59f4bd10c963f072',
|
||||||
'upload_date': '20231005',
|
'upload_date': '20231005',
|
||||||
|
@ -307,10 +307,28 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||||
'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
|
'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
|
||||||
'series': '7 Tage ...',
|
'series': '7 Tage ...',
|
||||||
'channel': 'HR',
|
'channel': 'HR',
|
||||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f6e6d5ffac41925c?w=960&ch=fa32ba69bc87989a',
|
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:430c86d233afa42d?w=960&ch=fa32ba69bc87989a',
|
||||||
'title': '7 Tage ... unter harten Jungs',
|
'title': '7 Tage ... unter harten Jungs',
|
||||||
'_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'],
|
'_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'],
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.ardmediathek.de/video/lokalzeit-aus-duesseldorf/lokalzeit-aus-duesseldorf-oder-31-10-2024/wdr-duesseldorf/Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '13847165',
|
||||||
|
'chapters': 'count:8',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'channel': 'WDR',
|
||||||
|
'display_id': 'Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
|
||||||
|
'episode': 'Lokalzeit aus Düsseldorf | 31.10.2024',
|
||||||
|
'series': 'Lokalzeit aus Düsseldorf',
|
||||||
|
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f02ec9bd9b7bd5f6?w=960&ch=612491dcd5e09b0c',
|
||||||
|
'title': 'Lokalzeit aus Düsseldorf | 31.10.2024',
|
||||||
|
'upload_date': '20241031',
|
||||||
|
'timestamp': 1730399400,
|
||||||
|
'description': 'md5:12db30b3b706314efe3778b8df1a7058',
|
||||||
|
'duration': 1759,
|
||||||
|
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz'],
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -455,6 +473,12 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
|
**traverse_obj(media_data, {
|
||||||
|
'chapters': ('pluginData', 'jumpmarks@all', 'chapterArray', lambda _, v: int_or_none(v['chapterTime']), {
|
||||||
|
'start_time': ('chapterTime', {int_or_none}),
|
||||||
|
'title': ('chapterTitle', {str}),
|
||||||
|
}),
|
||||||
|
}),
|
||||||
**traverse_obj(media_data, ('meta', {
|
**traverse_obj(media_data, ('meta', {
|
||||||
'title': 'title',
|
'title': 'title',
|
||||||
'description': 'synopsis',
|
'description': 'synopsis',
|
||||||
|
|
|
@ -1,18 +1,33 @@
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import extract_attributes
|
from ..utils import ExtractorError, extract_attributes
|
||||||
|
|
||||||
|
|
||||||
class BFMTVBaseIE(InfoExtractor):
|
class BFMTVBaseIE(InfoExtractor):
|
||||||
_VALID_URL_BASE = r'https?://(?:www\.|rmc\.)?bfmtv\.com/'
|
_VALID_URL_BASE = r'https?://(?:www\.|rmc\.)?bfmtv\.com/'
|
||||||
_VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
|
_VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
|
||||||
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>)'
|
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>.*?</div>)'
|
||||||
|
_VIDEO_ELEMENT_REGEX = r'(<video-js[^>]+>)'
|
||||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||||
|
|
||||||
def _brightcove_url_result(self, video_id, video_block):
|
def _extract_video(self, video_block):
|
||||||
account_id = video_block.get('accountid') or '876450612001'
|
video_element = self._search_regex(
|
||||||
player_id = video_block.get('playerid') or 'I2qBTln4u'
|
self._VIDEO_ELEMENT_REGEX, video_block, 'video element', default=None)
|
||||||
|
if video_element:
|
||||||
|
video_element_attrs = extract_attributes(video_element)
|
||||||
|
video_id = video_element_attrs.get('data-video-id')
|
||||||
|
if not video_id:
|
||||||
|
return
|
||||||
|
account_id = video_element_attrs.get('data-account') or '876450610001'
|
||||||
|
player_id = video_element_attrs.get('adjustplayer') or '19dszYXgm'
|
||||||
|
else:
|
||||||
|
video_block_attrs = extract_attributes(video_block)
|
||||||
|
video_id = video_block_attrs.get('videoid')
|
||||||
|
if not video_id:
|
||||||
|
return
|
||||||
|
account_id = video_block_attrs.get('accountid') or '876630703001'
|
||||||
|
player_id = video_block_attrs.get('playerid') or 'KbPwEbuHx'
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
|
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
|
||||||
'BrightcoveNew', video_id)
|
'BrightcoveNew', video_id)
|
||||||
|
@ -40,23 +55,25 @@ class BFMTVIE(BFMTVBaseIE):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
bfmtv_id = self._match_id(url)
|
bfmtv_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, bfmtv_id)
|
webpage = self._download_webpage(url, bfmtv_id)
|
||||||
video_block = extract_attributes(self._search_regex(
|
video = self._extract_video(self._search_regex(
|
||||||
self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
|
self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
|
||||||
return self._brightcove_url_result(video_block['videoid'], video_block)
|
if not video:
|
||||||
|
raise ExtractorError('Failed to extract video')
|
||||||
|
return video
|
||||||
|
|
||||||
|
|
||||||
class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
|
class BFMTVLiveIE(BFMTVBaseIE):
|
||||||
IE_NAME = 'bfmtv:live'
|
IE_NAME = 'bfmtv:live'
|
||||||
_VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
|
_VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.bfmtv.com/en-direct/',
|
'url': 'https://www.bfmtv.com/en-direct/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5615950982001',
|
'id': '6346069778112',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
'title': r're:^Le Live BFM TV \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||||
'uploader_id': '876450610001',
|
'uploader_id': '876450610001',
|
||||||
'upload_date': '20220926',
|
'upload_date': '20240202',
|
||||||
'timestamp': 1664207191,
|
'timestamp': 1706887572,
|
||||||
'live_status': 'is_live',
|
'live_status': 'is_live',
|
||||||
'thumbnail': r're:https://.+/image\.jpg',
|
'thumbnail': r're:https://.+/image\.jpg',
|
||||||
'tags': [],
|
'tags': [],
|
||||||
|
@ -69,6 +86,15 @@ class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
bfmtv_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, bfmtv_id)
|
||||||
|
video = self._extract_video(self._search_regex(
|
||||||
|
self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
|
||||||
|
if not video:
|
||||||
|
raise ExtractorError('Failed to extract video')
|
||||||
|
return video
|
||||||
|
|
||||||
|
|
||||||
class BFMTVArticleIE(BFMTVBaseIE):
|
class BFMTVArticleIE(BFMTVBaseIE):
|
||||||
IE_NAME = 'bfmtv:article'
|
IE_NAME = 'bfmtv:article'
|
||||||
|
@ -102,18 +128,16 @@ class BFMTVArticleIE(BFMTVBaseIE):
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _entries(self, webpage):
|
||||||
|
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
|
||||||
|
video = self._extract_video(video_block_el)
|
||||||
|
if video:
|
||||||
|
yield video
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
bfmtv_id = self._match_id(url)
|
bfmtv_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, bfmtv_id)
|
webpage = self._download_webpage(url, bfmtv_id)
|
||||||
|
|
||||||
entries = []
|
|
||||||
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
|
|
||||||
video_block = extract_attributes(video_block_el)
|
|
||||||
video_id = video_block.get('videoid')
|
|
||||||
if not video_id:
|
|
||||||
continue
|
|
||||||
entries.append(self._brightcove_url_result(video_id, video_block))
|
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, bfmtv_id, self._og_search_title(webpage, fatal=False),
|
self._entries(webpage), bfmtv_id, self._og_search_title(webpage, fatal=False),
|
||||||
self._html_search_meta(['og:description', 'description'], webpage))
|
self._html_search_meta(['og:description', 'description'], webpage))
|
||||||
|
|
388
yt_dlp/extractor/bluesky.py
Normal file
388
yt_dlp/extractor/bluesky.py
Normal file
|
@ -0,0 +1,388 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
format_field,
|
||||||
|
int_or_none,
|
||||||
|
mimetype2ext,
|
||||||
|
orderedSet,
|
||||||
|
parse_iso8601,
|
||||||
|
truncate_string,
|
||||||
|
update_url_query,
|
||||||
|
url_basename,
|
||||||
|
url_or_none,
|
||||||
|
variadic,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class BlueskyIE(InfoExtractor):
|
||||||
|
_VALID_URL = [
|
||||||
|
r'https?://(?:www\.)?(?:bsky\.app|main\.bsky\.dev)/profile/(?P<handle>[\w.:%-]+)/post/(?P<id>\w+)',
|
||||||
|
r'at://(?P<handle>[\w.:%-]+)/app\.bsky\.feed\.post/(?P<id>\w+)',
|
||||||
|
]
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://bsky.app/profile/blu3blue.bsky.social/post/3l4omssdl632g',
|
||||||
|
'md5': '375539c1930ab05d15585ed772ab54fd',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3l4omssdl632g',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader': 'Blu3Blu3Lilith',
|
||||||
|
'uploader_id': 'blu3blue.bsky.social',
|
||||||
|
'uploader_url': 'https://bsky.app/profile/blu3blue.bsky.social',
|
||||||
|
'channel_id': 'did:plc:pzdr5ylumf7vmvwasrpr5bf2',
|
||||||
|
'channel_url': 'https://bsky.app/profile/did:plc:pzdr5ylumf7vmvwasrpr5bf2',
|
||||||
|
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||||
|
'title': 'OMG WE HAVE VIDEOS NOW',
|
||||||
|
'description': 'OMG WE HAVE VIDEOS NOW',
|
||||||
|
'upload_date': '20240921',
|
||||||
|
'timestamp': 1726940605,
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'tags': [],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://bsky.app/profile/bsky.app/post/3l3vgf77uco2g',
|
||||||
|
'md5': 'b9e344fdbce9f2852c668a97efefb105',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3l3vgf77uco2g',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader': 'Bluesky',
|
||||||
|
'uploader_id': 'bsky.app',
|
||||||
|
'uploader_url': 'https://bsky.app/profile/bsky.app',
|
||||||
|
'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||||
|
'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||||
|
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||||
|
'title': 'Bluesky now has video! Update your app to versi...',
|
||||||
|
'alt_title': 'Bluesky video feature announcement',
|
||||||
|
'description': r're:(?s)Bluesky now has video! .{239}',
|
||||||
|
'upload_date': '20240911',
|
||||||
|
'timestamp': 1726074716,
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'tags': [],
|
||||||
|
'subtitles': {
|
||||||
|
'en': 'mincount:1',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://main.bsky.dev/profile/souris.moe/post/3l4qhp7bcs52c',
|
||||||
|
'md5': '5f2df8c200b5633eb7fb2c984d29772f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3l4qhp7bcs52c',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader': 'souris',
|
||||||
|
'uploader_id': 'souris.moe',
|
||||||
|
'uploader_url': 'https://bsky.app/profile/souris.moe',
|
||||||
|
'channel_id': 'did:plc:tj7g244gl5v6ai6cm4f4wlqp',
|
||||||
|
'channel_url': 'https://bsky.app/profile/did:plc:tj7g244gl5v6ai6cm4f4wlqp',
|
||||||
|
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||||
|
'title': 'Bluesky video #3l4qhp7bcs52c',
|
||||||
|
'upload_date': '20240922',
|
||||||
|
'timestamp': 1727003838,
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'tags': [],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://bsky.app/profile/de1.pds.tentacle.expert/post/3l3w4tnezek2e',
|
||||||
|
'md5': '1af9c7fda061cf7593bbffca89e43d1c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3l3w4tnezek2e',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader': 'clean',
|
||||||
|
'uploader_id': 'de1.pds.tentacle.expert',
|
||||||
|
'uploader_url': 'https://bsky.app/profile/de1.pds.tentacle.expert',
|
||||||
|
'channel_id': 'did:web:de1.tentacle.expert',
|
||||||
|
'channel_url': 'https://bsky.app/profile/did:web:de1.tentacle.expert',
|
||||||
|
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||||
|
'title': 'Bluesky video #3l3w4tnezek2e',
|
||||||
|
'upload_date': '20240911',
|
||||||
|
'timestamp': 1726098823,
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'tags': [],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://bsky.app/profile/yunayuispink.bsky.social/post/3l7gqcfes742o',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'XxK3t_5V3ao',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader': 'yunayu',
|
||||||
|
'uploader_id': '@yunayuispink',
|
||||||
|
'uploader_url': 'https://www.youtube.com/@yunayuispink',
|
||||||
|
'channel': 'yunayu',
|
||||||
|
'channel_id': 'UCPLvXnHa7lTyNoR_dGsU14w',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCPLvXnHa7lTyNoR_dGsU14w',
|
||||||
|
'thumbnail': 'https://i.ytimg.com/vi_webp/XxK3t_5V3ao/maxresdefault.webp',
|
||||||
|
'description': r're:Have a good goodx10000day',
|
||||||
|
'title': '5min vs 5hours drawing',
|
||||||
|
'availability': 'public',
|
||||||
|
'live_status': 'not_live',
|
||||||
|
'playable_in_embed': True,
|
||||||
|
'upload_date': '20241026',
|
||||||
|
'timestamp': 1729967784,
|
||||||
|
'duration': 321,
|
||||||
|
'age_limit': 0,
|
||||||
|
'like_count': int,
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'channel_follower_count': int,
|
||||||
|
'categories': ['Entertainment'],
|
||||||
|
'tags': [],
|
||||||
|
},
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
|
}, {
|
||||||
|
'url': 'https://bsky.app/profile/endshark.bsky.social/post/3jzxjkcemae2m',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '222792849',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'uploader': 'LASERBAT',
|
||||||
|
'uploader_id': 'laserbatx',
|
||||||
|
'uploader_url': 'https://laserbatx.bandcamp.com',
|
||||||
|
'artists': ['LASERBAT'],
|
||||||
|
'album_artists': ['LASERBAT'],
|
||||||
|
'album': 'Hari Nezumi [EP]',
|
||||||
|
'track': 'Forward to the End',
|
||||||
|
'title': 'LASERBAT - Forward to the End',
|
||||||
|
'thumbnail': 'https://f4.bcbits.com/img/a2507705510_5.jpg',
|
||||||
|
'duration': 228.571,
|
||||||
|
'track_id': '222792849',
|
||||||
|
'release_date': '20230423',
|
||||||
|
'upload_date': '20230423',
|
||||||
|
'timestamp': 1682276040.0,
|
||||||
|
'release_timestamp': 1682276040.0,
|
||||||
|
'track_number': 1,
|
||||||
|
},
|
||||||
|
'add_ie': ['Bandcamp'],
|
||||||
|
}, {
|
||||||
|
'url': 'https://bsky.app/profile/dannybhoix.bsky.social/post/3l6oe5mtr2c2j',
|
||||||
|
'md5': 'b9e344fdbce9f2852c668a97efefb105',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3l3vgf77uco2g',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader': 'Bluesky',
|
||||||
|
'uploader_id': 'bsky.app',
|
||||||
|
'uploader_url': 'https://bsky.app/profile/bsky.app',
|
||||||
|
'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||||
|
'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||||
|
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||||
|
'title': 'Bluesky now has video! Update your app to versi...',
|
||||||
|
'alt_title': 'Bluesky video feature announcement',
|
||||||
|
'description': r're:(?s)Bluesky now has video! .{239}',
|
||||||
|
'upload_date': '20240911',
|
||||||
|
'timestamp': 1726074716,
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'tags': [],
|
||||||
|
'subtitles': {
|
||||||
|
'en': 'mincount:1',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://bsky.app/profile/alt.bun.how/post/3l7rdfxhyds2f',
|
||||||
|
'md5': '8775118b235cf9fa6b5ad30f95cda75c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3l7rdfxhyds2f',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader': 'cinnamon',
|
||||||
|
'uploader_id': 'alt.bun.how',
|
||||||
|
'uploader_url': 'https://bsky.app/profile/alt.bun.how',
|
||||||
|
'channel_id': 'did:plc:7x6rtuenkuvxq3zsvffp2ide',
|
||||||
|
'channel_url': 'https://bsky.app/profile/did:plc:7x6rtuenkuvxq3zsvffp2ide',
|
||||||
|
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||||
|
'title': 'crazy that i look like this tbh',
|
||||||
|
'description': 'crazy that i look like this tbh',
|
||||||
|
'upload_date': '20241030',
|
||||||
|
'timestamp': 1730332128,
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'tags': ['sexual'],
|
||||||
|
'age_limit': 18,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'at://did:plc:ia76kvnndjutgedggx2ibrem/app.bsky.feed.post/3l6zrz6zyl2dr',
|
||||||
|
'md5': '71b0eb6d85d03145e6af6642c7fc6d78',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3l6zrz6zyl2dr',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader': 'mary🐇',
|
||||||
|
'uploader_id': 'mary.my.id',
|
||||||
|
'uploader_url': 'https://bsky.app/profile/mary.my.id',
|
||||||
|
'channel_id': 'did:plc:ia76kvnndjutgedggx2ibrem',
|
||||||
|
'channel_url': 'https://bsky.app/profile/did:plc:ia76kvnndjutgedggx2ibrem',
|
||||||
|
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||||
|
'title': 'Bluesky video #3l6zrz6zyl2dr',
|
||||||
|
'upload_date': '20241021',
|
||||||
|
'timestamp': 1729523172,
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'tags': [],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://bsky.app/profile/purpleicetea.bsky.social/post/3l7gv55dc2o2w',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3l7gv55dc2o2w',
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3l7gv55dc2o2w',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20241026',
|
||||||
|
'description': 'One of my favorite videos',
|
||||||
|
'comment_count': int,
|
||||||
|
'uploader_url': 'https://bsky.app/profile/purpleicetea.bsky.social',
|
||||||
|
'uploader': 'Purple.Ice.Tea',
|
||||||
|
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||||
|
'channel_url': 'https://bsky.app/profile/did:plc:bjh5ffwya5f53dfy47dezuwx',
|
||||||
|
'like_count': int,
|
||||||
|
'channel_id': 'did:plc:bjh5ffwya5f53dfy47dezuwx',
|
||||||
|
'repost_count': int,
|
||||||
|
'timestamp': 1729973202,
|
||||||
|
'tags': [],
|
||||||
|
'uploader_id': 'purpleicetea.bsky.social',
|
||||||
|
'title': 'One of my favorite videos',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3l77u64l7le2e',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'hearing people on twitter say that bluesky isn\'...',
|
||||||
|
'like_count': int,
|
||||||
|
'uploader_id': 'thafnine.net',
|
||||||
|
'uploader_url': 'https://bsky.app/profile/thafnine.net',
|
||||||
|
'upload_date': '20241024',
|
||||||
|
'channel_url': 'https://bsky.app/profile/did:plc:6ttyq36rhiyed7wu3ws7dmqj',
|
||||||
|
'description': r're:(?s)hearing people on twitter say that bluesky .{93}',
|
||||||
|
'tags': [],
|
||||||
|
'alt_title': 'md5:9b1ee1937fb3d1a81e932f9ec14d560e',
|
||||||
|
'uploader': 'T9',
|
||||||
|
'channel_id': 'did:plc:6ttyq36rhiyed7wu3ws7dmqj',
|
||||||
|
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||||
|
'timestamp': 1729731642,
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
}]
|
||||||
|
_BLOB_URL_TMPL = '{}/xrpc/com.atproto.sync.getBlob'
|
||||||
|
|
||||||
|
def _get_service_endpoint(self, did, video_id):
|
||||||
|
if did.startswith('did:web:'):
|
||||||
|
url = f'https://{did[8:]}/.well-known/did.json'
|
||||||
|
else:
|
||||||
|
url = f'https://plc.directory/{did}'
|
||||||
|
services = self._download_json(
|
||||||
|
url, video_id, 'Fetching service endpoint', 'Falling back to bsky.social', fatal=False)
|
||||||
|
return traverse_obj(
|
||||||
|
services, ('service', lambda _, x: x['type'] == 'AtprotoPersonalDataServer',
|
||||||
|
'serviceEndpoint', {url_or_none}, any)) or 'https://bsky.social'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
handle, video_id = self._match_valid_url(url).group('handle', 'id')
|
||||||
|
|
||||||
|
post = self._download_json(
|
||||||
|
'https://public.api.bsky.app/xrpc/app.bsky.feed.getPostThread',
|
||||||
|
video_id, query={
|
||||||
|
'uri': f'at://{handle}/app.bsky.feed.post/{video_id}',
|
||||||
|
'depth': 0,
|
||||||
|
'parentHeight': 0,
|
||||||
|
})['thread']['post']
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
# app.bsky.embed.video.view/app.bsky.embed.external.view
|
||||||
|
entries.extend(self._extract_videos(post, video_id))
|
||||||
|
# app.bsky.embed.recordWithMedia.view
|
||||||
|
entries.extend(self._extract_videos(
|
||||||
|
post, video_id, embed_path=('embed', 'media'), record_subpath=('embed', 'media')))
|
||||||
|
# app.bsky.embed.record.view
|
||||||
|
if nested_post := traverse_obj(post, ('embed', 'record', ('record', None), {dict}, any)):
|
||||||
|
entries.extend(self._extract_videos(
|
||||||
|
nested_post, video_id, embed_path=('embeds', 0), record_path='value'))
|
||||||
|
|
||||||
|
if not entries:
|
||||||
|
raise ExtractorError('No video could be found in this post', expected=True)
|
||||||
|
if len(entries) == 1:
|
||||||
|
return entries[0]
|
||||||
|
return self.playlist_result(entries, video_id)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _build_profile_url(path):
|
||||||
|
return format_field(path, None, 'https://bsky.app/profile/%s', default=None)
|
||||||
|
|
||||||
|
def _extract_videos(self, root, video_id, embed_path='embed', record_path='record', record_subpath='embed'):
|
||||||
|
embed_path = variadic(embed_path, (str, bytes, dict, set))
|
||||||
|
record_path = variadic(record_path, (str, bytes, dict, set))
|
||||||
|
record_subpath = variadic(record_subpath, (str, bytes, dict, set))
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
if external_uri := traverse_obj(root, (
|
||||||
|
((*record_path, *record_subpath), embed_path), 'external', 'uri', {url_or_none}, any)):
|
||||||
|
entries.append(self.url_result(external_uri))
|
||||||
|
if playlist := traverse_obj(root, (*embed_path, 'playlist', {url_or_none})):
|
||||||
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||||
|
playlist, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||||
|
else:
|
||||||
|
return entries
|
||||||
|
|
||||||
|
video_cid = traverse_obj(
|
||||||
|
root, (*embed_path, 'cid', {str}),
|
||||||
|
(*record_path, *record_subpath, 'video', 'ref', '$link', {str}))
|
||||||
|
did = traverse_obj(root, ('author', 'did', {str}))
|
||||||
|
|
||||||
|
if did and video_cid:
|
||||||
|
endpoint = self._get_service_endpoint(did, video_id)
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'blob',
|
||||||
|
'url': update_url_query(
|
||||||
|
self._BLOB_URL_TMPL.format(endpoint), {'did': did, 'cid': video_cid}),
|
||||||
|
**traverse_obj(root, (*embed_path, 'aspectRatio', {
|
||||||
|
'width': ('width', {int_or_none}),
|
||||||
|
'height': ('height', {int_or_none}),
|
||||||
|
})),
|
||||||
|
**traverse_obj(root, (*record_path, *record_subpath, 'video', {
|
||||||
|
'filesize': ('size', {int_or_none}),
|
||||||
|
'ext': ('mimeType', {mimetype2ext}),
|
||||||
|
})),
|
||||||
|
})
|
||||||
|
|
||||||
|
for sub_data in traverse_obj(root, (
|
||||||
|
*record_path, *record_subpath, 'captions', lambda _, v: v['file']['ref']['$link'])):
|
||||||
|
subtitles.setdefault(sub_data.get('lang') or 'und', []).append({
|
||||||
|
'url': update_url_query(
|
||||||
|
self._BLOB_URL_TMPL.format(endpoint), {'did': did, 'cid': sub_data['file']['ref']['$link']}),
|
||||||
|
'ext': traverse_obj(sub_data, ('file', 'mimeType', {mimetype2ext})),
|
||||||
|
})
|
||||||
|
|
||||||
|
entries.append({
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
**traverse_obj(root, {
|
||||||
|
'id': ('uri', {url_basename}),
|
||||||
|
'thumbnail': (*embed_path, 'thumbnail', {url_or_none}),
|
||||||
|
'alt_title': (*embed_path, 'alt', {str}, filter),
|
||||||
|
'uploader': ('author', 'displayName', {str}),
|
||||||
|
'uploader_id': ('author', 'handle', {str}),
|
||||||
|
'uploader_url': ('author', 'handle', {self._build_profile_url}),
|
||||||
|
'channel_id': ('author', 'did', {str}),
|
||||||
|
'channel_url': ('author', 'did', {self._build_profile_url}),
|
||||||
|
'like_count': ('likeCount', {int_or_none}),
|
||||||
|
'repost_count': ('repostCount', {int_or_none}),
|
||||||
|
'comment_count': ('replyCount', {int_or_none}),
|
||||||
|
'timestamp': ('indexedAt', {parse_iso8601}),
|
||||||
|
'tags': ('labels', ..., 'val', {str}, all, {orderedSet}),
|
||||||
|
'age_limit': (
|
||||||
|
'labels', ..., 'val', {lambda x: 18 if x in ('sexual', 'porn', 'graphic-media') else None}, any),
|
||||||
|
'description': (*record_path, 'text', {str}, filter),
|
||||||
|
'title': (*record_path, 'text', {lambda x: x.replace('\n', '')}, {truncate_string(left=50)}),
|
||||||
|
}),
|
||||||
|
})
|
||||||
|
return entries
|
|
@ -12,53 +12,86 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class CCMAIE(InfoExtractor):
|
class CCMAIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
|
IE_DESC = '3Cat, TV3 and Catalunya Ràdio'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?3cat\.cat/(?:3cat|tv3/sx3)/[^/?#]+/(?P<type>video|audio)/(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
|
# ccma.cat/tv3/alacarta/ URLs redirect to 3cat.cat/3cat/
|
||||||
|
'url': 'https://www.3cat.cat/3cat/lespot-de-la-marato-de-tv3/video/5630208/',
|
||||||
'md5': '7296ca43977c8ea4469e719c609b0871',
|
'md5': '7296ca43977c8ea4469e719c609b0871',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5630208',
|
'id': '5630208',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'L\'espot de La Marató de TV3',
|
'title': 'L\'espot de La Marató 2016: Ictus i les lesions medul·lars i cerebrals traumàtiques',
|
||||||
'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
|
'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
|
||||||
'timestamp': 1478608140,
|
'timestamp': 1478608140,
|
||||||
'upload_date': '20161108',
|
'upload_date': '20161108',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'alt_title': 'EsportMarató2016WEB_PerPublicar',
|
||||||
|
'duration': 79,
|
||||||
|
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/4/6/1478536106664.jpg',
|
||||||
|
'series': 'Dedicada a l\'ictus i les lesions medul·lars i cerebrals traumàtiques',
|
||||||
|
'categories': ['Divulgació'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
# ccma.cat/catradio/alacarta/ URLs redirect to 3cat.cat/3cat/
|
||||||
|
'url': 'https://www.3cat.cat/3cat/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
||||||
'md5': 'fa3e38f269329a278271276330261425',
|
'md5': 'fa3e38f269329a278271276330261425',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '943685',
|
'id': '943685',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'El Consell de Savis analitza el derbi',
|
'title': 'El Consell de Savis analitza el derbi',
|
||||||
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
|
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
|
||||||
'upload_date': '20170512',
|
'upload_date': '20161217',
|
||||||
'timestamp': 1494622500,
|
'timestamp': 1482011700,
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
'categories': ['Esports'],
|
'categories': ['Esports'],
|
||||||
|
'series': 'Tot gira',
|
||||||
|
'duration': 821,
|
||||||
|
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/8/9/1482002602598.jpg',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
|
'url': 'https://www.3cat.cat/3cat/crims-josep-tallada-lespereu-me-part-1/video/6031387/',
|
||||||
'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
|
'md5': '27493513d08a3e5605814aee9bb778d2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6031387',
|
'id': '6031387',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
|
'title': 'T1xC5 - Josep Talleda, l\'"Espereu-me" (part 1)',
|
||||||
'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
|
'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
|
||||||
'timestamp': 1582577700,
|
'timestamp': 1582577919,
|
||||||
'upload_date': '20200224',
|
'upload_date': '20200224',
|
||||||
'subtitles': 'mincount:4',
|
'subtitles': 'mincount:1',
|
||||||
'age_limit': 16,
|
'age_limit': 13,
|
||||||
'series': 'Crims',
|
'series': 'Crims',
|
||||||
|
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/1/9/1582564376991.jpg',
|
||||||
|
'duration': 3203,
|
||||||
|
'categories': ['Divulgació'],
|
||||||
|
'alt_title': 'Crims - 5 - Josep Talleda, l\'"Espereu-me" (1a part) - Josep Talleda, l\'"Espereu-me" (part 1)',
|
||||||
|
'episode_number': 5,
|
||||||
|
'episode': 'Episode 5',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.3cat.cat/tv3/sx3/una-mosca-volava-per-la-llum/video/5759227/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5759227',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Una mosca volava per la llum',
|
||||||
|
'alt_title': '17Z004Ç UNA MOSCA VOLAVA PER LA LLUM',
|
||||||
|
'description': 'md5:9ab64276944b0825336f4147f13f7854',
|
||||||
|
'series': 'Mic',
|
||||||
|
'upload_date': '20180411',
|
||||||
|
'timestamp': 1523440105,
|
||||||
|
'duration': 160,
|
||||||
|
'age_limit': 0,
|
||||||
|
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/6/1/1524071667216.jpg',
|
||||||
|
'categories': ['Música'],
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
media_type, media_id = self._match_valid_url(url).groups()
|
media_type, media_id = self._match_valid_url(url).group('type', 'id')
|
||||||
|
|
||||||
media = self._download_json(
|
media = self._download_json(
|
||||||
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
|
'http://api-media.3cat.cat/pvideo/media.jsp', media_id, query={
|
||||||
'media': media_type,
|
'media': media_type,
|
||||||
'idint': media_id,
|
'idint': media_id,
|
||||||
'format': 'dm',
|
'format': 'dm',
|
||||||
|
|
|
@ -146,19 +146,33 @@ class CHZZKVideoIE(InfoExtractor):
|
||||||
video_meta = self._download_json(
|
video_meta = self._download_json(
|
||||||
f'https://api.chzzk.naver.com/service/v3/videos/{video_id}', video_id,
|
f'https://api.chzzk.naver.com/service/v3/videos/{video_id}', video_id,
|
||||||
note='Downloading video info', errnote='Unable to download video info')['content']
|
note='Downloading video info', errnote='Unable to download video info')['content']
|
||||||
|
|
||||||
|
live_status = 'was_live' if video_meta.get('liveOpenDate') else 'not_live'
|
||||||
|
video_status = video_meta.get('vodStatus')
|
||||||
|
if video_status == 'UPLOAD':
|
||||||
|
playback = self._parse_json(video_meta['liveRewindPlaybackJson'], video_id)
|
||||||
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||||
|
playback['media'][0]['path'], video_id, 'mp4', m3u8_id='hls')
|
||||||
|
elif video_status == 'ABR_HLS':
|
||||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(
|
formats, subtitles = self._extract_mpd_formats_and_subtitles(
|
||||||
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id,
|
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}',
|
||||||
query={
|
video_id, query={
|
||||||
'key': video_meta['inKey'],
|
'key': video_meta['inKey'],
|
||||||
'env': 'real',
|
'env': 'real',
|
||||||
'lc': 'en_US',
|
'lc': 'en_US',
|
||||||
'cpl': 'en_US',
|
'cpl': 'en_US',
|
||||||
}, note='Downloading video playback', errnote='Unable to download video playback')
|
})
|
||||||
|
else:
|
||||||
|
self.raise_no_formats(
|
||||||
|
f'Unknown video status detected: "{video_status}"', expected=True, video_id=video_id)
|
||||||
|
formats, subtitles = [], {}
|
||||||
|
live_status = 'post_live' if live_status == 'was_live' else None
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
'live_status': live_status,
|
||||||
**traverse_obj(video_meta, {
|
**traverse_obj(video_meta, {
|
||||||
'title': ('videoTitle', {str}),
|
'title': ('videoTitle', {str}),
|
||||||
'thumbnail': ('thumbnailImageUrl', {url_or_none}),
|
'thumbnail': ('thumbnailImageUrl', {url_or_none}),
|
||||||
|
|
|
@ -1409,6 +1409,13 @@ class InfoExtractor:
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
self.write_debug(f'Using netrc for {netrc_machine} authentication')
|
self.write_debug(f'Using netrc for {netrc_machine} authentication')
|
||||||
|
|
||||||
|
# compat: <=py3.10: netrc cannot parse tokens as empty strings, will return `""` instead
|
||||||
|
# Ref: https://github.com/yt-dlp/yt-dlp/issues/11413
|
||||||
|
# https://github.com/python/cpython/commit/15409c720be0503131713e3d3abc1acd0da07378
|
||||||
|
if sys.version_info < (3, 11):
|
||||||
|
return tuple(x if x != '""' else '' for x in info[::2])
|
||||||
|
|
||||||
return info[0], info[2]
|
return info[0], info[2]
|
||||||
|
|
||||||
def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
|
def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
|
||||||
|
|
|
@ -10,11 +10,14 @@ from ..utils import (
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
age_restricted,
|
age_restricted,
|
||||||
clean_html,
|
clean_html,
|
||||||
|
extract_attributes,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
|
update_url,
|
||||||
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -99,10 +102,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||||
_VALID_URL = r'''(?ix)
|
_VALID_URL = r'''(?ix)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:(?:embed|swf|\#)/)|player(?:/\w+)?\.html\?)?video|swf)|
|
dai\.ly/|
|
||||||
(?:www\.)?lequipe\.fr/video
|
(?:
|
||||||
|
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}|
|
||||||
|
(?:www\.)?lequipe\.fr
|
||||||
|
)/
|
||||||
|
(?:
|
||||||
|
swf/(?!video)|
|
||||||
|
(?:(?:crawler|embed|swf)/)?video/|
|
||||||
|
player(?:/[\da-z]+)?\.html\?(?:video|(?P<is_playlist>playlist))=
|
||||||
)
|
)
|
||||||
[/=](?P<id>[^/?_&]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
|
)
|
||||||
|
(?P<id>[^/?_&#]+)(?:[\w-]*\?playlist=(?P<playlist_id>x[0-9a-z]+))?
|
||||||
'''
|
'''
|
||||||
IE_NAME = 'dailymotion'
|
IE_NAME = 'dailymotion'
|
||||||
_EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1']
|
_EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1']
|
||||||
|
@ -123,7 +134,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'tags': ['hollywood', 'celeb', 'celebrity', 'movies', 'red carpet'],
|
'tags': ['hollywood', 'celeb', 'celebrity', 'movies', 'red carpet'],
|
||||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1aXqIx58LKWQ/x1080',
|
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1cmt4ZcZ9KiM/x1080',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true',
|
'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true',
|
||||||
|
@ -142,7 +153,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'tags': ['en_quete_d_esprit'],
|
'tags': ['en_quete_d_esprit'],
|
||||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1YNg_RUl7ueu/x1080',
|
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1clTH6StrxMP/x1080',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
||||||
|
@ -217,6 +228,66 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video',
|
'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, { # playlist-only
|
||||||
|
'url': 'https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://geo.dailymotion.com/player/xmyye.html?video=x93blhi',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.dailymotion.com/crawler/video/x8u4owg',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.dailymotion.com/embed/video/x8u4owg',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://dai.ly/x94cnnk',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
_WEBPAGE_TESTS = [{
|
||||||
|
# https://geo.dailymotion.com/player/xmyye.html?video=x93blhi
|
||||||
|
'url': 'https://www.financialounge.com/video/2024/08/01/borse-europee-in-rosso-dopo-la-fed-a-milano-volano-mediobanca-e-tim-edizione-del-1-agosto/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'x93blhi',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'OnAir - 01/08/24',
|
||||||
|
'description': '',
|
||||||
|
'duration': 217,
|
||||||
|
'timestamp': 1722505658,
|
||||||
|
'upload_date': '20240801',
|
||||||
|
'uploader': 'Financialounge',
|
||||||
|
'uploader_id': 'x2vtgmm',
|
||||||
|
'age_limit': 0,
|
||||||
|
'tags': [],
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj
|
||||||
|
'url': 'https://www.cycleworld.com/blogs/ask-kevin/ducati-continues-to-evolve-with-v4/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'x7wdsj',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 50,
|
||||||
|
}, {
|
||||||
|
# https://www.dailymotion.com/crawler/video/x8u4owg
|
||||||
|
'url': 'https://www.leparisien.fr/environnement/video-le-veloto-la-voiture-a-pedales-qui-aimerait-se-faire-une-place-sur-les-routes-09-03-2024-KCYMCPM4WFHJXMSKBUI66UNFPU.php',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'x8u4owg',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'like_count': int,
|
||||||
|
'uploader': 'Le Parisien',
|
||||||
|
'thumbnail': 'https://www.leparisien.fr/resizer/ho_GwveeYftNkLwg_cEta--5Bv4=/1200x675/cloudfront-eu-central-1.images.arcpublishing.com/leparisien/BFXJNEBN75EUNHGYJLORUC3TX4.jpg',
|
||||||
|
'upload_date': '20240309',
|
||||||
|
'view_count': int,
|
||||||
|
'timestamp': 1709997866,
|
||||||
|
'age_limit': 0,
|
||||||
|
'uploader_id': 'x32f7b',
|
||||||
|
'title': 'VIDÉO. Le «\xa0véloto\xa0», la voiture à pédales qui aimerait se faire une place sur les routes',
|
||||||
|
'duration': 428.0,
|
||||||
|
'description': 'À bord du « véloto », l’alternative à la voiture pour la campagne',
|
||||||
|
'tags': ['biclou', 'vélo', 'véloto', 'campagne', 'voiture', 'environnement', 'véhicules intermédiaires'],
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
_COMMON_MEDIA_FIELDS = '''description
|
_COMMON_MEDIA_FIELDS = '''description
|
||||||
|
@ -232,15 +303,34 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||||
for mobj in re.finditer(
|
for mobj in re.finditer(
|
||||||
r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
|
r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
|
||||||
yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
|
yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'(?s)<script [^>]*\bsrc=(["\'])(?:https?:)?//[\w-]+\.dailymotion\.com/player/(?:(?!\1).)+\1[^>]*>', webpage):
|
||||||
|
attrs = extract_attributes(mobj.group(0))
|
||||||
|
player_url = url_or_none(attrs.get('src'))
|
||||||
|
if not player_url:
|
||||||
|
continue
|
||||||
|
player_url = player_url.replace('.js', '.html')
|
||||||
|
if player_url.startswith('//'):
|
||||||
|
player_url = f'https:{player_url}'
|
||||||
|
if video_id := attrs.get('data-video'):
|
||||||
|
query_string = f'video={video_id}'
|
||||||
|
elif playlist_id := attrs.get('data-playlist'):
|
||||||
|
query_string = f'playlist={playlist_id}'
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
yield update_url(player_url, query=query_string)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url, smuggled_data = unsmuggle_url(url)
|
url, smuggled_data = unsmuggle_url(url)
|
||||||
video_id, playlist_id = self._match_valid_url(url).groups()
|
video_id, is_playlist, playlist_id = self._match_valid_url(url).group('id', 'is_playlist', 'playlist_id')
|
||||||
|
|
||||||
|
if is_playlist: # We matched the playlist query param as video_id
|
||||||
|
playlist_id = video_id
|
||||||
|
video_id = None
|
||||||
|
|
||||||
if playlist_id:
|
|
||||||
if self._yes_playlist(playlist_id, video_id):
|
if self._yes_playlist(playlist_id, video_id):
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
'http://www.dailymotion.com/playlist/' + playlist_id,
|
f'http://www.dailymotion.com/playlist/{playlist_id}',
|
||||||
'DailymotionPlaylist', playlist_id)
|
'DailymotionPlaylist', playlist_id)
|
||||||
|
|
||||||
password = self.get_param('videopassword')
|
password = self.get_param('videopassword')
|
||||||
|
@ -282,6 +372,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||||
title = metadata['title']
|
title = metadata['title']
|
||||||
is_live = media.get('isOnAir')
|
is_live = media.get('isOnAir')
|
||||||
formats = []
|
formats = []
|
||||||
|
subtitles = {}
|
||||||
|
|
||||||
for quality, media_list in metadata['qualities'].items():
|
for quality, media_list in metadata['qualities'].items():
|
||||||
for m in media_list:
|
for m in media_list:
|
||||||
media_url = m.get('url')
|
media_url = m.get('url')
|
||||||
|
@ -289,8 +381,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||||
if not media_url or media_type == 'application/vnd.lumberjack.manifest':
|
if not media_url or media_type == 'application/vnd.lumberjack.manifest':
|
||||||
continue
|
continue
|
||||||
if media_type == 'application/x-mpegURL':
|
if media_type == 'application/x-mpegURL':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
fmt, subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False))
|
media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
|
||||||
|
formats.extend(fmt)
|
||||||
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
else:
|
else:
|
||||||
f = {
|
f = {
|
||||||
'url': media_url,
|
'url': media_url,
|
||||||
|
@ -310,20 +404,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||||
if not f.get('fps') and f['format_id'].endswith('@60'):
|
if not f.get('fps') and f['format_id'].endswith('@60'):
|
||||||
f['fps'] = 60
|
f['fps'] = 60
|
||||||
|
|
||||||
subtitles = {}
|
|
||||||
subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
|
subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
|
||||||
for subtitle_lang, subtitle in subtitles_data.items():
|
for subtitle_lang, subtitle in subtitles_data.items():
|
||||||
subtitles[subtitle_lang] = [{
|
subtitles[subtitle_lang] = [{
|
||||||
'url': subtitle_url,
|
'url': subtitle_url,
|
||||||
} for subtitle_url in subtitle.get('urls', [])]
|
} for subtitle_url in subtitle.get('urls', [])]
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = traverse_obj(metadata, (
|
||||||
for height, poster_url in metadata.get('posters', {}).items():
|
('posters', 'thumbnails'), {dict.items}, lambda _, v: url_or_none(v[1]), {
|
||||||
thumbnails.append({
|
'height': (0, {int_or_none}),
|
||||||
'height': int_or_none(height),
|
'id': (0, {str}),
|
||||||
'id': height,
|
'url': 1,
|
||||||
'url': poster_url,
|
}))
|
||||||
})
|
|
||||||
|
|
||||||
owner = metadata.get('owner') or {}
|
owner = metadata.get('owner') or {}
|
||||||
stats = media.get('stats') or {}
|
stats = media.get('stats') or {}
|
||||||
|
@ -447,7 +539,7 @@ class DailymotionSearchIE(DailymotionPlaylistBaseIE):
|
||||||
|
|
||||||
class DailymotionUserIE(DailymotionPlaylistBaseIE):
|
class DailymotionUserIE(DailymotionPlaylistBaseIE):
|
||||||
IE_NAME = 'dailymotion:user'
|
IE_NAME = 'dailymotion:user'
|
||||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search|crawler)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
|
|
@ -11,9 +11,12 @@ from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
traverse_obj,
|
int_or_none,
|
||||||
|
make_archive_id,
|
||||||
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class NFLBaseIE(InfoExtractor):
|
class NFLBaseIE(InfoExtractor):
|
||||||
|
@ -75,22 +78,15 @@ class NFLBaseIE(InfoExtractor):
|
||||||
'osVersion': '10.0',
|
'osVersion': '10.0',
|
||||||
}, separators=(',', ':')).encode()).decode(),
|
}, separators=(',', ':')).encode()).decode(),
|
||||||
'networkType': 'other',
|
'networkType': 'other',
|
||||||
'nflClaimGroupsToAdd': [],
|
'peacockUUID': 'undefined',
|
||||||
'nflClaimGroupsToRemove': [],
|
|
||||||
}
|
}
|
||||||
_ACCOUNT_INFO = {}
|
_ACCOUNT_INFO = {}
|
||||||
_API_KEY = None
|
_API_KEY = '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f'
|
||||||
|
|
||||||
_TOKEN = None
|
_TOKEN = None
|
||||||
_TOKEN_EXPIRY = 0
|
_TOKEN_EXPIRY = 0
|
||||||
|
|
||||||
def _get_account_info(self, url, slug):
|
def _get_account_info(self):
|
||||||
if not self._API_KEY:
|
|
||||||
webpage = self._download_webpage(url, slug, fatal=False) or ''
|
|
||||||
self._API_KEY = self._search_regex(
|
|
||||||
r'window\.gigyaApiKey\s*=\s*["\'](\w+)["\'];', webpage, 'API key',
|
|
||||||
fatal=False) or '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f'
|
|
||||||
|
|
||||||
cookies = self._get_cookies('https://auth-id.nfl.com/')
|
cookies = self._get_cookies('https://auth-id.nfl.com/')
|
||||||
login_token = traverse_obj(cookies, (
|
login_token = traverse_obj(cookies, (
|
||||||
(f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False)
|
(f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False)
|
||||||
|
@ -103,7 +99,7 @@ class NFLBaseIE(InfoExtractor):
|
||||||
'or else try using --cookies-from-browser instead', expected=True)
|
'or else try using --cookies-from-browser instead', expected=True)
|
||||||
|
|
||||||
account = self._download_json(
|
account = self._download_json(
|
||||||
'https://auth-id.nfl.com/accounts.getAccountInfo', slug,
|
'https://auth-id.nfl.com/accounts.getAccountInfo', None,
|
||||||
note='Downloading account info', data=urlencode_postdata({
|
note='Downloading account info', data=urlencode_postdata({
|
||||||
'include': 'profile,data',
|
'include': 'profile,data',
|
||||||
'lang': 'en',
|
'lang': 'en',
|
||||||
|
@ -111,7 +107,7 @@ class NFLBaseIE(InfoExtractor):
|
||||||
'sdk': 'js_latest',
|
'sdk': 'js_latest',
|
||||||
'login_token': login_token,
|
'login_token': login_token,
|
||||||
'authMode': 'cookie',
|
'authMode': 'cookie',
|
||||||
'pageURL': url,
|
'pageURL': 'https://www.nfl.com/',
|
||||||
'sdkBuild': traverse_obj(cookies, (
|
'sdkBuild': traverse_obj(cookies, (
|
||||||
'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'),
|
'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'),
|
||||||
'format': 'json',
|
'format': 'json',
|
||||||
|
@ -126,53 +122,76 @@ class NFLBaseIE(InfoExtractor):
|
||||||
if len(self._ACCOUNT_INFO) != 3:
|
if len(self._ACCOUNT_INFO) != 3:
|
||||||
raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True)
|
raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True)
|
||||||
|
|
||||||
def _get_auth_token(self, url, slug):
|
def _get_auth_token(self):
|
||||||
if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30):
|
if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30):
|
||||||
return
|
return
|
||||||
|
|
||||||
if not self._ACCOUNT_INFO:
|
|
||||||
self._get_account_info(url, slug)
|
|
||||||
|
|
||||||
token = self._download_json(
|
token = self._download_json(
|
||||||
'https://api.nfl.com/identity/v3/token%s' % (
|
'https://api.nfl.com/identity/v3/token%s' % (
|
||||||
'/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''),
|
'/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''),
|
||||||
slug, headers={'Content-Type': 'application/json'}, note='Downloading access token',
|
None, headers={'Content-Type': 'application/json'}, note='Downloading access token',
|
||||||
data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode())
|
data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode())
|
||||||
|
|
||||||
self._TOKEN = token['accessToken']
|
self._TOKEN = token['accessToken']
|
||||||
self._TOKEN_EXPIRY = token['expiresIn']
|
self._TOKEN_EXPIRY = token['expiresIn']
|
||||||
self._ACCOUNT_INFO['refreshToken'] = token['refreshToken']
|
self._ACCOUNT_INFO['refreshToken'] = token['refreshToken']
|
||||||
|
|
||||||
|
def _extract_video(self, mcp_id, is_live=False):
|
||||||
|
self._get_auth_token()
|
||||||
|
data = self._download_json(
|
||||||
|
f'https://api.nfl.com/play/v1/asset/{mcp_id}', mcp_id, headers={
|
||||||
|
'Authorization': f'Bearer {self._TOKEN}',
|
||||||
|
'Accept': 'application/json',
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
}, data=json.dumps({'init': True, 'live': is_live}, separators=(',', ':')).encode())
|
||||||
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||||
|
data['accessUrl'], mcp_id, 'mp4', m3u8_id='hls')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': mcp_id,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'is_live': is_live,
|
||||||
|
'_old_archive_ids': [make_archive_id(AnvatoIE, mcp_id)],
|
||||||
|
**traverse_obj(data, ('metadata', {
|
||||||
|
'title': ('event', ('def_title', 'friendlyName'), {str}, any),
|
||||||
|
'description': ('event', 'def_description', {str}),
|
||||||
|
'duration': ('event', 'duration', {int_or_none}),
|
||||||
|
'thumbnails': ('thumbnails', ..., 'url', {'url': {url_or_none}}),
|
||||||
|
})),
|
||||||
|
}
|
||||||
|
|
||||||
def _parse_video_config(self, video_config, display_id):
|
def _parse_video_config(self, video_config, display_id):
|
||||||
video_config = self._parse_json(video_config, display_id)
|
video_config = self._parse_json(video_config, display_id)
|
||||||
|
is_live = traverse_obj(video_config, ('live', {bool})) or False
|
||||||
item = video_config['playlist'][0]
|
item = video_config['playlist'][0]
|
||||||
mcp_id = item.get('mcpID')
|
if mcp_id := item.get('mcpID'):
|
||||||
if mcp_id:
|
return self._extract_video(mcp_id, is_live=is_live)
|
||||||
info = self.url_result(f'{self._ANVATO_PREFIX}{mcp_id}', AnvatoIE, mcp_id)
|
|
||||||
else:
|
info = {'id': item.get('id') or item['entityId']}
|
||||||
media_id = item.get('id') or item['entityId']
|
|
||||||
title = item.get('title')
|
|
||||||
item_url = item['url']
|
item_url = item['url']
|
||||||
info = {'id': media_id}
|
|
||||||
ext = determine_ext(item_url)
|
ext = determine_ext(item_url)
|
||||||
if ext == 'm3u8':
|
if ext == 'm3u8':
|
||||||
info['formats'] = self._extract_m3u8_formats(item_url, media_id, 'mp4')
|
info['formats'] = self._extract_m3u8_formats(item_url, info['id'], 'mp4')
|
||||||
else:
|
else:
|
||||||
info['url'] = item_url
|
info['url'] = item_url
|
||||||
if item.get('audio') is True:
|
if item.get('audio') is True:
|
||||||
info['vcodec'] = 'none'
|
info['vcodec'] = 'none'
|
||||||
is_live = video_config.get('live') is True
|
|
||||||
thumbnails = None
|
thumbnails = None
|
||||||
image_url = item.get(item.get('imageSrc')) or item.get(item.get('posterImage'))
|
if image_url := traverse_obj(item, 'imageSrc', 'posterImage', expected_type=url_or_none):
|
||||||
if image_url:
|
|
||||||
thumbnails = [{
|
thumbnails = [{
|
||||||
'url': image_url,
|
'url': image_url,
|
||||||
'ext': determine_ext(image_url, 'jpg'),
|
'ext': determine_ext(image_url, 'jpg'),
|
||||||
}]
|
}]
|
||||||
|
|
||||||
info.update({
|
info.update({
|
||||||
'title': title,
|
**traverse_obj(item, {
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'description': ('description', {clean_html}),
|
||||||
|
}),
|
||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
'description': clean_html(item.get('description')),
|
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
})
|
})
|
||||||
return info
|
return info
|
||||||
|
@ -188,24 +207,20 @@ class NFLIE(NFLBaseIE):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14",
|
'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14",
|
||||||
'description': 'md5:85e05a3cc163f8c344340f220521136d',
|
'description': 'md5:85e05a3cc163f8c344340f220521136d',
|
||||||
'upload_date': '20201215',
|
'thumbnail': r're:https?://.+\.jpg',
|
||||||
'timestamp': 1608009755,
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'uploader': 'NFL',
|
|
||||||
'tags': 'count:6',
|
|
||||||
'duration': 157,
|
'duration': 157,
|
||||||
'categories': 'count:3',
|
'_old_archive_ids': ['anvato 899441'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown',
|
'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown',
|
||||||
'md5': '6886b32c24b463038c760ceb55a34566',
|
'md5': '92a517f05bd3eb50fe50244bc621aec8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'd87e8790-3e14-11eb-8ceb-ff05c2867f99',
|
'id': '8b7c3625-a461-4751-8db4-85f536f2bbd0',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown',
|
'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown',
|
||||||
'description': 'md5:12ada8ee70e6762658c30e223e095075',
|
'description': 'md5:12ada8ee70e6762658c30e223e095075',
|
||||||
|
'thumbnail': 'https://static.clubs.nfl.com/image/private/t_editorial_landscape_12_desktop/v1571153441/chiefs/rfljejccnyhhkpkfq855',
|
||||||
},
|
},
|
||||||
'skip': 'HTTP Error 404: Not Found',
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14',
|
'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -236,13 +251,16 @@ class NFLArticleIE(NFLBaseIE):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
entries = []
|
|
||||||
|
def entries():
|
||||||
for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
|
for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
|
||||||
entries.append(self._parse_video_config(video_config, display_id))
|
yield self._parse_video_config(video_config, display_id)
|
||||||
|
|
||||||
title = clean_html(get_element_by_class(
|
title = clean_html(get_element_by_class(
|
||||||
'nfl-c-article__title', webpage)) or self._html_search_meta(
|
'nfl-c-article__title', webpage)) or self._html_search_meta(
|
||||||
['og:title', 'twitter:title'], webpage)
|
['og:title', 'twitter:title'], webpage)
|
||||||
return self.playlist_result(entries, display_id, title)
|
|
||||||
|
return self.playlist_result(entries(), display_id, title)
|
||||||
|
|
||||||
|
|
||||||
class NFLPlusReplayIE(NFLBaseIE):
|
class NFLPlusReplayIE(NFLBaseIE):
|
||||||
|
@ -307,6 +325,9 @@ class NFLPlusReplayIE(NFLBaseIE):
|
||||||
'all_22': 'All-22',
|
'all_22': 'All-22',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._get_account_info()
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
slug, video_id = self._match_valid_url(url).group('slug', 'id')
|
slug, video_id = self._match_valid_url(url).group('slug', 'id')
|
||||||
requested_types = self._configuration_arg('type', ['all'])
|
requested_types = self._configuration_arg('type', ['all'])
|
||||||
|
@ -315,7 +336,7 @@ class NFLPlusReplayIE(NFLBaseIE):
|
||||||
requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types))
|
requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types))
|
||||||
|
|
||||||
if not video_id:
|
if not video_id:
|
||||||
self._get_auth_token(url, slug)
|
self._get_auth_token()
|
||||||
headers = {'Authorization': f'Bearer {self._TOKEN}'}
|
headers = {'Authorization': f'Bearer {self._TOKEN}'}
|
||||||
game_id = self._download_json(
|
game_id = self._download_json(
|
||||||
f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug,
|
f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug,
|
||||||
|
@ -328,14 +349,13 @@ class NFLPlusReplayIE(NFLBaseIE):
|
||||||
'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False)
|
'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False)
|
||||||
|
|
||||||
if video_id:
|
if video_id:
|
||||||
return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
|
return self._extract_video(video_id)
|
||||||
|
|
||||||
def entries():
|
def entries():
|
||||||
for replay in traverse_obj(
|
for replay in traverse_obj(
|
||||||
replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types),
|
replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types),
|
||||||
):
|
):
|
||||||
video_id = replay['mcpPlaybackId']
|
yield self._extract_video(replay['mcpPlaybackId'])
|
||||||
yield self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
|
|
||||||
|
|
||||||
return self.playlist_result(entries(), slug)
|
return self.playlist_result(entries(), slug)
|
||||||
|
|
||||||
|
@ -362,12 +382,15 @@ class NFLPlusEpisodeIE(NFLBaseIE):
|
||||||
'params': {'skip_download': 'm3u8'},
|
'params': {'skip_download': 'm3u8'},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._get_account_info()
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
slug = self._match_id(url)
|
slug = self._match_id(url)
|
||||||
self._get_auth_token(url, slug)
|
self._get_auth_token()
|
||||||
video_id = self._download_json(
|
video_id = self._download_json(
|
||||||
f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={
|
f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={
|
||||||
'Authorization': f'Bearer {self._TOKEN}',
|
'Authorization': f'Bearer {self._TOKEN}',
|
||||||
})['mcpPlaybackId']
|
})['mcpPlaybackId']
|
||||||
|
|
||||||
return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
|
return self._extract_video(video_id)
|
||||||
|
|
|
@ -208,7 +208,6 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||||
|
|
||||||
def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False):
|
def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False):
|
||||||
track_id = str(info['id'])
|
track_id = str(info['id'])
|
||||||
title = info['title']
|
|
||||||
|
|
||||||
format_urls = set()
|
format_urls = set()
|
||||||
formats = []
|
formats = []
|
||||||
|
@ -367,7 +366,7 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||||
'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
|
'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
|
||||||
'uploader_url': user.get('permalink_url'),
|
'uploader_url': user.get('permalink_url'),
|
||||||
'timestamp': unified_timestamp(info.get('created_at')),
|
'timestamp': unified_timestamp(info.get('created_at')),
|
||||||
'title': title,
|
'title': info.get('title'),
|
||||||
'description': info.get('description'),
|
'description': info.get('description'),
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'duration': float_or_none(info.get('duration'), 1000),
|
'duration': float_or_none(info.get('duration'), 1000),
|
||||||
|
@ -377,7 +376,8 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||||
'like_count': extract_count('favoritings') or extract_count('likes'),
|
'like_count': extract_count('favoritings') or extract_count('likes'),
|
||||||
'comment_count': extract_count('comment'),
|
'comment_count': extract_count('comment'),
|
||||||
'repost_count': extract_count('reposts'),
|
'repost_count': extract_count('reposts'),
|
||||||
'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)),
|
'genres': traverse_obj(info, ('genre', {str}, filter, all, filter)),
|
||||||
|
'artists': traverse_obj(info, ('publisher_metadata', 'artist', {str}, filter, all, filter)),
|
||||||
'formats': formats if not extract_flat else None,
|
'formats': formats if not extract_flat else None,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -429,7 +429,6 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
|
'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
|
||||||
'uploader_url': 'https://soundcloud.com/ethmusic',
|
'uploader_url': 'https://soundcloud.com/ethmusic',
|
||||||
'genres': [],
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# geo-restricted
|
# geo-restricted
|
||||||
|
@ -453,6 +452,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||||
'uploader_url': 'https://soundcloud.com/the-concept-band',
|
'uploader_url': 'https://soundcloud.com/the-concept-band',
|
||||||
'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
|
'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
|
||||||
'genres': ['Alternative'],
|
'genres': ['Alternative'],
|
||||||
|
'artists': ['The Royal Concept'],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# private link
|
# private link
|
||||||
|
@ -525,6 +525,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'genres': ['Dance & EDM'],
|
'genres': ['Dance & EDM'],
|
||||||
|
'artists': ['80M'],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# private link, downloadable format
|
# private link, downloadable format
|
||||||
|
@ -549,6 +550,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||||
'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
|
'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
|
||||||
'uploader_url': 'https://soundcloud.com/oriuplift',
|
'uploader_url': 'https://soundcloud.com/oriuplift',
|
||||||
'genres': ['Trance'],
|
'genres': ['Trance'],
|
||||||
|
'artists': ['Ori Uplift'],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# no album art, use avatar pic for thumbnail
|
# no album art, use avatar pic for thumbnail
|
||||||
|
@ -572,7 +574,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'uploader_url': 'https://soundcloud.com/garyvee',
|
'uploader_url': 'https://soundcloud.com/garyvee',
|
||||||
'genres': [],
|
'artists': ['MadReal'],
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
|
|
@ -3,6 +3,7 @@ from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -99,7 +100,6 @@ class TumblrIE(InfoExtractor):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'tags': [],
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'note': 'dashboard only (external)',
|
'note': 'dashboard only (external)',
|
||||||
|
@ -315,6 +315,7 @@ class TumblrIE(InfoExtractor):
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'uploader_id': 'x32m6ye',
|
'uploader_id': 'x32m6ye',
|
||||||
'duration': 20,
|
'duration': 20,
|
||||||
|
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Wtqh01cnxKNXLG1N8/x1080',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'note': 'tiktok video embed',
|
'note': 'tiktok video embed',
|
||||||
|
@ -325,7 +326,7 @@ class TumblrIE(InfoExtractor):
|
||||||
'artists': ['Alicia Dreaming'],
|
'artists': ['Alicia Dreaming'],
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'thumbnail': 'https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/d9a4add712dd4082b3878022b101b1d6?lk3s=81f88b70&x-expires=1730228400&x-signature=O9lxD6AzDc2OcllC3%2BADfFVzsjw%3D&shp=81f88b70&shcp=-',
|
'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
|
||||||
'channel_id': 'MS4wLjABAAAAsJohwz_dU4KfAOc61cbGDAZ46-5hg2ANTXVQlRe1ipDhpX08PywR3PPiple1NTAo',
|
'channel_id': 'MS4wLjABAAAAsJohwz_dU4KfAOc61cbGDAZ46-5hg2ANTXVQlRe1ipDhpX08PywR3PPiple1NTAo',
|
||||||
'uploader': 'aliciadreaming',
|
'uploader': 'aliciadreaming',
|
||||||
'description': 'huge casting news Greyworm will be #louisdulac #racebending #interviewwiththevampire',
|
'description': 'huge casting news Greyworm will be #louisdulac #racebending #interviewwiththevampire',
|
||||||
|
@ -445,8 +446,7 @@ class TumblrIE(InfoExtractor):
|
||||||
f'https://www.tumblr.com/api/v2/blog/{blog}/posts/{video_id}/permalink',
|
f'https://www.tumblr.com/api/v2/blog/{blog}/posts/{video_id}/permalink',
|
||||||
video_id, headers={'Authorization': f'Bearer {self._ACCESS_TOKEN}'}, fatal=False),
|
video_id, headers={'Authorization': f'Bearer {self._ACCESS_TOKEN}'}, fatal=False),
|
||||||
('response', 'timeline', 'elements', 0, {dict})) or {}
|
('response', 'timeline', 'elements', 0, {dict})) or {}
|
||||||
content_json = traverse_obj(
|
content_json = traverse_obj(post_json, ((('trail', 0), None), 'content', ..., {dict}))
|
||||||
post_json, ('trail', 0, 'content', ...), ('content', ...), expected_type=dict)
|
|
||||||
|
|
||||||
# the url we're extracting from might be an original post or it might be a reblog.
|
# the url we're extracting from might be an original post or it might be a reblog.
|
||||||
# if it's a reblog, og:description will be the reblogger's comment, not the uploader's.
|
# if it's a reblog, og:description will be the reblogger's comment, not the uploader's.
|
||||||
|
@ -465,10 +465,12 @@ class TumblrIE(InfoExtractor):
|
||||||
'description': description,
|
'description': description,
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
'uploader_url': f'https://{uploader_id}.tumblr.com/' if uploader_id else None,
|
'uploader_url': f'https://{uploader_id}.tumblr.com/' if uploader_id else None,
|
||||||
'like_count': post_json.get('like_count'),
|
**traverse_obj(post_json, {
|
||||||
'repost_count': post_json.get('reblog_count'),
|
'like_count': ('like_count', {int_or_none}),
|
||||||
|
'repost_count': ('reblog_count', {int_or_none}),
|
||||||
|
'tags': ('tags', ..., {str}),
|
||||||
|
}),
|
||||||
'age_limit': {True: 18, False: 0}.get(post_json.get('is_nsfw')),
|
'age_limit': {True: 18, False: 0}.get(post_json.get('is_nsfw')),
|
||||||
'tags': post_json.get('tags'),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# for tumblr's own video hosting
|
# for tumblr's own video hosting
|
||||||
|
@ -549,14 +551,13 @@ class TumblrIE(InfoExtractor):
|
||||||
|
|
||||||
if formats:
|
if formats:
|
||||||
# tumblr's own video is always above embeds
|
# tumblr's own video is always above embeds
|
||||||
entries = [{
|
entries.insert(0, {
|
||||||
**info_dict,
|
**info_dict,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'thumbnail': traverse_obj(
|
'thumbnail': (traverse_obj(video_json, ('poster', 0, 'url', {url_or_none}))
|
||||||
video_json, ('poster', 0, 'url')) or self._og_search_thumbnail(
|
or self._og_search_thumbnail(webpage, default=None)),
|
||||||
webpage, default=None)},
|
})
|
||||||
*entries]
|
|
||||||
|
|
||||||
if ignored_providers:
|
if ignored_providers:
|
||||||
if not entries:
|
if not entries:
|
||||||
|
@ -566,14 +567,15 @@ class TumblrIE(InfoExtractor):
|
||||||
if unknown_providers:
|
if unknown_providers:
|
||||||
self.report_warning(f'Unrecognized providers, please report: {", ".join(unknown_providers)!s}', video_id)
|
self.report_warning(f'Unrecognized providers, please report: {", ".join(unknown_providers)!s}', video_id)
|
||||||
|
|
||||||
if len(entries) > 1:
|
if not entries:
|
||||||
|
self.raise_no_formats('No video could be found in this post', expected=True, video_id=video_id)
|
||||||
|
if len(entries) == 1:
|
||||||
|
return {
|
||||||
|
**info_dict,
|
||||||
|
**entries[0],
|
||||||
|
}
|
||||||
return {
|
return {
|
||||||
**info_dict,
|
**info_dict,
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
'entries': entries,
|
'entries': entries,
|
||||||
}
|
}
|
||||||
else:
|
|
||||||
return {
|
|
||||||
**info_dict,
|
|
||||||
**entries[0],
|
|
||||||
}
|
|
||||||
|
|
|
@ -150,14 +150,6 @@ class TwitterBaseIE(InfoExtractor):
|
||||||
def is_logged_in(self):
|
def is_logged_in(self):
|
||||||
return bool(self._get_cookies(self._API_BASE).get('auth_token'))
|
return bool(self._get_cookies(self._API_BASE).get('auth_token'))
|
||||||
|
|
||||||
# XXX: Temporary workaround until twitter.com => x.com migration is completed
|
|
||||||
def _real_initialize(self):
|
|
||||||
if self.is_logged_in or not self._get_cookies('https://twitter.com/').get('auth_token'):
|
|
||||||
return
|
|
||||||
# User has not yet been migrated to x.com and has passed twitter.com cookies
|
|
||||||
TwitterBaseIE._API_BASE = 'https://api.twitter.com/1.1/'
|
|
||||||
TwitterBaseIE._GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
|
|
||||||
|
|
||||||
@functools.cached_property
|
@functools.cached_property
|
||||||
def _selected_api(self):
|
def _selected_api(self):
|
||||||
return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]
|
return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .kaltura import KalturaIE
|
from .kaltura import KalturaIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
traverse_obj,
|
|
||||||
unified_strdate,
|
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class YleAreenaIE(InfoExtractor):
|
class YleAreenaIE(InfoExtractor):
|
||||||
|
@ -15,9 +16,9 @@ class YleAreenaIE(InfoExtractor):
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'https://areena.yle.fi/1-4371942',
|
'url': 'https://areena.yle.fi/1-4371942',
|
||||||
'md5': '932edda0ecf5dfd6423804182d32f8ac',
|
'md5': 'd87e9a1e74e67e009990ddd413e426b4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0_a3tjk92c',
|
'id': '1-4371942',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Pouchit',
|
'title': 'Pouchit',
|
||||||
'description': 'md5:01071d7056ceec375f63960f90c35366',
|
'description': 'md5:01071d7056ceec375f63960f90c35366',
|
||||||
|
@ -26,37 +27,27 @@ class YleAreenaIE(InfoExtractor):
|
||||||
'season_number': 1,
|
'season_number': 1,
|
||||||
'episode': 'Episode 2',
|
'episode': 'Episode 2',
|
||||||
'episode_number': 2,
|
'episode_number': 2,
|
||||||
'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/0_a3tjk92c/version/100061',
|
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
|
||||||
'uploader_id': 'ovp@yle.fi',
|
|
||||||
'duration': 1435,
|
|
||||||
'view_count': int,
|
|
||||||
'upload_date': '20181204',
|
|
||||||
'release_date': '20190106',
|
|
||||||
'timestamp': 1543916210,
|
|
||||||
'subtitles': {'fin': [{'url': r're:^https?://', 'ext': 'srt'}]},
|
|
||||||
'age_limit': 7,
|
'age_limit': 7,
|
||||||
'webpage_url': 'https://areena.yle.fi/1-4371942',
|
'release_date': '20190105',
|
||||||
|
'release_timestamp': 1546725660,
|
||||||
|
'duration': 1435,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'https://areena.yle.fi/1-2158940',
|
'url': 'https://areena.yle.fi/1-2158940',
|
||||||
'md5': 'cecb603661004e36af8c5188b5212b12',
|
'md5': '6369ddc5e07b5fdaeda27a495184143c',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1_l38iz9ur',
|
'id': '1-2158940',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Albi haluaa vessan',
|
'title': 'Albi haluaa vessan',
|
||||||
'description': 'md5:15236d810c837bed861fae0e88663c33',
|
'description': 'Albi haluaa vessan.',
|
||||||
'series': 'Albi Lumiukko',
|
'series': 'Albi Lumiukko',
|
||||||
'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/1_l38iz9ur/version/100021',
|
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
|
||||||
'uploader_id': 'ovp@yle.fi',
|
|
||||||
'duration': 319,
|
|
||||||
'view_count': int,
|
|
||||||
'upload_date': '20211202',
|
|
||||||
'release_date': '20211215',
|
|
||||||
'timestamp': 1638448202,
|
|
||||||
'subtitles': {},
|
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'webpage_url': 'https://areena.yle.fi/1-2158940',
|
'release_date': '20211215',
|
||||||
|
'release_timestamp': 1639555200,
|
||||||
|
'duration': 319,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -67,72 +58,125 @@ class YleAreenaIE(InfoExtractor):
|
||||||
'title': 'HKO & Mälkki & Tanner',
|
'title': 'HKO & Mälkki & Tanner',
|
||||||
'description': 'md5:b4f1b1af2c6569b33f75179a86eea156',
|
'description': 'md5:b4f1b1af2c6569b33f75179a86eea156',
|
||||||
'series': 'Helsingin kaupunginorkesterin konsertteja',
|
'series': 'Helsingin kaupunginorkesterin konsertteja',
|
||||||
'thumbnail': r're:^https?://.+\.jpg$',
|
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
|
||||||
'release_date': '20230120',
|
'release_date': '20230120',
|
||||||
|
'release_timestamp': 1674242079,
|
||||||
|
'duration': 8004,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': 'm3u8',
|
'skip_download': 'm3u8',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://areena.yle.fi/1-72251830',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1-72251830',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': r're:Pentulive 2024 | Pentulive \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
|
||||||
|
'description': 'md5:1f118707d9093bf894a34fbbc865397b',
|
||||||
|
'series': 'Pentulive',
|
||||||
|
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
|
||||||
|
'live_status': 'is_live',
|
||||||
|
'release_date': '20241025',
|
||||||
|
'release_timestamp': 1729875600,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'livestream',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://areena.yle.fi/podcastit/1-71022852',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1-71022852',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Värityspäivä',
|
||||||
|
'description': 'md5:c3a02b0455ec71d32cbe09d32ec161e2',
|
||||||
|
'series': 'Murun ja Paukun ikioma kaupunki',
|
||||||
|
'episode': 'Episode 1',
|
||||||
|
'episode_number': 1,
|
||||||
|
'release_date': '20240607',
|
||||||
|
'release_timestamp': 1717736400,
|
||||||
|
'duration': 442,
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id, is_podcast = self._match_valid_url(url).group('id', 'podcast')
|
video_id, is_podcast = self._match_valid_url(url).group('id', 'podcast')
|
||||||
info = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={})
|
json_ld = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={})
|
||||||
video_data = self._download_json(
|
video_data = self._download_json(
|
||||||
f'https://player.api.yle.fi/v1/preview/{video_id}.json?app_id=player_static_prod&app_key=8930d72170e48303cf5f3867780d549b',
|
f'https://player.api.yle.fi/v1/preview/{video_id}.json?app_id=player_static_prod&app_key=8930d72170e48303cf5f3867780d549b',
|
||||||
video_id, headers={
|
video_id, headers={
|
||||||
'origin': 'https://areena.yle.fi',
|
'origin': 'https://areena.yle.fi',
|
||||||
'referer': 'https://areena.yle.fi/',
|
'referer': 'https://areena.yle.fi/',
|
||||||
'content-type': 'application/json',
|
'content-type': 'application/json',
|
||||||
})
|
})['data']
|
||||||
|
|
||||||
# Example title: 'K1, J2: Pouchit | Modernit miehet'
|
# Example title: 'K1, J2: Pouchit | Modernit miehet'
|
||||||
season_number, episode_number, episode, series = self._search_regex(
|
season_number, episode_number, episode, series = self._search_regex(
|
||||||
r'K(?P<season_no>\d+),\s*J(?P<episode_no>\d+):?\s*\b(?P<episode>[^|]+)\s*|\s*(?P<series>.+)',
|
r'K(?P<season_no>\d+),\s*J(?P<episode_no>\d+):?\s*\b(?P<episode>[^|]+)\s*|\s*(?P<series>.+)',
|
||||||
info.get('title') or '', 'episode metadata', group=('season_no', 'episode_no', 'episode', 'series'),
|
json_ld.get('title') or '', 'episode metadata', group=('season_no', 'episode_no', 'episode', 'series'),
|
||||||
default=(None, None, None, None))
|
default=(None, None, None, None))
|
||||||
description = traverse_obj(video_data, ('data', 'ongoing_ondemand', 'description', 'fin'), expected_type=str)
|
description = traverse_obj(video_data, ('ongoing_ondemand', 'description', 'fin', {str}))
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for sub in traverse_obj(video_data, ('data', 'ongoing_ondemand', 'subtitles', ...)):
|
for sub in traverse_obj(video_data, ('ongoing_ondemand', 'subtitles', lambda _, v: url_or_none(v['uri']))):
|
||||||
if url_or_none(sub.get('uri')):
|
|
||||||
subtitles.setdefault(sub.get('language') or 'und', []).append({
|
subtitles.setdefault(sub.get('language') or 'und', []).append({
|
||||||
'url': sub['uri'],
|
'url': sub['uri'],
|
||||||
'ext': 'srt',
|
'ext': 'srt',
|
||||||
'name': sub.get('kind'),
|
'name': sub.get('kind'),
|
||||||
})
|
})
|
||||||
|
|
||||||
if is_podcast:
|
info_dict, metadata = {}, {}
|
||||||
info_dict = {
|
if is_podcast and traverse_obj(video_data, ('ongoing_ondemand', 'media_url', {url_or_none})):
|
||||||
'url': video_data['data']['ongoing_ondemand']['media_url'],
|
metadata = video_data['ongoing_ondemand']
|
||||||
}
|
info_dict['url'] = metadata['media_url']
|
||||||
elif kaltura_id := traverse_obj(video_data, ('data', 'ongoing_ondemand', 'kaltura', 'id', {str})):
|
elif traverse_obj(video_data, ('ongoing_event', 'manifest_url', {url_or_none})):
|
||||||
info_dict = {
|
metadata = video_data['ongoing_event']
|
||||||
|
metadata.pop('duration', None) # Duration is not accurate for livestreams
|
||||||
|
info_dict['live_status'] = 'is_live'
|
||||||
|
elif traverse_obj(video_data, ('ongoing_ondemand', 'manifest_url', {url_or_none})):
|
||||||
|
metadata = video_data['ongoing_ondemand']
|
||||||
|
# XXX: Has all externally-hosted Kaltura content been moved to native hosting?
|
||||||
|
elif kaltura_id := traverse_obj(video_data, ('ongoing_ondemand', 'kaltura', 'id', {str})):
|
||||||
|
metadata = video_data['ongoing_ondemand']
|
||||||
|
info_dict.update({
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': smuggle_url(f'kaltura:1955031:{kaltura_id}', {'source_url': url}),
|
'url': smuggle_url(f'kaltura:1955031:{kaltura_id}', {'source_url': url}),
|
||||||
'ie_key': KalturaIE.ie_key(),
|
'ie_key': KalturaIE.ie_key(),
|
||||||
}
|
})
|
||||||
|
elif traverse_obj(video_data, ('gone', {dict})):
|
||||||
|
self.raise_no_formats('The content is no longer available', expected=True, video_id=video_id)
|
||||||
|
metadata = video_data['gone']
|
||||||
else:
|
else:
|
||||||
formats, subs = self._extract_m3u8_formats_and_subtitles(
|
raise ExtractorError('Unable to extract content')
|
||||||
video_data['data']['ongoing_ondemand']['manifest_url'], video_id, 'mp4', m3u8_id='hls')
|
|
||||||
|
if not info_dict.get('url') and metadata.get('manifest_url'):
|
||||||
|
info_dict['formats'], subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
|
metadata['manifest_url'], video_id, 'mp4', m3u8_id='hls')
|
||||||
self._merge_subtitles(subs, target=subtitles)
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
info_dict = {'formats': formats}
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
**info_dict,
|
**traverse_obj(json_ld, {
|
||||||
|
'title': 'title',
|
||||||
|
'thumbnails': ('thumbnails', ..., {'url': 'url'}),
|
||||||
|
}),
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'title', 'fin'), expected_type=str)
|
'title': episode,
|
||||||
or episode or info.get('title')),
|
|
||||||
'description': description,
|
'description': description,
|
||||||
'series': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'series', 'title', 'fin'), expected_type=str)
|
'series': series,
|
||||||
or series),
|
|
||||||
'season_number': (int_or_none(self._search_regex(r'Kausi (\d+)', description, 'season number', default=None))
|
'season_number': (int_or_none(self._search_regex(r'Kausi (\d+)', description, 'season number', default=None))
|
||||||
or int_or_none(season_number)),
|
or int_or_none(season_number)),
|
||||||
'episode_number': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'episode_number'), expected_type=int_or_none)
|
'episode_number': int_or_none(episode_number),
|
||||||
or int_or_none(episode_number)),
|
|
||||||
'thumbnails': traverse_obj(info, ('thumbnails', ..., {'url': 'url'})),
|
|
||||||
'age_limit': traverse_obj(video_data, ('data', 'ongoing_ondemand', 'content_rating', 'age_restriction'), expected_type=int_or_none),
|
|
||||||
'subtitles': subtitles or None,
|
'subtitles': subtitles or None,
|
||||||
'release_date': unified_strdate(traverse_obj(video_data, ('data', 'ongoing_ondemand', 'start_time'), expected_type=str)),
|
**traverse_obj(metadata, {
|
||||||
|
'title': ('title', 'fin', {str}),
|
||||||
|
'description': ('description', 'fin', {str}),
|
||||||
|
'series': ('series', 'title', 'fin', {str}),
|
||||||
|
'episode_number': ('episode_number', {int_or_none}),
|
||||||
|
'age_limit': ('content_rating', 'age_restriction', {int_or_none}),
|
||||||
|
'release_timestamp': ('start_time', {parse_iso8601}),
|
||||||
|
'duration': ('duration', 'duration_in_seconds', {int_or_none}),
|
||||||
|
}),
|
||||||
|
**info_dict,
|
||||||
}
|
}
|
||||||
|
|
|
@ -644,13 +644,14 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE] = {}
|
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE] = {}
|
||||||
|
|
||||||
if refresh_token:
|
if refresh_token:
|
||||||
refresh_token = refresh_token.strip('\'') or None
|
msg = f'{self._OAUTH_DISPLAY_ID}: Using password input as refresh token'
|
||||||
|
if self.get_param('cachedir') is not False:
|
||||||
# Allow refresh token passed to initialize cache
|
msg += ' and caching token to disk; you should supply an empty password next time'
|
||||||
if refresh_token:
|
self.to_screen(msg)
|
||||||
self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token)
|
self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token)
|
||||||
|
else:
|
||||||
|
refresh_token = self.cache.load(self._NETRC_MACHINE, self._oauth_cache_key)
|
||||||
|
|
||||||
refresh_token = refresh_token or self.cache.load(self._NETRC_MACHINE, self._oauth_cache_key)
|
|
||||||
if refresh_token:
|
if refresh_token:
|
||||||
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token
|
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token
|
||||||
try:
|
try:
|
||||||
|
@ -4776,7 +4777,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'live_status': live_status,
|
'live_status': live_status,
|
||||||
'release_timestamp': live_start_time,
|
'release_timestamp': live_start_time,
|
||||||
'_format_sort_fields': ( # source_preference is lower for potentially damaged formats
|
'_format_sort_fields': ( # source_preference is lower for potentially damaged formats
|
||||||
'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'),
|
'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec', 'channels', 'acodec', 'lang', 'proto'),
|
||||||
}
|
}
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
|
@ -7857,7 +7858,7 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
|
||||||
'section_start': int(clip_data['startTimeMs']) / 1000,
|
'section_start': int(clip_data['startTimeMs']) / 1000,
|
||||||
'section_end': int(clip_data['endTimeMs']) / 1000,
|
'section_end': int(clip_data['endTimeMs']) / 1000,
|
||||||
'_format_sort_fields': ( # https protocol is prioritized for ffmpeg compatibility
|
'_format_sort_fields': ( # https protocol is prioritized for ffmpeg compatibility
|
||||||
'proto:https', 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang'),
|
'proto:https', 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec', 'channels', 'acodec', 'lang'),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -483,13 +483,13 @@ def create_parser():
|
||||||
'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
|
'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
|
||||||
'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
|
'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
|
||||||
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
|
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
|
||||||
'prefer-legacy-http-handler', 'manifest-filesize-approx', 'allow-unsafe-ext',
|
'prefer-legacy-http-handler', 'manifest-filesize-approx', 'allow-unsafe-ext', 'prefer-vp9-sort',
|
||||||
}, 'aliases': {
|
}, 'aliases': {
|
||||||
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext'],
|
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext', '-prefer-vp9-sort'],
|
||||||
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext'],
|
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext', '-prefer-vp9-sort'],
|
||||||
'2021': ['2022', 'no-certifi', 'filename-sanitization'],
|
'2021': ['2022', 'no-certifi', 'filename-sanitization'],
|
||||||
'2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'],
|
'2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'],
|
||||||
'2023': [],
|
'2023': ['prefer-vp9-sort'],
|
||||||
},
|
},
|
||||||
}, help=(
|
}, help=(
|
||||||
'Options that can help keep compatibility with youtube-dl or youtube-dlc '
|
'Options that can help keep compatibility with youtube-dl or youtube-dlc '
|
||||||
|
|
|
@ -212,6 +212,23 @@ def write_json_file(obj, fn):
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def partial_application(func):
|
||||||
|
sig = inspect.signature(func)
|
||||||
|
required_args = [
|
||||||
|
param.name for param in sig.parameters.values()
|
||||||
|
if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.VAR_POSITIONAL)
|
||||||
|
if param.default is inspect.Parameter.empty
|
||||||
|
]
|
||||||
|
|
||||||
|
@functools.wraps(func)
|
||||||
|
def wrapped(*args, **kwargs):
|
||||||
|
if set(required_args[len(args):]).difference(kwargs):
|
||||||
|
return functools.partial(func, *args, **kwargs)
|
||||||
|
return func(*args, **kwargs)
|
||||||
|
|
||||||
|
return wrapped
|
||||||
|
|
||||||
|
|
||||||
def find_xpath_attr(node, xpath, key, val=None):
|
def find_xpath_attr(node, xpath, key, val=None):
|
||||||
""" Find the xpath xpath[@key=val] """
|
""" Find the xpath xpath[@key=val] """
|
||||||
assert re.match(r'^[a-zA-Z_-]+$', key)
|
assert re.match(r'^[a-zA-Z_-]+$', key)
|
||||||
|
@ -1192,6 +1209,7 @@ def extract_timezone(date_str, default=None):
|
||||||
return timezone, date_str
|
return timezone, date_str
|
||||||
|
|
||||||
|
|
||||||
|
@partial_application
|
||||||
def parse_iso8601(date_str, delimiter='T', timezone=None):
|
def parse_iso8601(date_str, delimiter='T', timezone=None):
|
||||||
""" Return a UNIX timestamp from the given date """
|
""" Return a UNIX timestamp from the given date """
|
||||||
|
|
||||||
|
@ -1269,6 +1287,7 @@ def unified_timestamp(date_str, day_first=True):
|
||||||
return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
|
return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
|
||||||
|
|
||||||
|
|
||||||
|
@partial_application
|
||||||
def determine_ext(url, default_ext='unknown_video'):
|
def determine_ext(url, default_ext='unknown_video'):
|
||||||
if url is None or '.' not in url:
|
if url is None or '.' not in url:
|
||||||
return default_ext
|
return default_ext
|
||||||
|
@ -1944,7 +1963,7 @@ def remove_start(s, start):
|
||||||
|
|
||||||
|
|
||||||
def remove_end(s, end):
|
def remove_end(s, end):
|
||||||
return s[:-len(end)] if s is not None and s.endswith(end) else s
|
return s[:-len(end)] if s is not None and end and s.endswith(end) else s
|
||||||
|
|
||||||
|
|
||||||
def remove_quotes(s):
|
def remove_quotes(s):
|
||||||
|
@ -1973,6 +1992,7 @@ def base_url(url):
|
||||||
return re.match(r'https?://[^?#]+/', url).group()
|
return re.match(r'https?://[^?#]+/', url).group()
|
||||||
|
|
||||||
|
|
||||||
|
@partial_application
|
||||||
def urljoin(base, path):
|
def urljoin(base, path):
|
||||||
if isinstance(path, bytes):
|
if isinstance(path, bytes):
|
||||||
path = path.decode()
|
path = path.decode()
|
||||||
|
@ -1988,21 +2008,6 @@ def urljoin(base, path):
|
||||||
return urllib.parse.urljoin(base, path)
|
return urllib.parse.urljoin(base, path)
|
||||||
|
|
||||||
|
|
||||||
def partial_application(func):
|
|
||||||
sig = inspect.signature(func)
|
|
||||||
|
|
||||||
@functools.wraps(func)
|
|
||||||
def wrapped(*args, **kwargs):
|
|
||||||
try:
|
|
||||||
sig.bind(*args, **kwargs)
|
|
||||||
except TypeError:
|
|
||||||
return functools.partial(func, *args, **kwargs)
|
|
||||||
else:
|
|
||||||
return func(*args, **kwargs)
|
|
||||||
|
|
||||||
return wrapped
|
|
||||||
|
|
||||||
|
|
||||||
@partial_application
|
@partial_application
|
||||||
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1, base=None):
|
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1, base=None):
|
||||||
if get_attr and v is not None:
|
if get_attr and v is not None:
|
||||||
|
@ -2583,6 +2588,7 @@ def urlencode_postdata(*args, **kargs):
|
||||||
return urllib.parse.urlencode(*args, **kargs).encode('ascii')
|
return urllib.parse.urlencode(*args, **kargs).encode('ascii')
|
||||||
|
|
||||||
|
|
||||||
|
@partial_application
|
||||||
def update_url(url, *, query_update=None, **kwargs):
|
def update_url(url, *, query_update=None, **kwargs):
|
||||||
"""Replace URL components specified by kwargs
|
"""Replace URL components specified by kwargs
|
||||||
@param url str or parse url tuple
|
@param url str or parse url tuple
|
||||||
|
@ -2603,6 +2609,7 @@ def update_url(url, *, query_update=None, **kwargs):
|
||||||
return urllib.parse.urlunparse(url._replace(**kwargs))
|
return urllib.parse.urlunparse(url._replace(**kwargs))
|
||||||
|
|
||||||
|
|
||||||
|
@partial_application
|
||||||
def update_url_query(url, query):
|
def update_url_query(url, query):
|
||||||
return update_url(url, query_update=query)
|
return update_url(url, query_update=query)
|
||||||
|
|
||||||
|
@ -2924,6 +2931,7 @@ def error_to_str(err):
|
||||||
return f'{type(err).__name__}: {err}'
|
return f'{type(err).__name__}: {err}'
|
||||||
|
|
||||||
|
|
||||||
|
@partial_application
|
||||||
def mimetype2ext(mt, default=NO_DEFAULT):
|
def mimetype2ext(mt, default=NO_DEFAULT):
|
||||||
if not isinstance(mt, str):
|
if not isinstance(mt, str):
|
||||||
if default is not NO_DEFAULT:
|
if default is not NO_DEFAULT:
|
||||||
|
@ -4664,6 +4672,7 @@ def to_high_limit_path(path):
|
||||||
return path
|
return path
|
||||||
|
|
||||||
|
|
||||||
|
@partial_application
|
||||||
def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
|
def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
|
||||||
val = traversal.traverse_obj(obj, *variadic(field))
|
val = traversal.traverse_obj(obj, *variadic(field))
|
||||||
if not val if ignore is NO_DEFAULT else val in variadic(ignore):
|
if not val if ignore is NO_DEFAULT else val in variadic(ignore):
|
||||||
|
@ -4828,6 +4837,7 @@ def number_of_digits(number):
|
||||||
return len('%d' % number)
|
return len('%d' % number)
|
||||||
|
|
||||||
|
|
||||||
|
@partial_application
|
||||||
def join_nonempty(*values, delim='-', from_dict=None):
|
def join_nonempty(*values, delim='-', from_dict=None):
|
||||||
if from_dict is not None:
|
if from_dict is not None:
|
||||||
values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)
|
values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)
|
||||||
|
@ -5165,6 +5175,7 @@ class _UnsafeExtensionError(Exception):
|
||||||
'ico',
|
'ico',
|
||||||
'image',
|
'image',
|
||||||
'jng',
|
'jng',
|
||||||
|
'jpe',
|
||||||
'jpeg',
|
'jpeg',
|
||||||
'jxl',
|
'jxl',
|
||||||
'svg',
|
'svg',
|
||||||
|
@ -5277,11 +5288,13 @@ class RetryManager:
|
||||||
time.sleep(delay)
|
time.sleep(delay)
|
||||||
|
|
||||||
|
|
||||||
|
@partial_application
|
||||||
def make_archive_id(ie, video_id):
|
def make_archive_id(ie, video_id):
|
||||||
ie_key = ie if isinstance(ie, str) else ie.ie_key()
|
ie_key = ie if isinstance(ie, str) else ie.ie_key()
|
||||||
return f'{ie_key.lower()} {video_id}'
|
return f'{ie_key.lower()} {video_id}'
|
||||||
|
|
||||||
|
|
||||||
|
@partial_application
|
||||||
def truncate_string(s, left, right=0):
|
def truncate_string(s, left, right=0):
|
||||||
assert left > 3 and right >= 0
|
assert left > 3 and right >= 0
|
||||||
if s is None or len(s) <= left + right:
|
if s is None or len(s) <= left + right:
|
||||||
|
@ -5324,8 +5337,11 @@ class FormatSorter:
|
||||||
regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
|
regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
|
||||||
|
|
||||||
default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
|
default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
|
||||||
'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
|
'res', 'fps', 'hdr:12', 'vcodec', 'channels', 'acodec',
|
||||||
'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases
|
'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases
|
||||||
|
_prefer_vp9_sort = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
|
||||||
|
'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
|
||||||
|
'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id')
|
||||||
ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
|
ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
|
||||||
'height', 'width', 'proto', 'vext', 'abr', 'aext',
|
'height', 'width', 'proto', 'vext', 'abr', 'aext',
|
||||||
'fps', 'fs_approx', 'source', 'id')
|
'fps', 'fs_approx', 'source', 'id')
|
||||||
|
|
|
@ -20,6 +20,7 @@ from ._utils import (
|
||||||
get_elements_html_by_class,
|
get_elements_html_by_class,
|
||||||
get_elements_html_by_attribute,
|
get_elements_html_by_attribute,
|
||||||
get_elements_by_attribute,
|
get_elements_by_attribute,
|
||||||
|
get_element_by_class,
|
||||||
get_element_html_by_attribute,
|
get_element_html_by_attribute,
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
get_element_html_by_id,
|
get_element_html_by_id,
|
||||||
|
@ -373,7 +374,7 @@ def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
|
||||||
|
|
||||||
|
|
||||||
@typing.overload
|
@typing.overload
|
||||||
def find_element(*, attr: str, value: str, tag: str | None = None, html=False): ...
|
def find_element(*, attr: str, value: str, tag: str | None = None, html=False, regex=False): ...
|
||||||
|
|
||||||
|
|
||||||
@typing.overload
|
@typing.overload
|
||||||
|
@ -381,34 +382,34 @@ def find_element(*, cls: str, html=False): ...
|
||||||
|
|
||||||
|
|
||||||
@typing.overload
|
@typing.overload
|
||||||
def find_element(*, id: str, tag: str | None = None, html=False): ...
|
def find_element(*, id: str, tag: str | None = None, html=False, regex=False): ...
|
||||||
|
|
||||||
|
|
||||||
@typing.overload
|
@typing.overload
|
||||||
def find_element(*, tag: str, html=False): ...
|
def find_element(*, tag: str, html=False, regex=False): ...
|
||||||
|
|
||||||
|
|
||||||
def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False):
|
def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False, regex=False):
|
||||||
# deliberately using `id=` and `cls=` for ease of readability
|
# deliberately using `id=` and `cls=` for ease of readability
|
||||||
assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required'
|
assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required'
|
||||||
if not tag:
|
ANY_TAG = r'[\w:.-]+'
|
||||||
tag = r'[\w:.-]+'
|
|
||||||
|
|
||||||
if attr and value:
|
if attr and value:
|
||||||
assert not cls, 'Cannot match both attr and cls'
|
assert not cls, 'Cannot match both attr and cls'
|
||||||
assert not id, 'Cannot match both attr and id'
|
assert not id, 'Cannot match both attr and id'
|
||||||
func = get_element_html_by_attribute if html else get_element_by_attribute
|
func = get_element_html_by_attribute if html else get_element_by_attribute
|
||||||
return functools.partial(func, attr, value, tag=tag)
|
return functools.partial(func, attr, value, tag=tag or ANY_TAG, escape_value=not regex)
|
||||||
|
|
||||||
elif cls:
|
elif cls:
|
||||||
assert not id, 'Cannot match both cls and id'
|
assert not id, 'Cannot match both cls and id'
|
||||||
assert tag is None, 'Cannot match both cls and tag'
|
assert tag is None, 'Cannot match both cls and tag'
|
||||||
func = get_element_html_by_class if html else get_elements_by_class
|
assert not regex, 'Cannot use regex with cls'
|
||||||
|
func = get_element_html_by_class if html else get_element_by_class
|
||||||
return functools.partial(func, cls)
|
return functools.partial(func, cls)
|
||||||
|
|
||||||
elif id:
|
elif id:
|
||||||
func = get_element_html_by_id if html else get_element_by_id
|
func = get_element_html_by_id if html else get_element_by_id
|
||||||
return functools.partial(func, id, tag=tag)
|
return functools.partial(func, id, tag=tag or ANY_TAG, escape_value=not regex)
|
||||||
|
|
||||||
index = int(bool(html))
|
index = int(bool(html))
|
||||||
return lambda html: get_element_text_and_html_by_tag(tag, html)[index]
|
return lambda html: get_element_text_and_html_by_tag(tag, html)[index]
|
||||||
|
@ -419,23 +420,46 @@ def find_elements(*, cls: str, html=False): ...
|
||||||
|
|
||||||
|
|
||||||
@typing.overload
|
@typing.overload
|
||||||
def find_elements(*, attr: str, value: str, tag: str | None = None, html=False): ...
|
def find_elements(*, attr: str, value: str, tag: str | None = None, html=False, regex=False): ...
|
||||||
|
|
||||||
|
|
||||||
def find_elements(*, tag=None, cls=None, attr=None, value=None, html=False):
|
def find_elements(*, tag=None, cls=None, attr=None, value=None, html=False, regex=False):
|
||||||
# deliberately using `cls=` for ease of readability
|
# deliberately using `cls=` for ease of readability
|
||||||
assert cls or (attr and value), 'One of cls or (attr AND value) is required'
|
assert cls or (attr and value), 'One of cls or (attr AND value) is required'
|
||||||
|
|
||||||
if attr and value:
|
if attr and value:
|
||||||
assert not cls, 'Cannot match both attr and cls'
|
assert not cls, 'Cannot match both attr and cls'
|
||||||
func = get_elements_html_by_attribute if html else get_elements_by_attribute
|
func = get_elements_html_by_attribute if html else get_elements_by_attribute
|
||||||
return functools.partial(func, attr, value, tag=tag or r'[\w:.-]+')
|
return functools.partial(func, attr, value, tag=tag or r'[\w:.-]+', escape_value=not regex)
|
||||||
|
|
||||||
assert not tag, 'Cannot match both cls and tag'
|
assert not tag, 'Cannot match both cls and tag'
|
||||||
|
assert not regex, 'Cannot use regex with cls'
|
||||||
func = get_elements_html_by_class if html else get_elements_by_class
|
func = get_elements_html_by_class if html else get_elements_by_class
|
||||||
return functools.partial(func, cls)
|
return functools.partial(func, cls)
|
||||||
|
|
||||||
|
|
||||||
|
def trim_str(*, start=None, end=None):
|
||||||
|
def trim(s):
|
||||||
|
if s is None:
|
||||||
|
return None
|
||||||
|
start_idx = 0
|
||||||
|
if start and s.startswith(start):
|
||||||
|
start_idx = len(start)
|
||||||
|
if end and s.endswith(end):
|
||||||
|
return s[start_idx:-len(end)]
|
||||||
|
return s[start_idx:]
|
||||||
|
|
||||||
|
return trim
|
||||||
|
|
||||||
|
|
||||||
|
def unpack(func):
|
||||||
|
@functools.wraps(func)
|
||||||
|
def inner(items, **kwargs):
|
||||||
|
return func(*items, **kwargs)
|
||||||
|
|
||||||
|
return inner
|
||||||
|
|
||||||
|
|
||||||
def get_first(obj, *paths, **kwargs):
|
def get_first(obj, *paths, **kwargs):
|
||||||
return traverse_obj(obj, *((..., *variadic(keys)) for keys in paths), **kwargs, get_all=False)
|
return traverse_obj(obj, *((..., *variadic(keys)) for keys in paths), **kwargs, get_all=False)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user