Compare commits

..

No commits in common. "5222390c89f977c9cf3ac88795313b2318a06b03" and "3de49061e3deb36909eaa8e1d466dd79430a6a67" have entirely different histories.

44 changed files with 228 additions and 1061 deletions

View File

@ -80,12 +80,12 @@ on:
default: true default: true
type: boolean type: boolean
origin: origin:
description: Origin description: .
required: false required: false
default: 'current repo' default: ''
type: choice type: choice
options: options:
- 'current repo' - ''
permissions: permissions:
contents: read contents: read
@ -99,7 +99,7 @@ jobs:
- name: Process origin - name: Process origin
id: process_origin id: process_origin
run: | run: |
echo "origin=${{ inputs.origin == 'current repo' && github.repository || inputs.origin }}" | tee "$GITHUB_OUTPUT" echo "origin=${{ inputs.origin || github.repository }}" >> "$GITHUB_OUTPUT"
unix: unix:
needs: process needs: process

View File

@ -1,25 +1,5 @@
name: Core Tests name: Core Tests
on: on: [push, pull_request]
push:
paths:
- .github/**
- devscripts/**
- test/**
- yt_dlp/**.py
- '!yt_dlp/extractor/*.py'
- yt_dlp/extractor/__init__.py
- yt_dlp/extractor/common.py
- yt_dlp/extractor/extractors.py
pull_request:
paths:
- .github/**
- devscripts/**
- test/**
- yt_dlp/**.py
- '!yt_dlp/extractor/*.py'
- yt_dlp/extractor/__init__.py
- yt_dlp/extractor/common.py
- yt_dlp/extractor/extractors.py
permissions: permissions:
contents: read contents: read

View File

@ -64,6 +64,7 @@ jobs:
target_tag: ${{ steps.setup_variables.outputs.target_tag }} target_tag: ${{ steps.setup_variables.outputs.target_tag }}
pypi_project: ${{ steps.setup_variables.outputs.pypi_project }} pypi_project: ${{ steps.setup_variables.outputs.pypi_project }}
pypi_suffix: ${{ steps.setup_variables.outputs.pypi_suffix }} pypi_suffix: ${{ steps.setup_variables.outputs.pypi_suffix }}
pypi_token: ${{ steps.setup_variables.outputs.pypi_token }}
head_sha: ${{ steps.get_target.outputs.head_sha }} head_sha: ${{ steps.get_target.outputs.head_sha }}
steps: steps:
@ -152,6 +153,7 @@ jobs:
${{ !!secrets[format('{0}_archive_repo_token', env.target_repo)] }} || fallback_token ${{ !!secrets[format('{0}_archive_repo_token', env.target_repo)] }} || fallback_token
pypi_project='${{ vars[format('{0}_pypi_project', env.target_repo)] }}' pypi_project='${{ vars[format('{0}_pypi_project', env.target_repo)] }}'
pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.target_repo)] }}' pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.target_repo)] }}'
${{ !secrets[format('{0}_pypi_token', env.target_repo)] }} || pypi_token='${{ env.target_repo }}_pypi_token'
fi fi
else else
target_tag="${source_tag:-${version}}" target_tag="${source_tag:-${version}}"
@ -161,6 +163,7 @@ jobs:
${{ !!secrets[format('{0}_archive_repo_token', env.source_repo)] }} || fallback_token ${{ !!secrets[format('{0}_archive_repo_token', env.source_repo)] }} || fallback_token
pypi_project='${{ vars[format('{0}_pypi_project', env.source_repo)] }}' pypi_project='${{ vars[format('{0}_pypi_project', env.source_repo)] }}'
pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.source_repo)] }}' pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.source_repo)] }}'
${{ !secrets[format('{0}_pypi_token', env.source_repo)] }} || pypi_token='${{ env.source_repo }}_pypi_token'
else else
target_repo='${{ github.repository }}' target_repo='${{ github.repository }}'
fi fi
@ -169,6 +172,13 @@ jobs:
if [[ "${target_repo}" == '${{ github.repository }}' ]] && ${{ !inputs.prerelease }}; then if [[ "${target_repo}" == '${{ github.repository }}' ]] && ${{ !inputs.prerelease }}; then
pypi_project='${{ vars.PYPI_PROJECT }}' pypi_project='${{ vars.PYPI_PROJECT }}'
fi fi
if [[ -z "${pypi_token}" && "${pypi_project}" ]]; then
if ${{ !secrets.PYPI_TOKEN }}; then
pypi_token=OIDC
else
pypi_token=PYPI_TOKEN
fi
fi
echo "::group::Output variables" echo "::group::Output variables"
cat << EOF | tee -a "$GITHUB_OUTPUT" cat << EOF | tee -a "$GITHUB_OUTPUT"
@ -179,6 +189,7 @@ jobs:
target_tag=${target_tag} target_tag=${target_tag}
pypi_project=${pypi_project} pypi_project=${pypi_project}
pypi_suffix=${pypi_suffix} pypi_suffix=${pypi_suffix}
pypi_token=${pypi_token}
EOF EOF
echo "::endgroup::" echo "::endgroup::"
@ -275,7 +286,18 @@ jobs:
python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update" python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update"
python setup.py sdist bdist_wheel python setup.py sdist bdist_wheel
- name: Publish to PyPI - name: Publish to PyPI via token
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets[needs.prepare.outputs.pypi_token] }}
if: |
needs.prepare.outputs.pypi_token != 'OIDC' && env.TWINE_PASSWORD
run: |
twine upload dist/*
- name: Publish to PyPI via trusted publishing
if: |
needs.prepare.outputs.pypi_token == 'OIDC'
uses: pypa/gh-action-pypi-publish@release/v1 uses: pypa/gh-action-pypi-publish@release/v1
with: with:
verbose: true verbose: true

View File

@ -1333,7 +1333,6 @@ The available fields are:
- `was_live` (boolean): Whether this video was originally a live stream - `was_live` (boolean): Whether this video was originally a live stream
- `playable_in_embed` (string): Whether this video is allowed to play in embedded players on other sites - `playable_in_embed` (string): Whether this video is allowed to play in embedded players on other sites
- `availability` (string): Whether the video is "private", "premium_only", "subscriber_only", "needs_auth", "unlisted" or "public" - `availability` (string): Whether the video is "private", "premium_only", "subscriber_only", "needs_auth", "unlisted" or "public"
- `media_type` (string): The type of media as classified by the site, e.g. "episode", "clip", "trailer"
- `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL - `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
- `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL - `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
- `extractor` (string): Name of the extractor - `extractor` (string): Name of the extractor

View File

@ -140,8 +140,6 @@ class TestFormatSelection(unittest.TestCase):
test('example-with-dashes', 'example-with-dashes') test('example-with-dashes', 'example-with-dashes')
test('all', '2', '47', '45', 'example-with-dashes', '35') test('all', '2', '47', '45', 'example-with-dashes', '35')
test('mergeall', '2+47+45+example-with-dashes+35', multi=True) test('mergeall', '2+47+45+example-with-dashes+35', multi=True)
# See: https://github.com/yt-dlp/yt-dlp/pulls/8797
test('7_a/worst', '35')
def test_format_selection_audio(self): def test_format_selection_audio(self):
formats = [ formats = [

View File

@ -328,7 +328,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
https_server_thread.start() https_server_thread.start()
with handler(verify=False) as rh: with handler(verify=False) as rh:
with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info: with pytest.raises(SSLError, match='sslv3 alert handshake failure') as exc_info:
validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers')) validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
assert not issubclass(exc_info.type, CertificateVerifyError) assert not issubclass(exc_info.type, CertificateVerifyError)

View File

@ -11,14 +11,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, report_warning from test.helper import FakeYDL, report_warning
from yt_dlp.update import Updater, UpdateInfo from yt_dlp.update import Updater, UpdateInfo
# XXX: Keep in sync with yt_dlp.update.UPDATE_SOURCES
TEST_UPDATE_SOURCES = {
'stable': 'yt-dlp/yt-dlp',
'nightly': 'yt-dlp/yt-dlp-nightly-builds',
'master': 'yt-dlp/yt-dlp-master-builds',
}
TEST_API_DATA = { TEST_API_DATA = {
'yt-dlp/yt-dlp/latest': { 'yt-dlp/yt-dlp/latest': {
'tag_name': '2023.12.31', 'tag_name': '2023.12.31',
@ -112,7 +104,6 @@ class FakeUpdater(Updater):
_channel = 'stable' _channel = 'stable'
_origin = 'yt-dlp/yt-dlp' _origin = 'yt-dlp/yt-dlp'
_update_sources = TEST_UPDATE_SOURCES
def _download_update_spec(self, *args, **kwargs): def _download_update_spec(self, *args, **kwargs):
return TEST_LOCKFILE_ACTUAL return TEST_LOCKFILE_ACTUAL

View File

@ -2317,6 +2317,23 @@ Line 1
self.assertEqual(traverse_obj({}, (0, slice(1)), traverse_string=True), [], self.assertEqual(traverse_obj({}, (0, slice(1)), traverse_string=True), [],
msg='branching should result in list if `traverse_string`') msg='branching should result in list if `traverse_string`')
# Test is_user_input behavior
_IS_USER_INPUT_DATA = {'range8': list(range(8))}
self.assertEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3'),
is_user_input=True), 3,
msg='allow for string indexing if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3:'),
is_user_input=True), tuple(range(8))[3:],
msg='allow for string slice if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':4:2'),
is_user_input=True), tuple(range(8))[:4:2],
msg='allow step in string slice if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':'),
is_user_input=True), range(8),
msg='`:` should be treated as `...` if `is_user_input`')
with self.assertRaises(TypeError, msg='too many params should result in error'):
traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':::'), is_user_input=True)
# Test re.Match as input obj # Test re.Match as input obj
mobj = re.fullmatch(r'0(12)(?P<group>3)(4)?', '0123') mobj = re.fullmatch(r'0(12)(?P<group>3)(4)?', '0123')
self.assertEqual(traverse_obj(mobj, ...), [x for x in mobj.groups() if x is not None], self.assertEqual(traverse_obj(mobj, ...), [x for x in mobj.groups() if x is not None],
@ -2370,11 +2387,6 @@ Line 1
headers4 = HTTPHeaderDict({'ytdl-test': 'data;'}) headers4 = HTTPHeaderDict({'ytdl-test': 'data;'})
self.assertEqual(set(headers4.items()), {('Ytdl-Test', 'data;')}) self.assertEqual(set(headers4.items()), {('Ytdl-Test', 'data;')})
# common mistake: strip whitespace from values
# https://github.com/yt-dlp/yt-dlp/issues/8729
headers5 = HTTPHeaderDict({'ytdl-test': ' data; '})
self.assertEqual(set(headers5.items()), {('Ytdl-Test', 'data;')})
def test_extract_basic_auth(self): def test_extract_basic_auth(self):
assert extract_basic_auth('http://:foo.bar') == ('http://:foo.bar', None) assert extract_basic_auth('http://:foo.bar') == ('http://:foo.bar', None)
assert extract_basic_auth('http://foo.bar') == ('http://foo.bar', None) assert extract_basic_auth('http://foo.bar') == ('http://foo.bar', None)

View File

@ -148,7 +148,7 @@ class TestWebsSocketRequestHandlerConformance:
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True) @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_ssl_error(self, handler): def test_ssl_error(self, handler):
with handler(verify=False) as rh: with handler(verify=False) as rh:
with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info: with pytest.raises(SSLError, match='sslv3 alert handshake failure') as exc_info:
validate_and_send(rh, Request(self.bad_wss_host)) validate_and_send(rh, Request(self.bad_wss_host))
assert not issubclass(exc_info.type, CertificateVerifyError) assert not issubclass(exc_info.type, CertificateVerifyError)

View File

@ -1201,15 +1201,6 @@ class YoutubeDL:
(?:\|(?P<default>.*?))? (?:\|(?P<default>.*?))?
)$''') )$''')
def _from_user_input(field):
if field == ':':
return ...
elif ':' in field:
return slice(*map(int_or_none, field.split(':')))
elif int_or_none(field) is not None:
return int(field)
return field
def _traverse_infodict(fields): def _traverse_infodict(fields):
fields = [f for x in re.split(r'\.({.+?})\.?', fields) fields = [f for x in re.split(r'\.({.+?})\.?', fields)
for f in ([x] if x.startswith('{') else x.split('.'))] for f in ([x] if x.startswith('{') else x.split('.'))]
@ -1219,12 +1210,11 @@ class YoutubeDL:
for i, f in enumerate(fields): for i, f in enumerate(fields):
if not f.startswith('{'): if not f.startswith('{'):
fields[i] = _from_user_input(f)
continue continue
assert f.endswith('}'), f'No closing brace for {f} in {fields}' assert f.endswith('}'), f'No closing brace for {f} in {fields}'
fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')} fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}
return traverse_obj(info_dict, fields, traverse_string=True) return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
def get_value(mdict): def get_value(mdict):
# Object traversal # Object traversal
@ -2465,16 +2455,9 @@ class YoutubeDL:
return selector_function(ctx_copy) return selector_function(ctx_copy)
return final_selector return final_selector
# HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid stream = io.BytesIO(format_spec.encode())
# Prefix numbers with random letters to avoid it being classified as a number
# See: https://github.com/yt-dlp/yt-dlp/pulls/8797
# TODO: Implement parser not reliant on tokenize.tokenize
prefix = ''.join(random.choices(string.ascii_letters, k=32))
stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
try: try:
tokens = list(_remove_unused_ops( tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
token._replace(string=token.string.replace(prefix, ''))
for token in tokenize.tokenize(stream.readline)))
except tokenize.TokenError: except tokenize.TokenError:
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec))) raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

View File

@ -276,7 +276,6 @@ from .brilliantpala import (
) )
from .businessinsider import BusinessInsiderIE from .businessinsider import BusinessInsiderIE
from .bundesliga import BundesligaIE from .bundesliga import BundesligaIE
from .bundestag import BundestagIE
from .buzzfeed import BuzzFeedIE from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE from .byutv import BYUtvIE
from .c56 import C56IE from .c56 import C56IE
@ -865,7 +864,6 @@ from .jiosaavn import (
) )
from .jove import JoveIE from .jove import JoveIE
from .joj import JojIE from .joj import JojIE
from .joqrag import JoqrAgIE
from .jstream import JStreamIE from .jstream import JStreamIE
from .jtbc import ( from .jtbc import (
JTBCIE, JTBCIE,
@ -993,7 +991,6 @@ from .lynda import (
LyndaIE, LyndaIE,
LyndaCourseIE LyndaCourseIE
) )
from .maariv import MaarivIE
from .magellantv import MagellanTVIE from .magellantv import MagellanTVIE
from .magentamusik360 import MagentaMusik360IE from .magentamusik360 import MagentaMusik360IE
from .mailru import ( from .mailru import (
@ -1593,7 +1590,6 @@ from .restudy import RestudyIE
from .reuters import ReutersIE from .reuters import ReutersIE
from .reverbnation import ReverbNationIE from .reverbnation import ReverbNationIE
from .rheinmaintv import RheinMainTVIE from .rheinmaintv import RheinMainTVIE
from .rinsefm import RinseFMIE
from .rmcdecouverte import RMCDecouverteIE from .rmcdecouverte import RMCDecouverteIE
from .rockstargames import RockstarGamesIE from .rockstargames import RockstarGamesIE
from .rokfin import ( from .rokfin import (
@ -1647,7 +1643,6 @@ from .rumble import (
RumbleIE, RumbleIE,
RumbleChannelIE, RumbleChannelIE,
) )
from .rudovideo import RudoVideoIE
from .rutube import ( from .rutube import (
RutubeIE, RutubeIE,
RutubeChannelIE, RutubeChannelIE,

View File

@ -121,21 +121,11 @@ class AENetworksIE(AENetworksBaseIE):
'info_dict': { 'info_dict': {
'id': '22253814', 'id': '22253814',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Winter Is Coming', 'title': 'Winter is Coming',
'description': 'md5:a40e370925074260b1c8a633c632c63a', 'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
'timestamp': 1338306241, 'timestamp': 1338306241,
'upload_date': '20120529', 'upload_date': '20120529',
'uploader': 'AENE-NEW', 'uploader': 'AENE-NEW',
'duration': 2592.0,
'thumbnail': r're:^https?://.*\.jpe?g$',
'chapters': 'count:5',
'tags': 'count:14',
'categories': ['Mountain Men'],
'episode_number': 1,
'episode': 'Episode 1',
'season': 'Season 1',
'season_number': 1,
'series': 'Mountain Men',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
@ -153,15 +143,6 @@ class AENetworksIE(AENetworksBaseIE):
'timestamp': 1452634428, 'timestamp': 1452634428,
'upload_date': '20160112', 'upload_date': '20160112',
'uploader': 'AENE-NEW', 'uploader': 'AENE-NEW',
'duration': 1277.695,
'thumbnail': r're:^https?://.*\.jpe?g$',
'chapters': 'count:4',
'tags': 'count:23',
'episode': 'Episode 1',
'episode_number': 1,
'season': 'Season 9',
'season_number': 9,
'series': 'Duck Dynasty',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download

View File

@ -292,7 +292,7 @@ class ARDIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
# available till 7.12.2023 # available till 7.12.2023
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html', 'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
'md5': '94812e6438488fb923c361a44469614b', 'md5': 'a438f671e87a7eba04000336a119ccc4',
'info_dict': { 'info_dict': {
'id': 'maischberger-video-424', 'id': 'maischberger-video-424',
'display_id': 'maischberger-video-424', 'display_id': 'maischberger-video-424',
@ -403,25 +403,26 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
_VALID_URL = r'''(?x)https:// _VALID_URL = r'''(?x)https://
(?:(?:beta|www)\.)?ardmediathek\.de/ (?:(?:beta|www)\.)?ardmediathek\.de/
(?:(?P<client>[^/]+)/)? (?:(?P<client>[^/]+)/)?
(?:player|live|video|(?P<playlist>sendung|serie|sammlung))/ (?:player|live|video|(?P<playlist>sendung|sammlung))/
(?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)? (?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
(?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+) (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
(?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))''' (?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0', 'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI',
'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4', 'md5': '3fd5fead7a370a819341129c8d713136',
'info_dict': { 'info_dict': {
'display_id': 'filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen', 'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen',
'id': '12939099', 'id': '12172961',
'title': 'Liebe auf vier Pfoten', 'title': 'Wolfsland - Die traurigen Schwestern',
'description': r're:^Claudia Schmitt, Anwältin in Salzburg', 'description': r're:^Als der Polizeiobermeister Raaben',
'duration': 5222, 'duration': 5241,
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:aee7cbf8f06de976?w=960&ch=ae4d0f2ee47d8b9b', 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957',
'timestamp': 1701343800, 'timestamp': 1670710500,
'upload_date': '20231130', 'upload_date': '20221210',
'ext': 'mp4', 'ext': 'mp4',
'episode': 'Liebe auf vier Pfoten', 'age_limit': 12,
'episode': 'Wolfsland - Die traurigen Schwestern',
'series': 'Filme im MDR' 'series': 'Filme im MDR'
}, },
}, { }, {
@ -453,7 +454,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
'duration': 915, 'duration': 915,
'episode': 'tagesschau, 20:00 Uhr', 'episode': 'tagesschau, 20:00 Uhr',
'series': 'tagesschau', 'series': 'tagesschau',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678', 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49',
}, },
}, { }, {
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', 'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
@ -474,10 +475,6 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
# playlist of type 'sendung' # playlist of type 'sendung'
'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/', 'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
'only_matching': True, 'only_matching': True,
}, {
# playlist of type 'serie'
'url': 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1',
'only_matching': True,
}, { }, {
# playlist of type 'sammlung' # playlist of type 'sammlung'
'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/', 'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
@ -490,11 +487,10 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
def _ARD_load_playlist_snippet(self, playlist_id, display_id, client, mode, page_number): def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber):
""" Query the ARD server for playlist information """ Query the ARD server for playlist information
and returns the data in "raw" format """ and returns the data in "raw" format """
assert mode in ('sendung', 'serie', 'sammlung') if mode == 'sendung':
if mode in ('sendung', 'serie'):
graphQL = json.dumps({ graphQL = json.dumps({
'query': '''{ 'query': '''{
showPage( showPage(
@ -511,7 +507,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
links { target { id href title } } links { target { id href title } }
type type
} }
}}''' % (client, playlist_id, page_number), }}''' % (client, playlist_id, pageNumber),
}).encode() }).encode()
else: # mode == 'sammlung' else: # mode == 'sammlung'
graphQL = json.dumps({ graphQL = json.dumps({
@ -532,7 +528,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
type type
} }
} }
}}''' % (client, playlist_id, page_number), }}''' % (client, playlist_id, pageNumber),
}).encode() }).encode()
# Ressources for ARD graphQL debugging: # Ressources for ARD graphQL debugging:
# https://api-test.ardmediathek.de/public-gateway # https://api-test.ardmediathek.de/public-gateway
@ -542,7 +538,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
data=graphQL, data=graphQL,
headers={'Content-Type': 'application/json'})['data'] headers={'Content-Type': 'application/json'})['data']
# align the structure of the returned data: # align the structure of the returned data:
if mode in ('sendung', 'serie'): if mode == 'sendung':
show_page = show_page['showPage'] show_page = show_page['showPage']
else: # mode == 'sammlung' else: # mode == 'sammlung'
show_page = show_page['morePage']['widget'] show_page = show_page['morePage']['widget']
@ -550,12 +546,12 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode): def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
""" Collects all playlist entries and returns them as info dict. """ Collects all playlist entries and returns them as info dict.
Supports playlists of mode 'sendung', 'serie', and 'sammlung', Supports playlists of mode 'sendung' and 'sammlung', and also nested
as well as nested playlists. """ playlists. """
entries = [] entries = []
pageNumber = 0 pageNumber = 0
while True: # iterate by pageNumber while True: # iterate by pageNumber
show_page = self._ARD_load_playlist_snippet( show_page = self._ARD_load_playlist_snipped(
playlist_id, display_id, client, mode, pageNumber) playlist_id, display_id, client, mode, pageNumber)
for teaser in show_page['teasers']: # process playlist items for teaser in show_page['teasers']: # process playlist items
if '/compilation/' in teaser['links']['target']['href']: if '/compilation/' in teaser['links']['target']['href']:

View File

@ -317,25 +317,16 @@ class BBCCoUkIE(InfoExtractor):
def _download_media_selector(self, programme_id): def _download_media_selector(self, programme_id):
last_exception = None last_exception = None
formats, subtitles = [], {}
for media_set in self._MEDIA_SETS: for media_set in self._MEDIA_SETS:
try: try:
fmts, subs = self._download_media_selector_url( return self._download_media_selector_url(
self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id) self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
formats.extend(fmts)
if subs:
self._merge_subtitles(subs, target=subtitles)
except BBCCoUkIE.MediaSelectionError as e: except BBCCoUkIE.MediaSelectionError as e:
if e.id in ('notukerror', 'geolocation', 'selectionunavailable'): if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
last_exception = e last_exception = e
continue continue
self._raise_extractor_error(e) self._raise_extractor_error(e)
if last_exception: self._raise_extractor_error(last_exception)
if formats or subtitles:
self.report_warning(f'{self.IE_NAME} returned error: {last_exception.id}')
else:
self._raise_extractor_error(last_exception)
return formats, subtitles
def _download_media_selector_url(self, url, programme_id=None): def _download_media_selector_url(self, url, programme_id=None):
media_selection = self._download_json( media_selection = self._download_json(
@ -1197,7 +1188,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
if initial_data is None: if initial_data is None:
initial_data = self._search_regex( initial_data = self._search_regex(
r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage, r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
'preload state', default='{}') 'preload state', default={})
else: else:
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False) initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
initial_data = self._parse_json(initial_data, playlist_id, fatal=False) initial_data = self._parse_json(initial_data, playlist_id, fatal=False)

View File

@ -7,10 +7,8 @@ from ..utils import (
ExtractorError, ExtractorError,
OnDemandPagedList, OnDemandPagedList,
clean_html, clean_html,
extract_attributes,
get_element_by_class, get_element_by_class,
get_element_by_id, get_element_by_id,
get_element_html_by_class,
get_elements_html_by_class, get_elements_html_by_class,
int_or_none, int_or_none,
orderedSet, orderedSet,
@ -19,7 +17,6 @@ from ..utils import (
traverse_obj, traverse_obj,
unified_strdate, unified_strdate,
urlencode_postdata, urlencode_postdata,
urljoin,
) )
@ -37,25 +34,6 @@ class BitChuteIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'BitChute', 'uploader': 'BitChute',
'upload_date': '20170103', 'upload_date': '20170103',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/'
},
}, {
# test case: video with different channel and uploader
'url': 'https://www.bitchute.com/video/Yti_j9A-UZ4/',
'md5': 'f10e6a8e787766235946d0868703f1d0',
'info_dict': {
'id': 'Yti_j9A-UZ4',
'ext': 'mp4',
'title': 'Israel at War | Full Measure',
'description': 'md5:38cf7bc6f42da1a877835539111c69ef',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'sharylattkisson',
'upload_date': '20231106',
'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
'channel': 'Full Measure with Sharyl Attkisson',
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/'
}, },
}, { }, {
# video not downloadable in browser, but we can recover it # video not downloadable in browser, but we can recover it
@ -70,9 +48,6 @@ class BitChuteIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'BitChute', 'uploader': 'BitChute',
'upload_date': '20181113', 'upload_date': '20181113',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/'
}, },
'params': {'check_formats': None}, 'params': {'check_formats': None},
}, { }, {
@ -124,11 +99,6 @@ class BitChuteIE(InfoExtractor):
reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title
self.raise_geo_restricted(reason) self.raise_geo_restricted(reason)
@staticmethod
def _make_url(html):
path = extract_attributes(get_element_html_by_class('spa', html) or '').get('href')
return urljoin('https://www.bitchute.com', path)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( webpage = self._download_webpage(
@ -151,19 +121,12 @@ class BitChuteIE(InfoExtractor):
'Video is unavailable. Please make sure this video is playable in the browser ' 'Video is unavailable. Please make sure this video is playable in the browser '
'before reporting this issue.', expected=True, video_id=video_id) 'before reporting this issue.', expected=True, video_id=video_id)
details = get_element_by_class('details', webpage) or ''
uploader_html = get_element_html_by_class('creator', details) or ''
channel_html = get_element_html_by_class('name', details) or ''
return { return {
'id': video_id, 'id': video_id,
'title': self._html_extract_title(webpage) or self._og_search_title(webpage), 'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
'description': self._og_search_description(webpage, default=None), 'description': self._og_search_description(webpage, default=None),
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),
'uploader': clean_html(uploader_html), 'uploader': clean_html(get_element_by_class('owner', webpage)),
'uploader_url': self._make_url(uploader_html),
'channel': clean_html(channel_html),
'channel_url': self._make_url(channel_html),
'upload_date': unified_strdate(self._search_regex( 'upload_date': unified_strdate(self._search_regex(
r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)), r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)),
'formats': formats, 'formats': formats,
@ -191,9 +154,6 @@ class BitChuteChannelIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'BitChute', 'uploader': 'BitChute',
'upload_date': '20170103', 'upload_date': '20170103',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
'duration': 16, 'duration': 16,
'view_count': int, 'view_count': int,
}, },
@ -209,7 +169,7 @@ class BitChuteChannelIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'wV9Imujxasw9', 'id': 'wV9Imujxasw9',
'title': 'Bruce MacDonald and "The Light of Darkness"', 'title': 'Bruce MacDonald and "The Light of Darkness"',
'description': 'md5:747724ef404eebdfc04277714f81863e', 'description': 'md5:04913227d2714af1d36d804aa2ab6b1e',
} }
}] }]

View File

@ -1,123 +0,0 @@
import re
from functools import partial
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
bug_reports_message,
clean_html,
format_field,
get_element_text_and_html_by_tag,
int_or_none,
url_or_none,
)
from ..utils.traversal import traverse_obj
# Extractor for Bundestag mediathek videos. URLs are matched either as
# dbtg.tv short links (/cvid/ or /fvid/) or as bundestag.de/mediathek URLs
# that carry a numeric `videoid` query parameter.
class BundestagIE(InfoExtractor):
    _VALID_URL = [
        r'https?://dbtg\.tv/[cf]vid/(?P<id>\d+)',
        r'https?://www\.bundestag\.de/mediathek/?\?(?:[^#]+&)?videoid=(?P<id>\d+)',
    ]
    _TESTS = [{
        'url': 'https://dbtg.tv/cvid/7605304',
        'info_dict': {
            'id': '7605304',
            'ext': 'mp4',
            'title': '145. Sitzung vom 15.12.2023, TOP 24 Barrierefreiheit',
            'description': 'md5:321a9dc6bdad201264c0045efc371561',
        },
    }, {
        'url': 'https://www.bundestag.de/mediathek?videoid=7602120&url=L21lZGlhdGhla292ZXJsYXk=&mod=mediathek',
        'info_dict': {
            'id': '7602120',
            'ext': 'mp4',
            'title': '130. Sitzung vom 18.10.2023, TOP 1 Befragung der Bundesregierung',
            'description': 'Befragung der Bundesregierung',
        },
    }, {
        'url': 'https://www.bundestag.de/mediathek?videoid=7604941#url=L21lZGlhdGhla292ZXJsYXk/dmlkZW9pZD03NjA0OTQx&mod=mediathek',
        'only_matching': True,
    }, {
        'url': 'http://dbtg.tv/fvid/3594346',
        'only_matching': True,
    }]

    # Page queried (with `videoid` and `view=main`) for title/description markup
    _OVERLAY_URL = 'https://www.bundestag.de/mediathekoverlay'
    # HLS playlist template; `{0}` is replaced with the video id in both places
    _INSTANCE_FORMAT = 'https://cldf-wzw-od.r53.cdn.tv1.eu/13014bundestagod/_definst_/13014bundestag/ondemand/3777parlamentsfernsehen/archiv/app144277506/145293313/{0}/{0}_playlist.smil/playlist.m3u8'
    # JSON endpoint; the video id is appended directly after `contentId=`
    _SHARE_URL = 'https://webtv.bundestag.de/player/macros/_x_s-144277506/shareData.json?contentId='
    # Patterns applied to share URLs to read codec/bitrate/etc. from the file name
    _SHARE_AUDIO_REGEX = r'/\d+_(?P<codec>\w+)_(?P<bitrate>\d+)kb_(?P<channels>\w+)_\w+_\d+\.(?P<ext>\w+)'
    _SHARE_VIDEO_REGEX = r'/\d+_(?P<codec>\w+)_(?P<width>\w+)_(?P<height>\w+)_(?P<bitrate>\d+)kb_\w+_\w+_\d+\.(?P<ext>\w+)'

    def _bt_extract_share_formats(self, video_id):
        """Yield format dicts derived from the share-data JSON of `video_id`.

        This is a generator. When the JSON's `status.code` is not the integer 1,
        a warning is reported and nothing is yielded. Otherwise every top-level
        entry whose key starts with 'audio' or 'download' and whose value is a
        valid URL produces one format dict; other entries are ignored.
        """
        share_data = self._download_json(
            f'{self._SHARE_URL}{video_id}', video_id, note='Downloading share format JSON')
        if traverse_obj(share_data, ('status', 'code', {int})) != 1:
            self.report_warning(format_field(
                share_data, [('status', 'message', {str})],
                'Share API response: %s', default='Unknown Share API Error')
                + bug_reports_message())
            return

        for name, url in share_data.items():
            # Non-URL values (e.g. the `status` mapping read above) are skipped here
            if not isinstance(name, str) or not url_or_none(url):
                continue

            elif name.startswith('audio'):
                match = re.search(self._SHARE_AUDIO_REGEX, url)
                yield {
                    'format_id': name,
                    'url': url,
                    'vcodec': 'none',
                    **traverse_obj(match, {
                        'acodec': 'codec',
                        'audio_channels': ('channels', {{'mono': 1, 'stereo': 2}.get}),
                        'abr': ('bitrate', {int_or_none}),
                        'ext': 'ext',
                    }),
                }

            elif name.startswith('download'):
                match = re.search(self._SHARE_VIDEO_REGEX, url)
                yield {
                    'format_id': name,
                    'url': url,
                    **traverse_obj(match, {
                        'vcodec': 'codec',
                        'tbr': ('bitrate', {int_or_none}),
                        'width': ('width', {int_or_none}),
                        'height': ('height', {int_or_none}),
                        'ext': 'ext',
                    }),
                }

    def _real_extract(self, url):
        """Collect HLS and share formats plus overlay metadata for the matched id.

        Raises ExtractorError (expected) when the HLS playlist request answers
        with HTTP 404. Any other HLS extraction error is only reported as a
        warning so that the share formats can still be used.
        """
        video_id = self._match_id(url)
        formats = []
        # `formats` is the same list object stored in `result`, so later
        # extend() calls are reflected in the returned dict
        result = {'id': video_id, 'formats': formats}

        try:
            formats.extend(self._extract_m3u8_formats(
                self._INSTANCE_FORMAT.format(video_id), video_id, m3u8_id='instance'))
        except ExtractorError as error:
            if isinstance(error.cause, HTTPError) and error.cause.status == 404:
                raise ExtractorError('Could not find video id', expected=True)
            self.report_warning(f'Error extracting hls formats: {error}', video_id)
        formats.extend(self._bt_extract_share_formats(video_id))

        if not formats:
            self.raise_no_formats('Could not find suitable formats', video_id=video_id)

        # The overlay download is non-fatal; title is taken from the first <h3>
        # (with any <span>…</span> removed), description from the first <p>
        result.update(traverse_obj(self._download_webpage(
            self._OVERLAY_URL, video_id,
            query={'videoid': video_id, 'view': 'main'},
            note='Downloading metadata overlay', fatal=False,
        ), {
            'title': (
                {partial(get_element_text_and_html_by_tag, 'h3')}, 0,
                {partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
            'description': ({partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
        }))

        return result

View File

@ -180,13 +180,6 @@ class CBCPlayerIE(InfoExtractor):
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg', 'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
'chapters': [], 'chapters': [],
'duration': 494.811, 'duration': 494.811,
'categories': ['AudioMobile/All in a Weekend Montreal'],
'tags': 'count:8',
'location': 'Quebec',
'series': 'All in a Weekend Montreal',
'season': 'Season 2015',
'season_number': 2015,
'media_type': 'Excerpt',
}, },
}, { }, {
'url': 'http://www.cbc.ca/player/play/2164402062', 'url': 'http://www.cbc.ca/player/play/2164402062',
@ -202,37 +195,25 @@ class CBCPlayerIE(InfoExtractor):
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg', 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
'chapters': [], 'chapters': [],
'duration': 186.867, 'duration': 186.867,
'series': 'CBC News: Windsor at 6:00',
'categories': ['News/Canada/Windsor'],
'location': 'Windsor',
'tags': ['cancer'],
'creator': 'Allison Johnson',
'media_type': 'Excerpt',
}, },
}, { }, {
# Has subtitles # Has subtitles
# These broadcasts expire after ~1 month, can find new test URL here: # These broadcasts expire after ~1 month, can find new test URL here:
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast # https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
'url': 'http://www.cbc.ca/player/play/2284799043667', 'url': 'http://www.cbc.ca/player/play/2249992771553',
'md5': '9b49f0839e88b6ec0b01d840cf3d42b5', 'md5': '2f2fb675dd4f0f8a5bb7588d1b13bacd',
'info_dict': { 'info_dict': {
'id': '2284799043667', 'id': '2249992771553',
'ext': 'mp4', 'ext': 'mp4',
'title': 'The National | Hockey coach charged, Green grants, Safer drugs', 'title': 'The National | Womens soccer pay, Florida seawater, Swift quake',
'description': 'md5:84ef46321c94bcf7d0159bb565d26bfa', 'description': 'md5:adba28011a56cfa47a080ff198dad27a',
'timestamp': 1700272800, 'timestamp': 1690596000,
'duration': 2718.833, 'duration': 2716.333,
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]}, 'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/907/171/thumbnail.jpeg', 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/481/326/thumbnail.jpeg',
'uploader': 'CBCC-NEW', 'uploader': 'CBCC-NEW',
'chapters': 'count:5', 'chapters': 'count:5',
'upload_date': '20231118', 'upload_date': '20230729',
'categories': 'count:4',
'series': 'The National - Full Show',
'tags': 'count:1',
'creator': 'News',
'location': 'Canada',
'media_type': 'Full Program',
}, },
}] }]

View File

@ -382,7 +382,6 @@ class InfoExtractor:
'private', 'premium_only', 'subscriber_only', 'needs_auth', 'private', 'premium_only', 'subscriber_only', 'needs_auth',
'unlisted' or 'public'. Use 'InfoExtractor._availability' 'unlisted' or 'public'. Use 'InfoExtractor._availability'
to set it to set it
media_type: The type of media as classified by the site, e.g. "episode", "clip", "trailer"
_old_archive_ids: A list of old archive ids needed for backward compatibility _old_archive_ids: A list of old archive ids needed for backward compatibility
_format_sort_fields: A list of fields to use for sorting formats _format_sort_fields: A list of fields to use for sorting formats
__post_extractor: A function to be called just before the metadata is __post_extractor: A function to be called just before the metadata is

View File

@ -46,10 +46,6 @@ class CWTVIE(InfoExtractor):
'timestamp': 1444107300, 'timestamp': 1444107300,
'age_limit': 14, 'age_limit': 14,
'uploader': 'CWTV', 'uploader': 'CWTV',
'thumbnail': r're:^https?://.*\.jpe?g$',
'chapters': 'count:4',
'episode': 'Episode 20',
'season': 'Season 11',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download

View File

@ -1,20 +1,15 @@
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
try_call, try_call,
unified_timestamp, unified_timestamp,
urlencode_postdata,
) )
class EplusIbIE(InfoExtractor): class EplusIbIE(InfoExtractor):
_NETRC_MACHINE = 'eplus' IE_NAME = 'eplus:inbound'
IE_NAME = 'eplus' IE_DESC = 'e+ (イープラス) overseas'
IE_DESC = 'e+ (イープラス)' _VALID_URL = r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)'
_VALID_URL = [r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)',
r'https?://live\.eplus\.jp/(?P<id>sample|\d+)']
_TESTS = [{ _TESTS = [{
'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D', 'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
'info_dict': { 'info_dict': {
@ -34,97 +29,14 @@ class EplusIbIE(InfoExtractor):
'No video formats found!', 'No video formats found!',
'Requested format is not available', 'Requested format is not available',
], ],
}, {
'url': 'https://live.eplus.jp/sample',
'info_dict': {
'id': 'stream1ng20210719-test-005',
'title': 'Online streaming test for DRM',
'live_status': 'was_live',
'release_date': '20210719',
'release_timestamp': 1626703200,
'description': None,
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
'expected_warnings': [
'Could not find the playlist URL. This event may not be accessible',
'No video formats found!',
'Requested format is not available',
'This video is DRM protected',
],
}, {
'url': 'https://live.eplus.jp/2053935',
'info_dict': {
'id': '331320-0001-001',
'title': '丘みどり2020配信LIVE Vol.2 ~秋麗~ 【Streaming+(配信チケット)】',
'live_status': 'was_live',
'release_date': '20200920',
'release_timestamp': 1600596000,
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
'expected_warnings': [
'Could not find the playlist URL. This event may not be accessible',
'No video formats found!',
'Requested format is not available',
],
}] }]
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0'
def _login(self, username, password, urlh):
    """Log in to live.eplus.jp with the given credentials.

    `urlh` is the response handle of the page that led to the login form; its
    `X-CLTFT-Token` header and its final URL are reused for both requests.

    Raises ExtractorError when the `ci_session` cookie or the token header is
    missing, and an expected ExtractorError when the pre-login JSON response
    does not report `isSuccess`.
    """
    if not self._get_cookies('https://live.eplus.jp/').get('ci_session'):
        raise ExtractorError('Unable to get ci_session cookie')

    cltft_token = urlh.headers.get('X-CLTFT-Token')
    if not cltft_token:
        raise ExtractorError('Unable to get X-CLTFT-Token')

    # The token is sent both as a cookie and as a request header / form field
    self._set_cookie('live.eplus.jp', 'X-CLTFT-Token', cltft_token)

    # Step 1: JSON credential check
    login_json = self._download_json(
        'https://live.eplus.jp/member/api/v1/FTAuth/idpw', None,
        note='Sending pre-login info', errnote='Unable to send pre-login info', headers={
            'Content-Type': 'application/json; charset=UTF-8',
            'Referer': urlh.url,
            'X-Cltft-Token': cltft_token,
            'Accept': '*/*',
        }, data=json.dumps({
            'loginId': username,
            'loginPassword': password,
        }).encode())

    if not login_json.get('isSuccess'):
        raise ExtractorError('Login failed: Invalid id or password', expected=True)

    # Step 2: form POST back to the URL the login page was served from
    self._request_webpage(
        urlh.url, None, note='Logging in', errnote='Unable to log in',
        data=urlencode_postdata({
            'loginId': username,
            'loginPassword': password,
            'Token.Default': cltft_token,
            'op': 'nextPage',
        }), headers={'Referer': urlh.url})
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage, urlh = self._download_webpage_handle( webpage = self._download_webpage(url, video_id)
url, video_id, headers={'User-Agent': self._USER_AGENT})
if urlh.url.startswith('https://live.eplus.jp/member/auth'):
username, password = self._get_login_info()
if not username:
self.raise_login_required()
self._login(username, password, urlh)
webpage = self._download_webpage(
url, video_id, headers={'User-Agent': self._USER_AGENT})
data_json = self._search_json(r'<script>\s*var app\s*=', webpage, 'data json', video_id) data_json = self._search_json(r'<script>\s*var app\s*=', webpage, 'data json', video_id)
if data_json.get('drm_mode') == 'ON':
self.report_drm(video_id)
delivery_status = data_json.get('delivery_status') delivery_status = data_json.get('delivery_status')
archive_mode = data_json.get('archive_mode') archive_mode = data_json.get('archive_mode')
release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400) release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
@ -152,7 +64,7 @@ class EplusIbIE(InfoExtractor):
formats = [] formats = []
m3u8_playlist_urls = self._search_json( m3u8_playlist_urls = self._search_json(
r'var\s+listChannels\s*=', webpage, 'hls URLs', video_id, contains_pattern=r'\[.+\]', default=[]) r'var listChannels\s*=', webpage, 'hls URLs', video_id, contains_pattern=r'\[.+\]', default=[])
if not m3u8_playlist_urls: if not m3u8_playlist_urls:
if live_status == 'is_upcoming': if live_status == 'is_upcoming':
self.raise_no_formats( self.raise_no_formats(

View File

@ -52,7 +52,7 @@ class FacebookIE(InfoExtractor):
)\?(?:.*?)(?:v|video_id|story_fbid)=| )\?(?:.*?)(?:v|video_id|story_fbid)=|
[^/]+/videos/(?:[^/]+/)?| [^/]+/videos/(?:[^/]+/)?|
[^/]+/posts/| [^/]+/posts/|
groups/[^/]+/(?:permalink|posts)/| groups/[^/]+/permalink/|
watchparty/ watchparty/
)| )|
facebook: facebook:
@ -232,21 +232,6 @@ class FacebookIE(InfoExtractor):
'uploader_id': '100013949973717', 'uploader_id': '100013949973717',
}, },
'skip': 'Requires logging in', 'skip': 'Requires logging in',
}, {
# data.node.comet_sections.content.story.attachments[].throwbackStyles.attachment_target_renderer.attachment.target.attachments[].styles.attachment.media
'url': 'https://www.facebook.com/groups/1645456212344334/posts/3737828833107051/',
'info_dict': {
'id': '1569199726448814',
'ext': 'mp4',
'title': 'Pence MUST GO!',
'description': 'Vickie Gentry shared a memory.',
'timestamp': 1511548260,
'upload_date': '20171124',
'uploader': 'Vickie Gentry',
'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
'thumbnail': r're:^https?://.*',
'duration': 148.435,
},
}, { }, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True, 'only_matching': True,
@ -627,11 +612,9 @@ class FacebookIE(InfoExtractor):
nodes = variadic(traverse_obj(data, 'nodes', 'node') or []) nodes = variadic(traverse_obj(data, 'nodes', 'node') or [])
attachments = traverse_obj(nodes, ( attachments = traverse_obj(nodes, (
..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments', ..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments',
..., ('styles', 'style_type_renderer', ('throwbackStyles', 'attachment_target_renderer')), ..., ('styles', 'style_type_renderer'), 'attachment'), expected_type=dict) or []
'attachment', {dict}))
for attachment in attachments: for attachment in attachments:
ns = traverse_obj(attachment, ('all_subattachments', 'nodes', ..., {dict}), ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
('target', 'attachments', ..., 'styles', 'attachment', {dict}))
for n in ns: for n in ns:
parse_attachment(n) parse_attachment(n)
parse_attachment(attachment) parse_attachment(attachment)
@ -654,7 +637,7 @@ class FacebookIE(InfoExtractor):
if len(entries) > 1: if len(entries) > 1:
return self.playlist_result(entries, video_id) return self.playlist_result(entries, video_id)
video_info = entries[0] if entries else {'id': video_id} video_info = entries[0]
webpage_info = extract_metadata(webpage) webpage_info = extract_metadata(webpage)
# honor precise duration in video info # honor precise duration in video info
if video_info.get('duration'): if video_info.get('duration'):

View File

@ -1,14 +1,12 @@
from .common import InfoExtractor from .common import InfoExtractor
from .dailymotion import DailymotionIE
from ..utils import ( from ..utils import (
ExtractorError,
determine_ext, determine_ext,
ExtractorError,
format_field, format_field,
int_or_none,
join_nonempty,
parse_iso8601, parse_iso8601,
parse_qs, parse_qs,
) )
from .dailymotion import DailymotionIE
class FranceTVBaseInfoExtractor(InfoExtractor): class FranceTVBaseInfoExtractor(InfoExtractor):
@ -84,8 +82,6 @@ class FranceTVIE(InfoExtractor):
videos = [] videos = []
title = None title = None
subtitle = None subtitle = None
episode_number = None
season_number = None
image = None image = None
duration = None duration = None
timestamp = None timestamp = None
@ -116,9 +112,7 @@ class FranceTVIE(InfoExtractor):
if meta: if meta:
if title is None: if title is None:
title = meta.get('title') title = meta.get('title')
# meta['pre_title'] contains season and episode number for series in format "S<ID> E<ID>" # XXX: what is meta['pre_title']?
season_number, episode_number = self._search_regex(
r'S(\d+)\s*E(\d+)', meta.get('pre_title'), 'episode info', group=(1, 2), default=(None, None))
if subtitle is None: if subtitle is None:
subtitle = meta.get('additional_title') subtitle = meta.get('additional_title')
if image is None: if image is None:
@ -197,19 +191,19 @@ class FranceTVIE(InfoExtractor):
} for sheet in spritesheets] } for sheet in spritesheets]
}) })
if subtitle:
title += ' - %s' % subtitle
title = title.strip()
return { return {
'id': video_id, 'id': video_id,
'title': join_nonempty(title, subtitle, delim=' - ').strip(), 'title': title,
'thumbnail': image, 'thumbnail': image,
'duration': duration, 'duration': duration,
'timestamp': timestamp, 'timestamp': timestamp,
'is_live': is_live, 'is_live': is_live,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'episode': subtitle if episode_number else None,
'series': title if episode_number else None,
'episode_number': int_or_none(episode_number),
'season_number': int_or_none(season_number),
} }
def _real_extract(self, url): def _real_extract(self, url):
@ -236,31 +230,14 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
'ext': 'mp4', 'ext': 'mp4',
'title': '13h15, le dimanche... - Les mystères de Jésus', 'title': '13h15, le dimanche... - Les mystères de Jésus',
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
'timestamp': 1502623500, 'timestamp': 1502623500,
'duration': 2580,
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20170813', 'upload_date': '20170813',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'add_ie': [FranceTVIE.ie_key()], 'add_ie': [FranceTVIE.ie_key()],
}, {
'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html',
'info_dict': {
'id': 'a9050959-eedd-4b4a-9b0d-de6eeaa73e44',
'ext': 'mp4',
'title': 'Foot2Rue - Duel au vieux port',
'episode': 'Duel au vieux port',
'series': 'Foot2Rue',
'episode_number': 1,
'season_number': 1,
'timestamp': 1642761360,
'upload_date': '20220121',
'season': 'Season 1',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1441,
},
}, { }, {
# france3 # france3
'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html', 'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',

View File

@ -23,7 +23,7 @@ class IHeartRadioBaseIE(InfoExtractor):
class IHeartRadioIE(IHeartRadioBaseIE): class IHeartRadioIE(IHeartRadioBaseIE):
IE_NAME = 'iheartradio' IENAME = 'iheartradio'
_VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)' _VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true', 'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true',

View File

@ -10,7 +10,6 @@ from ..utils import (
ExtractorError, ExtractorError,
decode_base_n, decode_base_n,
encode_base_n, encode_base_n,
filter_dict,
float_or_none, float_or_none,
format_field, format_field,
get_element_by_attribute, get_element_by_attribute,
@ -704,31 +703,28 @@ class InstagramStoryIE(InstagramBaseIE):
user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False) user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False)
if not user_info: if not user_info:
self.raise_login_required('This content is unreachable') self.raise_login_required('This content is unreachable')
user_id = user_info.get('id')
user_id = traverse_obj(user_info, 'pk', 'id', expected_type=str)
story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}' story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
if not story_info_url: # user id is only mandatory for non-highlights
raise ExtractorError('Unable to extract user id')
videos = traverse_obj(self._download_json( videos = traverse_obj(self._download_json(
f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}', f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}',
story_id, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels') story_id, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels')
if not videos: if not videos:
self.raise_login_required('You need to log in to access this content') self.raise_login_required('You need to log in to access this content')
full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (user_id, 'user', 'full_name')) full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (str(user_id), 'user', 'full_name'))
story_title = traverse_obj(videos, (f'highlight:{story_id}', 'title')) story_title = traverse_obj(videos, (f'highlight:{story_id}', 'title'))
if not story_title: if not story_title:
story_title = f'Story by {username}' story_title = f'Story by {username}'
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (user_id, 'items')) highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
info_data = [] info_data = []
for highlight in highlights: for highlight in highlights:
highlight_data = self._extract_product(highlight) highlight_data = self._extract_product(highlight)
if highlight_data.get('formats'): if highlight_data.get('formats'):
info_data.append({ info_data.append({
**highlight_data,
'uploader': full_name, 'uploader': full_name,
'uploader_id': user_id, 'uploader_id': user_id,
**filter_dict(highlight_data),
}) })
return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title) return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title)

View File

@ -1,112 +0,0 @@
import datetime
import urllib.parse
from .common import InfoExtractor
from ..utils import (
clean_html,
datetime_from_str,
unified_timestamp,
urljoin,
)
# Extractor for the live stream described by IE_DESC. Every matched URL is
# treated as the same single live stream: the video id is always 'live'.
class JoqrAgIE(InfoExtractor):
    IE_DESC = '超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)'
    _VALID_URL = [r'https?://www\.uniqueradio\.jp/agplayer5/(?:player|inc-player-hls)\.php',
                  r'https?://(?:www\.)?joqr\.co\.jp/ag/',
                  r'https?://(?:www\.)?joqr\.co\.jp/qr/ag(?:daily|regular)program/?(?:$|[#?])']
    _TESTS = [{
        'url': 'https://www.uniqueradio.jp/agplayer5/player.php',
        'info_dict': {
            'id': 'live',
            'title': str,
            'channel': '超!A&G+',
            'description': str,
            'live_status': 'is_live',
            'release_timestamp': int,
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }, {
        'url': 'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php',
        'only_matching': True,
    }, {
        'url': 'https://www.joqr.co.jp/ag/article/103760/',
        'only_matching': True,
    }, {
        'url': 'http://www.joqr.co.jp/qr/agdailyprogram/',
        'only_matching': True,
    }, {
        'url': 'http://www.joqr.co.jp/qr/agregularprogram/',
        'only_matching': True,
    }]

    def _extract_metadata(self, variable, html):
        """Return the cleaned value of the quoted JS variable `variable` in `html`.

        The raw value is URL-unquoted (with '+' decoded as space) and passed
        through clean_html. Returns None when the variable is absent or the
        cleaned value is empty.
        """
        return clean_html(urllib.parse.unquote_plus(self._search_regex(
            rf'var\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
            html, 'metadata', group='value', default=''))) or None

    def _extract_start_timestamp(self, video_id, is_live):
        """Look up a start timestamp from the daily program list, or None.

        The list for 'today' is consulted first. When `is_live` is true and the
        time found there is not earlier than now, the list for 'yesterday' is
        consulted instead.
        """
        def extract_start_time_from(date_str):
            # The date is shifted by +9 hours before formatting, matching the
            # '+09:00' offset used when the timestamp is built below.
            # NOTE(review): assumes datetime_from_str yields a UTC-based value — confirm
            dt = datetime_from_str(date_str) + datetime.timedelta(hours=9)
            date = dt.strftime('%Y%m%d')
            # A failed download yields an empty page, so the search simply finds nothing
            start_time = self._search_regex(
                r'<h3[^>]+\bclass="dailyProgram-itemHeaderTime"[^>]*>[\s\d:]+\s*(\d{1,2}:\d{1,2})',
                self._download_webpage(
                    f'https://www.joqr.co.jp/qr/agdailyprogram/?date={date}', video_id,
                    note=f'Downloading program list of {date}', fatal=False,
                    errnote=f'Failed to download program list of {date}') or '',
                'start time', default=None)
            if start_time:
                return unified_timestamp(f'{dt.strftime("%Y/%m/%d")} {start_time} +09:00')
            return None

        start_timestamp = extract_start_time_from('today')
        if not start_timestamp:
            return None

        if not is_live or start_timestamp < datetime_from_str('now').timestamp():
            return start_timestamp
        else:
            return extract_start_time_from('yesterday')

    def _real_extract(self, url):
        """Build the info dict for the live stream; `url` itself is not inspected."""
        video_id = 'live'

        metadata = self._download_webpage(
            'https://www.uniqueradio.jp/aandg', video_id,
            note='Downloading metadata', errnote='Failed to download metadata')
        title = self._extract_metadata('Program_name', metadata)

        # '放送休止' (Japanese: "broadcast suspended") marks an off-air period
        if title == '放送休止':
            formats = []
            live_status = 'is_upcoming'
            release_timestamp = self._extract_start_timestamp(video_id, False)
            msg = 'This stream is not currently live'
            if release_timestamp:
                # fromtimestamp() without tz renders the time in the local time zone
                msg += (' and will start at '
                        + datetime.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S'))
            self.raise_no_formats(msg, expected=True)
        else:
            m3u8_path = self._search_regex(
                r'<source\s[^>]*\bsrc="([^"]+)"',
                self._download_webpage(
                    'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php', video_id,
                    note='Downloading player data', errnote='Failed to download player data'),
                'm3u8 url')
            formats = self._extract_m3u8_formats(
                urljoin('https://www.uniqueradio.jp/', m3u8_path), video_id)
            live_status = 'is_live'
            release_timestamp = self._extract_start_timestamp(video_id, True)

        return {
            'id': video_id,
            'title': title,
            'channel': '超!A&G+',
            'description': self._extract_metadata('Program_text', metadata),
            'formats': formats,
            'live_status': live_status,
            'release_timestamp': release_timestamp,
        }

View File

@ -12,7 +12,7 @@ from ..utils import (
class KinjaEmbedIE(InfoExtractor): class KinjaEmbedIE(InfoExtractor):
IE_NAME = 'kinja:embed' IENAME = 'kinja:embed'
_DOMAIN_REGEX = r'''(?:[^.]+\.)? _DOMAIN_REGEX = r'''(?:[^.]+\.)?
(?: (?:
avclub| avclub|

View File

@ -6,7 +6,6 @@ from ..utils import (
int_or_none, int_or_none,
smuggle_url, smuggle_url,
traverse_obj, traverse_obj,
try_call,
unsmuggle_url, unsmuggle_url,
) )
@ -97,22 +96,13 @@ class LiTVIE(InfoExtractor):
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);', r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
webpage, 'video data', default='{}'), video_id) webpage, 'video data', default='{}'), video_id)
if not video_data: if not video_data:
payload = {'assetId': program_info['assetId']} payload = {
puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value) 'assetId': program_info['assetId'],
if puid: 'watchDevices': program_info['watchDevices'],
payload.update({ 'contentType': program_info['contentType'],
'type': 'auth', }
'puid': puid,
})
endpoint = 'getUrl'
else:
payload.update({
'watchDevices': program_info['watchDevices'],
'contentType': program_info['contentType'],
})
endpoint = 'getMainUrlNoAuth'
video_data = self._download_json( video_data = self._download_json(
f'https://www.litv.tv/vod/ajax/{endpoint}', video_id, 'https://www.litv.tv/vod/ajax/getMainUrlNoAuth', video_id,
data=json.dumps(payload).encode('utf-8'), data=json.dumps(payload).encode('utf-8'),
headers={'Content-Type': 'application/json'}) headers={'Content-Type': 'application/json'})

View File

@ -1,62 +0,0 @@
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_resolution,
unified_timestamp,
url_or_none,
)
from ..utils.traversal import traverse_obj
# Extractor for player.maariv.co.il player pages identified by a numeric
# `media` query parameter; the same URL pattern is reused to detect iframes
# embedding that player on other pages.
class MaarivIE(InfoExtractor):
    IE_NAME = 'maariv.co.il'
    _VALID_URL = r'https?://player\.maariv\.co\.il/public/player\.html\?(?:[^#]+&)?media=(?P<id>\d+)'
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://player.maariv.co.il/public/player.html?player=maariv-desktop&media=3611585',
        'info_dict': {
            'id': '3611585',
            'duration': 75,
            'ext': 'mp4',
            'upload_date': '20231009',
            'title': 'מבצע חרבות ברזל',
            'timestamp': 1696851301,
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.maariv.co.il/news/law/Article-1044008',
        'info_dict': {
            'id': '3611585',
            'duration': 75,
            'ext': 'mp4',
            'upload_date': '20231009',
            'title': 'מבצע חרבות ברזל',
            'timestamp': 1696851301,
        },
    }]

    def _real_extract(self, url):
        """Fetch the media JSON for the matched id and build the info dict.

        Formats come from two places in the JSON's `data` object: an HLS URL at
        `video.url` (extracted non-fatally) and direct URLs listed under
        `video.stream_urls[].stream_url`.
        """
        video_id = self._match_id(url)
        data = self._download_json(
            f'https://dal.walla.co.il/media/{video_id}?origin=player.maariv.co.il', video_id)['data']

        formats = []
        if hls_url := traverse_obj(data, ('video', 'url', {url_or_none})):
            formats.extend(self._extract_m3u8_formats(hls_url, video_id, m3u8_id='hls', fatal=False))

        for http_format in traverse_obj(data, ('video', 'stream_urls', ..., 'stream_url', {url_or_none})):
            formats.append({
                'url': http_format,
                'format_id': 'http',
                # resolution fields are parsed from the URL string itself
                **parse_resolution(http_format),
            })

        return {
            'id': video_id,
            **traverse_obj(data, {
                'title': 'title',
                'duration': ('video', 'duration', {int_or_none}),
                'timestamp': ('upload_date', {unified_timestamp}),
            }),
            'formats': formats,
        }

View File

@ -73,7 +73,6 @@ class MediasetIE(ThePlatformBaseIE):
'season_number': 5, 'season_number': 5,
'episode_number': 5, 'episode_number': 5,
'chapters': [{'start_time': 0.0, 'end_time': 3409.08}, {'start_time': 3409.08, 'end_time': 6565.008}], 'chapters': [{'start_time': 0.0, 'end_time': 3409.08}, {'start_time': 3409.08, 'end_time': 6565.008}],
'categories': ['Informazione'],
}, },
}, { }, {
# DRM # DRM
@ -150,7 +149,6 @@ class MediasetIE(ThePlatformBaseIE):
'season_number': 12, 'season_number': 12,
'episode': 'Episode 8', 'episode': 'Episode 8',
'episode_number': 8, 'episode_number': 8,
'categories': ['Intrattenimento'],
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,

View File

@ -3,11 +3,8 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
filter_dict,
parse_qs,
remove_end, remove_end,
traverse_obj, traverse_obj,
update_url_query,
urljoin, urljoin,
) )
@ -111,9 +108,7 @@ class MediaStreamIE(MediaStreamBaseIE):
for message in [ for message in [
'Debido a tu ubicación no puedes ver el contenido', 'Debido a tu ubicación no puedes ver el contenido',
'You are not allowed to watch this video: Geo Fencing Restriction', 'You are not allowed to watch this video: Geo Fencing Restriction'
'Este contenido no está disponible en tu zona geográfica.',
'El contenido sólo está disponible dentro de',
]: ]:
if message in webpage: if message in webpage:
self.raise_geo_restricted() self.raise_geo_restricted()
@ -123,16 +118,7 @@ class MediaStreamIE(MediaStreamBaseIE):
formats, subtitles = [], {} formats, subtitles = [], {}
for video_format in player_config['src']: for video_format in player_config['src']:
if video_format == 'hls': if video_format == 'hls':
params = { fmts, subs = self._extract_m3u8_formats_and_subtitles(player_config['src'][video_format], video_id)
'at': 'web-app',
'access_token': traverse_obj(parse_qs(url), ('access_token', 0)),
}
for name, key in (('MDSTRMUID', 'uid'), ('MDSTRMSID', 'sid'), ('MDSTRMPID', 'pid'), ('VERSION', 'av')):
params[key] = self._search_regex(
rf'window\.{name}\s*=\s*["\']([^"\']+)["\'];', webpage, key, default=None)
fmts, subs = self._extract_m3u8_formats_and_subtitles(
update_url_query(player_config['src'][video_format], filter_dict(params)), video_id)
formats.extend(fmts) formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles) self._merge_subtitles(subs, target=subtitles)
elif video_format == 'mpd': elif video_format == 'mpd':

View File

@ -97,7 +97,7 @@ class NBAWatchBaseIE(NBACVPBaseIE):
class NBAWatchEmbedIE(NBAWatchBaseIE): class NBAWatchEmbedIE(NBAWatchBaseIE):
IE_NAME = 'nba:watch:embed' IENAME = 'nba:watch:embed'
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)' _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://watch.nba.com/embed?id=659395', 'url': 'http://watch.nba.com/embed?id=659395',
@ -339,7 +339,7 @@ class NBABaseIE(NBACVPBaseIE):
class NBAEmbedIE(NBABaseIE): class NBAEmbedIE(NBABaseIE):
IE_NAME = 'nba:embed' IENAME = 'nba:embed'
_VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)' _VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&ampEnv=', 'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&ampEnv=',
@ -361,7 +361,7 @@ class NBAEmbedIE(NBABaseIE):
class NBAIE(NBABaseIE): class NBAIE(NBABaseIE):
IE_NAME = 'nba' IENAME = 'nba'
_VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
_TESTS = [{ _TESTS = [{
'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774', 'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774',
@ -388,7 +388,7 @@ class NBAIE(NBABaseIE):
class NBAChannelIE(NBABaseIE): class NBAChannelIE(NBABaseIE):
IE_NAME = 'nba:channel' IENAME = 'nba:channel'
_VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
_TESTS = [{ _TESTS = [{
'url': 'https://www.nba.com/blazers/video/channel/summer_league', 'url': 'https://www.nba.com/blazers/video/channel/summer_league',

View File

@ -53,8 +53,6 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
'chapters': 'count:1', 'chapters': 'count:1',
'tags': 'count:4', 'tags': 'count:4',
'thumbnail': r're:https?://.+\.jpg', 'thumbnail': r're:https?://.+\.jpg',
'categories': ['Series/The Tonight Show Starring Jimmy Fallon'],
'media_type': 'Full Episode',
}, },
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
@ -133,8 +131,6 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
'tags': 'count:10', 'tags': 'count:10',
'age_limit': 0, 'age_limit': 0,
'thumbnail': r're:https?://.+\.jpg', 'thumbnail': r're:https?://.+\.jpg',
'categories': ['Series/Quantum Leap 2022'],
'media_type': 'Highlight',
}, },
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',

View File

@ -3,6 +3,7 @@ import re
import uuid import uuid
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
OnDemandPagedList, OnDemandPagedList,
@ -83,17 +84,15 @@ class OnDemandKoreaIE(InfoExtractor):
def try_geo_bypass(url): def try_geo_bypass(url):
return traverse_obj(url, ({parse_qs}, 'stream_url', 0, {url_or_none})) or url return traverse_obj(url, ({parse_qs}, 'stream_url', 0, {url_or_none})) or url
def try_upgrade_quality(url):
mod_url = re.sub(r'_720(p?)\.m3u8', r'_1080\1.m3u8', url)
return mod_url if mod_url != url and self._request_webpage(
HEADRequest(mod_url), video_id, note='Checking for higher quality format',
errnote='No higher quality format found', fatal=False) else url
formats = [] formats = []
for m3u8_url in traverse_obj(data, (('sources', 'manifest'), ..., 'url', {url_or_none}, {try_geo_bypass})): for m3u8_url in traverse_obj(data, (('sources', 'manifest'), ..., 'url', {url_or_none}, {try_geo_bypass})):
mod_url = re.sub(r'_720(p?)\.m3u8', r'_1080\1.m3u8', m3u8_url) formats.extend(self._extract_m3u8_formats(try_upgrade_quality(m3u8_url), video_id, fatal=False))
if mod_url != m3u8_url:
mod_format = self._extract_m3u8_formats(
mod_url, video_id, note='Checking for higher quality format',
errnote='No higher quality format found', fatal=False)
if mod_format:
formats.extend(mod_format)
continue
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, fatal=False))
subtitles = {} subtitles = {}
for track in traverse_obj(data, ('text_tracks', lambda _, v: url_or_none(v['url']))): for track in traverse_obj(data, ('text_tracks', lambda _, v: url_or_none(v['url']))):

View File

@ -4,14 +4,7 @@ from urllib.parse import unquote
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import functools from ..compat import functools
from ..utils import ( from ..utils import ExtractorError, make_archive_id, urljoin
ExtractorError,
float_or_none,
int_or_none,
make_archive_id,
mimetype2ext,
urljoin,
)
from ..utils.traversal import traverse_obj from ..utils.traversal import traverse_obj
@ -33,7 +26,6 @@ class Pr0grammIE(InfoExtractor):
'dislike_count': int, 'dislike_count': int,
'age_limit': 0, 'age_limit': 0,
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg', 'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
'_old_archive_ids': ['pr0grammstatic 5466437'],
}, },
}, { }, {
# Tags require account # Tags require account
@ -51,7 +43,6 @@ class Pr0grammIE(InfoExtractor):
'dislike_count': int, 'dislike_count': int,
'age_limit': 0, 'age_limit': 0,
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg', 'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
'_old_archive_ids': ['pr0grammstatic 3052805'],
}, },
}, { }, {
# Requires verified account # Requires verified account
@ -69,7 +60,6 @@ class Pr0grammIE(InfoExtractor):
'dislike_count': int, 'dislike_count': int,
'age_limit': 18, 'age_limit': 18,
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg', 'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
'_old_archive_ids': ['pr0grammstatic 5848332'],
}, },
}, { }, {
'url': 'https://pr0gramm.com/static/5466437', 'url': 'https://pr0gramm.com/static/5466437',
@ -120,61 +110,37 @@ class Pr0grammIE(InfoExtractor):
return data return data
@staticmethod
def _create_source_url(path):
return urljoin('https://img.pr0gramm.com', path)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_info = traverse_obj( video_info = traverse_obj(
self._call_api('get', video_id, {'id': video_id, 'flags': self._maximum_flags}), self._call_api('get', video_id, {'id': video_id, 'flags': self._maximum_flags}),
('items', 0, {dict})) ('items', 0, {dict}))
source = video_info.get('image') source = urljoin('https://img.pr0gramm.com', video_info.get('image'))
if not source or not source.endswith('mp4'): if not source or not source.endswith('mp4'):
self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id) self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id)
tags = None tags = None
if self._is_logged_in: if self._is_logged_in:
metadata = self._call_api('info', video_id, {'itemId': video_id}, note='Downloading tags') metadata = self._call_api('info', video_id, {'itemId': video_id})
tags = traverse_obj(metadata, ('tags', ..., 'tag', {str})) tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
# Sorted by "confidence", higher confidence = earlier in list # Sorted by "confidence", higher confidence = earlier in list
confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float}))) confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
if confidences: if confidences:
tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)] tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
formats = traverse_obj(video_info, ('variants', ..., {
'format_id': ('name', {str}),
'url': ('path', {self._create_source_url}),
'ext': ('mimeType', {mimetype2ext}),
'vcodec': ('codec', {str}),
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
'bitrate': ('bitRate', {float_or_none}),
'filesize': ('fileSize', {int_or_none}),
})) if video_info.get('variants') else [{
'ext': 'mp4',
'format_id': 'source',
**traverse_obj(video_info, {
'url': ('image', {self._create_source_url}),
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
}),
}]
subtitles = {}
for subtitle in traverse_obj(video_info, ('subtitles', lambda _, v: v['language'])):
subtitles.setdefault(subtitle['language'], []).append(traverse_obj(subtitle, {
'url': ('path', {self._create_source_url}),
'note': ('label', {str}),
}))
return { return {
'id': video_id, 'id': video_id,
'title': f'pr0gramm-{video_id} by {video_info.get("user")}', 'title': f'pr0gramm-{video_id} by {video_info.get("user")}',
'formats': [{
'url': source,
'ext': 'mp4',
**traverse_obj(video_info, {
'width': ('width', {int}),
'height': ('height', {int}),
}),
}],
'tags': tags, 'tags': tags,
'formats': formats,
'subtitles': subtitles,
'age_limit': 18 if traverse_obj(video_info, ('flags', {0b110.__and__})) else 0, 'age_limit': 18 if traverse_obj(video_info, ('flags', {0b110.__and__})) else 0,
'_old_archive_ids': [make_archive_id('Pr0grammStatic', video_id)], '_old_archive_ids': [make_archive_id('Pr0grammStatic', video_id)],
**traverse_obj(video_info, { **traverse_obj(video_info, {

View File

@ -1,33 +0,0 @@
from .common import InfoExtractor
from ..utils import format_field, parse_iso8601
class RinseFMIE(InfoExtractor):
    # Handles single-episode pages of the form rinse.fm/episodes/<slug>.
    _VALID_URL = r'https?://(?:www\.)?rinse\.fm/episodes/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://rinse.fm/episodes/club-glow-15-12-2023-2000/',
        'md5': '76ee0b719315617df42e15e710f46c7b',
        'info_dict': {
            'id': '1536535',
            'ext': 'mp3',
            'title': 'Club Glow - 15/12/2023 - 20:00',
            'thumbnail': r're:^https://.+\.(?:jpg|JPG)$',
            'release_timestamp': 1702598400,
            'release_date': '20231215'
        }
    }]

    def _real_extract(self, url):
        # The URL slug is only used as a display id; the real id comes from
        # the page's embedded data.
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # Episode metadata sits under props -> pageProps -> entry of the data
        # returned by _search_nextjs_data; a missing level raises KeyError.
        page_props = self._search_nextjs_data(page, slug)['props']['pageProps']
        entry = page_props['entry']

        # 'id' and 'fileUrl' are mandatory (KeyError if absent); every other
        # field is optional and resolves to None when missing.
        info = {
            'id': entry['id'],
            'title': entry.get('title'),
            'url': entry['fileUrl'],
            'vcodec': 'none',
        }
        info['release_timestamp'] = parse_iso8601(entry.get('episodeDate'))
        # The thumbnail URL is built from the first featured image's filename.
        info['thumbnail'] = format_field(
            entry, [('featuredImage', 0, 'filename')],
            'https://rinse.imgix.net/media/%s', default=None)
        return info

View File

@ -1,135 +0,0 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
determine_ext,
js_to_json,
traverse_obj,
update_url_query,
url_or_none,
)
class RudoVideoIE(InfoExtractor):
    # Handles rudo.video pages of type vod, podcast and live, and the
    # matching <iframe> embeds on third-party pages.
    _VALID_URL = r'https?://rudo\.video/(?P<type>vod|podcast|live)/(?P<id>[^/?&#]+)'
    _EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)//rudo\.video/(?:vod|podcast|live)/[^\'"]+)']
    _TESTS = [{
        'url': 'https://rudo.video/podcast/cz2wrUy8l0o',
        'md5': '28ed82b477708dc5e12e072da2449221',
        'info_dict': {
            'id': 'cz2wrUy8l0o',
            'title': 'Diego Cabot',
            'ext': 'mp4',
            'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
        },
    }, {
        'url': 'https://rudo.video/podcast/bQkt07',
        'md5': '36b22a9863de0f47f00fc7532a32a898',
        'info_dict': {
            'id': 'bQkt07',
            'title': 'Tubular Bells',
            'ext': 'mp4',
            'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
        },
    }, {
        'url': 'https://rudo.video/podcast/b42ZUznHX0',
        'md5': 'b91c70d832938871367f8ad10c895821',
        'info_dict': {
            'id': 'b42ZUznHX0',
            'title': 'Columna Ruperto Concha',
            'ext': 'mp3',
            'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
        },
    }, {
        'url': 'https://rudo.video/vod/bN5AaJ',
        'md5': '01324a329227e2591530ecb4f555c881',
        'info_dict': {
            'id': 'bN5AaJ',
            'title': 'Ucrania 19.03',
            'creator': 'La Tercera',
            'ext': 'mp4',
            'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
        },
    }, {
        'url': 'https://rudo.video/live/bbtv',
        'info_dict': {
            'id': 'bbtv',
            'ext': 'mp4',
            'creator': 'BioBioTV',
            'live_status': 'is_live',
            'title': r're:^LIVE BBTV\s\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}$',
            'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
        },
    }, {
        'url': 'https://rudo.video/live/c13',
        'info_dict': {
            'id': 'c13',
            'title': 'CANAL13',
            'ext': 'mp4',
        },
        'skip': 'Geo-restricted to Chile',
    }, {
        'url': 'https://rudo.video/live/t13-13cl',
        'info_dict': {
            'id': 't13-13cl',
            'title': 'T13',
            'ext': 'mp4',
        },
        'skip': 'Geo-restricted to Chile',
    }]

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        is_live = mobj.group('type') == 'live'

        webpage = self._download_webpage(url, video_id)
        if 'Streaming is not available in your area' in webpage:
            self.raise_geo_restricted()

        # Prefer the JS streamURL variable; the <source> tag is consulted
        # only when streamURL is unavailable.
        media_url = self._search_regex(
            r'var\s+streamURL\s*=\s*[\'"]([^?\'"]+)', webpage, 'stream url', default=None)
        if not media_url:
            media_url = self._search_regex(
                r'<source[^>]+src=[\'"]([^\'"]+)', webpage, 'source url', default=None)

        if not media_url:
            # Last resort: the page may reference a YouTube URL instead of
            # hosting the media itself.
            youtube_url = self._search_regex(
                r'file:\s*[\'"]((?:https?:)//(?:www\.)?youtube\.com[^\'"]+)',
                webpage, 'youtube url', default=None)
            if not youtube_url:
                raise ExtractorError('Unable to extract stream url')
            return self.url_result(youtube_url, 'Youtube')

        # Some pages carry a JS array (variable named _$_...) holding a token
        # endpoint URL; when present, the fetched token is appended to the
        # media URL as the 'auth-token' query parameter.
        token_array = self._search_json(
            r'<script>var\s+_\$_[a-zA-Z0-9]+\s*=', webpage, 'access token array', video_id,
            contains_pattern=r'\[(?s:.+)\]', default=None, transform_source=js_to_json)
        if token_array:
            token_url = traverse_obj(token_array, (..., {url_or_none}), get_all=False)
            if not token_url:
                raise ExtractorError('Invalid access token array')
            token_response = self._download_json(
                token_url, video_id, note='Downloading access token')
            media_url = update_url_query(
                media_url, {'auth-token': token_response['data']['authToken']})

        # HLS manifests are expanded into their variants; anything else is a
        # single direct format, with mp3 flagged as having no video codec.
        ext = determine_ext(media_url)
        if ext == 'm3u8':
            formats = self._extract_m3u8_formats(media_url, video_id, live=is_live)
        else:
            direct_format = {'url': media_url}
            if ext == 'mp3':
                direct_format['vcodec'] = 'none'
            formats = [direct_format]

        # Metadata comes from inline JS variables, with OpenGraph tags as the
        # fallback for title and thumbnail.
        title = self._search_regex(
            r'var\s+titleVideo\s*=\s*[\'"]([^\'"]+)', webpage, 'title', default=None)
        if not title:
            title = self._og_search_title(webpage)

        creator = self._search_regex(
            r'var\s+videoAuthor\s*=\s*[\'"]([^?\'"]+)', webpage, 'videoAuthor', default=None)

        thumbnail = self._search_regex(
            r'var\s+posterIMG\s*=\s*[\'"]([^?\'"]+)', webpage, 'thumbnail', default=None)
        if not thumbnail:
            thumbnail = self._og_search_thumbnail(webpage)

        return {
            'id': video_id,
            'title': title,
            'creator': creator,
            'thumbnail': thumbnail,
            'formats': formats,
            'is_live': is_live,
        }

View File

@ -114,8 +114,6 @@ class ScrippsNetworksIE(InfoExtractor):
'timestamp': 1475678834, 'timestamp': 1475678834,
'upload_date': '20161005', 'upload_date': '20161005',
'uploader': 'SCNI-SCND', 'uploader': 'SCNI-SCND',
'tags': 'count:10',
'creator': 'Cooking Channel',
'duration': 29.995, 'duration': 29.995,
'chapters': [{'start_time': 0.0, 'end_time': 29.995, 'title': '<Untitled Chapter 1>'}], 'chapters': [{'start_time': 0.0, 'end_time': 29.995, 'title': '<Untitled Chapter 1>'}],
'thumbnail': 'https://images.dds.discovery.com/up/tp/Scripps_-_Food_Category_Prod/122/987/0260338_630x355.jpg', 'thumbnail': 'https://images.dds.discovery.com/up/tp/Scripps_-_Food_Category_Prod/122/987/0260338_630x355.jpg',

View File

@ -104,10 +104,6 @@ class ThePlatformBaseIE(OnceIE):
_add_chapter(chapter.get('startTime'), chapter.get('endTime')) _add_chapter(chapter.get('startTime'), chapter.get('endTime'))
_add_chapter(tp_chapters[-1].get('startTime'), tp_chapters[-1].get('endTime') or duration) _add_chapter(tp_chapters[-1].get('startTime'), tp_chapters[-1].get('endTime') or duration)
def extract_site_specific_field(field):
# A number of sites have custom-prefixed keys, e.g. 'cbc$seasonNumber'
return traverse_obj(info, lambda k, v: v and k.endswith(f'${field}'), get_all=False)
return { return {
'title': info['title'], 'title': info['title'],
'subtitles': subtitles, 'subtitles': subtitles,
@ -117,14 +113,6 @@ class ThePlatformBaseIE(OnceIE):
'timestamp': int_or_none(info.get('pubDate'), 1000) or None, 'timestamp': int_or_none(info.get('pubDate'), 1000) or None,
'uploader': info.get('billingCode'), 'uploader': info.get('billingCode'),
'chapters': chapters, 'chapters': chapters,
'creator': traverse_obj(info, ('author', {str})) or None,
'categories': traverse_obj(info, (
'categories', lambda _, v: v.get('label') in ('category', None), 'name', {str})) or None,
'tags': traverse_obj(info, ('keywords', {lambda x: re.split(r'[;,]\s?', x) if x else None})),
'location': extract_site_specific_field('region'),
'series': extract_site_specific_field('show'),
'season_number': int_or_none(extract_site_specific_field('seasonNumber')),
'media_type': extract_site_specific_field('programmingType') or extract_site_specific_field('type'),
} }
def _extract_theplatform_metadata(self, path, video_id): def _extract_theplatform_metadata(self, path, video_id):

View File

@ -10,7 +10,6 @@ from ..compat import (
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
) )
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
dict_get, dict_get,
@ -480,9 +479,9 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
'like_count': int, 'like_count': int,
'view_count': int,
'tags': [], 'tags': [],
'age_limit': 18, 'age_limit': 18,
'_old_archive_ids': ['twitter 643211948184596480'],
}, },
}, { }, {
'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1', 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
@ -516,7 +515,6 @@ class TwitterIE(TwitterBaseIE):
'like_count': int, 'like_count': int,
'tags': ['TV', 'StarWars', 'TheForceAwakens'], 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
'age_limit': 0, 'age_limit': 0,
'_old_archive_ids': ['twitter 665052190608723968'],
}, },
}, { }, {
'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880', 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
@ -560,9 +558,9 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
'like_count': int, 'like_count': int,
'view_count': int,
'tags': ['Damndaniel'], 'tags': ['Damndaniel'],
'age_limit': 0, 'age_limit': 0,
'_old_archive_ids': ['twitter 700207533655363584'],
}, },
}, { }, {
'url': 'https://twitter.com/Filmdrunk/status/713801302971588609', 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
@ -601,9 +599,9 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
'like_count': int, 'like_count': int,
'view_count': int,
'tags': [], 'tags': [],
'age_limit': 0, 'age_limit': 0,
'_old_archive_ids': ['twitter 719944021058060289'],
}, },
}, { }, {
'url': 'https://twitter.com/OPP_HSD/status/779210622571536384', 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
@ -618,7 +616,6 @@ class TwitterIE(TwitterBaseIE):
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
}, },
'add_ie': ['Periscope'], 'add_ie': ['Periscope'],
'skip': 'Broadcast not found',
}, { }, {
# has mp4 formats via mobile API # has mp4 formats via mobile API
'url': 'https://twitter.com/news_al3alm/status/852138619213144067', 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
@ -638,9 +635,9 @@ class TwitterIE(TwitterBaseIE):
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'tags': [], 'tags': [],
'repost_count': int, 'repost_count': int,
'view_count': int,
'like_count': int, 'like_count': int,
'comment_count': int, 'comment_count': int,
'_old_archive_ids': ['twitter 852138619213144067'],
}, },
}, { }, {
'url': 'https://twitter.com/i/web/status/910031516746514432', 'url': 'https://twitter.com/i/web/status/910031516746514432',
@ -660,9 +657,9 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
'like_count': int, 'like_count': int,
'view_count': int,
'tags': ['Maria'], 'tags': ['Maria'],
'age_limit': 0, 'age_limit': 0,
'_old_archive_ids': ['twitter 910031516746514432'],
}, },
'params': { 'params': {
'skip_download': True, # requires ffmpeg 'skip_download': True, # requires ffmpeg
@ -686,9 +683,9 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
'like_count': int, 'like_count': int,
'view_count': int,
'tags': [], 'tags': [],
'age_limit': 0, 'age_limit': 0,
'_old_archive_ids': ['twitter 1001551623938805763'],
}, },
'params': { 'params': {
'skip_download': True, # requires ffmpeg 'skip_download': True, # requires ffmpeg
@ -752,7 +749,6 @@ class TwitterIE(TwitterBaseIE):
'like_count': int, 'like_count': int,
'tags': [], 'tags': [],
'age_limit': 0, 'age_limit': 0,
'_old_archive_ids': ['twitter 1349794411333394432'],
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -775,18 +771,18 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
'like_count': int, 'like_count': int,
'view_count': int,
'tags': [], 'tags': [],
'age_limit': 0, 'age_limit': 0,
'_old_archive_ids': ['twitter 1577855540407197696'],
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
}, { }, {
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464', 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
'info_dict': { 'info_dict': {
'id': '1577719286659006464', 'id': '1577719286659006464',
'title': 'Ultima - Test', 'title': 'Ultima📛| New Era - Test',
'description': 'Test https://t.co/Y3KEZD7Dad', 'description': 'Test https://t.co/Y3KEZD7Dad',
'uploader': 'Ultima', 'uploader': 'Ultima📛| New Era',
'uploader_id': 'UltimaShadowX', 'uploader_id': 'UltimaShadowX',
'uploader_url': 'https://twitter.com/UltimaShadowX', 'uploader_url': 'https://twitter.com/UltimaShadowX',
'upload_date': '20221005', 'upload_date': '20221005',
@ -817,9 +813,9 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
'like_count': int, 'like_count': int,
'view_count': int,
'tags': ['HurricaneIan'], 'tags': ['HurricaneIan'],
'age_limit': 0, 'age_limit': 0,
'_old_archive_ids': ['twitter 1575560063510810624'],
}, },
}, { }, {
# Adult content, fails if not logged in # Adult content, fails if not logged in
@ -955,10 +951,10 @@ class TwitterIE(TwitterBaseIE):
'uploader_url': 'https://twitter.com/CTVJLaidlaw', 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
'display_id': '1600649710662213632', 'display_id': '1600649710662213632',
'like_count': int, 'like_count': int,
'view_count': int,
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c', 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
'upload_date': '20221208', 'upload_date': '20221208',
'age_limit': 0, 'age_limit': 0,
'_old_archive_ids': ['twitter 1600649710662213632'],
}, },
'params': {'noplaylist': True}, 'params': {'noplaylist': True},
}, { }, {
@ -983,7 +979,7 @@ class TwitterIE(TwitterBaseIE):
'like_count': int, 'like_count': int,
'repost_count': int, 'repost_count': int,
'comment_count': int, 'comment_count': int,
'_old_archive_ids': ['twitter 1621117700482416640'], 'view_count': int,
}, },
}, { }, {
'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2', 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
@ -999,13 +995,13 @@ class TwitterIE(TwitterBaseIE):
'repost_count': int, 'repost_count': int,
'duration': 9.531, 'duration': 9.531,
'comment_count': int, 'comment_count': int,
'view_count': int,
'upload_date': '20221203', 'upload_date': '20221203',
'age_limit': 0, 'age_limit': 0,
'timestamp': 1670092210.0, 'timestamp': 1670092210.0,
'tags': [], 'tags': [],
'uploader': '\u06ea', 'uploader': '\u06ea',
'description': '\U0001F48B https://t.co/bTj9Qz7vQP', 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
'_old_archive_ids': ['twitter 1599108751385972737'],
}, },
'params': {'noplaylist': True}, 'params': {'noplaylist': True},
}, { }, {
@ -1016,6 +1012,7 @@ class TwitterIE(TwitterBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'uploader_url': 'https://twitter.com/MunTheShinobi', 'uploader_url': 'https://twitter.com/MunTheShinobi',
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml', 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
'view_count': int,
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig', 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
'age_limit': 0, 'age_limit': 0,
'uploader': 'Mün', 'uploader': 'Mün',
@ -1028,7 +1025,6 @@ class TwitterIE(TwitterBaseIE):
'uploader_id': 'MunTheShinobi', 'uploader_id': 'MunTheShinobi',
'duration': 139.987, 'duration': 139.987,
'timestamp': 1670306984.0, 'timestamp': 1670306984.0,
'_old_archive_ids': ['twitter 1600009574919962625'],
}, },
}, { }, {
# retweeted_status (private) # retweeted_status (private)
@ -1072,8 +1068,8 @@ class TwitterIE(TwitterBaseIE):
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+', 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
'like_count': int, 'like_count': int,
'repost_count': int, 'repost_count': int,
'view_count': int,
'comment_count': int, 'comment_count': int,
'_old_archive_ids': ['twitter 1695424220702888009'],
}, },
}, { }, {
# retweeted_status w/ legacy API # retweeted_status w/ legacy API
@ -1095,24 +1091,18 @@ class TwitterIE(TwitterBaseIE):
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+', 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
'like_count': int, 'like_count': int,
'repost_count': int, 'repost_count': int,
'_old_archive_ids': ['twitter 1695424220702888009'],
}, },
'params': {'extractor_args': {'twitter': {'api': ['legacy']}}}, 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
}, { }, {
# Broadcast embedded in tweet # Broadcast embedded in tweet
'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384', 'url': 'https://twitter.com/JessicaDobsonWX/status/1693057346933600402',
'info_dict': { 'info_dict': {
'id': '1rmxPMjLzAXKN', 'id': '1yNGaNLjEblJj',
'ext': 'mp4', 'ext': 'mp4',
'title': 'WAVE Weather Now - Saturday 12/2/23 Update', 'title': 'Jessica Dobson - WAVE Weather Now - Saturday 8/19/23 Update',
'uploader': 'Jessica Dobson', 'uploader': 'Jessica Dobson',
'uploader_id': 'JessicaDobsonWX', 'uploader_id': '1DZEoDwDovRQa',
'uploader_url': 'https://twitter.com/JessicaDobsonWX', 'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1701566398,
'upload_date': '20231203',
'live_status': 'was_live',
'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
'concurrent_view_count': int,
'view_count': int, 'view_count': int,
}, },
'add_ie': ['TwitterBroadcast'], 'add_ie': ['TwitterBroadcast'],
@ -1135,30 +1125,6 @@ class TwitterIE(TwitterBaseIE):
}, },
'params': {'extractor_args': {'twitter': {'api': ['syndication']}}}, 'params': {'extractor_args': {'twitter': {'api': ['syndication']}}},
'expected_warnings': ['Not all metadata'], 'expected_warnings': ['Not all metadata'],
}, {
# "stale tweet" with typename "TweetWithVisibilityResults"
'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
'md5': '62b1e11cdc2cdd0e527f83adb081f536',
'info_dict': {
'id': '1724883339285544960',
'ext': 'mp4',
'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
'display_id': '1724884212803834154',
'uploader': 'Robert F. Kennedy Jr',
'uploader_id': 'RobertKennedyJr',
'uploader_url': 'https://twitter.com/RobertKennedyJr',
'upload_date': '20231115',
'timestamp': 1700079417.0,
'duration': 341.048,
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
'tags': ['Kennedy24'],
'repost_count': int,
'like_count': int,
'comment_count': int,
'age_limit': 0,
'_old_archive_ids': ['twitter 1724884212803834154'],
},
}, { }, {
# onion route # onion route
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273', 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
@ -1213,23 +1179,19 @@ class TwitterIE(TwitterBaseIE):
), default={}, get_all=False) if self.is_logged_in else traverse_obj( ), default={}, get_all=False) if self.is_logged_in else traverse_obj(
data, ('tweetResult', 'result', {dict}), default={}) data, ('tweetResult', 'result', {dict}), default={})
typename = result.get('__typename') if result.get('__typename') not in ('Tweet', 'TweetTombstone', 'TweetUnavailable', None):
if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None): self.report_warning(f'Unknown typename: {result.get("__typename")}', twid, only_once=True)
self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)
if 'tombstone' in result: if 'tombstone' in result:
cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more') cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True) raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
elif typename == 'TweetUnavailable': elif result.get('__typename') == 'TweetUnavailable':
reason = result.get('reason') reason = result.get('reason')
if reason == 'NsfwLoggedOut': if reason == 'NsfwLoggedOut':
self.raise_login_required('NSFW tweet requires authentication') self.raise_login_required('NSFW tweet requires authentication')
elif reason == 'Protected': elif reason == 'Protected':
self.raise_login_required('You are not authorized to view this protected tweet') self.raise_login_required('You are not authorized to view this protected tweet')
raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True) raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)
# Result for "stale tweet" needs additional transformation
elif typename == 'TweetWithVisibilityResults':
result = traverse_obj(result, ('tweet', {dict})) or {}
status = result.get('legacy', {}) status = result.get('legacy', {})
status.update(traverse_obj(result, { status.update(traverse_obj(result, {
@ -1318,51 +1280,41 @@ class TwitterIE(TwitterBaseIE):
} }
} }
def _call_syndication_api(self, twid):
self.report_warning(
'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
status = self._download_json(
'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
headers={'User-Agent': 'Googlebot'}, query={
'id': twid,
# TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
})
if not status:
raise ExtractorError('Syndication endpoint returned empty JSON response')
# Transform the result so its structure matches that of legacy/graphql
media = []
for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
detail['id_str'] = traverse_obj(detail, (
'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
media.append(detail)
status['extended_entities'] = {'media': media}
return status
def _extract_status(self, twid): def _extract_status(self, twid):
if self._selected_api not in ('graphql', 'legacy', 'syndication'): if self.is_logged_in or self._selected_api == 'graphql':
raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True) status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
try: elif self._selected_api == 'legacy':
if self.is_logged_in or self._selected_api == 'graphql': status = self._call_api(f'statuses/show/{twid}.json', twid, {
status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid) 'cards_platform': 'Web-12',
elif self._selected_api == 'legacy': 'include_cards': 1,
status = self._call_api(f'statuses/show/{twid}.json', twid, { 'include_reply_count': 1,
'cards_platform': 'Web-12', 'include_user_entities': 0,
'include_cards': 1, 'tweet_mode': 'extended',
'include_reply_count': 1, })
'include_user_entities': 0,
'tweet_mode': 'extended', elif self._selected_api == 'syndication':
self.report_warning(
'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
status = self._download_json(
'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
headers={'User-Agent': 'Googlebot'}, query={
'id': twid,
# TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
}) })
except ExtractorError as e: if not status:
if not isinstance(e.cause, HTTPError) or not e.cause.status == 429: raise ExtractorError('Syndication endpoint returned empty JSON response')
raise # Transform the result so its structure matches that of legacy/graphql
self.report_warning('Rate-limit exceeded; falling back to syndication endpoint') media = []
status = self._call_syndication_api(twid) for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
detail['id_str'] = traverse_obj(detail, (
'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
media.append(detail)
status['extended_entities'] = {'media': media}
if self._selected_api == 'syndication': else:
status = self._call_syndication_api(twid) raise ExtractorError(f'"{self._selected_api}" is not a valid API selection', expected=True)
return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {} return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
@ -1425,10 +1377,10 @@ class TwitterIE(TwitterBaseIE):
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})), # No longer available 'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000), 'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
# Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117 # The codec of http formats are unknown
'_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'), # http format codec is unknown '_format_sort_fields': ('res', 'br', 'size', 'proto'),
} }
def extract_from_card_info(card): def extract_from_card_info(card):

View File

@ -4480,13 +4480,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if mobj: if mobj:
info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count')) info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
break break
sbr_tooltip = try_get(
info['like_count'] = traverse_obj(vpir, ( vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
'videoActions', 'menuRenderer', 'topLevelButtons', ..., if sbr_tooltip:
'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel', like_count, dislike_count = sbr_tooltip.split(' / ')
'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel', info.update({
'buttonViewModel', 'accessibilityText', {parse_count}), get_all=False) 'like_count': str_to_int(like_count),
'dislike_count': str_to_int(dislike_count),
})
vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer')) vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
if vcr: if vcr:
vc = self._get_count(vcr, 'viewCount') vc = self._get_count(vcr, 'viewCount')

View File

@ -206,14 +206,13 @@ class Updater:
# XXX: use class variables to simplify testing # XXX: use class variables to simplify testing
_channel = CHANNEL _channel = CHANNEL
_origin = ORIGIN _origin = ORIGIN
_update_sources = UPDATE_SOURCES
def __init__(self, ydl, target: str | None = None): def __init__(self, ydl, target: str | None = None):
self.ydl = ydl self.ydl = ydl
# For backwards compat, target needs to be treated as if it could be None # For backwards compat, target needs to be treated as if it could be None
self.requested_channel, sep, self.requested_tag = (target or self._channel).rpartition('@') self.requested_channel, sep, self.requested_tag = (target or self._channel).rpartition('@')
# Check if requested_tag is actually the requested repo/channel # Check if requested_tag is actually the requested repo/channel
if not sep and ('/' in self.requested_tag or self.requested_tag in self._update_sources): if not sep and ('/' in self.requested_tag or self.requested_tag in UPDATE_SOURCES):
self.requested_channel = self.requested_tag self.requested_channel = self.requested_tag
self.requested_tag: str = None # type: ignore (we set it later) self.requested_tag: str = None # type: ignore (we set it later)
elif not self.requested_channel: elif not self.requested_channel:
@ -238,11 +237,11 @@ class Updater:
self._block_restart('Automatically restarting into custom builds is disabled for security reasons') self._block_restart('Automatically restarting into custom builds is disabled for security reasons')
else: else:
# Check if requested_channel resolves to a known repository or else raise # Check if requested_channel resolves to a known repository or else raise
self.requested_repo = self._update_sources.get(self.requested_channel) self.requested_repo = UPDATE_SOURCES.get(self.requested_channel)
if not self.requested_repo: if not self.requested_repo:
self._report_error( self._report_error(
f'Invalid update channel {self.requested_channel!r} requested. ' f'Invalid update channel {self.requested_channel!r} requested. '
f'Valid channels are {", ".join(self._update_sources)}', True) f'Valid channels are {", ".join(UPDATE_SOURCES)}', True)
self._identifier = f'{detect_variant()} {system_identifier()}' self._identifier = f'{detect_variant()} {system_identifier()}'

View File

@ -67,7 +67,7 @@ class HTTPHeaderDict(collections.UserDict, dict):
def __setitem__(self, key, value): def __setitem__(self, key, value):
if isinstance(value, bytes): if isinstance(value, bytes):
value = value.decode('latin-1') value = value.decode('latin-1')
super().__setitem__(key.title(), str(value).strip()) super().__setitem__(key.title(), str(value))
def __getitem__(self, key): def __getitem__(self, key):
return super().__getitem__(key.title()) return super().__getitem__(key.title())

View File

@ -8,7 +8,7 @@ from ._utils import (
IDENTITY, IDENTITY,
NO_DEFAULT, NO_DEFAULT,
LazyList, LazyList,
deprecation_warning, int_or_none,
is_iterable_like, is_iterable_like,
try_call, try_call,
variadic, variadic,
@ -17,7 +17,7 @@ from ._utils import (
def traverse_obj( def traverse_obj(
obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True, obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True,
casesense=True, is_user_input=NO_DEFAULT, traverse_string=False): casesense=True, is_user_input=False, traverse_string=False):
""" """
Safely traverse nested `dict`s and `Iterable`s Safely traverse nested `dict`s and `Iterable`s
@ -63,8 +63,10 @@ def traverse_obj(
@param get_all If `False`, return the first matching result, otherwise all matching ones. @param get_all If `False`, return the first matching result, otherwise all matching ones.
@param casesense If `False`, consider string dictionary keys as case insensitive. @param casesense If `False`, consider string dictionary keys as case insensitive.
`traverse_string` is only meant to be used by YoutubeDL.prepare_outtmpl and is not part of the API The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API
@param is_user_input Whether the keys are generated from user input.
If `True` strings get converted to `int`/`slice` if needed.
@param traverse_string Whether to traverse into objects as strings. @param traverse_string Whether to traverse into objects as strings.
If `True`, any non-compatible object will first be If `True`, any non-compatible object will first be
converted into a string and then traversed into. converted into a string and then traversed into.
@ -78,9 +80,6 @@ def traverse_obj(
If no `default` is given and the last path branches, a `list` of results If no `default` is given and the last path branches, a `list` of results
is always returned. If a path ends on a `dict` that result will always be a `dict`. is always returned. If a path ends on a `dict` that result will always be a `dict`.
""" """
if is_user_input is not NO_DEFAULT:
deprecation_warning('The is_user_input parameter is deprecated and no longer works')
casefold = lambda k: k.casefold() if isinstance(k, str) else k casefold = lambda k: k.casefold() if isinstance(k, str) else k
if isinstance(expected_type, type): if isinstance(expected_type, type):
@ -196,6 +195,14 @@ def traverse_obj(
key = None key = None
for last, key in lazy_last(variadic(path, (str, bytes, dict, set))): for last, key in lazy_last(variadic(path, (str, bytes, dict, set))):
if is_user_input and isinstance(key, str):
if key == ':':
key = ...
elif ':' in key:
key = slice(*map(int_or_none, key.split(':')))
elif int_or_none(key) is not None:
key = int(key)
if not casesense and isinstance(key, str): if not casesense and isinstance(key, str):
key = key.casefold() key = key.casefold()

View File

@ -286,8 +286,8 @@ class CueBlock(Block):
m1 = parser.consume(_REGEX_TS) m1 = parser.consume(_REGEX_TS)
if not m1: if not m1:
return None return None
m2 = parser.consume(cls._REGEX_SETTINGS)
parser.consume(_REGEX_OPTIONAL_WHITESPACE) parser.consume(_REGEX_OPTIONAL_WHITESPACE)
m2 = parser.consume(cls._REGEX_SETTINGS)
if not parser.consume(_REGEX_NL): if not parser.consume(_REGEX_NL):
return None return None