Compare commits

..

No commits in common. "da18e58fecf6a81f0d320bcf361ed806ea6e2e85" and "eacb31aaabdf533523d0f961279a7fd0ee80347e" have entirely different histories.

24 changed files with 122 additions and 675 deletions

View File

@ -80,12 +80,12 @@ on:
default: true
type: boolean
origin:
description: Origin
description: .
required: false
default: 'current repo'
default: ''
type: choice
options:
- 'current repo'
- ''
permissions:
contents: read
@ -99,7 +99,7 @@ jobs:
- name: Process origin
id: process_origin
run: |
echo "origin=${{ inputs.origin == 'current repo' && github.repository || inputs.origin }}" | tee "$GITHUB_OUTPUT"
echo "origin=${{ inputs.origin || github.repository }}" >> "$GITHUB_OUTPUT"
unix:
needs: process

View File

@ -1,25 +1,5 @@
name: Core Tests
on:
push:
paths:
- .github/**
- devscripts/**
- test/**
- yt_dlp/**.py
- '!yt_dlp/extractor/*.py'
- yt_dlp/extractor/__init__.py
- yt_dlp/extractor/common.py
- yt_dlp/extractor/extractors.py
pull_request:
paths:
- .github/**
- devscripts/**
- test/**
- yt_dlp/**.py
- '!yt_dlp/extractor/*.py'
- yt_dlp/extractor/__init__.py
- yt_dlp/extractor/common.py
- yt_dlp/extractor/extractors.py
on: [push, pull_request]
permissions:
contents: read
@ -36,8 +16,8 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
# CPython 3.8 is in quick-test
python-version: ['3.9', '3.10', '3.11', '3.12', pypy-3.8, pypy-3.10]
# CPython 3.11 is in quick-test
python-version: ['3.8', '3.9', '3.10', '3.12', pypy-3.8, pypy-3.10]
run-tests-ext: [sh]
include:
# atleast one of each CPython/PyPy tests must be in windows

View File

@ -10,10 +10,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.8
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: '3.8'
python-version: '3.11'
- name: Install test requirements
run: pip install pytest -r requirements.txt
- name: Run tests

View File

@ -64,6 +64,7 @@ jobs:
target_tag: ${{ steps.setup_variables.outputs.target_tag }}
pypi_project: ${{ steps.setup_variables.outputs.pypi_project }}
pypi_suffix: ${{ steps.setup_variables.outputs.pypi_suffix }}
pypi_token: ${{ steps.setup_variables.outputs.pypi_token }}
head_sha: ${{ steps.get_target.outputs.head_sha }}
steps:
@ -152,6 +153,7 @@ jobs:
${{ !!secrets[format('{0}_archive_repo_token', env.target_repo)] }} || fallback_token
pypi_project='${{ vars[format('{0}_pypi_project', env.target_repo)] }}'
pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.target_repo)] }}'
${{ !secrets[format('{0}_pypi_token', env.target_repo)] }} || pypi_token='${{ env.target_repo }}_pypi_token'
fi
else
target_tag="${source_tag:-${version}}"
@ -161,6 +163,7 @@ jobs:
${{ !!secrets[format('{0}_archive_repo_token', env.source_repo)] }} || fallback_token
pypi_project='${{ vars[format('{0}_pypi_project', env.source_repo)] }}'
pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.source_repo)] }}'
${{ !secrets[format('{0}_pypi_token', env.source_repo)] }} || pypi_token='${{ env.source_repo }}_pypi_token'
else
target_repo='${{ github.repository }}'
fi
@ -169,6 +172,13 @@ jobs:
if [[ "${target_repo}" == '${{ github.repository }}' ]] && ${{ !inputs.prerelease }}; then
pypi_project='${{ vars.PYPI_PROJECT }}'
fi
if [[ -z "${pypi_token}" && "${pypi_project}" ]]; then
if ${{ !secrets.PYPI_TOKEN }}; then
pypi_token=OIDC
else
pypi_token=PYPI_TOKEN
fi
fi
echo "::group::Output variables"
cat << EOF | tee -a "$GITHUB_OUTPUT"
@ -179,6 +189,7 @@ jobs:
target_tag=${target_tag}
pypi_project=${pypi_project}
pypi_suffix=${pypi_suffix}
pypi_token=${pypi_token}
EOF
echo "::endgroup::"
@ -275,7 +286,18 @@ jobs:
python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update"
python setup.py sdist bdist_wheel
- name: Publish to PyPI
- name: Publish to PyPI via token
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets[needs.prepare.outputs.pypi_token] }}
if: |
needs.prepare.outputs.pypi_token != 'OIDC' && env.TWINE_PASSWORD
run: |
twine upload dist/*
- name: Publish to PyPI via trusted publishing
if: |
needs.prepare.outputs.pypi_token == 'OIDC'
uses: pypa/gh-action-pypi-publish@release/v1
with:
verbose: true

View File

@ -328,7 +328,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
https_server_thread.start()
with handler(verify=False) as rh:
with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
with pytest.raises(SSLError, match='sslv3 alert handshake failure') as exc_info:
validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
assert not issubclass(exc_info.type, CertificateVerifyError)

View File

@ -11,14 +11,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, report_warning
from yt_dlp.update import UpdateInfo, Updater
# XXX: Keep in sync with yt_dlp.update.UPDATE_SOURCES
TEST_UPDATE_SOURCES = {
'stable': 'yt-dlp/yt-dlp',
'nightly': 'yt-dlp/yt-dlp-nightly-builds',
'master': 'yt-dlp/yt-dlp-master-builds',
}
TEST_API_DATA = {
'yt-dlp/yt-dlp/latest': {
'tag_name': '2023.12.31',
@ -112,7 +104,6 @@ class FakeUpdater(Updater):
_channel = 'stable'
_origin = 'yt-dlp/yt-dlp'
_update_sources = TEST_UPDATE_SOURCES
def _download_update_spec(self, *args, **kwargs):
return TEST_LOCKFILE_ACTUAL

View File

@ -2372,11 +2372,6 @@ Line 1
headers4 = HTTPHeaderDict({'ytdl-test': 'data;'})
self.assertEqual(set(headers4.items()), {('Ytdl-Test', 'data;')})
# common mistake: strip whitespace from values
# https://github.com/yt-dlp/yt-dlp/issues/8729
headers5 = HTTPHeaderDict({'ytdl-test': ' data; '})
self.assertEqual(set(headers5.items()), {('Ytdl-Test', 'data;')})
def test_extract_basic_auth(self):
assert extract_basic_auth('http://:foo.bar') == ('http://:foo.bar', None)
assert extract_basic_auth('http://foo.bar') == ('http://foo.bar', None)

View File

@ -148,7 +148,7 @@ class TestWebsSocketRequestHandlerConformance:
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_ssl_error(self, handler):
with handler(verify=False) as rh:
with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
with pytest.raises(SSLError, match='sslv3 alert handshake failure') as exc_info:
validate_and_send(rh, Request(self.bad_wss_host))
assert not issubclass(exc_info.type, CertificateVerifyError)

View File

@ -276,7 +276,6 @@ from .brilliantpala import (
)
from .businessinsider import BusinessInsiderIE
from .bundesliga import BundesligaIE
from .bundestag import BundestagIE
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
@ -865,7 +864,6 @@ from .jiosaavn import (
)
from .jove import JoveIE
from .joj import JojIE
from .joqrag import JoqrAgIE
from .jstream import JStreamIE
from .jtbc import (
JTBCIE,
@ -993,7 +991,6 @@ from .lynda import (
LyndaIE,
LyndaCourseIE
)
from .maariv import MaarivIE
from .magellantv import MagellanTVIE
from .magentamusik360 import MagentaMusik360IE
from .mailru import (
@ -1593,7 +1590,6 @@ from .restudy import RestudyIE
from .reuters import ReutersIE
from .reverbnation import ReverbNationIE
from .rheinmaintv import RheinMainTVIE
from .rinsefm import RinseFMIE
from .rmcdecouverte import RMCDecouverteIE
from .rockstargames import RockstarGamesIE
from .rokfin import (
@ -1647,7 +1643,6 @@ from .rumble import (
RumbleIE,
RumbleChannelIE,
)
from .rudovideo import RudoVideoIE
from .rutube import (
RutubeIE,
RutubeChannelIE,

View File

@ -317,25 +317,16 @@ class BBCCoUkIE(InfoExtractor):
def _download_media_selector(self, programme_id):
last_exception = None
formats, subtitles = [], {}
for media_set in self._MEDIA_SETS:
try:
fmts, subs = self._download_media_selector_url(
return self._download_media_selector_url(
self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
formats.extend(fmts)
if subs:
self._merge_subtitles(subs, target=subtitles)
except BBCCoUkIE.MediaSelectionError as e:
if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
last_exception = e
continue
self._raise_extractor_error(e)
if last_exception:
if formats or subtitles:
self.report_warning(f'{self.IE_NAME} returned error: {last_exception.id}')
else:
self._raise_extractor_error(last_exception)
return formats, subtitles
self._raise_extractor_error(last_exception)
def _download_media_selector_url(self, url, programme_id=None):
media_selection = self._download_json(
@ -1197,7 +1188,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
if initial_data is None:
initial_data = self._search_regex(
r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
'preload state', default='{}')
'preload state', default={})
else:
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)

View File

@ -1,123 +0,0 @@
import re
from functools import partial
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
bug_reports_message,
clean_html,
format_field,
get_element_text_and_html_by_tag,
int_or_none,
url_or_none,
)
from ..utils.traversal import traverse_obj
class BundestagIE(InfoExtractor):
_VALID_URL = [
r'https?://dbtg\.tv/[cf]vid/(?P<id>\d+)',
r'https?://www\.bundestag\.de/mediathek/?\?(?:[^#]+&)?videoid=(?P<id>\d+)',
]
_TESTS = [{
'url': 'https://dbtg.tv/cvid/7605304',
'info_dict': {
'id': '7605304',
'ext': 'mp4',
'title': '145. Sitzung vom 15.12.2023, TOP 24 Barrierefreiheit',
'description': 'md5:321a9dc6bdad201264c0045efc371561',
},
}, {
'url': 'https://www.bundestag.de/mediathek?videoid=7602120&url=L21lZGlhdGhla292ZXJsYXk=&mod=mediathek',
'info_dict': {
'id': '7602120',
'ext': 'mp4',
'title': '130. Sitzung vom 18.10.2023, TOP 1 Befragung der Bundesregierung',
'description': 'Befragung der Bundesregierung',
},
}, {
'url': 'https://www.bundestag.de/mediathek?videoid=7604941#url=L21lZGlhdGhla292ZXJsYXk/dmlkZW9pZD03NjA0OTQx&mod=mediathek',
'only_matching': True,
}, {
'url': 'http://dbtg.tv/fvid/3594346',
'only_matching': True,
}]
_OVERLAY_URL = 'https://www.bundestag.de/mediathekoverlay'
_INSTANCE_FORMAT = 'https://cldf-wzw-od.r53.cdn.tv1.eu/13014bundestagod/_definst_/13014bundestag/ondemand/3777parlamentsfernsehen/archiv/app144277506/145293313/{0}/{0}_playlist.smil/playlist.m3u8'
_SHARE_URL = 'https://webtv.bundestag.de/player/macros/_x_s-144277506/shareData.json?contentId='
_SHARE_AUDIO_REGEX = r'/\d+_(?P<codec>\w+)_(?P<bitrate>\d+)kb_(?P<channels>\w+)_\w+_\d+\.(?P<ext>\w+)'
_SHARE_VIDEO_REGEX = r'/\d+_(?P<codec>\w+)_(?P<width>\w+)_(?P<height>\w+)_(?P<bitrate>\d+)kb_\w+_\w+_\d+\.(?P<ext>\w+)'
def _bt_extract_share_formats(self, video_id):
share_data = self._download_json(
f'{self._SHARE_URL}{video_id}', video_id, note='Downloading share format JSON')
if traverse_obj(share_data, ('status', 'code', {int})) != 1:
self.report_warning(format_field(
share_data, [('status', 'message', {str})],
'Share API response: %s', default='Unknown Share API Error')
+ bug_reports_message())
return
for name, url in share_data.items():
if not isinstance(name, str) or not url_or_none(url):
continue
elif name.startswith('audio'):
match = re.search(self._SHARE_AUDIO_REGEX, url)
yield {
'format_id': name,
'url': url,
'vcodec': 'none',
**traverse_obj(match, {
'acodec': 'codec',
'audio_channels': ('channels', {{'mono': 1, 'stereo': 2}.get}),
'abr': ('bitrate', {int_or_none}),
'ext': 'ext',
}),
}
elif name.startswith('download'):
match = re.search(self._SHARE_VIDEO_REGEX, url)
yield {
'format_id': name,
'url': url,
**traverse_obj(match, {
'vcodec': 'codec',
'tbr': ('bitrate', {int_or_none}),
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
'ext': 'ext',
}),
}
def _real_extract(self, url):
video_id = self._match_id(url)
formats = []
result = {'id': video_id, 'formats': formats}
try:
formats.extend(self._extract_m3u8_formats(
self._INSTANCE_FORMAT.format(video_id), video_id, m3u8_id='instance'))
except ExtractorError as error:
if isinstance(error.cause, HTTPError) and error.cause.status == 404:
raise ExtractorError('Could not find video id', expected=True)
self.report_warning(f'Error extracting hls formats: {error}', video_id)
formats.extend(self._bt_extract_share_formats(video_id))
if not formats:
self.raise_no_formats('Could not find suitable formats', video_id=video_id)
result.update(traverse_obj(self._download_webpage(
self._OVERLAY_URL, video_id,
query={'videoid': video_id, 'view': 'main'},
note='Downloading metadata overlay', fatal=False,
), {
'title': (
{partial(get_element_text_and_html_by_tag, 'h3')}, 0,
{partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
'description': ({partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
}))
return result

View File

@ -23,7 +23,7 @@ class IHeartRadioBaseIE(InfoExtractor):
class IHeartRadioIE(IHeartRadioBaseIE):
IE_NAME = 'iheartradio'
IENAME = 'iheartradio'
_VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)'
_TEST = {
'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true',

View File

@ -10,7 +10,6 @@ from ..utils import (
ExtractorError,
decode_base_n,
encode_base_n,
filter_dict,
float_or_none,
format_field,
get_element_by_attribute,
@ -704,31 +703,28 @@ class InstagramStoryIE(InstagramBaseIE):
user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False)
if not user_info:
self.raise_login_required('This content is unreachable')
user_id = user_info.get('id')
user_id = traverse_obj(user_info, 'pk', 'id', expected_type=str)
story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
if not story_info_url: # user id is only mandatory for non-highlights
raise ExtractorError('Unable to extract user id')
videos = traverse_obj(self._download_json(
f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}',
story_id, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels')
if not videos:
self.raise_login_required('You need to log in to access this content')
full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (user_id, 'user', 'full_name'))
full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (str(user_id), 'user', 'full_name'))
story_title = traverse_obj(videos, (f'highlight:{story_id}', 'title'))
if not story_title:
story_title = f'Story by {username}'
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (user_id, 'items'))
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
info_data = []
for highlight in highlights:
highlight_data = self._extract_product(highlight)
if highlight_data.get('formats'):
info_data.append({
**highlight_data,
'uploader': full_name,
'uploader_id': user_id,
**filter_dict(highlight_data),
})
return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title)

View File

@ -1,112 +0,0 @@
import datetime
import urllib.parse
from .common import InfoExtractor
from ..utils import (
clean_html,
datetime_from_str,
unified_timestamp,
urljoin,
)
class JoqrAgIE(InfoExtractor):
IE_DESC = '超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)'
_VALID_URL = [r'https?://www\.uniqueradio\.jp/agplayer5/(?:player|inc-player-hls)\.php',
r'https?://(?:www\.)?joqr\.co\.jp/ag/',
r'https?://(?:www\.)?joqr\.co\.jp/qr/ag(?:daily|regular)program/?(?:$|[#?])']
_TESTS = [{
'url': 'https://www.uniqueradio.jp/agplayer5/player.php',
'info_dict': {
'id': 'live',
'title': str,
'channel': '超!A&G+',
'description': str,
'live_status': 'is_live',
'release_timestamp': int,
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
}, {
'url': 'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php',
'only_matching': True,
}, {
'url': 'https://www.joqr.co.jp/ag/article/103760/',
'only_matching': True,
}, {
'url': 'http://www.joqr.co.jp/qr/agdailyprogram/',
'only_matching': True,
}, {
'url': 'http://www.joqr.co.jp/qr/agregularprogram/',
'only_matching': True,
}]
def _extract_metadata(self, variable, html):
return clean_html(urllib.parse.unquote_plus(self._search_regex(
rf'var\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
html, 'metadata', group='value', default=''))) or None
def _extract_start_timestamp(self, video_id, is_live):
def extract_start_time_from(date_str):
dt = datetime_from_str(date_str) + datetime.timedelta(hours=9)
date = dt.strftime('%Y%m%d')
start_time = self._search_regex(
r'<h3[^>]+\bclass="dailyProgram-itemHeaderTime"[^>]*>[\s\d:]+\s*(\d{1,2}:\d{1,2})',
self._download_webpage(
f'https://www.joqr.co.jp/qr/agdailyprogram/?date={date}', video_id,
note=f'Downloading program list of {date}', fatal=False,
errnote=f'Failed to download program list of {date}') or '',
'start time', default=None)
if start_time:
return unified_timestamp(f'{dt.strftime("%Y/%m/%d")} {start_time} +09:00')
return None
start_timestamp = extract_start_time_from('today')
if not start_timestamp:
return None
if not is_live or start_timestamp < datetime_from_str('now').timestamp():
return start_timestamp
else:
return extract_start_time_from('yesterday')
def _real_extract(self, url):
video_id = 'live'
metadata = self._download_webpage(
'https://www.uniqueradio.jp/aandg', video_id,
note='Downloading metadata', errnote='Failed to download metadata')
title = self._extract_metadata('Program_name', metadata)
if title == '放送休止':
formats = []
live_status = 'is_upcoming'
release_timestamp = self._extract_start_timestamp(video_id, False)
msg = 'This stream is not currently live'
if release_timestamp:
msg += (' and will start at '
+ datetime.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S'))
self.raise_no_formats(msg, expected=True)
else:
m3u8_path = self._search_regex(
r'<source\s[^>]*\bsrc="([^"]+)"',
self._download_webpage(
'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php', video_id,
note='Downloading player data', errnote='Failed to download player data'),
'm3u8 url')
formats = self._extract_m3u8_formats(
urljoin('https://www.uniqueradio.jp/', m3u8_path), video_id)
live_status = 'is_live'
release_timestamp = self._extract_start_timestamp(video_id, True)
return {
'id': video_id,
'title': title,
'channel': '超!A&G+',
'description': self._extract_metadata('Program_text', metadata),
'formats': formats,
'live_status': live_status,
'release_timestamp': release_timestamp,
}

View File

@ -12,7 +12,7 @@ from ..utils import (
class KinjaEmbedIE(InfoExtractor):
IE_NAME = 'kinja:embed'
IENAME = 'kinja:embed'
_DOMAIN_REGEX = r'''(?:[^.]+\.)?
(?:
avclub|

View File

@ -6,7 +6,6 @@ from ..utils import (
int_or_none,
smuggle_url,
traverse_obj,
try_call,
unsmuggle_url,
)
@ -97,22 +96,13 @@ class LiTVIE(InfoExtractor):
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
webpage, 'video data', default='{}'), video_id)
if not video_data:
payload = {'assetId': program_info['assetId']}
puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
if puid:
payload.update({
'type': 'auth',
'puid': puid,
})
endpoint = 'getUrl'
else:
payload.update({
'watchDevices': program_info['watchDevices'],
'contentType': program_info['contentType'],
})
endpoint = 'getMainUrlNoAuth'
payload = {
'assetId': program_info['assetId'],
'watchDevices': program_info['watchDevices'],
'contentType': program_info['contentType'],
}
video_data = self._download_json(
f'https://www.litv.tv/vod/ajax/{endpoint}', video_id,
'https://www.litv.tv/vod/ajax/getMainUrlNoAuth', video_id,
data=json.dumps(payload).encode('utf-8'),
headers={'Content-Type': 'application/json'})

View File

@ -1,62 +0,0 @@
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_resolution,
unified_timestamp,
url_or_none,
)
from ..utils.traversal import traverse_obj
class MaarivIE(InfoExtractor):
IE_NAME = 'maariv.co.il'
_VALID_URL = r'https?://player\.maariv\.co\.il/public/player\.html\?(?:[^#]+&)?media=(?P<id>\d+)'
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
_TESTS = [{
'url': 'https://player.maariv.co.il/public/player.html?player=maariv-desktop&media=3611585',
'info_dict': {
'id': '3611585',
'duration': 75,
'ext': 'mp4',
'upload_date': '20231009',
'title': 'מבצע חרבות ברזל',
'timestamp': 1696851301,
},
}]
_WEBPAGE_TESTS = [{
'url': 'https://www.maariv.co.il/news/law/Article-1044008',
'info_dict': {
'id': '3611585',
'duration': 75,
'ext': 'mp4',
'upload_date': '20231009',
'title': 'מבצע חרבות ברזל',
'timestamp': 1696851301,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
f'https://dal.walla.co.il/media/{video_id}?origin=player.maariv.co.il', video_id)['data']
formats = []
if hls_url := traverse_obj(data, ('video', 'url', {url_or_none})):
formats.extend(self._extract_m3u8_formats(hls_url, video_id, m3u8_id='hls', fatal=False))
for http_format in traverse_obj(data, ('video', 'stream_urls', ..., 'stream_url', {url_or_none})):
formats.append({
'url': http_format,
'format_id': 'http',
**parse_resolution(http_format),
})
return {
'id': video_id,
**traverse_obj(data, {
'title': 'title',
'duration': ('video', 'duration', {int_or_none}),
'timestamp': ('upload_date', {unified_timestamp}),
}),
'formats': formats,
}

View File

@ -97,7 +97,7 @@ class NBAWatchBaseIE(NBACVPBaseIE):
class NBAWatchEmbedIE(NBAWatchBaseIE):
IE_NAME = 'nba:watch:embed'
IENAME = 'nba:watch:embed'
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
_TESTS = [{
'url': 'http://watch.nba.com/embed?id=659395',
@ -339,7 +339,7 @@ class NBABaseIE(NBACVPBaseIE):
class NBAEmbedIE(NBABaseIE):
IE_NAME = 'nba:embed'
IENAME = 'nba:embed'
_VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
_TESTS = [{
'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&ampEnv=',
@ -361,7 +361,7 @@ class NBAEmbedIE(NBABaseIE):
class NBAIE(NBABaseIE):
IE_NAME = 'nba'
IENAME = 'nba'
_VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
_TESTS = [{
'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774',
@ -388,7 +388,7 @@ class NBAIE(NBABaseIE):
class NBAChannelIE(NBABaseIE):
IE_NAME = 'nba:channel'
IENAME = 'nba:channel'
_VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
_TESTS = [{
'url': 'https://www.nba.com/blazers/video/channel/summer_league',

View File

@ -1,33 +0,0 @@
from .common import InfoExtractor
from ..utils import format_field, parse_iso8601
class RinseFMIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rinse\.fm/episodes/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://rinse.fm/episodes/club-glow-15-12-2023-2000/',
'md5': '76ee0b719315617df42e15e710f46c7b',
'info_dict': {
'id': '1536535',
'ext': 'mp3',
'title': 'Club Glow - 15/12/2023 - 20:00',
'thumbnail': r're:^https://.+\.(?:jpg|JPG)$',
'release_timestamp': 1702598400,
'release_date': '20231215'
}
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
entry = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['entry']
return {
'id': entry['id'],
'title': entry.get('title'),
'url': entry['fileUrl'],
'vcodec': 'none',
'release_timestamp': parse_iso8601(entry.get('episodeDate')),
'thumbnail': format_field(
entry, [('featuredImage', 0, 'filename')], 'https://rinse.imgix.net/media/%s', default=None),
}

View File

@ -1,135 +0,0 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
determine_ext,
js_to_json,
traverse_obj,
update_url_query,
url_or_none,
)
class RudoVideoIE(InfoExtractor):
_VALID_URL = r'https?://rudo\.video/(?P<type>vod|podcast|live)/(?P<id>[^/?&#]+)'
_EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)//rudo\.video/(?:vod|podcast|live)/[^\'"]+)']
_TESTS = [{
'url': 'https://rudo.video/podcast/cz2wrUy8l0o',
'md5': '28ed82b477708dc5e12e072da2449221',
'info_dict': {
'id': 'cz2wrUy8l0o',
'title': 'Diego Cabot',
'ext': 'mp4',
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
},
}, {
'url': 'https://rudo.video/podcast/bQkt07',
'md5': '36b22a9863de0f47f00fc7532a32a898',
'info_dict': {
'id': 'bQkt07',
'title': 'Tubular Bells',
'ext': 'mp4',
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
},
}, {
'url': 'https://rudo.video/podcast/b42ZUznHX0',
'md5': 'b91c70d832938871367f8ad10c895821',
'info_dict': {
'id': 'b42ZUznHX0',
'title': 'Columna Ruperto Concha',
'ext': 'mp3',
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
},
}, {
'url': 'https://rudo.video/vod/bN5AaJ',
'md5': '01324a329227e2591530ecb4f555c881',
'info_dict': {
'id': 'bN5AaJ',
'title': 'Ucrania 19.03',
'creator': 'La Tercera',
'ext': 'mp4',
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
},
}, {
'url': 'https://rudo.video/live/bbtv',
'info_dict': {
'id': 'bbtv',
'ext': 'mp4',
'creator': 'BioBioTV',
'live_status': 'is_live',
'title': r're:^LIVE BBTV\s\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}$',
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
},
}, {
'url': 'https://rudo.video/live/c13',
'info_dict': {
'id': 'c13',
'title': 'CANAL13',
'ext': 'mp4',
},
'skip': 'Geo-restricted to Chile',
}, {
'url': 'https://rudo.video/live/t13-13cl',
'info_dict': {
'id': 't13-13cl',
'title': 'T13',
'ext': 'mp4',
},
'skip': 'Geo-restricted to Chile',
}]
def _real_extract(self, url):
video_id, type_ = self._match_valid_url(url).group('id', 'type')
is_live = type_ == 'live'
webpage = self._download_webpage(url, video_id)
if 'Streaming is not available in your area' in webpage:
self.raise_geo_restricted()
media_url = (
self._search_regex(
r'var\s+streamURL\s*=\s*[\'"]([^?\'"]+)', webpage, 'stream url', default=None)
# Source URL must be used only if streamURL is unavailable
or self._search_regex(
r'<source[^>]+src=[\'"]([^\'"]+)', webpage, 'source url', default=None))
if not media_url:
youtube_url = self._search_regex(r'file:\s*[\'"]((?:https?:)//(?:www\.)?youtube\.com[^\'"]+)',
webpage, 'youtube url', default=None)
if youtube_url:
return self.url_result(youtube_url, 'Youtube')
raise ExtractorError('Unable to extract stream url')
token_array = self._search_json(
r'<script>var\s+_\$_[a-zA-Z0-9]+\s*=', webpage, 'access token array', video_id,
contains_pattern=r'\[(?s:.+)\]', default=None, transform_source=js_to_json)
if token_array:
token_url = traverse_obj(token_array, (..., {url_or_none}), get_all=False)
if not token_url:
raise ExtractorError('Invalid access token array')
access_token = self._download_json(
token_url, video_id, note='Downloading access token')['data']['authToken']
media_url = update_url_query(media_url, {'auth-token': access_token})
ext = determine_ext(media_url)
if ext == 'm3u8':
formats = self._extract_m3u8_formats(media_url, video_id, live=is_live)
elif ext == 'mp3':
formats = [{
'url': media_url,
'vcodec': 'none',
}]
else:
formats = [{'url': media_url}]
return {
'id': video_id,
'title': (self._search_regex(r'var\s+titleVideo\s*=\s*[\'"]([^\'"]+)',
webpage, 'title', default=None)
or self._og_search_title(webpage)),
'creator': self._search_regex(r'var\s+videoAuthor\s*=\s*[\'"]([^?\'"]+)',
webpage, 'videoAuthor', default=None),
'thumbnail': (self._search_regex(r'var\s+posterIMG\s*=\s*[\'"]([^?\'"]+)',
webpage, 'thumbnail', default=None)
or self._og_search_thumbnail(webpage)),
'formats': formats,
'is_live': is_live,
}

View File

@ -10,7 +10,6 @@ from ..compat import (
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
)
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
dict_get,
@ -480,9 +479,9 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int,
'repost_count': int,
'like_count': int,
'view_count': int,
'tags': [],
'age_limit': 18,
'_old_archive_ids': ['twitter 643211948184596480'],
},
}, {
'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
@ -516,7 +515,6 @@ class TwitterIE(TwitterBaseIE):
'like_count': int,
'tags': ['TV', 'StarWars', 'TheForceAwakens'],
'age_limit': 0,
'_old_archive_ids': ['twitter 665052190608723968'],
},
}, {
'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
@ -560,9 +558,9 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int,
'repost_count': int,
'like_count': int,
'view_count': int,
'tags': ['Damndaniel'],
'age_limit': 0,
'_old_archive_ids': ['twitter 700207533655363584'],
},
}, {
'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
@ -601,9 +599,9 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int,
'repost_count': int,
'like_count': int,
'view_count': int,
'tags': [],
'age_limit': 0,
'_old_archive_ids': ['twitter 719944021058060289'],
},
}, {
'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
@ -618,7 +616,6 @@ class TwitterIE(TwitterBaseIE):
'thumbnail': r're:^https?://.*\.jpg',
},
'add_ie': ['Periscope'],
'skip': 'Broadcast not found',
}, {
# has mp4 formats via mobile API
'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
@ -638,9 +635,9 @@ class TwitterIE(TwitterBaseIE):
'thumbnail': r're:^https?://.*\.jpg',
'tags': [],
'repost_count': int,
'view_count': int,
'like_count': int,
'comment_count': int,
'_old_archive_ids': ['twitter 852138619213144067'],
},
}, {
'url': 'https://twitter.com/i/web/status/910031516746514432',
@ -660,9 +657,9 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int,
'repost_count': int,
'like_count': int,
'view_count': int,
'tags': ['Maria'],
'age_limit': 0,
'_old_archive_ids': ['twitter 910031516746514432'],
},
'params': {
'skip_download': True, # requires ffmpeg
@ -686,9 +683,9 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int,
'repost_count': int,
'like_count': int,
'view_count': int,
'tags': [],
'age_limit': 0,
'_old_archive_ids': ['twitter 1001551623938805763'],
},
'params': {
'skip_download': True, # requires ffmpeg
@ -752,7 +749,6 @@ class TwitterIE(TwitterBaseIE):
'like_count': int,
'tags': [],
'age_limit': 0,
'_old_archive_ids': ['twitter 1349794411333394432'],
},
'params': {
'skip_download': True,
@ -775,18 +771,18 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int,
'repost_count': int,
'like_count': int,
'view_count': int,
'tags': [],
'age_limit': 0,
'_old_archive_ids': ['twitter 1577855540407197696'],
},
'params': {'skip_download': True},
}, {
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
'info_dict': {
'id': '1577719286659006464',
'title': 'Ultima - Test',
'title': 'Ultima📛| New Era - Test',
'description': 'Test https://t.co/Y3KEZD7Dad',
'uploader': 'Ultima',
'uploader': 'Ultima📛| New Era',
'uploader_id': 'UltimaShadowX',
'uploader_url': 'https://twitter.com/UltimaShadowX',
'upload_date': '20221005',
@ -817,9 +813,9 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int,
'repost_count': int,
'like_count': int,
'view_count': int,
'tags': ['HurricaneIan'],
'age_limit': 0,
'_old_archive_ids': ['twitter 1575560063510810624'],
},
}, {
# Adult content, fails if not logged in
@ -955,10 +951,10 @@ class TwitterIE(TwitterBaseIE):
'uploader_url': 'https://twitter.com/CTVJLaidlaw',
'display_id': '1600649710662213632',
'like_count': int,
'view_count': int,
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
'upload_date': '20221208',
'age_limit': 0,
'_old_archive_ids': ['twitter 1600649710662213632'],
},
'params': {'noplaylist': True},
}, {
@ -983,7 +979,7 @@ class TwitterIE(TwitterBaseIE):
'like_count': int,
'repost_count': int,
'comment_count': int,
'_old_archive_ids': ['twitter 1621117700482416640'],
'view_count': int,
},
}, {
'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
@ -999,13 +995,13 @@ class TwitterIE(TwitterBaseIE):
'repost_count': int,
'duration': 9.531,
'comment_count': int,
'view_count': int,
'upload_date': '20221203',
'age_limit': 0,
'timestamp': 1670092210.0,
'tags': [],
'uploader': '\u06ea',
'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
'_old_archive_ids': ['twitter 1599108751385972737'],
},
'params': {'noplaylist': True},
}, {
@ -1016,6 +1012,7 @@ class TwitterIE(TwitterBaseIE):
'ext': 'mp4',
'uploader_url': 'https://twitter.com/MunTheShinobi',
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
'view_count': int,
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
'age_limit': 0,
'uploader': 'Mün',
@ -1028,7 +1025,6 @@ class TwitterIE(TwitterBaseIE):
'uploader_id': 'MunTheShinobi',
'duration': 139.987,
'timestamp': 1670306984.0,
'_old_archive_ids': ['twitter 1600009574919962625'],
},
}, {
# retweeted_status (private)
@ -1072,8 +1068,8 @@ class TwitterIE(TwitterBaseIE):
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
'like_count': int,
'repost_count': int,
'view_count': int,
'comment_count': int,
'_old_archive_ids': ['twitter 1695424220702888009'],
},
}, {
# retweeted_status w/ legacy API
@ -1095,24 +1091,18 @@ class TwitterIE(TwitterBaseIE):
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
'like_count': int,
'repost_count': int,
'_old_archive_ids': ['twitter 1695424220702888009'],
},
'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
}, {
# Broadcast embedded in tweet
'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
'url': 'https://twitter.com/JessicaDobsonWX/status/1693057346933600402',
'info_dict': {
'id': '1rmxPMjLzAXKN',
'id': '1yNGaNLjEblJj',
'ext': 'mp4',
'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
'title': 'Jessica Dobson - WAVE Weather Now - Saturday 8/19/23 Update',
'uploader': 'Jessica Dobson',
'uploader_id': 'JessicaDobsonWX',
'uploader_url': 'https://twitter.com/JessicaDobsonWX',
'timestamp': 1701566398,
'upload_date': '20231203',
'live_status': 'was_live',
'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
'concurrent_view_count': int,
'uploader_id': '1DZEoDwDovRQa',
'thumbnail': r're:^https?://.*\.jpg',
'view_count': int,
},
'add_ie': ['TwitterBroadcast'],
@ -1135,30 +1125,6 @@ class TwitterIE(TwitterBaseIE):
},
'params': {'extractor_args': {'twitter': {'api': ['syndication']}}},
'expected_warnings': ['Not all metadata'],
}, {
# "stale tweet" with typename "TweetWithVisibilityResults"
'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
'md5': '62b1e11cdc2cdd0e527f83adb081f536',
'info_dict': {
'id': '1724883339285544960',
'ext': 'mp4',
'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
'display_id': '1724884212803834154',
'uploader': 'Robert F. Kennedy Jr',
'uploader_id': 'RobertKennedyJr',
'uploader_url': 'https://twitter.com/RobertKennedyJr',
'upload_date': '20231115',
'timestamp': 1700079417.0,
'duration': 341.048,
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
'tags': ['Kennedy24'],
'repost_count': int,
'like_count': int,
'comment_count': int,
'age_limit': 0,
'_old_archive_ids': ['twitter 1724884212803834154'],
},
}, {
# onion route
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
@ -1213,23 +1179,19 @@ class TwitterIE(TwitterBaseIE):
), default={}, get_all=False) if self.is_logged_in else traverse_obj(
data, ('tweetResult', 'result', {dict}), default={})
typename = result.get('__typename')
if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)
if result.get('__typename') not in ('Tweet', 'TweetTombstone', 'TweetUnavailable', None):
self.report_warning(f'Unknown typename: {result.get("__typename")}', twid, only_once=True)
if 'tombstone' in result:
cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
elif typename == 'TweetUnavailable':
elif result.get('__typename') == 'TweetUnavailable':
reason = result.get('reason')
if reason == 'NsfwLoggedOut':
self.raise_login_required('NSFW tweet requires authentication')
elif reason == 'Protected':
self.raise_login_required('You are not authorized to view this protected tweet')
raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)
# Result for "stale tweet" needs additional transformation
elif typename == 'TweetWithVisibilityResults':
result = traverse_obj(result, ('tweet', {dict})) or {}
status = result.get('legacy', {})
status.update(traverse_obj(result, {
@ -1318,51 +1280,41 @@ class TwitterIE(TwitterBaseIE):
}
}
def _call_syndication_api(self, twid):
self.report_warning(
'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
status = self._download_json(
'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
headers={'User-Agent': 'Googlebot'}, query={
'id': twid,
# TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
})
if not status:
raise ExtractorError('Syndication endpoint returned empty JSON response')
# Transform the result so its structure matches that of legacy/graphql
media = []
for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
detail['id_str'] = traverse_obj(detail, (
'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
media.append(detail)
status['extended_entities'] = {'media': media}
return status
def _extract_status(self, twid):
if self._selected_api not in ('graphql', 'legacy', 'syndication'):
raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)
if self.is_logged_in or self._selected_api == 'graphql':
status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
try:
if self.is_logged_in or self._selected_api == 'graphql':
status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
elif self._selected_api == 'legacy':
status = self._call_api(f'statuses/show/{twid}.json', twid, {
'cards_platform': 'Web-12',
'include_cards': 1,
'include_reply_count': 1,
'include_user_entities': 0,
'tweet_mode': 'extended',
elif self._selected_api == 'legacy':
status = self._call_api(f'statuses/show/{twid}.json', twid, {
'cards_platform': 'Web-12',
'include_cards': 1,
'include_reply_count': 1,
'include_user_entities': 0,
'tweet_mode': 'extended',
})
elif self._selected_api == 'syndication':
self.report_warning(
'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
status = self._download_json(
'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
headers={'User-Agent': 'Googlebot'}, query={
'id': twid,
# TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
})
except ExtractorError as e:
if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
raise
self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
status = self._call_syndication_api(twid)
if not status:
raise ExtractorError('Syndication endpoint returned empty JSON response')
# Transform the result so its structure matches that of legacy/graphql
media = []
for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
detail['id_str'] = traverse_obj(detail, (
'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
media.append(detail)
status['extended_entities'] = {'media': media}
if self._selected_api == 'syndication':
status = self._call_syndication_api(twid)
else:
raise ExtractorError(f'"{self._selected_api}" is not a valid API selection', expected=True)
return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
@ -1425,10 +1377,10 @@ class TwitterIE(TwitterBaseIE):
'formats': formats,
'subtitles': subtitles,
'thumbnails': thumbnails,
'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})), # No longer available
'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
# Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
'_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'), # http format codec is unknown
# The codec of http formats are unknown
'_format_sort_fields': ('res', 'br', 'size', 'proto'),
}
def extract_from_card_info(card):

View File

@ -4480,13 +4480,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if mobj:
info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
break
info['like_count'] = traverse_obj(vpir, (
'videoActions', 'menuRenderer', 'topLevelButtons', ...,
'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
'buttonViewModel', 'accessibilityText', {parse_count}), get_all=False)
sbr_tooltip = try_get(
vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
if sbr_tooltip:
like_count, dislike_count = sbr_tooltip.split(' / ')
info.update({
'like_count': str_to_int(like_count),
'dislike_count': str_to_int(dislike_count),
})
vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
if vcr:
vc = self._get_count(vcr, 'viewCount')

View File

@ -206,14 +206,13 @@ class Updater:
# XXX: use class variables to simplify testing
_channel = CHANNEL
_origin = ORIGIN
_update_sources = UPDATE_SOURCES
def __init__(self, ydl, target: str | None = None):
self.ydl = ydl
# For backwards compat, target needs to be treated as if it could be None
self.requested_channel, sep, self.requested_tag = (target or self._channel).rpartition('@')
# Check if requested_tag is actually the requested repo/channel
if not sep and ('/' in self.requested_tag or self.requested_tag in self._update_sources):
if not sep and ('/' in self.requested_tag or self.requested_tag in UPDATE_SOURCES):
self.requested_channel = self.requested_tag
self.requested_tag: str = None # type: ignore (we set it later)
elif not self.requested_channel:
@ -238,11 +237,11 @@ class Updater:
self._block_restart('Automatically restarting into custom builds is disabled for security reasons')
else:
# Check if requested_channel resolves to a known repository or else raise
self.requested_repo = self._update_sources.get(self.requested_channel)
self.requested_repo = UPDATE_SOURCES.get(self.requested_channel)
if not self.requested_repo:
self._report_error(
f'Invalid update channel {self.requested_channel!r} requested. '
f'Valid channels are {", ".join(self._update_sources)}', True)
f'Valid channels are {", ".join(UPDATE_SOURCES)}', True)
self._identifier = f'{detect_variant()} {system_identifier()}'

View File

@ -67,7 +67,7 @@ class HTTPHeaderDict(collections.UserDict, dict):
def __setitem__(self, key, value):
if isinstance(value, bytes):
value = value.decode('latin-1')
super().__setitem__(key.title(), str(value).strip())
super().__setitem__(key.title(), str(value))
def __getitem__(self, key):
return super().__getitem__(key.title())