Compare commits

...

23 Commits

Author SHA1 Message Date
bashonly
e48735604e
[ie/imgur] cleanup 2023-12-25 18:30:20 -06:00
bashonly
e2792fd5b3
[ie/imgur] format_id 2023-12-25 18:27:21 -06:00
bashonly
39d0e2248a
[ie/imgur] even more cleanup 2023-12-25 18:25:07 -06:00
bashonly
abfe114e49
[ie/imgur] more cleanup 2023-12-25 18:00:47 -06:00
bashonly
4396063a52
[ie/imgur] cleanup 2023-12-25 17:55:42 -06:00
bashonly
64f8963313
[ie/imgur] fix tests 2023-12-25 17:23:41 -06:00
bashonly
87e8a2dfe3
[ie/imgur] fix traversal 2023-12-25 17:13:29 -06:00
bashonly
68b4cbdd2b
Merge branch 'yt-dlp:master' into feat/backport-2023-12 2023-12-25 16:32:14 -06:00
bashonly
da957cff5e
regex cleanup 2023-12-25 16:29:17 -06:00
bashonly
5e90f3a9a9
Backport imgur overhaul 2023-12-25 16:24:45 -06:00
kclauhk
c39358a54b
[ie/Facebook] Fix Memories extraction (#8681)
- Support group /posts/ URLs
- Raise a proper error message if no formats are found

Closes #8669
Authored by: kclauhk
2023-12-24 23:43:35 +01:00
Lars Strojny
1f8bd8eba8
[ie/ARDBetaMediathek] Fix series extraction (#8687)
Closes #7666
Authored by: lstrojny
2023-12-24 23:38:21 +01:00
Simon Sawicki
00cdda4f6f
[core] Fix format selection parse error for CPython 3.12 (#8797)
Authored by: Grub4K
2023-12-24 22:09:01 +01:00
bashonly
116c268438
[ie/twitter] Work around API rate-limit (#8825)
Closes #8762
Authored by: bashonly
2023-12-24 16:41:28 +00:00
bashonly
e7d22348e7
[ie/twitter] Prioritize m3u8 formats (#8826)
Closes #8117
Authored by: bashonly
2023-12-24 16:40:50 +00:00
bashonly
50eaea9fd7
[ie/instagram] Fix stories extraction (#8843)
Closes #8290
Authored by: bashonly
2023-12-24 16:40:03 +00:00
bashonly
f45c4efcd9
[ie/litv] Fix premium content extraction (#8842)
Closes #8654
Authored by: bashonly
2023-12-24 16:33:16 +00:00
Simon Sawicki
13b3cb3c2b
[ci] Run core tests only for core changes (#8841)
Authored by: Grub4K
2023-12-24 00:11:10 +01:00
Nicolas Dato
0d531c35ec
[ie/RudoVideo] Add extractor (#8664)
Authored by: nicodato
2023-12-22 22:52:07 +01:00
barsnick
bc4ab17b38
[cleanup] Fix spelling of IE_NAME (#8810)
Authored by: barsnick
2023-12-22 02:32:29 +01:00
bashonly
632b8ee54e
[core] Release workflow and Updater cleanup (#8640)
- Only use trusted publishing with PyPI and remove support for PyPI tokens from release workflow
- Clean up improper actions syntax in the build workflow inputs
- Refactor Updater to allow for consistent unit testing with `UPDATE_SOURCES`

Authored by: bashonly
2023-12-21 21:06:26 +00:00
barsnick
c919b68f7e
[ie/bbc] Extract more formats (#8321)
Closes #4902
Authored by: barsnick, dirkf
2023-12-21 20:47:32 +00:00
bashonly
19741ab8a4
[ie/bbc] Fix JSON parsing bug
Authored by: bashonly
2023-12-21 14:46:00 -06:00
19 changed files with 610 additions and 186 deletions

.github/workflows/build.yml

@@ -80,12 +80,12 @@ on:
default: true
type: boolean
origin:
description: .
description: Origin
required: false
default: ''
default: 'current repo'
type: choice
options:
- ''
- 'current repo'
permissions:
contents: read
@@ -99,7 +99,7 @@ jobs:
- name: Process origin
id: process_origin
run: |
echo "origin=${{ inputs.origin || github.repository }}" >> "$GITHUB_OUTPUT"
echo "origin=${{ inputs.origin == 'current repo' && github.repository || inputs.origin }}" | tee "$GITHUB_OUTPUT"
unix:
needs: process
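A side note on the rewritten `origin` step above: GitHub Actions expressions have no ternary operator, so `a && b || c` is the conventional stand-in for "b if a else c" (safe here because `github.repository` is never empty). A minimal Python sketch of what the new step computes, with a hypothetical function name:

def resolve_origin(origin_input, repository):
    # mirrors: inputs.origin == 'current repo' && github.repository || inputs.origin
    return repository if origin_input == 'current repo' else origin_input

print(resolve_origin('current repo', 'yt-dlp/yt-dlp'))          # yt-dlp/yt-dlp
print(resolve_origin('someuser/yt-dlp-fork', 'yt-dlp/yt-dlp'))  # someuser/yt-dlp-fork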

.github/workflows/core.yml

@@ -1,5 +1,25 @@
name: Core Tests
on: [push, pull_request]
on:
push:
paths:
- .github/**
- devscripts/**
- test/**
- yt_dlp/**.py
- '!yt_dlp/extractor/*.py'
- yt_dlp/extractor/__init__.py
- yt_dlp/extractor/common.py
- yt_dlp/extractor/extractors.py
pull_request:
paths:
- .github/**
- devscripts/**
- test/**
- yt_dlp/**.py
- '!yt_dlp/extractor/*.py'
- yt_dlp/extractor/__init__.py
- yt_dlp/extractor/common.py
- yt_dlp/extractor/extractors.py
permissions:
contents: read
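The ordering of these `paths` filters is what makes the change work: patterns are evaluated top to bottom and the last match wins, so `'!yt_dlp/extractor/*.py'` excludes extractor changes in general while the three explicit entries after it re-include the core extractor modules. A rough simulation of that resolution logic (using `fnmatch`, which only approximates GitHub's glob syntax, `**` in particular):

from fnmatch import fnmatch

PATTERNS = [
    ('.github/**', True),
    ('devscripts/**', True),
    ('test/**', True),
    ('yt_dlp/**.py', True),
    ('yt_dlp/extractor/*.py', False),        # the '!' exclusion
    ('yt_dlp/extractor/__init__.py', True),  # re-included core files
    ('yt_dlp/extractor/common.py', True),
    ('yt_dlp/extractor/extractors.py', True),
]

def triggers_core_tests(path):
    decision = False
    for pattern, include in PATTERNS:
        if fnmatch(path, pattern):
            decision = include  # last matching pattern wins
    return decision

print(triggers_core_tests('yt_dlp/extractor/imgur.py'))   # False
print(triggers_core_tests('yt_dlp/extractor/common.py'))  # True
print(triggers_core_tests('yt_dlp/YoutubeDL.py'))         # True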

.github/workflows/release.yml

@@ -64,7 +64,6 @@ jobs:
target_tag: ${{ steps.setup_variables.outputs.target_tag }}
pypi_project: ${{ steps.setup_variables.outputs.pypi_project }}
pypi_suffix: ${{ steps.setup_variables.outputs.pypi_suffix }}
pypi_token: ${{ steps.setup_variables.outputs.pypi_token }}
head_sha: ${{ steps.get_target.outputs.head_sha }}
steps:
@@ -153,7 +152,6 @@ jobs:
${{ !!secrets[format('{0}_archive_repo_token', env.target_repo)] }} || fallback_token
pypi_project='${{ vars[format('{0}_pypi_project', env.target_repo)] }}'
pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.target_repo)] }}'
${{ !secrets[format('{0}_pypi_token', env.target_repo)] }} || pypi_token='${{ env.target_repo }}_pypi_token'
fi
else
target_tag="${source_tag:-${version}}"
@@ -163,7 +161,6 @@ jobs:
${{ !!secrets[format('{0}_archive_repo_token', env.source_repo)] }} || fallback_token
pypi_project='${{ vars[format('{0}_pypi_project', env.source_repo)] }}'
pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.source_repo)] }}'
${{ !secrets[format('{0}_pypi_token', env.source_repo)] }} || pypi_token='${{ env.source_repo }}_pypi_token'
else
target_repo='${{ github.repository }}'
fi
@@ -172,13 +169,6 @@ jobs:
if [[ "${target_repo}" == '${{ github.repository }}' ]] && ${{ !inputs.prerelease }}; then
pypi_project='${{ vars.PYPI_PROJECT }}'
fi
if [[ -z "${pypi_token}" && "${pypi_project}" ]]; then
if ${{ !secrets.PYPI_TOKEN }}; then
pypi_token=OIDC
else
pypi_token=PYPI_TOKEN
fi
fi
echo "::group::Output variables"
cat << EOF | tee -a "$GITHUB_OUTPUT"
@@ -189,7 +179,6 @@ jobs:
target_tag=${target_tag}
pypi_project=${pypi_project}
pypi_suffix=${pypi_suffix}
pypi_token=${pypi_token}
EOF
echo "::endgroup::"
@@ -286,18 +275,7 @@ jobs:
python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update"
python setup.py sdist bdist_wheel
- name: Publish to PyPI via token
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets[needs.prepare.outputs.pypi_token] }}
if: |
needs.prepare.outputs.pypi_token != 'OIDC' && env.TWINE_PASSWORD
run: |
twine upload dist/*
- name: Publish to PyPI via trusted publishing
if: |
needs.prepare.outputs.pypi_token == 'OIDC'
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
verbose: true

test/test_YoutubeDL.py

@@ -140,6 +140,8 @@ class TestFormatSelection(unittest.TestCase):
test('example-with-dashes', 'example-with-dashes')
test('all', '2', '47', '45', 'example-with-dashes', '35')
test('mergeall', '2+47+45+example-with-dashes+35', multi=True)
# See: https://github.com/yt-dlp/yt-dlp/pulls/8797
test('7_a/worst', '35')
def test_format_selection_audio(self):
formats = [

test/test_update.py

@@ -11,6 +11,14 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, report_warning
from yt_dlp.update import Updater, UpdateInfo
# XXX: Keep in sync with yt_dlp.update.UPDATE_SOURCES
TEST_UPDATE_SOURCES = {
'stable': 'yt-dlp/yt-dlp',
'nightly': 'yt-dlp/yt-dlp-nightly-builds',
'master': 'yt-dlp/yt-dlp-master-builds',
}
TEST_API_DATA = {
'yt-dlp/yt-dlp/latest': {
'tag_name': '2023.12.31',
@@ -104,6 +112,7 @@ class FakeUpdater(Updater):
_channel = 'stable'
_origin = 'yt-dlp/yt-dlp'
_update_sources = TEST_UPDATE_SOURCES
def _download_update_spec(self, *args, **kwargs):
return TEST_LOCKFILE_ACTUAL

yt_dlp/YoutubeDL.py

@@ -2465,9 +2465,16 @@ class YoutubeDL:
return selector_function(ctx_copy)
return final_selector
stream = io.BytesIO(format_spec.encode())
# HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
# Prefix numbers with random letters to avoid it being classified as a number
# See: https://github.com/yt-dlp/yt-dlp/pulls/8797
# TODO: Implement parser not reliant on tokenize.tokenize
prefix = ''.join(random.choices(string.ascii_letters, k=32))
stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
try:
tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
tokens = list(_remove_unused_ops(
token._replace(string=token.string.replace(prefix, ''))
for token in tokenize.tokenize(stream.readline)))
except tokenize.TokenError:
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
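For context on the hack above: Python 3.12 changed the tokenizer underlying `tokenize`, which now rejects `7_a` as a malformed numeric literal, so a format ID beginning with digits could no longer be tokenized. A self-contained sketch of the prefix-and-strip technique, mirroring the diff, with the bookkeeping tokens filtered out for readability:

import io
import random
import re
import string
import tokenize

spec = '7_a/worst'  # format ID starting with digits, as in the new test case
prefix = ''.join(random.choices(string.ascii_letters, k=32))
safe = re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', spec)  # '7_a' -> '<prefix>7_a', a NAME token
tokens = [
    tok._replace(string=tok.string.replace(prefix, ''))
    for tok in tokenize.tokenize(io.BytesIO(safe.encode()).readline)
    if tok.type not in (tokenize.ENCODING, tokenize.NEWLINE, tokenize.ENDMARKER)
]
print([tok.string for tok in tokens])  # ['7_a', '/', 'worst'] on 3.11 and 3.12 alike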

yt_dlp/extractor/_extractors.py

@@ -1648,6 +1648,7 @@ from .rumble import (
RumbleIE,
RumbleChannelIE,
)
from .rudovideo import RudoVideoIE
from .rutube import (
RutubeIE,
RutubeChannelIE,

yt_dlp/extractor/ard.py

@@ -292,7 +292,7 @@ class ARDIE(InfoExtractor):
_TESTS = [{
# available till 7.12.2023
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
'md5': 'a438f671e87a7eba04000336a119ccc4',
'md5': '94812e6438488fb923c361a44469614b',
'info_dict': {
'id': 'maischberger-video-424',
'display_id': 'maischberger-video-424',
@@ -403,26 +403,25 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
_VALID_URL = r'''(?x)https://
(?:(?:beta|www)\.)?ardmediathek\.de/
(?:(?P<client>[^/]+)/)?
(?:player|live|video|(?P<playlist>sendung|sammlung))/
(?:player|live|video|(?P<playlist>sendung|serie|sammlung))/
(?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
(?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
(?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''
_TESTS = [{
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI',
'md5': '3fd5fead7a370a819341129c8d713136',
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4',
'info_dict': {
'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen',
'id': '12172961',
'title': 'Wolfsland - Die traurigen Schwestern',
'description': r're:^Als der Polizeiobermeister Raaben',
'duration': 5241,
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957',
'timestamp': 1670710500,
'upload_date': '20221210',
'display_id': 'filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen',
'id': '12939099',
'title': 'Liebe auf vier Pfoten',
'description': r're:^Claudia Schmitt, Anwältin in Salzburg',
'duration': 5222,
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:aee7cbf8f06de976?w=960&ch=ae4d0f2ee47d8b9b',
'timestamp': 1701343800,
'upload_date': '20231130',
'ext': 'mp4',
'age_limit': 12,
'episode': 'Wolfsland - Die traurigen Schwestern',
'episode': 'Liebe auf vier Pfoten',
'series': 'Filme im MDR'
},
}, {
@@ -454,7 +453,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
'duration': 915,
'episode': 'tagesschau, 20:00 Uhr',
'series': 'tagesschau',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678',
},
}, {
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
@@ -475,6 +474,10 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
# playlist of type 'sendung'
'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
'only_matching': True,
}, {
# playlist of type 'serie'
'url': 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1',
'only_matching': True,
}, {
# playlist of type 'sammlung'
'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
@@ -487,10 +490,11 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
'only_matching': True,
}]
def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber):
def _ARD_load_playlist_snippet(self, playlist_id, display_id, client, mode, page_number):
""" Query the ARD server for playlist information
and returns the data in "raw" format """
if mode == 'sendung':
assert mode in ('sendung', 'serie', 'sammlung')
if mode in ('sendung', 'serie'):
graphQL = json.dumps({
'query': '''{
showPage(
@@ -507,7 +511,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
links { target { id href title } }
type
}
}}''' % (client, playlist_id, pageNumber),
}}''' % (client, playlist_id, page_number),
}).encode()
else: # mode == 'sammlung'
graphQL = json.dumps({
@@ -528,7 +532,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
type
}
}
}}''' % (client, playlist_id, pageNumber),
}}''' % (client, playlist_id, page_number),
}).encode()
# Ressources for ARD graphQL debugging:
# https://api-test.ardmediathek.de/public-gateway
@@ -538,7 +542,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
data=graphQL,
headers={'Content-Type': 'application/json'})['data']
# align the structure of the returned data:
if mode == 'sendung':
if mode in ('sendung', 'serie'):
show_page = show_page['showPage']
else: # mode == 'sammlung'
show_page = show_page['morePage']['widget']
@@ -546,12 +550,12 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
""" Collects all playlist entries and returns them as info dict.
Supports playlists of mode 'sendung' and 'sammlung', and also nested
playlists. """
Supports playlists of mode 'sendung', 'serie', and 'sammlung',
as well as nested playlists. """
entries = []
pageNumber = 0
while True: # iterate by pageNumber
show_page = self._ARD_load_playlist_snipped(
show_page = self._ARD_load_playlist_snippet(
playlist_id, display_id, client, mode, pageNumber)
for teaser in show_page['teasers']: # process playlist items
if '/compilation/' in teaser['links']['target']['href']:
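The `serie` addition to the playlist alternation can be sanity-checked directly; both the pattern and the URL below are taken verbatim from this diff:

import re

_VALID_URL = r'''(?x)https://
    (?:(?:beta|www)\.)?ardmediathek\.de/
    (?:(?P<client>[^/]+)/)?
    (?:player|live|video|(?P<playlist>sendung|serie|sammlung))/
    (?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
    (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
    (?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''

m = re.match(_VALID_URL, 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1')
print(m.group('playlist'), m.group('id'), m.group('season'))
# serie Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy 1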

yt_dlp/extractor/bbc.py

@@ -317,16 +317,25 @@ class BBCCoUkIE(InfoExtractor):
def _download_media_selector(self, programme_id):
last_exception = None
formats, subtitles = [], {}
for media_set in self._MEDIA_SETS:
try:
return self._download_media_selector_url(
fmts, subs = self._download_media_selector_url(
self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
formats.extend(fmts)
if subs:
self._merge_subtitles(subs, target=subtitles)
except BBCCoUkIE.MediaSelectionError as e:
if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
last_exception = e
continue
self._raise_extractor_error(e)
if last_exception:
if formats or subtitles:
self.report_warning(f'{self.IE_NAME} returned error: {last_exception.id}')
else:
self._raise_extractor_error(last_exception)
return formats, subtitles
def _download_media_selector_url(self, url, programme_id=None):
media_selection = self._download_json(
@@ -1188,7 +1197,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
if initial_data is None:
initial_data = self._search_regex(
r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
'preload state', default={})
'preload state', default='{}')
else:
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
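The one-character change in the `default` is the whole fix: `_parse_json` ultimately hands its input to `json.loads`, which requires a string, so the old `default={}` passed a dict whenever the regex found nothing (and the old `or '"{}"'` fallback decoded to the JSON string '{}' rather than an empty dict). In plain Python terms:

import json

print(json.loads('{}'))    # {} -- what the corrected default='{}' yields
print(json.loads('"{}"'))  # '{}' -- a string, not a dict: the old fallback's result
try:
    json.loads({})         # what the old default={} effectively passed along
except TypeError as e:
    print(e)               # the JSON object must be str, bytes or bytearray, not dict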

yt_dlp/extractor/facebook.py

@@ -52,7 +52,7 @@ class FacebookIE(InfoExtractor):
)\?(?:.*?)(?:v|video_id|story_fbid)=|
[^/]+/videos/(?:[^/]+/)?|
[^/]+/posts/|
groups/[^/]+/permalink/|
groups/[^/]+/(?:permalink|posts)/|
watchparty/
)|
facebook:
@@ -232,6 +232,21 @@ class FacebookIE(InfoExtractor):
'uploader_id': '100013949973717',
},
'skip': 'Requires logging in',
}, {
# data.node.comet_sections.content.story.attachments[].throwbackStyles.attachment_target_renderer.attachment.target.attachments[].styles.attachment.media
'url': 'https://www.facebook.com/groups/1645456212344334/posts/3737828833107051/',
'info_dict': {
'id': '1569199726448814',
'ext': 'mp4',
'title': 'Pence MUST GO!',
'description': 'Vickie Gentry shared a memory.',
'timestamp': 1511548260,
'upload_date': '20171124',
'uploader': 'Vickie Gentry',
'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
'thumbnail': r're:^https?://.*',
'duration': 148.435,
},
}, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True,
@@ -612,9 +627,11 @@ class FacebookIE(InfoExtractor):
nodes = variadic(traverse_obj(data, 'nodes', 'node') or [])
attachments = traverse_obj(nodes, (
..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments',
..., ('styles', 'style_type_renderer'), 'attachment'), expected_type=dict) or []
..., ('styles', 'style_type_renderer', ('throwbackStyles', 'attachment_target_renderer')),
'attachment', {dict}))
for attachment in attachments:
ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
ns = traverse_obj(attachment, ('all_subattachments', 'nodes', ..., {dict}),
('target', 'attachments', ..., 'styles', 'attachment', {dict}))
for n in ns:
parse_attachment(n)
parse_attachment(attachment)
@@ -637,7 +654,7 @@ class FacebookIE(InfoExtractor):
if len(entries) > 1:
return self.playlist_result(entries, video_id)
video_info = entries[0]
video_info = entries[0] if entries else {'id': video_id}
webpage_info = extract_metadata(webpage)
# honor precise duration in video info
if video_info.get('duration'):
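The rewritten traversal relies on two `traverse_obj` idioms used throughout this diff: `...` branches over every element of a list, and `{dict}` keeps only values that are actually dicts, so malformed attachment nodes are silently dropped instead of crashing later. A toy example (data invented for illustration):

from yt_dlp.utils import traverse_obj

attachment = {'all_subattachments': {'nodes': [{'id': '1'}, None, {'id': '2'}]}}
print(traverse_obj(attachment, ('all_subattachments', 'nodes', ..., {dict})))
# [{'id': '1'}, {'id': '2'}]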

yt_dlp/extractor/iheart.py

@@ -23,7 +23,7 @@ class IHeartRadioBaseIE(InfoExtractor):
class IHeartRadioIE(IHeartRadioBaseIE):
IENAME = 'iheartradio'
IE_NAME = 'iheartradio'
_VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)'
_TEST = {
'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true',

yt_dlp/extractor/imgur.py

@@ -1,99 +1,243 @@
import functools
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
determine_ext,
float_or_none,
int_or_none,
js_to_json,
mimetype2ext,
ExtractorError,
parse_iso8601,
str_or_none,
strip_or_none,
traverse_obj,
url_or_none,
)
class ImgurIE(InfoExtractor):
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|(?:t(?:opic)?|r)/[^/]+)/)(?P<id>[a-zA-Z0-9]+)'
class ImgurBaseIE(InfoExtractor):
_CLIENT_ID = '546c25a59c58ad7'
@classmethod
def _imgur_result(cls, item_id):
return cls.url_result(f'https://imgur.com/{item_id}', ImgurIE, item_id)
def _call_api(self, endpoint, video_id, **kwargs):
return self._download_json(
f'https://api.imgur.com/post/v1/{endpoint}/{video_id}?client_id={self._CLIENT_ID}&include=media,account',
video_id, **kwargs)
@staticmethod
def get_description(s):
if 'Discover the magic of the internet at Imgur' in s:
return None
return s or None
class ImgurIE(ImgurBaseIE):
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{
'url': 'https://i.imgur.com/A61SaA1.gifv',
'url': 'https://imgur.com/A61SaA1',
'info_dict': {
'id': 'A61SaA1',
'ext': 'mp4',
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
'title': 'MRW gifv is up and running without any bugs',
'timestamp': 1416446068,
'upload_date': '20141120',
'dislike_count': int,
'comment_count': int,
'release_timestamp': 1416446068,
'release_date': '20141120',
'like_count': int,
'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
},
}, {
'url': 'https://imgur.com/A61SaA1',
'url': 'https://i.imgur.com/A61SaA1.gifv',
'only_matching': True,
}, {
'url': 'https://i.imgur.com/crGpqCV.mp4',
'only_matching': True,
}, {
# no title
'url': 'https://i.imgur.com/jxBXAMC.gifv',
'only_matching': True,
'info_dict': {
'id': 'jxBXAMC',
'ext': 'mp4',
'title': 'Fahaka puffer feeding',
'timestamp': 1533835503,
'upload_date': '20180809',
'release_date': '20180809',
'like_count': int,
'duration': 30.0,
'comment_count': int,
'release_timestamp': 1533835503,
'thumbnail': 'https://i.imgur.com/jxBXAMCh.jpg',
'dislike_count': int,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._call_api('media', video_id)
if not traverse_obj(data, ('media', 0, (
('type', {lambda t: t == 'video' or None}),
('metadata', 'is_animated'))), get_all=False):
raise ExtractorError(f'{video_id} is not a video or animated image', expected=True)
webpage = self._download_webpage(
'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id)
f'https://i.imgur.com/{video_id}.gifv', video_id, fatal=False) or ''
formats = []
width = int_or_none(self._og_search_property(
'video:width', webpage, default=None))
height = int_or_none(self._og_search_property(
'video:height', webpage, default=None))
media_fmt = traverse_obj(data, ('media', 0, {
'url': ('url', {url_or_none}),
'ext': ('ext', {str}),
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
'filesize': ('size', {int_or_none}),
'acodec': ('metadata', 'has_sound', {lambda b: None if b else 'none'}),
}))
media_url = media_fmt.get('url')
if media_url:
if not media_fmt.get('ext'):
media_fmt['ext'] = mimetype2ext(traverse_obj(
data, ('media', 0, 'mime_type'))) or determine_ext(media_url)
if traverse_obj(data, ('media', 0, 'type')) == 'image':
media_fmt['acodec'] = 'none'
media_fmt.setdefault('preference', -10)
formats.append(media_fmt)
video_elements = self._search_regex(
r'(?s)<div class="video-elements">(.*?)</div>',
webpage, 'video elements', default=None)
if not video_elements:
raise ExtractorError(
'No sources found for video %s. Maybe an image?' % video_id,
expected=True)
formats = []
for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
formats.append({
'format_id': m.group('type').partition('/')[2],
'url': self._proto_relative_url(m.group('src')),
'ext': mimetype2ext(m.group('type')),
'width': width,
'height': height,
'http_headers': {
'User-Agent': 'yt-dlp (like wget)',
},
if video_elements:
def og_get_size(media_type):
return {
p: int_or_none(self._og_search_property(f'{media_type}:{p}', webpage, default=None))
for p in ('width', 'height')
}
size = og_get_size('video')
if not any(size.values()):
size = og_get_size('image')
formats = traverse_obj(
re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements),
(..., {
'format_id': ('type', {lambda s: s.partition('/')[2]}),
'url': ('src', {self._proto_relative_url}),
'ext': ('type', {mimetype2ext}),
}))
for f in formats:
f.update(size)
# We can get the original gif format from the webpage as well
gif_json = traverse_obj(self._search_json(
r'var\s+videoItem\s*=', webpage, 'GIF info', video_id,
transform_source=js_to_json, fatal=False), {
'url': ('gifUrl', {self._proto_relative_url}),
'filesize': ('size', {int_or_none}),
})
gif_json = self._search_regex(
r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
webpage, 'GIF code', fatal=False)
if gif_json:
gifd = self._parse_json(
gif_json, video_id, transform_source=js_to_json)
formats.append({
gif_json.update(size)
gif_json.update({
'format_id': 'gif',
'preference': -10, # gifs are worse than videos
'width': width,
'height': height,
'preference': -10, # gifs < videos
'ext': 'gif',
'acodec': 'none',
'vcodec': 'gif',
'container': 'gif',
'url': self._proto_relative_url(gifd['gifUrl']),
'filesize': gifd.get('size'),
'http_headers': {
'User-Agent': 'yt-dlp (like wget)',
},
})
formats.append(gif_json)
search = functools.partial(self._html_search_meta, html=webpage, default=None)
twitter_fmt = {
'format_id': 'twitter',
'url': url_or_none(search('twitter:player:stream')),
'ext': mimetype2ext(search('twitter:player:stream:content_type')),
'width': int_or_none(search('twitter:width')),
'height': int_or_none(search('twitter:height')),
}
if twitter_fmt['url']:
formats.append(twitter_fmt)
if not formats:
self.raise_no_formats(
f'No sources found for video {video_id}. Maybe a plain image?', expected=True)
self._remove_duplicate_formats(formats)
return {
'title': self._og_search_title(webpage, default=None),
'description': self.get_description(self._og_search_description(webpage, default='')),
**traverse_obj(data, {
'uploader_id': ('account_id', {lambda a: str(a) if int_or_none(a) else None}),
'uploader': ('account', 'username', {lambda x: strip_or_none(x) or None}),
'uploader_url': ('account', 'avatar_url', {url_or_none}),
'like_count': ('upvote_count', {int_or_none}),
'dislike_count': ('downvote_count', {int_or_none}),
'comment_count': ('comment_count', {int_or_none}),
'age_limit': ('is_mature', {lambda x: 18 if x else None}),
'timestamp': (('updated_at', 'created_at'), {parse_iso8601}),
'release_timestamp': ('created_at', {parse_iso8601}),
}, get_all=False),
**traverse_obj(data, ('media', 0, 'metadata', {
'title': ('title', {lambda x: strip_or_none(x) or None}),
'description': ('description', {self.get_description}),
'duration': ('duration', {float_or_none}),
'timestamp': (('updated_at', 'created_at'), {parse_iso8601}),
'release_timestamp': ('created_at', {parse_iso8601}),
}), get_all=False),
'id': video_id,
'formats': formats,
'title': self._og_search_title(webpage, default=video_id),
'thumbnail': url_or_none(search('thumbnailUrl')),
}
class ImgurGalleryIE(InfoExtractor):
class ImgurGalleryBaseIE(ImgurBaseIE):
_GALLERY = True
def _real_extract(self, url):
gallery_id = self._match_id(url)
data = self._call_api('albums', gallery_id, fatal=False, expected_status=404)
info = traverse_obj(data, {
'title': ('title', {lambda x: strip_or_none(x) or None}),
'description': ('description', {self.get_description}),
})
if traverse_obj(data, 'is_album'):
def yield_media_ids():
for m_id in traverse_obj(data, (
'media', lambda _, v: v.get('type') == 'video' or v['metadata']['is_animated'],
'id', {lambda x: str_or_none(x) or None})):
yield m_id
# if a gallery with exactly one video, apply album metadata to video
media_id = (
self._GALLERY
and traverse_obj(data, ('image_count', {lambda c: c == 1}))
and next(yield_media_ids(), None))
if not media_id:
result = self.playlist_result(
map(self._imgur_result, yield_media_ids()), gallery_id)
result.update(info)
return result
gallery_id = media_id
result = self._imgur_result(gallery_id)
info['_type'] = 'url_transparent'
result.update(info)
return result
class ImgurGalleryIE(ImgurGalleryBaseIE):
IE_NAME = 'imgur:gallery'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/]+)/(?P<id>[a-zA-Z0-9]+)'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{
'url': 'http://imgur.com/gallery/Q95ko',
@@ -102,49 +246,121 @@ class ImgurGalleryIE(InfoExtractor):
'title': 'Adding faces make every GIF better',
},
'playlist_count': 25,
'skip': 'Zoinks! You\'ve taken a wrong turn.',
}, {
# TODO: static images - replace with animated/video gallery
'url': 'http://imgur.com/topic/Aww/ll5Vk',
'only_matching': True,
}, {
'url': 'https://imgur.com/gallery/YcAQlkx',
'add_ies': ['Imgur'],
'info_dict': {
'id': 'YcAQlkx',
'ext': 'mp4',
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
}
'timestamp': 1358554297,
'upload_date': '20130119',
'uploader_id': '1648642',
'uploader': 'wittyusernamehere',
'release_timestamp': 1358554297,
'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
'release_date': '20130119',
'uploader_url': 'https://i.imgur.com/u3R4I2S_d.png?maxwidth=290&fidelity=grand',
'comment_count': int,
'dislike_count': int,
'like_count': int,
},
}, {
# TODO: static image - replace with animated/video gallery
'url': 'http://imgur.com/topic/Funny/N8rOudd',
'only_matching': True,
}, {
'url': 'http://imgur.com/r/aww/VQcQPhM',
'only_matching': True,
'add_ies': ['Imgur'],
'info_dict': {
'id': 'VQcQPhM',
'ext': 'mp4',
'title': 'The boss is here',
'timestamp': 1476494751,
'upload_date': '20161015',
'uploader_id': '19138530',
'uploader': 'thematrixcam',
'comment_count': int,
'dislike_count': int,
'uploader_url': 'https://i.imgur.com/qCjr5Pi_d.png?maxwidth=290&fidelity=grand',
'release_timestamp': 1476494751,
'like_count': int,
'release_date': '20161015',
'thumbnail': 'https://i.imgur.com/VQcQPhMh.jpg',
},
},
# from https://github.com/ytdl-org/youtube-dl/pull/16674
{
'url': 'https://imgur.com/t/unmuted/6lAn9VQ',
'info_dict': {
'id': '6lAn9VQ',
'title': 'Penguins !',
},
'playlist_count': 3,
}, {
'url': 'https://imgur.com/t/unmuted/kx2uD3C',
'add_ies': ['Imgur'],
'info_dict': {
'id': 'ZVMv45i',
'ext': 'mp4',
'title': 'Intruder',
'timestamp': 1528129683,
'upload_date': '20180604',
'release_timestamp': 1528129683,
'release_date': '20180604',
'like_count': int,
'dislike_count': int,
'comment_count': int,
'duration': 30.03,
'thumbnail': 'https://i.imgur.com/ZVMv45ih.jpg',
},
}, {
'url': 'https://imgur.com/t/unmuted/wXSK0YH',
'add_ies': ['Imgur'],
'info_dict': {
'id': 'JCAP4io',
'ext': 'mp4',
'title': 're:I got the blues$',
'description': 'Lukas vocal stylings.\n\nFP edit: dont encourage me. Ill never stop posting Luka and friends.',
'timestamp': 1527809525,
'upload_date': '20180531',
'like_count': int,
'dislike_count': int,
'duration': 30.03,
'comment_count': int,
'release_timestamp': 1527809525,
'thumbnail': 'https://i.imgur.com/JCAP4ioh.jpg',
'release_date': '20180531',
},
}]
def _real_extract(self, url):
gallery_id = self._match_id(url)
data = self._download_json(
'https://imgur.com/gallery/%s.json' % gallery_id,
gallery_id)['data']['image']
if data.get('is_album'):
entries = [
self.url_result('http://imgur.com/%s' % image['hash'], ImgurIE.ie_key(), image['hash'])
for image in data['album_images']['images'] if image.get('hash')]
return self.playlist_result(entries, gallery_id, data.get('title'), data.get('description'))
return self.url_result('http://imgur.com/%s' % gallery_id, ImgurIE.ie_key(), gallery_id)
class ImgurAlbumIE(ImgurGalleryIE): # XXX: Do not subclass from concrete IE
class ImgurAlbumIE(ImgurGalleryBaseIE):
IE_NAME = 'imgur:album'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
_GALLERY = False
_TESTS = [{
# TODO: only static images - replace with animated/video gallery
'url': 'http://imgur.com/a/j6Orj',
'info_dict': {
'id': 'j6Orj',
'title': 'A Literary Analysis of "Star Wars: The Force Awakens"',
'only_matching': True,
},
'playlist_count': 12,
# from https://github.com/ytdl-org/youtube-dl/pull/21693
{
'url': 'https://imgur.com/a/iX265HX',
'info_dict': {
'id': 'iX265HX',
'title': 'enen-no-shouboutai'
},
'playlist_count': 2,
}, {
'url': 'https://imgur.com/a/8pih2Ed',
'info_dict': {
'id': '8pih2Ed'
},
'playlist_mincount': 1,
}]
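For reference, the `_call_api` helper that drives the overhauled extractors boils down to one JSON request; a standalone sketch built only from the endpoint, client ID and fields visible in this diff (live network access and the exact response layout assumed):

import json
import urllib.request

client_id = '546c25a59c58ad7'  # ImgurBaseIE._CLIENT_ID above
video_id = 'A61SaA1'
url = (f'https://api.imgur.com/post/v1/media/{video_id}'
       f'?client_id={client_id}&include=media,account')
data = json.load(urllib.request.urlopen(url))
# ImgurIE._real_extract then checks media[0] type/metadata before building formats
print(data['media'][0]['type'])  # expected: 'video' for an animated post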

yt_dlp/extractor/instagram.py

@@ -10,6 +10,7 @@ from ..utils import (
ExtractorError,
decode_base_n,
encode_base_n,
filter_dict,
float_or_none,
format_field,
get_element_by_attribute,
@@ -703,28 +704,31 @@ class InstagramStoryIE(InstagramBaseIE):
user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False)
if not user_info:
self.raise_login_required('This content is unreachable')
user_id = user_info.get('id')
user_id = traverse_obj(user_info, 'pk', 'id', expected_type=str)
story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
if not story_info_url: # user id is only mandatory for non-highlights
raise ExtractorError('Unable to extract user id')
videos = traverse_obj(self._download_json(
f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}',
story_id, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels')
if not videos:
self.raise_login_required('You need to log in to access this content')
full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (str(user_id), 'user', 'full_name'))
full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (user_id, 'user', 'full_name'))
story_title = traverse_obj(videos, (f'highlight:{story_id}', 'title'))
if not story_title:
story_title = f'Story by {username}'
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (user_id, 'items'))
info_data = []
for highlight in highlights:
highlight_data = self._extract_product(highlight)
if highlight_data.get('formats'):
info_data.append({
**highlight_data,
'uploader': full_name,
'uploader_id': user_id,
**filter_dict(highlight_data),
})
return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title)
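`filter_dict` here strips `None` entries from the per-highlight metadata before it is merged, so empty fields cannot clobber the `uploader`/`uploader_id` keys set just above it. Its behavior in isolation:

from yt_dlp.utils import filter_dict

print(filter_dict({'uploader': None, 'id': '123', 'title': None}))
# {'id': '123'}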

yt_dlp/extractor/kinja.py

@@ -12,7 +12,7 @@ from ..utils import (
class KinjaEmbedIE(InfoExtractor):
IENAME = 'kinja:embed'
IE_NAME = 'kinja:embed'
_DOMAIN_REGEX = r'''(?:[^.]+\.)?
(?:
avclub|

yt_dlp/extractor/litv.py

@@ -6,6 +6,7 @@ from ..utils import (
int_or_none,
smuggle_url,
traverse_obj,
try_call,
unsmuggle_url,
)
@@ -96,13 +97,22 @@ class LiTVIE(InfoExtractor):
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
webpage, 'video data', default='{}'), video_id)
if not video_data:
payload = {
'assetId': program_info['assetId'],
payload = {'assetId': program_info['assetId']}
puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
if puid:
payload.update({
'type': 'auth',
'puid': puid,
})
endpoint = 'getUrl'
else:
payload.update({
'watchDevices': program_info['watchDevices'],
'contentType': program_info['contentType'],
}
})
endpoint = 'getMainUrlNoAuth'
video_data = self._download_json(
'https://www.litv.tv/vod/ajax/getMainUrlNoAuth', video_id,
f'https://www.litv.tv/vod/ajax/{endpoint}', video_id,
data=json.dumps(payload).encode('utf-8'),
headers={'Content-Type': 'application/json'})
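`try_call` keeps the cookie lookup from raising when no `PUID` cookie is present: it swallows the `KeyError` and returns `None`, which routes the request to the unauthenticated endpoint. In isolation:

from yt_dlp.utils import try_call

cookies = {}  # no 'PUID' entry in the cookie jar
puid = try_call(lambda: cookies['PUID'].value)
print(puid)  # None -> payload stays minimal and the endpoint is 'getMainUrlNoAuth'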

yt_dlp/extractor/nba.py

@@ -97,7 +97,7 @@ class NBAWatchBaseIE(NBACVPBaseIE):
class NBAWatchEmbedIE(NBAWatchBaseIE):
IENAME = 'nba:watch:embed'
IE_NAME = 'nba:watch:embed'
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
_TESTS = [{
'url': 'http://watch.nba.com/embed?id=659395',
@@ -339,7 +339,7 @@ class NBABaseIE(NBACVPBaseIE):
class NBAEmbedIE(NBABaseIE):
IENAME = 'nba:embed'
IE_NAME = 'nba:embed'
_VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
_TESTS = [{
'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&ampEnv=',
@@ -361,7 +361,7 @@ class NBAEmbedIE(NBABaseIE):
class NBAIE(NBABaseIE):
IENAME = 'nba'
IE_NAME = 'nba'
_VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
_TESTS = [{
'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774',
@@ -388,7 +388,7 @@ class NBAIE(NBABaseIE):
class NBAChannelIE(NBABaseIE):
IENAME = 'nba:channel'
IE_NAME = 'nba:channel'
_VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
_TESTS = [{
'url': 'https://www.nba.com/blazers/video/channel/summer_league',

yt_dlp/extractor/rudovideo.py

@@ -0,0 +1,135 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
determine_ext,
js_to_json,
traverse_obj,
update_url_query,
url_or_none,
)
class RudoVideoIE(InfoExtractor):
_VALID_URL = r'https?://rudo\.video/(?P<type>vod|podcast|live)/(?P<id>[^/?&#]+)'
_EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)//rudo\.video/(?:vod|podcast|live)/[^\'"]+)']
_TESTS = [{
'url': 'https://rudo.video/podcast/cz2wrUy8l0o',
'md5': '28ed82b477708dc5e12e072da2449221',
'info_dict': {
'id': 'cz2wrUy8l0o',
'title': 'Diego Cabot',
'ext': 'mp4',
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
},
}, {
'url': 'https://rudo.video/podcast/bQkt07',
'md5': '36b22a9863de0f47f00fc7532a32a898',
'info_dict': {
'id': 'bQkt07',
'title': 'Tubular Bells',
'ext': 'mp4',
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
},
}, {
'url': 'https://rudo.video/podcast/b42ZUznHX0',
'md5': 'b91c70d832938871367f8ad10c895821',
'info_dict': {
'id': 'b42ZUznHX0',
'title': 'Columna Ruperto Concha',
'ext': 'mp3',
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
},
}, {
'url': 'https://rudo.video/vod/bN5AaJ',
'md5': '01324a329227e2591530ecb4f555c881',
'info_dict': {
'id': 'bN5AaJ',
'title': 'Ucrania 19.03',
'creator': 'La Tercera',
'ext': 'mp4',
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
},
}, {
'url': 'https://rudo.video/live/bbtv',
'info_dict': {
'id': 'bbtv',
'ext': 'mp4',
'creator': 'BioBioTV',
'live_status': 'is_live',
'title': r're:^LIVE BBTV\s\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}$',
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
},
}, {
'url': 'https://rudo.video/live/c13',
'info_dict': {
'id': 'c13',
'title': 'CANAL13',
'ext': 'mp4',
},
'skip': 'Geo-restricted to Chile',
}, {
'url': 'https://rudo.video/live/t13-13cl',
'info_dict': {
'id': 't13-13cl',
'title': 'T13',
'ext': 'mp4',
},
'skip': 'Geo-restricted to Chile',
}]
def _real_extract(self, url):
video_id, type_ = self._match_valid_url(url).group('id', 'type')
is_live = type_ == 'live'
webpage = self._download_webpage(url, video_id)
if 'Streaming is not available in your area' in webpage:
self.raise_geo_restricted()
media_url = (
self._search_regex(
r'var\s+streamURL\s*=\s*[\'"]([^?\'"]+)', webpage, 'stream url', default=None)
# Source URL must be used only if streamURL is unavailable
or self._search_regex(
r'<source[^>]+src=[\'"]([^\'"]+)', webpage, 'source url', default=None))
if not media_url:
youtube_url = self._search_regex(r'file:\s*[\'"]((?:https?:)//(?:www\.)?youtube\.com[^\'"]+)',
webpage, 'youtube url', default=None)
if youtube_url:
return self.url_result(youtube_url, 'Youtube')
raise ExtractorError('Unable to extract stream url')
token_array = self._search_json(
r'<script>var\s+_\$_[a-zA-Z0-9]+\s*=', webpage, 'access token array', video_id,
contains_pattern=r'\[(?s:.+)\]', default=None, transform_source=js_to_json)
if token_array:
token_url = traverse_obj(token_array, (..., {url_or_none}), get_all=False)
if not token_url:
raise ExtractorError('Invalid access token array')
access_token = self._download_json(
token_url, video_id, note='Downloading access token')['data']['authToken']
media_url = update_url_query(media_url, {'auth-token': access_token})
ext = determine_ext(media_url)
if ext == 'm3u8':
formats = self._extract_m3u8_formats(media_url, video_id, live=is_live)
elif ext == 'mp3':
formats = [{
'url': media_url,
'vcodec': 'none',
}]
else:
formats = [{'url': media_url}]
return {
'id': video_id,
'title': (self._search_regex(r'var\s+titleVideo\s*=\s*[\'"]([^\'"]+)',
webpage, 'title', default=None)
or self._og_search_title(webpage)),
'creator': self._search_regex(r'var\s+videoAuthor\s*=\s*[\'"]([^?\'"]+)',
webpage, 'videoAuthor', default=None),
'thumbnail': (self._search_regex(r'var\s+posterIMG\s*=\s*[\'"]([^?\'"]+)',
webpage, 'thumbnail', default=None)
or self._og_search_thumbnail(webpage)),
'formats': formats,
'is_live': is_live,
}
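`update_url_query` is what attaches the fetched auth token to the stream URL; it merges parameters into the existing query string rather than blindly appending. For example (URL invented):

from yt_dlp.utils import update_url_query

print(update_url_query('https://cdn.example.com/live/playlist.m3u8', {'auth-token': 'TOKEN'}))
# https://cdn.example.com/live/playlist.m3u8?auth-token=TOKEN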

yt_dlp/extractor/twitter.py

@@ -10,6 +10,7 @@ from ..compat import (
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
)
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
dict_get,
@@ -1317,20 +1318,7 @@ class TwitterIE(TwitterBaseIE):
}
}
def _extract_status(self, twid):
if self.is_logged_in or self._selected_api == 'graphql':
status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
elif self._selected_api == 'legacy':
status = self._call_api(f'statuses/show/{twid}.json', twid, {
'cards_platform': 'Web-12',
'include_cards': 1,
'include_reply_count': 1,
'include_user_entities': 0,
'tweet_mode': 'extended',
})
elif self._selected_api == 'syndication':
def _call_syndication_api(self, twid):
self.report_warning(
'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
status = self._download_json(
@@ -1350,8 +1338,31 @@ class TwitterIE(TwitterBaseIE):
media.append(detail)
status['extended_entities'] = {'media': media}
else:
raise ExtractorError(f'"{self._selected_api}" is not a valid API selection', expected=True)
return status
def _extract_status(self, twid):
if self._selected_api not in ('graphql', 'legacy', 'syndication'):
raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)
try:
if self.is_logged_in or self._selected_api == 'graphql':
status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
elif self._selected_api == 'legacy':
status = self._call_api(f'statuses/show/{twid}.json', twid, {
'cards_platform': 'Web-12',
'include_cards': 1,
'include_reply_count': 1,
'include_user_entities': 0,
'tweet_mode': 'extended',
})
except ExtractorError as e:
if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
raise
self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
status = self._call_syndication_api(twid)
if self._selected_api == 'syndication':
status = self._call_syndication_api(twid)
return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
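The shape of the fallback added above: try the preferred API, and only on an HTTP 429 fall back to the syndication endpoint; any other failure still propagates. A stripped-down sketch of the same control flow, with hypothetical stand-ins for the extractor internals:

def fetch_status(twid, call_primary, call_syndication, warn):
    try:
        return call_primary(twid)
    except Exception as e:
        cause = getattr(e, 'cause', None)
        if getattr(cause, 'status', None) != 429:  # only rate-limiting is recoverable
            raise
        warn('Rate-limit exceeded; falling back to syndication endpoint')
        return call_syndication(twid)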
@@ -1416,8 +1427,8 @@ class TwitterIE(TwitterBaseIE):
'thumbnails': thumbnails,
'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})), # No longer available
'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
# The codec of http formats are unknown
'_format_sort_fields': ('res', 'br', 'size', 'proto'),
# Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
'_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'), # http format codec is unknown
}
def extract_from_card_info(card):

yt_dlp/update.py

@@ -206,13 +206,14 @@ class Updater:
# XXX: use class variables to simplify testing
_channel = CHANNEL
_origin = ORIGIN
_update_sources = UPDATE_SOURCES
def __init__(self, ydl, target: str | None = None):
self.ydl = ydl
# For backwards compat, target needs to be treated as if it could be None
self.requested_channel, sep, self.requested_tag = (target or self._channel).rpartition('@')
# Check if requested_tag is actually the requested repo/channel
if not sep and ('/' in self.requested_tag or self.requested_tag in UPDATE_SOURCES):
if not sep and ('/' in self.requested_tag or self.requested_tag in self._update_sources):
self.requested_channel = self.requested_tag
self.requested_tag: str = None # type: ignore (we set it later)
elif not self.requested_channel:
@@ -237,11 +238,11 @@ class Updater:
self._block_restart('Automatically restarting into custom builds is disabled for security reasons')
else:
# Check if requested_channel resolves to a known repository or else raise
self.requested_repo = UPDATE_SOURCES.get(self.requested_channel)
self.requested_repo = self._update_sources.get(self.requested_channel)
if not self.requested_repo:
self._report_error(
f'Invalid update channel {self.requested_channel!r} requested. '
f'Valid channels are {", ".join(UPDATE_SOURCES)}', True)
f'Valid channels are {", ".join(self._update_sources)}', True)
self._identifier = f'{detect_variant()} {system_identifier()}'
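A worked example of the `rpartition('@')` parsing this resolution logic builds on, since the no-separator case is the subtle one: with no `@` present, everything lands in the tag slot, and the code above then reclassifies it as a channel when it contains a `/` or names a known update source:

print('nightly@2023.12.30'.rpartition('@'))  # ('nightly', '@', '2023.12.30')
print('stable'.rpartition('@'))              # ('', '', 'stable') -> treated as a channel
print('fork/yt-dlp@latest'.rpartition('@'))  # ('fork/yt-dlp', '@', 'latest')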