mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-27 09:41:26 +01:00
Compare commits
8 Commits
0eb370d3cd
...
91ca560381
Author | SHA1 | Date | |
---|---|---|---|
|
91ca560381 | ||
|
c39358a54b | ||
|
1f8bd8eba8 | ||
|
00cdda4f6f | ||
|
116c268438 | ||
|
e7d22348e7 | ||
|
50eaea9fd7 | ||
|
f45c4efcd9 |
|
@ -140,6 +140,8 @@ class TestFormatSelection(unittest.TestCase):
|
||||||
test('example-with-dashes', 'example-with-dashes')
|
test('example-with-dashes', 'example-with-dashes')
|
||||||
test('all', '2', '47', '45', 'example-with-dashes', '35')
|
test('all', '2', '47', '45', 'example-with-dashes', '35')
|
||||||
test('mergeall', '2+47+45+example-with-dashes+35', multi=True)
|
test('mergeall', '2+47+45+example-with-dashes+35', multi=True)
|
||||||
|
# See: https://github.com/yt-dlp/yt-dlp/pull/8797
|
||||||
|
test('7_a/worst', '35')
|
||||||
|
|
||||||
def test_format_selection_audio(self):
|
def test_format_selection_audio(self):
|
||||||
formats = [
|
formats = [
|
||||||
|
|
|
@ -2465,9 +2465,16 @@ class YoutubeDL:
|
||||||
return selector_function(ctx_copy)
|
return selector_function(ctx_copy)
|
||||||
return final_selector
|
return final_selector
|
||||||
|
|
||||||
stream = io.BytesIO(format_spec.encode())
|
# HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
|
||||||
|
# Prefix numbers with random letters to avoid it being classified as a number
|
||||||
|
# See: https://github.com/yt-dlp/yt-dlp/pull/8797
|
||||||
|
# TODO: Implement parser not reliant on tokenize.tokenize
|
||||||
|
prefix = ''.join(random.choices(string.ascii_letters, k=32))
|
||||||
|
stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
|
||||||
try:
|
try:
|
||||||
tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
|
tokens = list(_remove_unused_ops(
|
||||||
|
token._replace(string=token.string.replace(prefix, ''))
|
||||||
|
for token in tokenize.tokenize(stream.readline)))
|
||||||
except tokenize.TokenError:
|
except tokenize.TokenError:
|
||||||
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
|
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
|
||||||
|
|
||||||
|
|
|
@ -292,7 +292,7 @@ class ARDIE(InfoExtractor):
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# available till 7.12.2023
|
# available till 7.12.2023
|
||||||
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
|
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
|
||||||
'md5': 'a438f671e87a7eba04000336a119ccc4',
|
'md5': '94812e6438488fb923c361a44469614b',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'maischberger-video-424',
|
'id': 'maischberger-video-424',
|
||||||
'display_id': 'maischberger-video-424',
|
'display_id': 'maischberger-video-424',
|
||||||
|
@ -403,26 +403,25 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
_VALID_URL = r'''(?x)https://
|
_VALID_URL = r'''(?x)https://
|
||||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||||
(?:(?P<client>[^/]+)/)?
|
(?:(?P<client>[^/]+)/)?
|
||||||
(?:player|live|video|(?P<playlist>sendung|sammlung))/
|
(?:player|live|video|(?P<playlist>sendung|serie|sammlung))/
|
||||||
(?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
|
(?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
|
||||||
(?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
|
(?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
|
||||||
(?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''
|
(?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI',
|
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
|
||||||
'md5': '3fd5fead7a370a819341129c8d713136',
|
'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen',
|
'display_id': 'filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen',
|
||||||
'id': '12172961',
|
'id': '12939099',
|
||||||
'title': 'Wolfsland - Die traurigen Schwestern',
|
'title': 'Liebe auf vier Pfoten',
|
||||||
'description': r're:^Als der Polizeiobermeister Raaben',
|
'description': r're:^Claudia Schmitt, Anwältin in Salzburg',
|
||||||
'duration': 5241,
|
'duration': 5222,
|
||||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957',
|
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:aee7cbf8f06de976?w=960&ch=ae4d0f2ee47d8b9b',
|
||||||
'timestamp': 1670710500,
|
'timestamp': 1701343800,
|
||||||
'upload_date': '20221210',
|
'upload_date': '20231130',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'age_limit': 12,
|
'episode': 'Liebe auf vier Pfoten',
|
||||||
'episode': 'Wolfsland - Die traurigen Schwestern',
|
|
||||||
'series': 'Filme im MDR'
|
'series': 'Filme im MDR'
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
@ -454,7 +453,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
'duration': 915,
|
'duration': 915,
|
||||||
'episode': 'tagesschau, 20:00 Uhr',
|
'episode': 'tagesschau, 20:00 Uhr',
|
||||||
'series': 'tagesschau',
|
'series': 'tagesschau',
|
||||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49',
|
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||||
|
@ -475,6 +474,10 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
# playlist of type 'sendung'
|
# playlist of type 'sendung'
|
||||||
'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
|
'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# playlist of type 'serie'
|
||||||
|
'url': 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# playlist of type 'sammlung'
|
# playlist of type 'sammlung'
|
||||||
'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
|
'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
|
||||||
|
@ -487,10 +490,11 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber):
|
def _ARD_load_playlist_snippet(self, playlist_id, display_id, client, mode, page_number):
|
||||||
""" Query the ARD server for playlist information
|
""" Query the ARD server for playlist information
|
||||||
and returns the data in "raw" format """
|
and returns the data in "raw" format """
|
||||||
if mode == 'sendung':
|
assert mode in ('sendung', 'serie', 'sammlung')
|
||||||
|
if mode in ('sendung', 'serie'):
|
||||||
graphQL = json.dumps({
|
graphQL = json.dumps({
|
||||||
'query': '''{
|
'query': '''{
|
||||||
showPage(
|
showPage(
|
||||||
|
@ -507,7 +511,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
links { target { id href title } }
|
links { target { id href title } }
|
||||||
type
|
type
|
||||||
}
|
}
|
||||||
}}''' % (client, playlist_id, pageNumber),
|
}}''' % (client, playlist_id, page_number),
|
||||||
}).encode()
|
}).encode()
|
||||||
else: # mode == 'sammlung'
|
else: # mode == 'sammlung'
|
||||||
graphQL = json.dumps({
|
graphQL = json.dumps({
|
||||||
|
@ -528,7 +532,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
type
|
type
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}}''' % (client, playlist_id, pageNumber),
|
}}''' % (client, playlist_id, page_number),
|
||||||
}).encode()
|
}).encode()
|
||||||
# Resources for ARD graphQL debugging:
|
# Resources for ARD graphQL debugging:
|
||||||
# https://api-test.ardmediathek.de/public-gateway
|
# https://api-test.ardmediathek.de/public-gateway
|
||||||
|
@ -538,7 +542,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
data=graphQL,
|
data=graphQL,
|
||||||
headers={'Content-Type': 'application/json'})['data']
|
headers={'Content-Type': 'application/json'})['data']
|
||||||
# align the structure of the returned data:
|
# align the structure of the returned data:
|
||||||
if mode == 'sendung':
|
if mode in ('sendung', 'serie'):
|
||||||
show_page = show_page['showPage']
|
show_page = show_page['showPage']
|
||||||
else: # mode == 'sammlung'
|
else: # mode == 'sammlung'
|
||||||
show_page = show_page['morePage']['widget']
|
show_page = show_page['morePage']['widget']
|
||||||
|
@ -546,12 +550,12 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
|
|
||||||
def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
|
def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
|
||||||
""" Collects all playlist entries and returns them as info dict.
|
""" Collects all playlist entries and returns them as info dict.
|
||||||
Supports playlists of mode 'sendung' and 'sammlung', and also nested
|
Supports playlists of mode 'sendung', 'serie', and 'sammlung',
|
||||||
playlists. """
|
as well as nested playlists. """
|
||||||
entries = []
|
entries = []
|
||||||
pageNumber = 0
|
pageNumber = 0
|
||||||
while True: # iterate by pageNumber
|
while True: # iterate by pageNumber
|
||||||
show_page = self._ARD_load_playlist_snipped(
|
show_page = self._ARD_load_playlist_snippet(
|
||||||
playlist_id, display_id, client, mode, pageNumber)
|
playlist_id, display_id, client, mode, pageNumber)
|
||||||
for teaser in show_page['teasers']: # process playlist items
|
for teaser in show_page['teasers']: # process playlist items
|
||||||
if '/compilation/' in teaser['links']['target']['href']:
|
if '/compilation/' in teaser['links']['target']['href']:
|
||||||
|
|
|
@ -52,7 +52,7 @@ class FacebookIE(InfoExtractor):
|
||||||
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
||||||
[^/]+/videos/(?:[^/]+/)?|
|
[^/]+/videos/(?:[^/]+/)?|
|
||||||
[^/]+/posts/|
|
[^/]+/posts/|
|
||||||
groups/[^/]+/permalink/|
|
groups/[^/]+/(?:permalink|posts)/|
|
||||||
watchparty/
|
watchparty/
|
||||||
)|
|
)|
|
||||||
facebook:
|
facebook:
|
||||||
|
@ -232,6 +232,21 @@ class FacebookIE(InfoExtractor):
|
||||||
'uploader_id': '100013949973717',
|
'uploader_id': '100013949973717',
|
||||||
},
|
},
|
||||||
'skip': 'Requires logging in',
|
'skip': 'Requires logging in',
|
||||||
|
}, {
|
||||||
|
# data.node.comet_sections.content.story.attachments[].throwbackStyles.attachment_target_renderer.attachment.target.attachments[].styles.attachment.media
|
||||||
|
'url': 'https://www.facebook.com/groups/1645456212344334/posts/3737828833107051/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1569199726448814',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Pence MUST GO!',
|
||||||
|
'description': 'Vickie Gentry shared a memory.',
|
||||||
|
'timestamp': 1511548260,
|
||||||
|
'upload_date': '20171124',
|
||||||
|
'uploader': 'Vickie Gentry',
|
||||||
|
'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
|
||||||
|
'thumbnail': r're:^https?://.*',
|
||||||
|
'duration': 148.435,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -612,9 +627,11 @@ class FacebookIE(InfoExtractor):
|
||||||
nodes = variadic(traverse_obj(data, 'nodes', 'node') or [])
|
nodes = variadic(traverse_obj(data, 'nodes', 'node') or [])
|
||||||
attachments = traverse_obj(nodes, (
|
attachments = traverse_obj(nodes, (
|
||||||
..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments',
|
..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments',
|
||||||
..., ('styles', 'style_type_renderer'), 'attachment'), expected_type=dict) or []
|
..., ('styles', 'style_type_renderer', ('throwbackStyles', 'attachment_target_renderer')),
|
||||||
|
'attachment', {dict}))
|
||||||
for attachment in attachments:
|
for attachment in attachments:
|
||||||
ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
|
ns = traverse_obj(attachment, ('all_subattachments', 'nodes', ..., {dict}),
|
||||||
|
('target', 'attachments', ..., 'styles', 'attachment', {dict}))
|
||||||
for n in ns:
|
for n in ns:
|
||||||
parse_attachment(n)
|
parse_attachment(n)
|
||||||
parse_attachment(attachment)
|
parse_attachment(attachment)
|
||||||
|
@ -637,7 +654,7 @@ class FacebookIE(InfoExtractor):
|
||||||
if len(entries) > 1:
|
if len(entries) > 1:
|
||||||
return self.playlist_result(entries, video_id)
|
return self.playlist_result(entries, video_id)
|
||||||
|
|
||||||
video_info = entries[0]
|
video_info = entries[0] if entries else {'id': video_id}
|
||||||
webpage_info = extract_metadata(webpage)
|
webpage_info = extract_metadata(webpage)
|
||||||
# honor precise duration in video info
|
# honor precise duration in video info
|
||||||
if video_info.get('duration'):
|
if video_info.get('duration'):
|
||||||
|
|
|
@ -10,6 +10,7 @@ from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
decode_base_n,
|
decode_base_n,
|
||||||
encode_base_n,
|
encode_base_n,
|
||||||
|
filter_dict,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
format_field,
|
format_field,
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
|
@ -703,28 +704,31 @@ class InstagramStoryIE(InstagramBaseIE):
|
||||||
user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False)
|
user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False)
|
||||||
if not user_info:
|
if not user_info:
|
||||||
self.raise_login_required('This content is unreachable')
|
self.raise_login_required('This content is unreachable')
|
||||||
user_id = user_info.get('id')
|
|
||||||
|
|
||||||
|
user_id = traverse_obj(user_info, 'pk', 'id', expected_type=str)
|
||||||
story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
|
story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
|
||||||
|
if not story_info_url: # user id is only mandatory for non-highlights
|
||||||
|
raise ExtractorError('Unable to extract user id')
|
||||||
|
|
||||||
videos = traverse_obj(self._download_json(
|
videos = traverse_obj(self._download_json(
|
||||||
f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}',
|
f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}',
|
||||||
story_id, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels')
|
story_id, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels')
|
||||||
if not videos:
|
if not videos:
|
||||||
self.raise_login_required('You need to log in to access this content')
|
self.raise_login_required('You need to log in to access this content')
|
||||||
|
|
||||||
full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (str(user_id), 'user', 'full_name'))
|
full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (user_id, 'user', 'full_name'))
|
||||||
story_title = traverse_obj(videos, (f'highlight:{story_id}', 'title'))
|
story_title = traverse_obj(videos, (f'highlight:{story_id}', 'title'))
|
||||||
if not story_title:
|
if not story_title:
|
||||||
story_title = f'Story by {username}'
|
story_title = f'Story by {username}'
|
||||||
|
|
||||||
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
|
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (user_id, 'items'))
|
||||||
info_data = []
|
info_data = []
|
||||||
for highlight in highlights:
|
for highlight in highlights:
|
||||||
highlight_data = self._extract_product(highlight)
|
highlight_data = self._extract_product(highlight)
|
||||||
if highlight_data.get('formats'):
|
if highlight_data.get('formats'):
|
||||||
info_data.append({
|
info_data.append({
|
||||||
**highlight_data,
|
|
||||||
'uploader': full_name,
|
'uploader': full_name,
|
||||||
'uploader_id': user_id,
|
'uploader_id': user_id,
|
||||||
|
**filter_dict(highlight_data),
|
||||||
})
|
})
|
||||||
return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title)
|
return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title)
|
||||||
|
|
|
@ -6,6 +6,7 @@ from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
|
try_call,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -96,13 +97,22 @@ class LiTVIE(InfoExtractor):
|
||||||
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
|
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
|
||||||
webpage, 'video data', default='{}'), video_id)
|
webpage, 'video data', default='{}'), video_id)
|
||||||
if not video_data:
|
if not video_data:
|
||||||
payload = {
|
payload = {'assetId': program_info['assetId']}
|
||||||
'assetId': program_info['assetId'],
|
puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
|
||||||
'watchDevices': program_info['watchDevices'],
|
if puid:
|
||||||
'contentType': program_info['contentType'],
|
payload.update({
|
||||||
}
|
'type': 'auth',
|
||||||
|
'puid': puid,
|
||||||
|
})
|
||||||
|
endpoint = 'getUrl'
|
||||||
|
else:
|
||||||
|
payload.update({
|
||||||
|
'watchDevices': program_info['watchDevices'],
|
||||||
|
'contentType': program_info['contentType'],
|
||||||
|
})
|
||||||
|
endpoint = 'getMainUrlNoAuth'
|
||||||
video_data = self._download_json(
|
video_data = self._download_json(
|
||||||
'https://www.litv.tv/vod/ajax/getMainUrlNoAuth', video_id,
|
f'https://www.litv.tv/vod/ajax/{endpoint}', video_id,
|
||||||
data=json.dumps(payload).encode('utf-8'),
|
data=json.dumps(payload).encode('utf-8'),
|
||||||
headers={'Content-Type': 'application/json'})
|
headers={'Content-Type': 'application/json'})
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,7 @@ from ..compat import (
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
)
|
)
|
||||||
|
from ..networking.exceptions import HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
dict_get,
|
dict_get,
|
||||||
|
@ -1317,41 +1318,51 @@ class TwitterIE(TwitterBaseIE):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_status(self, twid):
|
def _call_syndication_api(self, twid):
|
||||||
if self.is_logged_in or self._selected_api == 'graphql':
|
self.report_warning(
|
||||||
status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
|
'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
|
||||||
|
status = self._download_json(
|
||||||
elif self._selected_api == 'legacy':
|
'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
|
||||||
status = self._call_api(f'statuses/show/{twid}.json', twid, {
|
headers={'User-Agent': 'Googlebot'}, query={
|
||||||
'cards_platform': 'Web-12',
|
'id': twid,
|
||||||
'include_cards': 1,
|
# TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
|
||||||
'include_reply_count': 1,
|
'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
|
||||||
'include_user_entities': 0,
|
|
||||||
'tweet_mode': 'extended',
|
|
||||||
})
|
})
|
||||||
|
if not status:
|
||||||
|
raise ExtractorError('Syndication endpoint returned empty JSON response')
|
||||||
|
# Transform the result so its structure matches that of legacy/graphql
|
||||||
|
media = []
|
||||||
|
for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
|
||||||
|
detail['id_str'] = traverse_obj(detail, (
|
||||||
|
'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
|
||||||
|
media.append(detail)
|
||||||
|
status['extended_entities'] = {'media': media}
|
||||||
|
|
||||||
elif self._selected_api == 'syndication':
|
return status
|
||||||
self.report_warning(
|
|
||||||
'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
|
def _extract_status(self, twid):
|
||||||
status = self._download_json(
|
if self._selected_api not in ('graphql', 'legacy', 'syndication'):
|
||||||
'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
|
raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)
|
||||||
headers={'User-Agent': 'Googlebot'}, query={
|
|
||||||
'id': twid,
|
try:
|
||||||
# TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
|
if self.is_logged_in or self._selected_api == 'graphql':
|
||||||
'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
|
status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
|
||||||
|
elif self._selected_api == 'legacy':
|
||||||
|
status = self._call_api(f'statuses/show/{twid}.json', twid, {
|
||||||
|
'cards_platform': 'Web-12',
|
||||||
|
'include_cards': 1,
|
||||||
|
'include_reply_count': 1,
|
||||||
|
'include_user_entities': 0,
|
||||||
|
'tweet_mode': 'extended',
|
||||||
})
|
})
|
||||||
if not status:
|
except ExtractorError as e:
|
||||||
raise ExtractorError('Syndication endpoint returned empty JSON response')
|
if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
|
||||||
# Transform the result so its structure matches that of legacy/graphql
|
raise
|
||||||
media = []
|
self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
|
||||||
for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
|
status = self._call_syndication_api(twid)
|
||||||
detail['id_str'] = traverse_obj(detail, (
|
|
||||||
'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
|
|
||||||
media.append(detail)
|
|
||||||
status['extended_entities'] = {'media': media}
|
|
||||||
|
|
||||||
else:
|
if self._selected_api == 'syndication':
|
||||||
raise ExtractorError(f'"{self._selected_api}" is not a valid API selection', expected=True)
|
status = self._call_syndication_api(twid)
|
||||||
|
|
||||||
return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
|
return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
|
||||||
|
|
||||||
|
@ -1416,8 +1427,8 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})), # No longer available
|
'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})), # No longer available
|
||||||
'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
|
'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
|
||||||
# The codec of http formats are unknown
|
# Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
|
||||||
'_format_sort_fields': ('res', 'br', 'size', 'proto'),
|
'_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'), # http format codec is unknown
|
||||||
}
|
}
|
||||||
|
|
||||||
def extract_from_card_info(card):
|
def extract_from_card_info(card):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user