mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-26 17:21:23 +01:00
Compare commits
4 Commits
13b3cb3c2b
...
116c268438
Author | SHA1 | Date | |
---|---|---|---|
|
116c268438 | ||
|
e7d22348e7 | ||
|
50eaea9fd7 | ||
|
f45c4efcd9 |
|
@ -10,6 +10,7 @@ from ..utils import (
|
|||
ExtractorError,
|
||||
decode_base_n,
|
||||
encode_base_n,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
format_field,
|
||||
get_element_by_attribute,
|
||||
|
@ -703,28 +704,31 @@ class InstagramStoryIE(InstagramBaseIE):
|
|||
user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False)
|
||||
if not user_info:
|
||||
self.raise_login_required('This content is unreachable')
|
||||
user_id = user_info.get('id')
|
||||
|
||||
user_id = traverse_obj(user_info, 'pk', 'id', expected_type=str)
|
||||
story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
|
||||
if not story_info_url: # user id is only mandatory for non-highlights
|
||||
raise ExtractorError('Unable to extract user id')
|
||||
|
||||
videos = traverse_obj(self._download_json(
|
||||
f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}',
|
||||
story_id, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels')
|
||||
if not videos:
|
||||
self.raise_login_required('You need to log in to access this content')
|
||||
|
||||
full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (str(user_id), 'user', 'full_name'))
|
||||
full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (user_id, 'user', 'full_name'))
|
||||
story_title = traverse_obj(videos, (f'highlight:{story_id}', 'title'))
|
||||
if not story_title:
|
||||
story_title = f'Story by {username}'
|
||||
|
||||
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
|
||||
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (user_id, 'items'))
|
||||
info_data = []
|
||||
for highlight in highlights:
|
||||
highlight_data = self._extract_product(highlight)
|
||||
if highlight_data.get('formats'):
|
||||
info_data.append({
|
||||
**highlight_data,
|
||||
'uploader': full_name,
|
||||
'uploader_id': user_id,
|
||||
**filter_dict(highlight_data),
|
||||
})
|
||||
return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title)
|
||||
|
|
|
@ -6,6 +6,7 @@ from ..utils import (
|
|||
int_or_none,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unsmuggle_url,
|
||||
)
|
||||
|
||||
|
@ -96,13 +97,22 @@ class LiTVIE(InfoExtractor):
|
|||
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
|
||||
webpage, 'video data', default='{}'), video_id)
|
||||
if not video_data:
|
||||
payload = {
|
||||
'assetId': program_info['assetId'],
|
||||
'watchDevices': program_info['watchDevices'],
|
||||
'contentType': program_info['contentType'],
|
||||
}
|
||||
payload = {'assetId': program_info['assetId']}
|
||||
puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
|
||||
if puid:
|
||||
payload.update({
|
||||
'type': 'auth',
|
||||
'puid': puid,
|
||||
})
|
||||
endpoint = 'getUrl'
|
||||
else:
|
||||
payload.update({
|
||||
'watchDevices': program_info['watchDevices'],
|
||||
'contentType': program_info['contentType'],
|
||||
})
|
||||
endpoint = 'getMainUrlNoAuth'
|
||||
video_data = self._download_json(
|
||||
'https://www.litv.tv/vod/ajax/getMainUrlNoAuth', video_id,
|
||||
f'https://www.litv.tv/vod/ajax/{endpoint}', video_id,
|
||||
data=json.dumps(payload).encode('utf-8'),
|
||||
headers={'Content-Type': 'application/json'})
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@ from ..compat import (
|
|||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
dict_get,
|
||||
|
@ -1317,41 +1318,51 @@ class TwitterIE(TwitterBaseIE):
|
|||
}
|
||||
}
|
||||
|
||||
def _extract_status(self, twid):
|
||||
if self.is_logged_in or self._selected_api == 'graphql':
|
||||
status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
|
||||
|
||||
elif self._selected_api == 'legacy':
|
||||
status = self._call_api(f'statuses/show/{twid}.json', twid, {
|
||||
'cards_platform': 'Web-12',
|
||||
'include_cards': 1,
|
||||
'include_reply_count': 1,
|
||||
'include_user_entities': 0,
|
||||
'tweet_mode': 'extended',
|
||||
def _call_syndication_api(self, twid):
|
||||
self.report_warning(
|
||||
'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
|
||||
status = self._download_json(
|
||||
'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
|
||||
headers={'User-Agent': 'Googlebot'}, query={
|
||||
'id': twid,
|
||||
# TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
|
||||
'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
|
||||
})
|
||||
if not status:
|
||||
raise ExtractorError('Syndication endpoint returned empty JSON response')
|
||||
# Transform the result so its structure matches that of legacy/graphql
|
||||
media = []
|
||||
for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
|
||||
detail['id_str'] = traverse_obj(detail, (
|
||||
'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
|
||||
media.append(detail)
|
||||
status['extended_entities'] = {'media': media}
|
||||
|
||||
elif self._selected_api == 'syndication':
|
||||
self.report_warning(
|
||||
'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
|
||||
status = self._download_json(
|
||||
'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
|
||||
headers={'User-Agent': 'Googlebot'}, query={
|
||||
'id': twid,
|
||||
# TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
|
||||
'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
|
||||
return status
|
||||
|
||||
def _extract_status(self, twid):
|
||||
if self._selected_api not in ('graphql', 'legacy', 'syndication'):
|
||||
raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)
|
||||
|
||||
try:
|
||||
if self.is_logged_in or self._selected_api == 'graphql':
|
||||
status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
|
||||
elif self._selected_api == 'legacy':
|
||||
status = self._call_api(f'statuses/show/{twid}.json', twid, {
|
||||
'cards_platform': 'Web-12',
|
||||
'include_cards': 1,
|
||||
'include_reply_count': 1,
|
||||
'include_user_entities': 0,
|
||||
'tweet_mode': 'extended',
|
||||
})
|
||||
if not status:
|
||||
raise ExtractorError('Syndication endpoint returned empty JSON response')
|
||||
# Transform the result so its structure matches that of legacy/graphql
|
||||
media = []
|
||||
for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
|
||||
detail['id_str'] = traverse_obj(detail, (
|
||||
'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
|
||||
media.append(detail)
|
||||
status['extended_entities'] = {'media': media}
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
|
||||
raise
|
||||
self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
|
||||
status = self._call_syndication_api(twid)
|
||||
|
||||
else:
|
||||
raise ExtractorError(f'"{self._selected_api}" is not a valid API selection', expected=True)
|
||||
if self._selected_api == 'syndication':
|
||||
status = self._call_syndication_api(twid)
|
||||
|
||||
return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
|
||||
|
||||
|
@ -1416,8 +1427,8 @@ class TwitterIE(TwitterBaseIE):
|
|||
'thumbnails': thumbnails,
|
||||
'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})), # No longer available
|
||||
'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
|
||||
# The codec of http formats are unknown
|
||||
'_format_sort_fields': ('res', 'br', 'size', 'proto'),
|
||||
# Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
|
||||
'_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'), # http format codec is unknown
|
||||
}
|
||||
|
||||
def extract_from_card_info(card):
|
||||
|
|
Loading…
Reference in New Issue
Block a user