Compare commits

..

4 Commits

Author SHA1 Message Date
bashonly
116c268438
[ie/twitter] Work around API rate-limit (#8825)
Closes #8762
Authored by: bashonly
2023-12-24 16:41:28 +00:00
bashonly
e7d22348e7
[ie/twitter] Prioritize m3u8 formats (#8826)
Closes #8117
Authored by: bashonly
2023-12-24 16:40:50 +00:00
bashonly
50eaea9fd7
[ie/instagram] Fix stories extraction (#8843)
Closes #8290
Authored by: bashonly
2023-12-24 16:40:03 +00:00
bashonly
f45c4efcd9
[ie/litv] Fix premium content extraction (#8842)
Closes #8654
Authored by: bashonly
2023-12-24 16:33:16 +00:00
3 changed files with 68 additions and 43 deletions

View File

@@ -10,6 +10,7 @@ from ..utils import (
ExtractorError, ExtractorError,
decode_base_n, decode_base_n,
encode_base_n, encode_base_n,
filter_dict,
float_or_none, float_or_none,
format_field, format_field,
get_element_by_attribute, get_element_by_attribute,
@@ -703,28 +704,31 @@ class InstagramStoryIE(InstagramBaseIE):
user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False) user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False)
if not user_info: if not user_info:
self.raise_login_required('This content is unreachable') self.raise_login_required('This content is unreachable')
user_id = user_info.get('id')
user_id = traverse_obj(user_info, 'pk', 'id', expected_type=str)
story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}' story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
if not story_info_url: # user id is only mandatory for non-highlights
raise ExtractorError('Unable to extract user id')
videos = traverse_obj(self._download_json( videos = traverse_obj(self._download_json(
f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}', f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}',
story_id, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels') story_id, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels')
if not videos: if not videos:
self.raise_login_required('You need to log in to access this content') self.raise_login_required('You need to log in to access this content')
full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (str(user_id), 'user', 'full_name')) full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (user_id, 'user', 'full_name'))
story_title = traverse_obj(videos, (f'highlight:{story_id}', 'title')) story_title = traverse_obj(videos, (f'highlight:{story_id}', 'title'))
if not story_title: if not story_title:
story_title = f'Story by {username}' story_title = f'Story by {username}'
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items')) highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (user_id, 'items'))
info_data = [] info_data = []
for highlight in highlights: for highlight in highlights:
highlight_data = self._extract_product(highlight) highlight_data = self._extract_product(highlight)
if highlight_data.get('formats'): if highlight_data.get('formats'):
info_data.append({ info_data.append({
**highlight_data,
'uploader': full_name, 'uploader': full_name,
'uploader_id': user_id, 'uploader_id': user_id,
**filter_dict(highlight_data),
}) })
return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title) return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title)

View File

@@ -6,6 +6,7 @@ from ..utils import (
int_or_none, int_or_none,
smuggle_url, smuggle_url,
traverse_obj, traverse_obj,
try_call,
unsmuggle_url, unsmuggle_url,
) )
@@ -96,13 +97,22 @@ class LiTVIE(InfoExtractor):
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);', r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
webpage, 'video data', default='{}'), video_id) webpage, 'video data', default='{}'), video_id)
if not video_data: if not video_data:
payload = { payload = {'assetId': program_info['assetId']}
'assetId': program_info['assetId'], puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
if puid:
payload.update({
'type': 'auth',
'puid': puid,
})
endpoint = 'getUrl'
else:
payload.update({
'watchDevices': program_info['watchDevices'], 'watchDevices': program_info['watchDevices'],
'contentType': program_info['contentType'], 'contentType': program_info['contentType'],
} })
endpoint = 'getMainUrlNoAuth'
video_data = self._download_json( video_data = self._download_json(
'https://www.litv.tv/vod/ajax/getMainUrlNoAuth', video_id, f'https://www.litv.tv/vod/ajax/{endpoint}', video_id,
data=json.dumps(payload).encode('utf-8'), data=json.dumps(payload).encode('utf-8'),
headers={'Content-Type': 'application/json'}) headers={'Content-Type': 'application/json'})

View File

@@ -10,6 +10,7 @@ from ..compat import (
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
) )
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
dict_get, dict_get,
@@ -1317,20 +1318,7 @@ class TwitterIE(TwitterBaseIE):
} }
} }
def _extract_status(self, twid): def _call_syndication_api(self, twid):
if self.is_logged_in or self._selected_api == 'graphql':
status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
elif self._selected_api == 'legacy':
status = self._call_api(f'statuses/show/{twid}.json', twid, {
'cards_platform': 'Web-12',
'include_cards': 1,
'include_reply_count': 1,
'include_user_entities': 0,
'tweet_mode': 'extended',
})
elif self._selected_api == 'syndication':
self.report_warning( self.report_warning(
'Not all metadata or media is available via syndication endpoint', twid, only_once=True) 'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
status = self._download_json( status = self._download_json(
@@ -1350,8 +1338,31 @@ class TwitterIE(TwitterBaseIE):
media.append(detail) media.append(detail)
status['extended_entities'] = {'media': media} status['extended_entities'] = {'media': media}
else: return status
raise ExtractorError(f'"{self._selected_api}" is not a valid API selection', expected=True)
def _extract_status(self, twid):
if self._selected_api not in ('graphql', 'legacy', 'syndication'):
raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)
try:
if self.is_logged_in or self._selected_api == 'graphql':
status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
elif self._selected_api == 'legacy':
status = self._call_api(f'statuses/show/{twid}.json', twid, {
'cards_platform': 'Web-12',
'include_cards': 1,
'include_reply_count': 1,
'include_user_entities': 0,
'tweet_mode': 'extended',
})
except ExtractorError as e:
if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
raise
self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
status = self._call_syndication_api(twid)
if self._selected_api == 'syndication':
status = self._call_syndication_api(twid)
return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {} return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
@@ -1416,8 +1427,8 @@ class TwitterIE(TwitterBaseIE):
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})), # No longer available 'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})), # No longer available
'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000), 'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
# The codec of http formats are unknown # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
'_format_sort_fields': ('res', 'br', 'size', 'proto'), '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'), # http format codec is unknown
} }
def extract_from_card_info(card): def extract_from_card_info(card):