mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-10-03 07:41:27 +02:00
Compare commits
No commits in common. "a3a20f7f64fc69a6d4f5969b0274a28541268c4a" and "b8d9fdccc0e5b53dd5441a714b074c4f19753090" have entirely different histories.
a3a20f7f64
...
b8d9fdccc0
|
@ -27,7 +27,7 @@ class NYTimesBaseIE(InfoExtractor):
|
||||||
'url': 'url',
|
'url': 'url',
|
||||||
'width': ('width', {int_or_none}),
|
'width': ('width', {int_or_none}),
|
||||||
'height': ('height', {int_or_none}),
|
'height': ('height', {int_or_none}),
|
||||||
}), default=None)
|
}))
|
||||||
|
|
||||||
def _extract_media_from_json(self, video_id, content_media_json):
|
def _extract_media_from_json(self, video_id, content_media_json):
|
||||||
urls = []
|
urls = []
|
||||||
|
@ -153,9 +153,6 @@ class NYTimesArticleIE(NYTimesBaseIE):
|
||||||
'id': 'air-traffic-controllers-safety',
|
'id': 'air-traffic-controllers-safety',
|
||||||
'title': 'Drunk and Asleep on the Job: Air Traffic Controllers Pushed to the Brink',
|
'title': 'Drunk and Asleep on the Job: Air Traffic Controllers Pushed to the Brink',
|
||||||
'description': 'md5:549e5a5e935bf7d048be53ba3d2c863d',
|
'description': 'md5:549e5a5e935bf7d048be53ba3d2c863d',
|
||||||
'upload_date': '20231202',
|
|
||||||
'creator': 'Emily Steel, Sydney Ember',
|
|
||||||
'timestamp': 1701511264,
|
|
||||||
},
|
},
|
||||||
'playlist_count': 3,
|
'playlist_count': 3,
|
||||||
|
|
||||||
|
@ -178,13 +175,11 @@ class NYTimesArticleIE(NYTimesBaseIE):
|
||||||
# audio articles will have an url and no formats
|
# audio articles will have an url and no formats
|
||||||
url = traverse_obj(block, ('fileUrl', {url_or_none}))
|
url = traverse_obj(block, ('fileUrl', {url_or_none}))
|
||||||
if not formats and url:
|
if not formats and url:
|
||||||
formats.append({'url': url, 'vcodec': 'none'})
|
formats.append({'url': url})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
**details,
|
**details,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnails': self._extract_thumbnails(traverse_obj(
|
|
||||||
block, ('promotionalMedia', 'crops', ..., 'renditions', ...))),
|
|
||||||
'subtitles': subtitles
|
'subtitles': subtitles
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -202,26 +197,30 @@ class NYTimesArticleIE(NYTimesBaseIE):
|
||||||
get_all=False) or self._html_search_meta(['og:description', 'twitter:description'], webpage),
|
get_all=False) or self._html_search_meta(['og:description', 'twitter:description'], webpage),
|
||||||
'timestamp': traverse_obj(art_json, ('firstPublished', {parse_iso8601})),
|
'timestamp': traverse_obj(art_json, ('firstPublished', {parse_iso8601})),
|
||||||
'creator': ', '.join(
|
'creator': ', '.join(
|
||||||
traverse_obj(art_json, ('bylines', ..., 'creators', ..., 'displayName'))), # TODO: change to 'creators' (list)
|
traverse_obj(art_json, ('bylines', ..., 'creators', ..., 'displayName'))) # TODO: change to 'creators' (list)
|
||||||
'thumbnails': self._extract_thumbnails(traverse_obj(
|
|
||||||
art_json, ('promotionalMedia', 'assetCrops', ..., 'renditions', ...))),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
blocks = traverse_obj(art_json, (
|
blocks = traverse_obj(art_json, (
|
||||||
'sprinkledBody', 'content', ..., ('ledeMedia', None),
|
'sprinkledBody', 'content', ..., ('ledeMedia', None),
|
||||||
lambda _, v: v['__typename'] in ('Video', 'Audio')))
|
lambda _, v: v['__typename'] in ('Video', 'Audio')))
|
||||||
|
|
||||||
entries = []
|
# more than 1 video in the article, treat it as a playlist
|
||||||
for block in blocks:
|
if len(blocks) > 1:
|
||||||
entries.append(merge_dicts(self._extract_content_from_block(block), common_info))
|
entries = []
|
||||||
|
for block in blocks:
|
||||||
|
entries.append(merge_dicts(
|
||||||
|
self._extract_content_from_block(block), common_info, {
|
||||||
|
'thumbnails': self._extract_thumbnails(traverse_obj(
|
||||||
|
block, ('promotionalMedia', 'crops', ..., 'renditions', ...)))}))
|
||||||
|
|
||||||
if len(entries) > 1:
|
return self.playlist_result(entries, page_id, common_info.get('title'),
|
||||||
return self.playlist_result(entries, page_id, **common_info)
|
common_info.get('description'))
|
||||||
|
|
||||||
return {
|
return merge_dicts(self._extract_content_from_block(blocks[0]), common_info, {
|
||||||
'id': page_id,
|
'thumbnails': self._extract_thumbnails(traverse_obj(
|
||||||
**entries[0],
|
blocks[0], ('promotionalMedia', 'crops', ..., 'renditions', ...)) or traverse_obj(
|
||||||
}
|
art_json, ('promotionalMedia', 'assetCrops', ..., 'renditions', ...))),
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
class NYTimesCookingIE(InfoExtractor):
|
class NYTimesCookingIE(InfoExtractor):
|
||||||
|
@ -288,7 +287,7 @@ class NYTimesCookingIE(InfoExtractor):
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'thumbnails': [{'url': url} for url in traverse_obj(next_data, (
|
'thumbnails': [{'url': url} for url in traverse_obj(next_data, (
|
||||||
'recipe', 'image', 'crops', 'recipe', ..., {url_or_none}))]
|
'recipe', 'image', 'crops', 'recipe', lambda _, v: url_or_none(v)))]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -392,15 +391,14 @@ class NYTimesCookingGuidesIE(NYTimesBaseIE):
|
||||||
|
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
self._GRAPHQL_API, media_id, note='Downloading json from GRAPHQL API', data=json.dumps({
|
self._GRAPHQL_API, media_id, note='Downloading json from GRAPHQL API', data=json.dumps({
|
||||||
'query': self._GRAPHQL_QUERY,
|
'query': self._GRAPHQL_QUERY, 'variables': {'id': f'nyt://video/{media_uuid}'}},
|
||||||
'variables': {'id': f'nyt://video/{media_uuid}'},
|
separators=(',', ':')).encode(), headers={
|
||||||
}, separators=(',', ':')).encode(), headers={
|
'Content-Type': 'application/json',
|
||||||
'Content-Type': 'application/json',
|
'Nyt-App-Type': 'vhs',
|
||||||
'Nyt-App-Type': 'vhs',
|
'Nyt-App-Version': 'v3.52.21',
|
||||||
'Nyt-App-Version': 'v3.52.21',
|
'Nyt-Token': self._TOKEN,
|
||||||
'Nyt-Token': self._TOKEN,
|
'Origin': 'https://cooking.nytimes.com',
|
||||||
'Origin': 'https://cooking.nytimes.com',
|
'Referer': 'https://www.google.com/'}, fatal=False) or {}
|
||||||
}, fatal=False) or {}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
page_id = self._match_id(url)
|
page_id = self._match_id(url)
|
||||||
|
@ -412,6 +410,8 @@ class NYTimesCookingGuidesIE(NYTimesBaseIE):
|
||||||
get_elements_html_by_class('video-item', webpage), (..., {extract_attributes}, 'data-video-id'))
|
get_elements_html_by_class('video-item', webpage), (..., {extract_attributes}, 'data-video-id'))
|
||||||
title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
|
title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
|
||||||
description = self._html_search_meta(['og:description', 'twitter:description'], webpage)
|
description = self._html_search_meta(['og:description', 'twitter:description'], webpage)
|
||||||
|
creator = self._search_regex(
|
||||||
|
r'<span itemprop="author">([^<]+)</span></p>', webpage, 'author', default=None)
|
||||||
|
|
||||||
if media_items:
|
if media_items:
|
||||||
media_items.append(lead_video_id)
|
media_items.append(lead_video_id)
|
||||||
|
@ -426,8 +426,7 @@ class NYTimesCookingGuidesIE(NYTimesBaseIE):
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'duration': int_or_none(json_obj.get('duration')),
|
'duration': int_or_none(json_obj.get('duration')),
|
||||||
'creator': self._search_regex(
|
'creator': creator, # TODO: change to 'creators'
|
||||||
r'<span itemprop="author">([^<]+)</span></p>', webpage, 'author', default=None), # TODO: change to 'creators'
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'thumbnails': self._extract_thumbnails(
|
'thumbnails': self._extract_thumbnails(
|
||||||
|
|
Loading…
Reference in New Issue
Block a user