Compare commits

..

No commits in common. "0b24f9b867a9125ac1b49dd806887687a2cf097d" and "c0cd9e13eea18829d35c7436acc98ecd4f179174" have entirely different histories.

2 changed files with 74 additions and 74 deletions

View File

@ -1337,7 +1337,7 @@ from .nytimes import (
NYTimesIE, NYTimesIE,
NYTimesArticleIE, NYTimesArticleIE,
NYTimesCookingIE, NYTimesCookingIE,
NYTimesCookingRecipeIE, NYTimesCookingGuidesIE,
) )
from .nuvid import NuvidIE from .nuvid import NuvidIE
from .nzherald import NZHeraldIE from .nzherald import NZHeraldIE

View File

@ -178,7 +178,7 @@ class NYTimesArticleIE(NYTimesBaseIE):
'creator': 'Patricia Cohen', 'creator': 'Patricia Cohen',
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg', 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
'duration': 119.0, 'duration': 119.0,
}, }
}, { }, {
# article with audio and no video # article with audio and no video
'url': 'https://www.nytimes.com/2023/09/29/health/mosquitoes-genetic-engineering.html', 'url': 'https://www.nytimes.com/2023/09/29/health/mosquitoes-genetic-engineering.html',
@ -193,7 +193,7 @@ class NYTimesArticleIE(NYTimesBaseIE):
'creator': 'Stephanie Nolen, Natalija Gormalova', 'creator': 'Stephanie Nolen, Natalija Gormalova',
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg', 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
'duration': 1322, 'duration': 1322,
}, }
}, { }, {
'url': 'https://www.nytimes.com/2023/11/29/business/dealbook/kamala-harris-biden-voters.html', 'url': 'https://www.nytimes.com/2023/11/29/business/dealbook/kamala-harris-biden-voters.html',
'md5': '3eb5ddb1d6f86254fe4f233826778737', 'md5': '3eb5ddb1d6f86254fe4f233826778737',
@ -290,67 +290,8 @@ class NYTimesArticleIE(NYTimesBaseIE):
} }
class NYTimesCookingIE(NYTimesBaseIE): class NYTimesCookingIE(InfoExtractor):
IE_NAME = 'NYTimesCookingGuide' IE_NAME = 'NYTimesCookingRecipes'
# Matches cooking.nytimes.com guide pages; the named group `id` captures the
# path segment that follows /guides/.
_VALID_URL = r'https?://cooking\.nytimes\.com/guides/(?P<id>[\w-]+)'

# Fixtures: each entry pairs a guide URL with the values recorded for it.
_TESTS = [
    {
        'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
        'info_dict': {
            'id': '13-how-to-cook-a-turkey',
            'title': 'How to Cook a Turkey',
            'description': 'md5:726cfd3f9b161bdf5c279879e8050ca0',
        },
        'playlist_count': 2,
    },
    {
        # single video example
        'url': 'https://cooking.nytimes.com/guides/50-how-to-make-mac-and-cheese',
        'md5': '64415805fe0b8640fce6b0b9def5989a',
        'info_dict': {
            'id': '100000005835845',
            'ext': 'mp4',
            'title': 'How to Make Mac and Cheese',
            'description': 'md5:b8f2f33ec1fb7523b21367147c9594f1',
            'duration': 9.51,
            'creator': 'Alison Roman',
            'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
        },
    },
    {
        'url': 'https://cooking.nytimes.com/guides/20-how-to-frost-a-cake',
        'md5': '64415805fe0b8640fce6b0b9def5989a',
        'info_dict': {
            'id': '20-how-to-frost-a-cake',
            'title': 'How to Frost a Cake',
            'description': 'md5:a31fe3b98a8ce7b98aae097730c269cd',
        },
        'playlist_count': 8,
    },
]
def _real_extract(self, url):
    """Extract the video(s) embedded in a cooking guide page.

    The page title and description are read from the og:/twitter: meta
    tags. The lead video id is taken from the ``data-video-player-id``
    attribute, and further ids from the ``data-video-id`` attribute of
    elements with class ``video-item``.

    Returns a playlist result when any ``video-item`` ids were found
    (those videos first, the lead video last). Otherwise returns the lead
    video's info dict with the page-level title, description and creator
    written over it.
    """
    guide_id = self._match_id(url)
    html = self._download_webpage(url, guide_id)

    guide_title = self._html_search_meta(['og:title', 'twitter:title'], html)
    guide_description = self._html_search_meta(['og:description', 'twitter:description'], html)

    # No default is given here, unlike the author lookup below.
    lead_id = self._search_regex(
        r'data-video-player-id="(\d+)"></div>', html, 'lead video')
    item_ids = traverse_obj(
        get_elements_html_by_class('video-item', html),
        (..., {extract_attributes}, 'data-video-id'))

    if not item_ids:
        # Single-video guide: overlay page metadata on the lead video's info.
        info = dict(self._extract_video(lead_id))
        info.update({
            'title': guide_title,
            'description': guide_description,
            'creator': self._search_regex(  # TODO: change to 'creators'
                r'<span itemprop="author">([^<]+)</span></p>', html, 'author', default=None),
        })
        return info

    # Multi-video guide: the lead video goes after the per-item videos.
    entries = [self._extract_video(video_id) for video_id in [*item_ids, lead_id]]
    return self.playlist_result(entries, guide_id, guide_title, guide_description)
class NYTimesCookingRecipeIE(InfoExtractor):
_VALID_URL = r'https?://cooking\.nytimes\.com/recipes/(?P<id>\d+)' _VALID_URL = r'https?://cooking\.nytimes\.com/recipes/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart', 'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
@ -390,27 +331,86 @@ class NYTimesCookingRecipeIE(InfoExtractor):
'title': 'Overnight Oats', 'title': 'Overnight Oats',
'creator': 'Genevieve Ko', 'creator': 'Genevieve Ko',
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg', 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
}, }
}] }]
def _real_extract(self, url): def _real_extract(self, url):
page_id = self._match_id(url) page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id) webpage = self._download_webpage(url, page_id)
recipe_data = self._search_nextjs_data(webpage, page_id)['props']['pageProps']['recipe'] next_data = self._search_nextjs_data(webpage, page_id)['props']['pageProps']
formats, subtitles = self._extract_m3u8_formats_and_subtitles( formats, subtitles = self._extract_m3u8_formats_and_subtitles(
recipe_data['videoSrc'], page_id, 'mp4', m3u8_id='hls') next_data['recipe']['videoSrc'], page_id, 'mp4', m3u8_id='hls')
return { return {
**traverse_obj(recipe_data, { **traverse_obj(next_data, {
'id': ('id', {str_or_none}), 'id': ('recipe', 'id', {str_or_none}),
'title': ('title', {str}), 'title': ('recipe', 'title', {str}),
'description': ('topnote', {clean_html}), 'description': ('recipe', 'topnote', {clean_html}),
'timestamp': ('publishedAt', {int_or_none}), 'timestamp': ('recipe', 'publishedAt', {int_or_none}),
'creator': ('contentAttribution', 'cardByline', {str}), 'creator': ('recipe', 'contentAttribution', 'cardByline', {str}),
}), }),
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'thumbnails': [{'url': thumb_url} for thumb_url in traverse_obj( 'thumbnails': [{'url': thumb_url} for thumb_url in traverse_obj(
recipe_data, ('image', 'crops', 'recipe', ..., {url_or_none}))], next_data, ('recipe', 'image', 'crops', 'recipe', ..., {url_or_none}))],
}
class NYTimesCookingGuidesIE(NYTimesBaseIE):
    """Extractor for guide pages under cooking.nytimes.com/guides/.

    A guide yields either a single video or a playlist, depending on
    whether the page contains elements with class ``video-item``.
    """

    # The named group `id` captures the path segment that follows /guides/.
    _VALID_URL = r'https?://cooking\.nytimes\.com/guides/(?P<id>[\w-]+)'

    # Fixtures: each entry pairs a guide URL with the values recorded for it.
    _TESTS = [
        {
            'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
            'info_dict': {
                'id': '13-how-to-cook-a-turkey',
                'title': 'How to Cook a Turkey',
                'description': 'md5:726cfd3f9b161bdf5c279879e8050ca0',
            },
            'playlist_count': 2,
        },
        {
            # single video example
            'url': 'https://cooking.nytimes.com/guides/50-how-to-make-mac-and-cheese',
            'md5': '64415805fe0b8640fce6b0b9def5989a',
            'info_dict': {
                'id': '100000005835845',
                'ext': 'mp4',
                'title': 'How to Make Mac and Cheese',
                'description': 'md5:b8f2f33ec1fb7523b21367147c9594f1',
                'duration': 9.51,
                'creator': 'Alison Roman',
                'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
            },
        },
        {
            'url': 'https://cooking.nytimes.com/guides/20-how-to-frost-a-cake',
            'md5': '64415805fe0b8640fce6b0b9def5989a',
            'info_dict': {
                'id': '20-how-to-frost-a-cake',
                'title': 'How to Frost a Cake',
                'description': 'md5:a31fe3b98a8ce7b98aae097730c269cd',
            },
            'playlist_count': 8,
        },
    ]

    def _real_extract(self, url):
        """Return a playlist or a single-video info dict for a guide URL.

        The title and description come from the og:/twitter: meta tags.
        When ``video-item`` elements carry ``data-video-id`` values, every
        one of those videos plus the lead video (appended last) is returned
        as a playlist. Otherwise only the lead video is returned, with the
        page title, description and author overlaid on its info dict.
        """
        guide_id = self._match_id(url)
        html = self._download_webpage(url, guide_id)

        guide_title = self._html_search_meta(['og:title', 'twitter:title'], html)
        guide_description = self._html_search_meta(['og:description', 'twitter:description'], html)

        # No default is given here, unlike the author lookup below.
        lead_id = self._search_regex(
            r'data-video-player-id="(\d+)"></div>', html, 'lead video')
        item_ids = traverse_obj(
            get_elements_html_by_class('video-item', html),
            (..., {extract_attributes}, 'data-video-id'))

        if not item_ids:
            # Single-video guide: page metadata overrides the video's own.
            return {
                **self._extract_video(lead_id),
                'title': guide_title,
                'description': guide_description,
                'creator': self._search_regex(  # TODO: change to 'creators'
                    r'<span itemprop="author">([^<]+)</span></p>', html, 'author', default=None),
            }

        # Multi-video guide: the lead video goes after the per-item videos.
        entries = [self._extract_video(video_id) for video_id in [*item_ids, lead_id]]
        return self.playlist_result(entries, guide_id, guide_title, guide_description)