minutia

Update _extractors.py
swap CookingGuide and CookingRecipe IEs
2024-10-03 07:41:27 +02:00 · 2024-02-05 00:22:00 +00:00 · 2024-02-05 00:18:13 +00:00 · 2024-02-05 00:18:02 +00:00 · 2024-02-04 23:52:53 +00:00 · 2024-02-04 23:40:20 +00:00
2 changed files with 74 additions and 74 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1337,7 +1337,7 @@ from .nytimes import (
    NYTimesIE,
    NYTimesArticleIE,
    NYTimesCookingIE,
-    NYTimesCookingGuidesIE,
+    NYTimesCookingRecipeIE,
 )
 from .nuvid import NuvidIE
 from .nzherald import NZHeraldIE
--- a/yt_dlp/extractor/nytimes.py
+++ b/yt_dlp/extractor/nytimes.py
@@ -178,7 +178,7 @@ class NYTimesArticleIE(NYTimesBaseIE):
            'creator': 'Patricia Cohen',
            'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
            'duration': 119.0,
-        }
+        },
    }, {
        # article with audio and no video
        'url': 'https://www.nytimes.com/2023/09/29/health/mosquitoes-genetic-engineering.html',
@@ -193,7 +193,7 @@ class NYTimesArticleIE(NYTimesBaseIE):
            'creator': 'Stephanie Nolen, Natalija Gormalova',
            'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
            'duration': 1322,
-        }
+        },
    }, {
        'url': 'https://www.nytimes.com/2023/11/29/business/dealbook/kamala-harris-biden-voters.html',
        'md5': '3eb5ddb1d6f86254fe4f233826778737',
@@ -290,8 +290,67 @@ class NYTimesArticleIE(NYTimesBaseIE):
        }


-class NYTimesCookingIE(InfoExtractor):
-    IE_NAME = 'NYTimesCookingRecipes'
+class NYTimesCookingIE(NYTimesBaseIE):
+    IE_NAME = 'NYTimesCookingGuide'
+    _VALID_URL = r'https?://cooking\.nytimes\.com/guides/(?P<id>[\w-]+)'
+    _TESTS = [{
+        'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
+        'info_dict': {
+            'id': '13-how-to-cook-a-turkey',
+            'title': 'How to Cook a Turkey',
+            'description': 'md5:726cfd3f9b161bdf5c279879e8050ca0',
+        },
+        'playlist_count': 2,
+    }, {
+        # single video example
+        'url': 'https://cooking.nytimes.com/guides/50-how-to-make-mac-and-cheese',
+        'md5': '64415805fe0b8640fce6b0b9def5989a',
+        'info_dict': {
+            'id': '100000005835845',
+            'ext': 'mp4',
+            'title': 'How to Make Mac and Cheese',
+            'description': 'md5:b8f2f33ec1fb7523b21367147c9594f1',
+            'duration': 9.51,
+            'creator': 'Alison Roman',
+            'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
+        },
+    }, {
+        'url': 'https://cooking.nytimes.com/guides/20-how-to-frost-a-cake',
+        'md5': '64415805fe0b8640fce6b0b9def5989a',
+        'info_dict': {
+            'id': '20-how-to-frost-a-cake',
+            'title': 'How to Frost a Cake',
+            'description': 'md5:a31fe3b98a8ce7b98aae097730c269cd',
+        },
+        'playlist_count': 8,
+    }]
+
+    def _real_extract(self, url):
+        page_id = self._match_id(url)
+        webpage = self._download_webpage(url, page_id)
+        title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
+        description = self._html_search_meta(['og:description', 'twitter:description'], webpage)
+
+        lead_video_id = self._search_regex(
+            r'data-video-player-id="(\d+)"></div>', webpage, 'lead video')
+        media_ids = traverse_obj(
+            get_elements_html_by_class('video-item', webpage), (..., {extract_attributes}, 'data-video-id'))
+
+        if media_ids:
+            media_ids.append(lead_video_id)
+            return self.playlist_result(
+                [self._extract_video(media_id) for media_id in media_ids], page_id, title, description)
+
+        return {
+            **self._extract_video(lead_video_id),
+            'title': title,
+            'description': description,
+            'creator': self._search_regex(  # TODO: change to 'creators'
+                r'<span itemprop="author">([^<]+)</span></p>', webpage, 'author', default=None),
+        }
+
+
+class NYTimesCookingRecipeIE(InfoExtractor):
    _VALID_URL = r'https?://cooking\.nytimes\.com/recipes/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
@@ -331,86 +390,27 @@ class NYTimesCookingIE(InfoExtractor):
            'title': 'Overnight Oats',
            'creator': 'Genevieve Ko',
            'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
-        }
+        },
    }]

    def _real_extract(self, url):
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)
-        next_data = self._search_nextjs_data(webpage, page_id)['props']['pageProps']
+        recipe_data = self._search_nextjs_data(webpage, page_id)['props']['pageProps']['recipe']

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
-            next_data['recipe']['videoSrc'], page_id, 'mp4', m3u8_id='hls')
+            recipe_data['videoSrc'], page_id, 'mp4', m3u8_id='hls')

        return {
-            **traverse_obj(next_data, {
-                'id': ('recipe', 'id', {str_or_none}),
-                'title': ('recipe', 'title', {str}),
-                'description': ('recipe', 'topnote', {clean_html}),
-                'timestamp': ('recipe', 'publishedAt', {int_or_none}),
-                'creator': ('recipe', 'contentAttribution', 'cardByline', {str}),
+            **traverse_obj(recipe_data, {
+                'id': ('id', {str_or_none}),
+                'title': ('title', {str}),
+                'description': ('topnote', {clean_html}),
+                'timestamp': ('publishedAt', {int_or_none}),
+                'creator': ('contentAttribution', 'cardByline', {str}),
            }),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': [{'url': thumb_url} for thumb_url in traverse_obj(
-                next_data, ('recipe', 'image', 'crops', 'recipe', ..., {url_or_none}))],
-        }
-
-
-class NYTimesCookingGuidesIE(NYTimesBaseIE):
-    _VALID_URL = r'https?://cooking\.nytimes\.com/guides/(?P<id>[\w-]+)'
-    _TESTS = [{
-        'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
-        'info_dict': {
-            'id': '13-how-to-cook-a-turkey',
-            'title': 'How to Cook a Turkey',
-            'description': 'md5:726cfd3f9b161bdf5c279879e8050ca0',
-        },
-        'playlist_count': 2,
-    }, {
-        # single video example
-        'url': 'https://cooking.nytimes.com/guides/50-how-to-make-mac-and-cheese',
-        'md5': '64415805fe0b8640fce6b0b9def5989a',
-        'info_dict': {
-            'id': '100000005835845',
-            'ext': 'mp4',
-            'title': 'How to Make Mac and Cheese',
-            'description': 'md5:b8f2f33ec1fb7523b21367147c9594f1',
-            'duration': 9.51,
-            'creator': 'Alison Roman',
-            'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
-        }
-    }, {
-        'url': 'https://cooking.nytimes.com/guides/20-how-to-frost-a-cake',
-        'md5': '64415805fe0b8640fce6b0b9def5989a',
-        'info_dict': {
-            'id': '20-how-to-frost-a-cake',
-            'title': 'How to Frost a Cake',
-            'description': 'md5:a31fe3b98a8ce7b98aae097730c269cd',
-        },
-        'playlist_count': 8,
-    }]
-
-    def _real_extract(self, url):
-        page_id = self._match_id(url)
-        webpage = self._download_webpage(url, page_id)
-        title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
-        description = self._html_search_meta(['og:description', 'twitter:description'], webpage)
-
-        lead_video_id = self._search_regex(
-            r'data-video-player-id="(\d+)"></div>', webpage, 'lead video')
-        media_ids = traverse_obj(
-            get_elements_html_by_class('video-item', webpage), (..., {extract_attributes}, 'data-video-id'))
-
-        if media_ids:
-            media_ids.append(lead_video_id)
-            return self.playlist_result(
-                [self._extract_video(media_id) for media_id in media_ids], page_id, title, description)
-
-        return {
-            **self._extract_video(lead_video_id),
-            'title': title,
-            'description': description,
-            'creator': self._search_regex(  # TODO: change to 'creators'
-                r'<span itemprop="author">([^<]+)</span></p>', webpage, 'author', default=None),
+                recipe_data, ('image', 'crops', 'recipe', ..., {url_or_none}))],
        }
Author	SHA1	Message	Date
bashonly	0b24f9b867	minutia	2024-02-05 00:22:00 +00:00
bashonly	f0ce3e0a55	Update _extractors.py	2024-02-05 00:18:13 +00:00
bashonly	93ce9ad277	swap CookingGuide and CookingRecipe IEs: original video IDs are still available for Guide but not for Recipe	2024-02-05 00:18:02 +00:00
bashonly	32a86f7a1b	simplify recipes	2024-02-04 23:52:53 +00:00
bashonly	7f7b2ebc86	make old archive id for single video cooking guides; playlists were not handled previously	2024-02-04 23:40:20 +00:00