[ie/NYTimes] - update _extractors.py

[ie/NYTimes] - adjustments to the cooking classes
2024-10-01 14:51:25 +02:00 · 2024-01-27 09:49:00 +00:00 · 2024-01-27 09:46:50 +00:00
2 changed files with 18 additions and 116 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -1335,8 +1335,8 @@ from .ntvru import NTVRuIE
 from .nubilesporn import NubilesPornIE
 from .nytimes import (
    NYTimesArticleIE,
+    NYTimesCookingIE,
    NYTimesCookingGuidesIE,
-    NYTimesCookingReceipesIE,
 )
 from .nuvid import NuvidIE
 from .nzherald import NZHeraldIE
--- a/yt_dlp/extractor/nytimes.py
+++ b/yt_dlp/extractor/nytimes.py
@ -235,7 +235,8 @@ class NYTimesArticleIE(NYTimesBaseIE):
        }
-class NYTimesCookingReceipesIE(InfoExtractor):
+class NYTimesCookingIE(InfoExtractor):
    IE_NAME = 'NYTimesCookingRecipes'
    _VALID_URL = r'https?://cooking\.nytimes\.com/recipes/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
@ -354,96 +355,19 @@ class NYTimesCookingGuidesIE(NYTimesBaseIE):
        'playlist_count': 8,
    }]
-    _TOKEN = "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAuNIzKBOFB77aT/jN/FQ+/QVKWq5V1ka1AYmCR9hstz1pGNPH5ajOU9gAqta0T89iPnhjwla+3oec/Z3kGjxbpv6miQXufHFq3u2RC6HyU458cLat5kVPSOQCe3VVB5NRpOlRuwKHqn0txfxnwSSj8mqzstR997d3gKB//RO9zE16y3PoWlDQXkASngNJEWvL19iob/xwAkfEWCjyRILWFY0JYX3AvLMSbq7wsqOCE5srJpo7rRU32zsByhsp1D5W9OYqqwDmflsgCEQy2vqTsJjrJohuNg+urMXNNZ7Y3naMoqttsGDrWVxtPBafKMI8pM2ReNZBbGQsQXRzQNo7+QIDAQAB"
+    _TOKEN = 'MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAuNIzKBOFB77aT/jN/FQ+/QVKWq5V1ka1AYmCR9hstz1pGNPH5ajOU9gAqta0T89iPnhjwla+3oec/Z3kGjxbpv6miQXufHFq3u2RC6HyU458cLat5kVPSOQCe3VVB5NRpOlRuwKHqn0txfxnwSSj8mqzstR997d3gKB//RO9zE16y3PoWlDQXkASngNJEWvL19iob/xwAkfEWCjyRILWFY0JYX3AvLMSbq7wsqOCE5srJpo7rRU32zsByhsp1D5W9OYqqwDmflsgCEQy2vqTsJjrJohuNg+urMXNNZ7Y3naMoqttsGDrWVxtPBafKMI8pM2ReNZBbGQsQXRzQNo7+QIDAQAB'
    _DNS_UUID = '36dd619a-56dc-595b-9e09-37f4152c7b5d'  # uuid -v5 ns:DNS scoop.nyt.net
    _GRAPHQL_QUERY = '''query VideoQuery($id: String!) {
  video(id: $id) {
    ... on Video {
      advertisingProperties {
        sensitivity
        sponsored
      }
      bylines {
        renderedRepresentation
      }
      contentSeries
      cues {
        name
        type
        timeIn
        timeOut
      }
      duration
      embedded
      headline {
        default
      }
      is360
      isLive
      liveUrls
      playlist {
        headline {
          default
        }
        promotionalHeadline
        url
        sourceId
        section {
          displayName
        }
        videos(first: 20) {
          edges @filterEmpty {
            node {
              advertisingProperties {
                sensitivity
                sponsored
              }
              id
              sourceId
              duration
              section {
                id
                name
              }
              headline {
                default
              }
              renditions {
                url
                type
              }
              url
              promotionalMedia {
                ... on Image {
                  crops(
                    cropNames: [SMALL_SQUARE, MEDIUM_SQUARE, SIXTEEN_BY_NINE]
                  ) {
                    renditions {
                      name
                      width
                      height
                      url
                    }
                  }
                }
              }
            }
          }
        }
      }
      promotionalHeadline
      promotionalMedia {
        ... on Image {
-          crops(
+          crops(
            cropNames: [
              SMALL_SQUARE
              MEDIUM_SQUARE
              SIXTEEN_BY_NINE
              THREE_BY_TWO
              TWO_BY_THREE
              FLEXIBLE
            ]
          ) {
            name
            renditions {
              name
@ -454,14 +378,6 @@ class NYTimesCookingGuidesIE(NYTimesBaseIE):
          }
        }
      }
      promotionalSummary
      related {
        ... on Article {
          promotionalHeadline
          url
          sourceId
        }
      }
      renditions {
        type
        width
@ -469,22 +385,7 @@ class NYTimesCookingGuidesIE(NYTimesBaseIE):
        url
        bitrate
      }
      section {
        name
      }
      shortUrl
      sourceId
      subsection {
        name
      }
      summary
      timesTags {
        __typename
        displayName
        isAdvertisingBrandSensitive
        vernacular
      }
      url
    }
  }
 }'''
@ -509,6 +410,7 @@ class NYTimesCookingGuidesIE(NYTimesBaseIE):
                if not url_or_none(image_url):
                    continue
                thumbnails.append({
                    'name': image.get('name'),
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
@ -526,28 +428,28 @@ class NYTimesCookingGuidesIE(NYTimesBaseIE):
            })
        return entries
-    def _json_from_graphql(self, id):
+    def _json_from_graphql(self, media_id):
+        """Query the GraphQL API for the video identified by ``media_id``.
+
+        The API-side id has the form ``nyt://video/<uuid>``, where the UUID is
+        a version-5 UUID derived in two steps: ``_DNS_UUID`` -> 'video' ->
+        ``media_id``. The request is issued with ``fatal=False``, so callers
+        must be prepared for a failed download instead of an exception.
+        """
        # reference: `id-to-uri.js`
        namespace = uuid.UUID(self._DNS_UUID)
        video_uuid = uuid.uuid5(namespace, 'video')
-        media_uuid = uuid.uuid5(video_uuid, id)
+        media_uuid = uuid.uuid5(video_uuid, media_id)
        payload = {
-            "query": self._GRAPHQL_QUERY,
+            'query': self._GRAPHQL_QUERY,
-            "variables": {"id": f"nyt://video/{media_uuid}"}
+            'variables': {'id': f'nyt://video/{media_uuid}'}
        }
        headers = {
-            "Content-Type": "application/json",
+            'Content-Type': 'application/json',
-            "Nyt-App-Type": "vhs",
+            'Nyt-App-Type': 'vhs',
-            "Nyt-App-Version": "v3.52.21",
+            'Nyt-App-Version': 'v3.52.21',
-            "Nyt-Token": self._TOKEN,
+            'Nyt-Token': self._TOKEN,
-            "Origin": "https://cooking.nytimes.com",
+            'Origin': 'https://cooking.nytimes.com',
-            "Referer": "https://www.google.com/",
+            'Referer': 'https://www.google.com/',
        }
        return self._download_json(
-            self._GRAPHQL_API, id, note="Downloading json from GRAPHQL API",
+            self._GRAPHQL_API, media_id, note='Downloading json from GRAPHQL API',
            data=json.dumps(payload, separators=(',', ':')).encode(), headers=headers, fatal=False)
    def _real_extract(self, url):
@ -561,7 +463,7 @@ class NYTimesCookingGuidesIE(NYTimesBaseIE):
        title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
        description = self._html_search_meta(['og:description', 'twitter:description'], webpage)
        creator = self._search_regex(
-            r'<span itemprop="author">(.+)</span></p>', webpage, 'author', default=None)
+            r'<span itemprop="author">([^<]+)</span></p>', webpage, 'author', default=None)
        if media_items:
            media_items.append(lead_video_id)
Author	SHA1	Message	Date
SirElderling	173930869f	[ie/NYTimes] - update _extractors.py	2024-01-27 09:49:00 +00:00
SirElderling	0765da378f	[ie/NYTimes] - adjustments to the cooking classes	2024-01-27 09:46:50 +00:00