Compare commits

..

2 Commits

Author SHA1 Message Date
SirElderling
173930869f [ie/NYTimes] - update _extractors.py 2024-01-27 09:49:00 +00:00
SirElderling
0765da378f [ie/NYTimes] - adjustments to the cooking classes 2024-01-27 09:46:50 +00:00
2 changed files with 18 additions and 116 deletions

View File

@ -1335,8 +1335,8 @@ from .ntvru import NTVRuIE
from .nubilesporn import NubilesPornIE from .nubilesporn import NubilesPornIE
from .nytimes import ( from .nytimes import (
NYTimesArticleIE, NYTimesArticleIE,
NYTimesCookingIE,
NYTimesCookingGuidesIE, NYTimesCookingGuidesIE,
NYTimesCookingReceipesIE,
) )
from .nuvid import NuvidIE from .nuvid import NuvidIE
from .nzherald import NZHeraldIE from .nzherald import NZHeraldIE

View File

@ -235,7 +235,8 @@ class NYTimesArticleIE(NYTimesBaseIE):
} }
class NYTimesCookingReceipesIE(InfoExtractor): class NYTimesCookingIE(InfoExtractor):
IE_NAME = 'NYTimesCookingRecipes'
_VALID_URL = r'https?://cooking\.nytimes\.com/recipes/(?P<id>\d+)' _VALID_URL = r'https?://cooking\.nytimes\.com/recipes/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart', 'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
@ -354,96 +355,19 @@ class NYTimesCookingGuidesIE(NYTimesBaseIE):
'playlist_count': 8, 'playlist_count': 8,
}] }]
_TOKEN = "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAuNIzKBOFB77aT/jN/FQ+/QVKWq5V1ka1AYmCR9hstz1pGNPH5ajOU9gAqta0T89iPnhjwla+3oec/Z3kGjxbpv6miQXufHFq3u2RC6HyU458cLat5kVPSOQCe3VVB5NRpOlRuwKHqn0txfxnwSSj8mqzstR997d3gKB//RO9zE16y3PoWlDQXkASngNJEWvL19iob/xwAkfEWCjyRILWFY0JYX3AvLMSbq7wsqOCE5srJpo7rRU32zsByhsp1D5W9OYqqwDmflsgCEQy2vqTsJjrJohuNg+urMXNNZ7Y3naMoqttsGDrWVxtPBafKMI8pM2ReNZBbGQsQXRzQNo7+QIDAQAB" _TOKEN = 'MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAuNIzKBOFB77aT/jN/FQ+/QVKWq5V1ka1AYmCR9hstz1pGNPH5ajOU9gAqta0T89iPnhjwla+3oec/Z3kGjxbpv6miQXufHFq3u2RC6HyU458cLat5kVPSOQCe3VVB5NRpOlRuwKHqn0txfxnwSSj8mqzstR997d3gKB//RO9zE16y3PoWlDQXkASngNJEWvL19iob/xwAkfEWCjyRILWFY0JYX3AvLMSbq7wsqOCE5srJpo7rRU32zsByhsp1D5W9OYqqwDmflsgCEQy2vqTsJjrJohuNg+urMXNNZ7Y3naMoqttsGDrWVxtPBafKMI8pM2ReNZBbGQsQXRzQNo7+QIDAQAB'
_DNS_UUID = '36dd619a-56dc-595b-9e09-37f4152c7b5d' # uuid -v5 ns:DNS scoop.nyt.net _DNS_UUID = '36dd619a-56dc-595b-9e09-37f4152c7b5d' # uuid -v5 ns:DNS scoop.nyt.net
_GRAPHQL_QUERY = '''query VideoQuery($id: String!) { _GRAPHQL_QUERY = '''query VideoQuery($id: String!) {
video(id: $id) { video(id: $id) {
... on Video { ... on Video {
advertisingProperties {
sensitivity
sponsored
}
bylines { bylines {
renderedRepresentation renderedRepresentation
} }
contentSeries
cues {
name
type
timeIn
timeOut
}
duration duration
embedded
headline {
default
}
is360
isLive
liveUrls
playlist {
headline {
default
}
promotionalHeadline
url
sourceId
section {
displayName
}
videos(first: 20) {
edges @filterEmpty {
node {
advertisingProperties {
sensitivity
sponsored
}
id
sourceId
duration
section {
id
name
}
headline {
default
}
renditions {
url
type
}
url
promotionalMedia {
... on Image {
crops(
cropNames: [SMALL_SQUARE, MEDIUM_SQUARE, SIXTEEN_BY_NINE]
) {
renditions {
name
width
height
url
}
}
}
}
}
}
}
}
promotionalHeadline promotionalHeadline
promotionalMedia { promotionalMedia {
... on Image { ... on Image {
crops( crops {
cropNames: [
SMALL_SQUARE
MEDIUM_SQUARE
SIXTEEN_BY_NINE
THREE_BY_TWO
TWO_BY_THREE
FLEXIBLE
]
) {
name name
renditions { renditions {
name name
@ -454,14 +378,6 @@ class NYTimesCookingGuidesIE(NYTimesBaseIE):
} }
} }
} }
promotionalSummary
related {
... on Article {
promotionalHeadline
url
sourceId
}
}
renditions { renditions {
type type
width width
@ -469,22 +385,7 @@ class NYTimesCookingGuidesIE(NYTimesBaseIE):
url url
bitrate bitrate
} }
section {
name
}
shortUrl
sourceId
subsection {
name
}
summary summary
timesTags {
__typename
displayName
isAdvertisingBrandSensitive
vernacular
}
url
} }
} }
}''' }'''
@ -509,6 +410,7 @@ class NYTimesCookingGuidesIE(NYTimesBaseIE):
if not url_or_none(image_url): if not url_or_none(image_url):
continue continue
thumbnails.append({ thumbnails.append({
'name': image.get('name'),
'url': image_url, 'url': image_url,
'width': int_or_none(image.get('width')), 'width': int_or_none(image.get('width')),
'height': int_or_none(image.get('height')), 'height': int_or_none(image.get('height')),
@ -526,28 +428,28 @@ class NYTimesCookingGuidesIE(NYTimesBaseIE):
}) })
return entries return entries
def _json_from_graphql(self, id): def _json_from_graphql(self, media_id):
# reference: `id-to-uri.js` # reference: `id-to-uri.js`
namespace = uuid.UUID(self._DNS_UUID) namespace = uuid.UUID(self._DNS_UUID)
video_uuid = uuid.uuid5(namespace, 'video') video_uuid = uuid.uuid5(namespace, 'video')
media_uuid = uuid.uuid5(video_uuid, id) media_uuid = uuid.uuid5(video_uuid, media_id)
payload = { payload = {
"query": self._GRAPHQL_QUERY, 'query': self._GRAPHQL_QUERY,
"variables": {"id": f"nyt://video/{media_uuid}"} 'variables': {'id': f'nyt://video/{media_uuid}'}
} }
headers = { headers = {
"Content-Type": "application/json", 'Content-Type': 'application/json',
"Nyt-App-Type": "vhs", 'Nyt-App-Type': 'vhs',
"Nyt-App-Version": "v3.52.21", 'Nyt-App-Version': 'v3.52.21',
"Nyt-Token": self._TOKEN, 'Nyt-Token': self._TOKEN,
"Origin": "https://cooking.nytimes.com", 'Origin': 'https://cooking.nytimes.com',
"Referer": "https://www.google.com/", 'Referer': 'https://www.google.com/',
} }
return self._download_json( return self._download_json(
self._GRAPHQL_API, id, note="Downloading json from GRAPHQL API", self._GRAPHQL_API, id, note='Downloading json from GRAPHQL API',
data=json.dumps(payload, separators=(',', ':')).encode(), headers=headers, fatal=False) data=json.dumps(payload, separators=(',', ':')).encode(), headers=headers, fatal=False)
def _real_extract(self, url): def _real_extract(self, url):
@ -561,7 +463,7 @@ class NYTimesCookingGuidesIE(NYTimesBaseIE):
title = self._html_search_meta(['og:title', 'twitter:title'], webpage) title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
description = self._html_search_meta(['og:description', 'twitter:description'], webpage) description = self._html_search_meta(['og:description', 'twitter:description'], webpage)
creator = self._search_regex( creator = self._search_regex(
r'<span itemprop="author">(.+)</span></p>', webpage, 'author', default=None) r'<span itemprop="author">([^<]+)</span></p>', webpage, 'author', default=None)
if media_items: if media_items:
media_items.append(lead_video_id) media_items.append(lead_video_id)