Set https as optional in _VALID_URL

Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
Rename sosialDescription to socialDescription
2024-10-02 15:21:23 +02:00 · 2024-02-01 10:12:37 +09:00 · 2024-02-01 09:16:24 +09:00 · 2024-02-01 09:13:11 +09:00 · 2024-02-01 09:11:26 +09:00 · 2024-02-01 08:36:33 +09:00
1 changed file with 14 additions and 16 deletions
--- a/yt_dlp/extractor/zetland.py
+++ b/yt_dlp/extractor/zetland.py
@@ -1,10 +1,10 @@
 from .common import InfoExtractor
-from ..utils import determine_ext, merge_dicts, unified_timestamp
+from ..utils import merge_dicts, unified_timestamp, url_or_none
 from ..utils.traversal import traverse_obj


 class ZetlandDKArticleIE(InfoExtractor):
-    _VALID_URL = r'https://www.zetland.dk/\w+/(?P<id>(?P<story_id>\w{8})-(?P<uploader_id>\w{8})-(?:\w{5}))'
+    _VALID_URL = r'https?://www\.zetland\.dk/\w+/(?P<id>(?P<story_id>\w{8})-(?P<uploader_id>\w{8})-(?:\w{5}))'
    _TESTS = [{
        'url': 'https://www.zetland.dk/historie/sO9aq2MY-a81VP3BY-66e69?utm_source=instagram&utm_medium=linkibio&utm_campaign=artikel',
        'info_dict': {
@@ -19,7 +19,7 @@ class ZetlandDKArticleIE(InfoExtractor):
            'uploader_url': 'https://www.zetland.dk/skribent/a81VP3BY',
            'uploader': 'Helle Fuusager',
            'release_date': '20240116',
-            'thumbnail': 'https://zetland.imgix.net/2aafe500-b14e-11ee-bf83-65d5e1283a57/Zetland_Image_1.jpg?fit=crop&crop=focalpoint&auto=format,compress&cs=srgb&fp-x=0.49421296296296297&fp-y=0.48518518518518516&w=1200&h=630',
+            'thumbnail': r're:https://zetland\.imgix\.net/2aafe500-b14e-11ee-bf83-65d5e1283a57/Zetland_Image_1\.jpg',
            'description': 'md5:9619d426772c133f5abb26db27f26a01',
            'timestamp': 1705377592,
            'series_id': '62d54630-e87b-4ab1-a255-8de58dbe1b14',
@@ -35,11 +35,10 @@ class ZetlandDKArticleIE(InfoExtractor):
        story_data = traverse_obj(next_js_data, ('initialState', 'consume', 'story', 'story'))

        formats = []
-        for audio_url in traverse_obj(story_data, ('story_content', 'meta', 'audioFiles', ...)):
+        for audio_url in traverse_obj(story_data, ('story_content', 'meta', 'audioFiles', ..., {url_or_none})):
            formats.append({
                'url': audio_url,
                'vcodec': 'none',
-                'ext': determine_ext(audio_url)
            })

        return merge_dicts({
@@ -47,27 +46,26 @@ class ZetlandDKArticleIE(InfoExtractor):
            'formats': formats,
            'uploader_id': uploader_id
        }, traverse_obj(story_data, {
-            'title': ('story_content', 'content', 'title') or 'title',
+            'title': ((('story_content', 'content', 'title'), 'title'), {str}),
            'uploader': ('sharer', 'name'),
            'uploader_id': ('sharer', 'sharer_id'),
-            'description': ('story_content', 'content', 'sosialDescription'),
+            'description': ('story_content', 'content', 'socialDescription'),
            'series_id': ('story_content', 'meta', 'seriesId'),
            'release_timestamp': ('published_at', {unified_timestamp}),
            'modified_timestamp': ('revised_at', {unified_timestamp}),
-        }), traverse_obj(next_js_data, ('metaInfo', {
-            'title': ('meta', 'title') or ('ld', 'headline') or ('og', 'og:title') or ('og', 'twitter:title'),
-            'description': (('meta', 'description') or ('ld', 'description')
-                            or ('og', 'og:description') or ('og', 'twitter:description')),
-            'uploader': ('meta', 'author') or ('ld', 'author', 'name'),
-            'uploader_url': ('ld', 'author', 'url'),
-            'thumbnail': ('ld', 'image') or ('og', 'og:image') or ('og', 'twitter:image'),
+        }, get_all=False), traverse_obj(next_js_data, ('metaInfo', {
+            'title': ((('meta', 'title'), ('ld', 'headline'), ('og', 'og:title'), ('og', 'twitter:title')), {str}),
+            'description': ((('meta', 'description'), ('ld', 'description'), ('og', 'og:description'), ('og', 'twitter:description')), {str}),
+            'uploader': ((('meta', 'author'), ('ld', 'author', 'name')), {str}),
+            'uploader_url': ('ld', 'author', 'url', {url_or_none}),
+            'thumbnail': ((('ld', 'image'), ('og', 'og:image'), ('og', 'twitter:image')), {url_or_none}),
            'modified_timestamp': ('ld', 'dateModified', {unified_timestamp}),
            'release_timestamp': ('ld', 'datePublished', {unified_timestamp}),
            'timestamp': ('ld', 'dateCreated', {unified_timestamp}),
-        })), {
+        }), get_all=False), {
            'title': self._html_search_meta(['title', 'og:title', 'twitter:title'], webpage),
            'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
            'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
            'uploader': self._html_search_meta(['author'], webpage),
            'release_timestamp': unified_timestamp(self._html_search_meta(['article:published_time'], webpage)),
-        }, self._search_json_ld(webpage, display_id))
+        }, self._search_json_ld(webpage, display_id, fatal=False))
Author	SHA1	Message	Date
HobbyistDev	7995ff155b	Set `https` as optional in `_VALID_URL` Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>	2024-02-01 10:12:37 +09:00
HobbyistDev	fa3f206e18	Rename `sosialDescription` to `socialDescription`	2024-02-01 09:16:24 +09:00
HobbyistDev	6e0c2ee7b0	Use regex in `thumbnail` key in test	2024-02-01 09:13:11 +09:00
HobbyistDev	7019f2530a	escape dot (`.`) in `_VALID_URL`	2024-02-01 09:11:26 +09:00
HobbyistDev	4cb7edddcd	Merge branch 'extractor/zetland-dk-issue-9024' of https://github.com/HobbyistDev/yt-dlp into extractor/zetland-dk-issue-9024	2024-02-01 08:36:33 +09:00
HobbyistDev	b94cb64caa	Remove `determine_ext` import from `utils`	2024-02-01 08:36:26 +09:00
HobbyistDev	9274444b9a	Import `url_or_none` from `utils`	2024-02-01 08:34:42 +09:00
HobbyistDev	562aae4db2	Traverse correctly in `next_js_data` Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2024-02-01 08:32:55 +09:00
HobbyistDev	ff6b64e765	Traverse correctly in `story_data` Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2024-02-01 08:31:29 +09:00
HobbyistDev	e4b16383d8	Ensure the `audio_url` using `url_or_none` Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2024-02-01 08:25:15 +09:00
HobbyistDev	74b9bfcb1d	Remove unnecessary set `ext` key Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2024-02-01 08:24:34 +09:00
HobbyistDev	b9aa2b5604	set `_search_json_ld` to not fatal	2024-02-01 08:21:35 +09:00