Fix returned dict

do extract if metadata not available
Update tests
2024-11-28 18:21:26 +01:00 · 2024-01-08 20:54:11 -05:00 · 2024-01-08 20:53:02 -05:00 · 2024-01-08 19:08:33 -05:00 · 2024-01-08 19:07:42 -05:00
1 changed file with 13 additions and 11 deletions
--- a/yt_dlp/extractor/viously.py
+++ b/yt_dlp/extractor/viously.py
@@ -1,9 +1,9 @@
 import base64
 import re
 from .common import InfoExtractor
 from ..utils import (
    extract_attributes,
    get_elements_html_by_class,
    int_or_none,
    parse_iso8601,
 )
@@ -18,16 +18,18 @@ class ViouslyIE(InfoExtractor):
        'info_dict': {
            'id': 'F_xQzS2jwb3',
            'ext': 'mp4',
-            'title': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
+            'title': 'Turbo du 07/09/2014\xa0: Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
-            'description': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
+            'description': 'Turbo du 07/09/2014\xa0: Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
            'age_limit': 0,
-            'upload_date': str,
+            'upload_date': '20230328',
-            'timestamp': float,
+            'timestamp': 1680037507,
            'duration': 3716,
            'categories': ['motors'],
        }
    }]
    def _extract_from_webpage(self, url, webpage):
-        viously_players = get_elements_html_by_class('viously-player', webpage) + get_elements_html_by_class('vsly-player', webpage)
+        viously_players = re.findall(r'<div[^>]*class="(?:[^"]*\s)?v(?:iou)?sly-player(?:\s[^"]*)?"[^>]*>', webpage)
        if not viously_players:
            return
@@ -40,19 +42,19 @@ class ViouslyIE(InfoExtractor):
        for video_id in traverse_obj(viously_players, (..., {extract_attributes}, 'id')):
            formats = self._extract_m3u8_formats(
                f'https://www.viously.com/video/hls/{video_id}/index.m3u8', video_id, fatal=False)
            if not formats:
                continue
            data = self._download_json(
                f'https://www.viously.com/export/json/{video_id}', video_id,
                transform_source=custom_decode, fatal=False)
            if not formats or not data:
                continue
            yield {
                'id': video_id,
                'formats': formats,
                **traverse_obj(data, ('video', {
-                    'title': 'title',
+                    'title': ('title', {str}),
-                    'description': 'description',
+                    'description': ('description', {str}),
                    'duration': ('duration', {int_or_none}),
                    'timestamp': ('iso_date', {parse_iso8601}),
-                    'categories': ('category', {lambda x: [x['name']]}),
+                    'categories': ('category', 'name', {str}, {lambda x: [x] if x else None}),
                })),
            }
Author	SHA1	Message	Date
Maxence	acafd165ef	Fix returned dict	2024-01-08 20:54:11 -05:00
Maxence	b2575bcb11	do extract if metadata not available	2024-01-08 20:53:02 -05:00
Maxence	9e3beb1bfa	Update tests	2024-01-08 19:08:33 -05:00
Maxence	1d07a96cf5	use re.findall to find players	2024-01-08 19:07:42 -05:00