[NinaProtocol] Fixed NewLine Error on EOF.

[NinaProtocol] Fixed flake8 errors.
[NinaProtocol] Updated code to a working implementation. There is a minor bug, however, and any help with it would be appreciated.
2025-02-18 19:01:25 +01:00 · 2023-12-13 18:09:41 -05:00 · 2023-12-13 18:08:13 -05:00 · 2023-12-13 18:03:04 -05:00
1 changed file with 39 additions and 16 deletions
--- a/yt_dlp/extractor/ninaprotocol.py
+++ b/yt_dlp/extractor/ninaprotocol.py
@@ -1,21 +1,19 @@
 import re
 import json
 from .common import InfoExtractor
 class NinaProtocolIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ninaprotocol\.com/releases/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?ninaprotocol\.com/releases/(?P<id>[a-zA-Z0-9\-]+)'
    _TESTS = [{
-        'url': ' https://www.ninaprotocol.com/releases/3xl-nina-label-mix-014',
+        'url': 'https://www.ninaprotocol.com/releases/3xl-nina-label-mix-014',
-        'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
+        'md5': 'TODO: md5 sum of the first 10241 bytes of the audio file (use --test)',
        'info_dict': {
-            'id': '1',
+            'id': '3xl-nina-label-mix-014',
            'ext': 'mp3',
            'title': '3XL - Nina Label Mix 014',
-            'thumbnail': r're:^https?://.*\.jpg$',
+            # Add the thumbnail regex extraction here
            # TODO more properties, either as:
            # * A value
            # * MD5 checksum; start the string with md5:
            # * A regular expression; start the string with re:
            # * Any Python type, e.g. int or float
        }
    }]
@@ -23,13 +21,38 @@ class NinaProtocolIE(InfoExtractor):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
-        # TODO more code goes here, for example ...
+        # If the title is not within <h1> tags, adjust the regex below.
-        title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
+        title = self._html_search_regex(r'<div class="title">([^<]+)</div>', webpage, 'title', default=None)
        if not title:
            self.report_warning(f'Could not extract title for {video_id}')
            title = video_id  # Use a default title if none is found
        # Extract JSON-like data within JavaScript
        json_str = self._search_regex(
            r'self\.__next_f\.push\(\[1,"24:\[\\"(.+?)\\"\]\]"\)',
            webpage, 'JSON data', fatal=False)
        # Parse JSON data if found
        audio_url = None
        if json_str:
            try:
                # Clean up the JSON string and load it
                json_str = re.sub(r'\\u003c|\\u003e|\\u0026', '', json_str)
                json_data = json.loads(f'[{json_str}]')  # Wrap in array brackets to form valid JSON
                # Navigate through the JSON structure to find the audio URL
                audio_url = json_data[0].get('animation_url')
            except json.JSONDecodeError:
                self.report_warning('Could not parse JSON data for audio URL.')
        # Extract thumbnail
        thumbnail = self._html_search_regex(
            r'<img[^>]+src="([^"]+)"[^>]*alt="[^"]*"', webpage, 'thumbnail', fatal=False)
        return {
            'id': video_id,
            'title': title,
-            'description': self._og_search_description(webpage),
+            'url': audio_url,
-            'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
+            'thumbnail': thumbnail,
-            # TODO more properties (see yt_dlp/extractor/common.py)
+            # Add additional properties as needed
        }
Author	SHA1	Message	Date
Abhimanyu Tyagi	6bfb8380a7	[NinaProtocol] Fixed NewLine Error on EOF.	2023-12-13 18:09:41 -05:00
Abhimanyu Tyagi	ca5f68c89b	[NinaProtocol] Fixed flake8 errors.	2023-12-13 18:08:13 -05:00
Abhay Walia	b8671868f7	[NinaProtocol] Updated code to a working implementation. There is a minor bug, however, and any help with it would be appreciated.	2023-12-13 18:03:04 -05:00