mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-15 03:41:24 +01:00
Compare commits
3 Commits
21bda14aaa
...
6bfb8380a7
Author | SHA1 | Date | |
---|---|---|---|
|
6bfb8380a7 | ||
|
ca5f68c89b | ||
|
b8671868f7 |
|
@ -1,21 +1,19 @@
|
||||||
|
import re
|
||||||
|
import json
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class NinaProtocolIE(InfoExtractor):
|
class NinaProtocolIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?ninaprotocol\.com/releases/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?ninaprotocol\.com/releases/(?P<id>[a-zA-Z0-9\-]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': ' https://www.ninaprotocol.com/releases/3xl-nina-label-mix-014',
|
'url': 'https://www.ninaprotocol.com/releases/3xl-nina-label-mix-014',
|
||||||
'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
|
'md5': 'TODO: md5 sum of the first 10241 bytes of the audio file (use --test)',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1',
|
'id': '3xl-nina-label-mix-014',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': '3XL - Nina Label Mix 014',
|
'title': '3XL - Nina Label Mix 014',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
# Add the thumbnail regex extraction here
|
||||||
# TODO more properties, either as:
|
|
||||||
# * A value
|
|
||||||
# * MD5 checksum; start the string with md5:
|
|
||||||
# * A regular expression; start the string with re:
|
|
||||||
# * Any Python type, e.g. int or float
|
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@ -23,13 +21,38 @@ class NinaProtocolIE(InfoExtractor):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
# TODO more code goes here, for example ...
|
# If the title is not within <h1> tags, adjust the regex below.
|
||||||
title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
|
title = self._html_search_regex(r'<div class="title">([^<]+)</div>', webpage, 'title', default=None)
|
||||||
|
|
||||||
|
if not title:
|
||||||
|
self.report_warning(f'Could not extract title for {video_id}')
|
||||||
|
title = video_id # Use a default title if none is found
|
||||||
|
|
||||||
|
# Extract JSON-like data within JavaScript
|
||||||
|
json_str = self._search_regex(
|
||||||
|
r'self\.__next_f\.push\(\[1,"24:\[\\"(.+?)\\"\]\]"\)',
|
||||||
|
webpage, 'JSON data', fatal=False)
|
||||||
|
|
||||||
|
# Parse JSON data if found
|
||||||
|
audio_url = None
|
||||||
|
if json_str:
|
||||||
|
try:
|
||||||
|
# Clean up the JSON string and load it
|
||||||
|
json_str = re.sub(r'\\u003c|\\u003e|\\u0026', '', json_str)
|
||||||
|
json_data = json.loads(f'[{json_str}]') # Wrap in array brackets to form valid JSON
|
||||||
|
# Navigate through the JSON structure to find the audio URL
|
||||||
|
audio_url = json_data[0].get('animation_url')
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
self.report_warning('Could not parse JSON data for audio URL.')
|
||||||
|
|
||||||
|
# Extract thumbnail
|
||||||
|
thumbnail = self._html_search_regex(
|
||||||
|
r'<img[^>]+src="([^"]+)"[^>]*alt="[^"]*"', webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': self._og_search_description(webpage),
|
'url': audio_url,
|
||||||
'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
|
'thumbnail': thumbnail,
|
||||||
# TODO more properties (see yt_dlp/extractor/common.py)
|
# Add additional properties as needed
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user