Compare commits

...

8 Commits

Author          SHA1        Message                                                       Date
Michael Skyba   1c051fd59c  Merge 2be0c24897 into b83ca24eb7                              2024-11-10 00:54:44 +01:00
sepro           b83ca24eb7  [core] Catch broken Cryptodome installations (#11486)         2024-11-10 00:53:49 +01:00
                            Authored by: seproDev
bashonly        240a7d43c8  [build] Pin websockets version to >=13.0,<14 (#11488)         2024-11-09 23:46:47 +00:00
                            websockets 14.0 causes CI test failures (a lot more of them)
                            Authored by: bashonly
bashonly        f13df591d4  [build] Enable attestations for trusted publishing (#11420)   2024-11-09 23:26:02 +00:00
                            Reverts 428ffb75aa
                            Authored by: bashonly
Michael Skyba   2be0c24897  [ie/suno] add fallbacks for basic page metadata               2024-11-04 23:54:46 -05:00
Michael Skyba   10a1a93352  [ie/suno] use regexes for thumbnail match tests               2024-11-04 23:54:45 -05:00
Michael Skyba   6ec19e942d  [ie/suno] add playlist extractor                              2024-11-04 23:54:43 -05:00
Michael Skyba   3c59d3e7a2  [ie/suno] add /song mp3 extractor                             2024-11-04 20:18:18 -05:00
8 changed files with 205 additions and 8 deletions

View File

@@ -504,7 +504,8 @@ jobs:
       - windows32
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/download-artifact@v4
+      - name: Download artifacts
+        uses: actions/download-artifact@v4
         with:
           path: artifact
           pattern: build-bin-*

View File

@@ -28,3 +28,20 @@ jobs:
       actions: write # For cleaning up cache
       id-token: write # mandatory for trusted publishing
     secrets: inherit
+
+  publish_pypi:
+    needs: [release]
+    if: vars.MASTER_PYPI_PROJECT != ''
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write # mandatory for trusted publishing
+    steps:
+      - name: Download artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: dist
+          name: build-pypi
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          verbose: true

View File

@@ -41,3 +41,20 @@ jobs:
       actions: write # For cleaning up cache
       id-token: write # mandatory for trusted publishing
     secrets: inherit
+
+  publish_pypi:
+    needs: [release]
+    if: vars.NIGHTLY_PYPI_PROJECT != ''
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write # mandatory for trusted publishing
+    steps:
+      - name: Download artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: dist
+          name: build-pypi
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          verbose: true

View File

@@ -2,10 +2,6 @@ name: Release
 on:
   workflow_call:
     inputs:
-      prerelease:
-        required: false
-        default: true
-        type: boolean
       source:
         required: false
         default: ''
@@ -18,6 +14,10 @@
         required: false
         default: ''
         type: string
+      prerelease:
+        required: false
+        default: true
+        type: boolean
   workflow_dispatch:
     inputs:
       source:
@@ -278,11 +278,20 @@
           make clean-cache
           python -m build --no-isolation .

+      - name: Upload artifacts
+        if: github.event_name != 'workflow_dispatch'
+        uses: actions/upload-artifact@v4
+        with:
+          name: build-pypi
+          path: |
+            dist/*
+          compression-level: 0
+
       - name: Publish to PyPI
+        if: github.event_name == 'workflow_dispatch'
         uses: pypa/gh-action-pypi-publish@release/v1
         with:
           verbose: true
-          attestations: false # Currently doesn't work w/ reusable workflows (breaks nightly)

   publish:
     needs: [prepare, build]

View File

@@ -52,7 +52,7 @@ default = [
     "pycryptodomex",
     "requests>=2.32.2,<3",
     "urllib3>=1.26.17,<3",
-    "websockets>=13.0",
+    "websockets>=13.0,<14",
 ]
 curl-cffi = [
     "curl-cffi==0.5.10; os_name=='nt' and implementation_name=='cpython'",

View File

@@ -24,7 +24,7 @@ try:
     from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5  # noqa: F401
     from Crypto.Hash import CMAC, SHA1  # noqa: F401
     from Crypto.PublicKey import RSA  # noqa: F401
-except ImportError:
+except (ImportError, OSError):
     __version__ = f'broken {__version__}'.strip()
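
The broadened except clause matters because a damaged Cryptodome installation, such as one with a missing or corrupted compiled extension, can fail at import time with an OSError rather than an ImportError. A minimal standalone sketch of the same probing pattern (the probe_cryptodome helper is hypothetical, not yt-dlp API):

# Hypothetical helper demonstrating the probing pattern used above:
# treat both ImportError and OSError as "dependency unavailable".
def probe_cryptodome():
    try:
        from Crypto.Cipher import AES  # noqa: F401  # may raise OSError on a broken install
    except (ImportError, OSError) as exc:
        return f'unavailable: {type(exc).__name__}: {exc}'
    return 'available'


if __name__ == '__main__':
    print(probe_cryptodome())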

View File

@@ -1984,6 +1984,7 @@ from .stretchinternet import StretchInternetIE
 from .stripchat import StripchatIE
 from .stv import STVPlayerIE
 from .substack import SubstackIE
+from .suno import SunoIE, SunoPlaylistIE
 from .sunporno import SunPornoIE
 from .sverigesradio import (
     SverigesRadioEpisodeIE,

yt_dlp/extractor/suno.py (new file, 152 lines)
View File

@@ -0,0 +1,152 @@
import re

from .common import InfoExtractor
from ..utils import ExtractorError, unescapeHTML


class SunoBaseIE(InfoExtractor):
    def _get_title(self, webpage):
        return self._html_search_meta(
            ['og:title', 'twitter:title'], webpage, 'title',
            default=None) or self._html_extract_title(webpage)

    def _get_description(self, webpage):
        return self._html_search_meta(
            ['og:description', 'description', 'twitter:description'],
            webpage, 'description', default=None)

    def _get_thumbnail(self, webpage):
        return self._html_search_meta(
            ['og:image', 'twitter:image'], webpage, 'thumbnail', default=None)


class SunoIE(SunoBaseIE):
    _VALID_URL = r'https?://(?:www\.)?suno\.com/song/(?P<id>[-a-f0-9]+)'
    _TESTS = [
        {
            'url': 'https://suno.com/song/ab39a04d-b2e6-463b-9b8e-ddea725422f5',
            'md5': 'ef850763b175d8a3c7fba5e2dbdc6bc5',
            'info_dict': {
                'id': 'ab39a04d-b2e6-463b-9b8e-ddea725422f5',
                'title': 'Life\'s a Soundtrack · AI Funk Factory @ YT by @funk | Suno',
                'description': 'groovy funk, melodic song. Listen and make your own with Suno.',
                'thumbnail': r're:https?://.*903f2bd7-ccc0-4029-a76a-887f07ebc2df.*\.jpeg$',
                'ext': 'mp3',
            },
        },
        {
            'url': 'https://suno.com/song/9cbcb5f4-f367-4f1c-8a32-23ec62bdc47e',
            'md5': '2f038badef88d189891d5f8cd8d8804d',
            'info_dict': {
                'id': '9cbcb5f4-f367-4f1c-8a32-23ec62bdc47e',
                'title': 'Pequenos Prazeres da Vida by @groovebot | Suno',
                'description': 'pop bossa nova song. Listen and make your own with Suno.',
                'thumbnail': r're:https?://.*9cbcb5f4-f367-4f1c-8a32-23ec62bdc47e.*\.jpeg$',
                'ext': 'mp3',
            },
        },
    ]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        url = self._html_search_meta(
            ['og:audio', 'twitter:player:stream'], webpage, 'url', default=None)

        return {
            'id': video_id,
            'title': self._get_title(webpage),
            'description': self._get_description(webpage),
            'thumbnail': self._get_thumbnail(webpage),
            'url': url,
        }


class SunoPlaylistIE(SunoBaseIE):
    _VALID_URL = r'https?://(?:www\.)?suno\.com/playlist/(?P<id>[-a-f0-9]+)'
    _TESTS = [
        {
            'url': 'https://suno.com/playlist/01f2ac32-c32e-4d26-b10c-221107c02946',
            'info_dict': {
                'id': '01f2ac32-c32e-4d26-b10c-221107c02946',
                'title': 'Main 0 by @contemplativetranspositions367 | Suno',
                'description': 'Hopefully the test case passed',
                'thumbnail': r're:https?://.*19d6d518-1b87-43b3-90b9-2a476ca5824a.*\.jpeg$',
            },
            'playlist': [{
                'info_dict': {
                    'id': '19d6d518-1b87-43b3-90b9-2a476ca5824a',
                    'title': 'Ceaseless <Echoes>',
                    'ext': 'mp3',
                },
            }],
            'playlist_count': 1,
        },
        {
            'url': 'https://www.suno.com/playlist/568eeaab-dfbf-4da6-aa0a-0fb1a32330de',
            'info_dict': {
                'id': '568eeaab-dfbf-4da6-aa0a-0fb1a32330de',
                'title': 'Piano by @kunal | Suno',
                'description': 'Here are some good piano',
                'thumbnail': r're:https?://.*0ecc0956-3b17-4d4b-8504-55849dd75e22.*\.jpeg$',
            },
            'playlist': [
                {
                    'info_dict': {
                        'id': '0ecc0956-3b17-4d4b-8504-55849dd75e22',
                        'title': 'ST',
                        'ext': 'mp3',
                    },
                },
                {
                    'info_dict': {
                        'id': '3fef7d44-c5a3-4181-9de3-d81542af23ef',
                        'title': 'ST',
                        'ext': 'mp3',
                    },
                },
                {
                    'info_dict': {
                        'id': '15e797fa-06c0-4e11-8cc0-3b2580476039',
                        'title': 'ST - 2',
                        'ext': 'mp3',
                    },
                },
            ],
            'playlist_count': 3,
        },
    ]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        # There are <a>s whose href is a song/ID path. The <span>s directly
        # within them have the respective song title as their innerHTML.
        # Alternatively, this info can be extracted through parsing an escaped
        # JSON object inside a <script> array, though that seems even less
        # stable than this HTML.
        songs_regex = r'/song/(?P<id>[-a-f0-9]+)["\'][^>]*>\s*<span[^>]*>\s*(?P<title>[^<]+)</span>'
        songs = re.findall(songs_regex, webpage)

        og_audio_regex = self._og_regexes('audio')[0]
        audio_urls = [matches[0] for matches in re.findall(og_audio_regex, webpage)]

        if len(songs) != len(audio_urls):
            raise ExtractorError('Unexpected mismatch between song HTML list and og audio URLs')

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': self._get_title(webpage),
            'description': self._get_description(webpage),
            'thumbnail': self._get_thumbnail(webpage),
            'entries': [{
                'id': song_tuple[0],
                'title': unescapeHTML(song_tuple[1]),
                'url': audio_urls[i],
            } for i, song_tuple in enumerate(songs)],
        }
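
With SunoIE and SunoPlaylistIE registered in yt_dlp/extractor/_extractors.py (see the change above), the new extractors can be exercised through yt-dlp's public Python API. A hedged usage sketch, reusing the first SunoIE test URL and extracting metadata only:

import yt_dlp

# Metadata-only extraction; the same call handles suno.com/playlist URLs via SunoPlaylistIE.
with yt_dlp.YoutubeDL({'skip_download': True}) as ydl:
    info = ydl.extract_info(
        'https://suno.com/song/ab39a04d-b2e6-463b-9b8e-ddea725422f5',
        download=False)
    print(info['id'], info.get('title'), info.get('url'))

For a playlist URL, the returned info dict is of playlist type, and its entries carry the per-song ids, titles, and audio URLs assembled in SunoPlaylistIE._real_extract.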