Compare commits

...

22 Commits

Author SHA1 Message Date
Léon McGregor
a1881a5e39
Merge dda6f7b563 into b83ca24eb7 2024-11-10 14:42:00 +01:00
sepro
b83ca24eb7
[core] Catch broken Cryptodome installations (#11486)
Authored by: seproDev
2024-11-10 00:53:49 +01:00
bashonly
240a7d43c8
[build] Pin websockets version to >=13.0,<14 (#11488)
websockets 14.0 causes CI test failures (a lot more of them)

Authored by: bashonly
2024-11-09 23:46:47 +00:00
bashonly
f13df591d4
[build] Enable attestations for trusted publishing (#11420)
Reverts 428ffb75aa

Authored by: bashonly
2024-11-09 23:26:02 +00:00
lonm
dda6f7b563 [RadioFrance] run autopep 2024-10-15 16:35:28 +01:00
lonm
dcd0ee3ec3 [RadioFrance] ruff trailing commas 2024-10-15 16:30:19 +01:00
lonm
9e3ac89514 [RadioFrance] support pages with embedded playback info 2024-10-15 16:28:49 +01:00
lonm
0fb8bc11ed [RadioFrance] Fix ruff issues 2024-10-15 15:04:48 +01:00
lonm
3c5e3af7bc [RadioFrance] Remove defunct test 2024-10-15 14:54:09 +01:00
lonm
9d54ffc768 [RadioFrance] update tests for program grille 2024-10-15 14:52:11 +01:00
lonm
e01fab7041 [RadioFrance] fix profile pagination detection 2024-10-15 14:44:48 +01:00
lonm
867bf965bb [RadioFrance] Fix playlist api parse 2024-10-15 14:23:47 +01:00
lonm
40f1a95a67 Merge branch 'master' of github.com:yt-dlp/yt-dlp 2024-10-15 13:07:59 +01:00
lonm
dd74aa0bca [RadioFrance] Fix quote styling 2024-05-16 11:45:17 +01:00
lonm
e5e91ad05d [RadioFrance] Fix thumb detection on profiles 2024-05-16 11:29:32 +01:00
lonm
7308dc895c [RadioFrance] Fix outdated tests 2024-05-16 11:29:16 +01:00
lonm
1f719e1934 [RadioFrance] Cleanup imports 2024-05-16 11:00:08 +01:00
lonm
a8edca98f5 [RadioFrance] Fix live substations 2024-05-16 10:59:56 +01:00
lonm
827560f2b9 [RadioFrance] Ep selection is already handled, don't add it here 2024-05-16 10:47:28 +01:00
lonm
5db908bebf Merge branch 'master' of github.com:LonMcGregor/yt-dlp 2024-05-15 16:41:43 +01:00
lonm
e2243c2033 [RadioFrance] Fix podcast and person playlist downloads 2024-05-15 16:41:26 +01:00
lonm
960b8931c6 Fix podcast and person playlist downloads 2024-05-15 16:39:56 +01:00
7 changed files with 184 additions and 71 deletions

View File

@ -504,7 +504,8 @@ jobs:
- windows32
runs-on: ubuntu-latest
steps:
- uses: actions/download-artifact@v4
- name: Download artifacts
uses: actions/download-artifact@v4
with:
path: artifact
pattern: build-bin-*

View File

@ -28,3 +28,20 @@ jobs:
actions: write # For cleaning up cache
id-token: write # mandatory for trusted publishing
secrets: inherit
publish_pypi:
needs: [release]
if: vars.MASTER_PYPI_PROJECT != ''
runs-on: ubuntu-latest
permissions:
id-token: write # mandatory for trusted publishing
steps:
- name: Download artifacts
uses: actions/download-artifact@v4
with:
path: dist
name: build-pypi
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
verbose: true

View File

@ -41,3 +41,20 @@ jobs:
actions: write # For cleaning up cache
id-token: write # mandatory for trusted publishing
secrets: inherit
publish_pypi:
needs: [release]
if: vars.NIGHTLY_PYPI_PROJECT != ''
runs-on: ubuntu-latest
permissions:
id-token: write # mandatory for trusted publishing
steps:
- name: Download artifacts
uses: actions/download-artifact@v4
with:
path: dist
name: build-pypi
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
verbose: true

View File

@ -2,10 +2,6 @@ name: Release
on:
workflow_call:
inputs:
prerelease:
required: false
default: true
type: boolean
source:
required: false
default: ''
@ -18,6 +14,10 @@ on:
required: false
default: ''
type: string
prerelease:
required: false
default: true
type: boolean
workflow_dispatch:
inputs:
source:
@ -278,11 +278,20 @@ jobs:
make clean-cache
python -m build --no-isolation .
- name: Upload artifacts
if: github.event_name != 'workflow_dispatch'
uses: actions/upload-artifact@v4
with:
name: build-pypi
path: |
dist/*
compression-level: 0
- name: Publish to PyPI
if: github.event_name == 'workflow_dispatch'
uses: pypa/gh-action-pypi-publish@release/v1
with:
verbose: true
attestations: false # Currently doesn't work w/ reusable workflows (breaks nightly)
publish:
needs: [prepare, build]

View File

@ -52,7 +52,7 @@ default = [
"pycryptodomex",
"requests>=2.32.2,<3",
"urllib3>=1.26.17,<3",
"websockets>=13.0",
"websockets>=13.0,<14",
]
curl-cffi = [
"curl-cffi==0.5.10; os_name=='nt' and implementation_name=='cpython'",

View File

@ -24,7 +24,7 @@ try:
from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401
from Crypto.Hash import CMAC, SHA1 # noqa: F401
from Crypto.PublicKey import RSA # noqa: F401
except ImportError:
except (ImportError, OSError):
__version__ = f'broken {__version__}'.strip()

View File

@ -1,6 +1,4 @@
import itertools
import re
import urllib.parse
from .common import InfoExtractor
from ..utils import (
@ -19,18 +17,6 @@ class RadioFranceIE(InfoExtractor):
_VALID_URL = r'https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
IE_NAME = 'radiofrance'
_TEST = {
'url': 'http://maison.radiofrance.fr/radiovisions/one-one',
'md5': 'bdbb28ace95ed0e04faab32ba3160daf',
'info_dict': {
'id': 'one-one',
'ext': 'ogg',
'title': 'One to one',
'description': "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
'uploader': 'Thomas Hercouët',
},
}
def _real_extract(self, url):
m = self._match_valid_url(url)
video_id = m.group('id')
@ -237,7 +223,8 @@ class RadioFranceLiveIE(RadioFranceBaseIE):
if substation_id:
webpage = self._download_webpage(url, station_id)
api_response = self._extract_data_from_webpage(webpage, station_id, 'webRadioData')
api_response = self._search_json(r'webradioLive:\s*', webpage, station_id, substation_id,
transform_source=js_to_json)
else:
api_response = self._download_json(
f'https://www.radiofrance.fr/{station_id}/api/live', station_id)
@ -267,42 +254,66 @@ class RadioFranceLiveIE(RadioFranceBaseIE):
class RadioFrancePlaylistBaseIE(RadioFranceBaseIE):
"""Subclasses must set _METADATA_KEY"""
def _call_api(self, content_id, cursor, page_num):
def _call_api(self, station, content_id, cursor):
raise NotImplementedError('This method must be implemented by subclasses')
def _generate_playlist_entries(self, content_id, content_response):
for page_num in itertools.count(2):
def _generate_playlist_entries(self, station, content_id, content_response):
while True:
for entry in content_response['items']:
if entry['link'] == '':
yield entry
else:
yield self.url_result(
f'https://www.radiofrance.fr/{entry["path"]}', url_transparent=True, **traverse_obj(entry, {
f'https://www.radiofrance.fr{entry["link"]}', url_transparent=True, **traverse_obj(entry, {
'title': 'title',
'description': 'standFirst',
'timestamp': ('publishedDate', {int_or_none}),
'thumbnail': ('visual', 'src'),
}))
next_cursor = traverse_obj(content_response, (('pagination', None), 'next'), get_all=False)
if not next_cursor:
if content_response['next']:
content_response = self._call_api(station, content_id, content_response['next'])
else:
break
content_response = self._call_api(content_id, next_cursor, page_num)
def _extract_embedded_episodes(self, item, webpage, content_id):
"""Certain episdoes data are embedded directly in the page, use these if the link is missing"""
links = item['playerInfo']['media']['sources']
item['formats'] = []
for linkkey in links:
url = self._search_regex(linkkey + r'\.url="([^"]+)";', webpage, content_id)
dur = int(self._search_regex(linkkey + r'\.duration=(\d+);', webpage, content_id))
preset = self._search_json(linkkey + r'\.preset=', webpage, content_id, content_id, contains_pattern=r'\{.+\}', transform_source=js_to_json)
item['formats'].append({
'format_id': preset['id'],
'url': url,
'vcodec': 'none',
'acodec': preset['encoding'],
'quality': preset['bitrate'],
'duration': dur,
})
item['duration'] = dur
return item
def _real_extract(self, url):
display_id = self._match_id(url)
playlist_id = self._match_id(url)
# If it is a podcast playlist, get the name of the station it is on
# profile page playlists are not attached to a station currently
station = self._match_valid_url(url).group('station') if isinstance(self, RadioFrancePodcastIE) else None
metadata = self._download_json(
'https://www.radiofrance.fr/api/v2.1/path', display_id,
query={'value': urllib.parse.urlparse(url).path})['content']
content_id = metadata['id']
# Get data for the first page, and the uuid for the playlist
metadata = self._call_api(station, playlist_id, 1)
uuid = traverse_obj(metadata, ('metadata', 'id'))
return self.playlist_result(
self._generate_playlist_entries(content_id, metadata[self._METADATA_KEY]), content_id,
display_id=display_id, **{**traverse_obj(metadata, {
self._generate_playlist_entries(station, playlist_id, metadata),
uuid,
display_id=playlist_id,
**{**traverse_obj(metadata['metadata'], {
'title': 'title',
'description': 'standFirst',
'thumbnail': ('visual', 'src'),
}), **traverse_obj(metadata, {
}), **traverse_obj(metadata['metadata'], {
'title': 'name',
'description': 'role',
})})
@ -311,7 +322,7 @@ class RadioFrancePlaylistBaseIE(RadioFranceBaseIE):
class RadioFrancePodcastIE(RadioFrancePlaylistBaseIE):
_VALID_URL = rf'''(?x)
{RadioFranceBaseIE._VALID_URL_BASE}
/(?:{RadioFranceBaseIE._STATIONS_RE})
/(?P<station>{RadioFranceBaseIE._STATIONS_RE})
/podcasts/(?P<id>[\w-]+)/?(?:[?#]|$)
'''
@ -321,20 +332,20 @@ class RadioFrancePodcastIE(RadioFrancePlaylistBaseIE):
'id': 'eaf6ef81-a980-4f1c-a7d1-8a75ecd54b17',
'display_id': 'le-billet-vert',
'title': 'Le billet sciences',
'description': 'md5:eb1007b34b0c0a680daaa71525bbd4c1',
'description': 'md5:85d5ce8c488192e71904c551d595f4da',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
},
'playlist_mincount': 11,
}, {
'url': 'https://www.radiofrance.fr/franceinter/podcasts/jean-marie-le-pen-l-obsession-nationale',
'url': 'https://www.radiofrance.fr/franceinter/podcasts/avec-la-langue',
'info_dict': {
'id': '566fd524-3074-4fbc-ac69-8696f2152a54',
'display_id': 'jean-marie-le-pen-l-obsession-nationale',
'title': 'Jean-Marie Le Pen, l\'obsession nationale',
'description': 'md5:a07c0cfb894f6d07a62d0ad12c4b7d73',
'id': '53a95989-7c61-48c7-873c-6a71009101bb',
'display_id': 'avec-la-langue',
'title': 'Avec la langue',
'description': 'md5:4ddb6d4ed46dbbdee611b8e16e4af868',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
},
'playlist_count': 7,
'playlist_mincount': 36,
}, {
'url': 'https://www.radiofrance.fr/franceculture/podcasts/serie-thomas-grjebine',
'info_dict': {
@ -349,10 +360,20 @@ class RadioFrancePodcastIE(RadioFrancePlaylistBaseIE):
'id': '143dff38-e956-4a5d-8576-1c0b7242b99e',
'display_id': 'certains-l-aiment-fip',
'title': 'Certains laiment Fip',
'description': 'md5:ff974672ba00d4fd5be80fb001c5b27e',
'description': 'md5:7c373cdcec7a024f12fa34de7612e44e',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
},
'playlist_mincount': 321,
}, {
'url': 'http://www.radiofrance.fr/franceculture/podcasts/serie-les-aventures-de-tintin-les-cigares-du-pharaon',
'info_dict': {
'id': '01b096c6-e7f8-49c4-8319-dd399221885b',
'display_id': 'serie-les-aventures-de-tintin-les-cigares-du-pharaon',
'title': 'Les Cigares du Pharaon\xa0: les Aventures de Tintin',
'description': 'md5:1c5b6d010b2aaeb0d90b2c233b5f7b15',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
},
'playlist_count': 5,
}, {
'url': 'https://www.radiofrance.fr/franceinter/podcasts/le-7-9',
'only_matching': True,
@ -363,24 +384,48 @@ class RadioFrancePodcastIE(RadioFrancePlaylistBaseIE):
_METADATA_KEY = 'expressions'
def _call_api(self, podcast_id, cursor, page_num):
return self._download_json(
f'https://www.radiofrance.fr/api/v2.1/concepts/{podcast_id}/expressions', podcast_id,
note=f'Downloading page {page_num}', query={'pageCursor': cursor})
def _call_api(self, station, podcast_id, cursor):
    """Download one page of a podcast and collect its episodes and paging state.

    @param station      station path segment used to build the page URL
    @param podcast_id   podcast slug used in the URL and in messages
    @param cursor       page number to request (compared numerically below)
    @returns            dict with ``items`` (episode dicts), ``next`` (the
                        following page number, or None) and ``metadata``
    """
    webpage = self._download_webpage(
        f'https://www.radiofrance.fr/{station}/podcasts/{podcast_id}?p={cursor}',
        podcast_id, note=f'Downloading {podcast_id} page {cursor}')

    resp = {'items': []}

    # The page data as a whole contains javascript that cannot be parsed as
    # JSON, so the episode array is pulled out on its own
    itemlist = self._search_json(
        r'a\.items\s*=\s*', webpage, podcast_id, podcast_id,
        contains_pattern=r'\[.+\]', transform_source=js_to_json)
    for item in itemlist:
        # Only 'Expression' entries are kept
        if item['model'] == 'Expression':
            if item['link'] == '':
                item = self._extract_embedded_episodes(item, webpage, podcast_id)
            resp['items'].append(item)

    # Pagination is stored on the javascript object `a`. When it cannot be
    # found, fall back to 0 so the playlist simply ends after this page
    last_page = int(self._search_regex(
        r'a\.lastPage\s*=\s*(\d+);', webpage, 'last page', default='0'))
    resp['next'] = cursor + 1 if cursor < last_page else None

    resp['metadata'] = self._search_json(
        r'content:\s*', webpage, podcast_id, podcast_id,
        transform_source=js_to_json)
    return resp
class RadioFranceProfileIE(RadioFrancePlaylistBaseIE):
_VALID_URL = rf'{RadioFranceBaseIE._VALID_URL_BASE}/personnes/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet?p=3',
'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet',
'info_dict': {
'id': '86c62790-e481-11e2-9f7b-782bcb6744eb',
'display_id': 'thomas-pesquet',
'title': 'Thomas Pesquet',
'description': 'Astronaute à l\'agence spatiale européenne',
},
'playlist_mincount': 212,
'playlist_mincount': 100,
}, {
'url': 'https://www.radiofrance.fr/personnes/eugenie-bastie',
'info_dict': {
@ -398,15 +443,39 @@ class RadioFranceProfileIE(RadioFrancePlaylistBaseIE):
_METADATA_KEY = 'documents'
def _call_api(self, profile_id, cursor, page_num):
resp = self._download_json(
f'https://www.radiofrance.fr/api/v2.1/taxonomy/{profile_id}/documents', profile_id,
note=f'Downloading page {page_num}', query={
'relation': 'personality',
'cursor': cursor,
})
def _call_api(self, station, profile_id, cursor):
    """Download one page of a profile and collect its episodes and paging state.

    @param station      unused here; accepted so the signature matches the
                        other ``_call_api`` implementations
    @param profile_id   profile slug used in the URL and in messages
    @param cursor       page number to request (compared numerically below)
    @returns            dict with ``items`` (episode dicts), ``next`` (the
                        following page number, or None) and ``metadata``
    """
    webpage = self._download_webpage(
        f'https://www.radiofrance.fr/personnes/{profile_id}?p={cursor}',
        profile_id, note=f'Downloading {profile_id} page {cursor}')

    resp = {'items': []}

    # Episode list and the name of the pagination object for this page
    pagedata = self._search_json(
        r'documents\s*:\s*', webpage, profile_id, profile_id,
        transform_source=js_to_json)

    # `pagination` names the javascript object holding `lastPage`; match it
    # literally. Fall back to 0 so a missing value ends the playlist here
    pagekey = re.escape(pagedata['pagination'])
    last_page = int(self._search_regex(
        pagekey + r'\.lastPage=(\d+);', webpage, profile_id, default='0'))
    resp['next'] = cursor + 1 if cursor < last_page else None

    # Not every entry is audio/video, so keep only 'Expression' entries
    for item in pagedata['items']:
        if item['model'] == 'Expression':
            # Entries are dicts, so the link must be read by key
            if item['link'] == '':
                item = self._extract_embedded_episodes(item, webpage, profile_id)
            resp['items'].append(item)

    resp['metadata'] = self._search_json(
        r'content:\s*', webpage, profile_id, profile_id,
        transform_source=js_to_json)

    # When `visual` is a plain string rather than an object, substitute the
    # page's og:image so later lookups of `visual.src` succeed
    visual = resp['metadata'].get('visual')
    if visual and isinstance(visual, str):
        resp['metadata']['visual'] = {'src': self._og_search_thumbnail(webpage)}

    return resp
@ -423,14 +492,14 @@ class RadioFranceProgramScheduleIE(RadioFranceBaseIE):
'id': 'franceinter-program-20230217',
'upload_date': '20230217',
},
'playlist_count': 25,
'playlist_count': 27,
}, {
'url': 'https://www.radiofrance.fr/franceculture/grille-programmes?date=01-02-2023',
'info_dict': {
'id': 'franceculture-program-20230201',
'upload_date': '20230201',
},
'playlist_count': 25,
'playlist_count': 29,
}, {
'url': 'https://www.radiofrance.fr/mouv/grille-programmes?date=19-03-2023',
'info_dict': {
@ -444,7 +513,7 @@ class RadioFranceProgramScheduleIE(RadioFranceBaseIE):
'id': 'francemusique-program-20230318',
'upload_date': '20230318',
},
'playlist_count': 15,
'playlist_count': 16,
}, {
'url': 'https://www.radiofrance.fr/franceculture/grille-programmes',
'only_matching': True,