Merge dda6f7b563 into b83ca24eb7

[core] Catch broken Cryptodome installations (#11486)
Authored by: seproDev
2024-11-27 01:31:25 +01:00 · 2024-11-10 14:42:00 +01:00 · 2024-11-10 00:53:49 +01:00 · 2024-11-09 23:46:47 +00:00 · 2024-11-09 23:26:02 +00:00 · 2024-10-15 16:35:28 +01:00
7 changed files with 184 additions and 71 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -504,7 +504,8 @@ jobs:
      - windows32
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/download-artifact@v4
+      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: artifact
          pattern: build-bin-*
--- a/.github/workflows/release-master.yml
+++ b/.github/workflows/release-master.yml
@ -28,3 +28,20 @@ jobs:
      actions: write  # For cleaning up cache
      id-token: write  # mandatory for trusted publishing
    secrets: inherit
  publish_pypi:
    needs: [release]
    if: vars.MASTER_PYPI_PROJECT != ''
    runs-on: ubuntu-latest
    permissions:
      id-token: write  # mandatory for trusted publishing
    steps:
      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: dist
          name: build-pypi
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          verbose: true
--- a/.github/workflows/release-nightly.yml
+++ b/.github/workflows/release-nightly.yml
@ -41,3 +41,20 @@ jobs:
      actions: write  # For cleaning up cache
      id-token: write  # mandatory for trusted publishing
    secrets: inherit
  publish_pypi:
    needs: [release]
    if: vars.NIGHTLY_PYPI_PROJECT != ''
    runs-on: ubuntu-latest
    permissions:
      id-token: write  # mandatory for trusted publishing
    steps:
      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: dist
          name: build-pypi
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          verbose: true
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@ -2,10 +2,6 @@ name: Release
 on:
  workflow_call:
    inputs:
      prerelease:
        required: false
        default: true
        type: boolean
      source:
        required: false
        default: ''
@ -18,6 +14,10 @@ on:
        required: false
        default: ''
        type: string
      prerelease:
        required: false
        default: true
        type: boolean
  workflow_dispatch:
    inputs:
      source:
@ -278,11 +278,20 @@ jobs:
          make clean-cache
          python -m build --no-isolation .
      - name: Upload artifacts
        if: github.event_name != 'workflow_dispatch'
        uses: actions/upload-artifact@v4
        with:
          name: build-pypi
          path: |
            dist/*
          compression-level: 0
      - name: Publish to PyPI
        if: github.event_name == 'workflow_dispatch'
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          verbose: true
          attestations: false  # Currently doesn't work w/ reusable workflows (breaks nightly)
  publish:
    needs: [prepare, build]
--- a/pyproject.toml
+++ b/pyproject.toml
@ -52,7 +52,7 @@ default = [
    "pycryptodomex",
    "requests>=2.32.2,<3",
    "urllib3>=1.26.17,<3",
-    "websockets>=13.0",
+    "websockets>=13.0,<14",
 ]
 curl-cffi = [
    "curl-cffi==0.5.10; os_name=='nt' and implementation_name=='cpython'",
--- a/yt_dlp/dependencies/Cryptodome.py
+++ b/yt_dlp/dependencies/Cryptodome.py
@ -24,7 +24,7 @@ try:
        from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5  # noqa: F401
        from Crypto.Hash import CMAC, SHA1  # noqa: F401
        from Crypto.PublicKey import RSA  # noqa: F401
-except ImportError:
+except (ImportError, OSError):
    __version__ = f'broken {__version__}'.strip()
--- a/yt_dlp/extractor/radiofrance.py
+++ b/yt_dlp/extractor/radiofrance.py
@ -1,6 +1,4 @@
 import itertools
 import re
 import urllib.parse
 from .common import InfoExtractor
 from ..utils import (
@ -19,18 +17,6 @@ class RadioFranceIE(InfoExtractor):
    _VALID_URL = r'https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
    IE_NAME = 'radiofrance'
    _TEST = {
        'url': 'http://maison.radiofrance.fr/radiovisions/one-one',
        'md5': 'bdbb28ace95ed0e04faab32ba3160daf',
        'info_dict': {
            'id': 'one-one',
            'ext': 'ogg',
            'title': 'One to one',
            'description': "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
            'uploader': 'Thomas Hercouët',
        },
    }
    def _real_extract(self, url):
        m = self._match_valid_url(url)
        video_id = m.group('id')
@ -237,7 +223,8 @@ class RadioFranceLiveIE(RadioFranceBaseIE):
        if substation_id:
            webpage = self._download_webpage(url, station_id)
-            api_response = self._extract_data_from_webpage(webpage, station_id, 'webRadioData')
+            api_response = self._search_json(r'webradioLive:\s*', webpage, station_id, substation_id,
                                             transform_source=js_to_json)
        else:
            api_response = self._download_json(
                f'https://www.radiofrance.fr/{station_id}/api/live', station_id)
@ -267,42 +254,66 @@ class RadioFranceLiveIE(RadioFranceBaseIE):
 class RadioFrancePlaylistBaseIE(RadioFranceBaseIE):
    """Subclasses must set _METADATA_KEY"""
-    def _call_api(self, content_id, cursor, page_num):
+    def _call_api(self, station, content_id, cursor):
        raise NotImplementedError('This method must be implemented by subclasses')
-    def _generate_playlist_entries(self, content_id, content_response):
+    def _generate_playlist_entries(self, station, content_id, content_response):
-        for page_num in itertools.count(2):
+        while True:
            for entry in content_response['items']:
                if entry['link'] == '':
                    yield entry
                else:
                    yield self.url_result(
-                    f'https://www.radiofrance.fr/{entry["path"]}', url_transparent=True, **traverse_obj(entry, {
+                        f'https://www.radiofrance.fr{entry["link"]}', url_transparent=True, **traverse_obj(entry, {
                            'title': 'title',
                            'description': 'standFirst',
                            'timestamp': ('publishedDate', {int_or_none}),
                            'thumbnail': ('visual', 'src'),
                        }))
-            next_cursor = traverse_obj(content_response, (('pagination', None), 'next'), get_all=False)
+            if content_response['next']:
-            if not next_cursor:
+                content_response = self._call_api(station, content_id, content_response['next'])
            else:
                break
-            content_response = self._call_api(content_id, next_cursor, page_num)
+    def _extract_embedded_episodes(self, item, webpage, content_id):
        """Certain episdoes data are embedded directly in the page, use these if the link is missing"""
        links = item['playerInfo']['media']['sources']
        item['formats'] = []
        for linkkey in links:
            url = self._search_regex(linkkey + r'\.url="([^"]+)";', webpage, content_id)
            dur = int(self._search_regex(linkkey + r'\.duration=(\d+);', webpage, content_id))
            preset = self._search_json(linkkey + r'\.preset=', webpage, content_id, content_id, contains_pattern=r'\{.+\}', transform_source=js_to_json)
            item['formats'].append({
                'format_id': preset['id'],
                'url': url,
                'vcodec': 'none',
                'acodec': preset['encoding'],
                'quality': preset['bitrate'],
                'duration': dur,
            })
            item['duration'] = dur
        return item
    def _real_extract(self, url):
-        display_id = self._match_id(url)
+        playlist_id = self._match_id(url)
        # If it is a podcast playlist, get the name of the station it is on
        # profile page playlists are not attached to a station currently
        station = self._match_valid_url(url).group('station') if isinstance(self, RadioFrancePodcastIE) else None
-        metadata = self._download_json(
+        # Get data for the first page, and the uuid for the playlist
-            'https://www.radiofrance.fr/api/v2.1/path', display_id,
+        metadata = self._call_api(station, playlist_id, 1)
-            query={'value': urllib.parse.urlparse(url).path})['content']
+        uuid = traverse_obj(metadata, ('metadata', 'id'))
        content_id = metadata['id']
        return self.playlist_result(
-            self._generate_playlist_entries(content_id, metadata[self._METADATA_KEY]), content_id,
+            self._generate_playlist_entries(station, playlist_id, metadata),
-            display_id=display_id, **{**traverse_obj(metadata, {
+            uuid,
            display_id=playlist_id,
            **{**traverse_obj(metadata['metadata'], {
                'title': 'title',
                'description': 'standFirst',
                'thumbnail': ('visual', 'src'),
-            }), **traverse_obj(metadata, {
+            }), **traverse_obj(metadata['metadata'], {
                'title': 'name',
                'description': 'role',
            })})
@ -311,7 +322,7 @@ class RadioFrancePlaylistBaseIE(RadioFranceBaseIE):
 class RadioFrancePodcastIE(RadioFrancePlaylistBaseIE):
    _VALID_URL = rf'''(?x)
        {RadioFranceBaseIE._VALID_URL_BASE}
-        /(?:{RadioFranceBaseIE._STATIONS_RE})
+        /(?P<station>{RadioFranceBaseIE._STATIONS_RE})
        /podcasts/(?P<id>[\w-]+)/?(?:[?#]|$)
    '''
@ -321,20 +332,20 @@ class RadioFrancePodcastIE(RadioFrancePlaylistBaseIE):
            'id': 'eaf6ef81-a980-4f1c-a7d1-8a75ecd54b17',
            'display_id': 'le-billet-vert',
            'title': 'Le billet sciences',
-            'description': 'md5:eb1007b34b0c0a680daaa71525bbd4c1',
+            'description': 'md5:85d5ce8c488192e71904c551d595f4da',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 11,
    }, {
-        'url': 'https://www.radiofrance.fr/franceinter/podcasts/jean-marie-le-pen-l-obsession-nationale',
+        'url': 'https://www.radiofrance.fr/franceinter/podcasts/avec-la-langue',
        'info_dict': {
-            'id': '566fd524-3074-4fbc-ac69-8696f2152a54',
+            'id': '53a95989-7c61-48c7-873c-6a71009101bb',
-            'display_id': 'jean-marie-le-pen-l-obsession-nationale',
+            'display_id': 'avec-la-langue',
-            'title': 'Jean-Marie Le Pen, l\'obsession nationale',
+            'title': 'Avec la langue',
-            'description': 'md5:a07c0cfb894f6d07a62d0ad12c4b7d73',
+            'description': 'md5:4ddb6d4ed46dbbdee611b8e16e4af868',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
-        'playlist_count': 7,
+        'playlist_mincount': 36,
    }, {
        'url': 'https://www.radiofrance.fr/franceculture/podcasts/serie-thomas-grjebine',
        'info_dict': {
@ -349,10 +360,20 @@ class RadioFrancePodcastIE(RadioFrancePlaylistBaseIE):
            'id': '143dff38-e956-4a5d-8576-1c0b7242b99e',
            'display_id': 'certains-l-aiment-fip',
            'title': 'Certains l’aiment Fip',
-            'description': 'md5:ff974672ba00d4fd5be80fb001c5b27e',
+            'description': 'md5:7c373cdcec7a024f12fa34de7612e44e',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 321,
    }, {
        'url': 'http://www.radiofrance.fr/franceculture/podcasts/serie-les-aventures-de-tintin-les-cigares-du-pharaon',
        'info_dict': {
            'id': '01b096c6-e7f8-49c4-8319-dd399221885b',
            'display_id': 'serie-les-aventures-de-tintin-les-cigares-du-pharaon',
            'title': 'Les Cigares du Pharaon\xa0: les Aventures de Tintin',
            'description': 'md5:1c5b6d010b2aaeb0d90b2c233b5f7b15',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_count': 5,
    }, {
        'url': 'https://www.radiofrance.fr/franceinter/podcasts/le-7-9',
        'only_matching': True,
@ -363,24 +384,48 @@ class RadioFrancePodcastIE(RadioFrancePlaylistBaseIE):
    _METADATA_KEY = 'expressions'
-    def _call_api(self, podcast_id, cursor, page_num):
+    def _call_api(self, station, podcast_id, cursor):
-        return self._download_json(
+        # The data is stored in the last <script> tag on a page
-            f'https://www.radiofrance.fr/api/v2.1/concepts/{podcast_id}/expressions', podcast_id,
+        url = 'https://www.radiofrance.fr/' + station + '/podcasts/' + podcast_id + '?p=' + str(cursor)
-            note=f'Downloading page {page_num}', query={'pageCursor': cursor})
+        webpage = self._download_webpage(url, podcast_id, note=f'Downloading {podcast_id} page {cursor}')
        resp = {}
        resp['items'] = []
        # _search_json cannot parse the data as it contains javascript
        # Therefore, parse the episodes objects array separately
        itemlist = self._search_json(r'a.items\s*=\s*', webpage, podcast_id, podcast_id,
                                     contains_pattern=r'\[.+\]', transform_source=js_to_json)
        for item in itemlist:
            if item['model'] == 'Expression':
                if item['link'] == '':
                    item = self._extract_embedded_episodes(item, webpage, podcast_id)
                resp['items'].append(item)
        # the pagination data is stored in a javascript object 'a'
        lastPage = int(re.search(r'a\.lastPage\s*=\s*(\d+);', webpage).group(1))
        hasMorePages = cursor < lastPage
        resp['next'] = cursor + 1 if hasMorePages else None
        resp['metadata'] = self._search_json(r'content:\s*', webpage, podcast_id, podcast_id,
                                             transform_source=js_to_json)
        return resp
 class RadioFranceProfileIE(RadioFrancePlaylistBaseIE):
    _VALID_URL = rf'{RadioFranceBaseIE._VALID_URL_BASE}/personnes/(?P<id>[\w-]+)'
    _TESTS = [{
-        'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet?p=3',
+        'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet',
        'info_dict': {
            'id': '86c62790-e481-11e2-9f7b-782bcb6744eb',
            'display_id': 'thomas-pesquet',
            'title': 'Thomas Pesquet',
            'description': 'Astronaute à l\'agence spatiale européenne',
        },
-        'playlist_mincount': 212,
+        'playlist_mincount': 100,
    }, {
        'url': 'https://www.radiofrance.fr/personnes/eugenie-bastie',
        'info_dict': {
@ -398,15 +443,39 @@ class RadioFranceProfileIE(RadioFrancePlaylistBaseIE):
    _METADATA_KEY = 'documents'
-    def _call_api(self, profile_id, cursor, page_num):
+    def _call_api(self, station, profile_id, cursor):
-        resp = self._download_json(
+        url = 'https://www.radiofrance.fr/personnes/' + profile_id + '?p=' + str(cursor)
-            f'https://www.radiofrance.fr/api/v2.1/taxonomy/{profile_id}/documents', profile_id,
+        webpage = self._download_webpage(url, profile_id, note=f'Downloading {profile_id} page {cursor}')
-            note=f'Downloading page {page_num}', query={
+
-                'relation': 'personality',
+        resp = {}
-                'cursor': cursor,
+        resp['items'] = []
-            })
+
        # get episode data from page
        pagedata = self._search_json(r'documents\s*:\s*', webpage, profile_id, profile_id,
                                     transform_source=js_to_json)
        # get the page data
        pagekey = pagedata['pagination']
        hasMorePages = False
        lastPage = int(self._search_regex(pagekey + r'\.lastPage=(\d+);', webpage, profile_id, '0'))
        hasMorePages = cursor < lastPage
        resp['next'] = cursor + 1 if hasMorePages else None
        # get episode data, note, not all will be A/V, so filter for 'expression'
        for item in pagedata['items']:
            if item['model'] == 'Expression':
                if item.link == '':
                    item = self._extract_embedded_episodes(item, webpage, profile_id)
                resp['items'].append(item)
        resp['metadata'] = self._search_json(r'content:\s*', webpage, profile_id, profile_id,
                                             transform_source=js_to_json)
        # If the image data is stored separately rather than in the main content area
        if resp['metadata']['visual'] and isinstance(resp['metadata']['visual'], str):
            imagedata = {}
            imagedata['src'] = self._og_search_thumbnail(webpage)
            resp['metadata']['visual'] = imagedata
        resp['next'] = traverse_obj(resp, ('pagination', 'next'))
        return resp
@ -423,14 +492,14 @@ class RadioFranceProgramScheduleIE(RadioFranceBaseIE):
            'id': 'franceinter-program-20230217',
            'upload_date': '20230217',
        },
-        'playlist_count': 25,
+        'playlist_count': 27,
    }, {
        'url': 'https://www.radiofrance.fr/franceculture/grille-programmes?date=01-02-2023',
        'info_dict': {
            'id': 'franceculture-program-20230201',
            'upload_date': '20230201',
        },
-        'playlist_count': 25,
+        'playlist_count': 29,
    }, {
        'url': 'https://www.radiofrance.fr/mouv/grille-programmes?date=19-03-2023',
        'info_dict': {
@ -444,7 +513,7 @@ class RadioFranceProgramScheduleIE(RadioFranceBaseIE):
            'id': 'francemusique-program-20230318',
            'upload_date': '20230318',
        },
-        'playlist_count': 15,
+        'playlist_count': 16,
    }, {
        'url': 'https://www.radiofrance.fr/franceculture/grille-programmes',
        'only_matching': True,
Author	SHA1	Message	Date
Léon McGregor	a1881a5e39	Merge `dda6f7b563` into `b83ca24eb7`	2024-11-10 14:42:00 +01:00
sepro	b83ca24eb7	[core] Catch broken Cryptodome installations (#11486) Authored by: seproDev	2024-11-10 00:53:49 +01:00
bashonly	240a7d43c8	[build] Pin `websockets` version to >=13.0,<14 (#11488) websockets 14.0 causes CI test failures (a lot more of them) Authored by: bashonly	2024-11-09 23:46:47 +00:00
bashonly	f13df591d4	[build] Enable attestations for trusted publishing (#11420) Reverts `428ffb75aa` Authored by: bashonly	2024-11-09 23:26:02 +00:00
lonm	dda6f7b563	[RadioFrance] run autopep	2024-10-15 16:35:28 +01:00
lonm	dcd0ee3ec3	[RadioFrance] ruff trailing commas	2024-10-15 16:30:19 +01:00
lonm	9e3ac89514	[RadioFrance] support pages with embedded playback info	2024-10-15 16:28:49 +01:00
lonm	0fb8bc11ed	[RadioFrance] Fix ruff issues	2024-10-15 15:04:48 +01:00
lonm	3c5e3af7bc	[RadioFrance] Remove defunct test	2024-10-15 14:54:09 +01:00
lonm	9d54ffc768	[RadioFrance] update tests for program grille	2024-10-15 14:52:11 +01:00
lonm	e01fab7041	[RadioFrance] fix profile pagination detection	2024-10-15 14:44:48 +01:00
lonm	867bf965bb	[RadioFrance] Fix playlist api parse	2024-10-15 14:23:47 +01:00
lonm	40f1a95a67	Merge branch 'master' of github.com:yt-dlp/yt-dlp	2024-10-15 13:07:59 +01:00
lonm	dd74aa0bca	[RadioFrance] Fix quote styling	2024-05-16 11:45:17 +01:00
lonm	e5e91ad05d	[RadioFrance] Fix thumb detection on profiles	2024-05-16 11:29:32 +01:00
lonm	7308dc895c	[RadioFrance] Fix outdated tests	2024-05-16 11:29:16 +01:00
lonm	1f719e1934	[RadioFrance] Cleanup imports	2024-05-16 11:00:08 +01:00
lonm	a8edca98f5	[RadioFrance] Fix live substations	2024-05-16 10:59:56 +01:00
lonm	827560f2b9	[RadioFrance] Ep selection is already handled, don't add it here	2024-05-16 10:47:28 +01:00
lonm	5db908bebf	Merge branch 'master' of github.com:LonMcGregor/yt-dlp	2024-05-15 16:41:43 +01:00
lonm	e2243c2033	[RadioFrance] Fix podcast and person playlist downloads	2024-05-15 16:41:26 +01:00
lonm	960b8931c6	Fix podcast and person playlist downloads	2024-05-15 16:39:56 +01:00