Merge 049862b803 into b83ca24eb7

[core] Catch broken Cryptodome installations (#11486)
Authored by: seproDev
2024-11-27 01:31:25 +01:00 · 2024-11-10 00:54:43 +01:00 · 2024-11-10 00:53:49 +01:00 · 2024-11-09 23:46:47 +00:00 · 2024-11-09 23:26:02 +00:00 · 2024-10-23 11:11:24 +01:00
7 changed files with 185 additions and 49 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -504,7 +504,8 @@ jobs:
      - windows32
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/download-artifact@v4
+      - name: Download artifacts
+        uses: actions/download-artifact@v4
        with:
          path: artifact
          pattern: build-bin-*
--- a/.github/workflows/release-master.yml
+++ b/.github/workflows/release-master.yml
@@ -28,3 +28,20 @@ jobs:
      actions: write  # For cleaning up cache
      id-token: write  # mandatory for trusted publishing
    secrets: inherit
  publish_pypi:
    needs: [release]
    if: vars.MASTER_PYPI_PROJECT != ''
    runs-on: ubuntu-latest
    permissions:
      id-token: write  # mandatory for trusted publishing
    steps:
      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: dist
          name: build-pypi
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          verbose: true
--- a/.github/workflows/release-nightly.yml
+++ b/.github/workflows/release-nightly.yml
@@ -41,3 +41,20 @@ jobs:
      actions: write  # For cleaning up cache
      id-token: write  # mandatory for trusted publishing
    secrets: inherit
  publish_pypi:
    needs: [release]
    if: vars.NIGHTLY_PYPI_PROJECT != ''
    runs-on: ubuntu-latest
    permissions:
      id-token: write  # mandatory for trusted publishing
    steps:
      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: dist
          name: build-pypi
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          verbose: true
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -2,10 +2,6 @@ name: Release
 on:
  workflow_call:
    inputs:
      prerelease:
        required: false
        default: true
        type: boolean
      source:
        required: false
        default: ''
@@ -18,6 +14,10 @@ on:
        required: false
        default: ''
        type: string
      prerelease:
        required: false
        default: true
        type: boolean
  workflow_dispatch:
    inputs:
      source:
@@ -278,11 +278,20 @@ jobs:
          make clean-cache
          python -m build --no-isolation .
      - name: Upload artifacts
        if: github.event_name != 'workflow_dispatch'
        uses: actions/upload-artifact@v4
        with:
          name: build-pypi
          path: |
            dist/*
          compression-level: 0
      - name: Publish to PyPI
        if: github.event_name == 'workflow_dispatch'
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          verbose: true
          attestations: false  # Currently doesn't work w/ reusable workflows (breaks nightly)
  publish:
    needs: [prepare, build]
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -52,7 +52,7 @@ default = [
    "pycryptodomex",
    "requests>=2.32.2,<3",
    "urllib3>=1.26.17,<3",
-    "websockets>=13.0",
+    "websockets>=13.0,<14",
 ]
 curl-cffi = [
    "curl-cffi==0.5.10; os_name=='nt' and implementation_name=='cpython'",
--- a/yt_dlp/dependencies/Cryptodome.py
+++ b/yt_dlp/dependencies/Cryptodome.py
@@ -24,7 +24,7 @@ try:
        from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5  # noqa: F401
        from Crypto.Hash import CMAC, SHA1  # noqa: F401
        from Crypto.PublicKey import RSA  # noqa: F401
-except ImportError:
+except (ImportError, OSError):
    __version__ = f'broken {__version__}'.strip()
--- a/yt_dlp/extractor/rtp.py
+++ b/yt_dlp/extractor/rtp.py
@@ -4,40 +4,97 @@ import re
 import urllib.parse
 from .common import InfoExtractor
-from ..utils import js_to_json
+from ..utils import (
    ExtractorError,
    determine_ext,
    join_nonempty,
    js_to_json,
 )
 class RTPIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/(?:(?:estudoemcasa|palco|zigzag)/)?p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:(?:www\.)?rtp\.pt/play/(?P<subarea>.*/)?p(?P<program_id>\d+)/|arquivos\.rtp\.pt/conteudos/)(?P<id>[^/?#]+)'
    _TESTS = [{
-        'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
+        'url': 'https://www.rtp.pt/play/p9165/e562949/por-do-sol',
        'md5': 'e736ce0c665e459ddb818546220b4ef8',
        'info_dict': {
-            'id': 'e174042',
+            'id': 'e562949',
            'ext': 'mp3',
            'title': 'Paixões Cruzadas',
            'description': 'As paixões musicais de António Cartaxo e António Macedo',
            'thumbnail': r're:^https?://.*\.jpg',
        },
    }, {
        'url': 'https://www.rtp.pt/play/zigzag/p13166/e757904/25-curiosidades-25-de-abril',
        'md5': '9a81ed53f2b2197cfa7ed455b12f8ade',
        'info_dict': {
            'id': 'e757904',
            'ext': 'mp4',
-            'title': '25 Curiosidades, 25 de Abril',
+            'title': 'Pôr do Sol Episódio 1',
-            'description': 'Estudar ou não estudar - Em cada um dos episódios descobrimos uma curiosidade acerca de como era viver em Portugal antes da revolução do 25 de abr',
+            'description': 'Madalena Bourbon de Linhaça vive atormentada pelo segredo que esconde desde 1990. Matilde Bourbon de Linhaça sonha fugir com o seu amor proibido. O',
-            'thumbnail': r're:^https?://.*\.jpg',
+            'thumbnail': r're:https?://.*\.(?:jpg|png)',
        },
    }, {
-        'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
+        'url': 'https://www.rtp.pt/play/p12646/e738493/telejornal',
-        'only_matching': True,
+        'info_dict': {
            'id': 'e738493',
            'ext': 'mp4',
            'title': 'Telejornal de 01 jan 2024 PARTE 1',
            'description': 'A mais rigorosa seleção de notícias, todos os dias às 20h00. De segunda a domingo, João Adelino Faria, José Rodrigues dos Santos e Ana Lourenço',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
        },
    }, {
-        'url': 'https://www.rtp.pt/play/estudoemcasa/p7776/portugues-1-ano',
+        'url': 'https://www.rtp.pt/play/p6646/e457262/grande-entrevista',
-        'only_matching': True,
+        'info_dict': {
            'id': 'e457262',
            'ext': 'mp4',
            'title': 'Grande Entrevista Episódio 7 - de 19 fev 2020',
            'description': 'Bruno Nogueira - É um dos mais originais humoristas portugueses e de maior êxito! Bruno Nogueira na Grande Entrevista com Vítor Gonçalves.',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
        },
    }, {
-        'url': 'https://www.rtp.pt/play/palco/p13785/l7nnon',
+        'url': 'https://www.rtp.pt/play/p1525/e738522/a-mosca',
-        'only_matching': True,
+        'info_dict': {
            'id': 'e738522',
            'ext': 'mp4',
            'title': 'A Mosca de 02 jan 2024',
            'description': 'Ano novo, vida nova - Ano novo, vida nova',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
        },
    }, {
        'url': 'https://www.rtp.pt/play/estudoemcasa/p7776/e539826/portugues-1-ano',
        'info_dict': {
            'id': 'e539826',
            'ext': 'mp4',
            'title': 'Português - 1.º ano , aula 45 - 27 abr 2021',
            'description': 'A História do Pedrito Coelho, de Beatrix Potter. O dígrafo \'lh\' - A História do Pedrito Coelho, de Beatrix Potter. O dígrafo \'lh\'.',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
        },
    }, {
        'url': 'https://www.rtp.pt/play/zigzag/p13857/e794575/zig-zag-zzz-e-amigos',
        'info_dict': {
            'id': 'e794575',
            'ext': 'mp4',
            'title': 'Zig, Zag, Zzz e Amigos Episódio 1 - de 16 set 2024',
            'description': 'O Brinquedo Perdido - Zig, Zag e Zzz são três amigos inseparáveis que partilham aventuras emocionantes e cheias de imaginação. Exploram o mundo <20>',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
        },
    }, {
        'url': 'https://www.rtp.pt/play/palco/p13151/premio-miguel-rovisco-2023-requiem-por-isabel',
        'info_dict': {
            'id': 'premio-miguel-rovisco-2023-requiem-por-isabel',
            'ext': 'mp4',
            'title': 'Prémio Miguel Rovisco 23: Requiem Por Isabel de 30 mar 2024',
            'description': 'Lucrécia foi a atriz mais famosa e requisitada do seu tempo. Este já não é o seu tempo. A debater-se com a decrepitude física e financeira, foi o',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
        },
    }, {
        'url': 'https://arquivos.rtp.pt/conteudos/liga-dos-ultimos-152/',
        'info_dict': {
            'id': 'liga-dos-ultimos-152',
            'ext': 'mp4',
            'title': 'Liga dos Últimos – RTP Arquivos',
            'description': 'Magazine desportivo, com apresentação de Álvaro Costa e comentários em estúdio do professor Hernâni Gonçalves e do sociólogo João Nuno Coelho. Destaque para os jogos de futebol das equipas dos escalões secundários de Portugal, com momentos dos jogos: Agrário de Lamas vs Pampilhoense e Apúlia vs Fragoso.',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
        },
    }, {
        'url': 'https://www.rtp.pt/play/p510/e786608/aleixo-fm',
        'info_dict': {
            'id': 'e786608',
            'ext': 'mp3',
            'title': 'Aleixo FM de 31 jul 2024',
            'description': 'Melhor dia pra casar - Já o diz Joaquim de Magalhães Fernandes Barreiros, comummente conhecido como Quim Barreiros. Mas será mesmo este o melhor di',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
        },
    }]
    _RX_OBFUSCATION = re.compile(r'''(?xs)
@@ -60,42 +117,77 @@ class RTPIE(InfoExtractor):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_meta(
            'twitter:title', webpage, display_name='title', fatal=True)
        # Title tag includes relevant data
        title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title', default='')
        # Raise error if episode is unavailable
        if 'Este episódio não se encontra disponível' in title:
            raise ExtractorError('Episode unavailable', expected=True)
        # Replace irrelevant text in title
        title = re.sub(r' -  ?(RTP Play|Estudo Em Casa|Zig Zag Play|RTP Palco)( - RTP)?', '', title)
        # Check if it's a episode split in parts
        part = self._html_search_regex(r'section\-parts.*<span.*>(.+?)</span>.*</ul>', webpage, 'part', default=None)
        # Add episode part identification to title if it exists
        title = join_nonempty(title, part, delim=' ')
        # Extract f and config from page
        f, config = self._search_regex(
            r'''(?sx)
                (?:var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s*)?
-                var\s+player1\s+=\s+new\s+RTPPlayer\s*\((?P<config>{(?:(?!\*/).)+?})\);(?!\s*\*/)
+                var\s+player1?\s+=\s+new\s+RTPPlayer\s*\((?P<config>{(?:(?!\*/).)+?})\);(?!\s*\*/)
            ''', webpage,
            'player config', group=('f', 'config'))
        config = self._parse_json(
            config, video_id,
            lambda data: self.__unobfuscate(data, video_id=video_id))
        # Estudo em Casa / Zig Zag / Palco / RTP Arquivos subareas don't include f
        f = config['file'] if not f else self._parse_json(
            f, video_id,
            lambda data: self.__unobfuscate(data, video_id=video_id))
        formats = []
        if isinstance(f, dict):
-            f_hls = f.get('hls')
+            file_hls = f.get('hls')
-            if f_hls is not None:
+            file_fps = f.get('fps')
                formats.extend(self._extract_m3u8_formats(
                    f_hls, video_id, 'mp4', 'm3u8_native', m3u8_id='hls'))
-            f_dash = f.get('dash')
+            if file_fps is not None:
-            if f_dash is not None:
+                # RTP Arquivos specific use case
-                formats.extend(self._extract_mpd_formats(f_dash, video_id, mpd_id='dash'))
+                if '/arquivo/' in file_fps:
                    file_key = config['fileKey']
                    split_file_key = file_key.split('/')
                    filename = split_file_key[-1]
                    del split_file_key[-1]
                    split_file_key.extend([f'index.m3u8?tlm=hls&streams={filename}.m3u8'])
                    path = '/'.join(split_file_key)
                    file_hls = f'https://streaming-arquivo-ondemand.rtp.pt/nas2.share{path}'
                elif file_hls is None:
                    file_hls = file_fps.replace('drm-fps', 'hls')
            formats.extend(self._extract_m3u8_formats(
                file_hls, video_id, 'mp4', 'm3u8_native', m3u8_id='hls'))
        else:
-            formats.append({
+            ext = determine_ext(f)
-                'format_id': 'f',
+
-                'url': f,
+            if ext == 'm3u8':
-                'vcodec': 'none' if config.get('mediaType') == 'audio' else None,
+                formats.extend(self._extract_m3u8_formats(
-            })
+                    f, video_id, 'mp4', 'm3u8_native', m3u8_id='hls'))
            else:
                formats.append({
                    'format_id': 'f',
                    'url': f,
                    'vcodec': 'none' if config.get('mediaType') == 'audio' else None,
                })
        subtitles = {}
        vtt = config.get('vtt')
        if vtt is not None:
            for lcode, lname, url in vtt:
@@ -108,7 +200,7 @@ class RTPIE(InfoExtractor):
            'id': video_id,
            'title': title,
            'formats': formats,
-            'description': self._html_search_meta(['description', 'twitter:description'], webpage),
+            'description': self._html_search_meta(['og:description', 'description'], webpage),
            'thumbnail': config.get('poster') or self._og_search_thumbnail(webpage),
            'subtitles': subtitles,
        }
Author	SHA1	Message	Date
red-acid	b19adadbd7	Merge `049862b803` into `b83ca24eb7`	2024-11-10 00:54:43 +01:00
sepro	b83ca24eb7	[core] Catch broken Cryptodome installations (#11486) Authored by: seproDev	2024-11-10 00:53:49 +01:00
bashonly	240a7d43c8	[build] Pin `websockets` version to >=13.0,<14 (#11488) websockets 14.0 causes CI test failures (a lot more of them) Authored by: bashonly	2024-11-09 23:46:47 +00:00
bashonly	f13df591d4	[build] Enable attestations for trusted publishing (#11420) Reverts `428ffb75aa` Authored by: bashonly	2024-11-09 23:26:02 +00:00
red-acid	049862b803	Apply suggestions from code review Co-authored-by: N/Ame <173015200+grqz@users.noreply.github.com>	2024-10-23 11:11:24 +01:00
red-acid	c2b5c7025c	Update rtp.py	2024-10-14 12:36:35 +01:00