[ie/joqrag] Fix download test; add only_matching tests

Co-authored-by: garret <garret1317@yandex.com>
[ie/joqrag] Match multiple urls by pattern array
2024-09-21 01:41:25 +02:00 · 2023-10-21 02:55:48 +08:00 · 2023-10-21 02:55:24 +08:00 · 2023-10-21 01:51:17 +08:00 · 2023-10-21 01:51:12 +08:00 · 2023-10-21 01:51:05 +08:00
1 changed file with 25 additions and 12 deletions
--- a/yt_dlp/extractor/joqrag.py
+++ b/yt_dlp/extractor/joqrag.py
@@ -1,21 +1,21 @@
 import urllib.parse

 from .common import InfoExtractor
+from ..utils import clean_html, urljoin


 class JoqrAgIE(InfoExtractor):
    IE_DESC = '超!A&G+ 文化放送 Nippon Cultural Broadcasting, Inc. (JOQR)'
-    _VALID_URL = r'''(?x)
-                    (https?://www\.uniqueradio\.jp/agplayer5/player\.php)|
-                    (https?://www\.uniqueradio\.jp/agplayer5/inc-player-hls\.php)|
-                    (https?://(?:www\.)?joqr\.co\.jp/ag/)|
-                    (https?://(?:www\.)?joqr\.co\.jp/qr/(?:agdailyprogram|agregularprogram)/)
-                    '''
+    _VALID_URL = [r'https?://www\.uniqueradio\.jp/agplayer5/player\.php',
+                  r'https?://www\.uniqueradio\.jp/agplayer5/inc-player-hls\.php',
+                  r'https?://(?:www\.)?joqr\.co\.jp/ag/',
+                  r'https?://(?:www\.)?joqr\.co\.jp/qr/(?:agdailyprogram|agregularprogram)/']
    _TESTS = [{
        'url': 'https://www.uniqueradio.jp/agplayer5/player.php',
        'info_dict': {
            'id': 'live',
            'title': str,
+            'channel': '超!A&G+',
            'description': str,
            'live_status': 'is_live',
        },
@@ -23,6 +23,18 @@ class JoqrAgIE(InfoExtractor):
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
+    }, {
+        'url': 'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.joqr.co.jp/ag/article/103760/',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.joqr.co.jp/qr/agdailyprogram/',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.joqr.co.jp/qr/agregularprogram/',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
@@ -31,10 +43,10 @@ class JoqrAgIE(InfoExtractor):
        metadata = self._download_webpage(
            'https://www.uniqueradio.jp/aandg', video_id,
            note='Downloading metadata', errnote='Failed to download metadata')
-        title = urllib.parse.unquote_plus(
-            self._search_regex(r'var\s+Program_name\s*=\s*["\']([^"\']+)["\']', metadata, 'program title'))
-        desc = urllib.parse.unquote_plus(
-            self._search_regex(r'var\s+Program_text\s*=\s*["\']([^"\']+)["\']', metadata, 'program description'))
+        title = clean_html(urllib.parse.unquote_plus(
+            self._search_regex(r'var\s+Program_name\s*=\s*["\']([^"\']+)["\']', metadata, 'program title')))
+        desc = clean_html(urllib.parse.unquote_plus(
+            self._search_regex(r'var\s+Program_text\s*=\s*["\']([^"\']+)["\']', metadata, 'program description')))

        m3u8_path = self._search_regex(
            r'<source\s[^>]*\bsrc="([^"]+)"',
@@ -43,11 +55,12 @@ class JoqrAgIE(InfoExtractor):
                note='Downloading player data', errnote='Failed to download player data'),
            'm3u8 url')
        formats = self._extract_m3u8_formats(
-            f'https://www.uniqueradio.jp/{m3u8_path}', video_id, fatal=False)
+            urljoin('https://www.uniqueradio.jp/', m3u8_path), video_id, fatal=False)

        return {
            'id': video_id,
-            'title': f'{title} - 超!A&G+',
+            'title': title,
+            'channel': '超!A&G+',
            'description': desc,
            'formats': formats,
            'live_status': 'is_live',
Author	SHA1	Message	Date
Mozi	11eb240df9	[ie/joqrag] Fix download test; add only_matching tests Co-authored-by: garret <garret1317@yandex.com>	2023-10-21 02:55:48 +08:00
Mozi	3c90db7555	[ie/joqrag] Match multiple urls by pattern array Co-authored-by: garret <garret1317@yandex.com>	2023-10-21 02:55:24 +08:00
Mozi	e9f8f3e397	[ie/joqrag] Join url segments with urljoin Co-authored-by: garret <garret1317@yandex.com>	2023-10-21 01:51:17 +08:00
Mozi	d5c6e48d5d	[ie/joqrag] Extract text from title and description in HTML Co-authored-by: garret <garret1317@yandex.com>	2023-10-21 01:51:12 +08:00
Mozi	988329c5ae	[ie/joqrag] Add channel; clean title Co-authored-by: garret <garret1317@yandex.com>	2023-10-21 01:51:05 +08:00