Merge 4bc859b336 into e079ffbda6

[underline] Removed debugging code
[underline] Added slide_info to info_dict
2024-11-24 00:01:54 +01:00 · 2024-11-18 03:41:01 +05:30 · 2022-12-06 14:06:20 +03:30 · 2022-12-06 07:30:19 +03:30 · 2022-12-06 07:02:02 +03:30 · 2022-12-06 06:59:04 +03:30
2 changed files with 93 additions and 0 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -2255,6 +2255,7 @@ from .uliza import (
    UlizaPortalIE,
 )
 from .umg import UMGDeIE
+from .underline import UnderlineIE
 from .unistra import UnistraIE
 from .unity import UnityIE
 from .unsupported import (
--- a/yt_dlp/extractor/underline.py
+++ b/yt_dlp/extractor/underline.py
@ -0,0 +1,92 @@
+from .common import InfoExtractor
+
+
+def gen_dict_extract(var, key):
+    """Yield every value stored under ``key`` anywhere inside ``var``.
+
+    ``var`` is walked depth-first.  Mappings (anything exposing ``items``)
+    are searched entry by entry, and lists/tuples are searched element by
+    element, so arbitrarily nested JSON-like data is covered -- including
+    lists nested directly inside other lists.  A matching value is yielded
+    before its own contents are searched.  Any other kind of object
+    (strings, numbers, ``None``) yields nothing.
+    """
+    if hasattr(var, "items"):
+        for k, v in var.items():
+            if k == key:
+                yield v
+            # Recurse unconditionally; non-container values yield nothing.
+            yield from gen_dict_extract(v, key)
+    elif isinstance(var, (list, tuple)):
+        # Elements may themselves be lists, not only mappings, so recurse
+        # on each one instead of assuming it has ``items``.
+        for item in var:
+            yield from gen_dict_extract(item, key)
+
+
+class UnderlineIE(InfoExtractor):
+    """Extractor for event pages (lectures, posters) on underline.io.
+
+    The page embeds its data as Next.js JSON; the title, the HLS playlist
+    URL and the slideshow metadata are looked up in that JSON by key name.
+    """
+
+    # The id is the whole path after /events/, without query or fragment.
+    _VALID_URL = r"https?://(?:www\.)?underline\.io/events/(?P<id>[^?#]+)"
+
+    _TESTS = [
+        {
+            "params": {
+                "skip_download": True,  # needs cookies
+            },
+            "url": "https://underline.io/events/342/posters/12863/poster/66463-mbti-personality-prediction-approach-on-persian-twitter?tab=video",
+            # md5 sum of the first 10241 bytes of the video file (use --test).
+            # Plain hex digest: the "md5:" prefix is only meaningful for
+            # values inside info_dict, not for this top-level field.
+            "md5": "eaa894161adaef6efd6008681e1cd2c5",
+            "info_dict": {
+                "id": "342/posters/12863/poster/66463-mbti-personality-prediction-approach-on-persian-twitter",
+                "ext": "mp4",
+                "title": (
+                    "MBTI Personality Prediction Approach on Persian Twitter"
+                ),
+                "slide_info": [
+                    {
+                        "url": "https://assets.underline.io/lecture/66463/slideshow/b236b5cfb38966a761a5443bf47fbdf9.pdf",
+                        "filename": "Personality-Prediction-WINLP-slides.pdf",
+                        "size": 780319,
+                    }
+                ],
+            },
+        }
+    ]
+
+    def _real_extract(self, url):
+        """Build the info dict for a single underline.io event page.
+
+        Returns a dict with ``id``, ``title`` (``None`` when the page data
+        has no ``title`` key), ``formats`` (empty when no ``playlist`` key
+        is present) and ``slide_info`` (every ``slideshow`` value found).
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # The page state lives in the <script id="__NEXT_DATA__"> JSON blob.
+        webpage_info = self._search_nextjs_data(webpage, video_id)
+
+        # NOTE(review): the first "title"/"playlist" found anywhere in the
+        # JSON is used; confirm this is always the lecture's own entry.
+        title = next(gen_dict_extract(webpage_info, "title"), None)
+        m3u8_url = next(gen_dict_extract(webpage_info, "playlist"), None)
+
+        formats = []
+        if m3u8_url:
+            formats.extend(
+                self._extract_m3u8_formats(
+                    m3u8_url,
+                    video_id,
+                    ext="mp4",
+                    entry_protocol="m3u8_native",
+                )
+            )
+
+        slide_info = list(gen_dict_extract(webpage_info, "slideshow"))
+
+        return {
+            "id": video_id,
+            "title": title,
+            "formats": formats,
+            "slide_info": slide_info,
+        }
Author	SHA1	Message	Date
Feraidoon Mehri	4dfb913086	Merge `4bc859b336` into `e079ffbda6`	2024-11-18 03:41:01 +05:30
NightMachinery	4bc859b336	[underline] Removed debugging code	2022-12-06 14:06:20 +03:30
NightMachinery	93a079d865	[underline] Added slide_info to info_dict	2022-12-06 07:30:19 +03:30
NightMachinery	2cb546cf8e	[underline] Add extractor	2022-12-06 07:02:02 +03:30
NightMachinery	ebab01bb73	[underline] Add extractor (draft)	2022-12-06 06:59:04 +03:30