Compare commits

...

5 Commits

Author SHA1 Message Date
Feraidoon Mehri
92fbfc767b
Merge 4bc859b336 into 52c0ffe40a 2024-11-17 09:25:29 +05:30
NightMachinery
4bc859b336
[underline] Removed debugging code 2022-12-06 14:06:20 +03:30
NightMachinery
93a079d865
[underline] Added slide_info to info_dict 2022-12-06 07:30:19 +03:30
NightMachinery
2cb546cf8e
[underline] Add extractor 2022-12-06 07:02:02 +03:30
NightMachinery
ebab01bb73
[underline] Add extractor (draft) 2022-12-06 06:59:04 +03:30
2 changed files with 93 additions and 0 deletions

View File

@ -2257,6 +2257,7 @@ from .ufctv import (
from .ukcolumn import UkColumnIE
from .uktvplay import UKTVPlayIE
from .umg import UMGDeIE
from .underline import UnderlineIE
from .unistra import UnistraIE
from .unity import UnityIE
from .unsupported import (

View File

@ -0,0 +1,92 @@
from .common import InfoExtractor
def gen_dict_extract(var, key):
    """Yield every value stored under *key* anywhere inside a nested structure.

    The structure is walked depth-first in iteration order. A value found
    under *key* is yielded first and then searched itself, so matches nested
    inside a match are reported too.

    Args:
        var: The object to search. Mappings (anything exposing ``items``)
            and lists/tuples are descended into; every other object
            (strings, numbers, ``None``, ...) is treated as a leaf.
        key: The mapping key to look for.

    Yields:
        Each value associated with *key*, in depth-first order.
    """
    if isinstance(var, (list, tuple)):
        # Sequences may contain mappings or further sequences at any depth.
        for item in var:
            yield from gen_dict_extract(item, key)
    elif hasattr(var, "items"):
        for k, v in var.items():
            if k == key:
                yield v
            # Recurse unconditionally; leaves simply yield nothing.
            yield from gen_dict_extract(v, key)
class UnderlineIE(InfoExtractor):
    """Extractor for event pages (lectures, posters) on underline.io.

    The page embeds its data as JSON in a ``__NEXT_DATA__`` script tag. The
    title, the HLS playlist URL and the slideshow metadata are all looked up
    by key name anywhere inside that JSON.
    """

    # The ``id`` group is the whole path after ``/events/`` (slashes
    # included); the query string and fragment are kept out of it.
    _VALID_URL = r"https?://(?:www\.)?underline\.io/events/(?P<id>[^?#]+)"
    _TESTS = [
        {
            "params": {
                "skip_download": True,  # needs cookies
            },
            "url": "https://underline.io/events/342/posters/12863/poster/66463-mbti-personality-prediction-approach-on-persian-twitter?tab=video",
            # md5 sum of the first 10241 bytes of the video file (use --test).
            # The test-level "md5" is a bare hex digest; the "md5:" prefix is
            # only meaningful for values inside "info_dict".
            "md5": "eaa894161adaef6efd6008681e1cd2c5",
            "info_dict": {
                "id": "342/posters/12863/poster/66463-mbti-personality-prediction-approach-on-persian-twitter",
                "ext": "mp4",
                "title": (
                    "MBTI Personality Prediction Approach on Persian Twitter"
                ),
                "slide_info": [
                    {
                        "url": "https://assets.underline.io/lecture/66463/slideshow/b236b5cfb38966a761a5443bf47fbdf9.pdf",
                        "filename": "Personality-Prediction-WINLP-slides.pdf",
                        "size": 780319,
                    }
                ],
            },
        }
    ]

    def _real_extract(self, url):
        """Build the info dict for a single underline.io event page.

        Args:
            url: Page URL matching ``_VALID_URL``.

        Returns:
            A dict with ``id``, ``title`` (``None`` when the page data has no
            ``title`` key), ``formats`` (empty when no ``playlist`` key is
            found) and ``slide_info`` (every ``slideshow`` value found).
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        page_data = self._search_json(
            r'<script\s+id="__NEXT_DATA__"\s+type="application/json">',
            webpage,
            # Human-readable name of what is being searched for; it is what
            # shows up in messages when the JSON cannot be found.
            "Next.js data",
            video_id,
            end_pattern=r"</script>",
        )

        # Only the first occurrence of each key is used.
        title = next(gen_dict_extract(page_data, "title"), None)
        m3u8_url = next(gen_dict_extract(page_data, "playlist"), None)

        formats = []
        if m3u8_url:
            formats.extend(
                self._extract_m3u8_formats(
                    m3u8_url,
                    video_id,
                    ext="mp4",
                    entry_protocol="m3u8_native",
                )
            )

        return {
            "id": video_id,
            "title": title,
            "formats": formats,
            "slide_info": list(gen_dict_extract(page_data, "slideshow")),
        }