2024-11-30 03:01:25 +01:00
1 changed files with 44 additions and 11 deletions
--- a/yt_dlp/extractor/loom.py
+++ b/yt_dlp/extractor/loom.py
@ -1,6 +1,13 @@
 from .common import InfoExtractor
 from datetime import datetime
-from yt_dlp.utils.traversal import traverse_obj
+
+'''
+This scraper was written quickly and does not really follow best practices.
+The webpage string has JSON data embedded in it, and it would be better if the
+video data were parsed from that JSON instead of being extracted with regexes.
+Because Loom could change its video-request API at any time, I decided
+not to spend too much effort on this. If you want to improve this scraper, feel free to do so.
+'''


 class LoomIE(InfoExtractor):
@ -35,28 +42,54 @@ class LoomIE(InfoExtractor):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

-        json = self._search_json(start_pattern=r'window\.loomSSRVideo\s*=', string=webpage, name="Json from Loom Webpage", video_id=video_id)
-        videourl = self.fetch_loom_download_url(video_id)
-        ext = self._search_regex(r'([a-zA-Z0-9]+)(?=\?)', videourl, 'ext', fatal=False)
+        # print(f'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
+        # print(f'Id: {video_id}')
+        # print(f'\n\n\n\n\n\n\n\nWebpage: {webpage}\n\n\n\n\n\n\n\n')

-        date_string = json.get('createdAt')
+        title = self._search_regex(r'"name":"([^"]+)"', webpage, 'title')
+
+        # title = self._search_json()
+        # print(f'Title: {title}')
+
+        uploader = self._search_regex(r'"owner_full_name":"([^"]+)"', webpage, 'uploader', fatal=False)
+        # print(f'Uploader: {uploader}')
+
+        videourl = self.fetch_loom_download_url(video_id)
+        # print(f'Url: {url}')
+
+        ext = self._search_regex(r'([a-zA-Z0-9]+)(?=\?)', videourl, 'ext', fatal=False)
+        # print(f'Ext: {ext}')
+
+        width = self._search_regex(r'"width":([0-9]+)', webpage, 'width', fatal=False)
+        # print(f'Width: {width}')
+
+        height = self._search_regex(r'"height":([0-9]+)', webpage, 'height', fatal=False)
+        # print(f'Height: {height}')
+
+        date_string = self._search_regex(r'"visibility":"(?:[^"]+)","createdAt":"([^"]+)"', webpage, 'date', fatal=False)
        date = datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%fZ").strftime("%Y%m%d")

+        # filesize = self._search_regex(r'"file_size":([0-9]+)', webpage, 'filesize', fatal=False)
+        # print(f'Filesize: {filesize}')
+
+        # description =
+        # print(description)
+        # print(f'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
+
        formats = []
        formats.append({
            'url': videourl,
-            'width': traverse_obj(json, ('video_properties', 'width')),
-            'height': traverse_obj(json, ('video_properties', 'height')),
+            'width': int(width),
+            'height': int(height),
            'ext': ext,
-            'filesize': traverse_obj(json, ('video_properties', 'byte_size')),
+            # 'filesize': int(filesize),
        })

        return {
            'id': video_id,
-            'title': json.get('name'),
-            'uploader': json.get('owner_full_name'),
+            'title': title,
+            'uploader': uploader,
            'upload_date': date,
            'formats': formats,
-            # 'view_count': json["total_views"], # View Count is always changing so don't know how to test this.
            # TODO more properties (see yt_dlp/extractor/common.py)
        }