Compare commits

No commits in common. "57c3df596001a8250d37e086fcdb10094a218e73" and "a09c95273614e9463618a3d738f2a008d9f9cea6" have entirely different histories.

@@ -1,6 +1,13 @@
 from .common import InfoExtractor
 from datetime import datetime
-from yt_dlp.utils.traversal import traverse_obj
+'''
+This scraper was made really fast without really following best practices.
+The webpage string has json data inside it and it would be better if the
+video data was grabbed from there instead of using regex.
+Because loom could change their video requesting api at any time, I decided
+not to work too much on this. If you want to make this scraper better, feel free to do so.
+'''
 class LoomIE(InfoExtractor):
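
Note on the docstring above: the removed (-) side of this diff already does what it suggests, reading the JSON object that Loom embeds after window.loomSSRVideo instead of matching individual fields with regexes. Below is a minimal sketch of that approach, not the committed code. It assumes the key names shown on the removed side (name, owner_full_name, video_properties with width/height/byte_size) are still what Loom serves, keeps the existing fetch_loom_download_url helper, and uses 'video data' as an arbitrary label for _search_json; _VALID_URL, _TESTS and date handling are omitted.

from yt_dlp.extractor.common import InfoExtractor
from yt_dlp.utils.traversal import traverse_obj


class LoomIE(InfoExtractor):
    # _VALID_URL, _TESTS and fetch_loom_download_url as in the existing file
    # (omitted here); only _real_extract is sketched.

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # JSON object embedded after "window.loomSSRVideo ="; the key names used
        # below follow the removed (-) side of this diff and are not re-verified.
        video_data = self._search_json(
            start_pattern=r'window\.loomSSRVideo\s*=', string=webpage,
            name='video data', video_id=video_id)
        videourl = self.fetch_loom_download_url(video_id)
        ext = self._search_regex(r'([a-zA-Z0-9]+)(?=\?)', videourl, 'ext', fatal=False)
        return {
            'id': video_id,
            'title': video_data.get('name'),
            'uploader': video_data.get('owner_full_name'),
            'formats': [{
                'url': videourl,
                'ext': ext,
                'width': traverse_obj(video_data, ('video_properties', 'width')),
                'height': traverse_obj(video_data, ('video_properties', 'height')),
                'filesize': traverse_obj(video_data, ('video_properties', 'byte_size')),
            }],
        }

If the embedded object ever disappears, the regex lookups on the added (+) side would still work, so the two approaches could also be combined as fallbacks.
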
@@ -35,28 +42,54 @@ class LoomIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        json = self._search_json(start_pattern=r'window\.loomSSRVideo\s*=', string=webpage, name="Json from Loom Webpage", video_id=video_id)
-        videourl = self.fetch_loom_download_url(video_id)
-        ext = self._search_regex(r'([a-zA-Z0-9]+)(?=\?)', videourl, 'ext', fatal=False)
-        date_string = json.get('createdAt')
+        # print(f'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
+        # print(f'Id: {video_id}')
+        # print(f'\n\n\n\n\n\n\n\nWebpage: {webpage}\n\n\n\n\n\n\n\n')
+        title = self._search_regex(r'"name":"([^"]+)"', webpage, 'title')
+        # title = self._search_json()
+        # print(f'Title: {title}')
+        uploader = self._search_regex(r'"owner_full_name":"([^"]+)"', webpage, 'uploader', fatal=False)
+        # print(f'Uploader: {uploader}')
+        videourl = self.fetch_loom_download_url(video_id)
+        # print(f'Url: {url}')
+        ext = self._search_regex(r'([a-zA-Z0-9]+)(?=\?)', videourl, 'ext', fatal=False)
+        # print(f'Ext: {ext}')
+        width = self._search_regex(r'"width":([0-9]+)', webpage, 'width', fatal=False)
+        # print(f'Width: {width}')
+        height = self._search_regex(r'"height":([0-9]+)', webpage, 'height', fatal=False)
+        # print(f'Height: {height}')
+        date_string = self._search_regex(r'"visibility":"(?:[^"]+)","createdAt":"([^"]+)"', webpage, 'date', fatal=False)
         date = datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%fZ").strftime("%Y%m%d")
+        # filesize = self._search_regex(r'"file_size":([0-9]+)', webpage, 'filesize', fatal=False)
+        # print(f'Filesize: {filesize}')
+        # description =
+        # print(description)
+        # print(f'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
         formats = []
         formats.append({
             'url': videourl,
-            'width': traverse_obj(json, ('video_properties', 'width')),
-            'height': traverse_obj(json, ('video_properties', 'height')),
+            'width': int(width),
+            'height': int(height),
             'ext': ext,
-            'filesize': traverse_obj(json, ('video_properties', 'byte_size')),
+            # 'filesize': int(filesize),
         })
         return {
             'id': video_id,
-            'title': json.get('name'),
-            'uploader': json.get('owner_full_name'),
+            'title': title,
+            'uploader': uploader,
             'upload_date': date,
             'formats': formats,
+            # 'view_count': json["total_views"], # View Count is always changing so don't know how to test this.
             # TODO more properties (see yt_dlp/extractor/common.py)
         }
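
A smaller point on the added (+) side: _search_regex(..., fatal=False) returns None when the pattern does not match, so int(width), int(height) and datetime.strptime(date_string, ...) can raise on pages where a field is missing. A guarded sketch of that block follows; it assumes the same regexes and is meant to sit inside _real_extract with webpage, videourl and ext already set as in the diff, and int_or_none is the existing yt-dlp helper that returns None instead of raising.

from yt_dlp.utils import int_or_none  # new import; datetime is already imported at the top of the file

# Fragment of _real_extract, with webpage, videourl and ext set as in the diff:
width = int_or_none(self._search_regex(r'"width":([0-9]+)', webpage, 'width', fatal=False))
height = int_or_none(self._search_regex(r'"height":([0-9]+)', webpage, 'height', fatal=False))
date_string = self._search_regex(
    r'"visibility":"(?:[^"]+)","createdAt":"([^"]+)"', webpage, 'date', fatal=False)
date = None
if date_string:
    date = datetime.strptime(date_string, '%Y-%m-%dT%H:%M:%S.%fZ').strftime('%Y%m%d')
formats = [{
    'url': videourl,
    'ext': ext,
    'width': width,    # None is acceptable in a format dict, unlike int(None)
    'height': height,
}]

The return dict can keep 'upload_date': date unchanged; optional metadata fields may be None.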