Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2024-11-30 03:01:25 +01:00)
Compare commits
No commits in common. "57c3df596001a8250d37e086fcdb10094a218e73" and "a09c95273614e9463618a3d738f2a008d9f9cea6" have entirely different histories.
57c3df5960 ... a09c952736
@@ -1,6 +1,13 @@
from .common import InfoExtractor
from datetime import datetime
from yt_dlp.utils.traversal import traverse_obj

'''
This scraper was made really fast without really following best practices.
The webpage string has json data inside it and it would be better if the
video data was grabbed from there instead of using regex.
Because loom could change their video requesting api at any time, I decided
not to work too much on this. If you want to make this scraper better, feel free to do so.
'''


class LoomIE(InfoExtractor):

@@ -35,28 +42,54 @@ class LoomIE(InfoExtractor):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        json = self._search_json(start_pattern=r'window\.loomSSRVideo\s*=', string=webpage, name="Json from Loom Webpage", video_id=video_id)
        videourl = self.fetch_loom_download_url(video_id)
        ext = self._search_regex(r'([a-zA-Z0-9]+)(?=\?)', videourl, 'ext', fatal=False)
        # print(f'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
        # print(f'Id: {video_id}')
        # print(f'\n\n\n\n\n\n\n\nWebpage: {webpage}\n\n\n\n\n\n\n\n')

        date_string = json.get('createdAt')
        title = self._search_regex(r'"name":"([^"]+)"', webpage, 'title')

        # title = self._search_json()
        # print(f'Title: {title}')

        uploader = self._search_regex(r'"owner_full_name":"([^"]+)"', webpage, 'uploader', fatal=False)
        # print(f'Uploader: {uploader}')

        videourl = self.fetch_loom_download_url(video_id)
        # print(f'Url: {url}')

        ext = self._search_regex(r'([a-zA-Z0-9]+)(?=\?)', videourl, 'ext', fatal=False)
        # print(f'Ext: {ext}')

        width = self._search_regex(r'"width":([0-9]+)', webpage, 'width', fatal=False)
        # print(f'Width: {width}')

        height = self._search_regex(r'"height":([0-9]+)', webpage, 'height', fatal=False)
        # print(f'Height: {height}')

        date_string = self._search_regex(r'"visibility":"(?:[^"]+)","createdAt":"([^"]+)"', webpage, 'date', fatal=False)
        date = datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%fZ").strftime("%Y%m%d")

        # filesize = self._search_regex(r'"file_size":([0-9]+)', webpage, 'filesize', fatal=False)
        # print(f'Filesize: {filesize}')

        # description =
        # print(description)
        # print(f'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')

        formats = []
        formats.append({
            'url': videourl,
            'width': traverse_obj(json, ('video_properties', 'width')),
            'height': traverse_obj(json, ('video_properties', 'height')),
            'width': int(width),
            'height': int(height),
            'ext': ext,
            'filesize': traverse_obj(json, ('video_properties', 'byte_size')),
            # 'filesize': int(filesize),
        })

        return {
            'id': video_id,
            'title': json.get('name'),
            'uploader': json.get('owner_full_name'),
            'title': title,
            'uploader': uploader,
            'upload_date': date,
            'formats': formats,
            # 'view_count': json["total_views"], # View Count is always changing so don't know how to test this.
            # TODO more properties (see yt_dlp/extractor/common.py)
        }
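
The module docstring in the first hunk already names the cleaner approach: the watch page embeds the video metadata as JSON in window.loomSSRVideo, and one side of the diff reads it with _search_json() and traverse_obj() while the other side falls back to per-field regexes over the raw HTML. Below is a minimal sketch of a JSON-first _real_extract, not the committed code: the loomSSRVideo pattern, the video_properties / name / owner_full_name / createdAt fields and the fetch_loom_download_url helper are taken from the diff above, while the _VALID_URL pattern is an assumption and determine_ext / unified_strdate are stock yt_dlp.utils helpers swapped in for the manual ext regex and strptime call.

from .common import InfoExtractor
from yt_dlp.utils import determine_ext, unified_strdate
from yt_dlp.utils.traversal import traverse_obj


class LoomIE(InfoExtractor):
    # Assumed share-URL pattern; the fork's actual _VALID_URL is not shown in the diff.
    _VALID_URL = r'https?://(?:www\.)?loom\.com/share/(?P<id>[\da-f]+)'

    def fetch_loom_download_url(self, video_id):
        # Placeholder: both sides of the diff call a helper of this name that is
        # defined elsewhere in the fork's loom.py and returns a direct video URL.
        raise NotImplementedError

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # One JSON parse instead of several regexes over the raw HTML.
        ssr_video = self._search_json(
            r'window\.loomSSRVideo\s*=', webpage, 'loom SSR video', video_id)

        video_url = self.fetch_loom_download_url(video_id)

        formats = [{
            'url': video_url,
            # determine_ext() ignores the query string when guessing the extension.
            'ext': determine_ext(video_url),
            'width': traverse_obj(ssr_video, ('video_properties', 'width')),
            'height': traverse_obj(ssr_video, ('video_properties', 'height')),
            'filesize': traverse_obj(ssr_video, ('video_properties', 'byte_size')),
        }]

        return {
            'id': video_id,
            'title': ssr_video.get('name'),
            'uploader': ssr_video.get('owner_full_name'),
            # The diff parses createdAt with '%Y-%m-%dT%H:%M:%S.%fZ'; unified_strdate()
            # handles that shape and returns a YYYYMMDD string.
            'upload_date': unified_strdate(ssr_video.get('createdAt')),
            'formats': formats,
        }

Renaming the parsed object from json to ssr_video also keeps that name free in case the stdlib json module is ever imported in this file.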
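One more note on the regex-based lines: _search_regex(..., fatal=False) returns None when the pattern does not match, so the bare int(width), int(height) and datetime.strptime(date_string, ...) calls above would raise on a miss. yt-dlp ships None-tolerant helpers for exactly this; a tiny illustration with made-up values:

from yt_dlp.utils import int_or_none, unified_strdate

# What _search_regex(..., fatal=False) might hand back: a string on a match, None on a miss.
print(int_or_none('1920'))   # 1920
print(int_or_none(None))     # None, instead of int(None) raising a TypeError
print(unified_strdate('2024-01-02T03:04:05.000Z'))  # '20240102'
print(unified_strdate(None))  # None, instead of strptime() raising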