mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-29 18:51:24 +01:00
Compare commits
2 Commits
a09c952736
...
57c3df5960
Author | SHA1 | Date | |
---|---|---|---|
|
57c3df5960 | ||
|
0b036af923 |
|
@ -1,13 +1,6 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from yt_dlp.utils.traversal import traverse_obj
|
||||||
'''
|
|
||||||
This scraper was made really fast without really following best practices.
|
|
||||||
The webpage string has json data inside it and it would be better if the
|
|
||||||
video data was grabbed from there instead of using regex.
|
|
||||||
Because loom could change their video requesting api at any time, I decided
|
|
||||||
not to work too much on this. If you want to make this scraper better, feel free to do so.
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
class LoomIE(InfoExtractor):
|
class LoomIE(InfoExtractor):
|
||||||
|
@ -42,54 +35,28 @@ class LoomIE(InfoExtractor):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
# print(f'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
|
json = self._search_json(start_pattern=r'window\.loomSSRVideo\s*=', string=webpage, name="Json from Loom Webpage", video_id=video_id)
|
||||||
# print(f'Id: {video_id}')
|
|
||||||
# print(f'\n\n\n\n\n\n\n\nWebpage: {webpage}\n\n\n\n\n\n\n\n')
|
|
||||||
|
|
||||||
title = self._search_regex(r'"name":"([^"]+)"', webpage, 'title')
|
|
||||||
|
|
||||||
# title = self._search_json()
|
|
||||||
# print(f'Title: {title}')
|
|
||||||
|
|
||||||
uploader = self._search_regex(r'"owner_full_name":"([^"]+)"', webpage, 'uploader', fatal=False)
|
|
||||||
# print(f'Uploader: {uploader}')
|
|
||||||
|
|
||||||
videourl = self.fetch_loom_download_url(video_id)
|
videourl = self.fetch_loom_download_url(video_id)
|
||||||
# print(f'Url: {url}')
|
|
||||||
|
|
||||||
ext = self._search_regex(r'([a-zA-Z0-9]+)(?=\?)', videourl, 'ext', fatal=False)
|
ext = self._search_regex(r'([a-zA-Z0-9]+)(?=\?)', videourl, 'ext', fatal=False)
|
||||||
# print(f'Ext: {ext}')
|
|
||||||
|
|
||||||
width = self._search_regex(r'"width":([0-9]+)', webpage, 'width', fatal=False)
|
date_string = json.get('createdAt')
|
||||||
# print(f'Width: {width}')
|
|
||||||
|
|
||||||
height = self._search_regex(r'"height":([0-9]+)', webpage, 'height', fatal=False)
|
|
||||||
# print(f'Height: {height}')
|
|
||||||
|
|
||||||
date_string = self._search_regex(r'"visibility":"(?:[^"]+)","createdAt":"([^"]+)"', webpage, 'date', fatal=False)
|
|
||||||
date = datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%fZ").strftime("%Y%m%d")
|
date = datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%fZ").strftime("%Y%m%d")
|
||||||
|
|
||||||
# filesize = self._search_regex(r'"file_size":([0-9]+)', webpage, 'filesize', fatal=False)
|
|
||||||
# print(f'Filesize: {filesize}')
|
|
||||||
|
|
||||||
# description =
|
|
||||||
# print(description)
|
|
||||||
# print(f'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': videourl,
|
'url': videourl,
|
||||||
'width': int(width),
|
'width': traverse_obj(json, ('video_properties', 'width')),
|
||||||
'height': int(height),
|
'height': traverse_obj(json, ('video_properties', 'height')),
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
# 'filesize': int(filesize),
|
'filesize': traverse_obj(json, ('video_properties', 'byte_size')),
|
||||||
})
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': json.get('name'),
|
||||||
'uploader': uploader,
|
'uploader': json.get('owner_full_name'),
|
||||||
'upload_date': date,
|
'upload_date': date,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
# 'view_count': json["total_views"], # View Count is always changing so don't know how to test this.
|
||||||
# TODO more properties (see yt_dlp/extractor/common.py)
|
# TODO more properties (see yt_dlp/extractor/common.py)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user