mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-30 03:01:25 +01:00
Compare commits
No commits in common. "57c3df596001a8250d37e086fcdb10094a218e73" and "a09c95273614e9463618a3d738f2a008d9f9cea6" have entirely different histories.
57c3df5960
...
a09c952736
|
@ -1,6 +1,13 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from yt_dlp.utils.traversal import traverse_obj
|
|
||||||
|
'''
|
||||||
|
This scraper was made really fast without really following best practices.
|
||||||
|
The webpage string has json data inside it and it would be better if the
|
||||||
|
video data was grabbed from there instead of using regex.
|
||||||
|
Because loom could change their video requesting api at any time, I decided
|
||||||
|
not to work too much on this. If you want to make this scraper better, feel free to do so.
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
class LoomIE(InfoExtractor):
|
class LoomIE(InfoExtractor):
|
||||||
|
@ -35,28 +42,54 @@ class LoomIE(InfoExtractor):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
json = self._search_json(start_pattern=r'window\.loomSSRVideo\s*=', string=webpage, name="Json from Loom Webpage", video_id=video_id)
|
# print(f'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
|
||||||
videourl = self.fetch_loom_download_url(video_id)
|
# print(f'Id: {video_id}')
|
||||||
ext = self._search_regex(r'([a-zA-Z0-9]+)(?=\?)', videourl, 'ext', fatal=False)
|
# print(f'\n\n\n\n\n\n\n\nWebpage: {webpage}\n\n\n\n\n\n\n\n')
|
||||||
|
|
||||||
date_string = json.get('createdAt')
|
title = self._search_regex(r'"name":"([^"]+)"', webpage, 'title')
|
||||||
|
|
||||||
|
# title = self._search_json()
|
||||||
|
# print(f'Title: {title}')
|
||||||
|
|
||||||
|
uploader = self._search_regex(r'"owner_full_name":"([^"]+)"', webpage, 'uploader', fatal=False)
|
||||||
|
# print(f'Uploader: {uploader}')
|
||||||
|
|
||||||
|
videourl = self.fetch_loom_download_url(video_id)
|
||||||
|
# print(f'Url: {url}')
|
||||||
|
|
||||||
|
ext = self._search_regex(r'([a-zA-Z0-9]+)(?=\?)', videourl, 'ext', fatal=False)
|
||||||
|
# print(f'Ext: {ext}')
|
||||||
|
|
||||||
|
width = self._search_regex(r'"width":([0-9]+)', webpage, 'width', fatal=False)
|
||||||
|
# print(f'Width: {width}')
|
||||||
|
|
||||||
|
height = self._search_regex(r'"height":([0-9]+)', webpage, 'height', fatal=False)
|
||||||
|
# print(f'Height: {height}')
|
||||||
|
|
||||||
|
date_string = self._search_regex(r'"visibility":"(?:[^"]+)","createdAt":"([^"]+)"', webpage, 'date', fatal=False)
|
||||||
date = datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%fZ").strftime("%Y%m%d")
|
date = datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%fZ").strftime("%Y%m%d")
|
||||||
|
|
||||||
|
# filesize = self._search_regex(r'"file_size":([0-9]+)', webpage, 'filesize', fatal=False)
|
||||||
|
# print(f'Filesize: {filesize}')
|
||||||
|
|
||||||
|
# description =
|
||||||
|
# print(description)
|
||||||
|
# print(f'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': videourl,
|
'url': videourl,
|
||||||
'width': traverse_obj(json, ('video_properties', 'width')),
|
'width': int(width),
|
||||||
'height': traverse_obj(json, ('video_properties', 'height')),
|
'height': int(height),
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'filesize': traverse_obj(json, ('video_properties', 'byte_size')),
|
# 'filesize': int(filesize),
|
||||||
})
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': json.get('name'),
|
'title': title,
|
||||||
'uploader': json.get('owner_full_name'),
|
'uploader': uploader,
|
||||||
'upload_date': date,
|
'upload_date': date,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
# 'view_count': json["total_views"], # View Count is always changing so don't know how to test this.
|
|
||||||
# TODO more properties (see yt_dlp/extractor/common.py)
|
# TODO more properties (see yt_dlp/extractor/common.py)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user