Compare commits

..

11 Commits

Author SHA1 Message Date
sepro
c93a2fe793 oops 2024-01-14 19:03:34 +01:00
sepro
c0049ea05b Move Regex to class variable 2024-01-14 19:02:31 +01:00
sepro
d6f1ace3a3 Rename file 2024-01-14 19:00:17 +01:00
pompos02
d30df6e5f4 Class name change 2024-01-14 19:52:45 +02:00
pompos02
c0c0816cae Class name change 2024-01-14 19:52:21 +02:00
pompos02
a8bcefdcc1 fixed tests 2024-01-14 19:50:12 +02:00
Karavellas
ce599fe690
Update yt_dlp/extractor/elementorgeneral.py
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
2024-01-14 19:44:04 +02:00
Karavellas
6e8dfba202
Update yt_dlp/extractor/elementorgeneral.py
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
2024-01-14 19:43:56 +02:00
Karavellas
1f793fa751
Update yt_dlp/extractor/elementorgeneral.py
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
2024-01-14 19:43:48 +02:00
Karavellas
3846eadd33
Update yt_dlp/extractor/elementorgeneral.py
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
2024-01-14 19:43:35 +02:00
Karavellas
dd59e420bb
Update yt_dlp/extractor/_extractors.py
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
2024-01-14 19:43:04 +02:00
3 changed files with 71 additions and 96 deletions

View File

@ -84,7 +84,6 @@ from .allstar import (
AllstarIE,
AllstarProfileIE,
)
from .alphaporno import AlphaPornoIE
from .altcensored import (
AltCensoredIE,
@ -541,7 +540,7 @@ from .egghead import (
from .eighttracks import EightTracksIE
from .einthusan import EinthusanIE
from .eitb import EitbIE
from .elementorgeneral import ElementorGeneralIE
from .elementorembed import ElementorEmbedIE
from .elonet import ElonetIE
from .elpais import ElPaisIE
from .eltrecetv import ElTreceTVIE

View File

@ -0,0 +1,70 @@
import re
from .common import InfoExtractor
from ..utils import unescapeHTML, url_or_none
from ..utils.traversal import traverse_obj
class ElementorEmbedIE(InfoExtractor):
    # Extractor for videos embedded through Elementor "video" and
    # "video-playlist" widgets. It only implements _extract_from_webpage;
    # _VALID_URL is disabled (presumably so it never matches a URL directly
    # and is only consulted for embeds -- confirm against InfoExtractor).
    _VALID_URL = False
    _WEBPAGE_TESTS = [{
        'url': 'https://capitaltv.cy/2023/12/14/υγεια-και-ζωη-14-12-2023-δρ-ξενια-κωσταντινιδο/',
        'info_dict': {
            'id': 'KgzuxwuQwM4',
            'ext': 'mp4',
            'title': 'ΥΓΕΙΑ ΚΑΙ ΖΩΗ 14 12 2023 ΔΡ ΞΕΝΙΑ ΚΩΣΤΑΝΤΙΝΙΔΟΥ',
            'thumbnail': 'https://i.ytimg.com/vi/KgzuxwuQwM4/maxresdefault.jpg',
            'playable_in_embed': True,
            'tags': 'count:16',
            'like_count': int,
            'channel': 'Capital TV Cyprus',
            'channel_id': 'UCR8LwVKTLGEXt4ZAErpCMrg',
            'availability': 'public',
            'description': 'md5:7a3308a22881aea4612358c4ba121f77',
            'duration': 2891,
            'upload_date': '20231214',
            'uploader_id': '@capitaltvcyprus6389',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCR8LwVKTLGEXt4ZAErpCMrg',
            'uploader_url': 'https://www.youtube.com/@capitaltvcyprus6389',
            'uploader': 'Capital TV Cyprus',
            'age_limit': 0,
            'categories': ['News & Politics'],
            'view_count': int,
            'channel_follower_count': int,
        },
    }, {
        'url': 'https://elementor.com/academy/theme-builder-collection/?playlist=76011151&video=9e59909',
        'info_dict': {
            'id': '?playlist=76011151&video=9e59909',
            'title': 'Theme Builder Collection - Academy',
            'age_limit': 0,
            'timestamp': 1702196984.0,
            'upload_date': '20231210',
            'description': 'md5:7f52c52715ee9e54fd7f82210511673d',
            'thumbnail': 'https://elementor.com/academy/wp-content/uploads/2021/07/Theme-Builder-1.png',
        },
        'playlist_count': 11,
        'params': {
            'skip_download': True,
        },
    }]

    # Captures the raw (still HTML-escaped) value of the data-settings
    # attribute of a <div> whose class list contains elementor-widget-video
    # or elementor-widget-video-playlist. The class attribute has to appear
    # before data-settings for the pattern to match.
    _WIDGET_REGEX = r'<div[^>]+class="[^"]*elementor-widget-video(?:-playlist)?[^"]*"[^>]*data-settings="([^"]*)"'

    def _extract_from_webpage(self, url, webpage):
        """Yield one result per video found in the page's Elementor widgets.

        For every widget matched by _WIDGET_REGEX the settings JSON is
        decoded (non-fatally) and then, in this order:
          * the widget-level 'youtube_url' is yielded as a Youtube url result;
          * for each entry of 'tabs' that has a truthy '_id': its
            'youtube_url', its 'vimeo_url', then every valid 'url' found
            under 'hosted_url' / 'external_url' as a plain info dict.
        """
        # Settings key -> extractor key used for the resulting url_result
        hosted_embeds = (
            ('youtube_url', 'Youtube'),
            ('vimeo_url', 'Vimeo'),
        )

        for raw_settings in re.findall(self._WIDGET_REGEX, webpage):
            settings = self._parse_json(
                raw_settings, None, fatal=False, transform_source=unescapeHTML)

            # Single-video widget
            widget_youtube = traverse_obj(settings, ('youtube_url', {url_or_none}))
            if widget_youtube:
                yield self.url_result(widget_youtube, ie='Youtube')

            # Playlist widget: only dict entries carrying a truthy '_id' are kept
            for entry in traverse_obj(settings, ('tabs', lambda _, v: v['_id'], {dict})):
                for settings_key, ie_name in hosted_embeds:
                    embed_url = traverse_obj(entry, (settings_key, {url_or_none}))
                    if embed_url:
                        yield self.url_result(embed_url, ie=ie_name)

                file_urls = traverse_obj(
                    entry, (('hosted_url', 'external_url'), 'url', {url_or_none}))
                for file_url in file_urls:
                    yield {
                        'id': entry['_id'],
                        'url': file_url,
                        'title': entry.get('title'),
                    }

View File

@ -1,94 +0,0 @@
from .common import InfoExtractor
import re
from ..utils import (
traverse_obj,
clean_html,
unescapeHTML,
)
class ElementorGeneralIE(InfoExtractor):
    # Extractor for videos embedded through Elementor "video" and
    # "video-playlist" widgets. It only implements _extract_from_webpage;
    # _VALID_URL is disabled (presumably so it is only consulted for
    # embeds -- confirm against InfoExtractor).
    _VALID_URL = False
    _WEBPAGE_TESTS = [{
        'url': 'https://capitaltv.cy/2023/12/14/υγεια-και-ζωη-14-12-2023-δρ-ξενια-κωσταντινιδο/',
        'info_dict': {
            'id': 'KgzuxwuQwM4',
            'ext': 'mp4',
            'title': 'ΥΓΕΙΑ ΚΑΙ ΖΩΗ 14 12 2023 ΔΡ ΞΕΝΙΑ ΚΩΣΤΑΝΤΙΝΙΔΟΥ',
            'thumbnail': 'https://i.ytimg.com/vi/KgzuxwuQwM4/maxresdefault.jpg',
            'playable_in_embed': True,
            'tags': 'count:16',
            'like_count': int,
            'channel': 'Capital TV Cyprus',
            'channel_id': 'UCR8LwVKTLGEXt4ZAErpCMrg',
            'availability': 'public',
            'description': 'md5:7a3308a22881aea4612358c4ba121f77',
            'duration': 2891,
            'upload_date': '20231214',
            'uploader_id': '@capitaltvcyprus6389',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCR8LwVKTLGEXt4ZAErpCMrg',
            'uploader_url': 'https://www.youtube.com/@capitaltvcyprus6389',
            'uploader': 'Capital TV Cyprus',
            'age_limit': 0,
            'categories': ['News & Politics'],
            'view_count': int,
            'channel_follower_count': int,
        },
    }, {
        'url': 'https://elementor.com/academy/theme-builder-collection/?playlist=76011151&video=9e59909',
        'info_dict': {
            'id': '?playlist=76011151&video=9e59909',
            'title': 'Theme Builder Collection - Academy',
            'age_limit': 0,
            'timestamp': 1702196984.0,
            'upload_date': '20231210',
            'description': 'md5:7f52c52715ee9e54fd7f82210511673d',
            'thumbnail': 'https://elementor.com/academy/wp-content/uploads/2021/07/Theme-Builder-1.png',
        },
        'playlist_mincount': 2,
        'params': {
            'skip_download': True,
        },
    }]

    # Widget-settings keys that may carry a video URL, in lookup priority order
    _VIDEO_URL_KEYS = ('youtube_url', 'vimeo_url', 'dailymotion_url', 'videopress_url')

    def _extract_from_webpage(self, url, webpage):
        """Yield a url_transparent result for each video in Elementor widgets.

        Each matching widget's data-settings attribute is cleaned, unescaped
        and parsed as JSON. If the settings contain a non-empty 'tabs' list,
        every tab with a video URL yields one result; otherwise the widget
        itself is treated as a single video. Widgets or tabs without a usable
        URL are skipped instead of raising.
        """
        for element in re.findall(r'<div[^>]+class="[^"]*elementor-widget-(?:video|video-playlist)[^"]*"[^>]*data-settings="([^"]*)"', webpage):
            data_settings = unescapeHTML(clean_html(element))
            data = self._parse_json(data_settings, None, fatal=False)
            # The parse is non-fatal, so malformed settings do not give a dict;
            # skip the widget rather than crash on data.get()
            if not isinstance(data, dict):
                continue

            tabs = data.get('tabs')
            if isinstance(tabs, list) and tabs:  # Handling playlists
                for tab in tabs:
                    if not isinstance(tab, dict):
                        continue
                    video_url = self._first_video_url(tab)
                    if not video_url:
                        continue
                    title = tab.get('title') or self._og_search_title(webpage)
                    # traverse_obj tolerates 'thumbnail' being null or missing
                    thumbnail = traverse_obj(tab, ('thumbnail', 'url')) or self._og_search_thumbnail(webpage)
                    yield self._build_result(video_url, title, thumbnail, self._get_ie_key(video_url))
            else:
                video_url = self._first_video_url(data)
                # A widget without any known URL key has nothing to extract
                if not video_url:
                    continue
                title = data.get('title') or self._og_search_title(webpage)
                thumbnail = traverse_obj(data, ('image_overlay', 'url')) or self._og_search_thumbnail(webpage)
                yield self._build_result(video_url, title, thumbnail, self._get_ie_key(video_url))

    def _first_video_url(self, settings):
        """Return the first non-empty string URL under _VIDEO_URL_KEYS, else None."""
        for key in self._VIDEO_URL_KEYS:
            candidate = settings.get(key)
            if candidate and isinstance(candidate, str):
                return candidate
        return None

    def _get_ie_key(self, url):
        """Pick an extractor key by substring match on url; 'Generic' if none match."""
        if 'youtube' in url or 'youtu.be' in url:
            return 'Youtube'
        elif 'vimeo' in url:
            return 'Vimeo'
        elif 'dailymotion' in url:
            return 'Dailymotion'
        elif 'videopress' in url:
            return 'Videopress'
        return 'Generic'

    def _build_result(self, video_url, title, thumbnail, ie_key):
        """Build the url_transparent result dict; the video URL doubles as the id."""
        return {
            'id': video_url,
            'title': title,
            '_type': 'url_transparent',
            'url': video_url,
            'thumbnail': thumbnail,
            'ie_key': ie_key,
        }