Compare commits

...

4 Commits

Author SHA1 Message Date
Raphaël Droz
0de2b61016 import str_to_int 2023-11-13 00:24:58 -03:00
Raphaël Droz
6e681fdfef
Update yt_dlp/extractor/altcensored.py
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
2023-11-13 00:24:13 -03:00
Raphaël Droz
9d88a2f105
Update yt_dlp/extractor/altcensored.py
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
2023-11-13 00:21:50 -03:00
Raphaël Droz
f0b45518db
Update yt_dlp/extractor/altcensored.py
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
2023-11-13 00:21:13 -03:00

View File

@@ -4,6 +4,7 @@ from .common import InfoExtractor
from .archiveorg import ArchiveOrgIE from .archiveorg import ArchiveOrgIE
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
str_to_int,
orderedSet, orderedSet,
urljoin, urljoin,
InAdvancePagedList, InAdvancePagedList,
@@ -16,9 +17,9 @@ class AltCensoredIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'https://www.altcensored.com/watch?v=k0srjLSkga8', 'url': 'https://www.altcensored.com/watch?v=k0srjLSkga8',
'info_dict': { 'info_dict': {
"id": "youtube-k0srjLSkga8", 'id': 'youtube-k0srjLSkga8',
"ext": "webm", 'ext': 'webm',
"title": "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?", 'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
'display_id': 'k0srjLSkga8.webm', 'display_id': 'k0srjLSkga8.webm',
'release_date': '20180403', 'release_date': '20180403',
'creator': 'Virginie Vota', 'creator': 'Virginie Vota',
@@ -37,16 +38,12 @@ class AltCensoredIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
# Use most data from archive.org (extractor indirection)
# But try first to gather a couple of useful information from altcensored
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
yt_views = int_or_none(self._html_search_regex( yt_views = str_to_int(self._html_search_regex(r'YouTube Views:(?:\s|&nbsp;)*([\d,]+)', webpage, 'view count', default=''))
r'YouTube Views:.*?([0-9,.]+)', webpage, 'view count', default='0').replace(',', '')) category = self._html_search_regex(r'<a href="/category/\d+">\s*\n?\s*([^<]+)</a>', webpage, 'category', fatal=False)
category = self._html_search_regex(r'<a href="/category/.*?\n\s+([^<]+)', webpage, 'category')
# Hardcoded (very unlikely to need a change in a foreseeable future) return self.url_result(f'https://archive.org/details/youtube-{video_id}', ArchiveOrgIE, url_transparent=True,
res = self.url_result(f'https://archive.org/details/youtube-{video_id}', ArchiveOrgIE, url_transparent=True, view_count=yt_views, categories=[category])
yt_views=yt_views, category=category)
return res
class AltCensoredChannelIE(InfoExtractor): class AltCensoredChannelIE(InfoExtractor):