import str_to_int

Update yt_dlp/extractor/altcensored.py
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
2024-09-22 18:31:25 +02:00 · 2023-11-13 00:24:58 -03:00 · 2023-11-13 00:24:13 -03:00 · 2023-11-13 00:21:50 -03:00 · 2023-11-13 00:21:13 -03:00
1 changed file with 9 additions and 12 deletions
--- a/yt_dlp/extractor/altcensored.py
+++ b/yt_dlp/extractor/altcensored.py
@@ -4,6 +4,7 @@ from .common import InfoExtractor
 from .archiveorg import ArchiveOrgIE
 from ..utils import (
    int_or_none,
+    str_to_int,
    orderedSet,
    urljoin,
    InAdvancePagedList,
@@ -16,9 +17,9 @@ class AltCensoredIE(InfoExtractor):
    _TESTS = [{
        'url': 'https://www.altcensored.com/watch?v=k0srjLSkga8',
        'info_dict': {
-            "id": "youtube-k0srjLSkga8",
-            "ext": "webm",
-            "title": "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
+            'id': 'youtube-k0srjLSkga8',
+            'ext': 'webm',
+            'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
            'display_id': 'k0srjLSkga8.webm',
            'release_date': '20180403',
            'creator': 'Virginie Vota',
@@ -37,16 +38,12 @@ class AltCensoredIE(InfoExtractor):

    def _real_extract(self, url):
        video_id = self._match_id(url)
-        # Use most data from archive.org (extractor indirection)
-        # But try first to gather a couple of useful information from altcensored
        webpage = self._download_webpage(url, video_id)
-        yt_views = int_or_none(self._html_search_regex(
-            r'YouTube Views:.*?([0-9,.]+)', webpage, 'view count', default='0').replace(',', ''))
-        category = self._html_search_regex(r'<a href="/category/.*?\n\s+([^<]+)', webpage, 'category')
-        # Hardcoded (very unlikely to need a change in a foreseeable future)
-        res = self.url_result(f'https://archive.org/details/youtube-{video_id}', ArchiveOrgIE, url_transparent=True,
-                              yt_views=yt_views, category=category)
-        return res
+        yt_views = str_to_int(self._html_search_regex(r'YouTube Views:(?:\s|&nbsp;)*([\d,]+)', webpage, 'view count', default=''))
+        category = self._html_search_regex(r'<a href="/category/\d+">\s*\n?\s*([^<]+)</a>', webpage, 'category', fatal=False)
+
+        return self.url_result(f'https://archive.org/details/youtube-{video_id}', ArchiveOrgIE, url_transparent=True,
+                               view_count=yt_views, categories=[category])


 class AltCensoredChannelIE(InfoExtractor):
Author	SHA1	Message	Date
Raphaël Droz	0de2b61016	import str_to_int	2023-11-13 00:24:58 -03:00
Raphaël Droz	6e681fdfef	Update yt_dlp/extractor/altcensored.py Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>	2023-11-13 00:24:13 -03:00
Raphaël Droz	9d88a2f105	Update yt_dlp/extractor/altcensored.py Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>	2023-11-13 00:21:50 -03:00
Raphaël Droz	f0b45518db	Update yt_dlp/extractor/altcensored.py Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>	2023-11-13 00:21:13 -03:00