[ZenPorn] Add extractor - remove trailing whitespace

[ZenPorn] Add extractor - code cleanups
2024-09-22 02:11:26 +02:00 · 2023-11-05 18:55:08 +00:00 · 2023-11-05 13:55:23 +00:00
1 changed file with 26 additions and 35 deletions
--- a/yt_dlp/extractor/zenporn.py
+++ b/yt_dlp/extractor/zenporn.py
@@ -1,7 +1,4 @@
-import re
 import math
-from collections import namedtuple
-

 from .common import InfoExtractor
 from ..utils import (
@@ -9,6 +6,7 @@ from ..utils import (
    determine_ext,
    int_or_none,
    traverse_obj,
+    unified_strdate
 )


@@ -26,7 +24,7 @@ class ZenPornIE(InfoExtractor):
                'title': 'md5:669eafd3bbc688aa29770553b738ada2',
                'description': '',
                'thumbnail': 'md5:2fc044a19bab450fef8f1931e7920a18',
-                'post_date': '2023-09-25 18:28:11',
+                'post_date': '20230925',
                'uploader': 'md5:9fae59847f1f58d1da8f2772016c12f3',
                'age_limit': 18
            }
@@ -41,7 +39,7 @@ class ZenPornIE(InfoExtractor):
                'title': 'md5:47aebdf87644ec91e8b1a844bc832451',
                'description': '',
                'thumbnail': 'https://tn.manysex.com/contents/videos_screenshots/2297000/2297875/480x270/1.jpg',
-                'post_date': '2023-09-21 04:42:51',
+                'post_date': '20230921',
                'uploader': 'Lois Clarke',
                'age_limit': 18
            }
@@ -56,7 +54,7 @@ class ZenPornIE(InfoExtractor):
                'title': 'Amateur students having a fuck fest at club',
                'description': '',
                'thumbnail': 'https://tn.txxx.tube/contents/videos_screenshots/12791000/12791908/288x162/1.jpg',
-                'post_date': '2019-10-05 12:12:34',
+                'post_date': '20191005',
                'uploader': 'Jackopenass',
                'age_limit': 18
            }
@@ -71,33 +69,24 @@ class ZenPornIE(InfoExtractor):
                'title': 'Glad You Came',
                'description': '',
                'thumbnail': 'https://vpim.m3pd.com/contents/videos_screenshots/111000/111585/480x270/1.jpg',
-                'post_date': '2023-10-24 15:50:03',
+                'post_date': '20231024',
                'uploader': 'Martin Rudenko',
                'age_limit': 18
            }
        }
    ]

-    def _extract_embed_info(self, source):
-        embed = namedtuple('embed', ['ext_domain', 'extr_id'])
-        regex = re.compile(
-            r'https:\/\/(?P<ext_domain>[\w.-]+\.\w{3})\/embed\/(?P<extr_id>\d+)\/')
-        match = regex.search(source)
-
-        if match:
-            return embed(match.group('ext_domain'), match.group('extr_id'))
-        else:
-            return embed(None, None)
-
-    def _gen_info_url(self, embed, lifetime=86400):
-        dyn_a = int_or_none(1e6 * math.floor(int_or_none(embed.extr_id) / 1e6))
-        dyn_b = int_or_none(1e3 * math.floor(int_or_none(embed.extr_id) / 1e3))
+    def _gen_info_url(self, ext_domain, extr_id, lifetime=86400):
+        """ This function is a reverse engineering from the website javascript """
+        dyn_a = int_or_none(1e6 * math.floor(int_or_none(extr_id) / 1e6))
+        dyn_b = int_or_none(1e3 * math.floor(int_or_none(extr_id) / 1e3))
        if dyn_a is None or dyn_b is None:
            raise ExtractorError('Unable to generate the ``gen_info_url``.')

-        return f'https://{embed.ext_domain}/api/json/video/{lifetime}/{dyn_a}/{dyn_b}/{embed.extr_id}.json'
+        return f'https://{ext_domain}/api/json/video/{lifetime}/{dyn_a}/{dyn_b}/{extr_id}.json'

    def _decode_video_url(self, ext_domain, encoded_url):
+        """ This function is a reverse engineering from the website javascript """
        cust_char_set = 'АВСDЕFGHIJKLМNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,~'
        decoded_url = ''
        cur_pos = 0
@@ -134,34 +123,36 @@ class ZenPornIE(InfoExtractor):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

-        embed = self._extract_embed_info(webpage)
-        if not embed.ext_domain or not embed.extr_id:
-            raise ExtractorError('Unable to retrieve the ``embed`` info.')
+        ext_domain, extr_id = self._search_regex(
+            r'https:\/\/(?P<ext_domain>[\w.-]+\.\w{3})\/embed\/(?P<extr_id>\d+)\/',
+            webpage, 'embed_info', group=('ext_domain', 'extr_id'))

-        info_url = self._gen_info_url(embed)
-        info_json = self._download_json(
-            info_url, video_id, note="Downloading JSON metadata for the video info.")
+        info_json = self._download_json(self._gen_info_url(ext_domain, extr_id),
+                                        video_id, note="Downloading JSON metadata for the video info.")
+        if not info_json:
+            raise ExtractorError('Unable to retrieve the video info.')

-        video_info_url = f'https://{embed.ext_domain}/api/videofile.php?video_id={embed.extr_id}&lifetime=8640000'
-        video_json = self._download_json(
-            video_info_url, video_id, note="Downloading JSON metadata for the video location.")
+        video_json = self._download_json(f'https://{ext_domain}/api/videofile.php?video_id={extr_id}&lifetime=8640000',
+                                         video_id, note="Downloading JSON metadata for the video location.")
+        if not video_json:
+            raise ExtractorError('Unable to retrieve the the video location.')

        encoded_url = video_json[0].get('video_url')
        if not encoded_url:
-            raise ExtractorError('Unable to retrieve the ``encoded_url``.')
+            raise ExtractorError('Unable to retrieve the `encoded_url` value.')

-        download_url = self._decode_video_url(embed.ext_domain, encoded_url)
+        download_url = self._decode_video_url(ext_domain, encoded_url)
        if not download_url:
            raise ExtractorError('Unable to retrieve the ``download_url``.')

        return {
            'id': video_id,
-            'extr_id': embed.extr_id,
+            'extr_id': extr_id,
            'ext': determine_ext(video_json[0].get('format')),
            'title': traverse_obj(info_json, ('video', 'title')),
            'description': traverse_obj(info_json, ('video', 'description')),
            'thumbnail': traverse_obj(info_json, ('video', 'thumb')),
-            'post_date': traverse_obj(info_json, ('video', 'post_date')),
+            'post_date': unified_strdate(traverse_obj(info_json, ('video', 'post_date'))),
            'uploader': traverse_obj(info_json, ('video', 'user', 'username')),
            'url': download_url,
            'age_limit': 18
Author	SHA1	Message	Date
SirElderling	ce7f2057c0	[ZenPorn] Add extractor - remove trailing whitespace	2023-11-05 18:55:08 +00:00
SirElderling	6312910f3a	[ZenPorn] Add extractor - code cleanups	2023-11-05 13:55:23 +00:00