Compare commits

...

6 Commits

Author SHA1 Message Date
scrat5h
990df2563d
Merge df8c550abe into e079ffbda6 2024-11-18 03:41:01 +05:30
scrat5h
df8c550abe Revert "[extractor/pornhub] Remove fetch of /video/get_media for formats"
This reverts commit 9ecbbcd844.
2022-11-23 12:34:42 +01:00
scrat5h
9ecbbcd844 [extractor/pornhub] Remove fetch of /video/get_media for formats
Previously, URLs that contained `/video/get_media` returned JSON with
the available formats.

Some time ago Pornhub seems to have removed this endpoint and started to
return `HTTP Error 403: Forbidden`, see #4298.

Nowadays it seems to serve the originally requested HTML page which, of
course, cannot be parsed with a JSON parser. yt-dlp produced this WARNING:

```
Failed to parse JSON: Expecting value in '': line 1 column 1 (char 0).
```

Since we are already getting the formats of the video by other means
(`mpd` or `m3u8`), this change removes the fetching of URLs that no
longer provide any value to us.

Fixes: #5615
2022-11-22 23:03:04 +01:00
scrat5h
6887f87208 [extractor/pornhub] URL to subtitles is relative
The `closedCaptionsFile` field on the page is not an absolute URL (anymore?)
but a relative one.

This change makes subtitle extraction work again.
2022-11-22 23:03:04 +01:00
scrat5h
1138e33ac5 [extractor/pornhub] Add data for tests 2022-11-22 23:03:04 +01:00
scrat5h
bdceb022d0 [extractor/pornhub] uploader in test renamed from Babes to BABES-COM 2022-11-22 23:03:04 +01:00

View File

@ -11,6 +11,7 @@ from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
NO_DEFAULT, NO_DEFAULT,
ExtractorError, ExtractorError,
base_url,
clean_html, clean_html,
determine_ext, determine_ext,
format_field, format_field,
@ -23,6 +24,7 @@ from ..utils import (
update_url_query, update_url_query,
url_or_none, url_or_none,
urlencode_postdata, urlencode_postdata,
urljoin,
) )
@ -137,12 +139,14 @@ class PornHubIE(PornHubBaseIE):
_EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)'] _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)']
_TESTS = [{ _TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
'md5': 'a6391306d050e4547f62b3f485dd9ba9', 'md5': '4d4a4e9178b655776f86cf89ecaf0edf',
'info_dict': { 'info_dict': {
'id': '648719015', 'id': '648719015',
'ext': 'mp4', 'ext': 'mp4',
'thumbnail': r're:^https://.i\.phncdn\.com/videos/201306/28/14084201/original/.*\.jpg',
'title': 'Seductive Indian beauty strips down and fingers her pink pussy', 'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
'uploader': 'Babes', 'uploader': 'BABES-COM',
'uploader_id': '/users/babes-com',
'upload_date': '20130628', 'upload_date': '20130628',
'timestamp': 1372447216, 'timestamp': 1372447216,
'duration': 361, 'duration': 361,
@ -207,11 +211,22 @@ class PornHubIE(PornHubBaseIE):
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph601dc30bae19a', 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph601dc30bae19a',
'info_dict': { 'info_dict': {
'id': 'ph601dc30bae19a', 'id': 'ph601dc30bae19a',
'ext': 'mp4',
'uploader': 'Projekt Melody', 'uploader': 'Projekt Melody',
'uploader_id': 'projekt-melody', 'uploader_id': 'projekt-melody',
'upload_date': '20210205', 'upload_date': '20210205',
'title': '"Welcome to My Pussy Mansion" - CB Stream (02/03/21)', 'title': '"Welcome to My Pussy Mansion" - CB Stream (02/03/21)',
'thumbnail': r're:https?://.+', 'thumbnail': r're:https?://.+',
'age_limit': 18,
'view_count': int,
'cast': [],
'like_count': int,
'comment_count': int,
'dislike_count': int,
'timestamp': 1612564932,
'duration': 8173,
'categories': list,
'tags': list,
}, },
}, { }, {
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
@ -317,10 +332,10 @@ class PornHubIE(PornHubBaseIE):
r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'), r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
video_id) video_id)
if flashvars: if flashvars:
subtitle_url = url_or_none(flashvars.get('closedCaptionsFile')) subtitle_url = flashvars.get('closedCaptionsFile')
if subtitle_url: if subtitle_url:
subtitles.setdefault('en', []).append({ subtitles.setdefault('en', []).append({
'url': subtitle_url, 'url': urljoin(base_url(url), subtitle_url),
'ext': 'srt', 'ext': 'srt',
}) })
thumbnail = flashvars.get('image_url') thumbnail = flashvars.get('image_url')