Compare commits

...

6 Commits

4 changed files with 225 additions and 90 deletions

View File

@ -123,8 +123,10 @@ def generator(test_case, tname):
params['outtmpl'] = tname + '_' + params['outtmpl'] params['outtmpl'] = tname + '_' + params['outtmpl']
if is_playlist and 'playlist' not in test_case: if is_playlist and 'playlist' not in test_case:
params.setdefault('extract_flat', 'in_playlist') params.setdefault('extract_flat', 'in_playlist')
if traverse_obj(test_case, 'playlist_count', 'playlist_maxcount', default=-1) < 0: # only process enough items for specified tests
params.setdefault('playlistend', test_case.get('playlist_mincount')) pl_counts = traverse_obj(test_case, (None, ('playlist_count', 'playlist_mincount', 'playlist_maxcount')))
if pl_counts:
params.setdefault('playlistend', max(pl_counts) + 1)
params.setdefault('skip_download', True) params.setdefault('skip_download', True)
ydl = YoutubeDL(params, auto_init=False) ydl = YoutubeDL(params, auto_init=False)

View File

@ -1603,6 +1603,7 @@ from .xhamster import (
XHamsterIE, XHamsterIE,
XHamsterEmbedIE, XHamsterEmbedIE,
XHamsterCategoryIE, XHamsterCategoryIE,
XHamsterChannelIE,
XHamsterCreatorIE, XHamsterCreatorIE,
XHamsterSearchIE, XHamsterSearchIE,
XHamsterSearchKeyIE, XHamsterSearchKeyIE,

View File

@ -12,6 +12,7 @@ from ..compat import (
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
classpropinit,
clean_html, clean_html,
determine_ext, determine_ext,
extract_attributes, extract_attributes,
@ -22,6 +23,7 @@ from ..utils import (
merge_dicts, merge_dicts,
parse_duration, parse_duration,
parse_qs, parse_qs,
remove_start,
T, T,
traverse_obj, traverse_obj,
txt_or_none, txt_or_none,
@ -32,6 +34,18 @@ from ..utils import (
class XHamsterBaseIE(InfoExtractor): class XHamsterBaseIE(InfoExtractor):
# base domains that don't redirect to xhamster.com (not xhday\d\.com, eg)
_DOMAINS = '(?:%s)' % '|'.join((
r'xhamster\d*\.(?:com|desi)',
r'xhamster\.one',
r'xhms\.pro',
r'xh(?:open|access|victory|big|channel)\.com',
r'(?:full|mega)xh\.com',
r'xh(?:vid|official|planet)\d*\.com',
# requires Tor
r'xhamster[a-z2-7]+\.onion',
))
def _download_webpage_handle(self, url, video_id, *args, **kwargs): def _download_webpage_handle(self, url, video_id, *args, **kwargs):
# note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None) # note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None)
# default UA to 'Mozilla' (only) to avoid interstitial page # default UA to 'Mozilla' (only) to avoid interstitial page
@ -53,25 +67,16 @@ class XHamsterBaseIE(InfoExtractor):
class XHamsterIE(XHamsterBaseIE): class XHamsterIE(XHamsterBaseIE):
# base domains that don't redirect to xhamster.com (not xhday\d\.com, eg) _VALID_URL = classpropinit(
_DOMAINS = '(?:%s)' % '|'.join(( lambda cls:
r'xhamster\d*\.(?:com|desi)', r'''(?x)
r'xhamster\.one', https?://
r'xhms\.pro', (?:.+?\.)?%s/
r'xh(?:open|access|victory|big|channel)\.com', (?:
r'(?:full|mega)xh\.com', movies/(?P<id>[\dA-Za-z]+)/(?P<display_id>[^/]*)\.html|
r'xh(?:vid|official|planet)\d*\.com', videos/(?P<display_id_2>[^/]*)-(?P<id_2>[\dA-Za-z]+)
# requires Tor )
r'xhamster[a-z2-7]+\.onion', ''' % cls._DOMAINS)
))
_VALID_URL = r'''(?x)
https?://
(?:.+?\.)?%s/
(?:
movies/(?P<id>[\dA-Za-z]+)/(?P<display_id>[^/]*)\.html|
videos/(?P<display_id_2>[^/]*)-(?P<id_2>[\dA-Za-z]+)
)
''' % _DOMAINS
_TESTS = [{ _TESTS = [{
'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445', 'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
'md5': '34e1ab926db5dc2750fed9e1f34304bb', 'md5': '34e1ab926db5dc2750fed9e1f34304bb',
@ -379,7 +384,9 @@ class XHamsterIE(XHamsterBaseIE):
class XHamsterEmbedIE(XHamsterBaseIE): class XHamsterEmbedIE(XHamsterBaseIE):
_VALID_URL = r'https?://(?:.+?\.)?%s/xembed\.php\?video=(?P<id>\d+)' % XHamsterIE._DOMAINS _VALID_URL = classpropinit(
lambda cls:
r'https?://(?:.+?\.)?%s/xembed\.php\?video=(?P<id>\d+)' % cls._DOMAINS)
_TEST = { _TEST = {
'url': 'http://xhamster.com/xembed.php?video=3328539', 'url': 'http://xhamster.com/xembed.php?video=3328539',
'info_dict': { 'info_dict': {
@ -421,9 +428,19 @@ class XHamsterEmbedIE(XHamsterBaseIE):
class XHamsterPlaylistIE(XHamsterBaseIE): class XHamsterPlaylistIE(XHamsterBaseIE):
_NEXT_PAGE_RE = r'(<a\b[^>]+\bdata-page\s*=\s*["\']next[^>]+>)' _NEXT_PAGE_RE = r'(<a\b[^>]+\bdata-page\s*=\s*["\']next[^>]+>)'
_VALID_URL_TPL = r'''(?x)
https?://(?:.+?\.)?%s
/%s/(?P<id>[^/?#]+)
(?:(?P<sub>(?:/%s)+))?
(?:/(?P<pnum>\d+))?(?:[/?#]|$)
'''
def _page_url(self, user_id, page_num, url=None): def _page_url(self, user_id, subs, page_num, url):
return self._PAGE_URL_TPL % (user_id, page_num) n_url = self._PAGE_URL_TPL % (
join_nonempty(user_id, *subs, delim='/'), page_num)
n_url = compat_urlparse.urlsplit(n_url)
url = compat_urlparse.urlsplit(url)
return compat_urlparse.urlunsplit(n_url[:3] + url[3:])
def _extract_entries(self, page, user_id): def _extract_entries(self, page, user_id):
for video_tag_match in re.finditer( for video_tag_match in re.finditer(
@ -442,9 +459,9 @@ class XHamsterPlaylistIE(XHamsterBaseIE):
self._search_regex(self._NEXT_PAGE_RE, page, 'next page', default=None), self._search_regex(self._NEXT_PAGE_RE, page, 'next page', default=None),
(T(extract_attributes), 'href', T(url_or_none))) (T(extract_attributes), 'href', T(url_or_none)))
def _entries(self, user_id, page_num=None, page=None, url=None): def _entries(self, user_id, subs, page_num=None, page=None, url=None):
page_1 = 1 if page_num is None else page_num page_1 = 1 if page_num is None else page_num
next_page_url = self._page_url(user_id, page_1, url) next_page_url = self._page_url(user_id, subs, page_1, url)
for pagenum in itertools.count(page_1): for pagenum in itertools.count(page_1):
if not page: if not page:
page = self._download_webpage( page = self._download_webpage(
@ -463,34 +480,28 @@ class XHamsterPlaylistIE(XHamsterBaseIE):
break break
page = None page = None
def _fancy_page_url(self, user_id, page_num, url):
sub = self._match_valid_url(url).group('sub')
n_url = self._PAGE_URL_TPL % (
join_nonempty(user_id, sub, delim='/'), page_num)
return compat_urlparse.urljoin(n_url, url)
def _fancy_get_title(self, user_id, page_num, url):
sub = self._match_valid_url(url).group('sub')
sub = (sub or '').split('/')
sub.extend((compat_urlparse.urlsplit(url).query or '').split('&'))
sub.append('all' if page_num is None else ('p%d' % page_num))
return '%s (%s)' % (user_id, join_nonempty(*sub, delim=','))
@staticmethod @staticmethod
def _get_title(user_id, page_num, url=None): def _get_title(user_id, subs, page_num, url):
return '%s (%s)' % (user_id, 'all' if page_num is None else ('p%d' % page_num)) subs = subs[:]
if url:
subs.extend((compat_urlparse.urlsplit(url).query or '').split('&'))
subs.append('all' if page_num is None else ('p%d' % page_num))
return '%s (%s)' % (user_id, join_nonempty(*subs, delim=','))
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url).groupdict()
user_id = mobj.group('id') user_id = mobj['id']
page_num = int_or_none(mobj.groupdict().get('pnum')) page_num = int_or_none(mobj.get('pnum'))
subs = remove_start(mobj.get('sub') or '', '/').split('/')
return self.playlist_result( return self.playlist_result(
self._entries(user_id, page_num, url=url), user_id, self._entries(user_id, subs, page_num, url=url), user_id,
self._get_title(user_id, page_num, url=url)) self._get_title(user_id, subs, page_num, url=url))
class XHamsterUserIE(XHamsterPlaylistIE): class XHamsterUserIE(XHamsterPlaylistIE):
_VALID_URL = r'https?://(?:.+?\.)?%s/users/(?P<id>[^/?#&]+)(?:/videos/(?P<pnum>\d+))?' % XHamsterIE._DOMAINS _VALID_URL = classpropinit(
lambda cls:
r'https?://(?:.+?\.)?%s/users/(?P<id>[^/?#&]+)(?P<sub>/favorites)?(?:/videos/(?P<pnum>\d+))?' % cls._DOMAINS)
_PAGE_URL_TPL = 'https://xhamster.com/users/%s/videos/%s' _PAGE_URL_TPL = 'https://xhamster.com/users/%s/videos/%s'
_TESTS = [{ _TESTS = [{
# Paginated user profile # Paginated user profile
@ -513,10 +524,27 @@ class XHamsterUserIE(XHamsterPlaylistIE):
'url': 'https://xhamster.com/users/firatkaan/videos', 'url': 'https://xhamster.com/users/firatkaan/videos',
'info_dict': { 'info_dict': {
'id': 'firatkaan', 'id': 'firatkaan',
'title': 'firatkaan (all)',
}, },
'playlist_mincount': 1, 'playlist_mincount': 1,
}, { }, {
# the below doesn't match but is redirected via generic # User with `favorites`
'url': 'https://xhamster.com/users/cubafidel/videos/',
'info_dict': {
'id': 'cubafidel',
'title': 'cubafidel (all)',
},
'playlist_maxcount': 300,
}, {
# Faves of user with `favorites`
'url': 'https://xhamster.com/users/cubafidel/favorites/videos/',
'info_dict': {
'id': 'cubafidel',
'title': 'cubafidel (favorites,all)',
},
'playlist_mincount': 400,
}, {
# below URL doesn't match but is redirected via generic
# 'url': 'https://xhday.com/users/mobhunter', # 'url': 'https://xhday.com/users/mobhunter',
'url': 'https://xhvid.com/users/pelushe21', 'url': 'https://xhvid.com/users/pelushe21',
'only_matching': True, 'only_matching': True,
@ -525,13 +553,13 @@ class XHamsterUserIE(XHamsterPlaylistIE):
class XHamsterCreatorIE(XHamsterPlaylistIE): class XHamsterCreatorIE(XHamsterPlaylistIE):
# `pornstars`, `celebrities` and `creators` share the same namespace # `pornstars`, `celebrities` and `creators` share the same namespace
_VALID_URL = r'''(?x) _VALID_URL = classpropinit(
https?://(?:.+?\.)?%s lambda cls:
/(?:(?:gay|shemale)/)?(?:creators|pornstars|celebrities) cls._VALID_URL_TPL % (
/(?P<id>[^/?#]+) cls._DOMAINS,
(?:(?P<sub>(?:/(?:hd|4k|newest|full-length|exclusive))+))? '(?:(?:gay|shemale)/)?(?:creators|pornstars|celebrities)',
(?:/(?P<pnum>\d+))?(?:[/?#]|$) r'(?:hd|4k|newest|full-length|exclusive|best(?:/(?:weekly|monthly|year-\d{4}))?)',
''' % XHamsterIE._DOMAINS ))
_PAGE_URL_TPL = 'https://xhamster.com/creators/%s/%s' _PAGE_URL_TPL = 'https://xhamster.com/creators/%s/%s'
_TESTS = [{ _TESTS = [{
# Paginated creator profile # Paginated creator profile
@ -569,24 +597,66 @@ class XHamsterCreatorIE(XHamsterPlaylistIE):
'playlist_maxcount': 30, 'playlist_maxcount': 30,
}] }]
def _page_url(self, user_id, page_num, url):
return self._fancy_page_url(user_id, page_num, url)
def _get_title(self, user_id, page_num, url): class XHamsterChannelBaseIE(XHamsterPlaylistIE):
return self._fancy_get_title(user_id, page_num, url)
class XHamsterCategoryIE(XHamsterPlaylistIE):
# `tags` and `categories` share the same namespace
_VALID_URL = r'''(?x)
https?://(?:.+?\.)?%s
(?:(?P<queer>gay|shemale)/)?(?:/categories|/tags|(?=/hd))
/(?P<id>[^/?#]+)
(?P<sub>(?:/(?:hd|4k|producer|creator|best(?:/(?:weekly|monthly|year-\d{4}))?))+)?
(?:/(?P<pnum>\d+))?(?:[/?#]|$)
''' % XHamsterIE._DOMAINS
_PAGE_URL_TPL = 'https://xhamster.com/categories/%s/%s'
_NEXT_PAGE_RE = r'(<a\b[^>]+\bclass\s*=\s*("|\')(?:[\w-]+\s+)*?prev-next-list-link--next(?:\s+[\w-]+)*\2[^>]+>)' _NEXT_PAGE_RE = r'(<a\b[^>]+\bclass\s*=\s*("|\')(?:[\w-]+\s+)*?prev-next-list-link--next(?:\s+[\w-]+)*\2[^>]+>)'
class XHamsterChannelIE(XHamsterChannelBaseIE):
_VALID_URL = classpropinit(
lambda cls:
cls._VALID_URL_TPL % (
cls._DOMAINS,
'(?:(?:gay|shemale)/)?channels',
r'(?:hd|4k|newest|full-length|best(?:/(?:weekly|monthly|year-\d{4}))?)',
))
_PAGE_URL_TPL = 'https://xhamster.com/channels/%s/%s'
_TESTS = [{
# Paginated channel
'url': 'https://xhamster.com/channels/freeuse-fantasy',
'info_dict': {
'id': 'freeuse-fantasy',
'title': 'freeuse-fantasy (all)',
},
'playlist_mincount': 90,
}, {
# Non-paginated channel (for now?)
'url': 'https://xhamster.com/channels/oopsie',
'info_dict': {
'id': 'oopsie',
'title': 'oopsie (all)',
},
'playlist_mincount': 30,
'playlist_maxcount': 48,
}, {
# Channel filtered by path
'url': 'https://xhamster.com/channels/freeuse-fantasy/best/year-2022',
'info_dict': {
'id': 'freeuse-fantasy',
'title': 'freeuse-fantasy (best,year-2022,all)',
},
'playlist_count': 30,
}, {
# Channel filtered by query
'url': 'https://xhamster.com/channels/freeuse-fantasy?min-duration=40',
'info_dict': {
'id': 'freeuse-fantasy',
'title': 'freeuse-fantasy (min-duration=40,all)',
},
'playlist_maxcount': 10,
}]
class XHamsterCategoryIE(XHamsterChannelBaseIE):
# `tags` and `categories` share the same namespace
_VALID_URL = classpropinit(
lambda cls:
cls._VALID_URL_TPL % (
cls._DOMAINS,
'(?:(?P<queer>gay|shemale)/)?(?:categories|tags|(?=hd))',
r'(?:hd|4k|producer|creator|best(?:/(?:weekly|monthly|year-\d{4}))?)',
))
_PAGE_URL_TPL = 'https://xhamster.com/categories/%s/%s'
_TESTS = [{ _TESTS = [{
# Paginated category/tag # Paginated category/tag
'url': 'https://xhamster.com/tags/hawaiian', 'url': 'https://xhamster.com/tags/hawaiian',
@ -624,26 +694,28 @@ class XHamsterCategoryIE(XHamsterPlaylistIE):
'playlist_maxcount': 20, 'playlist_maxcount': 20,
}] }]
def _page_url(self, user_id, page_num, url): def _page_url(self, user_id, subs, page_num, url):
queer, sub = self._match_valid_url(url).group('queer', 'sub') queer = self._match_valid_url(url).group('queer')
n_url = self._PAGE_URL_TPL % ( n_url = self._PAGE_URL_TPL % (
join_nonempty(queer, user_id, sub, delim='/'), page_num) join_nonempty(queer, user_id, *subs, delim='/'), page_num)
return compat_urlparse.urljoin(n_url, url) return compat_urlparse.urljoin(n_url, url)
def _get_title(self, user_id, page_num, url): def _get_title(self, user_id, subs, page_num, url):
queer, sub = self._match_valid_url(url).group('queer', 'sub') queer = self._match_valid_url(url).group('queer')
queer = [] if queer is None else [queer] if queer:
sub = queer + (sub or '').split('/') subs = [queer] + subs
sub.extend((compat_urlparse.urlsplit(url).query or '').split('&')) subs.extend((compat_urlparse.urlsplit(url).query or '').split('&'))
sub.append('all' if page_num is None else ('p%d' % page_num)) subs.append('all' if page_num is None else ('p%d' % page_num))
return '%s (%s)' % (user_id, join_nonempty(*sub, delim=',')) return '%s (%s)' % (user_id, join_nonempty(*subs, delim=','))
class XHamsterSearchIE(XHamsterPlaylistIE): class XHamsterSearchIE(XHamsterPlaylistIE):
_VALID_URL = r'''(?x) _VALID_URL = classpropinit(
https?://(?:.+?\.)?%s lambda cls:
/search/(?P<id>[^/?#]+) r'''(?x)
''' % XHamsterIE._DOMAINS https?://(?:.+?\.)?%s
/search/(?P<id>[^/?#]+)
''' % cls._DOMAINS)
_TESTS = [{ _TESTS = [{
# Single page result # Single page result
'url': 'https://xhamster.com/search/latvia', 'url': 'https://xhamster.com/search/latvia',
@ -672,20 +744,20 @@ class XHamsterSearchIE(XHamsterPlaylistIE):
}] }]
@staticmethod @staticmethod
def _page_url(user_id, page_num, url): def _page_url(user_id, subs, page_num, url):
return url return url
def _get_title(self, user_id, page_num, url=None): def _get_title(self, user_id, subs, page_num, url=None):
return super(XHamsterSearchIE, self)._get_title( return super(XHamsterSearchIE, self)._get_title(
user_id.replace('+', ' '), page_num, url) user_id.replace('+', ' '), [], page_num, url)
def _real_extract(self, url): def _real_extract(self, url):
user_id = self._match_id(url) user_id = self._match_id(url)
page_num = traverse_obj(url, ( page_num = traverse_obj(url, (
T(parse_qs), 'page', -1, T(int_or_none))) T(parse_qs), 'page', -1, T(int_or_none)))
return self.playlist_result( return self.playlist_result(
self._entries(user_id, page_num, url=url), user_id, self._entries(user_id, None, page_num, url=url), user_id,
self._get_title(user_id, page_num)) self._get_title(user_id, None, page_num))
class XHamsterSearchKeyIE(SearchInfoExtractor, XHamsterSearchIE): class XHamsterSearchKeyIE(SearchInfoExtractor, XHamsterSearchIE):

View File

@ -6510,3 +6510,63 @@ def join_nonempty(*values, **kwargs):
if from_dict is not None: if from_dict is not None:
values = (traverse_obj(from_dict, variadic(v)) for v in values) values = (traverse_obj(from_dict, variadic(v)) for v in values)
return delim.join(map(compat_str, filter(None, values))) return delim.join(map(compat_str, filter(None, values)))
# from yt-dlp
class classproperty(object):
    """Descriptor giving property-style access to a function of the class,
    optionally caching the computed value separately for each class"""

    def __new__(cls, *args, **kwargs):
        # the wrapped function is either `func=...` or the first positional arg
        if 'func' in kwargs:
            wrapped = kwargs.pop('func')
        elif args:
            wrapped, args = args[0], args[1:]
        else:
            wrapped = None
        if wrapped:
            return super(classproperty, cls).__new__(cls)
        # no function yet, as in `@classproperty(cache=True)`: hand back a
        # factory that builds the descriptor once the function is supplied
        return functools.partial(cls, *args, **kwargs)

    def __init__(self, func, **kwargs):
        functools.update_wrapper(self, func)
        self.func = func
        # `cache` is a kw-only option: a dict keyed by class when enabled
        self._cache = {} if kwargs.get('cache', False) else None

    def __get__(self, n, cls):
        store = self._cache
        if store is None:
            # caching disabled: evaluate on every access
            return self.func(cls)
        if cls not in store:
            store[cls] = self.func(cls)
        return store[cls]
class classpropinit(classproperty):
    """Lazily initialise a class variable from other class variables

    Parent class vars are not in scope when the `class suite` is
    evaluated, so `childvar = fn(parentvar)` is disallowed. Instead,
    the parent class has to be mentioned redundantly and unmaintainably,
    since the current class isn't yet bound.

    This descriptor evaluates a function of the class when the attribute
    is read and assigns the result to the class in place of the
    descriptor, so later reads get the plain value.

        class child(parent):
            # before
            childvar = fn(parent.parentvar)
            # now
            @classpropinit
            def childvar(cls):
                return fn(cls.parentvar)
            # or
            childvar = classpropinit(lambda cls: fn(cls.parentvar))
    """

    def __init__(self, func):
        functools.update_wrapper(self, func)
        # only a fallback: for a lambda this is '<lambda>', not the
        # attribute that the descriptor is actually assigned to
        self.name = func.__name__
        self.func = func

    def _bound_name(self, cls):
        """Return the attribute name under which this descriptor is stored
        in `cls` or one of its bases, or the function name if not found"""
        for klass in getattr(cls, '__mro__', (cls, )):
            for name, value in vars(klass).items():
                if value is self:
                    return name
        return self.name

    def __get__(self, _, cls):
        val = self.func(cls)
        # write back under the real attribute name so that the descriptor
        # is replaced even when it was created from a lambda
        setattr(cls, self._bound_name(cls), val)
        return val