Compare commits

..

4 Commits

Author SHA1 Message Date
John Victor
f0e8bc7c60
[ie/patreon] Fix embedded HLS extraction (#8993)
Closes #8973
Authored by: johnvictorfs
2024-01-21 22:36:59 +00:00
Stefan Lobbenmeier
c099ec9392
[ie/ard:mediathek] Support cookies to verify age (#9037)
Closes #9035
Authored by: StefanLobbenmeier
2024-01-21 20:54:11 +00:00
gmes78
c0ecceeefe
[ie/Rule34Video] Fix _VALID_URL (#9044)
Authored by: gmes78
2024-01-21 18:56:01 +00:00
u-spec-png
3e083191cd
[ie/Newgrounds:user] Fix extractor (#9046)
Closes #7308
Authored by: u-spec-png
2024-01-21 18:50:14 +00:00
4 changed files with 33 additions and 19 deletions

View File

@@ -8,6 +8,7 @@ from ..utils import (
 determine_ext,
 int_or_none,
 join_nonempty,
+jwt_decode_hs256,
 make_archive_id,
 parse_duration,
 parse_iso8601,
@@ -238,6 +239,7 @@ class ARDBetaMediathekIE(InfoExtractor):
 (?P<id>[a-zA-Z0-9]+)
 /?(?:[?#]|$)'''
 _GEO_COUNTRIES = ['DE']
+_TOKEN_URL = 'https://sso.ardmediathek.de/sso/token'
 _TESTS = [{
 'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
@@ -359,12 +361,27 @@ class ARDBetaMediathekIE(InfoExtractor):
 def _real_extract(self, url):
 display_id = self._match_id(url)
+query = {'embedded': 'false', 'mcV6': 'true'}
+headers = {}
+if self._get_cookies(self._TOKEN_URL).get('ams'):
+token = self._download_json(
+self._TOKEN_URL, display_id, 'Fetching token for age verification',
+'Unable to fetch age verification token', fatal=False)
+id_token = traverse_obj(token, ('idToken', {str}))
+decoded_token = traverse_obj(id_token, ({jwt_decode_hs256}, {dict}))
+user_id = traverse_obj(decoded_token, (('user_id', 'sub'), {str}), get_all=False)
+if not user_id:
+self.report_warning('Unable to extract token, continuing without authentication')
+else:
+headers['x-authorization'] = f'Bearer {id_token}'
+query['userId'] = user_id
+if decoded_token.get('age_rating') != 18:
+self.report_warning('Account is not verified as 18+; video may be unavailable')
 page_data = self._download_json(
-f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}', display_id, query={
-'embedded': 'false',
-'mcV6': 'true',
-})
+f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}',
+display_id, query=query, headers=headers)
 # For user convenience we use the old contentId instead of the longer crid
 # Ref: https://github.com/yt-dlp/yt-dlp/issues/8731#issuecomment-1874398283
@@ -383,7 +400,7 @@ class ARDBetaMediathekIE(InfoExtractor):
 media_data = traverse_obj(player_data, ('mediaCollection', 'embedded', {dict}))
 if player_data.get('blockedByFsk'):
-self.raise_no_formats('This video is only available after 22:00', expected=True)
+self.raise_login_required('This video is only available for age verified users or after 22:00')
 formats = []
 subtitles = {}

View File

@@ -3,15 +3,15 @@ import re
 from .common import InfoExtractor
 from ..utils import (
+OnDemandPagedList,
 clean_html,
 extract_attributes,
 get_element_by_id,
 int_or_none,
 parse_count,
 parse_duration,
+traverse_obj,
 unified_timestamp,
-OnDemandPagedList,
-try_get,
 )
@@ -263,19 +263,16 @@ class NewgroundsUserIE(InfoExtractor):
 def _fetch_page(self, channel_id, url, page):
 page += 1
 posts_info = self._download_json(
-f'{url}/page/{page}', channel_id,
+f'{url}?page={page}', channel_id,
 note=f'Downloading page {page}', headers={
 'Accept': 'application/json, text/javascript, */*; q = 0.01',
 'X-Requested-With': 'XMLHttpRequest',
 })
-sequence = posts_info.get('sequence', [])
-for year in sequence:
-posts = try_get(posts_info, lambda x: x['years'][str(year)]['items'])
-for post in posts:
-path, media_id = self._search_regex(
-r'<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
-post, 'url', group=(1, 2))
-yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)
+for post in traverse_obj(posts_info, ('items', ..., ..., {str})):
+path, media_id = self._search_regex(
+r'<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
+post, 'url', group=(1, 2))
+yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)
 def _real_extract(self, url):
 channel_id = self._match_id(url)

View File

@@ -275,7 +275,7 @@ class PatreonIE(PatreonBaseIE):
 'ext': ext,
 'url': post_file['url'],
 }
-elif name == 'video':
+elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
 formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
 return {
 **info,

View File

@@ -18,10 +18,10 @@ from ..utils.traversal import traverse_obj
 class Rule34VideoIE(InfoExtractor):
-_VALID_URL = r'https?://(?:www\.)?rule34video\.com/videos/(?P<id>\d+)'
+_VALID_URL = r'https?://(?:www\.)?rule34video\.com/videos?/(?P<id>\d+)'
 _TESTS = [
 {
-'url': 'https://rule34video.com/videos/3065157/shot-it-mmd-hmv/',
+'url': 'https://rule34video.com/video/3065157/shot-it-mmd-hmv/',
 'md5': 'ffccac2c23799dabbd192621ae4d04f3',
 'info_dict': {
 'id': '3065157',