Compare commits

...

35 Commits

Author SHA1 Message Date
Spencer Baer d28f03569f
Merge e287aec51d into 5904853ae5 2024-05-07 13:04:30 +03:00
bashonly 5904853ae5
[ie/crunchyroll] Support browser impersonation (#9857)
Closes #7442
Authored by: bashonly
2024-05-05 23:15:32 +00:00
Chris Caruso c8bf48f3a8
[ie/cbc.ca:player] Improve `_VALID_URL` (#9866)
Closes #9825
Authored by: carusocr
2024-05-05 23:02:24 +00:00
The-MAGI 351368cb9a
[ie/youporn] Fix extractor (#8827)
Closes #7967
Authored by: The-MAGI
2024-05-05 22:57:38 +00:00
Spencer Baer e287aec51d fix sound id parsing 2024-03-14 16:34:30 -05:00
Spencer Baer 96b5090f21 remove duplicate info 2024-03-14 16:08:59 -05:00
Spencer Baer 71becb6351 Merge branch 'yt-dlp:master' into master 2024-03-14 16:01:19 -05:00
Spencer Baer 2e170813ce
Merge branch 'yt-dlp:master' into master 2024-03-04 21:44:13 -06:00
Spencer Baer 06106f9a3b
Merge branch 'yt-dlp:master' into master 2024-02-25 08:09:25 -06:00
Spencer Baer 1545be288e
Merge branch 'yt-dlp:master' into master 2024-02-24 12:12:12 -06:00
Spencer Baer e1220755f3
Merge branch 'yt-dlp:master' into master 2024-02-10 12:26:42 -06:00
Spencer Baer d39862411c
Merge branch 'yt-dlp:master' into master 2024-01-05 16:43:11 -06:00
Spencer Baer f12029a3c3
Merge branch 'yt-dlp:master' into master 2023-12-26 05:05:38 -06:00
Spencer Baer 8df4ae577a
Merge branch 'yt-dlp:master' into master 2023-11-20 13:42:02 -06:00
Spencer Baer 1cd37b06b6
Merge branch 'yt-dlp:master' into master 2023-11-14 18:34:36 -06:00
Spencer Baer 23d9355671
Merge branch 'yt-dlp:master' into master 2023-10-07 19:54:16 -05:00
Spencer Baer 4cd23b99db
Merge branch 'yt-dlp:master' into master 2023-09-30 23:03:57 -05:00
Spencer Baer 9155019df2
Merge branch 'yt-dlp:master' into master 2023-09-13 16:14:57 -05:00
Spencer Baer 392ab3f298
Merge branch 'yt-dlp:master' into master 2023-08-15 22:58:38 -05:00
Spencer Baer 6c5b0afad2
Merge branch 'yt-dlp:master' into master 2023-07-30 22:17:23 -05:00
Spencer Baer 41e7c3ef81
Merge branch 'yt-dlp:master' into master 2023-07-22 23:11:45 -05:00
Spencer Baer fa185fd709
Merge branch 'yt-dlp:master' into master 2023-07-03 19:59:10 -05:00
Spencer Baer 63f9d6fdaa
Merge branch 'yt-dlp:master' into master 2023-06-19 22:17:03 -05:00
Spencer Baer d6c1a4f0a7 Merge remote-tracking branch 'upstream/master' 2023-05-26 15:31:16 -05:00
Spencer Baer 2cf2dfe794
Merge branch 'yt-dlp:master' into master 2023-03-05 11:50:15 -06:00
Spencer Baer 319a563f19
Merge branch 'yt-dlp:master' into master 2023-03-03 18:23:06 -06:00
Spencer Baer d0564d7b67
Merge branch 'yt-dlp:master' into master 2023-02-17 20:29:46 -06:00
Spencer Baer 49c4c6a422 Merge branch 'master' of https://github.com/spencerbaer/yt-dlp 2023-02-13 20:50:06 -06:00
Spencer Baer a8710389e3 Use id_str for sound id 2023-02-13 20:50:02 -06:00
Spencer Baer 4172bd903b
Merge branch 'yt-dlp:master' into master 2023-02-13 20:04:16 -06:00
Spencer Baer 5a156214ba Use attributions for contained track 2023-02-13 20:02:42 -06:00
Spencer Baer 7745febd99
Merge branch 'yt-dlp:master' into master 2023-02-07 10:59:16 -06:00
Spencer Baer c2675bfa91
Merge branch 'yt-dlp:master' into master 2023-02-03 09:26:20 -06:00
Spencer Baer 26346b0e21
Merge branch 'yt-dlp:master' into master 2023-01-20 14:50:10 -06:00
Spencer Baer d38752e958 Add track_id 2022-12-29 14:39:00 -06:00
4 changed files with 69 additions and 19 deletions

View File

@ -151,7 +151,7 @@ def _real_extract(self, url):
class CBCPlayerIE(InfoExtractor):
IE_NAME = 'cbc.ca:player'
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
_TESTS = [{
'url': 'http://www.cbc.ca/player/play/2683190193',
'md5': '64d25f841ddf4ddb28a235338af32e2c',
@ -277,6 +277,28 @@ class CBCPlayerIE(InfoExtractor):
'location': 'Canada',
'media_type': 'Full Program',
},
}, {
'url': 'https://www.cbc.ca/player/play/video/1.7194274',
'md5': '188b96cf6bdcb2540e178a6caa957128',
'info_dict': {
'id': '2334524995812',
'ext': 'mp4',
'title': '#TheMoment a rare white spirit moose was spotted in Alberta',
'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
'timestamp': 1714788791,
'duration': 77.678,
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/201/543/THE_MOMENT.jpg',
'uploader': 'CBCC-NEW',
'chapters': 'count:0',
'upload_date': '20240504',
'categories': 'count:3',
'series': 'The National',
'tags': 'count:15',
'creators': ['encoder'],
'location': 'Canada',
'media_type': 'Excerpt',
},
}, {
'url': 'cbcplayer:1.7159484',
'only_matching': True,

View File

@ -53,15 +53,19 @@ def _set_auth_info(self, response):
CrunchyrollBaseIE._AUTH_EXPIRY = time_seconds(seconds=traverse_obj(response, ('expires_in', {float_or_none}), default=300) - 10)
def _request_token(self, headers, data, note='Requesting token', errnote='Failed to request token'):
try: # TODO: Add impersonation support here
try:
return self._download_json(
f'{self._BASE_URL}/auth/v1/token', None, note=note, errnote=errnote,
headers=headers, data=urlencode_postdata(data))
headers=headers, data=urlencode_postdata(data), impersonate=True)
except ExtractorError as error:
if not isinstance(error.cause, HTTPError) or error.cause.status != 403:
raise
if target := error.cause.response.extensions.get('impersonate'):
raise ExtractorError(f'Got HTTP Error 403 when using impersonate target "{target}"')
raise ExtractorError(
'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
'Request blocked by Cloudflare. '
'Install the required impersonation dependency if possible, '
'or else navigate to Crunchyroll in your browser, '
'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
'and your browser\'s User-Agent (with --user-agent)', expected=True)

View File

@ -370,13 +370,19 @@ def extract_addr(addr, add_meta={}):
contained_music_track = traverse_obj(
music_info, ('matched_song', 'title'), ('matched_pgc_sound', 'title'), expected_type=str)
contained_music_author = traverse_obj(
music_info, ('matched_song', 'author'), ('matched_pgc_sound', 'author'), 'author', expected_type=str)
music_info, ('matched_song', 'author'), ('matched_pgc_sound', 'author'), expected_type=str)
is_generic_og_trackname = music_info.get('is_original_sound') and music_info.get('title') == 'original sound - %s' % music_info.get('owner_handle')
if is_generic_og_trackname:
music_track, music_author = contained_music_track or 'original sound', contained_music_author
music_track, music_author = 'original sound' if is_generic_og_trackname else music_info.get('title'), traverse_obj(music_info, ('author', {str}))
if contained_music_track or contained_music_author:
contained_music_info = [{
'relation': 'music',
'track': contained_music_track,
'artist': contained_music_author,
}]
else:
music_track, music_author = music_info.get('title'), traverse_obj(music_info, ('author', {str}))
contained_music_info = None
return {
'id': aweme_id,
@ -400,7 +406,10 @@ def extract_addr(addr, add_meta={}):
}),
'uploader_url': user_url,
'track': music_track,
'track_id': str_or_none(music_info.get('id')),
'album': str_or_none(music_info.get('album')) or None,
'attributions': contained_music_info,
'timestamp': int_or_none(aweme_detail.get('create_time')),
'artists': re.split(r'(?:, | & )', music_author) if music_author else None,
'formats': formats,
'subtitles': self.extract_subtitles(aweme_detail, aweme_id),
@ -475,6 +484,7 @@ def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id):
}, expected_type=int_or_none),
**traverse_obj(music_info, {
'track': ('title', {str}),
'track_id': ('id', {str}, {lambda x: x or None}),
'album': ('album', {str}, {lambda x: x or None}),
'artists': ('authorName', {str}, {lambda x: [x] if x else None}),
}),
@ -517,6 +527,7 @@ class TikTokIE(TikTokBaseIE):
'artist': 'Ysrbeats',
'album': 'Lehanga',
'track': 'Lehanga',
'track_id': '6716465478027447045',
},
'skip': '404 Not Found',
}, {
@ -545,6 +556,7 @@ class TikTokIE(TikTokBaseIE):
'comment_count': int,
'artists': ['Evan Todd', 'Jessica Keenan Wynn', 'Alice Lee', 'Barrett Wilbert Weed', 'Jon Eidson'],
'track': 'Big Fun',
'track_id': '209649576000286720',
},
}, {
# Banned audio, only available on the app
@ -561,7 +573,15 @@ class TikTokIE(TikTokBaseIE):
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
'channel_id': 'MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
'track': 'Boka Dance',
'track_id': '6984138615588653826',
'artists': ['md5:29f238c49bc0c176cb3cef1a9cea9fa6'],
'attributions': [
{
'relation': 'music',
'track': 'Boka Loka Dance',
'artist': 'FAUJI NOSS',
}
],
'timestamp': 1626121503,
'duration': 18,
'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
@ -586,6 +606,7 @@ class TikTokIE(TikTokBaseIE):
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_',
'channel_id': 'MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_',
'track': 'Promoted Music',
'track_id': '7042692955299203841',
'timestamp': 1639754738,
'duration': 30,
'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
@ -611,6 +632,7 @@ class TikTokIE(TikTokBaseIE):
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
'channel_id': 'MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
'track': 'original sound',
'track_id': '7059698289792273198',
'timestamp': 1643714123,
'duration': 6,
'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',

View File

@ -72,15 +72,15 @@ class YouPornIE(InfoExtractor):
'id': '16290308',
'age_limit': 18,
'categories': [],
'description': 'md5:00ea70f642f431c379763c17c2f396bc',
'description': str, # TODO: detect/remove SEO spam description in ytdl backport
'display_id': 'tinderspecial-trailer1',
'duration': 298.0,
'ext': 'mp4',
'upload_date': '20201123',
'uploader': 'Ersties',
'tags': [],
'thumbnail': 'https://fi1.ypncdn.com/202011/23/16290308/original/8/tinderspecial-trailer1-8(m=eaAaaEPbaaaa).jpg',
'timestamp': 1606089600,
'thumbnail': r're:https://.+\.jpg',
'timestamp': 1606147564,
'title': 'Tinder In Real Life',
'view_count': int,
}
@ -88,11 +88,17 @@ class YouPornIE(InfoExtractor):
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
definitions = self._download_json(
f'https://www.youporn.com/api/video/media_definitions/{video_id}/', display_id or video_id)
self._set_cookie('.youporn.com', 'age_verified', '1')
webpage = self._download_webpage(f'https://www.youporn.com/watch/{video_id}', video_id)
definitions = self._search_json(r'\bplayervars\s*:', webpage, 'player vars', video_id)['mediaDefinitions']
def get_format_data(data, f):
return traverse_obj(data, lambda _, v: v['format'] == f and url_or_none(v['videoUrl']))
def get_format_data(data, stream_type):
info_url = traverse_obj(data, (lambda _, v: v['format'] == stream_type, 'videoUrl', {url_or_none}, any))
if not info_url:
return []
return traverse_obj(
self._download_json(info_url, video_id, f'Downloading {stream_type} info JSON', fatal=False),
lambda _, v: v['format'] == stream_type and url_or_none(v['videoUrl']))
formats = []
# Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
@ -123,10 +129,6 @@ def get_format_data(data, f):
f['height'] = height
formats.append(f)
webpage = self._download_webpage(
'http://www.youporn.com/watch/%s' % video_id, display_id,
headers={'Cookie': 'age_verified=1'})
title = self._html_search_regex(
r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
webpage, 'title', default=None) or self._og_search_title(