Compare commits

..

10 Commits

Author SHA1 Message Date
SirElderling
49dfe3ab77
Merge branch 'yt-dlp:master' into zenporn 2023-10-31 19:29:13 +00:00
sepro
4a601c9eff
[ie/weverse] Fix login error handling (#8458)
Authored by: seproDev
2023-10-28 15:53:24 +00:00
Shubham
464327acdb
[ie/polskieradio:audition] Fix playlist extraction (#8459)
Closes #8419
Authored by: shubhexists
2023-10-28 15:50:08 +00:00
bashonly
ef79d20dc9
[ie/youtube] Check newly uploaded iOS HLS formats (#8336)
Closes #7747
Authored by: bashonly
2023-10-28 08:02:13 +00:00
bashonly
39abae2354
[ie/youtube] Deprioritize iOS client formats (#8337)
Authored by: bashonly
2023-10-28 08:01:31 +00:00
bashonly
4ce2f29a50
[ie/generic] Improve direct video link ext detection (#8340)
Closes #8265
Authored by: bashonly
2023-10-28 00:35:37 +00:00
bashonly
177f0d963e
[ie/QDance] Update _VALID_URL (#8426)
Authored by: bashonly
2023-10-28 00:01:31 +00:00
Bart Broere
8e02a4dcc8
[ie/npo] Send POST request to streams API endpoint (#8413)
Closes #6398
Authored by: bartbroere
2023-10-28 00:00:12 +00:00
saintliao
7b8b1cf5eb
[ie/twitcasting] Fix livestream extraction (#8427)
Closes #8431
Authored by: JC-Chung, saintliao

Co-authored-by: JC-Chung <52159296+JC-Chung@users.noreply.github.com>
2023-10-27 23:59:13 +00:00
bashonly
a40e0b37df
[core] Only ensure playlist thumbnail dir if writing thumbs (#8373)
Bugfix for 2acd1d555e

Closes #8372
Authored by: bashonly
2023-10-22 23:05:22 +00:00
9 changed files with 47 additions and 16 deletions

View File

@ -1801,7 +1801,7 @@ The following extractors use this feature:
#### youtube #### youtube
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients. * `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `android,web,ios` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)

View File

@ -2338,7 +2338,7 @@ class YoutubeDL:
return return
for f in formats: for f in formats:
if f.get('has_drm'): if f.get('has_drm') or f.get('__needs_testing'):
yield from self._check_formats([f]) yield from self._check_formats([f])
else: else:
yield f yield f
@ -4242,7 +4242,7 @@ class YoutubeDL:
self.write_debug(f'Skipping writing {label} thumbnail') self.write_debug(f'Skipping writing {label} thumbnail')
return ret return ret
if not self._ensure_dir_exists(filename): if thumbnails and not self._ensure_dir_exists(filename):
return None return None
for idx, t in list(enumerate(thumbnails))[::-1]: for idx, t in list(enumerate(thumbnails))[::-1]:

View File

@ -34,6 +34,7 @@ from ..utils import (
unified_timestamp, unified_timestamp,
unsmuggle_url, unsmuggle_url,
update_url_query, update_url_query,
urlhandle_detect_ext,
url_or_none, url_or_none,
urljoin, urljoin,
variadic, variadic,
@ -2459,7 +2460,7 @@ class GenericIE(InfoExtractor):
self.report_detected('direct video link') self.report_detected('direct video link')
headers = smuggled_data.get('http_headers', {}) headers = smuggled_data.get('http_headers', {})
format_id = str(m.group('format_id')) format_id = str(m.group('format_id'))
ext = determine_ext(url) ext = determine_ext(url, default_ext=None) or urlhandle_detect_ext(full_response)
subtitles = {} subtitles = {}
if format_id.endswith('mpegurl') or ext == 'm3u8': if format_id.endswith('mpegurl') or ext == 'm3u8':
formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers) formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
@ -2471,6 +2472,7 @@ class GenericIE(InfoExtractor):
formats = [{ formats = [{
'format_id': format_id, 'format_id': format_id,
'url': url, 'url': url,
'ext': ext,
'vcodec': 'none' if m.group('type') == 'audio' else None 'vcodec': 'none' if m.group('type') == 'audio' else None
}] }]
info_dict['direct'] = True info_dict['direct'] = True

View File

@ -245,7 +245,7 @@ class NPOIE(InfoExtractor):
'quality': 'npoplus', 'quality': 'npoplus',
'tokenId': player_token, 'tokenId': player_token,
'streamType': 'broadcast', 'streamType': 'broadcast',
}) }, data=b'') # endpoint requires POST
if not streams: if not streams:
continue continue
stream = streams.get('stream') stream = streams.get('stream')

View File

@ -262,14 +262,14 @@ class PolskieRadioAuditionIE(InfoExtractor):
query=query, headers={'x-api-key': '9bf6c5a2-a7d0-4980-9ed7-a3f7291f2a81'}) query=query, headers={'x-api-key': '9bf6c5a2-a7d0-4980-9ed7-a3f7291f2a81'})
def _entries(self, playlist_id, has_episodes, has_articles): def _entries(self, playlist_id, has_episodes, has_articles):
for i in itertools.count(1) if has_episodes else []: for i in itertools.count(0) if has_episodes else []:
page = self._call_lp3( page = self._call_lp3(
'AudioArticle/GetListByCategoryId', { 'AudioArticle/GetListByCategoryId', {
'categoryId': playlist_id, 'categoryId': playlist_id,
'PageSize': 10, 'PageSize': 10,
'skip': i, 'skip': i,
'format': 400, 'format': 400,
}, playlist_id, f'Downloading episode list page {i}') }, playlist_id, f'Downloading episode list page {i + 1}')
if not traverse_obj(page, 'data'): if not traverse_obj(page, 'data'):
break break
for episode in page['data']: for episode in page['data']:
@ -281,14 +281,14 @@ class PolskieRadioAuditionIE(InfoExtractor):
'timestamp': parse_iso8601(episode.get('datePublic')), 'timestamp': parse_iso8601(episode.get('datePublic')),
} }
for i in itertools.count(1) if has_articles else []: for i in itertools.count(0) if has_articles else []:
page = self._call_lp3( page = self._call_lp3(
'Article/GetListByCategoryId', { 'Article/GetListByCategoryId', {
'categoryId': playlist_id, 'categoryId': playlist_id,
'PageSize': 9, 'PageSize': 9,
'skip': i, 'skip': i,
'format': 400, 'format': 400,
}, playlist_id, f'Downloading article list page {i}') }, playlist_id, f'Downloading article list page {i + 1}')
if not traverse_obj(page, 'data'): if not traverse_obj(page, 'data'):
break break
for article in page['data']: for article in page['data']:

View File

@ -15,7 +15,7 @@ from ..utils import (
class QDanceIE(InfoExtractor): class QDanceIE(InfoExtractor):
_NETRC_MACHINE = 'qdance' _NETRC_MACHINE = 'qdance'
_VALID_URL = r'https?://(?:www\.)?q-dance\.com/network/(?:library|live)/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?q-dance\.com/network/(?:library|live)/(?P<id>[\w-]+)'
_TESTS = [{ _TESTS = [{
'note': 'vod', 'note': 'vod',
'url': 'https://www.q-dance.com/network/library/146542138', 'url': 'https://www.q-dance.com/network/library/146542138',
@ -53,6 +53,27 @@ class QDanceIE(InfoExtractor):
'channel_id': 'qdancenetwork.video_149170353', 'channel_id': 'qdancenetwork.video_149170353',
}, },
'skip': 'Completed livestream', 'skip': 'Completed livestream',
}, {
'note': 'vod with alphanumeric id',
'url': 'https://www.q-dance.com/network/library/WhDleSIWSfeT3Q9ObBKBeA',
'info_dict': {
'id': 'WhDleSIWSfeT3Q9ObBKBeA',
'ext': 'mp4',
'title': 'Aftershock I Defqon.1 Weekend Festival 2023 I Sunday I BLUE',
'display_id': 'naam-i-defqon-1-weekend-festival-2023-i-dag-i-podium',
'description': 'Relive Defqon.1 Path of the Warrior with Aftershock at the BLUE 🔥',
'series': 'Defqon.1',
'series_id': '31840378',
'season': 'Defqon.1 Weekend Festival 2023',
'season_id': '141735599',
'duration': 3507,
'availability': 'premium_only',
'thumbnail': 'https://images.q-dance.network/1698158361-230625-135716-defqon-1-aftershock.jpg',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.q-dance.com/network/library/-uRFKXwmRZGVnve7av9uqA',
'only_matching': True,
}] }]
_access_token = None _access_token = None

View File

@ -142,7 +142,7 @@ class TwitCastingIE(InfoExtractor):
'https://twitcasting.tv/streamserver.php?target=%s&mode=client' % uploader_id, video_id, 'https://twitcasting.tv/streamserver.php?target=%s&mode=client' % uploader_id, video_id,
'Downloading live info', fatal=False) 'Downloading live info', fatal=False)
is_live = 'data-status="online"' in webpage is_live = any(f'data-{x}' in webpage for x in ['is-onlive="true"', 'live-type="live"', 'status="online"'])
if not traverse_obj(stream_server_data, 'llfmp4') and is_live: if not traverse_obj(stream_server_data, 'llfmp4') and is_live:
self.raise_login_required(method='cookies') self.raise_login_required(method='cookies')

View File

@ -45,10 +45,10 @@ class WeverseBaseIE(InfoExtractor):
'x-acc-trace-id': str(uuid.uuid4()), 'x-acc-trace-id': str(uuid.uuid4()),
'x-clog-user-device-id': str(uuid.uuid4()), 'x-clog-user-device-id': str(uuid.uuid4()),
} }
check_username = self._download_json( valid_username = traverse_obj(self._download_json(
f'{self._ACCOUNT_API_BASE}/signup/email/status', None, f'{self._ACCOUNT_API_BASE}/signup/email/status', None, note='Checking username',
note='Checking username', query={'email': username}, headers=headers) query={'email': username}, headers=headers, expected_status=(400, 404)), 'hasPassword')
if not check_username.get('hasPassword'): if not valid_username:
raise ExtractorError('Invalid username provided', expected=True) raise ExtractorError('Invalid username provided', expected=True)
headers['content-type'] = 'application/json' headers['content-type'] = 'application/json'

View File

@ -3619,7 +3619,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _get_requested_clients(self, url, smuggled_data): def _get_requested_clients(self, url, smuggled_data):
requested_clients = [] requested_clients = []
default = ['ios', 'android', 'web'] default = ['android', 'web', 'ios']
allowed_clients = sorted( allowed_clients = sorted(
(client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'), (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True) key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
@ -4560,6 +4560,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
info['upload_date'] = upload_date info['upload_date'] = upload_date
if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
# Newly uploaded videos' HLS formats are potentially problematic and need to be checked
upload_datetime = datetime_from_str(upload_date).replace(tzinfo=datetime.timezone.utc)
if upload_datetime >= datetime_from_str('today-1day'):
for fmt in info['formats']:
if fmt.get('protocol') == 'm3u8_native':
fmt['__needs_testing'] = True
for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]: for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
v = info.get(s_k) v = info.get(s_k)
if v: if v: