Compare commits

...

11 Commits

Author SHA1 Message Date
Frank Aurich
32593552bb
Apply code review suggestions
2023-11-04 23:33:57 +01:00
Frank Aurich
0072c76b28
Merge remote-tracking branch 'origin/master' into fix_ntvde
2023-11-04 23:15:08 +01:00
sepro
4a601c9eff
[ie/weverse] Fix login error handling (#8458)
Authored by: seproDev
2023-10-28 15:53:24 +00:00
Shubham
464327acdb
[ie/polskieradio:audition] Fix playlist extraction (#8459)
Closes #8419
Authored by: shubhexists
2023-10-28 15:50:08 +00:00
bashonly
ef79d20dc9
[ie/youtube] Check newly uploaded iOS HLS formats (#8336)
Closes #7747
Authored by: bashonly
2023-10-28 08:02:13 +00:00
bashonly
39abae2354
[ie/youtube] Deprioritize iOS client formats (#8337)
Authored by: bashonly
2023-10-28 08:01:31 +00:00
bashonly
4ce2f29a50
[ie/generic] Improve direct video link ext detection (#8340)
Closes #8265
Authored by: bashonly
2023-10-28 00:35:37 +00:00
bashonly
177f0d963e
[ie/QDance] Update _VALID_URL (#8426)
Authored by: bashonly
2023-10-28 00:01:31 +00:00
Bart Broere
8e02a4dcc8
[ie/npo] Send POST request to streams API endpoint (#8413)
Closes #6398
Authored by: bartbroere
2023-10-28 00:00:12 +00:00
saintliao
7b8b1cf5eb
[ie/twitcasting] Fix livestream extraction (#8427)
Closes #8431
Authored by: JC-Chung, saintliao

Co-authored-by: JC-Chung <52159296+JC-Chung@users.noreply.github.com>
2023-10-27 23:59:13 +00:00
bashonly
a40e0b37df
[core] Only ensure playlist thumbnail dir if writing thumbs (#8373)
Bugfix for 2acd1d555e

Closes #8372
Authored by: bashonly
2023-10-22 23:05:22 +00:00
10 changed files with 64 additions and 31 deletions

View File

@@ -1801,7 +1801,7 @@ The following extractors use this feature:
 #### youtube
 * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
 * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
-* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
+* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `android,web,ios` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
 * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
 * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
 * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
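
As a hedged aside (not part of the diff): the `player_client` argument documented above can also be set when embedding yt-dlp as a library. A minimal sketch, using the documented `extractor_args` option and a placeholder video URL:

import yt_dlp

# Equivalent to: yt-dlp --extractor-args "youtube:player_client=android,web" URL
ydl_opts = {
    'extractor_args': {
        'youtube': {'player_client': ['android', 'web']},
    },
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])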

View File

@@ -2338,7 +2338,7 @@ class YoutubeDL:
                 return
             for f in formats:
-                if f.get('has_drm'):
+                if f.get('has_drm') or f.get('__needs_testing'):
                     yield from self._check_formats([f])
                 else:
                     yield f
@@ -4242,7 +4242,7 @@ class YoutubeDL:
             self.write_debug(f'Skipping writing {label} thumbnail')
             return ret
-        if not self._ensure_dir_exists(filename):
+        if thumbnails and not self._ensure_dir_exists(filename):
            return None
         for idx, t in list(enumerate(thumbnails))[::-1]:
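
As a hedged aside: judging from the first hunk above, formats flagged with `__needs_testing` are probed with the same `self._check_formats()` helper that the user-facing `check_formats` option triggers. When embedding, that check can also be forced explicitly; a minimal sketch with a placeholder URL:

import yt_dlp

# check_formats=True asks yt-dlp to test every format for availability before use
with yt_dlp.YoutubeDL({'check_formats': True}) as ydl:
    info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
    print(len(info['formats']), 'formats survived the check')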

View File

@@ -34,6 +34,7 @@ from ..utils import (
     unified_timestamp,
     unsmuggle_url,
     update_url_query,
+    urlhandle_detect_ext,
     url_or_none,
     urljoin,
     variadic,
@@ -2459,7 +2460,7 @@ class GenericIE(InfoExtractor):
             self.report_detected('direct video link')
             headers = smuggled_data.get('http_headers', {})
             format_id = str(m.group('format_id'))
-            ext = determine_ext(url)
+            ext = determine_ext(url, default_ext=None) or urlhandle_detect_ext(full_response)
             subtitles = {}
             if format_id.endswith('mpegurl') or ext == 'm3u8':
                 formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
@@ -2471,6 +2472,7 @@ class GenericIE(InfoExtractor):
                 formats = [{
                     'format_id': format_id,
                     'url': url,
+                    'ext': ext,
                     'vcodec': 'none' if m.group('type') == 'audio' else None
                 }]
            info_dict['direct'] = True
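
A hedged, standalone illustration of why `default_ext=None` matters here (made-up URL; in the extractor, `urlhandle_detect_ext(full_response)` then sniffs the real extension from the HTTP response):

from yt_dlp.utils import determine_ext

# Direct media link whose URL path carries no file extension
url = 'https://cdn.example.com/media/stream?id=123'

print(determine_ext(url))                    # 'unknown_video' (the built-in default)
print(determine_ext(url, default_ext=None))  # None -> caller can fall back to the response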

View File

@@ -245,7 +245,7 @@ class NPOIE(InfoExtractor):
                     'quality': 'npoplus',
                     'tokenId': player_token,
                     'streamType': 'broadcast',
-                })
+                }, data=b'')  # endpoint requires POST
             if not streams:
                 continue
             stream = streams.get('stream')
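
For context (a hedged aside): yt-dlp's request helpers follow the usual urllib convention that attaching a body, even an empty one, turns the request into a POST, which is what `data=b''` relies on. A standalone sketch of that convention with a placeholder endpoint:

import urllib.request

api_url = 'https://example.invalid/api/streams'  # placeholder, never contacted

print(urllib.request.Request(api_url).get_method())            # GET  (no body)
print(urllib.request.Request(api_url, data=b'').get_method())  # POST (empty body attached)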

View File

@@ -32,17 +32,14 @@ class NTVDeIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        info = self._parse_json(self._search_regex(
-            r'(?s)article:\s*(\{.+?\})', webpage, 'info'),
-            video_id, transform_source=js_to_json)
-        timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp'))
+        info = self._search_json(
+            r'article:\s*', webpage, 'info', video_id, transform_source=js_to_json)
-        player_data = self._parse_json(self._search_regex(
-            r'(?s)\$\(\s*"\#playerwrapper"\s*\)\s*\.data\(\s*"player",\s*(\{.*?\})\);',
-            webpage, 'player data'), video_id,
-            transform_source=lambda s: js_to_json(re.sub(r'ivw:\s*.+', '', s)))
+        player_data = self._search_json(
+            r'\$\(\s*"\#playerwrapper"\s*\)\s*\.data\(\s*"player",\s*',
+            webpage, 'player data', video_id,
+            transform_source=lambda s: js_to_json(re.sub(r'ivw:[^},]+', '', s)))
         vdata = traverse_obj(player_data, ('setup', 'source'))
-        duration = int_or_none(vdata.get('length'))
         formats = []
         if vdata.get('progressive'):
@@ -56,13 +53,18 @@ class NTVDeIE(InfoExtractor):
                 quality=1, m3u8_id='hls', fatal=False))
         if vdata.get('dash'):
             formats.extend(self._extract_mpd_formats(vdata['dash'], video_id, fatal=False))
         return {
             'id': video_id,
-            'title': info['headline'],
-            'description': info.get('intro'),
-            'alt_title': info.get('kicker'),
-            'timestamp': timestamp,
-            'thumbnail': vdata.get('poster'),
-            'duration': duration,
+            **traverse_obj(info, {
+                'title': 'headline',
+                'description': 'intro',
+                'alt_title': 'kicker',
+                'timestamp': ('publishedDateAsUnixTimeStamp', {int_or_none}),
+            }),
+            **traverse_obj(vdata, {
+                'thumbnail': 'poster',
+                'duration': ('length', {int_or_none}),
+            }),
             'formats': formats,
         }
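
As a hedged aside on the pattern used above: `traverse_obj` with a dict argument builds a new dict from the given paths, silently dropping keys that resolve to None, and a `{callable}` step applies that callable to the value. A standalone sketch with a made-up article blob:

from yt_dlp.utils import int_or_none, traverse_obj

info = {  # hypothetical data, shaped like the embedded article object
    'headline': 'Example headline',
    'intro': 'Example intro',
    'publishedDateAsUnixTimeStamp': '1699111337',
}

print(traverse_obj(info, {
    'title': 'headline',
    'description': 'intro',
    'timestamp': ('publishedDateAsUnixTimeStamp', {int_or_none}),
}))
# -> {'title': 'Example headline', 'description': 'Example intro', 'timestamp': 1699111337}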

View File

@@ -262,14 +262,14 @@ class PolskieRadioAuditionIE(InfoExtractor):
             query=query, headers={'x-api-key': '9bf6c5a2-a7d0-4980-9ed7-a3f7291f2a81'})
     def _entries(self, playlist_id, has_episodes, has_articles):
-        for i in itertools.count(1) if has_episodes else []:
+        for i in itertools.count(0) if has_episodes else []:
             page = self._call_lp3(
                 'AudioArticle/GetListByCategoryId', {
                     'categoryId': playlist_id,
                     'PageSize': 10,
                     'skip': i,
                     'format': 400,
-                }, playlist_id, f'Downloading episode list page {i}')
+                }, playlist_id, f'Downloading episode list page {i + 1}')
             if not traverse_obj(page, 'data'):
                 break
             for episode in page['data']:
@@ -281,14 +281,14 @@ class PolskieRadioAuditionIE(InfoExtractor):
                     'timestamp': parse_iso8601(episode.get('datePublic')),
                 }
-        for i in itertools.count(1) if has_articles else []:
+        for i in itertools.count(0) if has_articles else []:
             page = self._call_lp3(
                 'Article/GetListByCategoryId', {
                     'categoryId': playlist_id,
                     'PageSize': 9,
                     'skip': i,
                     'format': 400,
-                }, playlist_id, f'Downloading article list page {i}')
+                }, playlist_id, f'Downloading article list page {i + 1}')
             if not traverse_obj(page, 'data'):
                 break
             for article in page['data']:

View File

@@ -15,7 +15,7 @@ from ..utils import (
 class QDanceIE(InfoExtractor):
     _NETRC_MACHINE = 'qdance'
-    _VALID_URL = r'https?://(?:www\.)?q-dance\.com/network/(?:library|live)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?q-dance\.com/network/(?:library|live)/(?P<id>[\w-]+)'
     _TESTS = [{
         'note': 'vod',
         'url': 'https://www.q-dance.com/network/library/146542138',
@@ -53,6 +53,27 @@ class QDanceIE(InfoExtractor):
             'channel_id': 'qdancenetwork.video_149170353',
         },
         'skip': 'Completed livestream',
+    }, {
+        'note': 'vod with alphanumeric id',
+        'url': 'https://www.q-dance.com/network/library/WhDleSIWSfeT3Q9ObBKBeA',
+        'info_dict': {
+            'id': 'WhDleSIWSfeT3Q9ObBKBeA',
+            'ext': 'mp4',
+            'title': 'Aftershock I Defqon.1 Weekend Festival 2023 I Sunday I BLUE',
+            'display_id': 'naam-i-defqon-1-weekend-festival-2023-i-dag-i-podium',
+            'description': 'Relive Defqon.1 Path of the Warrior with Aftershock at the BLUE 🔥',
+            'series': 'Defqon.1',
+            'series_id': '31840378',
+            'season': 'Defqon.1 Weekend Festival 2023',
+            'season_id': '141735599',
+            'duration': 3507,
+            'availability': 'premium_only',
+            'thumbnail': 'https://images.q-dance.network/1698158361-230625-135716-defqon-1-aftershock.jpg',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://www.q-dance.com/network/library/-uRFKXwmRZGVnve7av9uqA',
+        'only_matching': True,
+    }]
     _access_token = None
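
A hedged, standalone check of the widened `_VALID_URL` pattern, reusing the URLs from the tests above (plain `re`, no extractor involved):

import re

_VALID_URL = r'https?://(?:www\.)?q-dance\.com/network/(?:library|live)/(?P<id>[\w-]+)'

for url in (
    'https://www.q-dance.com/network/library/146542138',              # numeric id (old pattern)
    'https://www.q-dance.com/network/library/WhDleSIWSfeT3Q9ObBKBeA',  # alphanumeric id
    'https://www.q-dance.com/network/library/-uRFKXwmRZGVnve7av9uqA',  # id with a leading dash
):
    print(re.match(_VALID_URL, url).group('id'))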

View File

@@ -142,7 +142,7 @@ class TwitCastingIE(InfoExtractor):
            'https://twitcasting.tv/streamserver.php?target=%s&mode=client' % uploader_id, video_id,
            'Downloading live info', fatal=False)
-        is_live = 'data-status="online"' in webpage
+        is_live = any(f'data-{x}' in webpage for x in ['is-onlive="true"', 'live-type="live"', 'status="online"'])
         if not traverse_obj(stream_server_data, 'llfmp4') and is_live:
             self.raise_login_required(method='cookies')

View File

@@ -45,10 +45,10 @@ class WeverseBaseIE(InfoExtractor):
             'x-acc-trace-id': str(uuid.uuid4()),
             'x-clog-user-device-id': str(uuid.uuid4()),
         }
-        check_username = self._download_json(
-            f'{self._ACCOUNT_API_BASE}/signup/email/status', None,
-            note='Checking username', query={'email': username}, headers=headers)
-        if not check_username.get('hasPassword'):
+        valid_username = traverse_obj(self._download_json(
+            f'{self._ACCOUNT_API_BASE}/signup/email/status', None, note='Checking username',
+            query={'email': username}, headers=headers, expected_status=(400, 404)), 'hasPassword')
+        if not valid_username:
             raise ExtractorError('Invalid username provided', expected=True)
         headers['content-type'] = 'application/json'

View File

@@ -3619,7 +3619,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     def _get_requested_clients(self, url, smuggled_data):
         requested_clients = []
-        default = ['ios', 'android', 'web']
+        default = ['android', 'web', 'ios']
         allowed_clients = sorted(
             (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
             key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
@@ -4560,6 +4560,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
         info['upload_date'] = upload_date
+        if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
+            # Newly uploaded videos' HLS formats are potentially problematic and need to be checked
+            upload_datetime = datetime_from_str(upload_date).replace(tzinfo=datetime.timezone.utc)
+            if upload_datetime >= datetime_from_str('today-1day'):
+                for fmt in info['formats']:
+                    if fmt.get('protocol') == 'm3u8_native':
+                        fmt['__needs_testing'] = True
         for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
             v = info.get(s_k)
             if v: