mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-26 09:11:25 +01:00
Compare commits
5 Commits
8dd2e82be8
...
878c5f1fe7
Author | SHA1 | Date | |
---|---|---|---|
|
878c5f1fe7 | ||
|
f2a4983df7 | ||
|
bacc31b05a | ||
|
9de472f6ea | ||
|
0f58c5628a |
|
@ -612,8 +612,7 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
|
||||||
--no-restrict-filenames Allow Unicode characters, "&" and spaces in
|
--no-restrict-filenames Allow Unicode characters, "&" and spaces in
|
||||||
filenames (default)
|
filenames (default)
|
||||||
--windows-filenames Force filenames to be Windows-compatible
|
--windows-filenames Force filenames to be Windows-compatible
|
||||||
--no-windows-filenames Make filenames Windows-compatible only if
|
--no-windows-filenames Sanitize filenames only minimally
|
||||||
using Windows (default)
|
|
||||||
--trim-filenames LENGTH Limit the filename length (excluding
|
--trim-filenames LENGTH Limit the filename length (excluding
|
||||||
extension) to the specified number of
|
extension) to the specified number of
|
||||||
characters
|
characters
|
||||||
|
|
|
@ -762,6 +762,13 @@ class TestYoutubeDL(unittest.TestCase):
|
||||||
test('%(width)06d.%%(ext)s', 'NA.%(ext)s')
|
test('%(width)06d.%%(ext)s', 'NA.%(ext)s')
|
||||||
test('%%(width)06d.%(ext)s', '%(width)06d.mp4')
|
test('%%(width)06d.%(ext)s', '%(width)06d.mp4')
|
||||||
|
|
||||||
|
# Sanitization options
|
||||||
|
test('%(title3)s', (None, 'foo⧸bar⧹test'))
|
||||||
|
test('%(title5)s', (None, 'aei_A'), restrictfilenames=True)
|
||||||
|
test('%(title3)s', (None, 'foo_bar_test'), windowsfilenames=False, restrictfilenames=True)
|
||||||
|
if sys.platform != 'win32':
|
||||||
|
test('%(title3)s', (None, 'foo⧸bar\\test'), windowsfilenames=False)
|
||||||
|
|
||||||
# ID sanitization
|
# ID sanitization
|
||||||
test('%(id)s', '_abcd', info={'id': '_abcd'})
|
test('%(id)s', '_abcd', info={'id': '_abcd'})
|
||||||
test('%(some_id)s', '_abcd', info={'some_id': '_abcd'})
|
test('%(some_id)s', '_abcd', info={'some_id': '_abcd'})
|
||||||
|
|
|
@ -267,7 +267,9 @@ class YoutubeDL:
|
||||||
outtmpl_na_placeholder: Placeholder for unavailable meta fields.
|
outtmpl_na_placeholder: Placeholder for unavailable meta fields.
|
||||||
restrictfilenames: Do not allow "&" and spaces in file names
|
restrictfilenames: Do not allow "&" and spaces in file names
|
||||||
trim_file_name: Limit length of filename (extension excluded)
|
trim_file_name: Limit length of filename (extension excluded)
|
||||||
windowsfilenames: Force the filenames to be windows compatible
|
windowsfilenames: True: Force filenames to be Windows compatible
|
||||||
|
False: Sanitize filenames only minimally
|
||||||
|
This option has no effect when running on Windows
|
||||||
ignoreerrors: Do not stop on download/postprocessing errors.
|
ignoreerrors: Do not stop on download/postprocessing errors.
|
||||||
Can be 'only_download' to ignore only download errors.
|
Can be 'only_download' to ignore only download errors.
|
||||||
Default is 'only_download' for CLI, but False for API
|
Default is 'only_download' for CLI, but False for API
|
||||||
|
@ -1193,8 +1195,7 @@ class YoutubeDL:
|
||||||
|
|
||||||
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
|
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
|
||||||
""" Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
|
""" Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
|
||||||
@param sanitize Whether to sanitize the output as a filename.
|
@param sanitize Whether to sanitize the output as a filename
|
||||||
For backward compatibility, a function can also be passed
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
|
info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
|
||||||
|
@ -1310,14 +1311,23 @@ class YoutubeDL:
|
||||||
|
|
||||||
na = self.params.get('outtmpl_na_placeholder', 'NA')
|
na = self.params.get('outtmpl_na_placeholder', 'NA')
|
||||||
|
|
||||||
def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
|
def filename_sanitizer(key, value, restricted):
|
||||||
return sanitize_filename(str(value), restricted=restricted, is_id=(
|
return sanitize_filename(str(value), restricted=restricted, is_id=(
|
||||||
bool(re.search(r'(^|[_.])id(\.|$)', key))
|
bool(re.search(r'(^|[_.])id(\.|$)', key))
|
||||||
if 'filename-sanitization' in self.params['compat_opts']
|
if 'filename-sanitization' in self.params['compat_opts']
|
||||||
else NO_DEFAULT))
|
else NO_DEFAULT))
|
||||||
|
|
||||||
sanitizer = sanitize if callable(sanitize) else filename_sanitizer
|
if callable(sanitize):
|
||||||
sanitize = bool(sanitize)
|
self.deprecation_warning('Passing a callable "sanitize" to YoutubeDL.prepare_outtmpl is deprecated')
|
||||||
|
elif not sanitize:
|
||||||
|
pass
|
||||||
|
elif (sys.platform != 'win32' and not self.params.get('restrictfilenames')
|
||||||
|
and self.params.get('windowsfilenames') is False):
|
||||||
|
def sanitize(key, value):
|
||||||
|
return value.replace('/', '\u29F8').replace('\0', '')
|
||||||
|
else:
|
||||||
|
def sanitize(key, value):
|
||||||
|
return filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames'))
|
||||||
|
|
||||||
def _dumpjson_default(obj):
|
def _dumpjson_default(obj):
|
||||||
if isinstance(obj, (set, LazyList)):
|
if isinstance(obj, (set, LazyList)):
|
||||||
|
@ -1400,13 +1410,13 @@ class YoutubeDL:
|
||||||
|
|
||||||
if sanitize:
|
if sanitize:
|
||||||
# If value is an object, sanitize might convert it to a string
|
# If value is an object, sanitize might convert it to a string
|
||||||
# So we convert it to repr first
|
# So we manually convert it before sanitizing
|
||||||
if fmt[-1] == 'r':
|
if fmt[-1] == 'r':
|
||||||
value, fmt = repr(value), str_fmt
|
value, fmt = repr(value), str_fmt
|
||||||
elif fmt[-1] == 'a':
|
elif fmt[-1] == 'a':
|
||||||
value, fmt = ascii(value), str_fmt
|
value, fmt = ascii(value), str_fmt
|
||||||
if fmt[-1] in 'csra':
|
if fmt[-1] in 'csra':
|
||||||
value = sanitizer(last_field, value)
|
value = sanitize(last_field, value)
|
||||||
|
|
||||||
key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format'))
|
key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format'))
|
||||||
TMPL_DICT[key] = value
|
TMPL_DICT[key] = value
|
||||||
|
|
|
@ -205,6 +205,26 @@ class ArchiveOrgIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
}, {
|
||||||
|
# The reviewbody is None for one of the reviews; just need to extract data without crashing
|
||||||
|
'url': 'https://archive.org/details/gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Stuck Inside of Mobile with the Memphis Blues Again',
|
||||||
|
'creators': ['Grateful Dead'],
|
||||||
|
'duration': 338.31,
|
||||||
|
'track': 'Stuck Inside of Mobile with the Memphis Blues Again',
|
||||||
|
'description': 'md5:764348a470b986f1217ffd38d6ac7b72',
|
||||||
|
'display_id': 'gd95-04-02d1t04.shn',
|
||||||
|
'location': 'Pyramid Arena',
|
||||||
|
'uploader': 'jon@archive.org',
|
||||||
|
'album': '1995-04-02 - Pyramid Arena',
|
||||||
|
'upload_date': '20040519',
|
||||||
|
'track_number': 4,
|
||||||
|
'release_date': '19950402',
|
||||||
|
'timestamp': 1084927901,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -335,7 +355,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||||
info['comments'].append({
|
info['comments'].append({
|
||||||
'id': review.get('review_id'),
|
'id': review.get('review_id'),
|
||||||
'author': review.get('reviewer'),
|
'author': review.get('reviewer'),
|
||||||
'text': str_or_none(review.get('reviewtitle'), '') + '\n\n' + review.get('reviewbody'),
|
'text': join_nonempty('reviewtitle', 'reviewbody', from_dict=review, delim='\n\n'),
|
||||||
'timestamp': unified_timestamp(review.get('createdate')),
|
'timestamp': unified_timestamp(review.get('createdate')),
|
||||||
'parent': 'root'})
|
'parent': 'root'})
|
||||||
|
|
||||||
|
|
|
@ -563,13 +563,13 @@ class FacebookIE(InfoExtractor):
|
||||||
return extract_video_data(try_get(
|
return extract_video_data(try_get(
|
||||||
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
||||||
|
|
||||||
def extract_dash_manifest(video, formats):
|
def extract_dash_manifest(vid_data, formats, mpd_url=None):
|
||||||
dash_manifest = traverse_obj(
|
dash_manifest = traverse_obj(
|
||||||
video, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', expected_type=str)
|
vid_data, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', 'manifest_xml', expected_type=str)
|
||||||
if dash_manifest:
|
if dash_manifest:
|
||||||
formats.extend(self._parse_mpd_formats(
|
formats.extend(self._parse_mpd_formats(
|
||||||
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
|
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
|
||||||
mpd_url=url_or_none(video.get('dash_manifest_url'))))
|
mpd_url=url_or_none(video.get('dash_manifest_url')) or mpd_url))
|
||||||
|
|
||||||
def process_formats(info):
|
def process_formats(info):
|
||||||
# Downloads with browser's User-Agent are rate limited. Working around
|
# Downloads with browser's User-Agent are rate limited. Working around
|
||||||
|
@ -619,9 +619,12 @@ class FacebookIE(InfoExtractor):
|
||||||
video = video['creation_story']
|
video = video['creation_story']
|
||||||
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
|
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
|
||||||
video.update(reel_info)
|
video.update(reel_info)
|
||||||
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
|
|
||||||
formats = []
|
formats = []
|
||||||
q = qualities(['sd', 'hd'])
|
q = qualities(['sd', 'hd'])
|
||||||
|
|
||||||
|
# Legacy formats extraction
|
||||||
|
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
|
||||||
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
|
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
|
||||||
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
|
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
|
||||||
('browser_native_sd_url', 'sd')):
|
('browser_native_sd_url', 'sd')):
|
||||||
|
@ -629,7 +632,7 @@ class FacebookIE(InfoExtractor):
|
||||||
if not playable_url:
|
if not playable_url:
|
||||||
continue
|
continue
|
||||||
if determine_ext(playable_url) == 'mpd':
|
if determine_ext(playable_url) == 'mpd':
|
||||||
formats.extend(self._extract_mpd_formats(playable_url, video_id))
|
formats.extend(self._extract_mpd_formats(playable_url, video_id, fatal=False))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
|
@ -638,6 +641,28 @@ class FacebookIE(InfoExtractor):
|
||||||
'url': playable_url,
|
'url': playable_url,
|
||||||
})
|
})
|
||||||
extract_dash_manifest(fmt_data, formats)
|
extract_dash_manifest(fmt_data, formats)
|
||||||
|
|
||||||
|
# New videoDeliveryResponse formats extraction
|
||||||
|
fmt_data = traverse_obj(video, ('videoDeliveryResponseFragment', 'videoDeliveryResponseResult'))
|
||||||
|
mpd_urls = traverse_obj(fmt_data, ('dash_manifest_urls', ..., 'manifest_url', {url_or_none}))
|
||||||
|
dash_manifests = traverse_obj(fmt_data, ('dash_manifests', lambda _, v: v['manifest_xml']))
|
||||||
|
for idx, dash_manifest in enumerate(dash_manifests):
|
||||||
|
extract_dash_manifest(dash_manifest, formats, mpd_url=traverse_obj(mpd_urls, idx))
|
||||||
|
if not dash_manifests:
|
||||||
|
# Only extract from MPD URLs if the manifests are not already provided
|
||||||
|
for mpd_url in mpd_urls:
|
||||||
|
formats.extend(self._extract_mpd_formats(mpd_url, video_id, fatal=False))
|
||||||
|
for prog_fmt in traverse_obj(fmt_data, ('progressive_urls', lambda _, v: v['progressive_url'])):
|
||||||
|
format_id = traverse_obj(prog_fmt, ('metadata', 'quality', {str.lower}))
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
# sd, hd formats w/o resolution info should be deprioritized below DASH
|
||||||
|
'quality': q(format_id) - 3,
|
||||||
|
'url': prog_fmt['progressive_url'],
|
||||||
|
})
|
||||||
|
for m3u8_url in traverse_obj(fmt_data, ('hls_playlist_urls', ..., 'hls_playlist_url', {url_or_none})):
|
||||||
|
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False, m3u8_id='hls'))
|
||||||
|
|
||||||
if not formats:
|
if not formats:
|
||||||
# Do not append false positive entry w/o any formats
|
# Do not append false positive entry w/o any formats
|
||||||
return
|
return
|
||||||
|
|
|
@ -1368,12 +1368,12 @@ def create_parser():
|
||||||
help='Allow Unicode characters, "&" and spaces in filenames (default)')
|
help='Allow Unicode characters, "&" and spaces in filenames (default)')
|
||||||
filesystem.add_option(
|
filesystem.add_option(
|
||||||
'--windows-filenames',
|
'--windows-filenames',
|
||||||
action='store_true', dest='windowsfilenames', default=False,
|
action='store_true', dest='windowsfilenames', default=None,
|
||||||
help='Force filenames to be Windows-compatible')
|
help='Force filenames to be Windows-compatible')
|
||||||
filesystem.add_option(
|
filesystem.add_option(
|
||||||
'--no-windows-filenames',
|
'--no-windows-filenames',
|
||||||
action='store_false', dest='windowsfilenames',
|
action='store_false', dest='windowsfilenames',
|
||||||
help='Make filenames Windows-compatible only if using Windows (default)')
|
help='Sanitize filenames only minimally')
|
||||||
filesystem.add_option(
|
filesystem.add_option(
|
||||||
'--trim-filenames', '--trim-file-names', metavar='LENGTH',
|
'--trim-filenames', '--trim-file-names', metavar='LENGTH',
|
||||||
dest='trim_file_name', default=0, type=int,
|
dest='trim_file_name', default=0, type=int,
|
||||||
|
|
Loading…
Reference in New Issue
Block a user