Compare commits

...

21 Commits

Author SHA1 Message Date
Riteo
d536dc4d12
Merge 1cae3bf46d into 52c0ffe40a 2024-11-17 00:41:04 +01:00
Riteo
1cae3bf46d Use unpack operator for files to delete 2024-11-08 03:52:50 +01:00
Riteo
4aa3c401d4 Do not pass -map -0:s multiple times 2024-11-08 03:49:39 +01:00
Riteo
0cc0f3f086 Merge remote-tracking branch 'origin/master' into json-subtitles 2024-11-08 03:44:09 +01:00
Riteo
85a844aef3 Select copy mode depending on extension 2024-09-11 11:43:33 +02:00
Riteo
17781f9d7d Remove debug thing
I'm dumb
2024-09-08 13:33:24 +02:00
Riteo
fc349670c3 Fix info attachment in subpaths 2024-09-08 13:30:35 +02:00
Riteo
4b5be635b1 Add missing comma (again)
oops
2024-09-08 13:30:35 +02:00
Riteo
45d1f2bb6c Fix attachments in subpaths 2024-09-08 13:30:32 +02:00
Riteo
7fb0c05ff6 Revert format check stuff 2024-09-08 13:04:59 +02:00
Riteo
aaa25eb508 Add missing trailing comma 2024-08-14 03:18:55 +02:00
Riteo
780bfd044f Pass target extension to all stream_copy_opts instances 2024-08-14 03:05:11 +02:00
Riteo
fe5de0005e Add extra checks for non-matroska formats when copying 2024-08-14 02:55:33 +02:00
Riteo
9db000a9af Check also if there are json subtitles 2024-08-14 02:55:29 +02:00
Riteo
62e274f515 Move regular subtitles options to their loop 2024-08-14 02:10:14 +02:00
Riteo
e202aae5d6 Remove redundant copy_unknown 2024-08-14 02:03:09 +02:00
Riteo
3b8050da5b Merge remote-tracking branch 'origin/master' into json-subtitles 2024-08-14 02:02:56 +02:00
Riteo
38a9f70044 Use a map for JSON sub handling instead of two lists 2024-08-14 01:16:15 +02:00
Riteo
550b3a046a Use the -copy_unknown flag in the stream copy options
Also split the yield expression as the comment above was a bit
misleading (it was only related to the `-dn` flag).
2024-08-13 22:30:08 +02:00
Riteo
ba3a7232f0 [pp/FFmpegEmbedSubtitle] Embed JSON subtitles as Matroska attachments
Since we can't embed them as regular subtitles (due to them not having
any consistent structure), we embed them as file attachments, if
exporting as Matroska.

This allows us to have single-file downloads with everything embedded
for e.g. archival purposes.
2024-06-14 16:56:54 +02:00
Riteo
339828d777 [pp/FFmpegMetadata] Use metadata stream specifier for info.json
The old stream index specifiers would indiscriminately select any JSON
attachment, which made stuff like embedding live chat json data risky if
not impossible.

Also adds `-copy_unknown` as JSON data is "unknown" according to FFmpeg
(since it has no codec id) and thus would otherwise be rejected by
default.
2024-06-14 16:56:52 +02:00

View File

@ -219,9 +219,20 @@ class FFmpegPostProcessor(PostProcessor):
@staticmethod
def stream_copy_opts(copy=True, *, ext=None):
yield from ('-map', '0')
if ext in ('mkv', 'mka'):
# Some streams, such as JSON attachments, are considered of unknown
# type by FFmpeg but we still want to copy them.
yield '-copy_unknown'
else:
# Most containers don't really like unknown streams. Let's make
# sure to get rid of them.
yield '-ignore_unknown'
# Don't copy Apple TV chapters track, bin_data
# See https://github.com/yt-dlp/yt-dlp/issues/2, #19042, #19024, https://trac.ffmpeg.org/ticket/6016
yield from ('-dn', '-ignore_unknown')
yield '-dn'
if copy:
yield from ('-c', 'copy')
if ext in ('mp4', 'mov', 'm4a'):
@ -556,7 +567,7 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor):
@staticmethod
def _options(target_ext):
yield from FFmpegPostProcessor.stream_copy_opts(False)
yield from FFmpegPostProcessor.stream_copy_opts(False, ext=target_ext)
if target_ext == 'avi':
yield from ('-c:v', 'libxvid', '-vtag', 'XVID')
@ -582,7 +593,7 @@ class FFmpegVideoRemuxerPP(FFmpegVideoConvertorPP):
@staticmethod
def _options(target_ext):
return FFmpegPostProcessor.stream_copy_opts()
return FFmpegPostProcessor.stream_copy_opts(ext=target_ext)
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
@ -619,13 +630,18 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
webm_vtt_warn = False
mp4_ass_warn = False
json_subs = {}
for lang, sub_info in subtitles.items():
if not os.path.exists(sub_info.get('filepath', '')):
self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing')
continue
sub_ext = sub_info['ext']
if sub_ext == 'json':
self.report_warning('JSON subtitles cannot be embedded')
if info['ext'] in ('mkv', 'mka'):
json_subs[lang] = sub_info['filepath']
else:
self.report_warning('JSON subtitles can only be embedded in mkv/mka files.')
elif ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
sub_langs.append(lang)
sub_names.append(sub_info.get('name'))
@ -638,31 +654,48 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
mp4_ass_warn = True
self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues')
if not sub_langs:
if not sub_langs and not json_subs:
return [], info
input_files = [filename, *sub_filenames]
opts = [
*self.stream_copy_opts(ext=info['ext']),
# Don't copy the existing subtitles, we may be running the
# postprocessor a second time
'-map', '-0:s',
]
opts = [*self.stream_copy_opts(ext=info['ext'])]
if sub_langs and sub_names:
# We have regular subtitles available to embed. Don't copy the
# existing subtitles, we may be running the postprocessor a second
# time.
opts.extend([
'-map', '-0:s',
])
for i, (lang, name) in enumerate(zip(sub_langs, sub_names)):
opts.extend(['-map', f'{i + 1}:0'])
lang_code = ISO639Utils.short2long(lang) or lang
opts.extend([f'-metadata:s:s:{i}', f'language={lang_code}'])
opts.extend([
'-map', f'{i + 1}:0',
f'-metadata:s:s:{i}', f'language={lang_code}',
])
if name:
opts.extend([f'-metadata:s:s:{i}', f'handler_name={name}',
f'-metadata:s:s:{i}', f'title={name}'])
for json_lang, json_filename in json_subs.items():
escaped_json_filename = self._ffmpeg_filename_argument(json_filename)
json_basename = os.path.basename(json_filename)
opts.extend([
'-map', f'-0:m:filename:{json_lang}.json?',
'-attach', escaped_json_filename,
f'-metadata:s:m:filename:{json_basename}', 'mimetype=application/json',
f'-metadata:s:m:filename:{json_basename}', f'filename={json_lang}.json',
])
temp_filename = prepend_extension(filename, 'temp')
self.to_screen(f'Embedding subtitles in "{filename}"')
self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
os.replace(temp_filename, filename)
files_to_delete = [] if self._already_have_subtitle else sub_filenames
files_to_delete = [] if self._already_have_subtitle else [*sub_filenames, *json_subs.values()]
return files_to_delete, info
@ -677,7 +710,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
@staticmethod
def _options(target_ext):
audio_only = target_ext == 'm4a'
yield from FFmpegPostProcessor.stream_copy_opts(not audio_only)
yield from FFmpegPostProcessor.stream_copy_opts(not audio_only, ext=target_ext)
if audio_only:
yield from ('-vn', '-acodec', 'copy')
@ -805,15 +838,20 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
write_json_file(self._downloader.sanitize_info(info, self.get_param('clean_infojson', True)), infofn)
info['infojson_filename'] = infofn
old_stream, new_stream = self.get_stream_number(info['filepath'], ('tags', 'mimetype'), 'application/json')
if old_stream is not None:
yield ('-map', f'-0:{old_stream}')
new_stream -= 1
escaped_name = self._ffmpeg_filename_argument(infofn)
info_basename = os.path.basename(infofn)
yield (
'-attach', self._ffmpeg_filename_argument(infofn),
f'-metadata:s:{new_stream}', 'mimetype=application/json',
f'-metadata:s:{new_stream}', 'filename=info.json',
# In order to override any old info.json reliably we need to
# instruct FFmpeg to consider valid tracks without a codec id, like
# JSON attachments.
'-copy_unknown',
# This map operation allows us to actually replace any previous
# info.json data.
'-map', '-0:m:filename:info.json?',
'-attach', escaped_name,
f'-metadata:s:m:filename:{info_basename}', 'mimetype=application/json',
f'-metadata:s:m:filename:{info_basename}', 'filename=info.json',
)
@ -872,7 +910,7 @@ class FFmpegFixupStretchedPP(FFmpegFixupPostProcessor):
stretched_ratio = info.get('stretched_ratio')
if stretched_ratio not in (None, 1):
self._fixup('Fixing aspect ratio', info['filepath'], [
*self.stream_copy_opts(), '-aspect', f'{stretched_ratio:f}'])
*self.stream_copy_opts(ext=info['ext']), '-aspect', f'{stretched_ratio:f}'])
return [], info
@ -880,7 +918,7 @@ class FFmpegFixupM4aPP(FFmpegFixupPostProcessor):
@PostProcessor._restrict_to(images=False, video=False)
def run(self, info):
if info.get('container') == 'm4a_dash':
self._fixup('Correcting container', info['filepath'], [*self.stream_copy_opts(), '-f', 'mp4'])
self._fixup('Correcting container', info['filepath'], [*self.stream_copy_opts(ext=info['ext']), '-f', 'mp4'])
return [], info
@ -903,7 +941,7 @@ class FFmpegFixupM3u8PP(FFmpegFixupPostProcessor):
if self.get_audio_codec(info['filepath']) == 'aac':
args.extend(['-bsf:a', 'aac_adtstoasc'])
self._fixup('Fixing MPEG-TS in MP4 container', info['filepath'], [
*self.stream_copy_opts(), *args])
*self.stream_copy_opts(ext=info['ext']), *args])
return [], info
@ -924,7 +962,7 @@ class FFmpegFixupTimestampPP(FFmpegFixupPostProcessor):
opts = ['-vf', 'setpts=PTS-STARTPTS']
else:
opts = ['-c', 'copy', '-bsf', 'setts=ts=TS-STARTPTS']
self._fixup('Fixing frame timestamp', info['filepath'], [*opts, *self.stream_copy_opts(False), '-ss', self.trim])
self._fixup('Fixing frame timestamp', info['filepath'], [*opts, *self.stream_copy_opts(False, ext=info['ext']), '-ss', self.trim])
return [], info
@ -933,7 +971,7 @@ class FFmpegCopyStreamPP(FFmpegFixupPostProcessor):
@PostProcessor._restrict_to(images=False)
def run(self, info):
self._fixup(self.MESSAGE, info['filepath'], self.stream_copy_opts())
self._fixup(self.MESSAGE, info['filepath'], self.stream_copy_opts(ext=info['ext']))
return [], info
@ -1062,7 +1100,7 @@ class FFmpegSplitChaptersPP(FFmpegPostProcessor):
self.to_screen(f'Splitting video by chapters; {len(chapters)} chapters found')
for idx, chapter in enumerate(chapters):
destination, opts = self._ffmpeg_args_for_chapter(idx + 1, chapter, info)
self.real_run_ffmpeg([(in_file, opts)], [(destination, self.stream_copy_opts())])
self.real_run_ffmpeg([(in_file, opts)], [(destination, self.stream_copy_opts(ext=info['ext']))])
if in_file != info['filepath']:
self._delete_downloaded_files(in_file, msg=None)
return [], info