Compare commits

...

26 Commits

Author SHA1 Message Date
Riteo
7c8af4b490
Merge 1cae3bf46d into a9f85670d0 2024-11-12 01:14:25 +00:00
manav_chaudhary
a9f85670d0
[ie/Chaturbate] Support alternate domains (#10595)
Closes #10594
Authored by: manavchaudhary1
2024-11-11 23:41:56 +01:00
Sam
6b43a8d84b
[ie/goplay] Fix extractor (#11466)
Closes #10857
Authored by: SamDecrock, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-11-11 22:03:31 +00:00
Hugo
2db8c2e7d5
[ie/CloudflareStream] Avoid extraction via videodelivery.net (#11478)
Closes #11477
Authored by: hugovdev
2024-11-11 22:00:05 +00:00
bashonly
f9c8deb4e5
[build] Bump PyInstaller version pin to >=6.11.1 (#11507)
Authored by: bashonly
2024-11-11 21:19:03 +00:00
Sakura286
0ec9bfed4d
[ie/MixchMovie] Add extractor (#10897)
Closes #10765
Authored by: Sakura286
2024-11-11 21:40:29 +01:00
Riteo
1cae3bf46d Use unpack operator for files to delete 2024-11-08 03:52:50 +01:00
Riteo
4aa3c401d4 Do not pass -map -0:s multiple times 2024-11-08 03:49:39 +01:00
Riteo
0cc0f3f086 Merge remote-tracking branch 'origin/master' into json-subtitles 2024-11-08 03:44:09 +01:00
Riteo
85a844aef3 Select copy mode depending on extension 2024-09-11 11:43:33 +02:00
Riteo
17781f9d7d Remove debug thing
I'm dumb
2024-09-08 13:33:24 +02:00
Riteo
fc349670c3 Fix info attachment in subpaths 2024-09-08 13:30:35 +02:00
Riteo
4b5be635b1 Add missing comma (again)
oops
2024-09-08 13:30:35 +02:00
Riteo
45d1f2bb6c Fix attachments in subpaths 2024-09-08 13:30:32 +02:00
Riteo
7fb0c05ff6 Revert format check stuff 2024-09-08 13:04:59 +02:00
Riteo
aaa25eb508 Add missing trailing comma 2024-08-14 03:18:55 +02:00
Riteo
780bfd044f Pass target extension to all stream_copy_opts instances 2024-08-14 03:05:11 +02:00
Riteo
fe5de0005e Add extra checks for non-matroska formats when copying 2024-08-14 02:55:33 +02:00
Riteo
9db000a9af Check also if there are json subtitles 2024-08-14 02:55:29 +02:00
Riteo
62e274f515 Move regular subtitles options to their loop 2024-08-14 02:10:14 +02:00
Riteo
e202aae5d6 Remove redundant copy_unknown 2024-08-14 02:03:09 +02:00
Riteo
3b8050da5b Merge remote-tracking branch 'origin/master' into json-subtitles 2024-08-14 02:02:56 +02:00
Riteo
38a9f70044 Use a map for JSON sub handling instead of two lists 2024-08-14 01:16:15 +02:00
Riteo
550b3a046a Use the -copy_unknown flag in the stream copy otions
Also split the yield expression as the comment above was a bit
misleading (it was only related to the `-dn` flag).
2024-08-13 22:30:08 +02:00
Riteo
ba3a7232f0 [pp/FFmpegEmbedSubtitle] Embed JSON subtitles as Matroska attachments
Since we can't embed them as regular subtitles (due to them not having
any consistent structure), we embed them as file attachments, if
exporting as Matroska.

This allows us to have single-file downloads with everything embedded
for e.g. archival purposes.
2024-06-14 16:56:54 +02:00
Riteo
339828d777 [pp/FFmpegMetadata] Use metadata stream specifier for info.json
The old stream index specifiers would indiscriminately select any JSON
attachment, which made stuff like embedding live chat json data risky if
not impossible.

Also adds `-copy_unknown` as JSON data is "unknown" according to FFmpeg
(since it has no codec id) and thus would otherwise be rejected by
default.
2024-06-14 16:56:52 +02:00
8 changed files with 203 additions and 79 deletions

View File

@ -411,7 +411,7 @@ jobs:
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
python devscripts/install_deps.py -o --include build python devscripts/install_deps.py -o --include build
python devscripts/install_deps.py --include curl-cffi python devscripts/install_deps.py --include curl-cffi
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.10.0-py3-none-any.whl" python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.11.1-py3-none-any.whl"
- name: Prepare - name: Prepare
run: | run: |
@ -460,7 +460,7 @@ jobs:
run: | run: |
python devscripts/install_deps.py -o --include build python devscripts/install_deps.py -o --include build
python devscripts/install_deps.py python devscripts/install_deps.py
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.10.0-py3-none-any.whl" python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.11.1-py3-none-any.whl"
- name: Prepare - name: Prepare
run: | run: |

View File

@ -83,7 +83,7 @@ test = [
"pytest-rerunfailures~=14.0", "pytest-rerunfailures~=14.0",
] ]
pyinstaller = [ pyinstaller = [
"pyinstaller>=6.10.0", # Windows temp cleanup fixed in 6.10.0 "pyinstaller>=6.11.1", # Windows temp cleanup fixed in 6.11.1
] ]
[project.urls] [project.urls]

View File

@ -1156,6 +1156,7 @@ from .mitele import MiTeleIE
from .mixch import ( from .mixch import (
MixchArchiveIE, MixchArchiveIE,
MixchIE, MixchIE,
MixchMovieIE,
) )
from .mixcloud import ( from .mixcloud import (
MixcloudIE, MixcloudIE,

View File

@ -9,7 +9,7 @@ from ..utils import (
class ChaturbateIE(InfoExtractor): class ChaturbateIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)' _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.(?P<tld>com|eu|global)/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.chaturbate.com/siswet19/', 'url': 'https://www.chaturbate.com/siswet19/',
'info_dict': { 'info_dict': {
@ -29,15 +29,24 @@ class ChaturbateIE(InfoExtractor):
}, { }, {
'url': 'https://en.chaturbate.com/siswet19/', 'url': 'https://en.chaturbate.com/siswet19/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://chaturbate.eu/siswet19/',
'only_matching': True,
}, {
'url': 'https://chaturbate.eu/fullvideo/?b=caylin',
'only_matching': True,
}, {
'url': 'https://chaturbate.global/siswet19/',
'only_matching': True,
}] }]
_ROOM_OFFLINE = 'Room is currently offline' _ROOM_OFFLINE = 'Room is currently offline'
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id, tld = self._match_valid_url(url).group('id', 'tld')
webpage = self._download_webpage( webpage = self._download_webpage(
f'https://chaturbate.com/{video_id}/', video_id, f'https://chaturbate.{tld}/{video_id}/', video_id,
headers=self.geo_verification_headers()) headers=self.geo_verification_headers())
found_m3u8_urls = [] found_m3u8_urls = []

View File

@ -8,7 +8,7 @@ class CloudflareStreamIE(InfoExtractor):
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)' _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
_EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video=' _EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video='
_ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+' _ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+'
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})' _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}(?P<domain>{_DOMAIN_RE})/|{_EMBED_RE})(?P<id>{_ID_RE})'
_EMBED_REGEX = [ _EMBED_REGEX = [
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1', rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1',
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})', rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
@ -19,7 +19,7 @@ class CloudflareStreamIE(InfoExtractor):
'id': '31c9291ab41fac05471db4e73aa11717', 'id': '31c9291ab41fac05471db4e73aa11717',
'ext': 'mp4', 'ext': 'mp4',
'title': '31c9291ab41fac05471db4e73aa11717', 'title': '31c9291ab41fac05471db4e73aa11717',
'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg', 'thumbnail': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
}, },
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
@ -30,7 +30,7 @@ class CloudflareStreamIE(InfoExtractor):
'id': '0e8e040aec776862e1d632a699edf59e', 'id': '0e8e040aec776862e1d632a699edf59e',
'ext': 'mp4', 'ext': 'mp4',
'title': '0e8e040aec776862e1d632a699edf59e', 'title': '0e8e040aec776862e1d632a699edf59e',
'thumbnail': 'https://videodelivery.net/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg', 'thumbnail': 'https://cloudflarestream.com/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg',
}, },
}, { }, {
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1', 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
@ -54,7 +54,7 @@ class CloudflareStreamIE(InfoExtractor):
'id': 'eaef9dea5159cf968be84241b5cedfe7', 'id': 'eaef9dea5159cf968be84241b5cedfe7',
'ext': 'mp4', 'ext': 'mp4',
'title': 'eaef9dea5159cf968be84241b5cedfe7', 'title': 'eaef9dea5159cf968be84241b5cedfe7',
'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg', 'thumbnail': 'https://cloudflarestream.com/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
}, },
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
@ -62,8 +62,9 @@ class CloudflareStreamIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id, domain = self._match_valid_url(url).group('id', 'domain')
domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net' if domain != 'bytehighway.net':
domain = 'cloudflarestream.com'
base_url = f'https://{domain}/{video_id}/' base_url = f'https://{domain}/{video_id}/'
if '.' in video_id: if '.' in video_id:
video_id = self._parse_json(base64.urlsafe_b64decode( video_id = self._parse_json(base64.urlsafe_b64decode(

View File

@ -5,56 +5,63 @@ import hashlib
import hmac import hmac
import json import json
import os import os
import re
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none,
js_to_json,
remove_end,
traverse_obj, traverse_obj,
unescapeHTML,
) )
class GoPlayIE(InfoExtractor): class GoPlayIE(InfoExtractor):
_VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/]+/[^/]+/|)(?P<display_id>[^/#]+)' _VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/?#]+/[^/?#]+/|)(?P<id>[^/#]+)'
_NETRC_MACHINE = 'goplay' _NETRC_MACHINE = 'goplay'
_TESTS = [{ _TESTS = [{
'url': 'https://www.goplay.be/video/de-container-cup/de-container-cup-s3/de-container-cup-s3-aflevering-2#autoplay', 'url': 'https://www.goplay.be/video/de-slimste-mens-ter-wereld/de-slimste-mens-ter-wereld-s22/de-slimste-mens-ter-wereld-s22-aflevering-1',
'info_dict': { 'info_dict': {
'id': '9c4214b8-e55d-4e4b-a446-f015f6c6f811', 'id': '2baa4560-87a0-421b-bffc-359914e3c387',
'ext': 'mp4', 'ext': 'mp4',
'title': 'S3 - Aflevering 2', 'title': 'S22 - Aflevering 1',
'series': 'De Container Cup', 'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}',
'season': 'Season 3', 'series': 'De Slimste Mens ter Wereld',
'season_number': 3, 'episode': 'Episode 1',
'episode': 'Episode 2', 'season_number': 22,
'episode_number': 2, 'episode_number': 1,
'season': 'Season 22',
}, },
'params': {'skip_download': True},
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
}, { }, {
'url': 'https://www.goplay.be/video/a-family-for-thr-holidays-s1-aflevering-1#autoplay', 'url': 'https://www.goplay.be/video/1917',
'info_dict': { 'info_dict': {
'id': '74e3ed07-748c-49e4-85a0-393a93337dbf', 'id': '40cac41d-8d29-4ef5-aa11-75047b9f0907',
'ext': 'mp4', 'ext': 'mp4',
'title': 'A Family for the Holidays', 'title': '1917',
'description': r're:Op het hoogtepunt van de Eerste Wereldoorlog krijgen twee jonge .{94}',
}, },
'params': {'skip_download': True},
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
}, { }, {
'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay', 'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
'info_dict': { 'info_dict': {
'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656', 'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee',
'ext': 'mp4', 'ext': 'mp4',
'title': 'S11 - Aflevering 1', 'title': 'S11 - Aflevering 1',
'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten .{102}',
'episode': 'Episode 1', 'episode': 'Episode 1',
'series': 'De Mol', 'series': 'De Mol',
'season_number': 11, 'season_number': 11,
'episode_number': 1, 'episode_number': 1,
'season': 'Season 11', 'season': 'Season 11',
}, },
'params': { 'params': {'skip_download': True},
'skip_download': True,
},
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
}] }]
@ -69,27 +76,42 @@ class GoPlayIE(InfoExtractor):
if not self._id_token: if not self._id_token:
raise self.raise_login_required(method='password') raise self.raise_login_required(method='password')
def _real_extract(self, url): def _find_json(self, s):
url, display_id = self._match_valid_url(url).group(0, 'display_id') return self._search_json(
webpage = self._download_webpage(url, display_id) r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
video_data_json = self._html_search_regex(r'<div\s+data-hero="([^"]+)"', webpage, 'video_data')
video_data = self._parse_json(unescapeHTML(video_data_json), display_id).get('data')
movie = video_data.get('movie') def _real_extract(self, url):
if movie: display_id = self._match_id(url)
video_id = movie['videoUuid'] webpage = self._download_webpage(url, display_id)
info_dict = {
'title': movie.get('title'), nextjs_data = traverse_obj(
} re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
else: (..., {js_to_json}, {json.loads}, ..., {self._find_json}, ...))
episode = traverse_obj(video_data, ('playlists', ..., 'episodes', lambda _, v: v['pageInfo']['url'] == url), get_all=False) meta = traverse_obj(nextjs_data, (
video_id = episode['videoUuid'] ..., lambda _, v: v['meta']['path'] == urllib.parse.urlparse(url).path, 'meta', any))
info_dict = {
'title': episode.get('episodeTitle'), video_id = meta['uuid']
'series': traverse_obj(episode, ('program', 'title')), info_dict = traverse_obj(meta, {
'season_number': episode.get('seasonNumber'), 'title': ('title', {str}),
'episode_number': episode.get('episodeNumber'), 'description': ('description', {str.strip}),
} })
if traverse_obj(meta, ('program', 'subtype')) != 'movie':
for season_data in traverse_obj(nextjs_data, (..., 'children', ..., 'playlists', ...)):
episode_data = traverse_obj(
season_data, ('videos', lambda _, v: v['videoId'] == video_id, any))
if not episode_data:
continue
episode_title = traverse_obj(
episode_data, 'contextualTitle', 'episodeTitle', expected_type=str)
info_dict.update({
'title': episode_title or info_dict.get('title'),
'series': remove_end(info_dict.get('title'), f' - {episode_title}'),
'season_number': traverse_obj(season_data, ('season', {int_or_none})),
'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})),
})
break
api = self._download_json( api = self._download_json(
f'https://api.goplay.be/web/v1/videos/long-form/{video_id}', f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',

View File

@ -12,7 +12,7 @@ from ..utils.traversal import traverse_obj
class MixchIE(InfoExtractor): class MixchIE(InfoExtractor):
IE_NAME = 'mixch' IE_NAME = 'mixch'
_VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P<id>\d+)' _VALID_URL = r'https?://mixch\.tv/u/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://mixch.tv/u/16943797/live', 'url': 'https://mixch.tv/u/16943797/live',
@ -74,7 +74,7 @@ class MixchIE(InfoExtractor):
class MixchArchiveIE(InfoExtractor): class MixchArchiveIE(InfoExtractor):
IE_NAME = 'mixch:archive' IE_NAME = 'mixch:archive'
_VALID_URL = r'https?://(?:www\.)?mixch\.tv/archive/(?P<id>\d+)' _VALID_URL = r'https?://mixch\.tv/archive/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://mixch.tv/archive/421', 'url': 'https://mixch.tv/archive/421',
@ -116,3 +116,56 @@ class MixchArchiveIE(InfoExtractor):
'formats': self._extract_m3u8_formats(info_json['archiveURL'], video_id), 'formats': self._extract_m3u8_formats(info_json['archiveURL'], video_id),
'thumbnail': traverse_obj(info_json, ('thumbnailURL', {url_or_none})), 'thumbnail': traverse_obj(info_json, ('thumbnailURL', {url_or_none})),
} }
class MixchMovieIE(InfoExtractor):
IE_NAME = 'mixch:movie'
_VALID_URL = r'https?://mixch\.tv/m/(?P<id>\w+)'
_TESTS = [{
'url': 'https://mixch.tv/m/Ve8KNkJ5',
'info_dict': {
'id': 'Ve8KNkJ5',
'title': '夏☀️\nムービーへのポイントは本イベントに加算されないので配信にてお願い致します🙇🏻\u200d♀️\n#TGCCAMPUS #ミス東大 #ミス東大2024 ',
'ext': 'mp4',
'uploader': 'ミス東大No.5 松藤百香🍑💫',
'uploader_id': '12299174',
'channel_follower_count': int,
'view_count': int,
'like_count': int,
'comment_count': int,
'timestamp': 1724070828,
'uploader_url': 'https://mixch.tv/u/12299174',
'live_status': 'not_live',
'upload_date': '20240819',
},
}, {
'url': 'https://mixch.tv/m/61DzpIKE',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
f'https://mixch.tv/api-web/movies/{video_id}', video_id)
return {
'id': video_id,
'formats': [{
'format_id': 'mp4',
'url': data['movie']['file'],
'ext': 'mp4',
}],
**traverse_obj(data, {
'title': ('movie', 'title', {str}),
'thumbnail': ('movie', 'thumbnailURL', {url_or_none}),
'uploader': ('ownerInfo', 'name', {str}),
'uploader_id': ('ownerInfo', 'id', {int}, {str_or_none}),
'channel_follower_count': ('ownerInfo', 'fan', {int_or_none}),
'view_count': ('ownerInfo', 'view', {int_or_none}),
'like_count': ('movie', 'favCount', {int_or_none}),
'comment_count': ('movie', 'commentCount', {int_or_none}),
'timestamp': ('movie', 'published', {int_or_none}),
'uploader_url': ('ownerInfo', 'id', {lambda x: x and f'https://mixch.tv/u/{x}'}, filter),
}),
'live_status': 'not_live',
}

View File

@ -220,9 +220,20 @@ class FFmpegPostProcessor(PostProcessor):
@staticmethod @staticmethod
def stream_copy_opts(copy=True, *, ext=None): def stream_copy_opts(copy=True, *, ext=None):
yield from ('-map', '0') yield from ('-map', '0')
if ext in ('mkv', 'mka'):
# Some streams, such as JSON attachments, are considered of unknown
# type by FFmpeg but we still want to copy them.
yield '-copy_unknown'
else:
# Most containers don't really like unknown streams. Let's make
# sure to get rid of them.
yield '-ignore_unknown'
# Don't copy Apple TV chapters track, bin_data # Don't copy Apple TV chapters track, bin_data
# See https://github.com/yt-dlp/yt-dlp/issues/2, #19042, #19024, https://trac.ffmpeg.org/ticket/6016 # See https://github.com/yt-dlp/yt-dlp/issues/2, #19042, #19024, https://trac.ffmpeg.org/ticket/6016
yield from ('-dn', '-ignore_unknown') yield '-dn'
if copy: if copy:
yield from ('-c', 'copy') yield from ('-c', 'copy')
if ext in ('mp4', 'mov', 'm4a'): if ext in ('mp4', 'mov', 'm4a'):
@ -557,7 +568,7 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor):
@staticmethod @staticmethod
def _options(target_ext): def _options(target_ext):
yield from FFmpegPostProcessor.stream_copy_opts(False) yield from FFmpegPostProcessor.stream_copy_opts(False, ext=target_ext)
if target_ext == 'avi': if target_ext == 'avi':
yield from ('-c:v', 'libxvid', '-vtag', 'XVID') yield from ('-c:v', 'libxvid', '-vtag', 'XVID')
@ -583,7 +594,7 @@ class FFmpegVideoRemuxerPP(FFmpegVideoConvertorPP):
@staticmethod @staticmethod
def _options(target_ext): def _options(target_ext):
return FFmpegPostProcessor.stream_copy_opts() return FFmpegPostProcessor.stream_copy_opts(ext=target_ext)
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
@ -620,13 +631,18 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
webm_vtt_warn = False webm_vtt_warn = False
mp4_ass_warn = False mp4_ass_warn = False
json_subs = {}
for lang, sub_info in subtitles.items(): for lang, sub_info in subtitles.items():
if not os.path.exists(sub_info.get('filepath', '')): if not os.path.exists(sub_info.get('filepath', '')):
self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing') self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing')
continue continue
sub_ext = sub_info['ext'] sub_ext = sub_info['ext']
if sub_ext == 'json': if sub_ext == 'json':
self.report_warning('JSON subtitles cannot be embedded') if info['ext'] in ('mkv', 'mka'):
json_subs[lang] = sub_info['filepath']
else:
self.report_warning('JSON subtitles can only be embedded in mkv/mka files.')
elif ext != 'webm' or ext == 'webm' and sub_ext == 'vtt': elif ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
sub_langs.append(lang) sub_langs.append(lang)
sub_names.append(sub_info.get('name')) sub_names.append(sub_info.get('name'))
@ -639,31 +655,48 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
mp4_ass_warn = True mp4_ass_warn = True
self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues') self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues')
if not sub_langs: if not sub_langs and not json_subs:
return [], info return [], info
input_files = [filename, *sub_filenames] input_files = [filename, *sub_filenames]
opts = [ opts = [*self.stream_copy_opts(ext=info['ext'])]
*self.stream_copy_opts(ext=info['ext']),
# Don't copy the existing subtitles, we may be running the if sub_langs and sub_names:
# postprocessor a second time # We have regular subtitles available to embed. Don't copy the
'-map', '-0:s', # existing subtitles, we may be running the postprocessor a second
] # time.
opts.extend([
'-map', '-0:s',
])
for i, (lang, name) in enumerate(zip(sub_langs, sub_names)): for i, (lang, name) in enumerate(zip(sub_langs, sub_names)):
opts.extend(['-map', f'{i + 1}:0'])
lang_code = ISO639Utils.short2long(lang) or lang lang_code = ISO639Utils.short2long(lang) or lang
opts.extend([f'-metadata:s:s:{i}', f'language={lang_code}']) opts.extend([
'-map', f'{i + 1}:0',
f'-metadata:s:s:{i}', f'language={lang_code}',
])
if name: if name:
opts.extend([f'-metadata:s:s:{i}', f'handler_name={name}', opts.extend([f'-metadata:s:s:{i}', f'handler_name={name}',
f'-metadata:s:s:{i}', f'title={name}']) f'-metadata:s:s:{i}', f'title={name}'])
for json_lang, json_filename in json_subs.items():
escaped_json_filename = self._ffmpeg_filename_argument(json_filename)
json_basename = os.path.basename(json_filename)
opts.extend([
'-map', f'-0:m:filename:{json_lang}.json?',
'-attach', escaped_json_filename,
f'-metadata:s:m:filename:{json_basename}', 'mimetype=application/json',
f'-metadata:s:m:filename:{json_basename}', f'filename={json_lang}.json',
])
temp_filename = prepend_extension(filename, 'temp') temp_filename = prepend_extension(filename, 'temp')
self.to_screen(f'Embedding subtitles in "{filename}"') self.to_screen(f'Embedding subtitles in "{filename}"')
self.run_ffmpeg_multiple_files(input_files, temp_filename, opts) self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
os.replace(temp_filename, filename) os.replace(temp_filename, filename)
files_to_delete = [] if self._already_have_subtitle else sub_filenames files_to_delete = [] if self._already_have_subtitle else [*sub_filenames, *json_subs.values()]
return files_to_delete, info return files_to_delete, info
@ -678,7 +711,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
@staticmethod @staticmethod
def _options(target_ext): def _options(target_ext):
audio_only = target_ext == 'm4a' audio_only = target_ext == 'm4a'
yield from FFmpegPostProcessor.stream_copy_opts(not audio_only) yield from FFmpegPostProcessor.stream_copy_opts(not audio_only, ext=target_ext)
if audio_only: if audio_only:
yield from ('-vn', '-acodec', 'copy') yield from ('-vn', '-acodec', 'copy')
@ -806,15 +839,20 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
write_json_file(self._downloader.sanitize_info(info, self.get_param('clean_infojson', True)), infofn) write_json_file(self._downloader.sanitize_info(info, self.get_param('clean_infojson', True)), infofn)
info['infojson_filename'] = infofn info['infojson_filename'] = infofn
old_stream, new_stream = self.get_stream_number(info['filepath'], ('tags', 'mimetype'), 'application/json') escaped_name = self._ffmpeg_filename_argument(infofn)
if old_stream is not None: info_basename = os.path.basename(infofn)
yield ('-map', f'-0:{old_stream}')
new_stream -= 1
yield ( yield (
'-attach', self._ffmpeg_filename_argument(infofn), # In order to override any old info.json reliably we need to
f'-metadata:s:{new_stream}', 'mimetype=application/json', # instruct FFmpeg to consider valid tracks without a codec id, like
f'-metadata:s:{new_stream}', 'filename=info.json', # JSON attachments.
'-copy_unknown',
# This map operation allows us to actually replace any previous
# info.json data.
'-map', '-0:m:filename:info.json?',
'-attach', escaped_name,
f'-metadata:s:m:filename:{info_basename}', 'mimetype=application/json',
f'-metadata:s:m:filename:{info_basename}', 'filename=info.json',
) )
@ -873,7 +911,7 @@ class FFmpegFixupStretchedPP(FFmpegFixupPostProcessor):
stretched_ratio = info.get('stretched_ratio') stretched_ratio = info.get('stretched_ratio')
if stretched_ratio not in (None, 1): if stretched_ratio not in (None, 1):
self._fixup('Fixing aspect ratio', info['filepath'], [ self._fixup('Fixing aspect ratio', info['filepath'], [
*self.stream_copy_opts(), '-aspect', f'{stretched_ratio:f}']) *self.stream_copy_opts(ext=info['ext']), '-aspect', f'{stretched_ratio:f}'])
return [], info return [], info
@ -881,7 +919,7 @@ class FFmpegFixupM4aPP(FFmpegFixupPostProcessor):
@PostProcessor._restrict_to(images=False, video=False) @PostProcessor._restrict_to(images=False, video=False)
def run(self, info): def run(self, info):
if info.get('container') == 'm4a_dash': if info.get('container') == 'm4a_dash':
self._fixup('Correcting container', info['filepath'], [*self.stream_copy_opts(), '-f', 'mp4']) self._fixup('Correcting container', info['filepath'], [*self.stream_copy_opts(ext=info['ext']), '-f', 'mp4'])
return [], info return [], info
@ -904,7 +942,7 @@ class FFmpegFixupM3u8PP(FFmpegFixupPostProcessor):
if self.get_audio_codec(info['filepath']) == 'aac': if self.get_audio_codec(info['filepath']) == 'aac':
args.extend(['-bsf:a', 'aac_adtstoasc']) args.extend(['-bsf:a', 'aac_adtstoasc'])
self._fixup('Fixing MPEG-TS in MP4 container', info['filepath'], [ self._fixup('Fixing MPEG-TS in MP4 container', info['filepath'], [
*self.stream_copy_opts(), *args]) *self.stream_copy_opts(ext=info['ext']), *args])
return [], info return [], info
@ -925,7 +963,7 @@ class FFmpegFixupTimestampPP(FFmpegFixupPostProcessor):
opts = ['-vf', 'setpts=PTS-STARTPTS'] opts = ['-vf', 'setpts=PTS-STARTPTS']
else: else:
opts = ['-c', 'copy', '-bsf', 'setts=ts=TS-STARTPTS'] opts = ['-c', 'copy', '-bsf', 'setts=ts=TS-STARTPTS']
self._fixup('Fixing frame timestamp', info['filepath'], [*opts, *self.stream_copy_opts(False), '-ss', self.trim]) self._fixup('Fixing frame timestamp', info['filepath'], [*opts, *self.stream_copy_opts(False, ext=info['ext']), '-ss', self.trim])
return [], info return [], info
@ -934,7 +972,7 @@ class FFmpegCopyStreamPP(FFmpegFixupPostProcessor):
@PostProcessor._restrict_to(images=False) @PostProcessor._restrict_to(images=False)
def run(self, info): def run(self, info):
self._fixup(self.MESSAGE, info['filepath'], self.stream_copy_opts()) self._fixup(self.MESSAGE, info['filepath'], self.stream_copy_opts(ext=info['ext']))
return [], info return [], info
@ -1063,7 +1101,7 @@ class FFmpegSplitChaptersPP(FFmpegPostProcessor):
self.to_screen(f'Splitting video by chapters; {len(chapters)} chapters found') self.to_screen(f'Splitting video by chapters; {len(chapters)} chapters found')
for idx, chapter in enumerate(chapters): for idx, chapter in enumerate(chapters):
destination, opts = self._ffmpeg_args_for_chapter(idx + 1, chapter, info) destination, opts = self._ffmpeg_args_for_chapter(idx + 1, chapter, info)
self.real_run_ffmpeg([(in_file, opts)], [(destination, self.stream_copy_opts())]) self.real_run_ffmpeg([(in_file, opts)], [(destination, self.stream_copy_opts(ext=info['ext']))])
if in_file != info['filepath']: if in_file != info['filepath']:
self._delete_downloaded_files(in_file, msg=None) self._delete_downloaded_files(in_file, msg=None)
return [], info return [], info