Compare commits

...

11 Commits

Author SHA1 Message Date
Peter Rowlands (변기호)
c0020db139
Merge bd62cdba1a into 1d253b0a27 2024-11-17 02:37:59 +05:30
Jackson Humphrey
1d253b0a27
[ie/patreon] Fix comments extraction (#11530)
Closes #11483
Authored by: jshumphrey, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-11-16 20:02:14 +00:00
powergold1
720b3dc453
[ie/chaturbate] Extract from API and support impersonation (#11555)
Closes #6546, Closes #10359
Authored by: powergold1
2024-11-16 19:55:40 +00:00
Jackson Humphrey
d215fba7ed
[ie/RedGifsUser] Fix extraction (#11531)
Closes #7382, Closes #9131
Authored by: jshumphrey
2024-11-16 19:50:17 +00:00
Jackson Humphrey
8388ec256f
[ie/spankbang] Support browser impersonation (#11542)
Closes #6545
Authored by: jshumphrey
2024-11-16 19:48:47 +00:00
sepro
6365e92589
[ie/bandlab] Add extractors (#11535)
Closes #7750
Authored by: seproDev
2024-11-16 17:56:43 +01:00
Alessandro Campolo
70c55cb08f
[ie/RadioRadicale] Add extractor (#5607)
Authored by: a13ssandr0, pzhlkj6612

Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
2024-11-16 13:56:15 +01:00
Peter Rowlands
bd62cdba1a [fd/dash] support DASH SEA (AES-128-CBC) decryption 2024-10-05 17:21:50 +09:00
Peter Rowlands
e0ce6eed92 [extractor] Parse DASH-SEA content protection in DASH manifests 2024-10-05 17:21:47 +09:00
Peter Rowlands
6b0ce31939 [fd/dash, pp/ffmpeg] support DASH CENC decryption 2024-10-05 00:59:58 +09:00
Peter Rowlands
a95757d3b7 [extractor] parse CENC + Clear Key information in DASH manifests 2024-10-04 21:02:26 +09:00
16 changed files with 1173 additions and 38 deletions

View File

@ -1381,6 +1381,175 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
}, },
], ],
}, },
), (
# Clear Key with CENC default_KID
'clearkey_cenc',
'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/Manifest_1080p_ClearKey.mpd', # mpd_url
'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/', # mpd_base_url
[{
'manifest_url': 'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/Manifest_1080p_ClearKey.mpd',
'ext': 'mp4',
'format_id': '1',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.64001f',
'tbr': 389.802,
'width': 512,
'height': 288,
'dash_cenc': {
'laurl': 'https://drm-clearkey-testvectors.axtest.net/AcquireLicense',
'key_ids': ['9eb4050de44b4802932e27d75083e266'],
},
}, {
'manifest_url': 'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/Manifest_1080p_ClearKey.mpd',
'ext': 'mp4',
'format_id': '2',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.64001f',
'tbr': 764.935,
'width': 640,
'height': 360,
'dash_cenc': {
'laurl': 'https://drm-clearkey-testvectors.axtest.net/AcquireLicense',
'key_ids': ['9eb4050de44b4802932e27d75083e266'],
},
}, {
'manifest_url': 'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/Manifest_1080p_ClearKey.mpd',
'ext': 'mp4',
'format_id': '3',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.640028',
'tbr': 1120.439,
'width': 852,
'height': 480,
'dash_cenc': {
'laurl': 'https://drm-clearkey-testvectors.axtest.net/AcquireLicense',
'key_ids': ['9eb4050de44b4802932e27d75083e266'],
},
}, {
'manifest_url': 'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/Manifest_1080p_ClearKey.mpd',
'ext': 'mp4',
'format_id': '4',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.640032',
'tbr': 1945.258,
'width': 1280,
'height': 720,
'dash_cenc': {
'laurl': 'https://drm-clearkey-testvectors.axtest.net/AcquireLicense',
'key_ids': ['9eb4050de44b4802932e27d75083e266'],
},
}, {
'manifest_url': 'https://media.axprod.net/TestVectors/v7-MultiDRM-SingleKey/Manifest_1080p_ClearKey.mpd',
'ext': 'mp4',
'format_id': '5',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.640033',
'tbr': 2726.377,
'width': 1920,
'height': 1080,
'dash_cenc': {
'laurl': 'https://drm-clearkey-testvectors.axtest.net/AcquireLicense',
'key_ids': ['9eb4050de44b4802932e27d75083e266'],
},
}],
{},
), (
# default CENC KID overridden via W3C PSSH box, no license server in manifest
'w3c_pssh',
'https://unknown/manifest.mpd', # mpd_url
'https://unknown/', # mpd_base_url
[{
'manifest_url': 'https://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': '1',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.64001f',
'tbr': 389.802,
'width': 512,
'height': 288,
'dash_cenc': {
'key_ids': ['43215678123412341234123412341234'],
},
'has_drm': True,
}],
{},
), (
# DASH SEA with AES-128-CBC
'dash_sea',
'https://unknown/manifest.mpd', # mpd_url
'https://unknown/', # mpd_base_url
[{
'manifest_url': 'https://unknown/manifest.mpd',
'ext': 'm4a',
'format_id': '5_A_aac_eng_2_127999_2_1_1',
'format_note': 'DASH audio',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'none',
'tbr': 127.999,
'hls_aes': {
'uri': 'https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012',
'iv': '0x7BD31E102B0CE9CCD39691782533656C',
},
}, {
'manifest_url': 'https://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': '1_V_video_3',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.64001F',
'tbr': 258.591,
'width': 960,
'height': 540,
'hls_aes': {
'uri': 'https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012',
'iv': '0x7BD31E102B0CE9CCD39691782533656C',
},
}, {
'manifest_url': 'https://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': '1_V_video_2',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.64001F',
'tbr': 422.519,
'width': 1280,
'height': 720,
'hls_aes': {
'uri': 'https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012',
'iv': '0x7BD31E102B0CE9CCD39691782533656C',
},
}, {
'manifest_url': 'https://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': '1_V_video_1',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.640028',
'tbr': 628.102,
'width': 1920,
'height': 1080,
'hls_aes': {
'uri': 'https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012',
'iv': '0x7BD31E102B0CE9CCD39691782533656C',
},
}],
{},
), ),
] ]

29
test/testdata/mpd/clearkey_cenc.mpd vendored Normal file
View File

@ -0,0 +1,29 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
Version information:
Axinom.MediaProcessing v3.0.0 targeting General Purpose Media Format specification v7
ffmpeg version N-81423-g61fac0e Copyright (c) 2000-2016 the FFmpeg developers
x265 [info]: HEVC encoder version 2.0+12-49a0d1176aef5bc6
x264 0.148.2705 3f5ed56
MP4Box - GPAC version 0.6.2-DEV-rev683-g7b29fbe-master
MediaInfoLib - v0.7.87
For more info about this video, see https://github.com/Axinom/dash-test-vectors
-->
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" minBufferTime="PT1.500S" type="static" mediaPresentationDuration="PT0H12M14.000S" maxSegmentDuration="PT0H0M4.000S" profiles="urn:mpeg:dash:profile:isoff-live:2011,http://dashif.org/guidelines/dash264" xmlns:cenc="urn:mpeg:cenc:2013" xmlns:clearkey="http://dashif.org/guidelines/clearKey">
<Period duration="PT0H12M14.000S">
<AdaptationSet segmentAlignment="true" group="1" maxWidth="1920" maxHeight="1080" maxFrameRate="24" par="16:9" lang="und">
<ContentProtection schemeIdUri="urn:mpeg:dash:mp4protection:2011" value="cenc" cenc:default_KID="9eb4050d-e44b-4802-932e-27d75083e266" />
<ContentProtection value="ClearKey1.0" schemeIdUri="urn:uuid:e2719d58-a985-b3c9-781a-b030af78d30e">
<clearkey:Laurl Lic_type="EME-1.0">https://drm-clearkey-testvectors.axtest.net/AcquireLicense</clearkey:Laurl>
</ContentProtection>
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
<SegmentTemplate timescale="24" media="$RepresentationID$/$Number%04d$.m4s" startNumber="1" duration="96" initialization="$RepresentationID$/init.mp4" />
<Representation id="1" mimeType="video/mp4" codecs="avc1.64001f" width="512" height="288" frameRate="24" sar="1:1" startWithSAP="1" bandwidth="389802"></Representation>
<Representation id="2" mimeType="video/mp4" codecs="avc1.64001f" width="640" height="360" frameRate="24" sar="1:1" startWithSAP="1" bandwidth="764935"></Representation>
<Representation id="3" mimeType="video/mp4" codecs="avc1.640028" width="852" height="480" frameRate="24" sar="640:639" startWithSAP="1" bandwidth="1120439"></Representation>
<Representation id="4" mimeType="video/mp4" codecs="avc1.640032" width="1280" height="720" frameRate="24" sar="1:1" startWithSAP="1" bandwidth="1945258"></Representation>
<Representation id="5" mimeType="video/mp4" codecs="avc1.640033" width="1920" height="1080" frameRate="24" sar="1:1" startWithSAP="1" bandwidth="2726377"></Representation>
</AdaptationSet>
</Period>
</MPD>

109
test/testdata/mpd/dash_sea.mpd vendored Normal file
View File

@ -0,0 +1,109 @@
<?xml version="1.0" encoding="utf-8"?>
<MPD
xmlns="urn:mpeg:dash:schema:mpd:2011"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" profiles="urn:mpeg:dash:profile:isoff-live:2011" type="static"
xmlns:sea="urn:mpeg:dash:schema:sea:2012" mediaPresentationDuration="PT3M32.949S" minBufferTime="PT3S">
<Period>
<AdaptationSet id="1" group="5" profiles="ccff" bitstreamSwitching="false" segmentAlignment="true" contentType="audio" mimeType="audio/mp4" codecs="mp4a.40.2" lang="en">
<ContentProtection schemeIdUri="urn:mpeg:dash:sea:2012">
<sea:SegmentEncryption schemeIdUri="urn:mpeg:dash:sea:aes128-cbc:2013"/>
<sea:KeySystem keySystemUri="urn:mpeg:dash:sea:keysys:http:2013"/>
<sea:CryptoPeriod keyUriTemplate="https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012" IV="0x7BD31E102B0CE9CCD39691782533656C"/>
</ContentProtection>
<Label>aac_eng_2_127999_2_1</Label>
<SegmentTemplate timescale="10000000" media="QualityLevels($Bandwidth$)/Fragments(aac_eng_2_127999_2_1=$Time$,format=mpd-time-csf)" initialization="QualityLevels($Bandwidth$)/Fragments(aac_eng_2_127999_2_1=i,format=mpd-time-csf)">
<SegmentTimeline>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333" r="1"/>
<S d="20053334"/>
<S d="20053333"/>
<S d="3840000"/>
</SegmentTimeline>
</SegmentTemplate>
<Representation id="5_A_aac_eng_2_127999_2_1_1" bandwidth="127999" audioSamplingRate="48000"/>
</AdaptationSet>
<AdaptationSet id="2" group="1" profiles="ccff" bitstreamSwitching="false" segmentAlignment="true" contentType="video" mimeType="video/mp4" codecs="avc1.640028" maxWidth="1920" maxHeight="1080" startWithSAP="1">
<ContentProtection schemeIdUri="urn:mpeg:dash:sea:2012">
<sea:SegmentEncryption schemeIdUri="urn:mpeg:dash:sea:aes128-cbc:2013"/>
<sea:KeySystem keySystemUri="urn:mpeg:dash:sea:keysys:http:2013"/>
<sea:CryptoPeriod keyUriTemplate="https://zavideoplatform.keydelivery.eastus.media.azure.net/?kid=9280864f-064e-48c0-97e0-f2bcb1d8d012" IV="0x7BD31E102B0CE9CCD39691782533656C"/>
</ContentProtection>
<SegmentTemplate timescale="10000000" media="QualityLevels($Bandwidth$)/Fragments(video=$Time$,format=mpd-time-csf)" initialization="QualityLevels($Bandwidth$)/Fragments(video=i,format=mpd-time-csf)">
<SegmentTimeline>
<S d="20000000" r="105"/>
<S d="8666666"/>
</SegmentTimeline>
</SegmentTemplate>
<Representation id="1_V_video_1" bandwidth="628102" width="1920" height="1080"/>
<Representation id="1_V_video_2" bandwidth="422519" codecs="avc1.64001F" width="1280" height="720"/>
<Representation id="1_V_video_3" bandwidth="258591" codecs="avc1.64001F" width="960" height="540"/>
</AdaptationSet>
</Period>
</MPD>

13
test/testdata/mpd/w3c_pssh.mpd vendored Normal file
View File

@ -0,0 +1,13 @@
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" minBufferTime="PT1.500S" type="static" mediaPresentationDuration="PT0H12M14.000S" maxSegmentDuration="PT0H0M4.000S" profiles="urn:mpeg:dash:profile:isoff-live:2011,http://dashif.org/guidelines/dash264" xmlns:cenc="urn:mpeg:cenc:2013" xmlns:clearkey="http://dashif.org/guidelines/clearKey">
<Period duration="PT0H12M14.000S">
<AdaptationSet segmentAlignment="true" group="1" maxWidth="1920" maxHeight="1080" maxFrameRate="24" par="16:9" lang="und">
<ContentProtection schemeIdUri="urn:mpeg:dash:mp4protection:2011" value="cenc" cenc:default_KID="9eb4050d-e44b-4802-932e-27d75083e266" />
<ContentProtection schemeIdUri="urn:uuid:1077efec-c0b2-4d02-ace3-3c1e52e2fb4b">
<cenc:pssh>AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAFDIVZ4EjQSNBI0EjQSNBI0AAAAAA==</cenc:pssh>
</ContentProtection>
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
<SegmentTemplate timescale="24" media="$RepresentationID$/$Number%04d$.m4s" startNumber="1" duration="96" initialization="$RepresentationID$/init.mp4" />
<Representation id="1" mimeType="video/mp4" codecs="avc1.64001f" width="512" height="288" frameRate="24" sar="1:1" startWithSAP="1" bandwidth="389802"></Representation>
</AdaptationSet>
</Period>
</MPD>

View File

@ -48,6 +48,7 @@ from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import ( from .postprocessor import (
EmbedThumbnailPP, EmbedThumbnailPP,
FFmpegCENCDecryptPP,
FFmpegFixupDuplicateMoovPP, FFmpegFixupDuplicateMoovPP,
FFmpegFixupDurationPP, FFmpegFixupDurationPP,
FFmpegFixupM3u8PP, FFmpegFixupM3u8PP,
@ -3380,6 +3381,8 @@ class YoutubeDL:
self.report_error(f'{msg}. Aborting') self.report_error(f'{msg}. Aborting')
return return
decrypter = FFmpegCENCDecryptPP(self)
info_dict.setdefault('__files_to_cenc_decrypt', [])
if info_dict.get('requested_formats') is not None: if info_dict.get('requested_formats') is not None:
old_ext = info_dict['ext'] old_ext = info_dict['ext']
if self.params.get('merge_output_format') is None: if self.params.get('merge_output_format') is None:
@ -3460,8 +3463,12 @@ class YoutubeDL:
downloaded.append(fname) downloaded.append(fname)
partial_success, real_download = self.dl(fname, new_info) partial_success, real_download = self.dl(fname, new_info)
info_dict['__real_download'] = info_dict['__real_download'] or real_download info_dict['__real_download'] = info_dict['__real_download'] or real_download
if new_info.get('dash_cenc', {}).get('key'):
info_dict['__files_to_cenc_decrypt'].append((fname, new_info['dash_cenc']['key']))
success = success and partial_success success = success and partial_success
if downloaded and info_dict['__files_to_cenc_decrypt'] and decrypter.available:
info_dict['__postprocessors'].append(decrypter)
if downloaded and merger.available and not self.params.get('allow_unplayable_formats'): if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
info_dict['__postprocessors'].append(merger) info_dict['__postprocessors'].append(merger)
info_dict['__files_to_merge'] = downloaded info_dict['__files_to_merge'] = downloaded
@ -3478,6 +3485,9 @@ class YoutubeDL:
# So we should try to resume the download # So we should try to resume the download
success, real_download = self.dl(temp_filename, info_dict) success, real_download = self.dl(temp_filename, info_dict)
info_dict['__real_download'] = real_download info_dict['__real_download'] = real_download
if info_dict.get('dash_cenc', {}).get('key') and decrypter.available:
info_dict['__postprocessors'].append(decrypter)
info_dict['__files_to_cenc_decrypt'] = [(temp_filename, info_dict['dash_cenc']['key'])]
else: else:
self.report_file_already_downloaded(dl_filename) self.report_file_already_downloaded(dl_filename)

View File

@ -1,9 +1,14 @@
import base64
import binascii
import json
import time import time
import urllib.parse import urllib.parse
from . import get_suitable_downloader from . import get_suitable_downloader
from .fragment import FragmentFD from .fragment import FragmentFD
from ..utils import update_url_query, urljoin from ..networking import Request
from ..networking.exceptions import RequestError
from ..utils import remove_start, traverse_obj, update_url_query, urljoin
class DashSegmentsFD(FragmentFD): class DashSegmentsFD(FragmentFD):
@ -49,6 +54,25 @@ class DashSegmentsFD(FragmentFD):
if extra_param_to_segment_url: if extra_param_to_segment_url:
extra_query = urllib.parse.parse_qs(extra_param_to_segment_url) extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
hls_aes = fmt.get('hls_aes', {})
if hls_aes:
decrypt_info = {'METHOD', 'AES-128'}
key = hls_aes.get('key')
if key:
key = binascii.unhexlify(remove_start(key, '0x'))
assert len(key) in (16, 24, 32), 'Invalid length for HLS AES-128 key'
decrypt_info['KEY'] = key
iv = hls_aes.get('iv')
if iv:
iv = binascii.unhexlify(remove_start(iv, '0x').zfill(32))
decrypt_info['IV'] = iv
uri = hls_aes.get('uri')
if uri:
if extra_query:
uri = update_url_query(uri, extra_query)
decrypt_info['URI'] = uri
ctx['decrypt_info'] = decrypt_info
fragments_to_download = self._get_fragments(fmt, ctx, extra_query) fragments_to_download = self._get_fragments(fmt, ctx, extra_query)
if real_downloader: if real_downloader:
@ -60,6 +84,12 @@ class DashSegmentsFD(FragmentFD):
args.append([ctx, fragments_to_download, fmt]) args.append([ctx, fragments_to_download, fmt])
cenc_key = traverse_obj(info_dict, ('dash_cenc', 'key'))
cenc_key_ids = traverse_obj(info_dict, ('dash_cenc', 'key_ids'))
clearkey_laurl = traverse_obj(info_dict, ('dash_cenc', 'laurl'))
if not cenc_key and cenc_key_ids and clearkey_laurl:
self._get_clearkey_cenc(info_dict, clearkey_laurl, cenc_key_ids)
return self.download_and_append_fragments_multiple(*args, is_fatal=lambda idx: idx == 0) return self.download_and_append_fragments_multiple(*args, is_fatal=lambda idx: idx == 0)
def _resolve_fragments(self, fragments, ctx): def _resolve_fragments(self, fragments, ctx):
@ -87,4 +117,35 @@ class DashSegmentsFD(FragmentFD):
'fragment_count': fragment.get('fragment_count'), 'fragment_count': fragment.get('fragment_count'),
'index': i, 'index': i,
'url': fragment_url, 'url': fragment_url,
'decrypt_info': ctx.get('decrypt_info', {'METHOD': 'NONE'}),
} }
def _get_clearkey_cenc(self, info_dict, laurl, key_ids):
dash_cenc = info_dict.get('dash_cenc', {})
payload = json.dumps({
'kids': [
base64.urlsafe_b64encode(bytes.fromhex(k)).decode().rstrip('=')
for k in key_ids
],
'type': 'temporary',
}).encode()
try:
response = self.ydl.urlopen(Request(
laurl, data=payload, headers={'Content-Type': 'application/json'}))
data = json.loads(response.read())
except (RequestError, json.JSONDecodeError) as err:
self.report_error(f'Failed to retrieve key from Clear Key license server: {err}')
return
keys = data.get('keys', [])
if len(keys) > 1:
self.report_warning('Clear Key license server returned multiple keys but only single key CENC is supported')
for key in keys:
k = key.get('k')
if k:
try:
dash_cenc.update({'key': base64.urlsafe_b64decode(f'{k}==').hex()})
info_dict['dash_cenc'] = dash_cenc
return
except (ValueError, binascii.Error):
pass
self.report_error('Clear key license server did not return any valid CENC keys')

View File

@ -208,6 +208,10 @@ from .bandcamp import (
BandcampUserIE, BandcampUserIE,
BandcampWeeklyIE, BandcampWeeklyIE,
) )
from .bandlab import (
BandlabIE,
BandlabPlaylistIE,
)
from .bannedvideo import BannedVideoIE from .bannedvideo import BannedVideoIE
from .bbc import ( from .bbc import (
BBCIE, BBCIE,
@ -1649,6 +1653,7 @@ from .radiokapital import (
RadioKapitalIE, RadioKapitalIE,
RadioKapitalShowIE, RadioKapitalShowIE,
) )
from .radioradicale import RadioRadicaleIE
from .radiozet import RadioZetPodcastIE from .radiozet import RadioZetPodcastIE
from .radlive import ( from .radlive import (
RadLiveChannelIE, RadLiveChannelIE,

438
yt_dlp/extractor/bandlab.py Normal file
View File

@ -0,0 +1,438 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
format_field,
int_or_none,
parse_iso8601,
parse_qs,
truncate_string,
url_or_none,
)
from ..utils.traversal import traverse_obj, value
class BandlabBaseIE(InfoExtractor):
def _call_api(self, endpoint, asset_id, **kwargs):
headers = kwargs.pop('headers', None) or {}
return self._download_json(
f'https://www.bandlab.com/api/v1.3/{endpoint}/{asset_id}',
asset_id, headers={
'accept': 'application/json',
'referer': 'https://www.bandlab.com/',
'x-client-id': 'BandLab-Web',
'x-client-version': '10.1.124',
**headers,
}, **kwargs)
def _parse_revision(self, revision_data, url=None):
return {
'vcodec': 'none',
'media_type': 'revision',
'extractor_key': BandlabIE.ie_key(),
'extractor': BandlabIE.IE_NAME,
**traverse_obj(revision_data, {
'webpage_url': (
'id', ({value(url)}, {format_field(template='https://www.bandlab.com/revision/%s')}), filter, any),
'id': (('revisionId', 'id'), {str}, any),
'title': ('song', 'name', {str}),
'track': ('song', 'name', {str}),
'url': ('mixdown', 'file', {url_or_none}),
'thumbnail': ('song', 'picture', 'url', {url_or_none}),
'description': ('description', {str}),
'uploader': ('creator', 'name', {str}),
'uploader_id': ('creator', 'username', {str}),
'timestamp': ('createdOn', {parse_iso8601}),
'duration': ('mixdown', 'duration', {float_or_none}),
'view_count': ('counters', 'plays', {int_or_none}),
'like_count': ('counters', 'likes', {int_or_none}),
'comment_count': ('counters', 'comments', {int_or_none}),
'genres': ('genres', ..., 'name', {str}),
}),
}
def _parse_track(self, track_data, url=None):
return {
'vcodec': 'none',
'media_type': 'track',
'extractor_key': BandlabIE.ie_key(),
'extractor': BandlabIE.IE_NAME,
**traverse_obj(track_data, {
'webpage_url': (
'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
'id': (('revisionId', 'id'), {str}, any),
'url': ('track', 'sample', 'audioUrl', {url_or_none}),
'title': ('track', 'name', {str}),
'track': ('track', 'name', {str}),
'description': ('caption', {str}),
'thumbnail': ('track', 'picture', ('original', 'url'), {url_or_none}, any),
'view_count': ('counters', 'plays', {int_or_none}),
'like_count': ('counters', 'likes', {int_or_none}),
'comment_count': ('counters', 'comments', {int_or_none}),
'duration': ('track', 'sample', 'duration', {float_or_none}),
'uploader': ('creator', 'name', {str}),
'uploader_id': ('creator', 'username', {str}),
'timestamp': ('createdOn', {parse_iso8601}),
}),
}
def _parse_video(self, video_data, url=None):
return {
'media_type': 'video',
'extractor_key': BandlabIE.ie_key(),
'extractor': BandlabIE.IE_NAME,
**traverse_obj(video_data, {
'id': ('id', {str}),
'webpage_url': (
'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
'url': ('video', 'url', {url_or_none}),
'title': ('caption', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
'description': ('caption', {str}),
'thumbnail': ('video', 'picture', 'url', {url_or_none}),
'view_count': ('video', 'counters', 'plays', {int_or_none}),
'like_count': ('video', 'counters', 'likes', {int_or_none}),
'comment_count': ('counters', 'comments', {int_or_none}),
'duration': ('video', 'duration', {float_or_none}),
'uploader': ('creator', 'name', {str}),
'uploader_id': ('creator', 'username', {str}),
}),
}
class BandlabIE(BandlabBaseIE):
_VALID_URL = [
r'https?://(?:www\.)?bandlab.com/(?P<url_type>track|post|revision)/(?P<id>[\da-f_-]+)',
r'https?://(?:www\.)?bandlab.com/(?P<url_type>embed)/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
]
_EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
_TESTS = [{
'url': 'https://www.bandlab.com/track/04b37e88dba24967b9dac8eb8567ff39_07d7f906fc96ee11b75e000d3a428fff',
'md5': '46f7b43367dd268bbcf0bbe466753b2c',
'info_dict': {
'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
'ext': 'm4a',
'uploader_id': 'ender_milze',
'track': 'sweet black',
'description': 'composed by juanjn3737',
'timestamp': 1702171963,
'view_count': int,
'like_count': int,
'duration': 54.629999999999995,
'title': 'sweet black',
'upload_date': '20231210',
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
'genres': ['Lofi'],
'uploader': 'ender milze',
'comment_count': int,
'media_type': 'revision',
},
}, {
# Same track as above but post URL
'url': 'https://www.bandlab.com/post/07d7f906-fc96-ee11-b75e-000d3a428fff',
'md5': '46f7b43367dd268bbcf0bbe466753b2c',
'info_dict': {
'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
'ext': 'm4a',
'uploader_id': 'ender_milze',
'track': 'sweet black',
'description': 'composed by juanjn3737',
'timestamp': 1702171973,
'view_count': int,
'like_count': int,
'duration': 54.629999999999995,
'title': 'sweet black',
'upload_date': '20231210',
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
'genres': ['Lofi'],
'uploader': 'ender milze',
'comment_count': int,
'media_type': 'revision',
},
}, {
# SharedKey Example
'url': 'https://www.bandlab.com/track/048916c2-c6da-ee11-85f9-6045bd2e11f9?sharedKey=0NNWX8qYAEmI38lWAzCNDA',
'md5': '15174b57c44440e2a2008be9cae00250',
'info_dict': {
'id': '038916c2-c6da-ee11-85f9-6045bd2e11f9',
'ext': 'm4a',
'comment_count': int,
'genres': ['Other'],
'uploader_id': 'user8353034818103753',
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/51b18363-da23-4b9b-a29c-2933a3e561ca/',
'timestamp': 1709625771,
'track': 'PodcastMaerchen4b',
'duration': 468.14,
'view_count': int,
'description': 'Podcast: Neues aus der Märchenwelt',
'like_count': int,
'upload_date': '20240305',
'uploader': 'Erna Wageneder',
'title': 'PodcastMaerchen4b',
'media_type': 'revision',
},
}, {
# Different Revision selected
'url': 'https://www.bandlab.com/track/130343fc-148b-ea11-96d2-0003ffd1fc09?revId=110343fc-148b-ea11-96d2-0003ffd1fc09',
'md5': '74e055ef9325d63f37088772fbfe4454',
'info_dict': {
'id': '110343fc-148b-ea11-96d2-0003ffd1fc09',
'ext': 'm4a',
'timestamp': 1588273294,
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/b612e533-e4f7-4542-9f50-3fcfd8dd822c/',
'description': 'Final Revision.',
'title': 'Replay ( Instrumental)',
'uploader': 'David R Sparks',
'uploader_id': 'davesnothome69',
'view_count': int,
'comment_count': int,
'track': 'Replay ( Instrumental)',
'genres': ['Rock'],
'upload_date': '20200430',
'like_count': int,
'duration': 279.43,
'media_type': 'revision',
},
}, {
# Video
'url': 'https://www.bandlab.com/post/5cdf9036-3857-ef11-991a-6045bd36e0d9',
'md5': '8caa2ef28e86c1dacf167293cfdbeba9',
'info_dict': {
'id': '5cdf9036-3857-ef11-991a-6045bd36e0d9',
'ext': 'mp4',
'duration': 44.705,
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/videos/67c6cef1-cef6-40d3-831e-a55bc1dcb972/',
'comment_count': int,
'title': 'backing vocals',
'uploader_id': 'marliashya',
'uploader': 'auraa',
'like_count': int,
'description': 'backing vocals',
'media_type': 'video',
},
}, {
# Embed Example
'url': 'https://www.bandlab.com/embed/?blur=false&id=014de0a4-7d82-ea11-a94c-0003ffd19c0f',
'md5': 'a4ad05cb68c54faaed9b0a8453a8cf4a',
'info_dict': {
'id': '014de0a4-7d82-ea11-a94c-0003ffd19c0f',
'ext': 'm4a',
'comment_count': int,
'genres': ['Electronic'],
'uploader': 'Charlie Henson',
'timestamp': 1587328674,
'upload_date': '20200419',
'view_count': int,
'track': 'Positronic Meltdown',
'duration': 318.55,
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/87165bc3-5439-496e-b1f7-a9f13b541ff2/',
'description': 'Checkout my tracks at AOMX http://aomxsounds.com/',
'uploader_id': 'microfreaks',
'title': 'Positronic Meltdown',
'like_count': int,
'media_type': 'revision',
},
}, {
# Track without revisions available
'url': 'https://www.bandlab.com/track/55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
'md5': 'f05d68a3769952c2d9257c473e14c15f',
'info_dict': {
'id': '55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
'ext': 'm4a',
'track': 'insame',
'like_count': int,
'duration': 84.03,
'title': 'insame',
'view_count': int,
'comment_count': int,
'uploader': 'Sorakime',
'uploader_id': 'sorakime',
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/572a351a-0f3a-4c6a-ac39-1a5defdeeb1c/',
'timestamp': 1691162128,
'upload_date': '20230804',
'media_type': 'track',
},
}, {
'url': 'https://www.bandlab.com/revision/014de0a4-7d82-ea11-a94c-0003ffd19c0f',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
'url': 'https://phantomluigi.github.io/',
'info_dict': {
'id': 'e14223c3-7871-ef11-bdfd-000d3a980db3',
'ext': 'm4a',
'view_count': int,
'upload_date': '20240913',
'uploader_id': 'phantommusicofficial',
'timestamp': 1726194897,
'uploader': 'Phantom',
'comment_count': int,
'genres': ['Progresive Rock'],
'description': 'md5:a38cd668f7a2843295ef284114f18429',
'duration': 225.23,
'like_count': int,
'title': 'Vermilion Pt. 2 (Cover)',
'track': 'Vermilion Pt. 2 (Cover)',
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/62b10750-7aef-4f42-ad08-1af52f577e97/',
'media_type': 'revision',
},
}]
def _real_extract(self, url):
display_id, url_type = self._match_valid_url(url).group('id', 'url_type')
qs = parse_qs(url)
revision_id = traverse_obj(qs, (('revId', 'id'), 0, any))
if url_type == 'revision':
revision_id = display_id
revision_data = None
if not revision_id:
post_data = self._call_api(
'posts', display_id, note='Downloading post data',
query=traverse_obj(qs, {'sharedKey': ('sharedKey', 0)}))
revision_id = traverse_obj(post_data, (('revisionId', ('revision', 'id')), {str}, any))
revision_data = traverse_obj(post_data, ('revision', {dict}))
if not revision_data and not revision_id:
post_type = post_data.get('type')
if post_type == 'Video':
return self._parse_video(post_data, url=url)
if post_type == 'Track':
return self._parse_track(post_data, url=url)
raise ExtractorError(f'Could not extract data for post type {post_type!r}')
if not revision_data:
revision_data = self._call_api(
'revisions', revision_id, note='Downloading revision data', query={'edit': 'false'})
return self._parse_revision(revision_data, url=url)
class BandlabPlaylistIE(BandlabBaseIE):
_VALID_URL = [
r'https?://(?:www\.)?bandlab.com/(?:[\w]+/)?(?P<type>albums|collections)/(?P<id>[\da-f-]+)',
r'https?://(?:www\.)?bandlab.com/(?P<type>embed)/collection/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
]
_EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
_TESTS = [{
'url': 'https://www.bandlab.com/davesnothome69/albums/89b79ea6-de42-ed11-b495-00224845aac7',
'info_dict': {
'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/69507ff3-579a-45be-afca-9e87eddec944/',
'release_date': '20221003',
'title': 'Remnants',
'album': 'Remnants',
'like_count': int,
'album_type': 'LP',
'description': 'A collection of some feel good, rock hits.',
'comment_count': int,
'view_count': int,
'id': '89b79ea6-de42-ed11-b495-00224845aac7',
'uploader': 'David R Sparks',
'uploader_id': 'davesnothome69',
},
'playlist_count': 10,
}, {
'url': 'https://www.bandlab.com/slytheband/collections/955102d4-1040-ef11-86c3-000d3a42581b',
'info_dict': {
'id': '955102d4-1040-ef11-86c3-000d3a42581b',
'timestamp': 1720762659,
'view_count': int,
'title': 'My Shit 🖤',
'uploader_id': 'slytheband',
'uploader': '𝓢𝓛𝓨',
'upload_date': '20240712',
'like_count': int,
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/collections/2c64ca12-b180-4b76-8587-7a8da76bddc8/',
},
'playlist_count': 15,
}, {
# Embeds can contain both albums and collections with the same URL pattern. This is an album
'url': 'https://www.bandlab.com/embed/collection/?id=12cc6f7f-951b-ee11-907c-00224844f303',
'info_dict': {
'id': '12cc6f7f-951b-ee11-907c-00224844f303',
'release_date': '20230706',
'description': 'This is a collection of songs I created when I had an Amiga computer.',
'view_count': int,
'title': 'Mark Salud The Amiga Collection',
'uploader_id': 'mssirmooth1962',
'comment_count': int,
'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/d618bd7b-0537-40d5-bdd8-61b066e77d59/',
'like_count': int,
'uploader': 'Mark Salud',
'album': 'Mark Salud The Amiga Collection',
'album_type': 'LP',
},
'playlist_count': 24,
}, {
# Tracks without revision id
'url': 'https://www.bandlab.com/embed/collection/?id=e98aafb5-d932-ee11-b8f0-00224844c719',
'info_dict': {
'like_count': int,
'uploader_id': 'sorakime',
'comment_count': int,
'uploader': 'Sorakime',
'view_count': int,
'description': 'md5:4ec31c568a5f5a5a2b17572ea64c3825',
'release_date': '20230812',
'title': 'Art',
'album': 'Art',
'album_type': 'Album',
'id': 'e98aafb5-d932-ee11-b8f0-00224844c719',
'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/20c890de-e94a-4422-828a-2da6377a13c8/',
},
'playlist_count': 13,
}, {
'url': 'https://www.bandlab.com/albums/89b79ea6-de42-ed11-b495-00224845aac7',
'only_matching': True,
}]
def _entries(self, album_data):
for post in traverse_obj(album_data, ('posts', lambda _, v: v['type'])):
post_type = post['type']
if post_type == 'Revision':
yield self._parse_revision(post.get('revision'))
elif post_type == 'Track':
yield self._parse_track(post)
elif post_type == 'Video':
yield self._parse_video(post)
else:
self.report_warning(f'Skipping unknown post type: "{post_type}"')
def _real_extract(self, url):
playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')
endpoints = {
'albums': ['albums'],
'collections': ['collections'],
'embed': ['collections', 'albums'],
}.get(playlist_type)
for endpoint in endpoints:
playlist_data = self._call_api(
endpoint, playlist_id, note=f'Downloading {endpoint[:-1]} data',
fatal=False, expected_status=404)
if not playlist_data.get('errorCode'):
playlist_type = endpoint
break
if error_code := playlist_data.get('errorCode'):
raise ExtractorError(f'Could not find playlist data. Error code: "{error_code}"')
return self.playlist_result(
self._entries(playlist_data), playlist_id,
**traverse_obj(playlist_data, {
'title': ('name', {str}),
'description': ('description', {str}),
'uploader': ('creator', 'name', {str}),
'uploader_id': ('creator', 'username', {str}),
'timestamp': ('createdOn', {parse_iso8601}),
'release_date': ('releaseDate', {lambda x: x.replace('-', '')}, filter),
'thumbnail': ('picture', ('original', 'url'), {url_or_none}, any),
'like_count': ('counters', 'likes', {int_or_none}),
'comment_count': ('counters', 'comments', {int_or_none}),
'view_count': ('counters', 'plays', {int_or_none}),
}),
**(traverse_obj(playlist_data, {
'album': ('name', {str}),
'album_type': ('type', {str}),
}) if playlist_type == 'albums' else {}))

View File

@ -5,6 +5,7 @@ from ..utils import (
ExtractorError, ExtractorError,
lowercase_escape, lowercase_escape,
url_or_none, url_or_none,
urlencode_postdata,
) )
@ -40,14 +41,48 @@ class ChaturbateIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
_ROOM_OFFLINE = 'Room is currently offline' _ERROR_MAP = {
'offline': 'Room is currently offline',
'private': 'Room is currently in a private show',
'away': 'Performer is currently away',
'password protected': 'Room is password protected',
'hidden': 'Hidden session in progress',
}
def _real_extract(self, url): def _extract_from_api(self, video_id, tld):
video_id, tld = self._match_valid_url(url).group('id', 'tld') response = self._download_json(
f'https://chaturbate.{tld}/get_edge_hls_url_ajax/', video_id,
data=urlencode_postdata({'room_slug': video_id}),
headers={
**self.geo_verification_headers(),
'X-Requested-With': 'XMLHttpRequest',
'Accept': 'application/json',
}, fatal=False, impersonate=True) or {}
status = response.get('room_status')
if status != 'public':
if error := self._ERROR_MAP.get(status):
raise ExtractorError(error, expected=True)
self.report_warning('Falling back to webpage extraction')
return None
m3u8_url = response.get('url')
if not m3u8_url:
self.raise_geo_restricted()
return {
'id': video_id,
'title': video_id,
'thumbnail': f'https://roomimg.stream.highwebmedia.com/ri/{video_id}.jpg',
'is_live': True,
'age_limit': 18,
'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True),
}
def _extract_from_webpage(self, video_id, tld):
webpage = self._download_webpage( webpage = self._download_webpage(
f'https://chaturbate.{tld}/{video_id}/', video_id, f'https://chaturbate.{tld}/{video_id}/', video_id,
headers=self.geo_verification_headers()) headers=self.geo_verification_headers(), impersonate=True)
found_m3u8_urls = [] found_m3u8_urls = []
@ -85,8 +120,8 @@ class ChaturbateIE(InfoExtractor):
webpage, 'error', group='error', default=None) webpage, 'error', group='error', default=None)
if not error: if not error:
if any(p in webpage for p in ( if any(p in webpage for p in (
self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')): self._ERROR_MAP['offline'], 'offline_tipping', 'tip_offline')):
error = self._ROOM_OFFLINE error = self._ERROR_MAP['offline']
if error: if error:
raise ExtractorError(error, expected=True) raise ExtractorError(error, expected=True)
raise ExtractorError('Unable to find stream URL') raise ExtractorError('Unable to find stream URL')
@ -113,3 +148,7 @@ class ChaturbateIE(InfoExtractor):
'is_live': True, 'is_live': True,
'formats': formats, 'formats': formats,
} }
def _real_extract(self, url):
video_id, tld = self._match_valid_url(url).group('id', 'tld')
return self._extract_from_api(video_id, tld) or self._extract_from_webpage(video_id, tld)

View File

@ -14,12 +14,14 @@ import netrc
import os import os
import random import random
import re import re
import struct
import subprocess import subprocess
import sys import sys
import time import time
import types import types
import urllib.parse import urllib.parse
import urllib.request import urllib.request
import uuid
import xml.etree.ElementTree import xml.etree.ElementTree
from ..compat import ( from ..compat import (
@ -247,7 +249,9 @@ class InfoExtractor:
* hls_aes A dictionary of HLS AES-128 decryption information * hls_aes A dictionary of HLS AES-128 decryption information
used by the native HLS downloader to override the used by the native HLS downloader to override the
values in the media playlist when an '#EXT-X-KEY' tag values in the media playlist when an '#EXT-X-KEY' tag
is present in the playlist: is present in the playlist. Used by the native DASH downloader
when DASH-SEA with AES-128-CBC content protection is present
in the manifest.:
* uri The URI from which the key will be downloaded * uri The URI from which the key will be downloaded
* key The key (as hex) used to decrypt fragments. * key The key (as hex) used to decrypt fragments.
If `key` is given, any key URI will be ignored If `key` is given, any key URI will be ignored
@ -259,6 +263,16 @@ class InfoExtractor:
* ffmpeg_args_out Extra arguments for ffmpeg downloader (output) * ffmpeg_args_out Extra arguments for ffmpeg downloader (output)
* is_dash_periods Whether the format is a result of merging * is_dash_periods Whether the format is a result of merging
multiple DASH periods. multiple DASH periods.
* dash_cenc A dictionary of DASH CENC decryption information
used by the native DASH downloader when MPEG CENC content protection
is present in the manifest.
* laurl The Clear Key license server URL from which
CENC keys will be downloaded.
* key_ids List of key IDs (as hex) to request from the ClearKey
license server.
* key The CENC key (as hex) used to decrypt fragments.
If `key` is given, any license server URL and
key IDs will be ignored.
RTMP formats can also have the additional fields: page_url, RTMP formats can also have the additional fields: page_url,
app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn, app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
rtmp_protocol, rtmp_real_time rtmp_protocol, rtmp_real_time
@ -2680,7 +2694,11 @@ class InfoExtractor:
assert 'is_dash_periods' not in f, 'format already processed' assert 'is_dash_periods' not in f, 'format already processed'
f['is_dash_periods'] = True f['is_dash_periods'] = True
format_key = tuple(v for k, v in f.items() if k not in ( format_key = tuple(v for k, v in f.items() if k not in (
('format_id', 'fragments', 'manifest_stream_number'))) ('format_id', 'fragments', 'manifest_stream_number', 'dash_cenc', 'hls_aes')))
for k in ('dash_cenc', 'hls_aes'):
if k in f:
format_key = format_key + tuple(
tuple(v) if isinstance(v, list) else v for v in f[k].values())
if format_key not in formats: if format_key not in formats:
formats[format_key] = f formats[format_key] = f
elif 'fragments' in f: elif 'fragments' in f:
@ -2714,8 +2732,16 @@ class InfoExtractor:
def _add_ns(path): def _add_ns(path):
return self._xpath_ns(path, namespace) return self._xpath_ns(path, namespace)
def is_drm_protected(element): def extract_drm_info(element):
return element.find(_add_ns('ContentProtection')) is not None info = {}
has_drm = False
for cp_e in element.findall(_add_ns('ContentProtection')):
has_drm = True
self._extract_mpd_content_protection_info(cp_e, info)
cenc_info = info.get('dash_cenc', {})
if has_drm and not ('hls_aes' in info or cenc_info.get('key') or (cenc_info.get('laurl') and cenc_info.get('key_ids'))):
info['has_drm'] = True
return info
def extract_multisegment_info(element, ms_parent_info): def extract_multisegment_info(element, ms_parent_info):
ms_info = ms_parent_info.copy() ms_info = ms_parent_info.copy()
@ -2789,6 +2815,7 @@ class InfoExtractor:
'timescale': 1, 'timescale': 1,
}) })
for adaptation_set in period.findall(_add_ns('AdaptationSet')): for adaptation_set in period.findall(_add_ns('AdaptationSet')):
adaptation_set_drm_info = extract_drm_info(adaptation_set)
adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info) adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
for representation in adaptation_set.findall(_add_ns('Representation')): for representation in adaptation_set.findall(_add_ns('Representation')):
representation_attrib = adaptation_set.attrib.copy() representation_attrib = adaptation_set.attrib.copy()
@ -2875,8 +2902,8 @@ class InfoExtractor:
'acodec': 'none', 'acodec': 'none',
'vcodec': 'none', 'vcodec': 'none',
} }
if is_drm_protected(adaptation_set) or is_drm_protected(representation): f.update(adaptation_set_drm_info)
f['has_drm'] = True f.update(extract_drm_info(representation))
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info) representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
def prepare_template(template_name, identifiers): def prepare_template(template_name, identifiers):
@ -3037,6 +3064,86 @@ class InfoExtractor:
period_entry['subtitles'][lang or 'und'].append(f) period_entry['subtitles'][lang or 'und'].append(f)
yield period_entry yield period_entry
def _extract_mpd_content_protection_info(self, cp_e, info):
"""
Extract supported DASH-CENC parameters for an MPD ContentProtection element.
Called multiple times per extracted format in an MPD (once per ContentProtection element
within AdaptationSet and Representation elements). Subclasses may override this method
when necessary (such as when the Clear Key license server URL is provided separately
from the manifest or when an extractor needs to process the optional data section in W3C
PSSH boxes).
Note that after all ContentProtection elements have been handled, the `has_drm` flag
will be set for any format that does not meet one or more of these conditions:
* `dash_cenc` is set and both `laurl` and `key_ids` are set (indicating the native
DASH downloader should use the specified Clear Key server URL to retreive the
CENC key for this format).
* `dash_cenc` is set and `key` is set (indicating the native DASH downloader should
use the specified CENC key for this format).
* `hls_aes` is set (indicating the native DASH downloader should use DASH SEA
AES-128-CBC decryption for this format).
References:
1. DASH-IF Content Protection Identifiers
https://dashif.org/identifiers/content_protection/
2. DASH-IF Content Protection Guidelines
https://dashif.org/docs/IOP-Guidelines/DASH-IF-IOP-Part6-v5.0.0.pdf
3. W3C "cenc" Initialization Data Format
https://w3c.github.io/encrypted-media/format-registry/initdata/cenc.html
"""
scheme_id = cp_e.get('schemeIdUri')
cenc_info = info.get('dash_cenc', {})
if scheme_id == 'urn:mpeg:dash:mp4protection:2011':
if cp_e.get('value') == 'cenc':
# ISO/IEC 23009-1 MPEG Common Encryption (CENC)
if not cenc_info.get('key_ids'):
try:
default_kid = uuid.UUID(cp_e.get('{urn:mpeg:cenc:2013}default_KID')).hex
cenc_info['key_ids'] = [default_kid]
except (ValueError, TypeError):
pass
elif scheme_id == 'urn:uuid:e2719d58-a985-b3c9-781a-b030af78d30e':
# Clear Key DASH-IF
for tag, ns in itertools.product(
('Laurl', 'laurl'),
('https://dashif.org/CPS', 'http://dashif.org/guidelines/clearKey'),
):
url_e = cp_e.find(self._xpath_ns(tag, ns))
if url_e is not None:
cenc_info['laurl'] = url_e.text
break
elif scheme_id == 'urn:uuid:1077efec-c0b2-4d02-ace3-3c1e52e2fb4b':
# W3C Common System ID
pssh_e = cp_e.find(self._xpath_ns('pssh', 'urn:mpeg:cenc:2013'))
if pssh_e is not None:
# W3C PSSH box (may contain Clear Key KIDs but can also be used
# to store KIDs for other DRM systems)
try:
pssh_box = base64.b64decode(pssh_e.text)
kid_count, = struct.unpack('!L', pssh_box[28:32])
kids = []
for i in range(kid_count):
kid = pssh_box[32 + i * 16:32 + (i + 1) * 16]
kids.append(kid.hex())
cenc_info['key_ids'] = kids
except (ValueError, TypeError, struct.error):
pass
elif scheme_id == 'urn:mpeg:dash:sea:2012':
# ISO/IEC 23009-4 DASH Segment Encryption and Authentication (AES-128-CBC)
sea_ns = 'urn:mpeg:dash:schema:sea:2012'
se_e = cp_e.find(self._xpath_ns('SegmentEncryption', sea_ns))
ks_e = cp_e.find(self._xpath_ns('KeySystem', sea_ns))
crypto_e = cp_e.find(self._xpath_ns('CryptoPeriod', sea_ns))
if (se_e is not None and se_e.get('schemeIdUri') == 'urn:mpeg:dash:sea:aes128-cbc:2013'
and ks_e is not None and ks_e.get('keySystemUri') == 'urn:mpeg:dash:sea:keysys:http:2013'
and crypto_e is not None and crypto_e.get('keyUriTemplate') and crypto_e.get('IV')
):
info['hls_aes'] = {'uri': crypto_e.get('keyUriTemplate'), 'iv': crypto_e.get('IV')}
if cenc_info:
info['dash_cenc'] = cenc_info
def _extract_ism_formats(self, *args, **kwargs): def _extract_ism_formats(self, *args, **kwargs):
fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs) fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
if subs: if subs:

View File

@ -16,10 +16,10 @@ from ..utils import (
parse_iso8601, parse_iso8601,
smuggle_url, smuggle_url,
str_or_none, str_or_none,
traverse_obj,
url_or_none, url_or_none,
urljoin, urljoin,
) )
from ..utils.traversal import traverse_obj, value
class PatreonBaseIE(InfoExtractor): class PatreonBaseIE(InfoExtractor):
@ -252,6 +252,27 @@ class PatreonIE(PatreonBaseIE):
'thumbnail': r're:^https?://.+', 'thumbnail': r're:^https?://.+',
}, },
'skip': 'Patron-only content', 'skip': 'Patron-only content',
}, {
# Contains a comment reply in the 'included' section
'url': 'https://www.patreon.com/posts/114721679',
'info_dict': {
'id': '114721679',
'ext': 'mp4',
'upload_date': '20241025',
'uploader': 'Japanalysis',
'like_count': int,
'thumbnail': r're:^https?://.+',
'comment_count': int,
'title': 'Karasawa Part 2',
'description': 'Part 2 of this video https://www.youtube.com/watch?v=Azms2-VTASk',
'uploader_url': 'https://www.patreon.com/japanalysis',
'uploader_id': '80504268',
'channel_url': 'https://www.patreon.com/japanalysis',
'channel_follower_count': int,
'timestamp': 1729897015,
'channel_id': '9346307',
},
'params': {'getcomments': True},
}] }]
_RETURN_TYPE = 'video' _RETURN_TYPE = 'video'
@ -404,26 +425,24 @@ class PatreonIE(PatreonBaseIE):
f'posts/{post_id}/comments', post_id, query=params, note=f'Downloading comments page {page}') f'posts/{post_id}/comments', post_id, query=params, note=f'Downloading comments page {page}')
cursor = None cursor = None
for comment in traverse_obj(response, (('data', ('included', lambda _, v: v['type'] == 'comment')), ...)): for comment in traverse_obj(response, (('data', 'included'), lambda _, v: v['type'] == 'comment' and v['id'])):
count += 1 count += 1
comment_id = comment.get('id')
attributes = comment.get('attributes') or {}
if comment_id is None:
continue
author_id = traverse_obj(comment, ('relationships', 'commenter', 'data', 'id')) author_id = traverse_obj(comment, ('relationships', 'commenter', 'data', 'id'))
author_info = traverse_obj(
response, ('included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes'),
get_all=False, expected_type=dict, default={})
yield { yield {
'id': comment_id, **traverse_obj(comment, {
'text': attributes.get('body'), 'id': ('id', {str_or_none}),
'timestamp': parse_iso8601(attributes.get('created')), 'text': ('attributes', 'body', {str}),
'parent': traverse_obj(comment, ('relationships', 'parent', 'data', 'id'), default='root'), 'timestamp': ('attributes', 'created', {parse_iso8601}),
'author_is_uploader': attributes.get('is_by_creator'), 'parent': ('relationships', 'parent', 'data', ('id', {value('root')}), {str}, any),
'author_is_uploader': ('attributes', 'is_by_creator', {bool}),
}),
**traverse_obj(response, (
'included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes', {
'author': ('full_name', {str}),
'author_thumbnail': ('image_url', {url_or_none}),
}), get_all=False),
'author_id': author_id, 'author_id': author_id,
'author': author_info.get('full_name'),
'author_thumbnail': author_info.get('image_url'),
} }
if count < traverse_obj(response, ('meta', 'count')): if count < traverse_obj(response, ('meta', 'count')):

View File

@ -0,0 +1,105 @@
from .common import InfoExtractor
from ..utils import url_or_none
from ..utils.traversal import traverse_obj
class RadioRadicaleIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?radioradicale\.it/scheda/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://www.radioradicale.it/scheda/471591',
'md5': 'eb0fbe43a601f1a361cbd00f3c45af4a',
'info_dict': {
'id': '471591',
'ext': 'mp4',
'title': 'md5:e8fbb8de57011a3255db0beca69af73d',
'description': 'md5:5e15a789a2fe4d67da8d1366996e89ef',
'location': 'Napoli',
'duration': 2852.0,
'timestamp': 1459987200,
'upload_date': '20160407',
'thumbnail': 'https://www.radioradicale.it/photo400/0/0/9/0/1/00901768.jpg',
},
}, {
'url': 'https://www.radioradicale.it/scheda/742783/parlamento-riunito-in-seduta-comune-11a-della-xix-legislatura',
'info_dict': {
'id': '742783',
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
'description': '-) Votazione per l\'elezione di un giudice della Corte Costituzionale (nono scrutinio)',
'location': 'CAMERA',
'duration': 5868.0,
'timestamp': 1730246400,
'upload_date': '20241030',
},
'playlist': [{
'md5': 'aa48de55dcc45478e4cd200f299aab7d',
'info_dict': {
'id': '742783-0',
'ext': 'mp4',
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
},
}, {
'md5': 'be915c189c70ad2920e5810f32260ff5',
'info_dict': {
'id': '742783-1',
'ext': 'mp4',
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
},
}, {
'md5': 'f0ee4047342baf8ed3128a8417ac5e0a',
'info_dict': {
'id': '742783-2',
'ext': 'mp4',
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
},
}],
}]
def _entries(self, videos_info, page_id):
for idx, video in enumerate(traverse_obj(
videos_info, ('playlist', lambda _, v: v['sources']))):
video_id = f'{page_id}-{idx}'
formats = []
subtitles = {}
for m3u8_url in traverse_obj(video, ('sources', ..., 'src', {url_or_none})):
fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
for sub in traverse_obj(video, ('subtitles', ..., lambda _, v: url_or_none(v['src']))):
self._merge_subtitles({sub.get('srclang') or 'und': [{
'url': sub['src'],
'name': sub.get('label'),
}]}, target=subtitles)
yield {
'id': video_id,
'title': video.get('title'),
'formats': formats,
'subtitles': subtitles,
}
def _real_extract(self, url):
page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)
videos_info = self._search_json(
r'jQuery\.extend\(Drupal\.settings\s*,',
webpage, 'videos_info', page_id)['RRscheda']
entries = list(self._entries(videos_info, page_id))
common_info = {
'id': page_id,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'location': videos_info.get('luogo'),
**self._search_json_ld(webpage, page_id),
}
if len(entries) == 1:
return {
**entries[0],
**common_info,
}
return self.playlist_result(entries, multi_video=True, **common_info)

View File

@ -213,7 +213,7 @@ class RedGifsSearchIE(RedGifsBaseInfoExtractor):
class RedGifsUserIE(RedGifsBaseInfoExtractor): class RedGifsUserIE(RedGifsBaseInfoExtractor):
IE_DESC = 'Redgifs user' IE_DESC = 'Redgifs user'
_VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?' _VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?'
_PAGE_SIZE = 30 _PAGE_SIZE = 80
_TESTS = [ _TESTS = [
{ {
'url': 'https://www.redgifs.com/users/lamsinka89', 'url': 'https://www.redgifs.com/users/lamsinka89',
@ -222,7 +222,7 @@ class RedGifsUserIE(RedGifsBaseInfoExtractor):
'title': 'lamsinka89', 'title': 'lamsinka89',
'description': 'RedGifs user lamsinka89, ordered by recent', 'description': 'RedGifs user lamsinka89, ordered by recent',
}, },
'playlist_mincount': 100, 'playlist_mincount': 391,
}, },
{ {
'url': 'https://www.redgifs.com/users/lamsinka89?page=3', 'url': 'https://www.redgifs.com/users/lamsinka89?page=3',
@ -231,7 +231,7 @@ class RedGifsUserIE(RedGifsBaseInfoExtractor):
'title': 'lamsinka89', 'title': 'lamsinka89',
'description': 'RedGifs user lamsinka89, ordered by recent', 'description': 'RedGifs user lamsinka89, ordered by recent',
}, },
'playlist_count': 30, 'playlist_count': 80,
}, },
{ {
'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g', 'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g',
@ -240,7 +240,17 @@ class RedGifsUserIE(RedGifsBaseInfoExtractor):
'title': 'lamsinka89', 'title': 'lamsinka89',
'description': 'RedGifs user lamsinka89, ordered by best', 'description': 'RedGifs user lamsinka89, ordered by best',
}, },
'playlist_mincount': 100, 'playlist_mincount': 391,
},
{
'url': 'https://www.redgifs.com/users/ignored52',
'note': 'https://github.com/yt-dlp/yt-dlp/issues/7382',
'info_dict': {
'id': 'ignored52',
'title': 'ignored52',
'description': 'RedGifs user ignored52, ordered by recent',
},
'playlist_mincount': 121,
}, },
] ]

View File

@ -71,9 +71,11 @@ class SpankBangIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') or mobj.group('id_2') video_id = mobj.group('id') or mobj.group('id_2')
country = self.get_param('geo_bypass_country') or 'US'
self._set_cookie('.spankbang.com', 'country', country.upper())
webpage = self._download_webpage( webpage = self._download_webpage(
url.replace(f'/{video_id}/embed', f'/{video_id}/video'), url.replace(f'/{video_id}/embed', f'/{video_id}/video'),
video_id, headers={'Cookie': 'country=US'}) video_id, impersonate=True)
if re.search(r'<[^>]+\b(?:id|class)=["\']video_removed', webpage): if re.search(r'<[^>]+\b(?:id|class)=["\']video_removed', webpage):
raise ExtractorError( raise ExtractorError(

View File

@ -8,6 +8,7 @@ from .ffmpeg import (
FFmpegCopyStreamPP, FFmpegCopyStreamPP,
FFmpegEmbedSubtitlePP, FFmpegEmbedSubtitlePP,
FFmpegExtractAudioPP, FFmpegExtractAudioPP,
FFmpegCENCDecryptPP,
FFmpegFixupDuplicateMoovPP, FFmpegFixupDuplicateMoovPP,
FFmpegFixupDurationPP, FFmpegFixupDurationPP,
FFmpegFixupM3u8PP, FFmpegFixupM3u8PP,

View File

@ -331,7 +331,7 @@ class FFmpegPostProcessor(PostProcessor):
[(path, []) for path in input_paths], [(path, []) for path in input_paths],
[(out_path, opts)], **kwargs) [(out_path, opts)], **kwargs)
def real_run_ffmpeg(self, input_path_opts, output_path_opts, *, expected_retcodes=(0,)): def real_run_ffmpeg(self, input_path_opts, output_path_opts, *, prepend_opts=None, expected_retcodes=(0,)):
self.check_version() self.check_version()
oldest_mtime = min( oldest_mtime = min(
@ -342,6 +342,9 @@ class FFmpegPostProcessor(PostProcessor):
if self.basename == 'ffmpeg': if self.basename == 'ffmpeg':
cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')] cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')]
if prepend_opts:
cmd += prepend_opts
def make_args(file, args, name, number): def make_args(file, args, name, number):
keys = [f'_{name}{number}', f'_{name}'] keys = [f'_{name}{number}', f'_{name}']
if name == 'o': if name == 'o':
@ -857,12 +860,23 @@ class FFmpegMergerPP(FFmpegPostProcessor):
return True return True
class FFmpegCENCDecryptPP(FFmpegPostProcessor):
@PostProcessor._restrict_to(images=False)
def run(self, info):
for filename, key in info.get('__files_to_cenc_decrypt', []):
temp_filename = prepend_extension(filename, 'temp')
self.to_screen(f'Decrypting "{filename}"')
self.run_ffmpeg(filename, temp_filename, self.stream_copy_opts(), prepend_opts=['-decryption_key', key])
os.replace(temp_filename, filename)
return [], info
class FFmpegFixupPostProcessor(FFmpegPostProcessor): class FFmpegFixupPostProcessor(FFmpegPostProcessor):
def _fixup(self, msg, filename, options): def _fixup(self, msg, filename, options, prepend_opts=None):
temp_filename = prepend_extension(filename, 'temp') temp_filename = prepend_extension(filename, 'temp')
self.to_screen(f'{msg} of "{filename}"') self.to_screen(f'{msg} of "{filename}"')
self.run_ffmpeg(filename, temp_filename, options) self.run_ffmpeg(filename, temp_filename, options, prepend_opts=prepend_opts)
os.replace(temp_filename, filename) os.replace(temp_filename, filename)
@ -934,7 +948,11 @@ class FFmpegCopyStreamPP(FFmpegFixupPostProcessor):
@PostProcessor._restrict_to(images=False) @PostProcessor._restrict_to(images=False)
def run(self, info): def run(self, info):
self._fixup(self.MESSAGE, info['filepath'], self.stream_copy_opts()) self._fixup(
self.MESSAGE,
info['filepath'],
self.stream_copy_opts(),
)
return [], info return [], info