Compare commits

..

No commits in common. "4416f82c809a81737d68875dcb201e366d58dabd" and "dc512e3a8a26a8e3fc7f1f67e5ee5e7699db8659" have entirely different histories.

7 changed files with 198 additions and 994 deletions

View File

@ -993,8 +993,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
'tbr': 5997.485, 'tbr': 5997.485,
'width': 1920, 'width': 1920,
'height': 1080, 'height': 1080,
}], }]
{},
), ( ), (
# https://github.com/ytdl-org/youtube-dl/pull/14844 # https://github.com/ytdl-org/youtube-dl/pull/14844
'urls_only', 'urls_only',
@ -1077,8 +1076,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
'tbr': 4400, 'tbr': 4400,
'width': 1920, 'width': 1920,
'height': 1080, 'height': 1080,
}], }]
{},
), ( ), (
# https://github.com/ytdl-org/youtube-dl/issues/20346 # https://github.com/ytdl-org/youtube-dl/issues/20346
# Media considered unfragmented even though it contains # Media considered unfragmented even though it contains
@ -1124,185 +1122,18 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
'width': 360, 'width': 360,
'height': 360, 'height': 360,
'fps': 30, 'fps': 30,
}], }]
{},
), (
# https://github.com/ytdl-org/youtube-dl/issues/30235
# Bento4 generated test mpd
# mp4dash --mpd-name=manifest.mpd --no-split --use-segment-list mediafiles
'url_and_range',
'http://unknown/manifest.mpd', # mpd_url
'http://unknown/', # mpd_base_url
[{
'manifest_url': 'http://unknown/manifest.mpd',
'fragment_base_url': 'http://unknown/',
'ext': 'm4a',
'format_id': 'audio-und-mp4a.40.2',
'format_note': 'DASH audio',
'container': 'm4a_dash',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'none',
'tbr': 98.808,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'fragment_base_url': 'http://unknown/',
'ext': 'mp4',
'format_id': 'video-avc1',
'format_note': 'DASH video',
'container': 'mp4_dash',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.4D401E',
'tbr': 699.597,
'width': 768,
'height': 432
}],
{},
), (
# https://github.com/ytdl-org/youtube-dl/issues/27575
# GPAC generated test mpd
# MP4Box -dash 10000 -single-file -out manifest.mpd mediafiles
'range_only',
'http://unknown/manifest.mpd', # mpd_url
'http://unknown/', # mpd_base_url
[{
'manifest_url': 'http://unknown/manifest.mpd',
'fragment_base_url': 'http://unknown/audio_dashinit.mp4',
'ext': 'm4a',
'format_id': '2',
'format_note': 'DASH audio',
'container': 'm4a_dash',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'none',
'tbr': 98.096,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'fragment_base_url': 'http://unknown/video_dashinit.mp4',
'ext': 'mp4',
'format_id': '1',
'format_note': 'DASH video',
'container': 'mp4_dash',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.4D401E',
'tbr': 526.987,
'width': 768,
'height': 432
}],
{},
), (
'subtitles',
'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/',
[{
'format_id': 'audio=128001',
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'ext': 'm4a',
'tbr': 128.001,
'asr': 48000,
'format_note': 'DASH audio',
'container': 'm4a_dash',
'vcodec': 'none',
'acodec': 'mp4a.40.2',
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
'protocol': 'http_dash_segments',
}, {
'format_id': 'video=100000',
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'ext': 'mp4',
'width': 336,
'height': 144,
'tbr': 100,
'format_note': 'DASH video',
'container': 'mp4_dash',
'vcodec': 'avc1.4D401F',
'acodec': 'none',
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
'protocol': 'http_dash_segments',
}, {
'format_id': 'video=326000',
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'ext': 'mp4',
'width': 562,
'height': 240,
'tbr': 326,
'format_note': 'DASH video',
'container': 'mp4_dash',
'vcodec': 'avc1.4D401F',
'acodec': 'none',
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
'protocol': 'http_dash_segments',
}, {
'format_id': 'video=698000',
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'ext': 'mp4',
'width': 844,
'height': 360,
'tbr': 698,
'format_note': 'DASH video',
'container': 'mp4_dash',
'vcodec': 'avc1.4D401F',
'acodec': 'none',
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
'protocol': 'http_dash_segments',
}, {
'format_id': 'video=1493000',
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'ext': 'mp4',
'width': 1126,
'height': 480,
'tbr': 1493,
'format_note': 'DASH video',
'container': 'mp4_dash',
'vcodec': 'avc1.4D401F',
'acodec': 'none',
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
'protocol': 'http_dash_segments',
}, {
'format_id': 'video=4482000',
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'ext': 'mp4',
'width': 1688,
'height': 720,
'tbr': 4482,
'format_note': 'DASH video',
'container': 'mp4_dash',
'vcodec': 'avc1.4D401F',
'acodec': 'none',
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
'protocol': 'http_dash_segments',
}],
{
'en': [
{
'ext': 'mp4',
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
'protocol': 'http_dash_segments',
}
]
},
) )
] ]
for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES: for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES:
with open('./test/testdata/mpd/%s.mpd' % mpd_file, with open('./test/testdata/mpd/%s.mpd' % mpd_file,
mode='r', encoding='utf-8') as f: mode='r', encoding='utf-8') as f:
formats, subtitles = self.ie._parse_mpd_formats_and_subtitles( formats = self.ie._parse_mpd_formats(
compat_etree_fromstring(f.read().encode('utf-8')), compat_etree_fromstring(f.read().encode('utf-8')),
mpd_base_url=mpd_base_url, mpd_url=mpd_url) mpd_base_url=mpd_base_url, mpd_url=mpd_url)
self.ie._sort_formats(formats) self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None) expect_value(self, formats, expected_formats, None)
expect_value(self, subtitles, expected_subtitles, None)
def test_parse_f4m_formats(self): def test_parse_f4m_formats(self):
_TEST_CASES = [ _TEST_CASES = [

View File

@ -1,35 +0,0 @@
<?xml version="1.0"?>
<!-- MPD file Generated with GPAC version 1.0.1-revrelease at 2021-11-27T20:53:11.690Z -->
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" minBufferTime="PT1.500S" type="static" mediaPresentationDuration="PT0H0M30.196S" maxSegmentDuration="PT0H0M10.027S" profiles="urn:mpeg:dash:profile:full:2011">
<ProgramInformation moreInformationURL="http://gpac.io">
<Title>manifest.mpd generated by GPAC</Title>
</ProgramInformation>
<Period duration="PT0H0M30.196S">
<AdaptationSet segmentAlignment="true" maxWidth="768" maxHeight="432" maxFrameRate="30000/1001" par="16:9" lang="und" startWithSAP="1">
<Representation id="1" mimeType="video/mp4" codecs="avc1.4D401E" width="768" height="432" frameRate="30000/1001" sar="1:1" bandwidth="526987">
<BaseURL>video_dashinit.mp4</BaseURL>
<SegmentList timescale="90000" duration="900000">
<Initialization range="0-881"/>
<SegmentURL mediaRange="882-876094" indexRange="882-925"/>
<SegmentURL mediaRange="876095-1466732" indexRange="876095-876138"/>
<SegmentURL mediaRange="1466733-1953615" indexRange="1466733-1466776"/>
<SegmentURL mediaRange="1953616-1994211" indexRange="1953616-1953659"/>
</SegmentList>
</Representation>
</AdaptationSet>
<AdaptationSet segmentAlignment="true" lang="und" startWithSAP="1">
<Representation id="2" mimeType="audio/mp4" codecs="mp4a.40.2" audioSamplingRate="48000" bandwidth="98096">
<AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
<BaseURL>audio_dashinit.mp4</BaseURL>
<SegmentList timescale="48000" duration="480000">
<Initialization range="0-752"/>
<SegmentURL mediaRange="753-124129" indexRange="753-796"/>
<SegmentURL mediaRange="124130-250544" indexRange="124130-124173"/>
<SegmentURL mediaRange="250545-374929" indexRange="250545-250588"/>
</SegmentList>
</Representation>
</AdaptationSet>
</Period>
</MPD>

View File

@ -1,351 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Created with Unified Streaming Platform (version=1.10.18-20255) -->
<MPD
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="urn:mpeg:dash:schema:mpd:2011"
xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-DASH_schema_files/DASH-MPD.xsd"
type="static"
mediaPresentationDuration="PT14M48S"
maxSegmentDuration="PT1M"
minBufferTime="PT10S"
profiles="urn:mpeg:dash:profile:isoff-live:2011">
<Period
id="1"
duration="PT14M48S">
<BaseURL>dash/</BaseURL>
<AdaptationSet
id="1"
group="1"
contentType="audio"
segmentAlignment="true"
audioSamplingRate="48000"
mimeType="audio/mp4"
codecs="mp4a.40.2"
startWithSAP="1">
<AudioChannelConfiguration
schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011"
value="2" />
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
<SegmentTemplate
timescale="48000"
initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
<SegmentTimeline>
<S t="0" d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="3584" />
</SegmentTimeline>
</SegmentTemplate>
<Representation
id="audio=128001"
bandwidth="128001">
</Representation>
</AdaptationSet>
<AdaptationSet
id="2"
group="3"
contentType="text"
lang="en"
mimeType="application/mp4"
codecs="stpp"
startWithSAP="1">
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle" />
<SegmentTemplate
timescale="1000"
initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
<SegmentTimeline>
<S t="0" d="60000" r="9" />
<S d="24000" />
</SegmentTimeline>
</SegmentTemplate>
<Representation
id="textstream_eng=1000"
bandwidth="1000">
</Representation>
</AdaptationSet>
<AdaptationSet
id="3"
group="2"
contentType="video"
par="960:409"
minBandwidth="100000"
maxBandwidth="4482000"
maxWidth="1689"
maxHeight="720"
segmentAlignment="true"
mimeType="video/mp4"
codecs="avc1.4D401F"
startWithSAP="1">
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
<SegmentTemplate
timescale="12288"
initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
<SegmentTimeline>
<S t="0" d="24576" r="443" />
</SegmentTimeline>
</SegmentTemplate>
<Representation
id="video=100000"
bandwidth="100000"
width="336"
height="144"
sar="2880:2863"
scanType="progressive">
</Representation>
<Representation
id="video=326000"
bandwidth="326000"
width="562"
height="240"
sar="115200:114929"
scanType="progressive">
</Representation>
<Representation
id="video=698000"
bandwidth="698000"
width="844"
height="360"
sar="86400:86299"
scanType="progressive">
</Representation>
<Representation
id="video=1493000"
bandwidth="1493000"
width="1126"
height="480"
sar="230400:230267"
scanType="progressive">
</Representation>
<Representation
id="video=4482000"
bandwidth="4482000"
width="1688"
height="720"
sar="86400:86299"
scanType="progressive">
</Representation>
</AdaptationSet>
</Period>
</MPD>

View File

@ -1,32 +0,0 @@
<?xml version="1.0" ?>
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" profiles="urn:mpeg:dash:profile:isoff-live:2011" minBufferTime="PT10.01S" mediaPresentationDuration="PT30.097S" type="static">
<!-- Created with Bento4 mp4-dash.py, VERSION=2.0.0-639 -->
<Period>
<!-- Video -->
<AdaptationSet mimeType="video/mp4" segmentAlignment="true" startWithSAP="1" maxWidth="768" maxHeight="432">
<Representation id="video-avc1" codecs="avc1.4D401E" width="768" height="432" scanType="progressive" frameRate="30000/1001" bandwidth="699597">
<SegmentList timescale="1000" duration="10010">
<Initialization sourceURL="video-frag.mp4" range="36-746"/>
<SegmentURL media="video-frag.mp4" mediaRange="747-876117"/>
<SegmentURL media="video-frag.mp4" mediaRange="876118-1466913"/>
<SegmentURL media="video-frag.mp4" mediaRange="1466914-1953954"/>
<SegmentURL media="video-frag.mp4" mediaRange="1953955-1994652"/>
</SegmentList>
</Representation>
</AdaptationSet>
<!-- Audio -->
<AdaptationSet mimeType="audio/mp4" startWithSAP="1" segmentAlignment="true">
<Representation id="audio-und-mp4a.40.2" codecs="mp4a.40.2" bandwidth="98808" audioSamplingRate="48000">
<AudioChannelConfiguration schemeIdUri="urn:mpeg:mpegB:cicp:ChannelConfiguration" value="2"/>
<SegmentList timescale="1000" duration="10010">
<Initialization sourceURL="audio-frag.mp4" range="32-623"/>
<SegmentURL media="audio-frag.mp4" mediaRange="624-124199"/>
<SegmentURL media="audio-frag.mp4" mediaRange="124200-250303"/>
<SegmentURL media="audio-frag.mp4" mediaRange="250304-374365"/>
<SegmentURL media="audio-frag.mp4" mediaRange="374366-374836"/>
</SegmentList>
</Representation>
</AdaptationSet>
</Period>
</MPD>

View File

@ -35,7 +35,6 @@ class DashSegmentsFD(FragmentFD):
for frag_index, fragment in enumerate(fragments, 1): for frag_index, fragment in enumerate(fragments, 1):
if frag_index <= ctx['fragment_index']: if frag_index <= ctx['fragment_index']:
continue continue
success = False
# In DASH, the first segment contains necessary headers to # In DASH, the first segment contains necessary headers to
# generate a valid MP4 file, so always abort for the first segment # generate a valid MP4 file, so always abort for the first segment
fatal = frag_index == 1 or not skip_unavailable_fragments fatal = frag_index == 1 or not skip_unavailable_fragments
@ -43,14 +42,10 @@ class DashSegmentsFD(FragmentFD):
if not fragment_url: if not fragment_url:
assert fragment_base_url assert fragment_base_url
fragment_url = urljoin(fragment_base_url, fragment['path']) fragment_url = urljoin(fragment_base_url, fragment['path'])
headers = info_dict.get('http_headers') success = False
fragment_range = fragment.get('range')
if fragment_range:
headers = headers.copy() if headers else {}
headers['Range'] = 'bytes=%s' % (fragment_range,)
for count in itertools.count(): for count in itertools.count():
try: try:
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict, headers) success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
if not success: if not success:
return False return False
self._append_fragment(ctx, frag_content) self._append_fragment(ctx, frag_content)

View File

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import base64 import base64
import collections
import datetime import datetime
import functools import functools
import hashlib import hashlib
@ -59,7 +58,6 @@ from ..utils import (
GeoRestrictedError, GeoRestrictedError,
GeoUtils, GeoUtils,
int_or_none, int_or_none,
join_nonempty,
js_to_json, js_to_json,
JSON_LD_RE, JSON_LD_RE,
mimetype2ext, mimetype2ext,
@ -76,7 +74,6 @@ from ..utils import (
str_or_none, str_or_none,
str_to_int, str_to_int,
strip_or_none, strip_or_none,
T,
traverse_obj, traverse_obj,
try_get, try_get,
unescapeHTML, unescapeHTML,
@ -183,8 +180,6 @@ class InfoExtractor(object):
fragment_base_url fragment_base_url
* "duration" (optional, int or float) * "duration" (optional, int or float)
* "filesize" (optional, int) * "filesize" (optional, int)
* "range" (optional, str of the form "start-end"
to use in HTTP Range header)
* preference Order number of this format. If this field is * preference Order number of this format. If this field is
present and not None, the formats get sorted present and not None, the formats get sorted
by this field, regardless of all other values. by this field, regardless of all other values.
@ -1756,12 +1751,6 @@ class InfoExtractor(object):
'format_note': 'Quality selection URL', 'format_note': 'Quality selection URL',
} }
def _report_ignoring_subs(self, name):
self.report_warning(bug_reports_message(
'Ignoring subtitle tracks found in the {0} manifest; '
'if any subtitle tracks are missing,'.format(name)
), only_once=True)
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
entry_protocol='m3u8', preference=None, entry_protocol='m3u8', preference=None,
m3u8_id=None, note=None, errnote=None, m3u8_id=None, note=None, errnote=None,
@ -2202,46 +2191,23 @@ class InfoExtractor(object):
}) })
return entries return entries
def _extract_mpd_formats(self, *args, **kwargs): def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
fmts, subs = self._extract_mpd_formats_and_subtitles(*args, **kwargs)
if subs:
self._report_ignoring_subs('DASH')
return fmts
def _extract_mpd_formats_and_subtitles(
self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
fatal=True, data=None, headers=None, query=None):
# TODO: or not? param not yet implemented
if self.get_param('ignore_no_formats_error'):
fatal = False
res = self._download_xml_handle( res = self._download_xml_handle(
mpd_url, video_id, mpd_url, video_id,
note='Downloading MPD manifest' if note is None else note, note=note or 'Downloading MPD manifest',
errnote='Failed to download MPD manifest' if errnote is None else errnote, errnote=errnote or 'Failed to download MPD manifest',
fatal=fatal, data=data, headers=headers or {}, query=query or {}) fatal=fatal, data=data, headers=headers, query=query)
if res is False: if res is False:
return [], {} return []
mpd_doc, urlh = res mpd_doc, urlh = res
if mpd_doc is None: if mpd_doc is None:
return [], {} return []
mpd_base_url = base_url(urlh.geturl())
# We could have been redirected to a new url when we retrieved our mpd file. return self._parse_mpd_formats(
mpd_url = urlh.geturl()
mpd_base_url = base_url(mpd_url)
return self._parse_mpd_formats_and_subtitles(
mpd_doc, mpd_id, mpd_base_url, mpd_url) mpd_doc, mpd_id, mpd_base_url, mpd_url)
def _parse_mpd_formats(self, *args, **kwargs): def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
if subs:
self._report_ignoring_subs('DASH')
return fmts
def _parse_mpd_formats_and_subtitles(
self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
""" """
Parse formats from MPD manifest. Parse formats from MPD manifest.
References: References:
@ -2249,10 +2215,8 @@ class InfoExtractor(object):
http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP 2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
""" """
# TODO: param not yet implemented: default like previous yt-dl logic if mpd_doc.get('type') == 'dynamic':
if not self.get_param('dynamic_mpd', False): return []
if mpd_doc.get('type') == 'dynamic':
return [], {}
namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None) namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
@ -2262,24 +2226,8 @@ class InfoExtractor(object):
def is_drm_protected(element): def is_drm_protected(element):
return element.find(_add_ns('ContentProtection')) is not None return element.find(_add_ns('ContentProtection')) is not None
from ..utils import YoutubeDLHandler
fix_path = YoutubeDLHandler._fix_path
def resolve_base_url(element, parent_base_url=None):
# TODO: use native XML traversal when ready
b_url = traverse_obj(element, (
T(lambda e: e.find(_add_ns('BaseURL')).text)))
if parent_base_url and b_url:
if not parent_base_url[-1] in ('/', ':'):
parent_base_url += '/'
b_url = compat_urlparse.urljoin(parent_base_url, b_url)
if b_url:
b_url = fix_path(b_url)
return b_url or parent_base_url
def extract_multisegment_info(element, ms_parent_info): def extract_multisegment_info(element, ms_parent_info):
ms_info = ms_parent_info.copy() ms_info = ms_parent_info.copy()
base_url = ms_info['base_url'] = resolve_base_url(element, ms_info.get('base_url'))
# As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some # As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
# common attributes and elements. We will only extract relevant # common attributes and elements. We will only extract relevant
@ -2313,27 +2261,15 @@ class InfoExtractor(object):
def extract_Initialization(source): def extract_Initialization(source):
initialization = source.find(_add_ns('Initialization')) initialization = source.find(_add_ns('Initialization'))
if initialization is not None: if initialization is not None:
ms_info['initialization_url'] = initialization.get('sourceURL') or base_url ms_info['initialization_url'] = initialization.attrib['sourceURL']
initialization_url_range = initialization.get('range')
if initialization_url_range:
ms_info['initialization_url_range'] = initialization_url_range
segment_list = element.find(_add_ns('SegmentList')) segment_list = element.find(_add_ns('SegmentList'))
if segment_list is not None: if segment_list is not None:
extract_common(segment_list) extract_common(segment_list)
extract_Initialization(segment_list) extract_Initialization(segment_list)
segment_urls_e = segment_list.findall(_add_ns('SegmentURL')) segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
segment_urls = traverse_obj(segment_urls_e, ( if segment_urls_e:
Ellipsis, T(lambda e: e.attrib), 'media')) ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
if segment_urls:
ms_info['segment_urls'] = segment_urls
segment_urls_range = traverse_obj(segment_urls_e, (
Ellipsis, T(lambda e: e.attrib), 'mediaRange',
T(lambda r: re.findall(r'^\d+-\d+$', r)), 0))
if segment_urls_range:
ms_info['segment_urls_range'] = segment_urls_range
if not segment_urls:
ms_info['segment_urls'] = [base_url for _ in segment_urls_range]
else: else:
segment_template = element.find(_add_ns('SegmentTemplate')) segment_template = element.find(_add_ns('SegmentTemplate'))
if segment_template is not None: if segment_template is not None:
@ -2349,20 +2285,17 @@ class InfoExtractor(object):
return ms_info return ms_info
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration')) mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
formats, subtitles = [], {} formats = []
stream_numbers = collections.defaultdict(int)
mpd_base_url = resolve_base_url(mpd_doc, mpd_base_url or mpd_url)
for period in mpd_doc.findall(_add_ns('Period')): for period in mpd_doc.findall(_add_ns('Period')):
period_duration = parse_duration(period.get('duration')) or mpd_duration period_duration = parse_duration(period.get('duration')) or mpd_duration
period_ms_info = extract_multisegment_info(period, { period_ms_info = extract_multisegment_info(period, {
'start_number': 1, 'start_number': 1,
'timescale': 1, 'timescale': 1,
'base_url': mpd_base_url,
}) })
for adaptation_set in period.findall(_add_ns('AdaptationSet')): for adaptation_set in period.findall(_add_ns('AdaptationSet')):
if is_drm_protected(adaptation_set): if is_drm_protected(adaptation_set):
continue continue
adaptation_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info) adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
for representation in adaptation_set.findall(_add_ns('Representation')): for representation in adaptation_set.findall(_add_ns('Representation')):
if is_drm_protected(representation): if is_drm_protected(representation):
continue continue
@ -2370,35 +2303,27 @@ class InfoExtractor(object):
representation_attrib.update(representation.attrib) representation_attrib.update(representation.attrib)
# According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
mime_type = representation_attrib['mimeType'] mime_type = representation_attrib['mimeType']
content_type = representation_attrib.get('contentType') or mime_type.split('/')[0] content_type = mime_type.split('/')[0]
codec_str = representation_attrib.get('codecs', '') if content_type == 'text':
# Some kind of binary subtitle found in some youtube livestreams # TODO implement WebVTT downloading
if mime_type == 'application/x-rawcc': pass
codecs = {'scodec': codec_str} elif content_type in ('video', 'audio'):
else: base_url = ''
codecs = parse_codecs(codec_str) for element in (representation, adaptation_set, period, mpd_doc):
if content_type not in ('video', 'audio', 'text'): base_url_e = element.find(_add_ns('BaseURL'))
if mime_type == 'image/jpeg': if base_url_e is not None:
content_type = mime_type base_url = base_url_e.text + base_url
elif codecs.get('vcodec', 'none') != 'none': if re.match(r'^https?://', base_url):
content_type = 'video' break
elif codecs.get('acodec', 'none') != 'none': if mpd_base_url and not re.match(r'^https?://', base_url):
content_type = 'audio' if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
elif codecs.get('scodec', 'none') != 'none': mpd_base_url += '/'
content_type = 'text' base_url = mpd_base_url + base_url
elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'): representation_id = representation_attrib.get('id')
content_type = 'text' lang = representation_attrib.get('lang')
else: url_el = representation.find(_add_ns('BaseURL'))
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
continue bandwidth = int_or_none(representation_attrib.get('bandwidth'))
representation_id = representation_attrib.get('id')
lang = representation_attrib.get('lang')
url_el = representation.find(_add_ns('BaseURL'))
filesize = int_or_none(url_el.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
bandwidth = int_or_none(representation_attrib.get('bandwidth'))
format_id = join_nonempty(representation_id or content_type, mpd_id)
if content_type in ('video', 'audio'):
f = { f = {
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
'manifest_url': mpd_url, 'manifest_url': mpd_url,
@ -2413,130 +2338,104 @@ class InfoExtractor(object):
'filesize': filesize, 'filesize': filesize,
'container': mimetype2ext(mime_type) + '_dash', 'container': mimetype2ext(mime_type) + '_dash',
} }
f.update(codecs) f.update(parse_codecs(representation_attrib.get('codecs')))
elif content_type == 'text': representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
f = {
'ext': mimetype2ext(mime_type),
'manifest_url': mpd_url,
'filesize': filesize,
}
elif content_type == 'image/jpeg':
# See test case in VikiIE
# https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1
f = {
'format_id': format_id,
'ext': 'mhtml',
'manifest_url': mpd_url,
'format_note': 'DASH storyboards (jpeg)',
'acodec': 'none',
'vcodec': 'none',
}
if is_drm_protected(adaptation_set) or is_drm_protected(representation):
f['has_drm'] = True
representation_ms_info = extract_multisegment_info(representation, adaptation_set_ms_info)
def prepare_template(template_name, identifiers): def prepare_template(template_name, identifiers):
tmpl = representation_ms_info[template_name] tmpl = representation_ms_info[template_name]
# First of, % characters outside $...$ templates # First of, % characters outside $...$ templates
# must be escaped by doubling for proper processing # must be escaped by doubling for proper processing
# by % operator string formatting used further (see # by % operator string formatting used further (see
# https://github.com/ytdl-org/youtube-dl/issues/16867). # https://github.com/ytdl-org/youtube-dl/issues/16867).
t = '' t = ''
in_template = False in_template = False
for c in tmpl: for c in tmpl:
t += c
if c == '$':
in_template = not in_template
elif c == '%' and not in_template:
t += c t += c
# Next, $...$ templates are translated to their if c == '$':
# %(...) counterparts to be used with % operator in_template = not in_template
t = t.replace('$RepresentationID$', representation_id) elif c == '%' and not in_template:
t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t) t += c
t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t) # Next, $...$ templates are translated to their
t.replace('$$', '$') # %(...) counterparts to be used with % operator
return t t = t.replace('$RepresentationID$', representation_id)
t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
t.replace('$$', '$')
return t
# @initialization is a regular template like @media one # @initialization is a regular template like @media one
# so it should be handled just the same way (see # so it should be handled just the same way (see
# https://github.com/ytdl-org/youtube-dl/issues/11605) # https://github.com/ytdl-org/youtube-dl/issues/11605)
if 'initialization' in representation_ms_info: if 'initialization' in representation_ms_info:
initialization_template = prepare_template( initialization_template = prepare_template(
'initialization', 'initialization',
# As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
# $Time$ shall not be included for @initialization thus # $Time$ shall not be included for @initialization thus
# only $Bandwidth$ remains # only $Bandwidth$ remains
('Bandwidth', )) ('Bandwidth', ))
representation_ms_info['initialization_url'] = initialization_template % { representation_ms_info['initialization_url'] = initialization_template % {
'Bandwidth': bandwidth, 'Bandwidth': bandwidth,
} }
def location_key(location): def location_key(location):
return 'url' if re.match(r'^https?://', location) else 'path' return 'url' if re.match(r'^https?://', location) else 'path'
def calc_segment_duration(): if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
return float_or_none(
representation_ms_info['segment_duration'],
representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info: media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
media_location_key = location_key(media_template)
media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time')) # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
media_location_key = location_key(media_template) # can't be used at the same time
if '%(Number' in media_template and 's' not in representation_ms_info:
segment_duration = None
if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
representation_ms_info['fragments'] = [{
media_location_key: media_template % {
'Number': segment_number,
'Bandwidth': bandwidth,
},
'duration': segment_duration,
} for segment_number in range(
representation_ms_info['start_number'],
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
else:
# $Number*$ or $Time$ in media template with S list available
# Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
# Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
representation_ms_info['fragments'] = []
segment_time = 0
segment_d = None
segment_number = representation_ms_info['start_number']
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$ def add_segment_url():
# can't be used at the same time segment_url = media_template % {
if '%(Number' in media_template and 's' not in representation_ms_info: 'Time': segment_time,
segment_duration = None 'Bandwidth': bandwidth,
if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info: 'Number': segment_number,
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) }
representation_ms_info['total_number'] = int(math.ceil( representation_ms_info['fragments'].append({
float_or_none(period_duration, segment_duration, default=0))) media_location_key: segment_url,
representation_ms_info['fragments'] = [{ 'duration': float_or_none(segment_d, representation_ms_info['timescale']),
media_location_key: media_template % { })
'Number': segment_number,
'Bandwidth': bandwidth,
},
'duration': segment_duration,
} for segment_number in range(
representation_ms_info['start_number'],
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
else:
# $Number*$ or $Time$ in media template with S list available
# Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
# Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
representation_ms_info['fragments'] = []
segment_time = 0
segment_d = None
segment_number = representation_ms_info['start_number']
def add_segment_url(): for num, s in enumerate(representation_ms_info['s']):
segment_url = media_template % { segment_time = s.get('t') or segment_time
'Time': segment_time, segment_d = s['d']
'Bandwidth': bandwidth,
'Number': segment_number,
}
representation_ms_info['fragments'].append({
media_location_key: segment_url,
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
})
for num, s in enumerate(representation_ms_info['s']):
segment_time = s.get('t') or segment_time
segment_d = s['d']
add_segment_url()
segment_number += 1
for r in range(s.get('r', 0)):
segment_time += segment_d
add_segment_url() add_segment_url()
segment_number += 1 segment_number += 1
segment_time += segment_d for r in range(s.get('r', 0)):
elif 'segment_urls' in representation_ms_info: segment_time += segment_d
fragments = [] add_segment_url()
if 's' in representation_ms_info: segment_number += 1
segment_time += segment_d
elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
# No media template # No media template
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
# or any YouTube dashsegments video # or any YouTube dashsegments video
fragments = []
segment_index = 0 segment_index = 0
timescale = representation_ms_info['timescale'] timescale = representation_ms_info['timescale']
for s in representation_ms_info['s']: for s in representation_ms_info['s']:
@ -2548,78 +2447,48 @@ class InfoExtractor(object):
'duration': duration, 'duration': duration,
}) })
segment_index += 1 segment_index += 1
elif 'segment_urls_range' in representation_ms_info: representation_ms_info['fragments'] = fragments
# Segment URLs with mediaRange elif 'segment_urls' in representation_ms_info:
# Example: https://kinescope.io/200615537/master.mpd
# https://github.com/ytdl-org/youtube-dl/issues/30235
# or any mpd generated with Bento4 `mp4dash --no-split --use-segment-list`
segment_duration = calc_segment_duration()
for segment_url, segment_url_range in zip(
representation_ms_info['segment_urls'], representation_ms_info['segment_urls_range']):
fragments.append({
location_key(segment_url): segment_url,
'range': segment_url_range,
'duration': segment_duration,
})
else:
# Segment URLs with no SegmentTimeline # Segment URLs with no SegmentTimeline
# Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091 # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
# https://github.com/ytdl-org/youtube-dl/pull/14844 # https://github.com/ytdl-org/youtube-dl/pull/14844
segment_duration = calc_segment_duration() fragments = []
segment_duration = float_or_none(
representation_ms_info['segment_duration'],
representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
for segment_url in representation_ms_info['segment_urls']: for segment_url in representation_ms_info['segment_urls']:
fragments.append({ fragment = {
location_key(segment_url): segment_url, location_key(segment_url): segment_url,
'duration': segment_duration, }
}) if segment_duration:
representation_ms_info['fragments'] = fragments fragment['duration'] = segment_duration
fragments.append(fragment)
# If there is a fragments key available then we correctly recognized fragmented media. representation_ms_info['fragments'] = fragments
# Otherwise we will assume unfragmented media with direct access. Technically, such # If there is a fragments key available then we correctly recognized fragmented media.
# assumption is not necessarily correct since we may simply have no support for # Otherwise we will assume unfragmented media with direct access. Technically, such
# some forms of fragmented media renditions yet, but for now we'll use this fallback. # assumption is not necessarily correct since we may simply have no support for
if 'fragments' in representation_ms_info: # some forms of fragmented media renditions yet, but for now we'll use this fallback.
base_url = representation_ms_info['base_url'] if 'fragments' in representation_ms_info:
f.update({ f.update({
# NB: mpd_url may be empty when MPD manifest is parsed from a string # NB: mpd_url may be empty when MPD manifest is parsed from a string
'url': mpd_url or base_url, 'url': mpd_url or base_url,
'fragment_base_url': base_url, 'fragment_base_url': base_url,
'fragments': [], 'fragments': [],
'protocol': 'http_dash_segments', 'protocol': 'http_dash_segments',
})
if 'initialization_url' in representation_ms_info and 'initialization_url_range' in representation_ms_info:
# Initialization URL with range (accompanied by Segment URLs with mediaRange above)
# https://github.com/ytdl-org/youtube-dl/issues/30235
initialization_url = representation_ms_info['initialization_url']
f['fragments'].append({
location_key(initialization_url): initialization_url,
'range': representation_ms_info['initialization_url_range'],
}) })
elif 'initialization_url' in representation_ms_info: if 'initialization_url' in representation_ms_info:
initialization_url = representation_ms_info['initialization_url'] initialization_url = representation_ms_info['initialization_url']
if not f.get('url'): if not f.get('url'):
f['url'] = initialization_url f['url'] = initialization_url
f['fragments'].append({location_key(initialization_url): initialization_url}) f['fragments'].append({location_key(initialization_url): initialization_url})
elif 'initialization_url_range' in representation_ms_info: f['fragments'].extend(representation_ms_info['fragments'])
# no Initialization URL but range (accompanied by no Segment URLs but mediaRange above) else:
# https://github.com/ytdl-org/youtube-dl/issues/27575 # Assuming direct URL to unfragmented media.
f['fragments'].append({ f['url'] = base_url
location_key(base_url): base_url,
'range': representation_ms_info['initialization_url_range'],
})
f['fragments'].extend(representation_ms_info['fragments'])
if not period_duration:
period_duration = sum(traverse_obj(representation_ms_info, (
'fragments', Ellipsis, 'duration', T(float_or_none))))
else:
# Assuming direct URL to unfragmented media.
f['url'] = representation_ms_info['base_url']
if content_type in ('video', 'audio', 'image/jpeg'):
f['manifest_stream_number'] = stream_numbers[f['url']]
stream_numbers[f['url']] += 1
formats.append(f) formats.append(f)
elif content_type == 'text': else:
subtitles.setdefault(lang or 'und', []).append(f) self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
return formats, subtitles return formats
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
res = self._download_xml_handle( res = self._download_xml_handle(

View File

@ -2,20 +2,9 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_kwargs from ..utils import ExtractorError
from ..utils import (
determine_ext,
ExtractorError,
float_or_none,
merge_dicts,
T,
traverse_obj,
txt_or_none,
url_or_none,
)
class Vbox7IE(InfoExtractor): class Vbox7IE(InfoExtractor):
@ -31,12 +20,10 @@ class Vbox7IE(InfoExtractor):
) )
(?P<id>[\da-fA-F]+) (?P<id>[\da-fA-F]+)
''' '''
_EMBED_REGEX = [r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)']
_GEO_COUNTRIES = ['BG'] _GEO_COUNTRIES = ['BG']
_GEO_BYPASS = False
_TESTS = [{ _TESTS = [{
'url': 'https://vbox7.com/play:0946fff23c', 'url': 'http://vbox7.com/play:0946fff23c',
'md5': '50ca1f78345a9c15391af47d8062d074', 'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
'info_dict': { 'info_dict': {
'id': '0946fff23c', 'id': '0946fff23c',
'ext': 'mp4', 'ext': 'mp4',
@ -47,21 +34,18 @@ class Vbox7IE(InfoExtractor):
'upload_date': '20160812', 'upload_date': '20160812',
'uploader': 'zdraveibulgaria', 'uploader': 'zdraveibulgaria',
}, },
'expected_warnings': [ 'params': {
'Unable to download webpage', 'proxy': '127.0.0.1:8118',
], },
}, { }, {
'url': 'http://vbox7.com/play:249bb972c2', 'url': 'http://vbox7.com/play:249bb972c2',
'md5': 'aaf19465e37ec0b30b918df83ec32c50', 'md5': '99f65c0c9ef9b682b97313e052734c3f',
'info_dict': { 'info_dict': {
'id': '249bb972c2', 'id': '249bb972c2',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Смях! Чудо - чист за секунди - Скрита камера', 'title': 'Смях! Чудо - чист за секунди - Скрита камера',
'description': 'Смях! Чудо - чист за секунди - Скрита камера',
'timestamp': 1360215023,
'upload_date': '20130207',
'uploader': 'svideteliat_ot_varshava',
}, },
'skip': 'georestricted',
}, { }, {
'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1', 'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
'only_matching': True, 'only_matching': True,
@ -70,109 +54,52 @@ class Vbox7IE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
@classmethod @staticmethod
def _extract_url(cls, webpage): def _extract_url(webpage):
mobj = re.search(cls._EMBED_REGEX[0], webpage) mobj = re.search(
r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)',
webpage)
if mobj: if mobj:
return mobj.group('url') return mobj.group('url')
# transform_source=None, fatal=True
def _parse_json(self, json_string, video_id, *args, **kwargs):
if '"@context"' in json_string[:30]:
# this is ld+json, or that's the way to bet
transform_source = args[0] if len(args) > 0 else kwargs.get('transform_source')
if not transform_source:
def fix_chars(src):
# fix malformed ld+json: replace raw CRLFs with escaped LFs
return re.sub(
r'"[^"]+"', lambda m: re.sub(r'\r?\n', r'\\n', m.group(0)), src)
if len(args) > 0:
args = (fix_chars,) + args[1:]
else:
kwargs['transform_source'] = fix_chars
kwargs = compat_kwargs(kwargs)
return super(Vbox7IE, self)._parse_json(
json_string, video_id, *args, **kwargs)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
url = 'https://vbox7.com/play:%s' % (video_id,)
now = time.time()
response = self._download_json( response = self._download_json(
'https://www.vbox7.com/aj/player/item/options?vid=%s' % (video_id,), 'https://www.vbox7.com/ajax/video/nextvideo.php?vid=%s' % video_id,
video_id, headers={'Referer': url}) video_id)
# estimate time to which possible `ago` member is relative
now = now + 0.5 * (time.time() - now)
if 'error' in response: if 'error' in response:
raise ExtractorError( raise ExtractorError(
'%s said: %s' % (self.IE_NAME, response['error']), expected=True) '%s said: %s' % (self.IE_NAME, response['error']), expected=True)
video_url = traverse_obj(response, ('options', 'src', T(url_or_none))) video = response['options']
if '/na.mp4' in video_url or '': title = video['title']
video_url = video['src']
if '/na.mp4' in video_url:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES) self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
ext = determine_ext(video_url) uploader = video.get('uploader')
if ext == 'mpd':
# In case MPD cannot be parsed, or anyway, get mp4 combined
# formats usually provided to Safari, iOS, and old Windows
try:
formats, subtitles = self._extract_mpd_formats_and_subtitles(
video_url, video_id, 'dash', fatal=False)
except KeyError:
self.report_warning('Failed to parse MPD manifest')
formats, subtitles = [], {}
video = response['options'] webpage = self._download_webpage(
resolutions = (1080, 720, 480, 240, 144) 'http://vbox7.com/play:%s' % video_id, video_id, fatal=None)
highest_res = traverse_obj(video, ('highestRes', T(int))) or resolutions[0]
for res in traverse_obj(video, ('resolutions', lambda _, r: int(r) > 0)) or resolutions:
if res > highest_res:
continue
formats.append({
'url': video_url.replace('.mpd', '_%d.mp4' % res),
'format_id': '%dp' % res,
'height': res,
})
# if above formats are flaky, enable the line below
# self._check_formats(formats, video_id)
else:
formats = [{
'url': video_url,
}]
subtitles = {}
self._sort_formats(formats)
webpage = self._download_webpage(url, video_id, fatal=False) or '' info = {}
info = self._search_json_ld( if webpage:
webpage.replace('"/*@context"', '"@context"'), video_id, info = self._search_json_ld(
fatal=False) if webpage else {} webpage.replace('"/*@context"', '"@context"'), video_id,
fatal=False)
if not info.get('title'): info.update({
info['title'] = traverse_obj(response, (
'options', 'title', T(txt_or_none))) or self._og_search_title(webpage)
def if_missing(k):
return lambda x: None if k in info else x
info = merge_dicts(info, {
'id': video_id, 'id': video_id,
'formats': formats, 'title': title,
'subtitles': subtitles or None, 'url': video_url,
}, info, traverse_obj(response, ('options', { 'uploader': uploader,
'uploader': ('uploader', T(txt_or_none)), 'thumbnail': self._proto_relative_url(
'timestamp': ('ago', T(if_missing('timestamp')), T(lambda t: int(round((now - t) / 60.0)) * 60)),
'duration': ('duration', T(if_missing('duration')), T(float_or_none)),
})))
if 'thumbnail' not in info:
info['thumbnail'] = self._proto_relative_url(
info.get('thumbnail') or self._og_search_thumbnail(webpage), info.get('thumbnail') or self._og_search_thumbnail(webpage),
'https:'), 'http:'),
})
return info return info