mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-26 17:21:23 +01:00
Compare commits
16 Commits
b01da2335c
...
6511c09851
Author | SHA1 | Date | |
---|---|---|---|
|
6511c09851 | ||
|
9c8175c7e0 | ||
|
878c6bcace | ||
|
37755a037e | ||
|
196eb0fe77 | ||
|
db8b4edc7d | ||
|
1c54a98e19 | ||
|
00a3e47bf5 | ||
|
c5f01bf7d4 | ||
|
c91af948e4 | ||
|
6b5d93b0b0 | ||
|
298230e550 | ||
|
d5d1517e7d | ||
|
7e09c147fd | ||
|
e370f9ec36 | ||
|
b1a1ec1540 |
|
@ -1333,6 +1333,7 @@ The available fields are:
|
||||||
- `was_live` (boolean): Whether this video was originally a live stream
|
- `was_live` (boolean): Whether this video was originally a live stream
|
||||||
- `playable_in_embed` (string): Whether this video is allowed to play in embedded players on other sites
|
- `playable_in_embed` (string): Whether this video is allowed to play in embedded players on other sites
|
||||||
- `availability` (string): Whether the video is "private", "premium_only", "subscriber_only", "needs_auth", "unlisted" or "public"
|
- `availability` (string): Whether the video is "private", "premium_only", "subscriber_only", "needs_auth", "unlisted" or "public"
|
||||||
|
- `media_type` (string): The type of media as classified by the site, e.g. "episode", "clip", "trailer"
|
||||||
- `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
|
- `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
|
||||||
- `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
|
- `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
|
||||||
- `extractor` (string): Name of the extractor
|
- `extractor` (string): Name of the extractor
|
||||||
|
|
|
@ -328,7 +328,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
|
||||||
https_server_thread.start()
|
https_server_thread.start()
|
||||||
|
|
||||||
with handler(verify=False) as rh:
|
with handler(verify=False) as rh:
|
||||||
with pytest.raises(SSLError, match='sslv3 alert handshake failure') as exc_info:
|
with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
|
||||||
validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
|
validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
|
||||||
assert not issubclass(exc_info.type, CertificateVerifyError)
|
assert not issubclass(exc_info.type, CertificateVerifyError)
|
||||||
|
|
||||||
|
|
|
@ -2370,6 +2370,11 @@ Line 1
|
||||||
headers4 = HTTPHeaderDict({'ytdl-test': 'data;'})
|
headers4 = HTTPHeaderDict({'ytdl-test': 'data;'})
|
||||||
self.assertEqual(set(headers4.items()), {('Ytdl-Test', 'data;')})
|
self.assertEqual(set(headers4.items()), {('Ytdl-Test', 'data;')})
|
||||||
|
|
||||||
|
# common mistake: strip whitespace from values
|
||||||
|
# https://github.com/yt-dlp/yt-dlp/issues/8729
|
||||||
|
headers5 = HTTPHeaderDict({'ytdl-test': ' data; '})
|
||||||
|
self.assertEqual(set(headers5.items()), {('Ytdl-Test', 'data;')})
|
||||||
|
|
||||||
def test_extract_basic_auth(self):
|
def test_extract_basic_auth(self):
|
||||||
assert extract_basic_auth('http://:foo.bar') == ('http://:foo.bar', None)
|
assert extract_basic_auth('http://:foo.bar') == ('http://:foo.bar', None)
|
||||||
assert extract_basic_auth('http://foo.bar') == ('http://foo.bar', None)
|
assert extract_basic_auth('http://foo.bar') == ('http://foo.bar', None)
|
||||||
|
|
|
@ -148,7 +148,7 @@ class TestWebsSocketRequestHandlerConformance:
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||||
def test_ssl_error(self, handler):
|
def test_ssl_error(self, handler):
|
||||||
with handler(verify=False) as rh:
|
with handler(verify=False) as rh:
|
||||||
with pytest.raises(SSLError, match='sslv3 alert handshake failure') as exc_info:
|
with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
|
||||||
validate_and_send(rh, Request(self.bad_wss_host))
|
validate_and_send(rh, Request(self.bad_wss_host))
|
||||||
assert not issubclass(exc_info.type, CertificateVerifyError)
|
assert not issubclass(exc_info.type, CertificateVerifyError)
|
||||||
|
|
||||||
|
|
|
@ -276,6 +276,7 @@ from .brilliantpala import (
|
||||||
)
|
)
|
||||||
from .businessinsider import BusinessInsiderIE
|
from .businessinsider import BusinessInsiderIE
|
||||||
from .bundesliga import BundesligaIE
|
from .bundesliga import BundesligaIE
|
||||||
|
from .bundestag import BundestagIE
|
||||||
from .buzzfeed import BuzzFeedIE
|
from .buzzfeed import BuzzFeedIE
|
||||||
from .byutv import BYUtvIE
|
from .byutv import BYUtvIE
|
||||||
from .c56 import C56IE
|
from .c56 import C56IE
|
||||||
|
@ -864,6 +865,7 @@ from .jiosaavn import (
|
||||||
)
|
)
|
||||||
from .jove import JoveIE
|
from .jove import JoveIE
|
||||||
from .joj import JojIE
|
from .joj import JojIE
|
||||||
|
from .joqrag import JoqrAgIE
|
||||||
from .jstream import JStreamIE
|
from .jstream import JStreamIE
|
||||||
from .jtbc import (
|
from .jtbc import (
|
||||||
JTBCIE,
|
JTBCIE,
|
||||||
|
@ -991,6 +993,7 @@ from .lynda import (
|
||||||
LyndaIE,
|
LyndaIE,
|
||||||
LyndaCourseIE
|
LyndaCourseIE
|
||||||
)
|
)
|
||||||
|
from .maariv import MaarivIE
|
||||||
from .magellantv import MagellanTVIE
|
from .magellantv import MagellanTVIE
|
||||||
from .magentamusik360 import MagentaMusik360IE
|
from .magentamusik360 import MagentaMusik360IE
|
||||||
from .mailru import (
|
from .mailru import (
|
||||||
|
@ -1590,6 +1593,7 @@ from .restudy import RestudyIE
|
||||||
from .reuters import ReutersIE
|
from .reuters import ReutersIE
|
||||||
from .reverbnation import ReverbNationIE
|
from .reverbnation import ReverbNationIE
|
||||||
from .rheinmaintv import RheinMainTVIE
|
from .rheinmaintv import RheinMainTVIE
|
||||||
|
from .rinsefm import RinseFMIE
|
||||||
from .rmcdecouverte import RMCDecouverteIE
|
from .rmcdecouverte import RMCDecouverteIE
|
||||||
from .rockstargames import RockstarGamesIE
|
from .rockstargames import RockstarGamesIE
|
||||||
from .rokfin import (
|
from .rokfin import (
|
||||||
|
|
|
@ -121,11 +121,21 @@ class AENetworksIE(AENetworksBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '22253814',
|
'id': '22253814',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Winter is Coming',
|
'title': 'Winter Is Coming',
|
||||||
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
|
'description': 'md5:a40e370925074260b1c8a633c632c63a',
|
||||||
'timestamp': 1338306241,
|
'timestamp': 1338306241,
|
||||||
'upload_date': '20120529',
|
'upload_date': '20120529',
|
||||||
'uploader': 'AENE-NEW',
|
'uploader': 'AENE-NEW',
|
||||||
|
'duration': 2592.0,
|
||||||
|
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||||
|
'chapters': 'count:5',
|
||||||
|
'tags': 'count:14',
|
||||||
|
'categories': ['Mountain Men'],
|
||||||
|
'episode_number': 1,
|
||||||
|
'episode': 'Episode 1',
|
||||||
|
'season': 'Season 1',
|
||||||
|
'season_number': 1,
|
||||||
|
'series': 'Mountain Men',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
|
@ -143,6 +153,15 @@ class AENetworksIE(AENetworksBaseIE):
|
||||||
'timestamp': 1452634428,
|
'timestamp': 1452634428,
|
||||||
'upload_date': '20160112',
|
'upload_date': '20160112',
|
||||||
'uploader': 'AENE-NEW',
|
'uploader': 'AENE-NEW',
|
||||||
|
'duration': 1277.695,
|
||||||
|
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||||
|
'chapters': 'count:4',
|
||||||
|
'tags': 'count:23',
|
||||||
|
'episode': 'Episode 1',
|
||||||
|
'episode_number': 1,
|
||||||
|
'season': 'Season 9',
|
||||||
|
'season_number': 9,
|
||||||
|
'series': 'Duck Dynasty',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
|
|
|
@ -7,8 +7,10 @@ from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
clean_html,
|
clean_html,
|
||||||
|
extract_attributes,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
|
get_element_html_by_class,
|
||||||
get_elements_html_by_class,
|
get_elements_html_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
@ -17,6 +19,7 @@ from ..utils import (
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -34,6 +37,25 @@ class BitChuteIE(InfoExtractor):
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'BitChute',
|
'uploader': 'BitChute',
|
||||||
'upload_date': '20170103',
|
'upload_date': '20170103',
|
||||||
|
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
|
||||||
|
'channel': 'BitChute',
|
||||||
|
'channel_url': 'https://www.bitchute.com/channel/bitchute/'
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# test case: video with different channel and uploader
|
||||||
|
'url': 'https://www.bitchute.com/video/Yti_j9A-UZ4/',
|
||||||
|
'md5': 'f10e6a8e787766235946d0868703f1d0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'Yti_j9A-UZ4',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Israel at War | Full Measure',
|
||||||
|
'description': 'md5:38cf7bc6f42da1a877835539111c69ef',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'uploader': 'sharylattkisson',
|
||||||
|
'upload_date': '20231106',
|
||||||
|
'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
|
||||||
|
'channel': 'Full Measure with Sharyl Attkisson',
|
||||||
|
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/'
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# video not downloadable in browser, but we can recover it
|
# video not downloadable in browser, but we can recover it
|
||||||
|
@ -48,6 +70,9 @@ class BitChuteIE(InfoExtractor):
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'BitChute',
|
'uploader': 'BitChute',
|
||||||
'upload_date': '20181113',
|
'upload_date': '20181113',
|
||||||
|
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
|
||||||
|
'channel': 'BitChute',
|
||||||
|
'channel_url': 'https://www.bitchute.com/channel/bitchute/'
|
||||||
},
|
},
|
||||||
'params': {'check_formats': None},
|
'params': {'check_formats': None},
|
||||||
}, {
|
}, {
|
||||||
|
@ -99,6 +124,11 @@ class BitChuteIE(InfoExtractor):
|
||||||
reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title
|
reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title
|
||||||
self.raise_geo_restricted(reason)
|
self.raise_geo_restricted(reason)
|
||||||
|
|
||||||
|
@staticmethod
def _make_url(html):
    # Take the `href` attribute of the 'spa'-classed element found in `html`
    # and resolve it against the BitChute origin. A missing element is
    # replaced by '' before attribute extraction.
    element_html = get_element_html_by_class('spa', html) or ''
    href = extract_attributes(element_html).get('href')
    return urljoin('https://www.bitchute.com', href)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
|
@ -121,12 +151,19 @@ class BitChuteIE(InfoExtractor):
|
||||||
'Video is unavailable. Please make sure this video is playable in the browser '
|
'Video is unavailable. Please make sure this video is playable in the browser '
|
||||||
'before reporting this issue.', expected=True, video_id=video_id)
|
'before reporting this issue.', expected=True, video_id=video_id)
|
||||||
|
|
||||||
|
details = get_element_by_class('details', webpage) or ''
|
||||||
|
uploader_html = get_element_html_by_class('creator', details) or ''
|
||||||
|
channel_html = get_element_html_by_class('name', details) or ''
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
|
'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
|
||||||
'description': self._og_search_description(webpage, default=None),
|
'description': self._og_search_description(webpage, default=None),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'uploader': clean_html(get_element_by_class('owner', webpage)),
|
'uploader': clean_html(uploader_html),
|
||||||
|
'uploader_url': self._make_url(uploader_html),
|
||||||
|
'channel': clean_html(channel_html),
|
||||||
|
'channel_url': self._make_url(channel_html),
|
||||||
'upload_date': unified_strdate(self._search_regex(
|
'upload_date': unified_strdate(self._search_regex(
|
||||||
r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)),
|
r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
@ -154,6 +191,9 @@ class BitChuteChannelIE(InfoExtractor):
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'BitChute',
|
'uploader': 'BitChute',
|
||||||
'upload_date': '20170103',
|
'upload_date': '20170103',
|
||||||
|
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
|
||||||
|
'channel': 'BitChute',
|
||||||
|
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
|
||||||
'duration': 16,
|
'duration': 16,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
|
@ -169,7 +209,7 @@ class BitChuteChannelIE(InfoExtractor):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'wV9Imujxasw9',
|
'id': 'wV9Imujxasw9',
|
||||||
'title': 'Bruce MacDonald and "The Light of Darkness"',
|
'title': 'Bruce MacDonald and "The Light of Darkness"',
|
||||||
'description': 'md5:04913227d2714af1d36d804aa2ab6b1e',
|
'description': 'md5:747724ef404eebdfc04277714f81863e',
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
123
yt_dlp/extractor/bundestag.py
Normal file
123
yt_dlp/extractor/bundestag.py
Normal file
|
@ -0,0 +1,123 @@
|
||||||
|
import re
|
||||||
|
from functools import partial
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..networking.exceptions import HTTPError
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
bug_reports_message,
|
||||||
|
clean_html,
|
||||||
|
format_field,
|
||||||
|
get_element_text_and_html_by_tag,
|
||||||
|
int_or_none,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class BundestagIE(InfoExtractor):
    # Matches dbtg.tv short links (cvid/fvid) and bundestag.de mediathek URLs
    # that carry a numeric `videoid` query parameter.
    _VALID_URL = [
        r'https?://dbtg\.tv/[cf]vid/(?P<id>\d+)',
        r'https?://www\.bundestag\.de/mediathek/?\?(?:[^#]+&)?videoid=(?P<id>\d+)',
    ]
    _TESTS = [{
        'url': 'https://dbtg.tv/cvid/7605304',
        'info_dict': {
            'id': '7605304',
            'ext': 'mp4',
            'title': '145. Sitzung vom 15.12.2023, TOP 24 Barrierefreiheit',
            'description': 'md5:321a9dc6bdad201264c0045efc371561',
        },
    }, {
        'url': 'https://www.bundestag.de/mediathek?videoid=7602120&url=L21lZGlhdGhla292ZXJsYXk=&mod=mediathek',
        'info_dict': {
            'id': '7602120',
            'ext': 'mp4',
            'title': '130. Sitzung vom 18.10.2023, TOP 1 Befragung der Bundesregierung',
            'description': 'Befragung der Bundesregierung',
        },
    }, {
        'url': 'https://www.bundestag.de/mediathek?videoid=7604941#url=L21lZGlhdGhla292ZXJsYXk/dmlkZW9pZD03NjA0OTQx&mod=mediathek',
        'only_matching': True,
    }, {
        'url': 'http://dbtg.tv/fvid/3594346',
        'only_matching': True,
    }]

    # Page queried (with `videoid` and `view=main`) for title/description markup
    _OVERLAY_URL = 'https://www.bundestag.de/mediathekoverlay'
    # HLS playlist template; `{0}` is filled with the video id in both places
    _INSTANCE_FORMAT = 'https://cldf-wzw-od.r53.cdn.tv1.eu/13014bundestagod/_definst_/13014bundestag/ondemand/3777parlamentsfernsehen/archiv/app144277506/145293313/{0}/{0}_playlist.smil/playlist.m3u8'

    # JSON endpoint listing direct files; the video id is appended verbatim
    _SHARE_URL = 'https://webtv.bundestag.de/player/macros/_x_s-144277506/shareData.json?contentId='
    # Format details are parsed from the file name portion of each share URL
    _SHARE_AUDIO_REGEX = r'/\d+_(?P<codec>\w+)_(?P<bitrate>\d+)kb_(?P<channels>\w+)_\w+_\d+\.(?P<ext>\w+)'
    _SHARE_VIDEO_REGEX = r'/\d+_(?P<codec>\w+)_(?P<width>\w+)_(?P<height>\w+)_(?P<bitrate>\d+)kb_\w+_\w+_\d+\.(?P<ext>\w+)'

    def _bt_extract_share_formats(self, video_id):
        # Generator yielding one format dict per usable entry of the share JSON.
        payload = self._download_json(
            f'{self._SHARE_URL}{video_id}', video_id, note='Downloading share format JSON')

        status_code = traverse_obj(payload, ('status', 'code', {int}))
        if status_code != 1:
            # Any status other than 1 is only warned about; nothing is yielded
            api_message = format_field(
                payload, [('status', 'message', {str})],
                'Share API response: %s', default='Unknown Share API Error')
            self.report_warning(api_message + bug_reports_message())
            return

        channel_counts = {'mono': 1, 'stereo': 2}
        for format_id, format_url in payload.items():
            # Skip entries whose key is not a string or whose value is not a URL
            if not (isinstance(format_id, str) and url_or_none(format_url)):
                continue

            if format_id.startswith('audio'):
                audio_details = traverse_obj(re.search(self._SHARE_AUDIO_REGEX, format_url), {
                    'acodec': 'codec',
                    'audio_channels': ('channels', {channel_counts.get}),
                    'abr': ('bitrate', {int_or_none}),
                    'ext': 'ext',
                })
                yield {
                    'format_id': format_id,
                    'url': format_url,
                    'vcodec': 'none',
                    **audio_details,
                }

            elif format_id.startswith('download'):
                video_details = traverse_obj(re.search(self._SHARE_VIDEO_REGEX, format_url), {
                    'vcodec': 'codec',
                    'tbr': ('bitrate', {int_or_none}),
                    'width': ('width', {int_or_none}),
                    'height': ('height', {int_or_none}),
                    'ext': 'ext',
                })
                yield {
                    'format_id': format_id,
                    'url': format_url,
                    **video_details,
                }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        formats = []

        playlist_url = self._INSTANCE_FORMAT.format(video_id)
        try:
            hls_formats = self._extract_m3u8_formats(playlist_url, video_id, m3u8_id='instance')
            formats.extend(hls_formats)
        except ExtractorError as error:
            cause = error.cause
            # A 404 on the playlist is treated as an unknown video id;
            # other failures are downgraded to a warning
            if isinstance(cause, HTTPError) and cause.status == 404:
                raise ExtractorError('Could not find video id', expected=True)
            self.report_warning(f'Error extracting hls formats: {error}', video_id)

        formats.extend(self._bt_extract_share_formats(video_id))
        if not formats:
            self.raise_no_formats('Could not find suitable formats', video_id=video_id)

        # Non-fatal: a failed overlay download just leaves title/description unset
        overlay_html = self._download_webpage(
            self._OVERLAY_URL, video_id,
            query={'videoid': video_id, 'view': 'main'},
            note='Downloading metadata overlay', fatal=False,
        )
        strip_spans = partial(re.sub, r'<span[^>]*>[^<]+</span>', '')
        metadata = traverse_obj(overlay_html, {
            'title': (
                {partial(get_element_text_and_html_by_tag, 'h3')}, 0,
                {strip_spans}, {clean_html}),
            'description': ({partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
        })

        return {'id': video_id, 'formats': formats, **metadata}
|
|
@ -180,6 +180,13 @@ class CBCPlayerIE(InfoExtractor):
|
||||||
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
|
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
|
||||||
'chapters': [],
|
'chapters': [],
|
||||||
'duration': 494.811,
|
'duration': 494.811,
|
||||||
|
'categories': ['AudioMobile/All in a Weekend Montreal'],
|
||||||
|
'tags': 'count:8',
|
||||||
|
'location': 'Quebec',
|
||||||
|
'series': 'All in a Weekend Montreal',
|
||||||
|
'season': 'Season 2015',
|
||||||
|
'season_number': 2015,
|
||||||
|
'media_type': 'Excerpt',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.cbc.ca/player/play/2164402062',
|
'url': 'http://www.cbc.ca/player/play/2164402062',
|
||||||
|
@ -195,25 +202,37 @@ class CBCPlayerIE(InfoExtractor):
|
||||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
|
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
|
||||||
'chapters': [],
|
'chapters': [],
|
||||||
'duration': 186.867,
|
'duration': 186.867,
|
||||||
|
'series': 'CBC News: Windsor at 6:00',
|
||||||
|
'categories': ['News/Canada/Windsor'],
|
||||||
|
'location': 'Windsor',
|
||||||
|
'tags': ['cancer'],
|
||||||
|
'creator': 'Allison Johnson',
|
||||||
|
'media_type': 'Excerpt',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# Has subtitles
|
# Has subtitles
|
||||||
# These broadcasts expire after ~1 month, can find new test URL here:
|
# These broadcasts expire after ~1 month, can find new test URL here:
|
||||||
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
|
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
|
||||||
'url': 'http://www.cbc.ca/player/play/2249992771553',
|
'url': 'http://www.cbc.ca/player/play/2284799043667',
|
||||||
'md5': '2f2fb675dd4f0f8a5bb7588d1b13bacd',
|
'md5': '9b49f0839e88b6ec0b01d840cf3d42b5',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2249992771553',
|
'id': '2284799043667',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The National | Women’s soccer pay, Florida seawater, Swift quake',
|
'title': 'The National | Hockey coach charged, Green grants, Safer drugs',
|
||||||
'description': 'md5:adba28011a56cfa47a080ff198dad27a',
|
'description': 'md5:84ef46321c94bcf7d0159bb565d26bfa',
|
||||||
'timestamp': 1690596000,
|
'timestamp': 1700272800,
|
||||||
'duration': 2716.333,
|
'duration': 2718.833,
|
||||||
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
||||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/481/326/thumbnail.jpeg',
|
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/907/171/thumbnail.jpeg',
|
||||||
'uploader': 'CBCC-NEW',
|
'uploader': 'CBCC-NEW',
|
||||||
'chapters': 'count:5',
|
'chapters': 'count:5',
|
||||||
'upload_date': '20230729',
|
'upload_date': '20231118',
|
||||||
|
'categories': 'count:4',
|
||||||
|
'series': 'The National - Full Show',
|
||||||
|
'tags': 'count:1',
|
||||||
|
'creator': 'News',
|
||||||
|
'location': 'Canada',
|
||||||
|
'media_type': 'Full Program',
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
|
@ -382,6 +382,7 @@ class InfoExtractor:
|
||||||
'private', 'premium_only', 'subscriber_only', 'needs_auth',
|
'private', 'premium_only', 'subscriber_only', 'needs_auth',
|
||||||
'unlisted' or 'public'. Use 'InfoExtractor._availability'
|
'unlisted' or 'public'. Use 'InfoExtractor._availability'
|
||||||
to set it
|
to set it
|
||||||
|
media_type: The type of media as classified by the site, e.g. "episode", "clip", "trailer"
|
||||||
_old_archive_ids: A list of old archive ids needed for backward compatibility
|
_old_archive_ids: A list of old archive ids needed for backward compatibility
|
||||||
_format_sort_fields: A list of fields to use for sorting formats
|
_format_sort_fields: A list of fields to use for sorting formats
|
||||||
__post_extractor: A function to be called just before the metadata is
|
__post_extractor: A function to be called just before the metadata is
|
||||||
|
|
|
@ -46,6 +46,10 @@ class CWTVIE(InfoExtractor):
|
||||||
'timestamp': 1444107300,
|
'timestamp': 1444107300,
|
||||||
'age_limit': 14,
|
'age_limit': 14,
|
||||||
'uploader': 'CWTV',
|
'uploader': 'CWTV',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||||
|
'chapters': 'count:4',
|
||||||
|
'episode': 'Episode 20',
|
||||||
|
'season': 'Season 11',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
|
|
|
@ -1,15 +1,20 @@
|
||||||
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
try_call,
|
try_call,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class EplusIbIE(InfoExtractor):
|
class EplusIbIE(InfoExtractor):
|
||||||
IE_NAME = 'eplus:inbound'
|
_NETRC_MACHINE = 'eplus'
|
||||||
IE_DESC = 'e+ (イープラス) overseas'
|
IE_NAME = 'eplus'
|
||||||
_VALID_URL = r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)'
|
IE_DESC = 'e+ (イープラス)'
|
||||||
|
_VALID_URL = [r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)',
|
||||||
|
r'https?://live\.eplus\.jp/(?P<id>sample|\d+)']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
|
'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -29,14 +34,97 @@ class EplusIbIE(InfoExtractor):
|
||||||
'No video formats found!',
|
'No video formats found!',
|
||||||
'Requested format is not available',
|
'Requested format is not available',
|
||||||
],
|
],
|
||||||
|
}, {
|
||||||
|
'url': 'https://live.eplus.jp/sample',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'stream1ng20210719-test-005',
|
||||||
|
'title': 'Online streaming test for DRM',
|
||||||
|
'live_status': 'was_live',
|
||||||
|
'release_date': '20210719',
|
||||||
|
'release_timestamp': 1626703200,
|
||||||
|
'description': None,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
'ignore_no_formats_error': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': [
|
||||||
|
'Could not find the playlist URL. This event may not be accessible',
|
||||||
|
'No video formats found!',
|
||||||
|
'Requested format is not available',
|
||||||
|
'This video is DRM protected',
|
||||||
|
],
|
||||||
|
}, {
|
||||||
|
'url': 'https://live.eplus.jp/2053935',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '331320-0001-001',
|
||||||
|
'title': '丘みどり2020配信LIVE Vol.2 ~秋麗~ 【Streaming+(配信チケット)】',
|
||||||
|
'live_status': 'was_live',
|
||||||
|
'release_date': '20200920',
|
||||||
|
'release_timestamp': 1600596000,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
'ignore_no_formats_error': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': [
|
||||||
|
'Could not find the playlist URL. This event may not be accessible',
|
||||||
|
'No video formats found!',
|
||||||
|
'Requested format is not available',
|
||||||
|
],
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0'
|
||||||
|
|
||||||
|
def _login(self, username, password, urlh):
    # Two requests are made: a JSON credential check against the FTAuth API,
    # then a form POST back to the URL of `urlh`. Both carry the token taken
    # from the `X-CLTFT-Token` response header of `urlh`.
    site_cookies = self._get_cookies('https://live.eplus.jp/')
    if not site_cookies.get('ci_session'):
        raise ExtractorError('Unable to get ci_session cookie')

    cltft_token = urlh.headers.get('X-CLTFT-Token')
    if not cltft_token:
        raise ExtractorError('Unable to get X-CLTFT-Token')
    self._set_cookie('live.eplus.jp', 'X-CLTFT-Token', cltft_token)

    auth_url = urlh.url
    credentials = {
        'loginId': username,
        'loginPassword': password,
    }

    prelogin_headers = {
        'Content-Type': 'application/json; charset=UTF-8',
        'Referer': auth_url,
        'X-Cltft-Token': cltft_token,
        'Accept': '*/*',
    }
    login_json = self._download_json(
        'https://live.eplus.jp/member/api/v1/FTAuth/idpw', None,
        note='Sending pre-login info', errnote='Unable to send pre-login info',
        headers=prelogin_headers, data=json.dumps(credentials).encode())
    if not login_json.get('isSuccess'):
        raise ExtractorError('Login failed: Invalid id or password', expected=True)

    # Same credentials again, this time as form fields together with the token
    form_fields = {
        **credentials,
        'Token.Default': cltft_token,
        'op': 'nextPage',
    }
    self._request_webpage(
        auth_url, None, note='Logging in', errnote='Unable to log in',
        data=urlencode_postdata(form_fields), headers={'Referer': auth_url})
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage, urlh = self._download_webpage_handle(
|
||||||
|
url, video_id, headers={'User-Agent': self._USER_AGENT})
|
||||||
|
if urlh.url.startswith('https://live.eplus.jp/member/auth'):
|
||||||
|
username, password = self._get_login_info()
|
||||||
|
if not username:
|
||||||
|
self.raise_login_required()
|
||||||
|
self._login(username, password, urlh)
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
url, video_id, headers={'User-Agent': self._USER_AGENT})
|
||||||
|
|
||||||
data_json = self._search_json(r'<script>\s*var app\s*=', webpage, 'data json', video_id)
|
data_json = self._search_json(r'<script>\s*var app\s*=', webpage, 'data json', video_id)
|
||||||
|
|
||||||
|
if data_json.get('drm_mode') == 'ON':
|
||||||
|
self.report_drm(video_id)
|
||||||
|
|
||||||
delivery_status = data_json.get('delivery_status')
|
delivery_status = data_json.get('delivery_status')
|
||||||
archive_mode = data_json.get('archive_mode')
|
archive_mode = data_json.get('archive_mode')
|
||||||
release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
|
release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
|
||||||
|
@ -64,7 +152,7 @@ class EplusIbIE(InfoExtractor):
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
m3u8_playlist_urls = self._search_json(
|
m3u8_playlist_urls = self._search_json(
|
||||||
r'var listChannels\s*=', webpage, 'hls URLs', video_id, contains_pattern=r'\[.+\]', default=[])
|
r'var\s+listChannels\s*=', webpage, 'hls URLs', video_id, contains_pattern=r'\[.+\]', default=[])
|
||||||
if not m3u8_playlist_urls:
|
if not m3u8_playlist_urls:
|
||||||
if live_status == 'is_upcoming':
|
if live_status == 'is_upcoming':
|
||||||
self.raise_no_formats(
|
self.raise_no_formats(
|
||||||
|
|
112
yt_dlp/extractor/joqrag.py
Normal file
112
yt_dlp/extractor/joqrag.py
Normal file
|
@ -0,0 +1,112 @@
|
||||||
|
import datetime
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
datetime_from_str,
|
||||||
|
unified_timestamp,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class JoqrAgIE(InfoExtractor):
    """Extractor for the 超!A&G+ live stream.

    Every URL matched by ``_VALID_URL`` resolves to the same stream, so the
    video id is always the constant ``'live'``. Programme metadata is read
    from JavaScript variables served at ``https://www.uniqueradio.jp/aandg``.
    """

    IE_DESC = '超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)'
    _VALID_URL = [r'https?://www\.uniqueradio\.jp/agplayer5/(?:player|inc-player-hls)\.php',
                  r'https?://(?:www\.)?joqr\.co\.jp/ag/',
                  r'https?://(?:www\.)?joqr\.co\.jp/qr/ag(?:daily|regular)program/?(?:$|[#?])']
    _TESTS = [{
        'url': 'https://www.uniqueradio.jp/agplayer5/player.php',
        'info_dict': {
            'id': 'live',
            'title': str,
            'channel': '超!A&G+',
            'description': str,
            'live_status': 'is_live',
            'release_timestamp': int,
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }, {
        'url': 'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php',
        'only_matching': True,
    }, {
        'url': 'https://www.joqr.co.jp/ag/article/103760/',
        'only_matching': True,
    }, {
        'url': 'http://www.joqr.co.jp/qr/agdailyprogram/',
        'only_matching': True,
    }, {
        'url': 'http://www.joqr.co.jp/qr/agregularprogram/',
        'only_matching': True,
    }]

    def _extract_metadata(self, variable, html):
        """Return the cleaned value of the JS string variable `variable` in `html`.

        The quoted value is URL-decoded (``+`` becomes a space) and passed
        through ``clean_html``. Returns None when the variable is absent,
        empty, or cleans down to an empty string.
        """
        return clean_html(urllib.parse.unquote_plus(self._search_regex(
            rf'var\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
            html, 'metadata', group='value', default=''))) or None

    def _extract_start_timestamp(self, video_id, is_live):
        """Return the broadcast start time as a Unix timestamp, or None.

        The time is scraped from the daily programme list. When `is_live` is
        true and today's start time has not been reached yet, yesterday's
        programme list is used instead.
        """
        def extract_start_time_from(date_str):
            # Shift by 9 hours so the calendar date matches the +09:00 offset
            # that is used below when the timestamp is assembled.
            dt = datetime_from_str(date_str) + datetime.timedelta(hours=9)
            date = dt.strftime('%Y%m%d')
            # The download is non-fatal; on failure an empty page is searched
            # and the regex simply yields None.
            start_time = self._search_regex(
                r'<h3[^>]+\bclass="dailyProgram-itemHeaderTime"[^>]*>[\s\d:]+–\s*(\d{1,2}:\d{1,2})',
                self._download_webpage(
                    f'https://www.joqr.co.jp/qr/agdailyprogram/?date={date}', video_id,
                    note=f'Downloading program list of {date}', fatal=False,
                    errnote=f'Failed to download program list of {date}') or '',
                'start time', default=None)
            if start_time:
                return unified_timestamp(f'{dt.strftime("%Y/%m/%d")} {start_time} +09:00')
            return None

        start_timestamp = extract_start_time_from('today')
        if not start_timestamp:
            return None

        if not is_live or start_timestamp < datetime_from_str('now').timestamp():
            return start_timestamp
        else:
            # A live stream cannot have started in the future, so the current
            # broadcast must belong to the previous day's schedule.
            return extract_start_time_from('yesterday')

    def _real_extract(self, url):
        """Build the info dict for the stream; `url` is only used for matching."""
        video_id = 'live'

        metadata = self._download_webpage(
            'https://www.uniqueradio.jp/aandg', video_id,
            note='Downloading metadata', errnote='Failed to download metadata')
        title = self._extract_metadata('Program_name', metadata)

        # '放送休止' ("broadcast suspended") is the off-air placeholder title.
        # A missing/empty programme name is handled the same way, so that an
        # off-air stream is reported as upcoming instead of failing later on
        # the player/m3u8 requests.
        if not title or title == '放送休止':
            formats = []
            live_status = 'is_upcoming'
            release_timestamp = self._extract_start_timestamp(video_id, False)
            msg = 'This stream is not currently live'
            if release_timestamp:
                msg += (' and will start at '
                        + datetime.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S'))
            self.raise_no_formats(msg, expected=True)
        else:
            m3u8_path = self._search_regex(
                r'<source\s[^>]*\bsrc="([^"]+)"',
                self._download_webpage(
                    'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php', video_id,
                    note='Downloading player data', errnote='Failed to download player data'),
                'm3u8 url')
            formats = self._extract_m3u8_formats(
                urljoin('https://www.uniqueradio.jp/', m3u8_path), video_id)
            live_status = 'is_live'
            release_timestamp = self._extract_start_timestamp(video_id, True)

        return {
            'id': video_id,
            'title': title,
            'channel': '超!A&G+',
            'description': self._extract_metadata('Program_text', metadata),
            'formats': formats,
            'live_status': live_status,
            'release_timestamp': release_timestamp,
        }
|
62
yt_dlp/extractor/maariv.py
Normal file
62
yt_dlp/extractor/maariv.py
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_resolution,
|
||||||
|
unified_timestamp,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class MaarivIE(InfoExtractor):
    """Extractor for the embeddable player at player.maariv.co.il.

    Media details are fetched as JSON from ``dal.walla.co.il`` using the
    numeric ``media`` id taken from the player URL.
    """

    IE_NAME = 'maariv.co.il'
    _VALID_URL = r'https?://player\.maariv\.co\.il/public/player\.html\?(?:[^#]+&)?media=(?P<id>\d+)'
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://player.maariv.co.il/public/player.html?player=maariv-desktop&media=3611585',
        'info_dict': {
            'id': '3611585',
            'duration': 75,
            'ext': 'mp4',
            'upload_date': '20231009',
            'title': 'מבצע חרבות ברזל',
            'timestamp': 1696851301,
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.maariv.co.il/news/law/Article-1044008',
        'info_dict': {
            'id': '3611585',
            'duration': 75,
            'ext': 'mp4',
            'upload_date': '20231009',
            'title': 'מבצע חרבות ברזל',
            'timestamp': 1696851301,
        },
    }]

    def _real_extract(self, url):
        """Return the info dict (id, title, duration, timestamp, formats) for `url`."""
        video_id = self._match_id(url)
        api_response = self._download_json(
            f'https://dal.walla.co.il/media/{video_id}?origin=player.maariv.co.il', video_id)
        data = api_response['data']

        formats = []

        # HLS variants; extraction is non-fatal so the direct HTTP streams
        # below can still be offered if the playlist cannot be processed.
        hls_url = traverse_obj(data, ('video', 'url', {url_or_none}))
        if hls_url:
            formats.extend(self._extract_m3u8_formats(hls_url, video_id, m3u8_id='hls', fatal=False))

        # Direct HTTP streams; whatever resolution can be parsed from the URL
        # text is merged into each format entry.
        stream_urls = traverse_obj(data, ('video', 'stream_urls', ..., 'stream_url', {url_or_none}))
        formats.extend(
            {
                'url': stream_url,
                'format_id': 'http',
                **parse_resolution(stream_url),
            }
            for stream_url in stream_urls)

        metadata = traverse_obj(data, {
            'title': 'title',
            'duration': ('video', 'duration', {int_or_none}),
            'timestamp': ('upload_date', {unified_timestamp}),
        })

        return {
            'id': video_id,
            **metadata,
            'formats': formats,
        }
|
|
@ -73,6 +73,7 @@ class MediasetIE(ThePlatformBaseIE):
|
||||||
'season_number': 5,
|
'season_number': 5,
|
||||||
'episode_number': 5,
|
'episode_number': 5,
|
||||||
'chapters': [{'start_time': 0.0, 'end_time': 3409.08}, {'start_time': 3409.08, 'end_time': 6565.008}],
|
'chapters': [{'start_time': 0.0, 'end_time': 3409.08}, {'start_time': 3409.08, 'end_time': 6565.008}],
|
||||||
|
'categories': ['Informazione'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# DRM
|
# DRM
|
||||||
|
@ -149,6 +150,7 @@ class MediasetIE(ThePlatformBaseIE):
|
||||||
'season_number': 12,
|
'season_number': 12,
|
||||||
'episode': 'Episode 8',
|
'episode': 'Episode 8',
|
||||||
'episode_number': 8,
|
'episode_number': 8,
|
||||||
|
'categories': ['Intrattenimento'],
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
|
|
@ -53,6 +53,8 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||||
'chapters': 'count:1',
|
'chapters': 'count:1',
|
||||||
'tags': 'count:4',
|
'tags': 'count:4',
|
||||||
'thumbnail': r're:https?://.+\.jpg',
|
'thumbnail': r're:https?://.+\.jpg',
|
||||||
|
'categories': ['Series/The Tonight Show Starring Jimmy Fallon'],
|
||||||
|
'media_type': 'Full Episode',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': 'm3u8',
|
'skip_download': 'm3u8',
|
||||||
|
@ -131,6 +133,8 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||||
'tags': 'count:10',
|
'tags': 'count:10',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'thumbnail': r're:https?://.+\.jpg',
|
'thumbnail': r're:https?://.+\.jpg',
|
||||||
|
'categories': ['Series/Quantum Leap 2022'],
|
||||||
|
'media_type': 'Highlight',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': 'm3u8',
|
'skip_download': 'm3u8',
|
||||||
|
|
33
yt_dlp/extractor/rinsefm.py
Normal file
33
yt_dlp/extractor/rinsefm.py
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import format_field, parse_iso8601
|
||||||
|
|
||||||
|
|
||||||
|
class RinseFMIE(InfoExtractor):
    """Extractor for single episode pages on rinse.fm.

    The episode record is read from the page's embedded Next.js data under
    ``props -> pageProps -> entry``.
    """

    _VALID_URL = r'https?://(?:www\.)?rinse\.fm/episodes/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://rinse.fm/episodes/club-glow-15-12-2023-2000/',
        'md5': '76ee0b719315617df42e15e710f46c7b',
        'info_dict': {
            'id': '1536535',
            'ext': 'mp3',
            'title': 'Club Glow - 15/12/2023 - 20:00',
            'thumbnail': r're:^https://.+\.(?:jpg|JPG)$',
            'release_timestamp': 1702598400,
            'release_date': '20231215'
        }
    }]

    def _real_extract(self, url):
        """Return the audio-only info dict for the episode at `url`."""
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)
        nextjs_data = self._search_nextjs_data(page, display_id)
        entry = nextjs_data['props']['pageProps']['entry']

        # 'id' and 'fileUrl' are mandatory (KeyError if absent); the remaining
        # fields are optional and fall back to None.
        episode_id = entry['id']
        title = entry.get('title')
        audio_url = entry['fileUrl']
        release_timestamp = parse_iso8601(entry.get('episodeDate'))
        thumbnail = format_field(
            entry, [('featuredImage', 0, 'filename')], 'https://rinse.imgix.net/media/%s', default=None)

        return {
            'id': episode_id,
            'title': title,
            'url': audio_url,
            'vcodec': 'none',
            'release_timestamp': release_timestamp,
            'thumbnail': thumbnail,
        }
|
|
@ -1,6 +1,7 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
determine_ext,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
|
@ -29,6 +30,15 @@ class RudoVideoIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
|
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://rudo.video/podcast/b42ZUznHX0',
|
||||||
|
'md5': 'b91c70d832938871367f8ad10c895821',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b42ZUznHX0',
|
||||||
|
'title': 'Columna Ruperto Concha',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://rudo.video/vod/bN5AaJ',
|
'url': 'https://rudo.video/vod/bN5AaJ',
|
||||||
'md5': '01324a329227e2591530ecb4f555c881',
|
'md5': '01324a329227e2591530ecb4f555c881',
|
||||||
|
@ -75,13 +85,13 @@ class RudoVideoIE(InfoExtractor):
|
||||||
if 'Streaming is not available in your area' in webpage:
|
if 'Streaming is not available in your area' in webpage:
|
||||||
self.raise_geo_restricted()
|
self.raise_geo_restricted()
|
||||||
|
|
||||||
m3u8_url = (
|
media_url = (
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'var\s+streamURL\s*=\s*[\'"]([^?\'"]+)', webpage, 'stream url', default=None)
|
r'var\s+streamURL\s*=\s*[\'"]([^?\'"]+)', webpage, 'stream url', default=None)
|
||||||
# Source URL must be used only if streamURL is unavailable
|
# Source URL must be used only if streamURL is unavailable
|
||||||
or self._search_regex(
|
or self._search_regex(
|
||||||
r'<source[^>]+src=[\'"]([^\'"]+)', webpage, 'source url', default=None))
|
r'<source[^>]+src=[\'"]([^\'"]+)', webpage, 'source url', default=None))
|
||||||
if not m3u8_url:
|
if not media_url:
|
||||||
youtube_url = self._search_regex(r'file:\s*[\'"]((?:https?:)//(?:www\.)?youtube\.com[^\'"]+)',
|
youtube_url = self._search_regex(r'file:\s*[\'"]((?:https?:)//(?:www\.)?youtube\.com[^\'"]+)',
|
||||||
webpage, 'youtube url', default=None)
|
webpage, 'youtube url', default=None)
|
||||||
if youtube_url:
|
if youtube_url:
|
||||||
|
@ -97,7 +107,12 @@ class RudoVideoIE(InfoExtractor):
|
||||||
raise ExtractorError('Invalid access token array')
|
raise ExtractorError('Invalid access token array')
|
||||||
access_token = self._download_json(
|
access_token = self._download_json(
|
||||||
token_url, video_id, note='Downloading access token')['data']['authToken']
|
token_url, video_id, note='Downloading access token')['data']['authToken']
|
||||||
m3u8_url = update_url_query(m3u8_url, {'auth-token': access_token})
|
media_url = update_url_query(media_url, {'auth-token': access_token})
|
||||||
|
|
||||||
|
if determine_ext(media_url) == 'm3u8':
|
||||||
|
formats = self._extract_m3u8_formats(media_url, video_id, live=is_live)
|
||||||
|
else:
|
||||||
|
formats = [{'url': media_url}]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -109,6 +124,6 @@ class RudoVideoIE(InfoExtractor):
|
||||||
'thumbnail': (self._search_regex(r'var\s+posterIMG\s*=\s*[\'"]([^?\'"]+)',
|
'thumbnail': (self._search_regex(r'var\s+posterIMG\s*=\s*[\'"]([^?\'"]+)',
|
||||||
webpage, 'thumbnail', default=None)
|
webpage, 'thumbnail', default=None)
|
||||||
or self._og_search_thumbnail(webpage)),
|
or self._og_search_thumbnail(webpage)),
|
||||||
'formats': self._extract_m3u8_formats(m3u8_url, video_id, live=is_live),
|
'formats': formats,
|
||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
}
|
}
|
||||||
|
|
|
@ -114,6 +114,8 @@ class ScrippsNetworksIE(InfoExtractor):
|
||||||
'timestamp': 1475678834,
|
'timestamp': 1475678834,
|
||||||
'upload_date': '20161005',
|
'upload_date': '20161005',
|
||||||
'uploader': 'SCNI-SCND',
|
'uploader': 'SCNI-SCND',
|
||||||
|
'tags': 'count:10',
|
||||||
|
'creator': 'Cooking Channel',
|
||||||
'duration': 29.995,
|
'duration': 29.995,
|
||||||
'chapters': [{'start_time': 0.0, 'end_time': 29.995, 'title': '<Untitled Chapter 1>'}],
|
'chapters': [{'start_time': 0.0, 'end_time': 29.995, 'title': '<Untitled Chapter 1>'}],
|
||||||
'thumbnail': 'https://images.dds.discovery.com/up/tp/Scripps_-_Food_Category_Prod/122/987/0260338_630x355.jpg',
|
'thumbnail': 'https://images.dds.discovery.com/up/tp/Scripps_-_Food_Category_Prod/122/987/0260338_630x355.jpg',
|
||||||
|
|
|
@ -104,6 +104,10 @@ class ThePlatformBaseIE(OnceIE):
|
||||||
_add_chapter(chapter.get('startTime'), chapter.get('endTime'))
|
_add_chapter(chapter.get('startTime'), chapter.get('endTime'))
|
||||||
_add_chapter(tp_chapters[-1].get('startTime'), tp_chapters[-1].get('endTime') or duration)
|
_add_chapter(tp_chapters[-1].get('startTime'), tp_chapters[-1].get('endTime') or duration)
|
||||||
|
|
||||||
|
def extract_site_specific_field(field):
|
||||||
|
# A number of sites have custom-prefixed keys, e.g. 'cbc$seasonNumber'
|
||||||
|
return traverse_obj(info, lambda k, v: v and k.endswith(f'${field}'), get_all=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'title': info['title'],
|
'title': info['title'],
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
@ -113,6 +117,14 @@ class ThePlatformBaseIE(OnceIE):
|
||||||
'timestamp': int_or_none(info.get('pubDate'), 1000) or None,
|
'timestamp': int_or_none(info.get('pubDate'), 1000) or None,
|
||||||
'uploader': info.get('billingCode'),
|
'uploader': info.get('billingCode'),
|
||||||
'chapters': chapters,
|
'chapters': chapters,
|
||||||
|
'creator': traverse_obj(info, ('author', {str})) or None,
|
||||||
|
'categories': traverse_obj(info, (
|
||||||
|
'categories', lambda _, v: v.get('label') in ('category', None), 'name', {str})) or None,
|
||||||
|
'tags': traverse_obj(info, ('keywords', {lambda x: re.split(r'[;,]\s?', x) if x else None})),
|
||||||
|
'location': extract_site_specific_field('region'),
|
||||||
|
'series': extract_site_specific_field('show'),
|
||||||
|
'season_number': int_or_none(extract_site_specific_field('seasonNumber')),
|
||||||
|
'media_type': extract_site_specific_field('programmingType') or extract_site_specific_field('type'),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_theplatform_metadata(self, path, video_id):
|
def _extract_theplatform_metadata(self, path, video_id):
|
||||||
|
|
|
@ -479,9 +479,9 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
'_old_archive_ids': ['twitter 643211948184596480'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
|
'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
|
||||||
|
@ -515,6 +515,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'tags': ['TV', 'StarWars', 'TheForceAwakens'],
|
'tags': ['TV', 'StarWars', 'TheForceAwakens'],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 665052190608723968'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
|
'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
|
||||||
|
@ -558,9 +559,9 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': ['Damndaniel'],
|
'tags': ['Damndaniel'],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 700207533655363584'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
|
'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
|
||||||
|
@ -599,9 +600,9 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 719944021058060289'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
|
'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
|
||||||
|
@ -616,6 +617,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
},
|
},
|
||||||
'add_ie': ['Periscope'],
|
'add_ie': ['Periscope'],
|
||||||
|
'skip': 'Broadcast not found',
|
||||||
}, {
|
}, {
|
||||||
# has mp4 formats via mobile API
|
# has mp4 formats via mobile API
|
||||||
'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
|
'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
|
||||||
|
@ -635,9 +637,9 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'view_count': int,
|
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
|
'_old_archive_ids': ['twitter 852138619213144067'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/i/web/status/910031516746514432',
|
'url': 'https://twitter.com/i/web/status/910031516746514432',
|
||||||
|
@ -657,9 +659,9 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': ['Maria'],
|
'tags': ['Maria'],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 910031516746514432'],
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # requires ffmpeg
|
'skip_download': True, # requires ffmpeg
|
||||||
|
@ -683,9 +685,9 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 1001551623938805763'],
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # requires ffmpeg
|
'skip_download': True, # requires ffmpeg
|
||||||
|
@ -749,6 +751,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 1349794411333394432'],
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -771,18 +774,18 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 1577855540407197696'],
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True},
|
'params': {'skip_download': True},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
|
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1577719286659006464',
|
'id': '1577719286659006464',
|
||||||
'title': 'Ultima📛| New Era - Test',
|
'title': 'Ultima - Test',
|
||||||
'description': 'Test https://t.co/Y3KEZD7Dad',
|
'description': 'Test https://t.co/Y3KEZD7Dad',
|
||||||
'uploader': 'Ultima📛| New Era',
|
'uploader': 'Ultima',
|
||||||
'uploader_id': 'UltimaShadowX',
|
'uploader_id': 'UltimaShadowX',
|
||||||
'uploader_url': 'https://twitter.com/UltimaShadowX',
|
'uploader_url': 'https://twitter.com/UltimaShadowX',
|
||||||
'upload_date': '20221005',
|
'upload_date': '20221005',
|
||||||
|
@ -813,9 +816,9 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': ['HurricaneIan'],
|
'tags': ['HurricaneIan'],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 1575560063510810624'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# Adult content, fails if not logged in
|
# Adult content, fails if not logged in
|
||||||
|
@ -951,10 +954,10 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'uploader_url': 'https://twitter.com/CTVJLaidlaw',
|
'uploader_url': 'https://twitter.com/CTVJLaidlaw',
|
||||||
'display_id': '1600649710662213632',
|
'display_id': '1600649710662213632',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
|
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
|
||||||
'upload_date': '20221208',
|
'upload_date': '20221208',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 1600649710662213632'],
|
||||||
},
|
},
|
||||||
'params': {'noplaylist': True},
|
'params': {'noplaylist': True},
|
||||||
}, {
|
}, {
|
||||||
|
@ -979,7 +982,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'view_count': int,
|
'_old_archive_ids': ['twitter 1621117700482416640'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
|
'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
|
||||||
|
@ -995,13 +998,13 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'duration': 9.531,
|
'duration': 9.531,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'view_count': int,
|
|
||||||
'upload_date': '20221203',
|
'upload_date': '20221203',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'timestamp': 1670092210.0,
|
'timestamp': 1670092210.0,
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'uploader': '\u06ea',
|
'uploader': '\u06ea',
|
||||||
'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
|
'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
|
||||||
|
'_old_archive_ids': ['twitter 1599108751385972737'],
|
||||||
},
|
},
|
||||||
'params': {'noplaylist': True},
|
'params': {'noplaylist': True},
|
||||||
}, {
|
}, {
|
||||||
|
@ -1012,7 +1015,6 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'uploader_url': 'https://twitter.com/MunTheShinobi',
|
'uploader_url': 'https://twitter.com/MunTheShinobi',
|
||||||
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
|
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
|
||||||
'view_count': int,
|
|
||||||
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
|
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'uploader': 'Mün',
|
'uploader': 'Mün',
|
||||||
|
@ -1025,6 +1027,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'uploader_id': 'MunTheShinobi',
|
'uploader_id': 'MunTheShinobi',
|
||||||
'duration': 139.987,
|
'duration': 139.987,
|
||||||
'timestamp': 1670306984.0,
|
'timestamp': 1670306984.0,
|
||||||
|
'_old_archive_ids': ['twitter 1600009574919962625'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# retweeted_status (private)
|
# retweeted_status (private)
|
||||||
|
@ -1068,8 +1071,8 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
|
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'view_count': int,
|
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
|
'_old_archive_ids': ['twitter 1695424220702888009'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# retweeted_status w/ legacy API
|
# retweeted_status w/ legacy API
|
||||||
|
@ -1091,18 +1094,24 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
|
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
|
'_old_archive_ids': ['twitter 1695424220702888009'],
|
||||||
},
|
},
|
||||||
'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
|
'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
|
||||||
}, {
|
}, {
|
||||||
# Broadcast embedded in tweet
|
# Broadcast embedded in tweet
|
||||||
'url': 'https://twitter.com/JessicaDobsonWX/status/1693057346933600402',
|
'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1yNGaNLjEblJj',
|
'id': '1rmxPMjLzAXKN',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Jessica Dobson - WAVE Weather Now - Saturday 8/19/23 Update',
|
'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
|
||||||
'uploader': 'Jessica Dobson',
|
'uploader': 'Jessica Dobson',
|
||||||
'uploader_id': '1DZEoDwDovRQa',
|
'uploader_id': 'JessicaDobsonWX',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'uploader_url': 'https://twitter.com/JessicaDobsonWX',
|
||||||
|
'timestamp': 1701566398,
|
||||||
|
'upload_date': '20231203',
|
||||||
|
'live_status': 'was_live',
|
||||||
|
'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
|
||||||
|
'concurrent_view_count': int,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
'add_ie': ['TwitterBroadcast'],
|
'add_ie': ['TwitterBroadcast'],
|
||||||
|
@ -1125,6 +1134,30 @@ class TwitterIE(TwitterBaseIE):
|
||||||
},
|
},
|
||||||
'params': {'extractor_args': {'twitter': {'api': ['syndication']}}},
|
'params': {'extractor_args': {'twitter': {'api': ['syndication']}}},
|
||||||
'expected_warnings': ['Not all metadata'],
|
'expected_warnings': ['Not all metadata'],
|
||||||
|
}, {
|
||||||
|
# "stale tweet" with typename "TweetWithVisibilityResults"
|
||||||
|
'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
|
||||||
|
'md5': '62b1e11cdc2cdd0e527f83adb081f536',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1724883339285544960',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
|
||||||
|
'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
|
||||||
|
'display_id': '1724884212803834154',
|
||||||
|
'uploader': 'Robert F. Kennedy Jr',
|
||||||
|
'uploader_id': 'RobertKennedyJr',
|
||||||
|
'uploader_url': 'https://twitter.com/RobertKennedyJr',
|
||||||
|
'upload_date': '20231115',
|
||||||
|
'timestamp': 1700079417.0,
|
||||||
|
'duration': 341.048,
|
||||||
|
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
|
||||||
|
'tags': ['Kennedy24'],
|
||||||
|
'repost_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 1724884212803834154'],
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# onion route
|
# onion route
|
||||||
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
||||||
|
@ -1179,19 +1212,23 @@ class TwitterIE(TwitterBaseIE):
|
||||||
), default={}, get_all=False) if self.is_logged_in else traverse_obj(
|
), default={}, get_all=False) if self.is_logged_in else traverse_obj(
|
||||||
data, ('tweetResult', 'result', {dict}), default={})
|
data, ('tweetResult', 'result', {dict}), default={})
|
||||||
|
|
||||||
if result.get('__typename') not in ('Tweet', 'TweetTombstone', 'TweetUnavailable', None):
|
typename = result.get('__typename')
|
||||||
self.report_warning(f'Unknown typename: {result.get("__typename")}', twid, only_once=True)
|
if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
|
||||||
|
self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)
|
||||||
|
|
||||||
if 'tombstone' in result:
|
if 'tombstone' in result:
|
||||||
cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
|
cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
|
||||||
raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
|
raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
|
||||||
elif result.get('__typename') == 'TweetUnavailable':
|
elif typename == 'TweetUnavailable':
|
||||||
reason = result.get('reason')
|
reason = result.get('reason')
|
||||||
if reason == 'NsfwLoggedOut':
|
if reason == 'NsfwLoggedOut':
|
||||||
self.raise_login_required('NSFW tweet requires authentication')
|
self.raise_login_required('NSFW tweet requires authentication')
|
||||||
elif reason == 'Protected':
|
elif reason == 'Protected':
|
||||||
self.raise_login_required('You are not authorized to view this protected tweet')
|
self.raise_login_required('You are not authorized to view this protected tweet')
|
||||||
raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)
|
raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)
|
||||||
|
# Result for "stale tweet" needs additional transformation
|
||||||
|
elif typename == 'TweetWithVisibilityResults':
|
||||||
|
result = traverse_obj(result, ('tweet', {dict})) or {}
|
||||||
|
|
||||||
status = result.get('legacy', {})
|
status = result.get('legacy', {})
|
||||||
status.update(traverse_obj(result, {
|
status.update(traverse_obj(result, {
|
||||||
|
@ -1377,7 +1414,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
|
'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})), # No longer available
|
||||||
'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
|
'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
|
||||||
# The codec of http formats are unknown
|
# The codec of http formats are unknown
|
||||||
'_format_sort_fields': ('res', 'br', 'size', 'proto'),
|
'_format_sort_fields': ('res', 'br', 'size', 'proto'),
|
||||||
|
|
|
@ -4480,14 +4480,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
if mobj:
|
if mobj:
|
||||||
info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
|
info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
|
||||||
break
|
break
|
||||||
sbr_tooltip = try_get(
|
|
||||||
vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
|
info['like_count'] = traverse_obj(vpir, (
|
||||||
if sbr_tooltip:
|
'videoActions', 'menuRenderer', 'topLevelButtons', ...,
|
||||||
like_count, dislike_count = sbr_tooltip.split(' / ')
|
'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
|
||||||
info.update({
|
'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
|
||||||
'like_count': str_to_int(like_count),
|
'buttonViewModel', 'accessibilityText', {parse_count}), get_all=False)
|
||||||
'dislike_count': str_to_int(dislike_count),
|
|
||||||
})
|
|
||||||
vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
|
vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
|
||||||
if vcr:
|
if vcr:
|
||||||
vc = self._get_count(vcr, 'viewCount')
|
vc = self._get_count(vcr, 'viewCount')
|
||||||
|
|
|
@ -67,7 +67,7 @@ class HTTPHeaderDict(collections.UserDict, dict):
|
||||||
def __setitem__(self, key, value):
|
def __setitem__(self, key, value):
|
||||||
if isinstance(value, bytes):
|
if isinstance(value, bytes):
|
||||||
value = value.decode('latin-1')
|
value = value.decode('latin-1')
|
||||||
super().__setitem__(key.title(), str(value))
|
super().__setitem__(key.title(), str(value).strip())
|
||||||
|
|
||||||
def __getitem__(self, key):
|
def __getitem__(self, key):
|
||||||
return super().__getitem__(key.title())
|
return super().__getitem__(key.title())
|
||||||
|
|
|
@ -286,8 +286,8 @@ class CueBlock(Block):
|
||||||
m1 = parser.consume(_REGEX_TS)
|
m1 = parser.consume(_REGEX_TS)
|
||||||
if not m1:
|
if not m1:
|
||||||
return None
|
return None
|
||||||
parser.consume(_REGEX_OPTIONAL_WHITESPACE)
|
|
||||||
m2 = parser.consume(cls._REGEX_SETTINGS)
|
m2 = parser.consume(cls._REGEX_SETTINGS)
|
||||||
|
parser.consume(_REGEX_OPTIONAL_WHITESPACE)
|
||||||
if not parser.consume(_REGEX_NL):
|
if not parser.consume(_REGEX_NL):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user