mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-09-25 20:01:24 +02:00
Compare commits
No commits in common. "6314563ce5cde9b6e9906a957d0263f87a019f0f" and "0560d30b350e75146f4fe6bf604e2caa6ff9c3cb" have entirely different histories.
6314563ce5
...
0560d30b35
|
@ -1268,7 +1268,7 @@ The field names themselves (the part inside the parenthesis) can also have some
|
|||
|
||||
1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a dot `.` separator; e.g. `%(tags.0)s`, `%(subtitles.en.-1.ext)s`. You can do Python slicing with colon `:`; e.g. `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. Curly braces `{}` can be used to build dictionaries with only specific keys; e.g. `%(formats.:.{format_id,height})#j`. An empty field name `%()s` refers to the entire infodict; e.g. `%(.{id,title})s`. Note that the fields that become available using this method are not listed below. Use `-j` to see such fields.
|
||||
|
||||
1. **Arithmetic**: Simple arithmetic can be done on numeric fields using `+`, `-` and `*`. E.g. `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
|
||||
1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. E.g. `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
|
||||
|
||||
1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. E.g. `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s`
|
||||
|
||||
|
|
|
@ -797,7 +797,6 @@ class TestYoutubeDL(unittest.TestCase):
|
|||
test('%(title|%)s %(title|%%)s', '% %%')
|
||||
test('%(id+1-height+3)05d', '00158')
|
||||
test('%(width+100)05d', 'NA')
|
||||
test('%(filesize*8)d', '8192')
|
||||
test('%(formats.0) 15s', ('% 15s' % FORMATS[0], None))
|
||||
test('%(formats.0)r', (repr(FORMATS[0]), None))
|
||||
test('%(height.0)03d', '001')
|
||||
|
|
|
@ -2317,6 +2317,23 @@ Line 1
|
|||
self.assertEqual(traverse_obj({}, (0, slice(1)), traverse_string=True), [],
|
||||
msg='branching should result in list if `traverse_string`')
|
||||
|
||||
# Test is_user_input behavior
|
||||
_IS_USER_INPUT_DATA = {'range8': list(range(8))}
|
||||
self.assertEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3'),
|
||||
is_user_input=True), 3,
|
||||
msg='allow for string indexing if `is_user_input`')
|
||||
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3:'),
|
||||
is_user_input=True), tuple(range(8))[3:],
|
||||
msg='allow for string slice if `is_user_input`')
|
||||
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':4:2'),
|
||||
is_user_input=True), tuple(range(8))[:4:2],
|
||||
msg='allow step in string slice if `is_user_input`')
|
||||
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':'),
|
||||
is_user_input=True), range(8),
|
||||
msg='`:` should be treated as `...` if `is_user_input`')
|
||||
with self.assertRaises(TypeError, msg='too many params should result in error'):
|
||||
traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':::'), is_user_input=True)
|
||||
|
||||
# Test re.Match as input obj
|
||||
mobj = re.fullmatch(r'0(12)(?P<group>3)(4)?', '0123')
|
||||
self.assertEqual(traverse_obj(mobj, ...), [x for x in mobj.groups() if x is not None],
|
||||
|
|
|
@ -1179,7 +1179,6 @@ class YoutubeDL:
|
|||
MATH_FUNCTIONS = {
|
||||
'+': float.__add__,
|
||||
'-': float.__sub__,
|
||||
'*': float.__mul__,
|
||||
}
|
||||
# Field is of the form key1.key2...
|
||||
# where keys (except first) can be string, int, slice or "{field, ...}"
|
||||
|
@ -1201,15 +1200,6 @@ class YoutubeDL:
|
|||
(?:\|(?P<default>.*?))?
|
||||
)$''')
|
||||
|
||||
def _from_user_input(field):
|
||||
if field == ':':
|
||||
return ...
|
||||
elif ':' in field:
|
||||
return slice(*map(int_or_none, field.split(':')))
|
||||
elif int_or_none(field) is not None:
|
||||
return int(field)
|
||||
return field
|
||||
|
||||
def _traverse_infodict(fields):
|
||||
fields = [f for x in re.split(r'\.({.+?})\.?', fields)
|
||||
for f in ([x] if x.startswith('{') else x.split('.'))]
|
||||
|
@ -1219,12 +1209,11 @@ class YoutubeDL:
|
|||
|
||||
for i, f in enumerate(fields):
|
||||
if not f.startswith('{'):
|
||||
fields[i] = _from_user_input(f)
|
||||
continue
|
||||
assert f.endswith('}'), f'No closing brace for {f} in {fields}'
|
||||
fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')}
|
||||
fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}
|
||||
|
||||
return traverse_obj(info_dict, fields, traverse_string=True)
|
||||
return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
|
||||
|
||||
def get_value(mdict):
|
||||
# Object traversal
|
||||
|
|
|
@ -1643,10 +1643,7 @@ from .rumble import (
|
|||
RumbleIE,
|
||||
RumbleChannelIE,
|
||||
)
|
||||
from .rudovideo import (
|
||||
RudoVideoLiveIE,
|
||||
RudoVideoIE,
|
||||
)
|
||||
from .rudovideo import RudoVideoLiveIE
|
||||
from .rutube import (
|
||||
RutubeIE,
|
||||
RutubeChannelIE,
|
||||
|
|
|
@ -1,14 +1,12 @@
|
|||
from .common import InfoExtractor
|
||||
from .dailymotion import DailymotionIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
format_field,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
)
|
||||
from .dailymotion import DailymotionIE
|
||||
|
||||
|
||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
|
@ -84,8 +82,6 @@ class FranceTVIE(InfoExtractor):
|
|||
videos = []
|
||||
title = None
|
||||
subtitle = None
|
||||
episode_number = None
|
||||
season_number = None
|
||||
image = None
|
||||
duration = None
|
||||
timestamp = None
|
||||
|
@ -116,9 +112,7 @@ class FranceTVIE(InfoExtractor):
|
|||
if meta:
|
||||
if title is None:
|
||||
title = meta.get('title')
|
||||
# meta['pre_title'] contains season and episode number for series in format "S<ID> E<ID>"
|
||||
season_number, episode_number = self._search_regex(
|
||||
r'S(\d+)\s*E(\d+)', meta.get('pre_title'), 'episode info', group=(1, 2), default=(None, None))
|
||||
# XXX: what is meta['pre_title']?
|
||||
if subtitle is None:
|
||||
subtitle = meta.get('additional_title')
|
||||
if image is None:
|
||||
|
@ -197,19 +191,19 @@ class FranceTVIE(InfoExtractor):
|
|||
} for sheet in spritesheets]
|
||||
})
|
||||
|
||||
if subtitle:
|
||||
title += ' - %s' % subtitle
|
||||
title = title.strip()
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': join_nonempty(title, subtitle, delim=' - ').strip(),
|
||||
'title': title,
|
||||
'thumbnail': image,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'is_live': is_live,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'episode': subtitle if episode_number else None,
|
||||
'series': title if episode_number else None,
|
||||
'episode_number': int_or_none(episode_number),
|
||||
'season_number': int_or_none(season_number),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -236,31 +230,14 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
|||
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
|
||||
'ext': 'mp4',
|
||||
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
||||
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
|
||||
'timestamp': 1502623500,
|
||||
'duration': 2580,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20170813',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [FranceTVIE.ie_key()],
|
||||
}, {
|
||||
'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html',
|
||||
'info_dict': {
|
||||
'id': 'a9050959-eedd-4b4a-9b0d-de6eeaa73e44',
|
||||
'ext': 'mp4',
|
||||
'title': 'Foot2Rue - Duel au vieux port',
|
||||
'episode': 'Duel au vieux port',
|
||||
'series': 'Foot2Rue',
|
||||
'episode_number': 1,
|
||||
'season_number': 1,
|
||||
'timestamp': 1642761360,
|
||||
'upload_date': '20220121',
|
||||
'season': 'Season 1',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1441,
|
||||
},
|
||||
}, {
|
||||
# france3
|
||||
'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
|
||||
|
|
|
@ -3,11 +3,8 @@ import re
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
filter_dict,
|
||||
parse_qs,
|
||||
remove_end,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
@ -111,9 +108,7 @@ class MediaStreamIE(MediaStreamBaseIE):
|
|||
|
||||
for message in [
|
||||
'Debido a tu ubicación no puedes ver el contenido',
|
||||
'You are not allowed to watch this video: Geo Fencing Restriction',
|
||||
'Este contenido no está disponible en tu zona geográfica.',
|
||||
'El contenido sólo está disponible dentro de',
|
||||
'You are not allowed to watch this video: Geo Fencing Restriction'
|
||||
]:
|
||||
if message in webpage:
|
||||
self.raise_geo_restricted()
|
||||
|
@ -123,16 +118,7 @@ class MediaStreamIE(MediaStreamBaseIE):
|
|||
formats, subtitles = [], {}
|
||||
for video_format in player_config['src']:
|
||||
if video_format == 'hls':
|
||||
params = {
|
||||
'at': 'web-app',
|
||||
'access_token': traverse_obj(parse_qs(url), ('access_token', 0)),
|
||||
}
|
||||
for name, key in (('MDSTRMUID', 'uid'), ('MDSTRMSID', 'sid'), ('MDSTRMPID', 'pid'), ('VERSION', 'av')):
|
||||
params[key] = self._search_regex(
|
||||
rf'window\.{name}\s*=\s*["\']([^"\']+)["\'];', webpage, key, default=None)
|
||||
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
update_url_query(player_config['src'][video_format], filter_dict(params)), video_id)
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(player_config['src'][video_format], video_id)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
elif video_format == 'mpd':
|
||||
|
|
|
@ -3,6 +3,7 @@ import re
|
|||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
|
@ -83,17 +84,15 @@ class OnDemandKoreaIE(InfoExtractor):
|
|||
def try_geo_bypass(url):
|
||||
return traverse_obj(url, ({parse_qs}, 'stream_url', 0, {url_or_none})) or url
|
||||
|
||||
def try_upgrade_quality(url):
|
||||
mod_url = re.sub(r'_720(p?)\.m3u8', r'_1080\1.m3u8', url)
|
||||
return mod_url if mod_url != url and self._request_webpage(
|
||||
HEADRequest(mod_url), video_id, note='Checking for higher quality format',
|
||||
errnote='No higher quality format found', fatal=False) else url
|
||||
|
||||
formats = []
|
||||
for m3u8_url in traverse_obj(data, (('sources', 'manifest'), ..., 'url', {url_or_none}, {try_geo_bypass})):
|
||||
mod_url = re.sub(r'_720(p?)\.m3u8', r'_1080\1.m3u8', m3u8_url)
|
||||
if mod_url != m3u8_url:
|
||||
mod_format = self._extract_m3u8_formats(
|
||||
mod_url, video_id, note='Checking for higher quality format',
|
||||
errnote='No higher quality format found', fatal=False)
|
||||
if mod_format:
|
||||
formats.extend(mod_format)
|
||||
continue
|
||||
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(try_upgrade_quality(m3u8_url), video_id, fatal=False))
|
||||
|
||||
subtitles = {}
|
||||
for track in traverse_obj(data, ('text_tracks', lambda _, v: url_or_none(v['url']))):
|
||||
|
|
|
@ -4,14 +4,7 @@ from urllib.parse import unquote
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import functools
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
make_archive_id,
|
||||
mimetype2ext,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils import ExtractorError, make_archive_id, urljoin
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
|
@ -33,7 +26,6 @@ class Pr0grammIE(InfoExtractor):
|
|||
'dislike_count': int,
|
||||
'age_limit': 0,
|
||||
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
|
||||
'_old_archive_ids': ['pr0grammstatic 5466437'],
|
||||
},
|
||||
}, {
|
||||
# Tags require account
|
||||
|
@ -51,7 +43,6 @@ class Pr0grammIE(InfoExtractor):
|
|||
'dislike_count': int,
|
||||
'age_limit': 0,
|
||||
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
|
||||
'_old_archive_ids': ['pr0grammstatic 3052805'],
|
||||
},
|
||||
}, {
|
||||
# Requires verified account
|
||||
|
@ -69,7 +60,6 @@ class Pr0grammIE(InfoExtractor):
|
|||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
|
||||
'_old_archive_ids': ['pr0grammstatic 5848332'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://pr0gramm.com/static/5466437',
|
||||
|
@ -120,61 +110,37 @@ class Pr0grammIE(InfoExtractor):
|
|||
|
||||
return data
|
||||
|
||||
@staticmethod
|
||||
def _create_source_url(path):
|
||||
return urljoin('https://img.pr0gramm.com', path)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_info = traverse_obj(
|
||||
self._call_api('get', video_id, {'id': video_id, 'flags': self._maximum_flags}),
|
||||
('items', 0, {dict}))
|
||||
|
||||
source = video_info.get('image')
|
||||
source = urljoin('https://img.pr0gramm.com', video_info.get('image'))
|
||||
if not source or not source.endswith('mp4'):
|
||||
self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id)
|
||||
|
||||
tags = None
|
||||
if self._is_logged_in:
|
||||
metadata = self._call_api('info', video_id, {'itemId': video_id}, note='Downloading tags')
|
||||
metadata = self._call_api('info', video_id, {'itemId': video_id})
|
||||
tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
|
||||
# Sorted by "confidence", higher confidence = earlier in list
|
||||
confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
|
||||
if confidences:
|
||||
tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
|
||||
|
||||
formats = traverse_obj(video_info, ('variants', ..., {
|
||||
'format_id': ('name', {str}),
|
||||
'url': ('path', {self._create_source_url}),
|
||||
'ext': ('mimeType', {mimetype2ext}),
|
||||
'vcodec': ('codec', {str}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
'bitrate': ('bitRate', {float_or_none}),
|
||||
'filesize': ('fileSize', {int_or_none}),
|
||||
})) if video_info.get('variants') else [{
|
||||
'ext': 'mp4',
|
||||
'format_id': 'source',
|
||||
**traverse_obj(video_info, {
|
||||
'url': ('image', {self._create_source_url}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}),
|
||||
}]
|
||||
|
||||
subtitles = {}
|
||||
for subtitle in traverse_obj(video_info, ('subtitles', lambda _, v: v['language'])):
|
||||
subtitles.setdefault(subtitle['language'], []).append(traverse_obj(subtitle, {
|
||||
'url': ('path', {self._create_source_url}),
|
||||
'note': ('label', {str}),
|
||||
}))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': f'pr0gramm-{video_id} by {video_info.get("user")}',
|
||||
'formats': [{
|
||||
'url': source,
|
||||
'ext': 'mp4',
|
||||
**traverse_obj(video_info, {
|
||||
'width': ('width', {int}),
|
||||
'height': ('height', {int}),
|
||||
}),
|
||||
}],
|
||||
'tags': tags,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'age_limit': 18 if traverse_obj(video_info, ('flags', {0b110.__and__})) else 0,
|
||||
'_old_archive_ids': [make_archive_id('Pr0grammStatic', video_id)],
|
||||
**traverse_obj(video_info, {
|
||||
|
|
|
@ -2,33 +2,9 @@ from .common import InfoExtractor
|
|||
from ..utils import ExtractorError, traverse_obj, js_to_json, update_url_query
|
||||
|
||||
|
||||
class RudoVideoBaseIE(InfoExtractor):
|
||||
def get_title(self, webpage):
|
||||
return self._search_regex(r'var\s+titleVideo\s*=\s*[\'"]([^\'"]+)', webpage, 'title', default=None) or self._og_search_title(webpage)
|
||||
|
||||
def get_thumbnail(self, webpage):
|
||||
return self._search_regex(r'var\s+posterIMG\s*=\s*[\'"]([^?\'"]+)', webpage, 'thumbnail', default=None) or self._og_search_thumbnail(webpage)
|
||||
|
||||
def get_creator(self, webpage):
|
||||
return self._search_regex(r'var\s+videoAuthor\s*=\s*[\'"]([^?\'"]+)', webpage, "videoAuthor", default=None)
|
||||
|
||||
def get_stream_url(self, webpage, video_id):
|
||||
stream_url = self._search_regex(r'var\s+streamURL\s*=\s*[\'"]([^?\'"]+)', webpage, 'streamUrl', default=None) or self._search_regex(r'<source[^>]+src=[\'"]([^\'"]+)', webpage, 'sourceUrl', default=None)
|
||||
youtube_url = self._search_regex(r'file:\s*[\'"]((?:https?:)//(?:www\.)?youtube.com[^\'"]+)', webpage, 'youtubeUrl', default=None)
|
||||
if stream_url is None:
|
||||
if youtube_url is None:
|
||||
raise ExtractorError('Unable to extract stream url')
|
||||
return self.url_result(youtube_url, display_id=video_id)
|
||||
return stream_url
|
||||
|
||||
def check_geo_restricted(self, webpage):
|
||||
if 'Streaming is not available in your area.' in webpage:
|
||||
self.raise_geo_restricted()
|
||||
|
||||
|
||||
class RudoVideoIE(RudoVideoBaseIE):
|
||||
_VALID_URL = r'https?://rudo\.video/(?:vod|podcast)/(?P<id>[^/?]+)'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)//rudo\.video/(?:vod|podcast)/[^\'"]+)']
|
||||
class RudoVideoLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://rudo\.video/(?P<type>live|vod|podcast)/(?P<id>[^/?]+)'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)//rudo\.video/(?:live|vod|podcast)/[^\'"]+)']
|
||||
_TESTS = [{
|
||||
'url': 'https://rudo.video/podcast/cz2wrUy8l0o',
|
||||
'md5': '28ed82b477708dc5e12e072da2449221',
|
||||
|
@ -57,29 +33,7 @@ class RudoVideoIE(RudoVideoBaseIE):
|
|||
'ext': 'mp4',
|
||||
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
self.check_geo_restricted(webpage)
|
||||
|
||||
stream_url = self.get_stream_url(webpage, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self.get_title(webpage),
|
||||
'formats': self._extract_m3u8_formats(stream_url, video_id, live=True),
|
||||
'creator': self.get_creator(webpage),
|
||||
'thumbnail': self.get_thumbnail(webpage),
|
||||
}
|
||||
|
||||
|
||||
class RudoVideoLiveIE(RudoVideoBaseIE):
|
||||
_VALID_URL = r'https?://rudo\.video/live/(?P<id>[^/?]+)'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)//rudo\.video/live/[^\'"]+)']
|
||||
_TESTS = [{
|
||||
}, {
|
||||
'url': 'https://rudo.video/live/bbtv',
|
||||
'info_dict': {
|
||||
'id': 'bbtv',
|
||||
|
@ -107,13 +61,43 @@ class RudoVideoLiveIE(RudoVideoBaseIE):
|
|||
'skip': 'Geo-restricted to Chile',
|
||||
}]
|
||||
|
||||
def get_title(self, webpage):
|
||||
title = self._search_regex(r'var\s+titleVideo\s*=\s*[\'"]([^\'"]+)', webpage, 'title', default=None)
|
||||
if title is None:
|
||||
title = self._search_regex(r'<meta[^>]+property=[\'"]og:title[\'"]\s+content=[\'"]([^\'"]+)', webpage, 'title', fatal=False)
|
||||
return title
|
||||
|
||||
def get_thumbnail(self, webpage):
|
||||
thumbnail = self._search_regex(r'var\s+posterIMG\s*=\s*[\'"]([^?\'"]+)', webpage, 'thumbnail', default=None)
|
||||
if thumbnail is None:
|
||||
thumbnail = self._search_regex(r'<meta[^>]+property=[\'"]og:image[\'"]\s+content=[\'"]([^\'"]+)', webpage, 'thumbnail', default=None)
|
||||
return thumbnail
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
type = self._match_valid_url(url).group('type')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
self.check_geo_restricted(webpage)
|
||||
if 'Streaming is not available in your area.' in webpage:
|
||||
self.raise_geo_restricted()
|
||||
|
||||
stream_url = self._search_regex(r'var\s+streamURL\s*=\s*[\'"]([^?\'"]+)', webpage, 'streamUrl', default=None)
|
||||
source_url = self._search_regex(r'<source[^>]+src=[\'"]([^\'"]+)', webpage, 'sourceUrl', default=None)
|
||||
youtube_url = self._search_regex(r'file:\s*[\'"]((?:https?:)//(?:www\.)?youtube.com[^\'"]+)', webpage, 'youtubeUrl', default=None)
|
||||
if stream_url is None:
|
||||
if source_url is not None:
|
||||
stream_url = source_url
|
||||
elif youtube_url is not None:
|
||||
return self.url_result(youtube_url, display_id=video_id)
|
||||
else:
|
||||
raise ExtractorError('Unable to extract stream url')
|
||||
|
||||
title = self.get_title(webpage)
|
||||
thumbnail = self.get_thumbnail(webpage)
|
||||
is_live = None
|
||||
if type == 'live':
|
||||
is_live = True
|
||||
|
||||
stream_url = self.get_stream_url(webpage, video_id)
|
||||
token_array = self._search_json(r'<script>var\s+_\$_[a-zA-Z0-9]+\s*=', webpage, 'access token array', video_id,
|
||||
contains_pattern=r'\[(?s:.+)\]', default=None, transform_source=js_to_json)
|
||||
if token_array:
|
||||
|
@ -124,9 +108,9 @@ class RudoVideoLiveIE(RudoVideoBaseIE):
|
|||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self.get_title(webpage),
|
||||
'title': title,
|
||||
'formats': self._extract_m3u8_formats(stream_url, video_id, live=True),
|
||||
'is_live': True,
|
||||
'creator': self.get_creator(webpage),
|
||||
'thumbnail': self.get_thumbnail(webpage),
|
||||
'is_live': is_live,
|
||||
'creator': self._search_regex(r'var\s+videoAuthor\s*=\s*[\'"]([^?\'"]+)', webpage, "videoAuthor", default=None),
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
|
|
@ -6469,9 +6469,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||
def _has_tab(self, tabs, tab_id):
|
||||
return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
|
||||
|
||||
def _empty_playlist(self, item_id, data):
|
||||
return self.playlist_result([], item_id, **self._extract_metadata_from_tabs(item_id, data))
|
||||
|
||||
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
|
||||
def _real_extract(self, url, smuggled_data):
|
||||
item_id = self._match_id(url)
|
||||
|
@ -6537,10 +6534,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||
selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url) # NB: Name may be translated
|
||||
self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')
|
||||
|
||||
# /about is no longer a tab
|
||||
if original_tab_id == 'about':
|
||||
return self._empty_playlist(item_id, data)
|
||||
|
||||
if not original_tab_id and selected_tab_name:
|
||||
self.to_screen('Downloading all uploads of the channel. '
|
||||
'To download only the videos in a specific tab, pass the tab\'s URL')
|
||||
|
@ -6553,7 +6546,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||
if not extra_tabs and selected_tab_id != 'videos':
|
||||
# Channel does not have streams, shorts or videos tabs
|
||||
if item_id[:2] != 'UC':
|
||||
return self._empty_playlist(item_id, data)
|
||||
raise ExtractorError('This channel has no uploads', expected=True)
|
||||
|
||||
# Topic channels don't have /videos. Use the equivalent playlist instead
|
||||
pl_id = f'UU{item_id[2:]}'
|
||||
|
@ -6561,7 +6554,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
|||
try:
|
||||
data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
|
||||
except ExtractorError:
|
||||
return self._empty_playlist(item_id, data)
|
||||
raise ExtractorError('This channel has no uploads', expected=True)
|
||||
else:
|
||||
item_id, url = pl_id, pl_url
|
||||
self.to_screen(
|
||||
|
|
|
@ -8,7 +8,7 @@ from ._utils import (
|
|||
IDENTITY,
|
||||
NO_DEFAULT,
|
||||
LazyList,
|
||||
deprecation_warning,
|
||||
int_or_none,
|
||||
is_iterable_like,
|
||||
try_call,
|
||||
variadic,
|
||||
|
@ -17,7 +17,7 @@ from ._utils import (
|
|||
|
||||
def traverse_obj(
|
||||
obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True,
|
||||
casesense=True, is_user_input=NO_DEFAULT, traverse_string=False):
|
||||
casesense=True, is_user_input=False, traverse_string=False):
|
||||
"""
|
||||
Safely traverse nested `dict`s and `Iterable`s
|
||||
|
||||
|
@ -63,8 +63,10 @@ def traverse_obj(
|
|||
@param get_all If `False`, return the first matching result, otherwise all matching ones.
|
||||
@param casesense If `False`, consider string dictionary keys as case insensitive.
|
||||
|
||||
`traverse_string` is only meant to be used by YoutubeDL.prepare_outtmpl and is not part of the API
|
||||
The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API
|
||||
|
||||
@param is_user_input Whether the keys are generated from user input.
|
||||
If `True` strings get converted to `int`/`slice` if needed.
|
||||
@param traverse_string Whether to traverse into objects as strings.
|
||||
If `True`, any non-compatible object will first be
|
||||
converted into a string and then traversed into.
|
||||
|
@ -78,9 +80,6 @@ def traverse_obj(
|
|||
If no `default` is given and the last path branches, a `list` of results
|
||||
is always returned. If a path ends on a `dict` that result will always be a `dict`.
|
||||
"""
|
||||
if is_user_input is not NO_DEFAULT:
|
||||
deprecation_warning('The is_user_input parameter is deprecated and no longer works')
|
||||
|
||||
casefold = lambda k: k.casefold() if isinstance(k, str) else k
|
||||
|
||||
if isinstance(expected_type, type):
|
||||
|
@ -196,6 +195,14 @@ def traverse_obj(
|
|||
|
||||
key = None
|
||||
for last, key in lazy_last(variadic(path, (str, bytes, dict, set))):
|
||||
if is_user_input and isinstance(key, str):
|
||||
if key == ':':
|
||||
key = ...
|
||||
elif ':' in key:
|
||||
key = slice(*map(int_or_none, key.split(':')))
|
||||
elif int_or_none(key) is not None:
|
||||
key = int(key)
|
||||
|
||||
if not casesense and isinstance(key, str):
|
||||
key = key.casefold()
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user