mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-14 03:11:24 +01:00
Compare commits
8 Commits
9d6254069c
...
2269065ad6
Author | SHA1 | Date | |
---|---|---|---|
|
2269065ad6 | ||
|
a5e264d74b | ||
|
b84fda7388 | ||
|
5fccabac27 | ||
|
21f40e75df | ||
|
b3febedbeb | ||
|
295fbb3ae3 | ||
|
35f9a306e6 |
|
@ -15,7 +15,7 @@ def get_package_info(module):
|
||||||
name=getattr(module, '_yt_dlp__identifier', module.__name__),
|
name=getattr(module, '_yt_dlp__identifier', module.__name__),
|
||||||
version=str(next(filter(None, (
|
version=str(next(filter(None, (
|
||||||
getattr(module, attr, None)
|
getattr(module, attr, None)
|
||||||
for attr in ('__version__', 'version_string', 'version')
|
for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version')
|
||||||
)), None)))
|
)), None)))
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -43,6 +43,8 @@ except Exception as _err:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
# We need to get the underlying `sqlite` version, see https://github.com/yt-dlp/yt-dlp/issues/8152
|
||||||
|
sqlite3._yt_dlp__version = sqlite3.sqlite_version
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# although sqlite3 is part of the standard library, it is possible to compile python without
|
# although sqlite3 is part of the standard library, it is possible to compile python without
|
||||||
# sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
|
# sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
|
||||||
|
|
|
@ -296,9 +296,11 @@ from .cammodels import CamModelsIE
|
||||||
from .camsoda import CamsodaIE
|
from .camsoda import CamsodaIE
|
||||||
from .camtasia import CamtasiaEmbedIE
|
from .camtasia import CamtasiaEmbedIE
|
||||||
from .camwithher import CamWithHerIE
|
from .camwithher import CamWithHerIE
|
||||||
|
from .canal1 import Canal1IE
|
||||||
from .canalalpha import CanalAlphaIE
|
from .canalalpha import CanalAlphaIE
|
||||||
from .canalplus import CanalplusIE
|
from .canalplus import CanalplusIE
|
||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
|
from .caracoltv import CaracolTvPlayIE
|
||||||
from .carambatv import (
|
from .carambatv import (
|
||||||
CarambaTVIE,
|
CarambaTVIE,
|
||||||
CarambaTVPageIE,
|
CarambaTVPageIE,
|
||||||
|
@ -565,6 +567,7 @@ from .epicon import (
|
||||||
EpiconIE,
|
EpiconIE,
|
||||||
EpiconSeriesIE,
|
EpiconSeriesIE,
|
||||||
)
|
)
|
||||||
|
from .eplus import EplusIbIE
|
||||||
from .epoch import EpochIE
|
from .epoch import EpochIE
|
||||||
from .eporner import EpornerIE
|
from .eporner import EpornerIE
|
||||||
from .eroprofile import (
|
from .eroprofile import (
|
||||||
|
@ -1598,6 +1601,7 @@ from .rbmaradio import RBMARadioIE
|
||||||
from .rbgtum import (
|
from .rbgtum import (
|
||||||
RbgTumIE,
|
RbgTumIE,
|
||||||
RbgTumCourseIE,
|
RbgTumCourseIE,
|
||||||
|
RbgTumNewCourseIE,
|
||||||
)
|
)
|
||||||
from .rcs import (
|
from .rcs import (
|
||||||
RCSIE,
|
RCSIE,
|
||||||
|
|
|
@ -49,14 +49,14 @@ class BilibiliBaseIE(InfoExtractor):
|
||||||
for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
|
for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
|
||||||
}
|
}
|
||||||
|
|
||||||
audios = traverse_obj(play_info, ('dash', 'audio', ...))
|
audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
|
||||||
flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
|
flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
|
||||||
if flac_audio:
|
if flac_audio:
|
||||||
audios.append(flac_audio)
|
audios.append(flac_audio)
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
|
'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
|
||||||
'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
|
'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
|
||||||
'acodec': audio.get('codecs'),
|
'acodec': traverse_obj(audio, ('codecs', {str.lower})),
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
|
'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
|
||||||
'filesize': int_or_none(audio.get('size')),
|
'filesize': int_or_none(audio.get('size')),
|
||||||
|
@ -71,6 +71,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||||
'height': int_or_none(video.get('height')),
|
'height': int_or_none(video.get('height')),
|
||||||
'vcodec': video.get('codecs'),
|
'vcodec': video.get('codecs'),
|
||||||
'acodec': 'none' if audios else None,
|
'acodec': 'none' if audios else None,
|
||||||
|
'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
|
||||||
'tbr': float_or_none(video.get('bandwidth'), scale=1000),
|
'tbr': float_or_none(video.get('bandwidth'), scale=1000),
|
||||||
'filesize': int_or_none(video.get('size')),
|
'filesize': int_or_none(video.get('size')),
|
||||||
'quality': int_or_none(video.get('id')),
|
'quality': int_or_none(video.get('id')),
|
||||||
|
|
39
yt_dlp/extractor/canal1.py
Normal file
39
yt_dlp/extractor/canal1.py
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class Canal1IE(InfoExtractor):
    """Extractor for canal1.com.co pages.

    The page itself is only used to locate the ``embedUrl`` value; the actual
    media extraction is delegated to whichever extractor handles that URL.
    """

    _VALID_URL = r'https?://(?:www\.|noticias\.)?canal1\.com\.co/(?:[^?#&])+/(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://canal1.com.co/noticias/napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco/',
        'info_dict': {
            'id': '63b39f6b354977084b85ab54',
            'display_id': 'napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco',
            'title': 'Ñapa I Una cadena de producción de arroz que se quedó en veremos y abandonada en el departamento del Chocó',
            'description': 'md5:bc49c6d64d20610ea1e7daf079a0d013',
            'thumbnail': r're:^https?://[^?#]+63b39f6b354977084b85ab54',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://noticias.canal1.com.co/noticias/tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter/',
        'info_dict': {
            'id': '63b39e93f5fd223aa32250fb',
            'display_id': 'tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter',
            'title': 'Tres I El triste récord que impuso Elon Musk, el dueño de Tesla y de Twitter',
            'description': 'md5:d9f691f131a21ce6767ca6c05d17d791',
            'thumbnail': r're:^https?://[^?#]+63b39e93f5fd223aa32250fb',
            'ext': 'mp4',
        },
    }, {
        # Geo-restricted to Colombia
        'url': 'https://canal1.com.co/programas/guerreros-canal-1/video-inedito-guerreros-despedida-kewin-zarate/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the page's embed URL."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The embed URL is read from the `"embedUrl": "..."` entry in the page source
        embed_url = self._search_regex(r'"embedUrl"\s*:\s*"([^"]+)', webpage, 'embed url')

        return self.url_result(embed_url, display_id=display_id, url_transparent=True)
|
136
yt_dlp/extractor/caracoltv.py
Normal file
136
yt_dlp/extractor/caracoltv.py
Normal file
|
@ -0,0 +1,136 @@
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
traverse_obj,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CaracolTvPlayIE(InfoExtractor):
    """Extractor for play.caracoltv.com ``videoDetails`` pages.

    All metadata and stream URLs are fetched from the
    ``eu-gateway.inmobly.com`` API, which requires a user token obtained
    through :meth:`_perform_login`.
    """

    _VALID_URL = r'https?://play\.caracoltv\.com/videoDetails/(?P<id>[^/?#]+)'
    _NETRC_MACHINE = 'caracoltv-play'

    _TESTS = [{
        'url': 'https://play.caracoltv.com/videoDetails/OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
        'info_dict': {
            'id': 'OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
            'title': 'La teoría del promedio',
            'description': 'md5:1cdd6d2c13f19ef0d9649ab81a023ac3',
        },
        'playlist_count': 6,
    }, {
        'url': 'https://play.caracoltv.com/videoDetails/OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==/ella?season=0',
        'info_dict': {
            'id': 'OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==',
            'title': 'Ella',
            'description': 'md5:a639b1feb5ddcc0cff92a489b4e544b8',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://play.caracoltv.com/videoDetails/OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==/la-vuelta-al-mundo-en-80-risas-2022?season=0',
        'info_dict': {
            'id': 'OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==',
            'title': 'La vuelta al mundo en 80 risas 2022',
            'description': 'md5:e97aac36106e5c37ebf947b3350106a4',
        },
        'playlist_count': 17,
    }, {
        'url': 'https://play.caracoltv.com/videoDetails/MzoxX3BwbjRmNjB1',
        'only_matching': True,
    }]

    # Set by _perform_login; None means no login has been performed yet
    _USER_TOKEN = None

    def _extract_app_token(self, webpage):
        """Build the Basic-auth application token (base64 of ``key:secret``).

        The key/secret pair is read from the ``mediation.live`` section of the
        site's ``coreConfig.js`` when it can be located and parsed; otherwise
        the hard-coded pair below is used.

        @param webpage  Landing page HTML. May be falsy when the caller's
                        non-fatal download failed.
        """
        # A failed non-fatal download yields False, which the regex helpers
        # cannot search; substitute an empty string so the fallback is reached
        config_js_path = self._search_regex(
            r'<script[^>]+src\s*=\s*"([^"]+coreConfig.js[^"]+)', webpage or '', 'config js url', fatal=False)

        mediation_config = {}
        if config_js_path:
            config_js = self._download_webpage(
                urljoin('https://play.caracoltv.com/', config_js_path), None, fatal=False, note='Extracting JS config')
            # Same guard as above: the config download is non-fatal too
            mediation_config = self._search_json(
                r'mediation\s*:', config_js or '', 'mediation_config', None,
                transform_source=js_to_json, fatal=False)

        key = traverse_obj(
            mediation_config, ('live', 'key')) or '795cd9c089a1fc48094524a5eba85a3fca1331817c802f601735907c8bbb4f50'
        secret = traverse_obj(
            mediation_config, ('live', 'secret')) or '64dec00a6989ba83d087621465b5e5d38bdac22033b0613b659c442c78976fa0'

        return base64.b64encode(f'{key}:{secret}'.encode()).decode()

    def _perform_login(self, email, password):
        """Log in and store the resulting user token in ``self._USER_TOKEN``.

        First exchanges the application token for a bearer token, then posts
        the credentials together with a freshly generated device id.
        """
        webpage = self._download_webpage('https://play.caracoltv.com/', None, fatal=False)
        app_token = self._extract_app_token(webpage)

        bearer_token = self._download_json(
            'https://eu-gateway.inmobly.com/applications/oauth', None, data=b'', note='Retrieving bearer token',
            headers={'Authorization': f'Basic {app_token}'})['token']

        self._USER_TOKEN = self._download_json(
            'https://eu-gateway.inmobly.com/user/login', None, note='Performing login', headers={
                'Content-Type': 'application/json',
                'Authorization': f'Bearer {bearer_token}',
            }, data=json.dumps({
                'device_data': {
                    'device_id': str(uuid.uuid4()),
                    'device_token': '',
                    'device_type': 'web'
                },
                'login_data': {
                    'enabled': True,
                    'email': email,
                    'password': password,
                }
            }).encode())['user_token']

    def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None):
        """Convert one API item into an info dict, extracting its HLS formats."""
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_data['stream_url'], series_id, 'mp4')

        return {
            'id': video_data['id'],
            'title': video_data.get('name'),
            'description': video_data.get('description'),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': traverse_obj(
                video_data, ('extra_thumbs', ..., {'url': 'thumb_url', 'height': 'height', 'width': 'width'})),
            'series_id': series_id,
            'season_id': season_id,
            'season_number': int_or_none(season_number),
            'episode_number': int_or_none(video_data.get('item_order')),
            'is_live': video_data.get('entry_type') == 3,
        }

    def _extract_series_seasons(self, seasons, series_id):
        """Yield an info dict for every episode of every season in ``seasons``."""
        for season in seasons:
            api_response = self._download_json(
                'https://eu-gateway.inmobly.com/feed', series_id, query={'season_id': season['id']},
                headers={'Authorization': f'Bearer {self._USER_TOKEN}'})

            season_number = season.get('order')
            for episode in api_response['items']:
                yield self._extract_video(episode, series_id, season['id'], season_number)

    def _real_extract(self, url):
        """Return a single video, or a playlist when the item has seasons."""
        series_id = self._match_id(url)

        # No token yet: fall back to the guest account
        if self._USER_TOKEN is None:
            self._perform_login('guest@inmobly.com', 'Test@gus1')

        api_response = self._download_json(
            'https://eu-gateway.inmobly.com/feed', series_id, query={'include_ids': series_id},
            headers={'Authorization': f'Bearer {self._USER_TOKEN}'})['items'][0]

        if not api_response.get('seasons'):
            return self._extract_video(api_response)

        return self.playlist_result(
            self._extract_series_seasons(api_response['seasons'], series_id),
            series_id, **traverse_obj(api_response, {
                'title': 'name',
                'description': 'description',
            }))
|
|
@ -1,31 +1,72 @@
|
||||||
import time
|
import time
|
||||||
import hashlib
|
import hashlib
|
||||||
import re
|
|
||||||
import urllib
|
import urllib
|
||||||
|
import uuid
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .openload import PhantomJSwrapper
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
UserNotLive,
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
parse_resolution,
|
||||||
|
str_or_none,
|
||||||
|
traverse_obj,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
url_or_none,
|
||||||
|
urlencode_postdata,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DouyuTVIE(InfoExtractor):
|
class DouyuBaseIE(InfoExtractor):
|
||||||
IE_DESC = '斗鱼'
|
def _download_cryptojs_md5(self, video_id):
    """Fetch the crypto-js MD5 rollup, cache it and return its source.

    Each mirror is tried in order with a non-fatal download; the first
    non-empty response is stored in the 'douyu' cache section.

    Raises ExtractorError when no mirror returns the script.
    """
    mirrors = (
        'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
        'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
    )
    for mirror_url in mirrors:
        script = self._download_webpage(
            mirror_url, video_id, note='Downloading signing dependency', fatal=False)
        if not script:
            # This mirror failed; move on to the next one
            continue
        self.cache.store('douyu', 'crypto-js-md5', script)
        return script
    raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')
|
||||||
|
|
||||||
|
def _get_cryptojs_md5(self, video_id):
    """Return the crypto-js MD5 source, preferring the cached copy."""
    cached_script = self.cache.load('douyu', 'crypto-js-md5')
    if cached_script:
        return cached_script
    # Nothing usable in the cache: download (and cache) it now
    return self._download_cryptojs_md5(video_id)
|
||||||
|
|
||||||
|
def _calc_sign(self, sign_func, video_id, a):
    """Run the site's JS signing function and return its output as a dict.

    The script passed to PhantomJS is the crypto-js MD5 source, followed by
    ``sign_func``, followed by a call to ``ub98484234`` with ``a``, a random
    hex token and the current Unix time. Its printed output is parsed as a
    query string; only the first value of each key is kept.
    """
    nonce = uuid.uuid4().hex
    timestamp = round(time.time())
    md5_source = self._get_cryptojs_md5(video_id)
    js_script = f'{md5_source};{sign_func};console.log(ub98484234("{a}","{nonce}","{timestamp}"))'

    output = PhantomJSwrapper(self).execute(
        js_script, video_id, note='Executing JS signing script').strip()

    signed_params = {}
    for key, values in urllib.parse.parse_qs(output).items():
        signed_params[key] = values[0]
    return signed_params
|
||||||
|
|
||||||
|
def _search_js_sign_func(self, webpage, fatal=True):
    """Return the body of the inline <script> that mentions ``ub98484234``."""
    # The optional, greedy `<script.*` prefix consumes as much as it can first,
    # so the captured tag is the last candidate script tag
    sign_script_re = r'(?:<script.*)?<script[^>]*>(.*?ub98484234.*?)</script>'
    return self._search_regex(sign_script_re, webpage, 'JS sign func', fatal=fatal)
|
||||||
|
|
||||||
|
|
||||||
|
class DouyuTVIE(DouyuBaseIE):
|
||||||
|
IE_DESC = '斗鱼直播'
|
||||||
_VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.douyutv.com/iseven',
|
'url': 'https://www.douyu.com/pigff',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '17732',
|
'id': '24422',
|
||||||
'display_id': 'iseven',
|
'display_id': 'pigff',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
'description': r're:.*m7show@163\.com.*',
|
'description': r'≥15级牌子看鱼吧置顶帖进粉丝vx群',
|
||||||
'thumbnail': r're:^https?://.*\.png',
|
'thumbnail': str,
|
||||||
'uploader': '7师傅',
|
'uploader': 'pigff',
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
|
'live_status': 'is_live',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -85,15 +126,43 @@ class DouyuTVIE(InfoExtractor):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _get_sign_func(self, room_id, video_id):
    """Download the signing script for ``room_id`` from the homeH5Enc API."""
    response = self._download_json(
        f'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id,
        note='Getting signing script')
    # The script is stored under data -> "room<room_id>"
    return response['data'][f'room{room_id}']
|
||||||
|
|
||||||
|
def _extract_stream_formats(self, stream_formats):
    """Build format dicts from a list of getH5Play API responses.

    Each response's ``data`` object contributes at most one format, whose URL
    is ``rtmp_live`` joined onto ``rtmp_url``. Entries without a usable URL
    are skipped.
    """
    formats = []
    for stream_info in traverse_obj(stream_formats, (..., 'data')):
        stream_url = urljoin(
            traverse_obj(stream_info, 'rtmp_url'), traverse_obj(stream_info, 'rtmp_live'))
        if not stream_url:
            continue

        rate_id = traverse_obj(stream_info, ('rate', {int_or_none}))
        # First `multirates` entry whose rate matches this stream's rate
        rate_info = traverse_obj(stream_info, ('multirates', lambda _, v: v['rate'] == rate_id), get_all=False)
        ext = determine_ext(stream_url)
        is_hls = ext == 'm3u8'

        if rate_id is not None:
            # Python modulo with a negative divisor: 0 stays 0, positive rates become negative
            quality = rate_id % -10000
        else:
            quality = None

        formats.append({
            'url': stream_url,
            'format_id': str_or_none(rate_id),
            'ext': 'mp4' if is_hls else ext,
            'protocol': 'm3u8_native' if is_hls else 'https',
            'quality': quality,
            **traverse_obj(rate_info, {
                'format': ('name', {str_or_none}),
                'tbr': ('bit', {int_or_none}),
            }),
        })
    return formats
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
if video_id.isdigit():
|
webpage = self._download_webpage(url, video_id)
|
||||||
room_id = video_id
|
room_id = self._search_regex(r'\$ROOM\.room_id\s*=\s*(\d+)', webpage, 'room id')
|
||||||
else:
|
|
||||||
page = self._download_webpage(url, video_id)
|
if self._search_regex(r'"videoLoop"\s*:\s*(\d+)', webpage, 'loop', default='') == '1':
|
||||||
room_id = self._html_search_regex(
|
raise UserNotLive('The channel is auto-playing VODs', video_id=video_id)
|
||||||
r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
|
if self._search_regex(r'\$ROOM\.show_status\s*=\s*(\d+)', webpage, 'status', default='') == '2':
|
||||||
|
raise UserNotLive(video_id=video_id)
|
||||||
|
|
||||||
# Grab metadata from API
|
# Grab metadata from API
|
||||||
params = {
|
params = {
|
||||||
|
@ -102,110 +171,136 @@ class DouyuTVIE(InfoExtractor):
|
||||||
'time': int(time.time()),
|
'time': int(time.time()),
|
||||||
}
|
}
|
||||||
params['auth'] = hashlib.md5(
|
params['auth'] = hashlib.md5(
|
||||||
f'room/{video_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
|
f'room/{room_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
|
||||||
room = self._download_json(
|
room = traverse_obj(self._download_json(
|
||||||
f'http://www.douyutv.com/api/v1/room/{room_id}', video_id,
|
f'http://www.douyutv.com/api/v1/room/{room_id}', video_id,
|
||||||
note='Downloading room info', query=params)['data']
|
note='Downloading room info', query=params, fatal=False), 'data')
|
||||||
|
|
||||||
# 1 = live, 2 = offline
|
# 1 = live, 2 = offline
|
||||||
if room.get('show_status') == '2':
|
if traverse_obj(room, 'show_status') == '2':
|
||||||
raise ExtractorError('Live stream is offline', expected=True)
|
raise UserNotLive(video_id=video_id)
|
||||||
|
|
||||||
video_url = urljoin('https://hls3-akm.douyucdn.cn/', self._search_regex(r'(live/.*)', room['hls_url'], 'URL'))
|
js_sign_func = self._search_js_sign_func(webpage, fatal=False) or self._get_sign_func(room_id, video_id)
|
||||||
formats, subs = self._extract_m3u8_formats_and_subtitles(video_url, room_id)
|
form_data = {
|
||||||
|
'rate': 0,
|
||||||
|
**self._calc_sign(js_sign_func, video_id, room_id),
|
||||||
|
}
|
||||||
|
stream_formats = [self._download_json(
|
||||||
|
f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
|
||||||
|
video_id, note="Downloading livestream format",
|
||||||
|
data=urlencode_postdata(form_data))]
|
||||||
|
|
||||||
title = unescapeHTML(room['room_name'])
|
for rate_id in traverse_obj(stream_formats[0], ('data', 'multirates', ..., 'rate')):
|
||||||
description = room.get('show_details')
|
if rate_id != traverse_obj(stream_formats[0], ('data', 'rate')):
|
||||||
thumbnail = room.get('room_src')
|
form_data['rate'] = rate_id
|
||||||
uploader = room.get('nickname')
|
stream_formats.append(self._download_json(
|
||||||
|
f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
|
||||||
|
video_id, note=f'Downloading livestream format {rate_id}',
|
||||||
|
data=urlencode_postdata(form_data)))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': room_id,
|
'id': room_id,
|
||||||
'display_id': video_id,
|
'formats': self._extract_stream_formats(stream_formats),
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'uploader': uploader,
|
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
'subtitles': subs,
|
**traverse_obj(room, {
|
||||||
'formats': formats,
|
'display_id': ('url', {str}, {lambda i: i[1:]}),
|
||||||
|
'title': ('room_name', {unescapeHTML}),
|
||||||
|
'description': ('show_details', {str}),
|
||||||
|
'uploader': ('nickname', {str}),
|
||||||
|
'thumbnail': ('room_src', {url_or_none}),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class DouyuShowIE(InfoExtractor):
|
class DouyuShowIE(DouyuBaseIE):
|
||||||
_VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'
|
_VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw',
|
'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY',
|
||||||
'md5': '0c2cfd068ee2afe657801269b2d86214',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'rjNBdvnVXNzvE2yw',
|
'id': 'mPyq7oVNe5Yv1gLY',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '陈一发儿:砒霜 我有个室友系列!04-01 22点场',
|
'title': '四川人小时候的味道“蒜苗回锅肉”,传统菜不能丢,要常做来吃',
|
||||||
'duration': 7150.08,
|
'duration': 633,
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': str,
|
||||||
'uploader': '陈一发儿',
|
'uploader': '美食作家王刚V',
|
||||||
'uploader_id': 'XrZwYelr5wbK',
|
'uploader_id': 'OVAO4NVx1m7Q',
|
||||||
'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK',
|
'timestamp': 1661850002,
|
||||||
'upload_date': '20170402',
|
'upload_date': '20220830',
|
||||||
|
'view_count': int,
|
||||||
|
'tags': ['美食', '美食综合'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
|
'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
_FORMATS = {
|
||||||
|
'super': '原画',
|
||||||
|
'high': '超清',
|
||||||
|
'normal': '高清',
|
||||||
|
}
|
||||||
|
|
||||||
|
_QUALITIES = {
|
||||||
|
'super': -1,
|
||||||
|
'high': -2,
|
||||||
|
'normal': -3,
|
||||||
|
}
|
||||||
|
|
||||||
|
_RESOLUTIONS = {
|
||||||
|
'super': '1920x1080',
|
||||||
|
'high': '1280x720',
|
||||||
|
'normal': '852x480',
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url = url.replace('vmobile.', 'v.')
|
url = url.replace('vmobile.', 'v.')
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
room_info = self._parse_json(self._search_regex(
|
video_info = self._search_json(
|
||||||
r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id)
|
r'<script>\s*window\.\$DATA\s*=', webpage,
|
||||||
|
'video info', video_id, transform_source=js_to_json)
|
||||||
|
|
||||||
video_info = None
|
js_sign_func = self._search_js_sign_func(webpage)
|
||||||
|
form_data = {
|
||||||
|
'vid': video_id,
|
||||||
|
**self._calc_sign(js_sign_func, video_id, video_info['ROOM']['point_id']),
|
||||||
|
}
|
||||||
|
url_info = self._download_json(
|
||||||
|
'https://v.douyu.com/api/stream/getStreamUrl', video_id,
|
||||||
|
data=urlencode_postdata(form_data), note="Downloading video formats")
|
||||||
|
|
||||||
for trial in range(5):
|
formats = []
|
||||||
# Sometimes Douyu rejects our request. Let's try it more times
|
for name, url in traverse_obj(url_info, ('data', 'thumb_video', {dict.items}, ...)):
|
||||||
try:
|
video_url = traverse_obj(url, ('url', {url_or_none}))
|
||||||
video_info = self._download_json(
|
if video_url:
|
||||||
'https://vmobile.douyu.com/video/getInfo', video_id,
|
ext = determine_ext(video_url)
|
||||||
query={'vid': video_id},
|
formats.append({
|
||||||
headers={
|
'format': self._FORMATS.get(name),
|
||||||
'Referer': url,
|
'format_id': name,
|
||||||
'x-requested-with': 'XMLHttpRequest',
|
'url': video_url,
|
||||||
|
'quality': self._QUALITIES.get(name),
|
||||||
|
'ext': 'mp4' if ext == 'm3u8' else ext,
|
||||||
|
'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
|
||||||
|
**parse_resolution(self._RESOLUTIONS.get(name))
|
||||||
})
|
})
|
||||||
break
|
else:
|
||||||
except ExtractorError:
|
self.to_screen(
|
||||||
self._sleep(1, video_id)
|
f'"{self._FORMATS.get(name, name)}" format may require logging in. {self._login_hint()}')
|
||||||
|
|
||||||
if not video_info:
|
|
||||||
raise ExtractorError('Can\'t fetch video info')
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
video_info['data']['video_url'], video_id,
|
|
||||||
entry_protocol='m3u8_native', ext='mp4')
|
|
||||||
|
|
||||||
upload_date = unified_strdate(self._html_search_regex(
|
|
||||||
r'<em>上传时间:</em><span>([^<]+)</span>', webpage,
|
|
||||||
'upload date', fatal=False))
|
|
||||||
|
|
||||||
uploader = uploader_id = uploader_url = None
|
|
||||||
mobj = re.search(
|
|
||||||
r'(?m)<a[^>]+href="/author/([0-9a-zA-Z]+)".+?<strong[^>]+title="([^"]+)"',
|
|
||||||
webpage)
|
|
||||||
if mobj:
|
|
||||||
uploader_id, uploader = mobj.groups()
|
|
||||||
uploader_url = urljoin(url, '/author/' + uploader_id)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': room_info['name'],
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'duration': room_info.get('duration'),
|
**traverse_obj(video_info, ('DATA', {
|
||||||
'thumbnail': room_info.get('pic'),
|
'title': ('content', 'title', {str}),
|
||||||
'upload_date': upload_date,
|
'uploader': ('content', 'author', {str}),
|
||||||
'uploader': uploader,
|
'uploader_id': ('content', 'up_id', {str_or_none}),
|
||||||
'uploader_id': uploader_id,
|
'duration': ('content', 'video_duration', {int_or_none}),
|
||||||
'uploader_url': uploader_url,
|
'thumbnail': ('content', 'video_pic', {url_or_none}),
|
||||||
|
'timestamp': ('content', 'create_time', {int_or_none}),
|
||||||
|
'view_count': ('content', 'view_num', {int_or_none}),
|
||||||
|
'tags': ('videoTag', ..., 'tagName', {str}),
|
||||||
|
}))
|
||||||
}
|
}
|
||||||
|
|
96
yt_dlp/extractor/eplus.py
Normal file
96
yt_dlp/extractor/eplus.py
Normal file
|
@ -0,0 +1,96 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
try_call,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class EplusIbIE(InfoExtractor):
    """Extractor for live.eplus.jp ``/ex/player?ib=...`` pages.

    Event metadata is read from the ``var app = {...}`` object embedded in the
    page, and HLS playlist URLs from the ``var listChannels = [...]`` array.
    """

    IE_NAME = 'eplus:inbound'
    IE_DESC = 'e+ (イープラス) overseas'
    _VALID_URL = r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)'
    _TESTS = [{
        'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
        'info_dict': {
            'id': '354502-0001-002',
            'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022~LIVE with a smile!~【Streaming+(配信)】',
            'live_status': 'was_live',
            'release_date': '20211231',
            'release_timestamp': 1640952000,
            'description': str,
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
        'expected_warnings': [
            'Could not find the playlist URL. This event may not be accessible',
            'No video formats found!',
            'Requested format is not available',
        ],
    }]

    def _real_extract(self, url):
        """Map the event's delivery status to a live_status and collect its HLS formats."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        data_json = self._search_json(r'<script>\s*var app\s*=', webpage, 'data json', video_id)

        delivery_status = data_json.get('delivery_status')
        archive_mode = data_json.get('archive_mode')
        # 32400 s = 9 h is subtracted from the parsed event time
        release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
        release_timestamp_str = data_json.get('event_datetime_text')  # JST

        self.write_debug(f'delivery_status = {delivery_status}, archive_mode = {archive_mode}')

        # A stopped event without an archive cannot be played at all
        if delivery_status == 'STOPPED' and archive_mode != 'ON':
            raise ExtractorError(
                'This event has ended and there is no archive for this event', expected=True)

        status_table = (
            ('PREPARING', 'is_upcoming'),
            ('STARTED', 'is_live'),
            ('STOPPED', 'post_live'),
            ('WAIT_CONFIRM_ARCHIVED', 'post_live'),
            ('CONFIRMED_ARCHIVE', 'was_live'),
        )
        live_status = next(
            (mapped for known, mapped in status_table if delivery_status == known), None)
        if live_status is None:
            self.report_warning(f'Unknown delivery_status {delivery_status}, treat it as a live')
            live_status = 'is_live'

        formats = []

        m3u8_playlist_urls = self._search_json(
            r'var listChannels\s*=', webpage, 'hls URLs', video_id, contains_pattern=r'\[.+\]', default=[])

        # Work out why there would be no formats, if that is the case
        is_upcoming = live_status == 'is_upcoming'
        if not m3u8_playlist_urls:
            if is_upcoming:
                no_formats_msg = f'Could not find the playlist URL. This live event will begin at {release_timestamp_str} JST'
            else:
                no_formats_msg = 'Could not find the playlist URL. This event may not be accessible'
        elif is_upcoming:
            no_formats_msg = f'This live event will begin at {release_timestamp_str} JST'
        elif live_status == 'post_live':
            no_formats_msg = 'This event has ended, and the archive will be available shortly'
        else:
            no_formats_msg = None

        if no_formats_msg is not None:
            self.raise_no_formats(no_formats_msg, expected=True)
        else:
            formats = [
                fmt
                for m3u8_playlist_url in m3u8_playlist_urls
                for fmt in self._extract_m3u8_formats(m3u8_playlist_url, video_id)
            ]
            # FIXME: HTTP request headers need to be updated to continue download
            warning = 'Due to technical limitations, the download will be interrupted after one hour'
            if live_status == 'is_live':
                self.report_warning(warning)
            elif live_status == 'was_live':
                self.report_warning(f'{warning}. You can restart to continue the download')

        return {
            'id': data_json['app_id'],
            'title': data_json.get('app_name'),
            'formats': formats,
            'live_status': live_status,
            'description': data_json.get('content'),
            'release_timestamp': release_timestamp,
        }
|
|
@ -11,8 +11,8 @@ class ExpressenIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:www\.)?(?:expressen|di)\.se/
|
(?:www\.)?(?:expressen|di)\.se/
|
||||||
(?:(?:tvspelare/video|videoplayer/embed)/)?
|
(?:(?:tvspelare/video|video-?player/embed)/)?
|
||||||
tv/(?:[^/]+/)*
|
(?:tv|nyheter)/(?:[^/?#]+/)*
|
||||||
(?P<id>[^/?#&]+)
|
(?P<id>[^/?#&]+)
|
||||||
'''
|
'''
|
||||||
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1']
|
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1']
|
||||||
|
@ -42,6 +42,12 @@ class ExpressenIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.expressen.se/video-player/embed/tv/nyheter/ekero-fodda-olof-gustafsson-forvaltar-knarkbaronen-pablo-escobars-namn',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.expressen.se/nyheter/efter-egna-telefonbluffen-escobar-stammer-klarna/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -106,7 +106,11 @@ class MediaStreamIE(MediaStreamBaseIE):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
if 'Debido a tu ubicación no puedes ver el contenido' in webpage:
|
for message in [
|
||||||
|
'Debido a tu ubicación no puedes ver el contenido',
|
||||||
|
'You are not allowed to watch this video: Geo Fencing Restriction'
|
||||||
|
]:
|
||||||
|
if message in webpage:
|
||||||
self.raise_geo_restricted()
|
self.raise_geo_restricted()
|
||||||
|
|
||||||
player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)
|
player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)
|
||||||
|
|
|
@ -6,7 +6,6 @@ from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
qualities,
|
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
@ -49,13 +48,8 @@ class NovaEmbedIE(InfoExtractor):
|
||||||
duration = None
|
duration = None
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
player = self._parse_json(
|
def process_format_list(format_list, format_id=""):
|
||||||
self._search_regex(
|
nonlocal formats, has_drm
|
||||||
(r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P<json>{.*?})\s*\)(?:\s*\))?\s*,',
|
|
||||||
r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'),
|
|
||||||
webpage, 'player', default='{}', group='json'), video_id, fatal=False)
|
|
||||||
if player:
|
|
||||||
for format_id, format_list in player['tracks'].items():
|
|
||||||
if not isinstance(format_list, list):
|
if not isinstance(format_list, list):
|
||||||
format_list = [format_list]
|
format_list = [format_list]
|
||||||
for format_dict in format_list:
|
for format_dict in format_list:
|
||||||
|
@ -82,44 +76,24 @@ class NovaEmbedIE(InfoExtractor):
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
})
|
})
|
||||||
duration = int_or_none(player.get('duration'))
|
|
||||||
else:
|
player = self._search_json(
|
||||||
# Old path, not actual as of 08.04.2020
|
r'player:', webpage, 'player', video_id, fatal=False, end_pattern=r';\s*</script>')
|
||||||
bitrates = self._parse_json(
|
if player:
|
||||||
|
for src in traverse_obj(player, ('lib', 'source', 'sources', ...)):
|
||||||
|
process_format_list(src)
|
||||||
|
duration = traverse_obj(player, ('sourceInfo', 'duration', {int_or_none}))
|
||||||
|
if not formats and not has_drm:
|
||||||
|
# older code path, in use before August 2023
|
||||||
|
player = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
(r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P<json>{.*?})\s*\)(?:\s*\))?\s*,',
|
||||||
video_id, transform_source=js_to_json)
|
r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'),
|
||||||
|
webpage, 'player', group='json'), video_id)
|
||||||
QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
if player:
|
||||||
quality_key = qualities(QUALITIES)
|
for format_id, format_list in player['tracks'].items():
|
||||||
|
process_format_list(format_list, format_id)
|
||||||
for format_id, format_list in bitrates.items():
|
duration = int_or_none(player.get('duration'))
|
||||||
if not isinstance(format_list, list):
|
|
||||||
format_list = [format_list]
|
|
||||||
for format_url in format_list:
|
|
||||||
format_url = url_or_none(format_url)
|
|
||||||
if not format_url:
|
|
||||||
continue
|
|
||||||
if format_id == 'hls':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
format_url, video_id, ext='mp4',
|
|
||||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
|
||||||
fatal=False))
|
|
||||||
continue
|
|
||||||
f = {
|
|
||||||
'url': format_url,
|
|
||||||
}
|
|
||||||
f_id = format_id
|
|
||||||
for quality in QUALITIES:
|
|
||||||
if '%s.mp4' % quality in format_url:
|
|
||||||
f_id += '-%s' % quality
|
|
||||||
f.update({
|
|
||||||
'quality': quality_key(quality),
|
|
||||||
'format_note': quality.upper(),
|
|
||||||
})
|
|
||||||
break
|
|
||||||
f['format_id'] = f_id
|
|
||||||
formats.append(f)
|
|
||||||
|
|
||||||
if not formats and has_drm:
|
if not formats and has_drm:
|
||||||
self.report_drm(video_id)
|
self.report_drm(video_id)
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import parse_qs, remove_start, traverse_obj, ExtractorError
|
||||||
|
|
||||||
|
|
||||||
class RbgTumIE(InfoExtractor):
|
class RbgTumIE(InfoExtractor):
|
||||||
_VALID_URL = r'https://live\.rbg\.tum\.de/w/(?P<id>.+)'
|
_VALID_URL = r'https://(?:live\.rbg\.tum\.de|tum\.live)/w/(?P<id>[^?#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# Combined view
|
# Combined view
|
||||||
'url': 'https://live.rbg.tum.de/w/cpp/22128',
|
'url': 'https://live.rbg.tum.de/w/cpp/22128',
|
||||||
|
@ -35,16 +36,18 @@ class RbgTumIE(InfoExtractor):
|
||||||
'title': 'Fachschaftsvollversammlung',
|
'title': 'Fachschaftsvollversammlung',
|
||||||
'series': 'Fachschaftsvollversammlung Informatik',
|
'series': 'Fachschaftsvollversammlung Informatik',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://tum.live/w/linalginfo/27102',
|
||||||
|
'only_matching': True,
|
||||||
}, ]
|
}, ]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
m3u8 = self._html_search_regex(r'(https://.+?\.m3u8)', webpage, 'm3u8')
|
m3u8 = self._html_search_regex(r'"(https://[^"]+\.m3u8[^"]*)', webpage, 'm3u8')
|
||||||
lecture_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title')
|
lecture_title = self._html_search_regex(r'<h1[^>]*>([^<]+)</h1>', webpage, 'title', fatal=False)
|
||||||
lecture_series_title = self._html_search_regex(
|
lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ')
|
||||||
r'(?s)<title\b[^>]*>\s*(?:TUM-Live\s\|\s?)?([^:]+):?.*?</title>', webpage, 'series')
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
|
formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
|
||||||
|
|
||||||
|
@ -57,9 +60,9 @@ class RbgTumIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class RbgTumCourseIE(InfoExtractor):
|
class RbgTumCourseIE(InfoExtractor):
|
||||||
_VALID_URL = r'https://live\.rbg\.tum\.de/course/(?P<id>.+)'
|
_VALID_URL = r'https://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/old/course/(?P<id>(?P<year>\d+)/(?P<term>\w+)/(?P<slug>[^/?#]+))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://live.rbg.tum.de/course/2022/S/fpv',
|
'url': 'https://live.rbg.tum.de/old/course/2022/S/fpv',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Funktionale Programmierung und Verifikation (IN0003)',
|
'title': 'Funktionale Programmierung und Verifikation (IN0003)',
|
||||||
'id': '2022/S/fpv',
|
'id': '2022/S/fpv',
|
||||||
|
@ -69,7 +72,7 @@ class RbgTumCourseIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
'playlist_count': 13,
|
'playlist_count': 13,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://live.rbg.tum.de/course/2022/W/set',
|
'url': 'https://live.rbg.tum.de/old/course/2022/W/set',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'SET FSMPIC',
|
'title': 'SET FSMPIC',
|
||||||
'id': '2022/W/set',
|
'id': '2022/W/set',
|
||||||
|
@ -78,16 +81,62 @@ class RbgTumCourseIE(InfoExtractor):
|
||||||
'noplaylist': False,
|
'noplaylist': False,
|
||||||
},
|
},
|
||||||
'playlist_count': 6,
|
'playlist_count': 6,
|
||||||
|
}, {
|
||||||
|
'url': 'https://tum.live/old/course/2023/S/linalginfo',
|
||||||
|
'only_matching': True,
|
||||||
}, ]
|
}, ]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
course_id = self._match_id(url)
|
course_id, hostname, year, term, slug = self._match_valid_url(url).group('id', 'hostname', 'year', 'term', 'slug')
|
||||||
|
meta = self._download_json(
|
||||||
|
f'https://{hostname}/api/courses/{slug}/', course_id, fatal=False,
|
||||||
|
query={'year': year, 'term': term}) or {}
|
||||||
|
lecture_series_title = meta.get('Name')
|
||||||
|
lectures = [self.url_result(f'https://{hostname}/w/{slug}/{stream_id}', RbgTumIE)
|
||||||
|
for stream_id in traverse_obj(meta, ('Streams', ..., 'ID'))]
|
||||||
|
|
||||||
|
if not lectures:
|
||||||
webpage = self._download_webpage(url, course_id)
|
webpage = self._download_webpage(url, course_id)
|
||||||
|
lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ')
|
||||||
|
lectures = [self.url_result(f'https://{hostname}{lecture_path}', RbgTumIE)
|
||||||
|
for lecture_path in re.findall(r'href="(/w/[^/"]+/[^/"]+)"', webpage)]
|
||||||
|
|
||||||
lecture_series_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title')
|
return self.playlist_result(lectures, course_id, lecture_series_title)
|
||||||
|
|
||||||
lecture_urls = []
|
|
||||||
for lecture_url in re.findall(r'(?i)href="/w/(.+)(?<!/cam)(?<!/pres)(?<!/chat)"', webpage):
|
|
||||||
lecture_urls.append(self.url_result('https://live.rbg.tum.de/w/' + lecture_url, ie=RbgTumIE.ie_key()))
|
|
||||||
|
|
||||||
return self.playlist_result(lecture_urls, course_id, lecture_series_title)
|
class RbgTumNewCourseIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/\?'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://live.rbg.tum.de/?year=2022&term=S&slug=fpv&view=3',
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'Funktionale Programmierung und Verifikation (IN0003)',
|
||||||
|
'id': '2022/S/fpv',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'noplaylist': False,
|
||||||
|
},
|
||||||
|
'playlist_count': 13,
|
||||||
|
}, {
|
||||||
|
'url': 'https://live.rbg.tum.de/?year=2022&term=W&slug=set&view=3',
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'SET FSMPIC',
|
||||||
|
'id': '2022/W/set',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'noplaylist': False,
|
||||||
|
},
|
||||||
|
'playlist_count': 6,
|
||||||
|
}, {
|
||||||
|
'url': 'https://tum.live/?year=2023&term=S&slug=linalginfo&view=3',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
query = parse_qs(url)
|
||||||
|
errors = [key for key in ('year', 'term', 'slug') if not query.get(key)]
|
||||||
|
if errors:
|
||||||
|
raise ExtractorError(f'Input URL is missing query parameters: {", ".join(errors)}')
|
||||||
|
year, term, slug = query['year'][0], query['term'][0], query['slug'][0]
|
||||||
|
hostname = self._match_valid_url(url).group('hostname')
|
||||||
|
|
||||||
|
return self.url_result(f'https://{hostname}/old/course/{year}/{term}/{slug}', RbgTumCourseIE)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user