Compare commits

...

26 Commits

Author SHA1 Message Date
ChocoLZS
e374a72c73
Merge 8f5a765e25 into 6365e92589 2024-11-16 20:03:47 +01:00
ChocoLZS
8f5a765e25
Update yt_dlp/extractor/piaulizaportal.py
Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
2024-10-28 11:22:18 +08:00
ChocoLZS
75ea808d0a
Apply suggestions from code review
Co-authored-by: sepro <sepro@sepr0.com>
2024-10-05 22:49:09 +08:00
ChocoLZS
b42637c619
Apply suggestions from code review
Co-authored-by: sepro <sepro@sepr0.com>
2024-10-01 16:31:47 +08:00
Mozi
a75a02ad2a
[ie/pialive] Follow your steps (#1)
* [ie/pialive] Support detecting upcoming and ended live events

* Pack API arguments

* fix UnboundLocalError for "chat_room_url"

* extract video_id from query string by "parse_qs()"

* Fix tests
2024-09-25 14:47:15 +08:00
ChocoLZS
d993580e6f
Apply suggestions from code review
Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
2024-09-11 09:15:34 +08:00
chocoie
0f4cdc03d9 fix: suggestions 2024-09-09 21:35:40 +08:00
chocoie
7b94a0000c fix: error 2024-09-09 21:27:29 +08:00
ChocoLZS
aa410c803b
Apply suggestions from code review
Co-authored-by: sepro <sepro@sepr0.com>
2024-09-09 21:13:03 +08:00
ChocoLZS
83f4c5a98e fix: code style 2024-08-28 22:48:42 +08:00
ChocoLZS
841a557c0e fix: code style 2024-08-28 22:35:13 +08:00
ChocoLZS
bca2ca9852 fix: code style 2024-08-27 08:35:10 +00:00
ChocoLZS
a2ed14747b fix: code style 2024-08-25 23:49:08 +08:00
ChocoLZS
04f1bfde50 feat: use extract_comments instead 2024-08-25 23:26:37 +08:00
ChocoLZS
848a923252 doc: add note for json downloader 2024-08-24 23:44:28 +08:00
ChocoLZS
1ff33d1333 fix: remove unnecessary code 2024-08-24 23:34:06 +08:00
ChocoLZS
27b31cc3df chore: correct tests 2024-08-24 17:44:14 +08:00
ChocoLZS
fad7c2a75a chore: remove smuggled_url 2024-08-23 23:12:18 +08:00
ChocoLZS
1a22cc3d3f chore: remove unnecessary smuggled data 2024-08-23 10:01:07 +08:00
ChocoLZS
95dffc0e75 fix: use url_transparent 2024-08-23 09:18:01 +08:00
ChocoLZS
b3ebbaf8dd feat: fetch comments 2024-08-22 23:06:16 +08:00
ChocoLZS
1a16c62638 chore: rename regex and add referer 2024-08-22 19:51:05 +08:00
ChocoLZS
3fa3edc3c4 chore: use url instead of Request and add tests 2024-08-22 11:59:32 +08:00
ChocoLZS
edf1eebbce chore: extract id from m3u8 url 2024-08-21 23:55:59 +08:00
ChocoLZS
0639489bc5 feat: add embed player handler 2024-08-21 14:55:09 +08:00
ChocoLZS
9c2a6fa449 feat: add support for uliza in pia-live 2024-08-21 11:43:45 +08:00
3 changed files with 199 additions and 34 deletions

View File

@ -1522,8 +1522,12 @@ from .pgatour import PGATourIE
from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
from .pialive import PiaLiveIE
from .piapro import PiaproIE
from .piaulizaportal import PIAULIZAPortalIE
from .piaulizaportal import (
PIAULIZAPortalAPIIE,
PIAULIZAPortalIE,
)
from .picarto import (
PicartoIE,
PicartoVodIE,

126
yt_dlp/extractor/pialive.py Normal file
View File

@ -0,0 +1,126 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
extract_attributes,
get_element_by_class,
get_element_html_by_class,
multipart_encode,
unified_timestamp,
url_or_none,
)
from ..utils.traversal import traverse_obj
class PiaLiveIE(InfoExtractor):
    """Extractor for PIA LIVE STREAM player pages (``player.pia-live.jp/stream/<key>``).

    The player page only embeds another player through a tag returned by the
    PIA LIVE API, so extraction is delegated to that embedded URL with
    ``url_transparent``. Chat history is exposed as comments when requested.
    """

    # Sent as the Referer header on API and chat page requests
    PLAYER_ROOT_URL = 'https://player.pia-live.jp/'
    # Base URL of the player-tag-list / chat-tag-list endpoints
    PIA_LIVE_API_URL = 'https://api.pia-live.jp'
    # Submitted as the `api_key` form field with every API request
    API_KEY = 'kfds)FKFps-dms9e'

    _VALID_URL = r'https?://player\.pia-live\.jp/stream/(?P<id>[\w-]+)'
    _TESTS = [
        {
            'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krUDqGOwN4d61dCWQYOd6CTxl4hjya9dsfEZGsM4uGOUdax60lEI4twsXGXf7crmz8Gk__GhupTrWxA7RFRVt76',
            'info_dict': {
                'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
                'display_id': '2431867_001',
                'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
                'live_status': 'was_live',
                'comment_count': int,
            },
            'params': {
                'getcomments': True,
                'skip_download': True,
                'ignore_no_formats_error': True,
            },
            'skip': 'The video is no longer available',
        },
        {
            'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krJdu0GVBVbVy01IwpJ6J3qBEm3d9TCTt1d0eWpsZGj7DrOjVOmS7GAWGwyscMgiThopJvzgWC4H5b-7XQjAfRZ',
            'info_dict': {
                'id': '9ce8b8ba-f6d1-4d1f-83a0-18c3148ded93',
                'display_id': '2431867_002',
                'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
                'live_status': 'was_live',
                'comment_count': int,
            },
            'params': {
                'getcomments': True,
                'skip_download': True,
                'ignore_no_formats_error': True,
            },
            'skip': 'The video is no longer available',
        },
    ]

    def _extract_vars(self, variable, html):
        """Return the quoted string assigned to a JS variable in ``html``.

        Matches ``var``/``const``/``let`` declarations of ``variable`` whose
        value is a single- or double-quoted, non-empty string literal.
        No default is passed to ``_search_regex`` for this lookup.
        """
        return self._search_regex(
            rf'(?:var|const|let)\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
            html, f'variable {variable}', group='value')

    def _real_extract(self, url):
        """Resolve a player page to the embedded player URL.

        Steps: read ``programCode``/``articleCode`` from the page, bail out
        for ended events, report upcoming events with their start time, then
        ask the API for the player tag (and the chat tag when comments are
        requested) and hand the player URL over via ``url_result``.
        """
        video_key = self._match_id(url)
        webpage = self._download_webpage(url, video_key)

        program_code = self._extract_vars('programCode', webpage)
        article_code = self._extract_vars('articleCode', webpage)
        title = self._html_extract_title(webpage)

        # A `play-end` element marks an event that has already finished
        if get_element_html_by_class('play-end', webpage):
            raise ExtractorError('The video is no longer available', expected=True, video_id=program_code)

        # A `play-waiting__date` element carries the scheduled start of an upcoming event
        if start_info := clean_html(get_element_by_class('play-waiting__date', webpage)):
            # default=(None, None) keeps the tuple unpacking valid when the text
            # does not match; a bare non-fatal miss would yield a single None
            # and fail here with a TypeError instead of falling through.
            date, time = self._search_regex(
                r'(?P<date>\d{4}/\d{1,2}/\d{1,2})\([月火水木金土日]\)(?P<time>\d{2}:\d{2})',
                start_info, 'start_info', default=(None, None), group=('date', 'time'))
            if date and time:
                # The page shows no UTC offset; +09:00 is appended explicitly
                release_timestamp_str = f'{date} {time} +09:00'
                release_timestamp = unified_timestamp(release_timestamp_str)
                self.raise_no_formats(f'The video will be available after {release_timestamp_str}', expected=True)
                return {
                    'id': program_code,
                    'title': title,
                    'live_status': 'is_upcoming',
                    'release_timestamp': release_timestamp,
                }

        # Both API endpoints take the same multipart form body and headers
        payload, content_type = multipart_encode({
            'play_url': video_key,
            'api_key': self.API_KEY,
        })
        api_kwargs = {
            'video_id': program_code,
            'data': payload,
            'headers': {'Content-Type': content_type, 'Referer': self.PLAYER_ROOT_URL},
        }

        player_tag_list = self._download_json(
            f'{self.PIA_LIVE_API_URL}/perf/player-tag-list/{program_code}', **api_kwargs,
            note='Fetching player tag list', errnote='Unable to fetch player tag list')

        # The chat lookup is optional and non-fatal: only done when comments are requested
        chat_room_url = None
        if self.get_param('getcomments'):
            chat_room_url = traverse_obj(self._download_json(
                f'{self.PIA_LIVE_API_URL}/perf/chat-tag-list/{program_code}/{article_code}', **api_kwargs,
                note='Fetching chat info', errnote='Unable to fetch chat info', fatal=False),
                ('data', 'chat_one_tag', {extract_attributes}, 'src', {url_or_none}))

        # `movie_one_tag` is an HTML tag; its `src` attribute is the embedded player URL
        return self.url_result(
            extract_attributes(player_tag_list['data']['movie_one_tag'])['src'], url_transparent=True,
            video_title=title, display_id=program_code, __post_extractor=self.extract_comments(
                program_code, chat_room_url))

    def _get_comments(self, video_id, chat_room_url):
        """Yield comment dicts parsed from the chat room page.

        Yields nothing when ``chat_room_url`` is empty or the page cannot be
        fetched. Each entry of the page's ``_history`` array is read
        positionally: 0 -> timestamp, 1 -> author flag (``author_is_uploader``
        is true when it equals 2), 2 -> author, 3 -> text, 4 -> id.
        """
        if not chat_room_url:
            return
        if comment_page := self._download_webpage(
                chat_room_url, video_id, headers={'Referer': self.PLAYER_ROOT_URL},
                note='Fetching comment page', errnote='Unable to fetch comment page', fatal=False):
            yield from traverse_obj(self._search_json(
                r'var\s+_history\s*=', comment_page, 'comment list',
                video_id, contains_pattern=r'\[(?s:.+)\]', fatal=False), (..., {
                    'timestamp': 0,
                    'author_is_uploader': (1, {lambda x: x == 2}),
                    'author': 2,
                    'text': 3,
                    'id': 4,
                }))

View File

@ -1,11 +1,62 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
parse_qs,
time_seconds,
traverse_obj,
)
from ..utils import ExtractorError, int_or_none, parse_qs, time_seconds
from ..utils.traversal import traverse_obj
class PIAULIZAPortalAPIIE(InfoExtractor):
    """Extractor for ULIZA player API URLs (``player-api.p.uliza.jp/v1/players/...``).

    The ``name`` query parameter is matched as the URL id; the final video id
    is taken from the ``ss`` query parameter of the m3u8 URL found in the
    player data, falling back to that name.
    """

    _VALID_URL = r'https://player-api\.p\.uliza\.jp/v1/players/[^?#]+\?(?:[^#]*&)?name=(?P<id>[^#&]+)'
    _TESTS = [{
        'url': 'https://player-api.p.uliza.jp/v1/players/timeshift-disabled/pia/admin?type=normal&playerobjectname=ulizaPlayer&name=livestream01_dvr&repeatable=true',
        'info_dict': {
            'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
            'ext': 'mp4',
            'title': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://player-api.p.uliza.jp/v1/players/uliza_jp_gallery_normal/promotion/admin?type=presentation&name=cookings&targetid=player1',
        'info_dict': {
            'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
            'ext': 'mp4',
            'title': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
            'live_status': 'not_live',
        },
    }, {
        'url': 'https://player-api.p.uliza.jp/v1/players/default-player/pia/admin?type=normal&name=pia_movie_uliza_fix&targetid=ulizahtml5&repeatable=true',
        'info_dict': {
            'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
            'ext': 'mp4',
            'title': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
            'live_status': 'not_live',
        },
    }]

    def _real_extract(self, url):
        """Download the player data, locate its m3u8 URL and build the info dict."""
        player_name = self._match_id(url)

        player_page = self._download_webpage(
            url, player_name, headers={'Referer': 'https://player-api.p.uliza.jp/'},
            note='Fetching player data', errnote='Unable to fetch player data')

        playlist_url = self._search_regex(
            r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_page, 'm3u8 url')

        # Prefer the `ss` query value of the playlist URL as the id
        ss_values = parse_qs(playlist_url).get('ss', [player_name])
        video_id = ss_values[0]

        formats = self._extract_m3u8_formats(playlist_url, video_id)

        # The path segment after /hls/ in the first format URL tells the stream kind
        stream_kind = self._search_regex(
            r'/hls/(dvr|video)/', traverse_obj(formats, (0, 'url')), 'm3u8 type', default=None)
        if stream_kind == 'video':
            live_status = 'is_live'
        elif stream_kind == 'dvr':
            # short-term archives
            live_status = 'was_live'
        else:
            # VOD or long-term archives
            live_status = 'not_live'

        return {
            'id': video_id,
            'title': video_id,
            'formats': formats,
            'live_status': live_status,
        }
class PIAULIZAPortalIE(InfoExtractor):
@ -14,7 +65,8 @@ class PIAULIZAPortalIE(InfoExtractor):
_TESTS = [{
'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
'info_dict': {
'id': '005f18b7-e810-5618-cb82-0987c5755d44',
'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
'display_id': '005f18b7-e810-5618-cb82-0987c5755d44',
'title': 'プレゼンテーションプレイヤーのサンプル',
'live_status': 'not_live',
},
@ -25,7 +77,8 @@ class PIAULIZAPortalIE(InfoExtractor):
}, {
'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
'info_dict': {
'id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
'display_id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
'title': '【確認用】視聴サンプルページULIZA',
'live_status': 'not_live',
},
@ -44,27 +97,9 @@ class PIAULIZAPortalIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
player_data = self._download_webpage(
self._search_regex(
r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
webpage, 'player data url'),
video_id, headers={'Referer': 'https://ulizaportal.jp/'},
note='Fetching player data', errnote='Unable to fetch player data')
formats = self._extract_m3u8_formats(
self._search_regex(
r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data,
'm3u8 url', default=None),
video_id, fatal=False)
m3u8_type = self._search_regex(
r'/hls/(dvr|video)/', traverse_obj(formats, (0, 'url')), 'm3u8 type', default=None)
return {
'id': video_id,
'title': self._html_extract_title(webpage),
'formats': formats,
'live_status': {
'video': 'is_live',
'dvr': 'was_live', # short-term archives
}.get(m3u8_type, 'not_live'), # VOD or long-term archives
}
player_data_url = self._search_regex(
r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
webpage, 'player data url')
return self.url_result(
player_data_url, PIAULIZAPortalAPIIE, url_transparent=True,
display_id=video_id, video_title=self._html_extract_title(webpage))