Compare commits


4 Commits

Author       SHA1          Message                                                     Date
bashonly     8e765755f7    [ie/vimeo] Fix API headers (#9125)                          2024-02-02 21:15:04 +00:00
                           Closes #9124 · Authored by: bashonly
c-basalt     ffa017cfc5    [ie/BiliBiliSearch] Set cookie to fix extraction (#9119)    2024-02-02 21:08:29 +00:00
                           Closes #5083 · Authored by: c-basalt
HobbyistDev  a0d50aabc5    [ie/orf:on] Add extractor (#9113)                           2024-02-02 20:57:53 +00:00
                           Closes #8903 · Authored by: HobbyistDev
HobbyistDev  2f4b575946    [ie/zetland] Add extractor (#9116)                          2024-02-02 20:56:29 +00:00
                           Closes #9024 · Authored by: HobbyistDev
5 changed files with 173 additions and 3 deletions

yt_dlp/extractor/_extractors.py

@@ -1394,6 +1394,7 @@ from .ora import OraTVIE
 from .orf import (
     ORFTVthekIE,
     ORFFM4StoryIE,
+    ORFONIE,
     ORFRadioIE,
     ORFPodcastIE,
     ORFIPTVIE,
@@ -2496,6 +2497,7 @@ from .zee5 import (
     Zee5SeriesIE,
 )
 from .zeenews import ZeeNewsIE
+from .zetland import ZetlandDKArticleIE
 from .zhihu import ZhihuIE
 from .zingmp3 import (
     ZingMp3IE,

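The registry changes above only wire the two new extractors into yt-dlp's extractor table; a quick way to confirm they are picked up is to iterate the generated extractor classes and test URL matching. A minimal sketch, not part of the change, assuming a locally installed yt-dlp build that includes these commits:

import yt_dlp
from yt_dlp.extractor import gen_extractor_classes

# URLs taken from the test cases added in this compare range.
urls = [
    'https://on.orf.at/video/14210000/school-of-champions-48',
    'https://www.zetland.dk/historie/sO9aq2MY-a81VP3BY-66e69',
]
for ie in gen_extractor_classes():
    if ie.IE_NAME in ('orf:on', 'ZetlandDKArticle'):
        for url in urls:
            if ie.suitable(url):
                print(f'{ie.IE_NAME} -> {url}')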
yt_dlp/extractor/bilibili.py

@@ -7,6 +7,7 @@ import math
 import re
 import time
 import urllib.parse
+import uuid
 
 from .common import InfoExtractor, SearchInfoExtractor
 from ..dependencies import Cryptodome
@@ -1464,8 +1465,37 @@ class BiliBiliSearchIE(SearchInfoExtractor):
     IE_DESC = 'Bilibili video search'
     _MAX_RESULTS = 100000
     _SEARCH_KEY = 'bilisearch'
+    _TESTS = [{
+        'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
+        'playlist_count': 3,
+        'info_dict': {
+            'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
+            'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
+        },
+        'playlist': [{
+            'info_dict': {
+                'id': 'BV1n44y1Q7sc',
+                'ext': 'mp4',
+                'title': '“出道一年我怎么还在等你单推的女人睡觉后开播啊”【一分钟了解靡烟miya】',
+                'timestamp': 1669889987,
+                'upload_date': '20221201',
+                'description': 'md5:43343c0973defff527b5a4b403b4abf9',
+                'tags': list,
+                'uploader': '靡烟miya',
+                'duration': 123.156,
+                'uploader_id': '1958703906',
+                'comment_count': int,
+                'view_count': int,
+                'like_count': int,
+                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+                '_old_archive_ids': ['bilibili 988222410_part1'],
+            },
+        }],
+    }]
 
     def _search_results(self, query):
+        if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
+            self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
         for page_num in itertools.count(1):
             videos = self._download_json(
                 'https://api.bilibili.com/x/web-interface/search/type', query,

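The BiliBiliSearch fix boils down to sending a synthetic buvid3 cookie (a UUID4 with an 'infoc' suffix) when none is present, since the search API otherwise refuses to return results. A standalone sketch of the same request outside yt-dlp, using only the standard library; the exact query parameters ('search_type', 'keyword', 'page') are an assumption about the endpoint rather than something shown in this diff:

import json
import urllib.parse
import urllib.request
import uuid

# Mirror the workaround from bilibili.py: attach a made-up buvid3 cookie.
params = urllib.parse.urlencode({
    'search_type': 'video',   # assumed parameter names
    'keyword': 'yt-dlp',
    'page': 1,
})
req = urllib.request.Request(
    f'https://api.bilibili.com/x/web-interface/search/type?{params}',
    headers={
        'Cookie': f'buvid3={uuid.uuid4()}infoc',
        'User-Agent': 'Mozilla/5.0',
    })
with urllib.request.urlopen(req) as resp:
    data = json.load(resp)
results = (data.get('data') or {}).get('result') or []
print(data.get('code'), len(results))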
yt_dlp/extractor/orf.py

@@ -1,3 +1,4 @@
+import base64
 import functools
 import re
 
@@ -565,3 +566,66 @@ class ORFFM4StoryIE(InfoExtractor):
             })
 
         return self.playlist_result(entries)
+
+
+class ORFONIE(InfoExtractor):
+    IE_NAME = 'orf:on'
+    _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d{8})/(?P<slug>[\w-]+)'
+    _TESTS = [{
+        'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
+        'info_dict': {
+            'id': '14210000',
+            'ext': 'mp4',
+            'duration': 2651.08,
+            'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0167/98/thumb_16697671_segments_highlight_teaser.jpeg',
+            'title': 'School of Champions (4/8)',
+            'description': 'md5:d09ad279fc2e8502611e7648484b6afd',
+            'media_type': 'episode',
+            'timestamp': 1706472362,
+            'upload_date': '20240128',
+        }
+    }]
+
+    def _extract_video(self, video_id, display_id):
+        encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()
+        api_json = self._download_json(
+            f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id)
+
+        formats, subtitles = [], {}
+        for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):
+            for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})):
+                if manifest_type == 'hls':
+                    fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                        manifest_url, display_id, fatal=False, m3u8_id='hls')
+                elif manifest_type == 'dash':
+                    fmts, subs = self._extract_mpd_formats_and_subtitles(
+                        manifest_url, display_id, fatal=False, mpd_id='dash')
+                else:
+                    continue
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            **traverse_obj(api_json, {
+                'duration': ('duration_second', {float_or_none}),
+                'title': (('title', 'headline'), {str}),
+                'description': (('description', 'teaser_text'), {str}),
+                'media_type': ('video_type', {str}),
+            }, get_all=False),
+        }
+
+    def _real_extract(self, url):
+        video_id, display_id = self._match_valid_url(url).group('id', 'slug')
+        webpage = self._download_webpage(url, display_id)
+
+        return {
+            'id': video_id,
+            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
+            'description': self._html_search_meta(
+                ['description', 'og:description', 'twitter:description'], webpage, default=None),
+            **self._search_json_ld(webpage, display_id, fatal=False),
+            **self._extract_video(video_id, display_id),
+        }

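The new ORFONIE extractor derives its API URL from the 8-digit video ID alone: the ID is prefixed with a static string, base64-encoded, and placed in the 'episode/encrypted' path of the ORF TVthek API, whose JSON response lists the HLS and DASH sources. A minimal standalone sketch of just that ID scheme, taken from the code above (the example video_id is the one from the added test):

import base64

video_id = '14210000'
# Static prefix + video ID, base64-encoded, as in ORFONIE._extract_video
encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()
api_url = f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}'
print(api_url)
# The JSON returned there carries 'sources' dicts with 'hls' and 'dash'
# manifest URLs, which the extractor feeds to the usual m3u8/MPD helpers.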
yt_dlp/extractor/vimeo.py

@@ -269,7 +269,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
             'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {}
         if not jwt_response.get('jwt'):
             return
-        headers = {'Authorization': 'jwt %s' % jwt_response['jwt']}
+        headers = {'Authorization': 'jwt %s' % jwt_response['jwt'], 'Accept': 'application/json'}
         original_response = self._download_json(
             f'https://api.vimeo.com/videos/{video_id}', video_id,
             headers=headers, fatal=False, expected_status=(403, 404)) or {}
@@ -751,6 +751,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
         video = self._download_json(
             api_url, video_id, headers={
                 'Authorization': 'jwt ' + token,
+                'Accept': 'application/json',
             }, query={
                 'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
             })
@@ -785,7 +786,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
         jwt = viewer['jwt']
         album = self._download_json(
             'https://api.vimeo.com/albums/' + album_id,
-            album_id, headers={'Authorization': 'jwt ' + jwt},
+            album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
             query={'fields': 'description,name,privacy'})
         if try_get(album, lambda x: x['privacy']['view']) == 'password':
             password = self.get_param('videopassword')
@@ -1147,10 +1148,12 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
                     'https://api.vimeo.com/albums/%s/videos' % album_id,
                     album_id, 'Downloading page %d' % api_page, query=query, headers={
                         'Authorization': 'jwt ' + authorization,
+                        'Accept': 'application/json',
                     })['data']
             except ExtractorError as e:
                 if isinstance(e.cause, HTTPError) and e.cause.status == 400:
                     return
+                raise
             for video in videos:
                 link = video.get('link')
                 if not link:
@@ -1171,7 +1174,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
         jwt = viewer['jwt']
         album = self._download_json(
             'https://api.vimeo.com/albums/' + album_id,
-            album_id, headers={'Authorization': 'jwt ' + jwt},
+            album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
             query={'fields': 'description,name,privacy'})
         hashed_pass = None
         if try_get(album, lambda x: x['privacy']['view']) == 'password':

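The Vimeo change is purely about request headers: every API call that authenticates with the viewer JWT now also sends 'Accept: application/json'. A standalone sketch of the same call pattern using only the standard library; the placeholder VIDEO_ID and the assumption that the unauthenticated /_rv/viewer endpoint returns a usable 'jwt' field are illustrative only:

import json
import urllib.request

VIDEO_ID = '76979871'  # hypothetical example ID

# Fetch a viewer JWT, as the extractor does.
viewer_req = urllib.request.Request(
    'https://vimeo.com/_rv/viewer', headers={'User-Agent': 'Mozilla/5.0'})
with urllib.request.urlopen(viewer_req) as resp:
    jwt = json.load(resp).get('jwt')

# Call the Vimeo API with the Accept header added by this fix; without it
# the API may answer with an unusable response, which is what #9124 reported.
api_req = urllib.request.Request(
    f'https://api.vimeo.com/videos/{VIDEO_ID}',
    headers={
        'Authorization': f'jwt {jwt}',
        'Accept': 'application/json',
    })
with urllib.request.urlopen(api_req) as resp:
    print(json.load(resp).get('name'))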
yt_dlp/extractor/zetland.py

@@ -0,0 +1,71 @@
+from .common import InfoExtractor
+from ..utils import merge_dicts, unified_timestamp, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class ZetlandDKArticleIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.zetland\.dk/\w+/(?P<id>(?P<story_id>\w{8})-(?P<uploader_id>\w{8})-(?:\w{5}))'
+    _TESTS = [{
+        'url': 'https://www.zetland.dk/historie/sO9aq2MY-a81VP3BY-66e69?utm_source=instagram&utm_medium=linkibio&utm_campaign=artikel',
+        'info_dict': {
+            'id': 'sO9aq2MY-a81VP3BY-66e69',
+            'ext': 'mp3',
+            'modified_date': '20240118',
+            'title': 'Afsnit 1: “Det føltes som en kidnapning.” ',
+            'upload_date': '20240116',
+            'uploader_id': 'a81VP3BY',
+            'modified_timestamp': 1705568739,
+            'release_timestamp': 1705377592,
+            'uploader_url': 'https://www.zetland.dk/skribent/a81VP3BY',
+            'uploader': 'Helle Fuusager',
+            'release_date': '20240116',
+            'thumbnail': r're:https://zetland\.imgix\.net/2aafe500-b14e-11ee-bf83-65d5e1283a57/Zetland_Image_1\.jpg',
+            'description': 'md5:9619d426772c133f5abb26db27f26a01',
+            'timestamp': 1705377592,
+            'series_id': '62d54630-e87b-4ab1-a255-8de58dbe1b14',
+        }
+    }]
+
+    def _real_extract(self, url):
+        display_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
+        webpage = self._download_webpage(url, display_id)
+
+        next_js_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']
+        story_data = traverse_obj(next_js_data, ('initialState', 'consume', 'story', 'story'))
+
+        formats = []
+        for audio_url in traverse_obj(story_data, ('story_content', 'meta', 'audioFiles', ..., {url_or_none})):
+            formats.append({
+                'url': audio_url,
+                'vcodec': 'none',
+            })
+
+        return merge_dicts({
+            'id': display_id,
+            'formats': formats,
+            'uploader_id': uploader_id
+        }, traverse_obj(story_data, {
+            'title': ((('story_content', 'content', 'title'), 'title'), {str}),
+            'uploader': ('sharer', 'name'),
+            'uploader_id': ('sharer', 'sharer_id'),
+            'description': ('story_content', 'content', 'socialDescription'),
+            'series_id': ('story_content', 'meta', 'seriesId'),
+            'release_timestamp': ('published_at', {unified_timestamp}),
+            'modified_timestamp': ('revised_at', {unified_timestamp}),
+        }, get_all=False), traverse_obj(next_js_data, ('metaInfo', {
+            'title': ((('meta', 'title'), ('ld', 'headline'), ('og', 'og:title'), ('og', 'twitter:title')), {str}),
+            'description': ((('meta', 'description'), ('ld', 'description'), ('og', 'og:description'), ('og', 'twitter:description')), {str}),
+            'uploader': ((('meta', 'author'), ('ld', 'author', 'name')), {str}),
+            'uploader_url': ('ld', 'author', 'url', {url_or_none}),
+            'thumbnail': ((('ld', 'image'), ('og', 'og:image'), ('og', 'twitter:image')), {url_or_none}),
+            'modified_timestamp': ('ld', 'dateModified', {unified_timestamp}),
+            'release_timestamp': ('ld', 'datePublished', {unified_timestamp}),
+            'timestamp': ('ld', 'dateCreated', {unified_timestamp}),
+        }), get_all=False), {
+            'title': self._html_search_meta(['title', 'og:title', 'twitter:title'], webpage),
+            'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
+            'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
+            'uploader': self._html_search_meta(['author'], webpage),
+            'release_timestamp': unified_timestamp(self._html_search_meta(['article:published_time'], webpage)),
+        }, self._search_json_ld(webpage, display_id, fatal=False))
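
For reference, the data the Zetland extractor consumes is ordinary Next.js page state: the article's audio files live under props.pageProps.initialState.consume.story.story.story_content.meta.audioFiles, which is exactly the path traversed above. A rough standalone sketch follows; regex-scraping the __NEXT_DATA__ script tag is an assumption and a simplification (yt-dlp uses its _search_nextjs_data helper instead):

import json
import re
import urllib.request

url = 'https://www.zetland.dk/historie/sO9aq2MY-a81VP3BY-66e69'
req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
with urllib.request.urlopen(req) as resp:
    html = resp.read().decode()

# Pull the embedded Next.js state and walk the same path as the extractor.
next_data = json.loads(re.search(
    r'<script[^>]+id="__NEXT_DATA__"[^>]*>(.+?)</script>', html, re.DOTALL).group(1))
story = next_data['props']['pageProps']['initialState']['consume']['story']['story']
for audio_url in story['story_content']['meta']['audioFiles']:
    print(audio_url)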