Compare commits

..

4 Commits

Author SHA1 Message Date
bashonly
8e765755f7
[ie/vimeo] Fix API headers (#9125)
Closes #9124
Authored by: bashonly
2024-02-02 21:15:04 +00:00
c-basalt
ffa017cfc5
[ie/BiliBiliSearch] Set cookie to fix extraction (#9119)
Closes #5083
Authored by: c-basalt
2024-02-02 21:08:29 +00:00
HobbyistDev
a0d50aabc5
[ie/orf:on] Add extractor (#9113)
Closes #8903
Authored by: HobbyistDev
2024-02-02 20:57:53 +00:00
HobbyistDev
2f4b575946
[ie/zetland] Add extractor (#9116)
Closes #9024
Authored by: HobbyistDev
2024-02-02 20:56:29 +00:00
5 changed files with 173 additions and 3 deletions

View File

@ -1394,6 +1394,7 @@ from .ora import OraTVIE
from .orf import (
ORFTVthekIE,
ORFFM4StoryIE,
ORFONIE,
ORFRadioIE,
ORFPodcastIE,
ORFIPTVIE,
@ -2496,6 +2497,7 @@ from .zee5 import (
Zee5SeriesIE,
)
from .zeenews import ZeeNewsIE
from .zetland import ZetlandDKArticleIE
from .zhihu import ZhihuIE
from .zingmp3 import (
ZingMp3IE,

View File

@ -7,6 +7,7 @@ import math
import re
import time
import urllib.parse
import uuid
from .common import InfoExtractor, SearchInfoExtractor
from ..dependencies import Cryptodome
@ -1464,8 +1465,37 @@ class BiliBiliSearchIE(SearchInfoExtractor):
IE_DESC = 'Bilibili video search'
_MAX_RESULTS = 100000
_SEARCH_KEY = 'bilisearch'
_TESTS = [{
'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
'playlist_count': 3,
'info_dict': {
'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
},
'playlist': [{
'info_dict': {
'id': 'BV1n44y1Q7sc',
'ext': 'mp4',
'title': '“出道一年我怎么还在等你单推的女人睡觉后开播啊”【一分钟了解靡烟miya】',
'timestamp': 1669889987,
'upload_date': '20221201',
'description': 'md5:43343c0973defff527b5a4b403b4abf9',
'tags': list,
'uploader': '靡烟miya',
'duration': 123.156,
'uploader_id': '1958703906',
'comment_count': int,
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 988222410_part1'],
},
}],
}]
def _search_results(self, query):
if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
for page_num in itertools.count(1):
videos = self._download_json(
'https://api.bilibili.com/x/web-interface/search/type', query,

View File

@ -1,3 +1,4 @@
import base64
import functools
import re
@ -565,3 +566,66 @@ class ORFFM4StoryIE(InfoExtractor):
})
return self.playlist_result(entries)
class ORFONIE(InfoExtractor):
IE_NAME = 'orf:on'
_VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d{8})/(?P<slug>[\w-]+)'
_TESTS = [{
'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
'info_dict': {
'id': '14210000',
'ext': 'mp4',
'duration': 2651.08,
'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0167/98/thumb_16697671_segments_highlight_teaser.jpeg',
'title': 'School of Champions (4/8)',
'description': 'md5:d09ad279fc2e8502611e7648484b6afd',
'media_type': 'episode',
'timestamp': 1706472362,
'upload_date': '20240128',
}
}]
def _extract_video(self, video_id, display_id):
encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()
api_json = self._download_json(
f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id)
formats, subtitles = [], {}
for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):
for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})):
if manifest_type == 'hls':
fmts, subs = self._extract_m3u8_formats_and_subtitles(
manifest_url, display_id, fatal=False, m3u8_id='hls')
elif manifest_type == 'dash':
fmts, subs = self._extract_mpd_formats_and_subtitles(
manifest_url, display_id, fatal=False, mpd_id='dash')
else:
continue
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
return {
'id': video_id,
'formats': formats,
'subtitles': subtitles,
**traverse_obj(api_json, {
'duration': ('duration_second', {float_or_none}),
'title': (('title', 'headline'), {str}),
'description': (('description', 'teaser_text'), {str}),
'media_type': ('video_type', {str}),
}, get_all=False),
}
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'slug')
webpage = self._download_webpage(url, display_id)
return {
'id': video_id,
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
'description': self._html_search_meta(
['description', 'og:description', 'twitter:description'], webpage, default=None),
**self._search_json_ld(webpage, display_id, fatal=False),
**self._extract_video(video_id, display_id),
}

View File

@ -269,7 +269,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {}
if not jwt_response.get('jwt'):
return
headers = {'Authorization': 'jwt %s' % jwt_response['jwt']}
headers = {'Authorization': 'jwt %s' % jwt_response['jwt'], 'Accept': 'application/json'}
original_response = self._download_json(
f'https://api.vimeo.com/videos/{video_id}', video_id,
headers=headers, fatal=False, expected_status=(403, 404)) or {}
@ -751,6 +751,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
video = self._download_json(
api_url, video_id, headers={
'Authorization': 'jwt ' + token,
'Accept': 'application/json',
}, query={
'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
})
@ -785,7 +786,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
jwt = viewer['jwt']
album = self._download_json(
'https://api.vimeo.com/albums/' + album_id,
album_id, headers={'Authorization': 'jwt ' + jwt},
album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
query={'fields': 'description,name,privacy'})
if try_get(album, lambda x: x['privacy']['view']) == 'password':
password = self.get_param('videopassword')
@ -1147,10 +1148,12 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
'https://api.vimeo.com/albums/%s/videos' % album_id,
album_id, 'Downloading page %d' % api_page, query=query, headers={
'Authorization': 'jwt ' + authorization,
'Accept': 'application/json',
})['data']
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
return
raise
for video in videos:
link = video.get('link')
if not link:
@ -1171,7 +1174,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
jwt = viewer['jwt']
album = self._download_json(
'https://api.vimeo.com/albums/' + album_id,
album_id, headers={'Authorization': 'jwt ' + jwt},
album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
query={'fields': 'description,name,privacy'})
hashed_pass = None
if try_get(album, lambda x: x['privacy']['view']) == 'password':

View File

@ -0,0 +1,71 @@
from .common import InfoExtractor
from ..utils import merge_dicts, unified_timestamp, url_or_none
from ..utils.traversal import traverse_obj
class ZetlandDKArticleIE(InfoExtractor):
_VALID_URL = r'https?://www\.zetland\.dk/\w+/(?P<id>(?P<story_id>\w{8})-(?P<uploader_id>\w{8})-(?:\w{5}))'
_TESTS = [{
'url': 'https://www.zetland.dk/historie/sO9aq2MY-a81VP3BY-66e69?utm_source=instagram&utm_medium=linkibio&utm_campaign=artikel',
'info_dict': {
'id': 'sO9aq2MY-a81VP3BY-66e69',
'ext': 'mp3',
'modified_date': '20240118',
'title': 'Afsnit 1: “Det føltes som en kidnapning.” ',
'upload_date': '20240116',
'uploader_id': 'a81VP3BY',
'modified_timestamp': 1705568739,
'release_timestamp': 1705377592,
'uploader_url': 'https://www.zetland.dk/skribent/a81VP3BY',
'uploader': 'Helle Fuusager',
'release_date': '20240116',
'thumbnail': r're:https://zetland\.imgix\.net/2aafe500-b14e-11ee-bf83-65d5e1283a57/Zetland_Image_1\.jpg',
'description': 'md5:9619d426772c133f5abb26db27f26a01',
'timestamp': 1705377592,
'series_id': '62d54630-e87b-4ab1-a255-8de58dbe1b14',
}
}]
def _real_extract(self, url):
display_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
webpage = self._download_webpage(url, display_id)
next_js_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']
story_data = traverse_obj(next_js_data, ('initialState', 'consume', 'story', 'story'))
formats = []
for audio_url in traverse_obj(story_data, ('story_content', 'meta', 'audioFiles', ..., {url_or_none})):
formats.append({
'url': audio_url,
'vcodec': 'none',
})
return merge_dicts({
'id': display_id,
'formats': formats,
'uploader_id': uploader_id
}, traverse_obj(story_data, {
'title': ((('story_content', 'content', 'title'), 'title'), {str}),
'uploader': ('sharer', 'name'),
'uploader_id': ('sharer', 'sharer_id'),
'description': ('story_content', 'content', 'socialDescription'),
'series_id': ('story_content', 'meta', 'seriesId'),
'release_timestamp': ('published_at', {unified_timestamp}),
'modified_timestamp': ('revised_at', {unified_timestamp}),
}, get_all=False), traverse_obj(next_js_data, ('metaInfo', {
'title': ((('meta', 'title'), ('ld', 'headline'), ('og', 'og:title'), ('og', 'twitter:title')), {str}),
'description': ((('meta', 'description'), ('ld', 'description'), ('og', 'og:description'), ('og', 'twitter:description')), {str}),
'uploader': ((('meta', 'author'), ('ld', 'author', 'name')), {str}),
'uploader_url': ('ld', 'author', 'url', {url_or_none}),
'thumbnail': ((('ld', 'image'), ('og', 'og:image'), ('og', 'twitter:image')), {url_or_none}),
'modified_timestamp': ('ld', 'dateModified', {unified_timestamp}),
'release_timestamp': ('ld', 'datePublished', {unified_timestamp}),
'timestamp': ('ld', 'dateCreated', {unified_timestamp}),
}), get_all=False), {
'title': self._html_search_meta(['title', 'og:title', 'twitter:title'], webpage),
'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
'uploader': self._html_search_meta(['author'], webpage),
'release_timestamp': unified_timestamp(self._html_search_meta(['article:published_time'], webpage)),
}, self._search_json_ld(webpage, display_id, fatal=False))