Compare commits

..

25 Commits

Author SHA1 Message Date
bashonly
098bb98280
Add *.ssa to .gitignore and make clean-test
Authored by: bashonly
2024-01-28 15:56:51 -06:00
bashonly
8790348b1e
[ie/MedalTV] Cleanup
Authored by: bashonly
2024-01-28 15:51:01 -06:00
bashonly
33be3bb962
Merge branch 'yt-dlp:master' into cleanup/2024-01 2024-01-28 15:49:14 -06:00
Danish Humair
02e343f6ef
[ie/MedalTV] Fix extraction (#9098)
Closes #8766
Authored by: Danish-H
2024-01-28 21:23:52 +00:00
Elan Ruusamäe
a514cc2feb
[ie/ERRJupiter] Add extractor (#8549)
Authored by: glensc
2024-01-28 19:58:34 +01:00
kclauhk
87286e93af
[ie/facebook] Support permalink URLs (#9061)
Authored by: kclauhk
2024-01-28 18:50:03 +00:00
kclauhk
3c4d3ee491
[ie/facebook] Improve thumbnail extraction (#9060)
Authored by: kclauhk
2024-01-28 18:41:56 +00:00
kclauhk
5b68c478fb
[ie/facebook] Set format HTTP chunk size (#9058)
Closes #8197
Authored by: bashonly, kclauhk
2024-01-28 18:39:14 +00:00
Christopher Schreiner
9526b1f179
[ie/adn] Improve auth error handling (#9068)
Closes #9067
Authored by: infanf
2024-01-28 16:03:19 +00:00
vista-narvas
0023af81fb
[ie/RumbleChannel] Fix extractor (#9092)
Closes #8782
Authored by: vista-narvas, Pranaxcau
2024-01-28 15:32:19 +00:00
Christian Kündig
cae6e46107
[ie/PlaySuisse] Add login support (#9077)
Closes #7974
Authored by: chkuendig
2024-01-28 02:19:54 +00:00
jazz1611
c91d8b1899
[ie/redtube] Fix formats extraction (#9076)
Authored by: jazz1611
2024-01-28 02:15:29 +00:00
jazz1611
77c2472ca1
[ie/Gofile] Fix extraction (#9074)
Closes #9073
Authored by: jazz1611
2024-01-28 02:12:40 +00:00
shmohawk
d79c7e9937
[ie/Txxx] Extract thumbnails (#9063)
Authored by: shmohawk
2024-01-28 02:10:20 +00:00
Caesim404
5dda3b291f
[ie/lsm,cloudycdn] Add extractors (#8643)
Closes #2978
Authored by: Caesim404
2024-01-28 02:02:09 +00:00
Simon Sawicki
5f25f348f9
[ie/pr0gramm] Enable POL filter and provide tags without login (#9051)
Authored by: Grub4K
2024-01-23 23:20:13 +01:00
kclauhk
a40b0070c2
[ie/facebook:ads] Add extractor (#8870)
Closes #8083
Authored by: kclauhk
2024-01-22 06:28:11 +00:00
chtk
9cd9044790
[ie/Floatplane] Improve metadata extraction (#8934)
Authored by: chtk
2024-01-22 06:57:52 +01:00
John Victor
f0e8bc7c60
[ie/patreon] Fix embedded HLS extraction (#8993)
Closes #8973
Authored by: johnvictorfs
2024-01-21 22:36:59 +00:00
Stefan Lobbenmeier
c099ec9392
[ie/ard:mediathek] Support cookies to verify age (#9037)
Closes #9035
Authored by: StefanLobbenmeier
2024-01-21 20:54:11 +00:00
gmes78
c0ecceeefe
[ie/Rule34Video] Fix _VALID_URL (#9044)
Authored by: gmes78
2024-01-21 18:56:01 +00:00
u-spec-png
3e083191cd
[ie/Newgrounds:user] Fix extractor (#9046)
Closes #7308
Authored by: u-spec-png
2024-01-21 18:50:14 +00:00
dasidiot
9f1e9dab21
[ie/motherless] Support uploader playlists (#8994)
Authored by: dasidiot
2024-01-21 02:46:53 +00:00
Martin Renold
5a63454b36
[ie/mx3] Add extractors (#8736)
Authored by: martinxyz
2024-01-21 03:45:38 +01:00
lauren n. liberda
fcaa2e735b
[ie/Sejm,RedCDNLivx] Add extractors (#8676)
Authored by: selfisekai
2024-01-21 03:22:26 +01:00
24 changed files with 1501 additions and 70 deletions

1
.gitignore vendored
View File

@ -47,6 +47,7 @@ cookies
*.png
*.sbv
*.srt
*.ssa
*.swf
*.swp
*.tt

View File

@ -17,8 +17,8 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \
clean-test:
rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
*.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \
*.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.gif *.jpeg *.jpg *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 \
*.mp4 *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp
*.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.gif *.jpeg *.jpg *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 *.mp4 \
*.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.ssa *.swf *.swp *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp
clean-dist:
rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \
yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS .mailmap

View File

@ -369,6 +369,7 @@ from .clippit import ClippitIE
from .cliprs import ClipRsIE
from .closertotruth import CloserToTruthIE
from .cloudflarestream import CloudflareStreamIE
from .cloudycdn import CloudyCDNIE
from .clubic import ClubicIE
from .clyp import ClypIE
from .cmt import CMTIE
@ -564,6 +565,7 @@ from .eroprofile import (
EroProfileIE,
EroProfileAlbumIE,
)
from .err import ERRJupiterIE
from .ertgr import (
ERTFlixCodenameIE,
ERTFlixIE,
@ -588,6 +590,7 @@ from .facebook import (
FacebookPluginsVideoIE,
FacebookRedirectURLIE,
FacebookReelIE,
FacebookAdsIE,
)
from .fancode import (
FancodeVodIE,
@ -1000,6 +1003,11 @@ from .lrt import (
LRTVODIE,
LRTStreamIE
)
from .lsm import (
LSMLREmbedIE,
LSMLTVEmbedIE,
LSMReplayIE
)
from .lumni import (
LumniIE
)
@ -1111,6 +1119,7 @@ from .motherless import (
MotherlessIE,
MotherlessGroupIE,
MotherlessGalleryIE,
MotherlessUploaderIE,
)
from .motorsport import MotorsportIE
from .moviepilot import MoviepilotIE
@ -1137,6 +1146,11 @@ from .musicdex import (
MusicdexArtistIE,
MusicdexPlaylistIE,
)
from .mx3 import (
Mx3IE,
Mx3NeoIE,
Mx3VolksmusikIE,
)
from .mxplayer import (
MxplayerIE,
MxplayerShowIE,
@ -1593,6 +1607,7 @@ from .redbulltv import (
RedBullIE,
)
from .reddit import RedditIE
from .redge import RedCDNLivxIE
from .redgifs import (
RedGifsIE,
RedGifsSearchIE,
@ -1727,6 +1742,7 @@ from .scte import (
)
from .scrolller import ScrolllerIE
from .seeker import SeekerIE
from .sejmpl import SejmIE
from .senalcolombia import SenalColombiaLiveIE
from .senategov import SenateISVPIE, SenateGovIE
from .sendtonews import SendtoNewsIE

View File

@ -3,6 +3,7 @@ import binascii
import json
import os
import random
import time
from .common import InfoExtractor
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
@ -17,6 +18,7 @@ from ..utils import (
int_or_none,
intlist_to_bytes,
long_to_bytes,
parse_iso8601,
pkcs1pad,
strip_or_none,
str_or_none,
@ -185,7 +187,10 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
user = options['user']
if not user.get('hasAccess'):
self.raise_login_required()
start_date = traverse_obj(options, ('video', 'startDate', {str}))
if (parse_iso8601(start_date) or 0) > time.time():
raise ExtractorError(f'This video is not available yet. Release date: {start_date}', expected=True)
self.raise_login_required('This video requires a subscription', method='password')
token = self._download_json(
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
@ -267,6 +272,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
f['language'] = 'de'
formats.extend(m3u8_formats)
if not formats:
self.raise_login_required('This video requires a subscription', method='password')
video = (self._download_json(
self._API_BASE_URL + 'video/%s' % video_id, video_id,
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}

View File

@ -8,6 +8,7 @@ from ..utils import (
determine_ext,
int_or_none,
join_nonempty,
jwt_decode_hs256,
make_archive_id,
parse_duration,
parse_iso8601,
@ -238,6 +239,7 @@ class ARDBetaMediathekIE(InfoExtractor):
(?P<id>[a-zA-Z0-9]+)
/?(?:[?#]|$)'''
_GEO_COUNTRIES = ['DE']
_TOKEN_URL = 'https://sso.ardmediathek.de/sso/token'
_TESTS = [{
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
@ -359,12 +361,27 @@ class ARDBetaMediathekIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
query = {'embedded': 'false', 'mcV6': 'true'}
headers = {}
if self._get_cookies(self._TOKEN_URL).get('ams'):
token = self._download_json(
self._TOKEN_URL, display_id, 'Fetching token for age verification',
'Unable to fetch age verification token', fatal=False)
id_token = traverse_obj(token, ('idToken', {str}))
decoded_token = traverse_obj(id_token, ({jwt_decode_hs256}, {dict}))
user_id = traverse_obj(decoded_token, (('user_id', 'sub'), {str}), get_all=False)
if not user_id:
self.report_warning('Unable to extract token, continuing without authentication')
else:
headers['x-authorization'] = f'Bearer {id_token}'
query['userId'] = user_id
if decoded_token.get('age_rating') != 18:
self.report_warning('Account is not verified as 18+; video may be unavailable')
page_data = self._download_json(
f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}', display_id, query={
'embedded': 'false',
'mcV6': 'true',
})
f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}',
display_id, query=query, headers=headers)
# For user convenience we use the old contentId instead of the longer crid
# Ref: https://github.com/yt-dlp/yt-dlp/issues/8731#issuecomment-1874398283
@ -383,7 +400,7 @@ class ARDBetaMediathekIE(InfoExtractor):
media_data = traverse_obj(player_data, ('mediaCollection', 'embedded', {dict}))
if player_data.get('blockedByFsk'):
self.raise_no_formats('This video is only available after 22:00', expected=True)
self.raise_login_required('This video is only available for age verified users or after 22:00')
formats = []
subtitles = {}

View File

@ -0,0 +1,79 @@
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
url_or_none,
urlencode_postdata,
)
from ..utils.traversal import traverse_obj
class CloudyCDNIE(InfoExtractor):
    # Embed player URL (scheme optional); captures the site slug and the media id
    _VALID_URL = r'(?:https?:)?//embed\.cloudycdn\.services/(?P<site_id>[^/?#]+)/media/(?P<id>[\w-]+)'
    # Locates the embed player when it appears as an <iframe src=...> on another page
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://embed.cloudycdn.services/ltv/media/46k_d23-6000-105?',
        'md5': '64f72a360ca530d5ed89c77646c9eee5',
        'info_dict': {
            'id': '46k_d23-6000-105',
            'ext': 'mp4',
            'timestamp': 1700589151,
            'duration': 1442,
            'upload_date': '20231121',
            'title': 'D23-6000-105_cetstud',
            'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
        },
    }, {
        'url': 'https://embed.cloudycdn.services/izm/media/26e_lv-8-5-1',
        'md5': '798828a479151e2444d8dcfbec76e482',
        'info_dict': {
            'id': '26e_lv-8-5-1',
            'ext': 'mp4',
            'title': 'LV-8-5-1',
            'timestamp': 1669767167,
            'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/488306/placeholder1679423604.jpg',
            'duration': 1205,
            'upload_date': '20221130',
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/',
        'md5': '63074e8e6c84ac2a01f2fb8bf03b8f43',
        'info_dict': {
            'id': 'cqd_lib-2',
            'ext': 'mp4',
            'upload_date': '20230223',
            'duration': 629,
            'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/518407/placeholder1678748124.jpg',
            'timestamp': 1677181513,
            'title': 'LIB-2',
        },
    }]

    def _real_extract(self, url):
        """Download the player JSON for an embed URL and turn it into an info dict."""
        mobj = self._match_valid_url(url)
        site_id, video_id = mobj.group('site_id'), mobj.group('id')

        # The player endpoint receives the player version and the embedding URL
        # as form-encoded request data
        player_request = urlencode_postdata({
            'version': '6.4.0',
            'referer': url,
        })
        data = self._download_json(
            f'https://player.cloudycdn.services/player/{site_id}/media/{video_id}/',
            video_id, data=player_request)

        formats, subtitles = [], {}
        source_urls = traverse_obj(data, ('source', 'sources', ..., 'src', {url_or_none}))
        for source_url in source_urls:
            # fatal=False: one unusable manifest must not abort the other sources
            hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
                source_url, video_id, fatal=False)
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subtitles, target=subtitles)

        # Optional metadata; keys whose value is missing or fails coercion are omitted
        metadata = traverse_obj(data, {
            'title': ('name', {str}),
            'duration': ('duration', {int_or_none}),
            'timestamp': ('upload_date', {parse_iso8601}),
            'thumbnail': ('source', 'poster', {url_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **metadata,
        }

199
yt_dlp/extractor/err.py Normal file
View File

@ -0,0 +1,199 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
int_or_none,
str_or_none,
url_or_none,
)
from ..utils.traversal import traverse_obj
class ERRJupiterIE(InfoExtractor):
    # Covers both jupiter.err.ee and jupiterpluss.err.ee; the numeric path segment is the content id
    _VALID_URL = r'https?://jupiter(?:pluss)?\.err\.ee/(?P<id>\d+)'
    _TESTS = [{
        'note': 'Jupiter: Movie: siin-me-oleme',
        'url': 'https://jupiter.err.ee/1211107/siin-me-oleme',
        'md5': '9b45d1682a98853acaa1e1b0c791f425',
        'info_dict': {
            'id': '1211107',
            'ext': 'mp4',
            'title': 'Siin me oleme!',
            'alt_title': '',
            'description': 'md5:1825b795f5f7584241aeb59e5bbb4f70',
            'release_date': '20231226',
            'upload_date': '20201217',
            'modified_date': '20201217',
            'release_timestamp': 1703577600,
            'timestamp': 1608210000,
            'modified_timestamp': 1608220800,
            'release_year': 1978,
        },
    }, {
        'note': 'Jupiter: Series: Impulss',
        'url': 'https://jupiter.err.ee/1609145945/impulss',
        'md5': 'a378486df07ed1ba74e46cc861886243',
        'info_dict': {
            'id': '1609145945',
            'ext': 'mp4',
            'title': 'Impulss',
            'alt_title': 'Loteriipilet hooldekodusse',
            'description': 'md5:fa8a2ed0cdccb130211513443ee4d571',
            'release_date': '20231107',
            'upload_date': '20231026',
            'modified_date': '20231118',
            'release_timestamp': 1699380000,
            'timestamp': 1698327601,
            'modified_timestamp': 1700311802,
            'series': 'Impulss',
            'season': 'Season 1',
            'season_number': 1,
            'episode': 'Loteriipilet hooldekodusse',
            'episode_number': 6,
            'series_id': '1609108187',
            'release_year': 2023,
            'episode_id': '1609145945',
        },
    }, {
        'note': 'Jupiter: Radio Show: mnemoturniir episode',
        'url': 'https://jupiter.err.ee/1037919/mnemoturniir',
        'md5': 'f1eb95fe66f9620ff84e81bbac37076a',
        'info_dict': {
            'id': '1037919',
            'ext': 'm4a',
            'title': 'Mnemoturniir',
            'alt_title': '',
            'description': 'md5:626db52394e7583c26ab74d6a34d9982',
            'release_date': '20240121',
            'upload_date': '20240108',
            'modified_date': '20240121',
            'release_timestamp': 1705827900,
            'timestamp': 1704675602,
            'modified_timestamp': 1705827601,
            'series': 'Mnemoturniir',
            'season': 'Season 0',
            'season_number': 0,
            'episode': 'Episode 0',
            'episode_number': 0,
            'series_id': '1037919',
            'release_year': 2024,
            'episode_id': '1609215101',
        },
    }, {
        'note': 'Jupiter+: Clip: bolee-zelenyj-tallinn',
        'url': 'https://jupiterpluss.err.ee/1609180445/bolee-zelenyj-tallinn',
        'md5': '1b812270c4daf6ce51c06bfeaf33ed95',
        'info_dict': {
            'id': '1609180445',
            'ext': 'mp4',
            'title': 'Более зеленый Таллинн',
            'alt_title': '',
            'description': 'md5:fd34d9bf939c28c4a725b19a7f0d6320',
            'release_date': '20231224',
            'upload_date': '20231130',
            'modified_date': '20231207',
            'release_timestamp': 1703423400,
            'timestamp': 1701338400,
            'modified_timestamp': 1701967200,
            'release_year': 2023,
        },
    }, {
        'note': 'Jupiter+: Series: The Sniffer',
        'url': 'https://jupiterpluss.err.ee/1608311387/njuhach',
        'md5': '2abdeb7131ce551bce49e8d0cea08536',
        'info_dict': {
            'id': '1608311387',
            'ext': 'mp4',
            'title': 'Нюхач',
            'alt_title': '',
            'description': 'md5:8c5c7d8f32ec6e54cd498c9e59ca83bc',
            'release_date': '20230601',
            'upload_date': '20210818',
            'modified_date': '20210903',
            'release_timestamp': 1685633400,
            'timestamp': 1629318000,
            'modified_timestamp': 1630686000,
            'release_year': 2013,
            'episode': 'Episode 1',
            'episode_id': '1608311390',
            'episode_number': 1,
            'season': 'Season 1',
            'season_number': 1,
            'series': 'Нюхач',
            'series_id': '1608311387',
        },
    }, {
        'note': 'Jupiter+: Podcast: lesnye-istorii-aisty',
        'url': 'https://jupiterpluss.err.ee/1608990335/lesnye-istorii-aisty',
        'md5': '8b46d7e4510b254a14b7a52211b5bf96',
        'info_dict': {
            'id': '1608990335',
            'ext': 'm4a',
            'title': 'Лесные истории | Аисты',
            'alt_title': '',
            'description': 'md5:065e721623e271e7a63e6540d409ca6b',
            'release_date': '20230609',
            'upload_date': '20230527',
            'modified_date': '20230608',
            'release_timestamp': 1686308700,
            'timestamp': 1685145600,
            'modified_timestamp': 1686252600,
            'release_year': 2023,
            'episode': 'Episode 0',
            'episode_id': '1608990335',
            'episode_number': 0,
            'season': 'Season 0',
            'season_number': 0,
            'series': 'Лесные истории | Аисты',
            'series_id': '1037497',
        },
    }]

    def _real_extract(self, url):
        """Query the content-page API for the id in the URL and build the info dict."""
        video_id = self._match_id(url)
        page_data = self._download_json(
            'https://services.err.ee/api/v2/vodContent/getContentPageData', video_id,
            query={'contentId': video_id})
        # Direct indexing is intentional: a response without these keys raises
        data = page_data['data']['mainContent']

        # Only the first dict found under "medias" is used
        media_data = traverse_obj(data, ('medias', ..., {dict}), get_all=False)
        is_drm = traverse_obj(media_data, ('restrictions', 'drm', {bool}))
        if is_drm:
            self.report_drm(video_id)

        formats, subtitles = [], {}

        # Several keys may carry the same manifest URL; set() collapses the duplicates
        hls_urls = set(traverse_obj(media_data, ('src', ('hls', 'hls2', 'hlsNew'), {url_or_none})))
        for hls_url in hls_urls:
            hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
                hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subtitles, target=subtitles)

        dash_urls = set(traverse_obj(media_data, ('src', ('dash', 'dashNew'), {url_or_none})))
        for dash_url in dash_urls:
            dash_formats, dash_subtitles = self._extract_mpd_formats_and_subtitles(
                dash_url, video_id, mpd_id='dash', fatal=False)
            formats.extend(dash_formats)
            self._merge_subtitles(dash_subtitles, target=subtitles)

        # Plain file URL, offered as an additional "http" format when present
        direct_url = traverse_obj(media_data, ('src', 'file', {url_or_none}))
        if direct_url:
            formats.append({
                'url': direct_url,
                'format_id': 'http',
            })

        # get_all=False: for fields with alternative source keys, the first usable value wins
        general_info = traverse_obj(data, {
            'title': ('heading', {str}),
            'alt_title': ('subHeading', {str}),
            'description': (('lead', 'body'), {clean_html}, {lambda x: x or None}),
            'timestamp': ('created', {int_or_none}),
            'modified_timestamp': ('updated', {int_or_none}),
            'release_timestamp': (('scheduleStart', 'publicStart'), {int_or_none}),
            'release_year': ('year', {int_or_none}),
        }, get_all=False)

        # Series/episode fields are only emitted for content typed as "episode"
        episode_info = {}
        if data.get('type') == 'episode':
            episode_info = traverse_obj(data, {
                'series': ('heading', {str}),
                'series_id': ('rootContentId', {str_or_none}),
                'episode': ('subHeading', {str}),
                'season_number': ('season', {int_or_none}),
                'episode_number': ('episode', {int_or_none}),
                'episode_id': ('id', {str_or_none}),
            })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **general_info,
            **episode_info,
        }

View File

@ -20,6 +20,7 @@ from ..utils import (
get_element_by_id,
get_first,
int_or_none,
join_nonempty,
js_to_json,
merge_dicts,
parse_count,
@ -43,6 +44,7 @@ class FacebookIE(InfoExtractor):
(?:[^#]*?\#!/)?
(?:
(?:
permalink\.php|
video/video\.php|
photo\.php|
video\.php|
@ -248,6 +250,7 @@ class FacebookIE(InfoExtractor):
'duration': 148.435,
},
}, {
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
'url': 'https://www.facebook.com/attn/posts/pfbid0j1Czf2gGDVqeQ8KiMLFm3pWN8GxsQmeRrVhimWDzMuKQoR8r4b1knNsejELmUgyhl',
'info_dict': {
'id': '6968553779868435',
@ -262,6 +265,22 @@ class FacebookIE(InfoExtractor):
'thumbnail': r're:^https?://.*',
'timestamp': 1701975646,
},
}, {
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
'url': 'https://www.facebook.com/permalink.php?story_fbid=pfbid0fqQuVEQyXRa9Dp4RcaTR14KHU3uULHV1EK7eckNXSH63JMuoALsAvVCJ97zAGitil&id=100068861234290',
'info_dict': {
'id': '270103405756416',
'ext': 'mp4',
'title': 'Lela Evans',
'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
'thumbnail': r're:^https?://.*',
'uploader': 'Lela Evans',
'uploader_id': 'pfbid0shZJipuigyy5mqrUJn9ub5LJFWNHvan5prtyi3LrDuuuJ4NwrURgnQHYR9fywBepl',
'upload_date': '20231228',
'timestamp': 1703804085,
'duration': 394.347,
'view_count': int,
},
}, {
'url': 'https://www.facebook.com/story.php?story_fbid=pfbid0Fnzhm8UuzjBYpPMNFzaSpFE9UmLdU4fJN8qTANi1Dmtj5q7DNrL5NERXfsAzDEV7l&id=100073071055552',
'only_matching': True,
@ -563,7 +582,11 @@ class FacebookIE(InfoExtractor):
# Downloads with browser's User-Agent are rate limited. Working around
# with non-browser User-Agent.
for f in info['formats']:
# Downloads with browser's User-Agent are rate limited. Working around
# with non-browser User-Agent.
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
# Formats larger than ~500MB will return error 403 unless chunk size is regulated
f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20
def extract_relay_data(_filter):
return self._parse_json(self._search_regex(
@ -677,6 +700,9 @@ class FacebookIE(InfoExtractor):
# honor precise duration in video info
if video_info.get('duration'):
webpage_info['duration'] = video_info['duration']
# preserve preferred_thumbnail in video info
if video_info.get('thumbnail'):
webpage_info['thumbnail'] = video_info['thumbnail']
return merge_dicts(webpage_info, video_info)
if not video_data:
@ -907,3 +933,114 @@ class FacebookReelIE(InfoExtractor):
video_id = self._match_id(url)
return self.url_result(
f'https://m.facebook.com/watch/?v={video_id}&_rdr', FacebookIE, video_id)
class FacebookAdsIE(InfoExtractor):
    # Ad library URLs on any facebook.com subdomain; the "id" query parameter is the ad id
    _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/ads/library/?\?(?:[^#]+&)?id=(?P<id>\d+)'
    IE_NAME = 'facebook:ads'

    _TESTS = [{
        'url': 'https://www.facebook.com/ads/library/?id=899206155126718',
        'info_dict': {
            'id': '899206155126718',
            'ext': 'mp4',
            'title': 'video by Kandao',
            'uploader': 'Kandao',
            'uploader_id': '774114102743284',
            'uploader_url': r're:^https?://.*',
            'timestamp': 1702548330,
            'thumbnail': r're:^https?://.*',
            'upload_date': '20231214',
            'like_count': int,
        },
    }, {
        'url': 'https://www.facebook.com/ads/library/?id=893637265423481',
        'info_dict': {
            'id': '893637265423481',
            'title': 'Jusqu\u2019\u00e0 -25% sur une s\u00e9lection de vins p\u00e9tillants italiens ',
            'uploader': 'Eataly Paris Marais',
            'uploader_id': '2086668958314152',
            'uploader_url': r're:^https?://.*',
            'timestamp': 1703571529,
            'upload_date': '20231226',
            'like_count': int,
        },
        'playlist_count': 3,
    }, {
        'url': 'https://es-la.facebook.com/ads/library/?id=901230958115569',
        'only_matching': True,
    }, {
        'url': 'https://m.facebook.com/ads/library/?id=901230958115569',
        'only_matching': True,
    }]

    # Source key -> (format_id, format_note). The key order doubles as the
    # quality ranking handed to qualities(), from least to most preferred.
    _FORMATS_MAP = {
        'watermarked_video_sd_url': ('sd-wmk', 'SD, watermarked'),
        'video_sd_url': ('sd', None),
        'watermarked_video_hd_url': ('hd-wmk', 'HD, watermarked'),
        'video_hd_url': ('hd', None),
    }

    def _extract_formats(self, video_dict):
        """Build the formats list from the known URL keys of one video/card dict.

        Only keys listed in _FORMATS_MAP whose value is a valid URL are used.
        """
        # The ranking function is identical for every format, so build it once
        quality = qualities(tuple(self._FORMATS_MAP))

        formats = []
        for format_key, format_url in traverse_obj(video_dict, (
            {dict.items}, lambda _, v: v[0] in self._FORMATS_MAP and url_or_none(v[1])
        )):
            format_id, format_note = self._FORMATS_MAP[format_key]
            formats.append({
                'format_id': format_id,
                'format_note': format_note,
                'url': format_url,
                'ext': 'mp4',
                'quality': quality(format_key),
            })
        return formats

    def _real_extract(self, url):
        """Extract the video(s) of an ad library entry.

        Returns a single video result when exactly one entry is found and a
        playlist when there are several. Raises ExtractorError when the ad
        snapshot cannot be located in the webpage.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The ad data is embedded as JSON arguments of s.handle(...) calls
        post_data = [self._parse_json(j, video_id, fatal=False)
                     for j in re.findall(r's\.handle\(({.*})\);requireLazy\(', webpage)]
        data = traverse_obj(post_data, (
            ..., 'require', ..., ..., ..., 'props', 'deeplinkAdCard', 'snapshot', {dict}), get_all=False)
        if not data:
            raise ExtractorError('Unable to extract ad data')

        title = data.get('title')
        # An unfilled template placeholder is not a usable title
        if not title or title == '{{product.name}}':
            title = join_nonempty('display_format', 'page_name', delim=' by ', from_dict=data)

        info_dict = traverse_obj(data, {
            'description': ('link_description', {str}, {lambda x: x if x != '{{product.description}}' else None}),
            'uploader': ('page_name', {str}),
            'uploader_id': ('page_id', {str_or_none}),
            'uploader_url': ('page_profile_uri', {url_or_none}),
            'timestamp': ('creation_time', {int_or_none}),
            'like_count': ('page_like_count', {int_or_none}),
        })

        entries = []
        # Keep every video/card that has at least one usable format URL.
        # .get() is required here: indexing with v[f] raises KeyError for any
        # absent key, which the traversal filter swallows, so an item carrying
        # only some of the format keys would be silently discarded.
        for idx, entry in enumerate(traverse_obj(
            data, (('videos', 'cards'), lambda _, v: any(url_or_none(v.get(f)) for f in self._FORMATS_MAP))), 1
        ):
            entries.append({
                'id': f'{video_id}_{idx}',
                'title': entry.get('title') or title,
                'description': entry.get('link_description') or info_dict.get('description'),
                'thumbnail': url_or_none(entry.get('video_preview_image_url')),
                'formats': self._extract_formats(entry),
            })

        if len(entries) == 1:
            info_dict.update(entries[0])

        elif len(entries) > 1:
            info_dict.update({
                'title': entries[0]['title'],
                'entries': entries,
                '_type': 'playlist',
            })

        # Set last so the top-level id is always the ad id, not an entry's suffixed id
        info_dict['id'] = video_id

        return info_dict

View File

@ -11,6 +11,7 @@ from ..utils import (
join_nonempty,
parse_codecs,
parse_iso8601,
url_or_none,
urljoin,
)
from ..utils.traversal import traverse_obj
@ -108,6 +109,64 @@ class FloatplaneIE(InfoExtractor):
'availability': 'subscriber_only',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.floatplane.com/post/65B5PNoBtf',
'info_dict': {
'id': '65B5PNoBtf',
'description': 'I recorded the inbuilt demo mode for your 90\'s enjoyment, thanks for being Floaties!',
'display_id': '65B5PNoBtf',
'like_count': int,
'release_timestamp': 1701249480,
'uploader': 'The Trash Network',
'availability': 'subscriber_only',
'uploader_id': '61bc20c9a131fb692bf2a513',
'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
'comment_count': int,
'title': 'The $50 electronic drum kit.',
'channel_id': '64424fe73cd58cbcf8d8e131',
'thumbnail': 'https://pbs.floatplane.com/blogPost_thumbnails/65B5PNoBtf/725555379422705_1701247052743.jpeg',
'dislike_count': int,
'channel': 'The Drum Thing',
'release_date': '20231129',
},
'playlist_count': 2,
'playlist': [{
'info_dict': {
'id': 'ISPJjexylS',
'ext': 'mp4',
'release_date': '20231129',
'release_timestamp': 1701249480,
'title': 'The $50 electronic drum kit. .mov',
'channel_id': '64424fe73cd58cbcf8d8e131',
'thumbnail': 'https://pbs.floatplane.com/video_thumbnails/ISPJjexylS/335202812134041_1701249383392.jpeg',
'availability': 'subscriber_only',
'uploader': 'The Trash Network',
'duration': 622,
'channel': 'The Drum Thing',
'uploader_id': '61bc20c9a131fb692bf2a513',
'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
},
}, {
'info_dict': {
'id': 'qKfxu6fEpu',
'ext': 'aac',
'release_date': '20231129',
'release_timestamp': 1701249480,
'title': 'Roland TD-7 Demo.m4a',
'channel_id': '64424fe73cd58cbcf8d8e131',
'availability': 'subscriber_only',
'uploader': 'The Trash Network',
'duration': 114,
'channel': 'The Drum Thing',
'uploader_id': '61bc20c9a131fb692bf2a513',
'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
},
}],
'skip': 'requires subscription: "The Trash Network"',
'params': {'skip_download': 'm3u8'},
}]
def _real_initialize(self):
@ -124,6 +183,22 @@ class FloatplaneIE(InfoExtractor):
if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))):
raise ExtractorError('Post does not contain a video or audio track', expected=True)
uploader_url = format_field(
post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None
common_info = {
'uploader_url': uploader_url,
'channel_url': urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))),
'availability': self._availability(needs_subscription=True),
**traverse_obj(post_data, {
'uploader': ('creator', 'title', {str}),
'uploader_id': ('creator', 'id', {str}),
'channel': ('channel', 'title', {str}),
'channel_id': ('channel', 'id', {str}),
'release_timestamp': ('releaseDate', {parse_iso8601}),
}),
}
items = []
for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)):
media_id = media['id']
@ -150,11 +225,11 @@ class FloatplaneIE(InfoExtractor):
formats = []
for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)):
url = urljoin(stream['cdn'], format_path(traverse_obj(
stream, ('resource', 'data', 'qualityLevelParams', quality['name']))))
stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict}))))
formats.append({
**traverse_obj(quality, {
'format_id': 'name',
'format_note': 'label',
'format_id': ('name', {str}),
'format_note': ('label', {str}),
'width': ('width', {int}),
'height': ('height', {int}),
}),
@ -164,38 +239,28 @@ class FloatplaneIE(InfoExtractor):
})
items.append({
**common_info,
'id': media_id,
**traverse_obj(metadata, {
'title': 'title',
'title': ('title', {str}),
'duration': ('duration', {int_or_none}),
'thumbnail': ('thumbnail', 'path'),
'thumbnail': ('thumbnail', 'path', {url_or_none}),
}),
'formats': formats,
})
uploader_url = format_field(
post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None
channel_url = urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname')))
post_info = {
**common_info,
'id': post_id,
'display_id': post_id,
**traverse_obj(post_data, {
'title': 'title',
'title': ('title', {str}),
'description': ('text', {clean_html}),
'uploader': ('creator', 'title'),
'uploader_id': ('creator', 'id'),
'channel': ('channel', 'title'),
'channel_id': ('channel', 'id'),
'like_count': ('likes', {int_or_none}),
'dislike_count': ('dislikes', {int_or_none}),
'comment_count': ('comments', {int_or_none}),
'release_timestamp': ('releaseDate', {parse_iso8601}),
'thumbnail': ('thumbnail', 'path'),
'thumbnail': ('thumbnail', 'path', {url_or_none}),
}),
'uploader_url': uploader_url,
'channel_url': channel_url,
'availability': self._availability(needs_subscription=True),
}
if len(items) > 1:

View File

@ -66,7 +66,7 @@ class GofileIE(InfoExtractor):
query_params = {
'contentId': file_id,
'token': self._TOKEN,
'websiteToken': '7fd94ds12fds4', # From https://gofile.io/dist/js/alljs.js
'wt': '4fd6sg89d7s6', # From https://gofile.io/dist/js/alljs.js
}
password = self.get_param('videopassword')
if password:

282
yt_dlp/extractor/lsm.py Normal file
View File

@ -0,0 +1,282 @@
import re
import urllib.parse
from .common import InfoExtractor
from ..utils import (
ExtractorError,
determine_ext,
int_or_none,
js_to_json,
parse_iso8601,
parse_qs,
str_or_none,
url_or_none,
urljoin,
)
from ..utils.traversal import traverse_obj
class LSMLREmbedIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://(?:
(?:latvijasradio|lr1|lr2|klasika|lr4|naba|radioteatris)\.lsm|
pieci
)\.lv/[^/?#]+/(?:
pleijeris|embed
)/?\?(?:[^#]+&)?(?:show|id)=(?P<id>\d+)'''
_TESTS = [{
'url': 'https://latvijasradio.lsm.lv/lv/embed/?theme=black&size=16x9&showCaptions=0&id=183522',
'md5': '719b33875cd1429846eeeaeec6df2830',
'info_dict': {
'id': 'a342781',
'ext': 'mp3',
'duration': 1823,
'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām',
'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/gallery_fd4675ac.jpg',
}
}, {
'url': 'https://radioteatris.lsm.lv/lv/embed/?id=&show=1270&theme=white&size=16x9',
'info_dict': {
'id': '1270',
},
'playlist_count': 3,
'playlist': [{
'md5': '2e61b6eceff00d14d57fdbbe6ab24cac',
'info_dict': {
'id': 'a297397',
'ext': 'mp3',
'title': 'Eriks Emanuels Šmits "Pilāta evaņģēlijs". 1. daļa',
'thumbnail': 'https://radioteatris.lsm.lv/public/assets/shows/62f131ae81e3c.jpg',
'duration': 3300,
},
}],
}, {
'url': 'https://radioteatris.lsm.lv/lv/embed/?id=&show=1269&theme=white&size=16x9',
'md5': '24810d4a961da2295d9860afdcaf4f5a',
'info_dict': {
'id': 'a230690',
'ext': 'mp3',
'title': 'Jens Ahlboms "Spārni". Radioizrāde ar Mārtiņa Freimaņa mūziku',
'thumbnail': 'https://radioteatris.lsm.lv/public/assets/shows/62f13023a457c.jpg',
'duration': 1788,
}
}, {
'url': 'https://lr1.lsm.lv/lv/embed/?id=166557&show=0&theme=white&size=16x9',
'info_dict': {
'id': '166557',
},
'playlist_count': 2,
'playlist': [{
'md5': '6a8b0927572f443f09c6e50a3ad65f2d',
'info_dict': {
'id': 'a303104',
'ext': 'mp3',
'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/c/5/gallery_a83ad2c2.jpg',
'title': 'Krustpunktā Lielā intervija: Valsts prezidents Egils Levits',
'duration': 3222,
},
}, {
'md5': '5d5e191e718b7644e5118b7b4e093a6d',
'info_dict': {
'id': 'v303104',
'ext': 'mp4',
'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/c/5/gallery_a83ad2c2.jpg',
'title': 'Krustpunktā Lielā intervija: Valsts prezidents Egils Levits - Video Version',
'duration': 3222,
},
}],
}, {
'url': 'https://lr1.lsm.lv/lv/embed/?id=183522&show=0&theme=white&size=16x9',
'only_matching': True,
}, {
'url': 'https://lr2.lsm.lv/lv/embed/?id=182126&show=0&theme=white&size=16x9',
'only_matching': True,
}, {
'url': 'https://klasika.lsm.lv/lv/embed/?id=110806&show=0&theme=white&size=16x9',
'only_matching': True,
}, {
'url': 'https://lr4.lsm.lv/lv/embed/?id=184282&show=0&theme=white&size=16x9',
'only_matching': True,
}, {
'url': 'https://pieci.lv/lv/embed/?id=168896&show=0&theme=white&size=16x9',
'only_matching': True,
}, {
'url': 'https://naba.lsm.lv/lv/embed/?id=182901&show=0&theme=white&size=16x9',
'only_matching': True,
}, {
'url': 'https://radioteatris.lsm.lv/lv/embed/?id=176439&show=0&theme=white&size=16x9',
'only_matching': True,
}, {
'url': 'https://lr1.lsm.lv/lv/pleijeris/?embed=0&id=48205&time=00%3A00&idx=0',
'only_matching': True,
}]
def _real_extract(self, url):
    """Extract the audio/video items configured for an ``LR.audio.Player`` embed.

    Returns a single info dict when exactly one item is found, otherwise a
    playlist result whose id is taken from the ``show``/``id`` query parameter.
    """
    url_params = parse_qs(url)
    # First non-zero value among the `show` and `id` query parameters, as a string
    video_id = traverse_obj(url_params, (
        ('show', 'id'), 0, {int_or_none}, {lambda x: x or None}, {str_or_none}), get_all=False)

    webpage = self._download_webpage(url, video_id)
    player_data, media_data = self._search_regex(
        r'LR\.audio\.Player\s*\([^{]*(?P<player>\{.*?\}),(?P<media>\{.*\})\);',
        webpage, 'player json', group=('player', 'media'))

    # The player options are optional (only the poster is read from them);
    # the media description is mandatory.
    player_json = self._parse_json(
        player_data, video_id, transform_source=js_to_json, fatal=False) or {}
    media_json = self._parse_json(media_data, video_id, transform_source=js_to_json)

    # Every entry shares the same poster image
    poster = urljoin(url, player_json.get('poster'))

    entries = []
    for media in traverse_obj(media_json, (('audio', 'video'), lambda _, v: v['id'])):
        formats = []
        for file_url in traverse_obj(media, ('sources', ..., 'file', {url_or_none})):
            if determine_ext(file_url) != 'm3u8':
                formats.append({'url': file_url})
                continue
            formats.extend(self._extract_m3u8_formats(file_url, video_id, fatal=False))

        id_ = media['id']
        title = media.get('title')
        if not title and id_.startswith('v'):
            # Untitled video item: borrow the title of the audio item whose id
            # matches after dropping the first character
            title = traverse_obj(
                media_json, ('audio', lambda _, v: v['id'][1:] == id_[1:], 'title',
                             {lambda x: x and f'{x} - Video Version'}), get_all=False)

        entries.append({
            'formats': formats,
            'thumbnail': poster,
            'id': id_,
            'title': title,
            'duration': traverse_obj(media, ('duration', {int_or_none})),
        })

    if len(entries) != 1:
        return self.playlist_result(entries, video_id)
    return entries[0]
class LSMLTVEmbedIE(InfoExtractor):
    # Embed pages on ltv.lsm.lv; the `c` query parameter is used as the id
    _VALID_URL = r'https?://ltv\.lsm\.lv/embed\?(?:[^#]+&)?c=(?P<id>[^#&]+)'
    _TESTS = [{
        'url': 'https://ltv.lsm.lv/embed?c=eyJpdiI6IjQzbHVUeHAyaDJiamFjcjdSUUFKdnc9PSIsInZhbHVlIjoiMHl3SnJNRmd2TmFIdnZwOGtGUUpzODFzUEZ4SVVsN2xoRjliSW9vckUyMWZIWG8vbWVzaFFkY0lhNmRjbjRpaCIsIm1hYyI6ImMzNjdhMzFhNTFhZmY1ZmE0NWI5YmFjZGI1YmJiNGEyNjgzNDM4MjUzMWEwM2FmMDMyZDMwYWM1MDFjZmM5MGIiLCJ0YWciOiIifQ==',
        'md5': '64f72a360ca530d5ed89c77646c9eee5',
        'info_dict': {
            'id': '46k_d23-6000-105',
            'ext': 'mp4',
            'timestamp': 1700589151,
            'duration': 1442,
            'upload_date': '20231121',
            'title': 'D23-6000-105_cetstud',
            'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
        }
    }, {
        'url': 'https://ltv.lsm.lv/embed?enablesdkjs=1&c=eyJpdiI6IncwVzZmUFk2MU12enVWK1I3SUcwQ1E9PSIsInZhbHVlIjoid3FhV29vamc3T2sxL1RaRmJ5Rm1GTXozU0o2dVczdUtLK0cwZEZJMDQ2a3ZIRG5DK2pneGlnbktBQy9uazVleHN6VXhxdWIweWNvcHRDSnlISlNYOHlVZ1lpcTUrcWZSTUZPQW14TVdkMW9aOUtRWVNDcFF4eWpHNGcrT0VZbUNFQStKQk91cGpndW9FVjJIa0lpbkh3PT0iLCJtYWMiOiIyZGI1NDJlMWRlM2QyMGNhOGEwYTM2MmNlN2JlOGRhY2QyYjdkMmEzN2RlOTEzYTVkNzI1ODlhZDlhZjU4MjQ2IiwidGFnIjoiIn0=',
        'md5': 'a1711e190fe680fdb68fd8413b378e87',
        'info_dict': {
            'id': 'wUnFArIPDSY',
            'ext': 'mp4',
            'uploader': 'LTV_16plus',
            'release_date': '20220514',
            'channel_url': 'https://www.youtube.com/channel/UCNMrnafwXD2XKeeQOyfkFCw',
            'view_count': int,
            'availability': 'public',
            'thumbnail': 'https://i.ytimg.com/vi/wUnFArIPDSY/maxresdefault.jpg',
            'release_timestamp': 1652544074,
            'title': 'EIROVĪZIJA SALĀTOS',
            'live_status': 'was_live',
            'uploader_id': '@LTV16plus',
            'comment_count': int,
            'channel_id': 'UCNMrnafwXD2XKeeQOyfkFCw',
            'channel_follower_count': int,
            'categories': ['Entertainment'],
            'duration': 5269,
            'upload_date': '20220514',
            'age_limit': 0,
            'channel': 'LTV_16plus',
            'playable_in_embed': True,
            'tags': [],
            'uploader_url': 'https://www.youtube.com/@LTV16plus',
            'like_count': int,
            'description': 'md5:7ff0c42ba971e3c13e4b8a2ff03b70b5',
        }
    }]

    def _real_extract(self, url):
        """Read ``window.ltvEmbedPayload`` and delegate to the extractor for its source.

        Raises ExtractorError when the payload's source name is neither
        ``telia`` nor ``youtube``.
        """
        video_id = urllib.parse.unquote(self._match_id(url))
        webpage = self._download_webpage(url, video_id)
        data = self._search_json(
            r'window\.ltvEmbedPayload\s*=', webpage, 'embed json', video_id)

        # source name -> (extractor key, path of the value handed to that extractor)
        delegates = {
            'telia': ('CloudyCDN', ('source', 'embed_url', {url_or_none})),
            'youtube': ('Youtube', ('source', 'id', {str})),
        }
        embed_type = traverse_obj(data, ('source', 'name', {str}))
        if embed_type not in delegates:
            raise ExtractorError(f'Unsupported embed type {embed_type!r}')

        ie_key, target_path = delegates[embed_type]
        embed_url = traverse_obj(data, target_path)
        metadata = traverse_obj(data, {
            'title': ('parentInfo', 'title'),
            'duration': ('parentInfo', 'duration', {int_or_none}),
            'thumbnail': ('source', 'poster', {url_or_none}),
        })
        return self.url_result(embed_url, ie_key, video_id, **metadata)
class LSMReplayIE(InfoExtractor):
    # Recording ("ieraksts") and article ("statja") pages on replay.lsm.lv
    _VALID_URL = r'https?://replay\.lsm\.lv/[^/?#]+/(?:ieraksts|statja)/[^/?#]+/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://replay.lsm.lv/lv/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
        'md5': '64f72a360ca530d5ed89c77646c9eee5',
        'info_dict': {
            'id': '46k_d23-6000-105',
            'ext': 'mp4',
            'timestamp': 1700586300,
            'description': 'md5:0f1b14798cc39e1ae578bd0eb268f759',
            'duration': 1442,
            'upload_date': '20231121',
            'title': '4. studija. Zolitūdes traģēdija un Inčupes stacija',
            'thumbnail': 'https://ltv.lsm.lv/storage/media/8/7/large/5/1f9604e1.jpg',
        }
    }, {
        'url': 'https://replay.lsm.lv/lv/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam',
        'md5': '719b33875cd1429846eeeaeec6df2830',
        'info_dict': {
            'id': 'a342781',
            'ext': 'mp3',
            'duration': 1823,
            'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām',
            'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/large_fd4675ac.jpg',
            'upload_date': '20231102',
            'timestamp': 1698921060,
            'description': 'md5:7bac3b2dd41e44325032943251c357b1',
        }
    }, {
        'url': 'https://replay.lsm.lv/ru/statja/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
        'only_matching': True,
    }]

    def _fix_nuxt_data(self, webpage):
        """Replace each ``Object.create(null[,{...}])`` call in *webpage*.

        The call is substituted by its second argument when one is present,
        otherwise by the literal ``null``.
        """
        def unwrap(match):
            return match.group(1) or 'null'

        return re.sub(r'Object\.create\(null(?:,(\{.+\}))?\)', unwrap, webpage)

    def _real_extract(self, url):
        """Return a ``url_transparent`` result built from the page's ``__REPLAY__`` data."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        patched_page = self._fix_nuxt_data(webpage)
        data = self._search_nuxt_data(patched_page, video_id, context_name='__REPLAY__')

        fields = traverse_obj(data, {
            'url': ('playback', 'service', 'url', {url_or_none}),
            'title': ('mediaItem', 'title'),
            'description': ('mediaItem', ('lead', 'body')),
            'duration': ('mediaItem', 'duration', {int_or_none}),
            'timestamp': ('mediaItem', 'aired_at', {parse_iso8601}),
            'thumbnail': ('mediaItem', 'largeThumbnail', {url_or_none}),
        }, get_all=False)

        return {
            '_type': 'url_transparent',
            'id': video_id,
            **fields,
        }

View File

@ -8,7 +8,7 @@ from ..utils import (
float_or_none,
int_or_none,
str_or_none,
traverse_obj
traverse_obj,
)
@ -16,7 +16,7 @@ class MedalTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?medal\.tv/games/[^/?#&]+/clips/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://medal.tv/games/valorant/clips/jTBFnLKdLy15K',
'md5': '6930f8972914b6b9fdc2bb3918098ba0',
'md5': '03e4911fdcf7fce563090705c2e79267',
'info_dict': {
'id': 'jTBFnLKdLy15K',
'ext': 'mp4',
@ -33,8 +33,8 @@ class MedalTVIE(InfoExtractor):
'duration': 13,
}
}, {
'url': 'https://medal.tv/games/cod%20cold%20war/clips/2mA60jWAGQCBH',
'md5': '3d19d426fe0b2d91c26e412684e66a06',
'url': 'https://medal.tv/games/cod-cold-war/clips/2mA60jWAGQCBH',
'md5': 'fc7a3e4552ae8993c1c4006db46be447',
'info_dict': {
'id': '2mA60jWAGQCBH',
'ext': 'mp4',
@ -52,7 +52,7 @@ class MedalTVIE(InfoExtractor):
'duration': 23,
}
}, {
'url': 'https://medal.tv/games/cod%20cold%20war/clips/2um24TWdty0NA',
'url': 'https://medal.tv/games/cod-cold-war/clips/2um24TWdty0NA',
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
'info_dict': {
'id': '2um24TWdty0NA',
@ -81,7 +81,7 @@ class MedalTVIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
webpage = self._download_webpage(url, video_id, query={'mobilebypass': 'true'})
hydration_data = self._search_json(
r'<script[^>]*>[^<]*\bhydrationData\s*=', webpage,

View File

@ -177,6 +177,7 @@ class MotherlessIE(InfoExtractor):
class MotherlessPaginatedIE(InfoExtractor):
_EXTRA_QUERY = {}
_PAGE_SIZE = 60
def _correct_path(self, url, item_id):
@ -199,7 +200,7 @@ class MotherlessPaginatedIE(InfoExtractor):
def get_page(idx):
page = idx + 1
current_page = webpage if not idx else self._download_webpage(
real_url, item_id, note=f'Downloading page {page}', query={'page': page})
real_url, item_id, note=f'Downloading page {page}', query={'page': page, **self._EXTRA_QUERY})
yield from self._extract_entries(current_page, real_url)
return self.playlist_result(
@ -213,7 +214,7 @@ class MotherlessGroupIE(MotherlessPaginatedIE):
'url': 'http://motherless.com/gv/movie_scenes',
'info_dict': {
'id': 'movie_scenes',
'title': 'Movie Scenes',
'title': 'Movie Scenes - Videos - Hot and sexy scenes from "regular" movies... Beautiful actresses fully',
},
'playlist_mincount': 540,
}, {
@ -244,7 +245,7 @@ class MotherlessGalleryIE(MotherlessPaginatedIE):
'id': '338999F',
'title': 'Random',
},
'playlist_mincount': 190,
'playlist_mincount': 171,
}, {
'url': 'https://motherless.com/GVABD6213',
'info_dict': {
@ -270,3 +271,27 @@ class MotherlessGalleryIE(MotherlessPaginatedIE):
def _correct_path(self, url, item_id):
return urllib.parse.urljoin(url, f'/GV{item_id}')
class MotherlessUploaderIE(MotherlessPaginatedIE):
    # Uploader profile pages: /u/<name>, optionally followed by a query or fragment
    _VALID_URL = r'https?://(?:www\.)?motherless\.com/u/(?P<id>\w+)/?(?:$|[?#])'
    # Query parameters sent along with every page request
    _EXTRA_QUERY = {'t': 'v'}
    _TESTS = [{
        'url': 'https://motherless.com/u/Mrgo4hrs2023',
        'info_dict': {
            'id': 'Mrgo4hrs2023',
            'title': "Mrgo4hrs2023's Uploads - Videos",
        },
        'playlist_mincount': 32,
    }, {
        'url': 'https://motherless.com/u/Happy_couple?t=v',
        'info_dict': {
            'id': 'Happy_couple',
            'title': "Happy_couple's Uploads - Videos",
        },
        'playlist_mincount': 8,
    }]

    def _correct_path(self, url, item_id):
        """Return the uploader listing URL for *item_id* with ``t=v`` set, resolved against *url*."""
        listing_path = f'/u/{item_id}?t=v'
        return urllib.parse.urljoin(url, listing_path)

171
yt_dlp/extractor/mx3.py Normal file
View File

@ -0,0 +1,171 @@
import re
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
get_element_by_class,
int_or_none,
try_call,
url_or_none,
urlhandle_detect_ext,
)
from ..utils.traversal import traverse_obj
class Mx3BaseIE(InfoExtractor):
    # Subclasses set `_DOMAIN` and fill this template with the escaped domain
    _VALID_URL_TMPL = r'https?://(?:www\.)?%s/t/(?P<id>\w+)'
    # Candidate formats; each 'url' is a path relative to /tracks/<track_id>/
    _FORMATS = [{
        'url': 'player_asset',
        'format_id': 'default',
        'quality': 0,
    }, {
        'url': 'player_asset?quality=hd',
        'format_id': 'hd',
        'quality': 1,
    }, {
        'url': 'download',
        'format_id': 'download',
        'quality': 2,
    }, {
        'url': 'player_asset?quality=source',
        'format_id': 'source',
        'quality': 2,
    }]

    def _extract_formats(self, track_id):
        """Probe every entry of ``_FORMATS`` with a HEAD request.

        Only candidates answering with status 200 are returned; their
        extension and file size are taken from the response.
        """
        available = []
        for fmt in self._FORMATS:
            format_url = f'https://{self._DOMAIN}/tracks/{track_id}/{fmt["url"]}'
            urlh = self._request_webpage(
                HEADRequest(format_url), track_id, fatal=False, expected_status=404,
                note=f'Checking for format {fmt["format_id"]}')
            if not urlh or urlh.status != 200:
                continue
            available.append({
                **fmt,
                'url': format_url,
                'ext': urlhandle_detect_ext(urlh),
                'filesize': int_or_none(urlh.headers.get('Content-Length')),
            })
        return available

    def _real_extract(self, url):
        """Combine metadata scraped from the track page with the track's JSON document."""
        track_id = self._match_id(url)
        webpage = self._download_webpage(url, track_id)
        more_info = get_element_by_class('single-more-info', webpage)
        data = self._download_json(f'https://{self._DOMAIN}/t/{track_id}.json', track_id, fatal=False)

        def get_info_field(name):
            # Value of the <dd> following the <dt> labelled `name`, or None
            return self._html_search_regex(
                rf'<dt[^>]*>\s*{name}\s*</dt>\s*<dd[^>]*>(.*?)</dd>',
                more_info, name, default=None, flags=re.DOTALL)

        formats = self._extract_formats(track_id)
        genre = self._html_search_regex(
            r'<div\b[^>]+class="single-band-genre"[^>]*>([^<]+)</div>', webpage, 'genre', default=None)
        json_fields = traverse_obj(data, {
            'title': ('title', {str}),
            'artist': (('performer_name', 'artist'), {str}),
            'album_artist': ('artist', {str}),
            'composer': ('composer_name', {str}),
            'thumbnail': (('picture_url_xlarge', 'picture_url'), {url_or_none}),
        }, get_all=False)

        return {
            'id': track_id,
            'formats': formats,
            'genre': genre,
            'release_year': int_or_none(get_info_field('Year of creation')),
            'description': get_info_field('Description'),
            # `list` is passed as a second callable for try_call
            'tags': try_call(lambda: get_info_field('Tag').split(', '), list),
            **json_fields,
        }
class Mx3IE(Mx3BaseIE):
    # Track pages on the main mx3.ch site; all logic lives in Mx3BaseIE
    _DOMAIN = 'mx3.ch'
    _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
    _TESTS = [{
        'url': 'https://mx3.ch/t/1Cru',
        'md5': '7ba09e9826b4447d4e1ce9d69e0e295f',
        'info_dict': {
            'id': '1Cru',
            'ext': 'wav',
            'artist': 'Godina',
            'album_artist': 'Tortue Tortue',
            'composer': 'Olivier Godinat',
            'genre': 'Rock',
            'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/4643/square_xlarge/1-s-envoler-1.jpg?1630272813',
            'title': "S'envoler",
            'release_year': 2021,
            'tags': [],
        }
    }, {
        'url': 'https://mx3.ch/t/1LIY',
        'md5': '48293cb908342547827f963a5a2e9118',
        'info_dict': {
            'id': '1LIY',
            'ext': 'mov',
            'artist': 'Tania Kimfumu',
            'album_artist': 'The Broots',
            'composer': 'Emmanuel Diserens',
            'genre': 'Electro',
            'thumbnail': 'https://mx3.ch/pictures/mx3/file/0110/0003/video_xlarge/frame_0000.png?1686963670',
            'title': 'The Broots-Larytta remix "Begging For Help"',
            'release_year': 2023,
            'tags': ['the broots', 'cassata records', 'larytta'],
            'description': '"Begging for Help" Larytta Remix Official Video\nRealized By Kali Donkilie in 2023',
        }
    }, {
        'url': 'https://mx3.ch/t/1C6E',
        'md5': '1afcd578493ddb8e5008e94bb6d97e25',
        'info_dict': {
            'id': '1C6E',
            'ext': 'wav',
            'artist': 'Alien Bubblegum',
            'album_artist': 'Alien Bubblegum',
            'composer': 'Alien Bubblegum',
            'genre': 'Punk',
            'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/1551/square_xlarge/pandora-s-box-cover-with-title.png?1627054733',
            'title': 'Wide Awake',
            'release_year': 2021,
            'tags': ['alien bubblegum', 'bubblegum', 'alien', 'pop punk', 'poppunk'],
        }
    }]
class Mx3NeoIE(Mx3BaseIE):
    # Track pages on neo.mx3.ch; all logic lives in Mx3BaseIE
    _DOMAIN = 'neo.mx3.ch'
    _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
    _TESTS = [{
        'url': 'https://neo.mx3.ch/t/1hpd',
        'md5': '6d9986bbae5cac3296ec8813bf965eb2',
        'info_dict': {
            'id': '1hpd',
            'ext': 'wav',
            'artist': 'Baptiste Lopez',
            'album_artist': 'Kammerorchester Basel',
            'composer': 'Jannik Giger',
            'genre': 'Composition, Orchestra',
            'title': 'Troisième œil. Für Kammerorchester (2023)',
            'thumbnail': 'https://neo.mx3.ch/pictures/neo/file/0000/0241/square_xlarge/kammerorchester-basel-group-photo-2_c_-lukasz-rajchert.jpg?1560341252',
            'release_year': 2023,
            'tags': [],
        }
    }]
class Mx3VolksmusikIE(Mx3BaseIE):
    # Track pages on volksmusik.mx3.ch; all logic lives in Mx3BaseIE
    _DOMAIN = 'volksmusik.mx3.ch'
    _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
    _TESTS = [{
        'url': 'https://volksmusik.mx3.ch/t/Zx',
        'md5': 'dd967a7b0c1ef898f3e072cf9c2eae3c',
        'info_dict': {
            'id': 'Zx',
            'ext': 'mp3',
            'artist': 'Ländlerkapelle GrischArt',
            'album_artist': 'Ländlerkapelle GrischArt',
            'composer': 'Urs Glauser',
            'genre': 'Instrumental, Graubünden',
            'title': 'Chämilouf',
            'thumbnail': 'https://volksmusik.mx3.ch/pictures/vxm/file/0000/3815/square_xlarge/grischart1.jpg?1450530120',
            'release_year': 2012,
            'tags': [],
        }
    }]

View File

@ -3,15 +3,15 @@ import re
from .common import InfoExtractor
from ..utils import (
OnDemandPagedList,
clean_html,
extract_attributes,
get_element_by_id,
int_or_none,
parse_count,
parse_duration,
traverse_obj,
unified_timestamp,
OnDemandPagedList,
try_get,
)
@ -263,19 +263,16 @@ class NewgroundsUserIE(InfoExtractor):
def _fetch_page(self, channel_id, url, page):
page += 1
posts_info = self._download_json(
f'{url}/page/{page}', channel_id,
f'{url}?page={page}', channel_id,
note=f'Downloading page {page}', headers={
'Accept': 'application/json, text/javascript, */*; q = 0.01',
'X-Requested-With': 'XMLHttpRequest',
})
sequence = posts_info.get('sequence', [])
for year in sequence:
posts = try_get(posts_info, lambda x: x['years'][str(year)]['items'])
for post in posts:
path, media_id = self._search_regex(
r'<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
post, 'url', group=(1, 2))
yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)
for post in traverse_obj(posts_info, ('items', ..., ..., {str})):
path, media_id = self._search_regex(
r'<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
post, 'url', group=(1, 2))
yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)
def _real_extract(self, url):
channel_id = self._match_id(url)

View File

@ -275,7 +275,7 @@ class PatreonIE(PatreonBaseIE):
'ext': ext,
'url': post_file['url'],
}
elif name == 'video':
elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
return {
**info,

View File

@ -1,10 +1,18 @@
import json
from .common import InfoExtractor
from ..utils import int_or_none, traverse_obj
from ..utils import (
ExtractorError,
int_or_none,
parse_qs,
traverse_obj,
update_url_query,
urlencode_postdata,
)
class PlaySuisseIE(InfoExtractor):
_NETRC_MACHINE = 'playsuisse'
_VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/(?:watch|detail)/(?:[^#]*[?&]episodeId=)?(?P<id>[0-9]+)'
_TESTS = [
{
@ -134,12 +142,47 @@ class PlaySuisseIE(InfoExtractor):
id
url
}'''
_LOGIN_BASE_URL = 'https://login.srgssr.ch/srgssrlogin.onmicrosoft.com'
_LOGIN_PATH = 'B2C_1A__SignInV2'
_ID_TOKEN = None
def _perform_login(self, username, password):
    """Log in with *username* / *password* and store the resulting token in ``_ID_TOKEN``.

    Raises ExtractorError when the credentials are rejected or when the final
    redirect URL carries no ``id_token`` parameter.
    """
    login_page = self._download_webpage(
        'https://www.playsuisse.ch/api/sso/login', None, note='Downloading login page',
        query={'x': 'x', 'locale': 'de', 'redirectUrl': 'https://www.playsuisse.ch/'})
    settings = self._search_json(r'var\s+SETTINGS\s*=', login_page, 'settings', None)

    csrf_token = settings['csrf']
    query = {'tx': settings['transId'], 'p': self._LOGIN_PATH}
    policy_url = f'{self._LOGIN_BASE_URL}/{self._LOGIN_PATH}'

    # A rejected login is reported as status 400 in the JSON body, so the
    # HTTP 400 response is accepted and inspected instead of raising.
    login_response = self._download_json(
        f'{policy_url}/SelfAsserted', None, 'Logging in',
        query=query, headers={'X-CSRF-TOKEN': csrf_token}, data=urlencode_postdata({
            'request_type': 'RESPONSE',
            'signInName': username,
            'password': password,
        }), expected_status=400)
    if traverse_obj(login_response, ('status', {int_or_none})) == 400:
        raise ExtractorError('Invalid username or password', expected=True)

    confirm_query = {
        'rememberMe': 'false',
        'csrf_token': csrf_token,
        **query,
        'diags': '',
    }
    urlh = self._request_webpage(
        f'{policy_url}/api/CombinedSigninAndSignup/confirmed',
        None, 'Downloading ID token', query=confirm_query)

    # The token is read from the query string of the final response URL
    id_token = traverse_obj(parse_qs(urlh.url), ('id_token', 0))
    self._ID_TOKEN = id_token
    if not id_token:
        raise ExtractorError('Login failed')
def _get_media_data(self, media_id):
# NOTE In the web app, the "locale" header is used to switch between languages,
# However this doesn't seem to take effect when passing the header here.
response = self._download_json(
'https://4bbepzm4ef.execute-api.eu-central-1.amazonaws.com/prod/graphql',
'https://www.playsuisse.ch/api/graphql',
media_id, data=json.dumps({
'operationName': 'AssetWatch',
'query': self._GRAPHQL_QUERY,
@ -150,6 +193,9 @@ class PlaySuisseIE(InfoExtractor):
return response['data']['assetV2']
def _real_extract(self, url):
if not self._ID_TOKEN:
self.raise_login_required(method='password')
media_id = self._match_id(url)
media_data = self._get_media_data(media_id)
info = self._extract_single(media_data)
@ -168,7 +214,8 @@ class PlaySuisseIE(InfoExtractor):
if not media.get('url') or media.get('type') != 'HLS':
continue
f, subs = self._extract_m3u8_formats_and_subtitles(
media['url'], media_data['id'], 'mp4', m3u8_id='HLS', fatal=False)
update_url_query(media['url'], {'id_token': self._ID_TOKEN}),
media_data['id'], 'mp4', m3u8_id='HLS', fatal=False)
formats.extend(f)
self._merge_subtitles(subs, target=subtitles)

View File

@ -18,7 +18,6 @@ from ..utils.traversal import traverse_obj
class Pr0grammIE(InfoExtractor):
_VALID_URL = r'https?://pr0gramm\.com\/(?:[^/?#]+/)+(?P<id>[\d]+)(?:[/?#:]|$)'
_TESTS = [{
# Tags require account
'url': 'https://pr0gramm.com/new/video/5466437',
'info_dict': {
'id': '5466437',
@ -36,7 +35,6 @@ class Pr0grammIE(InfoExtractor):
'_old_archive_ids': ['pr0grammstatic 5466437'],
},
}, {
# Tags require account
'url': 'https://pr0gramm.com/new/3052805:comment28391322',
'info_dict': {
'id': '3052805',
@ -71,6 +69,23 @@ class Pr0grammIE(InfoExtractor):
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
'_old_archive_ids': ['pr0grammstatic 5848332'],
},
}, {
'url': 'https://pr0gramm.com/top/5895149',
'info_dict': {
'id': '5895149',
'ext': 'mp4',
'title': 'pr0gramm-5895149 by algoholigSeeManThrower',
'tags': 'count:19',
'uploader': 'algoholigSeeManThrower',
'uploader_id': 457556,
'upload_timestamp': 1697580902,
'upload_date': '20231018',
'like_count': int,
'dislike_count': int,
'age_limit': 0,
'thumbnail': 'https://thumb.pr0gramm.com/2023/10/18/db47bb3db5e1a1b3.jpg',
'_old_archive_ids': ['pr0grammstatic 5895149'],
},
}, {
'url': 'https://pr0gramm.com/static/5466437',
'only_matching': True,
@ -92,15 +107,15 @@ class Pr0grammIE(InfoExtractor):
def _maximum_flags(self):
# We need to guess the flags for the content otherwise the api will raise an error
# We can guess the maximum allowed flags for the account from the cookies
# Bitflags are (msbf): nsfp, nsfl, nsfw, sfw
flags = 0b0001
# Bitflags are (msbf): pol, nsfp, nsfl, nsfw, sfw
flags = 0b10001
if self._is_logged_in:
flags |= 0b1000
flags |= 0b01000
cookies = self._get_cookies(self.BASE_URL)
if 'me' not in cookies:
self._download_webpage(self.BASE_URL, None, 'Refreshing verification information')
if traverse_obj(cookies, ('me', {lambda x: x.value}, {unquote}, {json.loads}, 'verified')):
flags |= 0b0110
flags |= 0b00110
return flags
@ -134,14 +149,12 @@ class Pr0grammIE(InfoExtractor):
if not source or not source.endswith('mp4'):
self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id)
tags = None
if self._is_logged_in:
metadata = self._call_api('info', video_id, {'itemId': video_id}, note='Downloading tags')
tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
# Sorted by "confidence", higher confidence = earlier in list
confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
if confidences:
tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
metadata = self._call_api('info', video_id, {'itemId': video_id}, note='Downloading tags')
tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
# Sorted by "confidence", higher confidence = earlier in list
confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
if confidences:
tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
formats = traverse_obj(video_info, ('variants', ..., {
'format_id': ('name', {str}),

135
yt_dlp/extractor/redge.py Normal file
View File

@ -0,0 +1,135 @@
import functools
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
float_or_none,
int_or_none,
join_nonempty,
parse_qs,
update_url_query,
)
from ..utils.traversal import traverse_obj
class RedCDNLivxIE(InfoExtractor):
    _VALID_URL = r'https?://[^.]+\.(?:dcs\.redcdn|atmcdn)\.pl/(?:live(?:dash|hls|ss)|nvr)/o2/(?P<tenant>[^/?#]+)/(?P<id>[^?#]+)\.livx'
    IE_NAME = 'redcdnlivx'

    _TESTS = [{
        'url': 'https://r.dcs.redcdn.pl/livedash/o2/senat/ENC02/channel.livx?indexMode=true&startTime=638272860000&stopTime=638292544000',
        'info_dict': {
            'id': 'ENC02-638272860000-638292544000',
            'ext': 'mp4',
            'title': 'ENC02',
            'duration': 19683.982,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://r.dcs.redcdn.pl/livedash/o2/sejm/ENC18/live.livx?indexMode=true&startTime=722333096000&stopTime=722335562000',
        'info_dict': {
            'id': 'ENC18-722333096000-722335562000',
            'ext': 'mp4',
            'title': 'ENC18',
            'duration': 2463.995,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://r.dcs.redcdn.pl/livehls/o2/sportevolution/live/triathlon2018/warsaw.livx/playlist.m3u8?startTime=550305000000&stopTime=550327620000',
        'info_dict': {
            'id': 'triathlon2018-warsaw-550305000000-550327620000',
            'ext': 'mp4',
            'title': 'triathlon2018/warsaw',
            'duration': 22619.98,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://n-25-12.dcs.redcdn.pl/nvr/o2/sejm/Migacz-ENC01/1.livx?startTime=722347200000&stopTime=722367345000',
        'only_matching': True,
    }, {
        'url': 'https://redir.atmcdn.pl/nvr/o2/sejm/ENC08/1.livx?startTime=503831270000&stopTime=503840040000',
        'only_matching': True,
    }]

    """
    Known methods (first in url path):
    - `livedash` - DASH MPD
    - `livehls` - HTTP Live Streaming
    - `livess` - IIS Smooth Streaming
    - `nvr` - CCTV mode, directly returns a file, typically flv, avc1, aac
    - `sc` - shoutcast/icecast (audio streams, like radio)
    """

    def _real_extract(self, url):
        """Collect ISM, direct-file, DASH and HLS formats for one livx stream.

        The id and title are synthesised from the URL path and the
        ``startTime``/``stopTime`` query parameters.
        """
        tenant, path = self._match_valid_url(url).group('tenant', 'id')
        qs = parse_qs(url)
        start_time = traverse_obj(qs, ('startTime', 0, {int_or_none}))
        stop_time = traverse_obj(qs, ('stopTime', 0, {int_or_none}))

        # Delivery methods whose URL needs something appended after `.livx`
        mode_suffixes = {
            'livess': '/manifest',
            'livehls': '/playlist.m3u8',
        }

        def livx_mode(mode):
            # Build the URL of this stream for the given delivery method
            file_qs = {}
            if start_time:
                file_qs['startTime'] = start_time
            if stop_time:
                file_qs['stopTime'] = stop_time
            if mode == 'nvr':
                file_qs['nolimit'] = 1
            elif mode != 'sc':
                file_qs['indexMode'] = 'true'
            suffix = mode_suffixes.get(mode, '')
            return update_url_query(f'https://r.dcs.redcdn.pl/{mode}/o2/{tenant}/{path}.livx{suffix}', file_qs)

        # no id or title for a transmission. making ones up.
        title = path
        for filler in ('/live', 'live/', '/channel', 'channel/'):
            title = title.replace(filler, '')
        title = title.strip('/')
        video_id = join_nonempty(title.replace('/', '-'), start_time, stop_time)

        formats = []
        ism_doc = None
        # downloading the manifest separately here instead of _extract_ism_formats to also get some stream metadata
        ism_res = self._download_xml_handle(
            livx_mode('livess'), video_id,
            note='Downloading ISM manifest',
            errnote='Failed to download ISM manifest',
            fatal=False)
        if ism_res is not False:
            ism_doc, ism_urlh = ism_res
            formats, _ = self._parse_ism_formats_and_subtitles(ism_doc, ism_urlh.url, 'ss')

        # Any status is accepted here; only a 200 response yields a format
        nvr_urlh = self._request_webpage(
            HEADRequest(livx_mode('nvr')), video_id, 'Follow flv file redirect', fatal=False,
            expected_status=lambda _: True)
        if nvr_urlh and nvr_urlh.status == 200:
            formats.append({
                'url': nvr_urlh.url,
                'ext': 'flv',
                'format_id': 'direct-0',
                'preference': -1,  # might be slow
            })

        formats.extend(self._extract_mpd_formats(livx_mode('livedash'), video_id, mpd_id='dash', fatal=False))
        formats.extend(self._extract_m3u8_formats(
            livx_mode('livehls'), video_id, m3u8_id='hls', ext='mp4', fatal=False))

        # Duration is read from the manifest's Duration attribute and divided
        # by its TimeScale (10000000 when that attribute is absent)
        time_scale = traverse_obj(ism_doc, ('@TimeScale', {int_or_none})) or 10000000
        to_seconds = functools.partial(float_or_none, scale=time_scale)
        duration = traverse_obj(ism_doc, ('@Duration', {to_seconds})) or None

        if traverse_obj(ism_doc, '@IsLive') == 'TRUE':
            live_status = 'is_live'
        else:
            live_status = 'was_live' if duration else None

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'duration': duration,
            'live_status': live_status,
        }

View File

@ -7,6 +7,7 @@ from ..utils import (
str_to_int,
unified_strdate,
url_or_none,
urljoin,
)
@ -79,7 +80,7 @@ class RedTubeIE(InfoExtractor):
'media definitions', default='{}'),
video_id, fatal=False)
for media in medias if isinstance(medias, list) else []:
format_url = url_or_none(media.get('videoUrl'))
format_url = urljoin('https://www.redtube.com', media.get('videoUrl'))
if not format_url:
continue
format_id = media.get('format')

View File

@ -18,10 +18,10 @@ from ..utils.traversal import traverse_obj
class Rule34VideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rule34video\.com/videos/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?rule34video\.com/videos?/(?P<id>\d+)'
_TESTS = [
{
'url': 'https://rule34video.com/videos/3065157/shot-it-mmd-hmv/',
'url': 'https://rule34video.com/video/3065157/shot-it-mmd-hmv/',
'md5': 'ffccac2c23799dabbd192621ae4d04f3',
'info_dict': {
'id': '3065157',

View File

@ -383,7 +383,7 @@ class RumbleChannelIE(InfoExtractor):
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
break
raise
for video_url in re.findall(r'class=video-item--a\s?href=([^>]+\.html)', webpage):
for video_url in re.findall(r'class="[^>"]*videostream__link[^>]+href="([^"]+\.html)"', webpage):
yield self.url_result('https://rumble.com' + video_url)
def _real_extract(self, url):

218
yt_dlp/extractor/sejmpl.py Normal file
View File

@ -0,0 +1,218 @@
import datetime
from .common import InfoExtractor
from .redge import RedCDNLivxIE
from ..utils import (
clean_html,
join_nonempty,
js_to_json,
strip_or_none,
update_url_query,
)
from ..utils.traversal import traverse_obj
def is_dst(date):
    """Return whether *date* lies between the year's two daylight-saving switches.

    The window opens on the last Sunday of March at 02:00 and closes on the
    last Sunday of October at 03:00, both bounds inclusive. *date* is compared
    against naive datetimes built from its own year.
    """
    def last_sunday(month, hour):
        # March and October both have 31 days. isoweekday() is 7 on Sunday,
        # so `% 7` gives the number of days to step back (0 if already Sunday).
        month_end = datetime.datetime(date.year, month, 31)
        sunday = month_end - datetime.timedelta(days=month_end.isoweekday() % 7)
        return sunday.replace(hour=hour)

    dst_begins = last_sunday(3, 2)
    dst_ends = last_sunday(10, 3)
    return dst_begins <= date <= dst_ends
def rfc3339_to_atende(date):
    """Convert an ISO-format date string to milliseconds since 2001-01-01T00:00:00Z.

    The parsed date is moved forward by one hour when ``is_dst`` reports it as
    falling inside the daylight-saving window.
    """
    moment = datetime.datetime.fromisoformat(date)
    if is_dst(moment):
        moment = moment + datetime.timedelta(hours=1)
    # 978307200 is the Unix timestamp of 2001-01-01T00:00:00Z
    # NOTE(review): for a naive datetime, timestamp() uses the system local
    # timezone - confirm that this is what the callers expect
    return int((moment.timestamp() - 978307200) * 1000)
class SejmIE(InfoExtractor):
_VALID_URL = (
r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp(?:\?[^#]*)?#(?P<id>[\dA-F]+)',
r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp\?(?:[^#]+&)?unid=(?P<id>[\dA-F]+)',
r'https?://sejm-embed\.redcdn\.pl/[Ss]ejm(?P<term>\d+)\.nsf/VideoFrame\.xsp/(?P<id>[\dA-F]+)',
)
IE_NAME = 'sejm'
_TESTS = [{
# multiple cameras, Polish SL interpreter
'url': 'https://www.sejm.gov.pl/Sejm10.nsf/transmisje_arch.xsp#6181EF1AD9CEEBB5C1258A6D006452B5',
'info_dict': {
'id': '6181EF1AD9CEEBB5C1258A6D006452B5',
'title': '1. posiedzenie Sejmu X kadencji',
'duration': 20145,
'live_status': 'was_live',
'location': 'Sala Posiedzeń',
},
'playlist': [{
'info_dict': {
'id': 'ENC01-722340000000-722360145000',
'ext': 'mp4',
'duration': 20145,
'title': '1. posiedzenie Sejmu X kadencji - ENC01',
'live_status': 'was_live',
},
}, {
'info_dict': {
'id': 'ENC30-722340000000-722360145000',
'ext': 'mp4',
'duration': 20145,
'title': '1. posiedzenie Sejmu X kadencji - ENC30',
'live_status': 'was_live',
},
}, {
'info_dict': {
'id': 'ENC31-722340000000-722360145000',
'ext': 'mp4',
'duration': 20145,
'title': '1. posiedzenie Sejmu X kadencji - ENC31',
'live_status': 'was_live',
},
}, {
'info_dict': {
'id': 'ENC32-722340000000-722360145000',
'ext': 'mp4',
'duration': 20145,
'title': '1. posiedzenie Sejmu X kadencji - ENC32',
'live_status': 'was_live',
},
}, {
# sign lang interpreter
'info_dict': {
'id': 'Migacz-ENC01-1-722340000000-722360145000',
'ext': 'mp4',
'duration': 20145,
'title': '1. posiedzenie Sejmu X kadencji - Migacz-ENC01',
'live_status': 'was_live',
},
}],
}, {
'url': 'https://www.sejm.gov.pl/Sejm8.nsf/transmisje.xsp?unid=9377A9D65518E9A5C125808E002E9FF2',
'info_dict': {
'id': '9377A9D65518E9A5C125808E002E9FF2',
'title': 'Debata "Lepsza Polska: obywatelska"',
'description': 'KP .Nowoczesna',
'duration': 8770,
'live_status': 'was_live',
'location': 'sala kolumnowa im. Kazimierza Pużaka (bud. C-D)',
},
'playlist': [{
'info_dict': {
'id': 'ENC08-1-503831270000-503840040000',
'ext': 'mp4',
'duration': 8770,
'title': 'Debata "Lepsza Polska: obywatelska" - ENC08',
'live_status': 'was_live',
},
}],
}, {
# 7th term is very special, since it does not use redcdn livx
'url': 'https://www.sejm.gov.pl/sejm7.nsf/transmisje_arch.xsp?rok=2015&month=11#A6E6D475ECCC6FE5C1257EF90034817F',
'info_dict': {
'id': 'A6E6D475ECCC6FE5C1257EF90034817F',
'title': 'Konferencja prasowa - Stanowisko SLD ws. składu nowego rządu',
'description': 'SLD - Biuro Prasowe Klubu',
'duration': 514,
'location': 'sala 101/bud. C',
'live_status': 'was_live',
},
'playlist': [{
'info_dict': {
'id': 'A6E6D475ECCC6FE5C1257EF90034817F',
'ext': 'mp4',
'title': 'Konferencja prasowa - Stanowisko SLD ws. składu nowego rządu',
'duration': 514,
},
}],
}, {
'url': 'https://sejm-embed.redcdn.pl/Sejm10.nsf/VideoFrame.xsp/FED58EABB97FBD53C1258A7400386492',
'only_matching': True,
}]
def _real_extract(self, url):
    """Return a playlist with one entry per camera feed of a transmission.

    The embed frame page provides the camera list and the sign language
    interpreter URL; the JSON endpoint provides title, status, description,
    location and the start/stop timestamps.
    """
    term, video_id = self._match_valid_url(url).group('term', 'id')
    frame = self._download_webpage(
        f'https://sejm-embed.redcdn.pl/Sejm{term}.nsf/VideoFrame.xsp/{video_id}',
        video_id)
    # the endpoint is named "transmisje_arch", yet it serves live streams as well
    data = self._download_json(
        f'https://www.sejm.gov.pl/Sejm{term}.nsf/transmisje_arch.xsp/json/{video_id}',
        video_id)
    params = data['params']
    title = strip_or_none(data.get('title'))

    status = data.get('status')
    if status == 'VIDEO_ENDED':
        live_status = 'was_live'
    elif status == 'VIDEO_PLAYING':
        live_status = 'is_live'
    else:
        live_status = None
        self.report_warning(f'unknown status: {status}')

    start_time = rfc3339_to_atende(params['start'])
    # For a running transmission the stop time is only the *expected* end of
    # the session and may shift while it is on air. Passing it on would cut
    # the stream off at that moment although the session keeps going, so the
    # stop time (and hence the duration) is used for finished streams only.
    stop_time = duration = None
    if live_status == 'was_live':
        stop_time = rfc3339_to_atende(params['stop'])
        duration = (stop_time - start_time) // 1000

    entries = []

    def add_entry(file, legacy_file=False):
        """Append a playlist entry for `file`; falsy values are ignored."""
        if not file:
            return
        stream_url = self._proto_relative_url(file)

        if legacy_file:
            # direct file: it is reported under the transmission's own id and title
            entries.append({
                'url': stream_url,
                'duration': duration,
                'id': video_id,
                'title': title,
            })
            return

        stream_url = update_url_query(stream_url, {'startTime': start_time})
        if stop_time is not None:
            stream_url = update_url_query(stream_url, {'stopTime': stop_time})
        stream_id = self._search_regex(r'/o2/sejm/([^/]+)/[^./]+\.livx', stream_url, 'stream id')
        entries.append({
            'url': stream_url,
            'duration': duration,
            '_type': 'url_transparent',
            'ie_key': RedCDNLivxIE.ie_key(),
            'id': stream_id,
            'title': join_nonempty(title, stream_id, delim=' - '),
        })

    cameras = self._search_json(
        r'var\s+cameras\s*=', frame, 'camera list', video_id,
        contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json,
        fatal=False) or []
    for camera_file in traverse_obj(cameras, (..., 'file', {dict})):
        flv_url = camera_file.get('flv')
        mp4_url = camera_file.get('mp4')
        if flv_url:
            add_entry(flv_url)
        elif mp4_url:
            # mp4 files exist only for the 7th term: there are no streams
            # before it, and from the 8th term on everything is redcdn livx
            add_entry(mp4_url, legacy_file=True)
        else:
            self.report_warning('Unknown camera stream type found')

    if params.get('mig'):
        sign_language_url = self._search_regex(
            r"var sliUrl\s*=\s*'([^']+)'", frame, 'sign language interpreter url', fatal=False)
        add_entry(sign_language_url)

    return {
        '_type': 'playlist',
        'entries': entries,
        'id': video_id,
        'title': title,
        'description': clean_html(data.get('desc')) or None,
        'duration': duration,
        'live_status': live_status,
        'location': strip_or_none(data.get('location')),
    }

View File

@ -10,6 +10,7 @@ from ..utils import (
parse_duration,
traverse_obj,
try_call,
url_or_none,
urljoin,
variadic,
)
@ -83,6 +84,7 @@ class TxxxIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'thumbnail': 'https://tn.txxx.tube/contents/videos_sources/16574000/16574965/screenshots/1.jpg',
}
}, {
'url': 'https://txxx.tube/videos/16574965/digital-desire-malena-morgan/',
@ -98,6 +100,7 @@ class TxxxIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'thumbnail': 'https://tn.txxx.tube/contents/videos_sources/16574000/16574965/screenshots/1.jpg',
}
}, {
'url': 'https://vxxx.com/video-68925/',
@ -113,6 +116,7 @@ class TxxxIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'thumbnail': 'https://tn.vxxx.com/contents/videos_sources/68000/68925/screenshots/1.jpg',
}
}, {
'url': 'https://hclips.com/videos/6291073/malena-morgan-masturbates-her-sweet/',
@ -128,6 +132,7 @@ class TxxxIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'thumbnail': 'https://hctn.nv7s.com/contents/videos_sources/6291000/6291073/screenshots/1.jpg',
}
}, {
'url': 'https://hdzog.com/videos/67063/gorgeous-malena-morgan-will-seduce-you-at-the-first-glance/',
@ -143,6 +148,7 @@ class TxxxIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'thumbnail': 'https://tn.hdzog.com/contents/videos_sources/67000/67063/screenshots/1.jpg',
}
}, {
'url': 'https://hdzog.tube/videos/67063/gorgeous-malena-morgan-will-seduce-you-at-the-first-glance/',
@ -158,6 +164,7 @@ class TxxxIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'thumbnail': 'https://tn.hdzog.com/contents/videos_sources/67000/67063/screenshots/1.jpg',
}
}, {
'url': 'https://hotmovs.com/videos/8789287/unbelievable-malena-morgan-performing-in-incredible-masturantion/',
@ -173,6 +180,7 @@ class TxxxIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'thumbnail': 'https://tn.hotmovs.com/contents/videos_sources/8789000/8789287/screenshots/10.jpg',
}
}, {
'url': 'https://hotmovs.tube/videos/8789287/unbelievable-malena-morgan-performing-in-incredible-masturantion/',
@ -188,6 +196,7 @@ class TxxxIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'thumbnail': 'https://tn.hotmovs.com/contents/videos_sources/8789000/8789287/screenshots/10.jpg',
}
}, {
'url': 'https://inporn.com/video/517897/malena-morgan-solo/',
@ -203,6 +212,7 @@ class TxxxIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'thumbnail': 'https://iptn.m3pd.com/media/tn/sources/517897_1.jpg',
}
}, {
'url': 'https://privatehomeclips.com/videos/3630599/malena-morgan-cam-show/',
@ -218,6 +228,7 @@ class TxxxIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'thumbnail': 'https://hctn.nv7s.com/contents/videos_sources/3630000/3630599/screenshots/15.jpg',
}
}, {
'url': 'https://tubepornclassic.com/videos/1015455/mimi-rogers-full-body-massage-nude-compilation/',
@ -233,6 +244,7 @@ class TxxxIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'thumbnail': 'https://tn.tubepornclassic.com/contents/videos_sources/1015000/1015455/screenshots/6.jpg',
}
}, {
'url': 'https://upornia.com/videos/1498858/twistys-malena-morgan-starring-at-dr-morgan-baller/',
@ -248,6 +260,7 @@ class TxxxIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'thumbnail': 'https://tn.upornia.com/contents/videos_sources/1498000/1498858/screenshots/1.jpg',
}
}, {
'url': 'https://upornia.tube/videos/1498858/twistys-malena-morgan-starring-at-dr-morgan-baller/',
@ -263,6 +276,7 @@ class TxxxIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'thumbnail': 'https://tn.upornia.com/contents/videos_sources/1498000/1498858/screenshots/1.jpg',
}
}, {
'url': 'https://vjav.com/videos/11761/yui-hatano-in-if-yui-was-my-girlfriend2/',
@ -278,6 +292,7 @@ class TxxxIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'thumbnail': 'https://tn.vjav.com/contents/videos_sources/11000/11761/screenshots/23.jpg',
}
}, {
'url': 'https://vjav.tube/videos/11761/yui-hatano-in-if-yui-was-my-girlfriend2/',
@ -293,6 +308,7 @@ class TxxxIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'thumbnail': 'https://tn.vjav.com/contents/videos_sources/11000/11761/screenshots/23.jpg',
}
}, {
'url': 'https://voyeurhit.com/videos/332875/charlotte-stokely-elle-alexandra-malena-morgan-lingerie/',
@ -308,6 +324,7 @@ class TxxxIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'thumbnail': 'https://tn.voyeurhit.com/contents/videos_sources/332000/332875/screenshots/1.jpg',
}
}, {
'url': 'https://voyeurhit.tube/videos/332875/charlotte-stokely-elle-alexandra-malena-morgan-lingerie/',
@ -323,6 +340,7 @@ class TxxxIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'thumbnail': 'https://tn.voyeurhit.com/contents/videos_sources/332000/332875/screenshots/1.jpg',
}
}]
_WEBPAGE_TESTS = [{
@ -338,6 +356,7 @@ class TxxxIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'thumbnail': 'https://hctn.nv7s.com/contents/videos_sources/5119000/5119660/screenshots/1.jpg',
}
}]
@ -371,6 +390,7 @@ class TxxxIE(InfoExtractor):
'like_count': int_or_none(traverse_obj(video_info, ('video', 'statistics', 'likes'))),
'dislike_count': int_or_none(traverse_obj(video_info, ('video', 'statistics', 'dislikes'))),
'age_limit': 18,
'thumbnail': traverse_obj(video_info, ('video', 'thumbsrc', {url_or_none})),
'formats': get_formats(host, video_file),
}