Compare commits

13 Commits

Author SHA1 Message Date
bashonly
1ee7623470
[build] Add cole to maintainers in pyproject.toml
Authored by: bashonly
2024-02-16 17:46:27 -06:00
bashonly
ccc2d03e8c
Merge branch 'yt-dlp:master' into features/docs
2024-02-16 17:44:28 -06:00
diman8
ddd4b5e10a
[ie/SVTPage] Fix extractor (#8938)
Closes #8930
Authored by: diman8
2024-02-16 16:59:25 +00:00
nixxo
f788149237
[ie/rai] Filter unavailable formats (#9189)
Closes #9154
Authored by: nixxo
2024-02-16 00:20:58 +00:00
barsnick
017adb28e7
[ie/LinkedIn] Fix metadata and extract subtitles (#9056)
Closes #9003
Authored by: barsnick
2024-02-16 00:19:00 +00:00
ringus1
2e30b5567b
[ie/facebook] Improve extraction
Partially addresses #4311

Authored by: jingtra, ringus1

Co-authored-by: Jing Kjeldsen <jingtra@gmail.com>
2024-02-15 16:51:43 -06:00
bashonly
beaa1a4455
[build:Makefile] Ensure compatibility with BSD make (#9210)
Authored by: bashonly
2024-02-15 22:42:43 +00:00
Florian Meißner
fb44020fa9
[build:Makefile] Fix man pages generated by pandoc>=3 (#7047)
Closes #7046, Closes #8481
Authored by: t-nil
2024-02-14 21:12:34 +00:00
sepro
3dc9232e1a
[ie/MagellanTV] Support episodes (#9199)
Authored by: seproDev
2024-02-13 20:53:17 +01:00
sepro
9401736fd0
[ie/LeFigaroVideoEmbed] Fix extractor (#9198)
Authored by: seproDev
2024-02-13 20:52:41 +01:00
sepro
cd0443fb14
[ie/Funk] Fix extractor (#9194)
Authored by: seproDev
2024-02-13 04:12:17 +01:00
sepro
03536126d3
[ie/CrooksAndLiars] Fix extractor (#9192)
Authored by: seproDev
2024-02-13 04:11:40 +01:00
sepro
1ed5ee2f04
[ie/Ant1NewsGrEmbed] Fix extractor (#9191)
Authored by: seproDev
2024-02-13 04:11:17 +01:00
11 changed files with 158 additions and 84 deletions

Makefile

@@ -38,11 +38,13 @@ MANDIR ?= $(PREFIX)/man
 SHAREDIR ?= $(PREFIX)/share
 PYTHON ?= /usr/bin/env python3
-# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
-SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi)
+# $(shell) and $(error) are no-ops in BSD Make and the != variable assignment operator is not supported by GNU Make <4.0
+VERSION_CHECK != echo supported
+VERSION_CHECK ?= $(error GNU Make 4+ or BSD Make is required)
+CHECK_VERSION := $(VERSION_CHECK)
-# set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2
-MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi)
+# set markdown input format to "markdown-smart" for pandoc version 2+ and to "markdown" for pandoc prior to version 2
+MARKDOWN != if [ "`pandoc -v | head -n1 | cut -d' ' -f2 | head -c1`" -ge "2" ]; then echo markdown-smart; else echo markdown; fi
 install: lazy-extractors yt-dlp yt-dlp.1 completions
 	mkdir -p $(DESTDIR)$(BINDIR)
@@ -73,17 +75,17 @@ test:
 offlinetest: codetest
 	$(PYTHON) -m pytest -k "not download"
-CODE_FOLDERS := $(shell find yt_dlp -type d -not -name '__*' -exec sh -c 'test -e "$$1"/__init__.py' sh {} \; -print)
-CODE_FILES := $(shell for f in $(CODE_FOLDERS); do echo "$$f" | awk '{gsub(/\/[^\/]+/,"/*"); print $$1"/*.py"}'; done | sort -u)
+CODE_FOLDERS != find yt_dlp -type f -name '__init__.py' -exec dirname {} \+ | grep -v '/__' | sort
+CODE_FILES != for f in $(CODE_FOLDERS) ; do echo "$$f" | sed 's,$$,/*.py,' ; done
 yt-dlp: $(CODE_FILES)
 	mkdir -p zip
 	for d in $(CODE_FOLDERS) ; do \
 		mkdir -p zip/$$d ;\
 		cp -pPR $$d/*.py zip/$$d/ ;\
 	done
-	cd zip ; touch -t 200001010101 $(CODE_FILES)
+	(cd zip && touch -t 200001010101 $(CODE_FILES))
 	mv zip/yt_dlp/__main__.py zip/
-	cd zip ; zip -q ../yt-dlp $(CODE_FILES) __main__.py
+	(cd zip && zip -q ../yt-dlp $(CODE_FILES) __main__.py)
 	rm -rf zip
 	echo '#!$(PYTHON)' > yt-dlp
 	cat yt-dlp.zip >> yt-dlp
@@ -127,7 +129,7 @@ completions/fish/yt-dlp.fish: $(CODE_FILES) devscripts/fish-completion.in
 	mkdir -p completions/fish
 	$(PYTHON) devscripts/fish-completion.py
-_EXTRACTOR_FILES = $(shell find yt_dlp/extractor -name '*.py' -and -not -name 'lazy_extractors.py')
+_EXTRACTOR_FILES != find yt_dlp/extractor -name '*.py' -and -not -name 'lazy_extractors.py'
 yt_dlp/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
 	$(PYTHON) devscripts/make_lazy_extractors.py $@
@@ -141,6 +143,7 @@ yt-dlp.tar.gz: all
 		--exclude '__pycache__' \
 		--exclude '.pytest_cache' \
 		--exclude '.git' \
+		--exclude '__pyinstaller' \
 		-- \
 		README.md supportedsites.md Changelog.md LICENSE \
 		CONTRIBUTING.md Collaborators.md CONTRIBUTORS AUTHORS \

pyproject.toml

@@ -8,6 +8,7 @@ maintainers = [
     {name = "pukkandan", email = "pukkandan.ytdlp@gmail.com"},
     {name = "Grub4K", email = "contact@grub4k.xyz"},
     {name = "bashonly", email = "bashonly@protonmail.com"},
+    {name = "coletdjnz", email = "coletdjnz@protonmail.com"},
 ]
 description = "A youtube-dl fork with additional features and patches"
 readme = "README.md"

yt_dlp/extractor/antenna.py

@@ -78,14 +78,14 @@ class Ant1NewsGrArticleIE(AntennaBaseIE):
     _TESTS = [{
         'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron',
-        'md5': '294f18331bb516539d72d85a82887dcc',
+        'md5': '57eb8d12181f0fa2b14b0b138e1de9b6',
         'info_dict': {
             'id': '_xvg/m_cmbatw=',
             'ext': 'mp4',
             'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411',
-            'timestamp': 1603092840,
-            'upload_date': '20201019',
-            'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/756206d2-d640-40e2-b201-3555abdfc0db.jpg',
+            'timestamp': 1666166520,
+            'upload_date': '20221019',
+            'thumbnail': 'https://ant1media.azureedge.net/imgHandler/1920/756206d2-d640-40e2-b201-3555abdfc0db.jpg',
         },
     }, {
         'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn',
@@ -117,7 +117,7 @@ class Ant1NewsGrEmbedIE(AntennaBaseIE):
     _BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player'
     _VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)'
     _EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']
-    _API_PATH = '/news/templates/data/jsonPlayer'
+    _API_PATH = '/templates/data/jsonPlayer'
     _TESTS = [{
         'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377',

yt_dlp/extractor/crooksandliars.py

@@ -33,10 +33,7 @@ class CrooksAndLiarsIE(InfoExtractor):
         webpage = self._download_webpage(
            'http://embed.crooksandliars.com/embed/%s' % video_id, video_id)
-        manifest = self._parse_json(
-            self._search_regex(
-                r'var\s+manifest\s*=\s*({.+?})\n', webpage, 'manifest JSON'),
-            video_id)
+        manifest = self._search_json(r'var\s+manifest\s*=', webpage, 'manifest JSON', video_id)
         quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high'))
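
Aside: the new one-line _search_json() call bundles the regex search and JSON decoding that the removed lines did by hand. A minimal standalone sketch of that pattern (the toy page and manifest contents are invented; the real helper locates the balanced JSON object more robustly):

import json
import re

# Invented example page; only the shape matters
webpage = '<script>var manifest = {"flavors": [{"id": "mp4_high", "url": "https://example.com/v.mp4"}]};</script>'

# A regex finds where the object starts and json.loads decodes it,
# roughly what the single _search_json() call above does for this page
manifest = json.loads(re.search(r'var\s+manifest\s*=\s*({.+})\s*;', webpage).group(1))
print(manifest['flavors'][0]['id'])  # mp4_high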

yt_dlp/extractor/facebook.py

@@ -500,6 +500,7 @@ class FacebookIE(InfoExtractor):
             webpage, 'description', default=None)
         uploader_data = (
             get_first(media, ('owner', {dict}))
+            or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
             or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
             or get_first(post, ('node', 'actors', ..., {dict}))
             or get_first(post, ('event', 'event_creator', {dict})) or {})
@@ -583,8 +584,8 @@
         def extract_relay_prefetched_data(_filter):
             return traverse_obj(extract_relay_data(_filter), (
                 'require', (None, (..., ..., ..., '__bbox', 'require')),
-                lambda _, v: 'RelayPrefetchedStreamCache' in v, ..., ...,
-                '__bbox', 'result', 'data', {dict}), get_all=False) or {}
+                lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
+                ..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {}
         if not video_data:
             server_js_data = self._parse_json(self._search_regex([

yt_dlp/extractor/funk.py

@@ -1,25 +1,29 @@
 from .common import InfoExtractor
 from .nexx import NexxIE
-from ..utils import (
-    int_or_none,
-    str_or_none,
-)
 class FunkIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821',
-        'md5': '8dd9d9ab59b4aa4173b3197f2ea48e81',
+        'md5': '8610449476156f338761a75391b0017d',
         'info_dict': {
             'id': '1155821',
             'ext': 'mp4',
             'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
-            'description': 'md5:a691d0413ef4835588c5b03ded670c1f',
+            'description': 'md5:2a03b67596eda0d1b5125c299f45e953',
             'timestamp': 1514507395,
             'upload_date': '20171229',
+            'duration': 426.0,
+            'cast': ['United Creators PMB GmbH'],
+            'thumbnail': 'https://assets.nexx.cloud/media/75/56/79/3YKUSJN1LACN0CRxL.jpg',
+            'display_id': 'die-lustigsten-instrumente-aus-dem-internet-teil-2',
+            'alt_title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet Teil 2',
+            'season_number': 0,
+            'season': 'Season 0',
+            'episode_number': 0,
+            'episode': 'Episode 0',
         },
     }, {
         'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
         'only_matching': True,
@@ -27,18 +31,10 @@ class FunkIE(InfoExtractor):
     def _real_extract(self, url):
         display_id, nexx_id = self._match_valid_url(url).groups()
-        video = self._download_json(
-            'https://www.funk.net/api/v4.0/videos/' + nexx_id, nexx_id)
         return {
             '_type': 'url_transparent',
-            'url': 'nexx:741:' + nexx_id,
+            'url': f'nexx:741:{nexx_id}',
             'ie_key': NexxIE.ie_key(),
             'id': nexx_id,
-            'title': video.get('title'),
-            'description': video.get('description'),
-            'duration': int_or_none(video.get('duration')),
-            'channel_id': str_or_none(video.get('channelId')),
             'display_id': display_id,
-            'tags': video.get('tags'),
-            'thumbnail': video.get('imageUrlLandscape'),
         }

yt_dlp/extractor/lefigaro.py

@@ -13,7 +13,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
     _TESTS = [{
         'url': 'https://video.lefigaro.fr/embed/figaro/video/les-francais-ne-veulent-ils-plus-travailler-suivez-en-direct-le-club-le-figaro-idees/',
-        'md5': 'e94de44cd80818084352fcf8de1ce82c',
+        'md5': 'a0c3069b7e4c4526abf0053a7713f56f',
         'info_dict': {
             'id': 'g9j7Eovo',
             'title': 'Les Français ne veulent-ils plus travailler ? Retrouvez Le Club Le Figaro Idées',
@@ -26,7 +26,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
         },
     }, {
         'url': 'https://video.lefigaro.fr/embed/figaro/video/intelligence-artificielle-faut-il-sen-mefier/',
-        'md5': '0b3f10332b812034b3a3eda1ef877c5f',
+        'md5': '319c662943dd777bab835cae1e2d73a5',
         'info_dict': {
             'id': 'LeAgybyc',
             'title': 'Intelligence artificielle : faut-il s’en méfier ?',
@@ -41,7 +41,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
     _WEBPAGE_TESTS = [{
         'url': 'https://video.lefigaro.fr/figaro/video/suivez-en-direct-le-club-le-figaro-international-avec-philippe-gelie-9/',
-        'md5': '3972ddf2d5f8b98699f191687258e2f9',
+        'md5': '6289f9489efb969e38245f31721596fe',
         'info_dict': {
             'id': 'QChnbPYA',
             'title': 'Où en est le couple franco-allemand ? Retrouvez Le Club Le Figaro International',
@@ -55,7 +55,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
         },
     }, {
         'url': 'https://video.lefigaro.fr/figaro/video/la-philosophe-nathalie-sarthou-lajus-est-linvitee-du-figaro-live/',
-        'md5': '3ac0a0769546ee6be41ab52caea5d9a9',
+        'md5': 'f6df814cae53e85937621599d2967520',
         'info_dict': {
             'id': 'QJzqoNbf',
             'title': 'La philosophe Nathalie Sarthou-Lajus est l’invitée du Figaro Live',
@@ -73,7 +73,8 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
-        player_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['pageData']['playerData']
+        player_data = self._search_nextjs_data(
+            webpage, display_id)['props']['pageProps']['initialProps']['pageData']['playerData']
         return self.url_result(
             f'jwplatform:{player_data["videoId"]}', title=player_data.get('title'),

yt_dlp/extractor/linkedin.py

@@ -3,16 +3,15 @@
 from .common import InfoExtractor
 from ..utils import (
-    clean_html,
-    extract_attributes,
     ExtractorError,
+    extract_attributes,
     float_or_none,
-    get_element_by_class,
     int_or_none,
     srt_subtitles_timecode,
-    strip_or_none,
     mimetype2ext,
+    traverse_obj,
     try_get,
+    url_or_none,
     urlencode_postdata,
     urljoin,
 )
@@ -83,15 +82,29 @@ class LinkedInLearningBaseIE(LinkedInBaseIE):
 class LinkedInIE(LinkedInBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/.+?(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)'
     _TESTS = [{
         'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20',
         'info_dict': {
             'id': '6850898786781339649',
             'ext': 'mp4',
-            'title': 'Mishal K. on LinkedIn: #sendinblueviews #toronto #digitalmarketing',
-            'description': 'md5:be125430bab1c574f16aeb186a4d5b19',
-            'creator': 'Mishal K.'
+            'title': 'Mishal K. on LinkedIn: #sendinblueviews #toronto #digitalmarketing #nowhiring #sendinblue…',
+            'description': 'md5:2998a31f6f479376dd62831f53a80f71',
+            'uploader': 'Mishal K.',
+            'thumbnail': 're:^https?://media.licdn.com/dms/image/.*$',
+            'like_count': int
+        },
+    }, {
+        'url': 'https://www.linkedin.com/posts/the-mathworks_2_what-is-mathworks-cloud-center-activity-7151241570371948544-4Gu7',
+        'info_dict': {
+            'id': '7151241570371948544',
+            'ext': 'mp4',
+            'title': 'MathWorks on LinkedIn: What Is MathWorks Cloud Center?',
+            'description': 'md5:95f9d4eeb6337882fb47eefe13d7a40c',
+            'uploader': 'MathWorks',
+            'thumbnail': 're:^https?://media.licdn.com/dms/image/.*$',
+            'like_count': int,
+            'subtitles': 'mincount:1'
         },
     }]
@@ -99,26 +112,30 @@ class LinkedInIE(LinkedInBaseIE):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        title = self._html_extract_title(webpage)
-        description = clean_html(get_element_by_class('share-update-card__update-text', webpage))
-        like_count = int_or_none(get_element_by_class('social-counts-reactions__social-counts-numRections', webpage))
-        creator = strip_or_none(clean_html(get_element_by_class('comment__actor-name', webpage)))
-        sources = self._parse_json(extract_attributes(self._search_regex(r'(<video[^>]+>)', webpage, 'video'))['data-sources'], video_id)
+        video_attrs = extract_attributes(self._search_regex(r'(<video[^>]+>)', webpage, 'video'))
+        sources = self._parse_json(video_attrs['data-sources'], video_id)
         formats = [{
             'url': source['src'],
             'ext': mimetype2ext(source.get('type')),
             'tbr': float_or_none(source.get('data-bitrate'), scale=1000),
         } for source in sources]
+        subtitles = {'en': [{
+            'url': video_attrs['data-captions-url'],
+            'ext': 'vtt',
+        }]} if url_or_none(video_attrs.get('data-captions-url')) else {}
         return {
             'id': video_id,
             'formats': formats,
-            'title': title,
-            'like_count': like_count,
-            'creator': creator,
+            'title': self._og_search_title(webpage, default=None) or self._html_extract_title(webpage),
+            'like_count': int_or_none(self._search_regex(
+                r'\bdata-num-reactions="(\d+)"', webpage, 'reactions', default=None)),
+            'uploader': traverse_obj(
+                self._yield_json_ld(webpage, video_id),
+                (lambda _, v: v['@type'] == 'SocialMediaPosting', 'author', 'name', {str}), get_all=False),
             'thumbnail': self._og_search_thumbnail(webpage),
-            'description': description,
+            'description': self._og_search_description(webpage, default=None),
+            'subtitles': subtitles,
        }
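
Aside: the rewritten extractor now takes its formats and subtitles from the <video> tag's attributes. A minimal standalone sketch of that pattern (assumes yt-dlp is importable; the tag and all attribute values below are invented for illustration):

import json
import re

from yt_dlp.utils import extract_attributes, float_or_none, mimetype2ext

# Invented <video> tag carrying the attributes the extractor relies on
webpage = ('<video data-captions-url="https://example.com/captions.vtt" '
           "data-sources='[{\"src\": \"https://example.com/v.mp4\", \"type\": \"video/mp4\", \"data-bitrate\": 2500}]'></video>")

video_attrs = extract_attributes(re.search(r'(<video[^>]+>)', webpage).group(1))
sources = json.loads(video_attrs['data-sources'])  # the extractor uses self._parse_json()
formats = [{
    'url': source['src'],
    'ext': mimetype2ext(source.get('type')),  # 'video/mp4' -> 'mp4'
    'tbr': float_or_none(source.get('data-bitrate'), scale=1000),
} for source in sources]
subtitles = {'en': [{'url': video_attrs['data-captions-url'], 'ext': 'vtt'}]}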

yt_dlp/extractor/magellantv.py

@@ -28,12 +28,24 @@
             'tags': ['Investigation', 'True Crime', 'Justice', 'Europe'],
         },
         'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://www.magellantv.com/watch/celebration-nation',
+        'info_dict': {
+            'id': 'celebration-nation',
+            'ext': 'mp4',
+            'tags': ['Art & Culture', 'Human Interest', 'Anthropology', 'China', 'History'],
+            'duration': 2640.0,
+            'title': 'Ancestors',
+        },
+        'params': {'skip_download': 'm3u8'},
     }]
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['reactContext']['video']['detail']
+        data = traverse_obj(self._search_nextjs_data(webpage, video_id), (
+            'props', 'pageProps', 'reactContext',
+            (('video', 'detail'), ('series', 'currentEpisode')), {dict}), get_all=False)
         formats, subtitles = self._extract_m3u8_formats_and_subtitles(data['jwpVideoUrl'], video_id)
         return {
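
Aside: the tuple of alternative paths lets the new lookup fall back from the film layout ('video'/'detail') to the series layout ('series'/'currentEpisode'). A minimal sketch of that traverse_obj pattern (assumes yt-dlp is importable; the Next.js payload below is invented):

from yt_dlp.utils import traverse_obj

# Invented payload shaped like an episode page (series layout only)
nextjs_data = {'props': {'pageProps': {'reactContext': {
    'series': {'currentEpisode': {'title': 'Ancestors', 'jwpVideoUrl': 'https://example.com/master.m3u8'}},
}}}}

# The inner tuple is a list of branches; {dict} keeps only dict results and
# get_all=False returns the first branch that yields one
data = traverse_obj(nextjs_data, (
    'props', 'pageProps', 'reactContext',
    (('video', 'detail'), ('series', 'currentEpisode')), {dict}), get_all=False)
assert data['title'] == 'Ancestors'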

yt_dlp/extractor/rai.py

@@ -1,6 +1,7 @@
 import re
 from .common import InfoExtractor
+from ..networking import HEADRequest
 from ..utils import (
     clean_html,
     determine_ext,
@@ -91,7 +92,7 @@ class RaiBaseIE(InfoExtractor):
                 self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
         if not audio_only and not is_live:
-            formats.extend(self._create_http_urls(media_url, relinker_url, formats))
+            formats.extend(self._create_http_urls(media_url, relinker_url, formats, video_id))
         return filter_dict({
             'is_live': is_live,
@@ -99,7 +100,7 @@ class RaiBaseIE(InfoExtractor):
             'formats': formats,
         })
-    def _create_http_urls(self, manifest_url, relinker_url, fmts):
+    def _create_http_urls(self, manifest_url, relinker_url, fmts, video_id):
         _MANIFEST_REG = r'/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4)?(?:\.csmil)?/playlist\.m3u8'
         _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
         _QUALITY = {
@@ -166,6 +167,14 @@ class RaiBaseIE(InfoExtractor):
             'fps': 25,
         }
+        # Check if MP4 download is available
+        try:
+            self._request_webpage(
+                HEADRequest(_MP4_TMPL % (relinker_url, '*')), video_id, 'Checking MP4 availability')
+        except ExtractorError as e:
+            self.to_screen(f'{video_id}: MP4 direct download is not available: {e.cause}')
+            return []
         # filter out single-stream formats
         fmts = [f for f in fmts
                 if not f.get('vcodec') == 'none' and not f.get('acodec') == 'none']
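
Aside: the added hunk probes the templated MP4 URL with a HEAD request and skips the synthesized HTTP formats when the probe fails. A rough standalone sketch of the same idea (not the extractor's code; it assumes the same '&overrideUserAgentRule=mp4-*' URL template and uses plain urllib instead of yt-dlp's HEADRequest):

import urllib.error
import urllib.request

_MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'

def mp4_available(relinker_url, timeout=10):
    # Any HTTP or network error means the direct MP4 variant is not being served
    request = urllib.request.Request(_MP4_TMPL % (relinker_url, '*'), method='HEAD')
    try:
        with urllib.request.urlopen(request, timeout=timeout):
            return True
    except (urllib.error.URLError, OSError):
        return False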

yt_dlp/extractor/svt.py

@@ -7,8 +7,6 @@ from ..utils import (
     determine_ext,
     dict_get,
     int_or_none,
-    str_or_none,
-    strip_or_none,
     traverse_obj,
     try_get,
     unified_timestamp,
@@ -388,15 +386,55 @@
         dict_get(series, ('longDescription', 'shortDescription')))
-class SVTPageIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?svt\.se/(?P<path>(?:[^/]+/)*(?P<id>[^/?&#]+))'
+class SVTPageIE(SVTBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?svt\.se/(?:[^/?#]+/)*(?P<id>[^/?&#]+)'
     _TESTS = [{
+        'url': 'https://www.svt.se/nyheter/lokalt/skane/viktor-18-forlorade-armar-och-ben-i-sepsis-vill-ateruppta-karaten-och-bli-svetsare',
+        'info_dict': {
+            'title': 'Viktor, 18, förlorade armar och ben i sepsis vill återuppta karaten och bli svetsare',
+            'id': 'viktor-18-forlorade-armar-och-ben-i-sepsis-vill-ateruppta-karaten-och-bli-svetsare',
+        },
+        'playlist_count': 2,
+    }, {
+        'url': 'https://www.svt.se/nyheter/lokalt/skane/forsvarsmakten-om-trafikkaoset-pa-e22-kunde-inte-varit-dar-snabbare',
+        'info_dict': {
+            'id': 'jXvk42E',
+            'title': 'Försvarsmakten om trafikkaoset på E22: Kunde inte varit där snabbare',
+            'ext': 'mp4',
+            "duration": 80,
+            'age_limit': 0,
+            'timestamp': 1704370009,
+            'episode': 'Försvarsmakten om trafikkaoset på E22: Kunde inte varit där snabbare',
+            'series': 'Lokala Nyheter Skåne',
+            'upload_date': '20240104'
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }, {
+        'url': 'https://www.svt.se/nyheter/svtforum/2023-tungt-ar-for-svensk-media',
+        'info_dict': {
+            'title': '2023 tungt år för svensk media',
+            'id': 'ewqAZv4',
+            'ext': 'mp4',
+            "duration": 3074,
+            'age_limit': 0,
+            'series': '',
+            'timestamp': 1702980479,
+            'upload_date': '20231219',
+            'episode': 'Mediestudier'
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }, {
         'url': 'https://www.svt.se/sport/ishockey/bakom-masken-lehners-kamp-mot-mental-ohalsa',
         'info_dict': {
             'id': '25298267',
             'title': 'Bakom masken Lehners kamp mot mental ohälsa',
         },
         'playlist_count': 4,
+        'skip': 'Video is gone'
     }, {
         'url': 'https://www.svt.se/nyheter/utrikes/svenska-andrea-ar-en-mil-fran-branderna-i-kalifornien',
         'info_dict': {
@@ -404,6 +442,7 @@ class SVTPageIE(InfoExtractor):
             'title': 'Svenska Andrea redo att fly sitt hem i Kalifornien',
         },
         'playlist_count': 2,
+        'skip': 'Video is gone'
     }, {
         # only programTitle
         'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
@@ -414,6 +453,7 @@ class SVTPageIE(InfoExtractor):
             'duration': 27,
             'age_limit': 0,
         },
+        'skip': 'Video is gone'
     }, {
         'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1',
         'only_matching': True,
@@ -427,26 +467,23 @@ class SVTPageIE(InfoExtractor):
         return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
     def _real_extract(self, url):
-        path, display_id = self._match_valid_url(url).groups()
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        title = self._og_search_title(webpage)
-        article = self._download_json(
-            'https://api.svt.se/nss-api/page/' + path, display_id,
-            query={'q': 'articles'})['articles']['content'][0]
-        entries = []
-        def _process_content(content):
-            if content.get('_type') in ('VIDEOCLIP', 'VIDEOEPISODE'):
-                video_id = compat_str(content['image']['svtId'])
-                entries.append(self.url_result(
-                    'svt:' + video_id, SVTPlayIE.ie_key(), video_id))
-        for media in article.get('media', []):
-            _process_content(media)
-        for obj in article.get('structuredBody', []):
-            _process_content(obj.get('content') or {})
-        return self.playlist_result(
-            entries, str_or_none(article.get('id')),
-            strip_or_none(article.get('title')))
+        urql_state = self._search_json(
+            r'window\.svt\.nyh\.urqlState\s*=', webpage, 'json data', display_id)
+        data = traverse_obj(urql_state, (..., 'data', {str}, {json.loads}), get_all=False) or {}
+        def entries():
+            for video_id in set(traverse_obj(data, (
+                    'page', (('topMedia', 'svtId'), ('body', ..., 'video', 'svtId')), {str}
+            ))):
+                info = self._extract_video(
+                    self._download_json(f'https://api.svt.se/video/{video_id}', video_id), video_id)
+                info['title'] = title
+                yield info
+        return self.playlist_result(entries(), display_id, title)
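
Aside: the rewritten SVTPageIE pulls its video IDs out of the page's urql (GraphQL client) cache instead of the old nss-api article endpoint. A minimal sketch of the two traversal steps it uses (assumes yt-dlp is importable; the urqlState blob below is invented):

import json

from yt_dlp.utils import traverse_obj

# Invented cache blob: each urql entry stores its GraphQL result as a JSON string under 'data'
urql_state = {
    '1234': {'data': json.dumps({'page': {
        'topMedia': {'svtId': 'jXvk42E'},
        'body': [{'video': {'svtId': 'jXvk42E'}}, {'text': 'no video in this block'}],
    }})},
    '5678': {'data': None},
}

# `...` branches over every cache entry, {str} keeps only string values and
# {json.loads} decodes them; get_all=False takes the first decoded result
data = traverse_obj(urql_state, (..., 'data', {str}, {json.loads}), get_all=False) or {}

# Collect clip IDs from the top media and from every video block in the article body
video_ids = set(traverse_obj(data, (
    'page', (('topMedia', 'svtId'), ('body', ..., 'video', 'svtId')), {str})))
assert video_ids == {'jXvk42E'}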