Merge 7ab6662997 into da252d9d32

[cleanup] Misc (#11554 )
Closes #6884 Authored by: bashonly, Grub4K, seproDev Co-authored-by: Simon Sawicki <contact@grub4k.xyz> Co-authored-by: sepro <sepro@sepr0.com>
2024-11-24 16:21:24 +01:00 · 2024-11-18 02:47:52 +02:00 · 2024-11-17 23:25:05 +00:00 · 2024-11-17 21:37:15 +00:00 · 2024-11-17 21:16:22 +00:00 · 2024-11-17 21:06:50 +00:00
14 changed files with 533 additions and 256 deletions
--- a/README.md
+++ b/README.md
@ -342,8 +342,9 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
                                    extractor plugins; postprocessor plugins can
                                    only be loaded from the default plugin
                                    directories
-    --flat-playlist                 Do not extract the videos of a playlist,
-                                    only list them
+    --flat-playlist                 Do not extract a playlist's URL result
+                                    entries; some entry metadata may be missing
+                                    and downloading may be bypassed
    --no-flat-playlist              Fully extract the videos of a playlist
                                    (default)
    --live-from-start               Download livestreams from the start.
@ -1869,6 +1870,9 @@ The following extractors use this feature:
 #### digitalconcerthall
 * `prefer_combined_hls`: Prefer extracting combined/pre-merged video and audio HLS formats. This will exclude 4K/HEVC video and lossless/FLAC audio formats, which are only available as split video/audio HLS formats

+#### sonylivseries
+* `sort_order`: Episode sort order for series extraction - one of `asc` (ascending, oldest first) or `desc` (descending, newest first). Default is `asc`
+
 **Note**: These options may be changed/removed in the future without concern for backward compatibility

 <!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
--- a/devscripts/changelog_override.json
+++ b/devscripts/changelog_override.json
@ -234,5 +234,10 @@
        "when": "57212a5f97ce367590aaa5c3e9a135eead8f81f7",
        "short": "[ie/vimeo] Fix API retries (#11351)",
        "authors": ["bashonly"]
+    },
+    {
+        "action": "add",
+        "when": "52c0ffe40ad6e8404d93296f575007b05b04c686",
+        "short": "[priority] **Login with OAuth is no longer supported for YouTube**\nDue to a change made by the site, yt-dlp is no longer able to support OAuth login for YouTube. [Read more](https://github.com/yt-dlp/yt-dlp/issues/11462#issuecomment-2471703090)"
    }
 ]
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@ -11,7 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from test.helper import FakeYDL, is_download_test, md5
 from yt_dlp.extractor import (
    NPOIE,
-    NRKTVIE,
+    NRKIE,
    PBSIE,
    CeskaTelevizeIE,
    ComedyCentralIE,
@ -299,15 +299,16 @@ class TestMTVSubtitles(BaseTestSubtitles):

@is_download_test
 class TestNRKSubtitles(BaseTestSubtitles):
-    url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
-    IE = NRKTVIE
+    url = 'nrk:DMPV73000411'  # http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1
+    IE = NRKIE

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), {'nb-ttv'})
+        self.assertEqual(set(subtitles.keys()), {'nb-ttv', 'no'})
        self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149')
+        self.assertEqual(md5(subtitles['no']), 'fc01036074116d245ddc6ba6f679263b')


@is_download_test
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -1402,7 +1402,6 @@ from .nrk import (
    NRKSkoleIE,
    NRKTVDirekteIE,
    NRKTVEpisodeIE,
-    NRKTVEpisodesIE,
    NRKTVSeasonIE,
    NRKTVSeriesIE,
 )
--- a/yt_dlp/extractor/bandlab.py
+++ b/yt_dlp/extractor/bandlab.py
@ -1,4 +1,3 @@
-
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@ -3767,7 +3767,7 @@ class InfoExtractor:
        """ Merge subtitle dictionaries, language by language. """
        if target is None:
            target = {}
-        for d in dicts:
+        for d in filter(None, dicts):
            for lang, subs in d.items():
                target[lang] = cls._merge_subtitle_items(target.get(lang, []), subs)
        return target
--- a/yt_dlp/extractor/ctvnews.py
+++ b/yt_dlp/extractor/ctvnews.py
@ -1,14 +1,27 @@
+import json
 import re
+import urllib.parse

 from .common import InfoExtractor
-from ..utils import orderedSet
+from .ninecninemedia import NineCNineMediaIE
+from ..utils import extract_attributes, orderedSet
+from ..utils.traversal import find_element, traverse_obj


 class CTVNewsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
+    _BASE_REGEX = r'https?://(?:[^.]+\.)?ctvnews\.ca/'
+    _VIDEO_ID_RE = r'(?P<id>\d{5,})'
+    _PLAYLIST_ID_RE = r'(?P<id>\d\.\d{5,})'
+    _VALID_URL = [
+        rf'{_BASE_REGEX}video/c{_VIDEO_ID_RE}',
+        rf'{_BASE_REGEX}video(?:-gallery)?/?\?clipId={_VIDEO_ID_RE}',
+        rf'{_BASE_REGEX}video/?\?(?:playlist|bin)Id={_PLAYLIST_ID_RE}',
+        rf'{_BASE_REGEX}(?!video/)[^?#]*?{_PLAYLIST_ID_RE}/?(?:$|[?#])',
+        rf'{_BASE_REGEX}(?!video/)[^?#]+\?binId={_PLAYLIST_ID_RE}',
+    ]
    _TESTS = [{
        'url': 'http://www.ctvnews.ca/video?clipId=901995',
-        'md5': '9b8624ba66351a23e0b6e1391971f9af',
+        'md5': 'b608f466c7fa24b9666c6439d766ab7e',
        'info_dict': {
            'id': '901995',
            'ext': 'flv',
@ -16,6 +29,33 @@ class CTVNewsIE(InfoExtractor):
            'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
            'timestamp': 1467286284,
            'upload_date': '20160630',
+            'categories': [],
+            'season_number': 0,
+            'season': 'Season 0',
+            'tags': [],
+            'series': 'CTV News National | Archive | Stories 2',
+            'season_id': '57981',
+            'thumbnail': r're:https?://.*\.jpg$',
+            'duration': 764.631,
+        },
+    }, {
+        'url': 'https://barrie.ctvnews.ca/video/c3030933-here_s-what_s-making-news-for-nov--15?binId=1272429',
+        'md5': '8b8c2b33c5c1803e3c26bc74ff8694d5',
+        'info_dict': {
+            'id': '3030933',
+            'ext': 'flv',
+            'title': 'Here’s what’s making news for Nov. 15',
+            'description': 'Here are the top stories we’re working on for CTV News at 11 for Nov. 15',
+            'thumbnail': 'http://images2.9c9media.com/image_asset/2021_2_22_a602e68e-1514-410e-a67a-e1f7cccbacab_png_2000x1125.jpg',
+            'season_id': '58104',
+            'season_number': 0,
+            'tags': [],
+            'season': 'Season 0',
+            'categories': [],
+            'series': 'CTV News Barrie',
+            'upload_date': '20241116',
+            'duration': 42.943,
+            'timestamp': 1731722452,
        },
    }, {
        'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
@ -31,6 +71,72 @@ class CTVNewsIE(InfoExtractor):
            'id': '1.2876780',
        },
        'playlist_mincount': 100,
+    }, {
+        'url': 'https://www.ctvnews.ca/it-s-been-23-years-since-toronto-called-in-the-army-after-a-major-snowstorm-1.5736957',
+        'info_dict':
+        {
+            'id': '1.5736957',
+        },
+        'playlist_mincount': 6,
+    }, {
+        'url': 'https://www.ctvnews.ca/business/respondents-to-bank-of-canada-questionnaire-largely-oppose-creating-a-digital-loonie-1.6665797',
+        'md5': '24bc4b88cdc17d8c3fc01dfc228ab72c',
+        'info_dict': {
+            'id': '2695026',
+            'ext': 'flv',
+            'season_id': '89852',
+            'series': 'From CTV News Channel',
+            'description': 'md5:796a985a23cacc7e1e2fafefd94afd0a',
+            'season': '2023',
+            'title': 'Bank of Canada asks public about digital currency',
+            'categories': [],
+            'tags': [],
+            'upload_date': '20230526',
+            'season_number': 2023,
+            'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
+            'timestamp': 1685105157,
+            'duration': 253.553,
+        },
+    }, {
+        'url': 'https://stox.ctvnews.ca/video-gallery?clipId=582589',
+        'md5': '135cc592df607d29dddc931f1b756ae2',
+        'info_dict': {
+            'id': '582589',
+            'ext': 'flv',
+            'categories': [],
+            'timestamp': 1427906183,
+            'season_number': 0,
+            'duration': 125.559,
+            'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
+            'series': 'CTV News Stox',
+            'description': 'CTV original footage of the rise and fall of the Berlin Wall.',
+            'title': 'Berlin Wall',
+            'season_id': '63817',
+            'season': 'Season 0',
+            'tags': [],
+            'upload_date': '20150401',
+        },
+    }, {
+        'url': 'https://ottawa.ctvnews.ca/features/regional-contact/regional-contact-archive?binId=1.1164587#3023759',
+        'md5': 'a14c0603557decc6531260791c23cc5e',
+        'info_dict': {
+            'id': '3023759',
+            'ext': 'flv',
+            'season_number': 2024,
+            'timestamp': 1731798000,
+            'season': '2024',
+            'episode': 'Episode 125',
+            'description': 'CTV News Ottawa at Six',
+            'duration': 2712.076,
+            'episode_number': 125,
+            'upload_date': '20241116',
+            'title': 'CTV News Ottawa at Six for Saturday, November 16, 2024',
+            'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
+            'categories': [],
+            'tags': [],
+            'series': 'CTV News Ottawa at Six',
+            'season_id': '92667',
+        },
    }, {
        'url': 'http://www.ctvnews.ca/1.810401',
        'only_matching': True,
@ -42,29 +148,35 @@ class CTVNewsIE(InfoExtractor):
        'only_matching': True,
    }]

+    def _ninecninemedia_url_result(self, clip_id):
+        return self.url_result(f'9c9media:ctvnews_web:{clip_id}', NineCNineMediaIE, clip_id)
+
    def _real_extract(self, url):
        page_id = self._match_id(url)

-        def ninecninemedia_url_result(clip_id):
-            return {
-                '_type': 'url_transparent',
-                'id': clip_id,
-                'url': f'9c9media:ctvnews_web:{clip_id}',
-                'ie_key': 'NineCNineMedia',
-            }
+        if mobj := re.fullmatch(self._VIDEO_ID_RE, urllib.parse.urlparse(url).fragment):
+            page_id = mobj.group('id')

-        if page_id.isdigit():
-            return ninecninemedia_url_result(page_id)
-        else:
-            webpage = self._download_webpage(f'http://www.ctvnews.ca/{page_id}', page_id, query={
-                'ot': 'example.AjaxPageLayout.ot',
-                'maxItemsPerPage': 1000000,
-            })
-            entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet(
-                re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
-            if not entries:
-                webpage = self._download_webpage(url, page_id)
-                if 'getAuthStates("' in webpage:
-                    entries = [ninecninemedia_url_result(clip_id) for clip_id in
-                               self._search_regex(r'getAuthStates\("([\d+,]+)"', webpage, 'clip ids').split(',')]
-            return self.playlist_result(entries, page_id)
+        if re.fullmatch(self._VIDEO_ID_RE, page_id):
+            return self._ninecninemedia_url_result(page_id)
+
+        webpage = self._download_webpage(f'https://www.ctvnews.ca/{page_id}', page_id, query={
+            'ot': 'example.AjaxPageLayout.ot',
+            'maxItemsPerPage': 1000000,
+        })
+        entries = [self._ninecninemedia_url_result(clip_id)
+                   for clip_id in orderedSet(re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
+        if not entries:
+            webpage = self._download_webpage(url, page_id)
+            if 'getAuthStates("' in webpage:
+                entries = [self._ninecninemedia_url_result(clip_id) for clip_id in
+                           self._search_regex(r'getAuthStates\("([\d+,]+)"', webpage, 'clip ids').split(',')]
+            else:
+                entries = [
+                    self._ninecninemedia_url_result(clip_id) for clip_id in
+                    traverse_obj(webpage, (
+                        {find_element(tag='jasper-player-container', html=True)},
+                        {extract_attributes}, 'axis-ids', {json.loads}, ..., 'axisId', {str}))
+                ]
+
+        return self.playlist_result(entries, page_id)
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@ -569,7 +569,7 @@ class FacebookIE(InfoExtractor):
            if dash_manifest:
                formats.extend(self._parse_mpd_formats(
                    compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
-                    mpd_url=url_or_none(video.get('dash_manifest_url')) or mpd_url))
+                    mpd_url=url_or_none(vid_data.get('dash_manifest_url')) or mpd_url))

        def process_formats(info):
            # Downloads with browser's User-Agent are rate limited. Working around
--- a/yt_dlp/extractor/litv.py
+++ b/yt_dlp/extractor/litv.py
@ -1,30 +1,32 @@
 import json
+import uuid

 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    int_or_none,
+    join_nonempty,
    smuggle_url,
    traverse_obj,
    try_call,
    unsmuggle_url,
+    urljoin,
 )


 class LiTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)'
-
-    _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?content_id=%s'
-
+    _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:[^/?#]+/watch/|vod/[^/?#]+/content\.do\?content_id=)(?P<id>[\w-]+)'
+    _URL_TEMPLATE = 'https://www.litv.tv/%s/watch/%s'
+    _GEO_COUNTRIES = ['TW']
    _TESTS = [{
-        'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
+        'url': 'https://www.litv.tv/drama/watch/VOD00041610',
        'info_dict': {
            'id': 'VOD00041606',
            'title': '花千骨',
        },
        'playlist_count': 51,  # 50 episodes + 1 trailer
    }, {
-        'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
+        'url': 'https://www.litv.tv/drama/watch/VOD00041610',
        'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a',
        'info_dict': {
            'id': 'VOD00041610',
@ -32,16 +34,15 @@ class LiTVIE(InfoExtractor):
            'title': '花千骨第1集',
            'thumbnail': r're:https?://.*\.jpg$',
            'description': '《花千骨》陸劇線上看。十六年前，平靜的村莊內，一名女嬰隨異相出生，途徑此地的蜀山掌門清虛道長算出此女命運非同一般，她體內散發的異香易招惹妖魔。一念慈悲下，他在村莊周邊設下結界阻擋妖魔入侵，讓其年滿十六後去蜀山，並賜名花千骨。',
-            'categories': ['奇幻', '愛情', '中國', '仙俠'],
+            'categories': ['奇幻', '愛情', '仙俠', '古裝'],
            'episode': 'Episode 1',
            'episode_number': 1,
        },
        'params': {
            'noplaylist': True,
        },
-        'skip': 'Georestricted to Taiwan',
    }, {
-        'url': 'https://www.litv.tv/promo/miyuezhuan/?content_id=VOD00044841&',
+        'url': 'https://www.litv.tv/drama/watch/VOD00044841',
        'md5': '88322ea132f848d6e3e18b32a832b918',
        'info_dict': {
            'id': 'VOD00044841',
@ -55,94 +56,62 @@ class LiTVIE(InfoExtractor):
    def _extract_playlist(self, playlist_data, content_type):
        all_episodes = [
            self.url_result(smuggle_url(
-                self._URL_TEMPLATE % (content_type, episode['contentId']),
+                self._URL_TEMPLATE % (content_type, episode['content_id']),
                {'force_noplaylist': True}))  # To prevent infinite recursion
-            for episode in traverse_obj(playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))]
+            for episode in traverse_obj(playlist_data, ('seasons', ..., 'episodes', lambda _, v: v['content_id']))]

-        return self.playlist_result(all_episodes, playlist_data['contentId'], playlist_data.get('title'))
+        return self.playlist_result(all_episodes, playlist_data['content_id'], playlist_data.get('title'))

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
-
        video_id = self._match_id(url)
-
        webpage = self._download_webpage(url, video_id)
+        vod_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']

-        if self._search_regex(
-                r'(?i)<meta\s[^>]*http-equiv="refresh"\s[^>]*content="[0-9]+;\s*url=https://www\.litv\.tv/"',
-                webpage, 'meta refresh redirect', default=False, group=0):
-            raise ExtractorError('No such content found', expected=True)
+        program_info = traverse_obj(vod_data, ('programInformation', {dict})) or {}
+        playlist_data = traverse_obj(vod_data, ('seriesTree'))
+        if playlist_data and self._yes_playlist(program_info.get('series_id'), video_id, smuggled_data):
+            return self._extract_playlist(playlist_data, program_info.get('content_type'))

-        program_info = self._parse_json(self._search_regex(
-            r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
-            video_id)
+        asset_id = traverse_obj(program_info, ('assets', 0, 'asset_id', {str}))
+        if asset_id:  # This is a VOD
+            media_type = 'vod'
+        else:  # This is a live stream
+            asset_id = program_info['content_id']
+            media_type = program_info['content_type']
+        puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
+        if puid:
+            endpoint = 'get-urls'
+        else:
+            puid = str(uuid.uuid4())
+            endpoint = 'get-urls-no-auth'
+        video_data = self._download_json(
+            f'https://www.litv.tv/api/{endpoint}', video_id,
+            data=json.dumps({'AssetId': asset_id, 'MediaType': media_type, 'puid': puid}).encode(),
+            headers={'Content-Type': 'application/json'})

-        # In browsers `getProgramInfo` request is always issued. Usually this
-        # endpoint gives the same result as the data embedded in the webpage.
-        # If, for some reason, there are no embedded data, we do an extra request.
-        if 'assetId' not in program_info:
-            program_info = self._download_json(
-                'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
-                query={'contentId': video_id},
-                headers={'Accept': 'application/json'})
-
-        series_id = program_info['seriesId']
-        if self._yes_playlist(series_id, video_id, smuggled_data):
-            playlist_data = self._download_json(
-                'https://www.litv.tv/vod/ajax/getSeriesTree', video_id,
-                query={'seriesId': series_id}, headers={'Accept': 'application/json'})
-            return self._extract_playlist(playlist_data, program_info['contentType'])
-
-        video_data = self._parse_json(self._search_regex(
-            r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
-            webpage, 'video data', default='{}'), video_id)
-        if not video_data:
-            payload = {'assetId': program_info['assetId']}
-            puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
-            if puid:
-                payload.update({
-                    'type': 'auth',
-                    'puid': puid,
-                })
-                endpoint = 'getUrl'
-            else:
-                payload.update({
-                    'watchDevices': program_info['watchDevices'],
-                    'contentType': program_info['contentType'],
-                })
-                endpoint = 'getMainUrlNoAuth'
-            video_data = self._download_json(
-                f'https://www.litv.tv/vod/ajax/{endpoint}', video_id,
-                data=json.dumps(payload).encode(),
-                headers={'Content-Type': 'application/json'})
-
-        if not video_data.get('fullpath'):
-            error_msg = video_data.get('errorMessage')
-            if error_msg == 'vod.error.outsideregionerror':
+        if error := traverse_obj(video_data, ('error', {dict})):
+            error_msg = traverse_obj(error, ('message', {str}))
+            if error_msg and 'OutsideRegionError' in error_msg:
                self.raise_geo_restricted('This video is available in Taiwan only')
-            if error_msg:
+            elif error_msg:
                raise ExtractorError(f'{self.IE_NAME} said: {error_msg}', expected=True)
-            raise ExtractorError(f'Unexpected result from {self.IE_NAME}')
+            raise ExtractorError(f'Unexpected error from {self.IE_NAME}')

        formats = self._extract_m3u8_formats(
-            video_data['fullpath'], video_id, ext='mp4',
-            entry_protocol='m3u8_native', m3u8_id='hls')
+            video_data['result']['AssetURLs'][0], video_id, ext='mp4', m3u8_id='hls')
        for a_format in formats:
            # LiTV HLS segments doesn't like compressions
            a_format.setdefault('http_headers', {})['Accept-Encoding'] = 'identity'

-        title = program_info['title'] + program_info.get('secondaryMark', '')
-        description = program_info.get('description')
-        thumbnail = program_info.get('imageFile')
-        categories = [item['name'] for item in program_info.get('category', [])]
-        episode = int_or_none(program_info.get('episode'))
-
        return {
            'id': video_id,
            'formats': formats,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'categories': categories,
-            'episode_number': episode,
+            'title': join_nonempty('title', 'secondary_mark', delim='', from_dict=program_info),
+            **traverse_obj(program_info, {
+                'description': ('description', {str}),
+                'thumbnail': ('picture', {urljoin('https://p-cdnstatic.svc.litv.tv/')}),
+                'categories': ('genres', ..., 'name', {str}),
+                'episode_number': ('episode', {int_or_none}),
+            }),
        }
--- a/yt_dlp/extractor/nrk.py
+++ b/yt_dlp/extractor/nrk.py
@ -1,4 +1,5 @@
 import itertools
+import json
 import random
 import re

@ -7,11 +8,12 @@ from ..networking.exceptions import HTTPError
 from ..utils import (
    ExtractorError,
    determine_ext,
+    float_or_none,
    int_or_none,
    parse_duration,
    parse_iso8601,
    str_or_none,
-    try_get,
+    traverse_obj,
    url_or_none,
    urljoin,
 )
@ -25,18 +27,23 @@ class NRKBaseIE(InfoExtractor):
            nrk-od-no\.telenorcdn\.net|
            minicdn-od\.nrk\.no/od/nrkhd-osl-rr\.netwerk\.no/no
        )/'''
+    _NETRC_MACHINE = 'nrk'
+    _LOGIN_URL = 'https://innlogging.nrk.no/logginn'
+    _AUTH_TOKEN = ''
+    _API_CALL_HEADERS = {'Accept': 'application/json;device=player-core'}
+
+    def _extract_nrk_formats_and_subtitles(self, asset_url, video_id):

-    def _extract_nrk_formats(self, asset_url, video_id):
        if re.match(r'https?://[^/]+\.akamaihd\.net/i/', asset_url):
            return self._extract_akamai_formats(asset_url, video_id)
-        asset_url = re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url)
-        formats = self._extract_m3u8_formats(
+        asset_url = re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only|adap=.+?\b)&?', '', asset_url)
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            asset_url, video_id, 'mp4', 'm3u8_native', fatal=False)
        if not formats and re.search(self._CDN_REPL_REGEX, asset_url):
-            formats = self._extract_m3u8_formats(
+            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                re.sub(self._CDN_REPL_REGEX, '://nrk-od-%02d.akamaized.net/no/' % random.randint(0, 99), asset_url),
                video_id, 'mp4', 'm3u8_native', fatal=False)
-        return formats
+        return formats, subtitles

    def _raise_error(self, data):
        MESSAGES = {
@ -47,7 +54,7 @@ class NRKBaseIE(InfoExtractor):
        }
        message_type = data.get('messageType', '')
        # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
-        if 'IsGeoBlocked' in message_type or try_get(data, lambda x: x['usageRights']['isGeoBlocked']) is True:
+        if 'IsGeoBlocked' in message_type or traverse_obj(data, ('usageRights', 'isGeoBlocked')) is True:
            self.raise_geo_restricted(
                msg=MESSAGES.get('ProgramIsGeoBlocked'),
                countries=self._GEO_COUNTRIES)
@ -58,7 +65,7 @@ class NRKBaseIE(InfoExtractor):
        return self._download_json(
            urljoin('https://psapi.nrk.no/', path),
            video_id, note or f'Downloading {item} JSON',
-            fatal=fatal, query=query)
+            fatal=fatal, query=query, headers=self._API_CALL_HEADERS)


 class NRKIE(NRKBaseIE):
@ -73,17 +80,20 @@ class NRKIE(NRKBaseIE):
                            )
                            (?P<id>[^?\#&]+)
                        '''
-
    _TESTS = [{
        # video
        'url': 'http://www.nrk.no/video/PS*150533',
-        'md5': 'f46be075326e23ad0e524edfcb06aeb6',
+        'md5': '2b88a652ad2e275591e61cf550887eec',
        'info_dict': {
            'id': '150533',
            'ext': 'mp4',
            'title': 'Dompap og andre fugler i Piip-Show',
            'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
            'duration': 262,
+            'timestamp': 1395751833,
+            'upload_date': '20140325',
+            'thumbnail': 'https://gfx.nrk.no/0mZgeckEzRU6qTWrbQHD2QcyralHrYB08wBvh-K-AtAQ',
+            'alt_title': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
        },
    }, {
        # audio
@ -95,6 +105,10 @@ class NRKIE(NRKBaseIE):
            'title': 'Slik høres internett ut når du er blind',
            'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
            'duration': 20,
+            'alt_title': 'Cathrine Lie Wathne er blind, og bruker hurtigtaster for å navigere seg rundt på ulike nettsider.',
+            'upload_date': '20140425',
+            'timestamp': 1398429565,
+            'thumbnail': 'https://gfx.nrk.no/urxQMSXF-WnbfjBH5ke2igLGyN27EdJVWZ6FOsEAclhA',
        },
    }, {
        'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
@ -144,18 +158,10 @@ class NRKIE(NRKBaseIE):
    def _real_extract(self, url):
        video_id = self._match_id(url).split('/')[-1]

-        def call_playback_api(item, query=None):
-            try:
-                return self._call_api(f'playback/{item}/program/{video_id}', video_id, item, query=query)
-            except ExtractorError as e:
-                if isinstance(e.cause, HTTPError) and e.cause.status == 400:
-                    return self._call_api(f'playback/{item}/{video_id}', video_id, item, query=query)
-                raise
-
        # known values for preferredCdn: akamai, iponly, minicdn and telenor
-        manifest = call_playback_api('manifest', {'preferredCdn': 'akamai'})
+        manifest = self._call_api(f'playback/manifest/{video_id}', video_id, 'manifest', query={'preferredCdn': 'akamai'})

-        video_id = try_get(manifest, lambda x: x['id'], str) or video_id
+        video_id = manifest.get('id') or video_id

        if manifest.get('playability') == 'nonPlayable':
            self._raise_error(manifest['nonPlayable'])
@ -163,17 +169,22 @@ class NRKIE(NRKBaseIE):
        playable = manifest['playable']

        formats = []
-        for asset in playable['assets']:
-            if not isinstance(asset, dict):
-                continue
-            if asset.get('encrypted'):
+        subtitles = {}
+        has_drm = False
+        for asset in traverse_obj(playable, ('assets', ..., {dict})):
+            encryption_scheme = asset.get('encryptionScheme')
+            if encryption_scheme not in (None, 'none', 'statickey'):
+                self.report_warning(f'Skipping asset with unsupported encryption scheme "{encryption_scheme}"')
+                has_drm = True
                continue
            format_url = url_or_none(asset.get('url'))
            if not format_url:
                continue
            asset_format = (asset.get('format') or '').lower()
            if asset_format == 'hls' or determine_ext(format_url) == 'm3u8':
-                formats.extend(self._extract_nrk_formats(format_url, video_id))
+                fmts, subs = self._extract_nrk_formats_and_subtitles(format_url, video_id)
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)
            elif asset_format == 'mp3':
                formats.append({
                    'url': format_url,
@ -181,19 +192,22 @@ class NRKIE(NRKBaseIE):
                    'vcodec': 'none',
                })

-        data = call_playback_api('metadata')
+        if not formats and has_drm:
+            self.report_drm(video_id)

-        preplay = data['preplay']
-        titles = preplay['titles']
-        title = titles['title']
+        data = self._call_api(traverse_obj(manifest, ('_links', 'metadata', 'href', {str})), video_id, 'metadata')
+
+        preplay = data.get('preplay')
+        titles = preplay.get('titles')
+        title = titles.get('title')
        alt_title = titles.get('subtitle')

-        description = try_get(preplay, lambda x: x['description'].replace('\r', '\n'))
-        duration = parse_duration(playable.get('duration')) or parse_duration(data.get('duration'))
+        description = preplay.get('description')
+        # Use m3u8 vod duration for NRKSkoleIE because of incorrect duration in metadata
+        duration = parse_duration(playable.get('duration')) or parse_duration(data.get('duration')) or self._extract_m3u8_vod_duration(formats[0]['url'], video_id)

        thumbnails = []
-        for image in try_get(
-                preplay, lambda x: x['poster']['images'], list) or []:
+        for image in traverse_obj(preplay, ('poster', 'images', {list})) or []:
            if not isinstance(image, dict):
                continue
            image_url = url_or_none(image.get('url'))
@ -205,13 +219,13 @@ class NRKIE(NRKBaseIE):
                'height': int_or_none(image.get('pixelHeight')),
            })

-        subtitles = {}
-        for sub in try_get(playable, lambda x: x['subtitles'], list) or []:
+        for sub in traverse_obj(playable, ('subtitles', {list})) or []:
            if not isinstance(sub, dict):
                continue
            sub_url = url_or_none(sub.get('webVtt'))
            if not sub_url:
                continue
+
            sub_key = str_or_none(sub.get('language')) or 'nb'
            sub_type = str_or_none(sub.get('type'))
            if sub_type:
@ -220,8 +234,26 @@ class NRKIE(NRKBaseIE):
                'url': sub_url,
            })

-        legal_age = try_get(
-            data, lambda x: x['legalAge']['body']['rating']['code'], str)
+        chapters = []
+        if data.get('skipDialogInfo'):
+            chapters = [item for item in [{
+                'start_time': float_or_none(traverse_obj(data, ('skipDialogInfo', 'startIntroInSeconds'))),
+                'end_time': float_or_none(traverse_obj(data, ('skipDialogInfo', 'endIntroInSeconds'))),
+                'title': 'Intro',
+            }, {
+                'start_time': float_or_none(traverse_obj(data, ('skipDialogInfo', 'startCreditsInSeconds'))),
+                'end_time': duration,
+                'title': 'Outro',
+            }] if item['start_time'] != item['end_time']]
+        if preplay.get('indexPoints'):
+            seconds_or_none = lambda x: float_or_none(parse_duration(x))
+            chapters += traverse_obj(preplay, ('indexPoints', ..., {
+                'start_time': ('startPoint', {seconds_or_none}),
+                'end_time': ('endPoint', {seconds_or_none}),
+                'title': ('title', {lambda x: x}),
+            }))
+        chapters = sorted(chapters, key=lambda x: x['start_time']) if chapters else None
+        legal_age = traverse_obj(data, ('legalAge', 'body', 'rating', 'code'))
        # https://en.wikipedia.org/wiki/Norwegian_Media_Authority
        age_limit = None
        if legal_age:
@ -230,7 +262,7 @@ class NRKIE(NRKBaseIE):
            elif legal_age.isdigit():
                age_limit = int_or_none(legal_age)

-        is_series = try_get(data, lambda x: x['_links']['series']['name']) == 'series'
+        is_series = traverse_obj(data, ('_links', 'series', 'name')) == 'series'

        info = {
            'id': video_id,
@ -242,13 +274,23 @@ class NRKIE(NRKBaseIE):
            'age_limit': age_limit,
            'formats': formats,
            'subtitles': subtitles,
-            'timestamp': parse_iso8601(try_get(manifest, lambda x: x['availability']['onDemand']['from'], str)),
+            'chapters': chapters,
+            'timestamp': parse_iso8601(traverse_obj(data, ('availability', 'onDemand', 'from'))),
        }
-
        if is_series:
            series = season_id = season_number = episode = episode_number = None
+
            programs = self._call_api(
                f'programs/{video_id}', video_id, 'programs', fatal=False)
+            matched_dates = [
+                int(match.group()) // 1000
+                for date in [
+                    traverse_obj(programs, ('firstTimeTransmitted', 'publicationDate')),
+                    traverse_obj(programs, ('usageRights', 'availableFrom')),
+                ] if date for match in [re.search(r'\d+', date)] if match
+            ]
+            if matched_dates:
+                info.update({'timestamp': min(info['timestamp'], *matched_dates)})
            if programs and isinstance(programs, dict):
                series = str_or_none(programs.get('seriesTitle'))
                season_id = str_or_none(programs.get('seasonId'))
@ -284,8 +326,38 @@ class NRKIE(NRKBaseIE):

        return info

+    def _perform_login(self, username, password):
+        try:
+            self._download_json(
+                self._LOGIN_URL, None, headers={'Content-Type': 'application/json; charset=UTF-8', 'accept': 'application/json; charset=utf-8'},
+                data=json.dumps({
+                    'clientId': '',
+                    'hashedPassword': {'current': {
+                        'hash': password,
+                        'recipe': {
+                            'algorithm': 'cleartext',
+                            'salt': '',
+                        },
+                    },
+                    },
+                    'password': password,
+                    'username': username,
+                }).encode())

-class NRKTVIE(InfoExtractor):
+            self._download_webpage('https://tv.nrk.no/auth/web/login/opsession', None)
+            response = self._download_json('https://tv.nrk.no/auth/session/tokenforsub/_', None)
+            self._AUTH_TOKEN = traverse_obj(response, ('session', 'accessToken'))
+            self._API_CALL_HEADERS['authorization'] = f'Bearer {self._AUTH_TOKEN}'
+        except ExtractorError as e:
+            message = None
+            if isinstance(e.cause, HTTPError) and e.cause.status in (401, 400):
+                resp = self._parse_json(
+                    e.cause.response.read().decode(), None, fatal=False) or {}
+                message = next((error['message'] for error in resp['errors'] if error['field'] == 'Password'), None)
+            self.report_warning(message or 'Unable to log in')
+
+
+class NRKTVIE(NRKBaseIE):
    IE_DESC = 'NRK TV and NRK Radio'
    _EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
    _VALID_URL = rf'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:[^/]+/)*{_EPISODE_RE}'
@ -307,6 +379,14 @@ class NRKTVIE(InfoExtractor):
                    'ext': 'vtt',
                }],
            },
+            'upload_date': '20170627',
+            'chapters': [{'start_time': 0, 'end_time': 2213.0, 'title': '<Untitled Chapter 1>'}, {'start_time': 2213.0, 'end_time': 2223.44, 'title': 'Outro'}],
+            'timestamp': 1498591822,
+            'thumbnail': 'https://gfx.nrk.no/myRSc4vuFlahB60P3n6swwRTQUZI1LqJZl9B7icZFgzA',
+            'alt_title': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
+        },
+        'params': {
+            'skip_download': True,
        },
    }, {
        'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
@ -318,9 +398,31 @@ class NRKTVIE(InfoExtractor):
            'alt_title': '23. mai 2014',
            'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
            'duration': 1741,
+            'age_limit': 0,
            'series': '20 spørsmål',
            'episode': '23. mai 2014',
-            'age_limit': 0,
+            'upload_date': '20140523',
+            'thumbnail': 'https://gfx.nrk.no/u7uCe79SEfPVGRAGVp2_uAZnNc4mfz_kjXg6Bgek8lMQ',
+            'season_id': '126936',
+            'season_number': 2014,
+            'season': 'Season 2014',
+            'chapters': [
+                {'start_time': 0.0, 'end_time': 39.0, 'title': 'Intro'},
+                {'start_time': 0.0, 'title': 'Velkommen', 'end_time': 152.32},
+                {'start_time': 152.32, 'title': 'Tannpirker', 'end_time': 304.76},
+                {'start_time': 304.76, 'title': 'Orgelbrus', 'end_time': 513.48},
+                {'start_time': 513.48, 'title': 'G-streng', 'end_time': 712.96},
+                {'start_time': 712.96, 'title': 'Medalje', 'end_time': 837.76},
+                {'start_time': 837.76, 'title': 'Globus', 'end_time': 1124.48},
+                {'start_time': 1124.48, 'title': 'Primstav', 'end_time': 1417.4},
+                {'start_time': 1417.4, 'title': 'Fyr', 'end_time': 1721.0},
+                {'start_time': 1721.0, 'end_time': 1741.0, 'title': 'Outro'},
+            ],
+            'episode_number': 3,
+            'timestamp': 1400871900,
+        },
+        'params': {
+            'skip_download': True,
        },
    }, {
        'url': 'https://tv.nrk.no/program/mdfp15000514',
@ -333,6 +435,18 @@ class NRKTVIE(InfoExtractor):
            'series': 'Kunnskapskanalen',
            'episode': 'Grunnlovsjubiléet - Stor ståhei for ingenting',
            'age_limit': 0,
+            'upload_date': '20140524',
+            'episode_number': 17,
+            'chapters': [
+                {'start_time': 0, 'end_time': 4595.0, 'title': '<Untitled Chapter 1>'},
+                {'start_time': 4595.0, 'end_time': 4605.08, 'title': 'Outro'},
+            ],
+            'season': 'Season 2014',
+            'timestamp': 1400937600,
+            'thumbnail': 'https://gfx.nrk.no/D2u6-EyVUZpVCq0PdSNHRgdBZCV40ekpk6s9fZWiMtyg',
+            'season_number': 2014,
+            'season_id': '39240',
+            'alt_title': 'Grunnlovsjubiléet - Stor ståhei for ingenting',
        },
        'params': {
            'skip_download': True,
@ -343,23 +457,51 @@ class NRKTVIE(InfoExtractor):
        'info_dict': {
            'id': 'MSPO40010515',
            'ext': 'mp4',
-            'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
-            'description': 'md5:c03aba1e917561eface5214020551b7a',
+            'title': 'Tour de Ski - Sprint fri teknikk, kvinner og menn',
+            'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
            'age_limit': 0,
+            'episode': 'Sprint fri teknikk, kvinner og menn',
+            'series': 'Tour de Ski',
+            'thumbnail': 'https://gfx.nrk.no/s9vNwGPGN-Un-UCvitD09we9HRLDxisnipA9K__d5c3Q',
+            'season_id': '53512',
+            'chapters': [
+                {'start_time': 0, 'end_time': 6938.0, 'title': '<Untitled Chapter 1>'},
+                {'start_time': 6938.0, 'end_time': 6947.52, 'title': 'Outro'},
+            ],
+            'season_number': 2015,
+            'episode_number': 5,
+            'upload_date': '20150106',
+            'duration': 6947.52,
+            'timestamp': 1420545563,
+            'alt_title': 'Sprint fri teknikk, kvinner og menn',
+            'season': 'Season 2015',
        },
        'params': {
            'skip_download': True,
        },
-        'expected_warnings': ['Failed to download m3u8 information'],
-        'skip': 'particular part is not supported currently',
    }, {
        'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
        'info_dict': {
            'id': 'MSPO40010515',
            'ext': 'mp4',
-            'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
-            'description': 'md5:c03aba1e917561eface5214020551b7a',
+            'title': 'Tour de Ski - Sprint fri teknikk, kvinner og menn',
+            'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
            'age_limit': 0,
+            'episode': 'Sprint fri teknikk, kvinner og menn',
+            'series': 'Tour de Ski',
+            'thumbnail': 'https://gfx.nrk.no/s9vNwGPGN-Un-UCvitD09we9HRLDxisnipA9K__d5c3Q',
+            'season_id': '53512',
+            'chapters': [
+                {'start_time': 0, 'end_time': 6938.0, 'title': '<Untitled Chapter 1>'},
+                {'start_time': 6938.0, 'end_time': 6947.52, 'title': 'Outro'},
+            ],
+            'season_number': 2015,
+            'episode_number': 5,
+            'upload_date': '20150106',
+            'duration': 6947.52,
+            'timestamp': 1420545563,
+            'alt_title': 'Sprint fri teknikk, kvinner og menn',
+            'season': 'Season 2015',
        },
        'expected_warnings': ['Failed to download m3u8 information'],
        'skip': 'Ikke tilgjengelig utenfor Norge',
@ -380,6 +522,7 @@ class NRKTVIE(InfoExtractor):
        'params': {
            'skip_download': True,
        },
+        'skip': 'ProgramRightsHasExpired',
    }, {
        'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
        'info_dict': {
@ -413,7 +556,7 @@ class NRKTVIE(InfoExtractor):
            f'nrk:{video_id}', ie=NRKIE.ie_key(), video_id=video_id)


-class NRKTVEpisodeIE(InfoExtractor):
+class NRKTVEpisodeIE(NRKBaseIE):
    _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/(?P<season_number>\d+)/episode/(?P<episode_number>\d+))'
    _TESTS = [{
        'url': 'https://tv.nrk.no/serie/hellums-kro/sesong/1/episode/2',
@ -421,13 +564,24 @@ class NRKTVEpisodeIE(InfoExtractor):
            'id': 'MUHH36005220',
            'ext': 'mp4',
            'title': 'Hellums kro - 2. Kro, krig og kjærlighet',
-            'description': 'md5:ad92ddffc04cea8ce14b415deef81787',
+            'description': 'md5:b32a7dc0b1ed27c8064f58b97bda4350',
            'duration': 1563.92,
            'series': 'Hellums kro',
            'season_number': 1,
            'episode_number': 2,
            'episode': '2. Kro, krig og kjærlighet',
            'age_limit': 6,
+            'timestamp': 1572584520,
+            'upload_date': '20191101',
+            'thumbnail': 'https://gfx.nrk.no/2_4mhU2JhR-8IYRC_OMmAQDbbOHgwcHqgi2sBrNrsjkg',
+            'alt_title': '2. Kro, krig og kjærlighet',
+            'season': 'Season 1',
+            'season_id': '124163',
+            'chapters': [
+                {'start_time': 0, 'end_time': 29.0, 'title': '<Untitled Chapter 1>'},
+                {'start_time': 29.0, 'end_time': 50.0, 'title': 'Intro'},
+                {'start_time': 1530.0, 'end_time': 1563.92, 'title': 'Outro'},
+            ],
        },
        'params': {
            'skip_download': True,
@ -453,26 +607,14 @@ class NRKTVEpisodeIE(InfoExtractor):
    }]

    def _real_extract(self, url):
-        display_id, season_number, episode_number = self._match_valid_url(url).groups()
+        # HEADRequest(url) only works if a regular GET request was recently made by anyone for the specific URL being requested.
+        response = self._request_webpage(url, None, expected_status=True)

-        webpage = self._download_webpage(url, display_id)
+        nrk_id = self._match_id(url)

-        info = self._search_json_ld(webpage, display_id, default={})
-        nrk_id = info.get('@id') or self._html_search_meta(
-            'nrk:program-id', webpage, default=None) or self._search_regex(
-            rf'data-program-id=["\']({NRKTVIE._EPISODE_RE})', webpage,
-            'nrk id')
-        assert re.match(NRKTVIE._EPISODE_RE, nrk_id)
-
-        info.update({
-            '_type': 'url',
-            'id': nrk_id,
-            'url': f'nrk:{nrk_id}',
-            'ie_key': NRKIE.ie_key(),
-            'season_number': int(season_number),
-            'episode_number': int(episode_number),
-        })
-        return info
+        return self.url_result(
+            response.url, NRKTVIE.ie_key(), nrk_id, url_transparent=True,
+        )


 class NRKTVSerieBaseIE(NRKBaseIE):
@ -482,6 +624,9 @@ class NRKTVSerieBaseIE(NRKBaseIE):
        entries = []
        for episode in entry_list:
            nrk_id = episode.get('prfId') or episode.get('episodeId')
+            if traverse_obj(episode, ('availability', 'status')) == 'expired':
+                self.report_warning(episode['availability'].get('label'), nrk_id)
+                continue
            if not nrk_id or not isinstance(nrk_id, str):
                continue
            entries.append(self.url_result(
@ -508,18 +653,18 @@ class NRKTVSerieBaseIE(NRKBaseIE):
            if not assets_key:
                break
            # Extract entries
-            entries = try_get(
+            entries = traverse_obj(
                embedded,
-                (lambda x: x[assets_key]['_embedded'][assets_key],
-                 lambda x: x[assets_key]),
-                list)
+                (assets_key, '_embedded', assets_key, {list}),
+                (assets_key, {list}),
+            )
            yield from self._extract_entries(entries)
            # Find next URL
-            next_url_path = try_get(
+            next_url_path = traverse_obj(
                data,
-                (lambda x: x['_links']['next']['href'],
-                 lambda x: x['_embedded'][assets_key]['_links']['next']['href']),
-                str)
+                ('_links', 'next', 'href'),
+                ('_embedded', assets_key, '_links', 'next', 'href'),
+            )
            if not next_url_path:
                break
            data = self._call_api(
@ -548,6 +693,27 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE):
            'title': 'Sesong 1',
        },
        'playlist_mincount': 30,
+    }, {
+        'url': 'https://tv.nrk.no/serie/presten/sesong/ekstramateriale',
+        'info_dict': {
+            'id': 'MUHH47005117',
+            'ext': 'mp4',
+            'description': '',
+            'thumbnail': 'https://gfx.nrk.no/sJZroQqD2P8wGMMl5ADznwqiIlAXaCpNofA2pIhe3udA',
+            'alt_title': 'Bloopers: Episode 1',
+            'chapters': [
+                {'start_time': 0, 'end_time': 356.0, 'title': '<Untitled Chapter 1>'},
+                {'start_time': 356.0, 'end_time': 365.8, 'title': 'Outro'},
+            ],
+            'upload_date': '20180302',
+            'timestamp': 1519966800,
+            'title': 'Presten',
+            'age_limit': 0,
+            'duration': 365.8,
+        },
+        'params': {
+            'skip_download': True,
+        },
    }, {
        # no /sesong/ in path
        'url': 'https://tv.nrk.no/serie/lindmo/2016',
@ -572,6 +738,7 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE):
            'title': 'September 2015',
        },
        'playlist_mincount': 841,
+        'skip': 'ProgramRightsHasExpired',
    }, {
        # 180 entries, single page
        'url': 'https://tv.nrk.no/serie/spangas/sesong/1',
@ -594,21 +761,20 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE):
                else super().suitable(url))

    def _real_extract(self, url):
-        mobj = self._match_valid_url(url)
-        domain = mobj.group('domain')
-        serie_kind = mobj.group('serie_kind')
-        serie = mobj.group('serie')
-        season_id = mobj.group('id') or mobj.group('id_2')
+        domain, serie_kind, serie, season_id, season_id_2 = self._match_valid_url(url).group(
+            'domain', 'serie_kind', 'serie', 'id', 'id_2')
+        season_id = season_id or season_id_2
        display_id = f'{serie}/{season_id}'

+        api_suffix = f'/seasons/{season_id}' if season_id != 'ekstramateriale' else '/extramaterial'
+
        data = self._call_api(
-            f'{domain}/catalog/{self._catalog_name(serie_kind)}/{serie}/seasons/{season_id}',
+            f'{domain}/catalog/{self._catalog_name(serie_kind)}/{serie}{api_suffix}',
            display_id, 'season', query={'pageSize': 50})

-        title = try_get(data, lambda x: x['titles']['title'], str) or display_id
        return self.playlist_result(
-            self._entries(data, display_id),
-            display_id, title)
+            self._entries(data, display_id), display_id,
+            title=traverse_obj(data, ('titles', 'title', {str})))


 class NRKTVSeriesIE(NRKTVSerieBaseIE):
@ -666,7 +832,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
        'info_dict': {
            'id': 'dickie-dick-dickens',
            'title': 'Dickie Dick Dickens',
-            'description': 'md5:19e67411ffe57f7dce08a943d7a0b91f',
+            'description': 'md5:605464fab26d06b1ce6a11c3ea37d36d',
        },
        'playlist_mincount': 8,
    }, {
@ -676,6 +842,8 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
        'url': 'https://radio.nrk.no/podkast/ulrikkes_univers',
        'info_dict': {
            'id': 'ulrikkes_univers',
+            'title': 'Ulrikkes univers',
+            'description': 'md5:8af9fc2ee4aecd7f91777383fde50dcc',
        },
        'playlist_mincount': 10,
    }, {
@ -699,16 +867,18 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
        series = self._call_api(
            f'{domain}/catalog/{self._catalog_name(serie_kind)}/{series_id}',
            series_id, 'serie', query={size_prefix + 'ageSize': 50})
-        titles = try_get(series, [
-            lambda x: x['titles'],
-            lambda x: x[x['type']]['titles'],
-            lambda x: x[x['seriesType']]['titles'],
-        ]) or {}
+        titles = traverse_obj(
+            series,
+            (..., 'titles'),
+            (..., 'type', 'titles'),
+            (..., 'seriesType', 'titles'),
+            get_all=False,

+        )
        entries = []
        entries.extend(self._entries(series, series_id))
        embedded = series.get('_embedded') or {}
-        linked_seasons = try_get(series, lambda x: x['_links']['seasons']) or []
+        linked_seasons = traverse_obj(series, ('_links', 'seasons')) or []
        embedded_seasons = embedded.get('seasons') or []
        if len(linked_seasons) > len(embedded_seasons):
            for season in linked_seasons:
@ -731,7 +901,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
            entries, series_id, titles.get('title'), titles.get('subtitle'))


-class NRKTVDirekteIE(NRKTVIE):  # XXX: Do not subclass from concrete IE
+class NRKTVDirekteIE(NRKBaseIE):
    IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
    _VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'

@ -743,21 +913,29 @@ class NRKTVDirekteIE(NRKTVIE):  # XXX: Do not subclass from concrete IE
        'only_matching': True,
    }]

+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        return self.url_result(
+            f'nrk:{video_id}', ie=NRKIE.ie_key(), video_id=video_id)

-class NRKRadioPodkastIE(InfoExtractor):
+
+class NRKRadioPodkastIE(NRKBaseIE):
    _VALID_URL = r'https?://radio\.nrk\.no/pod[ck]ast/(?:[^/]+/)+(?P<id>l_[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'

    _TESTS = [{
        'url': 'https://radio.nrk.no/podkast/ulrikkes_univers/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
-        'md5': '8d40dab61cea8ab0114e090b029a0565',
+        'md5': 'a68c3564be2f4426254f026c95a06348',
        'info_dict': {
-            'id': 'MUHH48000314AA',
-            'ext': 'mp4',
-            'title': '20 spørsmål 23.05.2014',
-            'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
-            'duration': 1741,
-            'series': '20 spørsmål',
-            'episode': '23.05.2014',
+            'id': 'l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
+            'ext': 'mp3',
+            'timestamp': 1522897200,
+            'alt_title': 'md5:06eae9f8c8ccf0718b54c83654e65550',
+            'upload_date': '20180405',
+            'thumbnail': 'https://gfx.nrk.no/CEDlVkEKxLYiBZ-CXjxSxgduDdaL-a4XTZlar9AoJFOA',
+            'description': '',
+            'title': 'Jeg er sinna og det må du tåle!',
+            'age_limit': 0,
+            'duration': 1682.0,
        },
    }, {
        'url': 'https://radio.nrk.no/podcast/ulrikkes_univers/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
@ -776,15 +954,16 @@ class NRKRadioPodkastIE(InfoExtractor):
            f'nrk:{video_id}', ie=NRKIE.ie_key(), video_id=video_id)


-class NRKPlaylistBaseIE(InfoExtractor):
+class NRKPlaylistBaseIE(NRKBaseIE):
    def _extract_description(self, webpage):
        pass

    def _real_extract(self, url):
        playlist_id = self._match_id(url)

-        webpage = self._download_webpage(url, playlist_id)
-
+        # Use the render HTML endpoint instead of the regular article URL. This prevents unrelated videos from being downloaded
+        # when .rich[data-video-id] elements also appear in the "related articles" section rather than only in the main article.
+        webpage = self._download_webpage(f'https://www.nrk.no/serum/api/render/{playlist_id.split("-")[-1]}', playlist_id)
        entries = [
            self.url_result(f'nrk:{video_id}', NRKIE.ie_key())
            for video_id in re.findall(self._ITEM_RE, webpage)
@ -800,6 +979,8 @@ class NRKPlaylistBaseIE(InfoExtractor):
 class NRKPlaylistIE(NRKPlaylistBaseIE):
    _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
    _ITEM_RE = r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"'
+    _TITLE_RE = r'class="[^"]*\barticle-title\b[^"]*"[^>]*>([^<]+)<'
+    _DESCRIPTION_RE = r'class="[^"]*[\s"]article-lead[\s"][^>]*>[^<]*<p>([^<]*)<'
    _TESTS = [{
        'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
        'info_dict': {
@ -819,42 +1000,29 @@ class NRKPlaylistIE(NRKPlaylistBaseIE):
    }]

    def _extract_title(self, webpage):
-        return self._og_search_title(webpage, fatal=False)
+        return re.search(self._TITLE_RE, webpage).group(1)

    def _extract_description(self, webpage):
-        return self._og_search_description(webpage)
+        return re.search(self._DESCRIPTION_RE, webpage).group(1)


-class NRKTVEpisodesIE(NRKPlaylistBaseIE):
-    _VALID_URL = r'https?://tv\.nrk\.no/program/[Ee]pisodes/[^/]+/(?P<id>\d+)'
-    _ITEM_RE = rf'data-episode=["\']{NRKTVIE._EPISODE_RE}'
-    _TESTS = [{
-        'url': 'https://tv.nrk.no/program/episodes/nytt-paa-nytt/69031',
-        'info_dict': {
-            'id': '69031',
-            'title': 'Nytt på nytt, sesong: 201210',
-        },
-        'playlist_count': 4,
-    }]
-
-    def _extract_title(self, webpage):
-        return self._html_search_regex(
-            r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
-
-
-class NRKSkoleIE(InfoExtractor):
+class NRKSkoleIE(NRKBaseIE):
    IE_DESC = 'NRK Skole'
    _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.nrk.no/skole/?page=search&q=&mediaId=14099',
-        'md5': '18c12c3d071953c3bf8d54ef6b2587b7',
+        'md5': '1d54ec4cff70d8f2c7909d1922514af2',
        'info_dict': {
            'id': '6021',
            'ext': 'mp4',
            'title': 'Genetikk og eneggede tvillinger',
-            'description': 'md5:3aca25dcf38ec30f0363428d2b265f8d',
+            'description': 'md5:7c0cc42d35d99bbc58f45639cdbcc163',
            'duration': 399,
+            'thumbnail': 'https://gfx.nrk.no/5SN-Uq11iR3ADwrCwTv0bAKbbBXXNpVJsaCLGiU8lFoQ',
+            'timestamp': 1205622000,
+            'upload_date': '20080315',
+            'alt_title': '',
        },
    }, {
        'url': 'https://www.nrk.no/skole/?page=objectives&subject=naturfag&objective=K15114&mediaId=19355',
@ -863,9 +1031,14 @@ class NRKSkoleIE(InfoExtractor):

    def _real_extract(self, url):
        video_id = self._match_id(url)
-
-        nrk_id = self._download_json(
+        response = self._download_json(
            f'https://nrkno-skole-prod.kube.nrk.no/skole/api/media/{video_id}',
-            video_id)['psId']
-
-        return self.url_result(f'nrk:{nrk_id}')
+            video_id)
+        nrk_id = response['psId']
+        return self.url_result(
+            f'nrk:{nrk_id}', NRKIE, nrk_id, url_transparent=True,
+            **traverse_obj(response, {
+                'title': ('title', {str}),
+                'timestamp': ('airedDate', {parse_iso8601}),
+                'description': ('summary', {str}),
+            }))
--- a/yt_dlp/extractor/rutube.py
+++ b/yt_dlp/extractor/rutube.py
@ -13,7 +13,10 @@ from ..utils import (
    unified_timestamp,
    url_or_none,
 )
-from ..utils.traversal import traverse_obj
+from ..utils.traversal import (
+    subs_list_to_dict,
+    traverse_obj,
+)


 class RutubeBaseIE(InfoExtractor):
@ -92,11 +95,11 @@ class RutubeBaseIE(InfoExtractor):
                hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls')
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)
-        for caption in traverse_obj(options, ('captions', lambda _, v: url_or_none(v['file']))):
-            subtitles.setdefault(caption.get('code') or 'ru', []).append({
-                'url': caption['file'],
-                'name': caption.get('langTitle'),
-            })
+        self._merge_subtitles(traverse_obj(options, ('captions', ..., {
+            'id': 'code',
+            'url': 'file',
+            'name': ('langTitle', {str}),
+        }, all, {subs_list_to_dict(lang='ru')})), target=subtitles)
        return formats, subtitles

    def _download_and_extract_formats_and_subtitles(self, video_id, query=None):
--- a/yt_dlp/extractor/sonyliv.py
+++ b/yt_dlp/extractor/sonyliv.py
@ -199,8 +199,9 @@ class SonyLIVSeriesIE(InfoExtractor):
        },
    }]
    _API_BASE = 'https://apiv2.sonyliv.com/AGL'
+    _SORT_ORDERS = ('asc', 'desc')

-    def _entries(self, show_id):
+    def _entries(self, show_id, sort_order):
        headers = {
            'Accept': 'application/json, text/plain, */*',
            'Referer': 'https://www.sonyliv.com',
@ -215,6 +216,9 @@ class SonyLIVSeriesIE(InfoExtractor):
                'from': '0',
                'to': '49',
            }), ('resultObj', 'containers', 0, 'containers', lambda _, v: int_or_none(v['id'])))
+
+        if sort_order == 'desc':
+            seasons = reversed(seasons)
        for season in seasons:
            season_id = str(season['id'])
            note = traverse_obj(season, ('metadata', 'title', {str})) or 'season'
@ -226,7 +230,7 @@ class SonyLIVSeriesIE(InfoExtractor):
                        'from': str(cursor),
                        'to': str(cursor + 99),
                        'orderBy': 'episodeNumber',
-                        'sortOrder': 'asc',
+                        'sortOrder': sort_order,
                    }), ('resultObj', 'containers', 0, 'containers', lambda _, v: int_or_none(v['id'])))
                if not episodes:
                    break
@ -237,4 +241,10 @@ class SonyLIVSeriesIE(InfoExtractor):

    def _real_extract(self, url):
        show_id = self._match_id(url)
-        return self.playlist_result(self._entries(show_id), playlist_id=show_id)
+
+        sort_order = self._configuration_arg('sort_order', [self._SORT_ORDERS[0]])[0]
+        if sort_order not in self._SORT_ORDERS:
+            raise ValueError(
+                f'Invalid sort order "{sort_order}". Allowed values are: {", ".join(self._SORT_ORDERS)}')
+
+        return self.playlist_result(self._entries(show_id, sort_order), playlist_id=show_id)
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@ -241,7 +241,7 @@ class SoundcloudBaseIE(InfoExtractor):
                    format_urls.add(format_url)
                    formats.append({
                        'format_id': 'download',
-                        'ext': urlhandle_detect_ext(urlh) or 'mp3',
+                        'ext': urlhandle_detect_ext(urlh, default='mp3'),
                        'filesize': int_or_none(urlh.headers.get('Content-Length')),
                        'url': format_url,
                        'quality': 10,
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@ -419,7 +419,9 @@ def create_parser():
    general.add_option(
        '--flat-playlist',
        action='store_const', dest='extract_flat', const='in_playlist', default=False,
-        help='Do not extract the videos of a playlist, only list them')
+        help=(
+            'Do not extract a playlist\'s URL result entries; '
+            'some entry metadata may be missing and downloading may be bypassed'))
    general.add_option(
        '--no-flat-playlist',
        action='store_false', dest='extract_flat',
Author	SHA1	Message	Date
MrDemocracy	ff58ddfc46	Merge `7ab6662997` into `da252d9d32`	2024-11-18 02:47:52 +02:00
bashonly	da252d9d32	[cleanup] Misc (#11554) Closes #6884 Authored by: bashonly, Grub4K, seproDev Co-authored-by: Simon Sawicki <contact@grub4k.xyz> Co-authored-by: sepro <sepro@sepr0.com>	2024-11-17 23:25:05 +00:00
gillux	e079ffbda6	[ie/litv] Fix extractor (#11071) Authored by: jiru	2024-11-17 21:37:15 +00:00
bashonly	2009cb27e1	[ie/SonyLIVSeries] Add `sort_order` extractor-arg (#11569) Authored by: bashonly	2024-11-17 21:16:22 +00:00
Jackson Humphrey	f351440f1d	[ie/ctvnews] Fix extractor (#11534) Closes #8689 Authored by: jshumphrey, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2024-11-17 21:06:50 +00:00
qbnu	f9d98509a8	[ie/ctvnews] Fix playlist ID extraction (#8892) Authored by: qbnu	2024-11-17 19:35:10 +00:00
bashonly	7ab6662997	Merge branch 'yt-dlp:master' into pr/10187	2024-11-15 21:54:22 -06:00
MrDemocracy	2b5eaf8601	[nrk] Wrong file	2024-10-24 17:38:51 +02:00
MrDemocracy	725ab6ef3e	[nrk] Linting	2024-10-24 17:31:01 +02:00
MrDemocracy	17b667c2fa	[nrk] Remove unused import	2024-10-24 17:26:20 +02:00
MrDemocracy	38746cb1af	[nrk] Accidentally removed login function	2024-10-24 17:24:10 +02:00
MrDemocracy	670ac229d9	[nrk] Run Ruff to apply linting fixes in nrk.py	2024-10-24 17:13:20 +02:00
MrDemocracy	3213c07265	[nrk] Restore NRKBaseIE class and remove subclassing from concrete IE	2024-10-24 17:09:06 +02:00
MrDemocracy	5cc9b64268	[nrk] Run autopep8 to format test_subtitles.py	2024-10-24 15:45:09 +02:00
MrDemocracy	0048ed894e	[nrk] Made suggested changes, some slight refactoring and updated subtitles test	2024-10-24 15:37:45 +02:00
MrDemocracy	b691d1dadb	[nrk] Remove unused manifest_type variable	2024-10-06 02:15:46 +02:00
MrDemocracy	4cd8abfc08	[nrk] Run autopep8 to format nrk.py	2024-10-06 02:12:37 +02:00
MrDemocracy	4522cce417	[nrk] Run Ruff to apply linting fixes in nrk.py	2024-10-06 02:05:27 +02:00
MrDemocracy	6b2b7dbc42	[nrk] Standardize string formatting in f-string	2024-10-06 02:00:15 +02:00
MrDemocracy	7e8e6cb621	[nrk] Modify api_url construction logic for season extractor	2024-10-06 01:44:41 +02:00
MrDemocracy	34236d0b95	[nrk] Add 1080p support, linting improvements, and update tests	2024-10-06 01:35:35 +02:00
MrDemocracy	6d7eb0e827	[nrk] Change initial chapters variable from None to empty list	2024-06-15 03:03:54 +02:00
MrDemocracy	b5a111eeb8	[nrk] Add login support and chapter extraction	2024-06-15 02:46:44 +02:00