Compare commits


32 Commits

Author SHA1 Message Date
bashonly
5222390c89
Merge branch 'yt-dlp:master' into feat/run-tests 2023-12-25 16:35:31 -06:00
kclauhk
c39358a54b
[ie/Facebook] Fix Memories extraction (#8681)
- Support group /posts/ URLs
- Raise a proper error message if no formats are found

Closes #8669
Authored by: kclauhk
2023-12-24 23:43:35 +01:00
Lars Strojny
1f8bd8eba8
[ie/ARDBetaMediathek] Fix series extraction (#8687)
Closes #7666
Authored by: lstrojny
2023-12-24 23:38:21 +01:00
Simon Sawicki
00cdda4f6f
[core] Fix format selection parse error for CPython 3.12 (#8797)
Authored by: Grub4K
2023-12-24 22:09:01 +01:00
bashonly
116c268438
[ie/twitter] Work around API rate-limit (#8825)
Closes #8762
Authored by: bashonly
2023-12-24 16:41:28 +00:00
bashonly
e7d22348e7
[ie/twitter] Prioritize m3u8 formats (#8826)
Closes #8117
Authored by: bashonly
2023-12-24 16:40:50 +00:00
bashonly
50eaea9fd7
[ie/instagram] Fix stories extraction (#8843)
Closes #8290
Authored by: bashonly
2023-12-24 16:40:03 +00:00
bashonly
f45c4efcd9
[ie/litv] Fix premium content extraction (#8842)
Closes #8654
Authored by: bashonly
2023-12-24 16:33:16 +00:00
Simon Sawicki
13b3cb3c2b
[ci] Run core tests only for core changes (#8841)
Authored by: Grub4K
2023-12-24 00:11:10 +01:00
Nicolas Dato
0d531c35ec
[ie/RudoVideo] Add extractor (#8664)
Authored by: nicodato
2023-12-22 22:52:07 +01:00
barsnick
bc4ab17b38
[cleanup] Fix spelling of IE_NAME (#8810)
Authored by: barsnick
2023-12-22 02:32:29 +01:00
bashonly
632b8ee54e
[core] Release workflow and Updater cleanup (#8640)
- Only use trusted publishing with PyPI and remove support for PyPI tokens from release workflow
- Clean up improper actions syntax in the build workflow inputs
- Refactor Updater to allow for consistent unit testing with `UPDATE_SOURCES`

Authored by: bashonly
2023-12-21 21:06:26 +00:00
barsnick
c919b68f7e
[ie/bbc] Extract more formats (#8321)
Closes #4902
Authored by: barsnick, dirkf
2023-12-21 20:47:32 +00:00
bashonly
19741ab8a4
[ie/bbc] Fix JSON parsing bug
Authored by: bashonly
2023-12-21 14:46:00 -06:00
bashonly
37755a037e
[test:networking] Update tests for OpenSSL 3.2 (#8814)
Authored by: bashonly
2023-12-20 19:03:54 +00:00
coletdjnz
196eb0fe77
[networking] Strip whitespace around header values (#8802)
Fixes https://github.com/yt-dlp/yt-dlp/issues/8729
Authored by: coletdjnz
2023-12-20 19:15:38 +13:00
Mozi
db8b4edc7d
[ie/JoqrAg] Add extractor (#8384)
Authored by: pzhlkj6612
2023-12-19 14:21:47 +00:00
bashonly
1c54a98e19
[ie/twitter] Extract stale tweets (#8724)
Closes #8691
Authored by: bashonly
2023-12-19 13:24:55 +00:00
Simon Sawicki
00a3e47bf5
[ie/bundestag] Add extractor (#8783)
Authored by: Grub4K
2023-12-18 21:32:08 +01:00
Amir Y. Perehodnik
c5f01bf7d4
[ie/Maariv] Add extractor (#8331)
Authored by: amir16yp
2023-12-18 16:52:43 +01:00
Tristan Charpentier
c91af948e4
[ie/RinseFM] Add extractor (#8778)
Authored by: hashFactory
2023-12-17 14:07:55 +00:00
Pandey Ganesha
6b5d93b0b0
[ie/youtube] Fix like_count extraction (#8763)
Closes #8759
Authored by: Ganesh910
2023-12-13 07:04:12 +00:00
pukkandan
298230e550
[webvtt] Fix 15f22b4880 2023-12-13 05:11:45 +05:30
Mozi
d5d1517e7d
[ie/eplus] Add login support and DRM detection (#8661)
Authored by: pzhlkj6612
2023-12-12 00:29:36 +00:00
trainman261
7e09c147fd
[ie/theplatform] Extract more metadata (#8635)
Authored by: trainman261
2023-12-12 00:00:35 +00:00
Benjamin Krausse
e370f9ec36
[ie] Add media_type field
Authored by: trainman261
2023-12-11 17:57:41 -06:00
SirElderling
b1a1ec1540
[ie/bitchute] Fix and improve metadata extraction (#8507)
Closes #8492
Authored by: SirElderling
2023-12-11 23:56:01 +00:00
Simon Sawicki
0b6f829b1d
[utils] traverse_obj: Move is_user_input into output template (#8673)
Authored by: Grub4K
2023-12-06 21:46:45 +01:00
Simon Sawicki
f98a3305eb
[ie/pr0gramm] Support variant formats and subtitles (#8674)
Authored by: Grub4K
2023-12-06 21:44:54 +01:00
sepro
04a5e06350
[ie/ondemandkorea] Fix upgraded format extraction (#8677)
Closes #8675
Authored by: seproDev
2023-12-06 18:58:00 +01:00
Nicolas Cisco
b03c89309e
[ie/mediastream] Fix authenticated format extraction (#8657)
Authored by: NickCis
2023-12-06 18:55:38 +01:00
Pierrick Guillaume
71f28097fe
[ie/francetv] Improve metadata extraction (#8409)
Authored by: Fymyte
2023-12-06 16:10:11 +01:00
44 changed files with 1061 additions and 228 deletions

View File

@ -80,12 +80,12 @@ on:
default: true
type: boolean
origin:
description: .
description: Origin
required: false
default: ''
default: 'current repo'
type: choice
options:
- ''
- 'current repo'
permissions:
contents: read
@ -99,7 +99,7 @@ jobs:
- name: Process origin
id: process_origin
run: |
echo "origin=${{ inputs.origin || github.repository }}" >> "$GITHUB_OUTPUT"
echo "origin=${{ inputs.origin == 'current repo' && github.repository || inputs.origin }}" | tee "$GITHUB_OUTPUT"
unix:
needs: process
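The `inputs.origin == 'current repo' && github.repository || inputs.origin` expression above emulates a ternary, since GitHub Actions expressions have no conditional operator. A minimal Python sketch of the same short-circuit idiom (illustration only, not yt-dlp code); as with Python's legacy `and/or` ternary, it only works because `github.repository` is never falsy:

    def resolve_origin(origin_input, repository):
        # mirrors: inputs.origin == 'current repo' && github.repository || inputs.origin
        return origin_input == 'current repo' and repository or origin_input

    assert resolve_origin('current repo', 'yt-dlp/yt-dlp') == 'yt-dlp/yt-dlp'
    assert resolve_origin('someuser/yt-dlp', 'yt-dlp/yt-dlp') == 'someuser/yt-dlp'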

View File

@ -1,5 +1,25 @@
name: Core Tests
on: [push, pull_request]
on:
push:
paths:
- .github/**
- devscripts/**
- test/**
- yt_dlp/**.py
- '!yt_dlp/extractor/*.py'
- yt_dlp/extractor/__init__.py
- yt_dlp/extractor/common.py
- yt_dlp/extractor/extractors.py
pull_request:
paths:
- .github/**
- devscripts/**
- test/**
- yt_dlp/**.py
- '!yt_dlp/extractor/*.py'
- yt_dlp/extractor/__init__.py
- yt_dlp/extractor/common.py
- yt_dlp/extractor/extractors.py
permissions:
contents: read
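The `paths` filters above include the CI, devscripts, test, and core trees, carve the extractor directory back out with the `!` pattern, then re-add only the three extractor files that core tests depend on. A rough Python approximation of the ordered include/exclude evaluation (fnmatch globs only approximate GitHub's own glob rules):

    from fnmatch import fnmatch

    CORE_PATHS = [
        '.github/**', 'devscripts/**', 'test/**', 'yt_dlp/**.py',
        '!yt_dlp/extractor/*.py',
        'yt_dlp/extractor/__init__.py',
        'yt_dlp/extractor/common.py',
        'yt_dlp/extractor/extractors.py',
    ]

    def triggers_core_tests(path):
        matched = False
        for pattern in CORE_PATHS:  # later patterns override earlier ones
            if fnmatch(path, pattern.lstrip('!')):
                matched = not pattern.startswith('!')
        return matched

    assert triggers_core_tests('yt_dlp/extractor/common.py')
    assert not triggers_core_tests('yt_dlp/extractor/youtube.py')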

View File

@ -64,7 +64,6 @@ jobs:
target_tag: ${{ steps.setup_variables.outputs.target_tag }}
pypi_project: ${{ steps.setup_variables.outputs.pypi_project }}
pypi_suffix: ${{ steps.setup_variables.outputs.pypi_suffix }}
pypi_token: ${{ steps.setup_variables.outputs.pypi_token }}
head_sha: ${{ steps.get_target.outputs.head_sha }}
steps:
@ -153,7 +152,6 @@ jobs:
${{ !!secrets[format('{0}_archive_repo_token', env.target_repo)] }} || fallback_token
pypi_project='${{ vars[format('{0}_pypi_project', env.target_repo)] }}'
pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.target_repo)] }}'
${{ !secrets[format('{0}_pypi_token', env.target_repo)] }} || pypi_token='${{ env.target_repo }}_pypi_token'
fi
else
target_tag="${source_tag:-${version}}"
@ -163,7 +161,6 @@ jobs:
${{ !!secrets[format('{0}_archive_repo_token', env.source_repo)] }} || fallback_token
pypi_project='${{ vars[format('{0}_pypi_project', env.source_repo)] }}'
pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.source_repo)] }}'
${{ !secrets[format('{0}_pypi_token', env.source_repo)] }} || pypi_token='${{ env.source_repo }}_pypi_token'
else
target_repo='${{ github.repository }}'
fi
@ -172,13 +169,6 @@ jobs:
if [[ "${target_repo}" == '${{ github.repository }}' ]] && ${{ !inputs.prerelease }}; then
pypi_project='${{ vars.PYPI_PROJECT }}'
fi
if [[ -z "${pypi_token}" && "${pypi_project}" ]]; then
if ${{ !secrets.PYPI_TOKEN }}; then
pypi_token=OIDC
else
pypi_token=PYPI_TOKEN
fi
fi
echo "::group::Output variables"
cat << EOF | tee -a "$GITHUB_OUTPUT"
@ -189,7 +179,6 @@ jobs:
target_tag=${target_tag}
pypi_project=${pypi_project}
pypi_suffix=${pypi_suffix}
pypi_token=${pypi_token}
EOF
echo "::endgroup::"
@ -286,18 +275,7 @@ jobs:
python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update"
python setup.py sdist bdist_wheel
- name: Publish to PyPI via token
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets[needs.prepare.outputs.pypi_token] }}
if: |
needs.prepare.outputs.pypi_token != 'OIDC' && env.TWINE_PASSWORD
run: |
twine upload dist/*
- name: Publish to PyPI via trusted publishing
if: |
needs.prepare.outputs.pypi_token == 'OIDC'
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
verbose: true

View File

@ -1333,6 +1333,7 @@ The available fields are:
- `was_live` (boolean): Whether this video was originally a live stream
- `playable_in_embed` (string): Whether this video is allowed to play in embedded players on other sites
- `availability` (string): Whether the video is "private", "premium_only", "subscriber_only", "needs_auth", "unlisted" or "public"
- `media_type` (string): The type of media as classified by the site, e.g. "episode", "clip", "trailer"
- `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
- `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
- `extractor` (string): Name of the extractor
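Since `media_type` is now an ordinary info-dict field, it can be used like any other field, e.g. in an output template or through the Python API. A hedged sketch (the URL is a placeholder; the field is only populated on sites whose extractors classify their media):

    from yt_dlp import YoutubeDL

    with YoutubeDL() as ydl:
        # placeholder URL for illustration
        info = ydl.extract_info('https://example.com/some-video', download=False)
        print(info.get('media_type'))  # e.g. "episode", "clip", "trailer", or None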

View File

@ -140,6 +140,8 @@ class TestFormatSelection(unittest.TestCase):
test('example-with-dashes', 'example-with-dashes')
test('all', '2', '47', '45', 'example-with-dashes', '35')
test('mergeall', '2+47+45+example-with-dashes+35', multi=True)
# See: https://github.com/yt-dlp/yt-dlp/pulls/8797
test('7_a/worst', '35')
def test_format_selection_audio(self):
formats = [

View File

@ -328,7 +328,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
https_server_thread.start()
with handler(verify=False) as rh:
with pytest.raises(SSLError, match='sslv3 alert handshake failure') as exc_info:
with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
assert not issubclass(exc_info.type, CertificateVerifyError)

View File

@ -11,6 +11,14 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, report_warning
from yt_dlp.update import Updater, UpdateInfo
# XXX: Keep in sync with yt_dlp.update.UPDATE_SOURCES
TEST_UPDATE_SOURCES = {
'stable': 'yt-dlp/yt-dlp',
'nightly': 'yt-dlp/yt-dlp-nightly-builds',
'master': 'yt-dlp/yt-dlp-master-builds',
}
TEST_API_DATA = {
'yt-dlp/yt-dlp/latest': {
'tag_name': '2023.12.31',
@ -104,6 +112,7 @@ class FakeUpdater(Updater):
_channel = 'stable'
_origin = 'yt-dlp/yt-dlp'
_update_sources = TEST_UPDATE_SOURCES
def _download_update_spec(self, *args, **kwargs):
return TEST_LOCKFILE_ACTUAL

View File

@ -2317,23 +2317,6 @@ Line 1
self.assertEqual(traverse_obj({}, (0, slice(1)), traverse_string=True), [],
msg='branching should result in list if `traverse_string`')
# Test is_user_input behavior
_IS_USER_INPUT_DATA = {'range8': list(range(8))}
self.assertEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3'),
is_user_input=True), 3,
msg='allow for string indexing if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3:'),
is_user_input=True), tuple(range(8))[3:],
msg='allow for string slice if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':4:2'),
is_user_input=True), tuple(range(8))[:4:2],
msg='allow step in string slice if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':'),
is_user_input=True), range(8),
msg='`:` should be treated as `...` if `is_user_input`')
with self.assertRaises(TypeError, msg='too many params should result in error'):
traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':::'), is_user_input=True)
# Test re.Match as input obj
mobj = re.fullmatch(r'0(12)(?P<group>3)(4)?', '0123')
self.assertEqual(traverse_obj(mobj, ...), [x for x in mobj.groups() if x is not None],
@ -2387,6 +2370,11 @@ Line 1
headers4 = HTTPHeaderDict({'ytdl-test': 'data;'})
self.assertEqual(set(headers4.items()), {('Ytdl-Test', 'data;')})
# common mistake: strip whitespace from values
# https://github.com/yt-dlp/yt-dlp/issues/8729
headers5 = HTTPHeaderDict({'ytdl-test': ' data; '})
self.assertEqual(set(headers5.items()), {('Ytdl-Test', 'data;')})
def test_extract_basic_auth(self):
assert extract_basic_auth('http://:foo.bar') == ('http://:foo.bar', None)
assert extract_basic_auth('http://foo.bar') == ('http://foo.bar', None)

View File

@ -148,7 +148,7 @@ class TestWebsSocketRequestHandlerConformance:
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_ssl_error(self, handler):
with handler(verify=False) as rh:
with pytest.raises(SSLError, match='sslv3 alert handshake failure') as exc_info:
with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
validate_and_send(rh, Request(self.bad_wss_host))
assert not issubclass(exc_info.type, CertificateVerifyError)

View File

@ -1201,6 +1201,15 @@ class YoutubeDL:
(?:\|(?P<default>.*?))?
)$''')
def _from_user_input(field):
if field == ':':
return ...
elif ':' in field:
return slice(*map(int_or_none, field.split(':')))
elif int_or_none(field) is not None:
return int(field)
return field
def _traverse_infodict(fields):
fields = [f for x in re.split(r'\.({.+?})\.?', fields)
for f in ([x] if x.startswith('{') else x.split('.'))]
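For reference, a sketch of what the new `_from_user_input` helper above maps user-supplied field strings to (same shape as the diff; `int_or_none` is yt-dlp's tolerant integer parser):

    from yt_dlp.utils import int_or_none

    def _from_user_input(field):
        if field == ':':
            return ...                                         # ':' means "all items"
        elif ':' in field:
            return slice(*map(int_or_none, field.split(':')))  # '3:' -> slice(3, None)
        elif int_or_none(field) is not None:
            return int(field)                                  # numeric strings become indices
        return field                                           # anything else stays a dict key

    assert _from_user_input(':') is Ellipsis
    assert _from_user_input('3:') == slice(3, None)
    assert _from_user_input('3') == 3
    assert _from_user_input('title') == 'title'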
@ -1210,11 +1219,12 @@ class YoutubeDL:
for i, f in enumerate(fields):
if not f.startswith('{'):
fields[i] = _from_user_input(f)
continue
assert f.endswith('}'), f'No closing brace for {f} in {fields}'
fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}
fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')}
return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
return traverse_obj(info_dict, fields, traverse_string=True)
def get_value(mdict):
# Object traversal
@ -2455,9 +2465,16 @@ class YoutubeDL:
return selector_function(ctx_copy)
return final_selector
stream = io.BytesIO(format_spec.encode())
# HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
# Prefix numbers with random letters to avoid it being classified as a number
# See: https://github.com/yt-dlp/yt-dlp/pulls/8797
# TODO: Implement parser not reliant on tokenize.tokenize
prefix = ''.join(random.choices(string.ascii_letters, k=32))
stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
try:
tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
tokens = list(_remove_unused_ops(
token._replace(string=token.string.replace(prefix, ''))
for token in tokenize.tokenize(stream.readline)))
except tokenize.TokenError:
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
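The prefix hack above can be reproduced in isolation. A minimal sketch mirroring the diff: the random letter prefix turns `7_a` into a valid identifier for CPython 3.12's stricter tokenizer, and is stripped back out of every emitted token:

    import io, random, re, string, tokenize

    def tokenize_format_spec(format_spec):
        prefix = ''.join(random.choices(string.ascii_letters, k=32))
        stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
        return [token._replace(string=token.string.replace(prefix, ''))
                for token in tokenize.tokenize(stream.readline)]

    # '7_a/worst' now tokenizes on CPython 3.12 instead of raising TokenError
    print([t.string for t in tokenize_format_spec('7_a/worst')])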

View File

@ -276,6 +276,7 @@ from .brilliantpala import (
)
from .businessinsider import BusinessInsiderIE
from .bundesliga import BundesligaIE
from .bundestag import BundestagIE
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
@ -864,6 +865,7 @@ from .jiosaavn import (
)
from .jove import JoveIE
from .joj import JojIE
from .joqrag import JoqrAgIE
from .jstream import JStreamIE
from .jtbc import (
JTBCIE,
@ -991,6 +993,7 @@ from .lynda import (
LyndaIE,
LyndaCourseIE
)
from .maariv import MaarivIE
from .magellantv import MagellanTVIE
from .magentamusik360 import MagentaMusik360IE
from .mailru import (
@ -1590,6 +1593,7 @@ from .restudy import RestudyIE
from .reuters import ReutersIE
from .reverbnation import ReverbNationIE
from .rheinmaintv import RheinMainTVIE
from .rinsefm import RinseFMIE
from .rmcdecouverte import RMCDecouverteIE
from .rockstargames import RockstarGamesIE
from .rokfin import (
@ -1643,6 +1647,7 @@ from .rumble import (
RumbleIE,
RumbleChannelIE,
)
from .rudovideo import RudoVideoIE
from .rutube import (
RutubeIE,
RutubeChannelIE,

View File

@ -121,11 +121,21 @@ class AENetworksIE(AENetworksBaseIE):
'info_dict': {
'id': '22253814',
'ext': 'mp4',
'title': 'Winter is Coming',
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
'title': 'Winter Is Coming',
'description': 'md5:a40e370925074260b1c8a633c632c63a',
'timestamp': 1338306241,
'upload_date': '20120529',
'uploader': 'AENE-NEW',
'duration': 2592.0,
'thumbnail': r're:^https?://.*\.jpe?g$',
'chapters': 'count:5',
'tags': 'count:14',
'categories': ['Mountain Men'],
'episode_number': 1,
'episode': 'Episode 1',
'season': 'Season 1',
'season_number': 1,
'series': 'Mountain Men',
},
'params': {
# m3u8 download
@ -143,6 +153,15 @@ class AENetworksIE(AENetworksBaseIE):
'timestamp': 1452634428,
'upload_date': '20160112',
'uploader': 'AENE-NEW',
'duration': 1277.695,
'thumbnail': r're:^https?://.*\.jpe?g$',
'chapters': 'count:4',
'tags': 'count:23',
'episode': 'Episode 1',
'episode_number': 1,
'season': 'Season 9',
'season_number': 9,
'series': 'Duck Dynasty',
},
'params': {
# m3u8 download

View File

@ -292,7 +292,7 @@ class ARDIE(InfoExtractor):
_TESTS = [{
# available till 7.12.2023
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
'md5': 'a438f671e87a7eba04000336a119ccc4',
'md5': '94812e6438488fb923c361a44469614b',
'info_dict': {
'id': 'maischberger-video-424',
'display_id': 'maischberger-video-424',
@ -403,26 +403,25 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
_VALID_URL = r'''(?x)https://
(?:(?:beta|www)\.)?ardmediathek\.de/
(?:(?P<client>[^/]+)/)?
(?:player|live|video|(?P<playlist>sendung|sammlung))/
(?:player|live|video|(?P<playlist>sendung|serie|sammlung))/
(?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
(?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
(?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''
_TESTS = [{
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI',
'md5': '3fd5fead7a370a819341129c8d713136',
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4',
'info_dict': {
'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen',
'id': '12172961',
'title': 'Wolfsland - Die traurigen Schwestern',
'description': r're:^Als der Polizeiobermeister Raaben',
'duration': 5241,
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957',
'timestamp': 1670710500,
'upload_date': '20221210',
'display_id': 'filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen',
'id': '12939099',
'title': 'Liebe auf vier Pfoten',
'description': r're:^Claudia Schmitt, Anwältin in Salzburg',
'duration': 5222,
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:aee7cbf8f06de976?w=960&ch=ae4d0f2ee47d8b9b',
'timestamp': 1701343800,
'upload_date': '20231130',
'ext': 'mp4',
'age_limit': 12,
'episode': 'Wolfsland - Die traurigen Schwestern',
'episode': 'Liebe auf vier Pfoten',
'series': 'Filme im MDR'
},
}, {
@ -454,7 +453,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
'duration': 915,
'episode': 'tagesschau, 20:00 Uhr',
'series': 'tagesschau',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678',
},
}, {
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
@ -475,6 +474,10 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
# playlist of type 'sendung'
'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
'only_matching': True,
}, {
# playlist of type 'serie'
'url': 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1',
'only_matching': True,
}, {
# playlist of type 'sammlung'
'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
@ -487,10 +490,11 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
'only_matching': True,
}]
def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber):
def _ARD_load_playlist_snippet(self, playlist_id, display_id, client, mode, page_number):
""" Query the ARD server for playlist information
and returns the data in "raw" format """
if mode == 'sendung':
assert mode in ('sendung', 'serie', 'sammlung')
if mode in ('sendung', 'serie'):
graphQL = json.dumps({
'query': '''{
showPage(
@ -507,7 +511,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
links { target { id href title } }
type
}
}}''' % (client, playlist_id, pageNumber),
}}''' % (client, playlist_id, page_number),
}).encode()
else: # mode == 'sammlung'
graphQL = json.dumps({
@ -528,7 +532,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
type
}
}
}}''' % (client, playlist_id, pageNumber),
}}''' % (client, playlist_id, page_number),
}).encode()
# Ressources for ARD graphQL debugging:
# https://api-test.ardmediathek.de/public-gateway
@ -538,7 +542,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
data=graphQL,
headers={'Content-Type': 'application/json'})['data']
# align the structure of the returned data:
if mode == 'sendung':
if mode in ('sendung', 'serie'):
show_page = show_page['showPage']
else: # mode == 'sammlung'
show_page = show_page['morePage']['widget']
@ -546,12 +550,12 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
""" Collects all playlist entries and returns them as info dict.
Supports playlists of mode 'sendung' and 'sammlung', and also nested
playlists. """
Supports playlists of mode 'sendung', 'serie', and 'sammlung',
as well as nested playlists. """
entries = []
pageNumber = 0
while True: # iterate by pageNumber
show_page = self._ARD_load_playlist_snipped(
show_page = self._ARD_load_playlist_snippet(
playlist_id, display_id, client, mode, pageNumber)
for teaser in show_page['teasers']: # process playlist items
if '/compilation/' in teaser['links']['target']['href']:

View File

@ -317,16 +317,25 @@ class BBCCoUkIE(InfoExtractor):
def _download_media_selector(self, programme_id):
last_exception = None
formats, subtitles = [], {}
for media_set in self._MEDIA_SETS:
try:
return self._download_media_selector_url(
fmts, subs = self._download_media_selector_url(
self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
formats.extend(fmts)
if subs:
self._merge_subtitles(subs, target=subtitles)
except BBCCoUkIE.MediaSelectionError as e:
if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
last_exception = e
continue
self._raise_extractor_error(e)
self._raise_extractor_error(last_exception)
if last_exception:
if formats or subtitles:
self.report_warning(f'{self.IE_NAME} returned error: {last_exception.id}')
else:
self._raise_extractor_error(last_exception)
return formats, subtitles
def _download_media_selector_url(self, url, programme_id=None):
media_selection = self._download_json(
@ -1188,7 +1197,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
if initial_data is None:
initial_data = self._search_regex(
r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
'preload state', default={})
'preload state', default='{}')
else:
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
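The change above matters because the regex fallback flows into `_parse_json`: the old default `{}` was an already-parsed dict, which a JSON parser cannot accept, while the new default `'{}'` is JSON text. A trivial sketch of the failure mode, using the stdlib `json` module for illustration:

    import json

    json.loads('{}')    # the new fallback: valid JSON text for an empty object
    try:
        json.loads({})  # the old fallback: a dict is not a JSON document
    except TypeError as err:
        print(err)      # "the JSON object must be str, bytes or bytearray, not dict"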

View File

@ -7,8 +7,10 @@ from ..utils import (
ExtractorError,
OnDemandPagedList,
clean_html,
extract_attributes,
get_element_by_class,
get_element_by_id,
get_element_html_by_class,
get_elements_html_by_class,
int_or_none,
orderedSet,
@ -17,6 +19,7 @@ from ..utils import (
traverse_obj,
unified_strdate,
urlencode_postdata,
urljoin,
)
@ -34,6 +37,25 @@ class BitChuteIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'BitChute',
'upload_date': '20170103',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/'
},
}, {
# test case: video with different channel and uploader
'url': 'https://www.bitchute.com/video/Yti_j9A-UZ4/',
'md5': 'f10e6a8e787766235946d0868703f1d0',
'info_dict': {
'id': 'Yti_j9A-UZ4',
'ext': 'mp4',
'title': 'Israel at War | Full Measure',
'description': 'md5:38cf7bc6f42da1a877835539111c69ef',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'sharylattkisson',
'upload_date': '20231106',
'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
'channel': 'Full Measure with Sharyl Attkisson',
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/'
},
}, {
# video not downloadable in browser, but we can recover it
@ -48,6 +70,9 @@ class BitChuteIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'BitChute',
'upload_date': '20181113',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/'
},
'params': {'check_formats': None},
}, {
@ -99,6 +124,11 @@ class BitChuteIE(InfoExtractor):
reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title
self.raise_geo_restricted(reason)
@staticmethod
def _make_url(html):
path = extract_attributes(get_element_html_by_class('spa', html) or '').get('href')
return urljoin('https://www.bitchute.com', path)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
@ -121,12 +151,19 @@ class BitChuteIE(InfoExtractor):
'Video is unavailable. Please make sure this video is playable in the browser '
'before reporting this issue.', expected=True, video_id=video_id)
details = get_element_by_class('details', webpage) or ''
uploader_html = get_element_html_by_class('creator', details) or ''
channel_html = get_element_html_by_class('name', details) or ''
return {
'id': video_id,
'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
'description': self._og_search_description(webpage, default=None),
'thumbnail': self._og_search_thumbnail(webpage),
'uploader': clean_html(get_element_by_class('owner', webpage)),
'uploader': clean_html(uploader_html),
'uploader_url': self._make_url(uploader_html),
'channel': clean_html(channel_html),
'channel_url': self._make_url(channel_html),
'upload_date': unified_strdate(self._search_regex(
r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)),
'formats': formats,
@ -154,6 +191,9 @@ class BitChuteChannelIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'BitChute',
'upload_date': '20170103',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
'duration': 16,
'view_count': int,
},
@ -169,7 +209,7 @@ class BitChuteChannelIE(InfoExtractor):
'info_dict': {
'id': 'wV9Imujxasw9',
'title': 'Bruce MacDonald and "The Light of Darkness"',
'description': 'md5:04913227d2714af1d36d804aa2ab6b1e',
'description': 'md5:747724ef404eebdfc04277714f81863e',
}
}]

View File

@ -0,0 +1,123 @@
import re
from functools import partial
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
bug_reports_message,
clean_html,
format_field,
get_element_text_and_html_by_tag,
int_or_none,
url_or_none,
)
from ..utils.traversal import traverse_obj
class BundestagIE(InfoExtractor):
_VALID_URL = [
r'https?://dbtg\.tv/[cf]vid/(?P<id>\d+)',
r'https?://www\.bundestag\.de/mediathek/?\?(?:[^#]+&)?videoid=(?P<id>\d+)',
]
_TESTS = [{
'url': 'https://dbtg.tv/cvid/7605304',
'info_dict': {
'id': '7605304',
'ext': 'mp4',
'title': '145. Sitzung vom 15.12.2023, TOP 24 Barrierefreiheit',
'description': 'md5:321a9dc6bdad201264c0045efc371561',
},
}, {
'url': 'https://www.bundestag.de/mediathek?videoid=7602120&url=L21lZGlhdGhla292ZXJsYXk=&mod=mediathek',
'info_dict': {
'id': '7602120',
'ext': 'mp4',
'title': '130. Sitzung vom 18.10.2023, TOP 1 Befragung der Bundesregierung',
'description': 'Befragung der Bundesregierung',
},
}, {
'url': 'https://www.bundestag.de/mediathek?videoid=7604941#url=L21lZGlhdGhla292ZXJsYXk/dmlkZW9pZD03NjA0OTQx&mod=mediathek',
'only_matching': True,
}, {
'url': 'http://dbtg.tv/fvid/3594346',
'only_matching': True,
}]
_OVERLAY_URL = 'https://www.bundestag.de/mediathekoverlay'
_INSTANCE_FORMAT = 'https://cldf-wzw-od.r53.cdn.tv1.eu/13014bundestagod/_definst_/13014bundestag/ondemand/3777parlamentsfernsehen/archiv/app144277506/145293313/{0}/{0}_playlist.smil/playlist.m3u8'
_SHARE_URL = 'https://webtv.bundestag.de/player/macros/_x_s-144277506/shareData.json?contentId='
_SHARE_AUDIO_REGEX = r'/\d+_(?P<codec>\w+)_(?P<bitrate>\d+)kb_(?P<channels>\w+)_\w+_\d+\.(?P<ext>\w+)'
_SHARE_VIDEO_REGEX = r'/\d+_(?P<codec>\w+)_(?P<width>\w+)_(?P<height>\w+)_(?P<bitrate>\d+)kb_\w+_\w+_\d+\.(?P<ext>\w+)'
def _bt_extract_share_formats(self, video_id):
share_data = self._download_json(
f'{self._SHARE_URL}{video_id}', video_id, note='Downloading share format JSON')
if traverse_obj(share_data, ('status', 'code', {int})) != 1:
self.report_warning(format_field(
share_data, [('status', 'message', {str})],
'Share API response: %s', default='Unknown Share API Error')
+ bug_reports_message())
return
for name, url in share_data.items():
if not isinstance(name, str) or not url_or_none(url):
continue
elif name.startswith('audio'):
match = re.search(self._SHARE_AUDIO_REGEX, url)
yield {
'format_id': name,
'url': url,
'vcodec': 'none',
**traverse_obj(match, {
'acodec': 'codec',
'audio_channels': ('channels', {{'mono': 1, 'stereo': 2}.get}),
'abr': ('bitrate', {int_or_none}),
'ext': 'ext',
}),
}
elif name.startswith('download'):
match = re.search(self._SHARE_VIDEO_REGEX, url)
yield {
'format_id': name,
'url': url,
**traverse_obj(match, {
'vcodec': 'codec',
'tbr': ('bitrate', {int_or_none}),
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
'ext': 'ext',
}),
}
def _real_extract(self, url):
video_id = self._match_id(url)
formats = []
result = {'id': video_id, 'formats': formats}
try:
formats.extend(self._extract_m3u8_formats(
self._INSTANCE_FORMAT.format(video_id), video_id, m3u8_id='instance'))
except ExtractorError as error:
if isinstance(error.cause, HTTPError) and error.cause.status == 404:
raise ExtractorError('Could not find video id', expected=True)
self.report_warning(f'Error extracting hls formats: {error}', video_id)
formats.extend(self._bt_extract_share_formats(video_id))
if not formats:
self.raise_no_formats('Could not find suitable formats', video_id=video_id)
result.update(traverse_obj(self._download_webpage(
self._OVERLAY_URL, video_id,
query={'videoid': video_id, 'view': 'main'},
note='Downloading metadata overlay', fatal=False,
), {
'title': (
{partial(get_element_text_and_html_by_tag, 'h3')}, 0,
{partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
'description': ({partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
}))
return result

View File

@ -180,6 +180,13 @@ class CBCPlayerIE(InfoExtractor):
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
'chapters': [],
'duration': 494.811,
'categories': ['AudioMobile/All in a Weekend Montreal'],
'tags': 'count:8',
'location': 'Quebec',
'series': 'All in a Weekend Montreal',
'season': 'Season 2015',
'season_number': 2015,
'media_type': 'Excerpt',
},
}, {
'url': 'http://www.cbc.ca/player/play/2164402062',
@ -195,25 +202,37 @@ class CBCPlayerIE(InfoExtractor):
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
'chapters': [],
'duration': 186.867,
'series': 'CBC News: Windsor at 6:00',
'categories': ['News/Canada/Windsor'],
'location': 'Windsor',
'tags': ['cancer'],
'creator': 'Allison Johnson',
'media_type': 'Excerpt',
},
}, {
# Has subtitles
# These broadcasts expire after ~1 month, can find new test URL here:
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
'url': 'http://www.cbc.ca/player/play/2249992771553',
'md5': '2f2fb675dd4f0f8a5bb7588d1b13bacd',
'url': 'http://www.cbc.ca/player/play/2284799043667',
'md5': '9b49f0839e88b6ec0b01d840cf3d42b5',
'info_dict': {
'id': '2249992771553',
'id': '2284799043667',
'ext': 'mp4',
'title': 'The National | Womens soccer pay, Florida seawater, Swift quake',
'description': 'md5:adba28011a56cfa47a080ff198dad27a',
'timestamp': 1690596000,
'duration': 2716.333,
'title': 'The National | Hockey coach charged, Green grants, Safer drugs',
'description': 'md5:84ef46321c94bcf7d0159bb565d26bfa',
'timestamp': 1700272800,
'duration': 2718.833,
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/481/326/thumbnail.jpeg',
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/907/171/thumbnail.jpeg',
'uploader': 'CBCC-NEW',
'chapters': 'count:5',
'upload_date': '20230729',
'upload_date': '20231118',
'categories': 'count:4',
'series': 'The National - Full Show',
'tags': 'count:1',
'creator': 'News',
'location': 'Canada',
'media_type': 'Full Program',
},
}]

View File

@ -382,6 +382,7 @@ class InfoExtractor:
'private', 'premium_only', 'subscriber_only', 'needs_auth',
'unlisted' or 'public'. Use 'InfoExtractor._availability'
to set it
media_type: The type of media as classified by the site, e.g. "episode", "clip", "trailer"
_old_archive_ids: A list of old archive ids needed for backward compatibility
_format_sort_fields: A list of fields to use for sorting formats
__post_extractor: A function to be called just before the metadata is

View File

@ -46,6 +46,10 @@ class CWTVIE(InfoExtractor):
'timestamp': 1444107300,
'age_limit': 14,
'uploader': 'CWTV',
'thumbnail': r're:^https?://.*\.jpe?g$',
'chapters': 'count:4',
'episode': 'Episode 20',
'season': 'Season 11',
},
'params': {
# m3u8 download

View File

@ -1,15 +1,20 @@
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
try_call,
unified_timestamp,
urlencode_postdata,
)
class EplusIbIE(InfoExtractor):
IE_NAME = 'eplus:inbound'
IE_DESC = 'e+ (イープラス) overseas'
_VALID_URL = r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)'
_NETRC_MACHINE = 'eplus'
IE_NAME = 'eplus'
IE_DESC = 'e+ (イープラス)'
_VALID_URL = [r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)',
r'https?://live\.eplus\.jp/(?P<id>sample|\d+)']
_TESTS = [{
'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
'info_dict': {
@ -29,14 +34,97 @@ class EplusIbIE(InfoExtractor):
'No video formats found!',
'Requested format is not available',
],
}, {
'url': 'https://live.eplus.jp/sample',
'info_dict': {
'id': 'stream1ng20210719-test-005',
'title': 'Online streaming test for DRM',
'live_status': 'was_live',
'release_date': '20210719',
'release_timestamp': 1626703200,
'description': None,
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
'expected_warnings': [
'Could not find the playlist URL. This event may not be accessible',
'No video formats found!',
'Requested format is not available',
'This video is DRM protected',
],
}, {
'url': 'https://live.eplus.jp/2053935',
'info_dict': {
'id': '331320-0001-001',
'title': '丘みどり2020配信LIVE Vol.2 ~秋麗~ 【Streaming+(配信チケット)】',
'live_status': 'was_live',
'release_date': '20200920',
'release_timestamp': 1600596000,
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
'expected_warnings': [
'Could not find the playlist URL. This event may not be accessible',
'No video formats found!',
'Requested format is not available',
],
}]
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0'
def _login(self, username, password, urlh):
if not self._get_cookies('https://live.eplus.jp/').get('ci_session'):
raise ExtractorError('Unable to get ci_session cookie')
cltft_token = urlh.headers.get('X-CLTFT-Token')
if not cltft_token:
raise ExtractorError('Unable to get X-CLTFT-Token')
self._set_cookie('live.eplus.jp', 'X-CLTFT-Token', cltft_token)
login_json = self._download_json(
'https://live.eplus.jp/member/api/v1/FTAuth/idpw', None,
note='Sending pre-login info', errnote='Unable to send pre-login info', headers={
'Content-Type': 'application/json; charset=UTF-8',
'Referer': urlh.url,
'X-Cltft-Token': cltft_token,
'Accept': '*/*',
}, data=json.dumps({
'loginId': username,
'loginPassword': password,
}).encode())
if not login_json.get('isSuccess'):
raise ExtractorError('Login failed: Invalid id or password', expected=True)
self._request_webpage(
urlh.url, None, note='Logging in', errnote='Unable to log in',
data=urlencode_postdata({
'loginId': username,
'loginPassword': password,
'Token.Default': cltft_token,
'op': 'nextPage',
}), headers={'Referer': urlh.url})
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
webpage, urlh = self._download_webpage_handle(
url, video_id, headers={'User-Agent': self._USER_AGENT})
if urlh.url.startswith('https://live.eplus.jp/member/auth'):
username, password = self._get_login_info()
if not username:
self.raise_login_required()
self._login(username, password, urlh)
webpage = self._download_webpage(
url, video_id, headers={'User-Agent': self._USER_AGENT})
data_json = self._search_json(r'<script>\s*var app\s*=', webpage, 'data json', video_id)
if data_json.get('drm_mode') == 'ON':
self.report_drm(video_id)
delivery_status = data_json.get('delivery_status')
archive_mode = data_json.get('archive_mode')
release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
@ -64,7 +152,7 @@ class EplusIbIE(InfoExtractor):
formats = []
m3u8_playlist_urls = self._search_json(
r'var listChannels\s*=', webpage, 'hls URLs', video_id, contains_pattern=r'\[.+\]', default=[])
r'var\s+listChannels\s*=', webpage, 'hls URLs', video_id, contains_pattern=r'\[.+\]', default=[])
if not m3u8_playlist_urls:
if live_status == 'is_upcoming':
self.raise_no_formats(

View File

@ -52,7 +52,7 @@ class FacebookIE(InfoExtractor):
)\?(?:.*?)(?:v|video_id|story_fbid)=|
[^/]+/videos/(?:[^/]+/)?|
[^/]+/posts/|
groups/[^/]+/permalink/|
groups/[^/]+/(?:permalink|posts)/|
watchparty/
)|
facebook:
@ -232,6 +232,21 @@ class FacebookIE(InfoExtractor):
'uploader_id': '100013949973717',
},
'skip': 'Requires logging in',
}, {
# data.node.comet_sections.content.story.attachments[].throwbackStyles.attachment_target_renderer.attachment.target.attachments[].styles.attachment.media
'url': 'https://www.facebook.com/groups/1645456212344334/posts/3737828833107051/',
'info_dict': {
'id': '1569199726448814',
'ext': 'mp4',
'title': 'Pence MUST GO!',
'description': 'Vickie Gentry shared a memory.',
'timestamp': 1511548260,
'upload_date': '20171124',
'uploader': 'Vickie Gentry',
'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
'thumbnail': r're:^https?://.*',
'duration': 148.435,
},
}, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True,
@ -612,9 +627,11 @@ class FacebookIE(InfoExtractor):
nodes = variadic(traverse_obj(data, 'nodes', 'node') or [])
attachments = traverse_obj(nodes, (
..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments',
..., ('styles', 'style_type_renderer'), 'attachment'), expected_type=dict) or []
..., ('styles', 'style_type_renderer', ('throwbackStyles', 'attachment_target_renderer')),
'attachment', {dict}))
for attachment in attachments:
ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
ns = traverse_obj(attachment, ('all_subattachments', 'nodes', ..., {dict}),
('target', 'attachments', ..., 'styles', 'attachment', {dict}))
for n in ns:
parse_attachment(n)
parse_attachment(attachment)
@ -637,7 +654,7 @@ class FacebookIE(InfoExtractor):
if len(entries) > 1:
return self.playlist_result(entries, video_id)
video_info = entries[0]
video_info = entries[0] if entries else {'id': video_id}
webpage_info = extract_metadata(webpage)
# honor precise duration in video info
if video_info.get('duration'):
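The reworked attachment traversal above leans on `traverse_obj` branching: a tuple inside the path tries each alternative and collects every hit into a list, and `{dict}` filters the leaves by type. A small sketch of the idiom (simplified data, not a real Facebook payload):

    from yt_dlp.utils import traverse_obj

    data = {
        'styles': {'attachment': {'media': 'a'}},
        'throwbackStyles': {'attachment_target_renderer': {'attachment': {'media': 'b'}}},
    }
    attachments = traverse_obj(data, (
        ('styles', ('throwbackStyles', 'attachment_target_renderer')),  # branch: try both
        'attachment', {dict}))
    assert attachments == [{'media': 'a'}, {'media': 'b'}]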

View File

@ -1,12 +1,14 @@
from .common import InfoExtractor
from .dailymotion import DailymotionIE
from ..utils import (
determine_ext,
ExtractorError,
determine_ext,
format_field,
int_or_none,
join_nonempty,
parse_iso8601,
parse_qs,
)
from .dailymotion import DailymotionIE
class FranceTVBaseInfoExtractor(InfoExtractor):
@ -82,6 +84,8 @@ class FranceTVIE(InfoExtractor):
videos = []
title = None
subtitle = None
episode_number = None
season_number = None
image = None
duration = None
timestamp = None
@ -112,7 +116,9 @@ class FranceTVIE(InfoExtractor):
if meta:
if title is None:
title = meta.get('title')
# XXX: what is meta['pre_title']?
# meta['pre_title'] contains season and episode number for series in format "S<ID> E<ID>"
season_number, episode_number = self._search_regex(
r'S(\d+)\s*E(\d+)', meta.get('pre_title'), 'episode info', group=(1, 2), default=(None, None))
if subtitle is None:
subtitle = meta.get('additional_title')
if image is None:
@ -191,19 +197,19 @@ class FranceTVIE(InfoExtractor):
} for sheet in spritesheets]
})
if subtitle:
title += ' - %s' % subtitle
title = title.strip()
return {
'id': video_id,
'title': title,
'title': join_nonempty(title, subtitle, delim=' - ').strip(),
'thumbnail': image,
'duration': duration,
'timestamp': timestamp,
'is_live': is_live,
'formats': formats,
'subtitles': subtitles,
'episode': subtitle if episode_number else None,
'series': title if episode_number else None,
'episode_number': int_or_none(episode_number),
'season_number': int_or_none(season_number),
}
def _real_extract(self, url):
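The `pre_title` parsing above relies on `_search_regex` accepting `group=(1, 2)`, which returns a tuple of both captures, while `default=(None, None)` keeps the unpacking safe when `pre_title` is absent. The plain-`re` equivalent, as a sketch:

    import re

    m = re.search(r'S(\d+)\s*E(\d+)', 'S1 E3')
    season_number, episode_number = m.group(1, 2) if m else (None, None)
    assert (season_number, episode_number) == ('1', '3')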
@ -230,14 +236,31 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
'ext': 'mp4',
'title': '13h15, le dimanche... - Les mystères de Jésus',
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
'timestamp': 1502623500,
'duration': 2580,
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20170813',
},
'params': {
'skip_download': True,
},
'add_ie': [FranceTVIE.ie_key()],
}, {
'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html',
'info_dict': {
'id': 'a9050959-eedd-4b4a-9b0d-de6eeaa73e44',
'ext': 'mp4',
'title': 'Foot2Rue - Duel au vieux port',
'episode': 'Duel au vieux port',
'series': 'Foot2Rue',
'episode_number': 1,
'season_number': 1,
'timestamp': 1642761360,
'upload_date': '20220121',
'season': 'Season 1',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1441,
},
}, {
# france3
'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',

View File

@ -23,7 +23,7 @@ class IHeartRadioBaseIE(InfoExtractor):
class IHeartRadioIE(IHeartRadioBaseIE):
IENAME = 'iheartradio'
IE_NAME = 'iheartradio'
_VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)'
_TEST = {
'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true',

View File

@ -10,6 +10,7 @@ from ..utils import (
ExtractorError,
decode_base_n,
encode_base_n,
filter_dict,
float_or_none,
format_field,
get_element_by_attribute,
@ -703,28 +704,31 @@ class InstagramStoryIE(InstagramBaseIE):
user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False)
if not user_info:
self.raise_login_required('This content is unreachable')
user_id = user_info.get('id')
user_id = traverse_obj(user_info, 'pk', 'id', expected_type=str)
story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
if not story_info_url: # user id is only mandatory for non-highlights
raise ExtractorError('Unable to extract user id')
videos = traverse_obj(self._download_json(
f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}',
story_id, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels')
if not videos:
self.raise_login_required('You need to log in to access this content')
full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (str(user_id), 'user', 'full_name'))
full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (user_id, 'user', 'full_name'))
story_title = traverse_obj(videos, (f'highlight:{story_id}', 'title'))
if not story_title:
story_title = f'Story by {username}'
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (user_id, 'items'))
info_data = []
for highlight in highlights:
highlight_data = self._extract_product(highlight)
if highlight_data.get('formats'):
info_data.append({
**highlight_data,
'uploader': full_name,
'uploader_id': user_id,
**filter_dict(highlight_data),
})
return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title)
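The reordering above also wraps the product data in `filter_dict`, which drops `None`-valued keys, so a field that `_extract_product` could not populate no longer clobbers the `uploader`/`uploader_id` defaults. A sketch of the merge behavior:

    from yt_dlp.utils import filter_dict

    defaults = {'uploader': 'Full Name', 'uploader_id': '123'}
    product = {'uploader': None, 'title': 'story'}
    merged = {**defaults, **filter_dict(product)}
    assert merged == {'uploader': 'Full Name', 'uploader_id': '123', 'title': 'story'}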

yt_dlp/extractor/joqrag.py (new file, 112 lines)
View File

@ -0,0 +1,112 @@
import datetime
import urllib.parse
from .common import InfoExtractor
from ..utils import (
clean_html,
datetime_from_str,
unified_timestamp,
urljoin,
)
class JoqrAgIE(InfoExtractor):
IE_DESC = '超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)'
_VALID_URL = [r'https?://www\.uniqueradio\.jp/agplayer5/(?:player|inc-player-hls)\.php',
r'https?://(?:www\.)?joqr\.co\.jp/ag/',
r'https?://(?:www\.)?joqr\.co\.jp/qr/ag(?:daily|regular)program/?(?:$|[#?])']
_TESTS = [{
'url': 'https://www.uniqueradio.jp/agplayer5/player.php',
'info_dict': {
'id': 'live',
'title': str,
'channel': '超!A&G+',
'description': str,
'live_status': 'is_live',
'release_timestamp': int,
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
}, {
'url': 'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php',
'only_matching': True,
}, {
'url': 'https://www.joqr.co.jp/ag/article/103760/',
'only_matching': True,
}, {
'url': 'http://www.joqr.co.jp/qr/agdailyprogram/',
'only_matching': True,
}, {
'url': 'http://www.joqr.co.jp/qr/agregularprogram/',
'only_matching': True,
}]
def _extract_metadata(self, variable, html):
return clean_html(urllib.parse.unquote_plus(self._search_regex(
rf'var\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
html, 'metadata', group='value', default=''))) or None
def _extract_start_timestamp(self, video_id, is_live):
def extract_start_time_from(date_str):
dt = datetime_from_str(date_str) + datetime.timedelta(hours=9)
date = dt.strftime('%Y%m%d')
start_time = self._search_regex(
r'<h3[^>]+\bclass="dailyProgram-itemHeaderTime"[^>]*>[\s\d:]+\s*(\d{1,2}:\d{1,2})',
self._download_webpage(
f'https://www.joqr.co.jp/qr/agdailyprogram/?date={date}', video_id,
note=f'Downloading program list of {date}', fatal=False,
errnote=f'Failed to download program list of {date}') or '',
'start time', default=None)
if start_time:
return unified_timestamp(f'{dt.strftime("%Y/%m/%d")} {start_time} +09:00')
return None
start_timestamp = extract_start_time_from('today')
if not start_timestamp:
return None
if not is_live or start_timestamp < datetime_from_str('now').timestamp():
return start_timestamp
else:
return extract_start_time_from('yesterday')
def _real_extract(self, url):
video_id = 'live'
metadata = self._download_webpage(
'https://www.uniqueradio.jp/aandg', video_id,
note='Downloading metadata', errnote='Failed to download metadata')
title = self._extract_metadata('Program_name', metadata)
if title == '放送休止':
formats = []
live_status = 'is_upcoming'
release_timestamp = self._extract_start_timestamp(video_id, False)
msg = 'This stream is not currently live'
if release_timestamp:
msg += (' and will start at '
+ datetime.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S'))
self.raise_no_formats(msg, expected=True)
else:
m3u8_path = self._search_regex(
r'<source\s[^>]*\bsrc="([^"]+)"',
self._download_webpage(
'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php', video_id,
note='Downloading player data', errnote='Failed to download player data'),
'm3u8 url')
formats = self._extract_m3u8_formats(
urljoin('https://www.uniqueradio.jp/', m3u8_path), video_id)
live_status = 'is_live'
release_timestamp = self._extract_start_timestamp(video_id, True)
return {
'id': video_id,
'title': title,
'channel': '超!A&G+',
'description': self._extract_metadata('Program_text', metadata),
'formats': formats,
'live_status': live_status,
'release_timestamp': release_timestamp,
}
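The `_extract_metadata` helper above uses a quote-agnostic regex: `(["\'])` captures whichever quote opened the value, `(?:(?!\1).)+` consumes characters until that same quote reappears, and the trailing `\1` closes it. A standalone sketch:

    import re

    html = "var Program_name = 'My%20Program';"
    m = re.search(r'var\s+Program_name\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', html)
    print(m.group('value'))  # 'My%20Program' (still percent-encoded; the extractor unquotes it)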

View File

@ -12,7 +12,7 @@ from ..utils import (
class KinjaEmbedIE(InfoExtractor):
IENAME = 'kinja:embed'
IE_NAME = 'kinja:embed'
_DOMAIN_REGEX = r'''(?:[^.]+\.)?
(?:
avclub|

View File

@ -6,6 +6,7 @@ from ..utils import (
int_or_none,
smuggle_url,
traverse_obj,
try_call,
unsmuggle_url,
)
@ -96,13 +97,22 @@ class LiTVIE(InfoExtractor):
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
webpage, 'video data', default='{}'), video_id)
if not video_data:
payload = {
'assetId': program_info['assetId'],
'watchDevices': program_info['watchDevices'],
'contentType': program_info['contentType'],
}
payload = {'assetId': program_info['assetId']}
puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
if puid:
payload.update({
'type': 'auth',
'puid': puid,
})
endpoint = 'getUrl'
else:
payload.update({
'watchDevices': program_info['watchDevices'],
'contentType': program_info['contentType'],
})
endpoint = 'getMainUrlNoAuth'
video_data = self._download_json(
'https://www.litv.tv/vod/ajax/getMainUrlNoAuth', video_id,
f'https://www.litv.tv/vod/ajax/{endpoint}', video_id,
data=json.dumps(payload).encode('utf-8'),
headers={'Content-Type': 'application/json'})

View File

@ -0,0 +1,62 @@
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_resolution,
unified_timestamp,
url_or_none,
)
from ..utils.traversal import traverse_obj
class MaarivIE(InfoExtractor):
IE_NAME = 'maariv.co.il'
_VALID_URL = r'https?://player\.maariv\.co\.il/public/player\.html\?(?:[^#]+&)?media=(?P<id>\d+)'
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
_TESTS = [{
'url': 'https://player.maariv.co.il/public/player.html?player=maariv-desktop&media=3611585',
'info_dict': {
'id': '3611585',
'duration': 75,
'ext': 'mp4',
'upload_date': '20231009',
'title': 'מבצע חרבות ברזל',
'timestamp': 1696851301,
},
}]
_WEBPAGE_TESTS = [{
'url': 'https://www.maariv.co.il/news/law/Article-1044008',
'info_dict': {
'id': '3611585',
'duration': 75,
'ext': 'mp4',
'upload_date': '20231009',
'title': 'מבצע חרבות ברזל',
'timestamp': 1696851301,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
f'https://dal.walla.co.il/media/{video_id}?origin=player.maariv.co.il', video_id)['data']
formats = []
if hls_url := traverse_obj(data, ('video', 'url', {url_or_none})):
formats.extend(self._extract_m3u8_formats(hls_url, video_id, m3u8_id='hls', fatal=False))
for http_format in traverse_obj(data, ('video', 'stream_urls', ..., 'stream_url', {url_or_none})):
formats.append({
'url': http_format,
'format_id': 'http',
**parse_resolution(http_format),
})
return {
'id': video_id,
**traverse_obj(data, {
'title': 'title',
'duration': ('video', 'duration', {int_or_none}),
'timestamp': ('upload_date', {unified_timestamp}),
}),
'formats': formats,
}

View File

@ -73,6 +73,7 @@ class MediasetIE(ThePlatformBaseIE):
'season_number': 5,
'episode_number': 5,
'chapters': [{'start_time': 0.0, 'end_time': 3409.08}, {'start_time': 3409.08, 'end_time': 6565.008}],
'categories': ['Informazione'],
},
}, {
# DRM
@ -149,6 +150,7 @@ class MediasetIE(ThePlatformBaseIE):
'season_number': 12,
'episode': 'Episode 8',
'episode_number': 8,
'categories': ['Intrattenimento'],
},
'params': {
'skip_download': True,

View File

@ -3,8 +3,11 @@ import re
from .common import InfoExtractor
from ..utils import (
clean_html,
filter_dict,
parse_qs,
remove_end,
traverse_obj,
update_url_query,
urljoin,
)
@ -108,7 +111,9 @@ class MediaStreamIE(MediaStreamBaseIE):
for message in [
'Debido a tu ubicación no puedes ver el contenido',
'You are not allowed to watch this video: Geo Fencing Restriction'
'You are not allowed to watch this video: Geo Fencing Restriction',
'Este contenido no está disponible en tu zona geográfica.',
'El contenido sólo está disponible dentro de',
]:
if message in webpage:
self.raise_geo_restricted()
@ -118,7 +123,16 @@ class MediaStreamIE(MediaStreamBaseIE):
formats, subtitles = [], {}
for video_format in player_config['src']:
if video_format == 'hls':
fmts, subs = self._extract_m3u8_formats_and_subtitles(player_config['src'][video_format], video_id)
params = {
'at': 'web-app',
'access_token': traverse_obj(parse_qs(url), ('access_token', 0)),
}
for name, key in (('MDSTRMUID', 'uid'), ('MDSTRMSID', 'sid'), ('MDSTRMPID', 'pid'), ('VERSION', 'av')):
params[key] = self._search_regex(
rf'window\.{name}\s*=\s*["\']([^"\']+)["\'];', webpage, key, default=None)
fmts, subs = self._extract_m3u8_formats_and_subtitles(
update_url_query(player_config['src'][video_format], filter_dict(params)), video_id)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
elif video_format == 'mpd':

View File

@ -97,7 +97,7 @@ class NBAWatchBaseIE(NBACVPBaseIE):
class NBAWatchEmbedIE(NBAWatchBaseIE):
IENAME = 'nba:watch:embed'
IE_NAME = 'nba:watch:embed'
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
_TESTS = [{
'url': 'http://watch.nba.com/embed?id=659395',
@ -339,7 +339,7 @@ class NBABaseIE(NBACVPBaseIE):
class NBAEmbedIE(NBABaseIE):
IENAME = 'nba:embed'
IE_NAME = 'nba:embed'
_VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
_TESTS = [{
'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&ampEnv=',
@ -361,7 +361,7 @@ class NBAEmbedIE(NBABaseIE):
class NBAIE(NBABaseIE):
IENAME = 'nba'
IE_NAME = 'nba'
_VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
_TESTS = [{
'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774',
@ -388,7 +388,7 @@ class NBAIE(NBABaseIE):
class NBAChannelIE(NBABaseIE):
IENAME = 'nba:channel'
IE_NAME = 'nba:channel'
_VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
_TESTS = [{
'url': 'https://www.nba.com/blazers/video/channel/summer_league',

View File

@ -53,6 +53,8 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
'chapters': 'count:1',
'tags': 'count:4',
'thumbnail': r're:https?://.+\.jpg',
'categories': ['Series/The Tonight Show Starring Jimmy Fallon'],
'media_type': 'Full Episode',
},
'params': {
'skip_download': 'm3u8',
@ -131,6 +133,8 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
'tags': 'count:10',
'age_limit': 0,
'thumbnail': r're:https?://.+\.jpg',
'categories': ['Series/Quantum Leap 2022'],
'media_type': 'Highlight',
},
'params': {
'skip_download': 'm3u8',

View File

@ -3,7 +3,6 @@ import re
import uuid
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
ExtractorError,
OnDemandPagedList,
@ -84,15 +83,17 @@ class OnDemandKoreaIE(InfoExtractor):
def try_geo_bypass(url):
return traverse_obj(url, ({parse_qs}, 'stream_url', 0, {url_or_none})) or url
def try_upgrade_quality(url):
mod_url = re.sub(r'_720(p?)\.m3u8', r'_1080\1.m3u8', url)
return mod_url if mod_url != url and self._request_webpage(
HEADRequest(mod_url), video_id, note='Checking for higher quality format',
errnote='No higher quality format found', fatal=False) else url
formats = []
for m3u8_url in traverse_obj(data, (('sources', 'manifest'), ..., 'url', {url_or_none}, {try_geo_bypass})):
formats.extend(self._extract_m3u8_formats(try_upgrade_quality(m3u8_url), video_id, fatal=False))
mod_url = re.sub(r'_720(p?)\.m3u8', r'_1080\1.m3u8', m3u8_url)
if mod_url != m3u8_url:
mod_format = self._extract_m3u8_formats(
mod_url, video_id, note='Checking for higher quality format',
errnote='No higher quality format found', fatal=False)
if mod_format:
formats.extend(mod_format)
continue
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, fatal=False))
subtitles = {}
for track in traverse_obj(data, ('text_tracks', lambda _, v: url_or_none(v['url']))):

View File

@ -4,7 +4,14 @@ from urllib.parse import unquote
from .common import InfoExtractor
from ..compat import functools
from ..utils import ExtractorError, make_archive_id, urljoin
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
make_archive_id,
mimetype2ext,
urljoin,
)
from ..utils.traversal import traverse_obj
@ -26,6 +33,7 @@ class Pr0grammIE(InfoExtractor):
'dislike_count': int,
'age_limit': 0,
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
'_old_archive_ids': ['pr0grammstatic 5466437'],
},
}, {
# Tags require account
@ -43,6 +51,7 @@ class Pr0grammIE(InfoExtractor):
'dislike_count': int,
'age_limit': 0,
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
'_old_archive_ids': ['pr0grammstatic 3052805'],
},
}, {
# Requires verified account
@ -60,6 +69,7 @@ class Pr0grammIE(InfoExtractor):
'dislike_count': int,
'age_limit': 18,
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
'_old_archive_ids': ['pr0grammstatic 5848332'],
},
}, {
'url': 'https://pr0gramm.com/static/5466437',
@@ -110,37 +120,61 @@ class Pr0grammIE(InfoExtractor):
return data
@staticmethod
def _create_source_url(path):
return urljoin('https://img.pr0gramm.com', path)
def _real_extract(self, url):
video_id = self._match_id(url)
video_info = traverse_obj(
self._call_api('get', video_id, {'id': video_id, 'flags': self._maximum_flags}),
('items', 0, {dict}))
source = urljoin('https://img.pr0gramm.com', video_info.get('image'))
source = video_info.get('image')
if not source or not source.endswith('mp4'):
self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id)
tags = None
if self._is_logged_in:
metadata = self._call_api('info', video_id, {'itemId': video_id})
metadata = self._call_api('info', video_id, {'itemId': video_id}, note='Downloading tags')
tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
# Sorted by "confidence", higher confidence = earlier in list
confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
if confidences:
tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
formats = traverse_obj(video_info, ('variants', ..., {
'format_id': ('name', {str}),
'url': ('path', {self._create_source_url}),
'ext': ('mimeType', {mimetype2ext}),
'vcodec': ('codec', {str}),
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
'bitrate': ('bitRate', {float_or_none}),
'filesize': ('fileSize', {int_or_none}),
})) if video_info.get('variants') else [{
'ext': 'mp4',
'format_id': 'source',
**traverse_obj(video_info, {
'url': ('image', {self._create_source_url}),
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
}),
}]
subtitles = {}
for subtitle in traverse_obj(video_info, ('subtitles', lambda _, v: v['language'])):
subtitles.setdefault(subtitle['language'], []).append(traverse_obj(subtitle, {
'url': ('path', {self._create_source_url}),
'note': ('label', {str}),
}))
return {
'id': video_id,
'title': f'pr0gramm-{video_id} by {video_info.get("user")}',
'formats': [{
'url': source,
'ext': 'mp4',
**traverse_obj(video_info, {
'width': ('width', {int}),
'height': ('height', {int}),
}),
}],
'tags': tags,
'formats': formats,
'subtitles': subtitles,
'age_limit': 18 if traverse_obj(video_info, ('flags', {0b110.__and__})) else 0,
'_old_archive_ids': [make_archive_id('Pr0grammStatic', video_id)],
**traverse_obj(video_info, {
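In the Pr0gramm hunk above, tags are reordered so that higher-confidence tags come first: each tag is zipped with its confidence score and the pairs are sorted descending. A minimal demonstration of that pairing (sample values invented):

confidences = [0.2, 0.9, 0.5]
tags = ['b', 'a', 'c']
ordered = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
assert ordered == ['a', 'c', 'b']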

View File

@@ -0,0 +1,33 @@
from .common import InfoExtractor
from ..utils import format_field, parse_iso8601
class RinseFMIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rinse\.fm/episodes/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://rinse.fm/episodes/club-glow-15-12-2023-2000/',
'md5': '76ee0b719315617df42e15e710f46c7b',
'info_dict': {
'id': '1536535',
'ext': 'mp3',
'title': 'Club Glow - 15/12/2023 - 20:00',
'thumbnail': r're:^https://.+\.(?:jpg|JPG)$',
'release_timestamp': 1702598400,
'release_date': '20231215'
}
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
entry = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['entry']
return {
'id': entry['id'],
'title': entry.get('title'),
'url': entry['fileUrl'],
'vcodec': 'none',
'release_timestamp': parse_iso8601(entry.get('episodeDate')),
'thumbnail': format_field(
entry, [('featuredImage', 0, 'filename')], 'https://rinse.imgix.net/media/%s', default=None),
}
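The format_field call above builds the thumbnail URL from a nested path into the Next.js payload, returning None when the path is missing. Roughly equivalent inline logic, assuming an entry shaped like the extractor expects (payload sample hypothetical):

entry = {'featuredImage': [{'filename': 'cover.jpg'}]}
filename = entry['featuredImage'][0]['filename']
thumbnail = 'https://rinse.imgix.net/media/%s' % filename
assert thumbnail == 'https://rinse.imgix.net/media/cover.jpg'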

View File

@@ -0,0 +1,135 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
determine_ext,
js_to_json,
traverse_obj,
update_url_query,
url_or_none,
)
class RudoVideoIE(InfoExtractor):
_VALID_URL = r'https?://rudo\.video/(?P<type>vod|podcast|live)/(?P<id>[^/?&#]+)'
_EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)//rudo\.video/(?:vod|podcast|live)/[^\'"]+)']
_TESTS = [{
'url': 'https://rudo.video/podcast/cz2wrUy8l0o',
'md5': '28ed82b477708dc5e12e072da2449221',
'info_dict': {
'id': 'cz2wrUy8l0o',
'title': 'Diego Cabot',
'ext': 'mp4',
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
},
}, {
'url': 'https://rudo.video/podcast/bQkt07',
'md5': '36b22a9863de0f47f00fc7532a32a898',
'info_dict': {
'id': 'bQkt07',
'title': 'Tubular Bells',
'ext': 'mp4',
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
},
}, {
'url': 'https://rudo.video/podcast/b42ZUznHX0',
'md5': 'b91c70d832938871367f8ad10c895821',
'info_dict': {
'id': 'b42ZUznHX0',
'title': 'Columna Ruperto Concha',
'ext': 'mp3',
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
},
}, {
'url': 'https://rudo.video/vod/bN5AaJ',
'md5': '01324a329227e2591530ecb4f555c881',
'info_dict': {
'id': 'bN5AaJ',
'title': 'Ucrania 19.03',
'creator': 'La Tercera',
'ext': 'mp4',
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
},
}, {
'url': 'https://rudo.video/live/bbtv',
'info_dict': {
'id': 'bbtv',
'ext': 'mp4',
'creator': 'BioBioTV',
'live_status': 'is_live',
'title': r're:^LIVE BBTV\s\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}$',
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
},
}, {
'url': 'https://rudo.video/live/c13',
'info_dict': {
'id': 'c13',
'title': 'CANAL13',
'ext': 'mp4',
},
'skip': 'Geo-restricted to Chile',
}, {
'url': 'https://rudo.video/live/t13-13cl',
'info_dict': {
'id': 't13-13cl',
'title': 'T13',
'ext': 'mp4',
},
'skip': 'Geo-restricted to Chile',
}]
def _real_extract(self, url):
video_id, type_ = self._match_valid_url(url).group('id', 'type')
is_live = type_ == 'live'
webpage = self._download_webpage(url, video_id)
if 'Streaming is not available in your area' in webpage:
self.raise_geo_restricted()
media_url = (
self._search_regex(
r'var\s+streamURL\s*=\s*[\'"]([^?\'"]+)', webpage, 'stream url', default=None)
# Source URL must be used only if streamURL is unavailable
or self._search_regex(
r'<source[^>]+src=[\'"]([^\'"]+)', webpage, 'source url', default=None))
if not media_url:
youtube_url = self._search_regex(r'file:\s*[\'"]((?:https?:)//(?:www\.)?youtube\.com[^\'"]+)',
webpage, 'youtube url', default=None)
if youtube_url:
return self.url_result(youtube_url, 'Youtube')
raise ExtractorError('Unable to extract stream url')
token_array = self._search_json(
r'<script>var\s+_\$_[a-zA-Z0-9]+\s*=', webpage, 'access token array', video_id,
contains_pattern=r'\[(?s:.+)\]', default=None, transform_source=js_to_json)
if token_array:
token_url = traverse_obj(token_array, (..., {url_or_none}), get_all=False)
if not token_url:
raise ExtractorError('Invalid access token array')
access_token = self._download_json(
token_url, video_id, note='Downloading access token')['data']['authToken']
media_url = update_url_query(media_url, {'auth-token': access_token})
ext = determine_ext(media_url)
if ext == 'm3u8':
formats = self._extract_m3u8_formats(media_url, video_id, live=is_live)
elif ext == 'mp3':
formats = [{
'url': media_url,
'vcodec': 'none',
}]
else:
formats = [{'url': media_url}]
return {
'id': video_id,
'title': (self._search_regex(r'var\s+titleVideo\s*=\s*[\'"]([^\'"]+)',
webpage, 'title', default=None)
or self._og_search_title(webpage)),
'creator': self._search_regex(r'var\s+videoAuthor\s*=\s*[\'"]([^?\'"]+)',
webpage, 'videoAuthor', default=None),
'thumbnail': (self._search_regex(r'var\s+posterIMG\s*=\s*[\'"]([^?\'"]+)',
webpage, 'thumbnail', default=None)
or self._og_search_thumbnail(webpage)),
'formats': formats,
'is_live': is_live,
}
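The token handling above finds an obfuscated string array in the page, takes its first URL-like entry, exchanges it for an auth token, and appends that token to the stream URL. A standalone sketch of the exchange step under those assumptions (response shape as used by the extractor):

import json
import urllib.request
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse

def add_auth_token(media_url, token_url):
    # The token endpoint returns {'data': {'authToken': ...}}
    with urllib.request.urlopen(token_url) as resp:
        token = json.load(resp)['data']['authToken']
    parsed = urlparse(media_url)
    query = dict(parse_qsl(parsed.query))
    query['auth-token'] = token
    return urlunparse(parsed._replace(query=urlencode(query)))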

View File

@@ -114,6 +114,8 @@ class ScrippsNetworksIE(InfoExtractor):
'timestamp': 1475678834,
'upload_date': '20161005',
'uploader': 'SCNI-SCND',
'tags': 'count:10',
'creator': 'Cooking Channel',
'duration': 29.995,
'chapters': [{'start_time': 0.0, 'end_time': 29.995, 'title': '<Untitled Chapter 1>'}],
'thumbnail': 'https://images.dds.discovery.com/up/tp/Scripps_-_Food_Category_Prod/122/987/0260338_630x355.jpg',

View File

@@ -104,6 +104,10 @@ class ThePlatformBaseIE(OnceIE):
_add_chapter(chapter.get('startTime'), chapter.get('endTime'))
_add_chapter(tp_chapters[-1].get('startTime'), tp_chapters[-1].get('endTime') or duration)
def extract_site_specific_field(field):
# A number of sites have custom-prefixed keys, e.g. 'cbc$seasonNumber'
return traverse_obj(info, lambda k, v: v and k.endswith(f'${field}'), get_all=False)
return {
'title': info['title'],
'subtitles': subtitles,
@@ -113,6 +117,14 @@ class ThePlatformBaseIE(OnceIE):
'timestamp': int_or_none(info.get('pubDate'), 1000) or None,
'uploader': info.get('billingCode'),
'chapters': chapters,
'creator': traverse_obj(info, ('author', {str})) or None,
'categories': traverse_obj(info, (
'categories', lambda _, v: v.get('label') in ('category', None), 'name', {str})) or None,
'tags': traverse_obj(info, ('keywords', {lambda x: re.split(r'[;,]\s?', x) if x else None})),
'location': extract_site_specific_field('region'),
'series': extract_site_specific_field('show'),
'season_number': int_or_none(extract_site_specific_field('seasonNumber')),
'media_type': extract_site_specific_field('programmingType') or extract_site_specific_field('type'),
}
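extract_site_specific_field relies on ThePlatform feeds prefixing custom fields per site, e.g. 'cbc$seasonNumber'. A self-contained sketch of the suffix match (sample keys hypothetical):

info = {'cbc$seasonNumber': '3', 'title': 'Example'}

def extract_site_specific_field(field):
    # First truthy value whose key ends with the site-specific suffix
    return next((v for k, v in info.items() if v and k.endswith(f'${field}')), None)

assert extract_site_specific_field('seasonNumber') == '3'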
def _extract_theplatform_metadata(self, path, video_id):

View File

@@ -10,6 +10,7 @@ from ..compat import (
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
)
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
dict_get,
@@ -479,9 +480,9 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int,
'repost_count': int,
'like_count': int,
'view_count': int,
'tags': [],
'age_limit': 18,
'_old_archive_ids': ['twitter 643211948184596480'],
},
}, {
'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
@@ -515,6 +516,7 @@ class TwitterIE(TwitterBaseIE):
'like_count': int,
'tags': ['TV', 'StarWars', 'TheForceAwakens'],
'age_limit': 0,
'_old_archive_ids': ['twitter 665052190608723968'],
},
}, {
'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
@@ -558,9 +560,9 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int,
'repost_count': int,
'like_count': int,
'view_count': int,
'tags': ['Damndaniel'],
'age_limit': 0,
'_old_archive_ids': ['twitter 700207533655363584'],
},
}, {
'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
@@ -599,9 +601,9 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int,
'repost_count': int,
'like_count': int,
'view_count': int,
'tags': [],
'age_limit': 0,
'_old_archive_ids': ['twitter 719944021058060289'],
},
}, {
'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
@@ -616,6 +618,7 @@ class TwitterIE(TwitterBaseIE):
'thumbnail': r're:^https?://.*\.jpg',
},
'add_ie': ['Periscope'],
'skip': 'Broadcast not found',
}, {
# has mp4 formats via mobile API
'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
@@ -635,9 +638,9 @@ class TwitterIE(TwitterBaseIE):
'thumbnail': r're:^https?://.*\.jpg',
'tags': [],
'repost_count': int,
'view_count': int,
'like_count': int,
'comment_count': int,
'_old_archive_ids': ['twitter 852138619213144067'],
},
}, {
'url': 'https://twitter.com/i/web/status/910031516746514432',
@ -657,9 +660,9 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int,
'repost_count': int,
'like_count': int,
'view_count': int,
'tags': ['Maria'],
'age_limit': 0,
'_old_archive_ids': ['twitter 910031516746514432'],
},
'params': {
'skip_download': True, # requires ffmpeg
@@ -683,9 +686,9 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int,
'repost_count': int,
'like_count': int,
'view_count': int,
'tags': [],
'age_limit': 0,
'_old_archive_ids': ['twitter 1001551623938805763'],
},
'params': {
'skip_download': True, # requires ffmpeg
@@ -749,6 +752,7 @@ class TwitterIE(TwitterBaseIE):
'like_count': int,
'tags': [],
'age_limit': 0,
'_old_archive_ids': ['twitter 1349794411333394432'],
},
'params': {
'skip_download': True,
@@ -771,18 +775,18 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int,
'repost_count': int,
'like_count': int,
'view_count': int,
'tags': [],
'age_limit': 0,
'_old_archive_ids': ['twitter 1577855540407197696'],
},
'params': {'skip_download': True},
}, {
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
'info_dict': {
'id': '1577719286659006464',
'title': 'Ultima📛| New Era - Test',
'title': 'Ultima - Test',
'description': 'Test https://t.co/Y3KEZD7Dad',
'uploader': 'Ultima📛| New Era',
'uploader': 'Ultima',
'uploader_id': 'UltimaShadowX',
'uploader_url': 'https://twitter.com/UltimaShadowX',
'upload_date': '20221005',
@@ -813,9 +817,9 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int,
'repost_count': int,
'like_count': int,
'view_count': int,
'tags': ['HurricaneIan'],
'age_limit': 0,
'_old_archive_ids': ['twitter 1575560063510810624'],
},
}, {
# Adult content, fails if not logged in
@@ -951,10 +955,10 @@ class TwitterIE(TwitterBaseIE):
'uploader_url': 'https://twitter.com/CTVJLaidlaw',
'display_id': '1600649710662213632',
'like_count': int,
'view_count': int,
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
'upload_date': '20221208',
'age_limit': 0,
'_old_archive_ids': ['twitter 1600649710662213632'],
},
'params': {'noplaylist': True},
}, {
@@ -979,7 +983,7 @@ class TwitterIE(TwitterBaseIE):
'like_count': int,
'repost_count': int,
'comment_count': int,
'view_count': int,
'_old_archive_ids': ['twitter 1621117700482416640'],
},
}, {
'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
@@ -995,13 +999,13 @@ class TwitterIE(TwitterBaseIE):
'repost_count': int,
'duration': 9.531,
'comment_count': int,
'view_count': int,
'upload_date': '20221203',
'age_limit': 0,
'timestamp': 1670092210.0,
'tags': [],
'uploader': '\u06ea',
'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
'_old_archive_ids': ['twitter 1599108751385972737'],
},
'params': {'noplaylist': True},
}, {
@@ -1012,7 +1016,6 @@ class TwitterIE(TwitterBaseIE):
'ext': 'mp4',
'uploader_url': 'https://twitter.com/MunTheShinobi',
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
'view_count': int,
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
'age_limit': 0,
'uploader': 'Mün',
@@ -1025,6 +1028,7 @@ class TwitterIE(TwitterBaseIE):
'uploader_id': 'MunTheShinobi',
'duration': 139.987,
'timestamp': 1670306984.0,
'_old_archive_ids': ['twitter 1600009574919962625'],
},
}, {
# retweeted_status (private)
@@ -1068,8 +1072,8 @@ class TwitterIE(TwitterBaseIE):
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
'like_count': int,
'repost_count': int,
'view_count': int,
'comment_count': int,
'_old_archive_ids': ['twitter 1695424220702888009'],
},
}, {
# retweeted_status w/ legacy API
@@ -1091,18 +1095,24 @@ class TwitterIE(TwitterBaseIE):
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
'like_count': int,
'repost_count': int,
'_old_archive_ids': ['twitter 1695424220702888009'],
},
'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
}, {
# Broadcast embedded in tweet
'url': 'https://twitter.com/JessicaDobsonWX/status/1693057346933600402',
'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
'info_dict': {
'id': '1yNGaNLjEblJj',
'id': '1rmxPMjLzAXKN',
'ext': 'mp4',
'title': 'Jessica Dobson - WAVE Weather Now - Saturday 8/19/23 Update',
'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
'uploader': 'Jessica Dobson',
'uploader_id': '1DZEoDwDovRQa',
'thumbnail': r're:^https?://.*\.jpg',
'uploader_id': 'JessicaDobsonWX',
'uploader_url': 'https://twitter.com/JessicaDobsonWX',
'timestamp': 1701566398,
'upload_date': '20231203',
'live_status': 'was_live',
'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
'concurrent_view_count': int,
'view_count': int,
},
'add_ie': ['TwitterBroadcast'],
@@ -1125,6 +1135,30 @@ class TwitterIE(TwitterBaseIE):
},
'params': {'extractor_args': {'twitter': {'api': ['syndication']}}},
'expected_warnings': ['Not all metadata'],
}, {
# "stale tweet" with typename "TweetWithVisibilityResults"
'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
'md5': '62b1e11cdc2cdd0e527f83adb081f536',
'info_dict': {
'id': '1724883339285544960',
'ext': 'mp4',
'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
'display_id': '1724884212803834154',
'uploader': 'Robert F. Kennedy Jr',
'uploader_id': 'RobertKennedyJr',
'uploader_url': 'https://twitter.com/RobertKennedyJr',
'upload_date': '20231115',
'timestamp': 1700079417.0,
'duration': 341.048,
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
'tags': ['Kennedy24'],
'repost_count': int,
'like_count': int,
'comment_count': int,
'age_limit': 0,
'_old_archive_ids': ['twitter 1724884212803834154'],
},
}, {
# onion route
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
@@ -1179,19 +1213,23 @@ class TwitterIE(TwitterBaseIE):
), default={}, get_all=False) if self.is_logged_in else traverse_obj(
data, ('tweetResult', 'result', {dict}), default={})
if result.get('__typename') not in ('Tweet', 'TweetTombstone', 'TweetUnavailable', None):
self.report_warning(f'Unknown typename: {result.get("__typename")}', twid, only_once=True)
typename = result.get('__typename')
if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)
if 'tombstone' in result:
cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
elif result.get('__typename') == 'TweetUnavailable':
elif typename == 'TweetUnavailable':
reason = result.get('reason')
if reason == 'NsfwLoggedOut':
self.raise_login_required('NSFW tweet requires authentication')
elif reason == 'Protected':
self.raise_login_required('You are not authorized to view this protected tweet')
raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)
# Result for "stale tweet" needs additional transformation
elif typename == 'TweetWithVisibilityResults':
result = traverse_obj(result, ('tweet', {dict})) or {}
status = result.get('legacy', {})
status.update(traverse_obj(result, {
@@ -1280,41 +1318,51 @@ class TwitterIE(TwitterBaseIE):
}
}
def _extract_status(self, twid):
if self.is_logged_in or self._selected_api == 'graphql':
status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
elif self._selected_api == 'legacy':
status = self._call_api(f'statuses/show/{twid}.json', twid, {
'cards_platform': 'Web-12',
'include_cards': 1,
'include_reply_count': 1,
'include_user_entities': 0,
'tweet_mode': 'extended',
def _call_syndication_api(self, twid):
self.report_warning(
'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
status = self._download_json(
'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
headers={'User-Agent': 'Googlebot'}, query={
'id': twid,
# TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
})
if not status:
raise ExtractorError('Syndication endpoint returned empty JSON response')
# Transform the result so its structure matches that of legacy/graphql
media = []
for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
detail['id_str'] = traverse_obj(detail, (
'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
media.append(detail)
status['extended_entities'] = {'media': media}
elif self._selected_api == 'syndication':
self.report_warning(
'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
status = self._download_json(
'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
headers={'User-Agent': 'Googlebot'}, query={
'id': twid,
# TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
return status
def _extract_status(self, twid):
if self._selected_api not in ('graphql', 'legacy', 'syndication'):
raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)
try:
if self.is_logged_in or self._selected_api == 'graphql':
status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
elif self._selected_api == 'legacy':
status = self._call_api(f'statuses/show/{twid}.json', twid, {
'cards_platform': 'Web-12',
'include_cards': 1,
'include_reply_count': 1,
'include_user_entities': 0,
'tweet_mode': 'extended',
})
if not status:
raise ExtractorError('Syndication endpoint returned empty JSON response')
# Transform the result so its structure matches that of legacy/graphql
media = []
for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
detail['id_str'] = traverse_obj(detail, (
'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
media.append(detail)
status['extended_entities'] = {'media': media}
except ExtractorError as e:
if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
raise
self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
status = self._call_syndication_api(twid)
else:
raise ExtractorError(f'"{self._selected_api}" is not a valid API selection', expected=True)
if self._selected_api == 'syndication':
status = self._call_syndication_api(twid)
return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
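The syndication endpoint currently accepts a random token, but the TODO above records the real formula used by twitter.com. A possible Python port of that JS expression, assuming ten base-36 fractional digits suffice (unverified against the endpoint):

import math
import re

def syndication_token(twid):
    # ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
    value = int(twid) / 1e15 * math.pi
    digits = '0123456789abcdefghijklmnopqrstuvwxyz'
    integer, frac = int(value), value - int(value)
    out = ''
    while integer:
        integer, rem = divmod(integer, 36)
        out = digits[rem] + out
    out += '.'
    for _ in range(10):  # fractional base-36 digits
        frac *= 36
        digit, frac = int(frac), frac - int(frac)
        out += digits[digit]
    return re.sub(r'0+|\.', '', out)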
@@ -1377,10 +1425,10 @@ class TwitterIE(TwitterBaseIE):
'formats': formats,
'subtitles': subtitles,
'thumbnails': thumbnails,
'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})), # No longer available
'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
# The codec of http formats are unknown
'_format_sort_fields': ('res', 'br', 'size', 'proto'),
# Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
'_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'), # http format codec is unknown
}
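The sort-field change above makes the extractor prefer m3u8 over http formats of equal resolution, since the http formats' codecs are unknown. Users can impose the same ordering themselves; a sketch via yt-dlp's Python API, reusing a tweet URL from the tests above:

import yt_dlp

opts = {'format_sort': ['res', 'proto:m3u8', 'br', 'size']}
with yt_dlp.YoutubeDL(opts) as ydl:
    info = ydl.extract_info(
        'https://twitter.com/i/web/status/910031516746514432', download=False)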
def extract_from_card_info(card):

View File

@@ -4480,14 +4480,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if mobj:
info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
break
sbr_tooltip = try_get(
vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
if sbr_tooltip:
like_count, dislike_count = sbr_tooltip.split(' / ')
info.update({
'like_count': str_to_int(like_count),
'dislike_count': str_to_int(dislike_count),
})
info['like_count'] = traverse_obj(vpir, (
'videoActions', 'menuRenderer', 'topLevelButtons', ...,
'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
'buttonViewModel', 'accessibilityText', {parse_count}), get_all=False)
vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
if vcr:
vc = self._get_count(vcr, 'viewCount')

View File

@@ -206,13 +206,14 @@ class Updater:
# XXX: use class variables to simplify testing
_channel = CHANNEL
_origin = ORIGIN
_update_sources = UPDATE_SOURCES
def __init__(self, ydl, target: str | None = None):
self.ydl = ydl
# For backwards compat, target needs to be treated as if it could be None
self.requested_channel, sep, self.requested_tag = (target or self._channel).rpartition('@')
# Check if requested_tag is actually the requested repo/channel
if not sep and ('/' in self.requested_tag or self.requested_tag in UPDATE_SOURCES):
if not sep and ('/' in self.requested_tag or self.requested_tag in self._update_sources):
self.requested_channel = self.requested_tag
self.requested_tag: str = None # type: ignore (we set it later)
elif not self.requested_channel:
@@ -237,11 +238,11 @@
self._block_restart('Automatically restarting into custom builds is disabled for security reasons')
else:
# Check if requested_channel resolves to a known repository or else raise
self.requested_repo = UPDATE_SOURCES.get(self.requested_channel)
self.requested_repo = self._update_sources.get(self.requested_channel)
if not self.requested_repo:
self._report_error(
f'Invalid update channel {self.requested_channel!r} requested. '
f'Valid channels are {", ".join(UPDATE_SOURCES)}', True)
f'Valid channels are {", ".join(self._update_sources)}', True)
self._identifier = f'{detect_variant()} {system_identifier()}'
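Moving UPDATE_SOURCES onto the class is what lets tests substitute update sources without monkey-patching the module. A subclass sketch (the mapping below is hypothetical):

from yt_dlp.update import Updater

class _TestUpdater(Updater):
    # Channel lookups in __init__ now resolve against this mapping
    _update_sources = {'stable': 'example/yt-dlp-fork'}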

View File

@@ -67,7 +67,7 @@ class HTTPHeaderDict(collections.UserDict, dict):
def __setitem__(self, key, value):
if isinstance(value, bytes):
value = value.decode('latin-1')
super().__setitem__(key.title(), str(value))
super().__setitem__(key.title(), str(value).strip())
def __getitem__(self, key):
return super().__getitem__(key.title())
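With the added .strip(), header values are normalized on insertion. For example, assuming the class's current home in yt_dlp.utils.networking:

from yt_dlp.utils.networking import HTTPHeaderDict

headers = HTTPHeaderDict()
headers['user-agent'] = b'  Googlebot  '  # bytes are decoded latin-1, then stripped
assert headers['User-Agent'] == 'Googlebot'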

View File

@@ -8,7 +8,7 @@ from ._utils import (
IDENTITY,
NO_DEFAULT,
LazyList,
int_or_none,
deprecation_warning,
is_iterable_like,
try_call,
variadic,
@@ -17,7 +17,7 @@ from ._utils import (
def traverse_obj(
obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True,
casesense=True, is_user_input=False, traverse_string=False):
casesense=True, is_user_input=NO_DEFAULT, traverse_string=False):
"""
Safely traverse nested `dict`s and `Iterable`s
@@ -63,10 +63,8 @@
@param get_all If `False`, return the first matching result, otherwise all matching ones.
@param casesense If `False`, consider string dictionary keys as case insensitive.
The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API
`traverse_string` is only meant to be used by YoutubeDL.prepare_outtmpl and is not part of the API
@param is_user_input Whether the keys are generated from user input.
If `True` strings get converted to `int`/`slice` if needed.
@param traverse_string Whether to traverse into objects as strings.
If `True`, any non-compatible object will first be
converted into a string and then traversed into.
@@ -80,6 +78,9 @@
If no `default` is given and the last path branches, a `list` of results
is always returned. If a path ends on a `dict` that result will always be a `dict`.
"""
if is_user_input is not NO_DEFAULT:
deprecation_warning('The is_user_input parameter is deprecated and no longer works')
casefold = lambda k: k.casefold() if isinstance(k, str) else k
if isinstance(expected_type, type):
@@ -195,14 +196,6 @@
key = None
for last, key in lazy_last(variadic(path, (str, bytes, dict, set))):
if is_user_input and isinstance(key, str):
if key == ':':
key = ...
elif ':' in key:
key = slice(*map(int_or_none, key.split(':')))
elif int_or_none(key) is not None:
key = int(key)
if not casesense and isinstance(key, str):
key = key.casefold()
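The deprecation above uses the NO_DEFAULT sentinel so that any explicitly passed value, even a falsy one, triggers the warning. The pattern in miniature:

NO_DEFAULT = object()

def traverse(obj, *, is_user_input=NO_DEFAULT):
    if is_user_input is not NO_DEFAULT:
        print('deprecated: is_user_input no longer has any effect')
    return obj

traverse({})                       # silent
traverse({}, is_user_input=False)  # warns: an explicit value was passed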

View File

@@ -286,8 +286,8 @@ class CueBlock(Block):
m1 = parser.consume(_REGEX_TS)
if not m1:
return None
parser.consume(_REGEX_OPTIONAL_WHITESPACE)
m2 = parser.consume(cls._REGEX_SETTINGS)
parser.consume(_REGEX_OPTIONAL_WHITESPACE)
if not parser.consume(_REGEX_NL):
return None
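The hunk above adjusts where the optional whitespace between the cue timestamp and its settings is consumed, so the settings regex is not thrown off by the separator. A reduced sketch of consuming the separator before matching cue settings (regexes simplified from the parser's):

import re

rest = ' align:start'  # parser position just after '00:01.000 --> 00:02.000'
rest = re.sub(r'^[ \t]+', '', rest)     # consume optional whitespace first
settings = re.match(r'[^\r\n]+', rest)  # then match the cue settings
assert settings and settings.group() == 'align:start'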