[ie/imgur] cleanup

[ie/imgur] format_id
[ie/imgur] even more cleanup
2024-11-27 01:31:25 +01:00 · 2023-12-25 18:30:20 -06:00 · 2023-12-25 18:27:21 -06:00 · 2023-12-25 18:25:07 -06:00 · 2023-12-25 18:00:47 -06:00 · 2023-12-25 17:55:42 -06:00
19 changed files with 610 additions and 186 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -80,12 +80,12 @@ on:
        default: true
        type: boolean
      origin:
-        description: .
+        description: Origin
        required: false
-        default: ''
+        default: 'current repo'
        type: choice
        options:
-        - ''
+        - 'current repo'

 permissions:
  contents: read
@ -99,7 +99,7 @@ jobs:
      - name: Process origin
        id: process_origin
        run: |
-          echo "origin=${{ inputs.origin || github.repository }}" >> "$GITHUB_OUTPUT"
+          echo "origin=${{ inputs.origin == 'current repo' && github.repository || inputs.origin }}" | tee "$GITHUB_OUTPUT"

  unix:
    needs: process
--- a/.github/workflows/core.yml
+++ b/.github/workflows/core.yml
@ -1,5 +1,25 @@
 name: Core Tests
-on: [push, pull_request]
+on:
+  push:
+    paths:
+      - .github/**
+      - devscripts/**
+      - test/**
+      - yt_dlp/**.py
+      - '!yt_dlp/extractor/*.py'
+      - yt_dlp/extractor/__init__.py
+      - yt_dlp/extractor/common.py
+      - yt_dlp/extractor/extractors.py
+  pull_request:
+    paths:
+      - .github/**
+      - devscripts/**
+      - test/**
+      - yt_dlp/**.py
+      - '!yt_dlp/extractor/*.py'
+      - yt_dlp/extractor/__init__.py
+      - yt_dlp/extractor/common.py
+      - yt_dlp/extractor/extractors.py
 permissions:
  contents: read

--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@ -64,7 +64,6 @@ jobs:
      target_tag: ${{ steps.setup_variables.outputs.target_tag }}
      pypi_project: ${{ steps.setup_variables.outputs.pypi_project }}
      pypi_suffix: ${{ steps.setup_variables.outputs.pypi_suffix }}
-      pypi_token: ${{ steps.setup_variables.outputs.pypi_token }}
      head_sha: ${{ steps.get_target.outputs.head_sha }}

    steps:
@ -153,7 +152,6 @@ jobs:
              ${{ !!secrets[format('{0}_archive_repo_token', env.target_repo)] }} || fallback_token
              pypi_project='${{ vars[format('{0}_pypi_project', env.target_repo)] }}'
              pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.target_repo)] }}'
-              ${{ !secrets[format('{0}_pypi_token', env.target_repo)] }} || pypi_token='${{ env.target_repo }}_pypi_token'
            fi
          else
            target_tag="${source_tag:-${version}}"
@ -163,7 +161,6 @@ jobs:
              ${{ !!secrets[format('{0}_archive_repo_token', env.source_repo)] }} || fallback_token
              pypi_project='${{ vars[format('{0}_pypi_project', env.source_repo)] }}'
              pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.source_repo)] }}'
-              ${{ !secrets[format('{0}_pypi_token', env.source_repo)] }} || pypi_token='${{ env.source_repo }}_pypi_token'
            else
              target_repo='${{ github.repository }}'
            fi
@ -172,13 +169,6 @@ jobs:
          if [[ "${target_repo}" == '${{ github.repository }}' ]] && ${{ !inputs.prerelease }}; then
            pypi_project='${{ vars.PYPI_PROJECT }}'
          fi
-          if [[ -z "${pypi_token}" && "${pypi_project}" ]]; then
-            if ${{ !secrets.PYPI_TOKEN }}; then
-              pypi_token=OIDC
-            else
-              pypi_token=PYPI_TOKEN
-            fi
-          fi

          echo "::group::Output variables"
          cat << EOF | tee -a "$GITHUB_OUTPUT"
@ -189,7 +179,6 @@ jobs:
          target_tag=${target_tag}
          pypi_project=${pypi_project}
          pypi_suffix=${pypi_suffix}
-          pypi_token=${pypi_token}
          EOF
          echo "::endgroup::"

@ -286,18 +275,7 @@ jobs:
          python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update"
          python setup.py sdist bdist_wheel

-      - name: Publish to PyPI via token
-        env:
-          TWINE_USERNAME: __token__
-          TWINE_PASSWORD: ${{ secrets[needs.prepare.outputs.pypi_token] }}
-        if: |
-          needs.prepare.outputs.pypi_token != 'OIDC' && env.TWINE_PASSWORD
-        run: |
-          twine upload dist/*
-
-      - name: Publish to PyPI via trusted publishing
-        if: |
-          needs.prepare.outputs.pypi_token == 'OIDC'
+      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          verbose: true
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@ -140,6 +140,8 @@ class TestFormatSelection(unittest.TestCase):
        test('example-with-dashes', 'example-with-dashes')
        test('all', '2', '47', '45', 'example-with-dashes', '35')
        test('mergeall', '2+47+45+example-with-dashes+35', multi=True)
+        # See: https://github.com/yt-dlp/yt-dlp/pull/8797
+        test('7_a/worst', '35')

    def test_format_selection_audio(self):
        formats = [
--- a/test/test_update.py
+++ b/test/test_update.py
@ -11,6 +11,14 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from test.helper import FakeYDL, report_warning
 from yt_dlp.update import Updater, UpdateInfo

+
+# XXX: Keep in sync with yt_dlp.update.UPDATE_SOURCES
+TEST_UPDATE_SOURCES = {
+    'stable': 'yt-dlp/yt-dlp',
+    'nightly': 'yt-dlp/yt-dlp-nightly-builds',
+    'master': 'yt-dlp/yt-dlp-master-builds',
+}
+
 TEST_API_DATA = {
    'yt-dlp/yt-dlp/latest': {
        'tag_name': '2023.12.31',
@ -104,6 +112,7 @@ class FakeUpdater(Updater):

    _channel = 'stable'
    _origin = 'yt-dlp/yt-dlp'
+    _update_sources = TEST_UPDATE_SOURCES

    def _download_update_spec(self, *args, **kwargs):
        return TEST_LOCKFILE_ACTUAL
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@ -2465,9 +2465,16 @@ class YoutubeDL:
                return selector_function(ctx_copy)
            return final_selector

-        stream = io.BytesIO(format_spec.encode())
+        # HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
+        #       Prefix numbers with random letters to avoid it being classified as a number
+        #       See: https://github.com/yt-dlp/yt-dlp/pull/8797
+        # TODO: Implement parser not reliant on tokenize.tokenize
+        prefix = ''.join(random.choices(string.ascii_letters, k=32))
+        stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
        try:
-            tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
+            tokens = list(_remove_unused_ops(
+                token._replace(string=token.string.replace(prefix, ''))
+                for token in tokenize.tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -1648,6 +1648,7 @@ from .rumble import (
    RumbleIE,
    RumbleChannelIE,
 )
+from .rudovideo import RudoVideoIE
 from .rutube import (
    RutubeIE,
    RutubeChannelIE,
--- a/yt_dlp/extractor/ard.py
+++ b/yt_dlp/extractor/ard.py
@ -292,7 +292,7 @@ class ARDIE(InfoExtractor):
    _TESTS = [{
        # available till 7.12.2023
        'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
-        'md5': 'a438f671e87a7eba04000336a119ccc4',
+        'md5': '94812e6438488fb923c361a44469614b',
        'info_dict': {
            'id': 'maischberger-video-424',
            'display_id': 'maischberger-video-424',
@ -403,26 +403,25 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
    _VALID_URL = r'''(?x)https://
        (?:(?:beta|www)\.)?ardmediathek\.de/
        (?:(?P<client>[^/]+)/)?
-        (?:player|live|video|(?P<playlist>sendung|sammlung))/
+        (?:player|live|video|(?P<playlist>sendung|serie|sammlung))/
        (?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
        (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
        (?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''

    _TESTS = [{
-        'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI',
-        'md5': '3fd5fead7a370a819341129c8d713136',
+        'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
+        'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4',
        'info_dict': {
-            'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen',
-            'id': '12172961',
-            'title': 'Wolfsland - Die traurigen Schwestern',
-            'description': r're:^Als der Polizeiobermeister Raaben',
-            'duration': 5241,
-            'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957',
-            'timestamp': 1670710500,
-            'upload_date': '20221210',
+            'display_id': 'filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen',
+            'id': '12939099',
+            'title': 'Liebe auf vier Pfoten',
+            'description': r're:^Claudia Schmitt, Anwältin in Salzburg',
+            'duration': 5222,
+            'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:aee7cbf8f06de976?w=960&ch=ae4d0f2ee47d8b9b',
+            'timestamp': 1701343800,
+            'upload_date': '20231130',
            'ext': 'mp4',
-            'age_limit': 12,
-            'episode': 'Wolfsland - Die traurigen Schwestern',
+            'episode': 'Liebe auf vier Pfoten',
            'series': 'Filme im MDR'
        },
    }, {
@ -454,7 +453,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
            'duration': 915,
            'episode': 'tagesschau, 20:00 Uhr',
            'series': 'tagesschau',
-            'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49',
+            'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678',
        },
    }, {
        'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
@ -475,6 +474,10 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
        # playlist of type 'sendung'
        'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
        'only_matching': True,
+    }, {
+        # playlist of type 'serie'
+        'url': 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1',
+        'only_matching': True,
    }, {
        # playlist of type 'sammlung'
        'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
@ -487,10 +490,11 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
        'only_matching': True,
    }]

-    def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber):
+    def _ARD_load_playlist_snippet(self, playlist_id, display_id, client, mode, page_number):
        """ Query the ARD server for playlist information
        and returns the data in "raw" format """
-        if mode == 'sendung':
+        assert mode in ('sendung', 'serie', 'sammlung')
+        if mode in ('sendung', 'serie'):
            graphQL = json.dumps({
                'query': '''{
                    showPage(
@ -507,7 +511,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
                            links { target { id href title } }
                            type
                        }
-                    }}''' % (client, playlist_id, pageNumber),
+                    }}''' % (client, playlist_id, page_number),
            }).encode()
        else:  # mode == 'sammlung'
            graphQL = json.dumps({
@ -528,7 +532,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
                                type
                            }
                        }
-                    }}''' % (client, playlist_id, pageNumber),
+                    }}''' % (client, playlist_id, page_number),
            }).encode()
        # Resources for ARD graphQL debugging:
        # https://api-test.ardmediathek.de/public-gateway
@ -538,7 +542,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
            data=graphQL,
            headers={'Content-Type': 'application/json'})['data']
        # align the structure of the returned data:
-        if mode == 'sendung':
+        if mode in ('sendung', 'serie'):
            show_page = show_page['showPage']
        else:  # mode == 'sammlung'
            show_page = show_page['morePage']['widget']
@ -546,12 +550,12 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):

    def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
        """ Collects all playlist entries and returns them as info dict.
-        Supports playlists of mode 'sendung' and 'sammlung', and also nested
-        playlists. """
+        Supports playlists of mode 'sendung', 'serie', and 'sammlung',
+        as well as nested playlists. """
        entries = []
        pageNumber = 0
        while True:  # iterate by pageNumber
-            show_page = self._ARD_load_playlist_snipped(
+            show_page = self._ARD_load_playlist_snippet(
                playlist_id, display_id, client, mode, pageNumber)
            for teaser in show_page['teasers']:  # process playlist items
                if '/compilation/' in teaser['links']['target']['href']:
--- a/yt_dlp/extractor/bbc.py
+++ b/yt_dlp/extractor/bbc.py
@ -317,16 +317,25 @@ class BBCCoUkIE(InfoExtractor):

    def _download_media_selector(self, programme_id):
        last_exception = None
+        formats, subtitles = [], {}
        for media_set in self._MEDIA_SETS:
            try:
-                return self._download_media_selector_url(
+                fmts, subs = self._download_media_selector_url(
                    self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
+                formats.extend(fmts)
+                if subs:
+                    self._merge_subtitles(subs, target=subtitles)
            except BBCCoUkIE.MediaSelectionError as e:
                if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
                    last_exception = e
                    continue
                self._raise_extractor_error(e)
-        self._raise_extractor_error(last_exception)
+        if last_exception:
+            if formats or subtitles:
+                self.report_warning(f'{self.IE_NAME} returned error: {last_exception.id}')
+            else:
+                self._raise_extractor_error(last_exception)
+        return formats, subtitles

    def _download_media_selector_url(self, url, programme_id=None):
        media_selection = self._download_json(
@ -1188,7 +1197,7 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
        if initial_data is None:
            initial_data = self._search_regex(
                r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
-                'preload state', default={})
+                'preload state', default='{}')
        else:
            initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
        initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@ -52,7 +52,7 @@ class FacebookIE(InfoExtractor):
                            )\?(?:.*?)(?:v|video_id|story_fbid)=|
                            [^/]+/videos/(?:[^/]+/)?|
                            [^/]+/posts/|
-                            groups/[^/]+/permalink/|
+                            groups/[^/]+/(?:permalink|posts)/|
                            watchparty/
                        )|
                    facebook:
@ -232,6 +232,21 @@ class FacebookIE(InfoExtractor):
            'uploader_id': '100013949973717',
        },
        'skip': 'Requires logging in',
+    }, {
+        # data.node.comet_sections.content.story.attachments[].throwbackStyles.attachment_target_renderer.attachment.target.attachments[].styles.attachment.media
+        'url': 'https://www.facebook.com/groups/1645456212344334/posts/3737828833107051/',
+        'info_dict': {
+            'id': '1569199726448814',
+            'ext': 'mp4',
+            'title': 'Pence MUST GO!',
+            'description': 'Vickie Gentry shared a memory.',
+            'timestamp': 1511548260,
+            'upload_date': '20171124',
+            'uploader': 'Vickie Gentry',
+            'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
+            'thumbnail': r're:^https?://.*',
+            'duration': 148.435,
+        },
    }, {
        'url': 'https://www.facebook.com/video.php?v=10204634152394104',
        'only_matching': True,
@ -612,9 +627,11 @@ class FacebookIE(InfoExtractor):
                nodes = variadic(traverse_obj(data, 'nodes', 'node') or [])
                attachments = traverse_obj(nodes, (
                    ..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments',
-                    ..., ('styles', 'style_type_renderer'), 'attachment'), expected_type=dict) or []
+                    ..., ('styles', 'style_type_renderer', ('throwbackStyles', 'attachment_target_renderer')),
+                    'attachment', {dict}))
                for attachment in attachments:
-                    ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
+                    ns = traverse_obj(attachment, ('all_subattachments', 'nodes', ..., {dict}),
+                                      ('target', 'attachments', ..., 'styles', 'attachment', {dict}))
                    for n in ns:
                        parse_attachment(n)
                    parse_attachment(attachment)
@ -637,7 +654,7 @@ class FacebookIE(InfoExtractor):
                if len(entries) > 1:
                    return self.playlist_result(entries, video_id)

-                video_info = entries[0]
+                video_info = entries[0] if entries else {'id': video_id}
                webpage_info = extract_metadata(webpage)
                # honor precise duration in video info
                if video_info.get('duration'):
--- a/yt_dlp/extractor/iheart.py
+++ b/yt_dlp/extractor/iheart.py
@ -23,7 +23,7 @@ class IHeartRadioBaseIE(InfoExtractor):


 class IHeartRadioIE(IHeartRadioBaseIE):
-    IENAME = 'iheartradio'
+    IE_NAME = 'iheartradio'
    _VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true',
--- a/yt_dlp/extractor/imgur.py
+++ b/yt_dlp/extractor/imgur.py
@ -1,99 +1,243 @@
+import functools
 import re

 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
+    determine_ext,
+    float_or_none,
    int_or_none,
    js_to_json,
    mimetype2ext,
-    ExtractorError,
+    parse_iso8601,
+    str_or_none,
+    strip_or_none,
+    traverse_obj,
+    url_or_none,
 )


-class ImgurIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|(?:t(?:opic)?|r)/[^/]+)/)(?P<id>[a-zA-Z0-9]+)'
+class ImgurBaseIE(InfoExtractor):
+    _CLIENT_ID = '546c25a59c58ad7'
+
+    @classmethod
+    def _imgur_result(cls, item_id):
+        return cls.url_result(f'https://imgur.com/{item_id}', ImgurIE, item_id)
+
+    def _call_api(self, endpoint, video_id, **kwargs):
+        return self._download_json(
+            f'https://api.imgur.com/post/v1/{endpoint}/{video_id}?client_id={self._CLIENT_ID}&include=media,account',
+            video_id, **kwargs)
+
+    @staticmethod
+    def get_description(s):
+        if 'Discover the magic of the internet at Imgur' in s:
+            return None
+        return s or None
+
+
+class ImgurIE(ImgurBaseIE):
+    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?P<id>[a-zA-Z0-9]+)'

    _TESTS = [{
-        'url': 'https://i.imgur.com/A61SaA1.gifv',
+        'url': 'https://imgur.com/A61SaA1',
        'info_dict': {
            'id': 'A61SaA1',
            'ext': 'mp4',
-            'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
+            'title': 'MRW gifv is up and running without any bugs',
+            'timestamp': 1416446068,
+            'upload_date': '20141120',
+            'dislike_count': int,
+            'comment_count': int,
+            'release_timestamp': 1416446068,
+            'release_date': '20141120',
+            'like_count': int,
+            'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
        },
    }, {
-        'url': 'https://imgur.com/A61SaA1',
+        'url': 'https://i.imgur.com/A61SaA1.gifv',
        'only_matching': True,
    }, {
        'url': 'https://i.imgur.com/crGpqCV.mp4',
        'only_matching': True,
    }, {
-        # no title
        'url': 'https://i.imgur.com/jxBXAMC.gifv',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'jxBXAMC',
+            'ext': 'mp4',
+            'title': 'Fahaka puffer feeding',
+            'timestamp': 1533835503,
+            'upload_date': '20180809',
+            'release_date': '20180809',
+            'like_count': int,
+            'duration': 30.0,
+            'comment_count': int,
+            'release_timestamp': 1533835503,
+            'thumbnail': 'https://i.imgur.com/jxBXAMCh.jpg',
+            'dislike_count': int,
+        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
+        data = self._call_api('media', video_id)
+        if not traverse_obj(data, ('media', 0, (
+                ('type', {lambda t: t == 'video' or None}),
+                ('metadata', 'is_animated'))), get_all=False):
+            raise ExtractorError(f'{video_id} is not a video or animated image', expected=True)
        webpage = self._download_webpage(
-            'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id)
+            f'https://i.imgur.com/{video_id}.gifv', video_id, fatal=False) or ''
+        formats = []

-        width = int_or_none(self._og_search_property(
-            'video:width', webpage, default=None))
-        height = int_or_none(self._og_search_property(
-            'video:height', webpage, default=None))
+        media_fmt = traverse_obj(data, ('media', 0, {
+            'url': ('url', {url_or_none}),
+            'ext': ('ext', {str}),
+            'width': ('width', {int_or_none}),
+            'height': ('height', {int_or_none}),
+            'filesize': ('size', {int_or_none}),
+            'acodec': ('metadata', 'has_sound', {lambda b: None if b else 'none'}),
+        }))
+        media_url = media_fmt.get('url')
+        if media_url:
+            if not media_fmt.get('ext'):
+                media_fmt['ext'] = mimetype2ext(traverse_obj(
+                    data, ('media', 0, 'mime_type'))) or determine_ext(media_url)
+            if traverse_obj(data, ('media', 0, 'type')) == 'image':
+                media_fmt['acodec'] = 'none'
+                media_fmt.setdefault('preference', -10)
+            formats.append(media_fmt)

        video_elements = self._search_regex(
            r'(?s)<div class="video-elements">(.*?)</div>',
            webpage, 'video elements', default=None)
-        if not video_elements:
-            raise ExtractorError(
-                'No sources found for video %s. Maybe an image?' % video_id,
-                expected=True)

-        formats = []
-        for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
-            formats.append({
-                'format_id': m.group('type').partition('/')[2],
-                'url': self._proto_relative_url(m.group('src')),
-                'ext': mimetype2ext(m.group('type')),
-                'width': width,
-                'height': height,
-                'http_headers': {
-                    'User-Agent': 'yt-dlp (like wget)',
-                },
-            })
+        if video_elements:
+            def og_get_size(media_type):
+                return {
+                    p: int_or_none(self._og_search_property(f'{media_type}:{p}', webpage, default=None))
+                    for p in ('width', 'height')
+                }

-        gif_json = self._search_regex(
-            r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
-            webpage, 'GIF code', fatal=False)
-        if gif_json:
-            gifd = self._parse_json(
-                gif_json, video_id, transform_source=js_to_json)
-            formats.append({
-                'format_id': 'gif',
-                'preference': -10,  # gifs are worse than videos
-                'width': width,
-                'height': height,
-                'ext': 'gif',
-                'acodec': 'none',
-                'vcodec': 'gif',
-                'container': 'gif',
-                'url': self._proto_relative_url(gifd['gifUrl']),
-                'filesize': gifd.get('size'),
-                'http_headers': {
-                    'User-Agent': 'yt-dlp (like wget)',
-                },
+            size = og_get_size('video')
+            if not any(size.values()):
+                size = og_get_size('image')
+
+            formats = traverse_obj(
+                re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements),
+                (..., {
+                    'format_id': ('type', {lambda s: s.partition('/')[2]}),
+                    'url': ('src', {self._proto_relative_url}),
+                    'ext': ('type', {mimetype2ext}),
+                }))
+            for f in formats:
+                f.update(size)
+
+            # We can get the original gif format from the webpage as well
+            gif_json = traverse_obj(self._search_json(
+                r'var\s+videoItem\s*=', webpage, 'GIF info', video_id,
+                transform_source=js_to_json, fatal=False), {
+                    'url': ('gifUrl', {self._proto_relative_url}),
+                    'filesize': ('size', {int_or_none}),
            })
+            if gif_json:
+                gif_json.update(size)
+                gif_json.update({
+                    'format_id': 'gif',
+                    'preference': -10,  # gifs < videos
+                    'ext': 'gif',
+                    'acodec': 'none',
+                    'vcodec': 'gif',
+                    'container': 'gif',
+                })
+                formats.append(gif_json)
+
+        search = functools.partial(self._html_search_meta, html=webpage, default=None)
+
+        twitter_fmt = {
+            'format_id': 'twitter',
+            'url': url_or_none(search('twitter:player:stream')),
+            'ext': mimetype2ext(search('twitter:player:stream:content_type')),
+            'width': int_or_none(search('twitter:width')),
+            'height': int_or_none(search('twitter:height')),
+        }
+        if twitter_fmt['url']:
+            formats.append(twitter_fmt)
+
+        if not formats:
+            self.raise_no_formats(
+                f'No sources found for video {video_id}. Maybe a plain image?', expected=True)
+        self._remove_duplicate_formats(formats)

        return {
+            'title': self._og_search_title(webpage, default=None),
+            'description': self.get_description(self._og_search_description(webpage, default='')),
+            **traverse_obj(data, {
+                'uploader_id': ('account_id', {lambda a: str(a) if int_or_none(a) else None}),
+                'uploader': ('account', 'username', {lambda x: strip_or_none(x) or None}),
+                'uploader_url': ('account', 'avatar_url', {url_or_none}),
+                'like_count': ('upvote_count', {int_or_none}),
+                'dislike_count': ('downvote_count', {int_or_none}),
+                'comment_count': ('comment_count', {int_or_none}),
+                'age_limit': ('is_mature', {lambda x: 18 if x else None}),
+                'timestamp': (('updated_at', 'created_at'), {parse_iso8601}),
+                'release_timestamp': ('created_at', {parse_iso8601}),
+            }, get_all=False),
+            **traverse_obj(data, ('media', 0, 'metadata', {
+                'title': ('title', {lambda x: strip_or_none(x) or None}),
+                'description': ('description', {self.get_description}),
+                'duration': ('duration', {float_or_none}),
+                'timestamp': (('updated_at', 'created_at'), {parse_iso8601}),
+                'release_timestamp': ('created_at', {parse_iso8601}),
+            }), get_all=False),
            'id': video_id,
            'formats': formats,
-            'title': self._og_search_title(webpage, default=video_id),
+            'thumbnail': url_or_none(search('thumbnailUrl')),
        }


-class ImgurGalleryIE(InfoExtractor):
+class ImgurGalleryBaseIE(ImgurBaseIE):
+    _GALLERY = True
+
+    def _real_extract(self, url):
+        gallery_id = self._match_id(url)
+
+        data = self._call_api('albums', gallery_id, fatal=False, expected_status=404)
+
+        info = traverse_obj(data, {
+            'title': ('title', {lambda x: strip_or_none(x) or None}),
+            'description': ('description', {self.get_description}),
+        })
+
+        if traverse_obj(data, 'is_album'):
+
+            def yield_media_ids():
+                for m_id in traverse_obj(data, (
+                        'media', lambda _, v: v.get('type') == 'video' or v['metadata']['is_animated'],
+                        'id', {lambda x: str_or_none(x) or None})):
+                    yield m_id
+
+            # if a gallery with exactly one video, apply album metadata to video
+            media_id = (
+                self._GALLERY
+                and traverse_obj(data, ('image_count', {lambda c: c == 1}))
+                and next(yield_media_ids(), None))
+
+            if not media_id:
+                result = self.playlist_result(
+                    map(self._imgur_result, yield_media_ids()), gallery_id)
+                result.update(info)
+                return result
+            gallery_id = media_id
+
+        result = self._imgur_result(gallery_id)
+        info['_type'] = 'url_transparent'
+        result.update(info)
+        return result
+
+
+class ImgurGalleryIE(ImgurGalleryBaseIE):
    IE_NAME = 'imgur:gallery'
-    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/]+)/(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?P<id>[a-zA-Z0-9]+)'

    _TESTS = [{
        'url': 'http://imgur.com/gallery/Q95ko',
@ -102,49 +246,121 @@ class ImgurGalleryIE(InfoExtractor):
            'title': 'Adding faces make every GIF better',
        },
        'playlist_count': 25,
+        'skip': 'Zoinks! You\'ve taken a wrong turn.',
    }, {
+        # TODO: static images - replace with animated/video gallery
        'url': 'http://imgur.com/topic/Aww/ll5Vk',
        'only_matching': True,
    }, {
        'url': 'https://imgur.com/gallery/YcAQlkx',
+        'add_ies': ['Imgur'],
        'info_dict': {
            'id': 'YcAQlkx',
            'ext': 'mp4',
            'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
-        }
+            'timestamp': 1358554297,
+            'upload_date': '20130119',
+            'uploader_id': '1648642',
+            'uploader': 'wittyusernamehere',
+            'release_timestamp': 1358554297,
+            'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
+            'release_date': '20130119',
+            'uploader_url': 'https://i.imgur.com/u3R4I2S_d.png?maxwidth=290&fidelity=grand',
+            'comment_count': int,
+            'dislike_count': int,
+            'like_count': int,
+        },
    }, {
+        # TODO: static image - replace with animated/video gallery
        'url': 'http://imgur.com/topic/Funny/N8rOudd',
        'only_matching': True,
    }, {
        'url': 'http://imgur.com/r/aww/VQcQPhM',
-        'only_matching': True,
+        'add_ies': ['Imgur'],
+        'info_dict': {
+            'id': 'VQcQPhM',
+            'ext': 'mp4',
+            'title': 'The boss is here',
+            'timestamp': 1476494751,
+            'upload_date': '20161015',
+            'uploader_id': '19138530',
+            'uploader': 'thematrixcam',
+            'comment_count': int,
+            'dislike_count': int,
+            'uploader_url': 'https://i.imgur.com/qCjr5Pi_d.png?maxwidth=290&fidelity=grand',
+            'release_timestamp': 1476494751,
+            'like_count': int,
+            'release_date': '20161015',
+            'thumbnail': 'https://i.imgur.com/VQcQPhMh.jpg',
+        },
+    },
+        # from https://github.com/ytdl-org/youtube-dl/pull/16674
+        {
+        'url': 'https://imgur.com/t/unmuted/6lAn9VQ',
+        'info_dict': {
+            'id': '6lAn9VQ',
+            'title': 'Penguins !',
+        },
+        'playlist_count': 3,
+    }, {
+        'url': 'https://imgur.com/t/unmuted/kx2uD3C',
+        'add_ies': ['Imgur'],
+        'info_dict': {
+            'id': 'ZVMv45i',
+            'ext': 'mp4',
+            'title': 'Intruder',
+            'timestamp': 1528129683,
+            'upload_date': '20180604',
+            'release_timestamp': 1528129683,
+            'release_date': '20180604',
+            'like_count': int,
+            'dislike_count': int,
+            'comment_count': int,
+            'duration': 30.03,
+            'thumbnail': 'https://i.imgur.com/ZVMv45ih.jpg',
+        },
+    }, {
+        'url': 'https://imgur.com/t/unmuted/wXSK0YH',
+        'add_ies': ['Imgur'],
+        'info_dict': {
+            'id': 'JCAP4io',
+            'ext': 'mp4',
+            'title': 're:I got the blues$',
+            'description': 'Luka’s vocal stylings.\n\nFP edit: don’t encourage me. I’ll never stop posting Luka and friends.',
+            'timestamp': 1527809525,
+            'upload_date': '20180531',
+            'like_count': int,
+            'dislike_count': int,
+            'duration': 30.03,
+            'comment_count': int,
+            'release_timestamp': 1527809525,
+            'thumbnail': 'https://i.imgur.com/JCAP4ioh.jpg',
+            'release_date': '20180531',
+        },
    }]

-    def _real_extract(self, url):
-        gallery_id = self._match_id(url)

-        data = self._download_json(
-            'https://imgur.com/gallery/%s.json' % gallery_id,
-            gallery_id)['data']['image']
-
-        if data.get('is_album'):
-            entries = [
-                self.url_result('http://imgur.com/%s' % image['hash'], ImgurIE.ie_key(), image['hash'])
-                for image in data['album_images']['images'] if image.get('hash')]
-            return self.playlist_result(entries, gallery_id, data.get('title'), data.get('description'))
-
-        return self.url_result('http://imgur.com/%s' % gallery_id, ImgurIE.ie_key(), gallery_id)
-
-
-class ImgurAlbumIE(ImgurGalleryIE):  # XXX: Do not subclass from concrete IE
+class ImgurAlbumIE(ImgurGalleryBaseIE):
    IE_NAME = 'imgur:album'
    _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
-
+    _GALLERY = False
    _TESTS = [{
+        # TODO: only static images - replace with animated/video gallery
        'url': 'http://imgur.com/a/j6Orj',
+        'only_matching': True,
+    },
+        # from https://github.com/ytdl-org/youtube-dl/pull/21693
+        {
+        'url': 'https://imgur.com/a/iX265HX',
        'info_dict': {
-            'id': 'j6Orj',
-            'title': 'A Literary Analysis of "Star Wars: The Force Awakens"',
+            'id': 'iX265HX',
+            'title': 'enen-no-shouboutai'
        },
-        'playlist_count': 12,
+        'playlist_count': 2,
+    }, {
+        'url': 'https://imgur.com/a/8pih2Ed',
+        'info_dict': {
+            'id': '8pih2Ed'
+        },
+        'playlist_mincount': 1,
    }]
--- a/yt_dlp/extractor/instagram.py
+++ b/yt_dlp/extractor/instagram.py
@ -10,6 +10,7 @@ from ..utils import (
    ExtractorError,
    decode_base_n,
    encode_base_n,
+    filter_dict,
    float_or_none,
    format_field,
    get_element_by_attribute,
@ -703,28 +704,31 @@ class InstagramStoryIE(InstagramBaseIE):
        user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False)
        if not user_info:
            self.raise_login_required('This content is unreachable')
-        user_id = user_info.get('id')

+        user_id = traverse_obj(user_info, 'pk', 'id', expected_type=str)
        story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
+        if not story_info_url:  # user id is only mandatory for non-highlights
+            raise ExtractorError('Unable to extract user id')
+
        videos = traverse_obj(self._download_json(
            f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}',
            story_id, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels')
        if not videos:
            self.raise_login_required('You need to log in to access this content')

-        full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (str(user_id), 'user', 'full_name'))
+        full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (user_id, 'user', 'full_name'))
        story_title = traverse_obj(videos, (f'highlight:{story_id}', 'title'))
        if not story_title:
            story_title = f'Story by {username}'

-        highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
+        highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (user_id, 'items'))
        info_data = []
        for highlight in highlights:
            highlight_data = self._extract_product(highlight)
            if highlight_data.get('formats'):
                info_data.append({
-                    **highlight_data,
                    'uploader': full_name,
                    'uploader_id': user_id,
+                    **filter_dict(highlight_data),
                })
        return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title)
--- a/yt_dlp/extractor/kinja.py
+++ b/yt_dlp/extractor/kinja.py
@ -12,7 +12,7 @@ from ..utils import (


 class KinjaEmbedIE(InfoExtractor):
-    IENAME = 'kinja:embed'
+    IE_NAME = 'kinja:embed'
    _DOMAIN_REGEX = r'''(?:[^.]+\.)?
        (?:
            avclub|
--- a/yt_dlp/extractor/litv.py
+++ b/yt_dlp/extractor/litv.py
@ -6,6 +6,7 @@ from ..utils import (
    int_or_none,
    smuggle_url,
    traverse_obj,
+    try_call,
    unsmuggle_url,
 )

@ -96,13 +97,22 @@ class LiTVIE(InfoExtractor):
            r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
            webpage, 'video data', default='{}'), video_id)
        if not video_data:
-            payload = {
-                'assetId': program_info['assetId'],
-                'watchDevices': program_info['watchDevices'],
-                'contentType': program_info['contentType'],
-            }
+            payload = {'assetId': program_info['assetId']}
+            puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
+            if puid:
+                payload.update({
+                    'type': 'auth',
+                    'puid': puid,
+                })
+                endpoint = 'getUrl'
+            else:
+                payload.update({
+                    'watchDevices': program_info['watchDevices'],
+                    'contentType': program_info['contentType'],
+                })
+                endpoint = 'getMainUrlNoAuth'
            video_data = self._download_json(
-                'https://www.litv.tv/vod/ajax/getMainUrlNoAuth', video_id,
+                f'https://www.litv.tv/vod/ajax/{endpoint}', video_id,
                data=json.dumps(payload).encode('utf-8'),
                headers={'Content-Type': 'application/json'})

--- a/yt_dlp/extractor/nba.py
+++ b/yt_dlp/extractor/nba.py
@ -97,7 +97,7 @@ class NBAWatchBaseIE(NBACVPBaseIE):


 class NBAWatchEmbedIE(NBAWatchBaseIE):
-    IENAME = 'nba:watch:embed'
+    IE_NAME = 'nba:watch:embed'
    _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://watch.nba.com/embed?id=659395',
@ -339,7 +339,7 @@ class NBABaseIE(NBACVPBaseIE):


 class NBAEmbedIE(NBABaseIE):
-    IENAME = 'nba:embed'
+    IE_NAME = 'nba:embed'
    _VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
    _TESTS = [{
        'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&ampEnv=',
@ -361,7 +361,7 @@ class NBAEmbedIE(NBABaseIE):


 class NBAIE(NBABaseIE):
-    IENAME = 'nba'
+    IE_NAME = 'nba'
    _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
    _TESTS = [{
        'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774',
@ -388,7 +388,7 @@ class NBAIE(NBABaseIE):


 class NBAChannelIE(NBABaseIE):
-    IENAME = 'nba:channel'
+    IE_NAME = 'nba:channel'
    _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
    _TESTS = [{
        'url': 'https://www.nba.com/blazers/video/channel/summer_league',
--- a/yt_dlp/extractor/rudovideo.py
+++ b/yt_dlp/extractor/rudovideo.py
@ -0,0 +1,135 @@
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    determine_ext,
+    js_to_json,
+    traverse_obj,
+    update_url_query,
+    url_or_none,
+)
+
+
+class RudoVideoIE(InfoExtractor):
+    _VALID_URL = r'https?://rudo\.video/(?P<type>vod|podcast|live)/(?P<id>[^/?&#]+)'
+    _EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)//rudo\.video/(?:vod|podcast|live)/[^\'"]+)']
+    _TESTS = [{
+        'url': 'https://rudo.video/podcast/cz2wrUy8l0o',
+        'md5': '28ed82b477708dc5e12e072da2449221',
+        'info_dict': {
+            'id': 'cz2wrUy8l0o',
+            'title': 'Diego Cabot',
+            'ext': 'mp4',
+            'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
+        },
+    }, {
+        'url': 'https://rudo.video/podcast/bQkt07',
+        'md5': '36b22a9863de0f47f00fc7532a32a898',
+        'info_dict': {
+            'id': 'bQkt07',
+            'title': 'Tubular Bells',
+            'ext': 'mp4',
+            'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
+        },
+    }, {
+        'url': 'https://rudo.video/podcast/b42ZUznHX0',
+        'md5': 'b91c70d832938871367f8ad10c895821',
+        'info_dict': {
+            'id': 'b42ZUznHX0',
+            'title': 'Columna Ruperto Concha',
+            'ext': 'mp3',
+            'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
+        },
+    }, {
+        'url': 'https://rudo.video/vod/bN5AaJ',
+        'md5': '01324a329227e2591530ecb4f555c881',
+        'info_dict': {
+            'id': 'bN5AaJ',
+            'title': 'Ucrania 19.03',
+            'creator': 'La Tercera',
+            'ext': 'mp4',
+            'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
+        },
+    }, {
+        'url': 'https://rudo.video/live/bbtv',
+        'info_dict': {
+            'id': 'bbtv',
+            'ext': 'mp4',
+            'creator': 'BioBioTV',
+            'live_status': 'is_live',
+            'title': r're:^LIVE BBTV\s\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}$',
+            'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
+        },
+    }, {
+        'url': 'https://rudo.video/live/c13',
+        'info_dict': {
+            'id': 'c13',
+            'title': 'CANAL13',
+            'ext': 'mp4',
+        },
+        'skip': 'Geo-restricted to Chile',
+    }, {
+        'url': 'https://rudo.video/live/t13-13cl',
+        'info_dict': {
+            'id': 't13-13cl',
+            'title': 'T13',
+            'ext': 'mp4',
+        },
+        'skip': 'Geo-restricted to Chile',
+    }]
+
+    def _real_extract(self, url):
+        video_id, type_ = self._match_valid_url(url).group('id', 'type')
+        is_live = type_ == 'live'
+
+        webpage = self._download_webpage(url, video_id)
+        if 'Streaming is not available in your area' in webpage:
+            self.raise_geo_restricted()
+
+        media_url = (
+            self._search_regex(
+                r'var\s+streamURL\s*=\s*[\'"]([^?\'"]+)', webpage, 'stream url', default=None)
+            # Source URL must be used only if streamURL is unavailable
+            or self._search_regex(
+                r'<source[^>]+src=[\'"]([^\'"]+)', webpage, 'source url', default=None))
+        if not media_url:
+            youtube_url = self._search_regex(r'file:\s*[\'"]((?:https?:)//(?:www\.)?youtube\.com[^\'"]+)',
+                                             webpage, 'youtube url', default=None)
+            if youtube_url:
+                return self.url_result(youtube_url, 'Youtube')
+            raise ExtractorError('Unable to extract stream url')
+
+        token_array = self._search_json(
+            r'<script>var\s+_\$_[a-zA-Z0-9]+\s*=', webpage, 'access token array', video_id,
+            contains_pattern=r'\[(?s:.+)\]', default=None, transform_source=js_to_json)
+        if token_array:
+            token_url = traverse_obj(token_array, (..., {url_or_none}), get_all=False)
+            if not token_url:
+                raise ExtractorError('Invalid access token array')
+            access_token = self._download_json(
+                token_url, video_id, note='Downloading access token')['data']['authToken']
+            media_url = update_url_query(media_url, {'auth-token': access_token})
+
+        ext = determine_ext(media_url)
+        if ext == 'm3u8':
+            formats = self._extract_m3u8_formats(media_url, video_id, live=is_live)
+        elif ext == 'mp3':
+            formats = [{
+                'url': media_url,
+                'vcodec': 'none',
+            }]
+        else:
+            formats = [{'url': media_url}]
+
+        return {
+            'id': video_id,
+            'title': (self._search_regex(r'var\s+titleVideo\s*=\s*[\'"]([^\'"]+)',
+                                         webpage, 'title', default=None)
+                      or self._og_search_title(webpage)),
+            'creator': self._search_regex(r'var\s+videoAuthor\s*=\s*[\'"]([^?\'"]+)',
+                                          webpage, 'videoAuthor', default=None),
+            'thumbnail': (self._search_regex(r'var\s+posterIMG\s*=\s*[\'"]([^?\'"]+)',
+                                             webpage, 'thumbnail', default=None)
+                          or self._og_search_thumbnail(webpage)),
+            'formats': formats,
+            'is_live': is_live,
+        }
--- a/yt_dlp/extractor/twitter.py
+++ b/yt_dlp/extractor/twitter.py
@ -10,6 +10,7 @@ from ..compat import (
    compat_urllib_parse_unquote,
    compat_urllib_parse_urlparse,
 )
+from ..networking.exceptions import HTTPError
 from ..utils import (
    ExtractorError,
    dict_get,
@ -1317,41 +1318,51 @@ class TwitterIE(TwitterBaseIE):
            }
        }

-    def _extract_status(self, twid):
-        if self.is_logged_in or self._selected_api == 'graphql':
-            status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
-
-        elif self._selected_api == 'legacy':
-            status = self._call_api(f'statuses/show/{twid}.json', twid, {
-                'cards_platform': 'Web-12',
-                'include_cards': 1,
-                'include_reply_count': 1,
-                'include_user_entities': 0,
-                'tweet_mode': 'extended',
+    def _call_syndication_api(self, twid):
+        self.report_warning(
+            'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
+        status = self._download_json(
+            'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
+            headers={'User-Agent': 'Googlebot'}, query={
+                'id': twid,
+                # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
+                'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
            })
+        if not status:
+            raise ExtractorError('Syndication endpoint returned empty JSON response')
+        # Transform the result so its structure matches that of legacy/graphql
+        media = []
+        for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
+            detail['id_str'] = traverse_obj(detail, (
+                'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
+            media.append(detail)
+        status['extended_entities'] = {'media': media}

-        elif self._selected_api == 'syndication':
-            self.report_warning(
-                'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
-            status = self._download_json(
-                'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
-                headers={'User-Agent': 'Googlebot'}, query={
-                    'id': twid,
-                    # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
-                    'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
+        return status
+
+    def _extract_status(self, twid):
+        if self._selected_api not in ('graphql', 'legacy', 'syndication'):
+            raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)
+
+        try:
+            if self.is_logged_in or self._selected_api == 'graphql':
+                status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
+            elif self._selected_api == 'legacy':
+                status = self._call_api(f'statuses/show/{twid}.json', twid, {
+                    'cards_platform': 'Web-12',
+                    'include_cards': 1,
+                    'include_reply_count': 1,
+                    'include_user_entities': 0,
+                    'tweet_mode': 'extended',
                })
-            if not status:
-                raise ExtractorError('Syndication endpoint returned empty JSON response')
-            # Transform the result so its structure matches that of legacy/graphql
-            media = []
-            for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
-                detail['id_str'] = traverse_obj(detail, (
-                    'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
-                media.append(detail)
-            status['extended_entities'] = {'media': media}
+        except ExtractorError as e:
+            if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
+                raise
+            self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
+            status = self._call_syndication_api(twid)

-        else:
-            raise ExtractorError(f'"{self._selected_api}" is not a valid API selection', expected=True)
+        if self._selected_api == 'syndication':
+            status = self._call_syndication_api(twid)

        return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}

@ -1416,8 +1427,8 @@ class TwitterIE(TwitterBaseIE):
                'thumbnails': thumbnails,
                'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
                'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
-                # The codec of http formats are unknown
-                '_format_sort_fields': ('res', 'br', 'size', 'proto'),
+                # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
+                '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
            }

        def extract_from_card_info(card):
--- a/yt_dlp/update.py
+++ b/yt_dlp/update.py
@ -206,13 +206,14 @@ class Updater:
    # XXX: use class variables to simplify testing
    _channel = CHANNEL
    _origin = ORIGIN
+    _update_sources = UPDATE_SOURCES

    def __init__(self, ydl, target: str | None = None):
        self.ydl = ydl
        # For backwards compat, target needs to be treated as if it could be None
        self.requested_channel, sep, self.requested_tag = (target or self._channel).rpartition('@')
        # Check if requested_tag is actually the requested repo/channel
-        if not sep and ('/' in self.requested_tag or self.requested_tag in UPDATE_SOURCES):
+        if not sep and ('/' in self.requested_tag or self.requested_tag in self._update_sources):
            self.requested_channel = self.requested_tag
            self.requested_tag: str = None  # type: ignore (we set it later)
        elif not self.requested_channel:
@ -237,11 +238,11 @@ class Updater:
                self._block_restart('Automatically restarting into custom builds is disabled for security reasons')
        else:
            # Check if requested_channel resolves to a known repository or else raise
-            self.requested_repo = UPDATE_SOURCES.get(self.requested_channel)
+            self.requested_repo = self._update_sources.get(self.requested_channel)
            if not self.requested_repo:
                self._report_error(
                    f'Invalid update channel {self.requested_channel!r} requested. '
-                    f'Valid channels are {", ".join(UPDATE_SOURCES)}', True)
+                    f'Valid channels are {", ".join(self._update_sources)}', True)

        self._identifier = f'{detect_variant()} {system_identifier()}'
Author	SHA1	Message	Date
bashonly	e48735604e	[ie/imgur] cleanup	2023-12-25 18:30:20 -06:00
bashonly	e2792fd5b3	[ie/imgur] format_id	2023-12-25 18:27:21 -06:00
bashonly	39d0e2248a	[ie/imgur] even more cleanup	2023-12-25 18:25:07 -06:00
bashonly	abfe114e49	[ie/imgur] more cleanup	2023-12-25 18:00:47 -06:00
bashonly	4396063a52	[ie/imgur] cleanup	2023-12-25 17:55:42 -06:00
bashonly	64f8963313	[ie/imgur] fix tests	2023-12-25 17:23:41 -06:00
bashonly	87e8a2dfe3	[ie/imgur] fix traversal	2023-12-25 17:13:29 -06:00
bashonly	68b4cbdd2b	Merge branch 'yt-dlp:master' into feat/backport-2023-12	2023-12-25 16:32:14 -06:00
bashonly	da957cff5e	regex cleanup	2023-12-25 16:29:17 -06:00
bashonly	5e90f3a9a9	Backport imgur overhaul	2023-12-25 16:24:45 -06:00
kclauhk	c39358a54b	[ie/Facebook] Fix Memories extraction (#8681) - Support group /posts/ URLs - Raise a proper error message if no formats are found Closes #8669 Authored by: kclauhk	2023-12-24 23:43:35 +01:00
Lars Strojny	1f8bd8eba8	[ie/ARDBetaMediathek] Fix series extraction (#8687) Closes #7666 Authored by: lstrojny	2023-12-24 23:38:21 +01:00
Simon Sawicki	00cdda4f6f	[core] Fix format selection parse error for CPython 3.12 (#8797) Authored by: Grub4K	2023-12-24 22:09:01 +01:00
bashonly	116c268438	[ie/twitter] Work around API rate-limit (#8825) Closes #8762 Authored by: bashonly	2023-12-24 16:41:28 +00:00
bashonly	e7d22348e7	[ie/twitter] Prioritize m3u8 formats (#8826) Closes #8117 Authored by: bashonly	2023-12-24 16:40:50 +00:00
bashonly	50eaea9fd7	[ie/instagram] Fix stories extraction (#8843) Closes #8290 Authored by: bashonly	2023-12-24 16:40:03 +00:00
bashonly	f45c4efcd9	[ie/litv] Fix premium content extraction (#8842) Closes #8654 Authored by: bashonly	2023-12-24 16:33:16 +00:00
Simon Sawicki	13b3cb3c2b	[ci] Run core tests only for core changes (#8841) Authored by: Grub4K	2023-12-24 00:11:10 +01:00
Nicolas Dato	0d531c35ec	[ie/RudoVideo] Add extractor (#8664) Authored by: nicodato	2023-12-22 22:52:07 +01:00
barsnick	bc4ab17b38	[cleanup] Fix spelling of `IE_NAME` (#8810) Authored by: barsnick	2023-12-22 02:32:29 +01:00
bashonly	632b8ee54e	[core] Release workflow and Updater cleanup (#8640) - Only use trusted publishing with PyPI and remove support for PyPI tokens from release workflow - Clean up improper actions syntax in the build workflow inputs - Refactor Updater to allow for consistent unit testing with `UPDATE_SOURCES` Authored by: bashonly	2023-12-21 21:06:26 +00:00
barsnick	c919b68f7e	[ie/bbc] Extract more formats (#8321) Closes #4902 Authored by: barsnick, dirkf	2023-12-21 20:47:32 +00:00
bashonly	19741ab8a4	[ie/bbc] Fix JSON parsing bug Authored by: bashonly	2023-12-21 14:46:00 -06:00