Compare commits

..

No commits in common. "e48735604e94f57188ecb7626d5dca95df4ad8ee" and "fcb28933af79e37014f31e588d3bfe1209c06cb1" have entirely different histories.

19 changed files with 186 additions and 610 deletions

View File

@ -80,12 +80,12 @@ on:
default: true default: true
type: boolean type: boolean
origin: origin:
description: Origin description: .
required: false required: false
default: 'current repo' default: ''
type: choice type: choice
options: options:
- 'current repo' - ''
permissions: permissions:
contents: read contents: read
@ -99,7 +99,7 @@ jobs:
- name: Process origin - name: Process origin
id: process_origin id: process_origin
run: | run: |
echo "origin=${{ inputs.origin == 'current repo' && github.repository || inputs.origin }}" | tee "$GITHUB_OUTPUT" echo "origin=${{ inputs.origin || github.repository }}" >> "$GITHUB_OUTPUT"
unix: unix:
needs: process needs: process

View File

@ -1,25 +1,5 @@
name: Core Tests name: Core Tests
on: on: [push, pull_request]
push:
paths:
- .github/**
- devscripts/**
- test/**
- yt_dlp/**.py
- '!yt_dlp/extractor/*.py'
- yt_dlp/extractor/__init__.py
- yt_dlp/extractor/common.py
- yt_dlp/extractor/extractors.py
pull_request:
paths:
- .github/**
- devscripts/**
- test/**
- yt_dlp/**.py
- '!yt_dlp/extractor/*.py'
- yt_dlp/extractor/__init__.py
- yt_dlp/extractor/common.py
- yt_dlp/extractor/extractors.py
permissions: permissions:
contents: read contents: read

View File

@ -64,6 +64,7 @@ jobs:
target_tag: ${{ steps.setup_variables.outputs.target_tag }} target_tag: ${{ steps.setup_variables.outputs.target_tag }}
pypi_project: ${{ steps.setup_variables.outputs.pypi_project }} pypi_project: ${{ steps.setup_variables.outputs.pypi_project }}
pypi_suffix: ${{ steps.setup_variables.outputs.pypi_suffix }} pypi_suffix: ${{ steps.setup_variables.outputs.pypi_suffix }}
pypi_token: ${{ steps.setup_variables.outputs.pypi_token }}
head_sha: ${{ steps.get_target.outputs.head_sha }} head_sha: ${{ steps.get_target.outputs.head_sha }}
steps: steps:
@ -152,6 +153,7 @@ jobs:
${{ !!secrets[format('{0}_archive_repo_token', env.target_repo)] }} || fallback_token ${{ !!secrets[format('{0}_archive_repo_token', env.target_repo)] }} || fallback_token
pypi_project='${{ vars[format('{0}_pypi_project', env.target_repo)] }}' pypi_project='${{ vars[format('{0}_pypi_project', env.target_repo)] }}'
pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.target_repo)] }}' pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.target_repo)] }}'
${{ !secrets[format('{0}_pypi_token', env.target_repo)] }} || pypi_token='${{ env.target_repo }}_pypi_token'
fi fi
else else
target_tag="${source_tag:-${version}}" target_tag="${source_tag:-${version}}"
@ -161,6 +163,7 @@ jobs:
${{ !!secrets[format('{0}_archive_repo_token', env.source_repo)] }} || fallback_token ${{ !!secrets[format('{0}_archive_repo_token', env.source_repo)] }} || fallback_token
pypi_project='${{ vars[format('{0}_pypi_project', env.source_repo)] }}' pypi_project='${{ vars[format('{0}_pypi_project', env.source_repo)] }}'
pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.source_repo)] }}' pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.source_repo)] }}'
${{ !secrets[format('{0}_pypi_token', env.source_repo)] }} || pypi_token='${{ env.source_repo }}_pypi_token'
else else
target_repo='${{ github.repository }}' target_repo='${{ github.repository }}'
fi fi
@ -169,6 +172,13 @@ jobs:
if [[ "${target_repo}" == '${{ github.repository }}' ]] && ${{ !inputs.prerelease }}; then if [[ "${target_repo}" == '${{ github.repository }}' ]] && ${{ !inputs.prerelease }}; then
pypi_project='${{ vars.PYPI_PROJECT }}' pypi_project='${{ vars.PYPI_PROJECT }}'
fi fi
if [[ -z "${pypi_token}" && "${pypi_project}" ]]; then
if ${{ !secrets.PYPI_TOKEN }}; then
pypi_token=OIDC
else
pypi_token=PYPI_TOKEN
fi
fi
echo "::group::Output variables" echo "::group::Output variables"
cat << EOF | tee -a "$GITHUB_OUTPUT" cat << EOF | tee -a "$GITHUB_OUTPUT"
@ -179,6 +189,7 @@ jobs:
target_tag=${target_tag} target_tag=${target_tag}
pypi_project=${pypi_project} pypi_project=${pypi_project}
pypi_suffix=${pypi_suffix} pypi_suffix=${pypi_suffix}
pypi_token=${pypi_token}
EOF EOF
echo "::endgroup::" echo "::endgroup::"
@ -275,7 +286,18 @@ jobs:
python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update" python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update"
python setup.py sdist bdist_wheel python setup.py sdist bdist_wheel
- name: Publish to PyPI - name: Publish to PyPI via token
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets[needs.prepare.outputs.pypi_token] }}
if: |
needs.prepare.outputs.pypi_token != 'OIDC' && env.TWINE_PASSWORD
run: |
twine upload dist/*
- name: Publish to PyPI via trusted publishing
if: |
needs.prepare.outputs.pypi_token == 'OIDC'
uses: pypa/gh-action-pypi-publish@release/v1 uses: pypa/gh-action-pypi-publish@release/v1
with: with:
verbose: true verbose: true

View File

@ -140,8 +140,6 @@ class TestFormatSelection(unittest.TestCase):
test('example-with-dashes', 'example-with-dashes') test('example-with-dashes', 'example-with-dashes')
test('all', '2', '47', '45', 'example-with-dashes', '35') test('all', '2', '47', '45', 'example-with-dashes', '35')
test('mergeall', '2+47+45+example-with-dashes+35', multi=True) test('mergeall', '2+47+45+example-with-dashes+35', multi=True)
# See: https://github.com/yt-dlp/yt-dlp/pulls/8797
test('7_a/worst', '35')
def test_format_selection_audio(self): def test_format_selection_audio(self):
formats = [ formats = [

View File

@ -11,14 +11,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, report_warning from test.helper import FakeYDL, report_warning
from yt_dlp.update import Updater, UpdateInfo from yt_dlp.update import Updater, UpdateInfo
# XXX: Keep in sync with yt_dlp.update.UPDATE_SOURCES
TEST_UPDATE_SOURCES = {
'stable': 'yt-dlp/yt-dlp',
'nightly': 'yt-dlp/yt-dlp-nightly-builds',
'master': 'yt-dlp/yt-dlp-master-builds',
}
TEST_API_DATA = { TEST_API_DATA = {
'yt-dlp/yt-dlp/latest': { 'yt-dlp/yt-dlp/latest': {
'tag_name': '2023.12.31', 'tag_name': '2023.12.31',
@ -112,7 +104,6 @@ class FakeUpdater(Updater):
_channel = 'stable' _channel = 'stable'
_origin = 'yt-dlp/yt-dlp' _origin = 'yt-dlp/yt-dlp'
_update_sources = TEST_UPDATE_SOURCES
def _download_update_spec(self, *args, **kwargs): def _download_update_spec(self, *args, **kwargs):
return TEST_LOCKFILE_ACTUAL return TEST_LOCKFILE_ACTUAL

View File

@ -2465,16 +2465,9 @@ class YoutubeDL:
return selector_function(ctx_copy) return selector_function(ctx_copy)
return final_selector return final_selector
# HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid stream = io.BytesIO(format_spec.encode())
# Prefix numbers with random letters to avoid it being classified as a number
# See: https://github.com/yt-dlp/yt-dlp/pulls/8797
# TODO: Implement parser not reliant on tokenize.tokenize
prefix = ''.join(random.choices(string.ascii_letters, k=32))
stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
try: try:
tokens = list(_remove_unused_ops( tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
token._replace(string=token.string.replace(prefix, ''))
for token in tokenize.tokenize(stream.readline)))
except tokenize.TokenError: except tokenize.TokenError:
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec))) raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

View File

@ -1648,7 +1648,6 @@ from .rumble import (
RumbleIE, RumbleIE,
RumbleChannelIE, RumbleChannelIE,
) )
from .rudovideo import RudoVideoIE
from .rutube import ( from .rutube import (
RutubeIE, RutubeIE,
RutubeChannelIE, RutubeChannelIE,

View File

@ -292,7 +292,7 @@ class ARDIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
# available till 7.12.2023 # available till 7.12.2023
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html', 'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
'md5': '94812e6438488fb923c361a44469614b', 'md5': 'a438f671e87a7eba04000336a119ccc4',
'info_dict': { 'info_dict': {
'id': 'maischberger-video-424', 'id': 'maischberger-video-424',
'display_id': 'maischberger-video-424', 'display_id': 'maischberger-video-424',
@ -403,25 +403,26 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
_VALID_URL = r'''(?x)https:// _VALID_URL = r'''(?x)https://
(?:(?:beta|www)\.)?ardmediathek\.de/ (?:(?:beta|www)\.)?ardmediathek\.de/
(?:(?P<client>[^/]+)/)? (?:(?P<client>[^/]+)/)?
(?:player|live|video|(?P<playlist>sendung|serie|sammlung))/ (?:player|live|video|(?P<playlist>sendung|sammlung))/
(?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)? (?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
(?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+) (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
(?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))''' (?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0', 'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI',
'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4', 'md5': '3fd5fead7a370a819341129c8d713136',
'info_dict': { 'info_dict': {
'display_id': 'filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen', 'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen',
'id': '12939099', 'id': '12172961',
'title': 'Liebe auf vier Pfoten', 'title': 'Wolfsland - Die traurigen Schwestern',
'description': r're:^Claudia Schmitt, Anwältin in Salzburg', 'description': r're:^Als der Polizeiobermeister Raaben',
'duration': 5222, 'duration': 5241,
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:aee7cbf8f06de976?w=960&ch=ae4d0f2ee47d8b9b', 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957',
'timestamp': 1701343800, 'timestamp': 1670710500,
'upload_date': '20231130', 'upload_date': '20221210',
'ext': 'mp4', 'ext': 'mp4',
'episode': 'Liebe auf vier Pfoten', 'age_limit': 12,
'episode': 'Wolfsland - Die traurigen Schwestern',
'series': 'Filme im MDR' 'series': 'Filme im MDR'
}, },
}, { }, {
@ -453,7 +454,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
'duration': 915, 'duration': 915,
'episode': 'tagesschau, 20:00 Uhr', 'episode': 'tagesschau, 20:00 Uhr',
'series': 'tagesschau', 'series': 'tagesschau',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678', 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49',
}, },
}, { }, {
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', 'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
@ -474,10 +475,6 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
# playlist of type 'sendung' # playlist of type 'sendung'
'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/', 'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
'only_matching': True, 'only_matching': True,
}, {
# playlist of type 'serie'
'url': 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1',
'only_matching': True,
}, { }, {
# playlist of type 'sammlung' # playlist of type 'sammlung'
'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/', 'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
@ -490,11 +487,10 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
def _ARD_load_playlist_snippet(self, playlist_id, display_id, client, mode, page_number): def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber):
""" Query the ARD server for playlist information """ Query the ARD server for playlist information
and returns the data in "raw" format """ and returns the data in "raw" format """
assert mode in ('sendung', 'serie', 'sammlung') if mode == 'sendung':
if mode in ('sendung', 'serie'):
graphQL = json.dumps({ graphQL = json.dumps({
'query': '''{ 'query': '''{
showPage( showPage(
@ -511,7 +507,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
links { target { id href title } } links { target { id href title } }
type type
} }
}}''' % (client, playlist_id, page_number), }}''' % (client, playlist_id, pageNumber),
}).encode() }).encode()
else: # mode == 'sammlung' else: # mode == 'sammlung'
graphQL = json.dumps({ graphQL = json.dumps({
@ -532,7 +528,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
type type
} }
} }
}}''' % (client, playlist_id, page_number), }}''' % (client, playlist_id, pageNumber),
}).encode() }).encode()
# Ressources for ARD graphQL debugging: # Ressources for ARD graphQL debugging:
# https://api-test.ardmediathek.de/public-gateway # https://api-test.ardmediathek.de/public-gateway
@ -542,7 +538,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
data=graphQL, data=graphQL,
headers={'Content-Type': 'application/json'})['data'] headers={'Content-Type': 'application/json'})['data']
# align the structure of the returned data: # align the structure of the returned data:
if mode in ('sendung', 'serie'): if mode == 'sendung':
show_page = show_page['showPage'] show_page = show_page['showPage']
else: # mode == 'sammlung' else: # mode == 'sammlung'
show_page = show_page['morePage']['widget'] show_page = show_page['morePage']['widget']
@ -550,12 +546,12 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode): def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
""" Collects all playlist entries and returns them as info dict. """ Collects all playlist entries and returns them as info dict.
Supports playlists of mode 'sendung', 'serie', and 'sammlung', Supports playlists of mode 'sendung' and 'sammlung', and also nested
as well as nested playlists. """ playlists. """
entries = [] entries = []
pageNumber = 0 pageNumber = 0
while True: # iterate by pageNumber while True: # iterate by pageNumber
show_page = self._ARD_load_playlist_snippet( show_page = self._ARD_load_playlist_snipped(
playlist_id, display_id, client, mode, pageNumber) playlist_id, display_id, client, mode, pageNumber)
for teaser in show_page['teasers']: # process playlist items for teaser in show_page['teasers']: # process playlist items
if '/compilation/' in teaser['links']['target']['href']: if '/compilation/' in teaser['links']['target']['href']:

View File

@ -317,25 +317,16 @@ class BBCCoUkIE(InfoExtractor):
def _download_media_selector(self, programme_id): def _download_media_selector(self, programme_id):
last_exception = None last_exception = None
formats, subtitles = [], {}
for media_set in self._MEDIA_SETS: for media_set in self._MEDIA_SETS:
try: try:
fmts, subs = self._download_media_selector_url( return self._download_media_selector_url(
self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id) self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
formats.extend(fmts)
if subs:
self._merge_subtitles(subs, target=subtitles)
except BBCCoUkIE.MediaSelectionError as e: except BBCCoUkIE.MediaSelectionError as e:
if e.id in ('notukerror', 'geolocation', 'selectionunavailable'): if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
last_exception = e last_exception = e
continue continue
self._raise_extractor_error(e) self._raise_extractor_error(e)
if last_exception: self._raise_extractor_error(last_exception)
if formats or subtitles:
self.report_warning(f'{self.IE_NAME} returned error: {last_exception.id}')
else:
self._raise_extractor_error(last_exception)
return formats, subtitles
def _download_media_selector_url(self, url, programme_id=None): def _download_media_selector_url(self, url, programme_id=None):
media_selection = self._download_json( media_selection = self._download_json(
@ -1197,7 +1188,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
if initial_data is None: if initial_data is None:
initial_data = self._search_regex( initial_data = self._search_regex(
r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage, r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
'preload state', default='{}') 'preload state', default={})
else: else:
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False) initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
initial_data = self._parse_json(initial_data, playlist_id, fatal=False) initial_data = self._parse_json(initial_data, playlist_id, fatal=False)

View File

@ -52,7 +52,7 @@ class FacebookIE(InfoExtractor):
)\?(?:.*?)(?:v|video_id|story_fbid)=| )\?(?:.*?)(?:v|video_id|story_fbid)=|
[^/]+/videos/(?:[^/]+/)?| [^/]+/videos/(?:[^/]+/)?|
[^/]+/posts/| [^/]+/posts/|
groups/[^/]+/(?:permalink|posts)/| groups/[^/]+/permalink/|
watchparty/ watchparty/
)| )|
facebook: facebook:
@ -232,21 +232,6 @@ class FacebookIE(InfoExtractor):
'uploader_id': '100013949973717', 'uploader_id': '100013949973717',
}, },
'skip': 'Requires logging in', 'skip': 'Requires logging in',
}, {
# data.node.comet_sections.content.story.attachments[].throwbackStyles.attachment_target_renderer.attachment.target.attachments[].styles.attachment.media
'url': 'https://www.facebook.com/groups/1645456212344334/posts/3737828833107051/',
'info_dict': {
'id': '1569199726448814',
'ext': 'mp4',
'title': 'Pence MUST GO!',
'description': 'Vickie Gentry shared a memory.',
'timestamp': 1511548260,
'upload_date': '20171124',
'uploader': 'Vickie Gentry',
'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
'thumbnail': r're:^https?://.*',
'duration': 148.435,
},
}, { }, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True, 'only_matching': True,
@ -627,11 +612,9 @@ class FacebookIE(InfoExtractor):
nodes = variadic(traverse_obj(data, 'nodes', 'node') or []) nodes = variadic(traverse_obj(data, 'nodes', 'node') or [])
attachments = traverse_obj(nodes, ( attachments = traverse_obj(nodes, (
..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments', ..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments',
..., ('styles', 'style_type_renderer', ('throwbackStyles', 'attachment_target_renderer')), ..., ('styles', 'style_type_renderer'), 'attachment'), expected_type=dict) or []
'attachment', {dict}))
for attachment in attachments: for attachment in attachments:
ns = traverse_obj(attachment, ('all_subattachments', 'nodes', ..., {dict}), ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
('target', 'attachments', ..., 'styles', 'attachment', {dict}))
for n in ns: for n in ns:
parse_attachment(n) parse_attachment(n)
parse_attachment(attachment) parse_attachment(attachment)
@ -654,7 +637,7 @@ class FacebookIE(InfoExtractor):
if len(entries) > 1: if len(entries) > 1:
return self.playlist_result(entries, video_id) return self.playlist_result(entries, video_id)
video_info = entries[0] if entries else {'id': video_id} video_info = entries[0]
webpage_info = extract_metadata(webpage) webpage_info = extract_metadata(webpage)
# honor precise duration in video info # honor precise duration in video info
if video_info.get('duration'): if video_info.get('duration'):

View File

@ -23,7 +23,7 @@ class IHeartRadioBaseIE(InfoExtractor):
class IHeartRadioIE(IHeartRadioBaseIE): class IHeartRadioIE(IHeartRadioBaseIE):
IE_NAME = 'iheartradio' IENAME = 'iheartradio'
_VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)' _VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true', 'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true',

View File

@ -1,243 +1,99 @@
import functools
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError,
determine_ext,
float_or_none,
int_or_none, int_or_none,
js_to_json, js_to_json,
mimetype2ext, mimetype2ext,
parse_iso8601, ExtractorError,
str_or_none,
strip_or_none,
traverse_obj,
url_or_none,
) )
class ImgurBaseIE(InfoExtractor): class ImgurIE(InfoExtractor):
_CLIENT_ID = '546c25a59c58ad7' _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|(?:t(?:opic)?|r)/[^/]+)/)(?P<id>[a-zA-Z0-9]+)'
@classmethod
def _imgur_result(cls, item_id):
return cls.url_result(f'https://imgur.com/{item_id}', ImgurIE, item_id)
def _call_api(self, endpoint, video_id, **kwargs):
return self._download_json(
f'https://api.imgur.com/post/v1/{endpoint}/{video_id}?client_id={self._CLIENT_ID}&include=media,account',
video_id, **kwargs)
@staticmethod
def get_description(s):
if 'Discover the magic of the internet at Imgur' in s:
return None
return s or None
class ImgurIE(ImgurBaseIE):
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://imgur.com/A61SaA1', 'url': 'https://i.imgur.com/A61SaA1.gifv',
'info_dict': { 'info_dict': {
'id': 'A61SaA1', 'id': 'A61SaA1',
'ext': 'mp4', 'ext': 'mp4',
'title': 'MRW gifv is up and running without any bugs', 'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
'timestamp': 1416446068,
'upload_date': '20141120',
'dislike_count': int,
'comment_count': int,
'release_timestamp': 1416446068,
'release_date': '20141120',
'like_count': int,
'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
}, },
}, { }, {
'url': 'https://i.imgur.com/A61SaA1.gifv', 'url': 'https://imgur.com/A61SaA1',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://i.imgur.com/crGpqCV.mp4', 'url': 'https://i.imgur.com/crGpqCV.mp4',
'only_matching': True, 'only_matching': True,
}, { }, {
# no title
'url': 'https://i.imgur.com/jxBXAMC.gifv', 'url': 'https://i.imgur.com/jxBXAMC.gifv',
'info_dict': { 'only_matching': True,
'id': 'jxBXAMC',
'ext': 'mp4',
'title': 'Fahaka puffer feeding',
'timestamp': 1533835503,
'upload_date': '20180809',
'release_date': '20180809',
'like_count': int,
'duration': 30.0,
'comment_count': int,
'release_timestamp': 1533835503,
'thumbnail': 'https://i.imgur.com/jxBXAMCh.jpg',
'dislike_count': int,
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
data = self._call_api('media', video_id)
if not traverse_obj(data, ('media', 0, (
('type', {lambda t: t == 'video' or None}),
('metadata', 'is_animated'))), get_all=False):
raise ExtractorError(f'{video_id} is not a video or animated image', expected=True)
webpage = self._download_webpage( webpage = self._download_webpage(
f'https://i.imgur.com/{video_id}.gifv', video_id, fatal=False) or '' 'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id)
formats = []
media_fmt = traverse_obj(data, ('media', 0, { width = int_or_none(self._og_search_property(
'url': ('url', {url_or_none}), 'video:width', webpage, default=None))
'ext': ('ext', {str}), height = int_or_none(self._og_search_property(
'width': ('width', {int_or_none}), 'video:height', webpage, default=None))
'height': ('height', {int_or_none}),
'filesize': ('size', {int_or_none}),
'acodec': ('metadata', 'has_sound', {lambda b: None if b else 'none'}),
}))
media_url = media_fmt.get('url')
if media_url:
if not media_fmt.get('ext'):
media_fmt['ext'] = mimetype2ext(traverse_obj(
data, ('media', 0, 'mime_type'))) or determine_ext(media_url)
if traverse_obj(data, ('media', 0, 'type')) == 'image':
media_fmt['acodec'] = 'none'
media_fmt.setdefault('preference', -10)
formats.append(media_fmt)
video_elements = self._search_regex( video_elements = self._search_regex(
r'(?s)<div class="video-elements">(.*?)</div>', r'(?s)<div class="video-elements">(.*?)</div>',
webpage, 'video elements', default=None) webpage, 'video elements', default=None)
if not video_elements:
raise ExtractorError(
'No sources found for video %s. Maybe an image?' % video_id,
expected=True)
if video_elements: formats = []
def og_get_size(media_type): for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
return { formats.append({
p: int_or_none(self._og_search_property(f'{media_type}:{p}', webpage, default=None)) 'format_id': m.group('type').partition('/')[2],
for p in ('width', 'height') 'url': self._proto_relative_url(m.group('src')),
} 'ext': mimetype2ext(m.group('type')),
'width': width,
size = og_get_size('video') 'height': height,
if not any(size.values()): 'http_headers': {
size = og_get_size('image') 'User-Agent': 'yt-dlp (like wget)',
},
formats = traverse_obj(
re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements),
(..., {
'format_id': ('type', {lambda s: s.partition('/')[2]}),
'url': ('src', {self._proto_relative_url}),
'ext': ('type', {mimetype2ext}),
}))
for f in formats:
f.update(size)
# We can get the original gif format from the webpage as well
gif_json = traverse_obj(self._search_json(
r'var\s+videoItem\s*=', webpage, 'GIF info', video_id,
transform_source=js_to_json, fatal=False), {
'url': ('gifUrl', {self._proto_relative_url}),
'filesize': ('size', {int_or_none}),
}) })
if gif_json:
gif_json.update(size)
gif_json.update({
'format_id': 'gif',
'preference': -10, # gifs < videos
'ext': 'gif',
'acodec': 'none',
'vcodec': 'gif',
'container': 'gif',
})
formats.append(gif_json)
search = functools.partial(self._html_search_meta, html=webpage, default=None) gif_json = self._search_regex(
r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
twitter_fmt = { webpage, 'GIF code', fatal=False)
'format_id': 'twitter', if gif_json:
'url': url_or_none(search('twitter:player:stream')), gifd = self._parse_json(
'ext': mimetype2ext(search('twitter:player:stream:content_type')), gif_json, video_id, transform_source=js_to_json)
'width': int_or_none(search('twitter:width')), formats.append({
'height': int_or_none(search('twitter:height')), 'format_id': 'gif',
} 'preference': -10, # gifs are worse than videos
if twitter_fmt['url']: 'width': width,
formats.append(twitter_fmt) 'height': height,
'ext': 'gif',
if not formats: 'acodec': 'none',
self.raise_no_formats( 'vcodec': 'gif',
f'No sources found for video {video_id}. Maybe a plain image?', expected=True) 'container': 'gif',
self._remove_duplicate_formats(formats) 'url': self._proto_relative_url(gifd['gifUrl']),
'filesize': gifd.get('size'),
'http_headers': {
'User-Agent': 'yt-dlp (like wget)',
},
})
return { return {
'title': self._og_search_title(webpage, default=None),
'description': self.get_description(self._og_search_description(webpage, default='')),
**traverse_obj(data, {
'uploader_id': ('account_id', {lambda a: str(a) if int_or_none(a) else None}),
'uploader': ('account', 'username', {lambda x: strip_or_none(x) or None}),
'uploader_url': ('account', 'avatar_url', {url_or_none}),
'like_count': ('upvote_count', {int_or_none}),
'dislike_count': ('downvote_count', {int_or_none}),
'comment_count': ('comment_count', {int_or_none}),
'age_limit': ('is_mature', {lambda x: 18 if x else None}),
'timestamp': (('updated_at', 'created_at'), {parse_iso8601}),
'release_timestamp': ('created_at', {parse_iso8601}),
}, get_all=False),
**traverse_obj(data, ('media', 0, 'metadata', {
'title': ('title', {lambda x: strip_or_none(x) or None}),
'description': ('description', {self.get_description}),
'duration': ('duration', {float_or_none}),
'timestamp': (('updated_at', 'created_at'), {parse_iso8601}),
'release_timestamp': ('created_at', {parse_iso8601}),
}), get_all=False),
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'thumbnail': url_or_none(search('thumbnailUrl')), 'title': self._og_search_title(webpage, default=video_id),
} }
class ImgurGalleryBaseIE(ImgurBaseIE): class ImgurGalleryIE(InfoExtractor):
_GALLERY = True
def _real_extract(self, url):
gallery_id = self._match_id(url)
data = self._call_api('albums', gallery_id, fatal=False, expected_status=404)
info = traverse_obj(data, {
'title': ('title', {lambda x: strip_or_none(x) or None}),
'description': ('description', {self.get_description}),
})
if traverse_obj(data, 'is_album'):
def yield_media_ids():
for m_id in traverse_obj(data, (
'media', lambda _, v: v.get('type') == 'video' or v['metadata']['is_animated'],
'id', {lambda x: str_or_none(x) or None})):
yield m_id
# if a gallery with exactly one video, apply album metadata to video
media_id = (
self._GALLERY
and traverse_obj(data, ('image_count', {lambda c: c == 1}))
and next(yield_media_ids(), None))
if not media_id:
result = self.playlist_result(
map(self._imgur_result, yield_media_ids()), gallery_id)
result.update(info)
return result
gallery_id = media_id
result = self._imgur_result(gallery_id)
info['_type'] = 'url_transparent'
result.update(info)
return result
class ImgurGalleryIE(ImgurGalleryBaseIE):
IE_NAME = 'imgur:gallery' IE_NAME = 'imgur:gallery'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?P<id>[a-zA-Z0-9]+)' _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/]+)/(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://imgur.com/gallery/Q95ko', 'url': 'http://imgur.com/gallery/Q95ko',
@ -246,121 +102,49 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
'title': 'Adding faces make every GIF better', 'title': 'Adding faces make every GIF better',
}, },
'playlist_count': 25, 'playlist_count': 25,
'skip': 'Zoinks! You\'ve taken a wrong turn.',
}, { }, {
# TODO: static images - replace with animated/video gallery
'url': 'http://imgur.com/topic/Aww/ll5Vk', 'url': 'http://imgur.com/topic/Aww/ll5Vk',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://imgur.com/gallery/YcAQlkx', 'url': 'https://imgur.com/gallery/YcAQlkx',
'add_ies': ['Imgur'],
'info_dict': { 'info_dict': {
'id': 'YcAQlkx', 'id': 'YcAQlkx',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....', 'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
'timestamp': 1358554297, }
'upload_date': '20130119',
'uploader_id': '1648642',
'uploader': 'wittyusernamehere',
'release_timestamp': 1358554297,
'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
'release_date': '20130119',
'uploader_url': 'https://i.imgur.com/u3R4I2S_d.png?maxwidth=290&fidelity=grand',
'comment_count': int,
'dislike_count': int,
'like_count': int,
},
}, { }, {
# TODO: static image - replace with animated/video gallery
'url': 'http://imgur.com/topic/Funny/N8rOudd', 'url': 'http://imgur.com/topic/Funny/N8rOudd',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://imgur.com/r/aww/VQcQPhM', 'url': 'http://imgur.com/r/aww/VQcQPhM',
'add_ies': ['Imgur'], 'only_matching': True,
'info_dict': {
'id': 'VQcQPhM',
'ext': 'mp4',
'title': 'The boss is here',
'timestamp': 1476494751,
'upload_date': '20161015',
'uploader_id': '19138530',
'uploader': 'thematrixcam',
'comment_count': int,
'dislike_count': int,
'uploader_url': 'https://i.imgur.com/qCjr5Pi_d.png?maxwidth=290&fidelity=grand',
'release_timestamp': 1476494751,
'like_count': int,
'release_date': '20161015',
'thumbnail': 'https://i.imgur.com/VQcQPhMh.jpg',
},
},
# from https://github.com/ytdl-org/youtube-dl/pull/16674
{
'url': 'https://imgur.com/t/unmuted/6lAn9VQ',
'info_dict': {
'id': '6lAn9VQ',
'title': 'Penguins !',
},
'playlist_count': 3,
}, {
'url': 'https://imgur.com/t/unmuted/kx2uD3C',
'add_ies': ['Imgur'],
'info_dict': {
'id': 'ZVMv45i',
'ext': 'mp4',
'title': 'Intruder',
'timestamp': 1528129683,
'upload_date': '20180604',
'release_timestamp': 1528129683,
'release_date': '20180604',
'like_count': int,
'dislike_count': int,
'comment_count': int,
'duration': 30.03,
'thumbnail': 'https://i.imgur.com/ZVMv45ih.jpg',
},
}, {
'url': 'https://imgur.com/t/unmuted/wXSK0YH',
'add_ies': ['Imgur'],
'info_dict': {
'id': 'JCAP4io',
'ext': 'mp4',
'title': 're:I got the blues$',
'description': 'Lukas vocal stylings.\n\nFP edit: dont encourage me. Ill never stop posting Luka and friends.',
'timestamp': 1527809525,
'upload_date': '20180531',
'like_count': int,
'dislike_count': int,
'duration': 30.03,
'comment_count': int,
'release_timestamp': 1527809525,
'thumbnail': 'https://i.imgur.com/JCAP4ioh.jpg',
'release_date': '20180531',
},
}] }]
def _real_extract(self, url):
gallery_id = self._match_id(url)
class ImgurAlbumIE(ImgurGalleryBaseIE): data = self._download_json(
'https://imgur.com/gallery/%s.json' % gallery_id,
gallery_id)['data']['image']
if data.get('is_album'):
entries = [
self.url_result('http://imgur.com/%s' % image['hash'], ImgurIE.ie_key(), image['hash'])
for image in data['album_images']['images'] if image.get('hash')]
return self.playlist_result(entries, gallery_id, data.get('title'), data.get('description'))
return self.url_result('http://imgur.com/%s' % gallery_id, ImgurIE.ie_key(), gallery_id)
class ImgurAlbumIE(ImgurGalleryIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'imgur:album' IE_NAME = 'imgur:album'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)' _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
_GALLERY = False
_TESTS = [{ _TESTS = [{
# TODO: only static images - replace with animated/video gallery
'url': 'http://imgur.com/a/j6Orj', 'url': 'http://imgur.com/a/j6Orj',
'only_matching': True,
},
# from https://github.com/ytdl-org/youtube-dl/pull/21693
{
'url': 'https://imgur.com/a/iX265HX',
'info_dict': { 'info_dict': {
'id': 'iX265HX', 'id': 'j6Orj',
'title': 'enen-no-shouboutai' 'title': 'A Literary Analysis of "Star Wars: The Force Awakens"',
}, },
'playlist_count': 2, 'playlist_count': 12,
}, {
'url': 'https://imgur.com/a/8pih2Ed',
'info_dict': {
'id': '8pih2Ed'
},
'playlist_mincount': 1,
}] }]

View File

@ -10,7 +10,6 @@ from ..utils import (
ExtractorError, ExtractorError,
decode_base_n, decode_base_n,
encode_base_n, encode_base_n,
filter_dict,
float_or_none, float_or_none,
format_field, format_field,
get_element_by_attribute, get_element_by_attribute,
@ -704,31 +703,28 @@ class InstagramStoryIE(InstagramBaseIE):
user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False) user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False)
if not user_info: if not user_info:
self.raise_login_required('This content is unreachable') self.raise_login_required('This content is unreachable')
user_id = user_info.get('id')
user_id = traverse_obj(user_info, 'pk', 'id', expected_type=str)
story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}' story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
if not story_info_url: # user id is only mandatory for non-highlights
raise ExtractorError('Unable to extract user id')
videos = traverse_obj(self._download_json( videos = traverse_obj(self._download_json(
f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}', f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}',
story_id, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels') story_id, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels')
if not videos: if not videos:
self.raise_login_required('You need to log in to access this content') self.raise_login_required('You need to log in to access this content')
full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (user_id, 'user', 'full_name')) full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (str(user_id), 'user', 'full_name'))
story_title = traverse_obj(videos, (f'highlight:{story_id}', 'title')) story_title = traverse_obj(videos, (f'highlight:{story_id}', 'title'))
if not story_title: if not story_title:
story_title = f'Story by {username}' story_title = f'Story by {username}'
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (user_id, 'items')) highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
info_data = [] info_data = []
for highlight in highlights: for highlight in highlights:
highlight_data = self._extract_product(highlight) highlight_data = self._extract_product(highlight)
if highlight_data.get('formats'): if highlight_data.get('formats'):
info_data.append({ info_data.append({
**highlight_data,
'uploader': full_name, 'uploader': full_name,
'uploader_id': user_id, 'uploader_id': user_id,
**filter_dict(highlight_data),
}) })
return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title) return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title)

View File

@ -12,7 +12,7 @@ from ..utils import (
class KinjaEmbedIE(InfoExtractor): class KinjaEmbedIE(InfoExtractor):
IE_NAME = 'kinja:embed' IENAME = 'kinja:embed'
_DOMAIN_REGEX = r'''(?:[^.]+\.)? _DOMAIN_REGEX = r'''(?:[^.]+\.)?
(?: (?:
avclub| avclub|

View File

@ -6,7 +6,6 @@ from ..utils import (
int_or_none, int_or_none,
smuggle_url, smuggle_url,
traverse_obj, traverse_obj,
try_call,
unsmuggle_url, unsmuggle_url,
) )
@ -97,22 +96,13 @@ class LiTVIE(InfoExtractor):
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);', r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
webpage, 'video data', default='{}'), video_id) webpage, 'video data', default='{}'), video_id)
if not video_data: if not video_data:
payload = {'assetId': program_info['assetId']} payload = {
puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value) 'assetId': program_info['assetId'],
if puid: 'watchDevices': program_info['watchDevices'],
payload.update({ 'contentType': program_info['contentType'],
'type': 'auth', }
'puid': puid,
})
endpoint = 'getUrl'
else:
payload.update({
'watchDevices': program_info['watchDevices'],
'contentType': program_info['contentType'],
})
endpoint = 'getMainUrlNoAuth'
video_data = self._download_json( video_data = self._download_json(
f'https://www.litv.tv/vod/ajax/{endpoint}', video_id, 'https://www.litv.tv/vod/ajax/getMainUrlNoAuth', video_id,
data=json.dumps(payload).encode('utf-8'), data=json.dumps(payload).encode('utf-8'),
headers={'Content-Type': 'application/json'}) headers={'Content-Type': 'application/json'})

View File

@ -97,7 +97,7 @@ class NBAWatchBaseIE(NBACVPBaseIE):
class NBAWatchEmbedIE(NBAWatchBaseIE): class NBAWatchEmbedIE(NBAWatchBaseIE):
IE_NAME = 'nba:watch:embed' IENAME = 'nba:watch:embed'
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)' _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://watch.nba.com/embed?id=659395', 'url': 'http://watch.nba.com/embed?id=659395',
@ -339,7 +339,7 @@ class NBABaseIE(NBACVPBaseIE):
class NBAEmbedIE(NBABaseIE): class NBAEmbedIE(NBABaseIE):
IE_NAME = 'nba:embed' IENAME = 'nba:embed'
_VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)' _VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&ampEnv=', 'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&ampEnv=',
@ -361,7 +361,7 @@ class NBAEmbedIE(NBABaseIE):
class NBAIE(NBABaseIE): class NBAIE(NBABaseIE):
IE_NAME = 'nba' IENAME = 'nba'
_VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
_TESTS = [{ _TESTS = [{
'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774', 'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774',
@ -388,7 +388,7 @@ class NBAIE(NBABaseIE):
class NBAChannelIE(NBABaseIE): class NBAChannelIE(NBABaseIE):
IE_NAME = 'nba:channel' IENAME = 'nba:channel'
_VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
_TESTS = [{ _TESTS = [{
'url': 'https://www.nba.com/blazers/video/channel/summer_league', 'url': 'https://www.nba.com/blazers/video/channel/summer_league',

View File

@ -1,135 +0,0 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
determine_ext,
js_to_json,
traverse_obj,
update_url_query,
url_or_none,
)
class RudoVideoIE(InfoExtractor):
    """Extractor for rudo.video pages of type vod, podcast or live."""

    _VALID_URL = r'https?://rudo\.video/(?P<type>vod|podcast|live)/(?P<id>[^/?&#]+)'
    _EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)//rudo\.video/(?:vod|podcast|live)/[^\'"]+)']
    _TESTS = [{
        'url': 'https://rudo.video/podcast/cz2wrUy8l0o',
        'md5': '28ed82b477708dc5e12e072da2449221',
        'info_dict': {
            'id': 'cz2wrUy8l0o',
            'title': 'Diego Cabot',
            'ext': 'mp4',
            'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
        },
    }, {
        'url': 'https://rudo.video/podcast/bQkt07',
        'md5': '36b22a9863de0f47f00fc7532a32a898',
        'info_dict': {
            'id': 'bQkt07',
            'title': 'Tubular Bells',
            'ext': 'mp4',
            'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
        },
    }, {
        'url': 'https://rudo.video/podcast/b42ZUznHX0',
        'md5': 'b91c70d832938871367f8ad10c895821',
        'info_dict': {
            'id': 'b42ZUznHX0',
            'title': 'Columna Ruperto Concha',
            'ext': 'mp3',
            'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
        },
    }, {
        'url': 'https://rudo.video/vod/bN5AaJ',
        'md5': '01324a329227e2591530ecb4f555c881',
        'info_dict': {
            'id': 'bN5AaJ',
            'title': 'Ucrania 19.03',
            'creator': 'La Tercera',
            'ext': 'mp4',
            'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
        },
    }, {
        'url': 'https://rudo.video/live/bbtv',
        'info_dict': {
            'id': 'bbtv',
            'ext': 'mp4',
            'creator': 'BioBioTV',
            'live_status': 'is_live',
            'title': r're:^LIVE BBTV\s\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}$',
            'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
        },
    }, {
        'url': 'https://rudo.video/live/c13',
        'info_dict': {
            'id': 'c13',
            'title': 'CANAL13',
            'ext': 'mp4',
        },
        'skip': 'Geo-restricted to Chile',
    }, {
        'url': 'https://rudo.video/live/t13-13cl',
        'info_dict': {
            'id': 't13-13cl',
            'title': 'T13',
            'ext': 'mp4',
        },
        'skip': 'Geo-restricted to Chile',
    }]

    def _real_extract(self, url):
        """Build the info dict for one rudo.video page.

        Hands off to the Youtube extractor when the page only references a
        YouTube file, and raises ExtractorError when no media URL is found
        or the access-token array holds no usable URL.
        """
        video_id, type_ = self._match_valid_url(url).group('id', 'type')
        is_live = type_ == 'live'

        webpage = self._download_webpage(url, video_id)
        if 'Streaming is not available in your area' in webpage:
            self.raise_geo_restricted()

        media_url = self._search_regex(
            r'var\s+streamURL\s*=\s*[\'"]([^?\'"]+)', webpage, 'stream url', default=None)
        if not media_url:
            # Source URL must be used only if streamURL is unavailable
            media_url = self._search_regex(
                r'<source[^>]+src=[\'"]([^\'"]+)', webpage, 'source url', default=None)

        if not media_url:
            # last resort: the page may only point at a YouTube file
            youtube_url = self._search_regex(
                r'file:\s*[\'"]((?:https?:)//(?:www\.)?youtube\.com[^\'"]+)',
                webpage, 'youtube url', default=None)
            if not youtube_url:
                raise ExtractorError('Unable to extract stream url')
            return self.url_result(youtube_url, 'Youtube')

        token_array = self._search_json(
            r'<script>var\s+_\$_[a-zA-Z0-9]+\s*=', webpage, 'access token array', video_id,
            contains_pattern=r'\[(?s:.+)\]', default=None, transform_source=js_to_json)
        if token_array:
            # the first array element that is a valid URL is taken as the token endpoint
            token_url = traverse_obj(token_array, (..., {url_or_none}), get_all=False)
            if not token_url:
                raise ExtractorError('Invalid access token array')
            token_response = self._download_json(
                token_url, video_id, note='Downloading access token')
            media_url = update_url_query(
                media_url, {'auth-token': token_response['data']['authToken']})

        ext = determine_ext(media_url)
        if ext == 'm3u8':
            formats = self._extract_m3u8_formats(media_url, video_id, live=is_live)
        else:
            direct_format = {'url': media_url}
            if ext == 'mp3':
                # audio-only file: mark it as having no video codec
                direct_format['vcodec'] = 'none'
            formats = [direct_format]

        # each field prefers the inline JS variable and falls back to OpenGraph where one exists
        title = self._search_regex(
            r'var\s+titleVideo\s*=\s*[\'"]([^\'"]+)', webpage, 'title', default=None)
        if not title:
            title = self._og_search_title(webpage)

        creator = self._search_regex(
            r'var\s+videoAuthor\s*=\s*[\'"]([^?\'"]+)', webpage, 'videoAuthor', default=None)

        thumbnail = self._search_regex(
            r'var\s+posterIMG\s*=\s*[\'"]([^?\'"]+)', webpage, 'thumbnail', default=None)
        if not thumbnail:
            thumbnail = self._og_search_thumbnail(webpage)

        return {
            'id': video_id,
            'title': title,
            'creator': creator,
            'thumbnail': thumbnail,
            'formats': formats,
            'is_live': is_live,
        }

View File

@ -10,7 +10,6 @@ from ..compat import (
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
) )
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
dict_get, dict_get,
@ -1318,51 +1317,41 @@ class TwitterIE(TwitterBaseIE):
} }
} }
def _call_syndication_api(self, twid):
    """Fetch a tweet from the syndication endpoint in legacy/graphql shape.

    Warns once that this endpoint is lossy, downloads the tweet JSON and
    attaches an 'extended_entities' entry built from the media details.
    Raises ExtractorError if the endpoint returns an empty response.
    """
    self.report_warning(
        'Not all metadata or media is available via syndication endpoint', twid, only_once=True)

    query = {
        'id': twid,
        # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
        'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
    }
    status = self._download_json(
        'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
        headers={'User-Agent': 'Googlebot'}, query=query)
    if not status:
        raise ExtractorError('Syndication endpoint returned empty JSON response')

    # Transform the result so its structure matches that of legacy/graphql
    media_details = traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict}))
    media = []
    for detail in media_details:
        # use the id matched in a variant URL when there is one, else the tweet id
        media_id = traverse_obj(detail, (
            'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False)
        detail['id_str'] = media_id or twid
        media.append(detail)
    status['extended_entities'] = {'media': media}

    return status
def _extract_status(self, twid): def _extract_status(self, twid):
if self._selected_api not in ('graphql', 'legacy', 'syndication'): if self.is_logged_in or self._selected_api == 'graphql':
raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True) status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
try: elif self._selected_api == 'legacy':
if self.is_logged_in or self._selected_api == 'graphql': status = self._call_api(f'statuses/show/{twid}.json', twid, {
status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid) 'cards_platform': 'Web-12',
elif self._selected_api == 'legacy': 'include_cards': 1,
status = self._call_api(f'statuses/show/{twid}.json', twid, { 'include_reply_count': 1,
'cards_platform': 'Web-12', 'include_user_entities': 0,
'include_cards': 1, 'tweet_mode': 'extended',
'include_reply_count': 1, })
'include_user_entities': 0,
'tweet_mode': 'extended', elif self._selected_api == 'syndication':
self.report_warning(
'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
status = self._download_json(
'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
headers={'User-Agent': 'Googlebot'}, query={
'id': twid,
# TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
}) })
except ExtractorError as e: if not status:
if not isinstance(e.cause, HTTPError) or not e.cause.status == 429: raise ExtractorError('Syndication endpoint returned empty JSON response')
raise # Transform the result so its structure matches that of legacy/graphql
self.report_warning('Rate-limit exceeded; falling back to syndication endpoint') media = []
status = self._call_syndication_api(twid) for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
detail['id_str'] = traverse_obj(detail, (
'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
media.append(detail)
status['extended_entities'] = {'media': media}
if self._selected_api == 'syndication': else:
status = self._call_syndication_api(twid) raise ExtractorError(f'"{self._selected_api}" is not a valid API selection', expected=True)
return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {} return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
@ -1427,8 +1416,8 @@ class TwitterIE(TwitterBaseIE):
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})), # No longer available 'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})), # No longer available
'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000), 'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
# Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117 # The codec of http formats are unknown
'_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'), # http format codec is unknown '_format_sort_fields': ('res', 'br', 'size', 'proto'),
} }
def extract_from_card_info(card): def extract_from_card_info(card):

View File

@ -206,14 +206,13 @@ class Updater:
# XXX: use class variables to simplify testing # XXX: use class variables to simplify testing
_channel = CHANNEL _channel = CHANNEL
_origin = ORIGIN _origin = ORIGIN
_update_sources = UPDATE_SOURCES
def __init__(self, ydl, target: str | None = None): def __init__(self, ydl, target: str | None = None):
self.ydl = ydl self.ydl = ydl
# For backwards compat, target needs to be treated as if it could be None # For backwards compat, target needs to be treated as if it could be None
self.requested_channel, sep, self.requested_tag = (target or self._channel).rpartition('@') self.requested_channel, sep, self.requested_tag = (target or self._channel).rpartition('@')
# Check if requested_tag is actually the requested repo/channel # Check if requested_tag is actually the requested repo/channel
if not sep and ('/' in self.requested_tag or self.requested_tag in self._update_sources): if not sep and ('/' in self.requested_tag or self.requested_tag in UPDATE_SOURCES):
self.requested_channel = self.requested_tag self.requested_channel = self.requested_tag
self.requested_tag: str = None # type: ignore (we set it later) self.requested_tag: str = None # type: ignore (we set it later)
elif not self.requested_channel: elif not self.requested_channel:
@ -238,11 +237,11 @@ class Updater:
self._block_restart('Automatically restarting into custom builds is disabled for security reasons') self._block_restart('Automatically restarting into custom builds is disabled for security reasons')
else: else:
# Check if requested_channel resolves to a known repository or else raise # Check if requested_channel resolves to a known repository or else raise
self.requested_repo = self._update_sources.get(self.requested_channel) self.requested_repo = UPDATE_SOURCES.get(self.requested_channel)
if not self.requested_repo: if not self.requested_repo:
self._report_error( self._report_error(
f'Invalid update channel {self.requested_channel!r} requested. ' f'Invalid update channel {self.requested_channel!r} requested. '
f'Valid channels are {", ".join(self._update_sources)}', True) f'Valid channels are {", ".join(UPDATE_SOURCES)}', True)
self._identifier = f'{detect_variant()} {system_identifier()}' self._identifier = f'{detect_variant()} {system_identifier()}'