mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-27 01:31:25 +01:00
Compare commits
23 Commits
fcb28933af
...
e48735604e
Author | SHA1 | Date | |
---|---|---|---|
|
e48735604e | ||
|
e2792fd5b3 | ||
|
39d0e2248a | ||
|
abfe114e49 | ||
|
4396063a52 | ||
|
64f8963313 | ||
|
87e8a2dfe3 | ||
|
68b4cbdd2b | ||
|
da957cff5e | ||
|
5e90f3a9a9 | ||
|
c39358a54b | ||
|
1f8bd8eba8 | ||
|
00cdda4f6f | ||
|
116c268438 | ||
|
e7d22348e7 | ||
|
50eaea9fd7 | ||
|
f45c4efcd9 | ||
|
13b3cb3c2b | ||
|
0d531c35ec | ||
|
bc4ab17b38 | ||
|
632b8ee54e | ||
|
c919b68f7e | ||
|
19741ab8a4 |
8
.github/workflows/build.yml
vendored
8
.github/workflows/build.yml
vendored
|
@ -80,12 +80,12 @@ on:
|
||||||
default: true
|
default: true
|
||||||
type: boolean
|
type: boolean
|
||||||
origin:
|
origin:
|
||||||
description: .
|
description: Origin
|
||||||
required: false
|
required: false
|
||||||
default: ''
|
default: 'current repo'
|
||||||
type: choice
|
type: choice
|
||||||
options:
|
options:
|
||||||
- ''
|
- 'current repo'
|
||||||
|
|
||||||
permissions:
|
permissions:
|
||||||
contents: read
|
contents: read
|
||||||
|
@ -99,7 +99,7 @@ jobs:
|
||||||
- name: Process origin
|
- name: Process origin
|
||||||
id: process_origin
|
id: process_origin
|
||||||
run: |
|
run: |
|
||||||
echo "origin=${{ inputs.origin || github.repository }}" >> "$GITHUB_OUTPUT"
|
echo "origin=${{ inputs.origin == 'current repo' && github.repository || inputs.origin }}" | tee "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
unix:
|
unix:
|
||||||
needs: process
|
needs: process
|
||||||
|
|
22
.github/workflows/core.yml
vendored
22
.github/workflows/core.yml
vendored
|
@ -1,5 +1,25 @@
|
||||||
name: Core Tests
|
name: Core Tests
|
||||||
on: [push, pull_request]
|
on:
|
||||||
|
push:
|
||||||
|
paths:
|
||||||
|
- .github/**
|
||||||
|
- devscripts/**
|
||||||
|
- test/**
|
||||||
|
- yt_dlp/**.py
|
||||||
|
- '!yt_dlp/extractor/*.py'
|
||||||
|
- yt_dlp/extractor/__init__.py
|
||||||
|
- yt_dlp/extractor/common.py
|
||||||
|
- yt_dlp/extractor/extractors.py
|
||||||
|
pull_request:
|
||||||
|
paths:
|
||||||
|
- .github/**
|
||||||
|
- devscripts/**
|
||||||
|
- test/**
|
||||||
|
- yt_dlp/**.py
|
||||||
|
- '!yt_dlp/extractor/*.py'
|
||||||
|
- yt_dlp/extractor/__init__.py
|
||||||
|
- yt_dlp/extractor/common.py
|
||||||
|
- yt_dlp/extractor/extractors.py
|
||||||
permissions:
|
permissions:
|
||||||
contents: read
|
contents: read
|
||||||
|
|
||||||
|
|
24
.github/workflows/release.yml
vendored
24
.github/workflows/release.yml
vendored
|
@ -64,7 +64,6 @@ jobs:
|
||||||
target_tag: ${{ steps.setup_variables.outputs.target_tag }}
|
target_tag: ${{ steps.setup_variables.outputs.target_tag }}
|
||||||
pypi_project: ${{ steps.setup_variables.outputs.pypi_project }}
|
pypi_project: ${{ steps.setup_variables.outputs.pypi_project }}
|
||||||
pypi_suffix: ${{ steps.setup_variables.outputs.pypi_suffix }}
|
pypi_suffix: ${{ steps.setup_variables.outputs.pypi_suffix }}
|
||||||
pypi_token: ${{ steps.setup_variables.outputs.pypi_token }}
|
|
||||||
head_sha: ${{ steps.get_target.outputs.head_sha }}
|
head_sha: ${{ steps.get_target.outputs.head_sha }}
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
|
@ -153,7 +152,6 @@ jobs:
|
||||||
${{ !!secrets[format('{0}_archive_repo_token', env.target_repo)] }} || fallback_token
|
${{ !!secrets[format('{0}_archive_repo_token', env.target_repo)] }} || fallback_token
|
||||||
pypi_project='${{ vars[format('{0}_pypi_project', env.target_repo)] }}'
|
pypi_project='${{ vars[format('{0}_pypi_project', env.target_repo)] }}'
|
||||||
pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.target_repo)] }}'
|
pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.target_repo)] }}'
|
||||||
${{ !secrets[format('{0}_pypi_token', env.target_repo)] }} || pypi_token='${{ env.target_repo }}_pypi_token'
|
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
target_tag="${source_tag:-${version}}"
|
target_tag="${source_tag:-${version}}"
|
||||||
|
@ -163,7 +161,6 @@ jobs:
|
||||||
${{ !!secrets[format('{0}_archive_repo_token', env.source_repo)] }} || fallback_token
|
${{ !!secrets[format('{0}_archive_repo_token', env.source_repo)] }} || fallback_token
|
||||||
pypi_project='${{ vars[format('{0}_pypi_project', env.source_repo)] }}'
|
pypi_project='${{ vars[format('{0}_pypi_project', env.source_repo)] }}'
|
||||||
pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.source_repo)] }}'
|
pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.source_repo)] }}'
|
||||||
${{ !secrets[format('{0}_pypi_token', env.source_repo)] }} || pypi_token='${{ env.source_repo }}_pypi_token'
|
|
||||||
else
|
else
|
||||||
target_repo='${{ github.repository }}'
|
target_repo='${{ github.repository }}'
|
||||||
fi
|
fi
|
||||||
|
@ -172,13 +169,6 @@ jobs:
|
||||||
if [[ "${target_repo}" == '${{ github.repository }}' ]] && ${{ !inputs.prerelease }}; then
|
if [[ "${target_repo}" == '${{ github.repository }}' ]] && ${{ !inputs.prerelease }}; then
|
||||||
pypi_project='${{ vars.PYPI_PROJECT }}'
|
pypi_project='${{ vars.PYPI_PROJECT }}'
|
||||||
fi
|
fi
|
||||||
if [[ -z "${pypi_token}" && "${pypi_project}" ]]; then
|
|
||||||
if ${{ !secrets.PYPI_TOKEN }}; then
|
|
||||||
pypi_token=OIDC
|
|
||||||
else
|
|
||||||
pypi_token=PYPI_TOKEN
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "::group::Output variables"
|
echo "::group::Output variables"
|
||||||
cat << EOF | tee -a "$GITHUB_OUTPUT"
|
cat << EOF | tee -a "$GITHUB_OUTPUT"
|
||||||
|
@ -189,7 +179,6 @@ jobs:
|
||||||
target_tag=${target_tag}
|
target_tag=${target_tag}
|
||||||
pypi_project=${pypi_project}
|
pypi_project=${pypi_project}
|
||||||
pypi_suffix=${pypi_suffix}
|
pypi_suffix=${pypi_suffix}
|
||||||
pypi_token=${pypi_token}
|
|
||||||
EOF
|
EOF
|
||||||
echo "::endgroup::"
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
@ -286,18 +275,7 @@ jobs:
|
||||||
python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update"
|
python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update"
|
||||||
python setup.py sdist bdist_wheel
|
python setup.py sdist bdist_wheel
|
||||||
|
|
||||||
- name: Publish to PyPI via token
|
- name: Publish to PyPI
|
||||||
env:
|
|
||||||
TWINE_USERNAME: __token__
|
|
||||||
TWINE_PASSWORD: ${{ secrets[needs.prepare.outputs.pypi_token] }}
|
|
||||||
if: |
|
|
||||||
needs.prepare.outputs.pypi_token != 'OIDC' && env.TWINE_PASSWORD
|
|
||||||
run: |
|
|
||||||
twine upload dist/*
|
|
||||||
|
|
||||||
- name: Publish to PyPI via trusted publishing
|
|
||||||
if: |
|
|
||||||
needs.prepare.outputs.pypi_token == 'OIDC'
|
|
||||||
uses: pypa/gh-action-pypi-publish@release/v1
|
uses: pypa/gh-action-pypi-publish@release/v1
|
||||||
with:
|
with:
|
||||||
verbose: true
|
verbose: true
|
||||||
|
|
|
@ -140,6 +140,8 @@ class TestFormatSelection(unittest.TestCase):
|
||||||
test('example-with-dashes', 'example-with-dashes')
|
test('example-with-dashes', 'example-with-dashes')
|
||||||
test('all', '2', '47', '45', 'example-with-dashes', '35')
|
test('all', '2', '47', '45', 'example-with-dashes', '35')
|
||||||
test('mergeall', '2+47+45+example-with-dashes+35', multi=True)
|
test('mergeall', '2+47+45+example-with-dashes+35', multi=True)
|
||||||
|
# See: https://github.com/yt-dlp/yt-dlp/pulls/8797
|
||||||
|
test('7_a/worst', '35')
|
||||||
|
|
||||||
def test_format_selection_audio(self):
|
def test_format_selection_audio(self):
|
||||||
formats = [
|
formats = [
|
||||||
|
|
|
@ -11,6 +11,14 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
from test.helper import FakeYDL, report_warning
|
from test.helper import FakeYDL, report_warning
|
||||||
from yt_dlp.update import Updater, UpdateInfo
|
from yt_dlp.update import Updater, UpdateInfo
|
||||||
|
|
||||||
|
|
||||||
|
# XXX: Keep in sync with yt_dlp.update.UPDATE_SOURCES
|
||||||
|
TEST_UPDATE_SOURCES = {
|
||||||
|
'stable': 'yt-dlp/yt-dlp',
|
||||||
|
'nightly': 'yt-dlp/yt-dlp-nightly-builds',
|
||||||
|
'master': 'yt-dlp/yt-dlp-master-builds',
|
||||||
|
}
|
||||||
|
|
||||||
TEST_API_DATA = {
|
TEST_API_DATA = {
|
||||||
'yt-dlp/yt-dlp/latest': {
|
'yt-dlp/yt-dlp/latest': {
|
||||||
'tag_name': '2023.12.31',
|
'tag_name': '2023.12.31',
|
||||||
|
@ -104,6 +112,7 @@ class FakeUpdater(Updater):
|
||||||
|
|
||||||
_channel = 'stable'
|
_channel = 'stable'
|
||||||
_origin = 'yt-dlp/yt-dlp'
|
_origin = 'yt-dlp/yt-dlp'
|
||||||
|
_update_sources = TEST_UPDATE_SOURCES
|
||||||
|
|
||||||
def _download_update_spec(self, *args, **kwargs):
|
def _download_update_spec(self, *args, **kwargs):
|
||||||
return TEST_LOCKFILE_ACTUAL
|
return TEST_LOCKFILE_ACTUAL
|
||||||
|
|
|
@ -2465,9 +2465,16 @@ class YoutubeDL:
|
||||||
return selector_function(ctx_copy)
|
return selector_function(ctx_copy)
|
||||||
return final_selector
|
return final_selector
|
||||||
|
|
||||||
stream = io.BytesIO(format_spec.encode())
|
# HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
|
||||||
|
# Prefix numbers with random letters to avoid it being classified as a number
|
||||||
|
# See: https://github.com/yt-dlp/yt-dlp/pulls/8797
|
||||||
|
# TODO: Implement parser not reliant on tokenize.tokenize
|
||||||
|
prefix = ''.join(random.choices(string.ascii_letters, k=32))
|
||||||
|
stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
|
||||||
try:
|
try:
|
||||||
tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
|
tokens = list(_remove_unused_ops(
|
||||||
|
token._replace(string=token.string.replace(prefix, ''))
|
||||||
|
for token in tokenize.tokenize(stream.readline)))
|
||||||
except tokenize.TokenError:
|
except tokenize.TokenError:
|
||||||
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
|
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
|
||||||
|
|
||||||
|
|
|
@ -1648,6 +1648,7 @@ from .rumble import (
|
||||||
RumbleIE,
|
RumbleIE,
|
||||||
RumbleChannelIE,
|
RumbleChannelIE,
|
||||||
)
|
)
|
||||||
|
from .rudovideo import RudoVideoIE
|
||||||
from .rutube import (
|
from .rutube import (
|
||||||
RutubeIE,
|
RutubeIE,
|
||||||
RutubeChannelIE,
|
RutubeChannelIE,
|
||||||
|
|
|
@ -292,7 +292,7 @@ class ARDIE(InfoExtractor):
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# available till 7.12.2023
|
# available till 7.12.2023
|
||||||
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
|
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
|
||||||
'md5': 'a438f671e87a7eba04000336a119ccc4',
|
'md5': '94812e6438488fb923c361a44469614b',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'maischberger-video-424',
|
'id': 'maischberger-video-424',
|
||||||
'display_id': 'maischberger-video-424',
|
'display_id': 'maischberger-video-424',
|
||||||
|
@ -403,26 +403,25 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
_VALID_URL = r'''(?x)https://
|
_VALID_URL = r'''(?x)https://
|
||||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||||
(?:(?P<client>[^/]+)/)?
|
(?:(?P<client>[^/]+)/)?
|
||||||
(?:player|live|video|(?P<playlist>sendung|sammlung))/
|
(?:player|live|video|(?P<playlist>sendung|serie|sammlung))/
|
||||||
(?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
|
(?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
|
||||||
(?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
|
(?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
|
||||||
(?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''
|
(?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI',
|
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
|
||||||
'md5': '3fd5fead7a370a819341129c8d713136',
|
'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen',
|
'display_id': 'filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen',
|
||||||
'id': '12172961',
|
'id': '12939099',
|
||||||
'title': 'Wolfsland - Die traurigen Schwestern',
|
'title': 'Liebe auf vier Pfoten',
|
||||||
'description': r're:^Als der Polizeiobermeister Raaben',
|
'description': r're:^Claudia Schmitt, Anwältin in Salzburg',
|
||||||
'duration': 5241,
|
'duration': 5222,
|
||||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957',
|
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:aee7cbf8f06de976?w=960&ch=ae4d0f2ee47d8b9b',
|
||||||
'timestamp': 1670710500,
|
'timestamp': 1701343800,
|
||||||
'upload_date': '20221210',
|
'upload_date': '20231130',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'age_limit': 12,
|
'episode': 'Liebe auf vier Pfoten',
|
||||||
'episode': 'Wolfsland - Die traurigen Schwestern',
|
|
||||||
'series': 'Filme im MDR'
|
'series': 'Filme im MDR'
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
@ -454,7 +453,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
'duration': 915,
|
'duration': 915,
|
||||||
'episode': 'tagesschau, 20:00 Uhr',
|
'episode': 'tagesschau, 20:00 Uhr',
|
||||||
'series': 'tagesschau',
|
'series': 'tagesschau',
|
||||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49',
|
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||||
|
@ -475,6 +474,10 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
# playlist of type 'sendung'
|
# playlist of type 'sendung'
|
||||||
'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
|
'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# playlist of type 'serie'
|
||||||
|
'url': 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# playlist of type 'sammlung'
|
# playlist of type 'sammlung'
|
||||||
'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
|
'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
|
||||||
|
@ -487,10 +490,11 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber):
|
def _ARD_load_playlist_snippet(self, playlist_id, display_id, client, mode, page_number):
|
||||||
""" Query the ARD server for playlist information
|
""" Query the ARD server for playlist information
|
||||||
and returns the data in "raw" format """
|
and returns the data in "raw" format """
|
||||||
if mode == 'sendung':
|
assert mode in ('sendung', 'serie', 'sammlung')
|
||||||
|
if mode in ('sendung', 'serie'):
|
||||||
graphQL = json.dumps({
|
graphQL = json.dumps({
|
||||||
'query': '''{
|
'query': '''{
|
||||||
showPage(
|
showPage(
|
||||||
|
@ -507,7 +511,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
links { target { id href title } }
|
links { target { id href title } }
|
||||||
type
|
type
|
||||||
}
|
}
|
||||||
}}''' % (client, playlist_id, pageNumber),
|
}}''' % (client, playlist_id, page_number),
|
||||||
}).encode()
|
}).encode()
|
||||||
else: # mode == 'sammlung'
|
else: # mode == 'sammlung'
|
||||||
graphQL = json.dumps({
|
graphQL = json.dumps({
|
||||||
|
@ -528,7 +532,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
type
|
type
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}}''' % (client, playlist_id, pageNumber),
|
}}''' % (client, playlist_id, page_number),
|
||||||
}).encode()
|
}).encode()
|
||||||
# Resources for ARD graphQL debugging:
|
# Resources for ARD graphQL debugging:
|
||||||
# https://api-test.ardmediathek.de/public-gateway
|
# https://api-test.ardmediathek.de/public-gateway
|
||||||
|
@ -538,7 +542,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
data=graphQL,
|
data=graphQL,
|
||||||
headers={'Content-Type': 'application/json'})['data']
|
headers={'Content-Type': 'application/json'})['data']
|
||||||
# align the structure of the returned data:
|
# align the structure of the returned data:
|
||||||
if mode == 'sendung':
|
if mode in ('sendung', 'serie'):
|
||||||
show_page = show_page['showPage']
|
show_page = show_page['showPage']
|
||||||
else: # mode == 'sammlung'
|
else: # mode == 'sammlung'
|
||||||
show_page = show_page['morePage']['widget']
|
show_page = show_page['morePage']['widget']
|
||||||
|
@ -546,12 +550,12 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
|
|
||||||
def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
|
def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
|
||||||
""" Collects all playlist entries and returns them as info dict.
|
""" Collects all playlist entries and returns them as info dict.
|
||||||
Supports playlists of mode 'sendung' and 'sammlung', and also nested
|
Supports playlists of mode 'sendung', 'serie', and 'sammlung',
|
||||||
playlists. """
|
as well as nested playlists. """
|
||||||
entries = []
|
entries = []
|
||||||
pageNumber = 0
|
pageNumber = 0
|
||||||
while True: # iterate by pageNumber
|
while True: # iterate by pageNumber
|
||||||
show_page = self._ARD_load_playlist_snipped(
|
show_page = self._ARD_load_playlist_snippet(
|
||||||
playlist_id, display_id, client, mode, pageNumber)
|
playlist_id, display_id, client, mode, pageNumber)
|
||||||
for teaser in show_page['teasers']: # process playlist items
|
for teaser in show_page['teasers']: # process playlist items
|
||||||
if '/compilation/' in teaser['links']['target']['href']:
|
if '/compilation/' in teaser['links']['target']['href']:
|
||||||
|
|
|
@ -317,16 +317,25 @@ class BBCCoUkIE(InfoExtractor):
|
||||||
|
|
||||||
def _download_media_selector(self, programme_id):
|
def _download_media_selector(self, programme_id):
|
||||||
last_exception = None
|
last_exception = None
|
||||||
|
formats, subtitles = [], {}
|
||||||
for media_set in self._MEDIA_SETS:
|
for media_set in self._MEDIA_SETS:
|
||||||
try:
|
try:
|
||||||
return self._download_media_selector_url(
|
fmts, subs = self._download_media_selector_url(
|
||||||
self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
|
self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
|
||||||
|
formats.extend(fmts)
|
||||||
|
if subs:
|
||||||
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
except BBCCoUkIE.MediaSelectionError as e:
|
except BBCCoUkIE.MediaSelectionError as e:
|
||||||
if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
|
if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
|
||||||
last_exception = e
|
last_exception = e
|
||||||
continue
|
continue
|
||||||
self._raise_extractor_error(e)
|
self._raise_extractor_error(e)
|
||||||
self._raise_extractor_error(last_exception)
|
if last_exception:
|
||||||
|
if formats or subtitles:
|
||||||
|
self.report_warning(f'{self.IE_NAME} returned error: {last_exception.id}')
|
||||||
|
else:
|
||||||
|
self._raise_extractor_error(last_exception)
|
||||||
|
return formats, subtitles
|
||||||
|
|
||||||
def _download_media_selector_url(self, url, programme_id=None):
|
def _download_media_selector_url(self, url, programme_id=None):
|
||||||
media_selection = self._download_json(
|
media_selection = self._download_json(
|
||||||
|
@ -1188,7 +1197,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
if initial_data is None:
|
if initial_data is None:
|
||||||
initial_data = self._search_regex(
|
initial_data = self._search_regex(
|
||||||
r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
|
r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
|
||||||
'preload state', default={})
|
'preload state', default='{}')
|
||||||
else:
|
else:
|
||||||
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
|
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
|
||||||
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
|
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
|
||||||
|
|
|
@ -52,7 +52,7 @@ class FacebookIE(InfoExtractor):
|
||||||
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
||||||
[^/]+/videos/(?:[^/]+/)?|
|
[^/]+/videos/(?:[^/]+/)?|
|
||||||
[^/]+/posts/|
|
[^/]+/posts/|
|
||||||
groups/[^/]+/permalink/|
|
groups/[^/]+/(?:permalink|posts)/|
|
||||||
watchparty/
|
watchparty/
|
||||||
)|
|
)|
|
||||||
facebook:
|
facebook:
|
||||||
|
@ -232,6 +232,21 @@ class FacebookIE(InfoExtractor):
|
||||||
'uploader_id': '100013949973717',
|
'uploader_id': '100013949973717',
|
||||||
},
|
},
|
||||||
'skip': 'Requires logging in',
|
'skip': 'Requires logging in',
|
||||||
|
}, {
|
||||||
|
# data.node.comet_sections.content.story.attachments[].throwbackStyles.attachment_target_renderer.attachment.target.attachments[].styles.attachment.media
|
||||||
|
'url': 'https://www.facebook.com/groups/1645456212344334/posts/3737828833107051/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1569199726448814',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Pence MUST GO!',
|
||||||
|
'description': 'Vickie Gentry shared a memory.',
|
||||||
|
'timestamp': 1511548260,
|
||||||
|
'upload_date': '20171124',
|
||||||
|
'uploader': 'Vickie Gentry',
|
||||||
|
'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
|
||||||
|
'thumbnail': r're:^https?://.*',
|
||||||
|
'duration': 148.435,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -612,9 +627,11 @@ class FacebookIE(InfoExtractor):
|
||||||
nodes = variadic(traverse_obj(data, 'nodes', 'node') or [])
|
nodes = variadic(traverse_obj(data, 'nodes', 'node') or [])
|
||||||
attachments = traverse_obj(nodes, (
|
attachments = traverse_obj(nodes, (
|
||||||
..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments',
|
..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments',
|
||||||
..., ('styles', 'style_type_renderer'), 'attachment'), expected_type=dict) or []
|
..., ('styles', 'style_type_renderer', ('throwbackStyles', 'attachment_target_renderer')),
|
||||||
|
'attachment', {dict}))
|
||||||
for attachment in attachments:
|
for attachment in attachments:
|
||||||
ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
|
ns = traverse_obj(attachment, ('all_subattachments', 'nodes', ..., {dict}),
|
||||||
|
('target', 'attachments', ..., 'styles', 'attachment', {dict}))
|
||||||
for n in ns:
|
for n in ns:
|
||||||
parse_attachment(n)
|
parse_attachment(n)
|
||||||
parse_attachment(attachment)
|
parse_attachment(attachment)
|
||||||
|
@ -637,7 +654,7 @@ class FacebookIE(InfoExtractor):
|
||||||
if len(entries) > 1:
|
if len(entries) > 1:
|
||||||
return self.playlist_result(entries, video_id)
|
return self.playlist_result(entries, video_id)
|
||||||
|
|
||||||
video_info = entries[0]
|
video_info = entries[0] if entries else {'id': video_id}
|
||||||
webpage_info = extract_metadata(webpage)
|
webpage_info = extract_metadata(webpage)
|
||||||
# honor precise duration in video info
|
# honor precise duration in video info
|
||||||
if video_info.get('duration'):
|
if video_info.get('duration'):
|
||||||
|
|
|
@ -23,7 +23,7 @@ class IHeartRadioBaseIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class IHeartRadioIE(IHeartRadioBaseIE):
|
class IHeartRadioIE(IHeartRadioBaseIE):
|
||||||
IENAME = 'iheartradio'
|
IE_NAME = 'iheartradio'
|
||||||
_VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)'
|
_VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true',
|
'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true',
|
||||||
|
|
|
@ -1,99 +1,243 @@
|
||||||
|
import functools
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
determine_ext,
|
||||||
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
ExtractorError,
|
parse_iso8601,
|
||||||
|
str_or_none,
|
||||||
|
strip_or_none,
|
||||||
|
traverse_obj,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ImgurIE(InfoExtractor):
|
class ImgurBaseIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|(?:t(?:opic)?|r)/[^/]+)/)(?P<id>[a-zA-Z0-9]+)'
|
_CLIENT_ID = '546c25a59c58ad7'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _imgur_result(cls, item_id):
|
||||||
|
return cls.url_result(f'https://imgur.com/{item_id}', ImgurIE, item_id)
|
||||||
|
|
||||||
|
def _call_api(self, endpoint, video_id, **kwargs):
|
||||||
|
return self._download_json(
|
||||||
|
f'https://api.imgur.com/post/v1/{endpoint}/{video_id}?client_id={self._CLIENT_ID}&include=media,account',
|
||||||
|
video_id, **kwargs)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_description(s):
|
||||||
|
if 'Discover the magic of the internet at Imgur' in s:
|
||||||
|
return None
|
||||||
|
return s or None
|
||||||
|
|
||||||
|
|
||||||
|
class ImgurIE(ImgurBaseIE):
|
||||||
|
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?P<id>[a-zA-Z0-9]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
'url': 'https://imgur.com/A61SaA1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'A61SaA1',
|
'id': 'A61SaA1',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
|
'title': 'MRW gifv is up and running without any bugs',
|
||||||
|
'timestamp': 1416446068,
|
||||||
|
'upload_date': '20141120',
|
||||||
|
'dislike_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'release_timestamp': 1416446068,
|
||||||
|
'release_date': '20141120',
|
||||||
|
'like_count': int,
|
||||||
|
'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://imgur.com/A61SaA1',
|
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://i.imgur.com/crGpqCV.mp4',
|
'url': 'https://i.imgur.com/crGpqCV.mp4',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# no title
|
|
||||||
'url': 'https://i.imgur.com/jxBXAMC.gifv',
|
'url': 'https://i.imgur.com/jxBXAMC.gifv',
|
||||||
'only_matching': True,
|
'info_dict': {
|
||||||
|
'id': 'jxBXAMC',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Fahaka puffer feeding',
|
||||||
|
'timestamp': 1533835503,
|
||||||
|
'upload_date': '20180809',
|
||||||
|
'release_date': '20180809',
|
||||||
|
'like_count': int,
|
||||||
|
'duration': 30.0,
|
||||||
|
'comment_count': int,
|
||||||
|
'release_timestamp': 1533835503,
|
||||||
|
'thumbnail': 'https://i.imgur.com/jxBXAMCh.jpg',
|
||||||
|
'dislike_count': int,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
data = self._call_api('media', video_id)
|
||||||
|
if not traverse_obj(data, ('media', 0, (
|
||||||
|
('type', {lambda t: t == 'video' or None}),
|
||||||
|
('metadata', 'is_animated'))), get_all=False):
|
||||||
|
raise ExtractorError(f'{video_id} is not a video or animated image', expected=True)
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id)
|
f'https://i.imgur.com/{video_id}.gifv', video_id, fatal=False) or ''
|
||||||
|
formats = []
|
||||||
|
|
||||||
width = int_or_none(self._og_search_property(
|
media_fmt = traverse_obj(data, ('media', 0, {
|
||||||
'video:width', webpage, default=None))
|
'url': ('url', {url_or_none}),
|
||||||
height = int_or_none(self._og_search_property(
|
'ext': ('ext', {str}),
|
||||||
'video:height', webpage, default=None))
|
'width': ('width', {int_or_none}),
|
||||||
|
'height': ('height', {int_or_none}),
|
||||||
|
'filesize': ('size', {int_or_none}),
|
||||||
|
'acodec': ('metadata', 'has_sound', {lambda b: None if b else 'none'}),
|
||||||
|
}))
|
||||||
|
media_url = media_fmt.get('url')
|
||||||
|
if media_url:
|
||||||
|
if not media_fmt.get('ext'):
|
||||||
|
media_fmt['ext'] = mimetype2ext(traverse_obj(
|
||||||
|
data, ('media', 0, 'mime_type'))) or determine_ext(media_url)
|
||||||
|
if traverse_obj(data, ('media', 0, 'type')) == 'image':
|
||||||
|
media_fmt['acodec'] = 'none'
|
||||||
|
media_fmt.setdefault('preference', -10)
|
||||||
|
formats.append(media_fmt)
|
||||||
|
|
||||||
video_elements = self._search_regex(
|
video_elements = self._search_regex(
|
||||||
r'(?s)<div class="video-elements">(.*?)</div>',
|
r'(?s)<div class="video-elements">(.*?)</div>',
|
||||||
webpage, 'video elements', default=None)
|
webpage, 'video elements', default=None)
|
||||||
if not video_elements:
|
|
||||||
raise ExtractorError(
|
|
||||||
'No sources found for video %s. Maybe an image?' % video_id,
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
formats = []
|
if video_elements:
|
||||||
for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
|
def og_get_size(media_type):
|
||||||
formats.append({
|
return {
|
||||||
'format_id': m.group('type').partition('/')[2],
|
p: int_or_none(self._og_search_property(f'{media_type}:{p}', webpage, default=None))
|
||||||
'url': self._proto_relative_url(m.group('src')),
|
for p in ('width', 'height')
|
||||||
'ext': mimetype2ext(m.group('type')),
|
}
|
||||||
'width': width,
|
|
||||||
'height': height,
|
|
||||||
'http_headers': {
|
|
||||||
'User-Agent': 'yt-dlp (like wget)',
|
|
||||||
},
|
|
||||||
})
|
|
||||||
|
|
||||||
gif_json = self._search_regex(
|
size = og_get_size('video')
|
||||||
r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
|
if not any(size.values()):
|
||||||
webpage, 'GIF code', fatal=False)
|
size = og_get_size('image')
|
||||||
if gif_json:
|
|
||||||
gifd = self._parse_json(
|
formats = traverse_obj(
|
||||||
gif_json, video_id, transform_source=js_to_json)
|
re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements),
|
||||||
formats.append({
|
(..., {
|
||||||
'format_id': 'gif',
|
'format_id': ('type', {lambda s: s.partition('/')[2]}),
|
||||||
'preference': -10, # gifs are worse than videos
|
'url': ('src', {self._proto_relative_url}),
|
||||||
'width': width,
|
'ext': ('type', {mimetype2ext}),
|
||||||
'height': height,
|
}))
|
||||||
'ext': 'gif',
|
for f in formats:
|
||||||
'acodec': 'none',
|
f.update(size)
|
||||||
'vcodec': 'gif',
|
|
||||||
'container': 'gif',
|
# We can get the original gif format from the webpage as well
|
||||||
'url': self._proto_relative_url(gifd['gifUrl']),
|
gif_json = traverse_obj(self._search_json(
|
||||||
'filesize': gifd.get('size'),
|
r'var\s+videoItem\s*=', webpage, 'GIF info', video_id,
|
||||||
'http_headers': {
|
transform_source=js_to_json, fatal=False), {
|
||||||
'User-Agent': 'yt-dlp (like wget)',
|
'url': ('gifUrl', {self._proto_relative_url}),
|
||||||
},
|
'filesize': ('size', {int_or_none}),
|
||||||
})
|
})
|
||||||
|
if gif_json:
|
||||||
|
gif_json.update(size)
|
||||||
|
gif_json.update({
|
||||||
|
'format_id': 'gif',
|
||||||
|
'preference': -10, # gifs < videos
|
||||||
|
'ext': 'gif',
|
||||||
|
'acodec': 'none',
|
||||||
|
'vcodec': 'gif',
|
||||||
|
'container': 'gif',
|
||||||
|
})
|
||||||
|
formats.append(gif_json)
|
||||||
|
|
||||||
|
search = functools.partial(self._html_search_meta, html=webpage, default=None)
|
||||||
|
|
||||||
|
twitter_fmt = {
|
||||||
|
'format_id': 'twitter',
|
||||||
|
'url': url_or_none(search('twitter:player:stream')),
|
||||||
|
'ext': mimetype2ext(search('twitter:player:stream:content_type')),
|
||||||
|
'width': int_or_none(search('twitter:width')),
|
||||||
|
'height': int_or_none(search('twitter:height')),
|
||||||
|
}
|
||||||
|
if twitter_fmt['url']:
|
||||||
|
formats.append(twitter_fmt)
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
self.raise_no_formats(
|
||||||
|
f'No sources found for video {video_id}. Maybe a plain image?', expected=True)
|
||||||
|
self._remove_duplicate_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
'title': self._og_search_title(webpage, default=None),
|
||||||
|
'description': self.get_description(self._og_search_description(webpage, default='')),
|
||||||
|
**traverse_obj(data, {
|
||||||
|
'uploader_id': ('account_id', {lambda a: str(a) if int_or_none(a) else None}),
|
||||||
|
'uploader': ('account', 'username', {lambda x: strip_or_none(x) or None}),
|
||||||
|
'uploader_url': ('account', 'avatar_url', {url_or_none}),
|
||||||
|
'like_count': ('upvote_count', {int_or_none}),
|
||||||
|
'dislike_count': ('downvote_count', {int_or_none}),
|
||||||
|
'comment_count': ('comment_count', {int_or_none}),
|
||||||
|
'age_limit': ('is_mature', {lambda x: 18 if x else None}),
|
||||||
|
'timestamp': (('updated_at', 'created_at'), {parse_iso8601}),
|
||||||
|
'release_timestamp': ('created_at', {parse_iso8601}),
|
||||||
|
}, get_all=False),
|
||||||
|
**traverse_obj(data, ('media', 0, 'metadata', {
|
||||||
|
'title': ('title', {lambda x: strip_or_none(x) or None}),
|
||||||
|
'description': ('description', {self.get_description}),
|
||||||
|
'duration': ('duration', {float_or_none}),
|
||||||
|
'timestamp': (('updated_at', 'created_at'), {parse_iso8601}),
|
||||||
|
'release_timestamp': ('created_at', {parse_iso8601}),
|
||||||
|
}), get_all=False),
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'title': self._og_search_title(webpage, default=video_id),
|
'thumbnail': url_or_none(search('thumbnailUrl')),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class ImgurGalleryIE(InfoExtractor):
|
class ImgurGalleryBaseIE(ImgurBaseIE):
|
||||||
|
_GALLERY = True
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
gallery_id = self._match_id(url)
|
||||||
|
|
||||||
|
data = self._call_api('albums', gallery_id, fatal=False, expected_status=404)
|
||||||
|
|
||||||
|
info = traverse_obj(data, {
|
||||||
|
'title': ('title', {lambda x: strip_or_none(x) or None}),
|
||||||
|
'description': ('description', {self.get_description}),
|
||||||
|
})
|
||||||
|
|
||||||
|
if traverse_obj(data, 'is_album'):
|
||||||
|
|
||||||
|
def yield_media_ids():
|
||||||
|
for m_id in traverse_obj(data, (
|
||||||
|
'media', lambda _, v: v.get('type') == 'video' or v['metadata']['is_animated'],
|
||||||
|
'id', {lambda x: str_or_none(x) or None})):
|
||||||
|
yield m_id
|
||||||
|
|
||||||
|
# if a gallery with exactly one video, apply album metadata to video
|
||||||
|
media_id = (
|
||||||
|
self._GALLERY
|
||||||
|
and traverse_obj(data, ('image_count', {lambda c: c == 1}))
|
||||||
|
and next(yield_media_ids(), None))
|
||||||
|
|
||||||
|
if not media_id:
|
||||||
|
result = self.playlist_result(
|
||||||
|
map(self._imgur_result, yield_media_ids()), gallery_id)
|
||||||
|
result.update(info)
|
||||||
|
return result
|
||||||
|
gallery_id = media_id
|
||||||
|
|
||||||
|
result = self._imgur_result(gallery_id)
|
||||||
|
info['_type'] = 'url_transparent'
|
||||||
|
result.update(info)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||||
IE_NAME = 'imgur:gallery'
|
IE_NAME = 'imgur:gallery'
|
||||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/]+)/(?P<id>[a-zA-Z0-9]+)'
|
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?P<id>[a-zA-Z0-9]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://imgur.com/gallery/Q95ko',
|
'url': 'http://imgur.com/gallery/Q95ko',
|
||||||
|
@ -102,49 +246,121 @@ class ImgurGalleryIE(InfoExtractor):
|
||||||
'title': 'Adding faces make every GIF better',
|
'title': 'Adding faces make every GIF better',
|
||||||
},
|
},
|
||||||
'playlist_count': 25,
|
'playlist_count': 25,
|
||||||
|
'skip': 'Zoinks! You\'ve taken a wrong turn.',
|
||||||
}, {
|
}, {
|
||||||
|
# TODO: static images - replace with animated/video gallery
|
||||||
'url': 'http://imgur.com/topic/Aww/ll5Vk',
|
'url': 'http://imgur.com/topic/Aww/ll5Vk',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://imgur.com/gallery/YcAQlkx',
|
'url': 'https://imgur.com/gallery/YcAQlkx',
|
||||||
|
'add_ies': ['Imgur'],
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'YcAQlkx',
|
'id': 'YcAQlkx',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
|
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
|
||||||
}
|
'timestamp': 1358554297,
|
||||||
|
'upload_date': '20130119',
|
||||||
|
'uploader_id': '1648642',
|
||||||
|
'uploader': 'wittyusernamehere',
|
||||||
|
'release_timestamp': 1358554297,
|
||||||
|
'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
|
||||||
|
'release_date': '20130119',
|
||||||
|
'uploader_url': 'https://i.imgur.com/u3R4I2S_d.png?maxwidth=290&fidelity=grand',
|
||||||
|
'comment_count': int,
|
||||||
|
'dislike_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
|
# TODO: static image - replace with animated/video gallery
|
||||||
'url': 'http://imgur.com/topic/Funny/N8rOudd',
|
'url': 'http://imgur.com/topic/Funny/N8rOudd',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://imgur.com/r/aww/VQcQPhM',
|
'url': 'http://imgur.com/r/aww/VQcQPhM',
|
||||||
'only_matching': True,
|
'add_ies': ['Imgur'],
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'VQcQPhM',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The boss is here',
|
||||||
|
'timestamp': 1476494751,
|
||||||
|
'upload_date': '20161015',
|
||||||
|
'uploader_id': '19138530',
|
||||||
|
'uploader': 'thematrixcam',
|
||||||
|
'comment_count': int,
|
||||||
|
'dislike_count': int,
|
||||||
|
'uploader_url': 'https://i.imgur.com/qCjr5Pi_d.png?maxwidth=290&fidelity=grand',
|
||||||
|
'release_timestamp': 1476494751,
|
||||||
|
'like_count': int,
|
||||||
|
'release_date': '20161015',
|
||||||
|
'thumbnail': 'https://i.imgur.com/VQcQPhMh.jpg',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# from https://github.com/ytdl-org/youtube-dl/pull/16674
|
||||||
|
{
|
||||||
|
'url': 'https://imgur.com/t/unmuted/6lAn9VQ',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6lAn9VQ',
|
||||||
|
'title': 'Penguins !',
|
||||||
|
},
|
||||||
|
'playlist_count': 3,
|
||||||
|
}, {
|
||||||
|
'url': 'https://imgur.com/t/unmuted/kx2uD3C',
|
||||||
|
'add_ies': ['Imgur'],
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ZVMv45i',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Intruder',
|
||||||
|
'timestamp': 1528129683,
|
||||||
|
'upload_date': '20180604',
|
||||||
|
'release_timestamp': 1528129683,
|
||||||
|
'release_date': '20180604',
|
||||||
|
'like_count': int,
|
||||||
|
'dislike_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'duration': 30.03,
|
||||||
|
'thumbnail': 'https://i.imgur.com/ZVMv45ih.jpg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://imgur.com/t/unmuted/wXSK0YH',
|
||||||
|
'add_ies': ['Imgur'],
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'JCAP4io',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 're:I got the blues$',
|
||||||
|
'description': 'Luka’s vocal stylings.\n\nFP edit: don’t encourage me. I’ll never stop posting Luka and friends.',
|
||||||
|
'timestamp': 1527809525,
|
||||||
|
'upload_date': '20180531',
|
||||||
|
'like_count': int,
|
||||||
|
'dislike_count': int,
|
||||||
|
'duration': 30.03,
|
||||||
|
'comment_count': int,
|
||||||
|
'release_timestamp': 1527809525,
|
||||||
|
'thumbnail': 'https://i.imgur.com/JCAP4ioh.jpg',
|
||||||
|
'release_date': '20180531',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
gallery_id = self._match_id(url)
|
|
||||||
|
|
||||||
data = self._download_json(
|
class ImgurAlbumIE(ImgurGalleryBaseIE):
|
||||||
'https://imgur.com/gallery/%s.json' % gallery_id,
|
|
||||||
gallery_id)['data']['image']
|
|
||||||
|
|
||||||
if data.get('is_album'):
|
|
||||||
entries = [
|
|
||||||
self.url_result('http://imgur.com/%s' % image['hash'], ImgurIE.ie_key(), image['hash'])
|
|
||||||
for image in data['album_images']['images'] if image.get('hash')]
|
|
||||||
return self.playlist_result(entries, gallery_id, data.get('title'), data.get('description'))
|
|
||||||
|
|
||||||
return self.url_result('http://imgur.com/%s' % gallery_id, ImgurIE.ie_key(), gallery_id)
|
|
||||||
|
|
||||||
|
|
||||||
class ImgurAlbumIE(ImgurGalleryIE): # XXX: Do not subclass from concrete IE
|
|
||||||
IE_NAME = 'imgur:album'
|
IE_NAME = 'imgur:album'
|
||||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
|
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
|
||||||
|
_GALLERY = False
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# TODO: only static images - replace with animated/video gallery
|
||||||
'url': 'http://imgur.com/a/j6Orj',
|
'url': 'http://imgur.com/a/j6Orj',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
# from https://github.com/ytdl-org/youtube-dl/pull/21693
|
||||||
|
{
|
||||||
|
'url': 'https://imgur.com/a/iX265HX',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'j6Orj',
|
'id': 'iX265HX',
|
||||||
'title': 'A Literary Analysis of "Star Wars: The Force Awakens"',
|
'title': 'enen-no-shouboutai'
|
||||||
},
|
},
|
||||||
'playlist_count': 12,
|
'playlist_count': 2,
|
||||||
|
}, {
|
||||||
|
'url': 'https://imgur.com/a/8pih2Ed',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '8pih2Ed'
|
||||||
|
},
|
||||||
|
'playlist_mincount': 1,
|
||||||
}]
|
}]
|
||||||
|
|
|
@ -10,6 +10,7 @@ from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
decode_base_n,
|
decode_base_n,
|
||||||
encode_base_n,
|
encode_base_n,
|
||||||
|
filter_dict,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
format_field,
|
format_field,
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
|
@ -703,28 +704,31 @@ class InstagramStoryIE(InstagramBaseIE):
|
||||||
user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False)
|
user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False)
|
||||||
if not user_info:
|
if not user_info:
|
||||||
self.raise_login_required('This content is unreachable')
|
self.raise_login_required('This content is unreachable')
|
||||||
user_id = user_info.get('id')
|
|
||||||
|
|
||||||
|
user_id = traverse_obj(user_info, 'pk', 'id', expected_type=str)
|
||||||
story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
|
story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
|
||||||
|
if not story_info_url: # user id is only mandatory for non-highlights
|
||||||
|
raise ExtractorError('Unable to extract user id')
|
||||||
|
|
||||||
videos = traverse_obj(self._download_json(
|
videos = traverse_obj(self._download_json(
|
||||||
f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}',
|
f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}',
|
||||||
story_id, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels')
|
story_id, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels')
|
||||||
if not videos:
|
if not videos:
|
||||||
self.raise_login_required('You need to log in to access this content')
|
self.raise_login_required('You need to log in to access this content')
|
||||||
|
|
||||||
full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (str(user_id), 'user', 'full_name'))
|
full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (user_id, 'user', 'full_name'))
|
||||||
story_title = traverse_obj(videos, (f'highlight:{story_id}', 'title'))
|
story_title = traverse_obj(videos, (f'highlight:{story_id}', 'title'))
|
||||||
if not story_title:
|
if not story_title:
|
||||||
story_title = f'Story by {username}'
|
story_title = f'Story by {username}'
|
||||||
|
|
||||||
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
|
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (user_id, 'items'))
|
||||||
info_data = []
|
info_data = []
|
||||||
for highlight in highlights:
|
for highlight in highlights:
|
||||||
highlight_data = self._extract_product(highlight)
|
highlight_data = self._extract_product(highlight)
|
||||||
if highlight_data.get('formats'):
|
if highlight_data.get('formats'):
|
||||||
info_data.append({
|
info_data.append({
|
||||||
**highlight_data,
|
|
||||||
'uploader': full_name,
|
'uploader': full_name,
|
||||||
'uploader_id': user_id,
|
'uploader_id': user_id,
|
||||||
|
**filter_dict(highlight_data),
|
||||||
})
|
})
|
||||||
return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title)
|
return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title)
|
||||||
|
|
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class KinjaEmbedIE(InfoExtractor):
|
class KinjaEmbedIE(InfoExtractor):
|
||||||
IENAME = 'kinja:embed'
|
IE_NAME = 'kinja:embed'
|
||||||
_DOMAIN_REGEX = r'''(?:[^.]+\.)?
|
_DOMAIN_REGEX = r'''(?:[^.]+\.)?
|
||||||
(?:
|
(?:
|
||||||
avclub|
|
avclub|
|
||||||
|
|
|
@ -6,6 +6,7 @@ from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
|
try_call,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -96,13 +97,22 @@ class LiTVIE(InfoExtractor):
|
||||||
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
|
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
|
||||||
webpage, 'video data', default='{}'), video_id)
|
webpage, 'video data', default='{}'), video_id)
|
||||||
if not video_data:
|
if not video_data:
|
||||||
payload = {
|
payload = {'assetId': program_info['assetId']}
|
||||||
'assetId': program_info['assetId'],
|
puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
|
||||||
'watchDevices': program_info['watchDevices'],
|
if puid:
|
||||||
'contentType': program_info['contentType'],
|
payload.update({
|
||||||
}
|
'type': 'auth',
|
||||||
|
'puid': puid,
|
||||||
|
})
|
||||||
|
endpoint = 'getUrl'
|
||||||
|
else:
|
||||||
|
payload.update({
|
||||||
|
'watchDevices': program_info['watchDevices'],
|
||||||
|
'contentType': program_info['contentType'],
|
||||||
|
})
|
||||||
|
endpoint = 'getMainUrlNoAuth'
|
||||||
video_data = self._download_json(
|
video_data = self._download_json(
|
||||||
'https://www.litv.tv/vod/ajax/getMainUrlNoAuth', video_id,
|
f'https://www.litv.tv/vod/ajax/{endpoint}', video_id,
|
||||||
data=json.dumps(payload).encode('utf-8'),
|
data=json.dumps(payload).encode('utf-8'),
|
||||||
headers={'Content-Type': 'application/json'})
|
headers={'Content-Type': 'application/json'})
|
||||||
|
|
||||||
|
|
|
@ -97,7 +97,7 @@ class NBAWatchBaseIE(NBACVPBaseIE):
|
||||||
|
|
||||||
|
|
||||||
class NBAWatchEmbedIE(NBAWatchBaseIE):
|
class NBAWatchEmbedIE(NBAWatchBaseIE):
|
||||||
IENAME = 'nba:watch:embed'
|
IE_NAME = 'nba:watch:embed'
|
||||||
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
|
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://watch.nba.com/embed?id=659395',
|
'url': 'http://watch.nba.com/embed?id=659395',
|
||||||
|
@ -339,7 +339,7 @@ class NBABaseIE(NBACVPBaseIE):
|
||||||
|
|
||||||
|
|
||||||
class NBAEmbedIE(NBABaseIE):
|
class NBAEmbedIE(NBABaseIE):
|
||||||
IENAME = 'nba:embed'
|
IE_NAME = 'nba:embed'
|
||||||
_VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
|
_VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&Env=',
|
'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&Env=',
|
||||||
|
@ -361,7 +361,7 @@ class NBAEmbedIE(NBABaseIE):
|
||||||
|
|
||||||
|
|
||||||
class NBAIE(NBABaseIE):
|
class NBAIE(NBABaseIE):
|
||||||
IENAME = 'nba'
|
IE_NAME = 'nba'
|
||||||
_VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
|
_VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774',
|
'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774',
|
||||||
|
@ -388,7 +388,7 @@ class NBAIE(NBABaseIE):
|
||||||
|
|
||||||
|
|
||||||
class NBAChannelIE(NBABaseIE):
|
class NBAChannelIE(NBABaseIE):
|
||||||
IENAME = 'nba:channel'
|
IE_NAME = 'nba:channel'
|
||||||
_VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
|
_VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.nba.com/blazers/video/channel/summer_league',
|
'url': 'https://www.nba.com/blazers/video/channel/summer_league',
|
||||||
|
|
135
yt_dlp/extractor/rudovideo.py
Normal file
135
yt_dlp/extractor/rudovideo.py
Normal file
|
@ -0,0 +1,135 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
determine_ext,
|
||||||
|
js_to_json,
|
||||||
|
traverse_obj,
|
||||||
|
update_url_query,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class RudoVideoIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://rudo\.video/(?P<type>vod|podcast|live)/(?P<id>[^/?&#]+)'
|
||||||
|
_EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)//rudo\.video/(?:vod|podcast|live)/[^\'"]+)']
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://rudo.video/podcast/cz2wrUy8l0o',
|
||||||
|
'md5': '28ed82b477708dc5e12e072da2449221',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'cz2wrUy8l0o',
|
||||||
|
'title': 'Diego Cabot',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://rudo.video/podcast/bQkt07',
|
||||||
|
'md5': '36b22a9863de0f47f00fc7532a32a898',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'bQkt07',
|
||||||
|
'title': 'Tubular Bells',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://rudo.video/podcast/b42ZUznHX0',
|
||||||
|
'md5': 'b91c70d832938871367f8ad10c895821',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b42ZUznHX0',
|
||||||
|
'title': 'Columna Ruperto Concha',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://rudo.video/vod/bN5AaJ',
|
||||||
|
'md5': '01324a329227e2591530ecb4f555c881',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'bN5AaJ',
|
||||||
|
'title': 'Ucrania 19.03',
|
||||||
|
'creator': 'La Tercera',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://rudo.video/live/bbtv',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'bbtv',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'creator': 'BioBioTV',
|
||||||
|
'live_status': 'is_live',
|
||||||
|
'title': r're:^LIVE BBTV\s\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}$',
|
||||||
|
'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://rudo.video/live/c13',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'c13',
|
||||||
|
'title': 'CANAL13',
|
||||||
|
'ext': 'mp4',
|
||||||
|
},
|
||||||
|
'skip': 'Geo-restricted to Chile',
|
||||||
|
}, {
|
||||||
|
'url': 'https://rudo.video/live/t13-13cl',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 't13-13cl',
|
||||||
|
'title': 'T13',
|
||||||
|
'ext': 'mp4',
|
||||||
|
},
|
||||||
|
'skip': 'Geo-restricted to Chile',
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id, type_ = self._match_valid_url(url).group('id', 'type')
|
||||||
|
is_live = type_ == 'live'
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
if 'Streaming is not available in your area' in webpage:
|
||||||
|
self.raise_geo_restricted()
|
||||||
|
|
||||||
|
media_url = (
|
||||||
|
self._search_regex(
|
||||||
|
r'var\s+streamURL\s*=\s*[\'"]([^?\'"]+)', webpage, 'stream url', default=None)
|
||||||
|
# Source URL must be used only if streamURL is unavailable
|
||||||
|
or self._search_regex(
|
||||||
|
r'<source[^>]+src=[\'"]([^\'"]+)', webpage, 'source url', default=None))
|
||||||
|
if not media_url:
|
||||||
|
youtube_url = self._search_regex(r'file:\s*[\'"]((?:https?:)//(?:www\.)?youtube\.com[^\'"]+)',
|
||||||
|
webpage, 'youtube url', default=None)
|
||||||
|
if youtube_url:
|
||||||
|
return self.url_result(youtube_url, 'Youtube')
|
||||||
|
raise ExtractorError('Unable to extract stream url')
|
||||||
|
|
||||||
|
token_array = self._search_json(
|
||||||
|
r'<script>var\s+_\$_[a-zA-Z0-9]+\s*=', webpage, 'access token array', video_id,
|
||||||
|
contains_pattern=r'\[(?s:.+)\]', default=None, transform_source=js_to_json)
|
||||||
|
if token_array:
|
||||||
|
token_url = traverse_obj(token_array, (..., {url_or_none}), get_all=False)
|
||||||
|
if not token_url:
|
||||||
|
raise ExtractorError('Invalid access token array')
|
||||||
|
access_token = self._download_json(
|
||||||
|
token_url, video_id, note='Downloading access token')['data']['authToken']
|
||||||
|
media_url = update_url_query(media_url, {'auth-token': access_token})
|
||||||
|
|
||||||
|
ext = determine_ext(media_url)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
formats = self._extract_m3u8_formats(media_url, video_id, live=is_live)
|
||||||
|
elif ext == 'mp3':
|
||||||
|
formats = [{
|
||||||
|
'url': media_url,
|
||||||
|
'vcodec': 'none',
|
||||||
|
}]
|
||||||
|
else:
|
||||||
|
formats = [{'url': media_url}]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': (self._search_regex(r'var\s+titleVideo\s*=\s*[\'"]([^\'"]+)',
|
||||||
|
webpage, 'title', default=None)
|
||||||
|
or self._og_search_title(webpage)),
|
||||||
|
'creator': self._search_regex(r'var\s+videoAuthor\s*=\s*[\'"]([^?\'"]+)',
|
||||||
|
webpage, 'videoAuthor', default=None),
|
||||||
|
'thumbnail': (self._search_regex(r'var\s+posterIMG\s*=\s*[\'"]([^?\'"]+)',
|
||||||
|
webpage, 'thumbnail', default=None)
|
||||||
|
or self._og_search_thumbnail(webpage)),
|
||||||
|
'formats': formats,
|
||||||
|
'is_live': is_live,
|
||||||
|
}
|
|
@ -10,6 +10,7 @@ from ..compat import (
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
)
|
)
|
||||||
|
from ..networking.exceptions import HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
dict_get,
|
dict_get,
|
||||||
|
@ -1317,41 +1318,51 @@ class TwitterIE(TwitterBaseIE):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_status(self, twid):
|
def _call_syndication_api(self, twid):
|
||||||
if self.is_logged_in or self._selected_api == 'graphql':
|
self.report_warning(
|
||||||
status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
|
'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
|
||||||
|
status = self._download_json(
|
||||||
elif self._selected_api == 'legacy':
|
'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
|
||||||
status = self._call_api(f'statuses/show/{twid}.json', twid, {
|
headers={'User-Agent': 'Googlebot'}, query={
|
||||||
'cards_platform': 'Web-12',
|
'id': twid,
|
||||||
'include_cards': 1,
|
# TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
|
||||||
'include_reply_count': 1,
|
'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
|
||||||
'include_user_entities': 0,
|
|
||||||
'tweet_mode': 'extended',
|
|
||||||
})
|
})
|
||||||
|
if not status:
|
||||||
|
raise ExtractorError('Syndication endpoint returned empty JSON response')
|
||||||
|
# Transform the result so its structure matches that of legacy/graphql
|
||||||
|
media = []
|
||||||
|
for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
|
||||||
|
detail['id_str'] = traverse_obj(detail, (
|
||||||
|
'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
|
||||||
|
media.append(detail)
|
||||||
|
status['extended_entities'] = {'media': media}
|
||||||
|
|
||||||
elif self._selected_api == 'syndication':
|
return status
|
||||||
self.report_warning(
|
|
||||||
'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
|
def _extract_status(self, twid):
|
||||||
status = self._download_json(
|
if self._selected_api not in ('graphql', 'legacy', 'syndication'):
|
||||||
'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
|
raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)
|
||||||
headers={'User-Agent': 'Googlebot'}, query={
|
|
||||||
'id': twid,
|
try:
|
||||||
# TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
|
if self.is_logged_in or self._selected_api == 'graphql':
|
||||||
'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
|
status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
|
||||||
|
elif self._selected_api == 'legacy':
|
||||||
|
status = self._call_api(f'statuses/show/{twid}.json', twid, {
|
||||||
|
'cards_platform': 'Web-12',
|
||||||
|
'include_cards': 1,
|
||||||
|
'include_reply_count': 1,
|
||||||
|
'include_user_entities': 0,
|
||||||
|
'tweet_mode': 'extended',
|
||||||
})
|
})
|
||||||
if not status:
|
except ExtractorError as e:
|
||||||
raise ExtractorError('Syndication endpoint returned empty JSON response')
|
if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
|
||||||
# Transform the result so its structure matches that of legacy/graphql
|
raise
|
||||||
media = []
|
self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
|
||||||
for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
|
status = self._call_syndication_api(twid)
|
||||||
detail['id_str'] = traverse_obj(detail, (
|
|
||||||
'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
|
|
||||||
media.append(detail)
|
|
||||||
status['extended_entities'] = {'media': media}
|
|
||||||
|
|
||||||
else:
|
if self._selected_api == 'syndication':
|
||||||
raise ExtractorError(f'"{self._selected_api}" is not a valid API selection', expected=True)
|
status = self._call_syndication_api(twid)
|
||||||
|
|
||||||
return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
|
return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
|
||||||
|
|
||||||
|
@ -1416,8 +1427,8 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})), # No longer available
|
'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})), # No longer available
|
||||||
'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
|
'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
|
||||||
# The codec of http formats are unknown
|
# Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
|
||||||
'_format_sort_fields': ('res', 'br', 'size', 'proto'),
|
'_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'), # http format codec is unknown
|
||||||
}
|
}
|
||||||
|
|
||||||
def extract_from_card_info(card):
|
def extract_from_card_info(card):
|
||||||
|
|
|
@ -206,13 +206,14 @@ class Updater:
|
||||||
# XXX: use class variables to simplify testing
|
# XXX: use class variables to simplify testing
|
||||||
_channel = CHANNEL
|
_channel = CHANNEL
|
||||||
_origin = ORIGIN
|
_origin = ORIGIN
|
||||||
|
_update_sources = UPDATE_SOURCES
|
||||||
|
|
||||||
def __init__(self, ydl, target: str | None = None):
|
def __init__(self, ydl, target: str | None = None):
|
||||||
self.ydl = ydl
|
self.ydl = ydl
|
||||||
# For backwards compat, target needs to be treated as if it could be None
|
# For backwards compat, target needs to be treated as if it could be None
|
||||||
self.requested_channel, sep, self.requested_tag = (target or self._channel).rpartition('@')
|
self.requested_channel, sep, self.requested_tag = (target or self._channel).rpartition('@')
|
||||||
# Check if requested_tag is actually the requested repo/channel
|
# Check if requested_tag is actually the requested repo/channel
|
||||||
if not sep and ('/' in self.requested_tag or self.requested_tag in UPDATE_SOURCES):
|
if not sep and ('/' in self.requested_tag or self.requested_tag in self._update_sources):
|
||||||
self.requested_channel = self.requested_tag
|
self.requested_channel = self.requested_tag
|
||||||
self.requested_tag: str = None # type: ignore (we set it later)
|
self.requested_tag: str = None # type: ignore (we set it later)
|
||||||
elif not self.requested_channel:
|
elif not self.requested_channel:
|
||||||
|
@ -237,11 +238,11 @@ class Updater:
|
||||||
self._block_restart('Automatically restarting into custom builds is disabled for security reasons')
|
self._block_restart('Automatically restarting into custom builds is disabled for security reasons')
|
||||||
else:
|
else:
|
||||||
# Check if requested_channel resolves to a known repository or else raise
|
# Check if requested_channel resolves to a known repository or else raise
|
||||||
self.requested_repo = UPDATE_SOURCES.get(self.requested_channel)
|
self.requested_repo = self._update_sources.get(self.requested_channel)
|
||||||
if not self.requested_repo:
|
if not self.requested_repo:
|
||||||
self._report_error(
|
self._report_error(
|
||||||
f'Invalid update channel {self.requested_channel!r} requested. '
|
f'Invalid update channel {self.requested_channel!r} requested. '
|
||||||
f'Valid channels are {", ".join(UPDATE_SOURCES)}', True)
|
f'Valid channels are {", ".join(self._update_sources)}', True)
|
||||||
|
|
||||||
self._identifier = f'{detect_variant()} {system_identifier()}'
|
self._identifier = f'{detect_variant()} {system_identifier()}'
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user