Compare commits

..

No commits in common. "d9609608a8464b1fbc0037975cd9ee903a436778" and "da18e58fecf6a81f0d320bcf361ed806ea6e2e85" have entirely different histories.

4 changed files with 29 additions and 59 deletions

View File

@ -140,8 +140,6 @@ class TestFormatSelection(unittest.TestCase):
test('example-with-dashes', 'example-with-dashes') test('example-with-dashes', 'example-with-dashes')
test('all', '2', '47', '45', 'example-with-dashes', '35') test('all', '2', '47', '45', 'example-with-dashes', '35')
test('mergeall', '2+47+45+example-with-dashes+35', multi=True) test('mergeall', '2+47+45+example-with-dashes+35', multi=True)
# See: https://github.com/yt-dlp/yt-dlp/pulls/8797
test('7_a/worst', '35')
def test_format_selection_audio(self): def test_format_selection_audio(self):
formats = [ formats = [

View File

@ -2471,16 +2471,9 @@ class YoutubeDL:
return selector_function(ctx_copy) return selector_function(ctx_copy)
return final_selector return final_selector
# HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid stream = io.BytesIO(format_spec.encode())
# Prefix numbers with random letters to avoid it being classified as a number
# See: https://github.com/yt-dlp/yt-dlp/pulls/8797
# TODO: Implement parser not reliant on tokenize.tokenize
prefix = ''.join(random.choices(string.ascii_letters, k=32))
stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
try: try:
tokens = list(_remove_unused_ops( tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
token._replace(string=token.string.replace(prefix, ''))
for token in tokenize.tokenize(stream.readline)))
except tokenize.TokenError: except tokenize.TokenError:
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec))) raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

View File

@ -292,7 +292,7 @@ class ARDIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
# available till 7.12.2023 # available till 7.12.2023
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html', 'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
'md5': '94812e6438488fb923c361a44469614b', 'md5': 'a438f671e87a7eba04000336a119ccc4',
'info_dict': { 'info_dict': {
'id': 'maischberger-video-424', 'id': 'maischberger-video-424',
'display_id': 'maischberger-video-424', 'display_id': 'maischberger-video-424',
@ -403,25 +403,26 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
_VALID_URL = r'''(?x)https:// _VALID_URL = r'''(?x)https://
(?:(?:beta|www)\.)?ardmediathek\.de/ (?:(?:beta|www)\.)?ardmediathek\.de/
(?:(?P<client>[^/]+)/)? (?:(?P<client>[^/]+)/)?
(?:player|live|video|(?P<playlist>sendung|serie|sammlung))/ (?:player|live|video|(?P<playlist>sendung|sammlung))/
(?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)? (?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
(?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+) (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
(?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))''' (?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0', 'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI',
'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4', 'md5': '3fd5fead7a370a819341129c8d713136',
'info_dict': { 'info_dict': {
'display_id': 'filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen', 'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen',
'id': '12939099', 'id': '12172961',
'title': 'Liebe auf vier Pfoten', 'title': 'Wolfsland - Die traurigen Schwestern',
'description': r're:^Claudia Schmitt, Anwältin in Salzburg', 'description': r're:^Als der Polizeiobermeister Raaben',
'duration': 5222, 'duration': 5241,
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:aee7cbf8f06de976?w=960&ch=ae4d0f2ee47d8b9b', 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957',
'timestamp': 1701343800, 'timestamp': 1670710500,
'upload_date': '20231130', 'upload_date': '20221210',
'ext': 'mp4', 'ext': 'mp4',
'episode': 'Liebe auf vier Pfoten', 'age_limit': 12,
'episode': 'Wolfsland - Die traurigen Schwestern',
'series': 'Filme im MDR' 'series': 'Filme im MDR'
}, },
}, { }, {
@ -453,7 +454,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
'duration': 915, 'duration': 915,
'episode': 'tagesschau, 20:00 Uhr', 'episode': 'tagesschau, 20:00 Uhr',
'series': 'tagesschau', 'series': 'tagesschau',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678', 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49',
}, },
}, { }, {
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', 'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
@ -474,10 +475,6 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
# playlist of type 'sendung' # playlist of type 'sendung'
'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/', 'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
'only_matching': True, 'only_matching': True,
}, {
# playlist of type 'serie'
'url': 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1',
'only_matching': True,
}, { }, {
# playlist of type 'sammlung' # playlist of type 'sammlung'
'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/', 'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
@ -490,11 +487,10 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
def _ARD_load_playlist_snippet(self, playlist_id, display_id, client, mode, page_number): def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber):
""" Query the ARD server for playlist information """ Query the ARD server for playlist information
and returns the data in "raw" format """ and returns the data in "raw" format """
assert mode in ('sendung', 'serie', 'sammlung') if mode == 'sendung':
if mode in ('sendung', 'serie'):
graphQL = json.dumps({ graphQL = json.dumps({
'query': '''{ 'query': '''{
showPage( showPage(
@ -511,7 +507,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
links { target { id href title } } links { target { id href title } }
type type
} }
}}''' % (client, playlist_id, page_number), }}''' % (client, playlist_id, pageNumber),
}).encode() }).encode()
else: # mode == 'sammlung' else: # mode == 'sammlung'
graphQL = json.dumps({ graphQL = json.dumps({
@ -532,7 +528,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
type type
} }
} }
}}''' % (client, playlist_id, page_number), }}''' % (client, playlist_id, pageNumber),
}).encode() }).encode()
# Ressources for ARD graphQL debugging: # Ressources for ARD graphQL debugging:
# https://api-test.ardmediathek.de/public-gateway # https://api-test.ardmediathek.de/public-gateway
@ -542,7 +538,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
data=graphQL, data=graphQL,
headers={'Content-Type': 'application/json'})['data'] headers={'Content-Type': 'application/json'})['data']
# align the structure of the returned data: # align the structure of the returned data:
if mode in ('sendung', 'serie'): if mode == 'sendung':
show_page = show_page['showPage'] show_page = show_page['showPage']
else: # mode == 'sammlung' else: # mode == 'sammlung'
show_page = show_page['morePage']['widget'] show_page = show_page['morePage']['widget']
@ -550,12 +546,12 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode): def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
""" Collects all playlist entries and returns them as info dict. """ Collects all playlist entries and returns them as info dict.
Supports playlists of mode 'sendung', 'serie', and 'sammlung', Supports playlists of mode 'sendung' and 'sammlung', and also nested
as well as nested playlists. """ playlists. """
entries = [] entries = []
pageNumber = 0 pageNumber = 0
while True: # iterate by pageNumber while True: # iterate by pageNumber
show_page = self._ARD_load_playlist_snippet( show_page = self._ARD_load_playlist_snipped(
playlist_id, display_id, client, mode, pageNumber) playlist_id, display_id, client, mode, pageNumber)
for teaser in show_page['teasers']: # process playlist items for teaser in show_page['teasers']: # process playlist items
if '/compilation/' in teaser['links']['target']['href']: if '/compilation/' in teaser['links']['target']['href']:

View File

@ -52,7 +52,7 @@ class FacebookIE(InfoExtractor):
)\?(?:.*?)(?:v|video_id|story_fbid)=| )\?(?:.*?)(?:v|video_id|story_fbid)=|
[^/]+/videos/(?:[^/]+/)?| [^/]+/videos/(?:[^/]+/)?|
[^/]+/posts/| [^/]+/posts/|
groups/[^/]+/(?:permalink|posts)/| groups/[^/]+/permalink/|
watchparty/ watchparty/
)| )|
facebook: facebook:
@ -232,21 +232,6 @@ class FacebookIE(InfoExtractor):
'uploader_id': '100013949973717', 'uploader_id': '100013949973717',
}, },
'skip': 'Requires logging in', 'skip': 'Requires logging in',
}, {
# data.node.comet_sections.content.story.attachments[].throwbackStyles.attachment_target_renderer.attachment.target.attachments[].styles.attachment.media
'url': 'https://www.facebook.com/groups/1645456212344334/posts/3737828833107051/',
'info_dict': {
'id': '1569199726448814',
'ext': 'mp4',
'title': 'Pence MUST GO!',
'description': 'Vickie Gentry shared a memory.',
'timestamp': 1511548260,
'upload_date': '20171124',
'uploader': 'Vickie Gentry',
'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
'thumbnail': r're:^https?://.*',
'duration': 148.435,
},
}, { }, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True, 'only_matching': True,
@ -627,11 +612,9 @@ class FacebookIE(InfoExtractor):
nodes = variadic(traverse_obj(data, 'nodes', 'node') or []) nodes = variadic(traverse_obj(data, 'nodes', 'node') or [])
attachments = traverse_obj(nodes, ( attachments = traverse_obj(nodes, (
..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments', ..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments',
..., ('styles', 'style_type_renderer', ('throwbackStyles', 'attachment_target_renderer')), ..., ('styles', 'style_type_renderer'), 'attachment'), expected_type=dict) or []
'attachment', {dict}))
for attachment in attachments: for attachment in attachments:
ns = traverse_obj(attachment, ('all_subattachments', 'nodes', ..., {dict}), ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
('target', 'attachments', ..., 'styles', 'attachment', {dict}))
for n in ns: for n in ns:
parse_attachment(n) parse_attachment(n)
parse_attachment(attachment) parse_attachment(attachment)
@ -654,7 +637,7 @@ class FacebookIE(InfoExtractor):
if len(entries) > 1: if len(entries) > 1:
return self.playlist_result(entries, video_id) return self.playlist_result(entries, video_id)
video_info = entries[0] if entries else {'id': video_id} video_info = entries[0]
webpage_info = extract_metadata(webpage) webpage_info = extract_metadata(webpage)
# honor precise duration in video info # honor precise duration in video info
if video_info.get('duration'): if video_info.get('duration'):