Compare commits

...

8 Commits

Author SHA1 Message Date
pukkandan
b40e1e76bd
oops 2024-01-12 23:28:25 +05:30
pukkandan
6aa45a9a69
More robust warning 2024-01-12 22:40:10 +05:30
pukkandan
5ced986ab7
Clean docs 2024-01-12 22:38:58 +05:30
pukkandan
7f3a69ae68
[ie/youtube] Migrate artist 2024-01-12 22:08:11 +05:30
pukkandan
af8e0c8e8b
Replace comma with unicode 2024-01-12 22:08:11 +05:30
pukkandan
75a6541ad2
[test] Test only new fields 2024-01-12 22:07:50 +05:30
pukkandan
694da355d2
Handle when both fields are returned 2024-01-12 21:18:27 +05:30
pukkandan
1531f4f69e
Stricter Splitting 2024-01-12 20:09:40 +05:30
5 changed files with 28 additions and 21 deletions

View File

@@ -1767,11 +1767,11 @@ Metadata fields | From
`description`, `synopsis` | `description` `description`, `synopsis` | `description`
`purl`, `comment` | `webpage_url` `purl`, `comment` | `webpage_url`
`track` | `track_number` `track` | `track_number`
`artist` | `artist`, `artists`, `creator`, `uploader` or `uploader_id` `artist` | `artist`, `artists`, `creator`, `creators`, `uploader` or `uploader_id`
`composer` | `composer`, `composers` `composer` | `composer` or `composers`
`genre` | `genre`, `genres` `genre` | `genre` or `genres`
`album` | `album` `album` | `album`
`album_artist` | `album_artist`, `album_artists` `album_artist` | `album_artist` or `album_artists`
`disc` | `disc_number` `disc` | `disc_number`
`show` | `series` `show` | `series`
`season_number` | `season_number` `season_number` | `season_number`

View File

@@ -223,6 +223,10 @@ def sanitize_got_info_dict(got_dict):
if test_info_dict.get('display_id') == test_info_dict.get('id'): if test_info_dict.get('display_id') == test_info_dict.get('id'):
test_info_dict.pop('display_id') test_info_dict.pop('display_id')
# Remove deprecated fields
for old in YoutubeDL._deprecated_multivalue_fields.keys():
test_info_dict.pop(old, None)
# release_year may be generated from release_date # release_year may be generated from release_date
if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])): if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])):
test_info_dict.pop('release_year') test_info_dict.pop('release_year')

View File

@@ -941,7 +941,7 @@ class TestYoutubeDL(unittest.TestCase):
def get_videos(filter_=None): def get_videos(filter_=None):
ydl = YDL({'match_filter': filter_, 'simulate': True}) ydl = YDL({'match_filter': filter_, 'simulate': True})
for v in videos: for v in videos:
ydl.process_ie_result(v, download=True) ydl.process_ie_result(v.copy(), download=True)
return [v['id'] for v in ydl.downloaded_info_dicts] return [v['id'] for v in ydl.downloaded_info_dicts]
res = get_videos() res = get_videos()

View File

@@ -581,6 +581,13 @@ class YoutubeDL:
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options', 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time' 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
} }
# Deprecated single-string metadata fields, each mapped to the list-valued
# field that replaces it (old name -> new name).
_deprecated_multivalue_fields = dict(
album_artist='album_artists',
artist='artists',
composer='composers',
creator='creators',
genre='genres',
)
_format_selection_exts = { _format_selection_exts = {
'audio': set(MEDIA_EXTENSIONS.common_audio), 'audio': set(MEDIA_EXTENSIONS.common_audio),
'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )), 'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
@@ -2641,18 +2648,13 @@ class YoutubeDL:
if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field): if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
multivalue_fields = { for old_key, new_key in self._deprecated_multivalue_fields.items():
'album_artist': 'album_artists', if new_key in info_dict and old_key in info_dict:
'artist': 'artists', self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
'composer': 'composers', elif old_value := info_dict.get(old_key):
'creator': 'creators', info_dict[new_key] = old_value.split(', ')
'genre': 'genres',
}
for old_key, new_key in multivalue_fields.items():
if old_value := info_dict.get(old_key):
info_dict[new_key] = re.split(r', ?', old_value)
elif new_value := info_dict.get(new_key): elif new_value := info_dict.get(new_key):
info_dict[old_key] = ', '.join(new_value) info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
def _raise_pending_errors(self, info): def _raise_pending_errors(self, info):
err = info.pop('__pending_error', None) err = info.pop('__pending_error', None)

View File

@@ -2068,7 +2068,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'Voyeur Girl', 'title': 'Voyeur Girl',
'description': 'md5:7ae382a65843d6df2685993e90a8628f', 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
'upload_date': '20190312', 'upload_date': '20190312',
'artist': 'Stephen', 'artists': ['Stephen'],
'creators': ['Stephen'],
'track': 'Voyeur Girl', 'track': 'Voyeur Girl',
'album': 'it\'s too much love to know my dear', 'album': 'it\'s too much love to know my dear',
'release_date': '20190313', 'release_date': '20190313',
@@ -2081,7 +2082,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel': 'Stephen', # TODO: should be "Stephen - Topic" 'channel': 'Stephen', # TODO: should be "Stephen - Topic"
'uploader': 'Stephen', 'uploader': 'Stephen',
'availability': 'public', 'availability': 'public',
'creator': 'Stephen',
'duration': 169, 'duration': 169,
'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp', 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
'age_limit': 0, 'age_limit': 0,
@@ -4386,7 +4386,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
release_year = release_date[:4] release_year = release_date[:4]
info.update({ info.update({
'album': mobj.group('album'.strip()), 'album': mobj.group('album'.strip()),
'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')), 'artists': ([a] if (a := mobj.group('clean_artist'))
else [a.strip() for a in mobj.group('artist').split('·')]),
'track': mobj.group('track').strip(), 'track': mobj.group('track').strip(),
'release_date': release_date, 'release_date': release_date,
'release_year': int_or_none(release_year), 'release_year': int_or_none(release_year),
@@ -4532,7 +4533,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if mrr_title == 'Album': if mrr_title == 'Album':
info['album'] = mrr_contents_text info['album'] = mrr_contents_text
elif mrr_title == 'Artist': elif mrr_title == 'Artist':
info['artist'] = mrr_contents_text info['artists'] = [mrr_contents_text]
elif mrr_title == 'Song': elif mrr_title == 'Song':
info['track'] = mrr_contents_text info['track'] = mrr_contents_text
owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges'))) owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
@@ -4566,7 +4567,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if fmt.get('protocol') == 'm3u8_native': if fmt.get('protocol') == 'm3u8_native':
fmt['__needs_testing'] = True fmt['__needs_testing'] = True
for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]: for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]:
v = info.get(s_k) v = info.get(s_k)
if v: if v:
info[d_k] = v info[d_k] = v