Compare commits

..

No commits in common. "a4ad93dd57ccd80854b9b2741fce14c37845a1da" and "4d082cf17330a68b2aeb7a4fccdcdc2600a5cbfd" have entirely different histories.

View File

@ -7,6 +7,8 @@ from random import randint
from .common import InfoExtractor from .common import InfoExtractor
from ..aes import aes_ecb_encrypt, pkcs7_padding from ..aes import aes_ecb_encrypt, pkcs7_padding
from ..compat import compat_urllib_parse_urlencode
from ..networking import Request
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
clean_html, clean_html,
@ -22,11 +24,12 @@ from ..utils import (
class NetEaseMusicBaseIE(InfoExtractor): class NetEaseMusicBaseIE(InfoExtractor):
_FORMATS = ['bMusic', 'mMusic', 'hMusic'] _FORMATS = ['bMusic', 'mMusic', 'hMusic', 'sqMusic', 'hrMusic']
_NETEASE_SALT = '3go8&$8*3*3h0k(2)2'
_API_BASE = 'http://music.163.com/api/' _API_BASE = 'http://music.163.com/api/'
def _create_eapi_cipher(self, api_path, query_body, cookies): def _create_eapi_cipher(self, api_path, query, cookies):
request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':')) request_text = json.dumps({**query, 'header': cookies}, separators=(',', ':'))
message = f'nobody{api_path}use{request_text}md5forencrypt'.encode('latin1') message = f'nobody{api_path}use{request_text}md5forencrypt'.encode('latin1')
msg_digest = md5(message).hexdigest() msg_digest = md5(message).hexdigest()
@ -36,7 +39,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
encrypted = bytes(aes_ecb_encrypt(data, list(b'e82ckenh8dichen8'))) encrypted = bytes(aes_ecb_encrypt(data, list(b'e82ckenh8dichen8')))
return f'params={encrypted.hex().upper()}'.encode() return f'params={encrypted.hex().upper()}'.encode()
def _download_eapi_json(self, path, video_id, query_body, headers={}, **kwargs): def _download_eapi_json(self, path, song_id, query, headers={}, **kwargs):
cookies = { cookies = {
'osver': 'undefined', 'osver': 'undefined',
'deviceId': 'undefined', 'deviceId': 'undefined',
@ -57,12 +60,13 @@ class NetEaseMusicBaseIE(InfoExtractor):
**headers, **headers,
} }
url = urljoin('https://interface3.music.163.com/', f'/eapi{path}') url = urljoin('https://interface3.music.163.com/', f'/eapi{path}')
data = self._create_eapi_cipher(f'/api{path}', query_body, cookies) data = self._create_eapi_cipher(f'/api{path}', query, cookies)
return self._download_json(url, video_id, data=data, headers=headers, **kwargs) return self._download_json(url, song_id, data=data, headers=headers, **kwargs)
def _call_player_api(self, song_id, bitrate): def _call_player_api(self, song_id, bitrate):
return self._download_eapi_json( return self._download_eapi_json(
'/song/enhance/player/url', song_id, {'ids': f'[{song_id}]', 'br': bitrate}, '/song/enhance/player/url', song_id,
{'ids': f'[{song_id}]', 'br': bitrate},
note=f'Downloading song URL info: bitrate {bitrate}') note=f'Downloading song URL info: bitrate {bitrate}')
def extract_formats(self, info): def extract_formats(self, info):
@ -73,8 +77,10 @@ class NetEaseMusicBaseIE(InfoExtractor):
details = info.get(song_format) details = info.get(song_format)
if not details: if not details:
continue continue
bitrate = int_or_none(details.get('bitrate')) or 999000 bitrate = int_or_none(details.get('bitrate')) or 999000
for song in traverse_obj(self._call_player_api(song_id, bitrate), ('data', ...)): data = self._call_player_api(song_id, bitrate)
for song in traverse_obj(data, ('data', ...)):
song_url = traverse_obj(song, ('url', {url_or_none})) song_url = traverse_obj(song, ('url', {url_or_none}))
if not song_url: if not song_url:
continue continue
@ -82,26 +88,34 @@ class NetEaseMusicBaseIE(InfoExtractor):
formats.append({ formats.append({
'url': song_url, 'url': song_url,
'format_id': song_format, 'format_id': song_format,
'asr': traverse_obj(details, ('sr', {int_or_none})),
**traverse_obj(song, { **traverse_obj(song, {
'ext': ('type', {str}), 'ext': ('type', {str}),
'abr': ('br', {lambda i: int_or_none(i, scale=1000)}), 'abr': ('br', {lambda i: int_or_none(i, scale=1000)}),
'filesize': ('size', {int_or_none}), 'filesize': ('size', {int_or_none}),
}), }),
**traverse_obj(details, {
'asr': ('sr', {int_or_none}),
}),
}) })
elif err == 0: elif err == 0:
err = try_get(song, lambda x: x['code'], int) err = try_get(song, lambda x: x['code'], int)
if not formats: if not formats:
msg = 'No media links found'
if err != 0 and (err < 200 or err >= 400): if err != 0 and (err < 200 or err >= 400):
raise ExtractorError(f'No media links found (site code {err})', expected=True) raise ExtractorError(
'%s (site code %d)' % (msg, err, ), expected=True)
else: else:
self.raise_geo_restricted('No media links found: probably due to geo restriction.') self.raise_geo_restricted(
msg + ': probably this video is not available from your location due to geo restriction.',
countries=['CN'])
return formats return formats
def query_api(self, endpoint, video_id, note): def query_api(self, endpoint, video_id, note):
result = self._download_json( req = Request('%s%s' % (self._API_BASE, endpoint))
f'{self._API_BASE}{endpoint}', video_id, note, headers={'Referer': self._API_BASE}) req.headers['Referer'] = self._API_BASE
result = self._download_json(req, video_id, note)
if result['code'] == -462: if result['code'] == -462:
self.raise_login_required(f'Login required to download: {result["message"]}') self.raise_login_required(f'Login required to download: {result["message"]}')
elif result['code'] != 200: elif result['code'] != 200:
@ -123,7 +137,6 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'timestamp': 1522944000, 'timestamp': 1522944000,
'upload_date': '20180405', 'upload_date': '20180405',
'description': 'md5:3650af9ee22c87e8637cb2dde22a765c', 'description': 'md5:3650af9ee22c87e8637cb2dde22a765c',
'subtitles': {'lyric': [{'ext': 'lrc'}]},
"duration": 256, "duration": 256,
'thumbnail': r're:^http.*\.jpg', 'thumbnail': r're:^http.*\.jpg',
}, },
@ -137,6 +150,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'creator': 'Dustin O\'Halloran', 'creator': 'Dustin O\'Halloran',
'upload_date': '20080211', 'upload_date': '20080211',
'timestamp': 1202745600, 'timestamp': 1202745600,
'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
'duration': 263, 'duration': 263,
'thumbnail': r're:^http.*\.jpg', 'thumbnail': r're:^http.*\.jpg',
}, },
@ -151,7 +165,6 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'upload_date': '19911130', 'upload_date': '19911130',
'timestamp': 691516800, 'timestamp': 691516800,
'description': 'md5:1ba2f911a2b0aa398479f595224f2141', 'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
'subtitles': {'lyric': [{'ext': 'lrc'}]},
'duration': 268, 'duration': 268,
'alt_title': '伴唱:现代人乐队 合唱:总政歌舞团', 'alt_title': '伴唱:现代人乐队 合唱:总政歌舞团',
'thumbnail': r're:^http.*\.jpg', 'thumbnail': r're:^http.*\.jpg',
@ -166,8 +179,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'creator': 'Taylor Swift / Kendrick Lamar', 'creator': 'Taylor Swift / Kendrick Lamar',
'upload_date': '20150516', 'upload_date': '20150516',
'timestamp': 1431792000, 'timestamp': 1431792000,
'description': 'md5:21535156efb73d6d1c355f95616e285a', 'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
'subtitles': {'lyric': [{'ext': 'lrc'}]},
'duration': 199, 'duration': 199,
'thumbnail': r're:^http.*\.jpg', 'thumbnail': r're:^http.*\.jpg',
}, },
@ -183,7 +195,6 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'upload_date': '20100127', 'upload_date': '20100127',
'timestamp': 1264608000, 'timestamp': 1264608000,
'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184', 'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184',
'subtitles': {'lyric': [{'ext': 'lrc'}]},
'duration': 229, 'duration': 229,
'alt_title': '说出愿望吧(Genie)', 'alt_title': '说出愿望吧(Genie)',
'thumbnail': r're:^http.*\.jpg', 'thumbnail': r're:^http.*\.jpg',
@ -192,54 +203,47 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
}] }]
def _process_lyrics(self, lyrics_info): def _process_lyrics(self, lyrics_info):
original = traverse_obj(lyrics_info, ('lrc', 'lyric', {str})) original = lyrics_info.get('lrc', {}).get('lyric')
translated = traverse_obj(lyrics_info, ('tlyric', 'lyric', {str})) translated = lyrics_info.get('tlyric', {}).get('lyric')
if original.strip() == '[99:00.00]纯音乐,请欣赏': if not translated:
return None
if not translated or not original:
return original return original
lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)' lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
original_ts_texts = re.findall(lyrics_expr, original) original_ts_texts = re.findall(lyrics_expr, original)
translation_ts_dict = { translation_ts_dict = dict(
timestamp: text for timestamp, text in re.findall(lyrics_expr, translated) (time_stamp, text) for time_stamp, text in re.findall(lyrics_expr, translated)
} )
lyrics = '\n'.join([
for i in range(len(original_ts_texts)): '%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, ''))
timestamp, text = original_ts_texts[i] for time_stamp, text in original_ts_texts
if translation_ts_dict.get(timestamp): ])
original_ts_texts[i] = timestamp, f'{text} / {translation_ts_dict[timestamp]}'
lyrics = '\n'.join([''.join(i) for i in original_ts_texts])
return lyrics return lyrics
def _real_extract(self, url): def _real_extract(self, url):
song_id = self._match_id(url) song_id = self._match_id(url)
params = {
'id': song_id,
'ids': '[%s]' % song_id
}
info = self.query_api( info = self.query_api(
f'song/detail?id={song_id}&ids=%5B{song_id}%5D', song_id, 'Downloading song info')['songs'][0] 'song/detail?' + compat_urllib_parse_urlencode(params),
song_id, 'Downloading song info')['songs'][0]
formats = self.extract_formats(info) formats = self.extract_formats(info)
lyrics = self._process_lyrics(self.query_api( lyrics_info = self.query_api(
f'song/lyric?id={song_id}&lv=-1&tv=-1', song_id, 'Downloading lyrics data')) 'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
lyric_data = { song_id, 'Downloading lyrics data')
'description': lyrics, lyrics = self._process_lyrics(lyrics_info)
'subtitles': {
'lyric': [{
'data': lyrics,
'ext': 'lrc',
}]
}
} if lyrics else {}
return { return {
'id': song_id, 'id': song_id,
'description': lyrics,
'formats': formats, 'formats': formats,
'alt_title': '/'.join(traverse_obj(info, (('transNames', 'alias'), ...))) or None, 'alt_title': '/'.join(traverse_obj(info, (('transNames', 'alias'), ...))) or None,
'creator': ' / '.join(traverse_obj(info, ('artists', ..., 'name'))), 'creator': ' / '.join(traverse_obj(info, ('artists', ..., 'name'))),
**lyric_data,
**traverse_obj(info, { **traverse_obj(info, {
'title': ('name', {str}), 'title': ('name', {str}),
'timestamp': ('album', 'publishTime', {lambda i: int_or_none(i, scale=1000)}), 'timestamp': ('album', 'publishTime', {lambda i: int_or_none(i, scale=1000)}),
@ -318,28 +322,21 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
'title': '李昇基 - 이승기', 'title': '李昇基 - 이승기',
}, },
'playlist_count': 50, 'playlist_count': 50,
}, {
'note': 'Singer with both translated and alias',
'url': 'https://music.163.com/#/artist?id=159692',
'info_dict': {
'id': '159692',
'title': '初音ミク - 初音未来;Hatsune Miku',
},
'playlist_count': 50,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
singer_id = self._match_id(url) singer_id = self._match_id(url)
info = self.query_api( info = self.query_api(
f'artist/{singer_id}?id={singer_id}', singer_id, note='Downloading singer data') 'artist/%s?id=%s' % (singer_id, singer_id),
singer_id, 'Downloading singer data')
name_and_aliases = traverse_obj(info, ( artist_info = info.get('artist', {})
'artist', ('name', 'trans', ('alias', ...)), {str}, {lambda i: i or None})) name = artist_info.get('name', '')
if len(name_and_aliases) > 1: if artist_info.get('trans'):
name = f'{name_and_aliases[0]} - {";".join(name_and_aliases[1:])}' name = '%s - %s' % (name, info['artist']['trans'])
else: if artist_info.get('alias'):
name = traverse_obj(name_and_aliases, 0) name = '%s - %s' % (name, ';'.join(map(str, info['artist']['alias'])))
entries = [ entries = [
self.url_result('http://music.163.com/#/song?id=%s' % song['id'], self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
@ -457,10 +454,11 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
mv_id = self._match_id(url) mv_id = self._match_id(url)
info = self.query_api( info = self.query_api(
f'mv/detail?id={mv_id}&type=mp4', mv_id, 'Downloading mv info')['data'] 'mv/detail?id=%s&type=mp4' % mv_id,
mv_id, 'Downloading mv info')['data']
formats = [ formats = [
{'url': mv_url, 'ext': 'mp4', 'format_id': f'{brs}p', 'height': int_or_none(brs)} {'url': mv_url, 'ext': 'mp4', 'format_id': '%sp' % brs, 'height': int(brs)}
for brs, mv_url in info['brs'].items() for brs, mv_url in info['brs'].items()
] ]
@ -534,7 +532,8 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
program_id = self._match_id(url) program_id = self._match_id(url)
info = self.query_api( info = self.query_api(
f'dj/program/detail?id={program_id}', program_id, note='Downloading program info')['program'] 'dj/program/detail?id=%s' % program_id,
program_id, 'Downloading program info')['program']
metainfo = traverse_obj(info, { metainfo = traverse_obj(info, {
'title': ('name', {str}), 'title': ('name', {str}),
@ -585,8 +584,9 @@ class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
entries = [] entries = []
for offset in itertools.count(start=0, step=self._PAGE_SIZE): for offset in itertools.count(start=0, step=self._PAGE_SIZE):
info = self.query_api( info = self.query_api(
f'dj/program/byradio?asc=false&limit={self._PAGE_SIZE}&radioId={dj_id}&offset={offset}', 'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
dj_id, note=f'Downloading dj programs - {offset}') % (self._PAGE_SIZE, dj_id, offset),
dj_id, 'Downloading dj programs - %d' % offset)
entries.extend([ entries.extend([
self.url_result( self.url_result(