Compare commits

...

2 Commits

Author SHA1 Message Date
Martin Renold
8f43d97420 [ie/mx3] Always try HEAD on media URLs; extract size and timestamp 2024-01-19 21:34:12 +01:00
Martin Renold
55d4944b94 [ie/mx3] Refactor: abstract base class 2024-01-19 21:32:59 +01:00

View File

@ -1,11 +1,80 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import urlhandle_detect_ext from ..utils.traversal import traverse_obj
from ..utils import (
urlhandle_detect_ext,
url_or_none,
int_or_none,
unified_timestamp,
)
from ..networking import HEADRequest from ..networking import HEADRequest
class Mx3IE(InfoExtractor): class Mx3BaseIE(InfoExtractor):
_MX3_DOMAIN = None
def _real_extract(self, url):
    """Extract metadata and the available audio formats for one track.

    Downloads the track's web page (used only for the genre) and its JSON
    description from ``self._MX3_DOMAIN``, then sends a HEAD request to each
    candidate media URL to find out which formats actually exist.

    Returns an info dict with ``id``, ``formats``, ``artist`` and ``genre``
    plus, when the JSON provides them, ``title``, ``composer`` and
    ``thumbnail``.
    """
    track_id = self._match_id(url)
    webpage = self._download_webpage(url, track_id)
    # Deliberately not called `json`, so the local does not shadow the stdlib module name
    track_info = self._download_json(f'https://{self._MX3_DOMAIN}/t/{track_id}.json', track_id)

    # Artist first, then performer; empty, non-string and duplicate values are
    # skipped so that the join below can neither crash nor repeat a name
    artists = []
    for key in ('artist', 'performer_name'):
        name = track_info.get(key)
        if name and isinstance(name, str) and name not in artists:
            artists.append(name)

    genre = self._html_search_regex(
        r'<div\b[^>]+class="single-band-genre"[^>]*>([^<]+)</div>',
        webpage, 'genre', fatal=False, flags=re.DOTALL)

    formats = []

    def add_format(fmt, fatal):
        """HEAD-check ``fmt['url']`` and append ``fmt`` to ``formats`` on a 200 response.

        With ``fatal`` set, the request uses the default error handling of
        ``_request_webpage``. Otherwise the request is non-fatal and a 404
        is passed as an expected status, since optional formats are not
        available for every track.
        """
        if fatal:
            request_kwargs = {'note': 'Fetching default media headers'}
        else:
            request_kwargs = {
                'note': f'Trying media headers for optional format {fmt["format_id"]}',
                'fatal': False,
                'expected_status': 404,
            }
        urlh = self._request_webpage(HEADRequest(fmt['url']), track_id, **request_kwargs)
        # No response at all, or a response other than 200: this format is not usable
        if not urlh or urlh.status != 200:
            return
        fmt['ext'] = urlhandle_detect_ext(urlh)
        fmt['filesize'] = int_or_none(urlh.headers.get('Content-Length'))
        fmt['timestamp'] = unified_timestamp(urlh.headers.get('Last-Modified'))
        formats.append(fmt)

    track_url = f'https://{self._MX3_DOMAIN}/tracks/{track_id}'
    add_format({
        'url': f'{track_url}/player_asset',
        'format_id': 'default',
        'quality': 1,
    }, fatal=True)
    # the formats below don't always exist
    add_format({
        'url': f'{track_url}/player_asset?quality=hd',
        'format_id': 'hd',
        'quality': 10,
    }, fatal=False)
    add_format({
        'url': f'{track_url}/download',
        'format_id': 'download',
        'quality': 11,
    }, fatal=False)

    return {
        'id': track_id,
        'formats': formats,
        # None rather than an empty string when no artist is known
        'artist': ', '.join(artists) or None,
        'genre': genre,
        **traverse_obj(track_info, {
            'title': ('title', {str}),
            'composer': ('composer_name', {str}),
            'thumbnail': (('picture_url_xlarge', 'picture_url'), {url_or_none}),
        }, get_all=False),
    }
class Mx3IE(Mx3BaseIE):
_MX3_DOMAIN = 'mx3.ch' _MX3_DOMAIN = 'mx3.ch'
_VALID_URL = r'https?://(?:www\.)?mx3\.ch/t/(?P<id>[0-9A-Za-z]+)' _VALID_URL = r'https?://(?:www\.)?mx3\.ch/t/(?P<id>[0-9A-Za-z]+)'
_TESTS = [{ _TESTS = [{
@ -20,6 +89,7 @@ class Mx3IE(InfoExtractor):
'genre': 'Rock', 'genre': 'Rock',
'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/4643/square_xlarge/1-s-envoler-1.jpg?1630272813', 'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/4643/square_xlarge/1-s-envoler-1.jpg?1630272813',
'title': 'S\'envoler', 'title': 'S\'envoler',
'timestamp': 1630272831,
} }
}, { }, {
'url': 'https://mx3.ch/t/1LIY', 'url': 'https://mx3.ch/t/1LIY',
@ -33,6 +103,7 @@ class Mx3IE(InfoExtractor):
'genre': 'Electro', 'genre': 'Electro',
'thumbnail': 'https://mx3.ch/pictures/mx3/file/0110/0003/video_xlarge/frame_0000.png?1686963670', 'thumbnail': 'https://mx3.ch/pictures/mx3/file/0110/0003/video_xlarge/frame_0000.png?1686963670',
'title': 'The Broots-Larytta remix "Begging For Help"', 'title': 'The Broots-Larytta remix "Begging For Help"',
'timestamp': 1686963636,
} }
}, { }, {
'url': 'https://mx3.ch/t/1C6E', 'url': 'https://mx3.ch/t/1C6E',
@ -46,69 +117,12 @@ class Mx3IE(InfoExtractor):
'genre': 'Punk', 'genre': 'Punk',
'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/1551/square_xlarge/pandora-s-box-cover-with-title.png?1627054733', 'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/1551/square_xlarge/pandora-s-box-cover-with-title.png?1627054733',
'title': 'Wide Awake', 'title': 'Wide Awake',
'timestamp': 1627054732,
} }
}] }]
def _real_extract(self, url):
track_id = self._match_id(url)
webpage = self._download_webpage(url, track_id)
json = self._download_json(f'https://{self._MX3_DOMAIN}/t/{track_id}.json', track_id)
artists = [] class Mx3NeoIE(Mx3BaseIE):
if json.get('artist'):
artists.append(json['artist'])
performer = json.get('performer_name')
if performer and performer not in artists:
artists.append(performer)
title = json['title']
genre = self._html_search_regex(r'<div\b[^>]+class="single-band-genre"[^>]*>([^<]+)</div>',
webpage, 'genre', fatal=False, flags=re.DOTALL)
formats = []
def add_format(fmt):
urlh = self._request_webpage(HEADRequest(fmt['url']), track_id, note='Fetching media headers', fatal=False)
if urlh:
fmt['ext'] = urlhandle_detect_ext(urlh)
formats.append(fmt)
base_url = 'https://' + self._MX3_DOMAIN + '/'
add_format({
'url': base_url + json["url"],
'format_id': 'default',
'quality': 1,
})
if 'hd_url' in json:
add_format({
'url': base_url + json['hd_url'],
'format_id': 'hd',
'quality': 10,
})
# the "download" feature is not available everywhere
if f'/tracks/{track_id}/download' in webpage:
add_format({
'url': f'{base_url}tracks/{track_id}/download',
'format_id': 'download',
'quality': 11,
'format_note': 'usually uncompressed WAV',
})
return {
'id': track_id,
'formats': formats,
'title': title,
'artist': ', '.join(artists),
'composer': json.get('composer_name', None),
'genre': genre,
'thumbnail': json.get('picture_url_xlarge') or json.get('picture_url'),
}
class Mx3NeoIE(Mx3IE):
_MX3_DOMAIN = 'neo.mx3.ch' _MX3_DOMAIN = 'neo.mx3.ch'
_VALID_URL = r'https?://(?:www\.)?neo.mx3\.ch/t/(?P<id>[0-9A-Za-z]+)' _VALID_URL = r'https?://(?:www\.)?neo.mx3\.ch/t/(?P<id>[0-9A-Za-z]+)'
_TESTS = [{ _TESTS = [{
@ -121,12 +135,13 @@ class Mx3NeoIE(Mx3IE):
'composer': 'Jannik Giger', 'composer': 'Jannik Giger',
'genre': 'Composition, Orchestra', 'genre': 'Composition, Orchestra',
'title': 'Troisième œil. Für Kammerorchester (2023)', 'title': 'Troisième œil. Für Kammerorchester (2023)',
'thumbnail': 'https://neo.mx3.ch/pictures/neo/file/0000/0241/square_xlarge/kammerorchester-basel-group-photo-2_c_-lukasz-rajchert.jpg?1560341252' 'thumbnail': 'https://neo.mx3.ch/pictures/neo/file/0000/0241/square_xlarge/kammerorchester-basel-group-photo-2_c_-lukasz-rajchert.jpg?1560341252',
'timestamp': 1705055012,
} }
}] }]
class Mx3VolksmusikIE(Mx3IE): class Mx3VolksmusikIE(Mx3BaseIE):
_MX3_DOMAIN = 'volksmusik.mx3.ch' _MX3_DOMAIN = 'volksmusik.mx3.ch'
_VALID_URL = r'https?://(?:www\.)?volksmusik.mx3\.ch/t/(?P<id>[0-9A-Za-z]+)' _VALID_URL = r'https?://(?:www\.)?volksmusik.mx3\.ch/t/(?P<id>[0-9A-Za-z]+)'
_TESTS = [{ _TESTS = [{
@ -140,5 +155,6 @@ class Mx3VolksmusikIE(Mx3IE):
'genre': 'Instrumental, Graubünden', 'genre': 'Instrumental, Graubünden',
'title': 'Chämilouf', 'title': 'Chämilouf',
'thumbnail': 'https://volksmusik.mx3.ch/pictures/vxm/file/0000/3815/square_xlarge/grischart1.jpg?1450530120', 'thumbnail': 'https://volksmusik.mx3.ch/pictures/vxm/file/0000/3815/square_xlarge/grischart1.jpg?1450530120',
'timestamp': 1450532809,
} }
}] }]