Compare commits

...

4 Commits

Author SHA1 Message Date
sepro
49a10a5ea8 Make sub parsing non fatal 2023-11-07 05:20:45 +01:00
sepro
51d214d85c Remove lyd tests 2023-11-07 04:59:51 +01:00
sepro
09a90428f0 Fix typo 2023-11-07 04:58:33 +01:00
sepro
42c8eadf16 Cleanup Part 2 2023-11-07 04:56:40 +01:00

View File

@ -19,7 +19,7 @@ class DRTVIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:
(?:www\.)?dr\.dk/(?:tv/se|nyheder|(?P<radio>radio|lyd)(?:/ondemand)?)/(?:[^/]+/)*| (?:www\.)?dr\.dk/tv/se(?:/ondemand)?/(?:[^/]+/)*|
(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/ (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
) )
(?P<id>[\da-z_-]+) (?P<id>[\da-z_-]+)
@ -48,22 +48,6 @@ class DRTVIE(InfoExtractor):
}, },
'expected_warnings': ['Unable to download f4m manifest'], 'expected_warnings': ['Unable to download f4m manifest'],
'skip': 'this video has been removed', 'skip': 'this video has been removed',
}, {
# embed
'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
'info_dict': {
'id': 'urn:dr:mu:programcard:57c926176187a50a9c6e83c6',
'ext': 'mp4',
'title': 'christiania pusher street ryddes drdkrjpo',
'description': 'md5:2a71898b15057e9b97334f61d04e6eb5',
'timestamp': 1472800279,
'upload_date': '20160902',
'duration': 131.4,
},
'params': {
'skip_download': True,
},
'expected_warnings': ['Unable to download f4m manifest'],
}, { }, {
# with SignLanguage formats # with SignLanguage formats
'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder', 'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
@ -82,29 +66,25 @@ class DRTVIE(InfoExtractor):
'season': 'Historien om Danmark', 'season': 'Historien om Danmark',
'series': 'Historien om Danmark', 'series': 'Historien om Danmark',
}, },
'params': { 'skip': 'this video has been removed',
'skip_download': True,
},
}, {
'url': 'https://www.dr.dk/lyd/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9',
'only_matching': True,
}, { }, {
'url': 'https://www.dr.dk/drtv/se/bonderoeven_71769', 'url': 'https://www.dr.dk/drtv/se/bonderoeven_71769',
'info_dict': { 'info_dict': {
'id': '00951930010', 'id': '00951930010',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Bonderøven 2019 (1:8)', 'title': 'Frank & Kastaniegaarden',
'description': 'md5:b6dcfe9b6f0bea6703e9a0092739a5bd', 'description': 'md5:974e1780934cf3275ef10280204bccb0',
'timestamp': 1654856100, 'release_timestamp': 1546545600,
'upload_date': '20220610', 'release_date': '20190103',
'duration': 2576.6, 'duration': 2576,
'season': 'Bonderøven 2019', 'season': 'Frank & Kastaniegaarden',
'season_id': 'urn:dr:mu:bundle:5c201667a11fa01ca4528ce5', 'season_id': '67125',
'release_year': 2019, 'release_year': 2019,
'season_number': 2019, 'season_number': 2019,
'series': 'Frank & Kastaniegaarden', 'series': 'Frank & Kastaniegaarden',
'episode_number': 1, 'episode_number': 1,
'episode': 'Episode 1', 'episode': 'Frank & Kastaniegaarden',
'thumbnail': r're:https?://.+',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -118,40 +98,6 @@ class DRTVIE(InfoExtractor):
}, { }, {
'url': 'https://www.dr.dk/drtv/program/jagten_220924', 'url': 'https://www.dr.dk/drtv/program/jagten_220924',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.dr.dk/lyd/p4aarhus/regionale-nyheder-ar4/regionale-nyheder-2022-05-05-12-30-3',
'info_dict': {
'id': 'urn:dr:mu:programcard:6265cb2571401424d0360113',
'title': "Regionale nyheder",
'ext': 'mp4',
'duration': 120.043,
'series': 'P4 Østjylland regionale nyheder',
'timestamp': 1651746600,
'season': 'Regionale nyheder',
'release_year': 0,
'season_id': 'urn:dr:mu:bundle:61c26889539f0201586b73c5',
'description': '',
'upload_date': '20220505',
},
'params': {
'skip_download': True,
},
'skip': 'this video has been removed',
}, {
'url': 'https://www.dr.dk/lyd/p4kbh/regionale-nyheder-kh4/regionale-nyheder-2023-03-14-10-30-9',
'info_dict': {
'ext': 'mp4',
'id': '14802310112',
'timestamp': 1678786200,
'duration': 120.043,
'season_id': 'urn:dr:mu:bundle:63a4f7c87140143504b6710f',
'series': 'P4 København regionale nyheder',
'upload_date': '20230314',
'release_year': 0,
'description': 'Hør seneste regionale nyheder fra P4 København.',
'season': 'Regionale nyheder',
'title': 'Regionale nyheder',
},
}] }]
_TOKEN = None _TOKEN = None
@ -176,22 +122,38 @@ class DRTVIE(InfoExtractor):
'Catalog', 'Catalog',
], ],
'optout': True, 'optout': True,
}).encode('utf-8')) }).encode())
self._TOKEN = traverse_obj( self._TOKEN = traverse_obj(
token_response, (lambda _, x: x['type'] == 'UserAccount', 'value'), get_all=False) token_response, (lambda _, x: x['type'] == 'UserAccount', 'value'), get_all=False)
def _real_extract(self, url): def _real_extract(self, url):
raw_video_id = self._match_valid_url(url).group('id') url_slug = self._match_id(url)
webpage = self._download_webpage(url, raw_video_id) webpage = self._download_webpage(url, url_slug)
json_data = self._search_json(r'window\.__data\s*=\s*', webpage, 'data', raw_video_id) json_data = self._search_json(r'window\.__data\s*=\s*', webpage, 'data', url_slug, fatal=False) or {}
item = traverse_obj(json_data, ('cache', 'page', ..., (None, ('entries', 0)), 'item'), get_all=False) or {} item = traverse_obj(json_data, ('cache', 'page', ..., (None, ('entries', 0)), 'item'), get_all=False)
item_id = item.get('id') or raw_video_id.rsplit('_', 1)[-1] if item:
video_id = try_call(item['customId'].split(':')[-1]) or item_id item_id = item.get('id')
else:
item_id = url_slug.rsplit('_', 1)[-1]
item = self._download_json(
f'https://production-cdn.dr-massive.com/api/items/{item_id}', item_id,
note='Attempting to download backup item data', query={
'device': 'web_browser',
'expand': 'all',
'ff': 'idp,ldp,rpt',
'geoLocation': 'dk',
'isDeviceAbroad': 'false',
'lang': 'da',
'segments': 'drtv,optedout',
'sub': 'Anonymous',
})
video_id = try_call(lambda: item['customId'].split(':')[-1]) or item_id
stream_data = self._download_json( stream_data = self._download_json(
f'https://production.dr-massive.com/api/account/items/{item_id}/videos', video_id, query={ f'https://production.dr-massive.com/api/account/items/{item_id}/videos', video_id,
note='Downloading stream data', query={
'delivery': 'stream', 'delivery': 'stream',
'device': 'web_browser', 'device': 'web_browser',
'ff': 'idp,ldp,rpt', 'ff': 'idp,ldp,rpt',
@ -211,17 +173,18 @@ class DRTVIE(InfoExtractor):
preference = None preference = None
if access_service in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'): if access_service in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'):
preference = -1 preference = -1
format_id += '-%s' % access_service format_id += f'-{access_service}'
elif access_service == 'StandardVideo': elif access_service == 'StandardVideo':
preference = 1 preference = 1
fmts, subs = self._extract_m3u8_formats_and_subtitles(fmt['url'], video_id, preference=preference, m3u8_id=format_id, fatal=False) fmts, subs = self._extract_m3u8_formats_and_subtitles(
fmt.get('url'), video_id, preference=preference, m3u8_id=format_id, fatal=False)
formats.extend(fmts) formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles) self._merge_subtitles(subs, target=subtitles)
LANGS = { LANGS = {
'DanishLanguageSubtitles': 'da', 'DanishLanguageSubtitles': 'da',
} }
for subs in fmt['subtitles']: for subs in fmt.get('subtitles', []):
if not isinstance(subs, dict): if not isinstance(subs, dict):
continue continue
sub_uri = url_or_none(subs.get('link')) sub_uri = url_or_none(subs.get('link'))
@ -241,10 +204,10 @@ class DRTVIE(InfoExtractor):
'title': 'title', 'title': 'title',
'description': 'description', 'description': 'description',
'thumbnail': ('images', 'wallpaper'), 'thumbnail': ('images', 'wallpaper'),
'timestamp': ('customFields', 'BroadcastTimeDK', {parse_iso8601}), 'release_timestamp': ('customFields', 'BroadcastTimeDK', {parse_iso8601}),
'duration': 'duration', 'duration': 'duration',
'series': ('season', 'title'), 'series': ('season', 'show', 'title'),
'season': ('season', 'show', 'title'), 'season': ('season', 'title'),
'season_number': ('season', 'seasonNumber', {int_or_none}), 'season_number': ('season', 'seasonNumber', {int_or_none}),
'season_id': 'seasonId', 'season_id': 'seasonId',
'episode': 'episodeName', 'episode': 'episodeName',