Compare commits

..

No commits in common. "49a10a5ea8a819216b99f65c51e79dc9a88697b0" and "167ae779ff6302926c063869360b96e7fcfdbdf3" have entirely different histories.

View File

@ -19,7 +19,7 @@ class DRTVIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:
(?:www\.)?dr\.dk/tv/se(?:/ondemand)?/(?:[^/]+/)*| (?:www\.)?dr\.dk/(?:tv/se|nyheder|(?P<radio>radio|lyd)(?:/ondemand)?)/(?:[^/]+/)*|
(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/ (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
) )
(?P<id>[\da-z_-]+) (?P<id>[\da-z_-]+)
@ -48,6 +48,22 @@ class DRTVIE(InfoExtractor):
}, },
'expected_warnings': ['Unable to download f4m manifest'], 'expected_warnings': ['Unable to download f4m manifest'],
'skip': 'this video has been removed', 'skip': 'this video has been removed',
}, {
# embed
'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
'info_dict': {
'id': 'urn:dr:mu:programcard:57c926176187a50a9c6e83c6',
'ext': 'mp4',
'title': 'christiania pusher street ryddes drdkrjpo',
'description': 'md5:2a71898b15057e9b97334f61d04e6eb5',
'timestamp': 1472800279,
'upload_date': '20160902',
'duration': 131.4,
},
'params': {
'skip_download': True,
},
'expected_warnings': ['Unable to download f4m manifest'],
}, { }, {
# with SignLanguage formats # with SignLanguage formats
'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder', 'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
@ -66,25 +82,29 @@ class DRTVIE(InfoExtractor):
'season': 'Historien om Danmark', 'season': 'Historien om Danmark',
'series': 'Historien om Danmark', 'series': 'Historien om Danmark',
}, },
'skip': 'this video has been removed', 'params': {
'skip_download': True,
},
}, {
'url': 'https://www.dr.dk/lyd/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9',
'only_matching': True,
}, { }, {
'url': 'https://www.dr.dk/drtv/se/bonderoeven_71769', 'url': 'https://www.dr.dk/drtv/se/bonderoeven_71769',
'info_dict': { 'info_dict': {
'id': '00951930010', 'id': '00951930010',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Frank & Kastaniegaarden', 'title': 'Bonderøven 2019 (1:8)',
'description': 'md5:974e1780934cf3275ef10280204bccb0', 'description': 'md5:b6dcfe9b6f0bea6703e9a0092739a5bd',
'release_timestamp': 1546545600, 'timestamp': 1654856100,
'release_date': '20190103', 'upload_date': '20220610',
'duration': 2576, 'duration': 2576.6,
'season': 'Frank & Kastaniegaarden', 'season': 'Bonderøven 2019',
'season_id': '67125', 'season_id': 'urn:dr:mu:bundle:5c201667a11fa01ca4528ce5',
'release_year': 2019, 'release_year': 2019,
'season_number': 2019, 'season_number': 2019,
'series': 'Frank & Kastaniegaarden', 'series': 'Frank & Kastaniegaarden',
'episode_number': 1, 'episode_number': 1,
'episode': 'Frank & Kastaniegaarden', 'episode': 'Episode 1',
'thumbnail': r're:https?://.+',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -98,6 +118,40 @@ class DRTVIE(InfoExtractor):
}, { }, {
'url': 'https://www.dr.dk/drtv/program/jagten_220924', 'url': 'https://www.dr.dk/drtv/program/jagten_220924',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.dr.dk/lyd/p4aarhus/regionale-nyheder-ar4/regionale-nyheder-2022-05-05-12-30-3',
'info_dict': {
'id': 'urn:dr:mu:programcard:6265cb2571401424d0360113',
'title': "Regionale nyheder",
'ext': 'mp4',
'duration': 120.043,
'series': 'P4 Østjylland regionale nyheder',
'timestamp': 1651746600,
'season': 'Regionale nyheder',
'release_year': 0,
'season_id': 'urn:dr:mu:bundle:61c26889539f0201586b73c5',
'description': '',
'upload_date': '20220505',
},
'params': {
'skip_download': True,
},
'skip': 'this video has been removed',
}, {
'url': 'https://www.dr.dk/lyd/p4kbh/regionale-nyheder-kh4/regionale-nyheder-2023-03-14-10-30-9',
'info_dict': {
'ext': 'mp4',
'id': '14802310112',
'timestamp': 1678786200,
'duration': 120.043,
'season_id': 'urn:dr:mu:bundle:63a4f7c87140143504b6710f',
'series': 'P4 København regionale nyheder',
'upload_date': '20230314',
'release_year': 0,
'description': 'Hør seneste regionale nyheder fra P4 København.',
'season': 'Regionale nyheder',
'title': 'Regionale nyheder',
},
}] }]
_TOKEN = None _TOKEN = None
@ -122,38 +176,22 @@ class DRTVIE(InfoExtractor):
'Catalog', 'Catalog',
], ],
'optout': True, 'optout': True,
}).encode()) }).encode('utf-8'))
self._TOKEN = traverse_obj( self._TOKEN = traverse_obj(
token_response, (lambda _, x: x['type'] == 'UserAccount', 'value'), get_all=False) token_response, (lambda _, x: x['type'] == 'UserAccount', 'value'), get_all=False)
def _real_extract(self, url): def _real_extract(self, url):
url_slug = self._match_id(url) raw_video_id = self._match_valid_url(url).group('id')
webpage = self._download_webpage(url, url_slug) webpage = self._download_webpage(url, raw_video_id)
json_data = self._search_json(r'window\.__data\s*=\s*', webpage, 'data', url_slug, fatal=False) or {} json_data = self._search_json(r'window\.__data\s*=\s*', webpage, 'data', raw_video_id)
item = traverse_obj(json_data, ('cache', 'page', ..., (None, ('entries', 0)), 'item'), get_all=False) item = traverse_obj(json_data, ('cache', 'page', ..., (None, ('entries', 0)), 'item'), get_all=False) or {}
if item: item_id = item.get('id') or raw_video_id.rsplit('_', 1)[-1]
item_id = item.get('id') video_id = try_call(item['customId'].split(':')[-1]) or item_id
else:
item_id = url_slug.rsplit('_', 1)[-1]
item = self._download_json(
f'https://production-cdn.dr-massive.com/api/items/{item_id}', item_id,
note='Attempting to download backup item data', query={
'device': 'web_browser',
'expand': 'all',
'ff': 'idp,ldp,rpt',
'geoLocation': 'dk',
'isDeviceAbroad': 'false',
'lang': 'da',
'segments': 'drtv,optedout',
'sub': 'Anonymous',
})
video_id = try_call(lambda: item['customId'].split(':')[-1]) or item_id
stream_data = self._download_json( stream_data = self._download_json(
f'https://production.dr-massive.com/api/account/items/{item_id}/videos', video_id, f'https://production.dr-massive.com/api/account/items/{item_id}/videos', video_id, query={
note='Downloading stream data', query={
'delivery': 'stream', 'delivery': 'stream',
'device': 'web_browser', 'device': 'web_browser',
'ff': 'idp,ldp,rpt', 'ff': 'idp,ldp,rpt',
@ -173,18 +211,17 @@ class DRTVIE(InfoExtractor):
preference = None preference = None
if access_service in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'): if access_service in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'):
preference = -1 preference = -1
format_id += f'-{access_service}' format_id += '-%s' % access_service
elif access_service == 'StandardVideo': elif access_service == 'StandardVideo':
preference = 1 preference = 1
fmts, subs = self._extract_m3u8_formats_and_subtitles( fmts, subs = self._extract_m3u8_formats_and_subtitles(fmt['url'], video_id, preference=preference, m3u8_id=format_id, fatal=False)
fmt.get('url'), video_id, preference=preference, m3u8_id=format_id, fatal=False)
formats.extend(fmts) formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles) self._merge_subtitles(subs, target=subtitles)
LANGS = { LANGS = {
'DanishLanguageSubtitles': 'da', 'DanishLanguageSubtitles': 'da',
} }
for subs in fmt.get('subtitles', []): for subs in fmt['subtitles']:
if not isinstance(subs, dict): if not isinstance(subs, dict):
continue continue
sub_uri = url_or_none(subs.get('link')) sub_uri = url_or_none(subs.get('link'))
@ -204,10 +241,10 @@ class DRTVIE(InfoExtractor):
'title': 'title', 'title': 'title',
'description': 'description', 'description': 'description',
'thumbnail': ('images', 'wallpaper'), 'thumbnail': ('images', 'wallpaper'),
'release_timestamp': ('customFields', 'BroadcastTimeDK', {parse_iso8601}), 'timestamp': ('customFields', 'BroadcastTimeDK', {parse_iso8601}),
'duration': 'duration', 'duration': 'duration',
'series': ('season', 'show', 'title'), 'series': ('season', 'title'),
'season': ('season', 'title'), 'season': ('season', 'show', 'title'),
'season_number': ('season', 'seasonNumber', {int_or_none}), 'season_number': ('season', 'seasonNumber', {int_or_none}),
'season_id': 'seasonId', 'season_id': 'seasonId',
'episode': 'episodeName', 'episode': 'episodeName',