#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import http.server import threading from test.helper import FakeYDL, expect_dict, expect_value, http_server_port from yt_dlp.compat import compat_etree_fromstring from yt_dlp.extractor import YoutubeIE, get_info_extractor from yt_dlp.extractor.common import InfoExtractor from yt_dlp.utils import ( ExtractorError, RegexNotFoundError, encode_data_uri, strip_jsonp, ) TEAPOT_RESPONSE_STATUS = 418 TEAPOT_RESPONSE_BODY = "

418 I'm a teapot

" class InfoExtractorTestRequestHandler(http.server.BaseHTTPRequestHandler): def log_message(self, format, *args): pass def do_GET(self): if self.path == '/teapot': self.send_response(TEAPOT_RESPONSE_STATUS) self.send_header('Content-Type', 'text/html; charset=utf-8') self.end_headers() self.wfile.write(TEAPOT_RESPONSE_BODY.encode()) else: assert False class DummyIE(InfoExtractor): def _sort_formats(self, formats, field_preference=[]): self._downloader.sort_formats( {'formats': formats, '_format_sort_fields': field_preference}) class TestInfoExtractor(unittest.TestCase): def setUp(self): self.ie = DummyIE(FakeYDL()) def test_ie_key(self): self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE) def test_get_netrc_login_info(self): for params in [ {'usenetrc': True, 'netrc_location': './test/testdata/netrc/netrc'}, {'netrc_cmd': f'{sys.executable} ./test/testdata/netrc/print_netrc.py'}, ]: ie = DummyIE(FakeYDL(params)) self.assertEqual(ie._get_netrc_login_info(netrc_machine='normal_use'), ('user', 'pass')) self.assertEqual(ie._get_netrc_login_info(netrc_machine='empty_user'), ('', 'pass')) self.assertEqual(ie._get_netrc_login_info(netrc_machine='empty_pass'), ('user', '')) self.assertEqual(ie._get_netrc_login_info(netrc_machine='both_empty'), ('', '')) self.assertEqual(ie._get_netrc_login_info(netrc_machine='nonexistent'), (None, None)) def test_html_search_regex(self): html = '

Watch this video

' search = lambda re, *args: self.ie._html_search_regex(re, html, *args) self.assertEqual(search(r'

(.+?)

', 'foo'), 'Watch this video') def test_opengraph(self): ie = self.ie html = ''' ''' self.assertEqual(ie._og_search_title(html), 'Foo') self.assertEqual(ie._og_search_description(html), 'Some video\'s description ') self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2') self.assertEqual(ie._og_search_video_url(html, default=None), None) self.assertEqual(ie._og_search_property('foobar', html), 'Foo') self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar') self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar') self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph') self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar') self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True) self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True) self.assertEqual(ie._og_search_property('test4', html), 'unquoted-value') def test_html_search_meta(self): ie = self.ie html = ''' ''' self.assertEqual(ie._html_search_meta('a', html), '1') self.assertEqual(ie._html_search_meta('b', html), '2') self.assertEqual(ie._html_search_meta('c', html), '3') self.assertEqual(ie._html_search_meta('d', html), '4') self.assertEqual(ie._html_search_meta('e', html), '5') self.assertEqual(ie._html_search_meta('f', html), '6') self.assertEqual(ie._html_search_meta(('a', 'b', 'c'), html), '1') self.assertEqual(ie._html_search_meta(('c', 'b', 'a'), html), '3') self.assertEqual(ie._html_search_meta(('z', 'x', 'c'), html), '3') self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True) self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True) def test_search_json_ld_realworld(self): _TESTS = [ # https://github.com/ytdl-org/youtube-dl/issues/23306 ( r'''''', { 'title': '1 On 1 With Kleio', 'description': 'Kleio Valentien', 'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4', 'timestamp': 1449347075, 'duration': 743.0, 'view_count': 1120958, 'width': 1920, 'height': 1080, }, {}, ), ( r'''''', { 'timestamp': 1636523400, 'title': 'md5:91fe569e952e4d146485740ae927662b', }, {'expected_type': 'NewsArticle'}, ), ( r''' ''', { 'chapters': [ {'title': 'Explosie Turnhout', 'start_time': 70, 'end_time': 440}, {'title': 'Jaarwisseling', 'start_time': 440, 'end_time': 1179}, {'title': 'Natuurbranden Colorado', 'start_time': 1179, 'end_time': 1263}, {'title': 'Klimaatverandering', 'start_time': 1263, 'end_time': 1367}, {'title': 'Zacht weer', 'start_time': 1367, 'end_time': 1383}, {'title': 'Financiële balans', 'start_time': 1383, 'end_time': 1484}, {'title': 'Club Brugge', 'start_time': 1484, 'end_time': 1575}, {'title': 'Mentale gezondheid bij topsporters', 'start_time': 1575, 'end_time': 1728}, {'title': 'Olympische Winterspelen', 'start_time': 1728, 'end_time': 1873}, {'title': 'Sober oudjaar in Nederland', 'start_time': 1873, 'end_time': 2079.23}, ], 'title': 'Het journaal - Aflevering 365 (Seizoen 2021)', }, {}, ), ( # test multiple thumbnails in a list r''' ''', { 'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}], }, {}, ), ( # test single thumbnail r''' ''', { 'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}], }, {}, ), ] for html, expected_dict, search_json_ld_kwargs in _TESTS: expect_dict( self, self.ie._search_json_ld(html, None, **search_json_ld_kwargs), expected_dict, ) def test_download_json(self): uri = encode_data_uri(b'{"foo": "blah"}', 'application/json') self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'}) uri = encode_data_uri(b'callback({"foo": "blah"})', 'application/javascript') self.assertEqual(self.ie._download_json(uri, None, transform_source=strip_jsonp), {'foo': 'blah'}) uri = encode_data_uri(b'{"foo": invalid}', 'application/json') self.assertRaises(ExtractorError, self.ie._download_json, uri, None) self.assertEqual(self.ie._download_json(uri, None, fatal=False), None) def test_parse_html5_media_entries(self): # inline video tag expect_dict( self, self.ie._parse_html5_media_entries( 'https://127.0.0.1/video.html', r'