mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-09-20 09:21:25 +02:00
Compare commits
6 Commits
197bcd877c
...
360ecdd614
Author | SHA1 | Date | |
---|---|---|---|
|
360ecdd614 | ||
|
46262a66ec | ||
|
7f5419a6f9 | ||
|
145aed902a | ||
|
3bb0d86c83 | ||
|
2f252e0b33 |
|
@ -1,8 +1,4 @@
|
||||||
# flake8: noqa: F401
|
# flake8: noqa: F401
|
||||||
from .tudou import(TudouIE,
|
|
||||||
|
|
||||||
|
|
||||||
)
|
|
||||||
|
|
||||||
from .youtube import ( # Youtube is moved to the top to improve performance
|
from .youtube import ( # Youtube is moved to the top to improve performance
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
|
@ -2051,6 +2047,7 @@ from .tubitv import (
|
||||||
TubiTvIE,
|
TubiTvIE,
|
||||||
TubiTvShowIE,
|
TubiTvShowIE,
|
||||||
)
|
)
|
||||||
|
from .tudou import TudouIE
|
||||||
from .tumblr import TumblrIE
|
from .tumblr import TumblrIE
|
||||||
from .tunein import (
|
from .tunein import (
|
||||||
TuneInStationIE,
|
TuneInStationIE,
|
||||||
|
|
|
@ -2,63 +2,63 @@ from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class TudouIE(InfoExtractor):
|
class TudouIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:play\.)?tudou\.com/v_show/(id_[a-zA-Z0-9_=.]+)'
|
_VALID_URL = r'https?://(?:play\.)?tudou\.com/v_show/(?P<id>id_[\w=.]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://play.tudou.com/v_show/id_XNjAxNjI2OTU3Ng',
|
'url': 'https://play.tudou.com/v_show/id_XNjAxNjI2OTU3Ng',
|
||||||
|
'md5': 'to be updated',
|
||||||
# this code successfully downloaded the .mp4 file, and passed the test, EXCEPT the md5 part.
|
|
||||||
# I moved this code to the extractor folder of the released version of yt-dlp, trying to see if it works properly in there.
|
|
||||||
# IF it worked, then I can calculate the md5 of the first 10kb, and compare the md5.
|
|
||||||
# Unfortunately, it didn't.
|
|
||||||
# I'm guessing it has a problem extracting the video id, but I can't prove my guess.
|
|
||||||
|
|
||||||
# But it does work here, when running 'python test/test_download.py TestDownload.test_Tudou'
|
|
||||||
# That's why it didn't pass the md5 test, because I couldn't download the first 10kb with the released version of yt-dlp.
|
|
||||||
# So there's nothing to compare
|
|
||||||
|
|
||||||
# I'm interested to know if there's another way to download the first 10kb.
|
|
||||||
# Currently it's a paradox to me:
|
|
||||||
# To finalise this code, I need to get the first 10kb, to do the last comparison.
|
|
||||||
# But this code doesn't work in the released yt-dlp, so I can't get the first 10kb.
|
|
||||||
# So how can I.....
|
|
||||||
|
|
||||||
# 'f33b73e7470c45b7d3c4f7d8b34eda14',
|
|
||||||
# this md5, is from the output of this command - 'python test/test_download.py TestDownload.test_Tudou'.
|
|
||||||
# the downloaded file is deleted automatically, not giving me a chance to calculate its md5 manually.
|
|
||||||
'md5': 'failed to get the first 10kb',
|
|
||||||
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'XNjAxNjI2OTU3Ng==',
|
'id': 'XNjAxNjI2OTU3Ng==',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '外星居民 第一季 阿斯塔意识到哈里杀了人,自己被骗了-电视剧-高清完整正版视频在线观看-优酷',
|
'title': '阿斯塔意识到哈里杀了人,自己被骗了',
|
||||||
|
'show_name': '外星居民 第一季',
|
||||||
},
|
},
|
||||||
# 'skip': 'testing skip function',
|
# 'skip': 'testing skip function',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# About video_id
|
video_id = self._match_id(url)
|
||||||
# .get_temp_id returns None
|
|
||||||
# ._match_id doesn't work either
|
|
||||||
# I don't know how to fix this, but line 49 works, it extracts id from the webpage
|
|
||||||
# I think there might be a smarter way, but I'm just not smart enough
|
|
||||||
video_id = self.get_temp_id(url)
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
data = self._search_json(r'window\.__INITIAL_DATA__\s*=', webpage, 'initial data', video_id)
|
||||||
# print('==========')
|
# print('==========')
|
||||||
# print(webpage)
|
# print(webpage)
|
||||||
video_id = self._html_search_regex(r'currentEncodeVid: \'(.+?)\',', webpage, 'xhtml')
|
# print(data)
|
||||||
title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
|
|
||||||
video_url = self._html_search_regex(r'<meta property="og:url" content="(.+?)"/>', webpage, 'og:url')
|
# The json file produced the same results as the regex did, but it's much cleaner, thanks for the guidance
|
||||||
|
video_id = data['data']['data']['data']['extra']['videoId']
|
||||||
|
videoLongId = str(data['data']['data']['data']['extra']['videoLongId'])
|
||||||
|
title = data['data']['data']['data']['extra']['videoTitle']
|
||||||
|
show_name = data['data']['data']['data']['extra']['showName']
|
||||||
|
|
||||||
|
video_url = 'https://play.tudou.com' + data['config']['url']
|
||||||
|
# About video_url
|
||||||
|
# The video url is not stored in the json file above, instead, the website uses m3u8 scheme
|
||||||
|
# With the F12 developer tools, I've located one request.
|
||||||
|
# Each time I click the button to play the video, the browser will GET a .m3u8 file which contains urls of all clips of that video, in currently selected resolution (in the webpage player).
|
||||||
|
# In the Debugger panel, I also found a get.json file. Visiting its source url directly fails, but I can right-click and download get.json. In the get.json file, there are 4 m3u8_url entries that represent all 4 resolutions available for this video.
|
||||||
|
# These 2 files might be what I should be looking for, guess so.
|
||||||
|
|
||||||
|
# Tried to copy the link and send the request via PYTHON request module, with headers, fail, 403
|
||||||
|
# Tried to copy the cUrl and send via Insomnia, fail again, 403 forbidden
|
||||||
|
# Tudou.com is a bit similar to Youku.com (already available in yt-dlp). Tudou.com was acquired by Youku.com many years ago; they're probably sharing some servers, and I do find similar domains in these 2 sites
|
||||||
|
# Therefore I also checked the Youku extractor, but I don't know how they arrived at things like line 119, 'https://log.mmstat.com/eg.js'
|
||||||
|
# I also searched the internet and found another code for Youku.com, in that code there're token settings, appKey, sign, etc..
|
||||||
|
|
||||||
|
# So I'm guessing, for Tudou.com, there might be something to do with the token to pass the authentication...
|
||||||
|
# I'll keep looking into it, but if you can come up with any tips it'll be appreciated.
|
||||||
|
|
||||||
print('==========')
|
print('==========')
|
||||||
print(video_id)
|
print(f'videoId = {video_id}')
|
||||||
print(title)
|
print(f'videoLongId = {videoLongId}')
|
||||||
print(video_url)
|
print(f'title = {title}')
|
||||||
|
print(f'show_name = {show_name}')
|
||||||
|
print(f'video_url = {video_url}')
|
||||||
print('==========')
|
print('==========')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'url': video_url
|
'url': video_url,
|
||||||
|
'show_name': show_name,
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user