Compare commits

...

17 Commits

Author SHA1 Message Date
Subrat Lima
e6a2bdc9ab
Merge 090fb58de7 into eb15fd5a32 2024-11-17 17:03:36 +01:00
subrat-lima
090fb58de7 [ie/afl] refactored AFCVideoIE
1. added tag option to 'get_element_html_by_attribute' function.
   this removes the need for using '_search_json'
2. swapped 'if not' statement with 'if' for simplification

feedback: pzhlkj6612
2024-09-24 15:25:06 +05:30
subrat-lima
cb0aa20d4f [ie/afl] updated AFCVideoIE & CarltonFCVideoIE mediaId parser 2024-09-20 02:00:27 +05:30
subrat-lima
2c49f52c04 [ie/omnyfm] added back the InAdvancePagedList function 2024-09-20 01:36:13 +05:30
subrat-lima
4ff1288758 [ie/omnyfm] updated _VALID_URL regex pattern for id 2024-09-20 01:18:31 +05:30
subrat-lima
93e65f14dc [ie/afl] updated _VALID_URL regex to properly escape dot character 2024-09-20 01:07:41 +05:30
subrat-lima
d40dbdc50b [ie/omnyfm] updated _EMBED_REGEX 2024-09-20 01:02:07 +05:30
subrat-lima
d9e0e023b8 [ie/afl] fixed AFLCFVideoIE incorrect video parsing & split CarltonFCVideoIE
Changes made:
1. AFCVideoIE, CarltonFCVideoIE: fixed the issue to find the correct
   video media id. (thanks to pzhlkj6612)
2. AFCVideoIE: added support for pages where mediaId is unavailable
   in webpage
3. CarltonFCVideoIE: split from AFCVideoIE because of point 2 as the
   variation is specific to AFCVideo
4. Updated test cases
2024-09-19 15:39:55 +05:30
subrat-lima
74e26f7599 [ie/afl] updated AFCVideoIE to extract video attrs using html extract_attributes function 2024-09-19 14:28:55 +05:30
subrat-lima
549d28cd04 [ie/omnyfm] updated extractor to use _search_nextjs_data for json data parsing 2024-09-19 14:01:03 +05:30
subrat-lima
3e4523b78c [ie/omnyfm] updated extractor to use OnDemandPagedList instead of InAdvancePagedList 2024-09-19 13:40:37 +05:30
subrat-lima
e17e2beea6 [ie/afl] add support for lions.com.au and carltonfc.com.au
made the following changes:
1. added support for lions.com.au videos
2. added support for carltonfc.com.au videos and podcasts
2024-09-18 15:53:14 +05:30
subrat-lima
b62a7cf725 [ie/afl] added AFCVideoIE 2024-09-18 15:16:55 +05:30
subrat-lima
c386fc0d43 Merge branch 'master' into ie-afl 2024-09-18 13:32:07 +05:30
subrat-lima
5fea24bda2 [ie/afl][ie/omnyfm] added AFLPodcastIE and updated OmnyFMShowIE
1. AFLPodcastIE: Added extractor for AFL podcasts
2. OmnyFMShowIE: Updated code to adjust the url before downloading the
   page, to support various url patterns
2024-09-18 13:23:50 +05:30
subrat-lima
8125680192 [ie/afl] added OmnyFMShow extractor 2024-09-18 09:21:58 +05:30
subrat-lima
380027be4e [ie/afl] added AFLVideoIE extractor 2024-09-12 13:51:02 +05:30
3 changed files with 281 additions and 0 deletions

View File

@ -75,6 +75,12 @@ from .aenetworks import (
HistoryTopicIE, HistoryTopicIE,
) )
from .aeonco import AeonCoIE from .aeonco import AeonCoIE
from .afl import (
AFCVideoIE,
AFLPodcastIE,
AFLVideoIE,
CarltonFCVideoIE,
)
from .afreecatv import ( from .afreecatv import (
AfreecaTVCatchStoryIE, AfreecaTVCatchStoryIE,
AfreecaTVIE, AfreecaTVIE,
@ -1441,6 +1447,7 @@ from .oftv import (
) )
from .oktoberfesttv import OktoberfestTVIE from .oktoberfesttv import OktoberfestTVIE
from .olympics import OlympicsReplayIE from .olympics import OlympicsReplayIE
from .omnyfm import OmnyFMShowIE
from .on24 import On24IE from .on24 import On24IE
from .ondemandkorea import ( from .ondemandkorea import (
OnDemandKoreaIE, OnDemandKoreaIE,

206
yt_dlp/extractor/afl.py Normal file
View File

@ -0,0 +1,206 @@
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from .omnyfm import OmnyFMShowIE
from ..utils import (
extract_attributes,
get_element_by_class,
get_element_html_by_attribute,
get_element_html_by_id,
smuggle_url,
str_or_none,
traverse_obj,
url_or_none,
)
class AFLVideoIE(InfoExtractor):
    """Extractor for video pages on afl.com.au and lions.com.au.

    The Brightcove account, player and video identifiers are read from the
    page's inline player markup; the actual extraction is delegated to
    BrightcoveNewIE.
    """

    IE_NAME = 'afl:video'
    _VALID_URL = r'https?://(?:www\.)?(?:afl|lions)\.com\.au/(?:aflw/)?video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.afl.com.au/aflw/video/1217670/the-w-show-aflws-line-in-the-sand-moment-bonnies-bold-bid',
        'md5': '7000431c2bd3f96eddb5f63273aea83e',
        'info_dict': {
            'id': '6361825702112',
            'ext': 'mp4',
            'description': 'md5:d1fee2ae8e3ecf486c1f0f7aa19e724b',
            'upload_date': '20240911',
            'duration': 1523.28,
            'tags': 'count:0',
            'thumbnail': r're:^https?://.*\.jpg$',
            'title': "The W Show: AFLW's 'line in the sand' moment, Bonnie's bold bid",
            'uploader_id': '6057984922001',
            'timestamp': 1726038522,
        },
    }, {
        'url': 'https://www.lions.com.au/video/1655451/team-song-brisbane?videoId=1655451&modal=true&type=video&publishFrom=1726318577001',
        'md5': '47e8c67e317b48a69787c8bc39c3c591',
        'info_dict': {
            'id': '6361958949112',
            'ext': 'mp4',
            'description': 'md5:c0fb37fcad9ec0f49ac54eb8d76641bd',
            'upload_date': '20240914',
            'duration': 41.0,
            'tags': 'count:0',
            'thumbnail': r're:^https?://.*\.jpg$',
            'title': 'Team Song: Brisbane',
            'uploader_id': '6057984922001',
            'timestamp': 1726318788,
        },
    }, {
        'url': 'https://www.afl.com.au/video/1217264/bulldogs-season-review-gold-plated-list-going-to-waste-duos-frightening-future?videoId=1217264&modal=true&type=video&publishFrom=1725998400001',
        'only_matching': True,
    }, {
        'url': 'https://www.afl.com.au/video/1210885/wafl-showreel-ef-hamish-davis-highlights?videoId=1210885&modal=true&type=video&publishFrom=1725171238001',
        'only_matching': True,
    }, {
        'url': 'https://www.lions.com.au/video/1657551/svarc-weve-built-up-really-well?videoId=1657551&modal=true&type=video&publishFrom=1726545600001',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the Brightcove player URL for the page and delegate to BrightcoveNewIE."""
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        container = get_element_by_class('inline-player__player-container', webpage)
        brightcove_ids = traverse_obj(extract_attributes(container), {
            'account_id': ('data-account', {str_or_none}),
            # The player URL is built with the '<player>_default' form of the id
            'player_id': ('data-player', {lambda x: f'{x}_default'}, {str_or_none}),
            'video_id': ('data-video-id', {str_or_none}),
        })
        # NOTE(review): nothing validates these values; an attribute that is
        # absent from the markup is formatted into the URL as the text "None".
        account_id, player_id, video_id = (
            brightcove_ids.get(key) for key in ('account_id', 'player_id', 'video_id'))

        brightcove_url = f'https://players.brightcove.net/{account_id}/{player_id}/index.html?videoId={video_id}'
        # The originating page is smuggled along as the referrer
        return self.url_result(smuggle_url(brightcove_url, {'referrer': url}), BrightcoveNewIE)
class AFLPodcastIE(InfoExtractor):
    """Extractor for podcast pages on afl.com.au and carltonfc.com.au.

    The page embeds an Omny player; its source URL is read from the
    'omny-embed' element and handed to OmnyFMShowIE, which produces the
    playlist.
    """

    IE_NAME = 'afl:podcast'
    _VALID_URL = r'https?://(?:www\.)?(?:afl|carltonfc)\.com\.au/(?:aflw/)?podcasts/(?P<id>[\w-]+)'
    # These are playlist tests, so they carry no 'md5' key: an md5 describes a
    # single downloaded file, not a playlist.
    _TESTS = [{
        'url': 'https://www.afl.com.au/podcasts/between-us',
        'info_dict': {
            'id': 'e0ab8454-f818-483f-bed1-b156002c021f',
            'title': 'Between Us',
        },
        'playlist_mincount': 7,
    }, {
        'url': 'https://www.carltonfc.com.au/podcasts/walk-a-mile',
        'info_dict': {
            'id': '6dbb9b23-7f00-49d4-b44e-aec2017651dc',
            'title': 'Walk a Mile in Their Shoes',
        },
        'playlist_mincount': 3,
    }, {
        'url': 'https://www.afl.com.au/podcasts/afl-daily',
        'only_matching': True,
    }, {
        'url': 'https://www.carltonfc.com.au/podcasts/summer-sessions',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Locate the embedded Omny show URL on the page and delegate to OmnyFMShowIE."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        element = get_element_by_class('omny-embed', webpage)
        # Only a syntactically valid URL is passed on; anything else becomes None
        podcast_url = traverse_obj(extract_attributes(element), ('src', {url_or_none}))
        return self.url_result(podcast_url, OmnyFMShowIE)
class AFCVideoIE(InfoExtractor):
    """Extractor for video pages on afc.com.au.

    Brightcove account and player ids come from the 'VideoModal' element.
    The Brightcove media id comes from the page's video link or, when the
    page has no such link, from the site's content API. Extraction itself is
    delegated to BrightcoveNewIE.
    """

    IE_NAME = 'afc:video'
    _VALID_URL = r'https?://(?:www\.)?afc\.com\.au/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.afc.com.au/video/1657583/girls-academies-be-a-pro?videoId=1657583&modal=true&type=video&publishFrom=1726548621001',
        'md5': 'd0f4ec78b5a693d95c975ae3aeed8b2d',
        'info_dict': {
            'id': '6362048189112',
            'ext': 'mp4',
            'description': 'md5:5c43f1affe1a0cd8e2192358a49de9cc',
            'upload_date': '20240917',
            'duration': 50.48,
            'tags': 'count:0',
            'thumbnail': r're:^https?://.*\.jpg$',
            'title': 'Girls Academies Be a Pro',
            'uploader_id': '6057984922001',
            'timestamp': 1726548942,
        },
    }, {
        'url': 'https://www.afc.com.au/video/1586280/se10ep16-the-crows-show?videoId=1586280&modal=true&type=video&publishFrom=1719639000001&tagNames=crowsshowepisode',
        'md5': 'bd9984d62f87b4c2299bb62ffc869189',
        'info_dict': {
            'id': '6355746458112',
            'ext': 'mp4',
            'description': 'md5:4470d107af6e749a8225fd558b98b50b',
            'upload_date': '20240627',
            'duration': 1193.64,
            'tags': 'count:0',
            'thumbnail': r're:^https?://.*\.jpg$',
            'title': 'SE10EP16 - The Crows Show',
            'uploader_id': '6057984922001',
            'timestamp': 1719466601,
        },
    }, {
        'url': 'https://www.afc.com.au/video/1634706/jones-radiology-injury-update-r24?videoId=1634706&modal=true&type=video&publishFrom=1724126172001',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Assemble the Brightcove player URL for the page and delegate to BrightcoveNewIE."""
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        modal_attrs = extract_attributes(get_element_html_by_id('VideoModal', webpage))
        player_id = modal_attrs['data-player-id'] + '_default'
        account_id = modal_attrs['data-account-id']

        media_id = self._get_video_data(webpage, page_id)['mediaId']
        brightcove_url = f'https://players.brightcove.net/{account_id}/{player_id}/index.html?videoId={media_id}'
        return self.url_result(smuggle_url(brightcove_url, {'referrer': url}), BrightcoveNewIE)

    def _get_video_data(self, webpage, page_id):
        """Return the video metadata mapping from the page, or from the content API as fallback."""
        # Restrict the lookup to <a> elements whose data-id equals the page's video id
        anchor_html = get_element_html_by_attribute('data-id', page_id, webpage, tag='a')
        if anchor_html:
            return self._parse_json(extract_attributes(anchor_html)['data-ui-args'], page_id)
        # No matching link in the webpage: ask the content API instead
        return self._download_json(f'https://aflapi.afc.com.au/content/aflc-adel/video/en/{page_id}', page_id)
class CarltonFCVideoIE(InfoExtractor):
    """Extractor for video pages on carltonfc.com.au.

    Brightcove account and player ids come from the 'VideoModal' element; the
    media id is parsed from the JSON in the 'data-ui-args' attribute found in
    the element whose data-id equals the page's video id. Extraction itself
    is delegated to BrightcoveNewIE.
    """

    IE_NAME = 'carltonfc:video'
    _VALID_URL = r'https?://(?:www\.)?carltonfc\.com\.au/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.carltonfc.com.au/video/1657596/cripps-on-taking-carlton-to-the-next-level?videoId=1657596&modal=true&type=video&publishFrom=1726555500001',
        'md5': '67916ea9dd28376365184bb3869a1548',
        'info_dict': {
            'id': '6362046715112',
            'ext': 'mp4',
            'description': 'md5:02eeff6576fcd7c33e18e34b1b0ebf56',
            'upload_date': '20240917',
            'duration': 90.44,
            'tags': 'count:0',
            'thumbnail': r're:^https?://.*\.jpg$',
            'title': 'Cripps on taking Carlton to the next level',
            'uploader_id': '6057984922001',
            'timestamp': 1726550622,
        },
    }, {
        'url': 'https://www.carltonfc.com.au/video/1658173/the-rundown-impact-of-fans?videoId=1658173&modal=true&type=video&publishFrom=1726630922001',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Assemble the Brightcove player URL for the page and delegate to BrightcoveNewIE."""
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        modal_attrs = extract_attributes(get_element_html_by_id('VideoModal', webpage))
        player_id = modal_attrs['data-player-id'] + '_default'
        account_id = modal_attrs['data-account-id']

        # The JSON is searched for in the element's raw HTML, right after the
        # opening quote of its data-ui-args attribute
        item_html = get_element_html_by_attribute('data-id', page_id, webpage)
        ui_args = self._search_json(r'data-ui-args\s*=\s*["\']', item_html, 'video-id', page_id)
        media_id = ui_args['mediaId']

        brightcove_url = f'https://players.brightcove.net/{account_id}/{player_id}/index.html?videoId={media_id}'
        return self.url_result(smuggle_url(brightcove_url, {'referrer': url}), BrightcoveNewIE)

View File

@ -0,0 +1,68 @@
import functools
import math
from .common import InfoExtractor
from ..utils import (
    InAdvancePagedList,
    clean_html,
    float_or_none,
    int_or_none,
    str_or_none,
    traverse_obj,
    unified_strdate,
    unified_timestamp,
    url_or_none,
)
class OmnyFMShowIE(InfoExtractor):
    """Playlist extractor for omny.fm show pages.

    Show metadata and the first page of clips are read from the Next.js data
    embedded in the show page; further pages of clips are fetched from the
    api.omny.fm clips endpoint, _PAGE_SIZE clips at a time.
    """

    IE_NAME = 'omnyfm:show'
    # The id stops at '/', '?' or '#', so query strings and fragments are not
    # captured as part of the show id
    _VALID_URL = r'https?://omny\.fm/shows/(?P<id>[^/?#]+)'
    _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://omny\.fm/shows/.+?)\1']
    _PAGE_SIZE = 10
    _TESTS = [{
        'url': 'https://omny.fm/shows/league-leaders',
        'info_dict': {
            'id': 'bbe146d4-9bee-4763-b785-ad830009a23f',
            'title': 'League Leaders with Nicole Livingstone',
        },
        'playlist_mincount': 15,
    }, {
        'url': 'https://omny.fm/shows/afl-daily',
        'only_matching': True,
    }]

    def _fetch_page(self, org_id, playlist_id, page):
        """Download one page of clips for the program; `page` is sent as the API cursor."""
        return self._download_json(
            f'https://api.omny.fm/orgs/{org_id}/programs/{playlist_id}/clips?cursor={page}&pageSize={self._PAGE_SIZE}',
            f'{playlist_id}_{page}')

    def _entries(self, org_id, playlist_id, first_page_data, page):
        """Yield the entries of one page; `page` is the 0-based index passed by the paged list.

        Page 0 is served from the data already embedded in the show page;
        every later page is requested from the API with cursor `page + 1`.
        """
        data = first_page_data if not page else self._fetch_page(org_id, playlist_id, page + 1)
        # Branching traversal yields nothing, instead of raising, when the
        # page data or its 'Clips' list is missing
        for clip in traverse_obj(data, ('Clips', ..., {dict})):
            yield traverse_obj(clip, {
                'id': ('Id', {str_or_none}),
                'title': ('Title', {str_or_none}),
                'description': ('Description', {clean_html}),
                'thumbnail': (('ImageUrl', 'ArtworkUrl'), {url_or_none}, any),
                'duration': ('DurationSeconds', {float_or_none}),
                'url': ('AudioUrl', {url_or_none}),
                'season_number': ('Season', {int_or_none}),
                'episode_number': ('Episode', {int_or_none}),
                # 'timestamp' must be epoch seconds; converting a YYYYMMDD
                # date string to int does not produce one
                'timestamp': ('PublishedUtc', {unified_timestamp}),
                'filesize': ('PublishedAudioSizeInBytes', {int}),
            })

    def _real_extract(self, url):
        """Return a playlist result covering all clips of the show."""
        display_id = self._match_id(url)
        # Always load the canonical show page, whatever sub-path the input URL had
        page_url = 'https://omny.fm/shows/' + display_id
        webpage = self._download_webpage(page_url, display_id)
        page_props = traverse_obj(
            self._search_nextjs_data(webpage, display_id), ('props', 'pageProps', {dict})) or {}

        org_id = traverse_obj(page_props, ('program', 'OrganizationId', {str_or_none}))
        playlist_id = traverse_obj(page_props, ('program', 'Id', {str_or_none}))
        title = traverse_obj(page_props, ('program', 'Name', {str_or_none}))
        first_page_data = traverse_obj(page_props, ('clips', {dict}))

        # A missing clip count must not abort extraction; at least the first
        # (embedded) page is always served
        clip_count = traverse_obj(
            page_props, ('program', 'DefaultPlaylist', 'NumberOfClips', {int_or_none})) or 0
        total_pages = max(1, math.ceil(clip_count / self._PAGE_SIZE))

        return self.playlist_result(InAdvancePagedList(
            functools.partial(self._entries, org_id, playlist_id, first_page_data),
            total_pages, self._PAGE_SIZE), playlist_id, title)