Merge 090fb58de7 into 1d253b0a27

[ie/afl] refactored AFCVideoIE
1. added tag option to 'get_element_html_by_attribute' function. this removes the need for using '_search_json' 2. swapped 'if not' statement with 'if' for simplification feedback: pzhlkj6612
2024-11-25 08:41:28 +01:00 · 2024-11-16 22:24:54 +01:00 · 2024-09-24 15:25:06 +05:30 · 2024-09-20 02:00:27 +05:30 · 2024-09-20 01:36:13 +05:30 · 2024-09-20 01:18:31 +05:30
3 changed files with 281 additions and 0 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -75,6 +75,12 @@ from .aenetworks import (
    HistoryTopicIE,
 )
 from .aeonco import AeonCoIE
+from .afl import (
+    AFCVideoIE,
+    AFLPodcastIE,
+    AFLVideoIE,
+    CarltonFCVideoIE,
+)
 from .afreecatv import (
    AfreecaTVCatchStoryIE,
    AfreecaTVIE,
@ -1437,6 +1443,7 @@ from .oftv import (
 )
 from .oktoberfesttv import OktoberfestTVIE
 from .olympics import OlympicsReplayIE
+from .omnyfm import OmnyFMShowIE
 from .on24 import On24IE
 from .ondemandkorea import (
    OnDemandKoreaIE,
--- a/yt_dlp/extractor/afl.py
+++ b/yt_dlp/extractor/afl.py
@ -0,0 +1,206 @@
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
+from .omnyfm import OmnyFMShowIE
+from ..utils import (
+    extract_attributes,
+    get_element_by_class,
+    get_element_html_by_attribute,
+    get_element_html_by_id,
+    smuggle_url,
+    str_or_none,
+    traverse_obj,
+    url_or_none,
+)
+
+
+class AFLVideoIE(InfoExtractor):
+    # Video pages on afl.com.au / lions.com.au; playback is delegated to BrightcoveNewIE.
+    IE_NAME = 'afl:video'
+    _VALID_URL = r'https?://(?:www\.)?(?:afl|lions)\.com\.au/(?:aflw/)?video/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.afl.com.au/aflw/video/1217670/the-w-show-aflws-line-in-the-sand-moment-bonnies-bold-bid',
+        'md5': '7000431c2bd3f96eddb5f63273aea83e',
+        'info_dict': {
+            'id': '6361825702112',
+            'ext': 'mp4',
+            'description': 'md5:d1fee2ae8e3ecf486c1f0f7aa19e724b',
+            'upload_date': '20240911',
+            'duration': 1523.28,
+            'tags': 'count:0',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'title': "The W Show: AFLW's 'line in the sand' moment, Bonnie's bold bid",
+            'uploader_id': '6057984922001',
+            'timestamp': 1726038522,
+        },
+    }, {
+        'url': 'https://www.lions.com.au/video/1655451/team-song-brisbane?videoId=1655451&modal=true&type=video&publishFrom=1726318577001',
+        'md5': '47e8c67e317b48a69787c8bc39c3c591',
+        'info_dict': {
+            'id': '6361958949112',
+            'ext': 'mp4',
+            'description': 'md5:c0fb37fcad9ec0f49ac54eb8d76641bd',
+            'upload_date': '20240914',
+            'duration': 41.0,
+            'tags': 'count:0',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'title': 'Team Song: Brisbane',
+            'uploader_id': '6057984922001',
+            'timestamp': 1726318788,
+        },
+    }, {
+        'url': 'https://www.afl.com.au/video/1217264/bulldogs-season-review-gold-plated-list-going-to-waste-duos-frightening-future?videoId=1217264&modal=true&type=video&publishFrom=1725998400001',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.afl.com.au/video/1210885/wafl-showreel-ef-hamish-davis-highlights?videoId=1210885&modal=true&type=video&publishFrom=1725171238001',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.lions.com.au/video/1657551/svarc-weve-built-up-really-well?videoId=1657551&modal=true&type=video&publishFrom=1726545600001',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        page_id = self._match_id(url)
+        html = self._download_webpage(url, page_id)
+
+        # The Brightcove account, player and video ids are read from the data-* attributes
+        # that extract_attributes() parses out of the inline player container markup.
+        player = traverse_obj(
+            extract_attributes(get_element_by_class('inline-player__player-container', html)), {
+                'account_id': ('data-account', {str_or_none}),
+                'player_id': ('data-player', {lambda x: f'{x}_default'}, {str_or_none}),
+                'video_id': ('data-video-id', {str_or_none}),
+            })
+        account_id, player_id, video_id = map(player.get, ('account_id', 'player_id', 'video_id'))
+
+        # The page URL is smuggled along as the referrer for the Brightcove extractor.
+        return self.url_result(
+            smuggle_url(
+                f'https://players.brightcove.net/{account_id}/{player_id}/index.html?videoId={video_id}',
+                {'referrer': url}),
+            BrightcoveNewIE)
+
+
+class AFLPodcastIE(InfoExtractor):
+    # Podcast pages on afl.com.au / carltonfc.com.au; the page's Omny embed is handed to OmnyFMShowIE.
+    IE_NAME = 'afl:podcast'
+    _VALID_URL = r'https?://(?:www\.)?(?:afl|carltonfc)\.com\.au/(?:aflw/)?podcasts/(?P<id>[\w-]+)'
+    _TESTS = [{
+        'url': 'https://www.afl.com.au/podcasts/between-us',
+        'info_dict': {
+            'id': 'e0ab8454-f818-483f-bed1-b156002c021f',
+            'title': 'Between Us',
+        },
+        'playlist_mincount': 7,
+    }, {
+        'url': 'https://www.carltonfc.com.au/podcasts/walk-a-mile',
+        'info_dict': {
+            'id': '6dbb9b23-7f00-49d4-b44e-aec2017651dc',
+            'title': 'Walk a Mile in Their Shoes',
+        },
+        'playlist_mincount': 3,
+    }, {
+        'url': 'https://www.afl.com.au/podcasts/afl-daily',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.carltonfc.com.au/podcasts/summer-sessions',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        # The show URL is the `src` attribute parsed out of the 'omny-embed' element's markup
+        element = get_element_by_class('omny-embed', webpage)
+        podcast_url = traverse_obj(extract_attributes(element), ('src', {url_or_none}))
+        return self.url_result(podcast_url, OmnyFMShowIE)
+
+
+class AFCVideoIE(InfoExtractor):
+    # Video pages on afc.com.au; playback is delegated to BrightcoveNewIE.
+    IE_NAME = 'afc:video'
+    _VALID_URL = r'https?://(?:www\.)?afc\.com\.au/video/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.afc.com.au/video/1657583/girls-academies-be-a-pro?videoId=1657583&modal=true&type=video&publishFrom=1726548621001',
+        'md5': 'd0f4ec78b5a693d95c975ae3aeed8b2d',
+        'info_dict': {
+            'id': '6362048189112',
+            'ext': 'mp4',
+            'description': 'md5:5c43f1affe1a0cd8e2192358a49de9cc',
+            'upload_date': '20240917',
+            'duration': 50.48,
+            'tags': 'count:0',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'title': 'Girls Academies – ‘Be a Pro’',
+            'uploader_id': '6057984922001',
+            'timestamp': 1726548942,
+        },
+    }, {
+        'url': 'https://www.afc.com.au/video/1586280/se10ep16-the-crows-show?videoId=1586280&modal=true&type=video&publishFrom=1719639000001&tagNames=crowsshowepisode',
+        'md5': 'bd9984d62f87b4c2299bb62ffc869189',
+        'info_dict': {
+            'id': '6355746458112',
+            'ext': 'mp4',
+            'description': 'md5:4470d107af6e749a8225fd558b98b50b',
+            'upload_date': '20240627',
+            'duration': 1193.64,
+            'tags': 'count:0',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'title': 'SE10EP16 - The Crows Show',
+            'uploader_id': '6057984922001',
+            'timestamp': 1719466601,
+        },
+    }, {
+        'url': 'https://www.afc.com.au/video/1634706/jones-radiology-injury-update-r24?videoId=1634706&modal=true&type=video&publishFrom=1724126172001',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        # Brightcove account and player ids come from the #VideoModal element's data-* attributes
+        video_attrs = extract_attributes(get_element_html_by_id('VideoModal', webpage))
+        player_id = video_attrs['data-player-id'] + '_default'
+        account_id = video_attrs['data-account-id']
+
+        # Prefer the JSON embedded in the <a data-id="..."> element's data-ui-args attribute.
+        # Fall back to the API request whenever that element, the attribute, valid JSON or
+        # the mediaId itself is missing, instead of aborting on a KeyError/parse error.
+        video_element_html = get_element_html_by_attribute('data-id', display_id, webpage, tag='a')
+        ui_args = extract_attributes(video_element_html).get('data-ui-args') if video_element_html else None
+        video_data = self._parse_json(ui_args, display_id, fatal=False) if ui_args else None
+        if not traverse_obj(video_data, 'mediaId'):
+            video_data = self._download_json(
+                f'https://aflapi.afc.com.au/content/aflc-adel/video/en/{display_id}', display_id)
+        video_id = video_data['mediaId']
+
+        video_url = f'https://players.brightcove.net/{account_id}/{player_id}/index.html?videoId={video_id}'
+        # The page URL is smuggled along as the referrer for the Brightcove extractor
+        video_url = smuggle_url(video_url, {'referrer': url})
+        return self.url_result(video_url, BrightcoveNewIE)
+
+
+class CarltonFCVideoIE(InfoExtractor):
+    # Video pages on carltonfc.com.au; playback is delegated to BrightcoveNewIE.
+    IE_NAME = 'carltonfc:video'
+    _VALID_URL = r'https?://(?:www\.)?carltonfc\.com\.au/video/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.carltonfc.com.au/video/1657596/cripps-on-taking-carlton-to-the-next-level?videoId=1657596&modal=true&type=video&publishFrom=1726555500001',
+        'md5': '67916ea9dd28376365184bb3869a1548',
+        'info_dict': {
+            'id': '6362046715112',
+            'ext': 'mp4',
+            'description': 'md5:02eeff6576fcd7c33e18e34b1b0ebf56',
+            'upload_date': '20240917',
+            'duration': 90.44,
+            'tags': 'count:0',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'title': 'Cripps on taking Carlton to the next level',
+            'uploader_id': '6057984922001',
+            'timestamp': 1726550622,
+        },
+    }, {
+        'url': 'https://www.carltonfc.com.au/video/1658173/the-rundown-impact-of-fans?videoId=1658173&modal=true&type=video&publishFrom=1726630922001',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        page_id = self._match_id(url)
+        html = self._download_webpage(url, page_id)
+
+        # Brightcove account and player ids come from the #VideoModal element's data-* attributes
+        modal_attrs = extract_attributes(get_element_html_by_id('VideoModal', html))
+        player_id = modal_attrs['data-player-id'] + '_default'
+        account_id = modal_attrs['data-account-id']
+
+        # The media id is read from the JSON that follows `data-ui-args=` inside the markup of
+        # the element whose data-id equals the page's numeric id
+        video_id = self._search_json(
+            r'data-ui-args\s*=\s*["\']',
+            get_element_html_by_attribute('data-id', page_id, html),
+            'video-id', page_id)['mediaId']
+
+        # The page URL is smuggled along as the referrer for the Brightcove extractor
+        return self.url_result(
+            smuggle_url(
+                f'https://players.brightcove.net/{account_id}/{player_id}/index.html?videoId={video_id}',
+                {'referrer': url}),
+            BrightcoveNewIE)
--- a/yt_dlp/extractor/omnyfm.py
+++ b/yt_dlp/extractor/omnyfm.py
@ -0,0 +1,68 @@
+import functools
+import math
+
+from .common import InfoExtractor
+from ..utils import (
+    InAdvancePagedList,
+    clean_html,
+    float_or_none,
+    int_or_none,
+    str_or_none,
+    traverse_obj,
+    unified_strdate,
+    unified_timestamp,
+    url_or_none,
+)
+
+
+class OmnyFMShowIE(InfoExtractor):
+    # Exposes the clips of an omny.fm show as a playlist, paging through the Omny clips API.
+    IE_NAME = 'omnyfm:show'
+    _VALID_URL = r'https?://omny\.fm/shows/(?P<id>[^/]+)'
+    _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://omny\.fm/shows/.+?)\1']
+    _PAGE_SIZE = 10
+    _TESTS = [{
+        'url': 'https://omny.fm/shows/league-leaders',
+        'info_dict': {
+            'id': 'bbe146d4-9bee-4763-b785-ad830009a23f',
+            'title': 'League Leaders with Nicole Livingstone',
+        },
+        'playlist_mincount': 15,
+    }, {
+        'url': 'https://omny.fm/shows/afl-daily',
+        'only_matching': True,
+    }]
+
+    def _fetch_page(self, org_id, playlist_id, page):
+        # `page` is sent as the API's `cursor` parameter; at most _PAGE_SIZE clips are requested
+        return self._download_json(
+            f'https://api.omny.fm/orgs/{org_id}/programs/{playlist_id}/clips?cursor={page}&pageSize={self._PAGE_SIZE}',
+            f'{playlist_id}_{page}')
+
+    def _entries(self, org_id, playlist_id, first_page_data, page):
+        # `page` is the 0-based index supplied by InAdvancePagedList. Page 0 is served from the
+        # data embedded in the show page when available; every other page is requested with
+        # cursor `page + 1`.
+        # NOTE(review): fetching cursor 1 when the embedded data is missing assumes cursor 1
+        # is the first page - confirm against the API.
+        if page == 0 and first_page_data:
+            data = first_page_data
+        else:
+            data = self._fetch_page(org_id, playlist_id, page + 1)
+
+        for clip in traverse_obj(data, ('Clips', ..., {dict})):
+            yield traverse_obj(clip, {
+                'id': ('Id', {str_or_none}),
+                'title': ('Title', {str_or_none}),
+                'description': ('Description', {clean_html}),
+                'thumbnail': (('ImageUrl', 'ArtworkUrl'), {url_or_none}, any),
+                'duration': ('DurationSeconds', {float_or_none}),
+                'url': ('AudioUrl', {url_or_none}),
+                'season_number': ('Season', {int_or_none}),
+                'episode_number': ('Episode', {int_or_none}),
+                # A Unix timestamp is expected here; unified_strdate() would produce a
+                # YYYYMMDD string, which must not be coerced into the timestamp field.
+                'timestamp': ('PublishedUtc', {unified_timestamp}),
+                'filesize': ('PublishedAudioSizeInBytes', {int}),
+            })
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        # Only the show slug is kept, so any trailing path/query in the input URL is dropped
+        page_url = 'https://omny.fm/shows/' + display_id
+        webpage = self._download_webpage(page_url, display_id)
+
+        page_props = traverse_obj(
+            self._search_nextjs_data(webpage, display_id), ('props', 'pageProps', {dict})) or {}
+        program = traverse_obj(page_props, ('program', {dict})) or {}
+        org_id = traverse_obj(program, ('OrganizationId', {str_or_none}))
+        playlist_id = traverse_obj(program, ('Id', {str_or_none}))
+        title = traverse_obj(program, ('Name', {str_or_none}))
+        first_page_data = traverse_obj(page_props, ('clips', {dict}))
+
+        # A missing clip count must not raise a TypeError; always expose at least the first page
+        playlist_count = traverse_obj(program, ('DefaultPlaylist', 'NumberOfClips', {int_or_none})) or 0
+        total_pages = max(1, math.ceil(playlist_count / self._PAGE_SIZE))
+
+        return self.playlist_result(InAdvancePagedList(
+            functools.partial(self._entries, org_id, playlist_id, first_page_data),
+            total_pages, self._PAGE_SIZE), playlist_id, title)
Author	SHA1	Message	Date
Subrat Lima	fd08e25223	Merge `090fb58de7` into `1d253b0a27`	2024-11-16 22:24:54 +01:00
subrat-lima	090fb58de7	[ie/afl] refactored AFCVideoIE 1. added tag option to 'get_element_html_by_attribute' function. this removes the need for using '_search_json' 2. swapped 'if not' statement with 'if' for simplification feedback: pzhlkj6612	2024-09-24 15:25:06 +05:30
subrat-lima	cb0aa20d4f	[ie/afl] updated AFCVideoIE & CarltonFCVideoIE mediaId parser	2024-09-20 02:00:27 +05:30
subrat-lima	2c49f52c04	[ie/omnyfm] added back the InAdvancePagedList function	2024-09-20 01:36:13 +05:30
subrat-lima	4ff1288758	[ie/omnyfm] updated _VALID_URL regex pattern for id	2024-09-20 01:18:31 +05:30
subrat-lima	93e65f14dc	[ie/afl] updated _VALID_URL regex to properly escape dot character	2024-09-20 01:07:41 +05:30
subrat-lima	d40dbdc50b	[ie/omnyfm] updated _EMBED_REGEX	2024-09-20 01:02:07 +05:30
subrat-lima	d9e0e023b8	[ie/afl] fixed AFLCFVideoIE incorrect video parsing & split CarltonFCVideoIE Changes made: 1. AFCVideoIE, CarltonFCVideoIE: fixed the issue to find the correct video media id. (thanks to pzhlkj6612a) 2. AFCVideoIE: added support for pages where mediaId is unavailable in webpage 3. CarltonFCVideoIE: split from AFCVideoIE because of point 2 as the variation is specific to AFCVideo 4. Updated test cases	2024-09-19 15:39:55 +05:30
subrat-lima	74e26f7599	[ie/afl] updated AFCVideoIE to extract video attrs using html extract_attributes function	2024-09-19 14:28:55 +05:30
subrat-lima	549d28cd04	[ie/omnyfm] updated extractor to use _search_nextjs_data for json data parsing	2024-09-19 14:01:03 +05:30
subrat-lima	3e4523b78c	[ie/omnyfm] updated extractor to use OnDemandPagedList instead of InAdvancePagedList	2024-09-19 13:40:37 +05:30
subrat-lima	e17e2beea6	[ie/afl] add support for lions.com.au and carltonfc.com.au made the following changes: 1. added support for lions.com.au videos 1. added support for carltonfc.com.au videos and podcasts	2024-09-18 15:53:14 +05:30
subrat-lima	b62a7cf725	[ie/afl] added AFCVideoIE	2024-09-18 15:16:55 +05:30
subrat-lima	c386fc0d43	Merge branch 'master' into ie-afl	2024-09-18 13:32:07 +05:30
subrat-lima	5fea24bda2	[ie/afl][ie/omnyfm] added AFLPodcastIE and updated OmnyFMShowIE 1. AFLPodcastIE: Added extractor for AFL podcasts 2. OmnyFMShowIE: Updated code to adjust url before download page to support various url patterns	2024-09-18 13:23:50 +05:30
subrat-lima	8125680192	[ie/afl] added OmnyFMShow extractor	2024-09-18 09:21:58 +05:30
subrat-lima	380027be4e	[ie/afl] added AFLVideoIE extractor	2024-09-12 13:51:02 +05:30