[utils] `write_xattr`: Use `os.setxattr` if available (#8205)

Closes #8193. Authored by: bashonly, Grub4K. Co-authored-by: Simon Sawicki <contact@grub4k.xyz>
[ie/nhk] Fix Japanese-language VOD extraction (#8309)
2024-09-20 09:21:25 +02:00 · 2023-10-09 18:30:36 +00:00 · 2023-10-09 18:00:26 +00:00 · 2023-10-09 17:55:46 +00:00 · 2023-10-09 17:51:37 +00:00 · 2023-10-09 17:46:16 +00:00
7 changed files with 135 additions and 58 deletions
--- a/README.md
+++ b/README.md
@ -280,7 +280,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly
 * [**mutagen**](https://github.com/quodlibet/mutagen)\* - For `--embed-thumbnail` in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING)
 * [**AtomicParsley**](https://github.com/wez/atomicparsley) - For `--embed-thumbnail` in `mp4`/`m4a` files when `mutagen`/`ffmpeg` cannot. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING)
-* [**xattr**](https://github.com/xattr/xattr), [**pyxattr**](https://github.com/iustin/pyxattr) or [**setfattr**](http://savannah.nongnu.org/projects/attr) - For writing xattr metadata (`--xattr`) on **Linux**. Licensed under [MIT](https://github.com/xattr/xattr/blob/master/LICENSE.txt), [LGPL2.1](https://github.com/iustin/pyxattr/blob/master/COPYING) and [GPLv2+](http://git.savannah.nongnu.org/cgit/attr.git/tree/doc/COPYING) respectively
+* [**xattr**](https://github.com/xattr/xattr), [**pyxattr**](https://github.com/iustin/pyxattr) or [**setfattr**](http://savannah.nongnu.org/projects/attr) - For writing xattr metadata (`--xattr`) on **Mac** and **BSD**. Licensed under [MIT](https://github.com/xattr/xattr/blob/master/LICENSE.txt), [LGPL2.1](https://github.com/iustin/pyxattr/blob/master/COPYING) and [GPLv2+](http://git.savannah.nongnu.org/cgit/attr.git/tree/doc/COPYING) respectively
 ### Misc
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -1992,7 +1992,10 @@ from .tencent import (
    WeTvSeriesIE,
 )
 from .tennistv import TennisTVIE
-from .tenplay import TenPlayIE
+from .tenplay import (
    TenPlayIE,
    TenPlaySeasonIE,
 )
 from .testurl import TestURLIE
 from .tf1 import TF1IE
 from .tfo import TFOIE
--- a/yt_dlp/extractor/arte.py
+++ b/yt_dlp/extractor/arte.py
@ -48,17 +48,7 @@ class ArteTVIE(ArteTVBaseIE):
    }, {
        'note': 'No alt_title',
        'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/',
-        'info_dict': {
+        'only_matching': True,
            'id': '110371-000-A',
            'ext': 'mp4',
            'upload_date': '20220718',
            'duration': 154,
            'timestamp': 1658162460,
            'description': 'md5:5890f36fe7dccfadb8b7c0891de54786',
            'title': 'La chaleur, supplice des arbres de rue',
            'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/CPE2sQDtD8GLQgt8DuYHLf/940x530',
        },
        'params': {'skip_download': 'm3u8'}
    }, {
        'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
        'only_matching': True,
@ -67,19 +57,20 @@ class ArteTVIE(ArteTVBaseIE):
        'only_matching': True,
    }, {
        'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/',
        'only_matching': True,
    }, {
        'note': 'age-restricted',
        'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/',
        'info_dict': {
-            'id': '110203-006-A',
+            'id': '006785-000-A',
-            'chapters': 'count:16',
+            'description': 'md5:c2f94fdfefc8a280e4dab68ab96ab0ba',
-            'description': 'md5:cf592f1df52fe52007e3f8eac813c084',
+            'title': 'The Element of Crime',
-            'alt_title': 'Zaz',
+            'timestamp': 1696111200,
-            'title': 'Baloise Session 2022',
+            'duration': 5849,
-            'timestamp': 1668445200,
+            'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/q82dTTfyuCXupPsGxXsd7B/940x530',
-            'duration': 4054,
+            'upload_date': '20230930',
            'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/ubQjmVCGyRx3hmBuZEK9QZ/940x530',
            'upload_date': '20221114',
            'ext': 'mp4',
-        },
+        }
        'expected_warnings': ['geo restricted']
    }]
    _GEO_BYPASS = True
@ -136,7 +127,9 @@ class ArteTVIE(ArteTVBaseIE):
        lang = mobj.group('lang') or mobj.group('lang_2')
        langauge_code = self._LANG_MAP.get(lang)
-        config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id)
+        config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={
            'x-validated-age': '18'
        })
        geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {}
        if geoblocking.get('restrictedArea'):
--- a/yt_dlp/extractor/nhk.py
+++ b/yt_dlp/extractor/nhk.py
@ -68,11 +68,12 @@ class NhkBaseIE(InfoExtractor):
    def _extract_episode_info(self, url, episode=None):
        fetch_episode = episode is None
-        lang, m_type, episode_id = NhkVodIE._match_valid_url(url).groups()
+        lang, m_type, episode_id = NhkVodIE._match_valid_url(url).group('lang', 'type', 'id')
-        if len(episode_id) == 7:
+        is_video = m_type == 'video'
        if is_video:
            episode_id = episode_id[:4] + '-' + episode_id[4:]
        is_video = m_type == 'video'
        if fetch_episode:
            episode = self._call_api(
                episode_id, lang, is_video, True, episode_id[:4] == '9999')[0]
@ -133,47 +134,46 @@ class NhkBaseIE(InfoExtractor):
 class NhkVodIE(NhkBaseIE):
    # the 7-character IDs can have alphabetic chars too: assume [a-z] rather than just [a-f], eg
-    _VALID_URL = r'%s%s(?P<id>[0-9a-z]{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
+    _VALID_URL = [rf'{NhkBaseIE._BASE_URL_REGEX}/(?P<type>video)/(?P<id>[0-9a-z]+)',
                  rf'{NhkBaseIE._BASE_URL_REGEX}/(?P<type>audio)/(?P<id>[^/?#]+?-\d{{8}}-[0-9a-z]+)']
    # Content available only for a limited period of time. Visit
    # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
    _TESTS = [{
-        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2061601/',
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2049126/',
        'info_dict': {
-            'id': 'yd8322ch',
+            'id': 'nw_vod_v_en_2049_126_20230413233000_01_1681398302',
            'ext': 'mp4',
-            'description': 'md5:109c8b05d67a62d0592f2b445d2cd898',
+            'title': 'Japan Railway Journal - The Tohoku Shinkansen: Full Speed Ahead',
-            'title': 'GRAND SUMO Highlights - [Recap] May Tournament Day 1 (Opening Day)',
+            'description': 'md5:49f7c5b206e03868a2fdf0d0814b92f6',
-            'upload_date': '20230514',
+            'thumbnail': 'md5:51bcef4a21936e7fea1ff4e06353f463',
-            'timestamp': 1684083791,
+            'episode': 'The Tohoku Shinkansen: Full Speed Ahead',
-            'series': 'GRAND SUMO Highlights',
+            'series': 'Japan Railway Journal',
            'episode': '[Recap] May Tournament Day 1 (Opening Day)',
            'thumbnail': 'https://mz-edge.stream.co.jp/thumbs/aid/t1684084443/4028649.jpg?w=1920&h=1080',
        },
    }, {
        # video clip
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
-        'md5': '7a90abcfe610ec22a6bfe15bd46b30ca',
+        'md5': '153c3016dfd252ba09726588149cf0e7',
        'info_dict': {
-            'id': 'a95j5iza',
+            'id': 'lpZXIwaDE6_Z-976CPsFdxyICyWUzlT5',
            'ext': 'mp4',
-            'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU",
+            'title': 'Dining with the Chef - Chef Saito\'s Family recipe: MENCHI-KATSU',
            'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
-            'timestamp': 1565965194,
+            'thumbnail': 'md5:d6a4d9b6e9be90aaadda0bcce89631ed',
            'upload_date': '20190816',
            'thumbnail': 'https://mz-edge.stream.co.jp/thumbs/aid/t1567086278/3715195.jpg?w=1920&h=1080',
            'series': 'Dining with the Chef',
            'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU',
        },
    }, {
-        # audio clip
+        # radio
-        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/r_inventions-20201104-1/',
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/livinginjapan-20231001-1/',
        'info_dict': {
-            'id': 'r_inventions-20201104-1-en',
+            'id': 'livinginjapan-20231001-1-en',
            'ext': 'm4a',
-            'title': "Japan's Top Inventions - Miniature Video Cameras",
+            'title': 'Living in Japan - Tips for Travelers to Japan / Ramen Vending Machines',
-            'description': 'md5:07ea722bdbbb4936fdd360b6a480c25b',
+            'series': 'Living in Japan',
            'description': 'md5:850611969932874b4a3309e0cae06c2f',
            'thumbnail': 'md5:960622fb6e06054a4a1a0c97ea752545',
            'episode': 'Tips for Travelers to Japan / Ramen Vending Machines'
        },
        'skip': '404 Not Found',
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
        'only_matching': True,
@ -199,6 +199,19 @@ class NhkVodIE(NhkBaseIE):
            'timestamp': 1623722008,
        },
        'skip': '404 Not Found',
    }, {
        # japanese-language, longer id than english
        'url': 'https://www3.nhk.or.jp/nhkworld/ja/ondemand/video/0020271111/',
        'info_dict': {
            'id': 'nw_ja_v_jvod_ohayou_20231008',
            'ext': 'mp4',
            'title': 'おはよう日本（7時台） - 10月8日放送',
            'series': 'おはよう日本（7時台）',
            'episode': '10月8日放送',
            'thumbnail': 'md5:d733b1c8e965ab68fb02b2d347d0e9b4',
            'description': 'md5:9c1d6cbeadb827b955b20e99ab920ff0',
        },
        'skip': 'expires 2023-10-15',
    }]
    def _real_extract(self, url):
@ -206,7 +219,7 @@ class NhkVodIE(NhkBaseIE):
 class NhkVodProgramIE(NhkBaseIE):
-    _VALID_URL = r'%s/program%s(?P<id>[0-9a-z]+)(?:.+?\btype=(?P<episode_type>clip|(?:radio|tv)Episode))?' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
+    _VALID_URL = rf'{NhkBaseIE._BASE_URL_REGEX}/program{NhkBaseIE._TYPE_REGEX}(?P<id>\w+)(?:.+?\btype=(?P<episode_type>clip|(?:radio|tv)Episode))?'
    _TESTS = [{
        # video program episodes
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/sumo',
@ -240,8 +253,7 @@ class NhkVodProgramIE(NhkBaseIE):
    }]
    def _real_extract(self, url):
-        lang, m_type, program_id, episode_type = self._match_valid_url(url).groups()
+        lang, m_type, program_id, episode_type = self._match_valid_url(url).group('lang', 'type', 'id', 'episode_type')
        episodes = self._call_api(
            program_id, lang, m_type == 'video', False, episode_type == 'clip')
--- a/yt_dlp/extractor/tenplay.py
+++ b/yt_dlp/extractor/tenplay.py
@ -1,9 +1,11 @@
 from datetime import datetime
 import base64
 import functools
 import itertools
 from datetime import datetime
 from .common import InfoExtractor
 from ..networking import HEADRequest
-from ..utils import int_or_none, urlencode_postdata
+from ..utils import int_or_none, traverse_obj, urlencode_postdata, urljoin
 class TenPlayIE(InfoExtractor):
@ -113,3 +115,55 @@ class TenPlayIE(InfoExtractor):
            'uploader': 'Channel 10',
            'uploader_id': '2199827728001',
        }
 class TenPlaySeasonIE(InfoExtractor):
    # Playlist extractor for a single season page of a 10play.com.au show.
    # The URL pattern captures the show slug and the season slug, allows an
    # optional trailing slash, and requires the path to end there (end of
    # string, or the start of a query string / fragment).
    _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?P<show>[^/?#]+)/episodes/(?P<season>[^/?#]+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://10play.com.au/masterchef/episodes/season-14',
        'info_dict': {
            'title': 'Season 14',
            'id': 'MjMyOTIy',
        },
        'playlist_mincount': 64,
    }, {
        'url': 'https://10play.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2022',
        'info_dict': {
            'title': 'Season 2022',
            'id': 'Mjc0OTIw',
        },
        'playlist_mincount': 256,
    }]
    def _entries(self, load_more_url, display_id=None):
        """Yield the ``cardLink`` of every episode served by *load_more_url*.

        The endpoint is requested repeatedly. Each request passes the ids of
        all episodes seen so far as the ``skipIds[]`` query parameter, and the
        loop ends once a response reports a falsy ``hasMore``.
        """
        # Ids already yielded; sent back on every request so that the next
        # response presumably excludes them (server behaviour not visible here).
        skip_ids = []
        for page in itertools.count(1):
            episodes_carousel = self._download_json(
                load_more_url, display_id, query={'skipIds[]': skip_ids},
                note=f'Fetching episodes page {page}')
            episodes_chunk = episodes_carousel['items']
            skip_ids.extend(ep['id'] for ep in episodes_chunk)
            for ep in episodes_chunk:
                yield ep['cardLink']
            if not episodes_carousel['hasMore']:
                break
    def _real_extract(self, url):
        """Build the playlist result for the season page at *url*.

        Downloads the season JSON from the 10play API, picks the episodes
        carousel out of it, and returns a playlist whose id is the carousel's
        ``tpId`` and whose title is read from ``content[0].title``.
        """
        show, season = self._match_valid_url(url).group('show', 'season')
        season_info = self._download_json(
            f'https://10play.com.au/api/shows/{show}/episodes/{season}', f'{show}/{season}')
        # Two alternatives are searched under content[0].components: a
        # component whose lower-cased title is 'episodes', or any dict
        # component. NOTE(review): with get_all=False this presumably returns
        # the first hit, so the titled match is preferred -- confirm against
        # traverse_obj's documentation.
        episodes_carousel = traverse_obj(season_info, (
            'content', 0, 'components', (
                lambda _, v: v['title'].lower() == 'episodes',
                (..., {dict}),
            )), get_all=False) or {}
        # NOTE(review): when nothing matched, episodes_carousel is {} and the
        # lookups below raise KeyError.
        playlist_id = episodes_carousel['tpId']
        # Both the load-more URL and each yielded cardLink are resolved
        # against the page URL with urljoin.
        return self.playlist_from_matches(
            self._entries(urljoin(url, episodes_carousel['loadMoreUrl']), playlist_id),
            playlist_id, traverse_obj(season_info, ('content', 0, 'title', {str})),
            getter=functools.partial(urljoin, url))
--- a/yt_dlp/extractor/youku.py
+++ b/yt_dlp/extractor/youku.py
@ -20,7 +20,7 @@ class YoukuIE(InfoExtractor):
    _VALID_URL = r'''(?x)
        (?:
            https?://(
-                (?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
+                (?:v|play(?:er)?)\.(?:youku|tudou)\.com/(?:v_show/id_|player\.php/sid/)|
                video\.tudou\.com/v/)|
            youku:)
        (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
@ -87,6 +87,19 @@ class YoukuIE(InfoExtractor):
            'uploader_url': 'https://www.youku.com/profile/index/?uid=UNjU2MzY1MzM1Ng==',
            'tags': list,
        },
    }, {
        'url': 'https://play.tudou.com/v_show/id_XNjAxNjI2OTU3Ng==.html?',
        'info_dict': {
            'id': 'XNjAxNjI2OTU3Ng',
            'ext': 'mp4',
            'title': '阿斯塔意识到哈里杀了人，自己被骗了',
            'thumbnail': 'https://m.ykimg.com/0541010164F732752794D4D7B70331D1',
            'uploader_id': '88758207',
            'tags': [],
            'uploader_url': 'https://www.youku.com/profile/index/?uid=UMzU1MDMyODI4',
            'uploader': '英美剧场',
            'duration': 72.91,
        },
    }]
    @staticmethod
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@ -4441,10 +4441,12 @@ def write_xattr(path, key, value):
            raise XAttrMetadataError(e.errno, e.strerror)
        return
-    # UNIX Method 1. Use xattrs/pyxattrs modules
+    # UNIX Method 1. Use os.setxattr/xattrs/pyxattrs modules
    setxattr = None
-    if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
+    if callable(getattr(os, 'setxattr', None)):
        setxattr = os.setxattr
    elif getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
        # Unicode arguments are not supported in pyxattr until version 0.5.0
        # See https://github.com/ytdl-org/youtube-dl/issues/5498
        if version_tuple(xattr.__version__) >= (0, 5, 0):
Author	SHA1	Message	Date
bashonly	84e26038d4	[utils] `write_xattr`: Use `os.setxattr` if available (#8205) Closes #8193 Authored by: bashonly, Grub4K Co-authored-by: Simon Sawicki <contact@grub4k.xyz>	2023-10-09 18:30:36 +00:00
garret	4de94b9e16	[ie/nhk] Fix Japanese-language VOD extraction (#8309) Closes #8303 Authored by: garret1317	2023-10-09 18:00:26 +00:00
Midnight Veil	88a99c87b6	[ie/tenplay] Add support for seasons (#7939) Closes #7744 Authored by: midnightveil	2023-10-09 17:55:46 +00:00
Stefan Lobbenmeier	09f815ad52	[ie/ArteTV] Support age-restricted content (#8301) Closes #7782 Authored by: StefanLobbenmeier	2023-10-09 17:51:37 +00:00
naginatana	b7098d46b5	[ie/youku] Improve tudou.com support (#8160) Authored by: naginatana	2023-10-09 17:46:16 +00:00