inline the graphql blob and move _call_search_api into DailymotionSearchIE

fix the test (search playlist titles are no longer prefixed)
Update yt_dlp/extractor/dailymotion.py
2024-11-07 07:51:26 +01:00 · 2023-11-12 12:31:53 -03:00 · 2023-11-12 12:27:11 -03:00 · 2023-11-12 12:26:18 -03:00 · 2023-11-12 12:24:20 -03:00 · 2023-11-12 12:23:44 -03:00
1 changed file with 35 additions and 319 deletions
--- a/yt_dlp/extractor/dailymotion.py
+++ b/yt_dlp/extractor/dailymotion.py
@@ -77,8 +77,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):

    def _call_api(self, object_type, xid, object_fields, note, filter_extra=None):
        if not self._HEADERS.get('Authorization'):
-            token = self._get_token(xid)
-            self._HEADERS['Authorization'] = 'Bearer ' + token
+            self._HEADERS['Authorization'] = f'Bearer {self._get_token(xid)}'

        resp = self._download_json(
            'https://graphql.api.dailymotion.com/', xid, note, data=json.dumps({
@@ -93,310 +92,6 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
            raise ExtractorError(resp['errors'][0]['message'], expected=True)
        return obj

-    def _call_search_api(self, term, page, note):
-        payload_living_horror = '''
-fragment VIDEO_BASE_FRAGMENT on Video {
-  id
-  xid
-  title
-  createdAt
-  stats {
-    id
-    views {
-      id
-      total
-      __typename
-    }
-    __typename
-  }
-  channel {
-    id
-    xid
-    name
-    displayName
-    accountType
-    __typename
-  }
-  duration
-  thumbnailx60: thumbnailURL(size: "x60")
-  thumbnailx120: thumbnailURL(size: "x120")
-  thumbnailx240: thumbnailURL(size: "x240")
-  thumbnailx720: thumbnailURL(size: "x720")
-  aspectRatio
-  __typename
-}
-
-fragment VIDEO_FAVORITES_FRAGMENT on Media {
-  __typename
-  ... on Video {
-    id
-    isInWatchLater
-    __typename
-  }
-  ... on Live {
-    id
-    isInWatchLater
-    __typename
-  }
-}
-
-fragment CHANNEL_BASE_FRAG on Channel {
-  accountType
-  id
-  xid
-  name
-  displayName
-  isFollowed
-  thumbnailx60: logoURL(size: "x60")
-  thumbnailx120: logoURL(size: "x120")
-  thumbnailx240: logoURL(size: "x240")
-  thumbnailx720: logoURL(size: "x720")
-  __typename
-}
-
-fragment PLAYLIST_BASE_FRAG on Collection {
-  id
-  xid
-  name
-  channel {
-    id
-    xid
-    name
-    displayName
-    accountType
-    __typename
-  }
-  description
-  thumbnailx60: thumbnailURL(size: "x60")
-  thumbnailx120: thumbnailURL(size: "x120")
-  thumbnailx240: thumbnailURL(size: "x240")
-  thumbnailx720: thumbnailURL(size: "x720")
-  stats {
-    id
-    videos {
-      id
-      total
-      __typename
-    }
-    __typename
-  }
-  __typename
-}
-
-fragment TOPIC_BASE_FRAG on Topic {
-  id
-  xid
-  name
-  videos(sort: "recent", first: 5) {
-    pageInfo {
-      hasNextPage
-      nextPage
-      __typename
-    }
-    edges {
-      node {
-        id
-        ...VIDEO_BASE_FRAGMENT
-        ...VIDEO_FAVORITES_FRAGMENT
-        __typename
-      }
-      __typename
-    }
-    __typename
-  }
-  stats {
-    id
-    videos {
-      id
-      total
-      __typename
-    }
-    __typename
-  }
-  __typename
-}
-
-query SEARCH_QUERY($query: String!, $shouldIncludeVideos: Boolean!, $shouldIncludeChannels: Boolean!, $shouldIncludePlaylists: Boolean!, $shouldIncludeTopics: Boolean!, $shouldIncludeLives: Boolean!, $page: Int, $limit: Int, $sortByVideos: SearchVideoSort, $durationMinVideos: Int, $durationMaxVideos: Int, $createdAfterVideos: DateTime) {
-  search {
-    id
-    videos(
-      query: $query
-      first: $limit
-      page: $page
-      sort: $sortByVideos
-      durationMin: $durationMinVideos
-      durationMax: $durationMaxVideos
-      createdAfter: $createdAfterVideos
-    ) @include(if: $shouldIncludeVideos) {
-      metadata {
-        algorithm {
-          uuid
-          __typename
-        }
-        __typename
-      }
-      pageInfo {
-        hasNextPage
-        nextPage
-        __typename
-      }
-      totalCount
-      edges {
-        node {
-          id
-          ...VIDEO_BASE_FRAGMENT
-          ...VIDEO_FAVORITES_FRAGMENT
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-    hasLives: lives(query: $query, first: $limit, page: $page) {
-      totalCount
-      __typename
-    }
-    lives(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeLives) {
-      metadata {
-        algorithm {
-          uuid
-          __typename
-        }
-        __typename
-      }
-      pageInfo {
-        hasNextPage
-        nextPage
-        __typename
-      }
-      totalCount
-      edges {
-        node {
-          id
-          xid
-          title
-          thumbnail: thumbnailURL(size: "x240")
-          thumbnailx60: thumbnailURL(size: "x60")
-          thumbnailx120: thumbnailURL(size: "x120")
-          thumbnailx240: thumbnailURL(size: "x240")
-          thumbnailx720: thumbnailURL(size: "x720")
-          audienceCount
-          aspectRatio
-          isOnAir
-          channel {
-            id
-            xid
-            name
-            displayName
-            accountType
-            __typename
-          }
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-    channels(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeChannels) {
-      metadata {
-        algorithm {
-          uuid
-          __typename
-        }
-        __typename
-      }
-      pageInfo {
-        hasNextPage
-        nextPage
-        __typename
-      }
-      totalCount
-      edges {
-        node {
-          id
-          ...CHANNEL_BASE_FRAG
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-    playlists: collections(query: $query, first: $limit, page: $page) @include(if: $shouldIncludePlaylists) {
-      metadata {
-        algorithm {
-          uuid
-          __typename
-        }
-        __typename
-      }
-      pageInfo {
-        hasNextPage
-        nextPage
-        __typename
-      }
-      totalCount
-      edges {
-        node {
-          id
-          ...PLAYLIST_BASE_FRAG
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-    topics(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeTopics) {
-      metadata {
-        algorithm {
-          uuid
-          __typename
-        }
-        __typename
-      }
-      pageInfo {
-        hasNextPage
-        nextPage
-        __typename
-      }
-      totalCount
-      edges {
-        node {
-          id
-          ...TOPIC_BASE_FRAG
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-    __typename
-  }
-}
-'''
-
-        if not self._HEADERS.get('Authorization'):
-            token = self._get_token(term)
-            self._HEADERS['Authorization'] = 'Bearer ' + token
-        resp = self._download_json(
-            'https://graphql.api.dailymotion.com/', None, note, data=json.dumps({
-                'operationName': 'SEARCH_QUERY',
-                'query': payload_living_horror,
-                'variables': {
-                    'limit': 20,
-                    'page': page,
-                    'query': term,
-                    'shouldIncludeChannels': False,
-                    'shouldIncludeLives': False,
-                    'shouldIncludePlaylists': False,
-                    'shouldIncludeTopics': False,
-                    'shouldIncludeVideos': True
-                }
-            }).encode(), headers=self._HEADERS)
-        obj = resp['data']['search']
-        if not obj:
-            raise ExtractorError(resp['errors'][0]['message'], expected=True)
-        return obj
-

 class DailymotionIE(DailymotionBaseInfoExtractor):
    _VALID_URL = r'''(?ix)
@@ -700,32 +395,53 @@ class DailymotionSearchIE(DailymotionPlaylistBaseIE):
        'url': 'http://www.dailymotion.com/search/king of turtles/videos',
        'info_dict': {
            'id': 'king of turtles',
-            'title': 'Dailymotion search of king of turtles',
+            'title': 'king of turtles',
        },
        'playlist_mincount': 90,
    }]

+    def _call_search_api(self, term, page, note):
+        search_query = 'fragment VIDEO_BASE_FRAGMENT on Video { id xid title createdAt stats { id views { id total __typename } __typename } channel { id xid name displayName accountType __typename } duration thumbnailx60: thumbnailURL(size: "x60") thumbnailx120: thumbnailURL(size: "x120") thumbnailx240: thumbnailURL(size: "x240") thumbnailx720: thumbnailURL(size: "x720") aspectRatio __typename } fragment VIDEO_FAVORITES_FRAGMENT on Media { __typename ... on Video { id isInWatchLater __typename } ... on Live { id isInWatchLater __typename } } fragment CHANNEL_BASE_FRAG on Channel { accountType id xid name displayName isFollowed thumbnailx60: logoURL(size: "x60") thumbnailx120: logoURL(size: "x120") thumbnailx240: logoURL(size: "x240") thumbnailx720: logoURL(size: "x720") __typename } fragment PLAYLIST_BASE_FRAG on Collection { id xid name channel { id xid name displayName accountType __typename } description thumbnailx60: thumbnailURL(size: "x60") thumbnailx120: thumbnailURL(size: "x120") thumbnailx240: thumbnailURL(size: "x240") thumbnailx720: thumbnailURL(size: "x720") stats { id videos { id total __typename } __typename } __typename } fragment TOPIC_BASE_FRAG on Topic { id xid name videos(sort: "recent", first: 5) { pageInfo { hasNextPage nextPage __typename } edges { node { id ...VIDEO_BASE_FRAGMENT ...VIDEO_FAVORITES_FRAGMENT __typename } __typename } __typename } stats { id videos { id total __typename } __typename } __typename } query SEARCH_QUERY($query: String!, $shouldIncludeVideos: Boolean!, $shouldIncludeChannels: Boolean!, $shouldIncludePlaylists: Boolean!, $shouldIncludeTopics: Boolean!, $shouldIncludeLives: Boolean!, $page: Int, $limit: Int, $sortByVideos: SearchVideoSort, $durationMinVideos: Int, $durationMaxVideos: Int, $createdAfterVideos: DateTime) { search { id videos( query: $query first: $limit page: $page sort: $sortByVideos durationMin: $durationMinVideos durationMax: $durationMaxVideos createdAfter: $createdAfterVideos ) @include(if: 
$shouldIncludeVideos) { metadata { algorithm { uuid __typename } __typename } pageInfo { hasNextPage nextPage __typename } totalCount edges { node { id ...VIDEO_BASE_FRAGMENT ...VIDEO_FAVORITES_FRAGMENT __typename } __typename } __typename } hasLives: lives(query: $query, first: $limit, page: $page) { totalCount __typename } lives(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeLives) { metadata { algorithm { uuid __typename } __typename } pageInfo { hasNextPage nextPage __typename } totalCount edges { node { id xid title thumbnail: thumbnailURL(size: "x240") thumbnailx60: thumbnailURL(size: "x60") thumbnailx120: thumbnailURL(size: "x120") thumbnailx240: thumbnailURL(size: "x240") thumbnailx720: thumbnailURL(size: "x720") audienceCount aspectRatio isOnAir channel { id xid name displayName accountType __typename } __typename } __typename } __typename } channels(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeChannels) { metadata { algorithm { uuid __typename } __typename } pageInfo { hasNextPage nextPage __typename } totalCount edges { node { id ...CHANNEL_BASE_FRAG __typename } __typename } __typename } playlists: collections(query: $query, first: $limit, page: $page) @include(if: $shouldIncludePlaylists) { metadata { algorithm { uuid __typename } __typename } pageInfo { hasNextPage nextPage __typename } totalCount edges { node { id ...PLAYLIST_BASE_FRAG __typename } __typename } __typename } topics(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeTopics) { metadata { algorithm { uuid __typename } __typename } pageInfo { hasNextPage nextPage __typename } totalCount edges { node { id ...TOPIC_BASE_FRAG __typename } __typename } __typename } __typename } }'
+
+        if not self._HEADERS.get('Authorization'):
+            self._HEADERS['Authorization'] = f'Bearer {self._get_token(term)}'
+        resp = self._download_json(
+            'https://graphql.api.dailymotion.com/', None, note, data=json.dumps({
+                'operationName': 'SEARCH_QUERY',
+                'query': search_query,
+                'variables': {
+                    'limit': 20,
+                    'page': page,
+                    'query': term,
+                    'shouldIncludeChannels': False,
+                    'shouldIncludeLives': False,
+                    'shouldIncludePlaylists': False,
+                    'shouldIncludeTopics': False,
+                    'shouldIncludeVideos': True
+                }
+            }).encode(), headers=self._HEADERS)
+        obj = traverse_obj(resp, ('data', 'search', {dict}))
+        if not obj:
+            raise ExtractorError(traverse_obj(resp, ('errors', 0, 'message'))
+                                 or 'Could not fetch search data', expected=True)
+        return obj
+
    def _fetch_page(self, term, page):
        page += 1
-        videos = self._call_search_api(term, page, 'Searching "%s", page %d' % (term, page))['videos']
-        for edge in videos['edges']:
-            node = edge['node']
-            yield self.url_result(
-                'https://www.dailymotion.com/video/' + node['xid'],
-                DailymotionIE.ie_key(),
-                node['xid']
-            )
+        response = self._call_search_api(term, page, f'Searching "{term}" page {page}')
+        for edge in traverse_obj(response, ('videos', 'edges', lambda _, x: x['node']['xid'])) or []:
+            xid = edge['node']['xid']
+            yield self.url_result(f'https://www.dailymotion.com/video/{xid}', DailymotionIE, xid)

    def _real_extract(self, url):
        term = urllib.parse.unquote_plus(self._match_id(url))
-        entries = OnDemandPagedList(functools.partial(
-            self._fetch_page, term), self._PAGE_SIZE)
-        return self.playlist_result(entries, term, 'Dailymotion search of %s' % term)
+        return self.playlist_result(
+            OnDemandPagedList(functools.partial(self._fetch_page, term), self._PAGE_SIZE), term, term)


 class DailymotionUserIE(DailymotionPlaylistBaseIE):
    IE_NAME = 'dailymotion:user'
-    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search)/)(?:(?:old/)?user/)?(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://www.dailymotion.com/user/nqtv',
        'info_dict': {
Author	SHA1	Message	Date
Raphaël Droz	a068678dda	inline the graphql blob and move _call_search_api into DailymotionSearchIE; fix the test (search playlist titles are no longer prefixed)	2023-11-12 12:31:53 -03:00
Raphaël Droz	0dfb22c711	Update yt_dlp/extractor/dailymotion.py Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>	2023-11-12 12:27:11 -03:00
Raphaël Droz	3c93ee8ed7	Update yt_dlp/extractor/dailymotion.py Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>	2023-11-12 12:26:18 -03:00
Raphaël Droz	75049a4191	Update yt_dlp/extractor/dailymotion.py Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>	2023-11-12 12:24:20 -03:00
Raphaël Droz	b98a698ad1	Update yt_dlp/extractor/dailymotion.py Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>	2023-11-12 12:23:44 -03:00
Raphaël Droz	5a47d6245e	Update yt_dlp/extractor/dailymotion.py Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>	2023-11-12 12:22:56 -03:00
Raphaël Droz	3198376fe0	Update yt_dlp/extractor/dailymotion.py Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>	2023-11-12 12:22:21 -03:00
Raphaël Droz	cf5c4606a0	Update yt_dlp/extractor/dailymotion.py Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>	2023-11-12 12:20:26 -03:00