From 352e7d987323e9df9205ee117a604ee4123231c2 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 18 Nov 2022 02:00:11 +0000 Subject: [PATCH] [extractor/twitter] Refresh guest token when expired (#5560) Closes #5548 Authored by: bashonly, Grub4K --- yt_dlp/extractor/twitter.py | 84 ++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 38 deletions(-) diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index 62b34d081..18ebb3617 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -107,46 +107,54 @@ def _call_api(self, path, video_id, query={}, graphql=False): 'x-twitter-active-user': 'yes', }) - result, last_error = None, None + last_error = None for bearer_token in self._TOKENS: - headers['Authorization'] = f'Bearer {bearer_token}' + for first_attempt in (True, False): + headers['Authorization'] = f'Bearer {bearer_token}' - if not self.is_logged_in: - if not self._TOKENS[bearer_token]: - headers.pop('x-guest-token', None) - guest_token_response = self._download_json( - self._API_BASE + 'guest/activate.json', video_id, - 'Downloading guest token', data=b'', headers=headers) - - self._TOKENS[bearer_token] = guest_token_response.get('guest_token') + if not self.is_logged_in: if not self._TOKENS[bearer_token]: - raise ExtractorError('Could not retrieve guest token') - headers['x-guest-token'] = self._TOKENS[bearer_token] + headers.pop('x-guest-token', None) + guest_token_response = self._download_json( + self._API_BASE + 'guest/activate.json', video_id, + 'Downloading guest token', data=b'', headers=headers) - try: - allowed_status = {400, 403, 404} if graphql else {403} - result = self._download_json( - (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path, - video_id, headers=headers, query=query, expected_status=allowed_status) - break + self._TOKENS[bearer_token] = guest_token_response.get('guest_token') + if not self._TOKENS[bearer_token]: + raise 
ExtractorError('Could not retrieve guest token') - except ExtractorError as e: - if last_error: - raise last_error - elif not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404: - raise - last_error = e - self.report_warning( - 'Twitter API gave 404 response, retrying with deprecated token. ' - 'Only one media item can be extracted') + headers['x-guest-token'] = self._TOKENS[bearer_token] - if result.get('errors'): - error_message = ', '.join(set(traverse_obj( - result, ('errors', ..., 'message'), expected_type=str))) or 'Unknown error' - raise ExtractorError(f'Error(s) while querying api: {error_message}', expected=True) + try: + allowed_status = {400, 403, 404} if graphql else {403} + result = self._download_json( + (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path, + video_id, headers=headers, query=query, expected_status=allowed_status) - assert result is not None - return result + except ExtractorError as e: + if last_error: + raise last_error + + if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404: + raise + + last_error = e + self.report_warning( + 'Twitter API gave 404 response, retrying with deprecated auth token. ' + 'Only one media item can be extracted') + break # continue outer loop with next bearer_token + + if result.get('errors'): + errors = traverse_obj(result, ('errors', ..., 'message'), expected_type=str) + if first_attempt and any('bad guest token' in error.lower() for error in errors): + self.to_screen('Guest token has expired. 
Refreshing guest token') + self._TOKENS[bearer_token] = None + continue + + error_message = ', '.join(set(errors)) or 'Unknown error' + raise ExtractorError(f'Error(s) while querying API: {error_message}', expected=True) + + return result def _build_graphql_query(self, media_id): raise NotImplementedError('Method must be implemented to support GraphQL') @@ -328,7 +336,7 @@ class TwitterIE(TwitterBaseIE): 'id': '665052190608723968', 'display_id': '665052190608723968', 'ext': 'mp4', - 'title': 'md5:3f57ab5d35116537a2ae7345cd0060d8', + 'title': 'md5:55fef1d5b811944f1550e91b44abb82e', 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ', 'uploader_id': 'starwars', 'uploader': r're:Star Wars.*', @@ -364,6 +372,7 @@ class TwitterIE(TwitterBaseIE): # Test case of TwitterCardIE 'skip_download': True, }, + 'skip': 'Dead external link', }, { 'url': 'https://twitter.com/jaydingeer/status/700207533655363584', 'info_dict': { @@ -568,10 +577,10 @@ class TwitterIE(TwitterBaseIE): 'id': '1577855447914409984', 'display_id': '1577855540407197696', 'ext': 'mp4', - 'title': 'oshtru \U0001faac\U0001f47d - gm \u2728\ufe0f now I can post image and video. nice update.', - 'description': 'gm \u2728\ufe0f now I can post image and video. nice update. 
https://t.co/cG7XgiINOm', + 'title': 'md5:9d198efb93557b8f8d5b78c480407214', + 'description': 'md5:b9c3699335447391d11753ab21c70a74', 'upload_date': '20221006', - 'uploader': 'oshtru \U0001faac\U0001f47d', + 'uploader': 'oshtru', 'uploader_id': 'oshtru', 'uploader_url': 'https://twitter.com/oshtru', 'thumbnail': r're:^https?://.*\.jpg', @@ -1096,7 +1105,6 @@ def _real_extract(self, url): class TwitterSpacesIE(TwitterBaseIE): IE_NAME = 'twitter:spaces' _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})' - _TWITTER_GRAPHQL = 'https://twitter.com/i/api/graphql/HPEisOmj1epUNLCWTYhUWw/' _TESTS = [{ 'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',