From a9d939fe458d630fc82f30539014d7d2a4b913eb Mon Sep 17 00:00:00 2001 From: bashonly Date: Fri, 7 Jun 2024 13:47:46 -0500 Subject: [PATCH] Add rate-limit detection/retry loop for original formats Authored by: bashonly --- yt_dlp/extractor/soundcloud.py | 57 ++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 87260e05a..d8dbfc0ba 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -117,6 +117,26 @@ def _call_api(self, *args, **kwargs): return False raise + def _request_format_info(self, url, track_id, format_id, query, original=False): + # _call_api's additional error handling is harmful to original download format requests + method = self._download_json if original else self._call_api + for retry in self.RetryManager(fatal=False): + try: + return method( + url, track_id, f'Downloading {format_id} format info JSON', + query=query, headers=self._HEADERS) + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 429: + self.report_warning( + 'You have reached the API rate limit, which is ~600 requests per ' + '10 minutes. Use the --extractor-retries and --retry-sleep options ' + 'to configure an appropriate retry count and wait time', only_once=True) + retry.error = e.cause + elif original: + raise # original download format extraction code handles errors externally + else: + self.report_warning(e.msg) + def _initialize_pre_login(self): self._CLIENT_ID = self.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf' @@ -224,18 +244,18 @@ def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_f if not extract_flat and info.get('downloadable') and info.get('has_downloads_left'): try: - # Do not use _call_api(); HTTP Error codes have different meanings for this request - download_data = self._download_json( + download_data = self._request_format_info( f'{self._API_V2_BASE}tracks/{track_id}/download', track_id, - 'Downloading original download format info JSON', query=query, headers=self._HEADERS) + 'original download', query, original=True) except ExtractorError as e: - if isinstance(e.cause, HTTPError): - if e.cause.status == 401: - self.report_warning( - 'Original download format is only available ' - f'for registered users. {self._login_hint()}') - elif e.cause.status == 403: - self.write_debug('Original download format is not available for this client') + if isinstance(e.cause, HTTPError) and e.cause.status == 401: + self.report_warning( + 'Original download format is only available ' + f'for registered users. {self._login_hint()}') + elif isinstance(e.cause, HTTPError) and e.cause.status == 403: + self.write_debug('Original download format is not available for this client') + else: + self.report_warning(e.msg) download_data = None if redirect_url := traverse_obj(download_data, ('redirectUri', {url_or_none})): @@ -319,22 +339,7 @@ def add_format(f, protocol, is_preview=False): self.write_debug(f'"{identifier}" is not a requested format, skipping') continue - stream = None - for retry in self.RetryManager(fatal=False): - try: - stream = self._call_api( - format_url, track_id, f'Downloading {identifier} format info JSON', - query=query, headers=self._HEADERS) - except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 429: - self.report_warning( - 'You have reached the API rate limit, which is ~600 requests per ' - '10 minutes. Use the --extractor-retries and --retry-sleep options ' - 'to configure an appropriate retry count and wait time', only_once=True) - retry.error = e.cause - else: - self.report_warning(e.msg) - + stream = self._request_format_info(format_url, track_id, identifier, query) stream_url = traverse_obj(stream, ('url', {url_or_none})) if invalid_url(stream_url): continue