From 8764947d10d861eff166445dca89fae9617bf780 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Koch-Kramer?=
Date: Mon, 27 Jan 2020 09:47:52 +0100
Subject: [PATCH] Raise QueryReturnedNotFoundException if expected structure is missing

- QueryReturnedNotFoundException now inherits ConnectionException --> Retry on 404 errors + missing "window._sharedData"

Relates to #146 and #496.
---
 instaloader/exceptions.py         | 8 ++++----
 instaloader/instaloadercontext.py | 7 +++++--
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/instaloader/exceptions.py b/instaloader/exceptions.py
index 0fc7196..e23ba0d 100644
--- a/instaloader/exceptions.py
+++ b/instaloader/exceptions.py
@@ -9,10 +9,6 @@ class QueryReturnedBadRequestException(InstaloaderException):
     pass
 
 
-class QueryReturnedNotFoundException(InstaloaderException):
-    pass
-
-
 class QueryReturnedForbiddenException(InstaloaderException):
     pass
 
@@ -62,5 +58,9 @@ class PostChangedException(InstaloaderException):
     pass
 
 
+class QueryReturnedNotFoundException(ConnectionException):
+    pass
+
+
 class TooManyRequestsException(ConnectionException):
     pass
diff --git a/instaloader/instaloadercontext.py b/instaloader/instaloadercontext.py
index 46776ff..bed2245 100644
--- a/instaloader/instaloadercontext.py
+++ b/instaloader/instaloadercontext.py
@@ -397,7 +397,7 @@ class InstaloaderContext:
             if is_html_query:
                 match = re.search(r'window\._sharedData = (.*);', resp.text)
                 if match is None:
-                    raise ConnectionException("Could not find \"window._sharedData\" in html response.")
+                    raise QueryReturnedNotFoundException("Could not find \"window._sharedData\" in html response.")
                 resp_json = json.loads(match.group(1))
                 entry_data = resp_json.get('entry_data')
                 post_or_profile_page = list(entry_data.values())[0] if entry_data is not None else None
@@ -422,7 +422,10 @@ class InstaloaderContext:
         except (ConnectionException, json.decoder.JSONDecodeError, requests.exceptions.RequestException) as err:
             error_string = "JSON Query to {}: {}".format(path, err)
             if _attempt == self.max_connection_attempts:
-                raise ConnectionException(error_string) from err
+                if isinstance(err, QueryReturnedNotFoundException):
+                    raise QueryReturnedNotFoundException(error_string) from err
+                else:
+                    raise ConnectionException(error_string) from err
             self.error(error_string + " [retrying; skip with ^C]", repeat_at_end=False)
             try:
                 if is_graphql_query and isinstance(err, TooManyRequestsException):