1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-08-18 12:49:38 +02:00

Slightly improve error handling

Use `raise ... from ...` rather than a plain `raise` where appropriate

Add logic to reduce the GraphQL page length if a 400 Bad Request is
returned
This commit is contained in:
Alexander Graf 2018-04-18 15:58:45 +02:00
parent 066c3de113
commit 2d9acd9989
4 changed files with 54 additions and 35 deletions

View File

@ -140,7 +140,7 @@ def _main(instaloader: Instaloader, targetlist: List[str],
anonymous_loader.download_profile(target, profile_pic, profile_pic_only, anonymous_loader.download_profile(target, profile_pic, profile_pic_only,
fast_update, filter_func=filter_func) fast_update, filter_func=filter_func)
else: else:
raise err raise
except KeyboardInterrupt: except KeyboardInterrupt:
print("\nInterrupted by user.", file=sys.stderr) print("\nInterrupted by user.", file=sys.stderr)
# Save session if it is useful # Save session if it is useful

View File

@ -5,6 +5,10 @@ class InstaloaderException(Exception):
pass pass
class QueryReturnedBadRequestException(InstaloaderException):
pass
class QueryReturnedNotFoundException(InstaloaderException): class QueryReturnedNotFoundException(InstaloaderException):
pass pass

View File

@ -17,8 +17,6 @@ import urllib3
from .exceptions import * from .exceptions import *
GRAPHQL_PAGE_LENGTH = 200
def copy_session(session: requests.Session) -> requests.Session: def copy_session(session: requests.Session) -> requests.Session:
"""Duplicates a requests.Session.""" """Duplicates a requests.Session."""
@ -57,6 +55,7 @@ class InstaloaderContext:
self.sleep = sleep self.sleep = sleep
self.quiet = quiet self.quiet = quiet
self.max_connection_attempts = max_connection_attempts self.max_connection_attempts = max_connection_attempts
self._graphql_page_length = 50
# error log, filled with error() and printed at the end of Instaloader.main() # error log, filled with error() and printed at the end of Instaloader.main()
self.error_log = [] self.error_log = []
@ -108,7 +107,7 @@ class InstaloaderContext:
else: else:
self.error('{}'.format(err)) self.error('{}'.format(err))
if self.raise_all_errors: if self.raise_all_errors:
raise err raise
def _default_http_header(self, empty_session_only: bool = False) -> Dict[str, str]: def _default_http_header(self, empty_session_only: bool = False) -> Dict[str, str]:
"""Returns default HTTP header we use for requests.""" """Returns default HTTP header we use for requests."""
@ -192,6 +191,7 @@ class InstaloaderContext:
:param host: Domain part of the URL from where to download the requested JSON; defaults to www.instagram.com :param host: Domain part of the URL from where to download the requested JSON; defaults to www.instagram.com
:param session: Session to use, or None to use self.session :param session: Session to use, or None to use self.session
:return: Decoded response dictionary :return: Decoded response dictionary
:raises QueryReturnedBadRequestException: When the server responds with a 400.
:raises QueryReturnedNotFoundException: When the server responds with a 404. :raises QueryReturnedNotFoundException: When the server responds with a 404.
:raises ConnectionException: When query repeatedly failed. :raises ConnectionException: When query repeatedly failed.
""" """
@ -230,10 +230,12 @@ class InstaloaderContext:
params=params, allow_redirects=False) params=params, allow_redirects=False)
else: else:
break break
if resp.status_code == 400:
raise QueryReturnedBadRequestException("400 Bad Request")
if resp.status_code == 404: if resp.status_code == 404:
raise QueryReturnedNotFoundException("404") raise QueryReturnedNotFoundException("404 Not Found")
if resp.status_code == 429: if resp.status_code == 429:
raise TooManyRequestsException("429 - Too Many Requests") raise TooManyRequestsException("429 Too Many Requests")
if resp.status_code != 200: if resp.status_code != 200:
raise ConnectionException("HTTP error code {}.".format(resp.status_code)) raise ConnectionException("HTTP error code {}.".format(resp.status_code))
is_html_query = not is_graphql_query and not "__a" in params and host == "www.instagram.com" is_html_query = not is_graphql_query and not "__a" in params and host == "www.instagram.com"
@ -254,7 +256,7 @@ class InstaloaderContext:
except (ConnectionException, json.decoder.JSONDecodeError, requests.exceptions.RequestException) as err: except (ConnectionException, json.decoder.JSONDecodeError, requests.exceptions.RequestException) as err:
error_string = "JSON Query to {}: {}".format(path, err) error_string = "JSON Query to {}: {}".format(path, err)
if _attempt == self.max_connection_attempts: if _attempt == self.max_connection_attempts:
raise ConnectionException(error_string) raise ConnectionException(error_string) from err
self.error(error_string + " [retrying; skip with ^C]", repeat_at_end=False) self.error(error_string + " [retrying; skip with ^C]", repeat_at_end=False)
text_for_429 = ("HTTP error code 429 was returned because too many queries occured in the last time. " text_for_429 = ("HTTP error code 429 was returned because too many queries occured in the last time. "
"Please do not use Instagram in your browser or run multiple instances of Instaloader " "Please do not use Instagram in your browser or run multiple instances of Instaloader "
@ -271,7 +273,7 @@ class InstaloaderContext:
return self.get_json(path=path, params=params, host=host, session=sess, _attempt=_attempt + 1) return self.get_json(path=path, params=params, host=host, session=sess, _attempt=_attempt + 1)
except KeyboardInterrupt: except KeyboardInterrupt:
self.error("[skipped by user]", repeat_at_end=False) self.error("[skipped by user]", repeat_at_end=False)
raise ConnectionException(error_string) raise ConnectionException(error_string) from err
def graphql_query(self, query_hash: str, variables: Dict[str, Any], def graphql_query(self, query_hash: str, variables: Dict[str, Any],
referer: Optional[str] = None, rhx_gis: Optional[str] = None) -> Dict[str, Any]: referer: Optional[str] = None, rhx_gis: Optional[str] = None) -> Dict[str, Any]:
@ -284,7 +286,7 @@ class InstaloaderContext:
:param rhx_gis: 'rhx_gis' variable as somewhere returned by Instagram, needed to 'sign' request :param rhx_gis: 'rhx_gis' variable as somewhere returned by Instagram, needed to 'sign' request
:return: The server's response dictionary. :return: The server's response dictionary.
""" """
tmpsession = copy_session(self._session) with copy_session(self._session) as tmpsession:
tmpsession.headers.update(self._default_http_header(empty_session_only=True)) tmpsession.headers.update(self._default_http_header(empty_session_only=True))
del tmpsession.headers['Connection'] del tmpsession.headers['Connection']
del tmpsession.headers['Content-Length'] del tmpsession.headers['Content-Length']
@ -306,7 +308,6 @@ class InstaloaderContext:
params={'query_hash': query_hash, params={'query_hash': query_hash,
'variables': variables_json}, 'variables': variables_json},
session=tmpsession) session=tmpsession)
tmpsession.close()
if 'status' not in resp_json: if 'status' not in resp_json:
self.error("GraphQL response did not contain a \"status\" field.") self.error("GraphQL response did not contain a \"status\" field.")
return resp_json return resp_json
@ -317,15 +318,29 @@ class InstaloaderContext:
rhx_gis: Optional[str] = None, rhx_gis: Optional[str] = None,
first_data: Optional[Dict[str, Any]] = None) -> Iterator[Dict[str, Any]]: first_data: Optional[Dict[str, Any]] = None) -> Iterator[Dict[str, Any]]:
"""Retrieve a list of GraphQL nodes.""" """Retrieve a list of GraphQL nodes."""
query_variables['first'] = GRAPHQL_PAGE_LENGTH
def _query():
query_variables['first'] = self._graphql_page_length
try:
return edge_extractor(self.graphql_query(query_hash, query_variables, query_referer, rhx_gis))
except QueryReturnedBadRequestException:
new_page_length = int(self._graphql_page_length / 2)
if new_page_length >= 12:
self._graphql_page_length = new_page_length
self.error("HTTP Error 400 (Bad Request) on GraphQL Query. Retrying with shorter page length.",
repeat_at_end=False)
return _query()
else:
raise
if first_data: if first_data:
data = first_data data = first_data
else: else:
data = edge_extractor(self.graphql_query(query_hash, query_variables, query_referer, rhx_gis)) data = _query()
yield from (edge['node'] for edge in data['edges']) yield from (edge['node'] for edge in data['edges'])
while data['page_info']['has_next_page']: while data['page_info']['has_next_page']:
query_variables['after'] = data['page_info']['end_cursor'] query_variables['after'] = data['page_info']['end_cursor']
data = edge_extractor(self.graphql_query(query_hash, query_variables, query_referer, rhx_gis)) data = _query()
yield from (edge['node'] for edge in data['edges']) yield from (edge['node'] for edge in data['edges'])
def get_and_write_raw(self, url: str, filename: str, _attempt=1) -> None: def get_and_write_raw(self, url: str, filename: str, _attempt=1) -> None:
@ -353,11 +368,11 @@ class InstaloaderContext:
except (urllib3.exceptions.HTTPError, requests.exceptions.RequestException, ConnectionException) as err: except (urllib3.exceptions.HTTPError, requests.exceptions.RequestException, ConnectionException) as err:
error_string = "URL {}: {}".format(url, err) error_string = "URL {}: {}".format(url, err)
if _attempt == self.max_connection_attempts: if _attempt == self.max_connection_attempts:
raise ConnectionException(error_string) raise ConnectionException(error_string) from err
self.error(error_string + " [retrying; skip with ^C]", repeat_at_end=False) self.error(error_string + " [retrying; skip with ^C]", repeat_at_end=False)
try: try:
self._sleep() self._sleep()
self.get_and_write_raw(url, filename, _attempt + 1) self.get_and_write_raw(url, filename, _attempt + 1)
except KeyboardInterrupt: except KeyboardInterrupt:
self.error("[skipped by user]", repeat_at_end=False) self.error("[skipped by user]", repeat_at_end=False)
raise ConnectionException(error_string) raise ConnectionException(error_string) from err

View File

@ -7,8 +7,8 @@ import unittest
import instaloader import instaloader
PUBLIC_PROFILE = "Thammus" PUBLIC_PROFILE = "selenagomez"
PUBLIC_PROFILE_ID = 1700252981 PUBLIC_PROFILE_ID = 460563723
HASHTAG = "kitten" HASHTAG = "kitten"
OWN_USERNAME = "aandergr" OWN_USERNAME = "aandergr"
NORMAL_MAX_COUNT = 2 NORMAL_MAX_COUNT = 2