mirror of
https://github.com/instaloader/instaloader.git
synced 2024-10-27 05:32:30 +01:00
Slightly improve error handling
Use `raise ... from` rather than a plain `raise` where appropriate.
Add logic to reduce the GraphQL page length if a 400 Bad Request is returned.
This commit is contained in:
parent
066c3de113
commit
2d9acd9989
@ -140,7 +140,7 @@ def _main(instaloader: Instaloader, targetlist: List[str],
|
|||||||
anonymous_loader.download_profile(target, profile_pic, profile_pic_only,
|
anonymous_loader.download_profile(target, profile_pic, profile_pic_only,
|
||||||
fast_update, filter_func=filter_func)
|
fast_update, filter_func=filter_func)
|
||||||
else:
|
else:
|
||||||
raise err
|
raise
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print("\nInterrupted by user.", file=sys.stderr)
|
print("\nInterrupted by user.", file=sys.stderr)
|
||||||
# Save session if it is useful
|
# Save session if it is useful
|
||||||
|
@ -5,6 +5,10 @@ class InstaloaderException(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class QueryReturnedBadRequestException(InstaloaderException):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class QueryReturnedNotFoundException(InstaloaderException):
|
class QueryReturnedNotFoundException(InstaloaderException):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -17,8 +17,6 @@ import urllib3
|
|||||||
|
|
||||||
from .exceptions import *
|
from .exceptions import *
|
||||||
|
|
||||||
GRAPHQL_PAGE_LENGTH = 200
|
|
||||||
|
|
||||||
|
|
||||||
def copy_session(session: requests.Session) -> requests.Session:
|
def copy_session(session: requests.Session) -> requests.Session:
|
||||||
"""Duplicates a requests.Session."""
|
"""Duplicates a requests.Session."""
|
||||||
@ -57,6 +55,7 @@ class InstaloaderContext:
|
|||||||
self.sleep = sleep
|
self.sleep = sleep
|
||||||
self.quiet = quiet
|
self.quiet = quiet
|
||||||
self.max_connection_attempts = max_connection_attempts
|
self.max_connection_attempts = max_connection_attempts
|
||||||
|
self._graphql_page_length = 50
|
||||||
|
|
||||||
# error log, filled with error() and printed at the end of Instaloader.main()
|
# error log, filled with error() and printed at the end of Instaloader.main()
|
||||||
self.error_log = []
|
self.error_log = []
|
||||||
@ -108,7 +107,7 @@ class InstaloaderContext:
|
|||||||
else:
|
else:
|
||||||
self.error('{}'.format(err))
|
self.error('{}'.format(err))
|
||||||
if self.raise_all_errors:
|
if self.raise_all_errors:
|
||||||
raise err
|
raise
|
||||||
|
|
||||||
def _default_http_header(self, empty_session_only: bool = False) -> Dict[str, str]:
|
def _default_http_header(self, empty_session_only: bool = False) -> Dict[str, str]:
|
||||||
"""Returns default HTTP header we use for requests."""
|
"""Returns default HTTP header we use for requests."""
|
||||||
@ -192,6 +191,7 @@ class InstaloaderContext:
|
|||||||
:param host: Domain part of the URL from where to download the requested JSON; defaults to www.instagram.com
|
:param host: Domain part of the URL from where to download the requested JSON; defaults to www.instagram.com
|
||||||
:param session: Session to use, or None to use self.session
|
:param session: Session to use, or None to use self.session
|
||||||
:return: Decoded response dictionary
|
:return: Decoded response dictionary
|
||||||
|
:raises QueryReturnedBadRequestException: When the server responds with a 400.
|
||||||
:raises QueryReturnedNotFoundException: When the server responds with a 404.
|
:raises QueryReturnedNotFoundException: When the server responds with a 404.
|
||||||
:raises ConnectionException: When query repeatedly failed.
|
:raises ConnectionException: When query repeatedly failed.
|
||||||
"""
|
"""
|
||||||
@ -230,10 +230,12 @@ class InstaloaderContext:
|
|||||||
params=params, allow_redirects=False)
|
params=params, allow_redirects=False)
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
|
if resp.status_code == 400:
|
||||||
|
raise QueryReturnedBadRequestException("400 Bad Request")
|
||||||
if resp.status_code == 404:
|
if resp.status_code == 404:
|
||||||
raise QueryReturnedNotFoundException("404")
|
raise QueryReturnedNotFoundException("404 Not Found")
|
||||||
if resp.status_code == 429:
|
if resp.status_code == 429:
|
||||||
raise TooManyRequestsException("429 - Too Many Requests")
|
raise TooManyRequestsException("429 Too Many Requests")
|
||||||
if resp.status_code != 200:
|
if resp.status_code != 200:
|
||||||
raise ConnectionException("HTTP error code {}.".format(resp.status_code))
|
raise ConnectionException("HTTP error code {}.".format(resp.status_code))
|
||||||
is_html_query = not is_graphql_query and not "__a" in params and host == "www.instagram.com"
|
is_html_query = not is_graphql_query and not "__a" in params and host == "www.instagram.com"
|
||||||
@ -254,7 +256,7 @@ class InstaloaderContext:
|
|||||||
except (ConnectionException, json.decoder.JSONDecodeError, requests.exceptions.RequestException) as err:
|
except (ConnectionException, json.decoder.JSONDecodeError, requests.exceptions.RequestException) as err:
|
||||||
error_string = "JSON Query to {}: {}".format(path, err)
|
error_string = "JSON Query to {}: {}".format(path, err)
|
||||||
if _attempt == self.max_connection_attempts:
|
if _attempt == self.max_connection_attempts:
|
||||||
raise ConnectionException(error_string)
|
raise ConnectionException(error_string) from err
|
||||||
self.error(error_string + " [retrying; skip with ^C]", repeat_at_end=False)
|
self.error(error_string + " [retrying; skip with ^C]", repeat_at_end=False)
|
||||||
text_for_429 = ("HTTP error code 429 was returned because too many queries occured in the last time. "
|
text_for_429 = ("HTTP error code 429 was returned because too many queries occured in the last time. "
|
||||||
"Please do not use Instagram in your browser or run multiple instances of Instaloader "
|
"Please do not use Instagram in your browser or run multiple instances of Instaloader "
|
||||||
@ -271,7 +273,7 @@ class InstaloaderContext:
|
|||||||
return self.get_json(path=path, params=params, host=host, session=sess, _attempt=_attempt + 1)
|
return self.get_json(path=path, params=params, host=host, session=sess, _attempt=_attempt + 1)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
self.error("[skipped by user]", repeat_at_end=False)
|
self.error("[skipped by user]", repeat_at_end=False)
|
||||||
raise ConnectionException(error_string)
|
raise ConnectionException(error_string) from err
|
||||||
|
|
||||||
def graphql_query(self, query_hash: str, variables: Dict[str, Any],
|
def graphql_query(self, query_hash: str, variables: Dict[str, Any],
|
||||||
referer: Optional[str] = None, rhx_gis: Optional[str] = None) -> Dict[str, Any]:
|
referer: Optional[str] = None, rhx_gis: Optional[str] = None) -> Dict[str, Any]:
|
||||||
@ -284,29 +286,28 @@ class InstaloaderContext:
|
|||||||
:param rhx_gis: 'rhx_gis' variable as somewhere returned by Instagram, needed to 'sign' request
|
:param rhx_gis: 'rhx_gis' variable as somewhere returned by Instagram, needed to 'sign' request
|
||||||
:return: The server's response dictionary.
|
:return: The server's response dictionary.
|
||||||
"""
|
"""
|
||||||
tmpsession = copy_session(self._session)
|
with copy_session(self._session) as tmpsession:
|
||||||
tmpsession.headers.update(self._default_http_header(empty_session_only=True))
|
tmpsession.headers.update(self._default_http_header(empty_session_only=True))
|
||||||
del tmpsession.headers['Connection']
|
del tmpsession.headers['Connection']
|
||||||
del tmpsession.headers['Content-Length']
|
del tmpsession.headers['Content-Length']
|
||||||
tmpsession.headers['authority'] = 'www.instagram.com'
|
tmpsession.headers['authority'] = 'www.instagram.com'
|
||||||
tmpsession.headers['scheme'] = 'https'
|
tmpsession.headers['scheme'] = 'https'
|
||||||
tmpsession.headers['accept'] = '*/*'
|
tmpsession.headers['accept'] = '*/*'
|
||||||
if referer is not None:
|
if referer is not None:
|
||||||
tmpsession.headers['referer'] = urllib.parse.quote(referer)
|
tmpsession.headers['referer'] = urllib.parse.quote(referer)
|
||||||
|
|
||||||
variables_json = json.dumps(variables, separators=(',', ':'))
|
variables_json = json.dumps(variables, separators=(',', ':'))
|
||||||
|
|
||||||
if rhx_gis:
|
if rhx_gis:
|
||||||
#self.log("rhx_gis {} query_hash {}".format(rhx_gis, query_hash))
|
#self.log("rhx_gis {} query_hash {}".format(rhx_gis, query_hash))
|
||||||
values = "{}:{}".format(rhx_gis, variables_json)
|
values = "{}:{}".format(rhx_gis, variables_json)
|
||||||
x_instagram_gis = hashlib.md5(values.encode()).hexdigest()
|
x_instagram_gis = hashlib.md5(values.encode()).hexdigest()
|
||||||
tmpsession.headers['x-instagram-gis'] = x_instagram_gis
|
tmpsession.headers['x-instagram-gis'] = x_instagram_gis
|
||||||
|
|
||||||
resp_json = self.get_json('graphql/query',
|
resp_json = self.get_json('graphql/query',
|
||||||
params={'query_hash': query_hash,
|
params={'query_hash': query_hash,
|
||||||
'variables': variables_json},
|
'variables': variables_json},
|
||||||
session=tmpsession)
|
session=tmpsession)
|
||||||
tmpsession.close()
|
|
||||||
if 'status' not in resp_json:
|
if 'status' not in resp_json:
|
||||||
self.error("GraphQL response did not contain a \"status\" field.")
|
self.error("GraphQL response did not contain a \"status\" field.")
|
||||||
return resp_json
|
return resp_json
|
||||||
@ -317,15 +318,29 @@ class InstaloaderContext:
|
|||||||
rhx_gis: Optional[str] = None,
|
rhx_gis: Optional[str] = None,
|
||||||
first_data: Optional[Dict[str, Any]] = None) -> Iterator[Dict[str, Any]]:
|
first_data: Optional[Dict[str, Any]] = None) -> Iterator[Dict[str, Any]]:
|
||||||
"""Retrieve a list of GraphQL nodes."""
|
"""Retrieve a list of GraphQL nodes."""
|
||||||
query_variables['first'] = GRAPHQL_PAGE_LENGTH
|
|
||||||
|
def _query():
|
||||||
|
query_variables['first'] = self._graphql_page_length
|
||||||
|
try:
|
||||||
|
return edge_extractor(self.graphql_query(query_hash, query_variables, query_referer, rhx_gis))
|
||||||
|
except QueryReturnedBadRequestException:
|
||||||
|
new_page_length = int(self._graphql_page_length / 2)
|
||||||
|
if new_page_length >= 12:
|
||||||
|
self._graphql_page_length = new_page_length
|
||||||
|
self.error("HTTP Error 400 (Bad Request) on GraphQL Query. Retrying with shorter page length.",
|
||||||
|
repeat_at_end=False)
|
||||||
|
return _query()
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
|
||||||
if first_data:
|
if first_data:
|
||||||
data = first_data
|
data = first_data
|
||||||
else:
|
else:
|
||||||
data = edge_extractor(self.graphql_query(query_hash, query_variables, query_referer, rhx_gis))
|
data = _query()
|
||||||
yield from (edge['node'] for edge in data['edges'])
|
yield from (edge['node'] for edge in data['edges'])
|
||||||
while data['page_info']['has_next_page']:
|
while data['page_info']['has_next_page']:
|
||||||
query_variables['after'] = data['page_info']['end_cursor']
|
query_variables['after'] = data['page_info']['end_cursor']
|
||||||
data = edge_extractor(self.graphql_query(query_hash, query_variables, query_referer, rhx_gis))
|
data = _query()
|
||||||
yield from (edge['node'] for edge in data['edges'])
|
yield from (edge['node'] for edge in data['edges'])
|
||||||
|
|
||||||
def get_and_write_raw(self, url: str, filename: str, _attempt=1) -> None:
|
def get_and_write_raw(self, url: str, filename: str, _attempt=1) -> None:
|
||||||
@ -353,11 +368,11 @@ class InstaloaderContext:
|
|||||||
except (urllib3.exceptions.HTTPError, requests.exceptions.RequestException, ConnectionException) as err:
|
except (urllib3.exceptions.HTTPError, requests.exceptions.RequestException, ConnectionException) as err:
|
||||||
error_string = "URL {}: {}".format(url, err)
|
error_string = "URL {}: {}".format(url, err)
|
||||||
if _attempt == self.max_connection_attempts:
|
if _attempt == self.max_connection_attempts:
|
||||||
raise ConnectionException(error_string)
|
raise ConnectionException(error_string) from err
|
||||||
self.error(error_string + " [retrying; skip with ^C]", repeat_at_end=False)
|
self.error(error_string + " [retrying; skip with ^C]", repeat_at_end=False)
|
||||||
try:
|
try:
|
||||||
self._sleep()
|
self._sleep()
|
||||||
self.get_and_write_raw(url, filename, _attempt + 1)
|
self.get_and_write_raw(url, filename, _attempt + 1)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
self.error("[skipped by user]", repeat_at_end=False)
|
self.error("[skipped by user]", repeat_at_end=False)
|
||||||
raise ConnectionException(error_string)
|
raise ConnectionException(error_string) from err
|
||||||
|
@ -7,8 +7,8 @@ import unittest
|
|||||||
|
|
||||||
import instaloader
|
import instaloader
|
||||||
|
|
||||||
PUBLIC_PROFILE = "Thammus"
|
PUBLIC_PROFILE = "selenagomez"
|
||||||
PUBLIC_PROFILE_ID = 1700252981
|
PUBLIC_PROFILE_ID = 460563723
|
||||||
HASHTAG = "kitten"
|
HASHTAG = "kitten"
|
||||||
OWN_USERNAME = "aandergr"
|
OWN_USERNAME = "aandergr"
|
||||||
NORMAL_MAX_COUNT = 2
|
NORMAL_MAX_COUNT = 2
|
||||||
|
Loading…
Reference in New Issue
Block a user