
More control over connection retry mechanism

- If ^C is hit while a failed connection is being retried, the retry is
  aborted.

- Option --max-connection-attempts to override the default number of
  connection attempts that are made.

Closes #51.
Alexander Graf 2017-09-29 16:09:15 +02:00
parent e0ed4cf16c
commit a1920b97a4
2 changed files with 44 additions and 25 deletions
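
Before the diffs, a standalone sketch of the retry flow the commit message describes. This is not Instaloader's code: the helper name, the IOError stand-in and the fixed two-second back-off are assumptions, but the control flow (an attempt counter checked against a configurable maximum, and Ctrl-C during a retry skipping only the current request) mirrors the changes below.

    import time


    class ConnectionException(Exception):
        """Raised when a request finally gives up or is skipped by the user."""


    def fetch_with_retries(fetch, max_connection_attempts=3, _attempt=1):
        # Hypothetical helper mirroring the pattern introduced in this commit.
        # max_connection_attempts=0 never equals _attempt, i.e. retry forever.
        try:
            return fetch()
        except IOError as err:
            if _attempt == max_connection_attempts:
                raise ConnectionException("{} [giving up]".format(err))
            print("{} [retrying; skip with ^C]".format(err))
            try:
                time.sleep(2)  # back off a little before the next attempt
                return fetch_with_retries(fetch, max_connection_attempts, _attempt + 1)
            except KeyboardInterrupt:
                print("[skipped by user]")
                raise ConnectionException("{} [skipped by user]".format(err))

For instance, fetch_with_retries(lambda: urllib.request.urlopen(url).read()) would give a flaky download three attempts before giving up, since URLError is a subclass of IOError.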


@@ -142,6 +142,12 @@ How to Download
    User Agent to use for HTTP requests. Per default, Instaloader pretends being
    Chrome/51.
 
+.. option:: --max-connection-attempts N
+
+   Maximum number of connection attempts until a request is aborted. Defaults
+   to ``3``. If a connection fails, it can be manually skipped by hitting
+   :kbd:`Control-c`. Set this to ``0`` to retry infinitely.
+
 Miscellaneous Options
 ^^^^^^^^^^^^^^^^^^^^^
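
For library use, the same limit is exposed through the constructor parameter added in the second file of this commit. A minimal sketch, assuming the 2017 single-module layout where the class can be imported as instaloader.Instaloader:

    from instaloader import Instaloader

    # Allow five attempts per request instead of the default three;
    # 0 would keep retrying until the request is skipped with Ctrl-C.
    loader = Instaloader(max_connection_attempts=5)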


@@ -405,7 +405,8 @@ class Instaloader:
                  download_geotags: Tristate = Tristate.no_extra_query,
                  save_captions: Tristate = Tristate.no_extra_query,
                  download_comments: Tristate = Tristate.no_extra_query,
-                 save_metadata: Tristate = Tristate.never):
+                 save_metadata: Tristate = Tristate.never,
+                 max_connection_attempts: int = 3):
 
         # configuration parameters
         self.user_agent = user_agent if user_agent is not None else default_user_agent()
@@ -421,6 +422,7 @@ class Instaloader:
         self.save_captions = save_captions
         self.download_comments = download_comments
         self.save_metadata = save_metadata
+        self.max_connection_attempts = max_connection_attempts
 
         # error log, filled with error() and printed at the end of Instaloader.main()
         self.error_log = []
@@ -439,7 +441,8 @@ class Instaloader:
             new_loader = Instaloader(self.sleep, self.quiet, self.user_agent,
                                      self.dirname_pattern, self.filename_pattern,
                                      self.download_videos, self.download_geotags,
-                                     self.save_captions, self.download_comments)
+                                     self.save_captions, self.download_comments,
+                                     self.save_metadata, self.max_connection_attempts)
             new_loader.previous_queries = self.previous_queries
             yield new_loader
             self.error_log.extend(new_loader.error_log)
@@ -477,7 +480,7 @@ class Instaloader:
         if self.sleep:
             time.sleep(random.uniform(0.5, 3))
 
-    def _get_and_write_raw(self, url: str, filename: str, tries: int = 3) -> None:
+    def _get_and_write_raw(self, url: str, filename: str, _attempt = 1) -> None:
         """Downloads raw data.
 
         :raises QueryReturnedNotFoundException: When the server responds with a 404.
@@ -496,21 +499,23 @@ class Instaloader:
                 raise ConnectionException("HTTP error code {}.".format(resp.status_code))
         except (urllib3.exceptions.HTTPError, requests.exceptions.RequestException, ConnectionException) as err:
             error_string = "URL {}: {}".format(url, err)
-            if tries <= 1:
-                raise ConnectionException(error_string)
-            else:
-                self.error(error_string + " [retrying]", repeat_at_end=False)
-                self._sleep()
-                self._get_and_write_raw(url, filename, tries - 1)
+            if _attempt == self.max_connection_attempts:
+                raise ConnectionException(error_string)
+            self.error(error_string + " [retrying; skip with ^C]", repeat_at_end=False)
+            try:
+                self._sleep()
+                self._get_and_write_raw(url, filename, _attempt + 1)
+            except KeyboardInterrupt:
+                self.error("[skipped by user]", repeat_at_end=False)
+                raise ConnectionException(error_string)
 
     def get_json(self, url: str, params: Dict[str, Any],
-                 session: Optional[requests.Session] = None, tries: int = 3) -> Dict[str, Any]:
+                 session: Optional[requests.Session] = None, _attempt = 1) -> Dict[str, Any]:
         """JSON request to Instagram.
 
         :param url: URL, relative to www.instagram.com/
         :param params: GET parameters
         :param session: Session to use, or None to use self.session
-        :param tries: Maximum number of attempts until an exception is raised
         :return: Decoded response dictionary
         :raises QueryReturnedNotFoundException: When the server responds with a 404.
         :raises ConnectionException: When query repeatedly failed.
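
Since _attempt starts at 1 and the guard fires when the failing attempt equals max_connection_attempts, exactly that many attempts are made, and a value of 0 never matches, which is the documented "retry infinitely" behaviour. A small standalone check of that accounting (not Instaloader code):

    def attempts_made(max_connection_attempts):
        # Count failing attempts until the guard used above would give up.
        attempts, _attempt = 0, 1
        while True:
            attempts += 1                                # one (failed) attempt
            if _attempt == max_connection_attempts:      # same guard as in the diff
                return attempts
            _attempt += 1                                # otherwise retry

    assert attempts_made(1) == 1
    assert attempts_made(3) == 3
    # attempts_made(0) would loop forever: the documented "retry infinitely" case.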
@@ -558,21 +563,25 @@ class Instaloader:
             return resp_json
         except (ConnectionException, json.decoder.JSONDecodeError, requests.exceptions.RequestException) as err:
             error_string = "JSON Query to {}: {}".format(url, err)
-            if tries <= 1:
-                raise ConnectionException(error_string)
-            self.error(error_string + " [retrying]", repeat_at_end=False)
-            if isinstance(err, TooManyRequests):
-                text_for_429 = ("HTTP error code 429 was returned because too many queries occured in the last time. "
-                                "Please do not use Instagram in your browser or run multiple instances of Instaloader "
-                                "in parallel.")
-                print(textwrap.fill(text_for_429), file=sys.stderr)
-                if is_graphql_query:
-                    waittime = graphql_query_waittime(query_id=params['query_id'], untracked_queries=True)
-                    if waittime > 0:
-                        self._log('The request will be retried in {} seconds.'.format(waittime))
-                        time.sleep(waittime)
-            self._sleep()
-            return self.get_json(url, params, sess, tries - 1)
+            if _attempt == self.max_connection_attempts:
+                raise ConnectionException(error_string)
+            self.error(error_string + " [retrying; skip with ^C]", repeat_at_end=False)
+            text_for_429 = ("HTTP error code 429 was returned because too many queries occured in the last time. "
+                            "Please do not use Instagram in your browser or run multiple instances of Instaloader "
+                            "in parallel.")
+            try:
+                if isinstance(err, TooManyRequests):
+                    print(textwrap.fill(text_for_429), file=sys.stderr)
+                    if is_graphql_query:
+                        waittime = graphql_query_waittime(query_id=params['query_id'], untracked_queries=True)
+                        if waittime > 0:
+                            self._log('The request will be retried in {} seconds.'.format(waittime))
+                            time.sleep(waittime)
+                self._sleep()
+                return self.get_json(url, params, sess, _attempt + 1)
+            except KeyboardInterrupt:
+                self.error("[skipped by user]", repeat_at_end=False)
+                raise ConnectionException(error_string)
 
     def _default_http_header(self, empty_session_only: bool = False) -> Dict[str, str]:
         """Returns default HTTP header we use for requests."""
@@ -1464,6 +1473,10 @@ def main():
     g_how.add_argument('--user-agent',
                        help='User Agent to use for HTTP requests. Defaults to \'{}\'.'.format(default_user_agent()))
     g_how.add_argument('-S', '--no-sleep', action='store_true', help=SUPPRESS)
+    g_how.add_argument('--max-connection-attempts', metavar='N', type=int, default=3,
+                       help='Maximum number of connection attempts until a request is aborted. Defaults to 3. If a '
+                            'connection fails, it can be manually skipped by hitting CTRL+C. Set this to 0 to retry '
+                            'infinitely.')
 
     g_misc = parser.add_argument_group('Miscellaneous Options')
     g_misc.add_argument('-q', '--quiet', action='store_true',
@@ -1505,7 +1518,7 @@ def main():
                          dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern,
                          download_videos=download_videos, download_geotags=download_geotags,
                          save_captions=save_captions, download_comments=download_comments,
-                         save_metadata=save_metadata)
+                         save_metadata=save_metadata, max_connection_attempts=args.max_connection_attempts)
     loader.main(args.profile, args.login.lower() if args.login is not None else None, args.password,
                 args.sessionfile,
                 int(args.count) if args.count is not None else None,