From 63eb9e71ca7fdc8b73b1b5c75658f997591b2b80 Mon Sep 17 00:00:00 2001 From: corpone2 <55957093+corpone2@users.noreply.github.com> Date: Mon, 13 Jan 2020 12:59:00 -0800 Subject: [PATCH] support passing timeout to requests (#422) Adds a request_timeout parameter to Instaloader instantiation. Closes #330. --- docs/cli-options.rst | 4 ++++ instaloader/__main__.py | 3 +++ instaloader/instaloader.py | 30 ++++++++++++++++++++---------- instaloader/instaloadercontext.py | 28 +++++++++++++++++++++++----- 4 files changed, 50 insertions(+), 15 deletions(-) diff --git a/docs/cli-options.rst b/docs/cli-options.rst index 731d309..8b8e45b 100644 --- a/docs/cli-options.rst +++ b/docs/cli-options.rst @@ -246,6 +246,10 @@ Miscellaneous Options if login credentials are needed but not given. This is handy for running :ref:`instaloader-as-cronjob`. +.. option:: --request-timeout N + + Seconds to wait before timing out a connection request. + .. option:: +args.txt Read arguments from file `args.txt`, a shortcut to provide arguments from diff --git a/instaloader/__main__.py b/instaloader/__main__.py index cf51d53..c01c44f 100644 --- a/instaloader/__main__.py +++ b/instaloader/__main__.py @@ -343,6 +343,8 @@ def main(): help='Tries to ensure downloaded images avoid corruption in case of unexpected interruption. ' 'If the last picture is corrupted, Instaloader will fix the picture the next time it is run. 
' 'Requires the JSON metadata to be saved.') + g_how.add_argument('--request-timeout', metavar='N', type=float, + help='seconds to wait before timing out a connection request') g_misc = parser.add_argument_group('Miscellaneous Options') g_misc.add_argument('-q', '--quiet', action='store_true', @@ -397,6 +399,7 @@ def main(): post_metadata_txt_pattern=post_metadata_txt_pattern, storyitem_metadata_txt_pattern=storyitem_metadata_txt_pattern, max_connection_attempts=args.max_connection_attempts, + request_timeout=args.request_timeout, commit_mode=args.commit_mode) _main(loader, args.profile, diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index ddf8f8a..de49ac1 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -142,6 +142,7 @@ class Instaloader: :param storyitem_metadata_txt_pattern: :option:`--storyitem-metadata-txt`, default is empty (=none) :param max_connection_attempts: :option:`--max-connection-attempts` :param commit_mode: :option:`--commit-mode` + :param request_timeout: :option:`--request-timeout`, set per-request timeout (seconds) .. 
attribute:: context @@ -150,6 +151,7 @@ class Instaloader: def __init__(self, sleep: bool = True, quiet: bool = False, + request_timeout: Optional[float] = None, user_agent: Optional[str] = None, dirname_pattern: Optional[str] = None, filename_pattern: Optional[str] = None, @@ -165,7 +167,7 @@ class Instaloader: max_connection_attempts: int = 3, commit_mode: bool = False): - self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts) + self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts, request_timeout) # configuration parameters self.dirname_pattern = dirname_pattern or "{target}" @@ -191,15 +193,23 @@ class Instaloader: @contextmanager def anonymous_copy(self): """Yield an anonymous, otherwise equally-configured copy of an Instaloader instance; Then copy its error log.""" - new_loader = Instaloader(self.context.sleep, self.context.quiet, self.context.user_agent, self.dirname_pattern, - self.filename_pattern, download_pictures=self.download_pictures, - download_videos=self.download_videos, - download_video_thumbnails=self.download_video_thumbnails, - download_geotags=self.download_geotags, download_comments=self.download_comments, - save_metadata=self.save_metadata, compress_json=self.compress_json, - post_metadata_txt_pattern=self.post_metadata_txt_pattern, - storyitem_metadata_txt_pattern=self.storyitem_metadata_txt_pattern, - max_connection_attempts=self.context.max_connection_attempts) + new_loader = Instaloader( + self.context.sleep, + self.context.quiet, + self.context.request_timeout, + self.context.user_agent, + self.dirname_pattern, + self.filename_pattern, + download_pictures=self.download_pictures, + download_videos=self.download_videos, + download_video_thumbnails=self.download_video_thumbnails, + download_geotags=self.download_geotags, + download_comments=self.download_comments, + save_metadata=self.save_metadata, + compress_json=self.compress_json, 
post_metadata_txt_pattern=self.post_metadata_txt_pattern, + storyitem_metadata_txt_pattern=self.storyitem_metadata_txt_pattern, + max_connection_attempts=self.context.max_connection_attempts) yield new_loader self.context.error_log.extend(new_loader.context.error_log) new_loader.context.error_log = [] # avoid double-printing of errors diff --git a/instaloader/instaloadercontext.py b/instaloader/instaloadercontext.py index 3b508a0..f5ae8cd 100644 --- a/instaloader/instaloadercontext.py +++ b/instaloader/instaloadercontext.py @@ -10,6 +10,7 @@ import time import urllib.parse from contextlib import contextmanager from datetime import datetime, timedelta +from functools import partial from typing import Any, Callable, Dict, Iterator, List, Optional, Union import requests @@ -18,11 +19,15 @@ import requests.utils from .exceptions import * -def copy_session(session: requests.Session) -> requests.Session: +def copy_session(session: requests.Session, request_timeout: Optional[float] = None) -> requests.Session: """Duplicates a requests.Session.""" new = requests.Session() new.cookies = requests.utils.cookiejar_from_dict(requests.utils.dict_from_cookiejar(session.cookies)) new.headers = session.headers.copy() # type: ignore + if request_timeout is not None: + # Override default timeout behavior. + # Need to silence mypy bug for this. 
See: https://github.com/python/mypy/issues/2427 + new.request = partial(new.request, timeout=request_timeout) # type: ignore return new @@ -47,9 +52,10 @@ class InstaloaderContext: """ def __init__(self, sleep: bool = True, quiet: bool = False, user_agent: Optional[str] = None, - max_connection_attempts: int = 3): + max_connection_attempts: int = 3, request_timeout: Optional[float] = None): self.user_agent = user_agent if user_agent is not None else default_user_agent() + self.request_timeout = request_timeout self._session = self.get_anonymous_session() self.username = None self.sleep = sleep @@ -155,6 +161,10 @@ class InstaloaderContext: 'ig_vw': '1920', 'csrftoken': '', 's_network': '', 'ds_user_id': ''}) session.headers.update(self._default_http_header(empty_session_only=True)) + if self.request_timeout is not None: + # Override default timeout behavior. + # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427 + session.request = partial(session.request, timeout=self.request_timeout) # type: ignore return session def save_session_to_file(self, sessionfile): @@ -167,6 +177,10 @@ class InstaloaderContext: session.cookies = requests.utils.cookiejar_from_dict(pickle.load(sessionfile)) session.headers.update(self._default_http_header()) session.headers.update({'X-CSRFToken': session.cookies.get_dict()['csrftoken']}) + if self.request_timeout is not None: + # Override default timeout behavior. + # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427 + session.request = partial(session.request, timeout=self.request_timeout) # type: ignore self._session = session self.username = username @@ -191,6 +205,10 @@ class InstaloaderContext: 'ig_vw': '1920', 'ig_cb': '1', 'csrftoken': '', 's_network': '', 'ds_user_id': ''}) session.headers.update(self._default_http_header()) + if self.request_timeout is not None: + # Override default timeout behavior. + # Need to silence mypy bug for this. 
See: https://github.com/python/mypy/issues/2427 + session.request = partial(session.request, timeout=self.request_timeout) # type: ignore session.get('https://www.instagram.com/web/__mid/') csrf_token = session.cookies.get_dict()['csrftoken'] session.headers.update({'X-CSRFToken': csrf_token}) @@ -203,7 +221,7 @@ class InstaloaderContext: except json.decoder.JSONDecodeError: raise ConnectionException("Login error: JSON decode fail, {} - {}.".format(login.status_code, login.reason)) if resp_json.get('two_factor_required'): - two_factor_session = copy_session(session) + two_factor_session = copy_session(session, self.request_timeout) two_factor_session.headers.update({'X-CSRFToken': csrf_token}) two_factor_session.cookies.update({'csrftoken': csrf_token}) self.two_factor_auth_pending = (two_factor_session, @@ -426,7 +444,7 @@ class InstaloaderContext: :param rhx_gis: 'rhx_gis' variable as somewhere returned by Instagram, needed to 'sign' request :return: The server's response dictionary. """ - with copy_session(self._session) as tmpsession: + with copy_session(self._session, self.request_timeout) as tmpsession: tmpsession.headers.update(self._default_http_header(empty_session_only=True)) del tmpsession.headers['Connection'] del tmpsession.headers['Content-Length'] @@ -494,7 +512,7 @@ class InstaloaderContext: :raises ConnectionException: When query repeatedly failed. .. versionadded:: 4.2.1""" - with copy_session(self._session) as tempsession: + with copy_session(self._session, self.request_timeout) as tempsession: tempsession.headers['User-Agent'] = 'Instagram 10.3.2 (iPhone7,2; iPhone OS 9_3_3; en_US; en-US; ' \ 'scale=2.00; 750x1334) AppleWebKit/420+' for header in ['Host', 'Origin', 'X-Instagram-AJAX', 'X-Requested-With']: