1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-08-18 12:49:38 +02:00

support passing timeout to requests (#422)

Adds a request_timeout parameter to Instaloader instantiation.

Closes #330.
This commit is contained in:
corpone2 2020-01-13 12:59:00 -08:00 committed by Alexander Graf
parent d8ba30b91b
commit 63eb9e71ca
4 changed files with 50 additions and 15 deletions

View File

@ -246,6 +246,10 @@ Miscellaneous Options
if login credentials are needed but not given. if login credentials are needed but not given.
This is handy for running :ref:`instaloader-as-cronjob`. This is handy for running :ref:`instaloader-as-cronjob`.
.. option:: --request-timeout N
Seconds to wait before timing out a connection request.
.. option:: +args.txt .. option:: +args.txt
Read arguments from file `args.txt`, a shortcut to provide arguments from Read arguments from file `args.txt`, a shortcut to provide arguments from

View File

@ -343,6 +343,8 @@ def main():
help='Tries to ensure downloaded images avoid corruption in case of unexpected interruption. ' help='Tries to ensure downloaded images avoid corruption in case of unexpected interruption. '
'If the last picture is corrupted, Instaloader will fix the picture the next time it is run. ' 'If the last picture is corrupted, Instaloader will fix the picture the next time it is run. '
'Requires the JSON metadata to be saved.') 'Requires the JSON metadata to be saved.')
g_how.add_argument('--request-timeout', metavar='N', type=float,
help='seconds to wait before timing out a connection request')
g_misc = parser.add_argument_group('Miscellaneous Options') g_misc = parser.add_argument_group('Miscellaneous Options')
g_misc.add_argument('-q', '--quiet', action='store_true', g_misc.add_argument('-q', '--quiet', action='store_true',
@ -397,6 +399,7 @@ def main():
post_metadata_txt_pattern=post_metadata_txt_pattern, post_metadata_txt_pattern=post_metadata_txt_pattern,
storyitem_metadata_txt_pattern=storyitem_metadata_txt_pattern, storyitem_metadata_txt_pattern=storyitem_metadata_txt_pattern,
max_connection_attempts=args.max_connection_attempts, max_connection_attempts=args.max_connection_attempts,
request_timeout=args.request_timeout,
commit_mode=args.commit_mode) commit_mode=args.commit_mode)
_main(loader, _main(loader,
args.profile, args.profile,

View File

@ -142,6 +142,7 @@ class Instaloader:
:param storyitem_metadata_txt_pattern: :option:`--storyitem-metadata-txt`, default is empty (=none) :param storyitem_metadata_txt_pattern: :option:`--storyitem-metadata-txt`, default is empty (=none)
:param max_connection_attempts: :option:`--max-connection-attempts` :param max_connection_attempts: :option:`--max-connection-attempts`
:param commit_mode: :option:`--commit-mode` :param commit_mode: :option:`--commit-mode`
:param request_timeout: :option:`--request-timeout`, set per-request timeout (seconds)
.. attribute:: context .. attribute:: context
@ -150,6 +151,7 @@ class Instaloader:
def __init__(self, def __init__(self,
sleep: bool = True, quiet: bool = False, sleep: bool = True, quiet: bool = False,
request_timeout: Optional[float] = None,
user_agent: Optional[str] = None, user_agent: Optional[str] = None,
dirname_pattern: Optional[str] = None, dirname_pattern: Optional[str] = None,
filename_pattern: Optional[str] = None, filename_pattern: Optional[str] = None,
@ -165,7 +167,7 @@ class Instaloader:
max_connection_attempts: int = 3, max_connection_attempts: int = 3,
commit_mode: bool = False): commit_mode: bool = False):
self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts) self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts, request_timeout)
# configuration parameters # configuration parameters
self.dirname_pattern = dirname_pattern or "{target}" self.dirname_pattern = dirname_pattern or "{target}"
@ -191,15 +193,23 @@ class Instaloader:
@contextmanager
def anonymous_copy(self):
    """Yield an anonymous, otherwise equally-configured copy of an Instaloader instance; Then copy its error log.

    The copy is built from this instance's context settings (sleep, quiet, request timeout,
    user agent, max connection attempts) and its download/metadata configuration.
    After the ``with`` body finishes, the copy's error log is appended to this instance's
    error log and then cleared on the copy.
    """
    # Every argument is passed by keyword. ``__init__`` declares ``quiet`` before
    # ``request_timeout``, so a positional call that lists the timeout second would
    # silently swap the two values.
    new_loader = Instaloader(
        sleep=self.context.sleep,
        quiet=self.context.quiet,
        request_timeout=self.context.request_timeout,
        user_agent=self.context.user_agent,
        dirname_pattern=self.dirname_pattern,
        filename_pattern=self.filename_pattern,
        download_pictures=self.download_pictures,
        download_videos=self.download_videos,
        download_video_thumbnails=self.download_video_thumbnails,
        download_geotags=self.download_geotags,
        download_comments=self.download_comments,
        save_metadata=self.save_metadata,
        compress_json=self.compress_json,
        post_metadata_txt_pattern=self.post_metadata_txt_pattern,
        storyitem_metadata_txt_pattern=self.storyitem_metadata_txt_pattern,
        max_connection_attempts=self.context.max_connection_attempts)
    yield new_loader
    self.context.error_log.extend(new_loader.context.error_log)
    new_loader.context.error_log = []  # avoid double-printing of errors

View File

@ -10,6 +10,7 @@ import time
import urllib.parse import urllib.parse
from contextlib import contextmanager from contextlib import contextmanager
from datetime import datetime, timedelta from datetime import datetime, timedelta
from functools import partial
from typing import Any, Callable, Dict, Iterator, List, Optional, Union from typing import Any, Callable, Dict, Iterator, List, Optional, Union
import requests import requests
@ -18,11 +19,15 @@ import requests.utils
from .exceptions import * from .exceptions import *
def copy_session(session: requests.Session, request_timeout: Optional[float] = None) -> requests.Session:
    """Duplicates a requests.Session.

    The duplicate gets a copy of the cookies and headers of ``session``.

    :param session: Session whose cookies and headers are copied.
    :param request_timeout: If not None, the duplicate's ``request`` method is wrapped so that
       this value is supplied as its ``timeout`` keyword argument by default.
    :return: The newly created session.
    """
    duplicate = requests.Session()
    cookie_dict = requests.utils.dict_from_cookiejar(session.cookies)
    duplicate.cookies = requests.utils.cookiejar_from_dict(cookie_dict)
    duplicate.headers = session.headers.copy()  # type: ignore
    if request_timeout is None:
        return duplicate
    # Override default timeout behavior.
    # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427
    duplicate.request = partial(duplicate.request, timeout=request_timeout)  # type: ignore
    return duplicate
@ -47,9 +52,10 @@ class InstaloaderContext:
""" """
def __init__(self, sleep: bool = True, quiet: bool = False, user_agent: Optional[str] = None, def __init__(self, sleep: bool = True, quiet: bool = False, user_agent: Optional[str] = None,
max_connection_attempts: int = 3): max_connection_attempts: int = 3, request_timeout: Optional[float] = None):
self.user_agent = user_agent if user_agent is not None else default_user_agent() self.user_agent = user_agent if user_agent is not None else default_user_agent()
self.request_timeout = request_timeout
self._session = self.get_anonymous_session() self._session = self.get_anonymous_session()
self.username = None self.username = None
self.sleep = sleep self.sleep = sleep
@ -155,6 +161,10 @@ class InstaloaderContext:
'ig_vw': '1920', 'csrftoken': '', 'ig_vw': '1920', 'csrftoken': '',
's_network': '', 'ds_user_id': ''}) 's_network': '', 'ds_user_id': ''})
session.headers.update(self._default_http_header(empty_session_only=True)) session.headers.update(self._default_http_header(empty_session_only=True))
if self.request_timeout is not None:
# Override default timeout behavior.
# Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427
session.request = partial(session.request, timeout=self.request_timeout) # type: ignore
return session return session
def save_session_to_file(self, sessionfile): def save_session_to_file(self, sessionfile):
@ -167,6 +177,10 @@ class InstaloaderContext:
session.cookies = requests.utils.cookiejar_from_dict(pickle.load(sessionfile)) session.cookies = requests.utils.cookiejar_from_dict(pickle.load(sessionfile))
session.headers.update(self._default_http_header()) session.headers.update(self._default_http_header())
session.headers.update({'X-CSRFToken': session.cookies.get_dict()['csrftoken']}) session.headers.update({'X-CSRFToken': session.cookies.get_dict()['csrftoken']})
if self.request_timeout is not None:
# Override default timeout behavior.
# Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427
session.request = partial(session.request, timeout=self.request_timeout) # type: ignore
self._session = session self._session = session
self.username = username self.username = username
@ -191,6 +205,10 @@ class InstaloaderContext:
'ig_vw': '1920', 'ig_cb': '1', 'csrftoken': '', 'ig_vw': '1920', 'ig_cb': '1', 'csrftoken': '',
's_network': '', 'ds_user_id': ''}) 's_network': '', 'ds_user_id': ''})
session.headers.update(self._default_http_header()) session.headers.update(self._default_http_header())
if self.request_timeout is not None:
# Override default timeout behavior.
# Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427
session.request = partial(session.request, timeout=self.request_timeout) # type: ignore
session.get('https://www.instagram.com/web/__mid/') session.get('https://www.instagram.com/web/__mid/')
csrf_token = session.cookies.get_dict()['csrftoken'] csrf_token = session.cookies.get_dict()['csrftoken']
session.headers.update({'X-CSRFToken': csrf_token}) session.headers.update({'X-CSRFToken': csrf_token})
@ -203,7 +221,7 @@ class InstaloaderContext:
except json.decoder.JSONDecodeError: except json.decoder.JSONDecodeError:
raise ConnectionException("Login error: JSON decode fail, {} - {}.".format(login.status_code, login.reason)) raise ConnectionException("Login error: JSON decode fail, {} - {}.".format(login.status_code, login.reason))
if resp_json.get('two_factor_required'): if resp_json.get('two_factor_required'):
two_factor_session = copy_session(session) two_factor_session = copy_session(session, self.request_timeout)
two_factor_session.headers.update({'X-CSRFToken': csrf_token}) two_factor_session.headers.update({'X-CSRFToken': csrf_token})
two_factor_session.cookies.update({'csrftoken': csrf_token}) two_factor_session.cookies.update({'csrftoken': csrf_token})
self.two_factor_auth_pending = (two_factor_session, self.two_factor_auth_pending = (two_factor_session,
@ -426,7 +444,7 @@ class InstaloaderContext:
:param rhx_gis: 'rhx_gis' variable as somewhere returned by Instagram, needed to 'sign' request :param rhx_gis: 'rhx_gis' variable as somewhere returned by Instagram, needed to 'sign' request
:return: The server's response dictionary. :return: The server's response dictionary.
""" """
with copy_session(self._session) as tmpsession: with copy_session(self._session, self.request_timeout) as tmpsession:
tmpsession.headers.update(self._default_http_header(empty_session_only=True)) tmpsession.headers.update(self._default_http_header(empty_session_only=True))
del tmpsession.headers['Connection'] del tmpsession.headers['Connection']
del tmpsession.headers['Content-Length'] del tmpsession.headers['Content-Length']
@ -494,7 +512,7 @@ class InstaloaderContext:
:raises ConnectionException: When query repeatedly failed. :raises ConnectionException: When query repeatedly failed.
.. versionadded:: 4.2.1""" .. versionadded:: 4.2.1"""
with copy_session(self._session) as tempsession: with copy_session(self._session, self.request_timeout) as tempsession:
tempsession.headers['User-Agent'] = 'Instagram 10.3.2 (iPhone7,2; iPhone OS 9_3_3; en_US; en-US; ' \ tempsession.headers['User-Agent'] = 'Instagram 10.3.2 (iPhone7,2; iPhone OS 9_3_3; en_US; en-US; ' \
'scale=2.00; 750x1334) AppleWebKit/420+' 'scale=2.00; 750x1334) AppleWebKit/420+'
for header in ['Host', 'Origin', 'X-Instagram-AJAX', 'X-Requested-With']: for header in ['Host', 'Origin', 'X-Instagram-AJAX', 'X-Requested-With']: