1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-09-11 16:22:24 +02:00

Change sleep interval between requests

These are now adapted to how many requests have already been done. With
the current settings, Instaloader makes no more than

12 requests in the first ten seconds,
28 requests in the first minute,
40 requests in the first two minutes,
63 requests in the first five minutes,
90 requests in the first ten minutes,
and after that 50 requests per ten minutes.

This should make it less likely that Instaloader is rate-limited by
Instagram, while still being fast if downloading only a few posts.

Further, the option --no-sleep is now hidden from the --help output and removed from README.rst.
This commit is contained in:
Alexander Graf 2017-08-20 11:28:12 +02:00
parent 6300c217b3
commit 566ef02b94
2 changed files with 22 additions and 9 deletions

View File

@ -222,8 +222,6 @@ How to Download
Defaults to ``{date:%Y-%m-%d_%H-%M-%S}``. Defaults to ``{date:%Y-%m-%d_%H-%M-%S}``.
--user-agent USER_AGENT User Agent to use for HTTP requests. Per default, --user-agent USER_AGENT User Agent to use for HTTP requests. Per default,
Instaloader pretends being Chrome/51. Instaloader pretends being Chrome/51.
--no-sleep Do not sleep between requests to Instagram's servers.
This makes downloading faster, but may be suspicious.
Miscellaneous Options Miscellaneous Options
^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^

View File

@ -15,7 +15,7 @@ import sys
import tempfile import tempfile
import time import time
import urllib.parse import urllib.parse
from argparse import ArgumentParser from argparse import ArgumentParser, SUPPRESS
from base64 import b64decode, b64encode from base64 import b64decode, b64encode
from contextlib import contextmanager, suppress from contextlib import contextmanager, suppress
from datetime import datetime from datetime import datetime
@ -364,6 +364,10 @@ class Instaloader:
# error log, filled with error() and printed at the end of Instaloader.main() # error log, filled with error() and printed at the end of Instaloader.main()
self.error_log = [] self.error_log = []
# For the adaption of sleep intervals (rate control)
self.request_count = 0
self.last_request_time = 0
@property @property
def is_logged_in(self) -> bool: def is_logged_in(self) -> bool:
return bool(self.username) return bool(self.username)
@ -375,8 +379,12 @@ class Instaloader:
self.dirname_pattern, self.filename_pattern, self.dirname_pattern, self.filename_pattern,
self.download_videos, self.download_geotags, self.download_videos, self.download_geotags,
self.download_captions, self.download_comments) self.download_captions, self.download_comments)
new_loader.request_count = self.request_count
new_loader.last_request_time = self.last_request_time
yield new_loader yield new_loader
self.error_log.extend(new_loader.error_log) self.error_log.extend(new_loader.error_log)
self.request_count = new_loader.request_count
self.last_request_time = new_loader.last_request_time
def _log(self, *msg, sep='', end='\n', flush=False): def _log(self, *msg, sep='', end='\n', flush=False):
"""Log a message to stdout that can be suppressed with --quiet.""" """Log a message to stdout that can be suppressed with --quiet."""
@ -403,9 +411,18 @@ class Instaloader:
self.error('{}'.format(err)) self.error('{}'.format(err))
def _sleep(self): def _sleep(self):
"""Sleep a short, random time if self.sleep is set. Called before each request to the instagram.com.""" """Sleep a short time if self.sleep is set. Called before each request to instagram.com."""
if self.sleep: if not self.sleep:
time.sleep(random.uniform(0.5, 1.75)) return
max_sleep_int = 600/50 # 50 requests per 10 minutes
count_for_max_sleep = 80 # after 80 requests.
sleep_interval = min(self.request_count, count_for_max_sleep) / count_for_max_sleep * max_sleep_int
current_time = time.monotonic()
sleep_time = self.last_request_time + sleep_interval - current_time
if sleep_time > 0.0:
time.sleep(sleep_time)
self.request_count += 1
self.last_request_time = max(current_time, self.last_request_time + sleep_interval)
def _get_and_write_raw(self, url: str, filename: str, tries: int = 3) -> None: def _get_and_write_raw(self, url: str, filename: str, tries: int = 3) -> None:
"""Downloads raw data. """Downloads raw data.
@ -1342,9 +1359,7 @@ def main():
'\'{date:%%Y-%%m-%%d_%%H-%%M-%%S}\'.') '\'{date:%%Y-%%m-%%d_%%H-%%M-%%S}\'.')
g_how.add_argument('--user-agent', g_how.add_argument('--user-agent',
help='User Agent to use for HTTP requests. Defaults to \'{}\'.'.format(default_user_agent())) help='User Agent to use for HTTP requests. Defaults to \'{}\'.'.format(default_user_agent()))
g_how.add_argument('-S', '--no-sleep', action='store_true', g_how.add_argument('-S', '--no-sleep', action='store_true', help=SUPPRESS)
help='Do not sleep between requests to Instagram\'s servers. This makes downloading faster, but '
'may be suspicious.')
g_misc = parser.add_argument_group('Miscellaneous Options') g_misc = parser.add_argument_group('Miscellaneous Options')
g_misc.add_argument('-O', '--shorter-output', action='store_true', g_misc.add_argument('-O', '--shorter-output', action='store_true',