mirror of
https://github.com/instaloader/instaloader.git
synced 2024-11-04 09:22:29 +01:00
Change sleep interval between requests
The sleep intervals are now adapted to how many requests have already been made. With the current settings, Instaloader makes no more than 12 requests in the first ten seconds, 28 requests in the first minute, 40 requests in the first two minutes, 63 requests in the first five minutes, 90 requests in the first ten minutes, and after that 50 requests per ten minutes. This should make it less likely that Instaloader is rate-limited by Instagram, while still being fast when downloading only a few posts. Further, the option --no-sleep is now hidden in the --help output and in README.rst.
This commit is contained in:
parent
6300c217b3
commit
566ef02b94
@ -222,8 +222,6 @@ How to Download
|
|||||||
Defaults to ``{date:%Y-%m-%d_%H-%M-%S}``.
|
Defaults to ``{date:%Y-%m-%d_%H-%M-%S}``.
|
||||||
--user-agent USER_AGENT User Agent to use for HTTP requests. Per default,
|
--user-agent USER_AGENT User Agent to use for HTTP requests. Per default,
|
||||||
Instaloader pretends being Chrome/51.
|
Instaloader pretends being Chrome/51.
|
||||||
--no-sleep Do not sleep between requests to Instagram's servers.
|
|
||||||
This makes downloading faster, but may be suspicious.
|
|
||||||
|
|
||||||
Miscellaneous Options
|
Miscellaneous Options
|
||||||
^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
@ -15,7 +15,7 @@ import sys
|
|||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser, SUPPRESS
|
||||||
from base64 import b64decode, b64encode
|
from base64 import b64decode, b64encode
|
||||||
from contextlib import contextmanager, suppress
|
from contextlib import contextmanager, suppress
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
@ -364,6 +364,10 @@ class Instaloader:
|
|||||||
# error log, filled with error() and printed at the end of Instaloader.main()
|
# error log, filled with error() and printed at the end of Instaloader.main()
|
||||||
self.error_log = []
|
self.error_log = []
|
||||||
|
|
||||||
|
# For the adaption of sleep intervals (rate control)
|
||||||
|
self.request_count = 0
|
||||||
|
self.last_request_time = 0
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_logged_in(self) -> bool:
|
def is_logged_in(self) -> bool:
|
||||||
return bool(self.username)
|
return bool(self.username)
|
||||||
@ -375,8 +379,12 @@ class Instaloader:
|
|||||||
self.dirname_pattern, self.filename_pattern,
|
self.dirname_pattern, self.filename_pattern,
|
||||||
self.download_videos, self.download_geotags,
|
self.download_videos, self.download_geotags,
|
||||||
self.download_captions, self.download_comments)
|
self.download_captions, self.download_comments)
|
||||||
|
new_loader.request_count = self.request_count
|
||||||
|
new_loader.last_request_time = self.last_request_time
|
||||||
yield new_loader
|
yield new_loader
|
||||||
self.error_log.extend(new_loader.error_log)
|
self.error_log.extend(new_loader.error_log)
|
||||||
|
self.request_count = new_loader.request_count
|
||||||
|
self.last_request_time = new_loader.last_request_time
|
||||||
|
|
||||||
def _log(self, *msg, sep='', end='\n', flush=False):
|
def _log(self, *msg, sep='', end='\n', flush=False):
|
||||||
"""Log a message to stdout that can be suppressed with --quiet."""
|
"""Log a message to stdout that can be suppressed with --quiet."""
|
||||||
@ -403,9 +411,18 @@ class Instaloader:
|
|||||||
self.error('{}'.format(err))
|
self.error('{}'.format(err))
|
||||||
|
|
||||||
def _sleep(self):
|
def _sleep(self):
|
||||||
"""Sleep a short, random time if self.sleep is set. Called before each request to the instagram.com."""
|
"""Sleep a short time if self.sleep is set. Called before each request to instagram.com."""
|
||||||
if self.sleep:
|
if not self.sleep:
|
||||||
time.sleep(random.uniform(0.5, 1.75))
|
return
|
||||||
|
max_sleep_int = 600/50 # 50 requests per 10 minutes
|
||||||
|
count_for_max_sleep = 80 # after 80 requests.
|
||||||
|
sleep_interval = min(self.request_count, count_for_max_sleep) / count_for_max_sleep * max_sleep_int
|
||||||
|
current_time = time.monotonic()
|
||||||
|
sleep_time = self.last_request_time + sleep_interval - current_time
|
||||||
|
if sleep_time > 0.0:
|
||||||
|
time.sleep(sleep_time)
|
||||||
|
self.request_count += 1
|
||||||
|
self.last_request_time = max(current_time, self.last_request_time + sleep_interval)
|
||||||
|
|
||||||
def _get_and_write_raw(self, url: str, filename: str, tries: int = 3) -> None:
|
def _get_and_write_raw(self, url: str, filename: str, tries: int = 3) -> None:
|
||||||
"""Downloads raw data.
|
"""Downloads raw data.
|
||||||
@ -1342,9 +1359,7 @@ def main():
|
|||||||
'\'{date:%%Y-%%m-%%d_%%H-%%M-%%S}\'.')
|
'\'{date:%%Y-%%m-%%d_%%H-%%M-%%S}\'.')
|
||||||
g_how.add_argument('--user-agent',
|
g_how.add_argument('--user-agent',
|
||||||
help='User Agent to use for HTTP requests. Defaults to \'{}\'.'.format(default_user_agent()))
|
help='User Agent to use for HTTP requests. Defaults to \'{}\'.'.format(default_user_agent()))
|
||||||
g_how.add_argument('-S', '--no-sleep', action='store_true',
|
g_how.add_argument('-S', '--no-sleep', action='store_true', help=SUPPRESS)
|
||||||
help='Do not sleep between requests to Instagram\'s servers. This makes downloading faster, but '
|
|
||||||
'may be suspicious.')
|
|
||||||
|
|
||||||
g_misc = parser.add_argument_group('Miscellaneous Options')
|
g_misc = parser.add_argument_group('Miscellaneous Options')
|
||||||
g_misc.add_argument('-O', '--shorter-output', action='store_true',
|
g_misc.add_argument('-O', '--shorter-output', action='store_true',
|
||||||
|
Loading…
Reference in New Issue
Block a user