1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-09-11 16:22:24 +02:00

Limit GraphQl queries to 20 per 11 minutes

- Set GRAPHQL_PAGE_LENGTH to 50 what appears to be the new working maximum.
- Limit GQL queries to 20 per 666 seconds.
- Remove logic for tracking queries per query identifier as Instagram
  only allows 20 overall GQL queries per sliding window.

Related to #101
This commit is contained in:
André Koch-Kramer 2018-04-20 12:59:41 +02:00
parent eecaca7c7b
commit d90c05e0a4

View File

@ -460,7 +460,7 @@ class _PathPattern(str):
class Instaloader:
GRAPHQL_PAGE_LENGTH = 12
GRAPHQL_PAGE_LENGTH = 50
def __init__(self,
sleep: bool = True, quiet: bool = False,
@ -506,7 +506,7 @@ class Instaloader:
self.error_log = []
# For the adaption of sleep intervals (rate control)
self.previous_queries = dict()
self.query_timestamps = list()
@property
def is_logged_in(self) -> bool:
@ -523,10 +523,10 @@ class Instaloader:
self.download_geotags,
self.save_captions, self.download_comments,
self.save_metadata, self.max_connection_attempts)
new_loader.previous_queries = self.previous_queries
new_loader.query_timestamps = self.query_timestamps
yield new_loader
self.error_log.extend(new_loader.error_log)
self.previous_queries = new_loader.previous_queries
self.query_timestamps = new_loader.query_timestamps
def _log(self, *msg, sep='', end='\n', flush=False):
"""Log a message to stdout that can be suppressed with --quiet."""
@ -604,29 +604,25 @@ class Instaloader:
:raises QueryReturnedNotFoundException: When the server responds with a 404.
:raises ConnectionException: When query repeatedly failed.
"""
def graphql_query_waittime(query_id: Union[int, str], untracked_queries: bool = False) -> int:
def graphql_query_waittime(untracked_queries: bool = False) -> int:
sliding_window = 660
timestamps = self.previous_queries.get(query_id)
if not timestamps:
if not self.query_timestamps:
return sliding_window if untracked_queries else 0
current_time = time.monotonic()
timestamps = list(filter(lambda t: t > current_time - sliding_window, timestamps))
self.previous_queries[query_id] = timestamps
if len(timestamps) < 100 and not untracked_queries:
self.query_timestamps = list(filter(lambda t: t > current_time - sliding_window, self.query_timestamps))
if len(self.query_timestamps) < 20 and not untracked_queries:
return 0
return round(min(timestamps) + sliding_window - current_time) + 6
return round(min(self.query_timestamps) + sliding_window - current_time) + 6
is_graphql_query = 'graphql/query' in path
if is_graphql_query:
query_id = params['query_id'] if 'query_id' in params else params['query_hash']
waittime = graphql_query_waittime(query_id)
waittime = graphql_query_waittime()
if waittime > 0:
self._log('\nToo many queries in the last time. Need to wait {} seconds.'.format(waittime))
time.sleep(waittime)
timestamp_list = self.previous_queries.get(query_id)
if timestamp_list is not None:
timestamp_list.append(time.monotonic())
if self.query_timestamps is not None:
self.query_timestamps.append(time.monotonic())
else:
self.previous_queries[query_id] = [time.monotonic()]
self.query_timestamps = [time.monotonic()]
sess = session if session else self.session
try:
self._sleep()
@ -663,8 +659,7 @@ class Instaloader:
if isinstance(err, TooManyRequests):
print(textwrap.fill(text_for_429), file=sys.stderr)
if is_graphql_query:
query_id = params['query_id'] if 'query_id' in params else params['query_hash']
waittime = graphql_query_waittime(query_id, untracked_queries=True)
waittime = graphql_query_waittime(untracked_queries=True)
if waittime > 0:
self._log('The request will be retried in {} seconds.'.format(waittime))
time.sleep(waittime)