mirror of
https://github.com/instaloader/instaloader.git
synced 2024-11-04 09:22:29 +01:00
Limit GraphQL queries to 20 per 11 minutes
cherry-picked from commit d90c05e0a4
- Set GRAPHQL_PAGE_LENGTH to 50, which appears to be the new working maximum.
- Limit GQL queries to 20 per 666 seconds.
- Remove logic for tracking queries per query identifier as Instagram
only allows 20 overall GQL queries per sliding window.
Related to #101
This commit is contained in:
parent
57ce51d4ef
commit
e25eb2a948
@ -111,11 +111,11 @@ class Instaloader:
|
||||
self.download_geotags, self.save_captions, self.download_comments, self.save_metadata,
|
||||
self.compress_json, self.post_metadata_txt_pattern,
|
||||
self.storyitem_metadata_txt_pattern, self.context.max_connection_attempts)
|
||||
new_loader.context.previous_queries = self.context.previous_queries
|
||||
new_loader.context.query_timestamps = self.context.query_timestamps
|
||||
yield new_loader
|
||||
self.context.error_log.extend(new_loader.context.error_log)
|
||||
new_loader.context.error_log = [] # avoid double-printing of errors
|
||||
self.context.previous_queries = new_loader.context.previous_queries
|
||||
self.context.query_timestamps = new_loader.context.query_timestamps
|
||||
new_loader.close()
|
||||
|
||||
def close(self):
|
||||
|
@ -61,7 +61,7 @@ class InstaloaderContext:
|
||||
self.error_log = []
|
||||
|
||||
# For the adaption of sleep intervals (rate control)
|
||||
self.previous_queries = dict()
|
||||
self.query_timestamps = list()
|
||||
|
||||
# Can be set to True for testing, disables suppression of InstaloaderContext._error_catcher
|
||||
self.raise_all_errors = False
|
||||
@ -195,29 +195,25 @@ class InstaloaderContext:
|
||||
:raises QueryReturnedNotFoundException: When the server responds with a 404.
|
||||
:raises ConnectionException: When query repeatedly failed.
|
||||
"""
|
||||
def graphql_query_waittime(query_hash: str, untracked_queries: bool = False) -> int:
|
||||
def graphql_query_waittime(untracked_queries: bool = False) -> int:
|
||||
sliding_window = 660
|
||||
timestamps = self.previous_queries.get(query_hash)
|
||||
if not timestamps:
|
||||
if not self.query_timestamps:
|
||||
return sliding_window if untracked_queries else 0
|
||||
current_time = time.monotonic()
|
||||
timestamps = list(filter(lambda t: t > current_time - sliding_window, timestamps))
|
||||
self.previous_queries[query_hash] = timestamps
|
||||
if len(timestamps) < 100 and not untracked_queries:
|
||||
self.query_timestamps = list(filter(lambda t: t > current_time - sliding_window, self.query_timestamps))
|
||||
if len(self.query_timestamps) < 20 and not untracked_queries:
|
||||
return 0
|
||||
return round(min(timestamps) + sliding_window - current_time) + 6
|
||||
return round(min(self.query_timestamps) + sliding_window - current_time) + 6
|
||||
is_graphql_query = 'query_hash' in params and 'graphql/query' in path
|
||||
if is_graphql_query:
|
||||
query_hash = params['query_hash']
|
||||
waittime = graphql_query_waittime(query_hash)
|
||||
waittime = graphql_query_waittime()
|
||||
if waittime > 0:
|
||||
self.log('\nToo many queries in the last time. Need to wait {} seconds.'.format(waittime))
|
||||
time.sleep(waittime)
|
||||
timestamp_list = self.previous_queries.get(query_hash)
|
||||
if timestamp_list is not None:
|
||||
timestamp_list.append(time.monotonic())
|
||||
if self.query_timestamps is not None:
|
||||
self.query_timestamps.append(time.monotonic())
|
||||
else:
|
||||
self.previous_queries[query_hash] = [time.monotonic()]
|
||||
self.query_timestamps = [time.monotonic()]
|
||||
sess = session if session else self._session
|
||||
try:
|
||||
self._sleep()
|
||||
@ -265,7 +261,7 @@ class InstaloaderContext:
|
||||
if isinstance(err, TooManyRequestsException):
|
||||
print(textwrap.fill(text_for_429), file=sys.stderr)
|
||||
if is_graphql_query:
|
||||
waittime = graphql_query_waittime(query_hash=params['query_hash'], untracked_queries=True)
|
||||
waittime = graphql_query_waittime(untracked_queries=True)
|
||||
if waittime > 0:
|
||||
self.log('The request will be retried in {} seconds.'.format(waittime))
|
||||
time.sleep(waittime)
|
||||
|
Loading…
Reference in New Issue
Block a user