1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-08-19 13:19:37 +02:00

Update RateController to current observations

This commit is contained in:
Alexander Graf 2021-01-25 15:46:05 +01:00
parent 3955540080
commit c5b686a213

View File

@ -549,6 +549,7 @@ class RateController:
self._context = context
self._query_timestamps = dict() # type: Dict[str, List[float]]
self._earliest_next_request_time = 0.0
self._iphone_earliest_next_request_time = 0.0
def sleep(self, secs: float):
"""Wait given number of seconds."""
@ -559,7 +560,7 @@ class RateController:
def _dump_query_timestamps(self, current_time: float, failed_query_type: str):
windows = [10, 11, 20, 22, 30, 60]
self._context.error("Requests within last {} minutes grouped by type:"
self._context.error("Number of requests within last {} minutes grouped by type:"
.format('/'.join(str(w) for w in windows)),
repeat_at_end=False)
for query_type, times in self._query_timestamps.items():
@ -571,11 +572,15 @@ class RateController:
), repeat_at_end=False)
def count_per_sliding_window(self, query_type: str) -> int:
"""Return how many requests can be done within the sliding window."""
"""Return how many requests of the given type can be done within a sliding window of 11 minutes.
This is called by :meth:`RateController.query_waittime` and allows to simply customize wait times before queries
at query_type granularity. Consider overriding :meth:`RateController.query_waittime` directly if you need more
control."""
# Not static, to allow for the count_per_sliding_window to depend on context-inherent properties, such as
# whether we are logged in.
# pylint:disable=no-self-use
return 75 if query_type in ['iphone', 'other'] else 200
return 75 if query_type == 'other' else 200
def _reqs_in_sliding_window(self, query_type: Optional[str], current_time: float, window: float) -> List[float]:
if query_type is not None:
@ -591,6 +596,7 @@ class RateController:
def query_waittime(self, query_type: str, current_time: float, untracked_queries: bool = False) -> float:
"""Calculate time needed to wait before query can be executed."""
per_type_sliding_window = 660
iphone_sliding_window = 1800
if query_type not in self._query_timestamps:
self._query_timestamps[query_type] = []
self._query_timestamps[query_type] = list(filter(lambda t: t > current_time - 60 * 60,
@ -616,25 +622,43 @@ class RateController:
def untracked_next_request_time():
if untracked_queries:
reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time, per_type_sliding_window)
self._earliest_next_request_time = min(reqs_in_sliding_window) + per_type_sliding_window + 6
return self._earliest_next_request_time
if query_type == "iphone":
reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time,
iphone_sliding_window)
self._iphone_earliest_next_request_time = min(reqs_in_sliding_window) + iphone_sliding_window + 18
else:
reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time,
per_type_sliding_window)
self._earliest_next_request_time = min(reqs_in_sliding_window) + per_type_sliding_window + 6
return max(self._iphone_earliest_next_request_time, self._earliest_next_request_time)
def iphone_next_request():
if query_type == "iphone":
reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time, iphone_sliding_window)
if len(reqs_in_sliding_window) >= 199:
return min(reqs_in_sliding_window) + iphone_sliding_window + 18
return 0.0
return max(0.0,
max(
per_type_next_request_time(),
gql_accumulated_next_request_time(),
untracked_next_request_time(),
iphone_next_request(),
) - current_time)
def wait_before_query(self, query_type: str) -> None:
"""This method is called before a query to Instagram. It calls :meth:`RateController.sleep` to wait
until the request can be made."""
"""This method is called before a query to Instagram.
It calls :meth:`RateController.query_waittime` to determine the time needed to wait and then calls
:meth:`RateController.sleep` to wait until the request can be made."""
waittime = self.query_waittime(query_type, time.monotonic(), False)
assert waittime >= 0
if waittime > 15:
self._context.log("\nToo many queries in the last time. Need to wait {} seconds, until {:%H:%M}."
.format(round(waittime), datetime.now() + timedelta(seconds=waittime)))
formatted_waittime = ("{} seconds".format(round(waittime)) if waittime <= 666 else
"{} minutes".format(round(waittime / 60)))
self._context.log("\nToo many queries in the last time. Need to wait {}, until {:%H:%M}."
.format(formatted_waittime, datetime.now() + timedelta(seconds=waittime)))
if waittime > 0:
self.sleep(waittime)
if query_type not in self._query_timestamps:
@ -643,8 +667,10 @@ class RateController:
self._query_timestamps[query_type].append(time.monotonic())
def handle_429(self, query_type: str) -> None:
"""This method is called to handle a 429 Too Many Requests response. It calls :meth:`RateController.sleep` to
wait until we can repeat the same request."""
"""This method is called to handle a 429 Too Many Requests response.
It calls :meth:`RateController.query_waittime` to determine the time needed to wait and then calls
:meth:`RateController.sleep` to wait until we can repeat the same request."""
current_time = time.monotonic()
waittime = self.query_waittime(query_type, current_time, True)
assert waittime >= 0
@ -654,8 +680,10 @@ class RateController:
"App while Instaloader is running.")
self._context.error(textwrap.fill(text_for_429), repeat_at_end=False)
if waittime > 1.5:
self._context.error("The request will be retried in {} seconds, at {:%H:%M}."
.format(round(waittime), datetime.now() + timedelta(seconds=waittime)),
formatted_waittime = ("{} seconds".format(round(waittime)) if waittime <= 666 else
"{} minutes".format(round(waittime / 60)))
self._context.error("The request will be retried in {}, at {:%H:%M}."
.format(formatted_waittime, datetime.now() + timedelta(seconds=waittime)),
repeat_at_end=False)
if waittime > 0:
self.sleep(waittime)