From c5b686a213afa8a84f13e16b9e01a835f81b9ddd Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Mon, 25 Jan 2021 15:46:05 +0100 Subject: [PATCH] Update RateController to current observations --- instaloader/instaloadercontext.py | 56 +++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/instaloader/instaloadercontext.py b/instaloader/instaloadercontext.py index 1407e80..423204a 100644 --- a/instaloader/instaloadercontext.py +++ b/instaloader/instaloadercontext.py @@ -549,6 +549,7 @@ class RateController: self._context = context self._query_timestamps = dict() # type: Dict[str, List[float]] self._earliest_next_request_time = 0.0 + self._iphone_earliest_next_request_time = 0.0 def sleep(self, secs: float): """Wait given number of seconds.""" @@ -559,7 +560,7 @@ class RateController: def _dump_query_timestamps(self, current_time: float, failed_query_type: str): windows = [10, 11, 20, 22, 30, 60] - self._context.error("Requests within last {} minutes grouped by type:" + self._context.error("Number of requests within last {} minutes grouped by type:" .format('/'.join(str(w) for w in windows)), repeat_at_end=False) for query_type, times in self._query_timestamps.items(): @@ -571,11 +572,15 @@ class RateController: ), repeat_at_end=False) def count_per_sliding_window(self, query_type: str) -> int: - """Return how many requests can be done within the sliding window.""" + """Return how many requests of the given type can be done within a sliding window of 11 minutes. + + This is called by :meth:`RateController.query_waittime` and allows to simply customize wait times before queries + at query_type granularity. Consider overriding :meth:`RateController.query_waittime` directly if you need more + control.""" # Not static, to allow for the count_per_sliding_window to depend on context-inherent properties, such as # whether we are logged in. # pylint:disable=no-self-use - return 75 if query_type in ['iphone', 'other'] else 200 + return 75 if query_type == 'other' else 200 def _reqs_in_sliding_window(self, query_type: Optional[str], current_time: float, window: float) -> List[float]: if query_type is not None: @@ -591,6 +596,7 @@ class RateController: def query_waittime(self, query_type: str, current_time: float, untracked_queries: bool = False) -> float: """Calculate time needed to wait before query can be executed.""" per_type_sliding_window = 660 + iphone_sliding_window = 1800 if query_type not in self._query_timestamps: self._query_timestamps[query_type] = [] self._query_timestamps[query_type] = list(filter(lambda t: t > current_time - 60 * 60, @@ -616,25 +622,43 @@ class RateController: def untracked_next_request_time(): if untracked_queries: - reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time, per_type_sliding_window) - self._earliest_next_request_time = min(reqs_in_sliding_window) + per_type_sliding_window + 6 - return self._earliest_next_request_time + if query_type == "iphone": + reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time, + iphone_sliding_window) + self._iphone_earliest_next_request_time = min(reqs_in_sliding_window) + iphone_sliding_window + 18 + else: + reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time, + per_type_sliding_window) + self._earliest_next_request_time = min(reqs_in_sliding_window) + per_type_sliding_window + 6 + return max(self._iphone_earliest_next_request_time, self._earliest_next_request_time) + + def iphone_next_request(): + if query_type == "iphone": + reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time, iphone_sliding_window) + if len(reqs_in_sliding_window) >= 199: + return min(reqs_in_sliding_window) + iphone_sliding_window + 18 + return 0.0 return max(0.0, max( per_type_next_request_time(), gql_accumulated_next_request_time(), untracked_next_request_time(), + iphone_next_request(), ) - current_time) def wait_before_query(self, query_type: str) -> None: - """This method is called before a query to Instagram. It calls :meth:`RateController.sleep` to wait - until the request can be made.""" + """This method is called before a query to Instagram. + + It calls :meth:`RateController.query_waittime` to determine the time needed to wait and then calls + :meth:`RateController.sleep` to wait until the request can be made.""" waittime = self.query_waittime(query_type, time.monotonic(), False) assert waittime >= 0 if waittime > 15: - self._context.log("\nToo many queries in the last time. Need to wait {} seconds, until {:%H:%M}." - .format(round(waittime), datetime.now() + timedelta(seconds=waittime))) + formatted_waittime = ("{} seconds".format(round(waittime)) if waittime <= 666 else + "{} minutes".format(round(waittime / 60))) + self._context.log("\nToo many queries in the last time. Need to wait {}, until {:%H:%M}." + .format(formatted_waittime, datetime.now() + timedelta(seconds=waittime))) if waittime > 0: self.sleep(waittime) if query_type not in self._query_timestamps: @@ -643,8 +667,10 @@ class RateController: self._query_timestamps[query_type].append(time.monotonic()) def handle_429(self, query_type: str) -> None: - """This method is called to handle a 429 Too Many Requests response. It calls :meth:`RateController.sleep` to - wait until we can repeat the same request.""" + """This method is called to handle a 429 Too Many Requests response. + + It calls :meth:`RateController.query_waittime` to determine the time needed to wait and then calls + :meth:`RateController.sleep` to wait until we can repeat the same request.""" current_time = time.monotonic() waittime = self.query_waittime(query_type, current_time, True) assert waittime >= 0 @@ -654,8 +680,10 @@ class RateController: "App while Instaloader is running.") self._context.error(textwrap.fill(text_for_429), repeat_at_end=False) if waittime > 1.5: - self._context.error("The request will be retried in {} seconds, at {:%H:%M}." - .format(round(waittime), datetime.now() + timedelta(seconds=waittime)), + formatted_waittime = ("{} seconds".format(round(waittime)) if waittime <= 666 else + "{} minutes".format(round(waittime / 60))) + self._context.error("The request will be retried in {}, at {:%H:%M}." + .format(formatted_waittime, datetime.now() + timedelta(seconds=waittime)), repeat_at_end=False) if waittime > 0: self.sleep(waittime)