From c5b686a213afa8a84f13e16b9e01a835f81b9ddd Mon Sep 17 00:00:00 2001
From: Alexander Graf <17130992+aandergr@users.noreply.github.com>
Date: Mon, 25 Jan 2021 15:46:05 +0100
Subject: [PATCH] Update RateController to current observations

---
 instaloader/instaloadercontext.py | 56 +++++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 14 deletions(-)

diff --git a/instaloader/instaloadercontext.py b/instaloader/instaloadercontext.py
index 1407e80..423204a 100644
--- a/instaloader/instaloadercontext.py
+++ b/instaloader/instaloadercontext.py
@@ -549,6 +549,7 @@ class RateController:
         self._context = context
         self._query_timestamps = dict()  # type: Dict[str, List[float]]
         self._earliest_next_request_time = 0.0
+        self._iphone_earliest_next_request_time = 0.0
 
     def sleep(self, secs: float):
         """Wait given number of seconds."""
@@ -559,7 +560,7 @@ class RateController:
 
     def _dump_query_timestamps(self, current_time: float, failed_query_type: str):
         windows = [10, 11, 20, 22, 30, 60]
-        self._context.error("Requests within last {} minutes grouped by type:"
+        self._context.error("Number of requests within last {} minutes grouped by type:"
                             .format('/'.join(str(w) for w in windows)),
                             repeat_at_end=False)
         for query_type, times in self._query_timestamps.items():
@@ -571,11 +572,15 @@ class RateController:
             ), repeat_at_end=False)
 
     def count_per_sliding_window(self, query_type: str) -> int:
-        """Return how many requests can be done within the sliding window."""
+        """Return how many requests of the given type can be done within a sliding window of 11 minutes.
+
+        This is called by :meth:`RateController.query_waittime` and makes it easy to customize wait times before
+        queries at query_type granularity. Consider overriding :meth:`RateController.query_waittime` directly if you
+        need more control."""
         # Not static, to allow for the count_per_sliding_window to depend on context-inherent properties, such as
         # whether we are logged in.
         # pylint:disable=no-self-use
-        return 75 if query_type in ['iphone', 'other'] else 200
+        return 75 if query_type == 'other' else 200
 
     def _reqs_in_sliding_window(self, query_type: Optional[str], current_time: float, window: float) -> List[float]:
         if query_type is not None:
@@ -591,6 +596,7 @@ class RateController:
     def query_waittime(self, query_type: str, current_time: float, untracked_queries: bool = False) -> float:
         """Calculate time needed to wait before query can be executed."""
         per_type_sliding_window = 660
+        iphone_sliding_window = 1800
         if query_type not in self._query_timestamps:
             self._query_timestamps[query_type] = []
         self._query_timestamps[query_type] = list(filter(lambda t: t > current_time - 60 * 60,
@@ -616,25 +622,43 @@ class RateController:
 
         def untracked_next_request_time():
             if untracked_queries:
-                reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time, per_type_sliding_window)
-                self._earliest_next_request_time = min(reqs_in_sliding_window) + per_type_sliding_window + 6
-            return self._earliest_next_request_time
+                if query_type == "iphone":
+                    reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time,
+                                                                          iphone_sliding_window)
+                    self._iphone_earliest_next_request_time = min(reqs_in_sliding_window) + iphone_sliding_window + 18
+                else:
+                    reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time,
+                                                                          per_type_sliding_window)
+                    self._earliest_next_request_time = min(reqs_in_sliding_window) + per_type_sliding_window + 6
+            return max(self._iphone_earliest_next_request_time, self._earliest_next_request_time)
+
+        def iphone_next_request():
+            if query_type == "iphone":
+                reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time, iphone_sliding_window)
+                if len(reqs_in_sliding_window) >= 199:
+                    return min(reqs_in_sliding_window) + iphone_sliding_window + 18
+            return 0.0
 
         return max(0.0,
                    max(
                        per_type_next_request_time(),
                        gql_accumulated_next_request_time(),
                        untracked_next_request_time(),
+                       iphone_next_request(),
                    ) - current_time)
 
     def wait_before_query(self, query_type: str) -> None:
-        """This method is called before a query to Instagram. It calls :meth:`RateController.sleep` to wait
-        until the request can be made."""
+        """This method is called before a query to Instagram.
+
+        It calls :meth:`RateController.query_waittime` to determine the time needed to wait and then calls
+        :meth:`RateController.sleep` to wait until the request can be made."""
         waittime = self.query_waittime(query_type, time.monotonic(), False)
         assert waittime >= 0
         if waittime > 15:
-            self._context.log("\nToo many queries in the last time. Need to wait {} seconds, until {:%H:%M}."
-                              .format(round(waittime), datetime.now() + timedelta(seconds=waittime)))
+            formatted_waittime = ("{} seconds".format(round(waittime)) if waittime <= 666 else
+                                  "{} minutes".format(round(waittime / 60)))
+            self._context.log("\nToo many queries in the last time. Need to wait {}, until {:%H:%M}."
+                              .format(formatted_waittime, datetime.now() + timedelta(seconds=waittime)))
         if waittime > 0:
             self.sleep(waittime)
         if query_type not in self._query_timestamps:
@@ -643,8 +667,10 @@ class RateController:
             self._query_timestamps[query_type].append(time.monotonic())
 
     def handle_429(self, query_type: str) -> None:
-        """This method is called to handle a 429 Too Many Requests response. It calls :meth:`RateController.sleep` to
-         wait until we can repeat the same request."""
+        """This method is called to handle a 429 Too Many Requests response.
+
+        It calls :meth:`RateController.query_waittime` to determine the time needed to wait and then calls
+        :meth:`RateController.sleep` to wait until we can repeat the same request."""
         current_time = time.monotonic()
         waittime = self.query_waittime(query_type, current_time, True)
         assert waittime >= 0
@@ -654,8 +680,10 @@ class RateController:
                         "App while Instaloader is running.")
         self._context.error(textwrap.fill(text_for_429), repeat_at_end=False)
         if waittime > 1.5:
-            self._context.error("The request will be retried in {} seconds, at {:%H:%M}."
-                                .format(round(waittime), datetime.now() + timedelta(seconds=waittime)),
+            formatted_waittime = ("{} seconds".format(round(waittime)) if waittime <= 666 else
+                                  "{} minutes".format(round(waittime / 60)))
+            self._context.error("The request will be retried in {}, at {:%H:%M}."
+                                .format(formatted_waittime, datetime.now() + timedelta(seconds=waittime)),
                                 repeat_at_end=False)
         if waittime > 0:
             self.sleep(waittime)