mirror of
https://github.com/instaloader/instaloader.git
synced 2024-10-27 05:32:30 +01:00
Merge branch 'master' into upcoming/v4.7
This commit is contained in:
commit
640066dd60
@ -238,7 +238,7 @@ How to Download
|
|||||||
.. option:: --user-agent USER_AGENT
|
.. option:: --user-agent USER_AGENT
|
||||||
|
|
||||||
User Agent to use for HTTP requests. By default, Instaloader pretends to be
|
User Agent to use for HTTP requests. By default, Instaloader pretends to be
|
||||||
Chrome/51.
|
Chrome/88 on Linux.
|
||||||
|
|
||||||
.. option:: --max-connection-attempts N
|
.. option:: --max-connection-attempts N
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
"""Download pictures (or videos) along with their captions and other metadata from Instagram."""
|
"""Download pictures (or videos) along with their captions and other metadata from Instagram."""
|
||||||
|
|
||||||
|
|
||||||
__version__ = '4.6'
|
__version__ = '4.6.1'
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -33,7 +33,7 @@ def copy_session(session: requests.Session, request_timeout: Optional[float] = N
|
|||||||
|
|
||||||
def default_user_agent() -> str:
|
def default_user_agent() -> str:
|
||||||
return 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
|
return 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
|
||||||
'(KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36'
|
'(KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36'
|
||||||
|
|
||||||
|
|
||||||
class InstaloaderContext:
|
class InstaloaderContext:
|
||||||
@ -288,7 +288,7 @@ class InstaloaderContext:
|
|||||||
def do_sleep(self):
|
def do_sleep(self):
|
||||||
"""Sleep a short time if self.sleep is set. Called before each request to instagram.com."""
|
"""Sleep a short time if self.sleep is set. Called before each request to instagram.com."""
|
||||||
if self.sleep:
|
if self.sleep:
|
||||||
time.sleep(min(random.expovariate(0.7), 5.0))
|
time.sleep(min(random.expovariate(0.6), 15.0))
|
||||||
|
|
||||||
def get_json(self, path: str, params: Dict[str, Any], host: str = 'www.instagram.com',
|
def get_json(self, path: str, params: Dict[str, Any], host: str = 'www.instagram.com',
|
||||||
session: Optional[requests.Session] = None, _attempt=1) -> Dict[str, Any]:
|
session: Optional[requests.Session] = None, _attempt=1) -> Dict[str, Any]:
|
||||||
@ -549,6 +549,7 @@ class RateController:
|
|||||||
self._context = context
|
self._context = context
|
||||||
self._query_timestamps = dict() # type: Dict[str, List[float]]
|
self._query_timestamps = dict() # type: Dict[str, List[float]]
|
||||||
self._earliest_next_request_time = 0.0
|
self._earliest_next_request_time = 0.0
|
||||||
|
self._iphone_earliest_next_request_time = 0.0
|
||||||
|
|
||||||
def sleep(self, secs: float):
|
def sleep(self, secs: float):
|
||||||
"""Wait given number of seconds."""
|
"""Wait given number of seconds."""
|
||||||
@ -559,7 +560,7 @@ class RateController:
|
|||||||
|
|
||||||
def _dump_query_timestamps(self, current_time: float, failed_query_type: str):
|
def _dump_query_timestamps(self, current_time: float, failed_query_type: str):
|
||||||
windows = [10, 11, 20, 22, 30, 60]
|
windows = [10, 11, 20, 22, 30, 60]
|
||||||
self._context.error("Requests within last {} minutes grouped by type:"
|
self._context.error("Number of requests within last {} minutes grouped by type:"
|
||||||
.format('/'.join(str(w) for w in windows)),
|
.format('/'.join(str(w) for w in windows)),
|
||||||
repeat_at_end=False)
|
repeat_at_end=False)
|
||||||
for query_type, times in self._query_timestamps.items():
|
for query_type, times in self._query_timestamps.items():
|
||||||
@ -571,11 +572,15 @@ class RateController:
|
|||||||
), repeat_at_end=False)
|
), repeat_at_end=False)
|
||||||
|
|
||||||
def count_per_sliding_window(self, query_type: str) -> int:
|
def count_per_sliding_window(self, query_type: str) -> int:
|
||||||
"""Return how many requests can be done within the sliding window."""
|
"""Return how many requests of the given type can be done within a sliding window of 11 minutes.
|
||||||
|
|
||||||
|
This is called by :meth:`RateController.query_waittime` and makes it simple to customize wait times before queries
|
||||||
|
at query_type granularity. Consider overriding :meth:`RateController.query_waittime` directly if you need more
|
||||||
|
control."""
|
||||||
# Not static, to allow for the count_per_sliding_window to depend on context-inherent properties, such as
|
# Not static, to allow for the count_per_sliding_window to depend on context-inherent properties, such as
|
||||||
# whether we are logged in.
|
# whether we are logged in.
|
||||||
# pylint:disable=no-self-use
|
# pylint:disable=no-self-use
|
||||||
return 75 if query_type in ['iphone', 'other'] else 200
|
return 75 if query_type == 'other' else 200
|
||||||
|
|
||||||
def _reqs_in_sliding_window(self, query_type: Optional[str], current_time: float, window: float) -> List[float]:
|
def _reqs_in_sliding_window(self, query_type: Optional[str], current_time: float, window: float) -> List[float]:
|
||||||
if query_type is not None:
|
if query_type is not None:
|
||||||
@ -591,6 +596,7 @@ class RateController:
|
|||||||
def query_waittime(self, query_type: str, current_time: float, untracked_queries: bool = False) -> float:
|
def query_waittime(self, query_type: str, current_time: float, untracked_queries: bool = False) -> float:
|
||||||
"""Calculate time needed to wait before query can be executed."""
|
"""Calculate time needed to wait before query can be executed."""
|
||||||
per_type_sliding_window = 660
|
per_type_sliding_window = 660
|
||||||
|
iphone_sliding_window = 1800
|
||||||
if query_type not in self._query_timestamps:
|
if query_type not in self._query_timestamps:
|
||||||
self._query_timestamps[query_type] = []
|
self._query_timestamps[query_type] = []
|
||||||
self._query_timestamps[query_type] = list(filter(lambda t: t > current_time - 60 * 60,
|
self._query_timestamps[query_type] = list(filter(lambda t: t > current_time - 60 * 60,
|
||||||
@ -616,25 +622,43 @@ class RateController:
|
|||||||
|
|
||||||
def untracked_next_request_time():
|
def untracked_next_request_time():
|
||||||
if untracked_queries:
|
if untracked_queries:
|
||||||
reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time, per_type_sliding_window)
|
if query_type == "iphone":
|
||||||
self._earliest_next_request_time = min(reqs_in_sliding_window) + per_type_sliding_window + 6
|
reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time,
|
||||||
return self._earliest_next_request_time
|
iphone_sliding_window)
|
||||||
|
self._iphone_earliest_next_request_time = min(reqs_in_sliding_window) + iphone_sliding_window + 18
|
||||||
|
else:
|
||||||
|
reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time,
|
||||||
|
per_type_sliding_window)
|
||||||
|
self._earliest_next_request_time = min(reqs_in_sliding_window) + per_type_sliding_window + 6
|
||||||
|
return max(self._iphone_earliest_next_request_time, self._earliest_next_request_time)
|
||||||
|
|
||||||
|
def iphone_next_request():
|
||||||
|
if query_type == "iphone":
|
||||||
|
reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time, iphone_sliding_window)
|
||||||
|
if len(reqs_in_sliding_window) >= 199:
|
||||||
|
return min(reqs_in_sliding_window) + iphone_sliding_window + 18
|
||||||
|
return 0.0
|
||||||
|
|
||||||
return max(0.0,
|
return max(0.0,
|
||||||
max(
|
max(
|
||||||
per_type_next_request_time(),
|
per_type_next_request_time(),
|
||||||
gql_accumulated_next_request_time(),
|
gql_accumulated_next_request_time(),
|
||||||
untracked_next_request_time(),
|
untracked_next_request_time(),
|
||||||
|
iphone_next_request(),
|
||||||
) - current_time)
|
) - current_time)
|
||||||
|
|
||||||
def wait_before_query(self, query_type: str) -> None:
|
def wait_before_query(self, query_type: str) -> None:
|
||||||
"""This method is called before a query to Instagram. It calls :meth:`RateController.sleep` to wait
|
"""This method is called before a query to Instagram.
|
||||||
until the request can be made."""
|
|
||||||
|
It calls :meth:`RateController.query_waittime` to determine the time needed to wait and then calls
|
||||||
|
:meth:`RateController.sleep` to wait until the request can be made."""
|
||||||
waittime = self.query_waittime(query_type, time.monotonic(), False)
|
waittime = self.query_waittime(query_type, time.monotonic(), False)
|
||||||
assert waittime >= 0
|
assert waittime >= 0
|
||||||
if waittime > 15:
|
if waittime > 15:
|
||||||
self._context.log("\nToo many queries in the last time. Need to wait {} seconds, until {:%H:%M}."
|
formatted_waittime = ("{} seconds".format(round(waittime)) if waittime <= 666 else
|
||||||
.format(round(waittime), datetime.now() + timedelta(seconds=waittime)))
|
"{} minutes".format(round(waittime / 60)))
|
||||||
|
self._context.log("\nToo many queries in the last time. Need to wait {}, until {:%H:%M}."
|
||||||
|
.format(formatted_waittime, datetime.now() + timedelta(seconds=waittime)))
|
||||||
if waittime > 0:
|
if waittime > 0:
|
||||||
self.sleep(waittime)
|
self.sleep(waittime)
|
||||||
if query_type not in self._query_timestamps:
|
if query_type not in self._query_timestamps:
|
||||||
@ -643,8 +667,10 @@ class RateController:
|
|||||||
self._query_timestamps[query_type].append(time.monotonic())
|
self._query_timestamps[query_type].append(time.monotonic())
|
||||||
|
|
||||||
def handle_429(self, query_type: str) -> None:
|
def handle_429(self, query_type: str) -> None:
|
||||||
"""This method is called to handle a 429 Too Many Requests response. It calls :meth:`RateController.sleep` to
|
"""This method is called to handle a 429 Too Many Requests response.
|
||||||
wait until we can repeat the same request."""
|
|
||||||
|
It calls :meth:`RateController.query_waittime` to determine the time needed to wait and then calls
|
||||||
|
:meth:`RateController.sleep` to wait until we can repeat the same request."""
|
||||||
current_time = time.monotonic()
|
current_time = time.monotonic()
|
||||||
waittime = self.query_waittime(query_type, current_time, True)
|
waittime = self.query_waittime(query_type, current_time, True)
|
||||||
assert waittime >= 0
|
assert waittime >= 0
|
||||||
@ -654,8 +680,10 @@ class RateController:
|
|||||||
"App while Instaloader is running.")
|
"App while Instaloader is running.")
|
||||||
self._context.error(textwrap.fill(text_for_429), repeat_at_end=False)
|
self._context.error(textwrap.fill(text_for_429), repeat_at_end=False)
|
||||||
if waittime > 1.5:
|
if waittime > 1.5:
|
||||||
self._context.error("The request will be retried in {} seconds, at {:%H:%M}."
|
formatted_waittime = ("{} seconds".format(round(waittime)) if waittime <= 666 else
|
||||||
.format(round(waittime), datetime.now() + timedelta(seconds=waittime)),
|
"{} minutes".format(round(waittime / 60)))
|
||||||
|
self._context.error("The request will be retried in {}, at {:%H:%M}."
|
||||||
|
.format(formatted_waittime, datetime.now() + timedelta(seconds=waittime)),
|
||||||
repeat_at_end=False)
|
repeat_at_end=False)
|
||||||
if waittime > 0:
|
if waittime > 0:
|
||||||
self.sleep(waittime)
|
self.sleep(waittime)
|
||||||
|
Loading…
Reference in New Issue
Block a user