1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-11-04 09:22:29 +01:00

Retry get requests for downloading pictures

Tries to work around #26.
This commit is contained in:
André Koch-Kramer 2017-07-31 20:34:27 +02:00
parent 4b8b257672
commit 9fbe9b0903

View File

@ -154,6 +154,23 @@ class Instaloader:
if self.sleep: if self.sleep:
time.sleep(random.uniform(0.25, 2.0)) time.sleep(random.uniform(0.25, 2.0))
def _get_and_write_raw(self, url: str, filename: str, tries: int = 3) -> None:
    """Download ``url`` and write the raw response body to ``filename``, retrying on failure.

    The request is made with the session returned by
    ``self.get_anonymous_session()`` in streaming mode. On HTTP status 200 the
    filename is logged via ``self._log`` and the body is copied to ``filename``.

    :param url: URL to fetch.
    :param filename: Path of the file to (over)write with the response body.
    :param tries: Maximum number of attempts; at least one attempt is always made.
    :raises ConnectionException: If the last attempt returned a non-200 status code.
    :raises ConnectionResetError: If the last attempt failed with a connection reset.
    """
    while True:
        try:
            resp = self.get_anonymous_session().get(url, stream=True)
            try:
                if resp.status_code != 200:
                    raise ConnectionException("Request returned HTTP error code {}.".format(resp.status_code))
                self._log(filename, end=' ', flush=True)
                with open(filename, 'wb') as file:
                    # Let urllib3 transparently undo gzip/deflate transfer encoding.
                    resp.raw.decode_content = True
                    shutil.copyfileobj(resp.raw, file)
            finally:
                # A streamed response keeps its connection checked out until it is
                # closed, so release it after every attempt (successful or not).
                resp.close()
            return
        except (ConnectionResetError, ConnectionException) as err:
            # Format the exception instead of concatenating it: ``str + Exception``
            # raises TypeError, which would abort the retry logic entirely.
            print("URL: {}\n{}".format(url, err), file=sys.stderr)
            if tries <= 1:
                # Bare ``raise`` keeps the original traceback intact.
                raise
            tries -= 1
            self._sleep()
def get_json(self, name: str, session: requests.Session = None, def get_json(self, name: str, session: requests.Session = None,
max_id: Optional[str] = None) -> Optional[Dict[str, Any]]: max_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
"""Return JSON of a profile""" """Return JSON of a profile"""
@ -335,16 +352,9 @@ class Instaloader:
if os.path.isfile(filename): if os.path.isfile(filename):
self._log(filename + ' exists', end=' ', flush=True) self._log(filename + ' exists', end=' ', flush=True)
return False return False
resp = self.get_anonymous_session().get(url, stream=True) self._get_and_write_raw(url, filename)
if resp.status_code == 200:
self._log(filename, end=' ', flush=True)
with open(filename, 'wb') as file:
resp.raw.decode_content = True
shutil.copyfileobj(resp.raw, file)
os.utime(filename, (datetime.now().timestamp(), mtime.timestamp())) os.utime(filename, (datetime.now().timestamp(), mtime.timestamp()))
return True return True
else:
raise ConnectionException("File \'" + url + "\' could not be downloaded.")
def update_comments(self, filename: str, shortcode: str) -> None: def update_comments(self, filename: str, shortcode: str) -> None:
filename += '_comments.json' filename += '_comments.json'
@ -446,15 +456,8 @@ class Instaloader:
index = len(match.group(0)) - 1 index = len(match.group(0)) - 1
offset = 8 if match.group(0)[-1:] == 's' else 0 offset = 8 if match.group(0)[-1:] == 's' else 0
url = url[:index] + 's2048x2048' + ('/' if offset == 0 else str()) + url[index + offset:] url = url[:index] + 's2048x2048' + ('/' if offset == 0 else str()) + url[index + offset:]
resp = self.get_anonymous_session().get(url, stream=True) self._get_and_write_raw(url, filename)
if resp.status_code == 200:
self._log(filename)
with open(filename, 'wb') as file:
resp.raw.decode_content = True
shutil.copyfileobj(resp.raw, file)
os.utime(filename, (datetime.now().timestamp(), date_object.timestamp())) os.utime(filename, (datetime.now().timestamp(), date_object.timestamp()))
else:
raise ConnectionException("File \'" + url + "\' could not be downloaded.")
def save_session_to_file(self, filename: Optional[str] = None) -> None: def save_session_to_file(self, filename: Optional[str] = None) -> None:
"""Saves requests.Session object.""" """Saves requests.Session object."""
@ -533,11 +536,19 @@ class Instaloader:
'fetch_comment_count': 4, 'fetch_comment_count': 4,
'fetch_like': 10}) 'fetch_like': 10})
def get_node_metadata(self, node_code: str, tries: int = 3) -> Dict[str, Any]:
    """Fetch the JSON of post ``p/<node_code>`` and return its media dictionary.

    The media dictionary is read from
    ``entry_data.PostPage[0].graphql.shortcode_media`` when a ``graphql`` key is
    present, otherwise from ``entry_data.PostPage[0].media``.

    If the response does not have the expected structure, the error and the
    received JSON are printed to stderr, ``self._sleep()`` is called and the
    request is repeated.

    :param node_code: Shortcode of the post; requested as ``"p/" + node_code``.
    :param tries: Maximum number of attempts; at least one attempt is always made.
    :return: The media dictionary of the post.
    :raises KeyError: If an expected key is still missing on the last attempt.
    :raises IndexError: If ``PostPage`` is still empty on the last attempt.
    :raises TypeError: If ``get_json`` still returned a non-subscriptable value
        (e.g. ``None``) on the last attempt.
    """
    while True:
        pic_json = self.get_json("p/" + node_code)
        try:
            post_page = pic_json["entry_data"]["PostPage"][0]
            if "graphql" in post_page:
                return post_page["graphql"]["shortcode_media"]
            return post_page["media"]
        except (KeyError, IndexError, TypeError) as err:
            # get_json() is annotated Optional, so a None result (TypeError) or an
            # empty PostPage list (IndexError) is retried like a missing key.
            print(err, file=sys.stderr)
            print(json.dumps(pic_json, indent=4), file=sys.stderr)
            if tries <= 1:
                # Bare ``raise`` keeps the original traceback intact.
                raise
            tries -= 1
            self._sleep()
def get_location(self, node_code: str) -> Dict[str, str]: def get_location(self, node_code: str) -> Dict[str, str]: