mirror of
https://github.com/instaloader/instaloader.git
synced 2024-11-05 01:42:29 +01:00
Raise and catch NodeUnavailableException
If a node cannot be downloaded, or its metadata is needed but cannot be retrieved, a NodeUnavailableException is raised and the corresponding node is skipped. Concerns #26
This commit is contained in:
parent
838ea645a8
commit
9b5d4e34fc
111
instaloader.py
111
instaloader.py
@ -80,6 +80,10 @@ class BadResponseException(NonfatalException):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class NodeUnavailableException(NonfatalException):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class BadCredentialsException(InstaloaderException):
|
class BadCredentialsException(InstaloaderException):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -166,9 +170,9 @@ class Instaloader:
|
|||||||
else:
|
else:
|
||||||
raise ConnectionException("Request returned HTTP error code {}.".format(resp.status_code))
|
raise ConnectionException("Request returned HTTP error code {}.".format(resp.status_code))
|
||||||
except (urllib3.exceptions.HTTPError, requests.exceptions.RequestException, ConnectionException) as err:
|
except (urllib3.exceptions.HTTPError, requests.exceptions.RequestException, ConnectionException) as err:
|
||||||
print("URL: " + url + "\n" + err, file=sys.stderr)
|
print("URL: {}\n{}".format(url, err), file=sys.stderr)
|
||||||
if tries <= 1:
|
if tries <= 1:
|
||||||
raise err
|
raise NodeUnavailableException
|
||||||
self._sleep()
|
self._sleep()
|
||||||
self._get_and_write_raw(url, filename, tries - 1)
|
self._get_and_write_raw(url, filename, tries - 1)
|
||||||
|
|
||||||
@ -547,13 +551,18 @@ class Instaloader:
|
|||||||
print(err, file=sys.stderr)
|
print(err, file=sys.stderr)
|
||||||
print(json.dumps(pic_json, indent=4), file=sys.stderr)
|
print(json.dumps(pic_json, indent=4), file=sys.stderr)
|
||||||
if tries <= 1:
|
if tries <= 1:
|
||||||
raise err
|
raise NodeUnavailableException
|
||||||
self._sleep()
|
self._sleep()
|
||||||
media = self.get_node_metadata(node_code, tries - 1)
|
media = self.get_node_metadata(node_code, tries - 1)
|
||||||
return media
|
return media
|
||||||
|
|
||||||
def get_location(self, node_code: str) -> Dict[str, str]:
|
def get_location(self, node_code: str) -> Dict[str, str]:
|
||||||
media = self.get_node_metadata(node_code)
|
try:
|
||||||
|
media = self.get_node_metadata(node_code)
|
||||||
|
except NodeUnavailableException:
|
||||||
|
print("Unable to lookup location for node \"https://www.instagram.com/p/{}/\".".format(node_code),
|
||||||
|
sys.stderr)
|
||||||
|
return dict()
|
||||||
if media["location"] is not None:
|
if media["location"] is not None:
|
||||||
location_json = self.get_json("explore/locations/" +
|
location_json = self.get_json("explore/locations/" +
|
||||||
media["location"]["id"])
|
media["location"]["id"])
|
||||||
@ -580,8 +589,13 @@ class Instaloader:
|
|||||||
if already_has_profilename:
|
if already_has_profilename:
|
||||||
profilename = profile if profile is not None else node['owner']['username']
|
profilename = profile if profile is not None else node['owner']['username']
|
||||||
else:
|
else:
|
||||||
metadata = self.get_node_metadata(shortcode)
|
try:
|
||||||
profilename = metadata['owner']['username']
|
metadata = self.get_node_metadata(shortcode)
|
||||||
|
profilename = metadata['owner']['username']
|
||||||
|
except NodeUnavailableException:
|
||||||
|
print("Unable to gather profilename for node "
|
||||||
|
"\"https://www.instagram.com/p/{}/\".".format(shortcode), sys.stderr)
|
||||||
|
profilename = 'UNKNOWN'
|
||||||
else:
|
else:
|
||||||
profilename = None
|
profilename = None
|
||||||
profilename = profilename.lower() if profilename else None
|
profilename = profilename.lower() if profilename else None
|
||||||
@ -712,27 +726,32 @@ class Instaloader:
|
|||||||
date=date,
|
date=date,
|
||||||
shortcode=shortcode)
|
shortcode=shortcode)
|
||||||
os.makedirs(os.path.dirname(filename), exist_ok=True)
|
os.makedirs(os.path.dirname(filename), exist_ok=True)
|
||||||
if "image_versions2" in item:
|
try:
|
||||||
url = item["image_versions2"]["candidates"][0]["url"]
|
if "image_versions2" in item:
|
||||||
downloaded = self.download_pic(filename=filename,
|
url = item["image_versions2"]["candidates"][0]["url"]
|
||||||
url=url,
|
downloaded = self.download_pic(filename=filename,
|
||||||
mtime=date)
|
url=url,
|
||||||
else:
|
mtime=date)
|
||||||
self._log("Warning: Unable to find story image.")
|
else:
|
||||||
downloaded = False
|
self._log("Warning: Unable to find story image.")
|
||||||
if "caption" in item and item["caption"] is not None:
|
downloaded = False
|
||||||
caption = item["caption"]
|
if "caption" in item and item["caption"] is not None:
|
||||||
if isinstance(caption, dict) and "text" in caption:
|
caption = item["caption"]
|
||||||
caption = caption["text"]
|
if isinstance(caption, dict) and "text" in caption:
|
||||||
self.save_caption(filename, date, caption)
|
caption = caption["text"]
|
||||||
else:
|
self.save_caption(filename, date, caption)
|
||||||
self._log("<no caption>", end=' ', flush=True)
|
else:
|
||||||
if "video_versions" in item and download_videos:
|
self._log("<no caption>", end=' ', flush=True)
|
||||||
downloaded = self.download_pic(filename=filename,
|
if "video_versions" in item and download_videos:
|
||||||
url=item["video_versions"][0]["url"],
|
downloaded = self.download_pic(filename=filename,
|
||||||
mtime=date)
|
url=item["video_versions"][0]["url"],
|
||||||
if "video_duration" in item and self.sleep and downloaded:
|
mtime=date)
|
||||||
time.sleep(item["video_duration"])
|
if "video_duration" in item and self.sleep and downloaded:
|
||||||
|
time.sleep(item["video_duration"])
|
||||||
|
except NodeUnavailableException:
|
||||||
|
print("Unable to download node \"https://www.instagram.com/p/{}/\" of user {} from stories."
|
||||||
|
.format(shortcode, name), sys.stderr)
|
||||||
|
continue
|
||||||
if item["story_locations"]:
|
if item["story_locations"]:
|
||||||
location = item["story_locations"][0]["location"]
|
location = item["story_locations"][0]["location"]
|
||||||
if location:
|
if location:
|
||||||
@ -786,9 +805,14 @@ class Instaloader:
|
|||||||
continue
|
continue
|
||||||
self._log("[%3i] %s " % (count, name), end="", flush=True)
|
self._log("[%3i] %s " % (count, name), end="", flush=True)
|
||||||
count += 1
|
count += 1
|
||||||
downloaded = self.download_node(node, profile=name, target=':feed',
|
try:
|
||||||
download_videos=download_videos, geotags=geotags,
|
downloaded = self.download_node(node, profile=name, target=':feed',
|
||||||
download_comments=download_comments)
|
download_videos=download_videos, geotags=geotags,
|
||||||
|
download_comments=download_comments)
|
||||||
|
except NodeUnavailableException:
|
||||||
|
print("Unable to download node \"https://www.instagram.com/p/{}/\" of user {} from feed."
|
||||||
|
.format(node['shortcode'], name), sys.stderr)
|
||||||
|
continue
|
||||||
if fast_update and not downloaded:
|
if fast_update and not downloaded:
|
||||||
return
|
return
|
||||||
if not feed["page_info"]["has_next_page"]:
|
if not feed["page_info"]["has_next_page"]:
|
||||||
@ -830,9 +854,14 @@ class Instaloader:
|
|||||||
self._log('<skipped>')
|
self._log('<skipped>')
|
||||||
continue
|
continue
|
||||||
count += 1
|
count += 1
|
||||||
downloaded = self.download_node(node=node, profile=None, target='#'+hashtag,
|
try:
|
||||||
download_videos=download_videos, geotags=geotags,
|
downloaded = self.download_node(node=node, profile=None, target='#'+hashtag,
|
||||||
download_comments=download_comments)
|
download_videos=download_videos, geotags=geotags,
|
||||||
|
download_comments=download_comments)
|
||||||
|
except NodeUnavailableException:
|
||||||
|
print("Unable to download node \"https://www.instagram.com/p/{}/\" "
|
||||||
|
"while downloading hashtag \"{}\".".format(node['shortcode'], hashtag), sys.stderr)
|
||||||
|
continue
|
||||||
if fast_update and not downloaded:
|
if fast_update and not downloaded:
|
||||||
return
|
return
|
||||||
if data['entry_data']['TagPage'][0]['tag']['media']['page_info']['has_next_page']:
|
if data['entry_data']['TagPage'][0]['tag']['media']['page_info']['has_next_page']:
|
||||||
@ -904,7 +933,10 @@ class Instaloader:
|
|||||||
name = name_updated
|
name = name_updated
|
||||||
data = self.get_json(name)
|
data = self.get_json(name)
|
||||||
# Download profile picture
|
# Download profile picture
|
||||||
self.download_profilepic(name, data["entry_data"]["ProfilePage"][0]["user"]["profile_pic_url"])
|
try:
|
||||||
|
self.download_profilepic(name, data["entry_data"]["ProfilePage"][0]["user"]["profile_pic_url"])
|
||||||
|
except NodeUnavailableException:
|
||||||
|
print("Unable to download profilepic of user {}.".format(name), sys.stderr)
|
||||||
if profile_pic_only:
|
if profile_pic_only:
|
||||||
return
|
return
|
||||||
# Catch some errors
|
# Catch some errors
|
||||||
@ -939,9 +971,14 @@ class Instaloader:
|
|||||||
for node in data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]:
|
for node in data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]:
|
||||||
self._log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
|
self._log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
|
||||||
count += 1
|
count += 1
|
||||||
downloaded = self.download_node(node=node, profile=name, target=name,
|
try:
|
||||||
download_videos=download_videos, geotags=geotags,
|
downloaded = self.download_node(node=node, profile=name, target=name,
|
||||||
download_comments=download_comments)
|
download_videos=download_videos, geotags=geotags,
|
||||||
|
download_comments=download_comments)
|
||||||
|
except NodeUnavailableException:
|
||||||
|
print("Unable to download node \"https://www.instagram.com/p/{}/\" of user {}."
|
||||||
|
.format(node['shortcode'], name), sys.stderr)
|
||||||
|
continue
|
||||||
if fast_update and not downloaded:
|
if fast_update and not downloaded:
|
||||||
return
|
return
|
||||||
data = self.get_json(name, max_id=get_last_id(data))
|
data = self.get_json(name, max_id=get_last_id(data))
|
||||||
|
Loading…
Reference in New Issue
Block a user