1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-10-05 14:57:08 +02:00

Merge branch 'bug_69_fix'

This commit is contained in:
André Koch-Kramer 2018-02-15 14:56:42 +01:00
commit 882d460a67

View File

@ -267,7 +267,7 @@ class Post:
return int(self._field('owner', 'id'))
@property
def date(self) -> datetime:
def date_local(self) -> datetime:
"""Timestamp when the post was created (local time zone)."""
return datetime.fromtimestamp(self._node["date"] if "date" in self._node else self._node["taken_at_timestamp"])
@ -436,11 +436,16 @@ class Instaloader:
self.quiet = quiet
self.dirname_pattern = dirname_pattern if dirname_pattern is not None else '{target}'
if filename_pattern is not None:
self.filename_pattern = filename_pattern \
.replace('{date}', '{date:%Y-%m-%d_%H-%M-%S}') \
.replace('{date_utc}', '{date_utc:%Y-%m-%d_%H-%M-%S_UTC}')
filename_pattern = re.sub(r"(\{(?:post\.)?date)([:}])", r"\1_utc\2", filename_pattern)
self.filename_pattern_old = filename_pattern.replace('{date_utc}', '{date_utc:%Y-%m-%d_%H-%M-%S}')
self.filename_pattern_old = re.sub(r"(?i)(\{(?:post\.)?date_utc:[^}]*?)_UTC",
r"\1", self.filename_pattern_old)
filename_pattern = re.sub(r"(?i)(\{(date_utc|post\.date_utc):(?![^}]*UTC[^}]*).*?)}",
r"\1_UTC}", filename_pattern)
self.filename_pattern = filename_pattern.replace('{date_utc}', '{date_utc:%Y-%m-%d_%H-%M-%S_UTC}')
else:
self.filename_pattern = '{date:%Y-%m-%d_%H-%M-%S}'
self.filename_pattern = '{date_utc:%Y-%m-%d_%H-%M-%S_UTC}'
self.filename_pattern_old = '{date_utc:%Y-%m-%d_%H-%M-%S}'
self.download_videos = download_videos
self.download_video_thumbnails = download_video_thumbnails
self.download_geotags = download_geotags
@ -725,17 +730,24 @@ class Instaloader:
lambda d: d['data']['user']['edge_follow'])
def download_pic(self, filename: str, url: str, mtime: datetime,
filename_suffix: Optional[str] = None) -> bool:
filename_alt: Optional[str] = None, filename_suffix: Optional[str] = None) -> bool:
"""Downloads and saves picture with given url under given directory with given timestamp.
Returns true, if file was actually downloaded, i.e. updated."""
urlmatch = re.search('\\.[a-z0-9]*\\?', url)
file_extension = url[-3:] if urlmatch is None else urlmatch.group(0)[1:-1]
if filename_suffix is not None:
filename += '_' + filename_suffix
if filename_alt is not None:
filename_alt += '_' + filename_suffix
filename += '.' + file_extension
if os.path.isfile(filename):
self._log(filename + ' exists', end=' ', flush=True)
return False
if filename_alt is not None:
filename_alt += '.' + file_extension
if os.path.isfile(filename_alt):
self._log(filename_alt + 'exists', end=' ', flush=True)
return False
self._get_and_write_raw(url, filename)
os.utime(filename, (datetime.now().timestamp(), mtime.timestamp()))
return True
@ -746,15 +758,20 @@ class Instaloader:
json.dump(post, fp=open(filename, 'w'), indent=4, default=Post.json_encoder)
self._log('json', end=' ', flush=True)
def update_comments(self, filename: str, post: Post) -> None:
filename += '_comments.json'
def update_comments(self, filename: str, post: Post, filename_alt: Optional[str] = None) -> None:
try:
comments = json.load(open(filename))
filename_current = filename + '_comments.json'
comments = json.load(open(filename_current))
except FileNotFoundError:
comments = list()
try:
filename_current = filename_alt + '_comments.json'
comments = json.load(open(filename_current))
except (FileNotFoundError, TypeError):
filename_current = filename + '_comments.json'
comments = list()
comments.extend(post.get_comments())
if comments:
with open(filename, 'w') as file:
with open(filename_current, 'w') as file:
comments_list = sorted(sorted(list(comments), key=lambda t: t['id']),
key=lambda t: t['created_at'], reverse=True)
unique_comments_list = [comments_list[0]]
@ -767,17 +784,25 @@ class Instaloader:
if x['id'] != y['id']:
unique_comments_list.append(y)
file.write(json.dumps(unique_comments_list, indent=4))
os.rename(filename_current, filename + '_comments.json')
self._log('comments', end=' ', flush=True)
def save_caption(self, filename: str, mtime: datetime, caption: str) -> None:
def save_caption(self, filename: str, mtime: datetime, caption: str, filename_alt: Optional[str] = None) -> None:
"""Updates picture caption"""
filename += '.txt'
if filename_alt is not None:
filename_alt += '.txt'
pcaption = caption.replace('\n', ' ').strip()
caption = caption.encode("UTF-8")
pcaption = '[' + ((pcaption[:29] + u"\u2026") if len(pcaption) > 31 else pcaption) + ']'
with suppress(FileNotFoundError):
with open(filename, 'rb') as file:
file_caption = file.read()
try:
with open(filename, 'rb') as file:
file_caption = file.read()
except FileNotFoundError:
if filename_alt is not None:
with open(filename_alt, 'rb') as file:
file_caption = file.read()
if file_caption.replace(b'\r\n', b'\n') == caption.replace(b'\r\n', b'\n'):
try:
self._log(pcaption + ' unchanged', end=' ', flush=True)
@ -785,15 +810,22 @@ class Instaloader:
self._log('txt unchanged', end=' ', flush=True)
return None
else:
def get_filename(index):
return filename if index == 0 else (filename[:-4] + '_old_' +
(str(0) if index < 10 else str()) + str(index) + filename[-4:])
def get_filename(file, index):
return file if index == 0 else (file[:-4] + '_old_' +
(str(0) if index < 10 else str()) + str(index) + file[-4:])
i = 0
while os.path.isfile(get_filename(i)):
file_exists_list = []
while True:
file_exists_list.append(1 if os.path.isfile(get_filename(filename, i)) else 0)
if not file_exists_list[i] and filename_alt is not None:
file_exists_list[i] = 2 if os.path.isfile(get_filename(filename_alt, i)) else 0
if not file_exists_list[i]:
break
i = i + 1
for index in range(i, 0, -1):
os.rename(get_filename(index - 1), get_filename(index))
os.rename(get_filename(filename if file_exists_list[index - 1] % 2 else filename_alt, index - 1),
get_filename(filename, index))
try:
self._log(pcaption + ' updated', end=' ', flush=True)
except UnicodeEncodeError:
@ -916,9 +948,14 @@ class Instaloader:
profilename = post.owner_username if needs_profilename else None
dirname = self.dirname_pattern.format(profile=profilename, target=target.lower())
filename = dirname + '/' + self.filename_pattern.format(profile=profilename, target=target.lower(),
date=post.date, date_utc=post.date_utc,
date_utc=post.date_utc,
shortcode=post.shortcode,
post=post)
filename_old = dirname + '/' + self.filename_pattern_old.replace("{post.date_utc", "{date_utc") \
.format(profile=profilename, target=target.lower(),
date_utc=post.date_local,
shortcode=post.shortcode,
post=post)
os.makedirs(os.path.dirname(filename), exist_ok=True)
# Download the image(s) / video thumbnail and videos within sidecars if desired
@ -929,44 +966,50 @@ class Instaloader:
# Download picture or video thumbnail
if not edge['node']['is_video'] or self.download_video_thumbnails is Tristate.always:
downloaded |= self.download_pic(filename=filename,
filename_alt=filename_old,
url=edge['node']['display_url'],
mtime=post.date,
mtime=post.date_local,
filename_suffix=str(edge_number))
# Additionally download video if available and desired
if edge['node']['is_video'] and self.download_videos is Tristate.always:
downloaded |= self.download_pic(filename=filename,
filename_alt=filename_old,
url=edge['node']['video_url'],
mtime=post.date,
mtime=post.date_local,
filename_suffix=str(edge_number))
edge_number += 1
elif post.typename == 'GraphImage':
downloaded = self.download_pic(filename=filename, url=post.url, mtime=post.date)
downloaded = self.download_pic(filename=filename, filename_alt=filename_old,
url=post.url, mtime=post.date_local)
elif post.typename == 'GraphVideo':
if self.download_video_thumbnails is Tristate.always:
downloaded = self.download_pic(filename=filename, url=post.url, mtime=post.date)
downloaded = self.download_pic(filename=filename, filename_alt=filename_old,
url=post.url, mtime=post.date_local)
else:
self.error("Warning: {0} has unknown typename: {1}".format(post, post.typename))
# Save caption if desired
if self.save_captions is not Tristate.never:
if post.caption:
self.save_caption(filename, post.date, post.caption)
self.save_caption(filename=filename, filename_alt=filename_old,
mtime=post.date_local, caption=post.caption)
else:
self._log("<no caption>", end=' ', flush=True)
# Download video if desired
if post.is_video and self.download_videos is Tristate.always:
downloaded |= self.download_pic(filename=filename, url=post.video_url, mtime=post.date)
downloaded |= self.download_pic(filename=filename, filename_alt=filename_old,
url=post.video_url, mtime=post.date_local)
# Download geotags if desired
if self.download_geotags is Tristate.always:
location = post.get_location()
if location:
self.save_location(filename, location, post.date)
self.save_location(filename, location, post.date_local)
# Update comments if desired
if self.download_comments is Tristate.always:
self.update_comments(filename, post)
self.update_comments(filename=filename, filename_alt=filename_old, post=post)
# Save metadata as JSON if desired. It might require an extra query, depending on which information has been
# already obtained. Regarding Tristate interpretation, we always assume that it requires an extra query.
@ -1057,20 +1100,24 @@ class Instaloader:
"""
shortcode = item["code"] if "code" in item else "no_code"
date = datetime.fromtimestamp(item["taken_at"])
date_local = datetime.fromtimestamp(item["taken_at"])
date_utc = datetime.utcfromtimestamp(item["taken_at"])
dirname = self.dirname_pattern.format(profile=profile, target=target)
filename = dirname + '/' + self.filename_pattern.format(profile=profile, target=target,
date=date, date_utc=date_utc,
date_utc=date_utc,
shortcode=shortcode)
filename_old = dirname + '/' + self.filename_pattern_old.format(profile=profile, target=target,
date_utc=date_local,
shortcode=shortcode)
os.makedirs(os.path.dirname(filename), exist_ok=True)
downloaded = False
if "image_versions2" in item:
if "video_versions" not in item or self.download_video_thumbnails is Tristate.always:
url = item["image_versions2"]["candidates"][0]["url"]
downloaded = self.download_pic(filename=filename,
filename_alt=filename_old,
url=url,
mtime=date)
mtime=date_local)
else:
self._log("Warning: Unable to find story image.")
if "caption" in item and item["caption"] is not None and \
@ -1078,17 +1125,18 @@ class Instaloader:
caption = item["caption"]
if isinstance(caption, dict) and "text" in caption:
caption = caption["text"]
self.save_caption(filename, date, caption)
self.save_caption(filename=filename, filename_alt=filename_old, mtime=date_local, caption=caption)
else:
self._log("<no caption>", end=' ', flush=True)
if "video_versions" in item and self.download_videos is Tristate.always:
downloaded |= self.download_pic(filename=filename,
filename_alt=filename_old,
url=item["video_versions"][0]["url"],
mtime=date)
mtime=date_local)
if item["story_locations"] and self.download_geotags is not Tristate.never:
location = item["story_locations"][0]["location"]
if location:
self.save_location(filename, location, date)
self.save_location(filename, location, date_local)
self._log()
return downloaded