1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-11-23 10:42:30 +01:00

Added commit mode

The commit mode ensures pictures are not corrupted when Instaloader is
unexpectedly interrupted. In the case that the last picture is corrupted
because of an interruption, Instaloader will redownload the picture.
Since the metadata is the last object saved to disk, we can consider a
post as "committed" if its json metadata file exists and is not
malformed. Instaloader should download any posts which are not
committed. The downside is that commit mode requires the JSON metadata to be saved.
This commit is contained in:
sushilicious 2018-06-18 03:45:11 -04:00
parent ece0d11915
commit 30555b576c
3 changed files with 47 additions and 6 deletions

View File

@ -230,6 +230,12 @@ How to Download
to ``3``. If a connection fails, it can be manually skipped by hitting
:kbd:`Control-c`. Set this to ``0`` to retry infinitely.
.. option:: --commit-mode
Tries to ensure downloaded images avoid corruption in case of unexpected
interruption. If the last picture is corrupted, Instaloader will fix the
picture the next time it is run. Requires the JSON metadata to be saved.
Miscellaneous Options
^^^^^^^^^^^^^^^^^^^^^

View File

@ -320,6 +320,10 @@ def main():
help='Maximum number of connection attempts until a request is aborted. Defaults to 3. If a '
'connection fails, it can be manually skipped by hitting CTRL+C. Set this to 0 to retry '
'infinitely.')
g_how.add_argument('--commit-mode', action='store_true',
help='Tries to ensure downloaded images avoid corruption in case of unexpected interruption. '
'If the last picture is corrupted, Instaloader will fix the picture the next time it is run. '
'Requires the JSON metadata to be saved .')
g_misc = parser.add_argument_group('Miscellaneous Options')
g_misc.add_argument('-q', '--quiet', action='store_true',
@ -361,6 +365,9 @@ def main():
download_posts = not (args.no_posts or args.stories_only or args.profile_pic_only)
download_stories = args.stories or args.stories_only
if args.commit_mode and args.no_metadata_json:
raise SystemExit('--commit-mode requires JSON metadata to be saved.')
loader = Instaloader(sleep=not args.no_sleep, quiet=args.quiet, user_agent=args.user_agent,
dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern,
download_pictures=not args.no_pictures,
@ -371,7 +378,8 @@ def main():
post_metadata_txt_pattern=post_metadata_txt_pattern,
storyitem_metadata_txt_pattern=storyitem_metadata_txt_pattern,
graphql_rate_limit=args.graphql_rate_limit,
max_connection_attempts=args.max_connection_attempts)
max_connection_attempts=args.max_connection_attempts,
commit_mode=args.commit_mode)
_main(loader,
args.profile,
username=args.login.lower() if args.login is not None else None,

View File

@ -1,5 +1,6 @@
import getpass
import json
import lzma
import os
import platform
import re
@ -15,7 +16,7 @@ from typing import Any, Callable, Iterator, List, Optional, Set, Union
from .exceptions import *
from .instaloadercontext import InstaloaderContext
from .structures import Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem, save_structure_to_file
from .structures import Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem, save_structure_to_file, load_structure_from_file
def get_default_session_filename(username: str) -> str:
@ -116,7 +117,8 @@ class Instaloader:
post_metadata_txt_pattern: str = None,
storyitem_metadata_txt_pattern: str = None,
graphql_rate_limit: Optional[int] = None,
max_connection_attempts: int = 3):
max_connection_attempts: int = 3,
commit_mode: bool = False):
self.context = InstaloaderContext(sleep, quiet, user_agent, graphql_rate_limit, max_connection_attempts)
@ -134,6 +136,9 @@ class Instaloader:
else post_metadata_txt_pattern
self.storyitem_metadata_txt_pattern = '' if storyitem_metadata_txt_pattern is None \
else storyitem_metadata_txt_pattern
self.commit_mode = commit_mode
if self.commit_mode and not self.save_metadata:
raise InvalidArgumentException("Commit mode requires JSON metadata to be saved.")
@contextmanager
def anonymous_copy(self):
@ -173,9 +178,15 @@ class Instaloader:
if filename_suffix is not None:
filename += '_' + filename_suffix
filename += '.' + file_extension
if os.path.isfile(filename):
self.context.log(filename + ' exists', end=' ', flush=True)
return False
# A post is considered "committed" if the json file exists and is not malformed.
if self.commit_mode:
if self._committed:
self.context.log(filename + ' exists', end=' ', flush=True)
return False
else:
if os.path.isfile(filename):
self.context.log(filename + ' exists', end=' ', flush=True)
return False
self.context.get_and_write_raw(url, filename)
os.utime(filename, (datetime.now().timestamp(), mtime.timestamp()))
return True
@ -359,6 +370,7 @@ class Instaloader:
# Download the image(s) / video thumbnail and videos within sidecars if desired
downloaded = True
self._committed = self.check_if_committed(filename)
if self.download_pictures:
if post.typename == 'GraphSidecar':
edge_number = 1
@ -945,6 +957,21 @@ class Instaloader:
if fast_update and not downloaded:
break
def check_if_committed(self, filename: str) -> bool:
    """Check whether the post stored under ``filename`` has been committed.

    A post is considered committed if its JSON metadata file exists and is
    not malformed. The xz-compressed file (``<filename>.json.xz``) is looked
    up first; the plain ``<filename>.json`` is only used when the compressed
    one does not exist.

    :param filename: Path of the post's files without any extension.
    :return: ``True`` if a metadata file exists and could be loaded,
       ``False`` if none exists or the one found could not be loaded.
    """
    for suffix in ('.json.xz', '.json'):
        metadata_path = filename + suffix
        if os.path.isfile(metadata_path):
            break
    else:
        # Neither metadata file exists, so the post was never committed.
        return False
    try:
        load_structure_from_file(self.context, metadata_path)
    except (FileNotFoundError, EOFError, lzma.LZMAError, json.decoder.JSONDecodeError):
        # EOFError: the stdlib lzma reader raises it (not LZMAError) when a
        # compressed stream ends before its end-of-stream marker, which is
        # what a metadata file truncated by an interruption looks like
        # (assuming the loader reads .xz files through the lzma module).
        return False
    return True
def interactive_login(self, username: str) -> None:
"""Logs in and internally stores session, asking user for password interactively.