1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-10-27 05:32:30 +01:00

Deprecate commit-mode (#697)

Rather than checking the json file to make sure posts have been
successfully downloaded, data is stored in a temporary file which
is renamed when downloading has finished, as suggested in #257.
This commit is contained in:
Lars Lindqvist 2020-06-20 16:23:31 +02:00 committed by GitHub
parent 66b3ec6928
commit fa62025ea3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 11 additions and 52 deletions

View File

@ -358,10 +358,7 @@ def main():
help='Maximum number of connection attempts until a request is aborted. Defaults to 3. If a ' help='Maximum number of connection attempts until a request is aborted. Defaults to 3. If a '
'connection fails, it can be manually skipped by hitting CTRL+C. Set this to 0 to retry ' 'connection fails, it can be manually skipped by hitting CTRL+C. Set this to 0 to retry '
'infinitely.') 'infinitely.')
g_how.add_argument('--commit-mode', action='store_true', g_how.add_argument('--commit-mode', action='store_true', help=SUPPRESS)
help='Tries to ensure downloaded images avoid corruption in case of unexpected interruption. '
'If the last picture is corrupted, Instaloader will fix the picture the next time it is run. '
'Requires the JSON metadata to be saved.')
g_how.add_argument('--request-timeout', metavar='N', type=float, g_how.add_argument('--request-timeout', metavar='N', type=float,
help='seconds to wait before timing out a connection request') help='seconds to wait before timing out a connection request')
@ -405,9 +402,6 @@ def main():
download_posts = not (args.no_posts or args.stories_only or args.profile_pic_only) download_posts = not (args.no_posts or args.stories_only or args.profile_pic_only)
download_stories = args.stories or args.stories_only download_stories = args.stories or args.stories_only
if args.commit_mode and args.no_metadata_json:
raise SystemExit('--commit-mode requires JSON metadata to be saved.')
loader = Instaloader(sleep=not args.no_sleep, quiet=args.quiet, user_agent=args.user_agent, loader = Instaloader(sleep=not args.no_sleep, quiet=args.quiet, user_agent=args.user_agent,
dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern, dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern,
download_pictures=not args.no_pictures, download_pictures=not args.no_pictures,
@ -418,8 +412,7 @@ def main():
post_metadata_txt_pattern=post_metadata_txt_pattern, post_metadata_txt_pattern=post_metadata_txt_pattern,
storyitem_metadata_txt_pattern=storyitem_metadata_txt_pattern, storyitem_metadata_txt_pattern=storyitem_metadata_txt_pattern,
max_connection_attempts=args.max_connection_attempts, max_connection_attempts=args.max_connection_attempts,
request_timeout=args.request_timeout, request_timeout=args.request_timeout)
commit_mode=args.commit_mode)
_main(loader, _main(loader,
args.profile, args.profile,
username=args.login.lower() if args.login is not None else None, username=args.login.lower() if args.login is not None else None,

View File

@ -1,6 +1,5 @@
import getpass import getpass
import json import json
import lzma
import os import os
import platform import platform
import re import re
@ -22,7 +21,7 @@ import urllib3 # type: ignore
from .exceptions import * from .exceptions import *
from .instaloadercontext import InstaloaderContext from .instaloadercontext import InstaloaderContext
from .structures import (Hashtag, Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem, from .structures import (Hashtag, Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem,
save_structure_to_file, load_structure_from_file) save_structure_to_file)
def get_default_session_filename(username: str) -> str: def get_default_session_filename(username: str) -> str:
@ -153,7 +152,6 @@ class Instaloader:
txt file. txt file.
:param storyitem_metadata_txt_pattern: :option:`--storyitem-metadata-txt`, default is empty (=none) :param storyitem_metadata_txt_pattern: :option:`--storyitem-metadata-txt`, default is empty (=none)
:param max_connection_attempts: :option:`--max-connection-attempts` :param max_connection_attempts: :option:`--max-connection-attempts`
:param commit_mode: :option:`--commit-mode`
:param request_timeout: :option:`--request-timeout`, set per-request timeout (seconds) :param request_timeout: :option:`--request-timeout`, set per-request timeout (seconds)
.. attribute:: context .. attribute:: context
@ -177,8 +175,7 @@ class Instaloader:
post_metadata_txt_pattern: str = None, post_metadata_txt_pattern: str = None,
storyitem_metadata_txt_pattern: str = None, storyitem_metadata_txt_pattern: str = None,
max_connection_attempts: int = 3, max_connection_attempts: int = 3,
request_timeout: Optional[float] = None, request_timeout: Optional[float] = None):
commit_mode: bool = False):
self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts, request_timeout) self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts, request_timeout)
@ -196,12 +193,6 @@ class Instaloader:
else post_metadata_txt_pattern else post_metadata_txt_pattern
self.storyitem_metadata_txt_pattern = '' if storyitem_metadata_txt_pattern is None \ self.storyitem_metadata_txt_pattern = '' if storyitem_metadata_txt_pattern is None \
else storyitem_metadata_txt_pattern else storyitem_metadata_txt_pattern
self.commit_mode = commit_mode
if self.commit_mode and not self.save_metadata:
raise InvalidArgumentException("Commit mode requires JSON metadata to be saved.")
# Used to keep state in commit mode
self._committed = None # type: Optional[bool]
@contextmanager @contextmanager
def anonymous_copy(self): def anonymous_copy(self):
@ -222,8 +213,7 @@ class Instaloader:
post_metadata_txt_pattern=self.post_metadata_txt_pattern, post_metadata_txt_pattern=self.post_metadata_txt_pattern,
storyitem_metadata_txt_pattern=self.storyitem_metadata_txt_pattern, storyitem_metadata_txt_pattern=self.storyitem_metadata_txt_pattern,
max_connection_attempts=self.context.max_connection_attempts, max_connection_attempts=self.context.max_connection_attempts,
request_timeout=self.context.request_timeout, request_timeout=self.context.request_timeout)
commit_mode=self.commit_mode)
yield new_loader yield new_loader
self.context.error_log.extend(new_loader.context.error_log) self.context.error_log.extend(new_loader.context.error_log)
new_loader.context.error_log = [] # avoid double-printing of errors new_loader.context.error_log = [] # avoid double-printing of errors
@ -249,15 +239,9 @@ class Instaloader:
if filename_suffix is not None: if filename_suffix is not None:
filename += '_' + filename_suffix filename += '_' + filename_suffix
filename += '.' + file_extension filename += '.' + file_extension
# A post is considered "committed" if the json file exists and is not malformed. if os.path.isfile(filename):
if self.commit_mode: self.context.log(filename + ' exists', end=' ', flush=True)
if self._committed and os.path.isfile(filename): return False
self.context.log(filename + ' exists', end=' ', flush=True)
return False
else:
if os.path.isfile(filename):
self.context.log(filename + ' exists', end=' ', flush=True)
return False
self.context.get_and_write_raw(url, filename) self.context.get_and_write_raw(url, filename)
os.utime(filename, (datetime.now().timestamp(), mtime.timestamp())) os.utime(filename, (datetime.now().timestamp(), mtime.timestamp()))
return True return True
@ -506,7 +490,6 @@ class Instaloader:
# Download the image(s) / video thumbnail and videos within sidecars if desired # Download the image(s) / video thumbnail and videos within sidecars if desired
downloaded = True downloaded = True
self._committed = self.check_if_committed(filename)
if self.download_pictures: if self.download_pictures:
if post.typename == 'GraphSidecar': if post.typename == 'GraphSidecar':
edge_number = 1 edge_number = 1
@ -1206,25 +1189,6 @@ class Instaloader:
self.posts_download_loop(profile.get_posts(), profile_name, fast_update, post_filter, self.posts_download_loop(profile.get_posts(), profile_name, fast_update, post_filter,
total_count=profile.mediacount) total_count=profile.mediacount)
def check_if_committed(self, filename: str) -> bool:
    """Checks to see if the current post has been committed.

    A post is considered committed if its json metadata file exists and is not malformed.

    :param filename: Path of the post's files without extension; ``.json.xz`` is probed first,
       then ``.json``.
    :return: True if a metadata file exists and could be loaded, False otherwise.

    .. versionadded:: 4.2
    """
    # Prefer the compressed metadata file, fall back to the plain one.
    for suffix in ('.json.xz', '.json'):
        if os.path.isfile(filename + suffix):
            metadata_path = filename + suffix
            break
    else:
        # No metadata file at all: the post was never committed.
        return False
    try:
        load_structure_from_file(self.context, metadata_path)
    except (FileNotFoundError, EOFError, lzma.LZMAError, json.decoder.JSONDecodeError):
        # EOFError: a truncated .xz stream (e.g. a write interrupted midway) is reported
        # by the lzma file reader as EOFError, not LZMAError -- assuming
        # load_structure_from_file reads .xz files through the lzma module (not visible here).
        return False
    return True
def interactive_login(self, username: str) -> None: def interactive_login(self, username: str) -> None:
"""Logs in and internally stores session, asking user for password interactively. """Logs in and internally stores session, asking user for password interactively.

View File

@ -1,5 +1,6 @@
import hashlib import hashlib
import json import json
import os
import pickle import pickle
import random import random
import re import re
@ -547,11 +548,12 @@ class InstaloaderContext:
.. versionadded:: 4.2.1""" .. versionadded:: 4.2.1"""
self.log(filename, end=' ', flush=True) self.log(filename, end=' ', flush=True)
with open(filename, 'wb') as file: with open(filename + '.temp', 'wb') as file:
if isinstance(resp, requests.Response): if isinstance(resp, requests.Response):
shutil.copyfileobj(resp.raw, file) shutil.copyfileobj(resp.raw, file)
else: else:
file.write(resp) file.write(resp)
os.rename(filename + '.temp', filename)
def get_raw(self, url: str, _attempt=1) -> requests.Response: def get_raw(self, url: str, _attempt=1) -> requests.Response:
"""Downloads a file anonymously. """Downloads a file anonymously.