From 117124d1dd9028e2d8ef6bcf742e5c10182de7cf Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 11 Aug 2017 19:51:00 +0200 Subject: [PATCH] Options --no-captions and --no-geotags These options instruct instaloader to not save captions or geotags respectively, even if the regarding information can be obtained without any additional queries to Instagram. This feature was proposed in #25, and thus this commit should close #25. --- README.rst | 4 ++ instaloader.py | 158 ++++++++++++++++++++++++++++--------------------- 2 files changed, 94 insertions(+), 68 deletions(-) diff --git a/README.rst b/README.rst index ce7ad1b..9fd2e63 100644 --- a/README.rst +++ b/README.rst @@ -145,9 +145,13 @@ Instead of a *profile* or a *#hashtag*, the special targets link. This requires an additional request to the Instagram server for each picture, which is why it is disabled by default. +--no-geotags Do not store geotags, even if they can be obtained + without any additional request. --comments Download and update comments for each post. This requires an additional request to the Instagram server for each post, which is why it is disabled by default. +--no-captions Do not store media captions, although no additional + request is needed to obtain them. --stories Also **download stories** of each profile that is downloaded. Requires ``--login``. --stories-only Rather than downloading regular posts of each diff --git a/instaloader.py b/instaloader.py index 3c0a1b7..c401661 100755 --- a/instaloader.py +++ b/instaloader.py @@ -17,6 +17,8 @@ from argparse import ArgumentParser from base64 import b64decode, b64encode from contextlib import contextmanager, suppress from datetime import datetime +from enum import Enum + from io import BytesIO from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple @@ -125,12 +127,30 @@ def format_string_contains_key(format_string: str, key: str) -> bool: return False +class Tristate(Enum): + """Tri-state to encode whether we should save certain information, i.e. videos, captions, comments or geotags. + + never: Do not save, even if the information is available without any additional request, + no_extra_query: Save if and only if available without doing additional queries, + always: Save (and query, if neccessary). + """ + never = 0 + no_extra_query = 1 + always = 2 + + class Instaloader: def __init__(self, sleep: bool = True, quiet: bool = False, shorter_output: bool = False, user_agent: Optional[str] = None, dirname_pattern: Optional[str] = None, - filename_pattern: Optional[str] = None): + filename_pattern: Optional[str] = None, + download_videos: Tristate = Tristate.always, + download_geotags: Tristate = Tristate.no_extra_query, + download_captions: Tristate = Tristate.no_extra_query, + download_comments: Tristate = Tristate.no_extra_query): + + # configuration parameters self.user_agent = user_agent if user_agent is not None else default_user_agent() self.session = self._get_anonymous_session() self.username = None @@ -140,8 +160,24 @@ class Instaloader: self.dirname_pattern = dirname_pattern if dirname_pattern is not None else '{target}' self.filename_pattern = filename_pattern.replace('{date}', '{date:%Y-%m-%d_%H-%M-%S}') \ if filename_pattern is not None else '{date:%Y-%m-%d_%H-%M-%S}' + self.download_videos = download_videos + self.download_geotags = download_geotags + self.download_captions = download_captions + self.download_comments = download_comments + + # error log, filled with _error() and printed at the end of Instaloader.main() self.error_log = [] + @contextmanager + def anonymous_copy(self): + """Yield an anonymous, otherwise equally-configured copy of an Instaloader instance; Then copy its error log.""" + new_loader = Instaloader(self.sleep, self.quiet, self.shorter_output, self.user_agent, + self.dirname_pattern, self.filename_pattern, + self.download_videos, self.download_geotags, + self.download_captions, self.download_comments) + yield new_loader + self.error_log.extend(new_loader.error_log) + def _log(self, *msg, sep='', end='\n', flush=False): """Log a message to stdout that can be suppressed with --quiet.""" if not self.quiet: @@ -542,17 +578,13 @@ class Instaloader: params={'__a': 1}) return location_json["location"] - def download_post(self, node: Dict[str, Any], profile: Optional[str], target: str, - download_videos: bool = True, geotags: bool = False, download_comments: bool = False) -> bool: + def download_post(self, node: Dict[str, Any], profile: Optional[str], target: str) -> bool: """ Download everything associated with one instagram post node, i.e. picture, caption and video. :param node: Node, as from media->nodes list in instagram's JSONs :param profile: Name of profile to which this node belongs :param target: Target name, i.e. profile name, #hashtag, :feed; for filename. - :param download_videos: True, if videos should be downloaded - :param geotags: Download geotags - :param download_comments: Update comments :return: True if something was downloaded, False otherwise, i.e. file was already there """ already_has_profilename = profile is not None or ('owner' in node and 'username' in node['owner']) @@ -603,25 +635,26 @@ class Instaloader: else: # Node is an old image or video. downloaded = self.download_pic(filename=filename, url=url, mtime=date) - if "edge_media_to_caption" in node and node["edge_media_to_caption"]["edges"]: - self.save_caption(filename, date, node["edge_media_to_caption"]["edges"][0]["node"]["text"]) - elif "caption" in node: - self.save_caption(filename, date, node["caption"]) - else: - self._log("", end=' ', flush=True) - if node["is_video"] and download_videos: + if self.download_captions is not Tristate.never: + if "edge_media_to_caption" in node and node["edge_media_to_caption"]["edges"]: + self.save_caption(filename, date, node["edge_media_to_caption"]["edges"][0]["node"]["text"]) + elif "caption" in node: + self.save_caption(filename, date, node["caption"]) + else: + self._log("", end=' ', flush=True) + if node["is_video"] and self.download_videos is Tristate.always: if not post_metadata: post_metadata = self.get_post_metadata(shortcode) self.download_pic(filename=filename, url=post_metadata['video_url'], mtime=date) - if geotags: + if self.download_geotags is Tristate.always: if not post_metadata: post_metadata = self.get_post_metadata(shortcode) location = self.get_location(post_metadata) if location: self.save_location(filename, location, date) - if download_comments: + if self.download_comments is Tristate.always: self.update_comments(filename, shortcode) self._log() return downloaded @@ -663,7 +696,6 @@ class Instaloader: def download_stories(self, userids: Optional[List[int]] = None, - download_videos: bool = True, fast_update: bool = False, filename_target: str = ':stories') -> None: """ @@ -672,7 +704,6 @@ class Instaloader: To use this, one needs to be logged in :param userids: List of user IDs to be processed in terms of downloading their stories - :param download_videos: True, if videos should be downloaded :param fast_update: If true, abort when first already-downloaded picture is encountered :param filename_target: Replacement for {target} in dirname_pattern and filename_pattern """ @@ -715,18 +746,19 @@ class Instaloader: else: self._log("Warning: Unable to find story image.") downloaded = False - if "caption" in item and item["caption"] is not None: + if "caption" in item and item["caption"] is not None and \ + self.download_captions is not Tristate.never: caption = item["caption"] if isinstance(caption, dict) and "text" in caption: caption = caption["text"] self.save_caption(filename, date, caption) else: self._log("", end=' ', flush=True) - if "video_versions" in item and download_videos: + if "video_versions" in item and self.download_videos is Tristate.always: downloaded = self.download_pic(filename=filename, url=item["video_versions"][0]["url"], mtime=date) - if item["story_locations"]: + if item["story_locations"] and self.download_geotags is not Tristate.never: location = item["story_locations"][0]["location"] if location: self.save_location(filename, location, date) @@ -763,9 +795,7 @@ class Instaloader: 'fetch_like': 10}) def download_feed_posts(self, max_count: int = None, fast_update: bool = False, - filter_func: Optional[Callable[[Dict[str, Dict[str, Any]]], bool]] = None, - download_videos: bool = True, geotags: bool = False, - download_comments: bool = False) -> None: + filter_func: Optional[Callable[[Dict[str, Dict[str, Any]]], bool]] = None) -> None: """ Download pictures from the user's feed. @@ -781,9 +811,6 @@ class Instaloader: :param max_count: Maximum count of pictures to download :param fast_update: If true, abort when first already-downloaded picture is encountered :param filter_func: function(post), which returns True if given picture should not be downloaded - :param download_videos: True, if videos should be downloaded - :param geotags: Download geotags - :param download_comments: Update comments """ count = 1 for post in self.get_feed_posts(): @@ -796,9 +823,7 @@ class Instaloader: self._log("[%3i] %s " % (count, name), end="", flush=True) count += 1 with self._error_catcher('Download feed'): - downloaded = self.download_post(post, profile=name, target=':feed', - download_videos=download_videos, geotags=geotags, - download_comments=download_comments) + downloaded = self.download_post(post, profile=name, target=':feed') if fast_update and not downloaded: break @@ -811,8 +836,7 @@ class Instaloader: def download_hashtag(self, hashtag: str, max_count: Optional[int] = None, filter_func: Optional[Callable[[Dict[str, Dict[str, Any]]], bool]] = None, - fast_update: bool = False, download_videos: bool = True, geotags: bool = False, - download_comments: bool = False) -> None: + fast_update: bool = False) -> None: """Download pictures of one hashtag. To download the last 30 pictures with hashtag #cat, do @@ -823,9 +847,6 @@ class Instaloader: :param max_count: Maximum count of pictures to download :param filter_func: function(post), which returns True if given picture should not be downloaded :param fast_update: If true, abort when first already-downloaded picture is encountered - :param download_videos: True, if videos should be downloaded - :param geotags: Download geotags - :param download_comments: Update comments """ count = 1 for post in self.get_hashtag_posts(hashtag): @@ -837,9 +858,7 @@ class Instaloader: continue count += 1 with self._error_catcher('Download hashtag #{}'.format(hashtag)): - downloaded = self.download_post(node=post, profile=None, target='#' + hashtag, - download_videos=download_videos, geotags=geotags, - download_comments=download_comments) + downloaded = self.download_post(node=post, profile=None, target='#' + hashtag) if fast_update and not downloaded: break @@ -918,8 +937,7 @@ class Instaloader: end_cursor = media['page_info']['end_cursor'] def download_profile(self, name: str, - profile_pic_only: bool = False, download_videos: bool = True, geotags: bool = False, - download_comments: bool = False, fast_update: bool = False, + profile_pic_only: bool = False, fast_update: bool = False, download_stories: bool = False, download_stories_only: bool = False) -> None: """Download one profile""" @@ -956,8 +974,7 @@ class Instaloader: # Download stories, if requested if download_stories or download_stories_only: - self.download_stories(userids=[profile_id], filename_target=name, - download_videos=download_videos, fast_update=fast_update) + self.download_stories(userids=[profile_id], filename_target=name, fast_update=fast_update) if download_stories_only: return @@ -973,9 +990,7 @@ class Instaloader: self._log("[%3i/%3i] " % (count, totalcount), end="", flush=True) count += 1 with self._error_catcher('Download profile {}'.format(name)): - downloaded = self.download_post(node=post, profile=name, target=name, - download_videos=download_videos, geotags=geotags, - download_comments=download_comments) + downloaded = self.download_post(node=post, profile=name, target=name) if fast_update and not downloaded: break @@ -996,9 +1011,7 @@ class Instaloader: def main(self, profilelist: List[str], username: Optional[str] = None, password: Optional[str] = None, sessionfile: Optional[str] = None, max_count: Optional[int] = None, - profile_pic_only: bool = False, download_videos: bool = True, geotags: bool = False, - download_comments: bool = False, - fast_update: bool = False, + profile_pic_only: bool = False, fast_update: bool = False, stories: bool = False, stories_only: bool = False) -> None: """Download set of profiles, hashtags etc. and handle logging in and session files if desired.""" # Login, if desired @@ -1023,9 +1036,7 @@ class Instaloader: if pentry[0] == '#': self._log("Retrieving pictures with hashtag {0}".format(pentry)) with self._error_catcher(): - self.download_hashtag(hashtag=pentry[1:], max_count=max_count, fast_update=fast_update, - download_videos=download_videos, geotags=geotags, - download_comments=download_comments) + self.download_hashtag(hashtag=pentry[1:], max_count=max_count, fast_update=fast_update) elif pentry[0] == '@': if username is not None: self._log("Retrieving followees of %s..." % pentry[1:]) @@ -1038,9 +1049,7 @@ class Instaloader: if username is not None: self._log("Retrieving pictures from your feed...") with self._error_catcher(): - self.download_feed_posts(fast_update=fast_update, max_count=max_count, - download_videos=download_videos, geotags=geotags, - download_comments=download_comments) + self.download_feed_posts(fast_update=fast_update, max_count=max_count) else: print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr) elif pentry == ":feed-liked": @@ -1052,15 +1061,13 @@ class Instaloader: return not node["viewer_has_liked"] with self._error_catcher(): self.download_feed_posts(fast_update=fast_update, max_count=max_count, - filter_func=liked_filter, - download_videos=download_videos, geotags=geotags, - download_comments=download_comments) + filter_func=liked_filter) else: print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr) elif pentry == ":stories": if username is not None: with self._error_catcher(): - self.download_stories(download_videos=download_videos, fast_update=fast_update) + self.download_stories(fast_update=fast_update) else: print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr) else: @@ -1071,18 +1078,14 @@ class Instaloader: for target in targets: with self._error_catcher(): try: - self.download_profile(target, profile_pic_only, download_videos, - geotags, download_comments, fast_update, stories, stories_only) + self.download_profile(target, profile_pic_only, fast_update, stories, stories_only) except ProfileNotExistsException as err: if username is not None: self._log(err) self._log("Trying again anonymously, helps in case you are just blocked.") - anonymous_loader = Instaloader(self.sleep, self.quiet, self.shorter_output, - self.user_agent, self.dirname_pattern, self.filename_pattern) - anonymous_loader.error_log = self.error_log - with self._error_catcher(): - anonymous_loader.download_profile(target, profile_pic_only, download_videos, - geotags, download_comments, fast_update) + with self.anonymous_copy() as anonymous_loader: + with self._error_catcher(): + anonymous_loader.download_profile(target, profile_pic_only, fast_update) else: raise err except KeyboardInterrupt: @@ -1122,10 +1125,14 @@ def main(): 'text file with the location\'s name and a Google Maps link. ' 'This requires an additional request to the Instagram ' 'server for each picture, which is why it is disabled by default.') + g_what.add_argument('--no-geotags', action='store_true', + help='Do not store geotags, even if they can be obtained without any additional request.') g_what.add_argument('-C', '--comments', action='store_true', help='Download and update comments for each post. ' 'This requires an additional request to the Instagram ' 'server for each post, which is why it is disabled by default.') + g_what.add_argument('--no-captions', action='store_true', + help='Do not store media captions, although no additional request is needed to obtain them.') g_what.add_argument('-s', '--stories', action='store_true', help='Also download stories of each profile that is downloaded. Requires --login.') g_what.add_argument('--stories-only', action='store_true', @@ -1192,14 +1199,29 @@ def main(): args.stories = False if args.stories_only: raise SystemExit(1) + + download_videos = Tristate.always if not args.skip_videos else Tristate.no_extra_query + download_comments = Tristate.always if args.comments else Tristate.no_extra_query + download_captions = Tristate.no_extra_query if not args.no_captions else Tristate.never + + if args.geotags and args.no_geotags: + raise SystemExit("--geotags and --no-geotags given. I am confused and refuse to work.") + elif args.geotags: + download_geotags = Tristate.always + elif args.no_geotags: + download_geotags = Tristate.never + else: + download_geotags = Tristate.no_extra_query + loader = Instaloader(sleep=not args.no_sleep, quiet=args.quiet, shorter_output=args.shorter_output, user_agent=args.user_agent, - dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern) + dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern, + download_videos=download_videos, download_geotags=download_geotags, + download_captions=download_captions, download_comments=download_comments) loader.main(args.profile, args.login.lower() if args.login is not None else None, args.password, args.sessionfile, int(args.count) if args.count is not None else None, - args.profile_pic_only, not args.skip_videos, args.geotags, args.comments, - args.fast_update, args.stories, args.stories_only) + args.profile_pic_only, args.fast_update, args.stories, args.stories_only) except InstaloaderException as err: raise SystemExit("Fatal error: %s" % err)