From 3d53ed39b022c5243ea19e065bf918494caf7b44 Mon Sep 17 00:00:00 2001 From: saravananravi08 Date: Thu, 26 Sep 2024 21:24:03 +0530 Subject: [PATCH] added proxy support --- build/lib/instaloader/__init__.py | 38 + build/lib/instaloader/__main__.py | 597 +++++ build/lib/instaloader/exceptions.py | 84 + build/lib/instaloader/instaloader.py | 1631 ++++++++++++++ build/lib/instaloader/instaloadercontext.py | 875 ++++++++ build/lib/instaloader/lateststamps.py | 117 + build/lib/instaloader/nodeiterator.py | 329 +++ build/lib/instaloader/py.typed | 0 build/lib/instaloader/sectioniterator.py | 46 + build/lib/instaloader/structures.py | 2191 +++++++++++++++++++ instaloader.egg-info/PKG-INFO | 185 ++ instaloader.egg-info/SOURCES.txt | 21 + instaloader.egg-info/dependency_links.txt | 1 + instaloader.egg-info/entry_points.txt | 2 + instaloader.egg-info/not-zip-safe | 1 + instaloader.egg-info/requires.txt | 4 + instaloader.egg-info/top_level.txt | 1 + instaloader/instaloader.py | 3 +- instaloader/instaloadercontext.py | 14 +- 19 files changed, 6133 insertions(+), 7 deletions(-) create mode 100644 build/lib/instaloader/__init__.py create mode 100644 build/lib/instaloader/__main__.py create mode 100644 build/lib/instaloader/exceptions.py create mode 100644 build/lib/instaloader/instaloader.py create mode 100644 build/lib/instaloader/instaloadercontext.py create mode 100644 build/lib/instaloader/lateststamps.py create mode 100644 build/lib/instaloader/nodeiterator.py create mode 100644 build/lib/instaloader/py.typed create mode 100644 build/lib/instaloader/sectioniterator.py create mode 100644 build/lib/instaloader/structures.py create mode 100644 instaloader.egg-info/PKG-INFO create mode 100644 instaloader.egg-info/SOURCES.txt create mode 100644 instaloader.egg-info/dependency_links.txt create mode 100644 instaloader.egg-info/entry_points.txt create mode 100644 instaloader.egg-info/not-zip-safe create mode 100644 instaloader.egg-info/requires.txt create mode 100644 
instaloader.egg-info/top_level.txt diff --git a/build/lib/instaloader/__init__.py b/build/lib/instaloader/__init__.py new file mode 100644 index 0000000..8da6016 --- /dev/null +++ b/build/lib/instaloader/__init__.py @@ -0,0 +1,38 @@ +"""Download pictures (or videos) along with their captions and other metadata from Instagram.""" + + +__version__ = '4.13.1' + + +try: + # pylint:disable=wrong-import-position + import win_unicode_console # type: ignore +except ImportError: + pass +else: + win_unicode_console.enable() + +from .exceptions import * +from .instaloader import Instaloader as Instaloader +from .instaloadercontext import (InstaloaderContext as InstaloaderContext, + RateController as RateController) +from .lateststamps import LatestStamps as LatestStamps +from .nodeiterator import (NodeIterator as NodeIterator, + FrozenNodeIterator as FrozenNodeIterator, + resumable_iteration as resumable_iteration) +from .structures import (Hashtag as Hashtag, + Highlight as Highlight, + Post as Post, + PostSidecarNode as PostSidecarNode, + PostComment as PostComment, + PostCommentAnswer as PostCommentAnswer, + PostLocation as PostLocation, + Profile as Profile, + Story as Story, + StoryItem as StoryItem, + TopSearchResults as TopSearchResults, + TitlePic as TitlePic, + load_structure_from_file as load_structure_from_file, + save_structure_to_file as save_structure_to_file, + load_structure as load_structure, + get_json_structure as get_json_structure) diff --git a/build/lib/instaloader/__main__.py b/build/lib/instaloader/__main__.py new file mode 100644 index 0000000..6e18e26 --- /dev/null +++ b/build/lib/instaloader/__main__.py @@ -0,0 +1,597 @@ +"""Download pictures (or videos) along with their captions and other metadata from Instagram.""" + +import ast +import datetime +import os +import re +import sys +from argparse import ArgumentParser, ArgumentTypeError, SUPPRESS +from enum import IntEnum +from typing import List, Optional + +from . 
import (AbortDownloadException, BadCredentialsException, Instaloader, InstaloaderException, + InvalidArgumentException, LoginException, Post, Profile, ProfileNotExistsException, StoryItem, + TwoFactorAuthRequiredException, __version__, load_structure_from_file) +from .instaloader import (get_default_session_filename, get_default_stamps_filename) +from .instaloadercontext import default_user_agent +from .lateststamps import LatestStamps +try: + import browser_cookie3 + bc3_library = True +except ImportError: + bc3_library = False + + +class ExitCode(IntEnum): + SUCCESS = 0 + NON_FATAL_ERROR = 1 + INIT_FAILURE = 2 + LOGIN_FAILURE = 3 + DOWNLOAD_ABORTED = 4 + USER_ABORTED = 5 + UNEXPECTED_ERROR = 99 + +def usage_string(): + # NOTE: duplicated in README.rst and docs/index.rst + argv0 = os.path.basename(sys.argv[0]) + argv0 = "instaloader" if argv0 == "__main__.py" else argv0 + return """ +{0} [--comments] [--geotags] +{2:{1}} [--stories] [--highlights] [--tagged] [--igtv] +{2:{1}} [--login YOUR-USERNAME] [--fast-update] +{2:{1}} profile | "#hashtag" | %%location_id | :stories | :feed | :saved +{0} --help""".format(argv0, len(argv0), '') + + +def http_status_code_list(code_list_str: str) -> List[int]: + codes = [int(s) for s in code_list_str.split(',')] + for code in codes: + if not 100 <= code <= 599: + raise ArgumentTypeError("Invalid HTTP status code: {}".format(code)) + return codes + + +def filterstr_to_filterfunc(filter_str: str, item_type: type): + """Takes an --post-filter=... or --storyitem-filter=... filter + specification and makes a filter_func Callable out of it.""" + + # The filter_str is parsed, then all names occurring in its AST are replaced by loads to post.. A + # function Post->bool is returned which evaluates the filter with the post as 'post' in its namespace. 
+ + class TransformFilterAst(ast.NodeTransformer): + def visit_Name(self, node: ast.Name): + if not isinstance(node.ctx, ast.Load): + raise InvalidArgumentException("Invalid filter: Modifying variables ({}) not allowed.".format(node.id)) + if node.id == "datetime": + return node + if not hasattr(item_type, node.id): + raise InvalidArgumentException("Invalid filter: {} not a {} attribute.".format(node.id, + item_type.__name__)) + new_node = ast.Attribute(ast.copy_location(ast.Name('item', ast.Load()), node), node.id, + ast.copy_location(ast.Load(), node)) + return ast.copy_location(new_node, node) + + input_filename = '' + compiled_filter = compile(TransformFilterAst().visit(ast.parse(filter_str, filename=input_filename, mode='eval')), + filename=input_filename, mode='eval') + + def filterfunc(item) -> bool: + # pylint:disable=eval-used + return bool(eval(compiled_filter, {'item': item, 'datetime': datetime.datetime})) + + return filterfunc + + +def get_cookies_from_instagram(domain, browser, cookie_file='', cookie_name=''): + supported_browsers = { + "brave": browser_cookie3.brave, + "chrome": browser_cookie3.chrome, + "chromium": browser_cookie3.chromium, + "edge": browser_cookie3.edge, + "firefox": browser_cookie3.firefox, + "librewolf": browser_cookie3.librewolf, + "opera": browser_cookie3.opera, + "opera_gx": browser_cookie3.opera_gx, + "safari": browser_cookie3.safari, + "vivaldi": browser_cookie3.vivaldi, + } + + if browser not in supported_browsers: + raise InvalidArgumentException("Loading cookies from the specified browser failed\n" + "Supported browsers are Brave, Chrome, Chromium, Edge, Firefox, LibreWolf, " + "Opera, Opera_GX, Safari and Vivaldi") + + cookies = {} + browser_cookies = list(supported_browsers[browser](cookie_file=cookie_file)) + + for cookie in browser_cookies: + if domain in cookie.domain: + cookies[cookie.name] = cookie.value + + if cookies: + print(f"Cookies loaded successfully from {browser}") + else: + raise LoginException(f"No 
cookies found for Instagram in {browser}, " + f"Are you logged in succesfully in {browser}?") + + if cookie_name: + return cookies.get(cookie_name, {}) + else: + return cookies + + +def import_session(browser, instaloader, cookiefile): + cookie = get_cookies_from_instagram('instagram', browser, cookiefile) + if cookie is not None: + instaloader.context.update_cookies(cookie) + username = instaloader.test_login() + if not username: + raise LoginException(f"Not logged in. Are you logged in successfully in {browser}?") + instaloader.context.username = username + print(f"{username} has been successfully logged in.") + print(f"Next time use --login={username} to reuse the same session.") + + +def _main(instaloader: Instaloader, targetlist: List[str], + username: Optional[str] = None, password: Optional[str] = None, + sessionfile: Optional[str] = None, + download_profile_pic: bool = True, download_posts=True, + download_stories: bool = False, + download_highlights: bool = False, + download_tagged: bool = False, + download_igtv: bool = False, + fast_update: bool = False, + latest_stamps_file: Optional[str] = None, + max_count: Optional[int] = None, post_filter_str: Optional[str] = None, + storyitem_filter_str: Optional[str] = None, + browser: Optional[str] = None, + cookiefile: Optional[str] = None) -> ExitCode: + """Download set of profiles, hashtags etc. 
and handle logging in and session files if desired.""" + # Parse and generate filter function + post_filter = None + if post_filter_str is not None: + post_filter = filterstr_to_filterfunc(post_filter_str, Post) + instaloader.context.log('Only download posts with property "{}".'.format(post_filter_str)) + storyitem_filter = None + if storyitem_filter_str is not None: + storyitem_filter = filterstr_to_filterfunc(storyitem_filter_str, StoryItem) + instaloader.context.log('Only download storyitems with property "{}".'.format(storyitem_filter_str)) + latest_stamps = None + if latest_stamps_file is not None: + latest_stamps = LatestStamps(latest_stamps_file) + instaloader.context.log(f"Using latest stamps from {latest_stamps_file}.") + # load cookies if browser is not None + if browser and bc3_library: + import_session(browser.lower(), instaloader, cookiefile) + elif browser and not bc3_library: + raise InvalidArgumentException("browser_cookie3 library is needed to load cookies from browsers") + # Login, if desired + if username is not None: + if not re.match(r"^[A-Za-z0-9._]+$", username): + instaloader.context.error("Warning: Parameter \"{}\" for --login is not a valid username.".format(username)) + try: + instaloader.load_session_from_file(username, sessionfile) + except FileNotFoundError as err: + if sessionfile is not None: + print(err, file=sys.stderr) + instaloader.context.log("Session file does not exist yet - Logging in.") + if not instaloader.context.is_logged_in or username != instaloader.test_login(): + if password is not None: + try: + instaloader.login(username, password) + except TwoFactorAuthRequiredException: + # https://github.com/instaloader/instaloader/issues/1217 + instaloader.context.error("Warning: There have been reports of 2FA currently not working. 
" + "Consider importing session cookies from your browser with " + "--load-cookies.") + while True: + try: + code = input("Enter 2FA verification code: ") + instaloader.two_factor_login(code) + break + except BadCredentialsException as err: + print(err, file=sys.stderr) + pass + else: + try: + instaloader.interactive_login(username) + except KeyboardInterrupt: + print("\nInterrupted by user.", file=sys.stderr) + return ExitCode.USER_ABORTED + instaloader.context.log("Logged in as %s." % username) + # since 4.2.9 login is required for geotags + if instaloader.download_geotags and not instaloader.context.is_logged_in: + instaloader.context.error("Warning: Login is required to download geotags of posts.") + # Try block for KeyboardInterrupt (save session on ^C) + profiles = set() + anonymous_retry_profiles = set() + exit_code = ExitCode.SUCCESS + try: + # Generate set of profiles, already downloading non-profile targets + for target in targetlist: + if (target.endswith('.json') or target.endswith('.json.xz')) and os.path.isfile(target): + with instaloader.context.error_catcher(target): + structure = load_structure_from_file(instaloader.context, target) + if isinstance(structure, Post): + if post_filter is not None and not post_filter(structure): + instaloader.context.log("<{} ({}) skipped>".format(structure, target), flush=True) + continue + instaloader.context.log("Downloading {} ({})".format(structure, target)) + instaloader.download_post(structure, os.path.dirname(target)) + elif isinstance(structure, StoryItem): + if storyitem_filter is not None and not storyitem_filter(structure): + instaloader.context.log("<{} ({}) skipped>".format(structure, target), flush=True) + continue + instaloader.context.log("Attempting to download {} ({})".format(structure, target)) + instaloader.download_storyitem(structure, os.path.dirname(target)) + elif isinstance(structure, Profile): + raise InvalidArgumentException("Profile JSON are ignored. 
Pass \"{}\" to download that profile" + .format(structure.username)) + else: + raise InvalidArgumentException("{} JSON file not supported as target" + .format(structure.__class__.__name__)) + continue + # strip '/' characters to be more shell-autocompletion-friendly + target = target.rstrip('/') + with instaloader.context.error_catcher(target): + if re.match(r"^@[A-Za-z0-9._]+$", target): + instaloader.context.log("Retrieving followees of %s..." % target[1:]) + profile = Profile.from_username(instaloader.context, target[1:]) + for followee in profile.get_followees(): + instaloader.save_profile_id(followee) + profiles.add(followee) + elif re.match(r"^#\w+$", target): + instaloader.download_hashtag(hashtag=target[1:], max_count=max_count, fast_update=fast_update, + post_filter=post_filter, + profile_pic=download_profile_pic, posts=download_posts) + elif re.match(r"^-[A-Za-z0-9-_]+$", target): + instaloader.download_post(Post.from_shortcode(instaloader.context, target[1:]), target) + elif re.match(r"^%[0-9]+$", target): + instaloader.download_location(location=target[1:], max_count=max_count, fast_update=fast_update, + post_filter=post_filter) + elif target == ":feed": + instaloader.download_feed_posts(fast_update=fast_update, max_count=max_count, + post_filter=post_filter) + elif target == ":stories": + instaloader.download_stories(fast_update=fast_update, storyitem_filter=storyitem_filter) + elif target == ":saved": + instaloader.download_saved_posts(fast_update=fast_update, max_count=max_count, + post_filter=post_filter) + elif re.match(r"^[A-Za-z0-9._]+$", target): + try: + profile = instaloader.check_profile_id(target, latest_stamps) + if instaloader.context.is_logged_in and profile.has_blocked_viewer: + if download_profile_pic or ((download_posts or download_tagged or download_igtv) + and not profile.is_private): + raise ProfileNotExistsException("{} blocked you; But we download her anonymously." 
+ .format(target)) + else: + instaloader.context.error("{} blocked you.".format(target)) + else: + profiles.add(profile) + except ProfileNotExistsException as err: + # Not only our profile.has_blocked_viewer condition raises ProfileNotExistsException, + # check_profile_id() also does, since access to blocked profile may be responded with 404. + if instaloader.context.is_logged_in and (download_profile_pic or download_posts or + download_tagged or download_igtv): + instaloader.context.log(err) + instaloader.context.log("Trying again anonymously, helps in case you are just blocked.") + with instaloader.anonymous_copy() as anonymous_loader: + with instaloader.context.error_catcher(): + anonymous_retry_profiles.add(anonymous_loader.check_profile_id(target, + latest_stamps)) + instaloader.context.error("Warning: {} will be downloaded anonymously (\"{}\")." + .format(target, err)) + else: + raise + else: + target_type = { + '#': 'hashtag', + '%': 'location', + '-': 'shortcode', + }.get(target[0], 'username') + raise ProfileNotExistsException('Invalid {} {}'.format(target_type, target)) + if len(profiles) > 1: + instaloader.context.log("Downloading {} profiles: {}".format(len(profiles), + ' '.join([p.username for p in profiles]))) + if instaloader.context.iphone_support and profiles and (download_profile_pic or download_posts) and \ + not instaloader.context.is_logged_in: + instaloader.context.log("Hint: Login to download higher-quality versions of pictures.") + instaloader.download_profiles(profiles, + download_profile_pic, download_posts, download_tagged, download_igtv, + download_highlights, download_stories, + fast_update, post_filter, storyitem_filter, latest_stamps=latest_stamps) + if anonymous_retry_profiles: + instaloader.context.log("Downloading anonymously: {}" + .format(' '.join([p.username for p in anonymous_retry_profiles]))) + with instaloader.anonymous_copy() as anonymous_loader: + anonymous_loader.download_profiles(anonymous_retry_profiles, + 
download_profile_pic, download_posts, download_tagged, download_igtv, + fast_update=fast_update, post_filter=post_filter, + latest_stamps=latest_stamps) + except KeyboardInterrupt: + print("\nInterrupted by user.", file=sys.stderr) + exit_code = ExitCode.USER_ABORTED + except AbortDownloadException as exc: + print("\nDownload aborted: {}.".format(exc), file=sys.stderr) + exit_code = ExitCode.DOWNLOAD_ABORTED + # Save session if it is useful + if instaloader.context.is_logged_in: + instaloader.save_session_to_file(sessionfile) + # User might be confused if Instaloader does nothing + if not targetlist: + if instaloader.context.is_logged_in: + # Instaloader did at least save a session file + instaloader.context.log("No targets were specified, thus nothing has been downloaded.") + else: + # Instaloader did not do anything + instaloader.context.log("usage:" + usage_string()) + exit_code = ExitCode.INIT_FAILURE + return exit_code + + +def main(): + parser = ArgumentParser(description=__doc__, add_help=False, usage=usage_string(), + epilog="The complete documentation can be found at " + "https://instaloader.github.io/.", + fromfile_prefix_chars='+') + + g_targets = parser.add_argument_group("What to Download", + "Specify a list of targets. For each of these, Instaloader creates a folder " + "and downloads all posts. The following targets are supported:") + g_targets.add_argument('profile', nargs='*', + help="Download profile. If an already-downloaded profile has been renamed, Instaloader " + "automatically finds it by its unique ID and renames the folder likewise.") + g_targets.add_argument('_at_profile', nargs='*', metavar="@profile", + help="Download all followees of profile. Requires login. " + "Consider using :feed rather than @yourself.") + g_targets.add_argument('_hashtag', nargs='*', metavar='"#hashtag"', help="Download #hashtag.") + g_targets.add_argument('_location', nargs='*', metavar='%location_id', + help="Download %%location_id. 
Requires login.") + g_targets.add_argument('_feed', nargs='*', metavar=":feed", + help="Download pictures from your feed. Requires login.") + g_targets.add_argument('_stories', nargs='*', metavar=":stories", + help="Download the stories of your followees. Requires login.") + g_targets.add_argument('_saved', nargs='*', metavar=":saved", + help="Download the posts that you marked as saved. Requires login.") + g_targets.add_argument('_singlepost', nargs='*', metavar="-- -shortcode", + help="Download the post with the given shortcode") + g_targets.add_argument('_json', nargs='*', metavar="filename.json[.xz]", + help="Re-Download the given object.") + g_targets.add_argument('_fromfile', nargs='*', metavar="+args.txt", + help="Read targets (and options) from given textfile.") + + g_post = parser.add_argument_group("What to Download of each Post") + + g_prof = parser.add_argument_group("What to Download of each Profile") + + g_prof.add_argument('-P', '--profile-pic-only', action='store_true', + help=SUPPRESS) + g_prof.add_argument('--no-posts', action='store_true', + help="Do not download regular posts.") + g_prof.add_argument('--no-profile-pic', action='store_true', + help='Do not download profile picture.') + g_post.add_argument('--slide', action='store', + help='Set what image/interval of a sidecar you want to download.') + g_post.add_argument('--no-pictures', action='store_true', + help='Do not download post pictures. Cannot be used together with --fast-update. ' + 'Implies --no-video-thumbnails, does not imply --no-videos.') + g_post.add_argument('-V', '--no-videos', action='store_true', + help='Do not download videos.') + g_post.add_argument('--no-video-thumbnails', action='store_true', + help='Do not download thumbnails of videos.') + g_post.add_argument('-G', '--geotags', action='store_true', + help='Download geotags when available. Geotags are stored as a ' + 'text file with the location\'s name and a Google Maps link. 
' + 'This requires an additional request to the Instagram ' + 'server for each picture. Requires login.') + g_post.add_argument('-C', '--comments', action='store_true', + help='Download and update comments for each post. ' + 'This requires an additional request to the Instagram ' + 'server for each post, which is why it is disabled by default. Requires login.') + g_post.add_argument('--no-captions', action='store_true', + help='Do not create txt files.') + g_post.add_argument('--post-metadata-txt', action='append', + help='Template to write in txt file for each Post.') + g_post.add_argument('--storyitem-metadata-txt', action='append', + help='Template to write in txt file for each StoryItem.') + g_post.add_argument('--no-metadata-json', action='store_true', + help='Do not create a JSON file containing the metadata of each post.') + g_post.add_argument('--metadata-json', action='store_true', + help=SUPPRESS) + g_post.add_argument('--no-compress-json', action='store_true', + help='Do not xz compress JSON files, rather create pretty formatted JSONs.') + g_prof.add_argument('-s', '--stories', action='store_true', + help='Also download stories of each profile that is downloaded. Requires login.') + g_prof.add_argument('--stories-only', action='store_true', + help=SUPPRESS) + g_prof.add_argument('--highlights', action='store_true', + help='Also download highlights of each profile that is downloaded. Requires login.') + g_prof.add_argument('--tagged', action='store_true', + help='Also download posts where each profile is tagged.') + g_prof.add_argument('--igtv', action='store_true', + help='Also download IGTV videos.') + + g_cond = parser.add_argument_group("Which Posts to Download") + + g_cond.add_argument('-F', '--fast-update', action='store_true', + help='For each target, stop when encountering the first already-downloaded picture. 
This ' + 'flag is recommended when you use Instaloader to update your personal Instagram archive.') + g_cond.add_argument('--latest-stamps', nargs='?', metavar='STAMPSFILE', const=get_default_stamps_filename(), + help='Store the timestamps of latest media scraped for each profile. This allows updating ' + 'your personal Instagram archive even if you delete the destination directories. ' + 'If STAMPSFILE is not provided, defaults to ' + get_default_stamps_filename()) + g_cond.add_argument('--post-filter', '--only-if', metavar='filter', + help='Expression that, if given, must evaluate to True for each post to be downloaded. Must be ' + 'a syntactically valid python expression. Variables are evaluated to ' + 'instaloader.Post attributes. Example: --post-filter=viewer_has_liked.') + g_cond.add_argument('--storyitem-filter', metavar='filter', + help='Expression that, if given, must evaluate to True for each storyitem to be downloaded. ' + 'Must be a syntactically valid python expression. Variables are evaluated to ' + 'instaloader.StoryItem attributes.') + + g_cond.add_argument('-c', '--count', + help='Do not attempt to download more than COUNT posts. ' + 'Applies to #hashtag, %%location_id, :feed, and :saved.') + + g_login = parser.add_argument_group('Login (Download Private Profiles)', + 'Instaloader can login to Instagram. This allows downloading private profiles. ' + 'To login, pass the --login option. Your session cookie (not your password!) ' + 'will be saved to a local file to be reused next time you want Instaloader ' + 'to login. 
Instead of --login, the --load-cookies option can be used to ' + 'import a session from a browser.') + g_login.add_argument('-l', '--login', metavar='YOUR-USERNAME', + help='Login name (profile name) for your Instagram account.') + g_login.add_argument('-b', '--load-cookies', metavar='BROWSER-NAME', + help='Browser name to load cookies from Instagram') + g_login.add_argument('-B', '--cookiefile', metavar='COOKIE-FILE', + help='Cookie file of a profile to load cookies') + g_login.add_argument('-f', '--sessionfile', + help='Path for loading and storing session key file. ' + 'Defaults to ' + get_default_session_filename("")) + g_login.add_argument('-p', '--password', metavar='YOUR-PASSWORD', + help='Password for your Instagram account. Without this option, ' + 'you\'ll be prompted for your password interactively if ' + 'there is not yet a valid session file.') + + g_how = parser.add_argument_group('How to Download') + g_how.add_argument('--dirname-pattern', + help='Name of directory where to store posts. {profile} is replaced by the profile name, ' + '{target} is replaced by the target you specified, i.e. either :feed, #hashtag or the ' + 'profile name. Defaults to \'{target}\'.') + g_how.add_argument('--filename-pattern', + help='Prefix of filenames for posts and stories, relative to the directory given with ' + '--dirname-pattern. {profile} is replaced by the profile name,' + '{target} is replaced by the target you specified, i.e. either :feed' + '#hashtag or the profile name. Defaults to \'{date_utc}_UTC\'') + g_how.add_argument('--title-pattern', + help='Prefix of filenames for profile pics, hashtag profile pics, and highlight covers. ' + 'Defaults to \'{date_utc}_UTC_{typename}\' if --dirname-pattern contains \'{target}\' ' + 'or \'{dirname}\', or if --dirname-pattern is not specified. 
Otherwise defaults to ' + '\'{target}_{date_utc}_UTC_{typename}\'.') + g_how.add_argument('--resume-prefix', metavar='PREFIX', + help='Prefix for filenames that are used to save the information to resume an interrupted ' + 'download.') + g_how.add_argument('--sanitize-paths', action='store_true', + help='Sanitize paths so that the resulting file and directory names are valid on both ' + 'Windows and Unix.') + g_how.add_argument('--no-resume', action='store_true', + help='Do not resume a previously-aborted download iteration, and do not save such information ' + 'when interrupted.') + g_how.add_argument('--use-aged-resume-files', action='store_true', help=SUPPRESS) + g_how.add_argument('--user-agent', + help='User Agent to use for HTTP requests. Defaults to \'{}\'.'.format(default_user_agent())) + g_how.add_argument('-S', '--no-sleep', action='store_true', help=SUPPRESS) + g_how.add_argument('--max-connection-attempts', metavar='N', type=int, default=3, + help='Maximum number of connection attempts until a request is aborted. Defaults to 3. If a ' + 'connection fails, it can be manually skipped by hitting CTRL+C. Set this to 0 to retry ' + 'infinitely.') + g_how.add_argument('--commit-mode', action='store_true', help=SUPPRESS) + g_how.add_argument('--request-timeout', metavar='N', type=float, default=300.0, + help='Seconds to wait before timing out a connection request. Defaults to 300.') + g_how.add_argument('--abort-on', type=http_status_code_list, metavar="STATUS_CODES", + help='Comma-separated list of HTTP status codes that cause Instaloader to abort, bypassing all ' + 'retry logic.') + g_how.add_argument('--no-iphone', action='store_true', + help='Do not attempt to download iPhone version of images and videos.') + + g_misc = parser.add_argument_group('Miscellaneous Options') + g_misc.add_argument('-q', '--quiet', action='store_true', + help='Disable user interaction, i.e. 
do not print messages (except errors) and fail ' + 'if login credentials are needed but not given. This makes Instaloader suitable as a ' + 'cron job.') + g_misc.add_argument('-h', '--help', action='help', help='Show this help message and exit.') + g_misc.add_argument('--version', action='version', help='Show version number and exit.', + version=__version__) + + args = parser.parse_args() + try: + if (args.login is None and args.load_cookies is None) and (args.stories or args.stories_only): + print("Login is required to download stories.", file=sys.stderr) + args.stories = False + if args.stories_only: + raise InvalidArgumentException() + + if ':feed-all' in args.profile or ':feed-liked' in args.profile: + raise InvalidArgumentException(":feed-all and :feed-liked were removed. Use :feed as target and " + "eventually --post-filter=viewer_has_liked.") + + post_metadata_txt_pattern = '\n'.join(args.post_metadata_txt) if args.post_metadata_txt else None + storyitem_metadata_txt_pattern = '\n'.join(args.storyitem_metadata_txt) if args.storyitem_metadata_txt else None + + if args.no_captions: + if not (post_metadata_txt_pattern or storyitem_metadata_txt_pattern): + post_metadata_txt_pattern = '' + storyitem_metadata_txt_pattern = '' + else: + raise InvalidArgumentException("--no-captions and --post-metadata-txt or --storyitem-metadata-txt " + "given; That contradicts.") + + if args.no_resume and args.resume_prefix: + raise InvalidArgumentException("--no-resume and --resume-prefix given; That contradicts.") + resume_prefix = (args.resume_prefix if args.resume_prefix else 'iterator') if not args.no_resume else None + + if args.no_pictures and args.fast_update: + raise InvalidArgumentException('--no-pictures and --fast-update cannot be used together.') + + if args.login and args.load_cookies: + raise InvalidArgumentException('--load-cookies and --login cannot be used together.') + + # Determine what to download + download_profile_pic = not args.no_profile_pic or 
args.profile_pic_only + download_posts = not (args.no_posts or args.stories_only or args.profile_pic_only) + download_stories = args.stories or args.stories_only + + loader = Instaloader(sleep=not args.no_sleep, quiet=args.quiet, user_agent=args.user_agent, + dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern, + download_pictures=not args.no_pictures, + download_videos=not args.no_videos, download_video_thumbnails=not args.no_video_thumbnails, + download_geotags=args.geotags, + download_comments=args.comments, save_metadata=not args.no_metadata_json, + compress_json=not args.no_compress_json, + post_metadata_txt_pattern=post_metadata_txt_pattern, + storyitem_metadata_txt_pattern=storyitem_metadata_txt_pattern, + max_connection_attempts=args.max_connection_attempts, + request_timeout=args.request_timeout, + resume_prefix=resume_prefix, + check_resume_bbd=not args.use_aged_resume_files, + slide=args.slide, + fatal_status_codes=args.abort_on, + iphone_support=not args.no_iphone, + title_pattern=args.title_pattern, + sanitize_paths=args.sanitize_paths) + exit_code = _main(loader, + args.profile, + username=args.login.lower() if args.login is not None else None, + password=args.password, + sessionfile=args.sessionfile, + download_profile_pic=download_profile_pic, + download_posts=download_posts, + download_stories=download_stories, + download_highlights=args.highlights, + download_tagged=args.tagged, + download_igtv=args.igtv, + fast_update=args.fast_update, + latest_stamps_file=args.latest_stamps, + max_count=int(args.count) if args.count is not None else None, + post_filter_str=args.post_filter, + storyitem_filter_str=args.storyitem_filter, + browser=args.load_cookies, + cookiefile=args.cookiefile) + loader.close() + if loader.has_stored_errors: + exit_code = ExitCode.NON_FATAL_ERROR + except InvalidArgumentException as err: + print(err, file=sys.stderr) + exit_code = ExitCode.INIT_FAILURE + except LoginException as err: + print(err, 
file=sys.stderr) + exit_code = ExitCode.LOGIN_FAILURE + except InstaloaderException as err: + print("Fatal error: %s" % err) + exit_code = ExitCode.UNEXPECTED_ERROR + sys.exit(exit_code) + + +if __name__ == "__main__": + main() diff --git a/build/lib/instaloader/exceptions.py b/build/lib/instaloader/exceptions.py new file mode 100644 index 0000000..9145d8e --- /dev/null +++ b/build/lib/instaloader/exceptions.py @@ -0,0 +1,84 @@ +class InstaloaderException(Exception): + """Base exception for this script. + + :note: This exception should not be raised directly.""" + pass + + +class QueryReturnedBadRequestException(InstaloaderException): + pass + + +class QueryReturnedForbiddenException(InstaloaderException): + pass + + +class ProfileNotExistsException(InstaloaderException): + pass + + +class ProfileHasNoPicsException(InstaloaderException): + """ + .. deprecated:: 4.2.2 + Not raised anymore. + """ + pass + + +class PrivateProfileNotFollowedException(InstaloaderException): + pass + + +class LoginRequiredException(InstaloaderException): + pass + + +class LoginException(InstaloaderException): + pass + + +class TwoFactorAuthRequiredException(LoginException): + pass + + +class InvalidArgumentException(InstaloaderException): + pass + + +class BadResponseException(InstaloaderException): + pass + + +class BadCredentialsException(LoginException): + pass + + +class ConnectionException(InstaloaderException): + pass + + +class PostChangedException(InstaloaderException): + """.. versionadded:: 4.2.2""" + pass + + +class QueryReturnedNotFoundException(ConnectionException): + pass + + +class TooManyRequestsException(ConnectionException): + pass + +class IPhoneSupportDisabledException(InstaloaderException): + pass + +class AbortDownloadException(Exception): + """ + Exception that is not catched in the error catchers inside the download loop and so aborts the + download loop. + + This exception is not a subclass of ``InstaloaderException``. + + .. 
versionadded:: 4.7 + """ + pass diff --git a/build/lib/instaloader/instaloader.py b/build/lib/instaloader/instaloader.py new file mode 100644 index 0000000..a60f6f7 --- /dev/null +++ b/build/lib/instaloader/instaloader.py @@ -0,0 +1,1631 @@ +import getpass +import json +import os +import platform +import re +import shutil +import string +import sys +import tempfile +from contextlib import contextmanager, suppress +from datetime import datetime, timezone +from functools import wraps +from io import BytesIO +from pathlib import Path +from typing import Any, Callable, IO, Iterator, List, Optional, Set, Union, cast +from urllib.parse import urlparse + +import requests +import urllib3 # type: ignore + +from .exceptions import * +from .instaloadercontext import InstaloaderContext, RateController +from .lateststamps import LatestStamps +from .nodeiterator import NodeIterator, resumable_iteration +from .sectioniterator import SectionIterator +from .structures import (Hashtag, Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem, + load_structure_from_file, save_structure_to_file, PostSidecarNode, TitlePic) + + +def _get_config_dir() -> str: + if platform.system() == "Windows": + # on Windows, use %LOCALAPPDATA%\Instaloader + localappdata = os.getenv("LOCALAPPDATA") + if localappdata is not None: + return os.path.join(localappdata, "Instaloader") + # legacy fallback - store in temp dir if %LOCALAPPDATA% is not set + return os.path.join(tempfile.gettempdir(), ".instaloader-" + getpass.getuser()) + # on Unix, use ~/.config/instaloader + return os.path.join(os.getenv("XDG_CONFIG_HOME", os.path.expanduser("~/.config")), "instaloader") + + +def get_default_session_filename(username: str) -> str: + """Returns default session filename for given username.""" + configdir = _get_config_dir() + sessionfilename = "session-{}".format(username) + return os.path.join(configdir, sessionfilename) + + +def get_legacy_session_filename(username: str) -> str: + """Returns 
legacy (until v4.4.3) default session filename for given username.""" + dirname = tempfile.gettempdir() + "/" + ".instaloader-" + getpass.getuser() + filename = dirname + "/" + "session-" + username + return filename.lower() + + +def get_default_stamps_filename() -> str: + """ + Returns default filename for latest stamps database. + + .. versionadded:: 4.8 + + """ + configdir = _get_config_dir() + return os.path.join(configdir, "latest-stamps.ini") + + +def format_string_contains_key(format_string: str, key: str) -> bool: + # pylint:disable=unused-variable + for literal_text, field_name, format_spec, conversion in string.Formatter().parse(format_string): + if field_name and (field_name == key or field_name.startswith(key + '.')): + return True + return False + + +def _requires_login(func: Callable) -> Callable: + """Decorator to raise an exception if herewith-decorated function is called without being logged in""" + @wraps(func) + def call(instaloader, *args, **kwargs): + if not instaloader.context.is_logged_in: + raise LoginRequiredException("Login required.") + return func(instaloader, *args, **kwargs) + return call + + +def _retry_on_connection_error(func: Callable) -> Callable: + """Decorator to retry the function max_connection_attemps number of times. + + Herewith-decorated functions need an ``_attempt`` keyword argument. + + This is to decorate functions that do network requests that may fail. 
Note that + :meth:`.get_json`, :meth:`.get_iphone_json`, :meth:`.graphql_query` and :meth:`.graphql_node_list` already have + their own logic for retrying, hence functions that only use these for network access must not be decorated with this + decorator.""" + @wraps(func) + def call(instaloader, *args, **kwargs): + try: + return func(instaloader, *args, **kwargs) + except (urllib3.exceptions.HTTPError, requests.exceptions.RequestException, ConnectionException) as err: + error_string = "{}({}): {}".format(func.__name__, ', '.join([repr(arg) for arg in args]), err) + if (kwargs.get('_attempt') or 1) == instaloader.context.max_connection_attempts: + raise ConnectionException(error_string) from None + instaloader.context.error(error_string + " [retrying; skip with ^C]", repeat_at_end=False) + try: + if kwargs.get('_attempt'): + kwargs['_attempt'] += 1 + else: + kwargs['_attempt'] = 2 + instaloader.context.do_sleep() + return call(instaloader, *args, **kwargs) + except KeyboardInterrupt: + instaloader.context.error("[skipped by user]", repeat_at_end=False) + raise ConnectionException(error_string) from None + return call + + +class _ArbitraryItemFormatter(string.Formatter): + def __init__(self, item: Any): + self._item = item + + def get_value(self, key, args, kwargs): + """Override to substitute {ATTRIBUTE} by attributes of our _item.""" + if key == 'filename' and isinstance(self._item, (Post, StoryItem, PostSidecarNode, TitlePic)): + return "{filename}" + if hasattr(self._item, key): + return getattr(self._item, key) + return super().get_value(key, args, kwargs) + + def format_field(self, value, format_spec): + """Override :meth:`string.Formatter.format_field` to have our + default format_spec for :class:`datetime.Datetime` objects, and to + let None yield an empty string rather than ``None``.""" + if isinstance(value, datetime) and not format_spec: + return super().format_field(value, '%Y-%m-%d_%H-%M-%S') + if value is None: + return '' + return 
super().format_field(value, format_spec) + + +class _PostPathFormatter(_ArbitraryItemFormatter): + RESERVED: set = {'CON', 'PRN', 'AUX', 'NUL', + 'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9', + 'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9'} + + def __init__(self, item: Any, force_windows_path: bool = False): + super().__init__(item) + self.force_windows_path = force_windows_path + + def get_value(self, key, args, kwargs): + ret = super().get_value(key, args, kwargs) + if not isinstance(ret, str): + return ret + return self.sanitize_path(ret, self.force_windows_path) + + @staticmethod + def sanitize_path(ret: str, force_windows_path: bool = False) -> str: + """Replaces '/' with similar looking Division Slash and some other illegal filename characters on Windows.""" + ret = ret.replace('/', '\u2215') + + if ret.startswith('.'): + ret = ret.replace('.', '\u2024', 1) + + if force_windows_path or platform.system() == 'Windows': + ret = ret.replace(':', '\uff1a').replace('<', '\ufe64').replace('>', '\ufe65').replace('\"', '\uff02') + ret = ret.replace('\\', '\ufe68').replace('|', '\uff5c').replace('?', '\ufe16').replace('*', '\uff0a') + ret = ret.replace('\n', ' ').replace('\r', ' ') + root, ext = os.path.splitext(ret) + if root.upper() in _PostPathFormatter.RESERVED: + root += '_' + if ext == '.': + ext = '\u2024' + ret = root + ext + return ret + + +class Instaloader: + """Instaloader Class. + + :param quiet: :option:`--quiet` + :param user_agent: :option:`--user-agent` + :param dirname_pattern: :option:`--dirname-pattern`, default is ``{target}`` + :param filename_pattern: :option:`--filename-pattern`, default is ``{date_utc}_UTC`` + :param title_pattern: + :option:`--title-pattern`, default is ``{date_utc}_UTC_{typename}`` if ``dirname_pattern`` contains + ``{target}`` or ``{profile}``, ``{target}_{date_utc}_UTC_{typename}`` otherwise. 
+ :param download_pictures: not :option:`--no-pictures` + :param download_videos: not :option:`--no-videos` + :param download_video_thumbnails: not :option:`--no-video-thumbnails` + :param download_geotags: :option:`--geotags` + :param download_comments: :option:`--comments` + :param save_metadata: not :option:`--no-metadata-json` + :param compress_json: not :option:`--no-compress-json` + :param post_metadata_txt_pattern: + :option:`--post-metadata-txt`, default is ``{caption}``. Set to empty string to avoid creation of post metadata + txt file. + :param storyitem_metadata_txt_pattern: :option:`--storyitem-metadata-txt`, default is empty (=none) + :param max_connection_attempts: :option:`--max-connection-attempts` + :param request_timeout: :option:`--request-timeout`, set per-request timeout (seconds) + :param rate_controller: Generator for a :class:`RateController` to override rate controlling behavior + :param resume_prefix: :option:`--resume-prefix`, or None for :option:`--no-resume`. + :param check_resume_bbd: Whether to check the date of expiry of resume files and reject them if expired. + :param slide: :option:`--slide` + :param fatal_status_codes: :option:`--abort-on` + :param iphone_support: not :option:`--no-iphone` + :param sanitize_paths: :option:`--sanitize-paths` + + .. attribute:: context + + The associated :class:`InstaloaderContext` with low-level communication functions and logging. 
+ """ + + def __init__(self, + proxy:Optional[dict] = None, + sleep: bool = True, + quiet: bool = False, + user_agent: Optional[str] = None, + dirname_pattern: Optional[str] = None, + filename_pattern: Optional[str] = None, + download_pictures=True, + download_videos: bool = True, + download_video_thumbnails: bool = True, + download_geotags: bool = False, + download_comments: bool = False, + save_metadata: bool = True, + compress_json: bool = True, + post_metadata_txt_pattern: Optional[str] = None, + storyitem_metadata_txt_pattern: Optional[str] = None, + max_connection_attempts: int = 3, + request_timeout: float = 300.0, + rate_controller: Optional[Callable[[InstaloaderContext], RateController]] = None, + resume_prefix: Optional[str] = "iterator", + check_resume_bbd: bool = True, + slide: Optional[str] = None, + fatal_status_codes: Optional[List[int]] = None, + iphone_support: bool = True, + title_pattern: Optional[str] = None, + sanitize_paths: bool = False): + + self.context = InstaloaderContext(proxy,sleep, quiet, user_agent, max_connection_attempts, + request_timeout, rate_controller, fatal_status_codes, + iphone_support) + + # configuration parameters + self.dirname_pattern = dirname_pattern or "{target}" + self.filename_pattern = filename_pattern or "{date_utc}_UTC" + if title_pattern is not None: + self.title_pattern = title_pattern + else: + if (format_string_contains_key(self.dirname_pattern, 'profile') or + format_string_contains_key(self.dirname_pattern, 'target')): + self.title_pattern = '{date_utc}_UTC_{typename}' + else: + self.title_pattern = '{target}_{date_utc}_UTC_{typename}' + self.sanitize_paths = sanitize_paths + self.download_pictures = download_pictures + self.download_videos = download_videos + self.download_video_thumbnails = download_video_thumbnails + self.download_geotags = download_geotags + self.download_comments = download_comments + self.save_metadata = save_metadata + self.compress_json = compress_json + 
self.post_metadata_txt_pattern = '{caption}' if post_metadata_txt_pattern is None \ + else post_metadata_txt_pattern + self.storyitem_metadata_txt_pattern = '' if storyitem_metadata_txt_pattern is None \ + else storyitem_metadata_txt_pattern + self.resume_prefix = resume_prefix + self.check_resume_bbd = check_resume_bbd + + self.slide = slide or "" + self.slide_start = 0 + self.slide_end = -1 + if self.slide != "": + splitted = self.slide.split('-') + if len(splitted) == 1: + if splitted[0] == 'last': + # download only last image of a sidecar + self.slide_start = -1 + else: + if int(splitted[0]) > 0: + self.slide_start = self.slide_end = int(splitted[0])-1 + else: + raise InvalidArgumentException("--slide parameter must be greater than 0.") + elif len(splitted) == 2: + if splitted[1] == 'last': + self.slide_start = int(splitted[0])-1 + elif 0 < int(splitted[0]) < int(splitted[1]): + self.slide_start = int(splitted[0])-1 + self.slide_end = int(splitted[1])-1 + else: + raise InvalidArgumentException("Invalid data for --slide parameter.") + else: + raise InvalidArgumentException("Invalid data for --slide parameter.") + + @contextmanager + def anonymous_copy(self): + """Yield an anonymous, otherwise equally-configured copy of an Instaloader instance; Then copy its error log.""" + new_loader = Instaloader( + sleep=self.context.sleep, + quiet=self.context.quiet, + user_agent=self.context.user_agent, + dirname_pattern=self.dirname_pattern, + filename_pattern=self.filename_pattern, + download_pictures=self.download_pictures, + download_videos=self.download_videos, + download_video_thumbnails=self.download_video_thumbnails, + download_geotags=self.download_geotags, + download_comments=self.download_comments, + save_metadata=self.save_metadata, + compress_json=self.compress_json, + post_metadata_txt_pattern=self.post_metadata_txt_pattern, + storyitem_metadata_txt_pattern=self.storyitem_metadata_txt_pattern, + max_connection_attempts=self.context.max_connection_attempts, + 
request_timeout=self.context.request_timeout, + resume_prefix=self.resume_prefix, + check_resume_bbd=self.check_resume_bbd, + slide=self.slide, + fatal_status_codes=self.context.fatal_status_codes, + iphone_support=self.context.iphone_support, + sanitize_paths=self.sanitize_paths) + yield new_loader + self.context.error_log.extend(new_loader.context.error_log) + new_loader.context.error_log = [] # avoid double-printing of errors + new_loader.close() + + def close(self): + """Close associated session objects and repeat error log.""" + self.context.close() + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + @_retry_on_connection_error + def download_pic(self, filename: str, url: str, mtime: datetime, + filename_suffix: Optional[str] = None, _attempt: int = 1) -> bool: + """Downloads and saves picture with given url under given directory with given timestamp. + Returns true, if file was actually downloaded, i.e. updated.""" + if filename_suffix is not None: + filename += '_' + filename_suffix + urlmatch = re.search('\\.[a-z0-9]*\\?', url) + file_extension = url[-3:] if urlmatch is None else urlmatch.group(0)[1:-1] + nominal_filename = filename + '.' + file_extension + if os.path.isfile(nominal_filename): + self.context.log(nominal_filename + ' exists', end=' ', flush=True) + return False + resp = self.context.get_raw(url) + if 'Content-Type' in resp.headers and resp.headers['Content-Type']: + header_extension = '.' 
+ resp.headers['Content-Type'].split(';')[0].split('/')[-1] + header_extension = header_extension.lower().replace('jpeg', 'jpg') + filename += header_extension + else: + filename = nominal_filename + if filename != nominal_filename and os.path.isfile(filename): + self.context.log(filename + ' exists', end=' ', flush=True) + return False + self.context.write_raw(resp, filename) + os.utime(filename, (datetime.now().timestamp(), mtime.timestamp())) + return True + + def save_metadata_json(self, filename: str, structure: JsonExportable) -> None: + """Saves metadata JSON file of a structure.""" + if self.compress_json: + filename += '.json.xz' + else: + filename += '.json' + os.makedirs(os.path.dirname(filename), exist_ok=True) + save_structure_to_file(structure, filename) + if isinstance(structure, (Post, StoryItem)): + # log 'json ' message when saving Post or StoryItem + self.context.log('json', end=' ', flush=True) + + def update_comments(self, filename: str, post: Post) -> None: + def _postcommentanswer_asdict(comment): + return {'id': comment.id, + 'created_at': int(comment.created_at_utc.replace(tzinfo=timezone.utc).timestamp()), + 'text': comment.text, + 'owner': comment.owner._asdict(), + 'likes_count': comment.likes_count} + + def _postcomment_asdict(comment): + return {**_postcommentanswer_asdict(comment), + 'answers': sorted([_postcommentanswer_asdict(answer) for answer in comment.answers], + key=lambda t: int(t['id']), + reverse=True)} + + def get_unique_comments(comments, combine_answers=False): + if not comments: + return list() + comments_list = sorted(sorted(list(comments), key=lambda t: int(t['id'])), + key=lambda t: int(t['created_at']), reverse=True) + unique_comments_list = [comments_list[0]] + for x, y in zip(comments_list[:-1], comments_list[1:]): + if x['id'] != y['id']: + unique_comments_list.append(y) + else: + unique_comments_list[-1]['likes_count'] = y.get('likes_count') + if combine_answers: + combined_answers = 
unique_comments_list[-1].get('answers') or list() + if 'answers' in y: + combined_answers.extend(y['answers']) + unique_comments_list[-1]['answers'] = get_unique_comments(combined_answers) + return unique_comments_list + + def get_new_comments(new_comments, start): + for idx, comment in enumerate(new_comments, start=start+1): + if idx % 250 == 0: + self.context.log('{}'.format(idx), end='…', flush=True) + yield comment + + def save_comments(extended_comments): + unique_comments = get_unique_comments(extended_comments, combine_answers=True) + answer_ids = set(int(answer['id']) for comment in unique_comments for answer in comment.get('answers', [])) + with open(filename, 'w') as file: + file.write(json.dumps(list(filter(lambda t: int(t['id']) not in answer_ids, unique_comments)), + indent=4)) + + base_filename = filename + filename += '_comments.json' + try: + with open(filename) as fp: + comments = json.load(fp) + except (FileNotFoundError, json.decoder.JSONDecodeError): + comments = list() + + comments_iterator = post.get_comments() + try: + with resumable_iteration( + context=self.context, + iterator=comments_iterator, + load=load_structure_from_file, + save=save_structure_to_file, + format_path=lambda magic: "{}_{}_{}.json.xz".format(base_filename, self.resume_prefix, magic), + check_bbd=self.check_resume_bbd, + enabled=self.resume_prefix is not None + ) as (_is_resuming, start_index): + comments.extend(_postcomment_asdict(comment) + for comment in get_new_comments(comments_iterator, start_index)) + except (KeyboardInterrupt, AbortDownloadException): + if comments: + save_comments(comments) + raise + if comments: + save_comments(comments) + self.context.log('comments', end=' ', flush=True) + + def save_caption(self, filename: str, mtime: datetime, caption: str) -> None: + """Updates picture caption / Post metadata info""" + def _elliptify(caption): + pcaption = caption.replace('\n', ' ').strip() + return '[' + ((pcaption[:29] + "\u2026") if len(pcaption) > 31 
else pcaption) + ']' + filename += '.txt' + caption += '\n' + pcaption = _elliptify(caption) + bcaption = caption.encode("UTF-8") + with suppress(FileNotFoundError): + with open(filename, 'rb') as file: + file_caption = file.read() + if file_caption.replace(b'\r\n', b'\n') == bcaption.replace(b'\r\n', b'\n'): + try: + self.context.log(pcaption + ' unchanged', end=' ', flush=True) + except UnicodeEncodeError: + self.context.log('txt unchanged', end=' ', flush=True) + return None + else: + def get_filename(index): + return filename if index == 0 else '{0}_old_{2:02}{1}'.format(*os.path.splitext(filename), index) + + i = 0 + while os.path.isfile(get_filename(i)): + i = i + 1 + for index in range(i, 0, -1): + os.rename(get_filename(index - 1), get_filename(index)) + try: + self.context.log(_elliptify(file_caption.decode("UTF-8")) + ' updated', end=' ', flush=True) + except UnicodeEncodeError: + self.context.log('txt updated', end=' ', flush=True) + try: + self.context.log(pcaption, end=' ', flush=True) + except UnicodeEncodeError: + self.context.log('txt', end=' ', flush=True) + with open(filename, 'w', encoding='UTF-8') as fio: + fio.write(caption) + os.utime(filename, (datetime.now().timestamp(), mtime.timestamp())) + + def save_location(self, filename: str, location: PostLocation, mtime: datetime) -> None: + """Save post location name and Google Maps link.""" + filename += '_location.txt' + if location.lat is not None and location.lng is not None: + location_string = (location.name + "\n" + + "https://maps.google.com/maps?q={0},{1}&ll={0},{1}\n".format(location.lat, + location.lng)) + else: + location_string = location.name + with open(filename, 'wb') as text_file: + with BytesIO(location_string.encode()) as bio: + shutil.copyfileobj(cast(IO, bio), text_file) + os.utime(filename, (datetime.now().timestamp(), mtime.timestamp())) + self.context.log('geo', end=' ', flush=True) + + def format_filename_within_target_path(self, + target: Union[str, Path], + owner_profile: 
Optional[Profile], + identifier: str, + name_suffix: str, + extension: str): + """Returns a filename within the target path. + + .. versionadded:: 4.5""" + if ((format_string_contains_key(self.dirname_pattern, 'profile') or + format_string_contains_key(self.dirname_pattern, 'target'))): + profile_str = owner_profile.username.lower() if owner_profile is not None else target + return os.path.join(self.dirname_pattern.format(profile=profile_str, target=target), + '{0}_{1}.{2}'.format(identifier, name_suffix, extension)) + else: + return os.path.join(self.dirname_pattern.format(), + '{0}_{1}_{2}.{3}'.format(target, identifier, name_suffix, extension)) + + @_retry_on_connection_error + def download_title_pic(self, url: str, target: Union[str, Path], name_suffix: str, owner_profile: Optional[Profile], + _attempt: int = 1) -> None: + """Downloads and saves a picture that does not have an association with a Post or StoryItem, such as a + Profile picture or a Highlight cover picture. Modification time is taken from the HTTP response headers. + + .. 
versionadded:: 4.3""" + + http_response = self.context.get_raw(url) + date_object: Optional[datetime] = None + if 'Last-Modified' in http_response.headers: + date_object = datetime.strptime(http_response.headers["Last-Modified"], '%a, %d %b %Y %H:%M:%S GMT') + date_object = date_object.replace(tzinfo=timezone.utc) + pic_bytes = None + else: + pic_bytes = http_response.content + ig_filename = url.split('/')[-1].split('?')[0] + pic_data = TitlePic(owner_profile, target, name_suffix, ig_filename, date_object) + dirname = _PostPathFormatter(pic_data, self.sanitize_paths).format(self.dirname_pattern, target=target) + filename_template = os.path.join( + dirname, + _PostPathFormatter(pic_data, self.sanitize_paths).format(self.title_pattern, target=target)) + filename = self.__prepare_filename(filename_template, lambda: url) + ".jpg" + content_length = http_response.headers.get('Content-Length', None) + if os.path.isfile(filename) and (not self.context.is_logged_in or + (content_length is not None and + os.path.getsize(filename) >= int(content_length))): + self.context.log(filename + ' already exists') + return + os.makedirs(os.path.dirname(filename), exist_ok=True) + self.context.write_raw(pic_bytes if pic_bytes else http_response, filename) + if date_object: + os.utime(filename, (datetime.now().timestamp(), date_object.timestamp())) + self.context.log('') # log output of _get_and_write_raw() does not produce \n + + def download_profilepic_if_new(self, profile: Profile, latest_stamps: Optional[LatestStamps]) -> None: + """ + Downloads and saves profile pic if it has not been downloaded before. + + :param latest_stamps: Database with the last downloaded data. If not present, + the profile pic is downloaded unless it already exists + + .. 
versionadded:: 4.8 + """ + if latest_stamps is None: + self.download_profilepic(profile) + return + profile_pic_basename = profile.profile_pic_url_no_iphone.split('/')[-1].split('?')[0] + saved_basename = latest_stamps.get_profile_pic(profile.username) + if saved_basename == profile_pic_basename: + return + self.download_profilepic(profile) + latest_stamps.set_profile_pic(profile.username, profile_pic_basename) + + def download_profilepic(self, profile: Profile) -> None: + """Downloads and saves profile pic.""" + self.download_title_pic(profile.profile_pic_url, profile.username.lower(), 'profile_pic', profile) + + def download_highlight_cover(self, highlight: Highlight, target: Union[str, Path]) -> None: + """Downloads and saves Highlight cover picture. + + .. versionadded:: 4.3""" + self.download_title_pic(highlight.cover_url, target, 'cover', highlight.owner_profile) + + def download_hashtag_profilepic(self, hashtag: Hashtag) -> None: + """Downloads and saves the profile picture of a Hashtag. + + .. versionadded:: 4.4""" + self.download_title_pic(hashtag.profile_pic_url, '#' + hashtag.name, 'profile_pic', None) + + @_requires_login + def save_session(self) -> dict: + """Saves internally stored :class:`requests.Session` object to :class:`dict`. + + :raises LoginRequiredException: If called without being logged in. + + .. versionadded:: 4.10 + """ + return self.context.save_session() + + def load_session(self, username: str, session_data: dict) -> None: + """Internally stores :class:`requests.Session` object from :class:`dict`. + + .. versionadded:: 4.10 + """ + self.context.load_session(username, session_data) + + @_requires_login + def save_session_to_file(self, filename: Optional[str] = None) -> None: + """Saves internally stored :class:`requests.Session` object. + + :param filename: Filename, or None to use default filename. + :raises LoginRequiredException: If called without being logged in. 
+ """ + if filename is None: + assert self.context.username is not None + filename = get_default_session_filename(self.context.username) + dirname = os.path.dirname(filename) + if dirname != '' and not os.path.exists(dirname): + os.makedirs(dirname) + os.chmod(dirname, 0o700) + with open(filename, 'wb') as sessionfile: + os.chmod(filename, 0o600) + self.context.save_session_to_file(sessionfile) + self.context.log("Saved session to %s." % filename) + + def load_session_from_file(self, username: str, filename: Optional[str] = None) -> None: + """Internally stores :class:`requests.Session` object loaded from file. + + If filename is None, the file with the default session path is loaded. + + :raises FileNotFoundError: If the file does not exist. + """ + if filename is None: + filename = get_default_session_filename(username) + if not os.path.exists(filename): + filename = get_legacy_session_filename(username) + with open(filename, 'rb') as sessionfile: + self.context.load_session_from_file(username, sessionfile) + self.context.log("Loaded session from %s." % filename) + + def test_login(self) -> Optional[str]: + """Returns the Instagram username to which given :class:`requests.Session` object belongs, or None.""" + return self.context.test_login() + + def login(self, user: str, passwd: str) -> None: + """Log in to instagram with given username and password and internally store session object. + + :raises BadCredentialsException: If the provided password is wrong. + :raises TwoFactorAuthRequiredException: First step of 2FA login done, now call + :meth:`Instaloader.two_factor_login`. + :raises LoginException: An error happened during login (for example, an invalid response was received). + Or if the provided username does not exist. + + .. versionchanged:: 4.12 + Raises LoginException instead of ConnectionException when an error happens. + Raises LoginException instead of InvalidArgumentException when the username does not exist. 
+ """ + self.context.login(user, passwd) + + def two_factor_login(self, two_factor_code) -> None: + """Second step of login if 2FA is enabled. + Not meant to be used directly, use :meth:`Instaloader.two_factor_login`. + + :raises InvalidArgumentException: No two-factor authentication pending. + :raises BadCredentialsException: 2FA verification code invalid. + + .. versionadded:: 4.2""" + self.context.two_factor_login(two_factor_code) + + @staticmethod + def __prepare_filename(filename_template: str, url: Callable[[], str]) -> str: + """Replace filename token inside filename_template with url's filename and assure the directories exist. + + .. versionadded:: 4.6""" + if "{filename}" in filename_template: + filename = filename_template.replace("{filename}", + os.path.splitext(os.path.basename(urlparse(url()).path))[0]) + else: + filename = filename_template + os.makedirs(os.path.dirname(filename), exist_ok=True) + return filename + + def format_filename(self, item: Union[Post, StoryItem, PostSidecarNode, TitlePic], + target: Optional[Union[str, Path]] = None): + """Format filename of a :class:`Post` or :class:`StoryItem` according to ``filename-pattern`` parameter. + + .. versionadded:: 4.1""" + return _PostPathFormatter(item, self.sanitize_paths).format(self.filename_pattern, target=target) + + def download_post(self, post: Post, target: Union[str, Path]) -> bool: + """ + Download everything associated with one instagram post node, i.e. picture, caption and video. + + :param post: Post to download. + :param target: Target name, i.e. profile name, #hashtag, :feed; for filename. + :return: True if something was downloaded, False otherwise, i.e. 
file was already there + """ + + def _already_downloaded(path: str) -> bool: + if not os.path.isfile(path): + return False + else: + self.context.log(path + ' exists', end=' ', flush=True) + return True + + def _all_already_downloaded(path_base, is_videos_enumerated) -> bool: + if '{filename}' in self.filename_pattern: + # full URL needed to evaluate actual filename, cannot determine at + # this point if all sidecar nodes were already downloaded. + return False + for idx, is_video in is_videos_enumerated: + if self.download_pictures and (not is_video or self.download_video_thumbnails): + if not _already_downloaded("{0}_{1}.jpg".format(path_base, idx)): + return False + if is_video and self.download_videos: + if not _already_downloaded("{0}_{1}.mp4".format(path_base, idx)): + return False + return True + + dirname = _PostPathFormatter(post, self.sanitize_paths).format(self.dirname_pattern, target=target) + filename_template = os.path.join(dirname, self.format_filename(post, target=target)) + filename = self.__prepare_filename(filename_template, lambda: post.url) + + # Download the image(s) / video thumbnail and videos within sidecars if desired + downloaded = True + if post.typename == 'GraphSidecar': + if (self.download_pictures or self.download_videos) and post.mediacount > 0: + if not _all_already_downloaded( + filename_template, enumerate( + (post.get_is_videos()[i] + for i in range(self.slide_start % post.mediacount, self.slide_end % post.mediacount + 1)), + start=self.slide_start % post.mediacount + 1 + ) + ): + for edge_number, sidecar_node in enumerate( + post.get_sidecar_nodes(self.slide_start, self.slide_end), + start=self.slide_start % post.mediacount + 1 + ): + suffix: Optional[str] = str(edge_number) + if '{filename}' in self.filename_pattern: + suffix = None + if self.download_pictures and (not sidecar_node.is_video or self.download_video_thumbnails): + # pylint:disable=cell-var-from-loop + sidecar_filename = self.__prepare_filename(filename_template, 
+ lambda: sidecar_node.display_url) + # Download sidecar picture or video thumbnail (--no-pictures implies --no-video-thumbnails) + downloaded &= self.download_pic(filename=sidecar_filename, url=sidecar_node.display_url, + mtime=post.date_local, filename_suffix=suffix) + if sidecar_node.is_video and self.download_videos: + # pylint:disable=cell-var-from-loop + sidecar_filename = self.__prepare_filename(filename_template, + lambda: sidecar_node.video_url) + # Download sidecar video if desired + downloaded &= self.download_pic(filename=sidecar_filename, url=sidecar_node.video_url, + mtime=post.date_local, filename_suffix=suffix) + else: + downloaded = False + elif post.typename == 'GraphImage': + # Download picture + if self.download_pictures: + downloaded = (not _already_downloaded(filename + ".jpg") and + self.download_pic(filename=filename, url=post.url, mtime=post.date_local)) + elif post.typename == 'GraphVideo': + # Download video thumbnail (--no-pictures implies --no-video-thumbnails) + if self.download_pictures and self.download_video_thumbnails: + with self.context.error_catcher("Video thumbnail of {}".format(post)): + downloaded = (not _already_downloaded(filename + ".jpg") and + self.download_pic(filename=filename, url=post.url, mtime=post.date_local)) + else: + self.context.error("Warning: {0} has unknown typename: {1}".format(post, post.typename)) + + # Save caption if desired + metadata_string = _ArbitraryItemFormatter(post).format(self.post_metadata_txt_pattern).strip() + if metadata_string: + self.save_caption(filename=filename, mtime=post.date_local, caption=metadata_string) + + # Download video if desired + if post.is_video and self.download_videos: + downloaded &= (not _already_downloaded(filename + ".mp4") and + self.download_pic(filename=filename, url=post.video_url, mtime=post.date_local)) + + # Download geotags if desired + if self.download_geotags and post.location: + self.save_location(filename, post.location, post.date_local) + + # Update 
comments if desired + if self.download_comments: + self.update_comments(filename=filename, post=post) + + # Save metadata as JSON if desired. + if self.save_metadata: + self.save_metadata_json(filename, post) + + self.context.log() + return downloaded + + @_requires_login + def get_stories(self, userids: Optional[List[int]] = None) -> Iterator[Story]: + """Get available stories from followees or all stories of users whose ID are given. + Does not mark stories as seen. + To use this, one needs to be logged in + + :param userids: List of user IDs to be processed in terms of downloading their stories, or None. + :raises LoginRequiredException: If called without being logged in. + """ + + if not userids: + data = self.context.graphql_query("d15efd8c0c5b23f0ef71f18bf363c704", + {"only_stories": True})["data"]["user"] + if data is None: + raise BadResponseException('Bad stories reel JSON.') + userids = list(edge["node"]["id"] for edge in data["feed_reels_tray"]["edge_reels_tray_to_reel"]["edges"]) + + def _userid_chunks(): + assert userids is not None + userids_per_query = 50 + for i in range(0, len(userids), userids_per_query): + yield userids[i:i + userids_per_query] + + for userid_chunk in _userid_chunks(): + stories = self.context.graphql_query("303a4ae99711322310f25250d988f3b7", + {"reel_ids": userid_chunk, "precomposed_overlay": False})["data"] + yield from (Story(self.context, media) for media in stories['reels_media']) + + @_requires_login + def download_stories(self, + userids: Optional[List[Union[int, Profile]]] = None, + fast_update: bool = False, + filename_target: Optional[str] = ':stories', + storyitem_filter: Optional[Callable[[StoryItem], bool]] = None, + latest_stamps: Optional[LatestStamps] = None) -> None: + """ + Download available stories from user followees or all stories of users whose ID are given. + Does not mark stories as seen. 
+ To use this, one needs to be logged in + + :param userids: List of user IDs or Profiles to be processed in terms of downloading their stories + :param fast_update: If true, abort when first already-downloaded picture is encountered + :param filename_target: Replacement for {target} in dirname_pattern and filename_pattern + or None if profile name should be used instead + :param storyitem_filter: function(storyitem), which returns True if given StoryItem should be downloaded + :param latest_stamps: Database with the last times each user was scraped + :raises LoginRequiredException: If called without being logged in. + + .. versionchanged:: 4.8 + Add `latest_stamps` parameter. + """ + + if not userids: + self.context.log("Retrieving all visible stories...") + profile_count = None + else: + userids = [p if isinstance(p, int) else p.userid for p in userids] + profile_count = len(userids) + + for i, user_story in enumerate(self.get_stories(userids), start=1): + name = user_story.owner_username + if profile_count is not None: + msg = "[{0:{w}d}/{1:{w}d}] Retrieving stories from profile {2}.".format(i, profile_count, name, + w=len(str(profile_count))) + else: + msg = "[{:3d}] Retrieving stories from profile {}.".format(i, name) + self.context.log(msg) + totalcount = user_story.itemcount + count = 1 + if latest_stamps is not None: + # pylint:disable=cell-var-from-loop + last_scraped = latest_stamps.get_last_story_timestamp(name) + scraped_timestamp = datetime.now().astimezone() + for item in user_story.get_items(): + if latest_stamps is not None: + if item.date_local <= last_scraped: + break + if storyitem_filter is not None and not storyitem_filter(item): + self.context.log("<{} skipped>".format(item), flush=True) + continue + self.context.log("[%3i/%3i] " % (count, totalcount), end="", flush=True) + count += 1 + with self.context.error_catcher('Download story from user {}'.format(name)): + downloaded = self.download_storyitem(item, filename_target if filename_target 
else name) + if fast_update and not downloaded: + break + if latest_stamps is not None: + latest_stamps.set_last_story_timestamp(name, scraped_timestamp) + + def download_storyitem(self, item: StoryItem, target: Union[str, Path]) -> bool: + """Download one user story. + + :param item: Story item, as in story['items'] for story in :meth:`get_stories` + :param target: Replacement for {target} in dirname_pattern and filename_pattern + :return: True if something was downloaded, False otherwise, i.e. file was already there + """ + + def _already_downloaded(path: str) -> bool: + if not os.path.isfile(path): + return False + else: + self.context.log(path + ' exists', end=' ', flush=True) + return True + + date_local = item.date_local + dirname = _PostPathFormatter(item, self.sanitize_paths).format(self.dirname_pattern, target=target) + filename_template = os.path.join(dirname, self.format_filename(item, target=target)) + filename = self.__prepare_filename(filename_template, lambda: item.url) + downloaded = False + video_url_fetch_failed = False + if item.is_video and self.download_videos is True: + video_url = item.video_url + if video_url: + filename = self.__prepare_filename(filename_template, lambda: str(video_url)) + downloaded |= (not _already_downloaded(filename + ".mp4") and + self.download_pic(filename=filename, url=video_url, mtime=date_local)) + else: + video_url_fetch_failed = True + if video_url_fetch_failed or not item.is_video or self.download_video_thumbnails is True: + downloaded = (not _already_downloaded(filename + ".jpg") and + self.download_pic(filename=filename, url=item.url, mtime=date_local)) + # Save caption if desired + metadata_string = _ArbitraryItemFormatter(item).format(self.storyitem_metadata_txt_pattern).strip() + if metadata_string: + self.save_caption(filename=filename, mtime=item.date_local, caption=metadata_string) + # Save metadata as JSON if desired. 
+ if self.save_metadata is not False: + self.save_metadata_json(filename, item) + self.context.log() + return downloaded + + @_requires_login + def get_highlights(self, user: Union[int, Profile]) -> Iterator[Highlight]: + """Get all highlights from a user. + To use this, one needs to be logged in. + + .. versionadded:: 4.1 + + :param user: ID or Profile of the user whose highlights should get fetched. + :raises LoginRequiredException: If called without being logged in. + """ + + userid = user if isinstance(user, int) else user.userid + data = self.context.graphql_query("7c16654f22c819fb63d1183034a5162f", + {"user_id": userid, "include_chaining": False, "include_reel": False, + "include_suggested_users": False, "include_logged_out_extras": False, + "include_highlight_reels": True})["data"]["user"]['edge_highlight_reels'] + if data is None: + raise BadResponseException('Bad highlights reel JSON.') + yield from (Highlight(self.context, edge['node'], user if isinstance(user, Profile) else None) + for edge in data['edges']) + + @_requires_login + def download_highlights(self, + user: Union[int, Profile], + fast_update: bool = False, + filename_target: Optional[str] = None, + storyitem_filter: Optional[Callable[[StoryItem], bool]] = None) -> None: + """ + Download available highlights from a user whose ID is given. + To use this, one needs to be logged in. + + .. versionadded:: 4.1 + + .. versionchanged:: 4.3 + Also downloads and saves the Highlight's cover pictures. + + :param user: ID or Profile of the user whose highlights should get downloaded. 
+ :param fast_update: If true, abort when first already-downloaded picture is encountered + :param filename_target: Replacement for {target} in dirname_pattern and filename_pattern + or None if profile name and the highlights' titles should be used instead + :param storyitem_filter: function(storyitem), which returns True if given StoryItem should be downloaded + :raises LoginRequiredException: If called without being logged in. + """ + for user_highlight in self.get_highlights(user): + name = user_highlight.owner_username + highlight_target: Union[str, Path] = (filename_target + if filename_target + else (Path(_PostPathFormatter.sanitize_path(name, self.sanitize_paths)) / + _PostPathFormatter.sanitize_path(user_highlight.title, + self.sanitize_paths))) + self.context.log("Retrieving highlights \"{}\" from profile {}".format(user_highlight.title, name)) + self.download_highlight_cover(user_highlight, highlight_target) + totalcount = user_highlight.itemcount + count = 1 + for item in user_highlight.get_items(): + if storyitem_filter is not None and not storyitem_filter(item): + self.context.log("<{} skipped>".format(item), flush=True) + continue + self.context.log("[%3i/%3i] " % (count, totalcount), end="", flush=True) + count += 1 + with self.context.error_catcher('Download highlights \"{}\" from user {}'.format(user_highlight.title, + name)): + downloaded = self.download_storyitem(item, highlight_target) + if fast_update and not downloaded: + break + + def posts_download_loop(self, + posts: Iterator[Post], + target: Union[str, Path], + fast_update: bool = False, + post_filter: Optional[Callable[[Post], bool]] = None, + max_count: Optional[int] = None, + total_count: Optional[int] = None, + owner_profile: Optional[Profile] = None, + takewhile: Optional[Callable[[Post], bool]] = None, + possibly_pinned: int = 0) -> None: + """ + Download the Posts returned by given Post Iterator. + + .. versionadded:: 4.4 + + .. 
versionchanged:: 4.5 + Transparently resume an aborted operation if `posts` is a :class:`NodeIterator`. + + .. versionchanged:: 4.8 + Add `takewhile` parameter. + + .. versionchanged:: 4.10.3 + Add `possibly_pinned` parameter. + + :param posts: Post Iterator to loop through. + :param target: Target name. + :param fast_update: :option:`--fast-update`. + :param post_filter: :option:`--post-filter`. + :param max_count: Maximum count of Posts to download (:option:`--count`). + :param total_count: Total number of posts returned by given iterator. + :param owner_profile: Associated profile, if any. + :param takewhile: Expression evaluated for each post. Once it returns false, downloading stops. + :param possibly_pinned: Number of posts that might be pinned. These posts do not cause download + to stop even if they've already been downloaded. + """ + displayed_count = (max_count if total_count is None or max_count is not None and max_count < total_count + else total_count) + sanitized_target = target + if isinstance(target, str): + sanitized_target = _PostPathFormatter.sanitize_path(target, self.sanitize_paths) + if takewhile is None: + takewhile = lambda _: True + with resumable_iteration( + context=self.context, + iterator=posts, + load=load_structure_from_file, + save=save_structure_to_file, + format_path=lambda magic: self.format_filename_within_target_path( + sanitized_target, owner_profile, self.resume_prefix or '', magic, 'json.xz' + ), + check_bbd=self.check_resume_bbd, + enabled=self.resume_prefix is not None + ) as (is_resuming, start_index): + for number, post in enumerate(posts, start=start_index + 1): + should_stop = not takewhile(post) + if should_stop and number <= possibly_pinned: + continue + if (max_count is not None and number > max_count) or should_stop: + break + if displayed_count is not None: + self.context.log("[{0:{w}d}/{1:{w}d}] ".format(number, displayed_count, + w=len(str(displayed_count))), + end="", flush=True) + else: + 
self.context.log("[{:3d}] ".format(number), end="", flush=True) + if post_filter is not None: + try: + if not post_filter(post): + self.context.log("{} skipped".format(post)) + continue + except (InstaloaderException, KeyError, TypeError) as err: + self.context.error("{} skipped. Filter evaluation failed: {}".format(post, err)) + continue + with self.context.error_catcher("Download {} of {}".format(post, target)): + # The PostChangedException gets raised if the Post's id/shortcode changed while obtaining + # additional metadata. This is most likely the case if a HTTP redirect takes place while + # resolving the shortcode URL. + # The `post_changed` variable keeps the fast-update functionality alive: A Post which is + # obained after a redirect has probably already been downloaded as a previous Post of the + # same Profile. + # Observed in issue #225: https://github.com/instaloader/instaloader/issues/225 + post_changed = False + while True: + try: + downloaded = self.download_post(post, target=target) + break + except PostChangedException: + post_changed = True + continue + if fast_update and not downloaded and not post_changed and number > possibly_pinned: + # disengage fast_update for first post when resuming + if not is_resuming or number > 0: + break + + @_requires_login + def get_feed_posts(self) -> Iterator[Post]: + """Get Posts of the user's feed. + + :return: Iterator over Posts of the user's feed. + :raises LoginRequiredException: If called without being logged in. 
+ """ + + data = self.context.graphql_query("d6f4427fbe92d846298cf93df0b937d3", {})["data"] + + while True: + feed = data["user"]["edge_web_feed_timeline"] + for edge in feed["edges"]: + node = edge["node"] + if node.get("__typename") in Post.supported_graphql_types() and node.get("shortcode") is not None: + yield Post(self.context, node) + if not feed["page_info"]["has_next_page"]: + break + data = self.context.graphql_query("d6f4427fbe92d846298cf93df0b937d3", + {'fetch_media_item_count': 12, + 'fetch_media_item_cursor': feed["page_info"]["end_cursor"], + 'fetch_comment_count': 4, + 'fetch_like': 10, + 'has_stories': False})["data"] + + @_requires_login + def download_feed_posts(self, max_count: Optional[int] = None, fast_update: bool = False, + post_filter: Optional[Callable[[Post], bool]] = None) -> None: + """ + Download pictures from the user's feed. + + Example to download up to the 20 pics the user last liked:: + + loader = Instaloader() + loader.load_session_from_file('USER') + loader.download_feed_posts(max_count=20, fast_update=True, + post_filter=lambda post: post.viewer_has_liked) + + :param max_count: Maximum count of pictures to download + :param fast_update: If true, abort when first already-downloaded picture is encountered + :param post_filter: function(post), which returns True if given picture should be downloaded + :raises LoginRequiredException: If called without being logged in. + """ + self.context.log("Retrieving pictures from your feed...") + self.posts_download_loop(self.get_feed_posts(), ":feed", fast_update, post_filter, max_count=max_count) + + @_requires_login + def download_saved_posts(self, max_count: Optional[int] = None, fast_update: bool = False, + post_filter: Optional[Callable[[Post], bool]] = None) -> None: + """Download user's saved pictures. 
+ + :param max_count: Maximum count of pictures to download + :param fast_update: If true, abort when first already-downloaded picture is encountered + :param post_filter: function(post), which returns True if given picture should be downloaded + :raises LoginRequiredException: If called without being logged in. + """ + self.context.log("Retrieving saved posts...") + assert self.context.username is not None # safe due to @_requires_login; required by typechecker + node_iterator = Profile.own_profile(self.context).get_saved_posts() + self.posts_download_loop(node_iterator, ":saved", + fast_update, post_filter, + max_count=max_count, total_count=node_iterator.count) + + @_requires_login + def get_location_posts(self, location: str) -> Iterator[Post]: + """Get Posts which are listed by Instagram for a given Location. + + :return: Iterator over Posts of a location's posts + :raises LoginRequiredException: If called without being logged in. + + .. versionadded:: 4.2 + + .. versionchanged:: 4.2.9 + Require being logged in (as required by Instagram) + """ + yield from SectionIterator( + self.context, + lambda d: d["native_location_data"]["recent"], + lambda m: Post.from_iphone_struct(self.context, m), + f"explore/locations/{location}/", + ) + + @_requires_login + def download_location(self, location: str, + max_count: Optional[int] = None, + post_filter: Optional[Callable[[Post], bool]] = None, + fast_update: bool = False) -> None: + """Download pictures of one location. 
+ + To download the last 30 pictures with location 362629379, do:: + + loader = Instaloader() + loader.download_location(362629379, max_count=30) + + :param location: Location to download, as Instagram numerical ID + :param max_count: Maximum count of pictures to download + :param post_filter: function(post), which returns True if given picture should be downloaded + :param fast_update: If true, abort when first already-downloaded picture is encountered + :raises LoginRequiredException: If called without being logged in. + + .. versionadded:: 4.2 + + .. versionchanged:: 4.2.9 + Require being logged in (as required by Instagram) + """ + self.context.log("Retrieving pictures for location {}...".format(location)) + self.posts_download_loop(self.get_location_posts(location), "%" + location, fast_update, post_filter, + max_count=max_count) + + @_requires_login + def get_explore_posts(self) -> NodeIterator[Post]: + """Get Posts which are worthy of exploring suggested by Instagram. + + :return: Iterator over Posts of the user's suggested posts. + :rtype: NodeIterator[Post] + :raises LoginRequiredException: If called without being logged in. + """ + return NodeIterator( + self.context, + 'df0dcc250c2b18d9fd27c5581ef33c7c', + lambda d: d['data']['user']['edge_web_discover_media'], + lambda n: Post(self.context, n), + query_referer='https://www.instagram.com/explore/', + ) + + def get_hashtag_posts(self, hashtag: str) -> Iterator[Post]: + """Get Posts associated with a #hashtag. + + .. deprecated:: 4.4 + Use :meth:`Hashtag.get_posts_resumable`.""" + return Hashtag.from_name(self.context, hashtag).get_posts_resumable() + + def download_hashtag(self, hashtag: Union[Hashtag, str], + max_count: Optional[int] = None, + post_filter: Optional[Callable[[Post], bool]] = None, + fast_update: bool = False, + profile_pic: bool = True, + posts: bool = True) -> None: + """Download pictures of one hashtag. 
+ + To download the last 30 pictures with hashtag #cat, do:: + + loader = Instaloader() + loader.download_hashtag('cat', max_count=30) + + :param hashtag: Hashtag to download, as instance of :class:`Hashtag`, or string without leading '#' + :param max_count: Maximum count of pictures to download + :param post_filter: function(post), which returns True if given picture should be downloaded + :param fast_update: If true, abort when first already-downloaded picture is encountered + :param profile_pic: not :option:`--no-profile-pic`. + :param posts: not :option:`--no-posts`. + + .. versionchanged:: 4.4 + Add parameters `profile_pic` and `posts`. + """ + if isinstance(hashtag, str): + with self.context.error_catcher("Get hashtag #{}".format(hashtag)): + hashtag = Hashtag.from_name(self.context, hashtag) + if not isinstance(hashtag, Hashtag): + return + target = "#" + hashtag.name + if profile_pic: + with self.context.error_catcher("Download profile picture of {}".format(target)): + self.download_hashtag_profilepic(hashtag) + if posts: + self.context.log("Retrieving pictures with hashtag #{}...".format(hashtag.name)) + self.posts_download_loop(hashtag.get_posts_resumable(), target, fast_update, post_filter, + max_count=max_count) + if self.save_metadata: + json_filename = '{0}/{1}'.format(self.dirname_pattern.format(profile=target, + target=target), + target) + self.save_metadata_json(json_filename, hashtag) + + def download_tagged(self, profile: Profile, fast_update: bool = False, + target: Optional[str] = None, + post_filter: Optional[Callable[[Post], bool]] = None, + latest_stamps: Optional[LatestStamps] = None) -> None: + """Download all posts where a profile is tagged. + + .. versionadded:: 4.1 + + .. 
versionchanged:: 4.8 + Add `latest_stamps` parameter.""" + self.context.log("Retrieving tagged posts for profile {}.".format(profile.username)) + posts_takewhile: Optional[Callable[[Post], bool]] = None + if latest_stamps is not None: + last_scraped = latest_stamps.get_last_tagged_timestamp(profile.username) + posts_takewhile = lambda p: p.date_local > last_scraped + tagged_posts = profile.get_tagged_posts() + self.posts_download_loop(tagged_posts, + target if target + else (Path(_PostPathFormatter.sanitize_path(profile.username, self.sanitize_paths)) / + _PostPathFormatter.sanitize_path(':tagged', self.sanitize_paths)), + fast_update, post_filter, takewhile=posts_takewhile) + if latest_stamps is not None and tagged_posts.first_item is not None: + latest_stamps.set_last_tagged_timestamp(profile.username, tagged_posts.first_item.date_local) + + def download_igtv(self, profile: Profile, fast_update: bool = False, + post_filter: Optional[Callable[[Post], bool]] = None, + latest_stamps: Optional[LatestStamps] = None) -> None: + """Download IGTV videos of a profile. + + .. versionadded:: 4.3 + + .. 
versionchanged:: 4.8 + Add `latest_stamps` parameter.""" + self.context.log("Retrieving IGTV videos for profile {}.".format(profile.username)) + posts_takewhile: Optional[Callable[[Post], bool]] = None + if latest_stamps is not None: + last_scraped = latest_stamps.get_last_igtv_timestamp(profile.username) + posts_takewhile = lambda p: p.date_local > last_scraped + igtv_posts = profile.get_igtv_posts() + self.posts_download_loop(igtv_posts, profile.username, fast_update, post_filter, + total_count=profile.igtvcount, owner_profile=profile, takewhile=posts_takewhile) + if latest_stamps is not None and igtv_posts.first_item is not None: + latest_stamps.set_last_igtv_timestamp(profile.username, igtv_posts.first_item.date_local) + + def _get_id_filename(self, profile_name: str) -> str: + if ((format_string_contains_key(self.dirname_pattern, 'profile') or + format_string_contains_key(self.dirname_pattern, 'target'))): + return os.path.join(self.dirname_pattern.format(profile=profile_name.lower(), + target=profile_name.lower()), + 'id') + else: + return os.path.join(self.dirname_pattern.format(), + '{0}_id'.format(profile_name.lower())) + + def load_profile_id(self, profile_name: str) -> Optional[int]: + """ + Load ID of profile from profile directory. + + .. versionadded:: 4.8 + """ + id_filename = self._get_id_filename(profile_name) + try: + with open(id_filename, 'rb') as id_file: + return int(id_file.read()) + except (FileNotFoundError, ValueError): + return None + + def save_profile_id(self, profile: Profile): + """ + Store ID of profile on profile directory. + + .. 
versionadded:: 4.0.6 + """ + os.makedirs(self.dirname_pattern.format(profile=profile.username, + target=profile.username), exist_ok=True) + with open(self._get_id_filename(profile.username), 'w') as text_file: + text_file.write(str(profile.userid) + "\n") + self.context.log("Stored ID {0} for profile {1}.".format(profile.userid, profile.username)) + + def check_profile_id(self, profile_name: str, latest_stamps: Optional[LatestStamps] = None) -> Profile: + """ + Consult locally stored ID of profile with given name, check whether ID matches and whether name + has changed and return current name of the profile, and store ID of profile. + + :param profile_name: Profile name + :param latest_stamps: Database of downloaded data. If present, IDs are retrieved from it, + otherwise from the target directory + :return: Instance of current profile + + .. versionchanged:: 4.8 + Add `latest_stamps` parameter. + """ + profile = None + profile_name_not_exists_err = None + try: + profile = Profile.from_username(self.context, profile_name) + except ProfileNotExistsException as err: + profile_name_not_exists_err = err + if latest_stamps is None: + profile_id = self.load_profile_id(profile_name) + else: + profile_id = latest_stamps.get_profile_id(profile_name) + if profile_id is not None: + if (profile is None) or \ + (profile_id != profile.userid): + if profile is not None: + self.context.log("Profile {0} does not match the stored unique ID {1}.".format(profile_name, + profile_id)) + else: + self.context.log("Trying to find profile {0} using its unique ID {1}.".format(profile_name, + profile_id)) + profile_from_id = Profile.from_id(self.context, profile_id) + newname = profile_from_id.username + if profile_name == newname: + self.context.error( + f"Warning: Profile {profile_name} could not be retrieved by its name, but by its ID.") + return profile_from_id + self.context.error("Profile {0} has changed its name to {1}.".format(profile_name, newname)) + if latest_stamps is None: + if 
((format_string_contains_key(self.dirname_pattern, 'profile') or + format_string_contains_key(self.dirname_pattern, 'target'))): + os.rename(self.dirname_pattern.format(profile=profile_name.lower(), + target=profile_name.lower()), + self.dirname_pattern.format(profile=newname.lower(), + target=newname.lower())) + else: + os.rename('{0}/{1}_id'.format(self.dirname_pattern.format(), profile_name.lower()), + '{0}/{1}_id'.format(self.dirname_pattern.format(), newname.lower())) + else: + latest_stamps.rename_profile(profile_name, newname) + return profile_from_id + # profile exists and profile id matches saved id + return profile + if profile is not None: + if latest_stamps is None: + self.save_profile_id(profile) + else: + latest_stamps.save_profile_id(profile.username, profile.userid) + return profile + if profile_name_not_exists_err: + raise profile_name_not_exists_err + raise ProfileNotExistsException("Profile {0} does not exist.".format(profile_name)) + + def download_profiles(self, profiles: Set[Profile], + profile_pic: bool = True, posts: bool = True, + tagged: bool = False, + igtv: bool = False, + highlights: bool = False, + stories: bool = False, + fast_update: bool = False, + post_filter: Optional[Callable[[Post], bool]] = None, + storyitem_filter: Optional[Callable[[Post], bool]] = None, + raise_errors: bool = False, + latest_stamps: Optional[LatestStamps] = None, + max_count: Optional[int] = None): + """High-level method to download set of profiles. + + :param profiles: Set of profiles to download. + :param profile_pic: not :option:`--no-profile-pic`. + :param posts: not :option:`--no-posts`. + :param tagged: :option:`--tagged`. + :param igtv: :option:`--igtv`. + :param highlights: :option:`--highlights`. + :param stories: :option:`--stories`. + :param fast_update: :option:`--fast-update`. + :param post_filter: :option:`--post-filter`. + :param storyitem_filter: :option:`--post-filter`. 
+ :param raise_errors: + Whether :exc:`LoginRequiredException` and :exc:`PrivateProfileNotFollowedException` should be raised or + catched and printed with :meth:`InstaloaderContext.error_catcher`. + :param latest_stamps: :option:`--latest-stamps`. + :param max_count: Maximum count of posts to download. + + .. versionadded:: 4.1 + + .. versionchanged:: 4.3 + Add `igtv` parameter. + + .. versionchanged:: 4.8 + Add `latest_stamps` parameter. + + .. versionchanged:: 4.13 + Add `max_count` parameter. + """ + + @contextmanager + def _error_raiser(_str): + yield + + # error_handler type is Callable[[Optional[str]], ContextManager[None]] (not supported with Python 3.5.0..3.5.3) + error_handler = _error_raiser if raise_errors else self.context.error_catcher + + for i, profile in enumerate(profiles, start=1): + self.context.log("[{0:{w}d}/{1:{w}d}] Downloading profile {2}".format(i, len(profiles), profile.username, + w=len(str(len(profiles))))) + with error_handler(profile.username): # type: ignore # (ignore type for Python 3.5 support) + profile_name = profile.username + + # Download profile picture + if profile_pic: + with self.context.error_catcher('Download profile picture of {}'.format(profile_name)): + self.download_profilepic_if_new(profile, latest_stamps) + + # Save metadata as JSON if desired. 
+ if self.save_metadata: + json_filename = os.path.join(self.dirname_pattern.format(profile=profile_name, + target=profile_name), + '{0}_{1}'.format(profile_name, profile.userid)) + self.save_metadata_json(json_filename, profile) + + # Catch some errors + if tagged or igtv or highlights or posts: + if (not self.context.is_logged_in and + profile.is_private): + raise LoginRequiredException("Login required.") + if (self.context.username != profile.username and + profile.is_private and + not profile.followed_by_viewer): + raise PrivateProfileNotFollowedException("Private but not followed.") + + # Download tagged, if requested + if tagged: + with self.context.error_catcher('Download tagged of {}'.format(profile_name)): + self.download_tagged(profile, fast_update=fast_update, post_filter=post_filter, + latest_stamps=latest_stamps) + + # Download IGTV, if requested + if igtv: + with self.context.error_catcher('Download IGTV of {}'.format(profile_name)): + self.download_igtv(profile, fast_update=fast_update, post_filter=post_filter, + latest_stamps=latest_stamps) + + # Download highlights, if requested + if highlights: + with self.context.error_catcher('Download highlights of {}'.format(profile_name)): + self.download_highlights(profile, fast_update=fast_update, storyitem_filter=storyitem_filter) + + # Iterate over pictures and download them + if posts: + self.context.log("Retrieving posts from profile {}.".format(profile_name)) + posts_takewhile: Optional[Callable[[Post], bool]] = None + if latest_stamps is not None: + # pylint:disable=cell-var-from-loop + last_scraped = latest_stamps.get_last_post_timestamp(profile_name) + posts_takewhile = lambda p: p.date_local > last_scraped + posts_to_download = profile.get_posts() + self.posts_download_loop(posts_to_download, profile_name, fast_update, post_filter, + total_count=profile.mediacount, owner_profile=profile, + takewhile=posts_takewhile, possibly_pinned=3, max_count=max_count) + if latest_stamps is not None and 
def download_profile(self, profile_name: Union[str, Profile],
                     profile_pic: bool = True, profile_pic_only: bool = False,
                     fast_update: bool = False,
                     download_stories: bool = False, download_stories_only: bool = False,
                     download_tagged: bool = False, download_tagged_only: bool = False,
                     post_filter: Optional[Callable[[Post], bool]] = None,
                     storyitem_filter: Optional[Callable[[StoryItem], bool]] = None) -> None:
    """Download one profile: metadata, profile picture, stories, tagged posts, then posts.

    Each ``*_only`` flag makes the method return right after the corresponding step.

    :param profile_name: Username (lower-cased and resolved through :meth:`check_profile_id`)
       or an already constructed :class:`Profile`.
    :raises ProfileNotExistsException: Logged in, profile is public and has blocked the viewer.
    :raises LoginRequiredException: Profile is private and we are not logged in.
    :raises PrivateProfileNotFollowedException: Profile is private, not followed and not our own.

    .. deprecated:: 4.1
       Use :meth:`Instaloader.download_profiles`.
    """
    # A string is resolved to a Profile (checks existence / renames since the last download);
    # a Profile instance is used as given.
    profile = self.check_profile_id(profile_name.lower()) if isinstance(profile_name, str) else profile_name
    username = profile.username

    # Save metadata as JSON unless explicitly disabled.
    if self.save_metadata is not False:
        target_dir = self.dirname_pattern.format(profile=username, target=username)
        self.save_metadata_json('{0}/{1}_{2}'.format(target_dir, username, profile.userid), profile)

    if self.context.is_logged_in and profile.has_blocked_viewer and not profile.is_private:
        # raising ProfileNotExistsException invokes "trying again anonymously" logic
        raise ProfileNotExistsException("Profile {} has blocked you".format(username))

    # Profile picture
    if profile_pic or profile_pic_only:
        with self.context.error_catcher('Download profile picture of {}'.format(username)):
            self.download_profilepic(profile)
        if profile_pic_only:
            return

    # Access checks
    if not profile.is_private:
        if self.context.is_logged_in and not (download_stories or download_stories_only):
            self.context.log("profile %s could also be downloaded anonymously." % username)
    elif not self.context.is_logged_in:
        raise LoginRequiredException("profile %s requires login" % username)
    elif not profile.followed_by_viewer and self.context.username != profile.username:
        raise PrivateProfileNotFollowedException("Profile %s: private but not followed." % username)

    # Stories, if requested
    if download_stories or download_stories_only:
        if not profile.has_viewable_story:
            self.context.log("{} does not have any stories.".format(username))
        else:
            with self.context.error_catcher("Download stories of {}".format(username)):
                self.download_stories(userids=[profile.userid], filename_target=username,
                                      fast_update=fast_update, storyitem_filter=storyitem_filter)
        if download_stories_only:
            return

    # Tagged posts, if requested
    if download_tagged or download_tagged_only:
        with self.context.error_catcher('Download tagged of {}'.format(username)):
            self.download_tagged(profile, fast_update=fast_update, post_filter=post_filter)
        if download_tagged_only:
            return

    # Finally, the profile's own posts.
    self.context.log("Retrieving posts from profile {}.".format(username))
    self.posts_download_loop(profile.get_posts(), username, fast_update, post_filter,
                             total_count=profile.mediacount, owner_profile=profile)
def copy_session(session: requests.Session, request_timeout: Optional[float] = None) -> requests.Session:
    """Duplicate a :class:`requests.Session`.

    The copy receives its own cookie jar and header mapping (both populated from ``session``),
    the proxy mapping of ``session``, and a default request timeout.

    :param session: Session to duplicate.
    :param request_timeout: Timeout passed to every request made through the copy.
    :return: A new, independent session.
    """
    new = requests.Session()
    new.cookies = requests.utils.cookiejar_from_dict(requests.utils.dict_from_cookiejar(session.cookies))
    new.headers = session.headers.copy()  # type: ignore
    # Carry over the proxy configuration. Without this, every request issued through a copied
    # session would silently bypass the proxy configured on the source session.
    new.proxies.update(session.proxies)
    # Override default timeout behavior.
    # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427
    new.request = partial(new.request, timeout=request_timeout)  # type: ignore
    return new
def __init__(self, proxy: Optional[dict] = None, sleep: bool = True, quiet: bool = False,
             user_agent: Optional[str] = None,
             max_connection_attempts: int = 3, request_timeout: float = 300.0,
             rate_controller: Optional[Callable[["InstaloaderContext"], "RateController"]] = None,
             fatal_status_codes: Optional[List[int]] = None,
             iphone_support: bool = True):
    """Set up an anonymous session and the bookkeeping state of the context.

    :param proxy: Proxy mapping in the format of ``requests.Session.proxies``
       (e.g. ``{'https': 'http://host:port'}``), or None to use no explicit proxy.
    :param sleep: Whether :meth:`do_sleep` actually sleeps.
    :param quiet: Suppress :meth:`log` output.
    :param user_agent: User-Agent header; defaults to :func:`default_user_agent`.
    :param max_connection_attempts: Number of attempts before a query gives up.
    :param request_timeout: Timeout applied to every request of the sessions created here.
    :param rate_controller: Factory for a custom :class:`RateController`.
    :param fatal_status_codes: HTTP status codes that abort the download.
    :param iphone_support: Stored as ``self.iphone_support``.
    """
    self.user_agent = user_agent if user_agent is not None else default_user_agent()
    self.request_timeout = request_timeout
    # Normalise "no proxy" to an empty mapping: session.proxies.update(None) raises TypeError,
    # and self.proxy is handed to get_anonymous_session() again later on.
    self.proxy = proxy if proxy is not None else {}
    self._session = self.get_anonymous_session(self.proxy)
    self.username = None
    self.user_id = None
    self.sleep = sleep
    self.quiet = quiet
    self.max_connection_attempts = max_connection_attempts
    self._graphql_page_length = 50
    self.two_factor_auth_pending = None
    self.iphone_support = iphone_support
    self.iphone_headers = default_iphone_headers()

    # error log, filled with error() and printed at the end of Instaloader.main()
    self.error_log: List[str] = []

    self._rate_controller = rate_controller(self) if rate_controller is not None else RateController(self)

    # Can be set to True for testing, disables suppression of InstaloaderContext._error_catcher
    self.raise_all_errors = False

    # HTTP status codes that should cause an AbortDownloadException
    self.fatal_status_codes = fatal_status_codes or []

    # Cache profile from id (mapping from id to Profile)
    self.profile_id_cache: Dict[int, Any] = dict()
def error(self, msg, repeat_at_end=True):
    """Print a non-fatal error message to stderr and optionally remember it.

    :param msg: Message to be printed.
    :param repeat_at_end: When true, the message is also appended to ``self.error_log`` so that
       it can be repeated at program termination; when false it is only printed."""
    print(msg, file=sys.stderr)
    if not repeat_at_end:
        return
    self.error_log.append(msg)
def get_anonymous_session(self, proxy: Optional[Dict[str, str]] = None) -> requests.Session:
    """Return our default anonymous requests.Session object.

    :param proxy: Proxy mapping in the format of ``requests.Session.proxies``; None or an empty
       mapping leaves the session's proxy configuration untouched.
    :return: New session with placeholder cookies, the reduced default headers and the
       context's request timeout.
    """
    session = requests.Session()
    session.cookies.update({'sessionid': '', 'mid': '', 'ig_pr': '1',
                            'ig_vw': '1920', 'csrftoken': '',
                            's_network': '', 'ds_user_id': ''})
    # proxy is optional; dict.update(None) would raise TypeError.
    if proxy:
        session.proxies.update(proxy)
    session.headers.update(self._default_http_header(empty_session_only=True))
    # Override default timeout behavior.
    # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427
    session.request = partial(session.request, timeout=self.request_timeout)  # type: ignore
    return session
def login(self, user, passwd):
    """Not meant to be used directly, use :meth:`Instaloader.login`.

    Builds a fresh session (routed through ``self.proxy`` when one is configured), fetches a CSRF
    token from Instagram's root URL, posts the credentials and, on success, installs the session
    as the context's session.

    :param user: Instagram username.
    :param passwd: Password of that account.
    :raises BadCredentialsException: If the provided password is wrong.
    :raises TwoFactorAuthRequiredException: First step of 2FA login done, now call
       :meth:`Instaloader.two_factor_login`.
    :raises LoginException: An error happened during login (for example, an invalid response).
       Or if the provided username does not exist.

    .. versionchanged:: 4.12
       Raises LoginException instead of ConnectionException when an error happens.
       Raises LoginException instead of InvalidArgumentException when the username does not exist.
    """
    # pylint:disable=import-outside-toplevel
    import http.client
    # pylint:disable=protected-access
    http.client._MAXHEADERS = 200
    session = requests.Session()
    session.cookies.update({'sessionid': '', 'mid': '', 'ig_pr': '1',
                            'ig_vw': '1920', 'ig_cb': '1', 'csrftoken': '',
                            's_network': '', 'ds_user_id': ''})
    session.headers.update(self._default_http_header())
    # Route the login (and the resulting logged-in session) through the configured proxy;
    # otherwise credentials and all later authenticated requests would go out directly.
    if self.proxy:
        session.proxies.update(self.proxy)
    # Override default timeout behavior.
    # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427
    session.request = partial(session.request, timeout=self.request_timeout)  # type: ignore

    # Make a request to Instagram's root URL, which will set the session's csrftoken cookie
    # Not using self.get_json() here, because we need to access the cookie
    session.get('https://www.instagram.com/')
    # Add session's csrftoken cookie to session headers
    csrf_token = session.cookies.get_dict()['csrftoken']
    session.headers.update({'X-CSRFToken': csrf_token})

    self.do_sleep()
    # Workaround credits to pgrimaud.
    # See: https://github.com/pgrimaud/instagram-user-feed/commit/96ad4cf54d1ad331b337f325c73e664999a6d066
    enc_password = '#PWD_INSTAGRAM_BROWSER:0:{}:{}'.format(int(datetime.now().timestamp()), passwd)
    login = session.post('https://www.instagram.com/api/v1/web/accounts/login/ajax/',
                         data={'enc_password': enc_password, 'username': user}, allow_redirects=True)
    try:
        resp_json = login.json()
    except json.decoder.JSONDecodeError as err:
        raise LoginException(
            "Login error: JSON decode fail, {} - {}.".format(login.status_code, login.reason)
        ) from err
    if resp_json.get('two_factor_required'):
        two_factor_session = copy_session(session, self.request_timeout)
        # Make sure the second login step uses the same proxy as the first one.
        two_factor_session.proxies.update(session.proxies)
        two_factor_session.headers.update({'X-CSRFToken': csrf_token})
        two_factor_session.cookies.update({'csrftoken': csrf_token})
        self.two_factor_auth_pending = (two_factor_session,
                                        user,
                                        resp_json['two_factor_info']['two_factor_identifier'])
        raise TwoFactorAuthRequiredException("Login error: two-factor authentication required.")
    if resp_json.get('checkpoint_url'):
        raise LoginException(
            f"Login: Checkpoint required. Point your browser to {resp_json.get('checkpoint_url')} - "
            f"follow the instructions, then retry."
        )
    if resp_json['status'] != 'ok':
        if 'message' in resp_json:
            raise LoginException("Login error: \"{}\" status, message \"{}\".".format(resp_json['status'],
                                                                                       resp_json['message']))
        else:
            raise LoginException("Login error: \"{}\" status.".format(resp_json['status']))
    if 'authenticated' not in resp_json:
        # Issue #472
        if 'message' in resp_json:
            raise LoginException("Login error: Unexpected response, \"{}\".".format(resp_json['message']))
        else:
            raise LoginException("Login error: Unexpected response, this might indicate a blocked IP.")
    if not resp_json['authenticated']:
        if resp_json['user']:
            # '{"authenticated": false, "user": true, "status": "ok"}'
            raise BadCredentialsException('Login error: Wrong password.')
        else:
            # '{"authenticated": false, "user": false, "status": "ok"}'
            # Raise LoginException rather than BadCredentialException, because BadCredentialException
            # triggers re-asking of password in Instaloader.interactive_login(), which makes no sense if the
            # username is invalid.
            raise LoginException('Login error: User {} does not exist.'.format(user))
    # '{"authenticated": true, "user": true, "userId": ..., "oneTapPrompt": false, "status": "ok"}'
    session.headers.update({'X-CSRFToken': login.cookies['csrftoken']})
    self._session = session
    self.username = user
    self.user_id = resp_json['userId']
def get_json(self, path: str, params: Dict[str, Any], host: str = 'www.instagram.com',
             session: Optional[requests.Session] = None, _attempt=1,
             response_headers: Optional[Dict[str, Any]] = None,
             use_post: bool = False) -> Dict[str, Any]:
    """JSON request to Instagram.

    :param path: URL, relative to the given domain which defaults to www.instagram.com/
    :param params: request parameters
    :param host: Domain part of the URL from where to download the requested JSON; defaults to www.instagram.com
    :param session: Session to use, or None to use the context's own session
    :param _attempt: Internal retry counter; callers should leave it at its default
    :param response_headers: If given, this dict is cleared and filled with the response headers
    :param use_post: Use POST instead of GET to make the request
    :return: Decoded response dictionary
    :raises QueryReturnedBadRequestException: When the server responds with a 400.
    :raises QueryReturnedNotFoundException: When the server responds with a 404.
    :raises ConnectionException: When query repeatedly failed.
    :raises AbortDownloadException: On a status code listed in ``fatal_status_codes`` or when a
       logged-in session is redirected to the login page.
    :raises LoginRequiredException: When an anonymous session is redirected to the login page.

    .. versionchanged:: 4.13
       Added `use_post` parameter.
    """
    is_graphql_query = 'query_hash' in params and 'graphql/query' in path
    is_doc_id_query = 'doc_id' in params and 'graphql/query' in path
    is_iphone_query = host == 'i.instagram.com'
    is_other_query = not is_graphql_query and not is_doc_id_query and host == "www.instagram.com"
    sess = session if session else self._session
    try:
        self.do_sleep()
        if is_graphql_query:
            self._rate_controller.wait_before_query(params['query_hash'])
        if is_doc_id_query:
            self._rate_controller.wait_before_query(params['doc_id'])
        if is_iphone_query:
            self._rate_controller.wait_before_query('iphone')
        if is_other_query:
            self._rate_controller.wait_before_query('other')
        if use_post:
            resp = sess.post('https://{0}/{1}'.format(host, path), data=params, allow_redirects=False)
        else:
            resp = sess.get('https://{0}/{1}'.format(host, path), params=params, allow_redirects=False)
        if resp.status_code in self.fatal_status_codes:
            redirect = " redirect to {}".format(resp.headers['location']) if 'location' in resp.headers else ""
            body = ""
            # A response is not guaranteed to carry a Content-Type header.
            if resp.headers.get('Content-Type', '').startswith('application/json'):
                # Ellipsis exactly when the 500-character excerpt actually truncates the text.
                body = ': ' + resp.text[:500] + ('…' if len(resp.text) > 500 else '')
            raise AbortDownloadException("Query to https://{}/{} responded with \"{} {}\"{}{}".format(
                host, path, resp.status_code, resp.reason, redirect, body
            ))
        while resp.is_redirect:
            redirect_url = resp.headers['location']
            self.log('\nHTTP redirect from https://{0}/{1} to {2}'.format(host, path, redirect_url))
            if (redirect_url.startswith('https://www.instagram.com/accounts/login') or
                    redirect_url.startswith('https://i.instagram.com/accounts/login')):
                if not self.is_logged_in:
                    raise LoginRequiredException("Redirected to login page. Use --login or --load-cookies.")
                raise AbortDownloadException("Redirected to login page. You've been logged out, please wait " +
                                             "some time, recreate the session and try again")
            if redirect_url.startswith('https://{}/'.format(host)):
                # Only redirects that stay on the requested host are followed.
                resp = sess.get(redirect_url if redirect_url.endswith('/') else redirect_url + '/',
                                params=params, allow_redirects=False)
            else:
                break
        if response_headers is not None:
            response_headers.clear()
            response_headers.update(resp.headers)
        if resp.status_code == 400:
            raise QueryReturnedBadRequestException(self._response_error(resp))
        if resp.status_code == 404:
            raise QueryReturnedNotFoundException(self._response_error(resp))
        if resp.status_code == 429:
            raise TooManyRequestsException(self._response_error(resp))
        if resp.status_code != 200:
            raise ConnectionException(self._response_error(resp))
        else:
            resp_json = resp.json()
            if 'status' in resp_json and resp_json['status'] != "ok":
                raise ConnectionException(self._response_error(resp))
            return resp_json
    except (ConnectionException, json.decoder.JSONDecodeError, requests.exceptions.RequestException) as err:
        error_string = "JSON Query to {}: {}".format(path, err)
        # ">=" rather than "==" so that a limit below 1 cannot cause endless retries.
        if _attempt >= self.max_connection_attempts:
            if isinstance(err, QueryReturnedNotFoundException):
                raise QueryReturnedNotFoundException(error_string) from err
            else:
                raise ConnectionException(error_string) from err
        self.error(error_string + " [retrying; skip with ^C]", repeat_at_end=False)
        try:
            if isinstance(err, TooManyRequestsException):
                if is_graphql_query:
                    self._rate_controller.handle_429(params['query_hash'])
                if is_doc_id_query:
                    self._rate_controller.handle_429(params['doc_id'])
                if is_iphone_query:
                    self._rate_controller.handle_429('iphone')
                if is_other_query:
                    self._rate_controller.handle_429('other')
            # Forward use_post: otherwise a failed POST query would be retried as a GET.
            return self.get_json(path=path, params=params, host=host, session=sess, _attempt=_attempt + 1,
                                 response_headers=response_headers, use_post=use_post)
        except KeyboardInterrupt:
            self.error("[skipped by user]", repeat_at_end=False)
            raise ConnectionException(error_string) from err
def graphql_node_list(self, query_hash: str, query_variables: Dict[str, Any],
                      query_referer: Optional[str],
                      edge_extractor: Callable[[Dict[str, Any]], Dict[str, Any]],
                      _rhx_gis: Optional[str] = None,
                      first_data: Optional[Dict[str, Any]] = None) -> Iterator[Dict[str, Any]]:
    """
    Yield the nodes of a paginated GraphQL edge list, fetching further pages as needed.

    ``query_variables`` is modified in place (``first`` and ``after`` are set). On a
    400 response the page length is halved and the query retried, as long as the halved
    length is at least 12.

    .. deprecated:: 4.5
       Use :class:`NodeIterator` instead, which provides more functionality.
    """

    def _fetch_page():
        query_variables['first'] = self._graphql_page_length
        try:
            response = self.graphql_query(query_hash, query_variables, query_referer)
            return edge_extractor(response)
        except QueryReturnedBadRequestException:
            halved = int(self._graphql_page_length / 2)
            if halved < 12:
                raise
            self._graphql_page_length = halved
            self.error("HTTP Error 400 (Bad Request) on GraphQL Query. Retrying with shorter page length.",
                       repeat_at_end=False)
            return _fetch_page()

    page = first_data if first_data else _fetch_page()
    while True:
        for edge in page['edges']:
            yield edge['node']
        if not page['page_info']['has_next_page']:
            return
        query_variables['after'] = page['page_info']['end_cursor']
        page = _fetch_page()
def write_raw(self, resp: Union[bytes, requests.Response], filename: str) -> None:
    """Store raw data under ``filename``, going through a ``.temp`` file.

    The filename is logged first. Bytes are written as they are; for a response object the
    ``raw`` stream is copied. The temporary file is then moved over the final name.

    .. versionadded:: 4.2.1"""
    self.log(filename, end=' ', flush=True)
    temp_path = filename + '.temp'
    with open(temp_path, 'wb') as sink:
        if not isinstance(resp, requests.Response):
            sink.write(resp)
        else:
            shutil.copyfileobj(resp.raw, sink)
    os.replace(temp_path, filename)
def get_and_write_raw(self, url: str, filename: str) -> None:
    """Fetch ``url`` via :meth:`get_raw` and store the result with :meth:`write_raw`.

    :raises QueryReturnedNotFoundException: When the server responds with a 404.
    :raises QueryReturnedForbiddenException: When the server responds with a 403.
    :raises ConnectionException: When download repeatedly failed."""
    response = self.get_raw(url)
    self.write_raw(response, filename)
+ + It can be overridden to change Instaloader's behavior regarding rate limits, for example to raise a custom + exception when the rate limit is hit:: + + import instaloader + + class MyRateController(instaloader.RateController): + def sleep(self, secs): + raise MyCustomException() + + L = instaloader.Instaloader(rate_controller=lambda ctx: MyRateController(ctx)) + """ + + def __init__(self, context: InstaloaderContext): + self._context = context + self._query_timestamps: Dict[str, List[float]] = dict() + self._earliest_next_request_time = 0.0 + self._iphone_earliest_next_request_time = 0.0 + + def sleep(self, secs: float): + """Wait given number of seconds.""" + # Not static, to allow for the behavior of this method to depend on context-inherent properties, such as + # whether we are logged in. + time.sleep(secs) + + def _dump_query_timestamps(self, current_time: float, failed_query_type: str): + windows = [10, 11, 20, 22, 30, 60] + self._context.error("Number of requests within last {} minutes grouped by type:" + .format('/'.join(str(w) for w in windows)), + repeat_at_end=False) + for query_type, times in self._query_timestamps.items(): + reqs_in_sliding_window = [sum(t > current_time - w * 60 for t in times) for w in windows] + self._context.error(" {} {:>32}: {}".format( + "*" if query_type == failed_query_type else " ", + query_type, + " ".join("{:4}".format(reqs) for reqs in reqs_in_sliding_window) + ), repeat_at_end=False) + + def count_per_sliding_window(self, query_type: str) -> int: + """Return how many requests of the given type can be done within a sliding window of 11 minutes. + + This is called by :meth:`RateController.query_waittime` and allows to simply customize wait times before queries + at query_type granularity. Consider overriding :meth:`RateController.query_waittime` directly if you need more + control.""" + # Not static, to allow for the count_per_sliding_window to depend on context-inherent properties, such as + # whether we are logged in. 
+ return 75 if query_type == 'other' else 200 + + def _reqs_in_sliding_window(self, query_type: Optional[str], current_time: float, window: float) -> List[float]: + if query_type is not None: + # timestamps of type query_type + relevant_timestamps = self._query_timestamps[query_type] + else: + # all GraphQL queries, i.e. not 'iphone' or 'other' + graphql_query_timestamps = filter(lambda tp: tp[0] not in ['iphone', 'other'], + self._query_timestamps.items()) + relevant_timestamps = [t for times in (tp[1] for tp in graphql_query_timestamps) for t in times] + return list(filter(lambda t: t > current_time - window, relevant_timestamps)) + + def query_waittime(self, query_type: str, current_time: float, untracked_queries: bool = False) -> float: + """Calculate time needed to wait before query can be executed.""" + per_type_sliding_window = 660 + iphone_sliding_window = 1800 + if query_type not in self._query_timestamps: + self._query_timestamps[query_type] = [] + self._query_timestamps[query_type] = list(filter(lambda t: t > current_time - 60 * 60, + self._query_timestamps[query_type])) + + def per_type_next_request_time(): + reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time, per_type_sliding_window) + if len(reqs_in_sliding_window) < self.count_per_sliding_window(query_type): + return 0.0 + else: + return min(reqs_in_sliding_window) + per_type_sliding_window + 6 + + def gql_accumulated_next_request_time(): + if query_type in ['iphone', 'other']: + return 0.0 + gql_accumulated_sliding_window = 600 + gql_accumulated_max_count = 275 + reqs_in_sliding_window = self._reqs_in_sliding_window(None, current_time, gql_accumulated_sliding_window) + if len(reqs_in_sliding_window) < gql_accumulated_max_count: + return 0.0 + else: + return min(reqs_in_sliding_window) + gql_accumulated_sliding_window + + def untracked_next_request_time(): + if untracked_queries: + if query_type == "iphone": + reqs_in_sliding_window = 
self._reqs_in_sliding_window(query_type, current_time, + iphone_sliding_window) + self._iphone_earliest_next_request_time = min(reqs_in_sliding_window) + iphone_sliding_window + 18 + else: + reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time, + per_type_sliding_window) + self._earliest_next_request_time = min(reqs_in_sliding_window) + per_type_sliding_window + 6 + return max(self._iphone_earliest_next_request_time, self._earliest_next_request_time) + + def iphone_next_request(): + if query_type == "iphone": + reqs_in_sliding_window = self._reqs_in_sliding_window(query_type, current_time, iphone_sliding_window) + if len(reqs_in_sliding_window) >= 199: + return min(reqs_in_sliding_window) + iphone_sliding_window + 18 + return 0.0 + + return max(0.0, + max( + per_type_next_request_time(), + gql_accumulated_next_request_time(), + untracked_next_request_time(), + iphone_next_request(), + ) - current_time) + + def wait_before_query(self, query_type: str) -> None: + """This method is called before a query to Instagram. + + It calls :meth:`RateController.query_waittime` to determine the time needed to wait and then calls + :meth:`RateController.sleep` to wait until the request can be made.""" + waittime = self.query_waittime(query_type, time.monotonic(), False) + assert waittime >= 0 + if waittime > 15: + formatted_waittime = ("{} seconds".format(round(waittime)) if waittime <= 666 else + "{} minutes".format(round(waittime / 60))) + self._context.log("\nToo many queries in the last time. Need to wait {}, until {:%H:%M}." + .format(formatted_waittime, datetime.now() + timedelta(seconds=waittime))) + if waittime > 0: + self.sleep(waittime) + if query_type not in self._query_timestamps: + self._query_timestamps[query_type] = [time.monotonic()] + else: + self._query_timestamps[query_type].append(time.monotonic()) + + def handle_429(self, query_type: str) -> None: + """This method is called to handle a 429 Too Many Requests response. 
+ + It calls :meth:`RateController.query_waittime` to determine the time needed to wait and then calls + :meth:`RateController.sleep` to wait until we can repeat the same request.""" + current_time = time.monotonic() + waittime = self.query_waittime(query_type, current_time, True) + assert waittime >= 0 + self._dump_query_timestamps(current_time, query_type) + text_for_429 = ("Instagram responded with HTTP error \"429 - Too Many Requests\". Please do not run multiple " + "instances of Instaloader in parallel or within short sequence. Also, do not use any Instagram " + "App while Instaloader is running.") + self._context.error(textwrap.fill(text_for_429), repeat_at_end=False) + if waittime > 1.5: + formatted_waittime = ("{} seconds".format(round(waittime)) if waittime <= 666 else + "{} minutes".format(round(waittime / 60))) + self._context.error("The request will be retried in {}, at {:%H:%M}." + .format(formatted_waittime, datetime.now() + timedelta(seconds=waittime)), + repeat_at_end=False) + if waittime > 0: + self.sleep(waittime) diff --git a/build/lib/instaloader/lateststamps.py b/build/lib/instaloader/lateststamps.py new file mode 100644 index 0000000..02959ba --- /dev/null +++ b/build/lib/instaloader/lateststamps.py @@ -0,0 +1,117 @@ +import configparser +from datetime import datetime, timezone +from typing import Optional +from os.path import dirname +from os import makedirs + + +class LatestStamps: + """LatestStamps class. + + Convenience class for retrieving and storing data from the :option:`--latest-stamps` file. + + :param latest_stamps_file: path to file. + + .. 
versionadded:: 4.8""" + PROFILE_ID = 'profile-id' + PROFILE_PIC = 'profile-pic' + POST_TIMESTAMP = 'post-timestamp' + TAGGED_TIMESTAMP = 'tagged-timestamp' + IGTV_TIMESTAMP = 'igtv-timestamp' + STORY_TIMESTAMP = 'story-timestamp' + ISO_FORMAT = '%Y-%m-%dT%H:%M:%S.%f%z' + + def __init__(self, latest_stamps_file): + self.file = latest_stamps_file + self.data = configparser.ConfigParser() + self.data.read(latest_stamps_file) + + def _save(self): + if dn := dirname(self.file): + makedirs(dn, exist_ok=True) + with open(self.file, 'w') as f: + self.data.write(f) + + def _ensure_section(self, section: str): + if not self.data.has_section(section): + self.data.add_section(section) + + def get_profile_id(self, profile_name: str) -> Optional[int]: + """Returns stored ID of profile.""" + try: + return self.data.getint(profile_name, self.PROFILE_ID) + except (configparser.Error, ValueError): + return None + + def save_profile_id(self, profile_name: str, profile_id: int): + """Stores ID of profile.""" + self._ensure_section(profile_name) + self.data.set(profile_name, self.PROFILE_ID, str(profile_id)) + self._save() + + def rename_profile(self, old_profile: str, new_profile: str): + """Renames a profile.""" + self._ensure_section(new_profile) + for option in [self.PROFILE_ID, self.PROFILE_PIC, self.POST_TIMESTAMP, + self.TAGGED_TIMESTAMP, self.IGTV_TIMESTAMP, self.STORY_TIMESTAMP]: + if self.data.has_option(old_profile, option): + value = self.data.get(old_profile, option) + self.data.set(new_profile, option, value) + self.data.remove_section(old_profile) + self._save() + + def _get_timestamp(self, section: str, key: str) -> datetime: + try: + return datetime.strptime(self.data.get(section, key), self.ISO_FORMAT) + except (configparser.Error, ValueError): + return datetime.fromtimestamp(0, timezone.utc) + + def _set_timestamp(self, section: str, key: str, timestamp: datetime): + self._ensure_section(section) + self.data.set(section, key, timestamp.strftime(self.ISO_FORMAT)) + 
self._save() + + def get_last_post_timestamp(self, profile_name: str) -> datetime: + """Returns timestamp of last download of a profile's posts.""" + return self._get_timestamp(profile_name, self.POST_TIMESTAMP) + + def set_last_post_timestamp(self, profile_name: str, timestamp: datetime): + """Sets timestamp of last download of a profile's posts.""" + self._set_timestamp(profile_name, self.POST_TIMESTAMP, timestamp) + + def get_last_tagged_timestamp(self, profile_name: str) -> datetime: + """Returns timestamp of last download of a profile's tagged posts.""" + return self._get_timestamp(profile_name, self.TAGGED_TIMESTAMP) + + def set_last_tagged_timestamp(self, profile_name: str, timestamp: datetime): + """Sets timestamp of last download of a profile's tagged posts.""" + self._set_timestamp(profile_name, self.TAGGED_TIMESTAMP, timestamp) + + def get_last_igtv_timestamp(self, profile_name: str) -> datetime: + """Returns timestamp of last download of a profile's igtv posts.""" + return self._get_timestamp(profile_name, self.IGTV_TIMESTAMP) + + def set_last_igtv_timestamp(self, profile_name: str, timestamp: datetime): + """Sets timestamp of last download of a profile's igtv posts.""" + self._set_timestamp(profile_name, self.IGTV_TIMESTAMP, timestamp) + + def get_last_story_timestamp(self, profile_name: str) -> datetime: + """Returns timestamp of last download of a profile's stories.""" + return self._get_timestamp(profile_name, self.STORY_TIMESTAMP) + + def set_last_story_timestamp(self, profile_name: str, timestamp: datetime): + """Sets timestamp of last download of a profile's stories.""" + self._set_timestamp(profile_name, self.STORY_TIMESTAMP, timestamp) + + def get_profile_pic(self, profile_name: str) -> str: + """Returns filename of profile's last downloaded profile pic.""" + try: + return self.data.get(profile_name, self.PROFILE_PIC) + except configparser.Error: + return "" + + def set_profile_pic(self, profile_name: str, profile_pic: str): + """Sets filename 
of profile's last downloaded profile pic.""" + self._ensure_section(profile_name) + self.data.set(profile_name, self.PROFILE_PIC, profile_pic) + self._save() diff --git a/build/lib/instaloader/nodeiterator.py b/build/lib/instaloader/nodeiterator.py new file mode 100644 index 0000000..b573287 --- /dev/null +++ b/build/lib/instaloader/nodeiterator.py @@ -0,0 +1,329 @@ +import base64 +import hashlib +import json +import os +from contextlib import contextmanager +from datetime import datetime, timedelta +from lzma import LZMAError +from typing import Any, Callable, Dict, Iterable, Iterator, NamedTuple, Optional, Tuple, TypeVar + +from .exceptions import AbortDownloadException, InvalidArgumentException +from .instaloadercontext import InstaloaderContext + +class FrozenNodeIterator(NamedTuple): + query_hash: Optional[str] + query_variables: Dict + query_referer: Optional[str] + context_username: Optional[str] + total_index: int + best_before: Optional[float] + remaining_data: Optional[Dict] + first_node: Optional[Dict] + doc_id: Optional[str] +FrozenNodeIterator.query_hash.__doc__ = """The GraphQL ``query_hash`` parameter.""" +FrozenNodeIterator.query_variables.__doc__ = """The GraphQL ``query_variables`` parameter.""" +FrozenNodeIterator.query_referer.__doc__ = """The HTTP referer used for the GraphQL query.""" +FrozenNodeIterator.context_username.__doc__ = """The username who created the iterator, or ``None``.""" +FrozenNodeIterator.total_index.__doc__ = """Number of items that have already been returned.""" +FrozenNodeIterator.best_before.__doc__ = """Date when parts of the stored nodes might have expired.""" +FrozenNodeIterator.remaining_data.__doc__ = \ + """The already-retrieved, yet-unprocessed ``edges`` and the ``page_info`` at time of freezing.""" +FrozenNodeIterator.first_node.__doc__ = """Node data of the first item, if an item has been produced.""" +FrozenNodeIterator.doc_id.__doc__ = """The GraphQL ``doc_id`` parameter.""" + +T = TypeVar('T') + + +class 
NodeIterator(Iterator[T]): + """ + Iterate the nodes within edges in a GraphQL pagination. Instances of this class are returned by many (but not all) + of Instaloader's :class:`Post`-returning functions (such as :meth:`Profile.get_posts` etc.). + + What makes this iterator special is its ability to freeze/store its current state, e.g. to interrupt an iteration, + and later thaw/resume from where it left off. + + You can freeze a NodeIterator with :meth:`NodeIterator.freeze`:: + + post_iterator = profile.get_posts() + try: + for post in post_iterator: + do_something_with(post) + except KeyboardInterrupt: + save("resume_information.json", post_iterator.freeze()) + + and later reuse it with :meth:`NodeIterator.thaw` on an equally-constructed NodeIterator:: + + post_iterator = profile.get_posts() + post_iterator.thaw(load("resume_information.json")) + + (an appropriate method to load and save the :class:`FrozenNodeIterator` is e.g. + :func:`load_structure_from_file` and :func:`save_structure_to_file`.) + + A :class:`FrozenNodeIterator` can only be thawn with a matching NodeIterator, i.e. a NodeIterator instance that has + been constructed with the same parameters as the instance that is represented by the :class:`FrozenNodeIterator` in + question. This is to ensure that an iteration cannot be resumed in a wrong, unmatching loop. As a quick way to + distinguish iterators that are saved e.g. in files, there is the :attr:`NodeIterator.magic` string: Two + NodeIterators are matching if and only if they have the same magic. + + See also :func:`resumable_iteration` for a high-level context manager that handles a resumable iteration. + + .. versionchanged: 4.13 + Included support for `doc_id`-based queries (using POST method). 
+ """ + + _graphql_page_length = 12 + _shelf_life = timedelta(days=29) + + def __init__(self, + context: InstaloaderContext, + query_hash: Optional[str], + edge_extractor: Callable[[Dict[str, Any]], Dict[str, Any]], + node_wrapper: Callable[[Dict], T], + query_variables: Optional[Dict[str, Any]] = None, + query_referer: Optional[str] = None, + first_data: Optional[Dict[str, Any]] = None, + is_first: Optional[Callable[[T, Optional[T]], bool]] = None, + doc_id: Optional[str] = None): + self._context = context + self._query_hash = query_hash + self._doc_id = doc_id + self._edge_extractor = edge_extractor + self._node_wrapper = node_wrapper + self._query_variables = query_variables if query_variables is not None else {} + self._query_referer = query_referer + self._page_index = 0 + self._total_index = 0 + if first_data is not None: + self._data = first_data + self._best_before = datetime.now() + NodeIterator._shelf_life + else: + self._data = self._query() + self._first_node: Optional[Dict] = None + self._is_first = is_first + + def _query(self, after: Optional[str] = None) -> Dict: + if self._doc_id is not None: + return self._query_doc_id(self._doc_id, after) + else: + assert self._query_hash is not None + return self._query_query_hash(self._query_hash, after) + + def _query_doc_id(self, doc_id: str, after: Optional[str] = None) -> Dict: + pagination_variables: Dict[str, Any] = {'__relay_internal__pv__PolarisFeedShareMenurelayprovider': False} + if after is not None: + pagination_variables['after'] = after + pagination_variables['before'] = None + pagination_variables['first'] = 12 + pagination_variables['last'] = None + data = self._edge_extractor( + self._context.doc_id_graphql_query( + doc_id, {**self._query_variables, **pagination_variables}, self._query_referer + ) + ) + self._best_before = datetime.now() + NodeIterator._shelf_life + return data + + def _query_query_hash(self, query_hash: str, after: Optional[str] = None) -> Dict: + pagination_variables: 
Dict[str, Any] = {'first': NodeIterator._graphql_page_length} + if after is not None: + pagination_variables['after'] = after + data = self._edge_extractor( + self._context.graphql_query( + query_hash, {**self._query_variables, **pagination_variables}, self._query_referer + ) + ) + self._best_before = datetime.now() + NodeIterator._shelf_life + return data + + def __iter__(self): + return self + + def __next__(self) -> T: + if self._page_index < len(self._data['edges']): + node = self._data['edges'][self._page_index]['node'] + page_index, total_index = self._page_index, self._total_index + try: + self._page_index += 1 + self._total_index += 1 + except KeyboardInterrupt: + self._page_index, self._total_index = page_index, total_index + raise + item = self._node_wrapper(node) + if self._is_first is not None: + if self._is_first(item, self.first_item): + self._first_node = node + else: + if self._first_node is None: + self._first_node = node + return item + if self._data.get('page_info', {}).get('has_next_page'): + query_response = self._query(self._data['page_info']['end_cursor']) + if self._data['edges'] != query_response['edges'] and len(query_response['edges']) > 0: + page_index, data = self._page_index, self._data + try: + self._page_index = 0 + self._data = query_response + except KeyboardInterrupt: + self._page_index, self._data = page_index, data + raise + return self.__next__() + raise StopIteration() + + @property + def count(self) -> Optional[int]: + """The ``count`` as returned by Instagram. 
This is not always the total count this iterator will yield.""" + return self._data.get('count') if self._data is not None else None + + @property + def total_index(self) -> int: + """Number of items that have already been returned.""" + return self._total_index + + @property + def magic(self) -> str: + """Magic string for easily identifying a matching iterator file for resuming (hash of some parameters).""" + magic_hash = hashlib.blake2b(digest_size=6) + magic_hash.update(json.dumps( + [self._query_hash, self._query_variables, self._query_referer, self._context.username] + ).encode()) + return base64.urlsafe_b64encode(magic_hash.digest()).decode() + + @property + def first_item(self) -> Optional[T]: + """ + If this iterator has produced any items, returns the first item produced. + + It is possible to override what is considered the first item (for example, to consider the + newest item in case items are not in strict chronological order) by passing a callback + function as the `is_first` parameter when creating the class. + + .. versionadded:: 4.8 + .. versionchanged:: 4.9.2 + What is considered the first item can be overridden. 
+ """ + return self._node_wrapper(self._first_node) if self._first_node is not None else None + + @staticmethod + def page_length() -> int: + return NodeIterator._graphql_page_length + + def freeze(self) -> FrozenNodeIterator: + """Freeze the iterator for later resuming.""" + remaining_data = None + if self._data is not None: + remaining_data = {**self._data, + 'edges': (self._data['edges'][(max(self._page_index - 1, 0)):])} + return FrozenNodeIterator( + query_hash=self._query_hash, + query_variables=self._query_variables, + query_referer=self._query_referer, + context_username=self._context.username, + total_index=max(self.total_index - 1, 0), + best_before=self._best_before.timestamp() if self._best_before else None, + remaining_data=remaining_data, + first_node=self._first_node, + doc_id=self._doc_id, + ) + + def thaw(self, frozen: FrozenNodeIterator) -> None: + """ + Use this iterator for resuming from earlier iteration. + + :raises InvalidArgumentException: + If + + - the iterator on which this method is called has already been used, or + - the given :class:`FrozenNodeIterator` does not match, i.e. belongs to a different iteration. 
+ """ + if self._total_index or self._page_index: + raise InvalidArgumentException("thaw() called on already-used iterator.") + if (self._query_hash != frozen.query_hash or + self._query_variables != frozen.query_variables or + self._query_referer != frozen.query_referer or + self._context.username != frozen.context_username or + self._doc_id != frozen.doc_id): + raise InvalidArgumentException("Mismatching resume information.") + if not frozen.best_before: + raise InvalidArgumentException("\"best before\" date missing.") + if frozen.remaining_data is None: + raise InvalidArgumentException("\"remaining_data\" missing.") + self._total_index = frozen.total_index + self._best_before = datetime.fromtimestamp(frozen.best_before) + self._data = frozen.remaining_data + if frozen.first_node is not None: + self._first_node = frozen.first_node + + +@contextmanager +def resumable_iteration(context: InstaloaderContext, + iterator: Iterable, + load: Callable[[InstaloaderContext, str], Any], + save: Callable[[FrozenNodeIterator, str], None], + format_path: Callable[[str], str], + check_bbd: bool = True, + enabled: bool = True) -> Iterator[Tuple[bool, int]]: + """ + High-level context manager to handle a resumable iteration that can be interrupted + with a :class:`KeyboardInterrupt` or an :class:`AbortDownloadException`. + + It can be used as follows to automatically load a previously-saved state into the iterator, save the iterator's + state when interrupted, and delete the resume file upon completion:: + + post_iterator = profile.get_posts() + with resumable_iteration( + context=L.context, + iterator=post_iterator, + load=lambda _, path: FrozenNodeIterator(**json.load(open(path))), + save=lambda fni, path: json.dump(fni._asdict(), open(path, 'w')), + format_path=lambda magic: "resume_info_{}.json".format(magic) + ) as (is_resuming, start_index): + for post in post_iterator: + do_something_with(post) + + It yields a tuple (is_resuming, start_index). 
+ + When the passed iterator is not a :class:`NodeIterator`, it behaves as if ``resumable_iteration`` was not used, + just executing the inner body. + + :param context: The :class:`InstaloaderContext`. + :param iterator: The fresh :class:`NodeIterator`. + :param load: Loads a FrozenNodeIterator from given path. The object is ignored if it has a different type. + :param save: Saves the given FrozenNodeIterator to the given path. + :param format_path: Returns the path to the resume file for the given magic. + :param check_bbd: Whether to check the best before date and reject an expired FrozenNodeIterator. + :param enabled: Set to False to disable all functionality and simply execute the inner body. + + .. versionchanged:: 4.7 + Also interrupt on :class:`AbortDownloadException`. + """ + if not enabled or not isinstance(iterator, NodeIterator): + yield False, 0 + return + is_resuming = False + start_index = 0 + resume_file_path = format_path(iterator.magic) + resume_file_exists = os.path.isfile(resume_file_path) + if resume_file_exists: + try: + fni = load(context, resume_file_path) + if not isinstance(fni, FrozenNodeIterator): + raise InvalidArgumentException("Invalid type.") + if check_bbd and fni.best_before and datetime.fromtimestamp(fni.best_before) < datetime.now(): + raise InvalidArgumentException("\"Best before\" date exceeded.") + iterator.thaw(fni) + is_resuming = True + start_index = iterator.total_index + context.log("Resuming from {}.".format(resume_file_path)) + except (InvalidArgumentException, LZMAError, json.decoder.JSONDecodeError, EOFError) as exc: + context.error("Warning: Not resuming from {}: {}".format(resume_file_path, exc)) + try: + yield is_resuming, start_index + except (KeyboardInterrupt, AbortDownloadException): + if os.path.dirname(resume_file_path): + os.makedirs(os.path.dirname(resume_file_path), exist_ok=True) + save(iterator.freeze(), resume_file_path) + context.log("\nSaved resume information to {}.".format(resume_file_path)) + raise 
+ if resume_file_exists: + os.unlink(resume_file_path) + context.log("Iteration complete, deleted resume information file {}.".format(resume_file_path)) diff --git a/build/lib/instaloader/py.typed b/build/lib/instaloader/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/build/lib/instaloader/sectioniterator.py b/build/lib/instaloader/sectioniterator.py new file mode 100644 index 0000000..b16f4dc --- /dev/null +++ b/build/lib/instaloader/sectioniterator.py @@ -0,0 +1,46 @@ +from typing import Any, Callable, Dict, Iterator, Optional, TypeVar + +from .instaloadercontext import InstaloaderContext + +T = TypeVar('T') + + +class SectionIterator(Iterator[T]): + """Iterator for the new 'sections'-style responses. + + .. versionadded:: 4.9""" + def __init__(self, + context: InstaloaderContext, + sections_extractor: Callable[[Dict[str, Any]], Dict[str, Any]], + media_wrapper: Callable[[Dict], T], + query_path: str, + first_data: Optional[Dict[str, Any]] = None): + self._context = context + self._sections_extractor = sections_extractor + self._media_wrapper = media_wrapper + self._query_path = query_path + self._data = first_data or self._query() + self._page_index = 0 + self._section_index = 0 + + def __iter__(self): + return self + + def _query(self, max_id: Optional[str] = None) -> Dict[str, Any]: + pagination_variables = {"max_id": max_id} if max_id is not None else {} + return self._sections_extractor( + self._context.get_json(self._query_path, params={"__a": 1, "__d": "dis", **pagination_variables}) + ) + + def __next__(self) -> T: + if self._page_index < len(self._data['sections']): + media = self._data['sections'][self._page_index]['layout_content']['medias'][self._section_index]['media'] + self._section_index += 1 + if self._section_index >= len(self._data['sections'][self._page_index]['layout_content']['medias']): + self._section_index = 0 + self._page_index += 1 + return self._media_wrapper(media) + if self._data['more_available']: + 
self._page_index, self._section_index, self._data = 0, 0, self._query(self._data["next_max_id"]) + return self.__next__() + raise StopIteration() diff --git a/build/lib/instaloader/structures.py b/build/lib/instaloader/structures.py new file mode 100644 index 0000000..c9ae5a1 --- /dev/null +++ b/build/lib/instaloader/structures.py @@ -0,0 +1,2191 @@ +import json +import lzma +import re +from base64 import b64decode, b64encode +from contextlib import suppress +from datetime import datetime +from itertools import islice +from pathlib import Path +from typing import Any, Callable, Dict, Iterable, Iterator, List, NamedTuple, Optional, Tuple, Union +from unicodedata import normalize + +from . import __version__ +from .exceptions import * +from .instaloadercontext import InstaloaderContext +from .nodeiterator import FrozenNodeIterator, NodeIterator +from .sectioniterator import SectionIterator + + +class PostSidecarNode(NamedTuple): + """Item of a Sidecar Post.""" + is_video: bool + display_url: str + video_url: str + + +PostSidecarNode.is_video.__doc__ = "Whether this node is a video." +PostSidecarNode.display_url.__doc__ = "URL of image or video thumbnail." +PostSidecarNode.video_url.__doc__ = "URL of video or None." + + +class PostCommentAnswer(NamedTuple): + id: int + created_at_utc: datetime + text: str + owner: 'Profile' + likes_count: int + + +PostCommentAnswer.id.__doc__ = "ID number of comment." +PostCommentAnswer.created_at_utc.__doc__ = ":class:`~datetime.datetime` when comment was created (UTC)." +PostCommentAnswer.text.__doc__ = "Comment text." +PostCommentAnswer.owner.__doc__ = "Owner :class:`Profile` of the comment." +PostCommentAnswer.likes_count.__doc__ = "Number of likes on comment." 
+ + +class PostComment: + def __init__(self, context: 'InstaloaderContext', node: Dict[str, Any], + answers: Iterator['PostCommentAnswer'], post: 'Post'): + self._context = context + self._node = node + self._answers = answers + self._post = post + + @classmethod + def from_iphone_struct( + cls, + context: "InstaloaderContext", + media: Dict[str, Any], + answers: Iterator["PostCommentAnswer"], + post: "Post", + ): + return cls( + context=context, + node={ + "id": int(media["pk"]), + "created_at": media["created_at"], + "text": media["text"], + "edge_liked_by": { + "count": media["comment_like_count"], + }, + "iphone_struct": media, + }, + answers=answers, + post=post, + ) + + @property + def id(self) -> int: + """ ID number of comment. """ + return self._node['id'] + + @property + def created_at_utc(self) -> datetime: + """ :class:`~datetime.datetime` when comment was created (UTC). """ + return datetime.utcfromtimestamp(self._node['created_at']) + + @property + def text(self): + """ Comment text. """ + return self._node['text'] + + @property + def owner(self) -> "Profile": + """ Owner :class:`Profile` of the comment. """ + if "iphone_struct" in self._node: + return Profile.from_iphone_struct( + self._context, self._node["iphone_struct"]["user"] + ) + return Profile(self._context, self._node["owner"]) + + @property + def likes_count(self): + """ Number of likes on comment. """ + return self._node.get('edge_liked_by', {}).get('count', 0) + + @property + def answers(self) -> Iterator['PostCommentAnswer']: + """ Iterator which yields all :class:`PostCommentAnswer` for the comment. """ + return self._answers + + @property + def likes(self) -> Iterable['Profile']: + """ + Iterate over all likes of a comment. A :class:`Profile` instance of each like is yielded. + + .. 
versionadded:: 4.11 + """ + if self.likes_count != 0: + return NodeIterator( + self._context, + '5f0b1f6281e72053cbc07909c8d154ae', + lambda d: d['data']['comment']['edge_liked_by'], + lambda n: Profile(self._context, n), + {'comment_id': self.id}, + 'https://www.instagram.com/p/{0}/'.format(self._post.shortcode), + ) + return [] + + def __repr__(self): + return f'' + +class PostLocation(NamedTuple): + id: int + name: str + slug: str + has_public_page: Optional[bool] + lat: Optional[float] + lng: Optional[float] + + +PostLocation.id.__doc__ = "ID number of location." +PostLocation.name.__doc__ = "Location name." +PostLocation.slug.__doc__ = "URL friendly variant of location name." +PostLocation.has_public_page.__doc__ = "Whether location has a public page." +PostLocation.lat.__doc__ = "Latitude (:class:`float` or None)." +PostLocation.lng.__doc__ = "Longitude (:class:`float` or None)." + +# This regular expression is by MiguelX413 +_hashtag_regex = re.compile(r"(?:#)((?:\w){1,150})") + +# This regular expression is modified from jStassen, adjusted to use Python's \w to +# support Unicode and a word/beginning of string delimiter at the beginning to ensure +# that no email addresses join the list of mentions. +# http://blog.jstassen.com/2016/03/code-regex-for-instagram-username-and-hashtags/ +_mention_regex = re.compile(r"(?:^|[^\w\n]|_)(?:@)(\w(?:(?:\w|(?:\.(?!\.))){0,28}(?:\w))?)", re.ASCII) + + +def _optional_normalize(string: Optional[str]) -> Optional[str]: + if string is not None: + return normalize("NFC", string) + else: + return None + + +class Post: + """ + Structure containing information about an Instagram post. 
+ + Created by methods :meth:`Profile.get_posts`, :meth:`Instaloader.get_hashtag_posts`, + :meth:`Instaloader.get_feed_posts` and :meth:`Profile.get_saved_posts`, which return iterators of Posts:: + + L = Instaloader() + for post in L.get_hashtag_posts(HASHTAG): + L.download_post(post, target='#'+HASHTAG) + + Might also be created with:: + + post = Post.from_shortcode(L.context, SHORTCODE) + + This class unifies access to the properties associated with a post. It implements == and is + hashable. + + :param context: :attr:`Instaloader.context` used for additional queries if neccessary.. + :param node: Node structure, as returned by Instagram. + :param owner_profile: The Profile of the owner, if already known at creation. + """ + + def __init__(self, context: InstaloaderContext, node: Dict[str, Any], + owner_profile: Optional['Profile'] = None): + assert 'shortcode' in node or 'code' in node + + self._context = context + self._node = node + self._owner_profile = owner_profile + self._full_metadata_dict: Optional[Dict[str, Any]] = None + self._location: Optional[PostLocation] = None + self._iphone_struct_ = None + if 'iphone_struct' in node: + # if loaded from JSON with load_structure_from_file() + self._iphone_struct_ = node['iphone_struct'] + + @classmethod + def from_shortcode(cls, context: InstaloaderContext, shortcode: str): + """Create a post object from a given shortcode""" + # pylint:disable=protected-access + post = cls(context, {'shortcode': shortcode}) + post._node = post._full_metadata + return post + + @classmethod + def from_mediaid(cls, context: InstaloaderContext, mediaid: int): + """Create a post object from a given mediaid""" + return cls.from_shortcode(context, Post.mediaid_to_shortcode(mediaid)) + + @classmethod + def from_iphone_struct(cls, context: InstaloaderContext, media: Dict[str, Any]): + """Create a post from a given iphone_struct. + + .. 
@staticmethod
def _convert_iphone_carousel(iphone_node: Dict[str, Any], media_types: Dict[int, str]) -> Dict[str, Any]:
    """Translate one carousel entry of an iPhone media struct into a sidecar child node.

    :param iphone_node: Carousel entry; must provide ``image_versions2`` and ``media_type``.
    :param media_types: Mapping from the numeric ``media_type`` to the typename string.
    :return: Dict with ``display_url`` and ``is_video``, plus ``video_url`` if a video version exists.
    :raises KeyError: If ``image_versions2``/``media_type`` is missing or the media type is unknown.
    """
    fake_node = {
        "display_url": iphone_node["image_versions2"]["candidates"][0]["url"],
        "is_video": media_types[iphone_node["media_type"]] == "GraphVideo",
    }
    # A truthiness check covers a missing key, an explicit None and an empty list alike; indexing
    # an empty list would raise an IndexError.
    video_versions = iphone_node.get("video_versions")
    if video_versions:
        fake_node["video_url"] = video_versions[0]["url"]
    return fake_node
@staticmethod
def mediaid_to_shortcode(mediaid: int) -> str:
    """Encode a numeric mediaid as its shortcode.

    The mediaid is written as 9 big-endian bytes and base64-encoded with ``-`` and ``_`` as the
    two extra alphabet symbols; leading ``A`` characters (zero padding) are dropped.

    :param mediaid: Numeric media ID.
    :return: The shortcode string.
    :raises InvalidArgumentException: If *mediaid* needs more than 64 bits.
    """
    if mediaid.bit_length() > 64:
        raise InvalidArgumentException("Wrong mediaid {0}, unable to convert to shortcode".format(str(mediaid)))
    raw_bytes = mediaid.to_bytes(9, 'big')
    encoded = b64encode(raw_bytes, b'-_').decode()
    # Only leading 'A's are padding; any 'A' further inside the code is significant.
    return encoded.lstrip('A')
def _field(self, *keys) -> Any:
    """Look up a nested value by following *keys* one after another.

    The lookup is tried on ``_node`` first; only if that raises a KeyError is it repeated on
    ``_full_metadata``. Raises KeyError if the keys are found in neither structure.
    """
    def _descend(container):
        for key in keys:
            container = container[key]
        return container

    try:
        return _descend(self._node)
    except KeyError:
        return _descend(self._full_metadata)
@property
def owner_id(self) -> int:
    """The ID of the Post's owner.

    :return: The owner's user ID as an :class:`int`, whichever source it is read from.
    """
    # The ID may already be available, e.g. if the post instance was created
    # from an `hashtag.get_posts()` iterator, so no need to make another
    # http request.
    if 'owner' in self._node and 'id' in self._node['owner']:
        # Convert explicitly so that this branch returns the annotated type, like the
        # fallback branch below, rather than the raw value stored in the node.
        return int(self._node['owner']['id'])
    return self.owner_profile.userid
def get_is_videos(self) -> List[bool]:
    """
    Return a list containing the ``is_video`` property for each media in the post.

    For a post that is not a sidecar, the list has exactly one entry.

    .. versionadded:: 4.7
    """
    if self.typename != 'GraphSidecar':
        return [self.is_video]
    children = self._field('edge_sidecar_to_children', 'edges')
    return [child['node']['is_video'] for child in children]
@property
def caption(self) -> Optional[str]:
    """Caption of the post in Unicode NFC form, or None if the node holds no caption text."""
    node = self._node
    if "edge_media_to_caption" in node and node["edge_media_to_caption"]["edges"]:
        text = node["edge_media_to_caption"]["edges"][0]["node"]["text"]
    elif "caption" in node:
        text = node["caption"]
    else:
        return None
    # A caption entry may be present but None; only real text is normalized.
    return normalize("NFC", text) if text is not None else None
@property
def tagged_users(self) -> List[str]:
    """List of all lowercased users that are tagged in the Post.

    Returns an empty list if a KeyError occurs while reading the tag edges.
    """
    try:
        tag_edges = self._field('edge_media_to_tagged_user', 'edges')
        usernames = []
        for tag_edge in tag_edges:
            usernames.append(tag_edge['node']['user']['username'].lower())
        return usernames
    except KeyError:
        return []
def _get_comments_via_iphone_endpoint(self) -> Iterable[PostComment]:
    """
    Iterate over all comments of the post via an iPhone endpoint.

    The first page is requested when this method is called; further pages are requested lazily
    while the returned generator is consumed.

    .. versionadded:: 4.10.3
       fallback for :issue:`2125`.
    """
    def _query(min_id=None):
        # Fetch one page of comments; min_id is the pagination cursor of the previous page.
        pagination_params = {"min_id": min_id} if min_id is not None else {}
        return self._context.get_iphone_json(
            f"api/v1/media/{self.mediaid}/comments/",
            {
                "can_support_threading": "true",
                "permalink_enabled": "false",
                **pagination_params,
            },
        )

    def _answers(comment_node):
        # Lazily yield the answers to one comment; issues an extra request only if the previewed
        # answers are incomplete.
        def _answer(child_comment):
            return PostCommentAnswer(
                id=int(child_comment["pk"]),
                created_at_utc=datetime.utcfromtimestamp(child_comment["created_at"]),
                text=child_comment["text"],
                owner=Profile.from_iphone_struct(self._context, child_comment["user"]),
                likes_count=child_comment["comment_like_count"],
            )

        child_comment_count = comment_node["child_comment_count"]
        if child_comment_count == 0:
            return
        preview_child_comments = comment_node["preview_child_comments"]
        if child_comment_count == len(preview_child_comments):
            yield from (
                _answer(child_comment) for child_comment in preview_child_comments
            )
            return
        pk = comment_node["pk"]
        answers_json = self._context.get_iphone_json(
            f"api/v1/media/{self.mediaid}/comments/{pk}/child_comments/",
            {"max_id": ""},
        )
        yield from (
            _answer(child_comment) for child_comment in answers_json["child_comments"]
        )

    def _paginated_comments(comments_json):
        # Walk the pages in a loop. Delegating to a fresh generator per page via ``yield from``
        # would nest one generator deeper for every page and could exceed the interpreter's
        # recursion limit on posts with very many comment pages.
        while True:
            for comment_node in comments_json.get("comments", []):
                yield PostComment.from_iphone_struct(
                    self._context, comment_node, _answers(comment_node), self
                )

            next_min_id = comments_json.get("next_min_id")
            if not next_min_id:
                return
            comments_json = _query(next_min_id)

    return _paginated_comments(_query())
+ """ + if not self._context.is_logged_in: + raise LoginRequiredException("Login required to access comments of a post.") + + def _postcommentanswer(node): + return PostCommentAnswer(id=int(node['id']), + created_at_utc=datetime.utcfromtimestamp(node['created_at']), + text=node['text'], + owner=Profile(self._context, node['owner']), + likes_count=node.get('edge_liked_by', {}).get('count', 0)) + + def _postcommentanswers(node): + if 'edge_threaded_comments' not in node: + return + answer_count = node['edge_threaded_comments']['count'] + if answer_count == 0: + # Avoid doing additional requests if there are no comment answers + return + answer_edges = node['edge_threaded_comments']['edges'] + if answer_count == len(answer_edges): + # If the answer's metadata already contains all comments, don't do GraphQL requests to obtain them + yield from (_postcommentanswer(comment['node']) for comment in answer_edges) + return + yield from NodeIterator( + self._context, + '51fdd02b67508306ad4484ff574a0b62', + lambda d: d['data']['comment']['edge_threaded_comments'], + _postcommentanswer, + {'comment_id': node['id']}, + 'https://www.instagram.com/p/{0}/'.format(self.shortcode), + ) + + def _postcomment(node): + return PostComment(context=self._context, node=node, + answers=_postcommentanswers(node), post=self) + if self.comments == 0: + # Avoid doing additional requests if there are no comments + return [] + + comment_edges = self._field('edge_media_to_comment', 'edges') + answers_count = sum(edge['node'].get('edge_threaded_comments', {}).get('count', 0) for edge in comment_edges) + + if self.comments == len(comment_edges) + answers_count: + # If the Post's metadata already contains all parent comments, don't do GraphQL requests to obtain them + return [_postcomment(comment['node']) for comment in comment_edges] + + if self.comments > NodeIterator.page_length(): + # comments pagination via our graphql query does not work reliably anymore (issue #2125), fallback to an + # iphone 
def get_likes(self) -> Iterator['Profile']:
    """
    Iterate over all likes of the post. A :class:`Profile` instance of each likee is yielded.

    .. versionchanged:: 4.5.4
       Require being logged in (as required by Instagram).

    :raises LoginRequiredException: If the context is not logged in.
    """
    if not self._context.is_logged_in:
        raise LoginRequiredException("Login required to access likes of a post.")
    if self.likes == 0:
        # Avoid doing additional requests if there are no likes
        return
    preview_edges = self._field('edge_media_preview_like', 'edges')
    if self.likes != len(preview_edges):
        # The metadata holds only part of the likes, so they are obtained with GraphQL requests
        yield from NodeIterator(
            self._context,
            '1cb6ec562846122743b61e492c85999f',
            lambda d: d['data']['shortcode_media']['edge_liked_by'],
            lambda n: Profile(self._context, n),
            {'shortcode': self.shortcode},
            'https://www.instagram.com/p/{0}/'.format(self.shortcode),
        )
        return
    # The Post's metadata already contains all likes
    for like in preview_edges:
        yield Profile(self._context, like['node'])
@property
def is_pinned(self) -> bool:
    """
    .. deprecated:: 4.10.3
       This information is not returned by IG anymore

    Used to return True if this Post has been pinned by at least one user, now likely returns always false.

    .. versionadded:: 4.9.2"""
    # A missing 'pinned_for_users' entry counts the same as an empty one.
    return bool(self._node.get('pinned_for_users'))
@classmethod
def from_username(cls, context: InstaloaderContext, username: str):
    """Create a Profile instance from a given username, raise exception if it does not exist.

    See also :meth:`Instaloader.check_profile_id`.

    :param context: :attr:`Instaloader.context`
    :param username: Username; it is lowercased before the instance is created.
    :raises: :class:`ProfileNotExistsException`
    """
    # pylint:disable=protected-access
    initial_node = {'username': username.lower()}
    instance = cls(context, initial_node)
    # Load the metadata right away to raise ProfileNotExistsException now in case username is invalid
    instance._obtain_metadata()
    return instance
@classmethod
def from_iphone_struct(cls, context: InstaloaderContext, media: Dict[str, Any]):
    """Create a profile from a given iphone_struct.

    The struct's ``pk`` becomes the node's ``id`` and ``profile_pic_url`` becomes
    ``profile_pic_url_hd``; the struct itself is kept under ``iphone_struct``.

    .. versionadded:: 4.9"""
    node = {"id": media["pk"]}
    # These fields carry the same name in both structures.
    node.update((key, media[key]) for key in ("username", "is_private", "full_name"))
    node["profile_pic_url_hd"] = media["profile_pic_url"]
    node["iphone_struct"] = media
    return cls(context, node)
def _asdict(self):
    """Return a shallow copy of the profile node without its post collections.

    The cached iPhone struct, if any, is stored under ``iphone_struct``.
    """
    # remove posts to avoid "Circular reference detected" exception
    post_collections = ('edge_media_collections', 'edge_owner_to_timeline_media',
                        'edge_saved_media', 'edge_felix_video_timeline')
    json_node = {key: value for key, value in self._node.items() if key not in post_collections}
    if self._iphone_struct_:
        json_node['iphone_struct'] = self._iphone_struct_
    return json_node
def __repr__(self):
    """Return a short description naming the profile's username and user ID."""
    # The template must contain placeholders; an empty template silently discards both
    # arguments and makes every profile print as an empty string.
    return '<Profile {} ({})>'.format(self.username, self.userid)
@property
def biography(self) -> str:
    """Biography text of the profile, converted to Unicode NFC form."""
    raw_biography = self._metadata('biography')
    return normalize("NFC", raw_biography)
@property
def has_public_story(self) -> bool:
    """Whether the profile has a public story, as reported by a GraphQL query.

    The answer is cached on the instance after the first query.
    """
    # Compare against None: a plain truthiness test would treat a cached ``False`` as
    # "not yet known" and repeat the query on every access.
    if self._has_public_story is None:
        self._obtain_metadata()
        # query rate might be limited:
        data = self._context.graphql_query('9ca88e465c3f866a76f7adee3871bdd8',
                                           {'user_id': self.userid, 'include_chaining': False,
                                            'include_reel': False, 'include_suggested_users': False,
                                            'include_logged_out_extras': True,
                                            'include_highlight_reels': False},
                                           'https://www.instagram.com/{}/'.format(self.username))
        self._has_public_story = data['data']['user']['has_public_story']
    assert self._has_public_story is not None
    return self._has_public_story
def get_posts(self) -> NodeIterator[Post]:
    """Retrieve all posts from a profile.

    The profile metadata is loaded first; each node of the timeline connection is then wrapped
    with :meth:`Post.from_iphone_struct`.

    :rtype: NodeIterator[Post]"""
    self._obtain_metadata()

    def _timeline_connection(response):
        return response['data']['xdt_api__v1__feed__user_timeline_graphql_connection']

    def _wrap_post(node):
        return Post.from_iphone_struct(self._context, node)

    timeline_variables = {
        'data': {
            'count': 12, 'include_relationship_info': True,
            'latest_besties_reel_media': True, 'latest_reel_media': True,
        },
        'username': self.username,
    }
    profile_url = 'https://www.instagram.com/{0}/'.format(self.username)
    return NodeIterator(
        context=self._context,
        edge_extractor=_timeline_connection,
        node_wrapper=_wrap_post,
        query_variables=timeline_variables,
        query_referer=profile_url,
        is_first=Profile._make_is_newest_checker(),
        doc_id='7898261790222653',
        query_hash=None,
    )
+ + :rtype: NodeIterator[Post]""" + + if self.username != self._context.username: + raise LoginRequiredException(f"Login as {self.username} required to get that profile's saved posts.") + + return NodeIterator( + self._context, + 'f883d95537fbcd400f466f63d42bd8a1', + lambda d: d['data']['user']['edge_saved_media'], + lambda n: Post(self._context, n), + {'id': self.userid}, + 'https://www.instagram.com/{0}/'.format(self.username), + ) + + def get_tagged_posts(self) -> NodeIterator[Post]: + """Retrieve all posts where a profile is tagged. + + :rtype: NodeIterator[Post] + + .. versionadded:: 4.0.7""" + self._obtain_metadata() + return NodeIterator( + self._context, + 'e31a871f7301132ceaab56507a66bbb7', + lambda d: d['data']['user']['edge_user_to_photos_of_you'], + lambda n: Post(self._context, n, self if int(n['owner']['id']) == self.userid else None), + {'id': self.userid}, + 'https://www.instagram.com/{0}/'.format(self.username), + is_first=Profile._make_is_newest_checker() + ) + + def get_igtv_posts(self) -> NodeIterator[Post]: + """Retrieve all IGTV posts. + + :rtype: NodeIterator[Post] + + .. versionadded:: 4.3""" + self._obtain_metadata() + return NodeIterator( + self._context, + 'bc78b344a68ed16dd5d7f264681c4c76', + lambda d: d['data']['user']['edge_felix_video_timeline'], + lambda n: Post(self._context, n, self), + {'id': self.userid}, + 'https://www.instagram.com/{0}/channel/'.format(self.username), + self._metadata('edge_felix_video_timeline'), + Profile._make_is_newest_checker() + ) + + @staticmethod + def _make_is_newest_checker() -> Callable[[Post, Optional[Post]], bool]: + return lambda post, first: first is None or post.date_local > first.date_local + + def get_followed_hashtags(self) -> NodeIterator['Hashtag']: + """ + Retrieve list of hashtags followed by given profile. + To use this, one needs to be logged in and private profiles has to be followed. + + :rtype: NodeIterator[Hashtag] + + .. 
versionadded:: 4.10 + """ + if not self._context.is_logged_in: + raise LoginRequiredException("Login required to get a profile's followers.") + self._obtain_metadata() + return NodeIterator( + self._context, + 'e6306cc3dbe69d6a82ef8b5f8654c50b', + lambda d: d["data"]["user"]["edge_following_hashtag"], + lambda n: Hashtag(self._context, n), + {'id': str(self.userid)}, + 'https://www.instagram.com/{0}/'.format(self.username), + ) + + def get_followers(self) -> NodeIterator['Profile']: + """ + Retrieve list of followers of given profile. + To use this, one needs to be logged in and private profiles has to be followed. + + :rtype: NodeIterator[Profile] + """ + if not self._context.is_logged_in: + raise LoginRequiredException("Login required to get a profile's followers.") + self._obtain_metadata() + return NodeIterator( + self._context, + '37479f2b8209594dde7facb0d904896a', + lambda d: d['data']['user']['edge_followed_by'], + lambda n: Profile(self._context, n), + {'id': str(self.userid)}, + 'https://www.instagram.com/{0}/'.format(self.username), + ) + + def get_followees(self) -> NodeIterator['Profile']: + """ + Retrieve list of followees (followings) of given profile. + To use this, one needs to be logged in and private profiles has to be followed. + + :rtype: NodeIterator[Profile] + """ + if not self._context.is_logged_in: + raise LoginRequiredException("Login required to get a profile's followees.") + self._obtain_metadata() + return NodeIterator( + self._context, + '58712303d941c6855d4e888c5f0cd22f', + lambda d: d['data']['user']['edge_follow'], + lambda n: Profile(self._context, n), + {'id': str(self.userid)}, + 'https://www.instagram.com/{0}/'.format(self.username), + ) + + def get_similar_accounts(self) -> Iterator['Profile']: + """ + Retrieve list of suggested / similar accounts for this profile. + To use this, one needs to be logged in. + + .. 
versionadded:: 4.4 + """ + if not self._context.is_logged_in: + raise LoginRequiredException("Login required to get a profile's similar accounts.") + self._obtain_metadata() + yield from (Profile(self._context, edge["node"]) for edge in + self._context.graphql_query("ad99dd9d3646cc3c0dda65debcd266a7", + {"user_id": str(self.userid), "include_chaining": True}, + "https://www.instagram.com/{0}/" + .format(self.username))["data"]["user"]["edge_chaining"]["edges"]) + + +class StoryItem: + """ + Structure containing information about a user story item i.e. image or video. + + Created by method :meth:`Story.get_items`. This class implements == and is hashable. + + :param context: :class:`InstaloaderContext` instance used for additional queries if necessary. + :param node: Dictionary containing the available information of the story item. + :param owner_profile: :class:`Profile` instance representing the story owner. + """ + + def __init__(self, context: InstaloaderContext, node: Dict[str, Any], owner_profile: Optional[Profile] = None): + self._context = context + self._node = node + self._owner_profile = owner_profile + self._iphone_struct_ = None + if 'iphone_struct' in node: + # if loaded from JSON with load_structure_from_file() + self._iphone_struct_ = node['iphone_struct'] + + def _asdict(self): + node = self._node + if self._owner_profile: + node['owner'] = self._owner_profile._asdict() + if self._iphone_struct_: + node['iphone_struct'] = self._iphone_struct_ + return node + + @property + def mediaid(self) -> int: + """The mediaid is a decimal representation of the media shortcode.""" + return int(self._node['id']) + + @property + def shortcode(self) -> str: + """Convert :attr:`~StoryItem.mediaid` to a shortcode-like string, allowing ``{shortcode}`` to be used with + :option:`--filename-pattern`.""" + return Post.mediaid_to_shortcode(self.mediaid) + + def __repr__(self): + return ''.format(self.mediaid) + + def __eq__(self, o: object) -> bool: + if isinstance(o, 
StoryItem): + return self.mediaid == o.mediaid + return NotImplemented + + def __hash__(self) -> int: + return hash(self.mediaid) + + @classmethod + def from_mediaid(cls, context: InstaloaderContext, mediaid: int): + """Create a StoryItem object from a given mediaid. + + .. versionadded:: 4.9 + """ + pic_json = context.graphql_query( + '2b0673e0dc4580674a88d426fe00ea90', + {'shortcode': Post.mediaid_to_shortcode(mediaid)} + ) + shortcode_media = pic_json['data']['shortcode_media'] + if shortcode_media is None: + raise BadResponseException("Fetching StoryItem metadata failed.") + return cls(context, shortcode_media) + + @property + def _iphone_struct(self) -> Dict[str, Any]: + if not self._context.iphone_support: + raise IPhoneSupportDisabledException("iPhone support is disabled.") + if not self._context.is_logged_in: + raise LoginRequiredException("Login required to access iPhone media info endpoint.") + if not self._iphone_struct_: + data = self._context.get_iphone_json( + path='api/v1/feed/reels_media/?reel_ids={}'.format(self.owner_id), params={} + ) + self._iphone_struct_ = {} + for item in data['reels'][str(self.owner_id)]['items']: + if item['pk'] == self.mediaid: + self._iphone_struct_ = item + break + return self._iphone_struct_ + + @property + def owner_profile(self) -> Profile: + """:class:`Profile` instance of the story item's owner.""" + if not self._owner_profile: + self._owner_profile = Profile.from_id(self._context, self._node['owner']['id']) + assert self._owner_profile is not None + return self._owner_profile + + @property + def owner_username(self) -> str: + """The StoryItem owner's lowercase name.""" + return self.owner_profile.username + + @property + def owner_id(self) -> int: + """The ID of the StoryItem owner.""" + return self.owner_profile.userid + + @property + def date_local(self) -> datetime: + """Timestamp when the StoryItem was created (local time zone). + + .. 
versionchanged:: 4.9 + Return timezone aware datetime object.""" + return datetime.fromtimestamp(self._node['taken_at_timestamp']).astimezone() + + @property + def date_utc(self) -> datetime: + """Timestamp when the StoryItem was created (UTC).""" + return datetime.utcfromtimestamp(self._node['taken_at_timestamp']) + + @property + def date(self) -> datetime: + """Synonym to :attr:`~StoryItem.date_utc`""" + return self.date_utc + + @property + def profile(self) -> str: + """Synonym to :attr:`~StoryItem.owner_username`""" + return self.owner_username + + @property + def expiring_local(self) -> datetime: + """Timestamp when the StoryItem will get unavailable (local time zone).""" + return datetime.fromtimestamp(self._node['expiring_at_timestamp']) + + @property + def expiring_utc(self) -> datetime: + """Timestamp when the StoryItem will get unavailable (UTC).""" + return datetime.utcfromtimestamp(self._node['expiring_at_timestamp']) + + @property + def url(self) -> str: + """URL of the picture / video thumbnail of the StoryItem""" + if self.typename in ["GraphStoryImage", "StoryImage"] and \ + self._context.iphone_support and self._context.is_logged_in: + try: + orig_url = self._iphone_struct['image_versions2']['candidates'][0]['url'] + url = re.sub(r'([?&])se=\d+&?', r'\1', orig_url).rstrip('&') + return url + except (InstaloaderException, KeyError, IndexError) as err: + self._context.error(f"Unable to fetch high quality image version of {self}: {err}") + return self._node['display_resources'][-1]['src'] + + @property + def typename(self) -> str: + """Type of post, GraphStoryImage or GraphStoryVideo""" + return self._node['__typename'] + + @property + def caption(self) -> Optional[str]: + """ + Caption. + + .. 
versionadded:: 4.10 + """ + if "edge_media_to_caption" in self._node and self._node["edge_media_to_caption"]["edges"]: + return _optional_normalize(self._node["edge_media_to_caption"]["edges"][0]["node"]["text"]) + elif "caption" in self._node: + return _optional_normalize(self._node["caption"]) + return None + + @property + def caption_hashtags(self) -> List[str]: + """ + List of all lowercased hashtags (without preceeding #) that occur in the StoryItem's caption. + + .. versionadded:: 4.10 + """ + if not self.caption: + return [] + return _hashtag_regex.findall(self.caption.lower()) + + @property + def caption_mentions(self) -> List[str]: + """ + List of all lowercased profiles that are mentioned in the StoryItem's caption, without preceeding @. + + .. versionadded:: 4.10 + """ + if not self.caption: + return [] + return _mention_regex.findall(self.caption.lower()) + + @property + def pcaption(self) -> str: + """ + Printable caption, useful as a format specifier for --filename-pattern. + + .. 
versionadded:: 4.10 + """ + def _elliptify(caption): + pcaption = ' '.join([s.replace('/', '\u2215') for s in caption.splitlines() if s]).strip() + return (pcaption[:30] + "\u2026") if len(pcaption) > 31 else pcaption + return _elliptify(self.caption) if self.caption else '' + + @property + def is_video(self) -> bool: + """True if the StoryItem is a video.""" + return self._node['is_video'] + + @property + def video_url(self) -> Optional[str]: + """URL of the video, or None.""" + if self.is_video: + version_urls = [] + try: + version_urls.append(self._node['video_resources'][-1]['src']) + except (InstaloaderException, KeyError, IndexError) as err: + self._context.error(f"Warning: Unable to fetch video from graphql of {self}: {err}") + if self._context.iphone_support and self._context.is_logged_in: + try: + version_urls.extend(version['url'] for version in self._iphone_struct['video_versions']) + except (InstaloaderException, KeyError, IndexError) as err: + self._context.error(f"Unable to fetch high-quality video version of {self}: {err}") + version_urls = list(dict.fromkeys(version_urls)) + if len(version_urls) == 0: + return None + if len(version_urls) == 1: + return version_urls[0] + url_candidates: List[Tuple[int, str]] = [] + for idx, version_url in enumerate(version_urls): + try: + url_candidates.append(( + int(self._context.head(version_url, allow_redirects=True).headers.get('Content-Length', 0)), + version_url + )) + except (InstaloaderException, KeyError, IndexError) as err: + self._context.error(f"Video URL candidate {idx+1}/{len(version_urls)} for {self}: {err}") + if not url_candidates: + # All candidates fail: Fallback to default URL and handle errors later at the actual download attempt + return version_urls[0] + url_candidates.sort() + return url_candidates[-1][1] + return None + + +class Story: + """ + Structure representing a user story with its associated items. 
+ + Provides methods for accessing story properties, as well as :meth:`Story.get_items` to request associated + :class:`StoryItem` nodes. Stories are returned by :meth:`Instaloader.get_stories`. + + With a logged-in :class:`Instaloader` instance `L`, you may download all your visible user stories with:: + + for story in L.get_stories(): + # story is a Story object + for item in story.get_items(): + # item is a StoryItem object + L.download_storyitem(item, ':stories') + + This class implements == and is hashable. + + :param context: :class:`InstaloaderContext` instance used for additional queries if necessary. + :param node: Dictionary containing the available information of the story as returned by Instagram. + """ + + def __init__(self, context: InstaloaderContext, node: Dict[str, Any]): + self._context = context + self._node = node + self._unique_id: Optional[str] = None + self._owner_profile: Optional[Profile] = None + self._iphone_struct_: Optional[Dict[str, Any]] = None + + def __repr__(self): + return ''.format(self.owner_username, self.latest_media_utc) + + def __eq__(self, o: object) -> bool: + if isinstance(o, Story): + return self.unique_id == o.unique_id + return NotImplemented + + def __hash__(self) -> int: + return hash(self.unique_id) + + @property + def unique_id(self) -> Union[str, int]: + """ + This ID only equals amongst :class:`Story` instances which have the same owner and the same set of + :class:`StoryItem`. For all other :class:`Story` instances this ID is different. 
+ """ + if not self._unique_id: + id_list = [item.mediaid for item in self.get_items()] + id_list.sort() + self._unique_id = str().join([str(self.owner_id)] + list(map(str, id_list))) + return self._unique_id + + @property + def last_seen_local(self) -> Optional[datetime]: + """Timestamp of the most recent StoryItem that has been watched or None (local time zone).""" + if self._node['seen']: + return datetime.fromtimestamp(self._node['seen']) + return None + + @property + def last_seen_utc(self) -> Optional[datetime]: + """Timestamp of the most recent StoryItem that has been watched or None (UTC).""" + if self._node['seen']: + return datetime.utcfromtimestamp(self._node['seen']) + return None + + @property + def latest_media_local(self) -> datetime: + """Timestamp when the last item of the story was created (local time zone).""" + return datetime.fromtimestamp(self._node['latest_reel_media']) + + @property + def latest_media_utc(self) -> datetime: + """Timestamp when the last item of the story was created (UTC).""" + return datetime.utcfromtimestamp(self._node['latest_reel_media']) + + @property + def itemcount(self) -> int: + """Count of items associated with the :class:`Story` instance.""" + return len(self._node['items']) + + @property + def owner_profile(self) -> Profile: + """:class:`Profile` instance of the story owner.""" + if not self._owner_profile: + self._owner_profile = Profile(self._context, self._node['user']) + return self._owner_profile + + @property + def owner_username(self) -> str: + """The story owner's lowercase username.""" + return self.owner_profile.username + + @property + def owner_id(self) -> int: + """The story owner's ID.""" + return self.owner_profile.userid + + def _fetch_iphone_struct(self) -> None: + if self._context.iphone_support and self._context.is_logged_in and not self._iphone_struct_: + data = self._context.get_iphone_json( + path='api/v1/feed/reels_media/?reel_ids={}'.format(self.owner_id), params={} + ) + 
self._iphone_struct_ = data['reels'][str(self.owner_id)] + + def get_items(self) -> Iterator[StoryItem]: + """Retrieve all items from a story.""" + self._fetch_iphone_struct() + for item in reversed(self._node['items']): + if self._iphone_struct_ is not None: + for iphone_struct_item in self._iphone_struct_['items']: + if iphone_struct_item['pk'] == int(item['id']): + item['iphone_struct'] = iphone_struct_item + break + yield StoryItem(self._context, item, self.owner_profile) + + +class Highlight(Story): + """ + Structure representing a user's highlight with its associated story items. + + Provides methods for accessing highlight properties, as well as :meth:`Highlight.get_items` to request associated + :class:`StoryItem` nodes. Highlights are returned by :meth:`Instaloader.get_highlights`. + + With a logged-in :class:`Instaloader` instance `L`, you may download all highlights of a :class:`Profile` instance + USER with:: + + for highlight in L.get_highlights(USER): + # highlight is a Highlight object + for item in highlight.get_items(): + # item is a StoryItem object + L.download_storyitem(item, '{}/{}'.format(highlight.owner_username, highlight.title)) + + This class implements == and is hashable. + + :param context: :class:`InstaloaderContext` instance used for additional queries if necessary. + :param node: Dictionary containing the available information of the highlight as returned by Instagram. + :param owner: :class:`Profile` instance representing the owner profile of the highlight. 
+ """ + + def __init__(self, context: InstaloaderContext, node: Dict[str, Any], owner: Optional[Profile] = None): + super().__init__(context, node) + self._owner_profile = owner + self._items: Optional[List[Dict[str, Any]]] = None + self._iphone_struct_: Optional[Dict[str, Any]] = None + + def __repr__(self): + return ''.format(self.owner_username, self.title) + + @property + def unique_id(self) -> int: + """A unique ID identifying this set of highlights.""" + return int(self._node['id']) + + @property + def owner_profile(self) -> Profile: + """:class:`Profile` instance of the highlights' owner.""" + if not self._owner_profile: + self._owner_profile = Profile(self._context, self._node['owner']) + return self._owner_profile + + @property + def title(self) -> str: + """The title of these highlights.""" + return self._node['title'] + + @property + def cover_url(self) -> str: + """URL of the highlights' cover.""" + return self._node['cover_media']['thumbnail_src'] + + @property + def cover_cropped_url(self) -> str: + """URL of the cropped version of the cover.""" + return self._node['cover_media_cropped_thumbnail']['url'] + + def _fetch_items(self): + if not self._items: + self._items = self._context.graphql_query("45246d3fe16ccc6577e0bd297a5db1ab", + {"reel_ids": [], "tag_names": [], "location_ids": [], + "highlight_reel_ids": [str(self.unique_id)], + "precomposed_overlay": False})['data']['reels_media'][0]['items'] + + def _fetch_iphone_struct(self) -> None: + if self._context.iphone_support and self._context.is_logged_in and not self._iphone_struct_: + data = self._context.get_iphone_json( + path='api/v1/feed/reels_media/?reel_ids=highlight:{}'.format(self.unique_id), params={} + ) + self._iphone_struct_ = data['reels']['highlight:{}'.format(self.unique_id)] + + @property + def itemcount(self) -> int: + """Count of items associated with the :class:`Highlight` instance.""" + self._fetch_items() + assert self._items is not None + return len(self._items) + + def 
get_items(self) -> Iterator[StoryItem]: + """Retrieve all associated highlight items.""" + self._fetch_items() + self._fetch_iphone_struct() + assert self._items is not None + for item in self._items: + if self._iphone_struct_ is not None: + for iphone_struct_item in self._iphone_struct_['items']: + if iphone_struct_item['pk'] == int(item['id']): + item['iphone_struct'] = iphone_struct_item + break + yield StoryItem(self._context, item, self.owner_profile) + + +class Hashtag: + """ + An Hashtag. + + Analogous to :class:`Profile`, get an instance with:: + + L = Instaloader() + hashtag = Hashtag.from_name(L.context, HASHTAG) + + To then download the Hashtag's Posts, do:: + + for post in hashtag.get_posts(): + L.download_post(post, target="#"+hashtag.name) + + Also, this class implements == and is hashable. + + .. versionchanged:: 4.9 + Removed ``get_related_tags()`` and ``is_top_media_only`` as these features were removed from Instagram. + """ + def __init__(self, context: InstaloaderContext, node: Dict[str, Any]): + assert "name" in node + self._context = context + self._node = node + self._has_full_metadata = False + + @classmethod + def from_name(cls, context: InstaloaderContext, name: str): + """ + Create a Hashtag instance from a given hashtag name, without preceeding '#'. Raises an Exception if there is no + hashtag with the given name. 
+ + :param context: :attr:`Instaloader.context` + :param name: Hashtag, without preceeding '#' + :raises: :class:`QueryReturnedNotFoundException` + """ + # pylint:disable=protected-access + hashtag = cls(context, {'name': name.lower()}) + hashtag._obtain_metadata() + return hashtag + + @property + def name(self): + """Hashtag name lowercased, without preceeding '#'""" + return self._node["name"].lower() + + def _query(self, params): + json_response = self._context.get_json("explore/tags/{0}/".format(self.name), params) + return json_response["graphql"]["hashtag"] if "graphql" in json_response else json_response["data"] + + def _obtain_metadata(self): + if not self._has_full_metadata: + self._node = self._query({"__a": 1, "__d": "dis"}) + self._has_full_metadata = True + + def _asdict(self): + json_node = self._node.copy() + # remove posts + json_node.pop("edge_hashtag_to_top_posts", None) + json_node.pop("top", None) + json_node.pop("edge_hashtag_to_media", None) + json_node.pop("recent", None) + return json_node + + def __repr__(self): + return "".format(self.name) + + def __eq__(self, other: object) -> bool: + if isinstance(other, Hashtag): + return self.name == other.name + return NotImplemented + + def __hash__(self) -> int: + return hash(self.name) + + def _metadata(self, *keys) -> Any: + try: + d = self._node + for key in keys: + d = d[key] + return d + except KeyError: + self._obtain_metadata() + d = self._node + for key in keys: + d = d[key] + return d + + @property + def hashtagid(self) -> int: + return int(self._metadata("id")) + + @property + def profile_pic_url(self) -> str: + return self._metadata("profile_pic_url") + + @property + def description(self) -> Optional[str]: + return self._metadata("description") + + @property + def allow_following(self) -> bool: + return bool(self._metadata("allow_following")) + + @property + def is_following(self) -> bool: + try: + return self._metadata("is_following") + except KeyError: + return 
bool(self._metadata("following")) + + def get_top_posts(self) -> Iterator[Post]: + """Yields the top posts of the hashtag.""" + try: + yield from (Post(self._context, edge["node"]) + for edge in self._metadata("edge_hashtag_to_top_posts", "edges")) + except KeyError: + yield from SectionIterator( + self._context, + lambda d: d["data"]["top"], + lambda m: Post.from_iphone_struct(self._context, m), + f"explore/tags/{self.name}/", + self._metadata("top"), + ) + + @property + def mediacount(self) -> int: + """ + The count of all media associated with this hashtag. + + The number of posts with a certain hashtag may differ from the number of posts that can actually be accessed, as + the hashtag count might include private posts + """ + try: + return self._metadata("edge_hashtag_to_media", "count") + except KeyError: + return self._metadata("media_count") + + def get_posts(self) -> Iterator[Post]: + """Yields the recent posts associated with this hashtag. + + .. deprecated:: 4.9 + Use :meth:`Hashtag.get_posts_resumable` as this method may return incorrect results (:issue:`1457`)""" + try: + self._metadata("edge_hashtag_to_media", "edges") + self._metadata("edge_hashtag_to_media", "page_info") + conn = self._metadata("edge_hashtag_to_media") + yield from (Post(self._context, edge["node"]) for edge in conn["edges"]) + while conn["page_info"]["has_next_page"]: + data = self._query({'__a': 1, 'max_id': conn["page_info"]["end_cursor"]}) + conn = data["edge_hashtag_to_media"] + yield from (Post(self._context, edge["node"]) for edge in conn["edges"]) + except KeyError: + yield from SectionIterator( + self._context, + lambda d: d["data"]["recent"], + lambda m: Post.from_iphone_struct(self._context, m), + f"explore/tags/{self.name}/", + self._metadata("recent"), + ) + + def get_all_posts(self) -> Iterator[Post]: + """Yields all posts, i.e. 
all most recent posts and the top posts, in almost-chronological order.""" + sorted_top_posts = iter(sorted(islice(self.get_top_posts(), 9), key=lambda p: p.date_utc, reverse=True)) + other_posts = self.get_posts_resumable() + next_top = next(sorted_top_posts, None) + next_other = next(other_posts, None) + while next_top is not None or next_other is not None: + if next_other is None: + assert next_top is not None + yield next_top + yield from sorted_top_posts + break + if next_top is None: + assert next_other is not None + yield next_other + yield from other_posts + break + if next_top == next_other: + yield next_top + next_top = next(sorted_top_posts, None) + next_other = next(other_posts, None) + continue + if next_top.date_utc > next_other.date_utc: + yield next_top + next_top = next(sorted_top_posts, None) + else: + yield next_other + next_other = next(other_posts, None) + + def get_posts_resumable(self) -> NodeIterator[Post]: + """Get the recent posts of the hashtag in a resumable fashion. + + :rtype: NodeIterator[Post] + + .. versionadded:: 4.9""" + return NodeIterator( + self._context, "9b498c08113f1e09617a1703c22b2f32", + lambda d: d['data']['hashtag']['edge_hashtag_to_media'], + lambda n: Post(self._context, n), + {'tag_name': self.name}, + f"https://www.instagram.com/explore/tags/{self.name}/" + ) + + +class TopSearchResults: + """ + An invocation of this class triggers a search on Instagram for the provided search string. + + Provides methods to access the search results as profiles (:class:`Profile`), locations (:class:`PostLocation`) and + hashtags. + + :param context: :attr:`Instaloader.context` used to send the query for the search. + :param searchstring: String to search for with Instagram's "top search". + """ + + def __init__(self, context: InstaloaderContext, searchstring: str): + self._context = context + self._searchstring = searchstring + # The `__a` param is only needed to prevent `get_json()` from searching for 'window._sharedData'. 
+ self._node = context.get_json('web/search/topsearch/', + params={'context': 'blended', + 'query': searchstring, + 'include_reel': False, + '__a': 1}) + + def get_profiles(self) -> Iterator[Profile]: + """ + Provides the :class:`Profile` instances from the search result. + """ + for user in self._node.get('users', []): + user_node = user['user'] + if 'pk' in user_node: + user_node['id'] = user_node['pk'] + yield Profile(self._context, user_node) + + def get_prefixed_usernames(self) -> Iterator[str]: + """ + Provides all profile names from the search result that start with the search string. + """ + for user in self._node.get('users', []): + username = user.get('user', {}).get('username', '') + if username.startswith(self._searchstring): + yield username + + def get_locations(self) -> Iterator[PostLocation]: + """ + Provides instances of :class:`PostLocation` from the search result. + """ + for location in self._node.get('places', []): + place = location.get('place', {}) + slug = place.get('slug') + loc = place.get('location', {}) + yield PostLocation(int(loc['pk']), loc['name'], slug, None, loc.get('lat'), loc.get('lng')) + + def get_hashtag_strings(self) -> Iterator[str]: + """ + Provides the hashtags from the search result as strings. + """ + for hashtag in self._node.get('hashtags', []): + name = hashtag.get('hashtag', {}).get('name') + if name: + yield name + + def get_hashtags(self) -> Iterator[Hashtag]: + """ + Provides the hashtags from the search result. + + .. versionadded:: 4.4 + """ + for hashtag in self._node.get('hashtags', []): + node = hashtag.get('hashtag', {}) + if 'name' in node: + yield Hashtag(self._context, node) + + @property + def searchstring(self) -> str: + """ + The string that was searched for on Instagram to produce this :class:`TopSearchResults` instance. 
+ """ + return self._searchstring + + +class TitlePic: + def __init__(self, profile: Optional[Profile], target: Union[str, Path], typename: str, + filename: str, date_utc: Optional[datetime]): + self._profile = profile + self._target = target + self._typename = typename + self._filename = filename + self._date_utc = date_utc + + @property + def profile(self) -> Union[str, Path]: + return self._profile.username.lower() if self._profile is not None else self._target + + @property + def owner_username(self) -> Union[str, Path]: + return self.profile + + @property + def owner_id(self) -> Union[str, Path]: + return str(self._profile.userid) if self._profile is not None else self._target + + @property + def target(self) -> Union[str, Path]: + return self._target + + @property + def typename(self) -> str: + return self._typename + + @property + def filename(self) -> str: + return self._filename + + @property + def date_utc(self) -> Optional[datetime]: + return self._date_utc + + @property + def date(self) -> Optional[datetime]: + return self.date_utc + + @property + def date_local(self) -> Optional[datetime]: + return self._date_utc.astimezone() if self._date_utc is not None else None + + +JsonExportable = Union[Post, Profile, StoryItem, Hashtag, FrozenNodeIterator] + + +def get_json_structure(structure: JsonExportable) -> dict: + """Returns Instaloader JSON structure for a :class:`Post`, :class:`Profile`, :class:`StoryItem`, :class:`Hashtag` + or :class:`FrozenNodeIterator` so that it can be loaded by :func:`load_structure`. + + :param structure: :class:`Post`, :class:`Profile`, :class:`StoryItem` or :class:`Hashtag` + + .. 
versionadded:: 4.8 + """ + return { + 'node': structure._asdict(), + 'instaloader': {'version': __version__, 'node_type': structure.__class__.__name__} + } + + +def save_structure_to_file(structure: JsonExportable, filename: str) -> None: + """Saves a :class:`Post`, :class:`Profile`, :class:`StoryItem`, :class:`Hashtag` or :class:`FrozenNodeIterator` to a + '.json' or '.json.xz' file such that it can later be loaded by :func:`load_structure_from_file`. + + If the specified filename ends in '.xz', the file will be LZMA compressed. Otherwise, a pretty-printed JSON file + will be created. + + :param structure: :class:`Post`, :class:`Profile`, :class:`StoryItem` or :class:`Hashtag` + :param filename: Filename, ends in '.json' or '.json.xz' + """ + json_structure = get_json_structure(structure) + compress = filename.endswith('.xz') + if compress: + with lzma.open(filename, 'wt', check=lzma.CHECK_NONE) as fp: + json.dump(json_structure, fp=fp, separators=(',', ':')) + else: + with open(filename, 'wt') as fp: + json.dump(json_structure, fp=fp, indent=4, sort_keys=True) + + +def load_structure(context: InstaloaderContext, json_structure: dict) -> JsonExportable: + """Loads a :class:`Post`, :class:`Profile`, :class:`StoryItem`, :class:`Hashtag` or :class:`FrozenNodeIterator` from + a json structure. + + :param context: :attr:`Instaloader.context` linked to the new object, used for additional queries if neccessary. + :param json_structure: Instaloader JSON structure + + .. 
versionadded:: 4.8 + """ + if 'node' in json_structure and 'instaloader' in json_structure and \ + 'node_type' in json_structure['instaloader']: + node_type = json_structure['instaloader']['node_type'] + if node_type == "Post": + return Post(context, json_structure['node']) + elif node_type == "Profile": + return Profile(context, json_structure['node']) + elif node_type == "StoryItem": + return StoryItem(context, json_structure['node']) + elif node_type == "Hashtag": + return Hashtag(context, json_structure['node']) + elif node_type == "FrozenNodeIterator": + if not 'first_node' in json_structure['node']: + json_structure['node']['first_node'] = None + return FrozenNodeIterator(**json_structure['node']) + elif 'shortcode' in json_structure: + # Post JSON created with Instaloader v3 + return Post.from_shortcode(context, json_structure['shortcode']) + raise InvalidArgumentException("Passed json structure is not an Instaloader JSON") + + +def load_structure_from_file(context: InstaloaderContext, filename: str) -> JsonExportable: + """Loads a :class:`Post`, :class:`Profile`, :class:`StoryItem`, :class:`Hashtag` or :class:`FrozenNodeIterator` from + a '.json' or '.json.xz' file that has been saved by :func:`save_structure_to_file`. + + :param context: :attr:`Instaloader.context` linked to the new object, used for additional queries if neccessary. 
+ :param filename: Filename, ends in '.json' or '.json.xz' + """ + if filename.endswith('.xz'): + fp = lzma.open(filename, 'rt') + else: + # pylint:disable=consider-using-with + fp = open(filename, 'rt') + # The context manager closes the file even if json.load() raises. + with fp: + json_structure = json.load(fp) + return load_structure(context, json_structure) diff --git a/instaloader.egg-info/PKG-INFO b/instaloader.egg-info/PKG-INFO new file mode 100644 index 0000000..69250d5 --- /dev/null +++ b/instaloader.egg-info/PKG-INFO @@ -0,0 +1,185 @@ +Metadata-Version: 2.1 +Name: instaloader +Version: 4.13.1 +Summary: Download pictures (or videos) along with their captions and other metadata from Instagram. +Home-page: https://instaloader.github.io/ +Author: Alexander Graf, André Koch-Kramer +Author-email: mail@agraf.me, koch-kramer@web.de +License: MIT +Keywords: instagram,instagram-scraper,instagram-client,instagram-feed,downloader,videos,photos,pictures,instagram-user-photos,instagram-photos,instagram-metadata,instagram-downloader,instagram-stories +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Console +Classifier: Intended Audience :: End Users/Desktop +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Topic :: Internet +Classifier: Topic :: Multimedia :: Graphics +Requires-Python: >=3.8 +License-File: LICENSE +License-File: AUTHORS.md +Requires-Dist: requests>=2.4 +Provides-Extra: browser-cookie3 +Requires-Dist: browser_cookie3>=0.19.1; extra == "browser-cookie3" + +..
image:: https://raw.githubusercontent.com/instaloader/instaloader/master/docs/logo_heading.png + +.. badges-start + +|pypi| |pyversion| |license| |aur| |contributors| |downloads| + +.. |pypi| image:: https://img.shields.io/pypi/v/instaloader.svg + :alt: Instaloader PyPI Project Page + :target: https://pypi.org/project/instaloader/ + +.. |license| image:: https://img.shields.io/github/license/instaloader/instaloader.svg + :alt: MIT License + :target: https://github.com/instaloader/instaloader/blob/master/LICENSE + +.. |pyversion| image:: https://img.shields.io/pypi/pyversions/instaloader.svg + :alt: Supported Python Versions + +.. |contributors| image:: https://img.shields.io/github/contributors/instaloader/instaloader.svg + :alt: Contributor Count + :target: https://github.com/instaloader/instaloader/graphs/contributors + +.. |aur| image:: https://img.shields.io/aur/version/instaloader.svg + :alt: Arch User Repository Package + :target: https://aur.archlinux.org/packages/instaloader/ + +.. |downloads| image:: https://pepy.tech/badge/instaloader/month + :alt: PyPI Download Count + :target: https://pepy.tech/project/instaloader + +.. badges-end + +:: + + $ pip3 install instaloader + + $ instaloader profile [profile ...] + +**Instaloader** + +- downloads **public and private profiles, hashtags, user stories, + feeds and saved media**, + +- downloads **comments, geotags and captions** of each post, + +- automatically **detects profile name changes** and renames the target + directory accordingly, + +- allows **fine-grained customization** of filters and where to store + downloaded media, + +- automatically **resumes previously-interrupted** download iterations. 
+ +:: + + instaloader [--comments] [--geotags] + [--stories] [--highlights] [--tagged] [--igtv] + [--login YOUR-USERNAME] [--fast-update] + profile | "#hashtag" | :stories | :feed | :saved + +`Instaloader Documentation <https://instaloader.github.io/>`__ + + +How to Automatically Download Pictures from Instagram +----------------------------------------------------- + +To **download all pictures and videos of a profile**, as well as the +**profile picture**, do + +:: + + instaloader profile [profile ...] + +where ``profile`` is the name of a profile you want to download. Instead +of only one profile, you may also specify a list of profiles. + +To later **update your local copy** of those profiles, you may run + +:: + + instaloader --fast-update profile [profile ...] + +If ``--fast-update`` is given, Instaloader stops when arriving at the +first already-downloaded picture. + +Alternatively, you can use ``--latest-stamps`` to have Instaloader store +the time each profile was last downloaded and only download newer media: + +:: + + instaloader --latest-stamps -- profile [profile ...] + +With this option it's possible to move or delete downloaded media and still keep +the archive updated. + +When updating profiles, Instaloader +automatically **detects profile name changes** and renames the target directory +accordingly. + +Instaloader can also be used to **download private profiles**. To do so, +invoke it with + +:: + + instaloader --login=your_username profile [profile ...] + +When logging in, Instaloader **stores the session cookies** in a file in your +temporary directory, which will be reused later the next time ``--login`` +is given. So you can download private profiles **non-interactively** when you +already have a valid session cookie file. + +`Instaloader Documentation <https://instaloader.github.io/>`__ + +Contributing +------------ + +As an open source project, Instaloader heavily depends on the contributions from +its community. See +`contributing <https://instaloader.github.io/contributing.html>`__ +for how you may help Instaloader to become an even greater tool.
+ +Supporters +---------- + +.. current-sponsors-start + +| Instaloader is proudly sponsored by +| `@rocketapi-io <https://github.com/rocketapi-io>`__ + +See `Alex' GitHub Sponsors <https://github.com/sponsors/aandergr>`__ page for +how you can sponsor the development of Instaloader! + +.. current-sponsors-end + +It is a pleasure for us to share our Instaloader with the world, and we are proud +to have attracted such an active and motivating community, with so many users +who share their suggestions and ideas with us. Buying a community-sponsored beer +or coffee from time to time is very likely to further raise our passion for the +development of Instaloader. + +| For Donations, we provide a GitHub Sponsors page, a PayPal.Me link and a Bitcoin address. +| GitHub Sponsors: `Sponsor @aandergr on GitHub Sponsors <https://github.com/sponsors/aandergr>`__ +| PayPal: `PayPal.me/aandergr <https://www.paypal.me/aandergr>`__ +| BTC: 1Nst4LoadeYzrKjJ1DX9CpbLXBYE9RKLwY + +Disclaimer +---------- + +.. disclaimer-start + +Instaloader is in no way affiliated with, authorized, maintained or endorsed by Instagram or any of its affiliates or +subsidiaries. This is an independent and unofficial project. Use at your own risk. + +Instaloader is licensed under an MIT license. Refer to the ``LICENSE`` file for more information. + +..
disclaimer-end diff --git a/instaloader.egg-info/SOURCES.txt b/instaloader.egg-info/SOURCES.txt new file mode 100644 index 0000000..71a17b3 --- /dev/null +++ b/instaloader.egg-info/SOURCES.txt @@ -0,0 +1,21 @@ +AUTHORS.md +LICENSE +README.rst +setup.py +instaloader/__init__.py +instaloader/__main__.py +instaloader/exceptions.py +instaloader/instaloader.py +instaloader/instaloadercontext.py +instaloader/lateststamps.py +instaloader/nodeiterator.py +instaloader/py.typed +instaloader/sectioniterator.py +instaloader/structures.py +instaloader.egg-info/PKG-INFO +instaloader.egg-info/SOURCES.txt +instaloader.egg-info/dependency_links.txt +instaloader.egg-info/entry_points.txt +instaloader.egg-info/not-zip-safe +instaloader.egg-info/requires.txt +instaloader.egg-info/top_level.txt \ No newline at end of file diff --git a/instaloader.egg-info/dependency_links.txt b/instaloader.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/instaloader.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/instaloader.egg-info/entry_points.txt b/instaloader.egg-info/entry_points.txt new file mode 100644 index 0000000..e79b6f7 --- /dev/null +++ b/instaloader.egg-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +instaloader = instaloader.__main__:main diff --git a/instaloader.egg-info/not-zip-safe b/instaloader.egg-info/not-zip-safe new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/instaloader.egg-info/not-zip-safe @@ -0,0 +1 @@ + diff --git a/instaloader.egg-info/requires.txt b/instaloader.egg-info/requires.txt new file mode 100644 index 0000000..5b25a87 --- /dev/null +++ b/instaloader.egg-info/requires.txt @@ -0,0 +1,4 @@ +requests>=2.4 + +[browser_cookie3] +browser_cookie3>=0.19.1 diff --git a/instaloader.egg-info/top_level.txt b/instaloader.egg-info/top_level.txt new file mode 100644 index 0000000..e25ee4d --- /dev/null +++ b/instaloader.egg-info/top_level.txt @@ -0,0 +1 @@ +instaloader diff --git 
a/instaloader/instaloader.py b/instaloader/instaloader.py index 30ea42b..a60f6f7 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -209,6 +209,7 @@ class Instaloader: """ def __init__(self, + proxy: Optional[dict] = None, sleep: bool = True, quiet: bool = False, user_agent: Optional[str] = None, @@ -234,7 +235,7 @@ class Instaloader: title_pattern: Optional[str] = None, sanitize_paths: bool = False): - self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts, + self.context = InstaloaderContext(proxy, sleep, quiet, user_agent, max_connection_attempts, request_timeout, rate_controller, fatal_status_codes, iphone_support) diff --git a/instaloader/instaloadercontext.py b/instaloader/instaloadercontext.py index 34459be..dc66946 100644 --- a/instaloader/instaloadercontext.py +++ b/instaloader/instaloadercontext.py @@ -78,7 +78,7 @@ class InstaloaderContext: class :class:`Instaloader`. """ - def __init__(self, sleep: bool = True, quiet: bool = False, user_agent: Optional[str] = None, + def __init__(self, proxy: Optional[dict] = None, sleep: bool = True, quiet: bool = False, user_agent: Optional[str] = None, max_connection_attempts: int = 3, request_timeout: float = 300.0, rate_controller: Optional[Callable[["InstaloaderContext"], "RateController"]] = None, fatal_status_codes: Optional[List[int]] = None, @@ -86,7 +86,7 @@ class InstaloaderContext: self.user_agent = user_agent if user_agent is not None else default_user_agent() self.request_timeout = request_timeout - self._session = self.get_anonymous_session() + self._session = self.get_anonymous_session(proxy) self.username = None self.user_id = None self.sleep = sleep @@ -96,6 +96,7 @@ class InstaloaderContext: self.two_factor_auth_pending = None self.iphone_support = iphone_support self.iphone_headers = default_iphone_headers() + self.proxy = proxy # error log, filled with error() and printed at the end of Instaloader.main() self.error_log: List[str] = [] @@ -117,7 +118,7 @@
class InstaloaderContext: username = self.username user_id = self.user_id iphone_headers = self.iphone_headers - self._session = self.get_anonymous_session() + self._session = self.get_anonymous_session(self.proxy) self.username = None self.user_id = None self.iphone_headers = default_iphone_headers() @@ -199,12 +200,13 @@ class InstaloaderContext: del header['X-Requested-With'] return header - def get_anonymous_session(self) -> requests.Session: + def get_anonymous_session(self, proxy: Optional[dict] = None) -> requests.Session: """Returns our default anonymous requests.Session object.""" session = requests.Session() session.cookies.update({'sessionid': '', 'mid': '', 'ig_pr': '1', 'ig_vw': '1920', 'csrftoken': '', 's_network': '', 'ds_user_id': ''}) + session.proxies.update(proxy or {}) session.headers.update(self._default_http_header(empty_session_only=True)) # Override default timeout behavior. # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427 @@ -670,7 +672,7 @@ class InstaloaderContext: :raises ConnectionException: When download failed. .. versionadded:: 4.2.1""" - with self.get_anonymous_session() as anonymous_session: + with self.get_anonymous_session(self.proxy) as anonymous_session: resp = anonymous_session.get(url, stream=True) if resp.status_code == 200: resp.raw.decode_content = True @@ -701,7 +703,7 @@ class InstaloaderContext: .. versionadded:: 4.7.6 """ - with self.get_anonymous_session() as anonymous_session: + with self.get_anonymous_session(self.proxy) as anonymous_session: resp = anonymous_session.head(url, allow_redirects=allow_redirects) if resp.status_code == 200: return resp