mirror of
https://github.com/instaloader/instaloader.git
synced 2024-11-24 19:22:32 +01:00
Split module into multiple files
This commit is contained in:
parent
3511811090
commit
c71be83f43
@ -8,7 +8,7 @@ install:
|
||||
- pip install pylint requests
|
||||
- pip install -r docs/requirements.txt
|
||||
script:
|
||||
- python3 -m pylint -r n -d bad-whitespace,missing-docstring,too-many-arguments,locally-disabled,line-too-long,too-many-public-methods,too-many-lines,too-many-instance-attributes,too-many-locals,too-many-branches,too-many-statements,inconsistent-return-statements,invalid-name instaloader
|
||||
- python3 -m pylint -r n -d bad-whitespace,missing-docstring,too-many-arguments,locally-disabled,line-too-long,too-many-public-methods,too-many-lines,too-many-instance-attributes,too-many-locals,too-many-branches,too-many-statements,inconsistent-return-statements,invalid-name,wildcard-import,unused-wildcard-import instaloader
|
||||
- make -C docs html
|
||||
deploy:
|
||||
- provider: pypi
|
||||
|
1916
instaloader.py
1916
instaloader.py
File diff suppressed because it is too large
Load Diff
17
instaloader/__init__.py
Normal file
17
instaloader/__init__.py
Normal file
@ -0,0 +1,17 @@
|
||||
"""Download pictures (or videos) along with their captions and other metadata from Instagram."""
|
||||
|
||||
|
||||
__version__ = '4.0a0'
|
||||
|
||||
|
||||
try:
|
||||
# pylint:disable=wrong-import-position
|
||||
import win_unicode_console
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
win_unicode_console.enable()
|
||||
|
||||
from .exceptions import *
|
||||
from .instaloader import Instaloader, Tristate
|
||||
from .structures import Post, Profile, shortcode_to_mediaid, mediaid_to_shortcode
|
297
instaloader/__main__.py
Normal file
297
instaloader/__main__.py
Normal file
@ -0,0 +1,297 @@
|
||||
"""Download pictures (or videos) along with their captions and other metadata from Instagram."""
|
||||
|
||||
import ast
|
||||
import sys
|
||||
from argparse import ArgumentParser, SUPPRESS
|
||||
from typing import Callable, List, Optional
|
||||
|
||||
from . import __version__
|
||||
from .exceptions import *
|
||||
from .instaloader import Instaloader, Tristate, get_default_session_filename
|
||||
from .instaloadercontext import default_user_agent
|
||||
from .structures import Post
|
||||
|
||||
# NOTE: duplicated in README.rst and docs/index.rst
|
||||
USAGE_STRING = """
|
||||
{0} [--comments] [--geotags] [--stories]
|
||||
{2:{1}} [--login YOUR-USERNAME] [--fast-update]
|
||||
{2:{1}} profile | "#hashtag" | :stories | :feed | :saved
|
||||
{0} --help""".format(sys.argv[0], len(sys.argv[0]), '')
|
||||
|
||||
|
||||
def filterstr_to_filterfunc(filter_str: str, logged_in: bool) -> Callable[['Post'], bool]:
|
||||
"""Takes an --only-if=... filter specification and makes a filter_func Callable out of it."""
|
||||
|
||||
# The filter_str is parsed, then all names occurring in its AST are replaced by loads to post.<name>. A
|
||||
# function Post->bool is returned which evaluates the filter with the post as 'post' in its namespace.
|
||||
|
||||
class TransformFilterAst(ast.NodeTransformer):
|
||||
def visit_Name(self, node: ast.Name):
|
||||
# pylint:disable=no-self-use
|
||||
if not isinstance(node.ctx, ast.Load):
|
||||
raise InvalidArgumentException("Invalid filter: Modifying variables ({}) not allowed.".format(node.id))
|
||||
if not hasattr(Post, node.id):
|
||||
raise InvalidArgumentException("Invalid filter: Name {} is not defined.".format(node.id))
|
||||
if node.id in Post.LOGIN_REQUIRING_PROPERTIES and not logged_in:
|
||||
raise InvalidArgumentException("Invalid filter: Name {} requires being logged in.".format(node.id))
|
||||
new_node = ast.Attribute(ast.copy_location(ast.Name('post', ast.Load()), node), node.id,
|
||||
ast.copy_location(ast.Load(), node))
|
||||
return ast.copy_location(new_node, node)
|
||||
|
||||
input_filename = '<--only-if parameter>'
|
||||
compiled_filter = compile(TransformFilterAst().visit(ast.parse(filter_str, filename=input_filename, mode='eval')),
|
||||
filename=input_filename, mode='eval')
|
||||
|
||||
def filterfunc(post: 'Post') -> bool:
|
||||
# pylint:disable=eval-used
|
||||
return bool(eval(compiled_filter, {'post': post}))
|
||||
|
||||
return filterfunc
|
||||
|
||||
|
||||
def _main(instaloader: Instaloader, targetlist: List[str],
|
||||
username: Optional[str] = None, password: Optional[str] = None,
|
||||
sessionfile: Optional[str] = None, max_count: Optional[int] = None,
|
||||
profile_pic: bool = True, profile_pic_only: bool = False,
|
||||
fast_update: bool = False,
|
||||
stories: bool = False, stories_only: bool = False,
|
||||
filter_str: Optional[str] = None) -> None:
|
||||
"""Download set of profiles, hashtags etc. and handle logging in and session files if desired."""
|
||||
# Parse and generate filter function
|
||||
if filter_str is not None:
|
||||
filter_func = filterstr_to_filterfunc(filter_str, username is not None)
|
||||
instaloader.context.log('Only download posts with property "{}".'.format(filter_str))
|
||||
else:
|
||||
filter_func = None
|
||||
# Login, if desired
|
||||
if username is not None:
|
||||
try:
|
||||
instaloader.load_session_from_file(username, sessionfile)
|
||||
except FileNotFoundError as err:
|
||||
if sessionfile is not None:
|
||||
print(err, file=sys.stderr)
|
||||
instaloader.context.log("Session file does not exist yet - Logging in.")
|
||||
if not instaloader.context.is_logged_in or username != instaloader.test_login():
|
||||
if password is not None:
|
||||
instaloader.login(username, password)
|
||||
else:
|
||||
instaloader.interactive_login(username)
|
||||
instaloader.context.log("Logged in as %s." % username)
|
||||
# Try block for KeyboardInterrupt (save session on ^C)
|
||||
profiles = set()
|
||||
try:
|
||||
# Generate set of profiles, already downloading non-profile targets
|
||||
for target in targetlist:
|
||||
# strip '/' characters to be more shell-autocompletion-friendly
|
||||
target = target.rstrip('/')
|
||||
with instaloader.context.error_catcher(target):
|
||||
if target[0] == '@':
|
||||
instaloader.context.log("Retrieving followees of %s..." % target[1:])
|
||||
profiles.update([followee['username'] for followee in instaloader.get_followees(target[1:])])
|
||||
elif target[0] == '#':
|
||||
instaloader.download_hashtag(hashtag=target[1:], max_count=max_count, fast_update=fast_update,
|
||||
filter_func=filter_func)
|
||||
elif target == ":feed":
|
||||
instaloader.download_feed_posts(fast_update=fast_update, max_count=max_count,
|
||||
filter_func=filter_func)
|
||||
elif target == ":stories":
|
||||
instaloader.download_stories(fast_update=fast_update)
|
||||
elif target == ":saved":
|
||||
instaloader.download_saved_posts(fast_update=fast_update, max_count=max_count,
|
||||
filter_func=filter_func)
|
||||
else:
|
||||
profiles.add(target)
|
||||
if len(profiles) > 1:
|
||||
instaloader.context.log("Downloading {} profiles: {}".format(len(profiles), ','.join(profiles)))
|
||||
# Iterate through profiles list and download them
|
||||
for target in profiles:
|
||||
with instaloader.context.error_catcher(target):
|
||||
try:
|
||||
instaloader.download_profile(target, profile_pic, profile_pic_only, fast_update,
|
||||
stories, stories_only, filter_func=filter_func)
|
||||
except ProfileNotExistsException as err:
|
||||
if not instaloader.context.is_logged_in:
|
||||
instaloader.context.log(err)
|
||||
instaloader.context.log("Trying again anonymously, helps in case you are just blocked.")
|
||||
with instaloader.anonymous_copy() as anonymous_loader:
|
||||
with instaloader.context.error_catcher():
|
||||
anonymous_loader.download_profile(target, profile_pic, profile_pic_only,
|
||||
fast_update, filter_func=filter_func)
|
||||
else:
|
||||
raise err
|
||||
except KeyboardInterrupt:
|
||||
print("\nInterrupted by user.", file=sys.stderr)
|
||||
# Save session if it is useful
|
||||
if instaloader.context.is_logged_in:
|
||||
instaloader.save_session_to_file(sessionfile)
|
||||
# User might be confused if Instaloader does nothing
|
||||
if not targetlist:
|
||||
if instaloader.context.is_logged_in:
|
||||
# Instaloader did at least save a session file
|
||||
instaloader.context.log("No targets were specified, thus nothing has been downloaded.")
|
||||
else:
|
||||
# Instloader did not do anything
|
||||
instaloader.context.log("usage:" + USAGE_STRING)
|
||||
|
||||
|
||||
def main():
|
||||
parser = ArgumentParser(description=__doc__, add_help=False, usage=USAGE_STRING,
|
||||
epilog="Report issues at https://github.com/instaloader/instaloader/issues. "
|
||||
"The complete documentation can be found at "
|
||||
"https://instaloader.github.io/.")
|
||||
|
||||
g_what = parser.add_argument_group('What to Download',
|
||||
'Specify a list of profiles or #hashtags. For each of these, Instaloader '
|
||||
'creates a folder and '
|
||||
'downloads all posts along with the pictures\'s '
|
||||
'captions and the current profile picture. '
|
||||
'If an already-downloaded profile has been renamed, Instaloader automatically '
|
||||
'finds it by its unique ID and renames the folder likewise.')
|
||||
g_what.add_argument('profile', nargs='*', metavar='profile|#hashtag',
|
||||
help='Name of profile or #hashtag to download. '
|
||||
'Alternatively, if --login is given: @<profile> to download all followees of '
|
||||
'<profile>; the special targets '
|
||||
':feed to download pictures from your feed; '
|
||||
':stories to download the stories of your followees; or '
|
||||
':saved to download the posts marked as saved.')
|
||||
g_what.add_argument('-P', '--profile-pic-only', action='store_true',
|
||||
help='Only download profile picture.')
|
||||
g_what.add_argument('--no-profile-pic', action='store_true',
|
||||
help='Do not download profile picture.')
|
||||
g_what.add_argument('-V', '--no-videos', action='store_true',
|
||||
help='Do not download videos.')
|
||||
g_what.add_argument('--no-video-thumbnails', action='store_true',
|
||||
help='Do not download thumbnails of videos.')
|
||||
g_what.add_argument('-G', '--geotags', action='store_true',
|
||||
help='Download geotags when available. Geotags are stored as a '
|
||||
'text file with the location\'s name and a Google Maps link. '
|
||||
'This requires an additional request to the Instagram '
|
||||
'server for each picture, which is why it is disabled by default.')
|
||||
g_what.add_argument('--no-geotags', action='store_true',
|
||||
help='Do not store geotags, even if they can be obtained without any additional request.')
|
||||
g_what.add_argument('-C', '--comments', action='store_true',
|
||||
help='Download and update comments for each post. '
|
||||
'This requires an additional request to the Instagram '
|
||||
'server for each post, which is why it is disabled by default.')
|
||||
g_what.add_argument('--no-captions', action='store_true',
|
||||
help='Do not store media captions, although no additional request is needed to obtain them.')
|
||||
g_what.add_argument('--metadata-json', action='store_true',
|
||||
help='Create a JSON file containing the metadata of each post. This does not include comments '
|
||||
'nor geotags.')
|
||||
g_what.add_argument('-s', '--stories', action='store_true',
|
||||
help='Also download stories of each profile that is downloaded. Requires --login.')
|
||||
g_what.add_argument('--stories-only', action='store_true',
|
||||
help='Rather than downloading regular posts of each specified profile, only download '
|
||||
'stories. Requires --login. Does not imply --no-profile-pic.')
|
||||
g_what.add_argument('--only-if', metavar='filter',
|
||||
help='Expression that, if given, must evaluate to True for each post to be downloaded. Must be '
|
||||
'a syntactically valid python expression. Variables are evaluated to '
|
||||
'instaloader.Post attributes. Example: --only-if=viewer_has_liked.')
|
||||
|
||||
g_stop = parser.add_argument_group('When to Stop Downloading',
|
||||
'If none of these options are given, Instaloader goes through all pictures '
|
||||
'matching the specified targets.')
|
||||
g_stop.add_argument('-F', '--fast-update', action='store_true',
|
||||
help='For each target, stop when encountering the first already-downloaded picture. This '
|
||||
'flag is recommended when you use Instaloader to update your personal Instagram archive.')
|
||||
g_stop.add_argument('-c', '--count',
|
||||
help='Do not attempt to download more than COUNT posts. '
|
||||
'Applies only to #hashtag and :feed.')
|
||||
|
||||
g_login = parser.add_argument_group('Login (Download Private Profiles)',
|
||||
'Instaloader can login to Instagram. This allows downloading private profiles. '
|
||||
'To login, pass the --login option. Your session cookie (not your password!) '
|
||||
'will be saved to a local file to be reused next time you want Instaloader '
|
||||
'to login.')
|
||||
g_login.add_argument('-l', '--login', metavar='YOUR-USERNAME',
|
||||
help='Login name (profile name) for your Instagram account.')
|
||||
g_login.add_argument('-f', '--sessionfile',
|
||||
help='Path for loading and storing session key file. '
|
||||
'Defaults to ' + get_default_session_filename("<login_name>"))
|
||||
g_login.add_argument('-p', '--password', metavar='YOUR-PASSWORD',
|
||||
help='Password for your Instagram account. Without this option, '
|
||||
'you\'ll be prompted for your password interactively if '
|
||||
'there is not yet a valid session file.')
|
||||
|
||||
g_how = parser.add_argument_group('How to Download')
|
||||
g_how.add_argument('--dirname-pattern',
|
||||
help='Name of directory where to store posts. {profile} is replaced by the profile name, '
|
||||
'{target} is replaced by the target you specified, i.e. either :feed, #hashtag or the '
|
||||
'profile name. Defaults to \'{target}\'.')
|
||||
g_how.add_argument('--filename-pattern',
|
||||
help='Prefix of filenames. Posts are stored in the directory whose pattern is given with '
|
||||
'--dirname-pattern. {profile} is replaced by the profile name, '
|
||||
'{target} is replaced by the target you specified, i.e. either :feed, #hashtag or the '
|
||||
'profile name. Also, the fields {date} and {shortcode} can be specified. In case of not '
|
||||
'downloading stories, the attributes of the Post class can be used in addition, e.g. '
|
||||
'{post.owner_id} or {post.mediaid}. Defaults to \'{date:%%Y-%%m-%%d_%%H-%%M-%%S}\'.')
|
||||
g_how.add_argument('--user-agent',
|
||||
help='User Agent to use for HTTP requests. Defaults to \'{}\'.'.format(default_user_agent()))
|
||||
g_how.add_argument('-S', '--no-sleep', action='store_true', help=SUPPRESS)
|
||||
g_how.add_argument('--max-connection-attempts', metavar='N', type=int, default=3,
|
||||
help='Maximum number of connection attempts until a request is aborted. Defaults to 3. If a '
|
||||
'connection fails, it can be manually skipped by hitting CTRL+C. Set this to 0 to retry '
|
||||
'infinitely.')
|
||||
|
||||
g_misc = parser.add_argument_group('Miscellaneous Options')
|
||||
g_misc.add_argument('-q', '--quiet', action='store_true',
|
||||
help='Disable user interaction, i.e. do not print messages (except errors) and fail '
|
||||
'if login credentials are needed but not given. This makes Instaloader suitable as a '
|
||||
'cron job.')
|
||||
g_misc.add_argument('-h', '--help', action='help', help='Show this help message and exit.')
|
||||
g_misc.add_argument('--version', action='version', help='Show version number and exit.',
|
||||
version=__version__)
|
||||
|
||||
args = parser.parse_args()
|
||||
try:
|
||||
if args.login is None and (args.stories or args.stories_only):
|
||||
print("--login=USERNAME required to download stories.", file=sys.stderr)
|
||||
args.stories = False
|
||||
if args.stories_only:
|
||||
raise SystemExit(1)
|
||||
|
||||
if ':feed-all' in args.profile or ':feed-liked' in args.profile:
|
||||
raise SystemExit(":feed-all and :feed-liked were removed. Use :feed as target and "
|
||||
"eventually --only-if=viewer_has_liked.")
|
||||
|
||||
download_videos = Tristate.always if not args.no_videos else Tristate.no_extra_query
|
||||
download_video_thumbnails = Tristate.always if not args.no_video_thumbnails else Tristate.never
|
||||
download_comments = Tristate.always if args.comments else Tristate.no_extra_query
|
||||
save_captions = Tristate.no_extra_query if not args.no_captions else Tristate.never
|
||||
save_metadata = Tristate.always if args.metadata_json else Tristate.never
|
||||
|
||||
if args.geotags and args.no_geotags:
|
||||
raise SystemExit("--geotags and --no-geotags given. I am confused and refuse to work.")
|
||||
elif args.geotags:
|
||||
download_geotags = Tristate.always
|
||||
elif args.no_geotags:
|
||||
download_geotags = Tristate.never
|
||||
else:
|
||||
download_geotags = Tristate.no_extra_query
|
||||
|
||||
loader = Instaloader(sleep=not args.no_sleep, quiet=args.quiet,
|
||||
user_agent=args.user_agent,
|
||||
dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern,
|
||||
download_videos=download_videos, download_video_thumbnails=download_video_thumbnails,
|
||||
download_geotags=download_geotags,
|
||||
save_captions=save_captions, download_comments=download_comments,
|
||||
save_metadata=save_metadata, max_connection_attempts=args.max_connection_attempts)
|
||||
_main(loader,
|
||||
args.profile,
|
||||
username=args.login.lower() if args.login is not None else None,
|
||||
password=args.password,
|
||||
sessionfile=args.sessionfile,
|
||||
max_count=int(args.count) if args.count is not None else None,
|
||||
profile_pic=not args.no_profile_pic,
|
||||
profile_pic_only=args.profile_pic_only,
|
||||
fast_update=args.fast_update,
|
||||
stories=args.stories,
|
||||
stories_only=args.stories_only,
|
||||
filter_str=args.only_if)
|
||||
loader.close()
|
||||
except InstaloaderException as err:
|
||||
raise SystemExit("Fatal error: %s" % err)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
49
instaloader/exceptions.py
Normal file
49
instaloader/exceptions.py
Normal file
@ -0,0 +1,49 @@
|
||||
class InstaloaderException(Exception):
|
||||
"""Base exception for this script.
|
||||
|
||||
:note: This exception should not be raised directly."""
|
||||
pass
|
||||
|
||||
|
||||
class QueryReturnedNotFoundException(InstaloaderException):
|
||||
pass
|
||||
|
||||
|
||||
class QueryReturnedForbiddenException(InstaloaderException):
|
||||
pass
|
||||
|
||||
|
||||
class ProfileNotExistsException(InstaloaderException):
|
||||
pass
|
||||
|
||||
|
||||
class ProfileHasNoPicsException(InstaloaderException):
|
||||
pass
|
||||
|
||||
|
||||
class PrivateProfileNotFollowedException(InstaloaderException):
|
||||
pass
|
||||
|
||||
|
||||
class LoginRequiredException(InstaloaderException):
|
||||
pass
|
||||
|
||||
|
||||
class InvalidArgumentException(InstaloaderException):
|
||||
pass
|
||||
|
||||
|
||||
class BadResponseException(InstaloaderException):
|
||||
pass
|
||||
|
||||
|
||||
class BadCredentialsException(InstaloaderException):
|
||||
pass
|
||||
|
||||
|
||||
class ConnectionException(InstaloaderException):
|
||||
pass
|
||||
|
||||
|
||||
class TooManyRequestsException(ConnectionException):
|
||||
pass
|
804
instaloader/instaloader.py
Normal file
804
instaloader/instaloader.py
Normal file
@ -0,0 +1,804 @@
|
||||
import getpass
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import shutil
|
||||
import string
|
||||
import sys
|
||||
import tempfile
|
||||
from contextlib import contextmanager, suppress
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from functools import wraps
|
||||
from io import BytesIO
|
||||
from typing import Any, Callable, Dict, Iterator, List, Optional
|
||||
|
||||
from .exceptions import *
|
||||
from .instaloadercontext import InstaloaderContext
|
||||
from .structures import Post, Profile, mediaid_to_shortcode
|
||||
|
||||
|
||||
def get_default_session_filename(username: str) -> str:
|
||||
"""Returns default session filename for given username."""
|
||||
dirname = tempfile.gettempdir() + "/" + ".instaloader-" + getpass.getuser()
|
||||
filename = dirname + "/" + "session-" + username
|
||||
return filename.lower()
|
||||
|
||||
|
||||
def format_string_contains_key(format_string: '_PathPattern', key: str) -> bool:
|
||||
# pylint:disable=unused-variable
|
||||
for literal_text, field_name, format_spec, conversion in string.Formatter().parse(format_string):
|
||||
if field_name and (field_name == key or field_name.startswith(key + '.')):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
class Tristate(Enum):
|
||||
"""Tri-state to encode whether we should save certain information, i.e. videos, captions, comments or geotags.
|
||||
|
||||
:attr:`never`
|
||||
Do not save, even if the information is available without any additional request,
|
||||
|
||||
:attr:`no_extra_query`
|
||||
Save if and only if available without doing additional queries,
|
||||
|
||||
:attr:`always`
|
||||
Save (and query, if neccessary).
|
||||
"""
|
||||
never = 0
|
||||
no_extra_query = 1
|
||||
always = 2
|
||||
|
||||
|
||||
def _requires_login(func: Callable) -> Callable:
|
||||
"""Decorator to raise an exception if herewith-decorated function is called without being logged in"""
|
||||
@wraps(func)
|
||||
def call(instaloader, *args, **kwargs):
|
||||
if not instaloader.context.is_logged_in:
|
||||
raise LoginRequiredException("--login=USERNAME required.")
|
||||
return func(instaloader, *args, **kwargs)
|
||||
# pylint:disable=no-member
|
||||
call.__doc__ += ":raises LoginRequiredException: If called without being logged in.\n"
|
||||
return call
|
||||
|
||||
|
||||
class _PathPattern(str):
|
||||
"""Class overriding :meth:`str.format` for character substitution in paths for Windows, see issue #84."""
|
||||
|
||||
def format(self, *args: Any, **kwargs: Any) -> str:
|
||||
ret = super().format(*args, **kwargs)
|
||||
return ret.replace(':', '\ua789') if platform.system() == 'Windows' else ret
|
||||
|
||||
|
||||
class Instaloader:
|
||||
|
||||
def __init__(self,
|
||||
sleep: bool = True, quiet: bool = False,
|
||||
user_agent: Optional[str] = None,
|
||||
dirname_pattern: Optional[str] = None,
|
||||
filename_pattern: Optional[str] = None,
|
||||
download_videos: Tristate = Tristate.always,
|
||||
download_video_thumbnails: Tristate = Tristate.always,
|
||||
download_geotags: Tristate = Tristate.no_extra_query,
|
||||
save_captions: Tristate = Tristate.no_extra_query,
|
||||
download_comments: Tristate = Tristate.no_extra_query,
|
||||
save_metadata: Tristate = Tristate.never,
|
||||
max_connection_attempts: int = 3):
|
||||
|
||||
self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts)
|
||||
|
||||
# configuration parameters
|
||||
self.dirname_pattern = _PathPattern(dirname_pattern if dirname_pattern is not None else '{target}')
|
||||
if filename_pattern is not None:
|
||||
filename_pattern = re.sub(r"({(?:post\.)?date)([:}])", r"\1_utc\2", filename_pattern)
|
||||
self.filename_pattern_old = filename_pattern.replace('{date_utc}', '{date_utc:%Y-%m-%d_%H-%M-%S}')
|
||||
self.filename_pattern_old = _PathPattern(re.sub(r"(?i)({(?:post\.)?date_utc:[^}]*?)_UTC",
|
||||
r"\1", self.filename_pattern_old))
|
||||
filename_pattern = re.sub(r"(?i)({(date_utc|post\.date_utc):(?![^}]*UTC[^}]*).*?)}",
|
||||
r"\1_UTC}", filename_pattern)
|
||||
self.filename_pattern = _PathPattern(filename_pattern.replace('{date_utc}',
|
||||
'{date_utc:%Y-%m-%d_%H-%M-%S_UTC}'))
|
||||
else:
|
||||
self.filename_pattern = _PathPattern('{date_utc:%Y-%m-%d_%H-%M-%S_UTC}')
|
||||
self.filename_pattern_old = _PathPattern('{date_utc:%Y-%m-%d_%H-%M-%S}')
|
||||
self.download_videos = download_videos
|
||||
self.download_video_thumbnails = download_video_thumbnails
|
||||
self.download_geotags = download_geotags
|
||||
self.save_captions = save_captions
|
||||
self.download_comments = download_comments
|
||||
self.save_metadata = save_metadata
|
||||
|
||||
@contextmanager
|
||||
def anonymous_copy(self):
|
||||
"""Yield an anonymous, otherwise equally-configured copy of an Instaloader instance; Then copy its error log."""
|
||||
new_loader = Instaloader(self.context.sleep, self.context.quiet, self.context.user_agent,
|
||||
self.dirname_pattern, self.filename_pattern,
|
||||
self.download_videos,
|
||||
self.download_video_thumbnails,
|
||||
self.download_geotags,
|
||||
self.save_captions, self.download_comments,
|
||||
self.save_metadata, self.context.max_connection_attempts)
|
||||
new_loader.context.previous_queries = self.context.previous_queries
|
||||
yield new_loader
|
||||
self.context.error_log.extend(new_loader.context.error_log)
|
||||
new_loader.context.error_log = [] # avoid double-printing of errors
|
||||
self.context.previous_queries = new_loader.context.previous_queries
|
||||
new_loader.close()
|
||||
|
||||
def close(self):
|
||||
self.context.close()
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, *args):
|
||||
self.close()
|
||||
|
||||
def get_username_by_id(self, profile_id: int) -> str:
|
||||
"""To get the current username of a profile, given its unique ID, this function can be used."""
|
||||
data = self.context.graphql_query("472f257a40c653c64c666ce877d59d2b",
|
||||
{'id': str(profile_id), 'first': 1})['data']['user']
|
||||
if data:
|
||||
data = data["edge_owner_to_timeline_media"]
|
||||
else:
|
||||
raise ProfileNotExistsException("No profile found, the user may have blocked you (ID: " +
|
||||
str(profile_id) + ").")
|
||||
if not data['edges']:
|
||||
if data['count'] == 0:
|
||||
raise ProfileHasNoPicsException("Profile with ID {0}: no pics found.".format(str(profile_id)))
|
||||
else:
|
||||
raise LoginRequiredException("Login required to determine username (ID: " + str(profile_id) + ").")
|
||||
else:
|
||||
return Post.from_mediaid(self.context, int(data['edges'][0]["node"]["id"])).owner_username
|
||||
|
||||
def get_id_by_username(self, profile: str) -> int:
|
||||
"""Each Instagram profile has its own unique ID which stays unmodified even if a user changes
|
||||
his/her username. To get said ID, given the profile's name, you may call this function."""
|
||||
return Profile(self.context, profile).userid
|
||||
|
||||
@_requires_login
|
||||
def get_followers(self, profile: str) -> Iterator[Dict[str, Any]]:
|
||||
"""
|
||||
Retrieve list of followers of given profile.
|
||||
To use this, one needs to be logged in and private profiles has to be followed,
|
||||
otherwise this returns an empty list.
|
||||
|
||||
:param profile: Name of profile to lookup followers.
|
||||
"""
|
||||
yield from self.context.graphql_node_list("37479f2b8209594dde7facb0d904896a",
|
||||
{'id': str(self.get_id_by_username(profile))},
|
||||
'https://www.instagram.com/' + profile + '/',
|
||||
lambda d: d['data']['user']['edge_followed_by'])
|
||||
|
||||
@_requires_login
|
||||
def get_followees(self, profile: str) -> Iterator[Dict[str, Any]]:
|
||||
"""
|
||||
Retrieve list of followees (followings) of given profile.
|
||||
To use this, one needs to be logged in and private profiles has to be followed,
|
||||
otherwise this returns an empty list.
|
||||
|
||||
:param profile: Name of profile to lookup followers.
|
||||
"""
|
||||
yield from self.context.graphql_node_list("58712303d941c6855d4e888c5f0cd22f",
|
||||
{'id': str(self.get_id_by_username(profile))},
|
||||
'https://www.instagram.com/' + profile + '/',
|
||||
lambda d: d['data']['user']['edge_follow'])
|
||||
|
||||
def download_pic(self, filename: str, url: str, mtime: datetime,
|
||||
filename_alt: Optional[str] = None, filename_suffix: Optional[str] = None) -> bool:
|
||||
"""Downloads and saves picture with given url under given directory with given timestamp.
|
||||
Returns true, if file was actually downloaded, i.e. updated."""
|
||||
urlmatch = re.search('\\.[a-z0-9]*\\?', url)
|
||||
file_extension = url[-3:] if urlmatch is None else urlmatch.group(0)[1:-1]
|
||||
if filename_suffix is not None:
|
||||
filename += '_' + filename_suffix
|
||||
if filename_alt is not None:
|
||||
filename_alt += '_' + filename_suffix
|
||||
filename += '.' + file_extension
|
||||
if os.path.isfile(filename):
|
||||
self.context.log(filename + ' exists', end=' ', flush=True)
|
||||
return False
|
||||
if filename_alt is not None:
|
||||
filename_alt += '.' + file_extension
|
||||
if os.path.isfile(filename_alt):
|
||||
self.context.log(filename_alt + 'exists', end=' ', flush=True)
|
||||
return False
|
||||
self.context.get_and_write_raw(url, filename)
|
||||
os.utime(filename, (datetime.now().timestamp(), mtime.timestamp()))
|
||||
return True
|
||||
|
||||
def save_metadata_json(self, filename: str, post: Post) -> None:
|
||||
"""Saves metadata JSON file of a :class:`Post`."""
|
||||
filename += '.json'
|
||||
with open(filename, 'w') as fp:
|
||||
json.dump(post, fp=fp, indent=4, default=Post.json_encoder)
|
||||
self.context.log('json', end=' ', flush=True)
|
||||
|
||||
def update_comments(self, filename: str, post: Post, filename_alt: Optional[str] = None) -> None:
|
||||
try:
|
||||
filename_current = filename + '_comments.json'
|
||||
comments = json.load(open(filename_current))
|
||||
except FileNotFoundError:
|
||||
try:
|
||||
filename_current = filename_alt + '_comments.json'
|
||||
comments = json.load(open(filename_current))
|
||||
except (FileNotFoundError, TypeError):
|
||||
filename_current = filename + '_comments.json'
|
||||
comments = list()
|
||||
comments.extend(post.get_comments())
|
||||
if comments:
|
||||
with open(filename_current, 'w') as file:
|
||||
comments_list = sorted(sorted(list(comments), key=lambda t: t['id']),
|
||||
key=lambda t: t['created_at'], reverse=True)
|
||||
unique_comments_list = [comments_list[0]]
|
||||
#for comment in comments_list:
|
||||
# if unique_comments_list[-1]['id'] != comment['id']:
|
||||
# unique_comments_list.append(comment)
|
||||
#file.write(json.dumps(unique_comments_list, indent=4))
|
||||
for x, y in zip(comments_list[:-1], comments_list[1:]):
|
||||
if x['id'] != y['id']:
|
||||
unique_comments_list.append(y)
|
||||
file.write(json.dumps(unique_comments_list, indent=4))
|
||||
os.rename(filename_current, filename + '_comments.json')
|
||||
self.context.log('comments', end=' ', flush=True)
|
||||
|
||||
def save_caption(self, filename: str, mtime: datetime, caption: str, filename_alt: Optional[str] = None) -> None:
|
||||
"""Updates picture caption"""
|
||||
filename += '.txt'
|
||||
if filename_alt is not None:
|
||||
filename_alt += '.txt'
|
||||
pcaption = caption.replace('\n', ' ').strip()
|
||||
caption = caption.encode("UTF-8")
|
||||
pcaption = '[' + ((pcaption[:29] + u"\u2026") if len(pcaption) > 31 else pcaption) + ']'
|
||||
with suppress(FileNotFoundError):
|
||||
try:
|
||||
with open(filename, 'rb') as file:
|
||||
file_caption = file.read()
|
||||
except FileNotFoundError:
|
||||
if filename_alt is not None:
|
||||
with open(filename_alt, 'rb') as file:
|
||||
file_caption = file.read()
|
||||
if file_caption.replace(b'\r\n', b'\n') == caption.replace(b'\r\n', b'\n'):
|
||||
try:
|
||||
self.context.log(pcaption + ' unchanged', end=' ', flush=True)
|
||||
except UnicodeEncodeError:
|
||||
self.context.log('txt unchanged', end=' ', flush=True)
|
||||
return None
|
||||
else:
|
||||
def get_filename(file, index):
|
||||
return file if index == 0 else (file[:-4] + '_old_' +
|
||||
(str(0) if index < 10 else str()) + str(index) + file[-4:])
|
||||
|
||||
i = 0
|
||||
file_exists_list = []
|
||||
while True:
|
||||
file_exists_list.append(1 if os.path.isfile(get_filename(filename, i)) else 0)
|
||||
if not file_exists_list[i] and filename_alt is not None:
|
||||
file_exists_list[i] = 2 if os.path.isfile(get_filename(filename_alt, i)) else 0
|
||||
if not file_exists_list[i]:
|
||||
break
|
||||
i = i + 1
|
||||
for index in range(i, 0, -1):
|
||||
os.rename(get_filename(filename if file_exists_list[index - 1] % 2 else filename_alt, index - 1),
|
||||
get_filename(filename, index))
|
||||
try:
|
||||
self.context.log(pcaption + ' updated', end=' ', flush=True)
|
||||
except UnicodeEncodeError:
|
||||
self.context.log('txt updated', end=' ', flush=True)
|
||||
try:
|
||||
self.context.log(pcaption, end=' ', flush=True)
|
||||
except UnicodeEncodeError:
|
||||
self.context.log('txt', end=' ', flush=True)
|
||||
with open(filename, 'wb') as text_file:
|
||||
shutil.copyfileobj(BytesIO(caption), text_file)
|
||||
os.utime(filename, (datetime.now().timestamp(), mtime.timestamp()))
|
||||
|
||||
def save_location(self, filename: str, location_json: Dict[str, str], mtime: datetime) -> None:
|
||||
filename += '_location.txt'
|
||||
location_string = (location_json["name"] + "\n" +
|
||||
"https://maps.google.com/maps?q={0},{1}&ll={0},{1}\n".format(location_json["lat"],
|
||||
location_json["lng"]))
|
||||
with open(filename, 'wb') as text_file:
|
||||
shutil.copyfileobj(BytesIO(location_string.encode()), text_file)
|
||||
os.utime(filename, (datetime.now().timestamp(), mtime.timestamp()))
|
||||
self.context.log('geo', end=' ', flush=True)
|
||||
|
||||
def download_profilepic(self, profile: Profile) -> None:
|
||||
"""Downloads and saves profile pic."""
|
||||
|
||||
def _epoch_to_string(epoch: datetime) -> str:
|
||||
return epoch.strftime('%Y-%m-%d_%H-%M-%S')
|
||||
|
||||
profile_pic_url = profile.get_profile_pic_url()
|
||||
with self.context.get_anonymous_session() as anonymous_session:
|
||||
date_object = datetime.strptime(anonymous_session.head(profile_pic_url).headers["Last-Modified"],
|
||||
'%a, %d %b %Y %H:%M:%S GMT')
|
||||
if ((format_string_contains_key(self.dirname_pattern, 'profile') or
|
||||
format_string_contains_key(self.dirname_pattern, 'target'))):
|
||||
filename = '{0}/{1}_UTC_profile_pic.{2}'.format(self.dirname_pattern.format(profile=profile.username.lower(),
|
||||
target=profile.username.lower()),
|
||||
_epoch_to_string(date_object), profile_pic_url[-3:])
|
||||
else:
|
||||
filename = '{0}/{1}_{2}_UTC_profile_pic.{3}'.format(self.dirname_pattern.format(), profile.username.lower(),
|
||||
_epoch_to_string(date_object), profile_pic_url[-3:])
|
||||
if os.path.isfile(filename):
|
||||
self.context.log(filename + ' already exists')
|
||||
return None
|
||||
self.context.get_and_write_raw(profile_pic_url, filename)
|
||||
os.utime(filename, (datetime.now().timestamp(), date_object.timestamp()))
|
||||
self.context.log('') # log output of _get_and_write_raw() does not produce \n
|
||||
|
||||
@_requires_login
|
||||
def save_session_to_file(self, filename: Optional[str] = None) -> None:
|
||||
"""Saves internally stored :class:`requests.Session` object."""
|
||||
if filename is None:
|
||||
filename = get_default_session_filename(self.context.username)
|
||||
dirname = os.path.dirname(filename)
|
||||
if dirname != '' and not os.path.exists(dirname):
|
||||
os.makedirs(dirname)
|
||||
os.chmod(dirname, 0o700)
|
||||
with open(filename, 'wb') as sessionfile:
|
||||
os.chmod(filename, 0o600)
|
||||
self.context.save_session_to_file(sessionfile)
|
||||
self.context.log("Saved session to %s." % filename)
|
||||
|
||||
def load_session_from_file(self, username: str, filename: Optional[str] = None) -> None:
|
||||
"""Internally stores :class:`requests.Session` object loaded from file.
|
||||
|
||||
If filename is None, the file with the default session path is loaded.
|
||||
|
||||
:raises FileNotFoundError: If the file does not exist.
|
||||
"""
|
||||
if filename is None:
|
||||
filename = get_default_session_filename(username)
|
||||
with open(filename, 'rb') as sessionfile:
|
||||
self.context.load_session_from_file(username, sessionfile)
|
||||
self.context.log("Loaded session from %s." % filename)
|
||||
|
||||
def test_login(self) -> Optional[str]:
|
||||
"""Returns the Instagram username to which given :class:`requests.Session` object belongs, or None."""
|
||||
return self.context.test_login()
|
||||
|
||||
def login(self, user: str, passwd: str) -> None:
|
||||
"""Log in to instagram with given username and password and internally store session object"""
|
||||
self.context.login(user, passwd)
|
||||
|
||||
def download_post(self, post: Post, target: str) -> bool:
|
||||
"""
|
||||
Download everything associated with one instagram post node, i.e. picture, caption and video.
|
||||
|
||||
:param post: Post to download.
|
||||
:param target: Target name, i.e. profile name, #hashtag, :feed; for filename.
|
||||
:return: True if something was downloaded, False otherwise, i.e. file was already there
|
||||
"""
|
||||
|
||||
# Format dirname and filename. post.owner_username might do an additional request, so only access it, if
|
||||
# {profile} is part of the dirname pattern or filename pattern.
|
||||
needs_profilename = (format_string_contains_key(self.dirname_pattern, 'profile') or
|
||||
format_string_contains_key(self.filename_pattern, 'profile'))
|
||||
profilename = post.owner_username if needs_profilename else None
|
||||
dirname = self.dirname_pattern.format(profile=profilename, target=target.lower())
|
||||
filename = dirname + '/' + self.filename_pattern.format(profile=profilename, target=target.lower(),
|
||||
date_utc=post.date_utc,
|
||||
shortcode=post.shortcode,
|
||||
post=post)
|
||||
filename_old = dirname + '/' + self.filename_pattern_old.replace("{post.date_utc", "{date_utc") \
|
||||
.format(profile=profilename, target=target.lower(),
|
||||
date_utc=post.date_local,
|
||||
shortcode=post.shortcode,
|
||||
post=post)
|
||||
os.makedirs(os.path.dirname(filename), exist_ok=True)
|
||||
|
||||
# Download the image(s) / video thumbnail and videos within sidecars if desired
|
||||
downloaded = False
|
||||
if post.typename == 'GraphSidecar':
|
||||
edge_number = 1
|
||||
for edge in post.get_sidecar_edges():
|
||||
# Download picture or video thumbnail
|
||||
if not edge['node']['is_video'] or self.download_video_thumbnails is Tristate.always:
|
||||
downloaded |= self.download_pic(filename=filename,
|
||||
filename_alt=filename_old,
|
||||
url=edge['node']['display_url'],
|
||||
mtime=post.date_local,
|
||||
filename_suffix=str(edge_number))
|
||||
# Additionally download video if available and desired
|
||||
if edge['node']['is_video'] and self.download_videos is Tristate.always:
|
||||
downloaded |= self.download_pic(filename=filename,
|
||||
filename_alt=filename_old,
|
||||
url=edge['node']['video_url'],
|
||||
mtime=post.date_local,
|
||||
filename_suffix=str(edge_number))
|
||||
edge_number += 1
|
||||
elif post.typename == 'GraphImage':
|
||||
downloaded = self.download_pic(filename=filename, filename_alt=filename_old,
|
||||
url=post.url, mtime=post.date_local)
|
||||
elif post.typename == 'GraphVideo':
|
||||
if self.download_video_thumbnails is Tristate.always:
|
||||
downloaded = self.download_pic(filename=filename, filename_alt=filename_old,
|
||||
url=post.url, mtime=post.date_local)
|
||||
else:
|
||||
self.context.error("Warning: {0} has unknown typename: {1}".format(post, post.typename))
|
||||
|
||||
# Save caption if desired
|
||||
if self.save_captions is not Tristate.never:
|
||||
if post.caption:
|
||||
self.save_caption(filename=filename, filename_alt=filename_old,
|
||||
mtime=post.date_local, caption=post.caption)
|
||||
else:
|
||||
self.context.log("<no caption>", end=' ', flush=True)
|
||||
|
||||
# Download video if desired
|
||||
if post.is_video and self.download_videos is Tristate.always:
|
||||
downloaded |= self.download_pic(filename=filename, filename_alt=filename_old,
|
||||
url=post.video_url, mtime=post.date_local)
|
||||
|
||||
# Download geotags if desired
|
||||
if self.download_geotags is Tristate.always:
|
||||
location = post.get_location()
|
||||
if location:
|
||||
self.save_location(filename, location, post.date_local)
|
||||
|
||||
# Update comments if desired
|
||||
if self.download_comments is Tristate.always:
|
||||
self.update_comments(filename=filename, filename_alt=filename_old, post=post)
|
||||
|
||||
# Save metadata as JSON if desired. It might require an extra query, depending on which information has been
|
||||
# already obtained. Regarding Tristate interpretation, we always assume that it requires an extra query.
|
||||
if self.save_metadata is Tristate.always:
|
||||
self.save_metadata_json(filename, post)
|
||||
|
||||
self.context.log()
|
||||
return downloaded
|
||||
|
||||
@_requires_login
|
||||
def get_stories(self, userids: Optional[List[int]] = None) -> Iterator[Dict[str, Any]]:
|
||||
"""Get available stories from followees or all stories of users whose ID are given.
|
||||
Does not mark stories as seen.
|
||||
To use this, one needs to be logged in
|
||||
|
||||
:param userids: List of user IDs to be processed in terms of downloading their stories, or None.
|
||||
"""
|
||||
|
||||
if userids is None:
|
||||
data = self.context.graphql_query("d15efd8c0c5b23f0ef71f18bf363c704",
|
||||
{"only_stories": True})["data"]["user"]
|
||||
if data is None:
|
||||
raise BadResponseException('Bad stories reel JSON.')
|
||||
userids = list(edge["node"]["id"] for edge in data["feed_reels_tray"]["edge_reels_tray_to_reel"]["edges"])
|
||||
|
||||
stories = self.context.graphql_query("bf41e22b1c4ba4c9f31b844ebb7d9056",
|
||||
{"reel_ids": userids, "precomposed_overlay": False})["data"]
|
||||
|
||||
for media in stories["reels_media"]:
|
||||
yield media
|
||||
|
||||
@_requires_login
|
||||
def download_stories(self,
|
||||
userids: Optional[List[int]] = None,
|
||||
fast_update: bool = False,
|
||||
filename_target: str = ':stories') -> None:
|
||||
"""
|
||||
Download available stories from user followees or all stories of users whose ID are given.
|
||||
Does not mark stories as seen.
|
||||
To use this, one needs to be logged in
|
||||
|
||||
:param userids: List of user IDs to be processed in terms of downloading their stories
|
||||
:param fast_update: If true, abort when first already-downloaded picture is encountered
|
||||
:param filename_target: Replacement for {target} in dirname_pattern and filename_pattern
|
||||
"""
|
||||
|
||||
if not userids:
|
||||
self.context.log("Retrieving all visible stories...")
|
||||
|
||||
if format_string_contains_key(self.filename_pattern, 'post'):
|
||||
raise InvalidArgumentException("The \"post\" keyword is not supported in the filename pattern when "
|
||||
"downloading stories.")
|
||||
|
||||
for user_stories in self.get_stories(userids):
|
||||
if "items" not in user_stories:
|
||||
raise BadResponseException('Bad reel media JSON.')
|
||||
name = user_stories["user"]["username"].lower()
|
||||
self.context.log("Retrieving stories from profile {}.".format(name))
|
||||
totalcount = len(user_stories["items"])
|
||||
count = 1
|
||||
for item in user_stories["items"]:
|
||||
self.context.log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
|
||||
count += 1
|
||||
with self.context.error_catcher('Download story from user {}'.format(name)):
|
||||
downloaded = self.download_story(item, filename_target, name)
|
||||
if fast_update and not downloaded:
|
||||
break
|
||||
|
||||
def download_story(self, item: Dict[str, Any], target: str, profile: str) -> bool:
|
||||
"""Download one user story.
|
||||
|
||||
:param item: Story item, as in story['items'] for story in :meth:`get_stories`
|
||||
:param target: Replacement for {target} in dirname_pattern and filename_pattern
|
||||
:param profile: Owner profile name
|
||||
:return: True if something was downloaded, False otherwise, i.e. file was already there
|
||||
"""
|
||||
|
||||
shortcode = mediaid_to_shortcode(int(item["id"]))
|
||||
date_local = datetime.fromtimestamp(item["taken_at_timestamp"])
|
||||
date_utc = datetime.utcfromtimestamp(item["taken_at_timestamp"])
|
||||
dirname = self.dirname_pattern.format(profile=profile, target=target)
|
||||
filename = dirname + '/' + self.filename_pattern.format(profile=profile, target=target,
|
||||
date_utc=date_utc,
|
||||
shortcode=shortcode)
|
||||
filename_old = dirname + '/' + self.filename_pattern_old.format(profile=profile, target=target,
|
||||
date_utc=date_local,
|
||||
shortcode=shortcode)
|
||||
os.makedirs(os.path.dirname(filename), exist_ok=True)
|
||||
downloaded = False
|
||||
if not item["is_video"] or self.download_video_thumbnails is Tristate.always:
|
||||
url = item["display_resources"][-1]["src"]
|
||||
downloaded = self.download_pic(filename=filename,
|
||||
filename_alt=filename_old,
|
||||
url=url,
|
||||
mtime=date_local)
|
||||
if item["is_video"] and self.download_videos is Tristate.always:
|
||||
downloaded |= self.download_pic(filename=filename,
|
||||
filename_alt=filename_old,
|
||||
url=item["video_resources"][-1]["src"],
|
||||
mtime=date_local)
|
||||
self.context.log()
|
||||
return downloaded
|
||||
|
||||
@_requires_login
|
||||
def get_feed_posts(self) -> Iterator[Post]:
|
||||
"""Get Posts of the user's feed."""
|
||||
|
||||
data = self.context.graphql_query("d6f4427fbe92d846298cf93df0b937d3", {})["data"]
|
||||
|
||||
while True:
|
||||
feed = data["user"]["edge_web_feed_timeline"]
|
||||
yield from (Post(self.context, edge["node"]) for edge in feed["edges"]
|
||||
if not edge["node"]["__typename"] == "GraphSuggestedUserFeedUnit")
|
||||
if not feed["page_info"]["has_next_page"]:
|
||||
break
|
||||
data = self.context.graphql_query("d6f4427fbe92d846298cf93df0b937d3",
|
||||
{'fetch_media_item_count': 12,
|
||||
'fetch_media_item_cursor': feed["page_info"]["end_cursor"],
|
||||
'fetch_comment_count': 4,
|
||||
'fetch_like': 10,
|
||||
'has_stories': False})["data"]
|
||||
|
||||
@_requires_login
|
||||
def download_feed_posts(self, max_count: int = None, fast_update: bool = False,
|
||||
filter_func: Optional[Callable[[Post], bool]] = None) -> None:
|
||||
"""
|
||||
Download pictures from the user's feed.
|
||||
|
||||
Example to download up to the 20 pics the user last liked::
|
||||
|
||||
loader = Instaloader()
|
||||
loader.load_session_from_file('USER')
|
||||
loader.download_feed_posts(max_count=20, fast_update=True,
|
||||
filter_func=lambda post: post.viewer_has_liked)
|
||||
|
||||
:param max_count: Maximum count of pictures to download
|
||||
:param fast_update: If true, abort when first already-downloaded picture is encountered
|
||||
:param filter_func: function(post), which returns True if given picture should be downloaded
|
||||
"""
|
||||
self.context.log("Retrieving pictures from your feed...")
|
||||
count = 1
|
||||
for post in self.get_feed_posts():
|
||||
if max_count is not None and count > max_count:
|
||||
break
|
||||
name = post.owner_username
|
||||
if filter_func is not None and not filter_func(post):
|
||||
self.context.log("<pic by %s skipped>" % name, flush=True)
|
||||
continue
|
||||
self.context.log("[%3i] %s " % (count, name), end="", flush=True)
|
||||
count += 1
|
||||
with self.context.error_catcher('Download feed'):
|
||||
downloaded = self.download_post(post, target=':feed')
|
||||
if fast_update and not downloaded:
|
||||
break
|
||||
|
||||
@_requires_login
|
||||
def download_saved_posts(self, max_count: int = None, fast_update: bool = False,
|
||||
filter_func: Optional[Callable[[Post], bool]] = None) -> None:
|
||||
"""Download user's saved pictures.
|
||||
|
||||
:param max_count: Maximum count of pictures to download
|
||||
:param fast_update: If true, abort when first already-downloaded picture is encountered
|
||||
:param filter_func: function(post), which returns True if given picture should be downloaded
|
||||
"""
|
||||
self.context.log("Retrieving saved posts...")
|
||||
count = 1
|
||||
for post in Profile(self.context, self.context.username).get_saved_posts():
|
||||
if max_count is not None and count > max_count:
|
||||
break
|
||||
name = post.owner_username
|
||||
if filter_func is not None and not filter_func(post):
|
||||
self.context.log("<pic by {} skipped".format(name), flush=True)
|
||||
continue
|
||||
self.context.log("[{:>3}] {} ".format(count, name), end=str(), flush=True)
|
||||
count += 1
|
||||
with self.context.error_catcher('Download saved posts'):
|
||||
downloaded = self.download_post(post, target=':saved')
|
||||
if fast_update and not downloaded:
|
||||
break
|
||||
|
||||
@_requires_login
|
||||
def get_explore_posts(self) -> Iterator[Post]:
|
||||
"""Get Posts which are worthy of exploring suggested by Instagram."""
|
||||
yield from (Post(self.context, node)
|
||||
for node in self.context.graphql_node_list("df0dcc250c2b18d9fd27c5581ef33c7c",
|
||||
{}, 'https://www.instagram.com/explore/',
|
||||
lambda d: d['data']['user']['edge_web_discover_media']))
|
||||
|
||||
def get_hashtag_posts(self, hashtag: str) -> Iterator[Post]:
|
||||
"""Get Posts associated with a #hashtag."""
|
||||
yield from (Post(self.context, node)
|
||||
for node in self.context.graphql_node_list("298b92c8d7cad703f7565aa892ede943",
|
||||
{'tag_name': hashtag},
|
||||
'https://www.instagram.com/explore/tags/{0}/'
|
||||
.format(hashtag),
|
||||
lambda d: d['data']['hashtag']['edge_hashtag_to_media']))
|
||||
|
||||
def download_hashtag(self, hashtag: str,
|
||||
max_count: Optional[int] = None,
|
||||
filter_func: Optional[Callable[[Post], bool]] = None,
|
||||
fast_update: bool = False) -> None:
|
||||
"""Download pictures of one hashtag.
|
||||
|
||||
To download the last 30 pictures with hashtag #cat, do::
|
||||
|
||||
loader = Instaloader()
|
||||
loader.download_hashtag('cat', max_count=30)
|
||||
|
||||
:param hashtag: Hashtag to download, without leading '#'
|
||||
:param max_count: Maximum count of pictures to download
|
||||
:param filter_func: function(post), which returns True if given picture should be downloaded
|
||||
:param fast_update: If true, abort when first already-downloaded picture is encountered
|
||||
"""
|
||||
hashtag = hashtag.lower()
|
||||
self.context.log("Retrieving pictures with hashtag {}...".format(hashtag))
|
||||
count = 1
|
||||
for post in self.get_hashtag_posts(hashtag):
|
||||
if max_count is not None and count > max_count:
|
||||
break
|
||||
self.context.log('[{0:3d}] #{1} '.format(count, hashtag), end='', flush=True)
|
||||
if filter_func is not None and not filter_func(post):
|
||||
self.context.log('<skipped>')
|
||||
continue
|
||||
count += 1
|
||||
with self.context.error_catcher('Download hashtag #{}'.format(hashtag)):
|
||||
downloaded = self.download_post(post, target='#' + hashtag)
|
||||
if fast_update and not downloaded:
|
||||
break
|
||||
|
||||
def check_profile_id(self, profile_name: str, profile: Optional[Profile] = None) -> str:
|
||||
"""
|
||||
Consult locally stored ID of profile with given name, check whether ID matches and whether name
|
||||
has changed and return current name of the profile, and store ID of profile.
|
||||
|
||||
:param profile_name: Profile name
|
||||
:param profile: The :class:`Profile`, or None if the profile was not found
|
||||
:return: current profile name, profile id
|
||||
"""
|
||||
profile_exists = profile is not None
|
||||
if ((format_string_contains_key(self.dirname_pattern, 'profile') or
|
||||
format_string_contains_key(self.dirname_pattern, 'target'))):
|
||||
id_filename = '{0}/id'.format(self.dirname_pattern.format(profile=profile_name.lower(),
|
||||
target=profile_name.lower()))
|
||||
else:
|
||||
id_filename = '{0}/{1}_id'.format(self.dirname_pattern.format(), profile_name.lower())
|
||||
try:
|
||||
with open(id_filename, 'rb') as id_file:
|
||||
profile_id = int(id_file.read())
|
||||
if (not profile_exists) or \
|
||||
(profile_id != profile.userid):
|
||||
if profile_exists:
|
||||
self.context.log("Profile {0} does not match the stored unique ID {1}.".format(profile_name,
|
||||
profile_id))
|
||||
else:
|
||||
self.context.log("Trying to find profile {0} using its unique ID {1}.".format(profile_name,
|
||||
profile_id))
|
||||
newname = self.get_username_by_id(profile_id)
|
||||
self.context.log("Profile {0} has changed its name to {1}.".format(profile_name, newname))
|
||||
if ((format_string_contains_key(self.dirname_pattern, 'profile') or
|
||||
format_string_contains_key(self.dirname_pattern, 'target'))):
|
||||
os.rename(self.dirname_pattern.format(profile=profile_name.lower(),
|
||||
target=profile_name.lower()),
|
||||
self.dirname_pattern.format(profile=newname.lower(),
|
||||
target=newname.lower()))
|
||||
else:
|
||||
os.rename('{0}/{1}_id'.format(self.dirname_pattern.format(), profile_name.lower()),
|
||||
'{0}/{1}_id'.format(self.dirname_pattern.format(), newname.lower()))
|
||||
return newname
|
||||
return profile_name
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
if profile_exists:
|
||||
os.makedirs(self.dirname_pattern.format(profile=profile_name.lower(),
|
||||
target=profile_name.lower()), exist_ok=True)
|
||||
with open(id_filename, 'w') as text_file:
|
||||
text_file.write(str(profile.userid) + "\n")
|
||||
self.context.log("Stored ID {0} for profile {1}.".format(profile.userid, profile_name))
|
||||
return profile_name
|
||||
raise ProfileNotExistsException("Profile {0} does not exist.".format(profile_name))
|
||||
|
||||
def download_profile(self, profile_name: str,
|
||||
profile_pic: bool = True, profile_pic_only: bool = False,
|
||||
fast_update: bool = False,
|
||||
download_stories: bool = False, download_stories_only: bool = False,
|
||||
filter_func: Optional[Callable[[Post], bool]] = None) -> None:
|
||||
"""Download one profile"""
|
||||
profile_name = profile_name.lower()
|
||||
|
||||
# Get profile main page json
|
||||
profile = None
|
||||
with suppress(ProfileNotExistsException):
|
||||
# ProfileNotExistsException is raised again later in check_profile_id() when we search the profile, so we
|
||||
# must suppress it here.
|
||||
profile = Profile(self.context, profile_name)
|
||||
|
||||
# check if profile does exist or name has changed since last download
|
||||
# and update name and json data if necessary
|
||||
name_updated = self.check_profile_id(profile_name, profile)
|
||||
if name_updated != profile_name:
|
||||
profile_name = name_updated
|
||||
profile = Profile(self.context, profile_name)
|
||||
|
||||
if self.context.is_logged_in and profile.has_blocked_viewer and not profile.is_private:
|
||||
# raising ProfileNotExistsException invokes "trying again anonymously" logic
|
||||
raise ProfileNotExistsException("Profile {} has blocked you".format(profile_name))
|
||||
|
||||
# Download profile picture
|
||||
if profile_pic or profile_pic_only:
|
||||
with self.context.error_catcher('Download profile picture of {}'.format(profile_name)):
|
||||
self.download_profilepic(profile)
|
||||
if profile_pic_only:
|
||||
return
|
||||
|
||||
# Catch some errors
|
||||
if profile.is_private:
|
||||
if not self.context.is_logged_in:
|
||||
raise LoginRequiredException("profile %s requires login" % profile_name)
|
||||
if not profile.followed_by_viewer and \
|
||||
self.context.username != profile.username:
|
||||
raise PrivateProfileNotFollowedException("Profile %s: private but not followed." % profile_name)
|
||||
else:
|
||||
if self.context.is_logged_in and not (download_stories or download_stories_only):
|
||||
self.context.log("profile %s could also be downloaded anonymously." % profile_name)
|
||||
|
||||
# Download stories, if requested
|
||||
if download_stories or download_stories_only:
|
||||
with self.context.error_catcher("Download stories of {}".format(profile_name)):
|
||||
self.download_stories(userids=[profile.userid], filename_target=profile_name, fast_update=fast_update)
|
||||
if download_stories_only:
|
||||
return
|
||||
|
||||
# Iterate over pictures and download them
|
||||
self.context.log("Retrieving posts from profile {}.".format(profile_name))
|
||||
totalcount = profile.mediacount
|
||||
count = 1
|
||||
for post in profile.get_posts():
|
||||
self.context.log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
|
||||
count += 1
|
||||
if filter_func is not None and not filter_func(post):
|
||||
self.context.log('<skipped>')
|
||||
continue
|
||||
with self.context.error_catcher('Download profile {}'.format(profile_name)):
|
||||
downloaded = self.download_post(post, target=profile_name)
|
||||
if fast_update and not downloaded:
|
||||
break
|
||||
|
||||
def interactive_login(self, username: str) -> None:
|
||||
"""Logs in and internally stores session, asking user for password interactively.
|
||||
|
||||
:raises LoginRequiredException: when in quiet mode."""
|
||||
if self.context.quiet:
|
||||
raise LoginRequiredException("Quiet mode requires given password or valid session file.")
|
||||
password = None
|
||||
while password is None:
|
||||
password = getpass.getpass(prompt="Enter Instagram password for %s: " % username)
|
||||
try:
|
||||
self.login(username, password)
|
||||
except BadCredentialsException as err:
|
||||
print(err, file=sys.stderr)
|
||||
password = None
|
342
instaloader/instaloadercontext.py
Normal file
342
instaloader/instaloadercontext.py
Normal file
@ -0,0 +1,342 @@
|
||||
import json
|
||||
import pickle
|
||||
import random
|
||||
import shutil
|
||||
import sys
|
||||
import textwrap
|
||||
import time
|
||||
import urllib.parse
|
||||
from contextlib import contextmanager
|
||||
from typing import Any, Callable, Dict, Iterator, Optional
|
||||
|
||||
import requests
|
||||
import requests.utils
|
||||
import urllib3
|
||||
|
||||
from .exceptions import *
|
||||
|
||||
GRAPHQL_PAGE_LENGTH = 200
|
||||
|
||||
|
||||
def copy_session(session: requests.Session) -> requests.Session:
|
||||
"""Duplicates a requests.Session."""
|
||||
new = requests.Session()
|
||||
new.cookies = requests.utils.cookiejar_from_dict(requests.utils.dict_from_cookiejar(session.cookies))
|
||||
new.headers = session.headers.copy()
|
||||
return new
|
||||
|
||||
|
||||
def default_user_agent() -> str:
|
||||
return 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
|
||||
'(KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36'
|
||||
|
||||
|
||||
class InstaloaderContext:
|
||||
"""Class providing methods for (error) logging and low-level communication with Instagram.
|
||||
|
||||
It is not thought to be instantiated directly, rather :class:`Instaloader` instances maintain a context
|
||||
object.
|
||||
|
||||
For logging, it provides :meth:`log`, :meth:`error`, :meth:`error_catcher`.
|
||||
|
||||
It provides low-level communication routines :meth:`get_json`, :meth:`graphql_query`, :meth:`graphql_node_list`,
|
||||
:meth:`get_and_write_raw` and implements mechanisms for rate controlling and error handling.
|
||||
|
||||
Further, it provides methods for logging in and general session handles, which are used by that routines in
|
||||
class :class:`Instaloader`.
|
||||
"""
|
||||
|
||||
def __init__(self, sleep: bool = True, quiet: bool = False,
|
||||
user_agent: Optional[str] = None, max_connection_attempts: int = 3):
|
||||
|
||||
self.user_agent = user_agent if user_agent is not None else default_user_agent()
|
||||
self._session = self.get_anonymous_session()
|
||||
self.username = None
|
||||
self.sleep = sleep
|
||||
self.quiet = quiet
|
||||
self.max_connection_attempts = max_connection_attempts
|
||||
|
||||
# error log, filled with error() and printed at the end of Instaloader.main()
|
||||
self.error_log = []
|
||||
|
||||
# For the adaption of sleep intervals (rate control)
|
||||
self.previous_queries = dict()
|
||||
|
||||
# Can be set to True for testing, disables supression of InstaloaderContext._error_catcher
|
||||
self.raise_all_errors = False
|
||||
|
||||
@property
|
||||
def is_logged_in(self) -> bool:
|
||||
"""True, if this Instaloader instance is logged in."""
|
||||
return bool(self.username)
|
||||
|
||||
def log(self, *msg, sep='', end='\n', flush=False):
|
||||
"""Log a message to stdout that can be suppressed with --quiet."""
|
||||
if not self.quiet:
|
||||
print(*msg, sep=sep, end=end, flush=flush)
|
||||
|
||||
def error(self, msg, repeat_at_end=True):
|
||||
"""Log a non-fatal error message to stderr, which is repeated at program termination.
|
||||
|
||||
:param msg: Message to be printed.
|
||||
:param repeat_at_end: Set to false if the message should be printed, but not repeated at program termination."""
|
||||
print(msg, file=sys.stderr)
|
||||
if repeat_at_end:
|
||||
self.error_log.append(msg)
|
||||
|
||||
def close(self):
|
||||
"""Print error log and close session"""
|
||||
if self.error_log and not self.quiet:
|
||||
print("\nErrors occured:", file=sys.stderr)
|
||||
for err in self.error_log:
|
||||
print(err, file=sys.stderr)
|
||||
self._session.close()
|
||||
|
||||
@contextmanager
|
||||
def error_catcher(self, extra_info: Optional[str] = None):
|
||||
"""
|
||||
Context manager to catch, print and record InstaloaderExceptions.
|
||||
|
||||
:param extra_info: String to prefix error message with."""
|
||||
try:
|
||||
yield
|
||||
except InstaloaderException as err:
|
||||
if extra_info:
|
||||
self.error('{}: {}'.format(extra_info, err))
|
||||
else:
|
||||
self.error('{}'.format(err))
|
||||
if self.raise_all_errors:
|
||||
raise err
|
||||
|
||||
def _default_http_header(self, empty_session_only: bool = False) -> Dict[str, str]:
|
||||
"""Returns default HTTP header we use for requests."""
|
||||
header = {'Accept-Encoding': 'gzip, deflate',
|
||||
'Accept-Language': 'en-US,en;q=0.8',
|
||||
'Connection': 'keep-alive',
|
||||
'Content-Length': '0',
|
||||
'Host': 'www.instagram.com',
|
||||
'Origin': 'https://www.instagram.com',
|
||||
'Referer': 'https://www.instagram.com/',
|
||||
'User-Agent': self.user_agent,
|
||||
'X-Instagram-AJAX': '1',
|
||||
'X-Requested-With': 'XMLHttpRequest'}
|
||||
if empty_session_only:
|
||||
del header['Host']
|
||||
del header['Origin']
|
||||
del header['Referer']
|
||||
del header['X-Instagram-AJAX']
|
||||
del header['X-Requested-With']
|
||||
return header
|
||||
|
||||
def get_anonymous_session(self) -> requests.Session:
|
||||
"""Returns our default anonymous requests.Session object."""
|
||||
session = requests.Session()
|
||||
session.cookies.update({'sessionid': '', 'mid': '', 'ig_pr': '1',
|
||||
'ig_vw': '1920', 'csrftoken': '',
|
||||
's_network': '', 'ds_user_id': ''})
|
||||
session.headers.update(self._default_http_header(empty_session_only=True))
|
||||
return session
|
||||
|
||||
def save_session_to_file(self, sessionfile):
|
||||
pickle.dump(requests.utils.dict_from_cookiejar(self._session.cookies), sessionfile)
|
||||
|
||||
def load_session_from_file(self, username, sessionfile):
|
||||
session = requests.Session()
|
||||
session.cookies = requests.utils.cookiejar_from_dict(pickle.load(sessionfile))
|
||||
session.headers.update(self._default_http_header())
|
||||
session.headers.update({'X-CSRFToken': session.cookies.get_dict()['csrftoken']})
|
||||
self._session = session
|
||||
self.username = username
|
||||
|
||||
def test_login(self) -> Optional[str]:
|
||||
data = self.graphql_query("d6f4427fbe92d846298cf93df0b937d3", {})
|
||||
return data["data"]["user"]["username"] if data["data"]["user"] is not None else None
|
||||
|
||||
def login(self, user, passwd):
|
||||
session = requests.Session()
|
||||
session.cookies.update({'sessionid': '', 'mid': '', 'ig_pr': '1',
|
||||
'ig_vw': '1920', 'csrftoken': '',
|
||||
's_network': '', 'ds_user_id': ''})
|
||||
session.headers.update(self._default_http_header())
|
||||
self._sleep()
|
||||
resp = session.get('https://www.instagram.com/')
|
||||
session.headers.update({'X-CSRFToken': resp.cookies['csrftoken']})
|
||||
self._sleep()
|
||||
login = session.post('https://www.instagram.com/accounts/login/ajax/',
|
||||
data={'password': passwd, 'username': user}, allow_redirects=True)
|
||||
session.headers.update({'X-CSRFToken': login.cookies['csrftoken']})
|
||||
if login.status_code == 200:
|
||||
self._session = session
|
||||
if user == self.test_login():
|
||||
self.username = user
|
||||
else:
|
||||
self.username = None
|
||||
self._session = None
|
||||
raise BadCredentialsException('Login error! Check your credentials!')
|
||||
else:
|
||||
raise ConnectionException('Login error! Connection error!')
|
||||
|
||||
def _sleep(self):
|
||||
"""Sleep a short time if self.sleep is set. Called before each request to instagram.com."""
|
||||
if self.sleep:
|
||||
time.sleep(random.uniform(0.5, 3))
|
||||
|
||||
def get_json(self, path: str, params: Dict[str, Any], host: str = 'www.instagram.com',
|
||||
session: Optional[requests.Session] = None, _attempt=1) -> Dict[str, Any]:
|
||||
"""JSON request to Instagram.
|
||||
|
||||
:param path: URL, relative to the given domain which defaults to www.instagram.com/
|
||||
:param params: GET parameters
|
||||
:param host: Domain part of the URL from where to download the requested JSON; defaults to www.instagram.com
|
||||
:param session: Session to use, or None to use self.session
|
||||
:return: Decoded response dictionary
|
||||
:raises QueryReturnedNotFoundException: When the server responds with a 404.
|
||||
:raises ConnectionException: When query repeatedly failed.
|
||||
"""
|
||||
def graphql_query_waittime(query_hash: str, untracked_queries: bool = False) -> int:
|
||||
sliding_window = 660
|
||||
timestamps = self.previous_queries.get(query_hash)
|
||||
if not timestamps:
|
||||
return sliding_window if untracked_queries else 0
|
||||
current_time = time.monotonic()
|
||||
timestamps = list(filter(lambda t: t > current_time - sliding_window, timestamps))
|
||||
self.previous_queries[query_hash] = timestamps
|
||||
if len(timestamps) < 100 and not untracked_queries:
|
||||
return 0
|
||||
return round(min(timestamps) + sliding_window - current_time) + 6
|
||||
is_graphql_query = 'query_hash' in params and 'graphql/query' in path
|
||||
if is_graphql_query:
|
||||
query_hash = params['query_hash']
|
||||
waittime = graphql_query_waittime(query_hash)
|
||||
if waittime > 0:
|
||||
self.log('\nToo many queries in the last time. Need to wait {} seconds.'.format(waittime))
|
||||
time.sleep(waittime)
|
||||
timestamp_list = self.previous_queries.get(query_hash)
|
||||
if timestamp_list is not None:
|
||||
timestamp_list.append(time.monotonic())
|
||||
else:
|
||||
self.previous_queries[query_hash] = [time.monotonic()]
|
||||
sess = session if session else self._session
|
||||
try:
|
||||
self._sleep()
|
||||
resp = sess.get('https://{0}/{1}'.format(host, path), params=params, allow_redirects=False)
|
||||
while resp.is_redirect:
|
||||
redirect_url = resp.headers['location']
|
||||
self.log('\nHTTP redirect from https://{0}/{1} to {2}'.format(host, path, redirect_url))
|
||||
if redirect_url.index('https://{}/'.format(host)) == 0:
|
||||
resp = sess.get(redirect_url if redirect_url.endswith('/') else redirect_url + '/',
|
||||
params=params, allow_redirects=False)
|
||||
else:
|
||||
break
|
||||
if resp.status_code == 404:
|
||||
raise QueryReturnedNotFoundException("404")
|
||||
if resp.status_code == 429:
|
||||
raise TooManyRequestsException("429 - Too Many Requests")
|
||||
if resp.status_code != 200:
|
||||
raise ConnectionException("HTTP error code {}.".format(resp.status_code))
|
||||
resp_json = resp.json()
|
||||
if 'status' in resp_json and resp_json['status'] != "ok":
|
||||
if 'message' in resp_json:
|
||||
raise ConnectionException("Returned \"{}\" status, message \"{}\".".format(resp_json['status'],
|
||||
resp_json['message']))
|
||||
else:
|
||||
raise ConnectionException("Returned \"{}\" status.".format(resp_json['status']))
|
||||
return resp_json
|
||||
except (ConnectionException, json.decoder.JSONDecodeError, requests.exceptions.RequestException) as err:
|
||||
error_string = "JSON Query to {}: {}".format(path, err)
|
||||
if _attempt == self.max_connection_attempts:
|
||||
raise ConnectionException(error_string)
|
||||
self.error(error_string + " [retrying; skip with ^C]", repeat_at_end=False)
|
||||
text_for_429 = ("HTTP error code 429 was returned because too many queries occured in the last time. "
|
||||
"Please do not use Instagram in your browser or run multiple instances of Instaloader "
|
||||
"in parallel.")
|
||||
try:
|
||||
if isinstance(err, TooManyRequestsException):
|
||||
print(textwrap.fill(text_for_429), file=sys.stderr)
|
||||
if is_graphql_query:
|
||||
waittime = graphql_query_waittime(query_hash=params['query_hash'], untracked_queries=True)
|
||||
if waittime > 0:
|
||||
self.log('The request will be retried in {} seconds.'.format(waittime))
|
||||
time.sleep(waittime)
|
||||
self._sleep()
|
||||
return self.get_json(path=path, params=params, host=host, session=sess, _attempt=_attempt + 1)
|
||||
except KeyboardInterrupt:
|
||||
self.error("[skipped by user]", repeat_at_end=False)
|
||||
raise ConnectionException(error_string)
|
||||
|
||||
def graphql_query(self, query_hash: str, variables: Dict[str, Any],
|
||||
referer: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Do a GraphQL Query.
|
||||
|
||||
:param query_hash: Query identifying hash.
|
||||
:param variables: Variables for the Query.
|
||||
:param referer: HTTP Referer, or None.
|
||||
:return: The server's response dictionary.
|
||||
"""
|
||||
tmpsession = copy_session(self._session)
|
||||
tmpsession.headers.update(self._default_http_header(empty_session_only=True))
|
||||
del tmpsession.headers['Connection']
|
||||
del tmpsession.headers['Content-Length']
|
||||
tmpsession.headers['authority'] = 'www.instagram.com'
|
||||
tmpsession.headers['scheme'] = 'https'
|
||||
tmpsession.headers['accept'] = '*/*'
|
||||
if referer is not None:
|
||||
tmpsession.headers['referer'] = urllib.parse.quote(referer)
|
||||
resp_json = self.get_json('graphql/query',
|
||||
params={'query_hash': query_hash,
|
||||
'variables': json.dumps(variables, separators=(',', ':'))},
|
||||
session=tmpsession)
|
||||
tmpsession.close()
|
||||
if 'status' not in resp_json:
|
||||
self.error("GraphQL response did not contain a \"status\" field.")
|
||||
return resp_json
|
||||
|
||||
def graphql_node_list(self, query_hash: str, query_variables: Dict[str, Any],
|
||||
query_referer: Optional[str],
|
||||
edge_extractor: Callable[[Dict[str, Any]], Dict[str, Any]]) -> Iterator[Dict[str, Any]]:
|
||||
"""Retrieve a list of GraphQL nodes."""
|
||||
query_variables['first'] = GRAPHQL_PAGE_LENGTH
|
||||
data = self.graphql_query(query_hash, query_variables, query_referer)
|
||||
while True:
|
||||
edge_struct = edge_extractor(data)
|
||||
yield from [edge['node'] for edge in edge_struct['edges']]
|
||||
if edge_struct['page_info']['has_next_page']:
|
||||
query_variables['after'] = edge_struct['page_info']['end_cursor']
|
||||
data = self.graphql_query(query_hash, query_variables, query_referer)
|
||||
else:
|
||||
break
|
||||
|
||||
def get_and_write_raw(self, url: str, filename: str, _attempt=1) -> None:
|
||||
"""Downloads raw data.
|
||||
|
||||
:raises QueryReturnedNotFoundException: When the server responds with a 404.
|
||||
:raises QueryReturnedForbiddenException: When the server responds with a 403.
|
||||
:raises ConnectionException: When download repeatedly failed."""
|
||||
try:
|
||||
with self.get_anonymous_session() as anonymous_session:
|
||||
resp = anonymous_session.get(url)
|
||||
if resp.status_code == 200:
|
||||
self.log(filename, end=' ', flush=True)
|
||||
with open(filename, 'wb') as file:
|
||||
resp.raw.decode_content = True
|
||||
shutil.copyfileobj(resp.raw, file)
|
||||
else:
|
||||
if resp.status_code == 403:
|
||||
# suspected invalid URL signature
|
||||
raise QueryReturnedForbiddenException("403 when accessing {}.".format(url))
|
||||
if resp.status_code == 404:
|
||||
# 404 not worth retrying.
|
||||
raise QueryReturnedNotFoundException("404 when accessing {}.".format(url))
|
||||
raise ConnectionException("HTTP error code {}.".format(resp.status_code))
|
||||
except (urllib3.exceptions.HTTPError, requests.exceptions.RequestException, ConnectionException) as err:
|
||||
error_string = "URL {}: {}".format(url, err)
|
||||
if _attempt == self.max_connection_attempts:
|
||||
raise ConnectionException(error_string)
|
||||
self.error(error_string + " [retrying; skip with ^C]", repeat_at_end=False)
|
||||
try:
|
||||
self._sleep()
|
||||
self.get_and_write_raw(url, filename, _attempt + 1)
|
||||
except KeyboardInterrupt:
|
||||
self.error("[skipped by user]", repeat_at_end=False)
|
||||
raise ConnectionException(error_string)
|
436
instaloader/structures.py
Normal file
436
instaloader/structures.py
Normal file
@ -0,0 +1,436 @@
|
||||
import re
|
||||
from base64 import b64decode, b64encode
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
|
||||
from .exceptions import *
|
||||
from .instaloadercontext import GRAPHQL_PAGE_LENGTH, InstaloaderContext
|
||||
|
||||
|
||||
def shortcode_to_mediaid(code: str) -> int:
|
||||
if len(code) > 11:
|
||||
raise InvalidArgumentException("Wrong shortcode \"{0}\", unable to convert to mediaid.".format(code))
|
||||
code = 'A' * (12 - len(code)) + code
|
||||
return int.from_bytes(b64decode(code.encode(), b'-_'), 'big')
|
||||
|
||||
|
||||
def mediaid_to_shortcode(mediaid: int) -> str:
|
||||
if mediaid.bit_length() > 64:
|
||||
raise InvalidArgumentException("Wrong mediaid {0}, unable to convert to shortcode".format(str(mediaid)))
|
||||
return b64encode(mediaid.to_bytes(9, 'big'), b'-_').decode().replace('A', ' ').lstrip().replace(' ', 'A')
|
||||
|
||||
|
||||
class Post:
|
||||
"""
|
||||
Structure containing information about an Instagram post.
|
||||
|
||||
Created by methods :meth:`Profile.get_posts`, :meth:`Instaloader.get_hashtag_posts`,
|
||||
:meth:`Instaloader.get_feed_posts` and :meth:`Profile.get_saved_posts`.
|
||||
This class unifies access to the properties associated with a post. It implements == and is
|
||||
hashable.
|
||||
|
||||
The properties defined here are accessible by the filter expressions specified with the :option:`--only-if`
|
||||
parameter and exported into JSON files with :option:`--metadata-json`.
|
||||
"""
|
||||
|
||||
LOGIN_REQUIRING_PROPERTIES = ["viewer_has_liked"]
|
||||
|
||||
def __init__(self, context: InstaloaderContext, node: Dict[str, Any],
|
||||
owner_profile: Optional['Profile'] = None):
|
||||
"""Create a Post instance from a node structure as returned by Instagram.
|
||||
|
||||
:param context: :class:`InstaloaderContext` instance used for additional queries if neccessary.
|
||||
:param node: Node structure, as returned by Instagram.
|
||||
:param owner_profile: The Profile of the owner, if already known at creation.
|
||||
"""
|
||||
self._context = context
|
||||
self._node = node
|
||||
self._owner_profile = owner_profile
|
||||
self._full_metadata_dict = None
|
||||
|
||||
@classmethod
|
||||
def from_shortcode(cls, context: InstaloaderContext, shortcode: str):
|
||||
"""Create a post object from a given shortcode"""
|
||||
# pylint:disable=protected-access
|
||||
post = cls(context, {'shortcode': shortcode})
|
||||
post._node = post._full_metadata
|
||||
return post
|
||||
|
||||
@classmethod
|
||||
def from_mediaid(cls, context: InstaloaderContext, mediaid: int):
|
||||
"""Create a post object from a given mediaid"""
|
||||
return cls.from_shortcode(context, mediaid_to_shortcode(mediaid))
|
||||
|
||||
@property
|
||||
def shortcode(self) -> str:
|
||||
"""Media shortcode. URL of the post is instagram.com/p/<shortcode>/."""
|
||||
return self._node['shortcode'] if 'shortcode' in self._node else self._node['code']
|
||||
|
||||
@property
|
||||
def mediaid(self) -> int:
|
||||
"""The mediaid is a decimal representation of the media shortcode."""
|
||||
return int(self._node['id'])
|
||||
|
||||
def __repr__(self):
|
||||
return '<Post {}>'.format(self.shortcode)
|
||||
|
||||
def __eq__(self, o: object) -> bool:
|
||||
if isinstance(o, Post):
|
||||
return self.shortcode == o.shortcode
|
||||
return NotImplemented
|
||||
|
||||
def __hash__(self) -> int:
|
||||
return hash(self.shortcode)
|
||||
|
||||
@property
|
||||
def _full_metadata(self) -> Dict[str, Any]:
|
||||
if not self._full_metadata_dict:
|
||||
pic_json = self._context.get_json("p/{0}/".format(self.shortcode), params={'__a': 1})
|
||||
if "graphql" in pic_json:
|
||||
self._full_metadata_dict = pic_json["graphql"]["shortcode_media"]
|
||||
else:
|
||||
self._full_metadata_dict = pic_json["media"]
|
||||
return self._full_metadata_dict
|
||||
|
||||
def _field(self, *keys) -> Any:
|
||||
"""Lookups given fields in _node, and if not found in _full_metadata. Raises KeyError if not found anywhere."""
|
||||
try:
|
||||
d = self._node
|
||||
for key in keys:
|
||||
d = d[key]
|
||||
return d
|
||||
except KeyError:
|
||||
d = self._full_metadata
|
||||
for key in keys:
|
||||
d = d[key]
|
||||
return d
|
||||
|
||||
@property
|
||||
def owner_username(self) -> str:
|
||||
"""The Post's lowercase owner name, or 'UNKNOWN'."""
|
||||
try:
|
||||
if self._owner_profile:
|
||||
return self._owner_profile.username.lower()
|
||||
return self._field('owner', 'username').lower()
|
||||
except (InstaloaderException, KeyError, TypeError) as err:
|
||||
if self._context.raise_all_errors:
|
||||
raise err
|
||||
self._context.error("Get owner name of {}: {} -- using \'UNKNOWN\'.".format(self, err))
|
||||
return 'UNKNOWN'
|
||||
|
||||
@property
|
||||
def owner_id(self) -> int:
|
||||
"""The ID of the Post's owner."""
|
||||
if self._owner_profile:
|
||||
return self._owner_profile.userid
|
||||
return int(self._field('owner', 'id'))
|
||||
|
||||
@property
|
||||
def date_local(self) -> datetime:
|
||||
"""Timestamp when the post was created (local time zone)."""
|
||||
return datetime.fromtimestamp(self._node["date"] if "date" in self._node else self._node["taken_at_timestamp"])
|
||||
|
||||
@property
|
||||
def date_utc(self) -> datetime:
|
||||
"""Timestamp when the post was created (UTC)."""
|
||||
return datetime.utcfromtimestamp(self._node["date"] if "date" in self._node else self._node["taken_at_timestamp"])
|
||||
|
||||
@property
|
||||
def url(self) -> str:
|
||||
"""URL of the picture / video thumbnail of the post"""
|
||||
return self._node["display_url"] if "display_url" in self._node else self._node["display_src"]
|
||||
|
||||
@property
|
||||
def typename(self) -> str:
|
||||
"""Type of post, GraphImage, GraphVideo or GraphSidecar"""
|
||||
if '__typename' in self._node:
|
||||
return self._node['__typename']
|
||||
# if __typename is not in node, it is an old image or video
|
||||
return 'GraphImage'
|
||||
|
||||
def get_sidecar_edges(self) -> List[Dict[str, Any]]:
|
||||
return self._field('edge_sidecar_to_children', 'edges')
|
||||
|
||||
@property
|
||||
def caption(self) -> Optional[str]:
|
||||
"""Caption."""
|
||||
if "edge_media_to_caption" in self._node and self._node["edge_media_to_caption"]["edges"]:
|
||||
return self._node["edge_media_to_caption"]["edges"][0]["node"]["text"]
|
||||
elif "caption" in self._node:
|
||||
return self._node["caption"]
|
||||
|
||||
@property
|
||||
def caption_hashtags(self) -> List[str]:
|
||||
"""List of all lowercased hashtags (without preceeding #) that occur in the Post's caption."""
|
||||
if not self.caption:
|
||||
return []
|
||||
# This regular expression is from jStassen, adjusted to use Python's \w to support Unicode
|
||||
# http://blog.jstassen.com/2016/03/code-regex-for-instagram-username-and-hashtags/
|
||||
hashtag_regex = re.compile(r"(?:#)(\w(?:(?:\w|(?:\.(?!\.))){0,28}(?:\w))?)")
|
||||
return re.findall(hashtag_regex, self.caption.lower())
|
||||
|
||||
@property
|
||||
def caption_mentions(self) -> List[str]:
|
||||
"""List of all lowercased profiles that are mentioned in the Post's caption, without preceeding @."""
|
||||
if not self.caption:
|
||||
return []
|
||||
# This regular expression is from jStassen, adjusted to use Python's \w to support Unicode
|
||||
# http://blog.jstassen.com/2016/03/code-regex-for-instagram-username-and-hashtags/
|
||||
mention_regex = re.compile(r"(?:@)(\w(?:(?:\w|(?:\.(?!\.))){0,28}(?:\w))?)")
|
||||
return re.findall(mention_regex, self.caption.lower())
|
||||
|
||||
@property
|
||||
def tagged_users(self) -> List[str]:
|
||||
"""List of all lowercased users that are tagged in the Post."""
|
||||
try:
|
||||
return [edge['node']['user']['username'].lower() for edge in self._field('edge_media_to_tagged_user',
|
||||
'edges')]
|
||||
except KeyError:
|
||||
return []
|
||||
|
||||
@property
|
||||
def is_video(self) -> bool:
|
||||
"""True if the Post is a video."""
|
||||
return self._node['is_video']
|
||||
|
||||
@property
|
||||
def video_url(self) -> Optional[str]:
|
||||
"""URL of the video, or None."""
|
||||
if self.is_video:
|
||||
return self._field('video_url')
|
||||
|
||||
@property
|
||||
def viewer_has_liked(self) -> Optional[bool]:
|
||||
"""Whether the viewer has liked the post, or None if not logged in."""
|
||||
if not self._context.is_logged_in:
|
||||
return None
|
||||
if 'likes' in self._node and 'viewer_has_liked' in self._node['likes']:
|
||||
return self._node['likes']['viewer_has_liked']
|
||||
return self._field('viewer_has_liked')
|
||||
|
||||
@property
|
||||
def likes(self) -> int:
|
||||
"""Likes count"""
|
||||
return self._field('edge_media_preview_like', 'count')
|
||||
|
||||
@property
|
||||
def comments(self) -> int:
|
||||
"""Comment count"""
|
||||
return self._field('edge_media_to_comment', 'count')
|
||||
|
||||
def get_comments(self) -> Iterator[Dict[str, Any]]:
|
||||
"""Iterate over all comments of the post.
|
||||
|
||||
Each comment is represented by a dictionary having the keys text, created_at, id and owner, which is a
|
||||
dictionary with keys username, profile_pic_url and id.
|
||||
"""
|
||||
if self.comments == 0:
|
||||
# Avoid doing additional requests if there are no comments
|
||||
return
|
||||
comment_edges = self._field('edge_media_to_comment', 'edges')
|
||||
if self.comments == len(comment_edges):
|
||||
# If the Post's metadata already contains all comments, don't do GraphQL requests to obtain them
|
||||
yield from (comment['node'] for comment in comment_edges)
|
||||
yield from self._context.graphql_node_list("33ba35852cb50da46f5b5e889df7d159",
|
||||
{'shortcode': self.shortcode},
|
||||
'https://www.instagram.com/p/' + self.shortcode + '/',
|
||||
lambda d: d['data']['shortcode_media']['edge_media_to_comment'])
|
||||
|
||||
def get_likes(self) -> Iterator[Dict[str, Any]]:
|
||||
"""Iterate over all likes of the post.
|
||||
|
||||
Each like is represented by a dictionary having the keys username, followed_by_viewer, id, is_verified,
|
||||
requested_by_viewer, followed_by_viewer, profile_pic_url.
|
||||
"""
|
||||
if self.likes == 0:
|
||||
# Avoid doing additional requests if there are no comments
|
||||
return
|
||||
likes_edges = self._field('edge_media_preview_like', 'edges')
|
||||
if self.likes == len(likes_edges):
|
||||
# If the Post's metadata already contains all likes, don't do GraphQL requests to obtain them
|
||||
yield from (like['node'] for like in likes_edges)
|
||||
yield from self._context.graphql_node_list("1cb6ec562846122743b61e492c85999f", {'shortcode': self.shortcode},
|
||||
'https://www.instagram.com/p/' + self.shortcode + '/',
|
||||
lambda d: d['data']['shortcode_media']['edge_liked_by'])
|
||||
|
||||
def get_location(self) -> Optional[Dict[str, str]]:
|
||||
"""If the Post has a location, returns a dictionary with fields 'lat' and 'lng'."""
|
||||
loc_dict = self._field("location")
|
||||
if loc_dict is not None:
|
||||
location_json = self._context.get_json("explore/locations/{0}/".format(loc_dict["id"]),
|
||||
params={'__a': 1})
|
||||
return location_json["location"] if "location" in location_json else location_json['graphql']['location']
|
||||
|
||||
@staticmethod
|
||||
def json_encoder(obj) -> Dict[str, Any]:
|
||||
"""Convert instance of :class:`Post` to a JSON-serializable dictionary."""
|
||||
if not isinstance(obj, Post):
|
||||
raise TypeError("Object of type {} is not a Post object.".format(obj.__class__.__name__))
|
||||
jsondict = {}
|
||||
for prop in dir(Post):
|
||||
if prop[0].isupper() or prop[0] == '_':
|
||||
# skip uppercase and private properties
|
||||
continue
|
||||
val = obj.__getattribute__(prop)
|
||||
if val is True or val is False or isinstance(val, (str, int, float, list)):
|
||||
jsondict[prop] = val
|
||||
elif isinstance(val, datetime):
|
||||
jsondict[prop] = val.isoformat()
|
||||
return jsondict
|
||||
|
||||
|
||||
class Profile:
|
||||
"""
|
||||
An Instagram Profile.
|
||||
|
||||
Provides methods for accessing profile properties, as well as :meth:`Profile.get_posts` and for own profile
|
||||
:meth:`Profile.get_saved_posts`.
|
||||
|
||||
This class implements == and is hashable.
|
||||
"""
|
||||
def __init__(self, context: InstaloaderContext, profile_name: str):
|
||||
"""
|
||||
Lookup Profile information and create Profile instance.
|
||||
|
||||
:param context: :class:`InstaloaderContext` instance used for queries etc.
|
||||
:param identifier: Profile name (string).
|
||||
"""
|
||||
self._context = context
|
||||
|
||||
try:
|
||||
metadata = self._context.get_json('{}/'.format(profile_name), params={'__a': 1})
|
||||
self._metadata = metadata['graphql'] if 'graphql' in metadata else metadata
|
||||
except QueryReturnedNotFoundException:
|
||||
raise ProfileNotExistsException('Profile {} does not exist.'.format(profile_name))
|
||||
|
||||
@property
|
||||
def userid(self) -> int:
|
||||
return int(self._metadata['user']['id'])
|
||||
|
||||
@property
|
||||
def username(self) -> str:
|
||||
return self._metadata['user']['username']
|
||||
|
||||
def __repr__(self):
|
||||
return '<Profile {} ({})>'.format(self.username, self.userid)
|
||||
|
||||
def __eq__(self, o: object) -> bool:
|
||||
if isinstance(o, Profile):
|
||||
return self.userid == o.userid
|
||||
return NotImplemented
|
||||
|
||||
def __hash__(self) -> int:
|
||||
return hash(self.userid)
|
||||
|
||||
@property
|
||||
def is_private(self) -> bool:
|
||||
return self._metadata['user']['is_private']
|
||||
|
||||
@property
|
||||
def followed_by_viewer(self) -> bool:
|
||||
return self._metadata['user']['followed_by_viewer']
|
||||
|
||||
@property
|
||||
def mediacount(self) -> int:
|
||||
if "media" in self._metadata["user"]:
|
||||
# backwards compatibility with old non-graphql structure
|
||||
return self._metadata["user"]["media"]["count"]
|
||||
return self._metadata["user"]["edge_owner_to_timeline_media"]["count"]
|
||||
|
||||
@property
|
||||
def biography(self) -> str:
|
||||
return self._metadata['user']['biography']
|
||||
|
||||
@property
|
||||
def blocked_by_viewer(self) -> bool:
|
||||
return self._metadata['user']['blocked_by_viewer']
|
||||
|
||||
@property
|
||||
def follows_viewer(self) -> bool:
|
||||
return self._metadata['user']['follows_viewer']
|
||||
|
||||
@property
|
||||
def full_name(self) -> str:
|
||||
return self._metadata['user']['full_name']
|
||||
|
||||
@property
|
||||
def has_blocked_viewer(self) -> bool:
|
||||
return self._metadata['user']['has_blocked_viewer']
|
||||
|
||||
@property
|
||||
def has_requested_viewer(self) -> bool:
|
||||
return self._metadata['user']['has_requested_viewer']
|
||||
|
||||
@property
|
||||
def is_verified(self) -> bool:
|
||||
return self._metadata['user']['is_verified']
|
||||
|
||||
@property
|
||||
def requested_by_viewer(self) -> bool:
|
||||
return self._metadata['user']['requested_by_viewer']
|
||||
|
||||
def get_profile_pic_url(self) -> str:
|
||||
"""Return URL of profile picture"""
|
||||
try:
|
||||
with self._context.get_anonymous_session() as anonymous_session:
|
||||
data = self._context.get_json(path='api/v1/users/{0}/info/'.format(self.userid), params={},
|
||||
host='i.instagram.com', session=anonymous_session)
|
||||
return data["user"]["hd_profile_pic_url_info"]["url"]
|
||||
except (InstaloaderException, KeyError) as err:
|
||||
self._context.error('{} Unable to fetch high quality profile pic.'.format(err))
|
||||
return self._metadata["user"]["profile_pic_url_hd"] if "profile_pic_url_hd" in self._metadata["user"] \
|
||||
else self._metadata["user"]["profile_pic_url"]
|
||||
|
||||
def get_posts(self) -> Iterator[Post]:
|
||||
"""Retrieve all posts from a profile."""
|
||||
if 'media' in self._metadata['user']:
|
||||
# backwards compatibility with old non-graphql structure
|
||||
yield from (Post(self._context, node, owner_profile=self)
|
||||
for node in self._metadata['user']['media']['nodes'])
|
||||
has_next_page = self._metadata['user']['media']['page_info']['has_next_page']
|
||||
end_cursor = self._metadata['user']['media']['page_info']['end_cursor']
|
||||
else:
|
||||
yield from (Post(self._context, edge['node'], owner_profile=self)
|
||||
for edge in self._metadata['user']['edge_owner_to_timeline_media']['edges'])
|
||||
has_next_page = self._metadata['user']['edge_owner_to_timeline_media']['page_info']['has_next_page']
|
||||
end_cursor = self._metadata['user']['edge_owner_to_timeline_media']['page_info']['end_cursor']
|
||||
while has_next_page:
|
||||
# We do not use self.graphql_node_list() here, because profile_metadata
|
||||
# lets us obtain the first 12 nodes 'for free'
|
||||
data = self._context.graphql_query("472f257a40c653c64c666ce877d59d2b",
|
||||
{'id': self.userid,
|
||||
'first': GRAPHQL_PAGE_LENGTH,
|
||||
'after': end_cursor},
|
||||
'https://www.instagram.com/{0}/'.format(self.username))
|
||||
media = data['data']['user']['edge_owner_to_timeline_media']
|
||||
yield from (Post(self._context, edge['node'], owner_profile=self)
|
||||
for edge in media['edges'])
|
||||
has_next_page = media['page_info']['has_next_page']
|
||||
end_cursor = media['page_info']['end_cursor']
|
||||
|
||||
def get_saved_posts(self) -> Iterator[Post]:
|
||||
"""Get Posts that are marked as saved by the user."""
|
||||
|
||||
if self.username != self._context.username:
|
||||
raise LoginRequiredException("--login={} required to get that profile's saved posts.".format(self.username))
|
||||
|
||||
data = self._metadata
|
||||
|
||||
while True:
|
||||
if "edge_saved_media" in data["user"]:
|
||||
is_edge = True
|
||||
saved_media = data["user"]["edge_saved_media"]
|
||||
else:
|
||||
is_edge = False
|
||||
saved_media = data["user"]["saved_media"]
|
||||
|
||||
if is_edge:
|
||||
yield from (Post(self._context, edge["node"]) for edge in saved_media["edges"])
|
||||
else:
|
||||
yield from (Post(self._context, node) for node in saved_media["nodes"])
|
||||
|
||||
if not saved_media["page_info"]["has_next_page"]:
|
||||
break
|
||||
data = self._context.graphql_query("f883d95537fbcd400f466f63d42bd8a1",
|
||||
{'id': self.userid, 'first': GRAPHQL_PAGE_LENGTH,
|
||||
'after': saved_media["page_info"]["end_cursor"]})['data']
|
6
setup.py
6
setup.py
@ -11,7 +11,7 @@ SRC = os.path.abspath(os.path.dirname(__file__))
|
||||
|
||||
|
||||
def get_version():
|
||||
with open(os.path.join(SRC, 'instaloader.py')) as f:
|
||||
with open(os.path.join(SRC, 'instaloader/__init__.py')) as f:
|
||||
for line in f:
|
||||
m = re.match("__version__ = '(.*)'", line)
|
||||
if m:
|
||||
@ -37,7 +37,7 @@ keywords = (['instagram', 'instagram-scraper', 'instagram-client', 'instagram-fe
|
||||
setup(
|
||||
name='instaloader',
|
||||
version=get_version(),
|
||||
py_modules=['instaloader'],
|
||||
packages=['instaloader'],
|
||||
url='https://instaloader.github.io/',
|
||||
license='MIT',
|
||||
author='Alexander Graf, André Koch-Kramer',
|
||||
@ -47,7 +47,7 @@ setup(
|
||||
long_description=open(os.path.join(SRC, 'README.rst')).read(),
|
||||
install_requires=requirements,
|
||||
python_requires='>=3.5',
|
||||
entry_points={'console_scripts': ['instaloader=instaloader:main']},
|
||||
entry_points={'console_scripts': ['instaloader=instaloader.__main__:main']},
|
||||
zip_safe=True,
|
||||
keywords=keywords,
|
||||
classifiers=[
|
||||
|
Loading…
Reference in New Issue
Block a user