2016-06-15 12:42:08 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
2017-07-20 14:54:22 +02:00
|
|
|
"""Download pictures (or videos) along with their captions and other metadata from Instagram."""
|
2016-09-18 14:38:58 +02:00
|
|
|
|
2017-06-24 22:43:40 +02:00
|
|
|
import getpass
|
|
|
|
import json
|
|
|
|
import os
|
|
|
|
import pickle
|
|
|
|
import random
|
|
|
|
import re
|
|
|
|
import shutil
|
2017-07-25 18:31:08 +02:00
|
|
|
import string
|
2017-06-24 22:43:40 +02:00
|
|
|
import sys
|
|
|
|
import tempfile
|
|
|
|
import time
|
2016-06-26 09:43:02 +02:00
|
|
|
from argparse import ArgumentParser
|
2017-07-06 22:26:25 +02:00
|
|
|
from base64 import b64decode, b64encode
|
2017-07-29 11:08:52 +02:00
|
|
|
from datetime import datetime
|
2016-08-01 18:10:35 +02:00
|
|
|
from io import BytesIO
|
2017-07-29 17:51:39 +02:00
|
|
|
from typing import Any, Callable, Dict, List, Optional, Tuple
|
2017-06-24 22:43:40 +02:00
|
|
|
|
|
|
|
import requests
|
|
|
|
import requests.utils
|
2016-12-22 13:20:41 +01:00
|
|
|
|
2016-08-01 18:10:35 +02:00
|
|
|
|
2016-09-18 14:38:58 +02:00
|
|
|
# To get version from setup.py for instaloader --version
import pkg_resources

try:
    # pylint:disable=no-member
    # Installed via setuptools: read the version recorded in the package metadata.
    __version__ = pkg_resources.get_distribution('instaloader').version
except pkg_resources.DistributionNotFound:
    # Running from an uninstalled source checkout; no metadata available.
    __version__ = 'Run ./setup.py --version'
|
|
|
|
|
2016-08-01 18:10:35 +02:00
|
|
|
try:
    # pylint:disable=wrong-import-position
    # Optional Windows-only dependency for proper Unicode console output.
    import win_unicode_console
except ImportError:
    # Not available (e.g. non-Windows platform): continue without it.
    WINUNICODE = False
else:
    win_unicode_console.enable()
    WINUNICODE = True
|
2016-06-15 12:42:08 +02:00
|
|
|
|
2016-08-18 09:58:07 +02:00
|
|
|
|
2016-07-26 10:57:29 +02:00
|
|
|
class InstaloaderException(Exception):
    """Base exception for this script"""
|
|
|
|
|
2017-06-24 22:43:40 +02:00
|
|
|
|
2016-07-26 10:57:29 +02:00
|
|
|
class NonfatalException(InstaloaderException):
    """Base exception for errors which should not cause instaloader to stop"""
|
|
|
|
|
2017-06-24 22:43:40 +02:00
|
|
|
|
2016-07-26 10:57:29 +02:00
|
|
|
class ProfileNotExistsException(NonfatalException):
    """Raised when a profile does not exist, or may have blocked the requester."""
    pass
|
|
|
|
|
2017-06-24 22:43:40 +02:00
|
|
|
|
2017-04-10 21:05:58 +02:00
|
|
|
class ProfileAccessDeniedException(NonfatalException):
    """Raised when access to the requested profile is denied (not raised in this chunk)."""
    pass
|
|
|
|
|
2017-06-24 22:43:40 +02:00
|
|
|
|
2016-07-26 10:57:29 +02:00
|
|
|
class ProfileHasNoPicsException(NonfatalException):
    """Raised when a profile contains no pictures."""
    pass
|
|
|
|
|
2017-06-24 22:43:40 +02:00
|
|
|
|
2016-07-26 10:57:29 +02:00
|
|
|
class PrivateProfileNotFollowedException(NonfatalException):
    """Raised for a private profile that the logged-in user does not follow (presumably; not raised in this chunk)."""
    pass
|
|
|
|
|
2017-06-24 22:43:40 +02:00
|
|
|
|
2016-07-26 10:57:29 +02:00
|
|
|
class LoginRequiredException(NonfatalException):
    """Raised when an operation requires being logged in."""
    pass
|
|
|
|
|
2017-06-24 22:43:40 +02:00
|
|
|
|
2017-07-13 22:33:01 +02:00
|
|
|
class InvalidArgumentException(NonfatalException):
    """Raised when a supplied argument is invalid, e.g. a malformed shortcode or media ID."""
    pass
|
|
|
|
|
|
|
|
|
2017-07-27 22:18:43 +02:00
|
|
|
class BadResponseException(NonfatalException):
    """Raised on an unexpected response from Instagram (not raised in this chunk)."""
    pass
|
|
|
|
|
|
|
|
|
2016-07-26 10:57:29 +02:00
|
|
|
class BadCredentialsException(InstaloaderException):
    """Raised when login fails credential verification. Fatal: not a NonfatalException."""
    pass
|
|
|
|
|
2017-06-24 22:43:40 +02:00
|
|
|
|
2016-07-26 10:57:29 +02:00
|
|
|
class ConnectionException(InstaloaderException):
    """Raised on HTTP errors or failed downloads. Fatal: not a NonfatalException."""
    pass
|
|
|
|
|
|
|
|
|
2016-12-22 13:20:41 +01:00
|
|
|
def get_default_session_filename(username: str) -> str:
|
2016-09-18 16:35:25 +02:00
|
|
|
"""Returns default session filename for given username."""
|
2016-07-26 17:36:21 +02:00
|
|
|
dirname = tempfile.gettempdir() + "/" + ".instaloader-" + getpass.getuser()
|
|
|
|
filename = dirname + "/" + "session-" + username
|
2017-07-20 18:08:16 +02:00
|
|
|
return filename.lower()
|
2016-07-26 17:36:21 +02:00
|
|
|
|
2016-09-18 16:35:25 +02:00
|
|
|
|
2016-12-22 13:20:41 +01:00
|
|
|
def copy_session(session: requests.Session) -> requests.Session:
    """Duplicates a requests.Session."""
    clone = requests.Session()
    # Round-trip the cookie jar through a dict to detach it from the original.
    cookie_dict = requests.utils.dict_from_cookiejar(session.cookies)
    clone.cookies = requests.utils.cookiejar_from_dict(cookie_dict)
    clone.headers = session.headers.copy()
    return clone
|
|
|
|
|
2016-09-18 16:35:25 +02:00
|
|
|
|
2017-07-20 11:25:46 +02:00
|
|
|
def default_user_agent() -> str:
    """Return the desktop-browser User-Agent string used by default."""
    return ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36')
|
2016-07-25 23:43:41 +02:00
|
|
|
|
2016-09-18 16:35:25 +02:00
|
|
|
|
2017-07-06 22:26:25 +02:00
|
|
|
def shortcode_to_mediaid(code: str) -> int:
    """Convert an Instagram shortcode (URL-safe base64) to its numeric media ID.

    :raises InvalidArgumentException: If the shortcode is longer than 11 characters.
    """
    if len(code) > 11:
        raise InvalidArgumentException("Wrong shortcode \"{0}\", unable to convert to mediaid.".format(code))
    # Left-pad with 'A' (the zero digit of base64) to a full 12-char / 9-byte group.
    padded = code.rjust(12, 'A')
    return int.from_bytes(b64decode(padded.encode(), b'-_'), 'big')
|
|
|
|
|
|
|
|
|
|
|
|
def mediaid_to_shortcode(mediaid: int) -> str:
    """Convert a numeric media ID to its Instagram shortcode.

    :raises InvalidArgumentException: If the ID does not fit into 64 bits.
    """
    if mediaid.bit_length() > 64:
        raise InvalidArgumentException("Wrong mediaid {0}, unable to convert to shortcode".format(str(mediaid)))
    encoded = b64encode(mediaid.to_bytes(9, 'big'), b'-_').decode()
    # Leading 'A' characters are zero-padding; strip them, keep interior ones.
    return encoded.lstrip('A')
|
|
|
|
|
|
|
|
|
2017-07-25 18:31:08 +02:00
|
|
|
def format_string_contains_key(format_string: str, key: str) -> bool:
    """Return True if '{key}' occurs as a replacement field in format_string."""
    return any(field_name == key
               for _, field_name, _, _ in string.Formatter().parse(format_string))
|
|
|
|
|
|
|
|
|
2017-06-24 22:43:40 +02:00
|
|
|
class Instaloader:
|
|
|
|
    def __init__(self,
                 sleep: bool = True, quiet: bool = False, shorter_output: bool = False,
                 user_agent: Optional[str] = None,
                 dirname_pattern: Optional[str] = None,
                 filename_pattern: Optional[str] = None):
        """Create an Instaloader instance.

        :param sleep: Sleep a short random interval before each request if True.
        :param quiet: Suppress log output if True.
        :param shorter_output: Abbreviate caption text in log output.
        :param user_agent: HTTP User-Agent; defaults to a desktop browser string.
        :param dirname_pattern: Format string for download directories (default '{target}').
        :param filename_pattern: Format string for file names; a plain '{date}' field
            is expanded to '{date:%Y-%m-%d_%H-%M-%S}'.
        """
        self.user_agent = user_agent if user_agent is not None else default_user_agent()
        # Start with an anonymous session; login()/load_session_from_file() replace it.
        self.session = self.get_anonymous_session()
        # Username of the logged-in user, or None when anonymous.
        self.username = None
        self.sleep = sleep
        self.quiet = quiet
        self.shorter_output = shorter_output
        self.dirname_pattern = dirname_pattern if dirname_pattern is not None else '{target}'
        # Expand the '{date}' shorthand so the pattern always yields full timestamps.
        self.filename_pattern = filename_pattern.replace('{date}', '{date:%Y-%m-%d_%H-%M-%S}') \
            if filename_pattern is not None else '{date:%Y-%m-%d_%H-%M-%S}'
|
2017-06-24 22:43:40 +02:00
|
|
|
|
|
|
|
def _log(self, *msg, sep='', end='\n', flush=False):
|
|
|
|
if not self.quiet:
|
|
|
|
print(*msg, sep=sep, end=end, flush=flush)
|
|
|
|
|
2017-07-26 15:08:11 +02:00
|
|
|
def _sleep(self):
|
|
|
|
"""Sleep a short, random time if self.sleep is set. Called before each request to the instagram.com."""
|
|
|
|
if self.sleep:
|
|
|
|
time.sleep(random.uniform(0.25, 2.0))
|
|
|
|
|
2017-06-24 22:43:40 +02:00
|
|
|
    def get_json(self, name: str, session: Optional[requests.Session] = None,
                 max_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
        """Return JSON of a profile.

        :param name: Path component under instagram.com (profile name, 'p/<code>', ...).
        :param session: Session to use; defaults to this instance's session.
        :param max_id: Optional pagination cursor passed as the 'max_id' parameter.
        :return: Parsed shared-data dict, or None if the page contained no marker.
        """
        if session is None:
            session = self.session
        self._sleep()
        if not max_id:
            resp = session.get('https://www.instagram.com/' + name)
        else:
            resp = session.get('https://www.instagram.com/' + name, params={'max_id': max_id})
        # The page embeds its data as "window._sharedData = {...};<" inside a script tag.
        match = re.search('window\\._sharedData = .*<', resp.text)
        if match is not None:
            # Strip the 21-char "window._sharedData = " prefix and the trailing ";<".
            return json.loads(match.group(0)[21:-2])
        # Implicitly returns None when the marker was not found.
|
|
|
|
|
2017-07-20 11:25:46 +02:00
|
|
|
def default_http_header(self, empty_session_only: bool = False) -> Dict[str, str]:
|
|
|
|
"""Returns default HTTP header we use for requests."""
|
|
|
|
header = {'Accept-Encoding': 'gzip, deflate',
|
|
|
|
'Accept-Language': 'en-US,en;q=0.8',
|
|
|
|
'Connection': 'keep-alive',
|
|
|
|
'Content-Length': '0',
|
|
|
|
'Host': 'www.instagram.com',
|
|
|
|
'Origin': 'https://www.instagram.com',
|
|
|
|
'Referer': 'https://www.instagram.com/',
|
|
|
|
'User-Agent': self.user_agent,
|
|
|
|
'X-Instagram-AJAX': '1',
|
|
|
|
'X-Requested-With': 'XMLHttpRequest'}
|
|
|
|
if empty_session_only:
|
|
|
|
del header['Host']
|
|
|
|
del header['Origin']
|
|
|
|
del header['Referer']
|
|
|
|
del header['X-Instagram-AJAX']
|
|
|
|
del header['X-Requested-With']
|
|
|
|
return header
|
|
|
|
|
|
|
|
def get_anonymous_session(self) -> requests.Session:
|
|
|
|
"""Returns our default anonymous requests.Session object."""
|
|
|
|
session = requests.Session()
|
|
|
|
session.cookies.update({'sessionid': '', 'mid': '', 'ig_pr': '1',
|
|
|
|
'ig_vw': '1920', 'csrftoken': '',
|
|
|
|
's_network': '', 'ds_user_id': ''})
|
|
|
|
session.headers.update(self.default_http_header(empty_session_only=True))
|
|
|
|
return session
|
|
|
|
|
2017-07-24 12:08:08 +02:00
|
|
|
    def graphql_query(self, query_id: int, variables: Dict[str, Any],
                      referer: Optional[str] = None) -> Dict[str, Any]:
        """
        Do a GraphQL Query.

        :param query_id: Query ID.
        :param variables: Variables for the Query.
        :param referer: HTTP Referer, or None.
        :return: The server's response dictionary.
        :raises ConnectionException: If the query did not return HTTP 200.
        """
        # Use a throwaway copy of the session so header tweaks below do not
        # leak into subsequent ordinary requests.
        tmpsession = copy_session(self.session)
        tmpsession.headers.update(self.default_http_header(empty_session_only=True))
        del tmpsession.headers['Connection']
        del tmpsession.headers['Content-Length']
        tmpsession.headers['authority'] = 'www.instagram.com'
        tmpsession.headers['scheme'] = 'https'
        tmpsession.headers['accept'] = '*/*'
        if referer is not None:
            tmpsession.headers['referer'] = referer
        self._sleep()
        # Compact separators keep the JSON-encoded 'variables' URL parameter short.
        response = tmpsession.get('https://www.instagram.com/graphql/query',
                                  params={'query_id': query_id,
                                          'variables': json.dumps(variables, separators=(',', ':'))})
        if response.status_code != 200:
            raise ConnectionException("GraphQL query returned HTTP error code {}.".format(response.status_code))
        return response.json()
|
|
|
|
|
2017-06-24 22:43:40 +02:00
|
|
|
    def get_username_by_id(self, profile_id: int) -> str:
        """To get the current username of a profile, given its unique ID, this function can be used.

        :raises ProfileNotExistsException: If no profile data is returned for the ID.
        :raises ProfileHasNoPicsException: If the profile has no posts to derive the name from.
        :raises LoginRequiredException: If posts exist but are not visible anonymously.
        """
        data = self.graphql_query(17862015703145017, {'id': str(profile_id), 'first': 1})['data']['user']
        if data:
            data = data["edge_owner_to_timeline_media"]
        else:
            raise ProfileNotExistsException("No profile found, the user may have blocked you (ID: " +
                                            str(profile_id) + ").")
        if not data['edges']:
            if data['count'] == 0:
                raise ProfileHasNoPicsException("Profile with ID {0}: no pics found.".format(str(profile_id)))
            else:
                raise LoginRequiredException("Login required to determine username (ID: " + str(profile_id) + ").")
        else:
            # The username is not exposed directly; fetch the profile's most recent
            # post and read the owner's username from that post's page JSON.
            shortcode = mediaid_to_shortcode(int(data['edges'][0]["node"]["id"]))
            data = self.get_json("p/" + shortcode)
            return data['entry_data']['PostPage'][0]['graphql']['shortcode_media']['owner']['username']
|
2017-06-24 22:43:40 +02:00
|
|
|
|
|
|
|
    def get_id_by_username(self, profile: str) -> int:
        """Each Instagram profile has its own unique ID which stays unmodified even if a user changes
        his/her username. To get said ID, given the profile's name, you may call this function.

        :raises ProfileNotExistsException: If no profile page exists for the given name.
        """
        # Query anonymously; the ID is available on the public profile page.
        data = self.get_json(profile, session=self.get_anonymous_session())
        if "ProfilePage" not in data["entry_data"]:
            raise ProfileNotExistsException("Profile {0} does not exist.".format(profile))
        return int(data['entry_data']['ProfilePage'][0]['user']['id'])
|
|
|
|
|
2017-07-20 17:57:12 +02:00
|
|
|
    def get_followers(self, profile: str) -> List[Dict[str, Any]]:
        """
        Retrieve list of followers of given profile.
        To use this, one needs to be logged in and private profiles has to be followed,
        otherwise this returns an empty list.

        :param profile: Name of profile to lookup followers.
        :return: List of followers (list of dictionaries).
        """
        profile_id = self.get_id_by_username(profile)
        data = self.graphql_query(17851374694183129, {'id': str(profile_id),
                                                      'first': 500},
                                  referer='https://www.instagram.com/' + profile + '/')
        followers = []
        while True:
            edge_followed_by = data['data']['user']['edge_followed_by']
            followers.extend([follower['node'] for follower in edge_followed_by['edges']])
            page_info = edge_followed_by['page_info']
            if page_info['has_next_page']:
                # Fetch the next page, continuing after the pagination cursor.
                data = self.graphql_query(17851374694183129, {'id': str(profile_id),
                                                              'first': 500,
                                                              'after': page_info['end_cursor']},
                                          referer='https://www.instagram.com/' + profile + '/')
            else:
                break
        return followers
|
|
|
|
|
2017-06-24 22:43:40 +02:00
|
|
|
    def get_followees(self, profile: str) -> List[Dict[str, Any]]:
        """
        Retrieve list of followees (followings) of given profile.
        To use this, one needs to be logged in and private profiles has to be followed,
        otherwise this returns an empty list.

        :param profile: Name of profile to lookup followers.
        :return: List of followees (list of dictionaries).
        """
        profile_id = self.get_id_by_username(profile)
        data = self.graphql_query(17874545323001329, {'id': profile_id,
                                                      'first': 500},
                                  referer='https://www.instagram.com/' + profile + '/')
        followees = []
        while True:
            edge_follow = data['data']['user']['edge_follow']
            followees.extend([followee['node'] for followee in edge_follow['edges']])
            page_info = edge_follow['page_info']
            if page_info['has_next_page']:
                # Fetch the next page, continuing after the pagination cursor.
                data = self.graphql_query(17874545323001329, {'id': profile_id,
                                                              'first': 500,
                                                              'after': page_info['end_cursor']},
                                          referer='https://www.instagram.com/' + profile + '/')
            else:
                break
        return followees
|
2017-06-24 22:43:40 +02:00
|
|
|
|
2017-07-20 22:30:12 +02:00
|
|
|
    def get_comments(self, shortcode: str) -> List[Dict[str, Any]]:
        """Retrieve comments of node with given shortcode.

        :param shortcode: Shortcode of the post whose comments are fetched.
        :return: List of comment dictionaries.
        """
        data = self.graphql_query(17852405266163336, {'shortcode': shortcode,
                                                      'first': 500},
                                  referer='https://www.instagram.com/p/' + shortcode + '/')
        comments = []
        while True:
            edge_media_to_comment = data['data']['shortcode_media']['edge_media_to_comment']
            comments.extend([comment['node'] for comment in edge_media_to_comment['edges']])
            page_info = edge_media_to_comment['page_info']
            if page_info['has_next_page']:
                # Fetch the next page, continuing after the pagination cursor.
                data = self.graphql_query(17852405266163336, {'shortcode': shortcode,
                                                              'first': 500,
                                                              'after': page_info['end_cursor']},
                                          referer='https://www.instagram.com/p/' + shortcode + '/')
            else:
                break
        return comments
|
|
|
|
|
2017-07-29 11:08:52 +02:00
|
|
|
    def download_pic(self, filename: str, url: str, mtime: datetime,
                     filename_suffix: Optional[str] = None) -> bool:
        """Downloads and saves picture with given url under given directory with given timestamp.
        Returns true, if file was actually downloaded, i.e. updated.

        :raises ConnectionException: If the download did not return HTTP 200.
        """
        # Derive the file extension from a ".ext?" segment in the URL,
        # falling back to the URL's last three characters.
        urlmatch = re.search('\\.[a-z]*\\?', url)
        file_extension = url[-3:] if urlmatch is None else urlmatch.group(0)[1:-1]
        if filename_suffix is not None:
            filename += '_' + filename_suffix
        filename += '.' + file_extension
        if os.path.isfile(filename):
            self._log(filename + ' exists', end=' ', flush=True)
            return False
        resp = self.get_anonymous_session().get(url, stream=True)
        if resp.status_code == 200:
            self._log(filename, end=' ', flush=True)
            with open(filename, 'wb') as file:
                resp.raw.decode_content = True
                shutil.copyfileobj(resp.raw, file)
            # Stamp the file's modification time with the media's timestamp.
            os.utime(filename, (datetime.now().timestamp(), mtime.timestamp()))
            return True
        else:
            raise ConnectionException("File \'" + url + "\' could not be downloaded.")
|
|
|
|
|
2017-07-25 18:31:08 +02:00
|
|
|
def update_comments(self, filename: str, shortcode: str) -> None:
|
|
|
|
filename += '_comments.json'
|
2017-07-20 22:30:12 +02:00
|
|
|
try:
|
|
|
|
comments = json.load(open(filename))
|
|
|
|
except FileNotFoundError:
|
|
|
|
comments = list()
|
|
|
|
comments.extend(self.get_comments(shortcode))
|
|
|
|
if comments:
|
|
|
|
with open(filename, 'w') as file:
|
|
|
|
comments_list = sorted(sorted(list(comments), key=lambda t: t['id']),
|
|
|
|
key=lambda t: t['created_at'], reverse=True)
|
|
|
|
unique_comments_list = [comments_list[0]]
|
|
|
|
#for comment in comments_list:
|
|
|
|
# if unique_comments_list[-1]['id'] != comment['id']:
|
|
|
|
# unique_comments_list.append(comment)
|
|
|
|
#file.write(json.dumps(unique_comments_list, indent=4))
|
|
|
|
#pylint:disable=invalid-name
|
|
|
|
for x, y in zip(comments_list[:-1], comments_list[1:]):
|
|
|
|
if x['id'] != y['id']:
|
|
|
|
unique_comments_list.append(y)
|
|
|
|
file.write(json.dumps(unique_comments_list, indent=4))
|
|
|
|
self._log('comments', end=' ', flush=True)
|
|
|
|
|
2017-07-29 11:08:52 +02:00
|
|
|
    def save_caption(self, filename: str, mtime: datetime, caption: str) -> None:
        """Updates picture caption.

        Writes the caption to <filename>.txt. If a caption file already exists
        with different content, it is rotated to <filename>_old_NN.txt before
        the new caption is written.

        :param filename: Base filename of the post (without suffix).
        :param mtime: Timestamp applied to the written file.
        :param caption: Caption text to store.
        """
        filename += '.txt'
        # pcaption: short single-line version of the caption for log output only.
        pcaption = caption.replace('\n', ' ').strip()
        caption = caption.encode("UTF-8")
        if self.shorter_output:
            pcaption = "txt"
        else:
            # Truncate long captions with an ellipsis for display.
            pcaption = '[' + ((pcaption[:29] + u"\u2026") if len(pcaption) > 31 else pcaption) + ']'
        try:
            with open(filename, 'rb') as file:
                file_caption = file.read()
            # Compare with normalized line endings so a CRLF/LF difference
            # alone does not count as a change.
            if file_caption.replace(b'\r\n', b'\n') == caption.replace(b'\r\n', b'\n'):
                try:
                    self._log(pcaption + ' unchanged', end=' ', flush=True)
                except UnicodeEncodeError:
                    self._log('txt unchanged', end=' ', flush=True)
                return None
            else:
                def get_filename(index):
                    # Index 0 is the live caption file; higher indices are
                    # zero-padded backups like "_old_03".
                    return filename if index == 0 else (filename[:-4] + '_old_' +
                                                        (str(0) if index < 10 else str()) + str(index) + filename[-4:])

                # Find the first free backup slot, then shift all existing
                # files up by one so slot 0 becomes free for the new caption.
                i = 0
                while os.path.isfile(get_filename(i)):
                    i = i + 1
                for index in range(i, 0, -1):
                    os.rename(get_filename(index - 1), get_filename(index))
                try:
                    self._log(pcaption + ' updated', end=' ', flush=True)
                except UnicodeEncodeError:
                    self._log('txt updated', end=' ', flush=True)
        except FileNotFoundError:
            # No previous caption file: fall through and write a fresh one.
            pass
        try:
            self._log(pcaption, end=' ', flush=True)
        except UnicodeEncodeError:
            self._log('txt', end=' ', flush=True)
        with open(filename, 'wb') as text_file:
            shutil.copyfileobj(BytesIO(caption), text_file)
        # Stamp the file's modification time with the post's timestamp.
        os.utime(filename, (datetime.now().timestamp(), mtime.timestamp()))
|
2017-04-17 12:10:43 +02:00
|
|
|
|
2017-07-29 11:08:52 +02:00
|
|
|
def save_location(self, filename: str, location_json: Dict[str, str], mtime: datetime) -> None:
|
2017-07-25 18:31:08 +02:00
|
|
|
filename += '_location.txt'
|
2017-06-24 22:43:40 +02:00
|
|
|
location_string = (location_json["name"] + "\n" +
|
|
|
|
"https://maps.google.com/maps?q={0},{1}&ll={0},{1}\n".format(location_json["lat"],
|
|
|
|
location_json["lng"]))
|
|
|
|
with open(filename, 'wb') as text_file:
|
|
|
|
shutil.copyfileobj(BytesIO(location_string.encode()), text_file)
|
2017-07-29 11:08:52 +02:00
|
|
|
os.utime(filename, (datetime.now().timestamp(), mtime.timestamp()))
|
2017-06-24 22:43:40 +02:00
|
|
|
self._log('geo', end=' ', flush=True)
|
|
|
|
|
|
|
|
    def download_profilepic(self, name: str, url: str) -> None:
        """Downloads and saves profile pic with given url.

        :param name: Profile name, used in the target filename.
        :param url: URL of the profile picture.
        :raises ConnectionException: If the URL cannot be parsed or the download fails.
        """

        def _epoch_to_string(epoch: datetime) -> str:
            # Timestamp format used inside the filename.
            return epoch.strftime('%Y-%m-%d_%H-%M-%S')

        # Use the server-reported Last-Modified date of the picture in the filename.
        date_object = datetime.strptime(requests.head(url).headers["Last-Modified"],
                                        '%a, %d %b %Y %H:%M:%S GMT')
        if ((format_string_contains_key(self.dirname_pattern, 'profile') or
             format_string_contains_key(self.dirname_pattern, 'target'))):
            # Directory pattern already encodes the profile name; omit it from the file name.
            filename = '{0}/{1}_UTC_profile_pic.{2}'.format(self.dirname_pattern.format(profile=name.lower(),
                                                                                        target=name.lower()),
                                                            _epoch_to_string(date_object), url[-3:])
        else:
            filename = '{0}/{1}_{2}_UTC_profile_pic.{3}'.format(self.dirname_pattern.format(), name.lower(),
                                                                _epoch_to_string(date_object), url[-3:])
        if os.path.isfile(filename):
            self._log(filename + ' already exists')
            return None
        # Rewrite the URL to request the s2048x2048 variant of the picture.
        match = re.search('http.*://.*instagram.*[^/]*\\.(com|net)/[^/]+/.', url)
        if match is None:
            raise ConnectionException("URL \'" + url + "\' could not be processed.")
        index = len(match.group(0)) - 1
        # If the path segment already carries an 's...' size spec, replace it (8 chars);
        # otherwise insert the size spec as a new segment.
        offset = 8 if match.group(0)[-1:] == 's' else 0
        url = url[:index] + 's2048x2048' + ('/' if offset == 0 else str()) + url[index + offset:]
        resp = self.get_anonymous_session().get(url, stream=True)
        if resp.status_code == 200:
            self._log(filename)
            with open(filename, 'wb') as file:
                resp.raw.decode_content = True
                shutil.copyfileobj(resp.raw, file)
            # Stamp the file's modification time with the server-reported date.
            os.utime(filename, (datetime.now().timestamp(), date_object.timestamp()))
        else:
            raise ConnectionException("File \'" + url + "\' could not be downloaded.")
|
|
|
|
|
|
|
|
    def save_session_to_file(self, filename: Optional[str] = None) -> None:
        """Saves requests.Session object.

        :param filename: Destination path; defaults to the per-user session file.
        """
        if filename is None:
            filename = get_default_session_filename(self.username)
        dirname = os.path.dirname(filename)
        if dirname != '' and not os.path.exists(dirname):
            os.makedirs(dirname)
            # Session files contain login cookies; keep the directory private.
            os.chmod(dirname, 0o700)
        with open(filename, 'wb') as sessionfile:
            os.chmod(filename, 0o600)
            # Only the cookies are persisted; headers are rebuilt on load.
            pickle.dump(requests.utils.dict_from_cookiejar(self.session.cookies), sessionfile)
        self._log("Saved session to %s." % filename)
|
|
|
|
|
|
|
|
    def load_session_from_file(self, username: str, filename: Optional[str] = None) -> None:
        """Internally stores requests.Session object loaded from file.

        If filename is None, the file with the default session path is loaded.

        :raises FileNotFoundError: If the file does not exist.
        """
        if filename is None:
            filename = get_default_session_filename(username)
        with open(filename, 'rb') as sessionfile:
            session = requests.Session()
            session.cookies = requests.utils.cookiejar_from_dict(pickle.load(sessionfile))
            session.headers.update(self.default_http_header())
            # Restore the CSRF token header from the loaded cookie jar.
            session.headers.update({'X-CSRFToken': session.cookies.get_dict()['csrftoken']})
        self._log("Loaded session from %s." % filename)
        self.session = session
        self.username = username
|
2017-06-24 22:43:40 +02:00
|
|
|
|
|
|
|
def test_login(self, session: requests.Session) -> Optional[str]:
|
|
|
|
"""Returns the Instagram username to which given requests.Session object belongs, or None."""
|
|
|
|
if self.session is None:
|
|
|
|
return
|
|
|
|
data = self.get_json(str(), session=session)
|
|
|
|
if data['config']['viewer'] is None:
|
|
|
|
return
|
|
|
|
return data['config']['viewer']['username']
|
|
|
|
|
|
|
|
    def login(self, user: str, passwd: str) -> None:
        """Log in to instagram with given username and password and internally store session object.

        :raises BadCredentialsException: If the server accepted the request but the
            resulting session does not belong to the given user.
        :raises ConnectionException: If the login request did not return HTTP 200.
        """
        session = requests.Session()
        session.cookies.update({'sessionid': '', 'mid': '', 'ig_pr': '1',
                                'ig_vw': '1920', 'csrftoken': '',
                                's_network': '', 'ds_user_id': ''})
        session.headers.update(self.default_http_header())
        self._sleep()
        # An initial GET yields the CSRF token that must accompany the login POST.
        resp = session.get('https://www.instagram.com/')
        session.headers.update({'X-CSRFToken': resp.cookies['csrftoken']})
        self._sleep()
        login = session.post('https://www.instagram.com/accounts/login/ajax/',
                             data={'password': passwd, 'username': user}, allow_redirects=True)
        session.headers.update({'X-CSRFToken': login.cookies['csrftoken']})
        if login.status_code == 200:
            # Verify the session actually belongs to the requested user.
            if user == self.test_login(session):
                self.username = user
                self.session = session
            else:
                raise BadCredentialsException('Login error! Check your credentials!')
        else:
            raise ConnectionException('Login error! Connection error!')
|
|
|
|
|
|
|
|
    def get_feed_json(self, end_cursor: Optional[str] = None) -> Dict[str, Any]:
        """
        Get JSON of the user's feed.

        :param end_cursor: The end cursor, as from json["feed"]["media"]["page_info"]["end_cursor"]
        :return: JSON
        """
        if end_cursor is None:
            # First page: the feed is embedded in the front page's shared data.
            return self.get_json(str())["entry_data"]["FeedPage"][0]
        # Subsequent pages are fetched via a GraphQL query.
        return self.graphql_query(17863003771166879, {'fetch_media_item_count': 12,
                                                      'fetch_media_item_cursor': end_cursor,
                                                      'fetch_comment_count': 4,
                                                      'fetch_like': 10})
|
2017-06-24 22:43:40 +02:00
|
|
|
|
2017-06-27 09:17:06 +02:00
|
|
|
def get_node_metadata(self, node_code: str) -> Dict[str, Any]:
|
2017-06-24 22:43:40 +02:00
|
|
|
pic_json = self.get_json("p/" + node_code)
|
|
|
|
media = pic_json["entry_data"]["PostPage"][0]["graphql"]["shortcode_media"] \
|
|
|
|
if "graphql" in pic_json["entry_data"]["PostPage"][0] \
|
|
|
|
else pic_json["entry_data"]["PostPage"][0]["media"]
|
2017-06-27 09:17:06 +02:00
|
|
|
return media
|
|
|
|
|
|
|
|
def get_location(self, node_code: str) -> Dict[str, str]:
|
|
|
|
media = self.get_node_metadata(node_code)
|
2017-06-24 22:43:40 +02:00
|
|
|
if media["location"] is not None:
|
|
|
|
location_json = self.get_json("explore/locations/" +
|
|
|
|
media["location"]["id"])
|
|
|
|
return location_json["entry_data"]["LocationsPage"][0]["location"]
|
|
|
|
|
2017-07-25 18:31:08 +02:00
|
|
|
    def download_node(self, node: Dict[str, Any], profile: Optional[str], target: str,
                      download_videos: bool = True, geotags: bool = False, download_comments: bool = False) -> bool:
        """
        Download everything associated with one instagram node, i.e. picture, caption and video.

        :param node: Node, as from media->nodes list in instagram's JSONs
        :param profile: Name of profile to which this node belongs
        :param target: Target name, i.e. profile name, #hashtag, :feed; for filename.
        :param download_videos: True, if videos should be downloaded
        :param geotags: Download geotags
        :param download_comments: Update comments
        :return: True if something was downloaded, False otherwise, i.e. file was already there
        """
        # Only resolve the owner's profile name (possibly requiring an extra
        # metadata request) if the configured patterns actually use it.
        already_has_profilename = profile is not None or ('owner' in node and 'username' in node['owner'])
        needs_profilename = (format_string_contains_key(self.dirname_pattern, 'profile') or
                             format_string_contains_key(self.filename_pattern, 'profile'))
        # Newer JSONs call it 'shortcode', older ones 'code'.
        shortcode = node['shortcode'] if 'shortcode' in node else node['code']
        if needs_profilename:
            if already_has_profilename:
                profilename = profile if profile is not None else node['owner']['username']
            else:
                metadata = self.get_node_metadata(shortcode)
                profilename = metadata['owner']['username']
        else:
            profilename = None
        profilename = profilename.lower() if profilename else None
        # Newer JSONs call the timestamp 'taken_at_timestamp', older ones 'date'.
        date = datetime.fromtimestamp(node["date"] if "date" in node else node["taken_at_timestamp"])
        dirname = self.dirname_pattern.format(profile=profilename, target=target.lower())
        filename = dirname + '/' + self.filename_pattern.format(profile=profilename, target=target.lower(),
                                                                date=date,
                                                                shortcode=shortcode)
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        if '__typename' in node:
            if node['__typename'] == 'GraphSidecar':
                # Multi-image post: fetch the post page JSON and download each
                # child edge with a numeric filename suffix.
                self._sleep()
                sidecar_data = self.session.get('https://www.instagram.com/p/' + shortcode + '/',
                                                params={'__a': 1}).json()
                edge_number = 1
                downloaded = True
                media = sidecar_data["graphql"]["shortcode_media"] if "graphql" in sidecar_data else sidecar_data[
                    "media"]
                for edge in media['edge_sidecar_to_children']['edges']:
                    edge_downloaded = self.download_pic(filename=filename,
                                                        url=edge['node']['display_url'],
                                                        mtime=date,
                                                        filename_suffix=str(edge_number))
                    # Sidecar counts as downloaded only if every edge was new.
                    downloaded = downloaded and edge_downloaded
                    edge_number += 1
            elif node['__typename'] in ['GraphImage', 'GraphVideo']:
                url = node["display_url"] if "display_url" in node else node["display_src"]
                downloaded = self.download_pic(filename=filename,
                                               url=url,
                                               mtime=date)
            else:
                self._log("Warning: Unknown typename discovered:" + node['__typename'])
                downloaded = False
        else:
            # Node is an old image or video.
            downloaded = self.download_pic(filename=filename, url=node["display_src"], mtime=date)
        # Caption lives in 'edge_media_to_caption' (new layout) or 'caption' (old).
        if "edge_media_to_caption" in node and node["edge_media_to_caption"]["edges"]:
            self.save_caption(filename, date, node["edge_media_to_caption"]["edges"][0]["node"]["text"])
        elif "caption" in node:
            self.save_caption(filename, date, node["caption"])
        else:
            self._log("<no caption>", end=' ', flush=True)
        if node["is_video"] and download_videos:
            # The video URL is only present on the post's own page JSON.
            video_data = self.get_json('p/' + shortcode)
            self.download_pic(filename=filename,
                              url=video_data['entry_data']['PostPage'][0]['graphql']['shortcode_media']['video_url'],
                              mtime=date)
        if geotags:
            location = self.get_location(shortcode)
            if location:
                self.save_location(filename, location, date)
        if download_comments:
            self.update_comments(filename, shortcode)
        self._log()
        return downloaded
|
|
|
|
|
2017-07-27 16:59:21 +02:00
|
|
|
def download_stories(self,
|
2017-07-28 19:49:48 +02:00
|
|
|
userids: Optional[List[int]] = None,
|
2017-07-27 16:59:21 +02:00
|
|
|
download_videos: bool = True,
|
2017-07-29 17:51:39 +02:00
|
|
|
fast_update: bool = False,
|
|
|
|
filename_target: str = ':stories') -> None:
|
2017-07-27 16:59:21 +02:00
|
|
|
"""
|
2017-07-29 04:12:26 +02:00
|
|
|
Download available stories from user followees or all stories of users whose ID are given.
|
2017-07-28 05:22:43 +02:00
|
|
|
Does not mark stories as seen.
|
2017-07-29 04:12:26 +02:00
|
|
|
To use this, one needs to be logged in
|
2017-07-27 16:59:21 +02:00
|
|
|
|
2017-07-28 19:49:48 +02:00
|
|
|
:param userids: List of user IDs to be processed in terms of downloading their stories
|
2017-07-27 16:59:21 +02:00
|
|
|
:param download_videos: True, if videos should be downloaded
|
2017-07-28 19:49:48 +02:00
|
|
|
:param fast_update: If true, abort when first already-downloaded picture is encountered
|
2017-07-29 17:51:39 +02:00
|
|
|
:param filename_target: Replacement for {target} in dirname_pattern and filename_pattern
|
2017-07-27 16:59:21 +02:00
|
|
|
"""
|
|
|
|
|
2017-07-27 22:18:43 +02:00
|
|
|
if self.username is None:
|
|
|
|
raise LoginRequiredException('Login required to download stories')
|
2017-07-27 16:59:21 +02:00
|
|
|
|
2017-07-27 22:18:43 +02:00
|
|
|
tempsession = copy_session(self.session)
|
|
|
|
header = tempsession.headers
|
|
|
|
header['User-Agent'] = 'Instagram 10.3.2 (iPhone7,2; iPhone OS 9_3_3; en_US; en-US; scale=2.00; 750x1334) ' \
|
|
|
|
'AppleWebKit/420+'
|
2017-07-27 16:59:21 +02:00
|
|
|
del header['Host']
|
|
|
|
del header['Origin']
|
|
|
|
del header['X-Instagram-AJAX']
|
|
|
|
del header['X-Requested-With']
|
|
|
|
|
2017-07-28 05:22:43 +02:00
|
|
|
def _user_stories():
|
|
|
|
def _get(url):
|
|
|
|
self._sleep()
|
|
|
|
resp = tempsession.get(url)
|
|
|
|
if resp.status_code != 200:
|
|
|
|
raise ConnectionException('Failed to fetch stories.')
|
|
|
|
return json.loads(resp.text)
|
2017-07-29 04:12:26 +02:00
|
|
|
url_reel_media = 'https://i.instagram.com/api/v1/feed/user/{0}/reel_media/'
|
|
|
|
url_reels_tray = 'https://i.instagram.com/api/v1/feed/reels_tray/'
|
2017-07-28 19:49:48 +02:00
|
|
|
if userids is not None:
|
|
|
|
for userid in userids:
|
2017-07-29 04:12:26 +02:00
|
|
|
yield _get(url_reel_media.format(userid))
|
2017-07-28 05:22:43 +02:00
|
|
|
else:
|
2017-07-29 04:12:26 +02:00
|
|
|
data = _get(url_reels_tray)
|
2017-07-28 05:22:43 +02:00
|
|
|
if not 'tray' in data:
|
|
|
|
raise BadResponseException('Bad story reel JSON.')
|
|
|
|
for user in data["tray"]:
|
2017-07-29 04:12:26 +02:00
|
|
|
yield user if "items" in user else _get(url_reel_media.format(user['user']['pk']))
|
2017-07-28 05:22:43 +02:00
|
|
|
|
|
|
|
for user_stories in _user_stories():
|
2017-07-27 16:59:21 +02:00
|
|
|
if "items" not in user_stories:
|
|
|
|
continue
|
|
|
|
name = user_stories["user"]["username"].lower()
|
2017-07-29 17:51:39 +02:00
|
|
|
self._log("Retrieving stories from profile {}.".format(name))
|
2017-07-28 05:22:43 +02:00
|
|
|
totalcount = len(user_stories["items"]) if "items" in user_stories else 0
|
|
|
|
count = 1
|
2017-07-27 16:59:21 +02:00
|
|
|
for item in user_stories["items"]:
|
|
|
|
self._log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
|
|
|
|
count += 1
|
|
|
|
|
|
|
|
self._sleep()
|
|
|
|
shortcode = item["code"] if "code" in item else "no_code"
|
|
|
|
|
2017-07-29 11:08:52 +02:00
|
|
|
date_float = item["device_timestamp"] if "device_timestamp" in item else item["taken_at"]
|
2017-07-27 16:59:21 +02:00
|
|
|
try:
|
2017-07-29 11:08:52 +02:00
|
|
|
date = datetime.fromtimestamp(date_float)
|
2017-07-27 16:59:21 +02:00
|
|
|
except ValueError:
|
|
|
|
# device_timestamp seems to sometime be in milliseconds
|
2017-07-29 11:08:52 +02:00
|
|
|
date_float /= 1000
|
|
|
|
date = datetime.fromtimestamp(date_float)
|
2017-07-27 16:59:21 +02:00
|
|
|
|
2017-07-29 17:51:39 +02:00
|
|
|
dirname = self.dirname_pattern.format(profile=name, target=filename_target)
|
|
|
|
filename = dirname + '/' + self.filename_pattern.format(profile=name, target=filename_target,
|
2017-07-29 11:08:52 +02:00
|
|
|
date=date,
|
2017-07-27 16:59:21 +02:00
|
|
|
shortcode=shortcode)
|
2017-07-29 01:40:53 +02:00
|
|
|
os.makedirs(os.path.dirname(filename), exist_ok=True)
|
2017-07-27 16:59:21 +02:00
|
|
|
if "image_versions2" in item:
|
|
|
|
url = item["image_versions2"]["candidates"][0]["url"]
|
|
|
|
downloaded = self.download_pic(filename=filename,
|
|
|
|
url=url,
|
2017-07-29 11:08:52 +02:00
|
|
|
mtime=date)
|
2017-07-27 16:59:21 +02:00
|
|
|
else:
|
|
|
|
self._log("Warning: Unable to find story image.")
|
|
|
|
downloaded = False
|
|
|
|
if "caption" in item and item["caption"] is not None:
|
2017-07-28 05:22:43 +02:00
|
|
|
caption = item["caption"]
|
|
|
|
if isinstance(caption, dict) and "text" in caption:
|
|
|
|
caption = caption["text"]
|
|
|
|
self.save_caption(filename, date, caption)
|
2017-07-27 16:59:21 +02:00
|
|
|
else:
|
|
|
|
self._log("<no caption>", end=' ', flush=True)
|
|
|
|
if "video_versions" in item and download_videos:
|
2017-07-28 05:22:43 +02:00
|
|
|
downloaded = self.download_pic(filename=filename,
|
|
|
|
url=item["video_versions"][0]["url"],
|
2017-07-29 11:08:52 +02:00
|
|
|
mtime=date)
|
2017-07-28 05:22:43 +02:00
|
|
|
if "video_duration" in item and self.sleep and downloaded:
|
2017-07-27 16:59:21 +02:00
|
|
|
time.sleep(item["video_duration"])
|
2017-07-27 22:18:43 +02:00
|
|
|
if item["story_locations"]:
|
2017-07-27 16:59:21 +02:00
|
|
|
location = item["story_locations"][0]["location"]
|
|
|
|
if location:
|
|
|
|
self.save_location(filename, location, date)
|
|
|
|
self._log()
|
|
|
|
if fast_update and not downloaded:
|
|
|
|
break
|
|
|
|
|
2017-06-24 22:43:40 +02:00
|
|
|
def download_feed_pics(self, max_count: int = None, fast_update: bool = False,
|
|
|
|
filter_func: Optional[Callable[[Dict[str, Dict[str, Any]]], bool]] = None,
|
2017-07-20 22:30:12 +02:00
|
|
|
download_videos: bool = True, geotags: bool = False,
|
|
|
|
download_comments: bool = False) -> None:
|
2017-06-24 22:43:40 +02:00
|
|
|
"""
|
|
|
|
Download pictures from the user's feed.
|
|
|
|
|
|
|
|
Example to download up to the 20 pics the user last liked:
|
|
|
|
>>> loader = Instaloader()
|
|
|
|
>>> loader.load_session_from_file('USER')
|
|
|
|
>>> loader.download_feed_pics(max_count=20, fast_update=True,
|
|
|
|
>>> filter_func=lambda node:
|
|
|
|
>>> not node["likes"]["viewer_has_liked"]
|
|
|
|
>>> if "likes" in node else
|
|
|
|
>>> not node["viewer_has_liked"])
|
|
|
|
|
|
|
|
:param max_count: Maximum count of pictures to download
|
|
|
|
:param fast_update: If true, abort when first already-downloaded picture is encountered
|
|
|
|
:param filter_func: function(node), which returns True if given picture should not be downloaded
|
|
|
|
:param download_videos: True, if videos should be downloaded
|
|
|
|
:param geotags: Download geotags
|
2017-07-20 22:30:12 +02:00
|
|
|
:param download_comments: Update comments
|
2017-06-24 22:43:40 +02:00
|
|
|
"""
|
|
|
|
data = self.get_feed_json()
|
|
|
|
count = 1
|
|
|
|
while True:
|
|
|
|
if "graphql" in data:
|
|
|
|
is_edge = True
|
|
|
|
feed = data["graphql"]["user"]["edge_web_feed_timeline"]
|
2017-07-25 21:01:48 +02:00
|
|
|
elif "data" in data:
|
|
|
|
is_edge = True
|
|
|
|
feed = data["data"]["user"]["edge_web_feed_timeline"]
|
2017-06-24 22:43:40 +02:00
|
|
|
else:
|
|
|
|
is_edge = False
|
|
|
|
feed = data["feed"]["media"]
|
|
|
|
for edge_or_node in feed["edges"] if is_edge else feed["nodes"]:
|
|
|
|
if max_count is not None and count > max_count:
|
|
|
|
return
|
|
|
|
node = edge_or_node["node"] if is_edge else edge_or_node
|
|
|
|
name = node["owner"]["username"]
|
|
|
|
if filter_func is not None and filter_func(node):
|
|
|
|
self._log("<pic by %s skipped>" % name, flush=True)
|
|
|
|
continue
|
|
|
|
self._log("[%3i] %s " % (count, name), end="", flush=True)
|
|
|
|
count += 1
|
2017-07-25 18:31:08 +02:00
|
|
|
downloaded = self.download_node(node, profile=name, target=':feed',
|
2017-07-20 22:30:12 +02:00
|
|
|
download_videos=download_videos, geotags=geotags,
|
|
|
|
download_comments=download_comments)
|
2017-06-24 22:43:40 +02:00
|
|
|
if fast_update and not downloaded:
|
|
|
|
return
|
|
|
|
if not feed["page_info"]["has_next_page"]:
|
|
|
|
break
|
|
|
|
data = self.get_feed_json(end_cursor=feed["page_info"]["end_cursor"])
|
|
|
|
|
|
|
|
def get_hashtag_json(self, hashtag: str,
|
|
|
|
max_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
|
|
|
|
"""Return JSON of a #hashtag"""
|
|
|
|
return self.get_json(name='explore/tags/{0}/'.format(hashtag), max_id=max_id)
|
|
|
|
|
|
|
|
    def download_hashtag(self, hashtag: str,
                         max_count: Optional[int] = None,
                         filter_func: Optional[Callable[[Dict[str, Dict[str, Any]]], bool]] = None,
                         fast_update: bool = False, download_videos: bool = True, geotags: bool = False,
                         download_comments: bool = False) -> None:
        """Download pictures of one hashtag.

        To download the last 30 pictures with hashtag #cat, do
        >>> loader = Instaloader()
        >>> loader.download_hashtag('cat', max_count=30)

        :param hashtag: Hashtag to download, without leading '#'
        :param max_count: Maximum count of pictures to download
        :param filter_func: function(node), which returns True if given picture should not be downloaded
        :param fast_update: If true, abort when first already-downloaded picture is encountered
        :param download_videos: True, if videos should be downloaded
        :param geotags: Download geotags
        :param download_comments: Update comments
        """
        data = self.get_hashtag_json(hashtag)
        count = 1
        while data:
            for node in data['entry_data']['TagPage'][0]['tag']['media']['nodes']:
                if max_count is not None and count > max_count:
                    return
                self._log('[{0:3d}] #{1} '.format(count, hashtag), end='', flush=True)
                if filter_func is not None and filter_func(node):
                    self._log('<skipped>')
                    continue
                # count only advances for posts that were not filtered out
                count += 1
                downloaded = self.download_node(node=node, profile=None, target='#'+hashtag,
                                                download_videos=download_videos, geotags=geotags,
                                                download_comments=download_comments)
                if fast_update and not downloaded:
                    return
            # Paginate via the end_cursor until Instagram reports no further page.
            if data['entry_data']['TagPage'][0]['tag']['media']['page_info']['has_next_page']:
                data = self.get_hashtag_json(hashtag,
                                             max_id=data['entry_data']['TagPage'][0]['tag']['media']['page_info'][
                                                 'end_cursor'])
            else:
                break
def check_id(self, profile: str, json_data: Dict[str, Any]) -> Tuple[str, int]:
|
2017-06-24 22:43:40 +02:00
|
|
|
"""
|
|
|
|
Consult locally stored ID of profile with given name, check whether ID matches and whether name
|
|
|
|
has changed and return current name of the profile, and store ID of profile.
|
2017-07-29 17:51:39 +02:00
|
|
|
|
|
|
|
:return: current profile name, profile id
|
2017-06-24 22:43:40 +02:00
|
|
|
"""
|
2017-07-25 18:31:08 +02:00
|
|
|
profile_exists = "ProfilePage" in json_data["entry_data"]
|
|
|
|
if ((format_string_contains_key(self.dirname_pattern, 'profile') or
|
|
|
|
format_string_contains_key(self.dirname_pattern, 'target'))):
|
|
|
|
id_filename = '{0}/id'.format(self.dirname_pattern.format(profile=profile.lower(),
|
|
|
|
target=profile.lower()))
|
2017-06-25 14:53:43 +02:00
|
|
|
else:
|
2017-07-25 18:31:08 +02:00
|
|
|
id_filename = '{0}/{1}_id'.format(self.dirname_pattern.format(), profile.lower())
|
2016-07-25 22:27:23 +02:00
|
|
|
try:
|
2017-06-25 14:53:43 +02:00
|
|
|
with open(id_filename, 'rb') as id_file:
|
2017-06-24 22:43:40 +02:00
|
|
|
profile_id = int(id_file.read())
|
|
|
|
if (not profile_exists) or \
|
|
|
|
(profile_id != int(json_data['entry_data']['ProfilePage'][0]['user']['id'])):
|
|
|
|
if profile_exists:
|
2017-07-14 05:18:18 +02:00
|
|
|
self._log("Profile {0} does not match the stored unique ID {1}.".format(profile, profile_id))
|
|
|
|
else:
|
|
|
|
self._log("Trying to find profile {0} using its unique ID {1}.".format(profile, profile_id))
|
|
|
|
newname = self.get_username_by_id(profile_id)
|
|
|
|
self._log("Profile {0} has changed its name to {1}.".format(profile, newname))
|
2017-07-25 18:31:08 +02:00
|
|
|
if ((format_string_contains_key(self.dirname_pattern, 'profile') or
|
|
|
|
format_string_contains_key(self.dirname_pattern, 'target'))):
|
|
|
|
os.rename(self.dirname_pattern.format(profile=profile.lower(),
|
|
|
|
target=profile.lower()),
|
|
|
|
self.dirname_pattern.format(profile=newname.lower(),
|
|
|
|
target=newname.lower()))
|
|
|
|
else:
|
|
|
|
os.rename('{0}/{1}_id'.format(self.dirname_pattern.format(), profile.lower()),
|
|
|
|
'{0}/{1}_id'.format(self.dirname_pattern.format(), newname.lower()))
|
2017-07-29 17:51:39 +02:00
|
|
|
return newname, profile_id
|
|
|
|
return profile, profile_id
|
2017-06-24 22:43:40 +02:00
|
|
|
except FileNotFoundError:
|
|
|
|
pass
|
|
|
|
if profile_exists:
|
2017-07-25 18:31:08 +02:00
|
|
|
os.makedirs(self.dirname_pattern.format(profile=profile.lower(),
|
|
|
|
target=profile.lower()), exist_ok=True)
|
2017-06-25 14:53:43 +02:00
|
|
|
with open(id_filename, 'w') as text_file:
|
2017-06-24 22:43:40 +02:00
|
|
|
profile_id = json_data['entry_data']['ProfilePage'][0]['user']['id']
|
|
|
|
text_file.write(profile_id + "\n")
|
|
|
|
self._log("Stored ID {0} for profile {1}.".format(profile_id, profile))
|
2017-07-29 17:51:39 +02:00
|
|
|
return profile, profile_id
|
2017-06-24 22:43:40 +02:00
|
|
|
raise ProfileNotExistsException("Profile {0} does not exist.".format(profile))
|
|
|
|
|
|
|
|
    def download(self, name: str,
                 profile_pic_only: bool = False, download_videos: bool = True, geotags: bool = False,
                 download_comments: bool = False, fast_update: bool = False,
                 download_stories: bool = False, download_stories_only: bool = False) -> None:
        """Download one profile: profile picture, posts and optionally stories.

        :param name: Profile name
        :param profile_pic_only: Only download the profile picture, then return
        :param download_videos: True, if videos should be downloaded
        :param geotags: Download geotags
        :param download_comments: Update comments
        :param fast_update: If true, abort when first already-downloaded picture is encountered
        :param download_stories: Also download the profile's stories
        :param download_stories_only: Download only the stories, skip regular posts
        :raises LoginRequiredException: if the profile is private and we are not logged in
        :raises PrivateProfileNotFollowedException: if the profile is private and not followed
        :raises ProfileHasNoPicsException: if the profile has no posts
        """
        # Get profile main page json
        data = self.get_json(name)
        # check if profile does exist or name has changed since last download
        # and update name and json data if necessary
        name_updated, profile_id = self.check_id(name, data)
        if name_updated != name:
            name = name_updated
            data = self.get_json(name)
        # Download profile picture
        self.download_profilepic(name, data["entry_data"]["ProfilePage"][0]["user"]["profile_pic_url"])
        if profile_pic_only:
            return
        # Catch some errors
        if data["entry_data"]["ProfilePage"][0]["user"]["is_private"]:
            if data["config"]["viewer"] is None:
                raise LoginRequiredException("profile %s requires login" % name)
            if not data["entry_data"]["ProfilePage"][0]["user"]["followed_by_viewer"]:
                raise PrivateProfileNotFollowedException("Profile %s: private but not followed." % name)
        else:
            # Public profile: hint that login was unnecessary (unless stories were requested,
            # which do require login).
            if data["config"]["viewer"] is not None and not (download_stories or download_stories_only):
                self._log("profile %s could also be downloaded anonymously." % name)
        if download_stories or download_stories_only:
            self.download_stories(userids=[profile_id], filename_target=name,
                                  download_videos=download_videos, fast_update=fast_update)
            if download_stories_only:
                return
        if ("nodes" not in data["entry_data"]["ProfilePage"][0]["user"]["media"] or
                not data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]) \
                and not profile_pic_only:
            raise ProfileHasNoPicsException("Profile %s: no pics found." % name)

        # Iterate over pictures and download them
        self._log("Retrieving posts from profile {}.".format(name))
        def get_last_id(data):
            # Returns the id of the last node on the current page (used as max_id for
            # pagination), or None when the page is empty / pagination is exhausted.
            if data["entry_data"] and data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]:
                return data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"][-1]["id"]

        totalcount = data["entry_data"]["ProfilePage"][0]["user"]["media"]["count"]
        count = 1
        while get_last_id(data) is not None:
            for node in data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]:
                self._log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
                count += 1
                downloaded = self.download_node(node=node, profile=name, target=name,
                                                download_videos=download_videos, geotags=geotags,
                                                download_comments=download_comments)
                if fast_update and not downloaded:
                    return
            data = self.get_json(name, max_id=get_last_id(data))
def interactive_login(self, username: str) -> None:
|
|
|
|
"""Logs in and internally stores session, asking user for password interactively.
|
|
|
|
|
|
|
|
:raises LoginRequiredException: when in quiet mode."""
|
2017-06-24 22:43:40 +02:00
|
|
|
if self.quiet:
|
|
|
|
raise LoginRequiredException("Quiet mode requires given password or valid session file.")
|
2017-07-14 11:00:22 +02:00
|
|
|
password = None
|
2017-06-24 22:43:40 +02:00
|
|
|
while password is None:
|
|
|
|
password = getpass.getpass(prompt="Enter Instagram password for %s: " % username)
|
2016-07-26 17:03:32 +02:00
|
|
|
try:
|
2017-06-24 22:43:40 +02:00
|
|
|
self.login(username, password)
|
|
|
|
except BadCredentialsException as err:
|
2016-07-26 17:03:32 +02:00
|
|
|
print(err, file=sys.stderr)
|
2017-06-24 22:43:40 +02:00
|
|
|
password = None
|
|
|
|
|
|
|
|
    def download_profiles(self, profilelist: List[str], username: Optional[str] = None, password: Optional[str] = None,
                          sessionfile: Optional[str] = None, max_count: Optional[int] = None,
                          profile_pic_only: bool = False, download_videos: bool = True, geotags: bool = False,
                          download_comments: bool = False,
                          fast_update: bool = False,
                          stories: bool = False, stories_only: bool = False) -> None:
        """Download set of profiles and handle sessions.

        :param profilelist: Mix of profile names and special targets
            (#hashtag, @profile, :feed-all, :feed-liked, :stories)
        :param username: Login name; enables login-only targets and private profiles
        :param password: Password; if None, prompt interactively when needed
        :param sessionfile: Path of the session file to load/store
        :param max_count: Maximum count of pictures to download (hashtags and feeds only)
        :param profile_pic_only: Only download profile pictures
        :param download_videos: True, if videos should be downloaded
        :param geotags: Download geotags
        :param download_comments: Update comments
        :param fast_update: If true, abort each target at first already-downloaded picture
        :param stories: Also download stories of each downloaded profile
        :param stories_only: Download only stories of each profile
        """
        # Login, if desired
        if username is not None:
            try:
                self.load_session_from_file(username, sessionfile)
            except FileNotFoundError as err:
                # Only report the missing file if the user explicitly named one.
                if sessionfile is not None:
                    print(err, file=sys.stderr)
                self._log("Session file does not exist yet - Logging in.")
            if username != self.test_login(self.session):
                if password is not None:
                    self.login(username, password)
                else:
                    self.interactive_login(username)
            self._log("Logged in as %s." % username)
        # Try block for KeyboardInterrupt (save session on ^C)
        failedtargets = []
        targets = set()
        try:
            # Generate set of targets
            for pentry in profilelist:
                if pentry[0] == '#':
                    # Hashtags are downloaded immediately, not queued into targets.
                    self._log("Retrieving pictures with hashtag {0}".format(pentry))
                    self.download_hashtag(hashtag=pentry[1:], max_count=max_count, fast_update=fast_update,
                                          download_videos=download_videos, geotags=geotags,
                                          download_comments=download_comments)
                elif pentry[0] == '@':
                    # @profile expands to the set of its followees (requires login).
                    if username is not None:
                        self._log("Retrieving followees of %s..." % pentry[1:])
                        followees = self.get_followees(pentry[1:])
                        targets.update([followee['username'] for followee in followees])
                    else:
                        print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr)
                elif pentry == ":feed-all":
                    if username is not None:
                        self._log("Retrieving pictures from your feed...")
                        self.download_feed_pics(fast_update=fast_update, max_count=max_count,
                                                download_videos=download_videos, geotags=geotags,
                                                download_comments=download_comments)
                    else:
                        print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr)
                elif pentry == ":feed-liked":
                    if username is not None:
                        self._log("Retrieving pictures you liked from your feed...")
                        # filter_func skips posts the viewer has not liked; the JSON layout
                        # differs between API versions, hence the "likes" membership test.
                        self.download_feed_pics(fast_update=fast_update, max_count=max_count,
                                                filter_func=lambda node:
                                                not node["likes"]["viewer_has_liked"]
                                                if "likes" in node
                                                else not node["viewer_has_liked"],
                                                download_videos=download_videos, geotags=geotags,
                                                download_comments=download_comments)
                    else:
                        print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr)
                elif pentry == ":stories":
                    if username is not None:
                        self.download_stories(download_videos=download_videos, fast_update=fast_update)
                    else:
                        print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr)
                else:
                    targets.add(pentry)
            if len(targets) > 1:
                self._log("Downloading %i profiles..." % len(targets))
            # Iterate through targets list and download them
            for target in targets:
                try:
                    try:
                        self.download(target, profile_pic_only, download_videos,
                                      geotags, download_comments, fast_update, stories, stories_only)
                    except ProfileNotExistsException as err:
                        # A logged-in session may be blocked from seeing a profile;
                        # retry once with a fresh anonymous loader before giving up.
                        if username is not None:
                            self._log(err)
                            self._log("Trying again anonymously, helps in case you are just blocked.")
                            anonymous_loader = Instaloader(self.sleep, self.quiet, self.shorter_output,
                                                           self.user_agent, self.dirname_pattern, self.filename_pattern)
                            anonymous_loader.download(target, profile_pic_only, download_videos,
                                                      geotags, download_comments, fast_update)
                        else:
                            raise err
                except NonfatalException as err:
                    # Record the failure but keep processing the remaining targets.
                    failedtargets.append(target)
                    print(err, file=sys.stderr)
        except KeyboardInterrupt:
            print("\nInterrupted by user.", file=sys.stderr)
        if len(targets) > 1 and failedtargets:
            print("Errors occured (see above) while downloading profiles: %s." %
                  ", ".join(failedtargets), file=sys.stderr)
        # Save session if it is useful
        if username is not None:
            self.save_session_to_file(sessionfile)
def main():
    """Parse command-line arguments and run Instaloader accordingly."""
    parser = ArgumentParser(description=__doc__, add_help=False,
                            epilog="Report issues at https://github.com/Thammus/instaloader/issues.")

    # -- What to download --
    g_what = parser.add_argument_group('What to Download',
                                       'Specify a list of profiles or #hashtags. For each of these, Instaloader '
                                       'creates a folder and '
                                       'downloads all posts along with the pictures\'s '
                                       'captions and the current profile picture. '
                                       'If an already-downloaded profile has been renamed, Instaloader automatically '
                                       'finds it by its unique ID and renames the folder likewise.')
    g_what.add_argument('profile', nargs='*', metavar='profile|#hashtag',
                        help='Name of profile or #hashtag to download. '
                             'Alternatively, if --login is given: @<profile> to download all followees of '
                             '<profile>; the special targets :feed-all or :feed-liked to '
                             'download pictures from your feed; or :stories to download the stories of your '
                             'followees.')
    g_what.add_argument('-P', '--profile-pic-only', action='store_true',
                        help='Only download profile picture.')
    g_what.add_argument('-V', '--skip-videos', action='store_true',
                        help='Do not download videos.')
    g_what.add_argument('-G', '--geotags', action='store_true',
                        help='Download geotags when available. Geotags are stored as a '
                             'text file with the location\'s name and a Google Maps link. '
                             'This requires an additional request to the Instagram '
                             'server for each picture, which is why it is disabled by default.')
    g_what.add_argument('-C', '--comments', action='store_true',
                        help='Download and update comments for each post. '
                             'This requires an additional request to the Instagram '
                             'server for each post, which is why it is disabled by default.')
    g_what.add_argument('-s', '--stories', action='store_true',
                        help='Also download stories of each profile that is downloaded. Requires --login.')
    g_what.add_argument('--stories-only', action='store_true',
                        help='Rather than downloading regular posts of each specified profile, only download '
                             'stories. Requires --login.')

    # -- When to stop --
    g_stop = parser.add_argument_group('When to Stop Downloading',
                                       'If none of these options are given, Instaloader goes through all pictures '
                                       'matching the specified targets.')
    g_stop.add_argument('-F', '--fast-update', action='store_true',
                        help='For each target, stop when encountering the first already-downloaded picture. This '
                             'flag is recommended when you use Instaloader to update your personal Instagram archive.')
    g_stop.add_argument('-c', '--count',
                        help='Do not attempt to download more than COUNT posts. '
                             'Applies only to #hashtag, :feed-all and :feed-liked.')

    # -- Login --
    g_login = parser.add_argument_group('Login (Download Private Profiles)',
                                        'Instaloader can login to Instagram. This allows downloading private profiles. '
                                        'To login, pass the --login option. Your session cookie (not your password!) '
                                        'will be saved to a local file to be reused next time you want Instaloader '
                                        'to login.')
    g_login.add_argument('-l', '--login', metavar='YOUR-USERNAME',
                         help='Login name (profile name) for your Instagram account.')
    g_login.add_argument('-f', '--sessionfile',
                         help='Path for loading and storing session key file. '
                              'Defaults to ' + get_default_session_filename("<login_name>"))
    g_login.add_argument('-p', '--password', metavar='YOUR-PASSWORD',
                         help='Password for your Instagram account. Without this option, '
                              'you\'ll be prompted for your password interactively if '
                              'there is not yet a valid session file.')

    # -- How to download --
    g_how = parser.add_argument_group('How to Download')
    g_how.add_argument('--dirname-pattern',
                       help='Name of directory where to store posts. {profile} is replaced by the profile name, '
                            '{target} is replaced by the target you specified, i.e. either :feed, #hashtag or the '
                            'profile name. Defaults to \'{target}\'.')
    g_how.add_argument('--filename-pattern',
                       help='Prefix of filenames. Posts are stored in the directory whose pattern is given with '
                            '--dirname-pattern. {profile} is replaced by the profile name, '
                            '{target} is replaced by the target you specified, i.e. either :feed, #hashtag or the '
                            'profile name. Also, the fields date and shortcode can be specified. Defaults to '
                            '\'{date:%%Y-%%m-%%d_%%H-%%M-%%S}\'.')
    g_how.add_argument('--user-agent',
                       help='User Agent to use for HTTP requests. Defaults to \'{}\'.'.format(default_user_agent()))
    g_how.add_argument('-S', '--no-sleep', action='store_true',
                       help='Do not sleep between requests to Instagram\'s servers. This makes downloading faster, but '
                            'may be suspicious.')

    # -- Miscellaneous --
    g_misc = parser.add_argument_group('Miscellaneous Options')
    g_misc.add_argument('-O', '--shorter-output', action='store_true',
                        help='Do not display captions while downloading.')
    g_misc.add_argument('-q', '--quiet', action='store_true',
                        help='Disable user interaction, i.e. do not print messages (except errors) and fail '
                             'if login credentials are needed but not given. This makes Instaloader suitable as a '
                             'cron job.')
    g_misc.add_argument('-h', '--help', action='help', help='Show this help message and exit.')
    g_misc.add_argument('--version', action='version', help='Show version number and exit.',
                        version=__version__)

    args = parser.parse_args()
    try:
        # Stories require login; downgrade --stories and abort on --stories-only.
        if args.login is None and (args.stories or args.stories_only):
            print("--login=USERNAME required to download stories.", file=sys.stderr)
            args.stories = False
            if args.stories_only:
                raise SystemExit(1)
        loader = Instaloader(sleep=not args.no_sleep, quiet=args.quiet, shorter_output=args.shorter_output,
                             user_agent=args.user_agent,
                             dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern)
        loader.download_profiles(args.profile, args.login, args.password, args.sessionfile,
                                 int(args.count) if args.count is not None else None,
                                 args.profile_pic_only, not args.skip_videos, args.geotags, args.comments,
                                 args.fast_update, args.stories, args.stories_only)
    except InstaloaderException as err:
        # Any script-defined error is fatal at this level; exit with a message.
        raise SystemExit("Fatal error: %s" % err)
# Script entry point.
if __name__ == "__main__":
    main()
|