diff --git a/README.rst b/README.rst
index c9ec769..ed10274 100644
--- a/README.rst
+++ b/README.rst
@@ -9,8 +9,8 @@ Instaloader
**Instaloader**
-- downloads **public and private profiles, hashtags, user stories and
- feeds**,
+- downloads **public and private profiles, hashtags, user stories,
+ feeds and saved media**,
- downloads **comments, geotags and captions** of each post,
@@ -24,7 +24,7 @@ Instaloader
instaloader [--comments] [--geotags] [--stories]
[--login YOUR-USERNAME] [--fast-update]
- profile | "#hashtag" | :stories | :feed
+ profile | "#hashtag" | :stories | :feed | :saved
`Instaloader Documentation `__
diff --git a/docs/basic-usage.rst b/docs/basic-usage.rst
index 6f0ea54..e646a5e 100644
--- a/docs/basic-usage.rst
+++ b/docs/basic-usage.rst
@@ -63,6 +63,9 @@ Instaloader supports the following targets:
``:feed``
Your **feed** (requires :option:`--login`),
+``:saved``
+ Posts which are marked as **saved** (requires :option:`--login`),
+
``@profile``
All profiles that are followed by ``profile``, i.e. the *followees* of
``profile`` (requires :option:`--login`).
diff --git a/docs/cli-options.rst b/docs/cli-options.rst
index 15c431d..84dc864 100644
--- a/docs/cli-options.rst
+++ b/docs/cli-options.rst
@@ -7,7 +7,8 @@ Instaloader is invoked with::
where ``target`` is a ``profile``, a ``"#hashtag"``, ``@profile`` (all profiles
that *profile* is following), or if logged in ``:feed`` (pictures from your
-feed) or ``:stories`` (stories of your followees).
+feed), ``:stories`` (stories of your followees) or ``:saved`` (collection of
+posts marked as saved).
Here we explain the additional options that can be given to Instaloader to
customize its behavior. To get a list of all flags, their abbreviations and
@@ -16,11 +17,11 @@ their descriptions, you may also run ``instaloader --help``.
What to Download
^^^^^^^^^^^^^^^^
-Specify a list of targets (profiles, #hashtags, ``:feed`` or ``:stories``). For
-each of these, Instaloader creates a folder and stores all posts along with the
-pictures's captions and the current **profile picture** there. If an
-already-downloaded profile has been renamed, Instaloader automatically **finds
-it by its unique ID** and renames the folder likewise.
+Specify a list of targets (profiles, #hashtags, ``:feed``, ``:stories`` or
+``:saved``). For each of these, Instaloader creates a folder and stores all
+posts along with the pictures' captions and the current **profile picture**
+there. If an already-downloaded profile has been renamed, Instaloader
+automatically **finds it by its unique ID** and renames the folder likewise.
.. option:: --profile-pic-only
diff --git a/docs/index.rst b/docs/index.rst
index 96e5ae0..d3a7e37 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -2,7 +2,7 @@
:description:
Command line tool to download pictures (and videos) from Instagram.
Instaloader downloads public and private profiles, hashtags, user stories,
- feeds, comments, geotags, captions and other metadata of each post.
+ feeds, saved media, comments, geotags, captions and other metadata of each post.
Instaloader
===========
@@ -20,8 +20,8 @@ With `Python `__ installed, do::
**Instaloader**
-- downloads **public and private profiles, hashtags, user stories and
- feeds**,
+- downloads **public and private profiles, hashtags, user stories,
+ feeds and saved media**,
- downloads **comments, geotags and captions** of each post,
@@ -38,7 +38,8 @@ With `Python `__ installed, do::
instaloader [--comments] [--geotags] [--stories]
[--login YOUR-USERNAME] [--fast-update]
- profile | "#hashtag" | :stories | :feed
+ profile | "#hashtag" |
+ :stories | :feed | :saved
Table of Contents
diff --git a/instaloader.py b/instaloader.py
index 18c7a06..b606639 100755
--- a/instaloader.py
+++ b/instaloader.py
@@ -22,7 +22,7 @@ from datetime import datetime
from enum import Enum
from io import BytesIO
-from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple
+from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union
import requests
import requests.utils
@@ -364,7 +364,11 @@ class Post:
return self._field('edge_media_to_comment', 'count')
def get_comments(self) -> Iterator[Dict[str, Any]]:
- """Iterate over all comments of the post."""
+ """Iterate over all comments of the post.
+
+ Each comment is represented by a dictionary having the keys text, created_at, id and owner, which is a
+ dictionary with keys username, profile_pic_url and id.
+ """
if self.comments == 0:
# Avoid doing additional requests if there are no comments
return
@@ -376,6 +380,23 @@ class Post:
'https://www.instagram.com/p/' + self.shortcode + '/',
lambda d: d['data']['shortcode_media']['edge_media_to_comment'])
+    def get_likes(self) -> Iterator[Dict[str, Any]]:
+        """Iterate over all likes of the post.
+
+        Each like is represented by a dictionary having the keys username, followed_by_viewer, id, is_verified,
+        requested_by_viewer and profile_pic_url.
+        """
+        if self.likes == 0:
+            return  # Avoid doing additional requests if there are no likes
+        likes_edges = self._field('edge_media_preview_like', 'edges')
+        if self.likes == len(likes_edges):
+            # All likes are already in the Post's metadata: yield them and stop, so no GraphQL request is made
+            yield from (like['node'] for like in likes_edges)
+            return
+        yield from self._instaloader.graphql_node_list("1cb6ec562846122743b61e492c85999f", {'shortcode': self.shortcode},
+                                                       'https://www.instagram.com/p/' + self.shortcode + '/',
+                                                       lambda d: d['data']['shortcode_media']['edge_liked_by'])
+
def get_location(self) -> Optional[Dict[str, str]]:
"""If the Post has a location, returns a dictionary with fields 'lat' and 'lng'."""
loc_dict = self._field("location")
@@ -419,6 +440,8 @@ class Tristate(Enum):
class Instaloader:
+ GRAPHQL_PAGE_LENGTH = 200
+
def __init__(self,
sleep: bool = True, quiet: bool = False,
user_agent: Optional[str] = None,
@@ -651,12 +674,12 @@ class Instaloader:
session.headers.update(self._default_http_header(empty_session_only=True))
return session
- def graphql_query(self, query_id: int, variables: Dict[str, Any],
+ def graphql_query(self, query_identifier: Union[int, str], variables: Dict[str, Any],
referer: Optional[str] = None) -> Dict[str, Any]:
"""
Do a GraphQL Query.
- :param query_id: Query ID.
+ :param query_identifier: Query ID or Hash.
:param variables: Variables for the Query.
:param referer: HTTP Referer, or None.
:return: The server's response dictionary.
@@ -670,8 +693,9 @@ class Instaloader:
tmpsession.headers['accept'] = '*/*'
if referer is not None:
tmpsession.headers['referer'] = urllib.parse.quote(referer)
- resp_json = self.get_json('graphql/query', params={'query_id': query_id,
- 'variables': json.dumps(variables, separators=(',', ':'))},
+ resp_json = self.get_json('graphql/query',
+ params={'query_id' if isinstance(query_identifier, int) else 'query_hash': query_identifier,
+ 'variables': json.dumps(variables, separators=(',', ':'))},
session=tmpsession)
if 'status' not in resp_json:
self.error("GraphQL response did not contain a \"status\" field.")
@@ -698,17 +722,18 @@ class Instaloader:
his/her username. To get said ID, given the profile's name, you may call this function."""
return int(self.get_profile_metadata(profile)['user']['id'])
- def graphql_node_list(self, query_id: int, query_variables: Dict[str, Any], query_referer: Optional[str],
+ def graphql_node_list(self, query_identifier: Union[int, str], query_variables: Dict[str, Any],
+ query_referer: Optional[str],
edge_extractor: Callable[[Dict[str, Any]], Dict[str, Any]]) -> Iterator[Dict[str, Any]]:
"""Retrieve a list of GraphQL nodes."""
- query_variables['first'] = 200
- data = self.graphql_query(query_id, query_variables, query_referer)
+ query_variables['first'] = Instaloader.GRAPHQL_PAGE_LENGTH
+ data = self.graphql_query(query_identifier, query_variables, query_referer)
while True:
edge_struct = edge_extractor(data)
yield from [edge['node'] for edge in edge_struct['edges']]
if edge_struct['page_info']['has_next_page']:
query_variables['after'] = edge_struct['page_info']['end_cursor']
- data = self.graphql_query(query_id, query_variables, query_referer)
+ data = self.graphql_query(query_identifier, query_variables, query_referer)
else:
break
@@ -918,11 +943,10 @@ class Instaloader:
self.session = session
self.username = username
- def test_login(self, session: Optional[requests.Session]) -> Optional[str]:
+    def test_login(self) -> Optional[str]:
"""Returns the Instagram username to which given :class:`requests.Session` object belongs, or None."""
- if session:
- data = self.get_json('', params={'__a': 1}, session=session)
- return data['graphql']['user']['username'] if 'graphql' in data else None
+        user = self.graphql_query("d6f4427fbe92d846298cf93df0b937d3", {})["data"]["user"]  # may be null - TODO confirm
+        return user["username"] if user is not None and "username" in user else None
def login(self, user: str, passwd: str) -> None:
"""Log in to instagram with given username and password and internally store session object"""
@@ -939,10 +963,12 @@ class Instaloader:
data={'password': passwd, 'username': user}, allow_redirects=True)
session.headers.update({'X-CSRFToken': login.cookies['csrftoken']})
if login.status_code == 200:
- if user == self.test_login(session):
+ self.session = session
+ if user == self.test_login():
self.username = user
- self.session = session
else:
+ self.username = None
+ self.session = None
raise BadCredentialsException('Login error! Check your credentials!')
else:
raise ConnectionException('Login error! Connection error!')
@@ -1214,6 +1240,57 @@ class Instaloader:
if fast_update and not downloaded:
break
+    def get_saved_posts(self) -> Iterator[Post]:
+        """Yield the Posts that the user ``self.username`` has marked as saved.
+
+        The first page is taken from the profile metadata of ``self.username``; every further page is
+        requested through a GraphQL query for as long as the page info reports a next page.
+        """
+        response = self.get_profile_metadata(self.username)
+        user_id = response["user"]["id"]
+
+        while True:
+            # Three response layouts are handled: "graphql" and "data" wrap the posts in
+            # edge_saved_media edges, anything else is read as a plain list of nodes.
+            wrapper = next((key for key in ("graphql", "data") if key in response), None)
+            if wrapper is not None:
+                saved_media = response[wrapper]["user"]["edge_saved_media"]
+                nodes = (edge["node"] for edge in saved_media["edges"])
+            else:
+                saved_media = response["user"]["saved_media"]
+                nodes = iter(saved_media["nodes"])
+            yield from (Post(self, node) for node in nodes)
+
+            page_info = saved_media["page_info"]
+            if not page_info["has_next_page"]:
+                return
+            response = self.graphql_query("f883d95537fbcd400f466f63d42bd8a1",
+                                          {'id': user_id, 'first': Instaloader.GRAPHQL_PAGE_LENGTH,
+                                           'after': page_info["end_cursor"]})
+
+    def download_saved_posts(self, max_count: Optional[int] = None, fast_update: bool = False,
+                             filter_func: Optional[Callable[[Post], bool]] = None) -> None:
+        """Download user's saved pictures.
+
+        :param max_count: Maximum count of pictures to download
+        :param fast_update: If true, abort when first already-downloaded picture is encountered
+        :param filter_func: function(post), which returns True if given picture should be downloaded
+        """
+        count = 1
+        for post in self.get_saved_posts():
+            if max_count is not None and count > max_count:
+                break
+            name = post.owner_username
+            if filter_func is not None and not filter_func(post):
+                self._log("<pic by {} skipped".format(name), flush=True)  # NOTE(review): wording reconstructed
+                continue  # filtered posts are neither downloaded nor counted against max_count
+            self._log("[{:>3}] {} ".format(count, name), end=str(), flush=True)
+            count += 1
+            with self._error_catcher('Download saved posts'):
+                downloaded = self.download_post(post, target=':saved')
+                if fast_update and not downloaded:  # kept inside the with-block so `downloaded` is always bound
+                    break
+
def get_hashtag_posts(self, hashtag: str) -> Iterator[Post]:
"""Get Posts associated with a #hashtag."""
yield from (Post(self, node) for node in
@@ -1322,7 +1399,7 @@ class Instaloader:
# We do not use self.graphql_node_list() here, because profile_metadata
# lets us obtain the first 12 nodes 'for free'
data = self.graphql_query(17888483320059182, {'id': profile_metadata['user']['id'],
- 'first': 200,
+ 'first': Instaloader.GRAPHQL_PAGE_LENGTH,
'after': end_cursor},
'https://www.instagram.com/{0}/'.format(profile_name))
media = data['data']['user']['edge_owner_to_timeline_media']
@@ -1433,7 +1510,7 @@ class Instaloader:
if sessionfile is not None:
print(err, file=sys.stderr)
self._log("Session file does not exist yet - Logging in.")
- if not self.is_logged_in or username != self.test_login(self.session):
+ if not self.is_logged_in or username != self.test_login():
if password is not None:
self.login(username, password)
else:
@@ -1471,6 +1548,14 @@ class Instaloader:
self.download_stories(fast_update=fast_update)
else:
self.error("--login=USERNAME required to download {}.".format(pentry))
+ elif pentry == ":saved":
+ if username is not None:
+ self._log("Retrieving saved posts...")
+ with self._error_catcher():
+ self.download_saved_posts(fast_update=fast_update, max_count=max_count,
+ filter_func=filter_func)
+ else:
+ self.error("--login=USERNAME required to download {}.".format(pentry))
else:
targets.add(pentry)
if len(targets) > 1:
@@ -1518,9 +1603,10 @@ def main():
g_what.add_argument('profile', nargs='*', metavar='profile|#hashtag',
help='Name of profile or #hashtag to download. '
'Alternatively, if --login is given: @ to download all followees of '
- '; the special targets :feed to '
- 'download pictures from your feed; or :stories to download the stories of your '
- 'followees.')
+ '; the special targets '
+ ':feed to download pictures from your feed; '
+ ':stories to download the stories of your followees; or '
+ ':saved to download the posts marked as saved.')
g_what.add_argument('-P', '--profile-pic-only', action='store_true',
help='Only download profile picture.')
g_what.add_argument('--no-profile-pic', action='store_true',