mirror of
https://github.com/instaloader/instaloader.git
synced 2024-11-23 10:42:30 +01:00
Merge branch 'v3.3-dev'
This commit is contained in:
commit
5f34fca9e7
@ -9,8 +9,8 @@ Instaloader
|
||||
|
||||
**Instaloader**
|
||||
|
||||
- downloads **public and private profiles, hashtags, user stories and
|
||||
feeds**,
|
||||
- downloads **public and private profiles, hashtags, user stories,
|
||||
feeds and saved media**,
|
||||
|
||||
- downloads **comments, geotags and captions** of each post,
|
||||
|
||||
@ -24,7 +24,7 @@ Instaloader
|
||||
|
||||
instaloader [--comments] [--geotags] [--stories]
|
||||
[--login YOUR-USERNAME] [--fast-update]
|
||||
profile | "#hashtag" | :stories | :feed
|
||||
profile | "#hashtag" | :stories | :feed | :saved
|
||||
|
||||
`Instaloader Documentation <https://instaloader.github.io/>`__
|
||||
|
||||
|
@ -63,6 +63,9 @@ Instaloader supports the following targets:
|
||||
``:feed``
|
||||
Your **feed** (requires :option:`--login`),
|
||||
|
||||
``:saved``
|
||||
Posts which are marked as **saved** (requires :option:`--login`),
|
||||
|
||||
``@profile``
|
||||
All profiles that are followed by ``profile``, i.e. the *followees* of
|
||||
``profile`` (requires :option:`--login`).
|
||||
|
@ -7,7 +7,8 @@ Instaloader is invoked with::
|
||||
|
||||
where ``target`` is a ``profile``, a ``"#hashtag"``, ``@profile`` (all profiles
|
||||
that *profile* is following), or if logged in ``:feed`` (pictures from your
|
||||
feed) or ``:stories`` (stories of your followees).
|
||||
feed), ``:stories`` (stories of your followees) or ``:saved`` (collection of
|
||||
posts marked as saved).
|
||||
|
||||
Here we explain the additional options that can be given to Instaloader to
|
||||
customize its behavior. To get a list of all flags, their abbreviations and
|
||||
@ -16,11 +17,11 @@ their descriptions, you may also run ``instaloader --help``.
|
||||
What to Download
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
Specify a list of targets (profiles, #hashtags, ``:feed`` or ``:stories``). For
|
||||
each of these, Instaloader creates a folder and stores all posts along with the
|
||||
pictures' captions and the current **profile picture** there. If an
|
||||
already-downloaded profile has been renamed, Instaloader automatically **finds
|
||||
it by its unique ID** and renames the folder likewise.
|
||||
Specify a list of targets (profiles, #hashtags, ``:feed``, ``:stories`` or
|
||||
``:saved``). For each of these, Instaloader creates a folder and stores all
|
||||
posts along with the pictures' captions and the current **profile picture**
|
||||
there. If an already-downloaded profile has been renamed, Instaloader
|
||||
automatically **finds it by its unique ID** and renames the folder likewise.
|
||||
|
||||
.. option:: --profile-pic-only
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
:description:
|
||||
Command line tool to download pictures (and videos) from Instagram.
|
||||
Instaloader downloads public and private profiles, hashtags, user stories,
|
||||
feeds, comments, geotags, captions and other metadata of each post.
|
||||
feeds, saved media, comments, geotags, captions and other metadata of each post.
|
||||
|
||||
Instaloader
|
||||
===========
|
||||
@ -20,8 +20,8 @@ With `Python <https://www.python.org/>`__ installed, do::
|
||||
|
||||
**Instaloader**
|
||||
|
||||
- downloads **public and private profiles, hashtags, user stories and
|
||||
feeds**,
|
||||
- downloads **public and private profiles, hashtags, user stories,
|
||||
feeds and saved media**,
|
||||
|
||||
- downloads **comments, geotags and captions** of each post,
|
||||
|
||||
@ -38,7 +38,8 @@ With `Python <https://www.python.org/>`__ installed, do::
|
||||
|
||||
instaloader [--comments] [--geotags] [--stories]
|
||||
[--login YOUR-USERNAME] [--fast-update]
|
||||
profile | "#hashtag" | :stories | :feed
|
||||
profile | "#hashtag" |
|
||||
:stories | :feed | :saved
|
||||
|
||||
|
||||
Table of Contents
|
||||
|
128
instaloader.py
128
instaloader.py
@ -22,7 +22,7 @@ from datetime import datetime
|
||||
from enum import Enum
|
||||
|
||||
from io import BytesIO
|
||||
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple
|
||||
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union
|
||||
|
||||
import requests
|
||||
import requests.utils
|
||||
@ -364,7 +364,11 @@ class Post:
|
||||
return self._field('edge_media_to_comment', 'count')
|
||||
|
||||
def get_comments(self) -> Iterator[Dict[str, Any]]:
|
||||
"""Iterate over all comments of the post."""
|
||||
"""Iterate over all comments of the post.
|
||||
|
||||
Each comment is represented by a dictionary having the keys text, created_at, id and owner, which is a
|
||||
dictionary with keys username, profile_pic_url and id.
|
||||
"""
|
||||
if self.comments == 0:
|
||||
# Avoid doing additional requests if there are no comments
|
||||
return
|
||||
@ -376,6 +380,23 @@ class Post:
|
||||
'https://www.instagram.com/p/' + self.shortcode + '/',
|
||||
lambda d: d['data']['shortcode_media']['edge_media_to_comment'])
|
||||
|
||||
def get_likes(self) -> Iterator[Dict[str, Any]]:
    """Iterate over all likes of the post.

    Each like is represented by a dictionary having the keys username, followed_by_viewer, id, is_verified,
    requested_by_viewer and profile_pic_url.

    No request is made when the post has no likes, or when the post's metadata already lists all of them.
    """
    if self.likes == 0:
        # Avoid doing additional requests if there are no likes
        return
    likes_edges = self._field('edge_media_preview_like', 'edges')
    if self.likes == len(likes_edges):
        # If the Post's metadata already contains all likes, don't do GraphQL requests to obtain them
        yield from (like['node'] for like in likes_edges)
        # Stop here: falling through would query GraphQL anyway and yield every like a second time.
        return
    yield from self._instaloader.graphql_node_list("1cb6ec562846122743b61e492c85999f", {'shortcode': self.shortcode},
                                                   'https://www.instagram.com/p/' + self.shortcode + '/',
                                                   lambda d: d['data']['shortcode_media']['edge_liked_by'])
|
||||
|
||||
def get_location(self) -> Optional[Dict[str, str]]:
|
||||
"""If the Post has a location, returns a dictionary with fields 'lat' and 'lng'."""
|
||||
loc_dict = self._field("location")
|
||||
@ -419,6 +440,8 @@ class Tristate(Enum):
|
||||
|
||||
|
||||
class Instaloader:
|
||||
GRAPHQL_PAGE_LENGTH = 200
|
||||
|
||||
def __init__(self,
|
||||
sleep: bool = True, quiet: bool = False,
|
||||
user_agent: Optional[str] = None,
|
||||
@ -651,12 +674,12 @@ class Instaloader:
|
||||
session.headers.update(self._default_http_header(empty_session_only=True))
|
||||
return session
|
||||
|
||||
def graphql_query(self, query_id: int, variables: Dict[str, Any],
|
||||
def graphql_query(self, query_identifier: Union[int, str], variables: Dict[str, Any],
|
||||
referer: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Do a GraphQL Query.
|
||||
|
||||
:param query_id: Query ID.
|
||||
:param query_identifier: Query ID or Hash.
|
||||
:param variables: Variables for the Query.
|
||||
:param referer: HTTP Referer, or None.
|
||||
:return: The server's response dictionary.
|
||||
@ -670,8 +693,9 @@ class Instaloader:
|
||||
tmpsession.headers['accept'] = '*/*'
|
||||
if referer is not None:
|
||||
tmpsession.headers['referer'] = urllib.parse.quote(referer)
|
||||
resp_json = self.get_json('graphql/query', params={'query_id': query_id,
|
||||
'variables': json.dumps(variables, separators=(',', ':'))},
|
||||
resp_json = self.get_json('graphql/query',
|
||||
params={'query_id' if isinstance(query_identifier, int) else 'query_hash': query_identifier,
|
||||
'variables': json.dumps(variables, separators=(',', ':'))},
|
||||
session=tmpsession)
|
||||
if 'status' not in resp_json:
|
||||
self.error("GraphQL response did not contain a \"status\" field.")
|
||||
@ -698,17 +722,18 @@ class Instaloader:
|
||||
his/her username. To get said ID, given the profile's name, you may call this function."""
|
||||
return int(self.get_profile_metadata(profile)['user']['id'])
|
||||
|
||||
def graphql_node_list(self, query_identifier: Union[int, str], query_variables: Dict[str, Any],
                      query_referer: Optional[str],
                      edge_extractor: Callable[[Dict[str, Any]], Dict[str, Any]]) -> Iterator[Dict[str, Any]]:
    """Retrieve a list of GraphQL nodes.

    :param query_identifier: Query ID or Hash, passed on to :meth:`graphql_query`.
    :param query_variables: Variables for the Query. The dictionary is not modified; the paging
       variables ``first`` and ``after`` are added to a private copy.
    :param query_referer: HTTP Referer, or None.
    :param edge_extractor: Function that returns, for a response dictionary, the structure holding
       the ``edges`` and ``page_info`` entries.
    :return: Iterator over the ``node`` entry of every edge, across all pages.
    """
    # Work on a copy so that the paging keys do not leak into the caller's dictionary.
    variables = dict(query_variables)
    variables['first'] = self.GRAPHQL_PAGE_LENGTH
    data = self.graphql_query(query_identifier, variables, query_referer)
    while True:
        edge_struct = edge_extractor(data)
        yield from [edge['node'] for edge in edge_struct['edges']]
        page_info = edge_struct['page_info']
        if not page_info['has_next_page']:
            break
        # Continue after the last edge of the page just processed.
        variables['after'] = page_info['end_cursor']
        data = self.graphql_query(query_identifier, variables, query_referer)
|
||||
|
||||
@ -918,11 +943,10 @@ class Instaloader:
|
||||
self.session = session
|
||||
self.username = username
|
||||
|
||||
def test_login(self, session: Optional[requests.Session]) -> Optional[str]:
|
||||
def test_login(self) -> Optional[str]:
|
||||
"""Returns the Instagram username to which given :class:`requests.Session` object belongs, or None."""
|
||||
if session:
|
||||
data = self.get_json('', params={'__a': 1}, session=session)
|
||||
return data['graphql']['user']['username'] if 'graphql' in data else None
|
||||
data = self.graphql_query("d6f4427fbe92d846298cf93df0b937d3", {})
|
||||
return data["data"]["user"]["username"] if "username" in data["data"]["user"] else None
|
||||
|
||||
def login(self, user: str, passwd: str) -> None:
|
||||
"""Log in to instagram with given username and password and internally store session object"""
|
||||
@ -939,10 +963,12 @@ class Instaloader:
|
||||
data={'password': passwd, 'username': user}, allow_redirects=True)
|
||||
session.headers.update({'X-CSRFToken': login.cookies['csrftoken']})
|
||||
if login.status_code == 200:
|
||||
if user == self.test_login(session):
|
||||
self.session = session
|
||||
if user == self.test_login():
|
||||
self.username = user
|
||||
self.session = session
|
||||
else:
|
||||
self.username = None
|
||||
self.session = None
|
||||
raise BadCredentialsException('Login error! Check your credentials!')
|
||||
else:
|
||||
raise ConnectionException('Login error! Connection error!')
|
||||
@ -1214,6 +1240,57 @@ class Instaloader:
|
||||
if fast_update and not downloaded:
|
||||
break
|
||||
|
||||
def get_saved_posts(self) -> Iterator[Post]:
    """Yield the Posts that the user ``self.username`` has marked as saved.

    The first page is taken from the profile metadata; every further page is requested through
    GraphQL for as long as the page info reports that another page exists.
    """
    response = self.get_profile_metadata(self.username)
    owner_id = response["user"]["id"]

    while True:
        # Three response layouts are handled: two wrap each post in an edge, one lists the nodes directly.
        if "graphql" in response:
            page = response["graphql"]["user"]["edge_saved_media"]
            raw_posts = (edge["node"] for edge in page["edges"])
        elif "data" in response:
            page = response["data"]["user"]["edge_saved_media"]
            raw_posts = (edge["node"] for edge in page["edges"])
        else:
            page = response["user"]["saved_media"]
            raw_posts = iter(page["nodes"])

        for raw_post in raw_posts:
            yield Post(self, raw_post)

        paging = page["page_info"]
        if not paging["has_next_page"]:
            break
        response = self.graphql_query("f883d95537fbcd400f466f63d42bd8a1",
                                      {'id': owner_id, 'first': Instaloader.GRAPHQL_PAGE_LENGTH,
                                       'after': paging["end_cursor"]})
|
||||
|
||||
def download_saved_posts(self, max_count: Optional[int] = None, fast_update: bool = False,
                         filter_func: Optional[Callable[[Post], bool]] = None) -> None:
    """Download user's saved pictures.

    :param max_count: Maximum count of pictures to download
    :param fast_update: If true, abort when first already-downloaded picture is encountered
    :param filter_func: function(post), which returns True if given picture should be downloaded
    """
    count = 1
    for post in self.get_saved_posts():
        if max_count is not None and count > max_count:
            break
        name = post.owner_username
        if filter_func is not None and not filter_func(post):
            # Skipped posts do not count towards max_count.
            self._log("<pic by {} skipped>".format(name), flush=True)
            continue
        self._log("[{:>3}] {} ".format(count, name), end=str(), flush=True)
        count += 1
        with self._error_catcher('Download saved posts'):
            downloaded = self.download_post(post, target=':saved')
            # Checked inside the context so that `downloaded` is always bound when it is read.
            # NOTE(review): presumably _error_catcher can suppress a failed download - confirm.
            if fast_update and not downloaded:
                break
|
||||
|
||||
def get_hashtag_posts(self, hashtag: str) -> Iterator[Post]:
|
||||
"""Get Posts associated with a #hashtag."""
|
||||
yield from (Post(self, node) for node in
|
||||
@ -1322,7 +1399,7 @@ class Instaloader:
|
||||
# We do not use self.graphql_node_list() here, because profile_metadata
|
||||
# lets us obtain the first 12 nodes 'for free'
|
||||
data = self.graphql_query(17888483320059182, {'id': profile_metadata['user']['id'],
|
||||
'first': 200,
|
||||
'first': Instaloader.GRAPHQL_PAGE_LENGTH,
|
||||
'after': end_cursor},
|
||||
'https://www.instagram.com/{0}/'.format(profile_name))
|
||||
media = data['data']['user']['edge_owner_to_timeline_media']
|
||||
@ -1433,7 +1510,7 @@ class Instaloader:
|
||||
if sessionfile is not None:
|
||||
print(err, file=sys.stderr)
|
||||
self._log("Session file does not exist yet - Logging in.")
|
||||
if not self.is_logged_in or username != self.test_login(self.session):
|
||||
if not self.is_logged_in or username != self.test_login():
|
||||
if password is not None:
|
||||
self.login(username, password)
|
||||
else:
|
||||
@ -1471,6 +1548,14 @@ class Instaloader:
|
||||
self.download_stories(fast_update=fast_update)
|
||||
else:
|
||||
self.error("--login=USERNAME required to download {}.".format(pentry))
|
||||
elif pentry == ":saved":
|
||||
if username is not None:
|
||||
self._log("Retrieving saved posts...")
|
||||
with self._error_catcher():
|
||||
self.download_saved_posts(fast_update=fast_update, max_count=max_count,
|
||||
filter_func=filter_func)
|
||||
else:
|
||||
self.error("--login=USERNAME required to download {}.".format(pentry))
|
||||
else:
|
||||
targets.add(pentry)
|
||||
if len(targets) > 1:
|
||||
@ -1518,9 +1603,10 @@ def main():
|
||||
g_what.add_argument('profile', nargs='*', metavar='profile|#hashtag',
|
||||
help='Name of profile or #hashtag to download. '
|
||||
'Alternatively, if --login is given: @<profile> to download all followees of '
|
||||
'<profile>; the special targets :feed to '
|
||||
'download pictures from your feed; or :stories to download the stories of your '
|
||||
'followees.')
|
||||
'<profile>; the special targets '
|
||||
':feed to download pictures from your feed; '
|
||||
':stories to download the stories of your followees; or '
|
||||
':saved to download the posts marked as saved.')
|
||||
g_what.add_argument('-P', '--profile-pic-only', action='store_true',
|
||||
help='Only download profile picture.')
|
||||
g_what.add_argument('--no-profile-pic', action='store_true',
|
||||
|
Loading…
Reference in New Issue
Block a user