mirror of
https://github.com/instaloader/instaloader.git
synced 2024-07-14 23:00:06 +02:00
Merge branch 'v3.3-dev'
This commit is contained in:
commit
5f34fca9e7
@ -9,8 +9,8 @@ Instaloader
|
|||||||
|
|
||||||
**Instaloader**
|
**Instaloader**
|
||||||
|
|
||||||
- downloads **public and private profiles, hashtags, user stories and
|
- downloads **public and private profiles, hashtags, user stories,
|
||||||
feeds**,
|
feeds and saved media**,
|
||||||
|
|
||||||
- downloads **comments, geotags and captions** of each post,
|
- downloads **comments, geotags and captions** of each post,
|
||||||
|
|
||||||
@ -24,7 +24,7 @@ Instaloader
|
|||||||
|
|
||||||
instaloader [--comments] [--geotags] [--stories]
|
instaloader [--comments] [--geotags] [--stories]
|
||||||
[--login YOUR-USERNAME] [--fast-update]
|
[--login YOUR-USERNAME] [--fast-update]
|
||||||
profile | "#hashtag" | :stories | :feed
|
profile | "#hashtag" | :stories | :feed | :saved
|
||||||
|
|
||||||
`Instaloader Documentation <https://instaloader.github.io/>`__
|
`Instaloader Documentation <https://instaloader.github.io/>`__
|
||||||
|
|
||||||
|
@ -63,6 +63,9 @@ Instaloader supports the following targets:
|
|||||||
``:feed``
|
``:feed``
|
||||||
Your **feed** (requires :option:`--login`),
|
Your **feed** (requires :option:`--login`),
|
||||||
|
|
||||||
|
``:saved``
|
||||||
|
Posts which are marked as **saved** (requires :option:`--login`),
|
||||||
|
|
||||||
``@profile``
|
``@profile``
|
||||||
All profiles that are followed by ``profile``, i.e. the *followees* of
|
All profiles that are followed by ``profile``, i.e. the *followees* of
|
||||||
``profile`` (requires :option:`--login`).
|
``profile`` (requires :option:`--login`).
|
||||||
|
@ -7,7 +7,8 @@ Instaloader is invoked with::
|
|||||||
|
|
||||||
where ``target`` is a ``profile``, a ``"#hashtag"``, ``@profile`` (all profiles
|
where ``target`` is a ``profile``, a ``"#hashtag"``, ``@profile`` (all profiles
|
||||||
that *profile* is following), or if logged in ``:feed`` (pictures from your
|
that *profile* is following), or if logged in ``:feed`` (pictures from your
|
||||||
feed) or ``:stories`` (stories of your followees).
|
feed), ``:stories`` (stories of your followees) or ``:saved`` (collection of
|
||||||
|
posts marked as saved).
|
||||||
|
|
||||||
Here we explain the additional options that can be given to Instaloader to
|
Here we explain the additional options that can be given to Instaloader to
|
||||||
customize its behavior. To get a list of all flags, their abbreviations and
|
customize its behavior. To get a list of all flags, their abbreviations and
|
||||||
@ -16,11 +17,11 @@ their descriptions, you may also run ``instaloader --help``.
|
|||||||
What to Download
|
What to Download
|
||||||
^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
Specify a list of targets (profiles, #hashtags, ``:feed`` or ``:stories``). For
|
Specify a list of targets (profiles, #hashtags, ``:feed``, ``:stories`` or
|
||||||
each of these, Instaloader creates a folder and stores all posts along with the
|
``:saved``). For each of these, Instaloader creates a folder and stores all
|
||||||
pictures' captions and the current **profile picture** there. If an
|
posts along with the pictures' captions and the current **profile picture**
|
||||||
already-downloaded profile has been renamed, Instaloader automatically **finds
|
there. If an already-downloaded profile has been renamed, Instaloader
|
||||||
it by its unique ID** and renames the folder likewise.
|
automatically **finds it by its unique ID** and renames the folder likewise.
|
||||||
|
|
||||||
.. option:: --profile-pic-only
|
.. option:: --profile-pic-only
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
:description:
|
:description:
|
||||||
Command line tool to download pictures (and videos) from Instagram.
|
Command line tool to download pictures (and videos) from Instagram.
|
||||||
Instaloader downloads public and private profiles, hashtags, user stories,
|
Instaloader downloads public and private profiles, hashtags, user stories,
|
||||||
feeds, comments, geotags, captions and other metadata of each post.
|
feeds, saved media, comments, geotags, captions and other metadata of each post.
|
||||||
|
|
||||||
Instaloader
|
Instaloader
|
||||||
===========
|
===========
|
||||||
@ -20,8 +20,8 @@ With `Python <https://www.python.org/>`__ installed, do::
|
|||||||
|
|
||||||
**Instaloader**
|
**Instaloader**
|
||||||
|
|
||||||
- downloads **public and private profiles, hashtags, user stories and
|
- downloads **public and private profiles, hashtags, user stories,
|
||||||
feeds**,
|
feeds and saved media**,
|
||||||
|
|
||||||
- downloads **comments, geotags and captions** of each post,
|
- downloads **comments, geotags and captions** of each post,
|
||||||
|
|
||||||
@ -38,7 +38,8 @@ With `Python <https://www.python.org/>`__ installed, do::
|
|||||||
|
|
||||||
instaloader [--comments] [--geotags] [--stories]
|
instaloader [--comments] [--geotags] [--stories]
|
||||||
[--login YOUR-USERNAME] [--fast-update]
|
[--login YOUR-USERNAME] [--fast-update]
|
||||||
profile | "#hashtag" | :stories | :feed
|
profile | "#hashtag" |
|
||||||
|
:stories | :feed | :saved
|
||||||
|
|
||||||
|
|
||||||
Table of Contents
|
Table of Contents
|
||||||
|
128
instaloader.py
128
instaloader.py
@ -22,7 +22,7 @@ from datetime import datetime
|
|||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple
|
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
import requests.utils
|
import requests.utils
|
||||||
@ -364,7 +364,11 @@ class Post:
|
|||||||
return self._field('edge_media_to_comment', 'count')
|
return self._field('edge_media_to_comment', 'count')
|
||||||
|
|
||||||
def get_comments(self) -> Iterator[Dict[str, Any]]:
|
def get_comments(self) -> Iterator[Dict[str, Any]]:
|
||||||
"""Iterate over all comments of the post."""
|
"""Iterate over all comments of the post.
|
||||||
|
|
||||||
|
Each comment is represented by a dictionary having the keys text, created_at, id and owner, which is a
|
||||||
|
dictionary with keys username, profile_pic_url and id.
|
||||||
|
"""
|
||||||
if self.comments == 0:
|
if self.comments == 0:
|
||||||
# Avoid doing additional requests if there are no comments
|
# Avoid doing additional requests if there are no comments
|
||||||
return
|
return
|
||||||
@ -376,6 +380,23 @@ class Post:
|
|||||||
'https://www.instagram.com/p/' + self.shortcode + '/',
|
'https://www.instagram.com/p/' + self.shortcode + '/',
|
||||||
lambda d: d['data']['shortcode_media']['edge_media_to_comment'])
|
lambda d: d['data']['shortcode_media']['edge_media_to_comment'])
|
||||||
|
|
||||||
|
def get_likes(self) -> Iterator[Dict[str, Any]]:
    """Iterate over all likes of the post.

    Each like is represented by a dictionary having the keys username, followed_by_viewer, id, is_verified,
    requested_by_viewer and profile_pic_url.
    """
    if self.likes == 0:
        # Avoid doing additional requests if there are no likes
        return
    likes_edges = self._field('edge_media_preview_like', 'edges')
    if self.likes == len(likes_edges):
        # If the Post's metadata already contains all likes, don't do GraphQL requests to obtain them.
        yield from (like['node'] for like in likes_edges)
        # Stop here: falling through would query GraphQL anyway and yield every like a second time.
        return
    yield from self._instaloader.graphql_node_list("1cb6ec562846122743b61e492c85999f", {'shortcode': self.shortcode},
                                                   'https://www.instagram.com/p/' + self.shortcode + '/',
                                                   lambda d: d['data']['shortcode_media']['edge_liked_by'])
|
||||||
|
|
||||||
def get_location(self) -> Optional[Dict[str, str]]:
|
def get_location(self) -> Optional[Dict[str, str]]:
|
||||||
"""If the Post has a location, returns a dictionary with fields 'lat' and 'lng'."""
|
"""If the Post has a location, returns a dictionary with fields 'lat' and 'lng'."""
|
||||||
loc_dict = self._field("location")
|
loc_dict = self._field("location")
|
||||||
@ -419,6 +440,8 @@ class Tristate(Enum):
|
|||||||
|
|
||||||
|
|
||||||
class Instaloader:
|
class Instaloader:
|
||||||
|
GRAPHQL_PAGE_LENGTH = 200
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
sleep: bool = True, quiet: bool = False,
|
sleep: bool = True, quiet: bool = False,
|
||||||
user_agent: Optional[str] = None,
|
user_agent: Optional[str] = None,
|
||||||
@ -651,12 +674,12 @@ class Instaloader:
|
|||||||
session.headers.update(self._default_http_header(empty_session_only=True))
|
session.headers.update(self._default_http_header(empty_session_only=True))
|
||||||
return session
|
return session
|
||||||
|
|
||||||
def graphql_query(self, query_id: int, variables: Dict[str, Any],
|
def graphql_query(self, query_identifier: Union[int, str], variables: Dict[str, Any],
|
||||||
referer: Optional[str] = None) -> Dict[str, Any]:
|
referer: Optional[str] = None) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Do a GraphQL Query.
|
Do a GraphQL Query.
|
||||||
|
|
||||||
:param query_id: Query ID.
|
:param query_identifier: Query ID or Hash.
|
||||||
:param variables: Variables for the Query.
|
:param variables: Variables for the Query.
|
||||||
:param referer: HTTP Referer, or None.
|
:param referer: HTTP Referer, or None.
|
||||||
:return: The server's response dictionary.
|
:return: The server's response dictionary.
|
||||||
@ -670,8 +693,9 @@ class Instaloader:
|
|||||||
tmpsession.headers['accept'] = '*/*'
|
tmpsession.headers['accept'] = '*/*'
|
||||||
if referer is not None:
|
if referer is not None:
|
||||||
tmpsession.headers['referer'] = urllib.parse.quote(referer)
|
tmpsession.headers['referer'] = urllib.parse.quote(referer)
|
||||||
resp_json = self.get_json('graphql/query', params={'query_id': query_id,
|
resp_json = self.get_json('graphql/query',
|
||||||
'variables': json.dumps(variables, separators=(',', ':'))},
|
params={'query_id' if isinstance(query_identifier, int) else 'query_hash': query_identifier,
|
||||||
|
'variables': json.dumps(variables, separators=(',', ':'))},
|
||||||
session=tmpsession)
|
session=tmpsession)
|
||||||
if 'status' not in resp_json:
|
if 'status' not in resp_json:
|
||||||
self.error("GraphQL response did not contain a \"status\" field.")
|
self.error("GraphQL response did not contain a \"status\" field.")
|
||||||
@ -698,17 +722,18 @@ class Instaloader:
|
|||||||
his/her username. To get said ID, given the profile's name, you may call this function."""
|
his/her username. To get said ID, given the profile's name, you may call this function."""
|
||||||
return int(self.get_profile_metadata(profile)['user']['id'])
|
return int(self.get_profile_metadata(profile)['user']['id'])
|
||||||
|
|
||||||
def graphql_node_list(self, query_id: int, query_variables: Dict[str, Any], query_referer: Optional[str],
|
def graphql_node_list(self, query_identifier: Union[int, str], query_variables: Dict[str, Any],
|
||||||
|
query_referer: Optional[str],
|
||||||
edge_extractor: Callable[[Dict[str, Any]], Dict[str, Any]]) -> Iterator[Dict[str, Any]]:
|
edge_extractor: Callable[[Dict[str, Any]], Dict[str, Any]]) -> Iterator[Dict[str, Any]]:
|
||||||
"""Retrieve a list of GraphQL nodes."""
|
"""Retrieve a list of GraphQL nodes."""
|
||||||
query_variables['first'] = 200
|
query_variables['first'] = Instaloader.GRAPHQL_PAGE_LENGTH
|
||||||
data = self.graphql_query(query_id, query_variables, query_referer)
|
data = self.graphql_query(query_identifier, query_variables, query_referer)
|
||||||
while True:
|
while True:
|
||||||
edge_struct = edge_extractor(data)
|
edge_struct = edge_extractor(data)
|
||||||
yield from [edge['node'] for edge in edge_struct['edges']]
|
yield from [edge['node'] for edge in edge_struct['edges']]
|
||||||
if edge_struct['page_info']['has_next_page']:
|
if edge_struct['page_info']['has_next_page']:
|
||||||
query_variables['after'] = edge_struct['page_info']['end_cursor']
|
query_variables['after'] = edge_struct['page_info']['end_cursor']
|
||||||
data = self.graphql_query(query_id, query_variables, query_referer)
|
data = self.graphql_query(query_identifier, query_variables, query_referer)
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -918,11 +943,10 @@ class Instaloader:
|
|||||||
self.session = session
|
self.session = session
|
||||||
self.username = username
|
self.username = username
|
||||||
|
|
||||||
def test_login(self, session: Optional[requests.Session]) -> Optional[str]:
|
def test_login(self) -> Optional[str]:
|
||||||
"""Returns the Instagram username to which given :class:`requests.Session` object belongs, or None."""
|
"""Returns the Instagram username to which the currently used session belongs, or None."""
|
||||||
if session:
|
data = self.graphql_query("d6f4427fbe92d846298cf93df0b937d3", {})
|
||||||
data = self.get_json('', params={'__a': 1}, session=session)
|
return data["data"]["user"]["username"] if "username" in data["data"]["user"] else None
|
||||||
return data['graphql']['user']['username'] if 'graphql' in data else None
|
|
||||||
|
|
||||||
def login(self, user: str, passwd: str) -> None:
|
def login(self, user: str, passwd: str) -> None:
|
||||||
"""Log in to instagram with given username and password and internally store session object"""
|
"""Log in to instagram with given username and password and internally store session object"""
|
||||||
@ -939,10 +963,12 @@ class Instaloader:
|
|||||||
data={'password': passwd, 'username': user}, allow_redirects=True)
|
data={'password': passwd, 'username': user}, allow_redirects=True)
|
||||||
session.headers.update({'X-CSRFToken': login.cookies['csrftoken']})
|
session.headers.update({'X-CSRFToken': login.cookies['csrftoken']})
|
||||||
if login.status_code == 200:
|
if login.status_code == 200:
|
||||||
if user == self.test_login(session):
|
self.session = session
|
||||||
|
if user == self.test_login():
|
||||||
self.username = user
|
self.username = user
|
||||||
self.session = session
|
|
||||||
else:
|
else:
|
||||||
|
self.username = None
|
||||||
|
self.session = None
|
||||||
raise BadCredentialsException('Login error! Check your credentials!')
|
raise BadCredentialsException('Login error! Check your credentials!')
|
||||||
else:
|
else:
|
||||||
raise ConnectionException('Login error! Connection error!')
|
raise ConnectionException('Login error! Connection error!')
|
||||||
@ -1214,6 +1240,57 @@ class Instaloader:
|
|||||||
if fast_update and not downloaded:
|
if fast_update and not downloaded:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
def get_saved_posts(self) -> Iterator[Post]:
    """Yield every Post the logged-in user has marked as saved.

    The first page is taken from the profile metadata of ``self.username``; further pages are
    requested through GraphQL for as long as the page info reports a next page.
    """
    response = self.get_profile_metadata(self.username)
    owner_id = response["user"]["id"]

    while True:
        # Responses come in three shapes: wrapped in "graphql", wrapped in "data" (both edge-based),
        # or unwrapped with a plain list of nodes.
        wrapper = next((key for key in ("graphql", "data") if key in response), None)
        if wrapper is None:
            media = response["user"]["saved_media"]
            for node in media["nodes"]:
                yield Post(self, node)
        else:
            media = response[wrapper]["user"]["edge_saved_media"]
            for edge in media["edges"]:
                yield Post(self, edge["node"])

        if not media["page_info"]["has_next_page"]:
            return
        response = self.graphql_query("f883d95537fbcd400f466f63d42bd8a1",
                                      {'id': owner_id, 'first': Instaloader.GRAPHQL_PAGE_LENGTH,
                                       'after': media["page_info"]["end_cursor"]})
|
||||||
|
|
||||||
|
def download_saved_posts(self, max_count: Optional[int] = None, fast_update: bool = False,
                         filter_func: Optional[Callable[["Post"], bool]] = None) -> None:
    """Download user's saved pictures.

    :param max_count: Maximum count of pictures to download
    :param fast_update: If true, abort when first already-downloaded picture is encountered
    :param filter_func: function(post), which returns True if given picture should be downloaded
    """
    count = 1
    for post in self.get_saved_posts():
        if max_count is not None and count > max_count:
            break
        name = post.owner_username
        if filter_func is not None and not filter_func(post):
            # Skipped posts do not count towards max_count.
            self._log("<pic by {} skipped>".format(name), flush=True)
            continue
        self._log("[{:>3}] {} ".format(count, name), end=str(), flush=True)
        count += 1
        with self._error_catcher('Download saved posts'):
            downloaded = self.download_post(post, target=':saved')
            # Checked inside the error catcher so that `downloaded` is only read when
            # download_post() actually returned a value.
            if fast_update and not downloaded:
                break
|
||||||
|
|
||||||
def get_hashtag_posts(self, hashtag: str) -> Iterator[Post]:
|
def get_hashtag_posts(self, hashtag: str) -> Iterator[Post]:
|
||||||
"""Get Posts associated with a #hashtag."""
|
"""Get Posts associated with a #hashtag."""
|
||||||
yield from (Post(self, node) for node in
|
yield from (Post(self, node) for node in
|
||||||
@ -1322,7 +1399,7 @@ class Instaloader:
|
|||||||
# We do not use self.graphql_node_list() here, because profile_metadata
|
# We do not use self.graphql_node_list() here, because profile_metadata
|
||||||
# lets us obtain the first 12 nodes 'for free'
|
# lets us obtain the first 12 nodes 'for free'
|
||||||
data = self.graphql_query(17888483320059182, {'id': profile_metadata['user']['id'],
|
data = self.graphql_query(17888483320059182, {'id': profile_metadata['user']['id'],
|
||||||
'first': 200,
|
'first': Instaloader.GRAPHQL_PAGE_LENGTH,
|
||||||
'after': end_cursor},
|
'after': end_cursor},
|
||||||
'https://www.instagram.com/{0}/'.format(profile_name))
|
'https://www.instagram.com/{0}/'.format(profile_name))
|
||||||
media = data['data']['user']['edge_owner_to_timeline_media']
|
media = data['data']['user']['edge_owner_to_timeline_media']
|
||||||
@ -1433,7 +1510,7 @@ class Instaloader:
|
|||||||
if sessionfile is not None:
|
if sessionfile is not None:
|
||||||
print(err, file=sys.stderr)
|
print(err, file=sys.stderr)
|
||||||
self._log("Session file does not exist yet - Logging in.")
|
self._log("Session file does not exist yet - Logging in.")
|
||||||
if not self.is_logged_in or username != self.test_login(self.session):
|
if not self.is_logged_in or username != self.test_login():
|
||||||
if password is not None:
|
if password is not None:
|
||||||
self.login(username, password)
|
self.login(username, password)
|
||||||
else:
|
else:
|
||||||
@ -1471,6 +1548,14 @@ class Instaloader:
|
|||||||
self.download_stories(fast_update=fast_update)
|
self.download_stories(fast_update=fast_update)
|
||||||
else:
|
else:
|
||||||
self.error("--login=USERNAME required to download {}.".format(pentry))
|
self.error("--login=USERNAME required to download {}.".format(pentry))
|
||||||
|
elif pentry == ":saved":
|
||||||
|
if username is not None:
|
||||||
|
self._log("Retrieving saved posts...")
|
||||||
|
with self._error_catcher():
|
||||||
|
self.download_saved_posts(fast_update=fast_update, max_count=max_count,
|
||||||
|
filter_func=filter_func)
|
||||||
|
else:
|
||||||
|
self.error("--login=USERNAME required to download {}.".format(pentry))
|
||||||
else:
|
else:
|
||||||
targets.add(pentry)
|
targets.add(pentry)
|
||||||
if len(targets) > 1:
|
if len(targets) > 1:
|
||||||
@ -1518,9 +1603,10 @@ def main():
|
|||||||
g_what.add_argument('profile', nargs='*', metavar='profile|#hashtag',
|
g_what.add_argument('profile', nargs='*', metavar='profile|#hashtag',
|
||||||
help='Name of profile or #hashtag to download. '
|
help='Name of profile or #hashtag to download. '
|
||||||
'Alternatively, if --login is given: @<profile> to download all followees of '
|
'Alternatively, if --login is given: @<profile> to download all followees of '
|
||||||
'<profile>; the special targets :feed to '
|
'<profile>; the special targets '
|
||||||
'download pictures from your feed; or :stories to download the stories of your '
|
':feed to download pictures from your feed; '
|
||||||
'followees.')
|
':stories to download the stories of your followees; or '
|
||||||
|
':saved to download the posts marked as saved.')
|
||||||
g_what.add_argument('-P', '--profile-pic-only', action='store_true',
|
g_what.add_argument('-P', '--profile-pic-only', action='store_true',
|
||||||
help='Only download profile picture.')
|
help='Only download profile picture.')
|
||||||
g_what.add_argument('--no-profile-pic', action='store_true',
|
g_what.add_argument('--no-profile-pic', action='store_true',
|
||||||
|
Loading…
Reference in New Issue
Block a user