1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-08-16 19:59:40 +02:00

Merge branch 'master' into upcoming/v4.5

This commit is contained in:
Alexander Graf 2020-07-18 15:32:05 +02:00
commit c817d1901a
7 changed files with 74 additions and 54 deletions

View File

@ -17,7 +17,9 @@ code {
.highlight pre {
padding: 0.7em;
color: #fff; }
.highlight .c1, .highlight .k, .highlight .kn, .highlight .ow {
.highlight .c1 {
color: #666; }
.highlight .k, .highlight .kn, .highlight .ow {
color: #008d06; }
.highlight .nb, .highlight .ne, .highlight .nf, .highlight .vm {
color: #f48400; }

View File

@ -36,7 +36,11 @@ code {
color: #fff;
}
.c1, .k, .kn, .ow {
.c1 {
color: #666;
}
.k, .kn, .ow {
color: $color_instaloader_main
}

View File

@ -28,8 +28,9 @@ Download Posts in a Specific Period
-----------------------------------
To only download Instagram pictures (and metadata) that are within a specific
period, you can play around with :func:`~itertools.dropwhile` and
:func:`~itertools.takewhile` from :mod:`itertools` like in this snippet.
period, you can simply use :func:`~itertools.dropwhile` and
:func:`~itertools.takewhile` from :mod:`itertools` on a generator that returns
Posts in **exact chronological order**, such as :meth:`Profile.get_posts`.
.. literalinclude:: codesnippets/121_since_until.py
@ -37,6 +38,14 @@ See also :class:`Post`, :meth:`Instaloader.download_post`.
Discussed in :issue:`121`.
The code example with :func:`~itertools.dropwhile` and
:func:`~itertools.takewhile` makes the assumption that the post iterator returns
posts in exact chronological order. As discussed in :issue:`666`, the following
approach works for an **almost chronological order**, where up to *k* older posts
are inserted into an otherwise chronological order, such as a Hashtag feed.
.. literalinclude:: codesnippets/666_historical_hashtag_data.py
Likes of a Profile / Ghost Followers
------------------------------------

View File

@ -5,13 +5,11 @@ import instaloader
L = instaloader.Instaloader()
posts = instaloader.Hashtag.from_name(L.context, 'urbanphotography').get_posts()
# or
# posts = instaloader.Profile.from_username(L.context, PROFILE).get_posts()
posts = instaloader.Profile.from_username(L.context, "instagram").get_posts()
SINCE = datetime(2015, 5, 1)
UNTIL = datetime(2015, 3, 1)
for post in takewhile(lambda p: p.date > UNTIL, dropwhile(lambda p: p.date > SINCE, posts)):
print(post.date)
L.download_post(post, '#urbanphotography')
L.download_post(post, "instagram")

View File

@ -0,0 +1,30 @@
"""Download the posts of a hashtag that fall into the period [SINCE, UNTIL).

A hashtag feed is only *almost* chronological: older posts may be inserted
into an otherwise chronological order. Instead of stopping at the first post
that is too old, this script keeps going and only gives up after 50 too-old
posts have been seen without a single in-range post in between.
"""
from datetime import datetime

import instaloader

L = instaloader.Instaloader()

posts = instaloader.Hashtag.from_name(L.context, "urbanphotography").get_posts()

SINCE = datetime(2020, 5, 10)  # further from today, inclusive
UNTIL = datetime(2020, 5, 11)  # closer to today, not inclusive

k = 0  # too-old posts seen since the last downloaded post
# k_list = []  # uncomment this to tune k

for post in posts:
    postdate = post.date

    if postdate >= UNTIL:
        # Too recent; UNTIL itself is excluded. Keep scanning.
        continue

    if postdate < SINCE:
        # Too old; SINCE itself is included. Tolerate a bounded number of
        # these, since the feed is not strictly ordered.
        k += 1
        if k == 50:
            break
        continue

    L.download_post(post, "#urbanphotography")
    # if you want to tune k, uncomment below to get your k max
    # (record k before it is reset, otherwise only zeros are collected)
    # k_list.append(k)
    k = 0  # set k to 0

# max(k_list)

View File

@ -1,7 +1,7 @@
"""Download pictures (or videos) along with their captions and other metadata from Instagram."""
__version__ = '4.4.4'
__version__ = '4.4.5'
try:

View File

@ -69,7 +69,6 @@ class Post:
self._node = node
self._owner_profile = owner_profile
self._full_metadata_dict = None # type: Optional[Dict[str, Any]]
self._rhx_gis_str = None # type: Optional[str]
self._location = None # type: Optional[PostLocation]
self._iphone_struct_ = None
if 'iphone_struct' in node:
@ -142,9 +141,11 @@ class Post:
def _obtain_metadata(self):
if not self._full_metadata_dict:
pic_json = self._context.get_json("p/{0}/".format(self.shortcode), params={})
self._full_metadata_dict = pic_json['entry_data']['PostPage'][0]['graphql']['shortcode_media']
self._rhx_gis_str = pic_json.get('rhx_gis')
pic_json = self._context.graphql_query(
'2b0673e0dc4580674a88d426fe00ea90',
{'shortcode': self.shortcode}
)
self._full_metadata_dict = pic_json['data']['shortcode_media']
if self._full_metadata_dict is None:
# issue #449
self._context.error("Fetching Post metadata failed (issue #449). "
@ -161,11 +162,6 @@ class Post:
assert self._full_metadata_dict is not None
return self._full_metadata_dict
@property
def _rhx_gis(self) -> Optional[str]:
self._obtain_metadata()
return self._rhx_gis_str
@property
def _iphone_struct(self) -> Dict[str, Any]:
if not self._context.is_logged_in:
@ -392,7 +388,7 @@ class Post:
created_at_utc=datetime.utcfromtimestamp(node['created_at']),
text=node['text'],
owner=Profile(self._context, node['owner']),
likes_count=node['edge_liked_by']['count'])
likes_count=node.get('edge_liked_by', {}).get('count', 0))
def _postcommentanswers(node):
if 'edge_threaded_comments' not in node:
@ -418,14 +414,9 @@ class Post:
if self.comments == 0:
# Avoid doing additional requests if there are no comments
return
try:
comment_edges = self._field('edge_media_to_parent_comment', 'edges')
answers_count = sum([edge['node']['edge_threaded_comments']['count'] for edge in comment_edges])
threaded_comments_available = True
except KeyError:
comment_edges = self._field('edge_media_to_comment', 'edges')
answers_count = 0
threaded_comments_available = False
comment_edges = self._field('edge_media_to_comment', 'edges')
answers_count = sum([edge['node'].get('edge_threaded_comments', {}).get('count', 0) for edge in comment_edges])
if self.comments == len(comment_edges) + answers_count:
# If the Post's metadata already contains all parent comments, don't do GraphQL requests to obtain them
@ -433,14 +424,10 @@ class Post:
return
yield from (_postcomment(node) for node in
self._context.graphql_node_list(
"97b41c52301f77ce508f55e66d17620e" if threaded_comments_available
else "f0986789a5c5d17c2400faebf16efd0d",
"97b41c52301f77ce508f55e66d17620e",
{'shortcode': self.shortcode},
'https://www.instagram.com/p/' + self.shortcode + '/',
lambda d:
d['data']['shortcode_media'][
'edge_media_to_parent_comment' if threaded_comments_available else 'edge_media_to_comment'],
self._rhx_gis))
lambda d: d['data']['shortcode_media']['edge_media_to_parent_comment']))
def get_likes(self) -> Iterator['Profile']:
"""Iterate over all likes of the post. A :class:`Profile` instance of each likee is yielded."""
@ -455,8 +442,7 @@ class Post:
yield from (Profile(self._context, node) for node in
self._context.graphql_node_list("1cb6ec562846122743b61e492c85999f", {'shortcode': self.shortcode},
'https://www.instagram.com/p/' + self.shortcode + '/',
lambda d: d['data']['shortcode_media']['edge_liked_by'],
self._rhx_gis))
lambda d: d['data']['shortcode_media']['edge_liked_by']))
@property
def is_sponsored(self) -> bool:
@ -537,7 +523,6 @@ class Profile:
self._has_public_story = None # type: Optional[bool]
self._node = node
self._has_full_metadata = False
self._rhx_gis = None
self._iphone_struct_ = None
if 'iphone_struct' in node:
# if loaded from JSON with load_structure_from_file()
@ -599,10 +584,9 @@ class Profile:
def _obtain_metadata(self):
try:
if not self._has_full_metadata:
metadata = self._context.get_json('{}/'.format(self.username), params={})
metadata = self._context.get_json('{}/feed/'.format(self.username), params={})
self._node = metadata['entry_data']['ProfilePage'][0]['graphql']['user']
self._has_full_metadata = True
self._rhx_gis = metadata.get('rhx_gis')
except (QueryReturnedNotFoundException, KeyError) as err:
top_search_results = TopSearchResults(self._context, self.username)
similar_profiles = [profile.username for profile in top_search_results.get_profiles()]
@ -735,8 +719,7 @@ class Profile:
'include_reel': False, 'include_suggested_users': False,
'include_logged_out_extras': True,
'include_highlight_reels': False},
'https://www.instagram.com/{}/'.format(self.username),
self._rhx_gis)
'https://www.instagram.com/{}/'.format(self.username))
self._has_public_story = data['data']['user']['has_public_story']
assert self._has_public_story is not None
return self._has_public_story
@ -795,8 +778,7 @@ class Profile:
{'id': self.userid},
'https://www.instagram.com/{0}/'.format(self.username),
lambda d: d['data']['user']['edge_owner_to_timeline_media'],
self._rhx_gis,
self._metadata('edge_owner_to_timeline_media')))
first_data=self._metadata('edge_owner_to_timeline_media')))
def get_saved_posts(self) -> Iterator[Post]:
"""Get Posts that are marked as saved by the user."""
@ -810,8 +792,7 @@ class Profile:
{'id': self.userid},
'https://www.instagram.com/{0}/'.format(self.username),
lambda d: d['data']['user']['edge_saved_media'],
self._rhx_gis,
self._metadata('edge_saved_media')))
first_data=self._metadata('edge_saved_media')))
def get_tagged_posts(self) -> Iterator[Post]:
"""Retrieve all posts where a profile is tagged.
@ -822,8 +803,7 @@ class Profile:
self._context.graphql_node_list("e31a871f7301132ceaab56507a66bbb7",
{'id': self.userid},
'https://www.instagram.com/{0}/'.format(self.username),
lambda d: d['data']['user']['edge_user_to_photos_of_you'],
self._rhx_gis))
lambda d: d['data']['user']['edge_user_to_photos_of_you']))
def get_igtv_posts(self) -> Iterator[Post]:
"""Retrieve all IGTV posts.
@ -835,8 +815,7 @@ class Profile:
{'id': self.userid},
'https://www.instagram.com/{0}/channel/'.format(self.username),
lambda d: d['data']['user']['edge_felix_video_timeline'],
self._rhx_gis,
self._metadata('edge_felix_video_timeline')))
first_data=self._metadata('edge_felix_video_timeline')))
def get_followers(self) -> Iterator['Profile']:
"""
@ -850,8 +829,7 @@ class Profile:
self._context.graphql_node_list("37479f2b8209594dde7facb0d904896a",
{'id': str(self.userid)},
'https://www.instagram.com/' + self.username + '/',
lambda d: d['data']['user']['edge_followed_by'],
self._rhx_gis))
lambda d: d['data']['user']['edge_followed_by']))
def get_followees(self) -> Iterator['Profile']:
"""
@ -865,8 +843,7 @@ class Profile:
self._context.graphql_node_list("58712303d941c6855d4e888c5f0cd22f",
{'id': str(self.userid)},
'https://www.instagram.com/' + self.username + '/',
lambda d: d['data']['user']['edge_follow'],
self._rhx_gis))
lambda d: d['data']['user']['edge_follow']))
def get_similar_accounts(self) -> Iterator['Profile']:
"""
@ -881,8 +858,8 @@ class Profile:
yield from (Profile(self._context, edge["node"]) for edge in
self._context.graphql_query("ad99dd9d3646cc3c0dda65debcd266a7",
{"user_id": str(self.userid), "include_chaining": True},
"https://www.instagram.com/{0}/".format(self.username),
self._rhx_gis)["data"]["user"]["edge_chaining"]["edges"])
"https://www.instagram.com/{0}/"
.format(self.username))["data"]["user"]["edge_chaining"]["edges"])
class StoryItem: