diff --git a/docs/_static/instaloader.css b/docs/_static/instaloader.css index 4c92ea1..4d1ec72 100644 --- a/docs/_static/instaloader.css +++ b/docs/_static/instaloader.css @@ -17,7 +17,9 @@ code { .highlight pre { padding: 0.7em; color: #fff; } - .highlight .c1, .highlight .k, .highlight .kn, .highlight .ow { + .highlight .c1 { + color: #666; } + .highlight .k, .highlight .kn, .highlight .ow { color: #008d06; } .highlight .nb, .highlight .ne, .highlight .nf, .highlight .vm { color: #f48400; } diff --git a/docs/_static/instaloader.scss b/docs/_static/instaloader.scss index 4680585..b8953f5 100644 --- a/docs/_static/instaloader.scss +++ b/docs/_static/instaloader.scss @@ -36,7 +36,11 @@ code { color: #fff; } - .c1, .k, .kn, .ow { + .c1 { + color: #666; + } + + .k, .kn, .ow { color: $color_instaloader_main } diff --git a/docs/codesnippets.rst b/docs/codesnippets.rst index 58430a6..39fba14 100644 --- a/docs/codesnippets.rst +++ b/docs/codesnippets.rst @@ -28,8 +28,9 @@ Download Posts in a Specific Period ----------------------------------- To only download Instagram pictures (and metadata) that are within a specific -period, you can play around with :func:`~itertools.dropwhile` and -:func:`~itertools.takewhile` from :mod:`itertools` like in this snippet. +period, you can simply use :func:`~itertools.dropwhile` and +:func:`~itertools.takewhile` from :mod:`itertools` on a generator that returns +Posts in **exact chronological order**, such as :meth:`Profile.get_posts`. .. literalinclude:: codesnippets/121_since_until.py @@ -37,6 +38,14 @@ See also :class:`Post`, :meth:`Instaloader.download_post`. Discussed in :issue:`121`. +The code example with :func:`~itertools.dropwhile` and +:func:`~itertools.takewhile` makes the assumption that the post iterator returns +posts in exact chronological order. 
As discussed in :issue:`666`, the following +approach works for an **almost chronological order**, where up to *k* older posts +are inserted into an otherwise chronological order, such as a Hashtag feed. + +.. literalinclude:: codesnippets/666_historical_hashtag_data.py + Likes of a Profile / Ghost Followers ------------------------------------ diff --git a/docs/codesnippets/121_since_until.py b/docs/codesnippets/121_since_until.py index 355b392..4df2c27 100644 --- a/docs/codesnippets/121_since_until.py +++ b/docs/codesnippets/121_since_until.py @@ -5,13 +5,11 @@ import instaloader L = instaloader.Instaloader() -posts = instaloader.Hashtag.from_name(L.context, 'urbanphotography').get_posts() -# or -# posts = instaloader.Profile.from_username(L.context, PROFILE).get_posts() +posts = instaloader.Profile.from_username(L.context, "instagram").get_posts() SINCE = datetime(2015, 5, 1) UNTIL = datetime(2015, 3, 1) for post in takewhile(lambda p: p.date > UNTIL, dropwhile(lambda p: p.date > SINCE, posts)): print(post.date) - L.download_post(post, '#urbanphotography') + L.download_post(post, "instagram") diff --git a/docs/codesnippets/666_historical_hashtag_data.py b/docs/codesnippets/666_historical_hashtag_data.py new file mode 100644 index 0000000..8889937 --- /dev/null +++ b/docs/codesnippets/666_historical_hashtag_data.py @@ -0,0 +1,30 @@ +from datetime import datetime +import instaloader + +L = instaloader.Instaloader() + +posts = instaloader.Hashtag.from_name(L.context, "urbanphotography").get_posts() + +SINCE = datetime(2020, 5, 10) # further from today, inclusive +UNTIL = datetime(2020, 5, 11) # closer to today, not inclusive + +k = 0 # initiate k +k_list = [] # uncomment this to tune k + +for post in posts: + postdate = post.date + + if postdate > UNTIL: + continue + elif postdate <= SINCE: + k += 1 + if k == 50: + break + else: + continue + else: + L.download_post(post, "#urbanphotography") + k = 0 # set k to 0 + # if you want to tune k, uncomment below to get 
your k max + #k_list.append(k) +#max(k_list) diff --git a/instaloader/__init__.py b/instaloader/__init__.py index 9903691..b03c906 100644 --- a/instaloader/__init__.py +++ b/instaloader/__init__.py @@ -1,7 +1,7 @@ """Download pictures (or videos) along with their captions and other metadata from Instagram.""" -__version__ = '4.4.4' +__version__ = '4.4.5' try: diff --git a/instaloader/structures.py b/instaloader/structures.py index 7994113..c2dda25 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -69,7 +69,6 @@ class Post: self._node = node self._owner_profile = owner_profile self._full_metadata_dict = None # type: Optional[Dict[str, Any]] - self._rhx_gis_str = None # type: Optional[str] self._location = None # type: Optional[PostLocation] self._iphone_struct_ = None if 'iphone_struct' in node: @@ -142,9 +141,11 @@ class Post: def _obtain_metadata(self): if not self._full_metadata_dict: - pic_json = self._context.get_json("p/{0}/".format(self.shortcode), params={}) - self._full_metadata_dict = pic_json['entry_data']['PostPage'][0]['graphql']['shortcode_media'] - self._rhx_gis_str = pic_json.get('rhx_gis') + pic_json = self._context.graphql_query( + '2b0673e0dc4580674a88d426fe00ea90', + {'shortcode': self.shortcode} + ) + self._full_metadata_dict = pic_json['data']['shortcode_media'] if self._full_metadata_dict is None: # issue #449 self._context.error("Fetching Post metadata failed (issue #449). 
" @@ -161,11 +162,6 @@ class Post: assert self._full_metadata_dict is not None return self._full_metadata_dict - @property - def _rhx_gis(self) -> Optional[str]: - self._obtain_metadata() - return self._rhx_gis_str - @property def _iphone_struct(self) -> Dict[str, Any]: if not self._context.is_logged_in: @@ -392,7 +388,7 @@ class Post: created_at_utc=datetime.utcfromtimestamp(node['created_at']), text=node['text'], owner=Profile(self._context, node['owner']), - likes_count=node['edge_liked_by']['count']) + likes_count=node.get('edge_liked_by', {}).get('count', 0)) def _postcommentanswers(node): if 'edge_threaded_comments' not in node: @@ -418,14 +414,9 @@ class Post: if self.comments == 0: # Avoid doing additional requests if there are no comments return - try: - comment_edges = self._field('edge_media_to_parent_comment', 'edges') - answers_count = sum([edge['node']['edge_threaded_comments']['count'] for edge in comment_edges]) - threaded_comments_available = True - except KeyError: - comment_edges = self._field('edge_media_to_comment', 'edges') - answers_count = 0 - threaded_comments_available = False + + comment_edges = self._field('edge_media_to_comment', 'edges') + answers_count = sum([edge['node'].get('edge_threaded_comments', {}).get('count', 0) for edge in comment_edges]) if self.comments == len(comment_edges) + answers_count: # If the Post's metadata already contains all parent comments, don't do GraphQL requests to obtain them @@ -433,14 +424,10 @@ class Post: return yield from (_postcomment(node) for node in self._context.graphql_node_list( - "97b41c52301f77ce508f55e66d17620e" if threaded_comments_available - else "f0986789a5c5d17c2400faebf16efd0d", + "97b41c52301f77ce508f55e66d17620e", {'shortcode': self.shortcode}, 'https://www.instagram.com/p/' + self.shortcode + '/', - lambda d: - d['data']['shortcode_media'][ - 'edge_media_to_parent_comment' if threaded_comments_available else 'edge_media_to_comment'], - self._rhx_gis)) + lambda d: 
d['data']['shortcode_media']['edge_media_to_parent_comment'])) def get_likes(self) -> Iterator['Profile']: """Iterate over all likes of the post. A :class:`Profile` instance of each likee is yielded.""" @@ -455,8 +442,7 @@ class Post: yield from (Profile(self._context, node) for node in self._context.graphql_node_list("1cb6ec562846122743b61e492c85999f", {'shortcode': self.shortcode}, 'https://www.instagram.com/p/' + self.shortcode + '/', - lambda d: d['data']['shortcode_media']['edge_liked_by'], - self._rhx_gis)) + lambda d: d['data']['shortcode_media']['edge_liked_by'])) @property def is_sponsored(self) -> bool: @@ -537,7 +523,6 @@ class Profile: self._has_public_story = None # type: Optional[bool] self._node = node self._has_full_metadata = False - self._rhx_gis = None self._iphone_struct_ = None if 'iphone_struct' in node: # if loaded from JSON with load_structure_from_file() @@ -599,10 +584,9 @@ class Profile: def _obtain_metadata(self): try: if not self._has_full_metadata: - metadata = self._context.get_json('{}/'.format(self.username), params={}) + metadata = self._context.get_json('{}/feed/'.format(self.username), params={}) self._node = metadata['entry_data']['ProfilePage'][0]['graphql']['user'] self._has_full_metadata = True - self._rhx_gis = metadata.get('rhx_gis') except (QueryReturnedNotFoundException, KeyError) as err: top_search_results = TopSearchResults(self._context, self.username) similar_profiles = [profile.username for profile in top_search_results.get_profiles()] @@ -735,8 +719,7 @@ class Profile: 'include_reel': False, 'include_suggested_users': False, 'include_logged_out_extras': True, 'include_highlight_reels': False}, - 'https://www.instagram.com/{}/'.format(self.username), - self._rhx_gis) + 'https://www.instagram.com/{}/'.format(self.username)) self._has_public_story = data['data']['user']['has_public_story'] assert self._has_public_story is not None return self._has_public_story @@ -795,8 +778,7 @@ class Profile: {'id': self.userid}, 
'https://www.instagram.com/{0}/'.format(self.username), lambda d: d['data']['user']['edge_owner_to_timeline_media'], - self._rhx_gis, - self._metadata('edge_owner_to_timeline_media'))) + first_data=self._metadata('edge_owner_to_timeline_media'))) def get_saved_posts(self) -> Iterator[Post]: """Get Posts that are marked as saved by the user.""" @@ -810,8 +792,7 @@ class Profile: {'id': self.userid}, 'https://www.instagram.com/{0}/'.format(self.username), lambda d: d['data']['user']['edge_saved_media'], - self._rhx_gis, - self._metadata('edge_saved_media'))) + first_data=self._metadata('edge_saved_media'))) def get_tagged_posts(self) -> Iterator[Post]: """Retrieve all posts where a profile is tagged. @@ -822,8 +803,7 @@ class Profile: self._context.graphql_node_list("e31a871f7301132ceaab56507a66bbb7", {'id': self.userid}, 'https://www.instagram.com/{0}/'.format(self.username), - lambda d: d['data']['user']['edge_user_to_photos_of_you'], - self._rhx_gis)) + lambda d: d['data']['user']['edge_user_to_photos_of_you'])) def get_igtv_posts(self) -> Iterator[Post]: """Retrieve all IGTV posts. 
@@ -835,8 +815,7 @@ class Profile: {'id': self.userid}, 'https://www.instagram.com/{0}/channel/'.format(self.username), lambda d: d['data']['user']['edge_felix_video_timeline'], - self._rhx_gis, - self._metadata('edge_felix_video_timeline'))) + first_data=self._metadata('edge_felix_video_timeline'))) def get_followers(self) -> Iterator['Profile']: """ @@ -850,8 +829,7 @@ class Profile: self._context.graphql_node_list("37479f2b8209594dde7facb0d904896a", {'id': str(self.userid)}, 'https://www.instagram.com/' + self.username + '/', - lambda d: d['data']['user']['edge_followed_by'], - self._rhx_gis)) + lambda d: d['data']['user']['edge_followed_by'])) def get_followees(self) -> Iterator['Profile']: """ @@ -865,8 +843,7 @@ class Profile: self._context.graphql_node_list("58712303d941c6855d4e888c5f0cd22f", {'id': str(self.userid)}, 'https://www.instagram.com/' + self.username + '/', - lambda d: d['data']['user']['edge_follow'], - self._rhx_gis)) + lambda d: d['data']['user']['edge_follow'])) def get_similar_accounts(self) -> Iterator['Profile']: """ @@ -881,8 +858,8 @@ class Profile: yield from (Profile(self._context, edge["node"]) for edge in self._context.graphql_query("ad99dd9d3646cc3c0dda65debcd266a7", {"user_id": str(self.userid), "include_chaining": True}, - "https://www.instagram.com/{0}/".format(self.username), - self._rhx_gis)["data"]["user"]["edge_chaining"]["edges"]) + "https://www.instagram.com/{0}/" + .format(self.username))["data"]["user"]["edge_chaining"]["edges"]) class StoryItem: