Merge branch 'master' into upcoming/v4.5

2024-08-16 19:59:40 +02:00 · 2020-07-18 15:32:05 +02:00 · 2020-07-18 15:32:05 +02:00 · c817d1901a
commit c817d1901a
parent fbe05a1add e232c82d5e
7 changed files with 74 additions and 54 deletions
--- a/docs/_static/instaloader.css
+++ b/docs/_static/instaloader.css
@ -17,7 +17,9 @@ code {
  .highlight pre {
    padding: 0.7em;
    color: #fff; }
-  .highlight .c1, .highlight .k, .highlight .kn, .highlight .ow {
+  .highlight .c1 {
+    color: #666; }
+  .highlight .k, .highlight .kn, .highlight .ow {
    color: #008d06; }
  .highlight .nb, .highlight .ne, .highlight .nf, .highlight .vm {
    color: #f48400; }
--- a/docs/_static/instaloader.scss
+++ b/docs/_static/instaloader.scss
@ -36,7 +36,11 @@ code {
    color: #fff;
  }

-  .c1, .k, .kn, .ow {
+  .c1 {
+    color: #666;
+  }
+
+  .k, .kn, .ow {
    color: $color_instaloader_main
  }

--- a/docs/codesnippets.rst
+++ b/docs/codesnippets.rst
@ -28,8 +28,9 @@ Download Posts in a Specific Period
 -----------------------------------

 To only download Instagram pictures (and metadata) that are within a specific
-period, you can play around with :func:`~itertools.dropwhile` and
-:func:`~itertools.takewhile` from :mod:`itertools` like in this snippet.
+period, you can simply use :func:`~itertools.dropwhile` and
+:func:`~itertools.takewhile` from :mod:`itertools` on a generator that returns
+Posts in **exact chronological order**, such as :meth:`Profile.get_posts`.

 .. literalinclude:: codesnippets/121_since_until.py

@ -37,6 +38,14 @@ See also :class:`Post`, :meth:`Instaloader.download_post`.

 Discussed in :issue:`121`.

+The code example with :func:`~itertools.dropwhile` and
+:func:`~itertools.takewhile` makes the assumption that the post iterator returns
+posts in exact chronological order.  As discussed in :issue:`666`, the following
+approach works for an **almost chronological order**, where up to *k* older posts
+are inserted into an otherwise chronological order, such as a Hashtag feed.
+
+.. literalinclude:: codesnippets/666_historical_hashtag_data.py
+
 Likes of a Profile / Ghost Followers
 ------------------------------------

--- a/docs/codesnippets/121_since_until.py
+++ b/docs/codesnippets/121_since_until.py
@ -5,13 +5,11 @@ import instaloader

 L = instaloader.Instaloader()

-posts = instaloader.Hashtag.from_name(L.context, 'urbanphotography').get_posts()
-# or
-# posts = instaloader.Profile.from_username(L.context, PROFILE).get_posts()
+posts = instaloader.Profile.from_username(L.context, "instagram").get_posts()

 SINCE = datetime(2015, 5, 1)
 UNTIL = datetime(2015, 3, 1)

 for post in takewhile(lambda p: p.date > UNTIL, dropwhile(lambda p: p.date > SINCE, posts)):
    print(post.date)
-    L.download_post(post, '#urbanphotography')
+    L.download_post(post, "instagram")
--- a/docs/codesnippets/666_historical_hashtag_data.py
+++ b/docs/codesnippets/666_historical_hashtag_data.py
@ -0,0 +1,30 @@
+from datetime import datetime
+import instaloader
+
+L = instaloader.Instaloader()
+
+posts = instaloader.Hashtag.from_name(L.context, "urbanphotography").get_posts()
+
+SINCE = datetime(2020, 5, 10)  # further from today, not inclusive
+UNTIL = datetime(2020, 5, 11)  # closer to today, inclusive
+
+k = 0  # posts not newer than SINCE seen since the last download
+k_list = []  # only needed when tuning k (see the commented-out lines below)
+
+for post in posts:
+    postdate = post.date
+
+    if postdate > UNTIL:
+        continue
+    elif postdate <= SINCE:
+        k += 1
+        if k == 50:
+            break
+        else:
+            continue
+    else:
+        L.download_post(post, "#urbanphotography")
+        k = 0  # set k to 0
+        # if you want to tune k, uncomment below to get your k max
+        #k_list.append(k)
+#max(k_list)
--- a/instaloader/init.py
+++ b/instaloader/init.py
@ -1,7 +1,7 @@
 """Download pictures (or videos) along with their captions and other metadata from Instagram."""


-__version__ = '4.4.4'
+__version__ = '4.4.5'


 try:
--- a/instaloader/structures.py
+++ b/instaloader/structures.py
@ -69,7 +69,6 @@ class Post:
        self._node = node
        self._owner_profile = owner_profile
        self._full_metadata_dict = None  # type: Optional[Dict[str, Any]]
-        self._rhx_gis_str = None         # type: Optional[str]
        self._location = None            # type: Optional[PostLocation]
        self._iphone_struct_ = None
        if 'iphone_struct' in node:
@ -142,9 +141,11 @@ class Post:

    def _obtain_metadata(self):
        if not self._full_metadata_dict:
-            pic_json = self._context.get_json("p/{0}/".format(self.shortcode), params={})
-            self._full_metadata_dict = pic_json['entry_data']['PostPage'][0]['graphql']['shortcode_media']
-            self._rhx_gis_str = pic_json.get('rhx_gis')
+            pic_json = self._context.graphql_query(
+                '2b0673e0dc4580674a88d426fe00ea90',
+                {'shortcode': self.shortcode}
+            )
+            self._full_metadata_dict = pic_json['data']['shortcode_media']
            if self._full_metadata_dict is None:
                # issue #449
                self._context.error("Fetching Post metadata failed (issue #449). "
@ -161,11 +162,6 @@ class Post:
        assert self._full_metadata_dict is not None
        return self._full_metadata_dict

-    @property
-    def _rhx_gis(self) -> Optional[str]:
-        self._obtain_metadata()
-        return self._rhx_gis_str
-
    @property
    def _iphone_struct(self) -> Dict[str, Any]:
        if not self._context.is_logged_in:
@ -392,7 +388,7 @@ class Post:
                                     created_at_utc=datetime.utcfromtimestamp(node['created_at']),
                                     text=node['text'],
                                     owner=Profile(self._context, node['owner']),
-                                     likes_count=node['edge_liked_by']['count'])
+                                     likes_count=node.get('edge_liked_by', {}).get('count', 0))

        def _postcommentanswers(node):
            if 'edge_threaded_comments' not in node:
@ -418,14 +414,9 @@ class Post:
        if self.comments == 0:
            # Avoid doing additional requests if there are no comments
            return
-        try:
-            comment_edges = self._field('edge_media_to_parent_comment', 'edges')
-            answers_count = sum([edge['node']['edge_threaded_comments']['count'] for edge in comment_edges])
-            threaded_comments_available = True
-        except KeyError:
-            comment_edges = self._field('edge_media_to_comment', 'edges')
-            answers_count = 0
-            threaded_comments_available = False
+
+        comment_edges = self._field('edge_media_to_comment', 'edges')
+        answers_count = sum([edge['node'].get('edge_threaded_comments', {}).get('count', 0) for edge in comment_edges])

        if self.comments == len(comment_edges) + answers_count:
            # If the Post's metadata already contains all parent comments, don't do GraphQL requests to obtain them
@ -433,14 +424,10 @@ class Post:
            return
        yield from (_postcomment(node) for node in
                    self._context.graphql_node_list(
-                        "97b41c52301f77ce508f55e66d17620e" if threaded_comments_available
-                        else "f0986789a5c5d17c2400faebf16efd0d",
+                        "97b41c52301f77ce508f55e66d17620e",
                        {'shortcode': self.shortcode},
                        'https://www.instagram.com/p/' + self.shortcode + '/',
-                        lambda d:
-                        d['data']['shortcode_media'][
-                            'edge_media_to_parent_comment' if threaded_comments_available else 'edge_media_to_comment'],
-                        self._rhx_gis))
+                        lambda d: d['data']['shortcode_media']['edge_media_to_parent_comment']))

    def get_likes(self) -> Iterator['Profile']:
        """Iterate over all likes of the post. A :class:`Profile` instance of each likee is yielded."""
@ -455,8 +442,7 @@ class Post:
        yield from (Profile(self._context, node) for node in
                    self._context.graphql_node_list("1cb6ec562846122743b61e492c85999f", {'shortcode': self.shortcode},
                                                    'https://www.instagram.com/p/' + self.shortcode + '/',
-                                                    lambda d: d['data']['shortcode_media']['edge_liked_by'],
-                                                    self._rhx_gis))
+                                                    lambda d: d['data']['shortcode_media']['edge_liked_by']))

    @property
    def is_sponsored(self) -> bool:
@ -537,7 +523,6 @@ class Profile:
        self._has_public_story = None  # type: Optional[bool]
        self._node = node
        self._has_full_metadata = False
-        self._rhx_gis = None
        self._iphone_struct_ = None
        if 'iphone_struct' in node:
            # if loaded from JSON with load_structure_from_file()
@ -599,10 +584,9 @@ class Profile:
    def _obtain_metadata(self):
        try:
            if not self._has_full_metadata:
-                metadata = self._context.get_json('{}/'.format(self.username), params={})
+                metadata = self._context.get_json('{}/feed/'.format(self.username), params={})
                self._node = metadata['entry_data']['ProfilePage'][0]['graphql']['user']
                self._has_full_metadata = True
-                self._rhx_gis = metadata.get('rhx_gis')
        except (QueryReturnedNotFoundException, KeyError) as err:
            top_search_results = TopSearchResults(self._context, self.username)
            similar_profiles = [profile.username for profile in top_search_results.get_profiles()]
@ -735,8 +719,7 @@ class Profile:
                                                        'include_reel': False, 'include_suggested_users': False,
                                                        'include_logged_out_extras': True,
                                                        'include_highlight_reels': False},
-                                                       'https://www.instagram.com/{}/'.format(self.username),
-                                                       self._rhx_gis)
+                                                       'https://www.instagram.com/{}/'.format(self.username))
            self._has_public_story = data['data']['user']['has_public_story']
        assert self._has_public_story is not None
        return self._has_public_story
@ -795,8 +778,7 @@ class Profile:
                                                    {'id': self.userid},
                                                    'https://www.instagram.com/{0}/'.format(self.username),
                                                    lambda d: d['data']['user']['edge_owner_to_timeline_media'],
-                                                    self._rhx_gis,
-                                                    self._metadata('edge_owner_to_timeline_media')))
+                                                    first_data=self._metadata('edge_owner_to_timeline_media')))

    def get_saved_posts(self) -> Iterator[Post]:
        """Get Posts that are marked as saved by the user."""
@ -810,8 +792,7 @@ class Profile:
                                                    {'id': self.userid},
                                                    'https://www.instagram.com/{0}/'.format(self.username),
                                                    lambda d: d['data']['user']['edge_saved_media'],
-                                                    self._rhx_gis,
-                                                    self._metadata('edge_saved_media')))
+                                                    first_data=self._metadata('edge_saved_media')))

    def get_tagged_posts(self) -> Iterator[Post]:
        """Retrieve all posts where a profile is tagged.
@ -822,8 +803,7 @@ class Profile:
                    self._context.graphql_node_list("e31a871f7301132ceaab56507a66bbb7",
                                                    {'id': self.userid},
                                                    'https://www.instagram.com/{0}/'.format(self.username),
-                                                    lambda d: d['data']['user']['edge_user_to_photos_of_you'],
-                                                    self._rhx_gis))
+                                                    lambda d: d['data']['user']['edge_user_to_photos_of_you']))

    def get_igtv_posts(self) -> Iterator[Post]:
        """Retrieve all IGTV posts.
@ -835,8 +815,7 @@ class Profile:
                                                    {'id': self.userid},
                                                    'https://www.instagram.com/{0}/channel/'.format(self.username),
                                                    lambda d: d['data']['user']['edge_felix_video_timeline'],
-                                                    self._rhx_gis,
-                                                    self._metadata('edge_felix_video_timeline')))
+                                                    first_data=self._metadata('edge_felix_video_timeline')))

    def get_followers(self) -> Iterator['Profile']:
        """
@ -850,8 +829,7 @@ class Profile:
                    self._context.graphql_node_list("37479f2b8209594dde7facb0d904896a",
                                                    {'id': str(self.userid)},
                                                    'https://www.instagram.com/' + self.username + '/',
-                                                    lambda d: d['data']['user']['edge_followed_by'],
-                                                    self._rhx_gis))
+                                                    lambda d: d['data']['user']['edge_followed_by']))

    def get_followees(self) -> Iterator['Profile']:
        """
@ -865,8 +843,7 @@ class Profile:
                    self._context.graphql_node_list("58712303d941c6855d4e888c5f0cd22f",
                                                    {'id': str(self.userid)},
                                                    'https://www.instagram.com/' + self.username + '/',
-                                                    lambda d: d['data']['user']['edge_follow'],
-                                                    self._rhx_gis))
+                                                    lambda d: d['data']['user']['edge_follow']))

    def get_similar_accounts(self) -> Iterator['Profile']:
        """
@ -881,8 +858,8 @@ class Profile:
        yield from (Profile(self._context, edge["node"]) for edge in
                    self._context.graphql_query("ad99dd9d3646cc3c0dda65debcd266a7",
                                                {"user_id": str(self.userid), "include_chaining": True},
-                                                "https://www.instagram.com/{0}/".format(self.username),
-                                                self._rhx_gis)["data"]["user"]["edge_chaining"]["edges"])
+                                                "https://www.instagram.com/{0}/"
+                                                .format(self.username))["data"]["user"]["edge_chaining"]["edges"])


 class StoryItem: