From e21d34124dac3e5f384ca5d233eafb977c2f48ee Mon Sep 17 00:00:00 2001
From: Alexander Graf <17130992+aandergr@users.noreply.github.com>
Date: Sun, 21 Jun 2020 19:38:16 +0200
Subject: [PATCH 1/5] Add codesnippet for almost chronological order

Such as for downloading hashtag feeds, as discussed in #666 and contributed by
@e2tovar.

Also change comment color to grey in codesnippets in documentation.
---
 docs/_static/instaloader.css                  |  4 ++-
 docs/_static/instaloader.scss                 |  6 +++-
 docs/codesnippets.rst                         | 13 ++++++--
 docs/codesnippets/121_since_until.py          |  6 ++--
 .../666_historical_hashtag_data.py            | 30 +++++++++++++++++++
 5 files changed, 51 insertions(+), 8 deletions(-)
 create mode 100644 docs/codesnippets/666_historical_hashtag_data.py

diff --git a/docs/_static/instaloader.css b/docs/_static/instaloader.css
index 4c92ea1..4d1ec72 100644
--- a/docs/_static/instaloader.css
+++ b/docs/_static/instaloader.css
@@ -17,7 +17,9 @@ code {
   .highlight pre {
     padding: 0.7em;
     color: #fff; }
-  .highlight .c1, .highlight .k, .highlight .kn, .highlight .ow {
+  .highlight .c1 {
+    color: #666; }
+  .highlight .k, .highlight .kn, .highlight .ow {
     color: #008d06; }
   .highlight .nb, .highlight .ne, .highlight .nf, .highlight .vm {
     color: #f48400; }
diff --git a/docs/_static/instaloader.scss b/docs/_static/instaloader.scss
index 4680585..b8953f5 100644
--- a/docs/_static/instaloader.scss
+++ b/docs/_static/instaloader.scss
@@ -36,7 +36,11 @@ code {
     color: #fff;
   }
 
-  .c1, .k, .kn, .ow {
+  .c1 {
+    color: #666;
+  }
+
+  .k, .kn, .ow {
     color: $color_instaloader_main
   }
 
diff --git a/docs/codesnippets.rst b/docs/codesnippets.rst
index 58430a6..39fba14 100644
--- a/docs/codesnippets.rst
+++ b/docs/codesnippets.rst
@@ -28,8 +28,9 @@ Download Posts in a Specific Period
 -----------------------------------
 
 To only download Instagram pictures (and metadata) that are within a specific
-period, you can play around with :func:`~itertools.dropwhile` and
-:func:`~itertools.takewhile` from :mod:`itertools` like in this snippet.
+period, you can simply use :func:`~itertools.dropwhile` and
+:func:`~itertools.takewhile` from :mod:`itertools` on a generator that returns
+Posts in **exact chronological order**, such as :meth:`Profile.get_posts`.
 
 .. literalinclude:: codesnippets/121_since_until.py
 
@@ -37,6 +38,14 @@ See also :class:`Post`, :meth:`Instaloader.download_post`.
 
 Discussed in :issue:`121`.
 
+The code example with :func:`~itertools.dropwhile` and
+:func:`~itertools.takewhile` makes the assumption that the post iterator returns
+posts in exact chronological order.  As discussed in :issue:`666`, the following
+approach fits for an **almost chronological order**, where up to *k* older posts
+are inserted into an otherwise chronological order, such as a Hashtag feed.
+
+.. literalinclude:: codesnippets/666_historical_hashtag_data.py
+
 Likes of a Profile / Ghost Followers
 ------------------------------------
 
diff --git a/docs/codesnippets/121_since_until.py b/docs/codesnippets/121_since_until.py
index 355b392..4df2c27 100644
--- a/docs/codesnippets/121_since_until.py
+++ b/docs/codesnippets/121_since_until.py
@@ -5,13 +5,11 @@ import instaloader
 
 L = instaloader.Instaloader()
 
-posts = instaloader.Hashtag.from_name(L.context, 'urbanphotography').get_posts()
-# or
-# posts = instaloader.Profile.from_username(L.context, PROFILE).get_posts()
+posts = instaloader.Profile.from_username(L.context, "instagram").get_posts()
 
 SINCE = datetime(2015, 5, 1)
 UNTIL = datetime(2015, 3, 1)
 
 for post in takewhile(lambda p: p.date > UNTIL, dropwhile(lambda p: p.date > SINCE, posts)):
     print(post.date)
-    L.download_post(post, '#urbanphotography')
+    L.download_post(post, "instagram")
diff --git a/docs/codesnippets/666_historical_hashtag_data.py b/docs/codesnippets/666_historical_hashtag_data.py
new file mode 100644
index 0000000..8889937
--- /dev/null
+++ b/docs/codesnippets/666_historical_hashtag_data.py
@@ -0,0 +1,30 @@
+from datetime import datetime
+import instaloader
+
+L = instaloader.Instaloader()
+
+posts = instaloader.Hashtag.from_name(L.context, "urbanphotography").get_posts()
+
+SINCE = datetime(2020, 5, 10)  # further from today, inclusive
+UNTIL = datetime(2020, 5, 11)  # closer to today, not inclusive
+
+k = 0  # number of consecutive posts seen that are older than SINCE
+#k_list = []  # uncomment this to tune k
+
+for post in posts:
+    postdate = post.date
+
+    if postdate >= UNTIL:
+        continue  # too new: the requested period has not been reached yet
+    elif postdate < SINCE:
+        k += 1
+        if k == 50:
+            break  # 50 too-old posts in a row: assume the period is over
+        else:
+            continue
+    else:
+        L.download_post(post, "#urbanphotography")
+        # if you want to tune k, uncomment below to get your k max
+        #k_list.append(k)
+        k = 0  # reset the streak; must come after recording k above
+#max(k_list)

From db63e5c7e5d92fc1b0c4ad52476d53936e06649d Mon Sep 17 00:00:00 2001
From: Cadence Ember <cloudrac3r@vivaldi.net>
Date: Wed, 15 Jul 2020 19:46:39 +1200
Subject: [PATCH 2/5] Bypass IP-based redirects to /accounts/login (#727)

* Bypass IP-based redirects to /accounts/login

There are two main changes made:

For users, we request /{username}/feed/ instead of /{username}/. For some
reason, this completely bypasses the login redirect. This page doesn't
work in browser while blocked, but fortunately all the data we need is
just present in the HTML page.

For posts, we change from using the /p/ page to using the graphql
endpoint for the same data, which is still subject to graphql rate
limits, but is not subject to login redirects. The data is identical
between the two pages, apart from the object keys being sorted
differently and rhx_gis being missing on graphql.

Yes, this now unblocks access from VPNs, Tor, cloud servers, etc.

* Apply requested patch to fix comments

* Remove rhx_gis from Post and Profile

Co-authored-by: Alexander Graf <17130992+aandergr@users.noreply.github.com>
---
 instaloader/structures.py | 67 +++++++++++++--------------------------
 1 file changed, 22 insertions(+), 45 deletions(-)

diff --git a/instaloader/structures.py b/instaloader/structures.py
index 7994113..e045269 100644
--- a/instaloader/structures.py
+++ b/instaloader/structures.py
@@ -69,7 +69,6 @@ class Post:
         self._node = node
         self._owner_profile = owner_profile
         self._full_metadata_dict = None  # type: Optional[Dict[str, Any]]
-        self._rhx_gis_str = None         # type: Optional[str]
         self._location = None            # type: Optional[PostLocation]
         self._iphone_struct_ = None
         if 'iphone_struct' in node:
@@ -142,9 +141,11 @@ class Post:
 
     def _obtain_metadata(self):
         if not self._full_metadata_dict:
-            pic_json = self._context.get_json("p/{0}/".format(self.shortcode), params={})
-            self._full_metadata_dict = pic_json['entry_data']['PostPage'][0]['graphql']['shortcode_media']
-            self._rhx_gis_str = pic_json.get('rhx_gis')
+            pic_json = self._context.graphql_query(
+                '2b0673e0dc4580674a88d426fe00ea90',
+                {'shortcode': self.shortcode}
+            )
+            self._full_metadata_dict = pic_json['data']['shortcode_media']
             if self._full_metadata_dict is None:
                 # issue #449
                 self._context.error("Fetching Post metadata failed (issue #449). "
@@ -161,11 +162,6 @@ class Post:
         assert self._full_metadata_dict is not None
         return self._full_metadata_dict
 
-    @property
-    def _rhx_gis(self) -> Optional[str]:
-        self._obtain_metadata()
-        return self._rhx_gis_str
-
     @property
     def _iphone_struct(self) -> Dict[str, Any]:
         if not self._context.is_logged_in:
@@ -392,7 +388,7 @@ class Post:
                                      created_at_utc=datetime.utcfromtimestamp(node['created_at']),
                                      text=node['text'],
                                      owner=Profile(self._context, node['owner']),
-                                     likes_count=node['edge_liked_by']['count'])
+                                     likes_count=node.get('edge_liked_by', {}).get('count', 0))
 
         def _postcommentanswers(node):
             if 'edge_threaded_comments' not in node:
@@ -418,14 +414,9 @@ class Post:
         if self.comments == 0:
             # Avoid doing additional requests if there are no comments
             return
-        try:
-            comment_edges = self._field('edge_media_to_parent_comment', 'edges')
-            answers_count = sum([edge['node']['edge_threaded_comments']['count'] for edge in comment_edges])
-            threaded_comments_available = True
-        except KeyError:
-            comment_edges = self._field('edge_media_to_comment', 'edges')
-            answers_count = 0
-            threaded_comments_available = False
+
+        comment_edges = self._field('edge_media_to_comment', 'edges')
+        answers_count = sum([edge['node']['edge_threaded_comments']['count'] for edge in comment_edges])
 
         if self.comments == len(comment_edges) + answers_count:
             # If the Post's metadata already contains all parent comments, don't do GraphQL requests to obtain them
@@ -433,14 +424,10 @@ class Post:
             return
         yield from (_postcomment(node) for node in
                     self._context.graphql_node_list(
-                        "97b41c52301f77ce508f55e66d17620e" if threaded_comments_available
-                        else "f0986789a5c5d17c2400faebf16efd0d",
+                        "97b41c52301f77ce508f55e66d17620e",
                         {'shortcode': self.shortcode},
                         'https://www.instagram.com/p/' + self.shortcode + '/',
-                        lambda d:
-                        d['data']['shortcode_media'][
-                            'edge_media_to_parent_comment' if threaded_comments_available else 'edge_media_to_comment'],
-                        self._rhx_gis))
+                        lambda d: d['data']['shortcode_media']['edge_media_to_parent_comment']))
 
     def get_likes(self) -> Iterator['Profile']:
         """Iterate over all likes of the post. A :class:`Profile` instance of each likee is yielded."""
@@ -455,8 +442,7 @@ class Post:
         yield from (Profile(self._context, node) for node in
                     self._context.graphql_node_list("1cb6ec562846122743b61e492c85999f", {'shortcode': self.shortcode},
                                                     'https://www.instagram.com/p/' + self.shortcode + '/',
-                                                    lambda d: d['data']['shortcode_media']['edge_liked_by'],
-                                                    self._rhx_gis))
+                                                    lambda d: d['data']['shortcode_media']['edge_liked_by']))
 
     @property
     def is_sponsored(self) -> bool:
@@ -537,7 +523,6 @@ class Profile:
         self._has_public_story = None  # type: Optional[bool]
         self._node = node
         self._has_full_metadata = False
-        self._rhx_gis = None
         self._iphone_struct_ = None
         if 'iphone_struct' in node:
             # if loaded from JSON with load_structure_from_file()
@@ -599,10 +584,9 @@ class Profile:
     def _obtain_metadata(self):
         try:
             if not self._has_full_metadata:
-                metadata = self._context.get_json('{}/'.format(self.username), params={})
+                metadata = self._context.get_json('{}/feed/'.format(self.username), params={})
                 self._node = metadata['entry_data']['ProfilePage'][0]['graphql']['user']
                 self._has_full_metadata = True
-                self._rhx_gis = metadata.get('rhx_gis')
         except (QueryReturnedNotFoundException, KeyError) as err:
             top_search_results = TopSearchResults(self._context, self.username)
             similar_profiles = [profile.username for profile in top_search_results.get_profiles()]
@@ -735,8 +719,7 @@ class Profile:
                                                         'include_reel': False, 'include_suggested_users': False,
                                                         'include_logged_out_extras': True,
                                                         'include_highlight_reels': False},
-                                                       'https://www.instagram.com/{}/'.format(self.username),
-                                                       self._rhx_gis)
+                                                       'https://www.instagram.com/{}/'.format(self.username))
             self._has_public_story = data['data']['user']['has_public_story']
         assert self._has_public_story is not None
         return self._has_public_story
@@ -795,8 +778,7 @@ class Profile:
                                                     {'id': self.userid},
                                                     'https://www.instagram.com/{0}/'.format(self.username),
                                                     lambda d: d['data']['user']['edge_owner_to_timeline_media'],
-                                                    self._rhx_gis,
-                                                    self._metadata('edge_owner_to_timeline_media')))
+                                                    first_data=self._metadata('edge_owner_to_timeline_media')))
 
     def get_saved_posts(self) -> Iterator[Post]:
         """Get Posts that are marked as saved by the user."""
@@ -810,8 +792,7 @@ class Profile:
                                                     {'id': self.userid},
                                                     'https://www.instagram.com/{0}/'.format(self.username),
                                                     lambda d: d['data']['user']['edge_saved_media'],
-                                                    self._rhx_gis,
-                                                    self._metadata('edge_saved_media')))
+                                                    first_data=self._metadata('edge_saved_media')))
 
     def get_tagged_posts(self) -> Iterator[Post]:
         """Retrieve all posts where a profile is tagged.
@@ -822,8 +803,7 @@ class Profile:
                     self._context.graphql_node_list("e31a871f7301132ceaab56507a66bbb7",
                                                     {'id': self.userid},
                                                     'https://www.instagram.com/{0}/'.format(self.username),
-                                                    lambda d: d['data']['user']['edge_user_to_photos_of_you'],
-                                                    self._rhx_gis))
+                                                    lambda d: d['data']['user']['edge_user_to_photos_of_you']))
 
     def get_igtv_posts(self) -> Iterator[Post]:
         """Retrieve all IGTV posts.
@@ -835,8 +815,7 @@ class Profile:
                                                     {'id': self.userid},
                                                     'https://www.instagram.com/{0}/channel/'.format(self.username),
                                                     lambda d: d['data']['user']['edge_felix_video_timeline'],
-                                                    self._rhx_gis,
-                                                    self._metadata('edge_felix_video_timeline')))
+                                                    first_data=self._metadata('edge_felix_video_timeline')))
 
     def get_followers(self) -> Iterator['Profile']:
         """
@@ -850,8 +829,7 @@ class Profile:
                     self._context.graphql_node_list("37479f2b8209594dde7facb0d904896a",
                                                     {'id': str(self.userid)},
                                                     'https://www.instagram.com/' + self.username + '/',
-                                                    lambda d: d['data']['user']['edge_followed_by'],
-                                                    self._rhx_gis))
+                                                    lambda d: d['data']['user']['edge_followed_by']))
 
     def get_followees(self) -> Iterator['Profile']:
         """
@@ -865,8 +843,7 @@ class Profile:
                     self._context.graphql_node_list("58712303d941c6855d4e888c5f0cd22f",
                                                     {'id': str(self.userid)},
                                                     'https://www.instagram.com/' + self.username + '/',
-                                                    lambda d: d['data']['user']['edge_follow'],
-                                                    self._rhx_gis))
+                                                    lambda d: d['data']['user']['edge_follow']))
 
     def get_similar_accounts(self) -> Iterator['Profile']:
         """
@@ -881,8 +858,8 @@ class Profile:
         yield from (Profile(self._context, edge["node"]) for edge in
                     self._context.graphql_query("ad99dd9d3646cc3c0dda65debcd266a7",
                                                 {"user_id": str(self.userid), "include_chaining": True},
-                                                "https://www.instagram.com/{0}/".format(self.username),
-                                                self._rhx_gis)["data"]["user"]["edge_chaining"]["edges"])
+                                                "https://www.instagram.com/{0}/"
+                                                .format(self.username))["data"]["user"]["edge_chaining"]["edges"])
 
 
 class StoryItem:

From 4c20b573e20b5ac97a262cefd85c1fe47dd7f3af Mon Sep 17 00:00:00 2001
From: Alexander Graf <17130992+aandergr@users.noreply.github.com>
Date: Wed, 15 Jul 2020 10:17:20 +0200
Subject: [PATCH 3/5] Fix KeyError on Posts with very few comments

---
 instaloader/structures.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/instaloader/structures.py b/instaloader/structures.py
index e045269..c2dda25 100644
--- a/instaloader/structures.py
+++ b/instaloader/structures.py
@@ -416,7 +416,7 @@ class Post:
             return
 
         comment_edges = self._field('edge_media_to_comment', 'edges')
-        answers_count = sum([edge['node']['edge_threaded_comments']['count'] for edge in comment_edges])
+        answers_count = sum([edge['node'].get('edge_threaded_comments', {}).get('count', 0) for edge in comment_edges])
 
         if self.comments == len(comment_edges) + answers_count:
             # If the Post's metadata already contains all parent comments, don't do GraphQL requests to obtain them

From 601dd0b10608e4031e05f170776b65b761b35218 Mon Sep 17 00:00:00 2001
From: Alexander Graf <17130992+aandergr@users.noreply.github.com>
Date: Wed, 15 Jul 2020 10:26:22 +0200
Subject: [PATCH 4/5] First Release Candidate for Version 4.4.5

---
 instaloader/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/instaloader/__init__.py b/instaloader/__init__.py
index 29f124c..e489d5a 100644
--- a/instaloader/__init__.py
+++ b/instaloader/__init__.py
@@ -1,7 +1,7 @@
 """Download pictures (or videos) along with their captions and other metadata from Instagram."""
 
 
-__version__ = '4.4.4'
+__version__ = '4.4.5rc1'
 
 
 try:

From e232c82d5eca7d33947533a7fc546b3156cd5282 Mon Sep 17 00:00:00 2001
From: Alexander Graf <17130992+aandergr@users.noreply.github.com>
Date: Sat, 18 Jul 2020 15:24:31 +0200
Subject: [PATCH 5/5] Release of Version 4.4.5

---
 instaloader/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/instaloader/__init__.py b/instaloader/__init__.py
index e489d5a..6165a3e 100644
--- a/instaloader/__init__.py
+++ b/instaloader/__init__.py
@@ -1,7 +1,7 @@
 """Download pictures (or videos) along with their captions and other metadata from Instagram."""
 
 
-__version__ = '4.4.5rc1'
+__version__ = '4.4.5'
 
 
 try: