1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-10-27 05:32:30 +01:00

Bypass IP-based redirects to /accounts/login (#727)

* Bypass IP-based redirects to /accounts/login

There are two main changes made:

For users, we request /{username}/feed/ instead of /{username}/. For some
reason, this completely bypasses the login redirect. This page doesn't
work in a browser while blocked, but fortunately all the data we need is
present in the page's HTML.

For posts, we switch from the /p/ page to the GraphQL endpoint for the
same data, which is still subject to GraphQL rate limits but is not
subject to login redirects. The data is identical between the two
sources, apart from the object keys being ordered differently and
rhx_gis being absent from the GraphQL response.

Yes, this now unblocks access from VPNs, Tor, cloud servers, etc.

* Apply requested patch to fix comments

* Remove rhx_gis from Post and Profile

Co-authored-by: Alexander Graf <17130992+aandergr@users.noreply.github.com>
This commit is contained in:
Cadence Ember 2020-07-15 19:46:39 +12:00 committed by GitHub
parent e21d34124d
commit db63e5c7e5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -69,7 +69,6 @@ class Post:
self._node = node self._node = node
self._owner_profile = owner_profile self._owner_profile = owner_profile
self._full_metadata_dict = None # type: Optional[Dict[str, Any]] self._full_metadata_dict = None # type: Optional[Dict[str, Any]]
self._rhx_gis_str = None # type: Optional[str]
self._location = None # type: Optional[PostLocation] self._location = None # type: Optional[PostLocation]
self._iphone_struct_ = None self._iphone_struct_ = None
if 'iphone_struct' in node: if 'iphone_struct' in node:
@ -142,9 +141,11 @@ class Post:
def _obtain_metadata(self): def _obtain_metadata(self):
if not self._full_metadata_dict: if not self._full_metadata_dict:
pic_json = self._context.get_json("p/{0}/".format(self.shortcode), params={}) pic_json = self._context.graphql_query(
self._full_metadata_dict = pic_json['entry_data']['PostPage'][0]['graphql']['shortcode_media'] '2b0673e0dc4580674a88d426fe00ea90',
self._rhx_gis_str = pic_json.get('rhx_gis') {'shortcode': self.shortcode}
)
self._full_metadata_dict = pic_json['data']['shortcode_media']
if self._full_metadata_dict is None: if self._full_metadata_dict is None:
# issue #449 # issue #449
self._context.error("Fetching Post metadata failed (issue #449). " self._context.error("Fetching Post metadata failed (issue #449). "
@ -161,11 +162,6 @@ class Post:
assert self._full_metadata_dict is not None assert self._full_metadata_dict is not None
return self._full_metadata_dict return self._full_metadata_dict
@property
def _rhx_gis(self) -> Optional[str]:
self._obtain_metadata()
return self._rhx_gis_str
@property @property
def _iphone_struct(self) -> Dict[str, Any]: def _iphone_struct(self) -> Dict[str, Any]:
if not self._context.is_logged_in: if not self._context.is_logged_in:
@ -392,7 +388,7 @@ class Post:
created_at_utc=datetime.utcfromtimestamp(node['created_at']), created_at_utc=datetime.utcfromtimestamp(node['created_at']),
text=node['text'], text=node['text'],
owner=Profile(self._context, node['owner']), owner=Profile(self._context, node['owner']),
likes_count=node['edge_liked_by']['count']) likes_count=node.get('edge_liked_by', {}).get('count', 0))
def _postcommentanswers(node): def _postcommentanswers(node):
if 'edge_threaded_comments' not in node: if 'edge_threaded_comments' not in node:
@ -418,14 +414,9 @@ class Post:
if self.comments == 0: if self.comments == 0:
# Avoid doing additional requests if there are no comments # Avoid doing additional requests if there are no comments
return return
try:
comment_edges = self._field('edge_media_to_parent_comment', 'edges') comment_edges = self._field('edge_media_to_comment', 'edges')
answers_count = sum([edge['node']['edge_threaded_comments']['count'] for edge in comment_edges]) answers_count = sum([edge['node']['edge_threaded_comments']['count'] for edge in comment_edges])
threaded_comments_available = True
except KeyError:
comment_edges = self._field('edge_media_to_comment', 'edges')
answers_count = 0
threaded_comments_available = False
if self.comments == len(comment_edges) + answers_count: if self.comments == len(comment_edges) + answers_count:
# If the Post's metadata already contains all parent comments, don't do GraphQL requests to obtain them # If the Post's metadata already contains all parent comments, don't do GraphQL requests to obtain them
@ -433,14 +424,10 @@ class Post:
return return
yield from (_postcomment(node) for node in yield from (_postcomment(node) for node in
self._context.graphql_node_list( self._context.graphql_node_list(
"97b41c52301f77ce508f55e66d17620e" if threaded_comments_available "97b41c52301f77ce508f55e66d17620e",
else "f0986789a5c5d17c2400faebf16efd0d",
{'shortcode': self.shortcode}, {'shortcode': self.shortcode},
'https://www.instagram.com/p/' + self.shortcode + '/', 'https://www.instagram.com/p/' + self.shortcode + '/',
lambda d: lambda d: d['data']['shortcode_media']['edge_media_to_parent_comment']))
d['data']['shortcode_media'][
'edge_media_to_parent_comment' if threaded_comments_available else 'edge_media_to_comment'],
self._rhx_gis))
def get_likes(self) -> Iterator['Profile']: def get_likes(self) -> Iterator['Profile']:
"""Iterate over all likes of the post. A :class:`Profile` instance of each likee is yielded.""" """Iterate over all likes of the post. A :class:`Profile` instance of each likee is yielded."""
@ -455,8 +442,7 @@ class Post:
yield from (Profile(self._context, node) for node in yield from (Profile(self._context, node) for node in
self._context.graphql_node_list("1cb6ec562846122743b61e492c85999f", {'shortcode': self.shortcode}, self._context.graphql_node_list("1cb6ec562846122743b61e492c85999f", {'shortcode': self.shortcode},
'https://www.instagram.com/p/' + self.shortcode + '/', 'https://www.instagram.com/p/' + self.shortcode + '/',
lambda d: d['data']['shortcode_media']['edge_liked_by'], lambda d: d['data']['shortcode_media']['edge_liked_by']))
self._rhx_gis))
@property @property
def is_sponsored(self) -> bool: def is_sponsored(self) -> bool:
@ -537,7 +523,6 @@ class Profile:
self._has_public_story = None # type: Optional[bool] self._has_public_story = None # type: Optional[bool]
self._node = node self._node = node
self._has_full_metadata = False self._has_full_metadata = False
self._rhx_gis = None
self._iphone_struct_ = None self._iphone_struct_ = None
if 'iphone_struct' in node: if 'iphone_struct' in node:
# if loaded from JSON with load_structure_from_file() # if loaded from JSON with load_structure_from_file()
@ -599,10 +584,9 @@ class Profile:
def _obtain_metadata(self): def _obtain_metadata(self):
try: try:
if not self._has_full_metadata: if not self._has_full_metadata:
metadata = self._context.get_json('{}/'.format(self.username), params={}) metadata = self._context.get_json('{}/feed/'.format(self.username), params={})
self._node = metadata['entry_data']['ProfilePage'][0]['graphql']['user'] self._node = metadata['entry_data']['ProfilePage'][0]['graphql']['user']
self._has_full_metadata = True self._has_full_metadata = True
self._rhx_gis = metadata.get('rhx_gis')
except (QueryReturnedNotFoundException, KeyError) as err: except (QueryReturnedNotFoundException, KeyError) as err:
top_search_results = TopSearchResults(self._context, self.username) top_search_results = TopSearchResults(self._context, self.username)
similar_profiles = [profile.username for profile in top_search_results.get_profiles()] similar_profiles = [profile.username for profile in top_search_results.get_profiles()]
@ -735,8 +719,7 @@ class Profile:
'include_reel': False, 'include_suggested_users': False, 'include_reel': False, 'include_suggested_users': False,
'include_logged_out_extras': True, 'include_logged_out_extras': True,
'include_highlight_reels': False}, 'include_highlight_reels': False},
'https://www.instagram.com/{}/'.format(self.username), 'https://www.instagram.com/{}/'.format(self.username))
self._rhx_gis)
self._has_public_story = data['data']['user']['has_public_story'] self._has_public_story = data['data']['user']['has_public_story']
assert self._has_public_story is not None assert self._has_public_story is not None
return self._has_public_story return self._has_public_story
@ -795,8 +778,7 @@ class Profile:
{'id': self.userid}, {'id': self.userid},
'https://www.instagram.com/{0}/'.format(self.username), 'https://www.instagram.com/{0}/'.format(self.username),
lambda d: d['data']['user']['edge_owner_to_timeline_media'], lambda d: d['data']['user']['edge_owner_to_timeline_media'],
self._rhx_gis, first_data=self._metadata('edge_owner_to_timeline_media')))
self._metadata('edge_owner_to_timeline_media')))
def get_saved_posts(self) -> Iterator[Post]: def get_saved_posts(self) -> Iterator[Post]:
"""Get Posts that are marked as saved by the user.""" """Get Posts that are marked as saved by the user."""
@ -810,8 +792,7 @@ class Profile:
{'id': self.userid}, {'id': self.userid},
'https://www.instagram.com/{0}/'.format(self.username), 'https://www.instagram.com/{0}/'.format(self.username),
lambda d: d['data']['user']['edge_saved_media'], lambda d: d['data']['user']['edge_saved_media'],
self._rhx_gis, first_data=self._metadata('edge_saved_media')))
self._metadata('edge_saved_media')))
def get_tagged_posts(self) -> Iterator[Post]: def get_tagged_posts(self) -> Iterator[Post]:
"""Retrieve all posts where a profile is tagged. """Retrieve all posts where a profile is tagged.
@ -822,8 +803,7 @@ class Profile:
self._context.graphql_node_list("e31a871f7301132ceaab56507a66bbb7", self._context.graphql_node_list("e31a871f7301132ceaab56507a66bbb7",
{'id': self.userid}, {'id': self.userid},
'https://www.instagram.com/{0}/'.format(self.username), 'https://www.instagram.com/{0}/'.format(self.username),
lambda d: d['data']['user']['edge_user_to_photos_of_you'], lambda d: d['data']['user']['edge_user_to_photos_of_you']))
self._rhx_gis))
def get_igtv_posts(self) -> Iterator[Post]: def get_igtv_posts(self) -> Iterator[Post]:
"""Retrieve all IGTV posts. """Retrieve all IGTV posts.
@ -835,8 +815,7 @@ class Profile:
{'id': self.userid}, {'id': self.userid},
'https://www.instagram.com/{0}/channel/'.format(self.username), 'https://www.instagram.com/{0}/channel/'.format(self.username),
lambda d: d['data']['user']['edge_felix_video_timeline'], lambda d: d['data']['user']['edge_felix_video_timeline'],
self._rhx_gis, first_data=self._metadata('edge_felix_video_timeline')))
self._metadata('edge_felix_video_timeline')))
def get_followers(self) -> Iterator['Profile']: def get_followers(self) -> Iterator['Profile']:
""" """
@ -850,8 +829,7 @@ class Profile:
self._context.graphql_node_list("37479f2b8209594dde7facb0d904896a", self._context.graphql_node_list("37479f2b8209594dde7facb0d904896a",
{'id': str(self.userid)}, {'id': str(self.userid)},
'https://www.instagram.com/' + self.username + '/', 'https://www.instagram.com/' + self.username + '/',
lambda d: d['data']['user']['edge_followed_by'], lambda d: d['data']['user']['edge_followed_by']))
self._rhx_gis))
def get_followees(self) -> Iterator['Profile']: def get_followees(self) -> Iterator['Profile']:
""" """
@ -865,8 +843,7 @@ class Profile:
self._context.graphql_node_list("58712303d941c6855d4e888c5f0cd22f", self._context.graphql_node_list("58712303d941c6855d4e888c5f0cd22f",
{'id': str(self.userid)}, {'id': str(self.userid)},
'https://www.instagram.com/' + self.username + '/', 'https://www.instagram.com/' + self.username + '/',
lambda d: d['data']['user']['edge_follow'], lambda d: d['data']['user']['edge_follow']))
self._rhx_gis))
def get_similar_accounts(self) -> Iterator['Profile']: def get_similar_accounts(self) -> Iterator['Profile']:
""" """
@ -881,8 +858,8 @@ class Profile:
yield from (Profile(self._context, edge["node"]) for edge in yield from (Profile(self._context, edge["node"]) for edge in
self._context.graphql_query("ad99dd9d3646cc3c0dda65debcd266a7", self._context.graphql_query("ad99dd9d3646cc3c0dda65debcd266a7",
{"user_id": str(self.userid), "include_chaining": True}, {"user_id": str(self.userid), "include_chaining": True},
"https://www.instagram.com/{0}/".format(self.username), "https://www.instagram.com/{0}/"
self._rhx_gis)["data"]["user"]["edge_chaining"]["edges"]) .format(self.username))["data"]["user"]["edge_chaining"]["edges"])
class StoryItem: class StoryItem: