diff --git a/instaloader/instaloadercontext.py b/instaloader/instaloadercontext.py index 026cb8a..5efd08a 100644 --- a/instaloader/instaloadercontext.py +++ b/instaloader/instaloadercontext.py @@ -3,7 +3,6 @@ import json import os import pickle import random -import re import shutil import sys import textwrap @@ -260,18 +259,23 @@ class InstaloaderContext: # Override default timeout behavior. # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427 session.request = partial(session.request, timeout=self.request_timeout) # type: ignore - csrf_json = self.get_json('accounts/login/', {}, session=session) - csrf_token = csrf_json['config']['csrf_token'] + + # Make a request to Instagram's root URL, which will set the session's csrftoken cookie + # Not using self.get_json() here, because we need to access the cookie + session.get('https://www.instagram.com/') + # Add session's csrftoken cookie to session headers + csrf_token = session.cookies.get_dict()['csrftoken'] session.headers.update({'X-CSRFToken': csrf_token}) - # Not using self.get_json() here, because we need to access csrftoken cookie + self.do_sleep() # Workaround credits to pgrimaud. # See: https://github.com/pgrimaud/instagram-user-feed/commit/96ad4cf54d1ad331b337f325c73e664999a6d066 enc_password = '#PWD_INSTAGRAM_BROWSER:0:{}:{}'.format(int(datetime.now().timestamp()), passwd) - login = session.post('https://www.instagram.com/accounts/login/ajax/', + login = session.post('https://www.instagram.com/api/v1/web/accounts/login/ajax/', data={'enc_password': enc_password, 'username': user}, allow_redirects=True) try: resp_json = login.json() + except json.decoder.JSONDecodeError as err: raise ConnectionException( "Login error: JSON decode fail, {} - {}.".format(login.status_code, login.reason) @@ -407,16 +411,6 @@ class InstaloaderContext: raise TooManyRequestsException("429 Too Many Requests") if resp.status_code != 200: raise ConnectionException("HTTP error code {}.".format(resp.status_code)) - is_html_query = not is_graphql_query and not "__a" in params and host == "www.instagram.com" - if is_html_query: - # Extract JSON from HTML response - match = re.search('(?<={"raw":").*?(?