From 2b9ed927c7fdaa667760a98993482c27da14735f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Koch-Kramer?= Date: Mon, 19 Mar 2018 21:05:13 +0100 Subject: [PATCH] Properly handle HTTP redirects in get_json() This is necessary to preserve the GET parameters across redirects. Additionally, it is nice to have a log output if a redirect occurs. --- instaloader.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/instaloader.py b/instaloader.py index 7cf6adb..289d5b6 100755 --- a/instaloader.py +++ b/instaloader.py @@ -774,7 +774,15 @@ class Instaloader: sess = session if session else self.session try: self._sleep() - resp = sess.get('https://www.instagram.com/' + url, params=params) + resp = sess.get('https://www.instagram.com/' + url, params=params, allow_redirects=False) + while resp.is_redirect: + redirect_url = resp.headers['location'] + self._log('\nHTTP redirect from {} to {}'.format('https://www.instagram.com/' + url, redirect_url)) + if redirect_url.index('https://www.instagram.com/') == 0: + resp = sess.get(redirect_url if redirect_url.endswith('/') else redirect_url + '/', + params=params, allow_redirects=False) + else: + break if resp.status_code == 404: raise QueryReturnedNotFoundException("404") if resp.status_code == 429: