From 0372f7c8e80ccc234a9a9782a52ef3e744be1b78 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Sat, 24 Jul 2021 20:34:14 +0200 Subject: [PATCH 1/4] Release of Version 4.7.5 --- instaloader/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instaloader/__init__.py b/instaloader/__init__.py index c6b3829..e094db4 100644 --- a/instaloader/__init__.py +++ b/instaloader/__init__.py @@ -1,7 +1,7 @@ """Download pictures (or videos) along with their captions and other metadata from Instagram.""" -__version__ = '4.7.4' +__version__ = '4.7.5' try: From ae39ab9893e3b084ae6475402febcf23405a5b62 Mon Sep 17 00:00:00 2001 From: Arman Yeghiazaryan Date: Wed, 4 Aug 2021 19:28:56 +0400 Subject: [PATCH 2/4] Fix Post.location / --geotags (#1244) Closes #1109. --- instaloader/structures.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/instaloader/structures.py b/instaloader/structures.py index 797af98..12602f4 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -553,8 +553,8 @@ class Post: return None location_id = int(loc['id']) if any(k not in loc for k in ('name', 'slug', 'has_public_page', 'lat', 'lng')): - loc = self._context.get_json("explore/locations/{0}/".format(location_id), - params={'__a': 1})['graphql']['location'] + loc.update(self._context.get_json("explore/locations/{0}/".format(location_id), + params={'__a': 1})['native_location_data']['location_info']) self._location = PostLocation(location_id, loc['name'], loc['slug'], loc['has_public_page'], loc['lat'], loc['lng']) return self._location From 327fcfd8e850df06f6ff7f765e1a8ef281ffabd9 Mon Sep 17 00:00:00 2001 From: fireattack Date: Wed, 4 Aug 2021 10:42:14 -0500 Subject: [PATCH 3/4] Download best-quality video (#1232) Co-authored-by: Alexander Graf <17130992+aandergr@users.noreply.github.com> --- instaloader/instaloadercontext.py | 22 ++++++++++++++++ instaloader/structures.py | 44 ++++++++++++++++++++++++++----- 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/instaloader/instaloadercontext.py b/instaloader/instaloadercontext.py index 99edaa8..b531a7d 100644 --- a/instaloader/instaloadercontext.py +++ b/instaloader/instaloadercontext.py @@ -527,6 +527,28 @@ class InstaloaderContext: :raises ConnectionException: When download repeatedly failed.""" self.write_raw(self.get_raw(url), filename) + def head(self, url: str, allow_redirects: bool = False) -> requests.Response: + """HEAD a URL anonymously. + + :raises QueryReturnedNotFoundException: When the server responds with a 404. + :raises QueryReturnedForbiddenException: When the server responds with a 403. + :raises ConnectionException: When request failed. + + .. versionadded:: 4.7.6 + """ + with self.get_anonymous_session() as anonymous_session: + resp = anonymous_session.head(url, allow_redirects=allow_redirects) + if resp.status_code == 200: + return resp + else: + if resp.status_code == 403: + # suspected invalid URL signature + raise QueryReturnedForbiddenException("403 when accessing {}.".format(url)) + if resp.status_code == 404: + # 404 not worth retrying. + raise QueryReturnedNotFoundException("404 when accessing {}.".format(url)) + raise ConnectionException("HTTP error code {}.".format(resp.status_code)) + @property def root_rhx_gis(self) -> Optional[str]: """rhx_gis string returned in the / query.""" diff --git a/instaloader/structures.py b/instaloader/structures.py index 12602f4..47507f4 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -4,7 +4,7 @@ import re from base64 import b64decode, b64encode from collections import namedtuple from datetime import datetime -from typing import Any, Dict, Iterable, Iterator, List, Optional, Union +from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union from . import __version__ from .exceptions import * @@ -374,13 +374,26 @@ class Post: def video_url(self) -> Optional[str]: """URL of the video, or None.""" if self.is_video: + version_urls = [self._field('video_url')] if self._context.is_logged_in: + version_urls.extend(version['url'] for version in self._iphone_struct['video_versions']) + url_candidates: List[Tuple[int, str]] = [] + for idx, version_url in enumerate(version_urls): + if any(url_candidate[1] == version_url for url_candidate in url_candidates): + # Skip duplicates + continue try: - url = self._iphone_struct['video_versions'][0]['url'] - return url + url_candidates.append(( + int(self._context.head(version_url, allow_redirects=True).headers.get('Content-Length', 0)), + version_url + )) except (InstaloaderException, KeyError, IndexError) as err: - self._context.error('{} Unable to fetch high quality video version of {}.'.format(err, self)) - return self._field('video_url') + self._context.error(f"Video URL candidate {idx+1}/{len(version_urls)} for {self}: {err}") + if not url_candidates: + # All candidates fail: Fallback to default URL and handle errors later at the actual download attempt + return version_urls[0] + url_candidates.sort() + return url_candidates[-1][1] return None @property @@ -1103,7 +1116,26 @@ class StoryItem: def video_url(self) -> Optional[str]: """URL of the video, or None.""" if self.is_video: - return self._node['video_resources'][-1]['src'] + version_urls = [self._node['video_resources'][-1]['src']] + if self._context.is_logged_in: + version_urls.extend(version['url'] for version in self._iphone_struct['video_versions']) + url_candidates: List[Tuple[int, str]] = [] + for idx, version_url in enumerate(version_urls): + if any(url_candidate[1] == version_url for url_candidate in url_candidates): + # Skip duplicates + continue + try: + url_candidates.append(( + int(self._context.head(version_url, allow_redirects=True).headers.get('Content-Length', 0)), + version_url + )) + except (InstaloaderException, KeyError, IndexError) as err: + self._context.error(f"Video URL candidate {idx+1}/{len(version_urls)} for {self}: {err}") + if not url_candidates: + # All candidates fail: Fallback to default URL and handle errors later at the actual download attempt + return version_urls[0] + url_candidates.sort() + return url_candidates[-1][1] return None From 1ad9111753bb249ceec11785e4e006a859b7916e Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Wed, 4 Aug 2021 17:46:06 +0200 Subject: [PATCH 4/4] Release of Version 4.7.6 --- instaloader/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instaloader/__init__.py b/instaloader/__init__.py index e094db4..37549d6 100644 --- a/instaloader/__init__.py +++ b/instaloader/__init__.py @@ -1,7 +1,7 @@ """Download pictures (or videos) along with their captions and other metadata from Instagram.""" -__version__ = '4.7.5' +__version__ = '4.7.6' try: