From b57bbe2af0654ed184151abe89df9b0e55be9d99 Mon Sep 17 00:00:00 2001 From: fireattack Date: Fri, 21 Aug 2020 12:37:38 -0500 Subject: [PATCH 1/8] Get higher Post video and StoryItem image quality if logged-in (#712) --- instaloader/instaloadercontext.py | 2 +- instaloader/structures.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/instaloader/instaloadercontext.py b/instaloader/instaloadercontext.py index bf2e0ef..8c37e1a 100644 --- a/instaloader/instaloadercontext.py +++ b/instaloader/instaloadercontext.py @@ -471,7 +471,7 @@ class InstaloaderContext: .. versionadded:: 4.2.1""" with copy_session(self._session, self.request_timeout) as tempsession: - tempsession.headers['User-Agent'] = 'Instagram 123.1.0.26.115 (iPhone12,1; iOS 13_3; en_US; en-US; ' \ + tempsession.headers['User-Agent'] = 'Instagram 146.0.0.27.125 (iPhone12,1; iOS 13_3; en_US; en-US; ' \ 'scale=2.00; 1656x3584; 190542906)' for header in ['Host', 'Origin', 'X-Instagram-AJAX', 'X-Requested-With']: tempsession.headers.pop(header, None) diff --git a/instaloader/structures.py b/instaloader/structures.py index 6c53071..6753b11 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -328,6 +328,12 @@ class Post: def video_url(self) -> Optional[str]: """URL of the video, or None.""" if self.is_video: + if self._context.is_logged_in: + try: + url = self._iphone_struct['video_versions'][0]['url'] + return url + except (InstaloaderException, KeyError, IndexError) as err: + self._context.error('{} Unable to fetch high quality video version of {}.'.format(err, self)) return self._field('video_url') return None @@ -916,11 +922,17 @@ class StoryItem: self._context = context self._node = node self._owner_profile = owner_profile + self._iphone_struct_ = None + if 'iphone_struct' in node: + # if loaded from JSON with load_structure_from_file() + self._iphone_struct_ = node['iphone_struct'] def _asdict(self): node = self._node if 
self._owner_profile: node['owner'] = self._owner_profile._asdict() + if self._iphone_struct_: + node['iphone_struct'] = self._iphone_struct_ return node @property @@ -945,6 +957,15 @@ class StoryItem: def __hash__(self) -> int: return hash(self.mediaid) + @property + def _iphone_struct(self) -> Dict[str, Any]: + if not self._context.is_logged_in: + raise LoginRequiredException("--login required to access iPhone media info endpoint.") + if not self._iphone_struct_: + data = self._context.get_iphone_json(path='api/v1/media/{}/info/'.format(self.mediaid), params={}) + self._iphone_struct_ = data['items'][0] + return self._iphone_struct_ + @property def owner_profile(self) -> Profile: """:class:`Profile` instance of the story item's owner.""" @@ -996,6 +1017,13 @@ class StoryItem: @property def url(self) -> str: """URL of the picture / video thumbnail of the StoryItem""" + if self.typename == "GraphStoryImage" and self._context.is_logged_in: + try: + orig_url = self._iphone_struct['image_versions2']['candidates'][0]['url'] + url = re.sub(r'&se=\d+(&?)', r'\1', orig_url) + return url + except (InstaloaderException, KeyError, IndexError) as err: + self._context.error('{} Unable to fetch high quality image version of {}.'.format(err, self)) return self._node['display_resources'][-1]['src'] @property From 1c2e226630fbe19cfaa7c96ac5d6a51bc20d198f Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Sat, 14 Nov 2020 18:47:37 +0100 Subject: [PATCH 2/8] Enable default request timeout of 300 seconds (#870) Sets a default request timeout of 300 seconds to fix Instaloader hanging indefinitely when used on an unstable internet connection without using --request-timeout, such as in #810. 
--- docs/cli-options.rst | 5 ++++- instaloader/__main__.py | 4 ++-- instaloader/instaloader.py | 2 +- instaloader/instaloadercontext.py | 30 +++++++++++++----------------- 4 files changed, 20 insertions(+), 21 deletions(-) diff --git a/docs/cli-options.rst b/docs/cli-options.rst index c656142..00eaba8 100644 --- a/docs/cli-options.rst +++ b/docs/cli-options.rst @@ -238,10 +238,13 @@ How to Download .. option:: --request-timeout N - Seconds to wait before timing out a connection request. + Seconds to wait before timing out a connection request. Defaults to 300. .. versionadded:: 4.3 + .. versionchanged:: 4.6 + Enabled this option by default with a timeout of 300 seconds. + Miscellaneous Options ^^^^^^^^^^^^^^^^^^^^^ diff --git a/instaloader/__main__.py b/instaloader/__main__.py index 1d5e984..31d6058 100644 --- a/instaloader/__main__.py +++ b/instaloader/__main__.py @@ -366,8 +366,8 @@ def main(): 'connection fails, it can be manually skipped by hitting CTRL+C. Set this to 0 to retry ' 'infinitely.') g_how.add_argument('--commit-mode', action='store_true', help=SUPPRESS) - g_how.add_argument('--request-timeout', metavar='N', type=float, - help='seconds to wait before timing out a connection request') + g_how.add_argument('--request-timeout', metavar='N', type=float, default=300.0, + help='Seconds to wait before timing out a connection request. 
Defaults to 300.') g_misc = parser.add_argument_group('Miscellaneous Options') g_misc.add_argument('-q', '--quiet', action='store_true', diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index 6d08c8b..47f4e4a 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -179,7 +179,7 @@ class Instaloader: post_metadata_txt_pattern: str = None, storyitem_metadata_txt_pattern: str = None, max_connection_attempts: int = 3, - request_timeout: Optional[float] = None, + request_timeout: float = 300.0, rate_controller: Optional[Callable[[InstaloaderContext], RateController]] = None, resume_prefix: Optional[str] = "iterator", check_resume_bbd: bool = True): diff --git a/instaloader/instaloadercontext.py b/instaloader/instaloadercontext.py index 8c37e1a..92bffee 100644 --- a/instaloader/instaloadercontext.py +++ b/instaloader/instaloadercontext.py @@ -25,10 +25,9 @@ def copy_session(session: requests.Session, request_timeout: Optional[float] = N new = requests.Session() new.cookies = requests.utils.cookiejar_from_dict(requests.utils.dict_from_cookiejar(session.cookies)) new.headers = session.headers.copy() - if request_timeout is not None: - # Override default timeout behavior. - # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427 - new.request = partial(new.request, timeout=request_timeout) # type: ignore + # Override default timeout behavior. + # Need to silence mypy bug for this. 
See: https://github.com/python/mypy/issues/2427 + new.request = partial(new.request, timeout=request_timeout) # type: ignore return new @@ -53,7 +52,7 @@ class InstaloaderContext: """ def __init__(self, sleep: bool = True, quiet: bool = False, user_agent: Optional[str] = None, - max_connection_attempts: int = 3, request_timeout: Optional[float] = None, + max_connection_attempts: int = 3, request_timeout: float = 300.0, rate_controller: Optional[Callable[["InstaloaderContext"], "RateController"]] = None): self.user_agent = user_agent if user_agent is not None else default_user_agent() @@ -161,10 +160,9 @@ class InstaloaderContext: 'ig_vw': '1920', 'csrftoken': '', 's_network': '', 'ds_user_id': ''}) session.headers.update(self._default_http_header(empty_session_only=True)) - if self.request_timeout is not None: - # Override default timeout behavior. - # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427 - session.request = partial(session.request, timeout=self.request_timeout) # type: ignore + # Override default timeout behavior. + # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427 + session.request = partial(session.request, timeout=self.request_timeout) # type: ignore return session def save_session_to_file(self, sessionfile): @@ -177,10 +175,9 @@ class InstaloaderContext: session.cookies = requests.utils.cookiejar_from_dict(pickle.load(sessionfile)) session.headers.update(self._default_http_header()) session.headers.update({'X-CSRFToken': session.cookies.get_dict()['csrftoken']}) - if self.request_timeout is not None: - # Override default timeout behavior. - # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427 - session.request = partial(session.request, timeout=self.request_timeout) # type: ignore + # Override default timeout behavior. + # Need to silence mypy bug for this. 
See: https://github.com/python/mypy/issues/2427 + session.request = partial(session.request, timeout=self.request_timeout) # type: ignore self._session = session self.username = username @@ -206,10 +203,9 @@ class InstaloaderContext: 'ig_vw': '1920', 'ig_cb': '1', 'csrftoken': '', 's_network': '', 'ds_user_id': ''}) session.headers.update(self._default_http_header()) - if self.request_timeout is not None: - # Override default timeout behavior. - # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427 - session.request = partial(session.request, timeout=self.request_timeout) # type: ignore + # Override default timeout behavior. + # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427 + session.request = partial(session.request, timeout=self.request_timeout) # type: ignore session.get('https://www.instagram.com/web/__mid/') csrf_token = session.cookies.get_dict()['csrftoken'] session.headers.update({'X-CSRFToken': csrf_token}) From e11b88d44ba44f27a27cc378b6ee041b50625eb1 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Thu, 19 Nov 2020 10:32:46 +0100 Subject: [PATCH 3/8] Advertise PEP 561 compliance --- instaloader/py.typed | 0 setup.py | 3 ++- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 instaloader/py.typed diff --git a/instaloader/py.typed b/instaloader/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/setup.py b/setup.py index b125674..7a70eec 100755 --- a/setup.py +++ b/setup.py @@ -38,6 +38,7 @@ setup( name='instaloader', version=get_version(), packages=['instaloader'], + package_data={'instaloader': ['py.typed']}, url='https://instaloader.github.io/', license='MIT', author='Alexander Graf, André Koch-Kramer', @@ -48,7 +49,7 @@ setup( install_requires=requirements, python_requires='>=3.5', entry_points={'console_scripts': ['instaloader=instaloader.__main__:main']}, - zip_safe=True, + zip_safe=False, keywords=keywords, 
classifiers=[ 'Development Status :: 5 - Production/Stable', From a045168c79124e2f971ecebf0afa63f1fae7e3e5 Mon Sep 17 00:00:00 2001 From: Thomas <71355143+thomas694@users.noreply.github.com> Date: Wed, 2 Dec 2020 11:22:55 +0100 Subject: [PATCH 4/8] Save content using original filenames (#893) Add feature from issue #892 (Save content using original filenames) - The new feature would download posts and save the images/videos with the original filenames. - The implementation follows the other options for the filename-pattern parameter. As filename is related to a single file and not the whole post, for sidecar nodes the filename is calculated again. - Using this option media from one post is only grouped together by the file modification timestamp. --- docs/basic-usage.rst | 3 +++ instaloader/instaloader.py | 37 +++++++++++++++++++++++++++++-------- 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/docs/basic-usage.rst b/docs/basic-usage.rst index bec5af8..dba7f02 100644 --- a/docs/basic-usage.rst +++ b/docs/basic-usage.rst @@ -171,6 +171,9 @@ dirname pattern. The following tokens are defined for usage with - ``{mediaid}`` Integer representation of shortcode. +- ``{filename}`` + Instagram's internal filename. + - ``{date_utc}`` (same as ``{date}``) Creation time in UTC timezone. 
`strftime()-style formatting options `__ diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index 1a8618f..11306e4 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -14,6 +14,7 @@ from hashlib import md5 from io import BytesIO from pathlib import Path from typing import Any, Callable, IO, Iterator, List, Optional, Set, Union, cast +from urllib.parse import urlparse import requests import urllib3 # type: ignore @@ -22,7 +23,7 @@ from .exceptions import * from .instaloadercontext import InstaloaderContext, RateController from .nodeiterator import NodeIterator, resumable_iteration from .structures import (Hashtag, Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem, - load_structure_from_file, save_structure_to_file) + load_structure_from_file, save_structure_to_file, PostSidecarNode) def get_default_session_filename(username: str) -> str: @@ -101,6 +102,8 @@ class _ArbitraryItemFormatter(string.Formatter): def get_value(self, key, args, kwargs): """Override to substitute {ATTRIBUTE} by attributes of our _item.""" + if key == 'filename' and isinstance(self._item, (Post, StoryItem, PostSidecarNode)): + return "{filename}" if hasattr(self._item, key): return getattr(self._item, key) return super().get_value(key, args, kwargs) @@ -492,7 +495,16 @@ class Instaloader: .. versionadded:: 4.2""" self.context.two_factor_login(two_factor_code) - def format_filename(self, item: Union[Post, StoryItem], target: Optional[Union[str, Path]] = None): + @staticmethod + def __prepare_filename(filename_template: str, url: str) -> str: + """Replace filename token inside filename_template with url's filename and assure the directories exist. + + .. 
versionadded:: 4.6""" + filename = filename_template.replace("{filename}", os.path.splitext(os.path.basename(urlparse(url).path))[0]) + os.makedirs(os.path.dirname(filename), exist_ok=True) + return filename + + def format_filename(self, item: Union[Post, StoryItem, PostSidecarNode], target: Optional[Union[str, Path]] = None): """Format filename of a :class:`Post` or :class:`StoryItem` according to ``filename-pattern`` parameter. .. versionadded:: 4.1""" @@ -508,8 +520,8 @@ class Instaloader: """ dirname = _PostPathFormatter(post).format(self.dirname_pattern, target=target) - filename = os.path.join(dirname, self.format_filename(post, target=target)) - os.makedirs(os.path.dirname(filename), exist_ok=True) + filename_template = os.path.join(dirname, self.format_filename(post, target=target)) + filename = self.__prepare_filename(filename_template, post.url) # Download the image(s) / video thumbnail and videos within sidecars if desired downloaded = True @@ -517,13 +529,21 @@ class Instaloader: if self.download_pictures or self.download_videos: for edge_number, sidecar_node in enumerate(post.get_sidecar_nodes(), start=1): if self.download_pictures and (not sidecar_node.is_video or self.download_video_thumbnails): + suffix = str(edge_number) + if '{filename}' in self.filename_pattern: + suffix = '' + filename = self.__prepare_filename(filename_template, sidecar_node.display_url) # Download sidecar picture or video thumbnail (--no-pictures implies --no-video-thumbnails) downloaded &= self.download_pic(filename=filename, url=sidecar_node.display_url, - mtime=post.date_local, filename_suffix=str(edge_number)) + mtime=post.date_local, filename_suffix=suffix) if sidecar_node.is_video and self.download_videos: + suffix = str(edge_number) + if '{filename}' in self.filename_pattern: + suffix = '' + filename = self.__prepare_filename(filename_template, sidecar_node.video_url) # Download sidecar video if desired downloaded &= self.download_pic(filename=filename, 
url=sidecar_node.video_url, - mtime=post.date_local, filename_suffix=str(edge_number)) + mtime=post.date_local, filename_suffix=suffix) elif post.typename == 'GraphImage': # Download picture if self.download_pictures: @@ -638,13 +658,14 @@ class Instaloader: date_local = item.date_local dirname = _PostPathFormatter(item).format(self.dirname_pattern, target=target) - filename = os.path.join(dirname, self.format_filename(item, target=target)) - os.makedirs(os.path.dirname(filename), exist_ok=True) + filename_template = os.path.join(dirname, self.format_filename(item, target=target)) + filename = self.__prepare_filename(filename_template, item.url) downloaded = False if not item.is_video or self.download_video_thumbnails is True: url = item.url downloaded = self.download_pic(filename=filename, url=url, mtime=date_local) if item.is_video and self.download_videos is True: + filename = self.__prepare_filename(filename_template, str(item.video_url)) downloaded |= self.download_pic(filename=filename, url=item.video_url, mtime=date_local) # Save caption if desired metadata_string = _ArbitraryItemFormatter(item).format(self.storyitem_metadata_txt_pattern).strip() From b31f279527bef233abc839c9b17c20045db0399f Mon Sep 17 00:00:00 2001 From: AndyR <59319678+AndreaRe9@users.noreply.github.com> Date: Mon, 14 Dec 2020 22:11:33 +0100 Subject: [PATCH 5/8] Allow selecting range to download from an sidecar (#749) Co-authored-by: Alexander Graf <17130992+aandergr@users.noreply.github.com> --- docs/cli-options.rst | 10 ++++++++ instaloader/__main__.py | 5 +++- instaloader/instaloader.py | 37 ++++++++++++++++++++++++--- instaloader/structures.py | 51 +++++++++++++++++++++++++++----------- 4 files changed, 85 insertions(+), 18 deletions(-) diff --git a/docs/cli-options.rst b/docs/cli-options.rst index 00eaba8..d84f6f0 100644 --- a/docs/cli-options.rst +++ b/docs/cli-options.rst @@ -82,6 +82,16 @@ What to Download of each Post Template to write in txt file for each StoryItem. 
See :ref:`metadata-text-files`. +.. option:: --slide + + Download only selected images of a sidecar. You can select single images using their + index in the sidecar starting with the leftmost or you can specify a range of images + with the following syntax: ``start_index-end_index``. Example: + ``--slide 1`` will select only the first image, ``--slide last`` only the last one and ``--slide 1-3`` will select only + the first three images. + + .. versionadded:: 4.6 + .. option:: --no-metadata-json Do not create a JSON file containing the metadata of each post. diff --git a/instaloader/__main__.py b/instaloader/__main__.py index 0f7a1e1..0397033 100644 --- a/instaloader/__main__.py +++ b/instaloader/__main__.py @@ -267,6 +267,8 @@ def main(): help="Do not download regular posts.") g_prof.add_argument('--no-profile-pic', action='store_true', help='Do not download profile picture.') + g_post.add_argument('--slide', action='store', + help='Set what image/interval of a sidecar you want to download.') g_post.add_argument('--no-pictures', action='store_true', help='Do not download post pictures. Cannot be used together with --fast-update. ' 'Implies --no-video-thumbnails, does not imply --no-videos.') @@ -424,7 +426,8 @@ def main(): max_connection_attempts=args.max_connection_attempts, request_timeout=args.request_timeout, resume_prefix=resume_prefix, - check_resume_bbd=not args.use_aged_resume_files) + check_resume_bbd=not args.use_aged_resume_files, + slide=args.slide) _main(loader, args.profile, username=args.login.lower() if args.login is not None else None, diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index 11306e4..8b8828e 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -160,6 +160,7 @@ class Instaloader: :param rate_controller: Generator for a :class:`RateController` to override rate controlling behavior :param resume_prefix: :option:`--resume-prefix`, or None for :option:`--no-resume`. 
:param check_resume_bbd: Whether to check the date of expiry of resume files and reject them if expired. + :param slide: :option:`--slide` .. attribute:: context @@ -185,7 +186,8 @@ class Instaloader: request_timeout: float = 300.0, rate_controller: Optional[Callable[[InstaloaderContext], RateController]] = None, resume_prefix: Optional[str] = "iterator", - check_resume_bbd: bool = True): + check_resume_bbd: bool = True, + slide: Optional[str] = None): self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts, request_timeout, rate_controller) @@ -207,6 +209,31 @@ class Instaloader: self.resume_prefix = resume_prefix self.check_resume_bbd = check_resume_bbd + self.slide = slide or "" + self.slide_start = 0 + self.slide_end = -1 + if self.slide != "": + splitted = self.slide.split('-') + if len(splitted) == 1: + if splitted[0] == 'last': + # download only last image of a sidecar + self.slide_start = -1 + else: + if int(splitted[0]) > 0: + self.slide_start = self.slide_end = int(splitted[0])-1 + else: + raise InvalidArgumentException("--slide parameter must be greater than 0.") + elif len(splitted) == 2: + if splitted[1] == 'last': + self.slide_start = int(splitted[0])-1 + elif 0 < int(splitted[0]) < int(splitted[1]): + self.slide_start = int(splitted[0])-1 + self.slide_end = int(splitted[1])-1 + else: + raise InvalidArgumentException("Invalid data for --slide parameter.") + else: + raise InvalidArgumentException("Invalid data for --slide parameter.") + @contextmanager def anonymous_copy(self): """Yield an anonymous, otherwise equally-configured copy of an Instaloader instance; Then copy its error log.""" @@ -228,7 +255,8 @@ class Instaloader: max_connection_attempts=self.context.max_connection_attempts, request_timeout=self.context.request_timeout, resume_prefix=self.resume_prefix, - check_resume_bbd=self.check_resume_bbd) + check_resume_bbd=self.check_resume_bbd, + slide=self.slide) yield new_loader 
self.context.error_log.extend(new_loader.context.error_log) new_loader.context.error_log = [] # avoid double-printing of errors @@ -527,7 +555,10 @@ class Instaloader: downloaded = True if post.typename == 'GraphSidecar': if self.download_pictures or self.download_videos: - for edge_number, sidecar_node in enumerate(post.get_sidecar_nodes(), start=1): + for edge_number, sidecar_node in enumerate( + post.get_sidecar_nodes(self.slide_start, self.slide_end), + start=post.mediacount if self.slide_start < 0 else self.slide_start + 1 + ): if self.download_pictures and (not sidecar_node.is_video or self.download_video_thumbnails): suffix = str(edge_number) if '{filename}' in self.filename_pattern: diff --git a/instaloader/structures.py b/instaloader/structures.py index 97face0..045d636 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -252,26 +252,49 @@ class Post: """Type of post, GraphImage, GraphVideo or GraphSidecar""" return self._field('__typename') - def get_sidecar_nodes(self) -> Iterator[PostSidecarNode]: - """Sidecar nodes of a Post with typename==GraphSidecar.""" + @property + def mediacount(self) -> int: + """ + The number of media in a sidecar Post, or 1 if the Post is not a sidecar. + + .. versionadded:: 4.6 + """ + if self.typename == 'GraphSidecar': + edges = self._field('edge_sidecar_to_children', 'edges') + return len(edges) + return 1 + + def get_sidecar_nodes(self, start=0, end=-1) -> Iterator[PostSidecarNode]: + """ + Sidecar nodes of a Post with typename==GraphSidecar. + + .. versionchanged:: 4.6 + Added parameters *start* and *end* to specify a slice of sidecar media. + """ if self.typename == 'GraphSidecar': edges = self._field('edge_sidecar_to_children', 'edges') if any(edge['node']['is_video'] for edge in edges): # video_url is only present in full metadata, issue #558. 
edges = self._full_metadata['edge_sidecar_to_children']['edges'] + if end < 0: + end = len(edges)-1 + if start < 0: + start = len(edges)-1 for idx, edge in enumerate(edges): - node = edge['node'] - is_video = node['is_video'] - display_url = node['display_url'] - if not is_video and self._context.is_logged_in: - try: - carousel_media = self._iphone_struct['carousel_media'] - orig_url = carousel_media[idx]['image_versions2']['candidates'][0]['url'] - display_url = re.sub(r'&se=\d+(&?)', r'\1', orig_url) - except (InstaloaderException, KeyError, IndexError) as err: - self._context.error('{} Unable to fetch high quality image version of {}.'.format(err, self)) - yield PostSidecarNode(is_video=is_video, display_url=display_url, - video_url=node['video_url'] if is_video else None) + if start <= idx <= end: + node = edge['node'] + is_video = node['is_video'] + display_url = node['display_url'] + if not is_video and self._context.is_logged_in: + try: + carousel_media = self._iphone_struct['carousel_media'] + orig_url = carousel_media[idx]['image_versions2']['candidates'][0]['url'] + display_url = re.sub(r'&se=\d+(&?)', r'\1', orig_url) + except (InstaloaderException, KeyError, IndexError) as err: + self._context.error('{} Unable to fetch high quality image version of {}.'.format( + err, self)) + yield PostSidecarNode(is_video=is_video, display_url=display_url, + video_url=node['video_url'] if is_video else None) @property def caption(self) -> Optional[str]: From 2f69ea62a17bff0aa4c7f639a59c30b87a54ed2c Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Mon, 14 Dec 2020 22:16:22 +0100 Subject: [PATCH 6/8] First Alpha Release for Version 4.6 --- instaloader/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instaloader/__init__.py b/instaloader/__init__.py index 155722f..2ea24bb 100644 --- a/instaloader/__init__.py +++ b/instaloader/__init__.py @@ -1,7 +1,7 @@ """Download pictures (or videos) along 
with their captions and other metadata from Instagram.""" -__version__ = '4.5.5' +__version__ = '4.6a1' try: From 850cd39be4a480b1cc996e5a0e0af27f231e90fa Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Sat, 2 Jan 2021 18:07:14 +0100 Subject: [PATCH 7/8] First Release Candidate for Version 4.6 --- instaloader/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instaloader/__init__.py b/instaloader/__init__.py index 2ea24bb..6b8cd6b 100644 --- a/instaloader/__init__.py +++ b/instaloader/__init__.py @@ -1,7 +1,7 @@ """Download pictures (or videos) along with their captions and other metadata from Instagram.""" -__version__ = '4.6a1' +__version__ = '4.6rc1' try: From a2ce906fd07660130b6b4c1377fcc3d05cea6ff5 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Mon, 4 Jan 2021 17:26:50 +0100 Subject: [PATCH 8/8] Fix efficiency bug regarding {filename} in filename-pattern Avoid evaluating 'url' when preparing the filename if the filename-pattern does not contain the "{filename}" token. This avoids the URL to be queried even though it is not needed (e.g. with --no-pictures). --- instaloader/instaloader.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index ed53d4b..90ff125 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -524,11 +524,15 @@ class Instaloader: self.context.two_factor_login(two_factor_code) @staticmethod - def __prepare_filename(filename_template: str, url: str) -> str: + def __prepare_filename(filename_template: str, url: Callable[[], str]) -> str: """Replace filename token inside filename_template with url's filename and assure the directories exist. .. 
versionadded:: 4.6""" - filename = filename_template.replace("{filename}", os.path.splitext(os.path.basename(urlparse(url).path))[0]) + if "{filename}" in filename_template: + filename = filename_template.replace("{filename}", + os.path.splitext(os.path.basename(urlparse(url()).path))[0]) + else: + filename = filename_template os.makedirs(os.path.dirname(filename), exist_ok=True) return filename @@ -549,7 +553,7 @@ class Instaloader: dirname = _PostPathFormatter(post).format(self.dirname_pattern, target=target) filename_template = os.path.join(dirname, self.format_filename(post, target=target)) - filename = self.__prepare_filename(filename_template, post.url) + filename = self.__prepare_filename(filename_template, lambda: post.url) # Download the image(s) / video thumbnail and videos within sidecars if desired downloaded = True @@ -563,7 +567,8 @@ class Instaloader: suffix = str(edge_number) if '{filename}' in self.filename_pattern: suffix = '' - filename = self.__prepare_filename(filename_template, sidecar_node.display_url) + # pylint:disable=cell-var-from-loop + filename = self.__prepare_filename(filename_template, lambda: sidecar_node.display_url) # Download sidecar picture or video thumbnail (--no-pictures implies --no-video-thumbnails) downloaded &= self.download_pic(filename=filename, url=sidecar_node.display_url, mtime=post.date_local, filename_suffix=suffix) @@ -571,7 +576,8 @@ class Instaloader: suffix = str(edge_number) if '{filename}' in self.filename_pattern: suffix = '' - filename = self.__prepare_filename(filename_template, sidecar_node.video_url) + # pylint:disable=cell-var-from-loop + filename = self.__prepare_filename(filename_template, lambda: sidecar_node.video_url) # Download sidecar video if desired downloaded &= self.download_pic(filename=filename, url=sidecar_node.video_url, mtime=post.date_local, filename_suffix=suffix) @@ -690,13 +696,13 @@ class Instaloader: date_local = item.date_local dirname = 
_PostPathFormatter(item).format(self.dirname_pattern, target=target) filename_template = os.path.join(dirname, self.format_filename(item, target=target)) - filename = self.__prepare_filename(filename_template, item.url) + filename = self.__prepare_filename(filename_template, lambda: item.url) downloaded = False if not item.is_video or self.download_video_thumbnails is True: url = item.url downloaded = self.download_pic(filename=filename, url=url, mtime=date_local) if item.is_video and self.download_videos is True: - filename = self.__prepare_filename(filename_template, str(item.video_url)) + filename = self.__prepare_filename(filename_template, lambda: str(item.video_url)) downloaded |= self.download_pic(filename=filename, url=item.video_url, mtime=date_local) # Save caption if desired metadata_string = _ArbitraryItemFormatter(item).format(self.storyitem_metadata_txt_pattern).strip()