diff --git a/docs/cli-options.rst b/docs/cli-options.rst index 00eaba8..d84f6f0 100644 --- a/docs/cli-options.rst +++ b/docs/cli-options.rst @@ -82,6 +82,16 @@ What to Download of each Post Template to write in txt file for each StoryItem. See :ref:`metadata-text-files`. +.. option:: --slide + + Download only selected images of a sidecar. You can select a single image by its + index in the sidecar, counting from 1 at the leftmost image, or you can specify an inclusive range of images + with the following syntax: ``start_index-end_index``. Example: + ``--slide 1`` will select only the first image, ``--slide last`` only the last one, ``--slide 1-3`` will select only + the first three images and ``--slide 2-last`` everything from the second image onwards. + + .. versionadded:: 4.6 + .. option:: --no-metadata-json Do not create a JSON file containing the metadata of each post. diff --git a/instaloader/__main__.py b/instaloader/__main__.py index 0f7a1e1..0397033 100644 --- a/instaloader/__main__.py +++ b/instaloader/__main__.py @@ -267,6 +267,8 @@ def main(): help="Do not download regular posts.") g_prof.add_argument('--no-profile-pic', action='store_true', help='Do not download profile picture.') + g_post.add_argument('--slide', action='store', + help='Set what image/interval of a sidecar you want to download.') g_post.add_argument('--no-pictures', action='store_true', help='Do not download post pictures. Cannot be used together with --fast-update. 
' 'Implies --no-video-thumbnails, does not imply --no-videos.') @@ -424,7 +426,8 @@ def main(): max_connection_attempts=args.max_connection_attempts, request_timeout=args.request_timeout, resume_prefix=resume_prefix, - check_resume_bbd=not args.use_aged_resume_files) + check_resume_bbd=not args.use_aged_resume_files, + slide=args.slide) _main(loader, args.profile, username=args.login.lower() if args.login is not None else None, diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index 11306e4..8b8828e 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -160,6 +160,7 @@ class Instaloader: :param rate_controller: Generator for a :class:`RateController` to override rate controlling behavior :param resume_prefix: :option:`--resume-prefix`, or None for :option:`--no-resume`. :param check_resume_bbd: Whether to check the date of expiry of resume files and reject them if expired. + :param slide: :option:`--slide` .. attribute:: context @@ -185,7 +186,8 @@ class Instaloader: request_timeout: float = 300.0, rate_controller: Optional[Callable[[InstaloaderContext], RateController]] = None, resume_prefix: Optional[str] = "iterator", - check_resume_bbd: bool = True): + check_resume_bbd: bool = True, + slide: Optional[str] = None): self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts, request_timeout, rate_controller) @@ -207,6 +209,31 @@ class Instaloader: self.resume_prefix = resume_prefix self.check_resume_bbd = check_resume_bbd + self.slide = slide or "" + self.slide_start = 0 + self.slide_end = -1 + if self.slide != "": + splitted = self.slide.split('-') + if len(splitted) == 1: + if splitted[0] == 'last': + # download only last image of a sidecar + self.slide_start = -1 + else: + if int(splitted[0]) > 0: + self.slide_start = self.slide_end = int(splitted[0])-1 + else: + raise InvalidArgumentException("--slide parameter must be greater than 0.") + elif len(splitted) == 2: + if splitted[1] == 'last': + 
self.slide_start = int(splitted[0])-1 + elif 0 < int(splitted[0]) < int(splitted[1]): + self.slide_start = int(splitted[0])-1 + self.slide_end = int(splitted[1])-1 + else: + raise InvalidArgumentException("Invalid data for --slide parameter.") + else: + raise InvalidArgumentException("Invalid data for --slide parameter.") + @contextmanager def anonymous_copy(self): """Yield an anonymous, otherwise equally-configured copy of an Instaloader instance; Then copy its error log.""" @@ -228,7 +255,8 @@ class Instaloader: max_connection_attempts=self.context.max_connection_attempts, request_timeout=self.context.request_timeout, resume_prefix=self.resume_prefix, - check_resume_bbd=self.check_resume_bbd) + check_resume_bbd=self.check_resume_bbd, + slide=self.slide) yield new_loader self.context.error_log.extend(new_loader.context.error_log) new_loader.context.error_log = [] # avoid double-printing of errors @@ -527,7 +555,10 @@ class Instaloader: downloaded = True if post.typename == 'GraphSidecar': if self.download_pictures or self.download_videos: - for edge_number, sidecar_node in enumerate(post.get_sidecar_nodes(), start=1): + for edge_number, sidecar_node in enumerate( + post.get_sidecar_nodes(self.slide_start, self.slide_end), + start=post.mediacount if self.slide_start < 0 else self.slide_start + 1 + ): if self.download_pictures and (not sidecar_node.is_video or self.download_video_thumbnails): suffix = str(edge_number) if '{filename}' in self.filename_pattern: diff --git a/instaloader/structures.py b/instaloader/structures.py index 97face0..045d636 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -252,26 +252,49 @@ class Post: """Type of post, GraphImage, GraphVideo or GraphSidecar""" return self._field('__typename') - def get_sidecar_nodes(self) -> Iterator[PostSidecarNode]: - """Sidecar nodes of a Post with typename==GraphSidecar.""" + @property + def mediacount(self) -> int: + """ + The number of media in a sidecar Post, or 1 if the Post 
is not a sidecar. + + .. versionadded:: 4.6 + """ + if self.typename == 'GraphSidecar': + edges = self._field('edge_sidecar_to_children', 'edges') + return len(edges) + return 1 + + def get_sidecar_nodes(self, start=0, end=-1) -> Iterator[PostSidecarNode]: + """ + Sidecar nodes of a Post with typename==GraphSidecar. + + .. versionchanged:: 4.6 + Added parameters *start* and *end* to select a range of sidecar media (zero-based, inclusive; a negative value means the last item). + """ if self.typename == 'GraphSidecar': edges = self._field('edge_sidecar_to_children', 'edges') if any(edge['node']['is_video'] for edge in edges): # video_url is only present in full metadata, issue #558. edges = self._full_metadata['edge_sidecar_to_children']['edges'] + if end < 0: + end = len(edges)-1 + if start < 0: + start = len(edges)-1 for idx, edge in enumerate(edges): - node = edge['node'] - is_video = node['is_video'] - display_url = node['display_url'] - if not is_video and self._context.is_logged_in: - try: - carousel_media = self._iphone_struct['carousel_media'] - orig_url = carousel_media[idx]['image_versions2']['candidates'][0]['url'] - display_url = re.sub(r'&se=\d+(&?)', r'\1', orig_url) - except (InstaloaderException, KeyError, IndexError) as err: - self._context.error('{} Unable to fetch high quality image version of {}.'.format(err, self)) - yield PostSidecarNode(is_video=is_video, display_url=display_url, - video_url=node['video_url'] if is_video else None) + if start <= idx <= end: + node = edge['node'] + is_video = node['is_video'] + display_url = node['display_url'] + if not is_video and self._context.is_logged_in: + try: + carousel_media = self._iphone_struct['carousel_media'] + orig_url = carousel_media[idx]['image_versions2']['candidates'][0]['url'] + display_url = re.sub(r'&se=\d+(&?)', r'\1', orig_url) + except (InstaloaderException, KeyError, IndexError) as err: + self._context.error('{} Unable to fetch high quality image version of {}.'.format( + err, self)) + yield PostSidecarNode(is_video=is_video, 
display_url=display_url, + video_url=node['video_url'] if is_video else None) @property def caption(self) -> Optional[str]: