1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-07-07 03:40:06 +02:00

Allow selecting range to download from a sidecar (#749)

Co-authored-by: Alexander Graf <17130992+aandergr@users.noreply.github.com>
This commit is contained in:
AndyR 2020-12-14 22:11:33 +01:00 committed by GitHub
parent 097bf7fecc
commit b31f279527
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 85 additions and 18 deletions

View File

@ -82,6 +82,16 @@ What to Download of each Post
Template to write in txt file for each StoryItem. See
:ref:`metadata-text-files`.
.. option:: --slide
Download only selected images of a sidecar. You can select a single image using its
index in the sidecar, counting from 1 at the leftmost image, or you can specify a range of images
with the following syntax: ``start_index-end_index``, where ``end_index`` may also be ``last``. Example:
``--slide 1`` will select only the first image, ``--slide last`` only the last one and ``--slide 1-3`` will select only
the first three images.
.. versionadded:: 4.6
.. option:: --no-metadata-json
Do not create a JSON file containing the metadata of each post.

View File

@ -267,6 +267,8 @@ def main():
help="Do not download regular posts.")
g_prof.add_argument('--no-profile-pic', action='store_true',
help='Do not download profile picture.')
g_post.add_argument('--slide', action='store',
help='Set what image/interval of a sidecar you want to download.')
g_post.add_argument('--no-pictures', action='store_true',
help='Do not download post pictures. Cannot be used together with --fast-update. '
'Implies --no-video-thumbnails, does not imply --no-videos.')
@ -424,7 +426,8 @@ def main():
max_connection_attempts=args.max_connection_attempts,
request_timeout=args.request_timeout,
resume_prefix=resume_prefix,
check_resume_bbd=not args.use_aged_resume_files)
check_resume_bbd=not args.use_aged_resume_files,
slide=args.slide)
_main(loader,
args.profile,
username=args.login.lower() if args.login is not None else None,

View File

@ -160,6 +160,7 @@ class Instaloader:
:param rate_controller: Generator for a :class:`RateController` to override rate controlling behavior
:param resume_prefix: :option:`--resume-prefix`, or None for :option:`--no-resume`.
:param check_resume_bbd: Whether to check the date of expiry of resume files and reject them if expired.
:param slide: :option:`--slide`
.. attribute:: context
@ -185,7 +186,8 @@ class Instaloader:
request_timeout: float = 300.0,
rate_controller: Optional[Callable[[InstaloaderContext], RateController]] = None,
resume_prefix: Optional[str] = "iterator",
check_resume_bbd: bool = True):
check_resume_bbd: bool = True,
slide: Optional[str] = None):
self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts,
request_timeout, rate_controller)
@ -207,6 +209,31 @@ class Instaloader:
self.resume_prefix = resume_prefix
self.check_resume_bbd = check_resume_bbd
self.slide = slide or ""
self.slide_start = 0
self.slide_end = -1
if self.slide != "":
splitted = self.slide.split('-')
if len(splitted) == 1:
if splitted[0] == 'last':
# download only last image of a sidecar
self.slide_start = -1
else:
if int(splitted[0]) > 0:
self.slide_start = self.slide_end = int(splitted[0])-1
else:
raise InvalidArgumentException("--slide parameter must be greater than 0.")
elif len(splitted) == 2:
if splitted[1] == 'last':
self.slide_start = int(splitted[0])-1
elif 0 < int(splitted[0]) < int(splitted[1]):
self.slide_start = int(splitted[0])-1
self.slide_end = int(splitted[1])-1
else:
raise InvalidArgumentException("Invalid data for --slide parameter.")
else:
raise InvalidArgumentException("Invalid data for --slide parameter.")
@contextmanager
def anonymous_copy(self):
"""Yield an anonymous, otherwise equally-configured copy of an Instaloader instance; Then copy its error log."""
@ -228,7 +255,8 @@ class Instaloader:
max_connection_attempts=self.context.max_connection_attempts,
request_timeout=self.context.request_timeout,
resume_prefix=self.resume_prefix,
check_resume_bbd=self.check_resume_bbd)
check_resume_bbd=self.check_resume_bbd,
slide=self.slide)
yield new_loader
self.context.error_log.extend(new_loader.context.error_log)
new_loader.context.error_log = [] # avoid double-printing of errors
@ -527,7 +555,10 @@ class Instaloader:
downloaded = True
if post.typename == 'GraphSidecar':
if self.download_pictures or self.download_videos:
for edge_number, sidecar_node in enumerate(post.get_sidecar_nodes(), start=1):
for edge_number, sidecar_node in enumerate(
post.get_sidecar_nodes(self.slide_start, self.slide_end),
start=post.mediacount if self.slide_start < 0 else self.slide_start + 1
):
if self.download_pictures and (not sidecar_node.is_video or self.download_video_thumbnails):
suffix = str(edge_number)
if '{filename}' in self.filename_pattern:

View File

@ -252,26 +252,49 @@ class Post:
"""Type of post, GraphImage, GraphVideo or GraphSidecar"""
return self._field('__typename')
def get_sidecar_nodes(self) -> Iterator[PostSidecarNode]:
"""Sidecar nodes of a Post with typename==GraphSidecar."""
@property
def mediacount(self) -> int:
    """
    Number of media items contained in this Post.

    For a Post whose typename is ``GraphSidecar`` this is the number of child
    edges of the sidecar; for any other Post it is 1.

    .. versionadded:: 4.6
    """
    # Guard clause: anything that is not a sidecar holds exactly one medium.
    if self.typename != 'GraphSidecar':
        return 1
    return len(self._field('edge_sidecar_to_children', 'edges'))
def get_sidecar_nodes(self, start=0, end=-1) -> Iterator[PostSidecarNode]:
"""
Sidecar nodes of a Post with typename==GraphSidecar.
.. versionchanged:: 4.6
Added parameters *start* and *end* to specify a slice of sidecar media.
"""
if self.typename == 'GraphSidecar':
edges = self._field('edge_sidecar_to_children', 'edges')
if any(edge['node']['is_video'] for edge in edges):
# video_url is only present in full metadata, issue #558.
edges = self._full_metadata['edge_sidecar_to_children']['edges']
if end < 0:
end = len(edges)-1
if start < 0:
start = len(edges)-1
for idx, edge in enumerate(edges):
node = edge['node']
is_video = node['is_video']
display_url = node['display_url']
if not is_video and self._context.is_logged_in:
try:
carousel_media = self._iphone_struct['carousel_media']
orig_url = carousel_media[idx]['image_versions2']['candidates'][0]['url']
display_url = re.sub(r'&se=\d+(&?)', r'\1', orig_url)
except (InstaloaderException, KeyError, IndexError) as err:
self._context.error('{} Unable to fetch high quality image version of {}.'.format(err, self))
yield PostSidecarNode(is_video=is_video, display_url=display_url,
video_url=node['video_url'] if is_video else None)
if start <= idx <= end:
node = edge['node']
is_video = node['is_video']
display_url = node['display_url']
if not is_video and self._context.is_logged_in:
try:
carousel_media = self._iphone_struct['carousel_media']
orig_url = carousel_media[idx]['image_versions2']['candidates'][0]['url']
display_url = re.sub(r'&se=\d+(&?)', r'\1', orig_url)
except (InstaloaderException, KeyError, IndexError) as err:
self._context.error('{} Unable to fetch high quality image version of {}.'.format(
err, self))
yield PostSidecarNode(is_video=is_video, display_url=display_url,
video_url=node['video_url'] if is_video else None)
@property
def caption(self) -> Optional[str]: