diff --git a/docs/basic-usage.rst b/docs/basic-usage.rst index bec5af8..dba7f02 100644 --- a/docs/basic-usage.rst +++ b/docs/basic-usage.rst @@ -171,6 +171,9 @@ dirname pattern. The following tokens are defined for usage with - ``{mediaid}`` Integer representation of shortcode. +- ``{filename}`` + Instagram's internal filename. + - ``{date_utc}`` (same as ``{date}``) Creation time in UTC timezone. `strftime()-style formatting options <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior>`__ diff --git a/docs/cli-options.rst b/docs/cli-options.rst index c656142..d84f6f0 100644 --- a/docs/cli-options.rst +++ b/docs/cli-options.rst @@ -82,6 +82,16 @@ What to Download of each Post Template to write in txt file for each StoryItem. See :ref:`metadata-text-files`. +.. option:: --slide + + Download only selected images of a sidecar. You can select single images using their + index in the sidecar starting with the leftmost or you can specify a range of images + with the following syntax: ``start_index-end_index``. Example: + ``--slide 1`` will select only the first image, ``--slide last`` only the last one and ``--slide 1-3`` will select only + the first three images. + + .. versionadded:: 4.6 + .. option:: --no-metadata-json Do not create a JSON file containing the metadata of each post. @@ -238,10 +248,13 @@ How to Download .. option:: --request-timeout N - Seconds to wait before timing out a connection request. + Seconds to wait before timing out a connection request. Defaults to 300. .. versionadded:: 4.3 + .. versionchanged:: 4.6 + Enabled this option by default with a timeout of 300 seconds.
+ Miscellaneous Options ^^^^^^^^^^^^^^^^^^^^^ diff --git a/instaloader/__init__.py b/instaloader/__init__.py index 155722f..6b8cd6b 100644 --- a/instaloader/__init__.py +++ b/instaloader/__init__.py @@ -1,7 +1,7 @@ """Download pictures (or videos) along with their captions and other metadata from Instagram.""" -__version__ = '4.5.5' +__version__ = '4.6rc1' try: diff --git a/instaloader/__main__.py b/instaloader/__main__.py index accea66..0397033 100644 --- a/instaloader/__main__.py +++ b/instaloader/__main__.py @@ -267,6 +267,8 @@ def main(): help="Do not download regular posts.") g_prof.add_argument('--no-profile-pic', action='store_true', help='Do not download profile picture.') + g_post.add_argument('--slide', action='store', + help='Set what image/interval of a sidecar you want to download.') g_post.add_argument('--no-pictures', action='store_true', help='Do not download post pictures. Cannot be used together with --fast-update. ' 'Implies --no-video-thumbnails, does not imply --no-videos.') @@ -365,8 +367,8 @@ def main(): 'connection fails, it can be manually skipped by hitting CTRL+C. Set this to 0 to retry ' 'infinitely.') g_how.add_argument('--commit-mode', action='store_true', help=SUPPRESS) - g_how.add_argument('--request-timeout', metavar='N', type=float, - help='seconds to wait before timing out a connection request') + g_how.add_argument('--request-timeout', metavar='N', type=float, default=300.0, + help='Seconds to wait before timing out a connection request. 
Defaults to 300.') g_misc = parser.add_argument_group('Miscellaneous Options') g_misc.add_argument('-q', '--quiet', action='store_true', @@ -424,7 +426,8 @@ def main(): max_connection_attempts=args.max_connection_attempts, request_timeout=args.request_timeout, resume_prefix=resume_prefix, - check_resume_bbd=not args.use_aged_resume_files) + check_resume_bbd=not args.use_aged_resume_files, + slide=args.slide) _main(loader, args.profile, username=args.login.lower() if args.login is not None else None, diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index d89b37a..90ff125 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -14,6 +14,7 @@ from hashlib import md5 from io import BytesIO from pathlib import Path from typing import Any, Callable, IO, Iterator, List, Optional, Set, Union, cast +from urllib.parse import urlparse import requests import urllib3 # type: ignore @@ -22,7 +23,7 @@ from .exceptions import * from .instaloadercontext import InstaloaderContext, RateController from .nodeiterator import NodeIterator, resumable_iteration from .structures import (Hashtag, Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem, - load_structure_from_file, save_structure_to_file) + load_structure_from_file, save_structure_to_file, PostSidecarNode) def get_default_session_filename(username: str) -> str: @@ -101,6 +102,8 @@ class _ArbitraryItemFormatter(string.Formatter): def get_value(self, key, args, kwargs): """Override to substitute {ATTRIBUTE} by attributes of our _item.""" + if key == 'filename' and isinstance(self._item, (Post, StoryItem, PostSidecarNode)): + return "{filename}" if hasattr(self._item, key): return getattr(self._item, key) return super().get_value(key, args, kwargs) @@ -157,6 +160,7 @@ class Instaloader: :param rate_controller: Generator for a :class:`RateController` to override rate controlling behavior :param resume_prefix: :option:`--resume-prefix`, or None for :option:`--no-resume`. 
:param check_resume_bbd: Whether to check the date of expiry of resume files and reject them if expired. + :param slide: :option:`--slide` .. attribute:: context @@ -179,10 +183,11 @@ class Instaloader: post_metadata_txt_pattern: str = None, storyitem_metadata_txt_pattern: str = None, max_connection_attempts: int = 3, - request_timeout: Optional[float] = None, + request_timeout: float = 300.0, rate_controller: Optional[Callable[[InstaloaderContext], RateController]] = None, resume_prefix: Optional[str] = "iterator", - check_resume_bbd: bool = True): + check_resume_bbd: bool = True, + slide: Optional[str] = None): self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts, request_timeout, rate_controller) @@ -204,6 +209,31 @@ class Instaloader: self.resume_prefix = resume_prefix self.check_resume_bbd = check_resume_bbd + self.slide = slide or "" + self.slide_start = 0 + self.slide_end = -1 + if self.slide != "": + splitted = self.slide.split('-') + if len(splitted) == 1: + if splitted[0] == 'last': + # download only last image of a sidecar + self.slide_start = -1 + else: + if int(splitted[0]) > 0: + self.slide_start = self.slide_end = int(splitted[0])-1 + else: + raise InvalidArgumentException("--slide parameter must be greater than 0.") + elif len(splitted) == 2: + if splitted[1] == 'last': + self.slide_start = int(splitted[0])-1 + elif 0 < int(splitted[0]) < int(splitted[1]): + self.slide_start = int(splitted[0])-1 + self.slide_end = int(splitted[1])-1 + else: + raise InvalidArgumentException("Invalid data for --slide parameter.") + else: + raise InvalidArgumentException("Invalid data for --slide parameter.") + @contextmanager def anonymous_copy(self): """Yield an anonymous, otherwise equally-configured copy of an Instaloader instance; Then copy its error log.""" @@ -225,7 +255,8 @@ class Instaloader: max_connection_attempts=self.context.max_connection_attempts, request_timeout=self.context.request_timeout, 
resume_prefix=self.resume_prefix, - check_resume_bbd=self.check_resume_bbd) + check_resume_bbd=self.check_resume_bbd, + slide=self.slide) yield new_loader self.context.error_log.extend(new_loader.context.error_log) new_loader.context.error_log = [] # avoid double-printing of errors @@ -492,7 +523,20 @@ class Instaloader: .. versionadded:: 4.2""" self.context.two_factor_login(two_factor_code) - def format_filename(self, item: Union[Post, StoryItem], target: Optional[Union[str, Path]] = None): + @staticmethod + def __prepare_filename(filename_template: str, url: Callable[[], str]) -> str: + """Replace filename token inside filename_template with url's filename and assure the directories exist. + + .. versionadded:: 4.6""" + if "{filename}" in filename_template: + filename = filename_template.replace("{filename}", + os.path.splitext(os.path.basename(urlparse(url()).path))[0]) + else: + filename = filename_template + os.makedirs(os.path.dirname(filename), exist_ok=True) + return filename + + def format_filename(self, item: Union[Post, StoryItem, PostSidecarNode], target: Optional[Union[str, Path]] = None): """Format filename of a :class:`Post` or :class:`StoryItem` according to ``filename-pattern`` parameter. .. 
versionadded:: 4.1""" @@ -508,22 +552,35 @@ class Instaloader: """ dirname = _PostPathFormatter(post).format(self.dirname_pattern, target=target) - filename = os.path.join(dirname, self.format_filename(post, target=target)) - os.makedirs(os.path.dirname(filename), exist_ok=True) + filename_template = os.path.join(dirname, self.format_filename(post, target=target)) + filename = self.__prepare_filename(filename_template, lambda: post.url) # Download the image(s) / video thumbnail and videos within sidecars if desired downloaded = True if post.typename == 'GraphSidecar': if self.download_pictures or self.download_videos: - for edge_number, sidecar_node in enumerate(post.get_sidecar_nodes(), start=1): + for edge_number, sidecar_node in enumerate( + post.get_sidecar_nodes(self.slide_start, self.slide_end), + start=post.mediacount if self.slide_start < 0 else self.slide_start + 1 + ): if self.download_pictures and (not sidecar_node.is_video or self.download_video_thumbnails): + suffix = str(edge_number) + if '{filename}' in self.filename_pattern: + suffix = '' + # pylint:disable=cell-var-from-loop + filename = self.__prepare_filename(filename_template, lambda: sidecar_node.display_url) # Download sidecar picture or video thumbnail (--no-pictures implies --no-video-thumbnails) downloaded &= self.download_pic(filename=filename, url=sidecar_node.display_url, - mtime=post.date_local, filename_suffix=str(edge_number)) + mtime=post.date_local, filename_suffix=suffix) if sidecar_node.is_video and self.download_videos: + suffix = str(edge_number) + if '{filename}' in self.filename_pattern: + suffix = '' + # pylint:disable=cell-var-from-loop + filename = self.__prepare_filename(filename_template, lambda: sidecar_node.video_url) # Download sidecar video if desired downloaded &= self.download_pic(filename=filename, url=sidecar_node.video_url, - mtime=post.date_local, filename_suffix=str(edge_number)) + mtime=post.date_local, filename_suffix=suffix) elif post.typename == 
'GraphImage': # Download picture if self.download_pictures: @@ -638,13 +695,14 @@ class Instaloader: date_local = item.date_local dirname = _PostPathFormatter(item).format(self.dirname_pattern, target=target) - filename = os.path.join(dirname, self.format_filename(item, target=target)) - os.makedirs(os.path.dirname(filename), exist_ok=True) + filename_template = os.path.join(dirname, self.format_filename(item, target=target)) + filename = self.__prepare_filename(filename_template, lambda: item.url) downloaded = False if not item.is_video or self.download_video_thumbnails is True: url = item.url downloaded = self.download_pic(filename=filename, url=url, mtime=date_local) if item.is_video and self.download_videos is True: + filename = self.__prepare_filename(filename_template, lambda: str(item.video_url)) downloaded |= self.download_pic(filename=filename, url=item.video_url, mtime=date_local) # Save caption if desired metadata_string = _ArbitraryItemFormatter(item).format(self.storyitem_metadata_txt_pattern).strip() diff --git a/instaloader/instaloadercontext.py b/instaloader/instaloadercontext.py index 51e83f3..1407e80 100644 --- a/instaloader/instaloadercontext.py +++ b/instaloader/instaloadercontext.py @@ -25,10 +25,9 @@ def copy_session(session: requests.Session, request_timeout: Optional[float] = N new = requests.Session() new.cookies = requests.utils.cookiejar_from_dict(requests.utils.dict_from_cookiejar(session.cookies)) new.headers = session.headers.copy() - if request_timeout is not None: - # Override default timeout behavior. - # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427 - new.request = partial(new.request, timeout=request_timeout) # type: ignore + # Override default timeout behavior. + # Need to silence mypy bug for this. 
See: https://github.com/python/mypy/issues/2427 + new.request = partial(new.request, timeout=request_timeout) # type: ignore return new @@ -53,7 +52,7 @@ class InstaloaderContext: """ def __init__(self, sleep: bool = True, quiet: bool = False, user_agent: Optional[str] = None, - max_connection_attempts: int = 3, request_timeout: Optional[float] = None, + max_connection_attempts: int = 3, request_timeout: float = 300.0, rate_controller: Optional[Callable[["InstaloaderContext"], "RateController"]] = None): self.user_agent = user_agent if user_agent is not None else default_user_agent() @@ -161,10 +160,9 @@ class InstaloaderContext: 'ig_vw': '1920', 'csrftoken': '', 's_network': '', 'ds_user_id': ''}) session.headers.update(self._default_http_header(empty_session_only=True)) - if self.request_timeout is not None: - # Override default timeout behavior. - # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427 - session.request = partial(session.request, timeout=self.request_timeout) # type: ignore + # Override default timeout behavior. + # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427 + session.request = partial(session.request, timeout=self.request_timeout) # type: ignore return session def save_session_to_file(self, sessionfile): @@ -177,10 +175,9 @@ class InstaloaderContext: session.cookies = requests.utils.cookiejar_from_dict(pickle.load(sessionfile)) session.headers.update(self._default_http_header()) session.headers.update({'X-CSRFToken': session.cookies.get_dict()['csrftoken']}) - if self.request_timeout is not None: - # Override default timeout behavior. - # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427 - session.request = partial(session.request, timeout=self.request_timeout) # type: ignore + # Override default timeout behavior. + # Need to silence mypy bug for this. 
See: https://github.com/python/mypy/issues/2427 + session.request = partial(session.request, timeout=self.request_timeout) # type: ignore self._session = session self.username = username @@ -206,10 +203,9 @@ class InstaloaderContext: 'ig_vw': '1920', 'ig_cb': '1', 'csrftoken': '', 's_network': '', 'ds_user_id': ''}) session.headers.update(self._default_http_header()) - if self.request_timeout is not None: - # Override default timeout behavior. - # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427 - session.request = partial(session.request, timeout=self.request_timeout) # type: ignore + # Override default timeout behavior. + # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427 + session.request = partial(session.request, timeout=self.request_timeout) # type: ignore session.get('https://www.instagram.com/web/__mid/') csrf_token = session.cookies.get_dict()['csrftoken'] session.headers.update({'X-CSRFToken': csrf_token}) @@ -473,7 +469,7 @@ class InstaloaderContext: .. 
versionadded:: 4.2.1""" with copy_session(self._session, self.request_timeout) as tempsession: - tempsession.headers['User-Agent'] = 'Instagram 123.1.0.26.115 (iPhone12,1; iOS 13_3; en_US; en-US; ' \ + tempsession.headers['User-Agent'] = 'Instagram 146.0.0.27.125 (iPhone12,1; iOS 13_3; en_US; en-US; ' \ 'scale=2.00; 1656x3584; 190542906)' for header in ['Host', 'Origin', 'X-Instagram-AJAX', 'X-Requested-With']: tempsession.headers.pop(header, None) diff --git a/instaloader/py.typed b/instaloader/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/instaloader/structures.py b/instaloader/structures.py index 98415c9..045d636 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -252,26 +252,49 @@ class Post: """Type of post, GraphImage, GraphVideo or GraphSidecar""" return self._field('__typename') - def get_sidecar_nodes(self) -> Iterator[PostSidecarNode]: - """Sidecar nodes of a Post with typename==GraphSidecar.""" + @property + def mediacount(self) -> int: + """ + The number of media in a sidecar Post, or 1 if the Post is not a sidecar. + + .. versionadded:: 4.6 + """ + if self.typename == 'GraphSidecar': + edges = self._field('edge_sidecar_to_children', 'edges') + return len(edges) + return 1 + + def get_sidecar_nodes(self, start=0, end=-1) -> Iterator[PostSidecarNode]: + """ + Sidecar nodes of a Post with typename==GraphSidecar. + + .. versionchanged:: 4.6 + Added parameters *start* and *end* to specify a slice of sidecar media. + """ if self.typename == 'GraphSidecar': edges = self._field('edge_sidecar_to_children', 'edges') if any(edge['node']['is_video'] for edge in edges): # video_url is only present in full metadata, issue #558.
edges = self._full_metadata['edge_sidecar_to_children']['edges'] + if end < 0: + end = len(edges)-1 + if start < 0: + start = len(edges)-1 for idx, edge in enumerate(edges): - node = edge['node'] - is_video = node['is_video'] - display_url = node['display_url'] - if not is_video and self._context.is_logged_in: - try: - carousel_media = self._iphone_struct['carousel_media'] - orig_url = carousel_media[idx]['image_versions2']['candidates'][0]['url'] - display_url = re.sub(r'&se=\d+(&?)', r'\1', orig_url) - except (InstaloaderException, KeyError, IndexError) as err: - self._context.error('{} Unable to fetch high quality image version of {}.'.format(err, self)) - yield PostSidecarNode(is_video=is_video, display_url=display_url, - video_url=node['video_url'] if is_video else None) + if start <= idx <= end: + node = edge['node'] + is_video = node['is_video'] + display_url = node['display_url'] + if not is_video and self._context.is_logged_in: + try: + carousel_media = self._iphone_struct['carousel_media'] + orig_url = carousel_media[idx]['image_versions2']['candidates'][0]['url'] + display_url = re.sub(r'&se=\d+(&?)', r'\1', orig_url) + except (InstaloaderException, KeyError, IndexError) as err: + self._context.error('{} Unable to fetch high quality image version of {}.'.format( + err, self)) + yield PostSidecarNode(is_video=is_video, display_url=display_url, + video_url=node['video_url'] if is_video else None) @property def caption(self) -> Optional[str]: @@ -330,6 +353,12 @@ class Post: def video_url(self) -> Optional[str]: """URL of the video, or None.""" if self.is_video: + if self._context.is_logged_in: + try: + url = self._iphone_struct['video_versions'][0]['url'] + return url + except (InstaloaderException, KeyError, IndexError) as err: + self._context.error('{} Unable to fetch high quality video version of {}.'.format(err, self)) return self._field('video_url') return None @@ -934,11 +963,17 @@ class StoryItem: self._context = context self._node = node 
self._owner_profile = owner_profile + self._iphone_struct_ = None + if 'iphone_struct' in node: + # if loaded from JSON with load_structure_from_file() + self._iphone_struct_ = node['iphone_struct'] def _asdict(self): node = self._node if self._owner_profile: node['owner'] = self._owner_profile._asdict() + if self._iphone_struct_: + node['iphone_struct'] = self._iphone_struct_ return node @property @@ -963,6 +998,15 @@ class StoryItem: def __hash__(self) -> int: return hash(self.mediaid) + @property + def _iphone_struct(self) -> Dict[str, Any]: + if not self._context.is_logged_in: + raise LoginRequiredException("--login required to access iPhone media info endpoint.") + if not self._iphone_struct_: + data = self._context.get_iphone_json(path='api/v1/media/{}/info/'.format(self.mediaid), params={}) + self._iphone_struct_ = data['items'][0] + return self._iphone_struct_ + @property def owner_profile(self) -> Profile: """:class:`Profile` instance of the story item's owner.""" @@ -1014,6 +1058,13 @@ class StoryItem: @property def url(self) -> str: """URL of the picture / video thumbnail of the StoryItem""" + if self.typename == "GraphStoryImage" and self._context.is_logged_in: + try: + orig_url = self._iphone_struct['image_versions2']['candidates'][0]['url'] + url = re.sub(r'&se=\d+(&?)', r'\1', orig_url) + return url + except (InstaloaderException, KeyError, IndexError) as err: + self._context.error('{} Unable to fetch high quality image version of {}.'.format(err, self)) return self._node['display_resources'][-1]['src'] @property diff --git a/setup.py b/setup.py index b125674..7a70eec 100755 --- a/setup.py +++ b/setup.py @@ -38,6 +38,7 @@ setup( name='instaloader', version=get_version(), packages=['instaloader'], + package_data={'instaloader': ['py.typed']}, url='https://instaloader.github.io/', license='MIT', author='Alexander Graf, André Koch-Kramer', @@ -48,7 +49,7 @@ setup( install_requires=requirements, python_requires='>=3.5', entry_points={'console_scripts': 
['instaloader=instaloader.__main__:main']}, - zip_safe=True, + zip_safe=False, keywords=keywords, classifiers=[ 'Development Status :: 5 - Production/Stable',