From e9207f095fc369425229aecf2876813831920cb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Koch-Kramer?= Date: Wed, 8 Nov 2017 15:58:33 +0100 Subject: [PATCH] Use Post class attributes in filename-pattern - Added owner_id and mediaid to Post class properties. - In case of not downloading stories, the attributes of the Post class can now be used in filename-pattern, e.g. {post.owner_id} or {post.mediaid}. Closes #53. --- docs/README.md | 2 +- docs/basic-usage.rst | 3 ++- docs/cli-options.rst | 7 +++++-- instaloader.py | 37 ++++++++++++++++++++++++++++++------- 4 files changed, 38 insertions(+), 11 deletions(-) diff --git a/docs/README.md b/docs/README.md index 7fe8d8d..b4c538f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -10,6 +10,6 @@ https://instaloader.readthedocs.io/ The documentation is created with [Sphinx](http://www.sphinx-doc.org/). To build it, use ``` -pip3 install sphinx +pip3 install sphinx sphinx-autodoc-typehints make html ``` diff --git a/docs/basic-usage.rst b/docs/basic-usage.rst index bd4f3b5..94c2bab 100644 --- a/docs/basic-usage.rst +++ b/docs/basic-usage.rst @@ -96,7 +96,8 @@ pattern, the token ``{target}`` is replaced by the target name, and to the target directory. The default is ``--filename-pattern={date}``. The tokens ``{target}`` and ``{profile}`` are replaced like in the dirname pattern. Further, the tokens ``{date}`` and ``{shortcode}`` are -defined. +defined. Additionally, in case of not downloading stories, the attributes of +:class:`.Post` can be used, e.g. ``{post.owner_id}`` or ``{post.mediaid}``. For example, encode the poster's profile name in the filenames with: diff --git a/docs/cli-options.rst b/docs/cli-options.rst index 6893f31..a650b35 100644 --- a/docs/cli-options.rst +++ b/docs/cli-options.rst @@ -134,8 +134,11 @@ How to Download with ``--dirname-pattern``. ``{profile}`` is replaced by the profile name, ``{target}`` is replaced by the target you specified, i.e. either ``:feed``, ``#hashtag`` or the profile name. Also, the fields ``{date}`` and - ``{shortcode}`` can be specified. Defaults to ``{date:%Y-%m-%d_%H-%M-%S}``. - See :ref:`filename-specification`. + ``{shortcode}`` can be specified. In case of not downloading stories, the + attributes of the :class:`.Post` class can be used in addition, e.g. + ``{post.owner_id}`` or ``{post.mediaid}``. + Defaults to + ``{date:%Y-%m-%d_%H-%M-%S}``. See :ref:`filename-specification`. .. option:: --user-agent USER_AGENT diff --git a/instaloader.py b/instaloader.py index 48e950b..85fdaf9 100755 --- a/instaloader.py +++ b/instaloader.py @@ -126,7 +126,7 @@ def mediaid_to_shortcode(mediaid: int) -> str: def format_string_contains_key(format_string: str, key: str) -> bool: # pylint:disable=unused-variable for literal_text, field_name, format_spec, conversion in string.Formatter().parse(format_string): - if field_name == key: + if field_name == key or field_name.startswith(key + '.'): return True return False @@ -176,7 +176,8 @@ class Post: LOGIN_REQUIRING_PROPERTIES = ["viewer_has_liked"] - def __init__(self, instaloader: 'Instaloader', node: Dict[str, Any], profile: Optional[str] = None): + def __init__(self, instaloader: 'Instaloader', node: Dict[str, Any], + profile: Optional[str] = None, profile_id: Optional[int] = None): """Create a Post instance from a node structure as returned by Instagram. :param instaloader: :class:`Instaloader` instance used for additional queries if neccessary. @@ -186,6 +187,7 @@ class Post: self._instaloader = instaloader self._node = node self._profile = profile + self._profile_id = profile_id self._full_metadata_dict = None @classmethod @@ -206,6 +208,11 @@ class Post: """Media shortcode. URL of the post is instagram.com/p//.""" return self._node['shortcode'] if 'shortcode' in self._node else self._node['code'] + @property + def mediaid(self) -> int: + """The mediaid is a decimal representation of the media shortcode.""" + return int(self._node['id']) + def __repr__(self): return ''.format(self.shortcode) @@ -252,6 +259,13 @@ class Post: self._instaloader.error("Get owner name of {}: {} -- using \'UNKNOWN\'.".format(self, err)) return 'UNKNOWN' + @property + def owner_id(self) -> int: + """The ID of the Post's owner.""" + if self._profile_id: + return self._profile_id + return int(self._field('owner', 'id')) + @property def date(self) -> datetime: """Timestamp when the post was created.""" @@ -889,7 +903,8 @@ class Instaloader: profilename = post.owner_username if needs_profilename else None dirname = self.dirname_pattern.format(profile=profilename, target=target.lower()) filename = dirname + '/' + self.filename_pattern.format(profile=profilename, target=target.lower(), - date=post.date, shortcode=post.shortcode) + date=post.date, shortcode=post.shortcode, + post=post) os.makedirs(os.path.dirname(filename), exist_ok=True) # Download the image(s) / video thumbnail and videos within sidecars if desired @@ -993,6 +1008,10 @@ class Instaloader: :param filename_target: Replacement for {target} in dirname_pattern and filename_pattern """ + if format_string_contains_key(self.filename_pattern, 'post'): + raise InvalidArgumentException("The \"post\" keyword is not supported in the filename pattern when " + "downloading stories.") + if not self.is_logged_in: raise LoginRequiredException('Login required to download stories') @@ -1202,7 +1221,9 @@ class Instaloader: def get_profile_posts(self, profile_metadata: Dict[str, Any]) -> Iterator[Post]: """Retrieve all posts from a profile.""" profile_name = profile_metadata['user']['username'] - yield from (Post(self, node, profile=profile_name) for node in profile_metadata['user']['media']['nodes']) + profile_id = int(profile_metadata['user']['id']) + yield from (Post(self, node, profile=profile_name, profile_id=profile_id) + for node in profile_metadata['user']['media']['nodes']) has_next_page = profile_metadata['user']['media']['page_info']['has_next_page'] end_cursor = profile_metadata['user']['media']['page_info']['end_cursor'] while has_next_page: @@ -1213,7 +1234,8 @@ class Instaloader: 'after': end_cursor}, 'https://www.instagram.com/{0}/'.format(profile_name)) media = data['data']['user']['edge_owner_to_timeline_media'] - yield from (Post(self, edge['node'], profile=profile_name) for edge in media['edges']) + yield from (Post(self, edge['node'], profile=profile_name, profile_id=profile_id) + for edge in media['edges']) has_next_page = media['page_info']['has_next_page'] end_cursor = media['page_info']['end_cursor'] @@ -1468,8 +1490,9 @@ def main(): help='Prefix of filenames. Posts are stored in the directory whose pattern is given with ' '--dirname-pattern. {profile} is replaced by the profile name, ' '{target} is replaced by the target you specified, i.e. either :feed, #hashtag or the ' - 'profile name. Also, the fields date and shortcode can be specified. Defaults to ' - '\'{date:%%Y-%%m-%%d_%%H-%%M-%%S}\'.') + 'profile name. Also, the fields {date} and {shortcode} can be specified. In case of not ' + 'downloading stories, the attributes of the Post class can be used in addition, e.g. ' + '{post.owner_id} or {post.mediaid}. Defaults to \'{date:%%Y-%%m-%%d_%%H-%%M-%%S}\'.') g_how.add_argument('--user-agent', help='User Agent to use for HTTP requests. Defaults to \'{}\'.'.format(default_user_agent())) g_how.add_argument('-S', '--no-sleep', action='store_true', help=SUPPRESS)