From e471bd5ad388304eb0f583ffeaac48e4c15e064d Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 10 Sep 2017 12:37:15 +0200 Subject: [PATCH] Post properties caption_mentions and tagged_users caption_mentions is a list of all lowercased profiles that are mentioned in the Post's caption, without the preceding '@'. tagged_users is a list of all lowercased users that are tagged in the Post. This was requested in #47. Just like all properties of the instaloader.Post class, caption_mentions and tagged_users are available for --only-if filters. --- instaloader.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/instaloader.py b/instaloader.py index d5f6fb9..fca3654 100755 --- a/instaloader.py +++ b/instaloader.py @@ -291,6 +291,25 @@ class Post: hashtag_regex = re.compile(r"(?:#)(\w(?:(?:\w|(?:\.(?!\.))){0,28}(?:\w))?)") return re.findall(hashtag_regex, self.caption.lower()) + @property + def caption_mentions(self) -> List[str]: + """List of all lowercased profiles that are mentioned in the Post's caption, without preceeding @.""" + if not self.caption: + return [] + # This regular expression is from jStassen, adjusted to use Python's \w to support Unicode + # http://blog.jstassen.com/2016/03/code-regex-for-instagram-username-and-hashtags/ + mention_regex = re.compile(r"(?:@)(\w(?:(?:\w|(?:\.(?!\.))){0,28}(?:\w))?)") + return re.findall(mention_regex, self.caption.lower()) + + @property + def tagged_users(self) -> List[str]: + """List of all lowercased users that are tagged in the Post.""" + try: + return [edge['node']['user']['username' ].lower() for edge in self._field('edge_media_to_tagged_user', + 'edges')] + except KeyError: + return [] + @property def is_video(self) -> bool: """True if the Post is a video."""