1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-10-05 14:57:08 +02:00

Have --storyitem-filter (--only-if for StoryItems)

This commit is contained in:
Alexander Graf 2018-04-19 15:12:05 +02:00
parent 7688bdce45
commit 7d2830b8b2
4 changed files with 74 additions and 45 deletions

View File

@ -83,14 +83,22 @@ automatically **finds it by its unique ID** and renames the folder likewise.
download stories. Requires :option:`--login`. Does not imply
:option:`--no-profile-pic`.
.. option:: --only-if filter
.. option:: --post-filter filter, --only-if filter
Expression that, if given, must evaluate to True for each post to be
downloaded. Must be a syntactically valid Python expression. Variables are
evaluated to :class:`instaloader.Post` attributes. Example:
``--only-if=viewer_has_liked``. See :ref:`filter-posts` for more
``--post-filter=viewer_has_liked``. See :ref:`filter-posts` for more
examples.
.. option:: --storyitem-filter filter
Expression that, if given, must evaluate to True for each storyitem to be
downloaded. Must be a syntactically valid Python expression. Variables are
evaluated to :class:`instaloader.StoryItem` attributes.
See :ref:`filter-posts` for more examples.
When to Stop Downloading
^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -4,7 +4,7 @@ import ast
import os
import sys
from argparse import ArgumentParser, SUPPRESS
from typing import Callable, List, Optional
from typing import List, Optional
from . import (Instaloader, InstaloaderException, InvalidArgumentException, Post, Profile, ProfileNotExistsException,
StoryItem, __version__, load_structure_from_file)
@ -23,8 +23,9 @@ def usage_string():
{0} --help""".format(argv0, len(argv0), '')
def filterstr_to_filterfunc(filter_str: str, logged_in: bool) -> Callable[['Post'], bool]:
"""Takes an --only-if=... filter specification and makes a filter_func Callable out of it."""
def filterstr_to_filterfunc(filter_str: str, item_type: type):
"""Takes a --post-filter=... or --storyitem-filter=... filter
specification and makes a filter_func Callable out of it."""
# The filter_str is parsed, then all names occurring in its AST are replaced by loads to item.<name>. A
# function item_type->bool is returned which evaluates the filter with the item as 'item' in its namespace.
@ -34,21 +35,20 @@ def filterstr_to_filterfunc(filter_str: str, logged_in: bool) -> Callable[['Post
# pylint:disable=no-self-use
if not isinstance(node.ctx, ast.Load):
raise InvalidArgumentException("Invalid filter: Modifying variables ({}) not allowed.".format(node.id))
if not hasattr(Post, node.id):
raise InvalidArgumentException("Invalid filter: Name {} is not defined.".format(node.id))
if node.id in Post.LOGIN_REQUIRING_PROPERTIES and not logged_in:
raise InvalidArgumentException("Invalid filter: Name {} requires being logged in.".format(node.id))
new_node = ast.Attribute(ast.copy_location(ast.Name('post', ast.Load()), node), node.id,
if not hasattr(item_type, node.id):
raise InvalidArgumentException("Invalid filter: {} not a {} attribute.".format(node.id,
item_type.__name__))
new_node = ast.Attribute(ast.copy_location(ast.Name('item', ast.Load()), node), node.id,
ast.copy_location(ast.Load(), node))
return ast.copy_location(new_node, node)
input_filename = '<--only-if parameter>'
input_filename = '<command line filter parameter>'
compiled_filter = compile(TransformFilterAst().visit(ast.parse(filter_str, filename=input_filename, mode='eval')),
filename=input_filename, mode='eval')
def filterfunc(post: 'Post') -> bool:
def filterfunc(item) -> bool:
# pylint:disable=eval-used
return bool(eval(compiled_filter, {'post': post}))
return bool(eval(compiled_filter, {'item': item}))
return filterfunc
@ -59,14 +59,18 @@ def _main(instaloader: Instaloader, targetlist: List[str],
profile_pic: bool = True, profile_pic_only: bool = False,
fast_update: bool = False,
stories: bool = False, stories_only: bool = False,
filter_str: Optional[str] = None) -> None:
post_filter_str: Optional[str] = None,
storyitem_filter_str: Optional[str] = None) -> None:
"""Download set of profiles, hashtags etc. and handle logging in and session files if desired."""
# Parse and generate filter function
if filter_str is not None:
filter_func = filterstr_to_filterfunc(filter_str, username is not None)
instaloader.context.log('Only download posts with property "{}".'.format(filter_str))
else:
filter_func = None
post_filter = None
if post_filter_str is not None:
post_filter = filterstr_to_filterfunc(post_filter_str, Post)
instaloader.context.log('Only download posts with property "{}".'.format(post_filter_str))
storyitem_filter = None
if storyitem_filter_str is not None:
storyitem_filter = filterstr_to_filterfunc(storyitem_filter_str, StoryItem)
instaloader.context.log('Only download storyitems with property "{}".'.format(storyitem_filter_str))
# Login, if desired
if username is not None:
try:
@ -90,9 +94,15 @@ def _main(instaloader: Instaloader, targetlist: List[str],
with instaloader.context.error_catcher(target):
structure = load_structure_from_file(instaloader.context, target)
if isinstance(structure, Post):
if post_filter is not None and not post_filter(structure):
instaloader.context.log("<{} ({}) skipped>".format(structure, target), flush=True)
continue
instaloader.context.log("Downloading {} ({})".format(structure, target))
instaloader.download_post(structure, os.path.dirname(target))
elif isinstance(structure, StoryItem):
if storyitem_filter is not None and not storyitem_filter(structure):
instaloader.context.log("<{} ({}) skipped>".format(structure, target), flush=True)
continue
instaloader.context.log("Attempting to download {} ({})".format(structure, target))
instaloader.download_storyitem(structure, os.path.dirname(target))
elif isinstance(structure, Profile):
@ -112,15 +122,15 @@ def _main(instaloader: Instaloader, targetlist: List[str],
profiles.update([followee.username for followee in followees])
elif target[0] == '#':
instaloader.download_hashtag(hashtag=target[1:], max_count=max_count, fast_update=fast_update,
filter_func=filter_func)
post_filter=post_filter)
elif target == ":feed":
instaloader.download_feed_posts(fast_update=fast_update, max_count=max_count,
filter_func=filter_func)
post_filter=post_filter)
elif target == ":stories":
instaloader.download_stories(fast_update=fast_update)
instaloader.download_stories(fast_update=fast_update, storyitem_filter=storyitem_filter)
elif target == ":saved":
instaloader.download_saved_posts(fast_update=fast_update, max_count=max_count,
filter_func=filter_func)
post_filter=post_filter)
else:
profiles.add(target)
if len(profiles) > 1:
@ -130,7 +140,8 @@ def _main(instaloader: Instaloader, targetlist: List[str],
with instaloader.context.error_catcher(target):
try:
instaloader.download_profile(target, profile_pic, profile_pic_only, fast_update,
stories, stories_only, filter_func=filter_func)
stories, stories_only, post_filter=post_filter,
storyitem_filter=storyitem_filter)
except ProfileNotExistsException as err:
if not instaloader.context.is_logged_in:
instaloader.context.log(err)
@ -138,7 +149,7 @@ def _main(instaloader: Instaloader, targetlist: List[str],
with instaloader.anonymous_copy() as anonymous_loader:
with instaloader.context.error_catcher():
anonymous_loader.download_profile(target, profile_pic, profile_pic_only,
fast_update, filter_func=filter_func)
fast_update, post_filter=post_filter)
else:
raise
except KeyboardInterrupt:
@ -210,10 +221,14 @@ def main():
g_what.add_argument('--stories-only', action='store_true',
help='Rather than downloading regular posts of each specified profile, only download '
'stories. Requires --login. Does not imply --no-profile-pic.')
g_what.add_argument('--only-if', metavar='filter',
g_what.add_argument('--post-filter', '--only-if', metavar='filter',
help='Expression that, if given, must evaluate to True for each post to be downloaded. Must be '
'a syntactically valid python expression. Variables are evaluated to '
'instaloader.Post attributes. Example: --only-if=viewer_has_liked.')
'instaloader.Post attributes. Example: --post-filter=viewer_has_liked.')
g_what.add_argument('--storyitem-filter', metavar='filter',
help='Expression that, if given, must evaluate to True for each storyitem to be downloaded. '
'Must be a syntactically valid python expression. Variables are evaluated to '
'instaloader.StoryItem attributes.')
g_stop = parser.add_argument_group('When to Stop Downloading',
'If none of these options are given, Instaloader goes through all pictures '
@ -279,7 +294,7 @@ def main():
if ':feed-all' in args.profile or ':feed-liked' in args.profile:
raise SystemExit(":feed-all and :feed-liked were removed. Use :feed as target and "
"eventually --only-if=viewer_has_liked.")
"eventually --post-filter=viewer_has_liked.")
post_metadata_txt_pattern = '\n'.join(args.post_metadata_txt) if args.post_metadata_txt else None
storyitem_metadata_txt_pattern = '\n'.join(args.storyitem_metadata_txt) if args.storyitem_metadata_txt else None
@ -304,7 +319,8 @@ def main():
fast_update=args.fast_update,
stories=args.stories,
stories_only=args.stories_only,
filter_str=args.only_if)
post_filter_str=args.post_filter,
storyitem_filter_str=args.storyitem_filter)
loader.close()
except InstaloaderException as err:
raise SystemExit("Fatal error: %s" % err)

View File

@ -372,7 +372,8 @@ class Instaloader:
def download_stories(self,
userids: Optional[List[int]] = None,
fast_update: bool = False,
filename_target: str = ':stories') -> None:
filename_target: str = ':stories',
storyitem_filter: Optional[Callable[[StoryItem], bool]] = None) -> None:
"""
Download available stories from user followees or all stories of users whose IDs are given.
Does not mark stories as seen.
@ -381,6 +382,7 @@ class Instaloader:
:param userids: List of user IDs to be processed in terms of downloading their stories
:param fast_update: If true, abort when first already-downloaded picture is encountered
:param filename_target: Replacement for {target} in dirname_pattern and filename_pattern
:param storyitem_filter: function(storyitem), which returns True if given StoryItem should be downloaded
"""
if not userids:
@ -392,6 +394,9 @@ class Instaloader:
totalcount = user_story.itemcount
count = 1
for item in user_story.get_items():
if storyitem_filter is not None and not storyitem_filter(item):
self.context.log("<{} skipped>".format(item), flush=True)
continue
self.context.log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
count += 1
with self.context.error_catcher('Download story from user {}'.format(name)):
@ -451,7 +456,7 @@ class Instaloader:
@_requires_login
def download_feed_posts(self, max_count: int = None, fast_update: bool = False,
filter_func: Optional[Callable[[Post], bool]] = None) -> None:
post_filter: Optional[Callable[[Post], bool]] = None) -> None:
"""
Download pictures from the user's feed.
@ -460,11 +465,11 @@ class Instaloader:
loader = Instaloader()
loader.load_session_from_file('USER')
loader.download_feed_posts(max_count=20, fast_update=True,
filter_func=lambda post: post.viewer_has_liked)
post_filter=lambda post: post.viewer_has_liked)
:param max_count: Maximum count of pictures to download
:param fast_update: If true, abort when first already-downloaded picture is encountered
:param filter_func: function(post), which returns True if given picture should be downloaded
:param post_filter: function(post), which returns True if given picture should be downloaded
"""
self.context.log("Retrieving pictures from your feed...")
count = 1
@ -472,7 +477,7 @@ class Instaloader:
if max_count is not None and count > max_count:
break
name = post.owner_username
if filter_func is not None and not filter_func(post):
if post_filter is not None and not post_filter(post):
self.context.log("<pic by %s skipped>" % name, flush=True)
continue
self.context.log("[%3i] %s " % (count, name), end="", flush=True)
@ -484,12 +489,12 @@ class Instaloader:
@_requires_login
def download_saved_posts(self, max_count: int = None, fast_update: bool = False,
filter_func: Optional[Callable[[Post], bool]] = None) -> None:
post_filter: Optional[Callable[[Post], bool]] = None) -> None:
"""Download user's saved pictures.
:param max_count: Maximum count of pictures to download
:param fast_update: If true, abort when first already-downloaded picture is encountered
:param filter_func: function(post), which returns True if given picture should be downloaded
:param post_filter: function(post), which returns True if given picture should be downloaded
"""
self.context.log("Retrieving saved posts...")
count = 1
@ -497,7 +502,7 @@ class Instaloader:
if max_count is not None and count > max_count:
break
name = post.owner_username
if filter_func is not None and not filter_func(post):
if post_filter is not None and not post_filter(post):
self.context.log("<pic by {} skipped".format(name), flush=True)
continue
self.context.log("[{:>3}] {} ".format(count, name), end=str(), flush=True)
@ -534,7 +539,7 @@ class Instaloader:
def download_hashtag(self, hashtag: str,
max_count: Optional[int] = None,
filter_func: Optional[Callable[[Post], bool]] = None,
post_filter: Optional[Callable[[Post], bool]] = None,
fast_update: bool = False) -> None:
"""Download pictures of one hashtag.
@ -545,7 +550,7 @@ class Instaloader:
:param hashtag: Hashtag to download, without leading '#'
:param max_count: Maximum count of pictures to download
:param filter_func: function(post), which returns True if given picture should be downloaded
:param post_filter: function(post), which returns True if given picture should be downloaded
:param fast_update: If true, abort when first already-downloaded picture is encountered
"""
hashtag = hashtag.lower()
@ -555,7 +560,7 @@ class Instaloader:
if max_count is not None and count > max_count:
break
self.context.log('[{0:3d}] #{1} '.format(count, hashtag), end='', flush=True)
if filter_func is not None and not filter_func(post):
if post_filter is not None and not post_filter(post):
self.context.log('<skipped>')
continue
count += 1
@ -626,7 +631,8 @@ class Instaloader:
profile_pic: bool = True, profile_pic_only: bool = False,
fast_update: bool = False,
download_stories: bool = False, download_stories_only: bool = False,
filter_func: Optional[Callable[[Post], bool]] = None) -> None:
post_filter: Optional[Callable[[Post], bool]] = None,
storyitem_filter: Optional[Callable[[StoryItem], bool]] = None) -> None:
"""Download one profile"""
# Get profile main page json
@ -667,7 +673,8 @@ class Instaloader:
# Download stories, if requested
if download_stories or download_stories_only:
with self.context.error_catcher("Download stories of {}".format(profile_name)):
self.download_stories(userids=[profile.userid], filename_target=profile_name, fast_update=fast_update)
self.download_stories(userids=[profile.userid], filename_target=profile_name, fast_update=fast_update,
storyitem_filter=storyitem_filter)
if download_stories_only:
return
@ -678,7 +685,7 @@ class Instaloader:
for post in profile.get_posts():
self.context.log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
count += 1
if filter_func is not None and not filter_func(post):
if post_filter is not None and not post_filter(post):
self.context.log('<skipped>')
continue
with self.context.error_catcher('Download profile {}'.format(profile_name)):

View File

@ -36,8 +36,6 @@ class Post:
parameter and exported into JSON files with :option:`--metadata-json`.
"""
LOGIN_REQUIRING_PROPERTIES = ["viewer_has_liked"]
def __init__(self, context: InstaloaderContext, node: Dict[str, Any],
owner_profile: Optional['Profile'] = None):
"""Create a Post instance from a node structure as returned by Instagram.