mirror of
https://github.com/instaloader/instaloader.git
synced 2024-11-20 17:22:31 +01:00
--latest-stamps, like --fast-update, but without the need to keep downloaded files (#1131)
Adds the --latest-stamps command line option, pointing to a file in which the latest time each profile was scraped is stored. On the next run, only posts newer than that time are downloaded. Fixes #1122.
This commit is contained in:
parent
5345470ebf
commit
d142fb70b0
10
README.rst
10
README.rst
@ -86,6 +86,16 @@ To later **update your local copy** of that profiles, you may run
|
||||
If ``--fast-update`` is given, Instaloader stops when arriving at the
|
||||
first already-downloaded picture.
|
||||
|
||||
Alternatively, you can use ``--latest-stamps`` to have Instaloader store
|
||||
the time each profile was last downloaded and only download newer media:
|
||||
|
||||
::
|
||||
|
||||
instaloader --latest-stamps -- profile [profile ...]
|
||||
|
||||
With this option it's possible to move or delete downloaded media and still keep
|
||||
the archive updated.
|
||||
|
||||
When updating profiles, Instaloader
|
||||
automatically **detects profile name changes** and renames the target directory
|
||||
accordingly.
|
||||
|
@ -39,6 +39,16 @@ To later **update your local copy** of that profiles, you may run
|
||||
If :option:`--fast-update` is given, Instaloader stops when arriving at the
|
||||
first already-downloaded picture.
|
||||
|
||||
Alternatively, you can use :option:`--latest-stamps` to have Instaloader store
|
||||
the time each profile was last downloaded and only download newer media:
|
||||
|
||||
::
|
||||
|
||||
instaloader --latest-stamps -- profile [profile ...]
|
||||
|
||||
With this option it's possible to move or delete downloaded media and still keep
|
||||
the archive updated.
|
||||
|
||||
When updating profiles, Instaloader
|
||||
automatically **detects profile name changes** and renames the target directory
|
||||
accordingly.
|
||||
|
@ -146,6 +146,23 @@ Which Posts to Download
|
||||
This flag is recommended when you use Instaloader to update your personal
|
||||
Instagram archive.
|
||||
|
||||
.. option:: --latest-stamps [STAMPSFILE]
|
||||
|
||||
Works similarly to :option:`--fast-update`, but instead of relying on already
|
||||
downloaded media, the time each profile was downloaded is stored, and only
|
||||
media newer than the last download is fetched. This allows updating your
|
||||
personal Instagram archive while emptying the target directories.
|
||||
|
||||
Only works for media associated with a specific profile, and that is returned
|
||||
in chronological order: profile posts, profile stories, profile IGTV posts
|
||||
and profile tagged posts.
|
||||
|
||||
By default, the information is stored in
|
||||
``~/.config/instaloader/latest-stamps.ini``, but you can specify an
|
||||
alternative location.
|
||||
|
||||
.. versionadded:: 4.8
|
||||
|
||||
.. option:: --post-filter filter, --only-if filter
|
||||
|
||||
Expression that, if given, must evaluate to True for each post to be
|
||||
|
@ -84,3 +84,9 @@ Loading and Saving
|
||||
.. autofunction:: get_json_structure
|
||||
|
||||
.. autofunction:: save_structure_to_file
|
||||
|
||||
LatestStamps
|
||||
""""""""""""
|
||||
|
||||
.. autoclass:: LatestStamps
|
||||
:no-show-inheritance:
|
||||
|
@ -15,6 +15,7 @@ else:
|
||||
from .exceptions import *
|
||||
from .instaloader import Instaloader
|
||||
from .instaloadercontext import InstaloaderContext, RateController
|
||||
from .lateststamps import LatestStamps
|
||||
from .nodeiterator import NodeIterator, FrozenNodeIterator, resumable_iteration
|
||||
from .structures import (Hashtag, Highlight, Post, PostSidecarNode, PostComment, PostCommentAnswer, PostLocation,
|
||||
Profile, Story, StoryItem, TopSearchResults, load_structure_from_file, save_structure_to_file,
|
||||
|
@ -11,8 +11,9 @@ from typing import List, Optional
|
||||
from . import (AbortDownloadException, BadCredentialsException, Instaloader, InstaloaderException,
|
||||
InvalidArgumentException, Post, Profile, ProfileNotExistsException, StoryItem,
|
||||
TwoFactorAuthRequiredException, __version__, load_structure_from_file)
|
||||
from .instaloader import get_default_session_filename
|
||||
from .instaloader import (get_default_session_filename, get_default_stamps_filename)
|
||||
from .instaloadercontext import default_user_agent
|
||||
from .lateststamps import LatestStamps
|
||||
|
||||
|
||||
def usage_string():
|
||||
@ -76,6 +77,7 @@ def _main(instaloader: Instaloader, targetlist: List[str],
|
||||
download_tagged: bool = False,
|
||||
download_igtv: bool = False,
|
||||
fast_update: bool = False,
|
||||
latest_stamps_file: Optional[str] = None,
|
||||
max_count: Optional[int] = None, post_filter_str: Optional[str] = None,
|
||||
storyitem_filter_str: Optional[str] = None) -> None:
|
||||
"""Download set of profiles, hashtags etc. and handle logging in and session files if desired."""
|
||||
@ -88,6 +90,9 @@ def _main(instaloader: Instaloader, targetlist: List[str],
|
||||
if storyitem_filter_str is not None:
|
||||
storyitem_filter = filterstr_to_filterfunc(storyitem_filter_str, StoryItem)
|
||||
instaloader.context.log('Only download storyitems with property "{}".'.format(storyitem_filter_str))
|
||||
latest_stamps = None
|
||||
if latest_stamps_file is not None:
|
||||
latest_stamps = LatestStamps(latest_stamps_file)
|
||||
# Login, if desired
|
||||
if username is not None:
|
||||
if not re.match(r"^[A-Za-z0-9._]+$", username):
|
||||
@ -172,7 +177,7 @@ def _main(instaloader: Instaloader, targetlist: List[str],
|
||||
post_filter=post_filter)
|
||||
elif re.match(r"^[A-Za-z0-9._]+$", target):
|
||||
try:
|
||||
profile = instaloader.check_profile_id(target)
|
||||
profile = instaloader.check_profile_id(target, latest_stamps)
|
||||
if instaloader.context.is_logged_in and profile.has_blocked_viewer:
|
||||
if download_profile_pic or ((download_posts or download_tagged or download_igtv)
|
||||
and not profile.is_private):
|
||||
@ -191,7 +196,8 @@ def _main(instaloader: Instaloader, targetlist: List[str],
|
||||
instaloader.context.log("Trying again anonymously, helps in case you are just blocked.")
|
||||
with instaloader.anonymous_copy() as anonymous_loader:
|
||||
with instaloader.context.error_catcher():
|
||||
anonymous_retry_profiles.add(anonymous_loader.check_profile_id(target))
|
||||
anonymous_retry_profiles.add(anonymous_loader.check_profile_id(target,
|
||||
latest_stamps))
|
||||
instaloader.context.error("Warning: {} will be downloaded anonymously (\"{}\")."
|
||||
.format(target, err))
|
||||
else:
|
||||
@ -212,14 +218,15 @@ def _main(instaloader: Instaloader, targetlist: List[str],
|
||||
instaloader.download_profiles(profiles,
|
||||
download_profile_pic, download_posts, download_tagged, download_igtv,
|
||||
download_highlights, download_stories,
|
||||
fast_update, post_filter, storyitem_filter)
|
||||
fast_update, post_filter, storyitem_filter, latest_stamps=latest_stamps)
|
||||
if anonymous_retry_profiles:
|
||||
instaloader.context.log("Downloading anonymously: {}"
|
||||
.format(' '.join([p.username for p in anonymous_retry_profiles])))
|
||||
with instaloader.anonymous_copy() as anonymous_loader:
|
||||
anonymous_loader.download_profiles(anonymous_retry_profiles,
|
||||
download_profile_pic, download_posts, download_tagged, download_igtv,
|
||||
fast_update=fast_update, post_filter=post_filter)
|
||||
fast_update=fast_update, post_filter=post_filter,
|
||||
latest_stamps=latest_stamps)
|
||||
except KeyboardInterrupt:
|
||||
print("\nInterrupted by user.", file=sys.stderr)
|
||||
except AbortDownloadException as exc:
|
||||
@ -324,7 +331,10 @@ def main():
|
||||
g_cond.add_argument('-F', '--fast-update', action='store_true',
|
||||
help='For each target, stop when encountering the first already-downloaded picture. This '
|
||||
'flag is recommended when you use Instaloader to update your personal Instagram archive.')
|
||||
|
||||
g_cond.add_argument('--latest-stamps', nargs='?', metavar='STAMPSFILE', const=get_default_stamps_filename(),
|
||||
help='Store the timestamps of latest media scraped for each profile. This allows updating '
|
||||
'your personal Instagram archive even if you delete the destination directories. '
|
||||
'If STAMPSFILE is not provided, defaults to ' + get_default_stamps_filename())
|
||||
g_cond.add_argument('--post-filter', '--only-if', metavar='filter',
|
||||
help='Expression that, if given, must evaluate to True for each post to be downloaded. Must be '
|
||||
'a syntactically valid python expression. Variables are evaluated to '
|
||||
@ -464,6 +474,7 @@ def main():
|
||||
download_tagged=args.tagged,
|
||||
download_igtv=args.igtv,
|
||||
fast_update=args.fast_update,
|
||||
latest_stamps_file=args.latest_stamps,
|
||||
max_count=int(args.count) if args.count is not None else None,
|
||||
post_filter_str=args.post_filter,
|
||||
storyitem_filter_str=args.storyitem_filter)
|
||||
|
@ -11,6 +11,7 @@ from contextlib import contextmanager, suppress
|
||||
from datetime import datetime, timezone
|
||||
from functools import wraps
|
||||
from io import BytesIO
|
||||
from itertools import takewhile
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, IO, Iterator, List, Optional, Set, Union, cast
|
||||
from urllib.parse import urlparse
|
||||
@ -20,23 +21,29 @@ import urllib3 # type: ignore
|
||||
|
||||
from .exceptions import *
|
||||
from .instaloadercontext import InstaloaderContext, RateController
|
||||
from .lateststamps import LatestStamps
|
||||
from .nodeiterator import NodeIterator, resumable_iteration
|
||||
from .structures import (Hashtag, Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem,
|
||||
load_structure_from_file, save_structure_to_file, PostSidecarNode, TitlePic)
|
||||
|
||||
|
||||
def get_default_session_filename(username: str) -> str:
|
||||
"""Returns default session filename for given username."""
|
||||
sessionfilename = "session-{}".format(username)
|
||||
def _get_config_dir() -> str:
|
||||
if platform.system() == "Windows":
|
||||
# on Windows, use %LOCALAPPDATA%\Instaloader\session-USERNAME
|
||||
# on Windows, use %LOCALAPPDATA%\Instaloader
|
||||
localappdata = os.getenv("LOCALAPPDATA")
|
||||
if localappdata is not None:
|
||||
return os.path.join(localappdata, "Instaloader", sessionfilename)
|
||||
return os.path.join(localappdata, "Instaloader")
|
||||
# legacy fallback - store in temp dir if %LOCALAPPDATA% is not set
|
||||
return os.path.join(tempfile.gettempdir(), ".instaloader-" + getpass.getuser(), sessionfilename)
|
||||
# on Unix, use ~/.config/instaloader/session-USERNAME
|
||||
return os.path.join(os.getenv("XDG_CONFIG_HOME", os.path.expanduser("~/.config")), "instaloader", sessionfilename)
|
||||
return os.path.join(tempfile.gettempdir(), ".instaloader-" + getpass.getuser())
|
||||
# on Unix, use ~/.config/instaloader
|
||||
return os.path.join(os.getenv("XDG_CONFIG_HOME", os.path.expanduser("~/.config")), "instaloader")
|
||||
|
||||
|
||||
def get_default_session_filename(username: str) -> str:
    """Build the path of the default session file for the given username.

    The file is named ``session-<username>`` and is placed inside the directory
    returned by :func:`_get_config_dir`.
    """
    return os.path.join(_get_config_dir(), "session-{}".format(username))
|
||||
|
||||
|
||||
def get_legacy_session_filename(username: str) -> str:
|
||||
@ -46,6 +53,17 @@ def get_legacy_session_filename(username: str) -> str:
|
||||
return filename.lower()
|
||||
|
||||
|
||||
def get_default_stamps_filename() -> str:
    """
    Build the path of the default latest-stamps database.

    The file is named ``latest-stamps.ini`` and is placed inside the directory
    returned by :func:`_get_config_dir`.

    .. versionadded:: 4.8

    """
    return os.path.join(_get_config_dir(), "latest-stamps.ini")
|
||||
|
||||
|
||||
def format_string_contains_key(format_string: str, key: str) -> bool:
|
||||
# pylint:disable=unused-variable
|
||||
for literal_text, field_name, format_spec, conversion in string.Formatter().parse(format_string):
|
||||
@ -497,6 +515,25 @@ class Instaloader:
|
||||
os.utime(filename, (datetime.now().timestamp(), date_object.timestamp()))
|
||||
self.context.log('') # log output of _get_and_write_raw() does not produce \n
|
||||
|
||||
def download_profilepic_if_new(self, profile: Profile, latest_stamps: Optional[LatestStamps]) -> None:
    """
    Download the profile picture unless the stamps database says it is already known.

    :param profile: Profile whose picture should be fetched.
    :param latest_stamps: Database with the last downloaded data. If not present,
       the decision is delegated entirely to :meth:`download_profilepic`.

    .. versionadded:: 4.8
    """
    if latest_stamps is not None:
        # The picture is identified by the last path component of its URL, query string removed.
        current_basename = profile.profile_pic_url.rsplit('/', 1)[-1].partition('?')[0]
        if latest_stamps.get_profile_pic(profile.username) != current_basename:
            self.download_profilepic(profile)
            # Record the basename only after the download call returned.
            latest_stamps.set_profile_pic(profile.username, current_basename)
    else:
        self.download_profilepic(profile)
|
||||
|
||||
def download_profilepic(self, profile: Profile) -> None:
|
||||
"""Downloads and saves profile pic."""
|
||||
self.download_title_pic(profile.profile_pic_url, profile.username.lower(), 'profile_pic', profile)
|
||||
@ -731,7 +768,8 @@ class Instaloader:
|
||||
userids: Optional[List[Union[int, Profile]]] = None,
|
||||
fast_update: bool = False,
|
||||
filename_target: Optional[str] = ':stories',
|
||||
storyitem_filter: Optional[Callable[[StoryItem], bool]] = None) -> None:
|
||||
storyitem_filter: Optional[Callable[[StoryItem], bool]] = None,
|
||||
latest_stamps: Optional[LatestStamps] = None) -> None:
|
||||
"""
|
||||
Download available stories from user followees or all stories of users whose ID are given.
|
||||
Does not mark stories as seen.
|
||||
@ -742,7 +780,11 @@ class Instaloader:
|
||||
:param filename_target: Replacement for {target} in dirname_pattern and filename_pattern
|
||||
or None if profile name should be used instead
|
||||
:param storyitem_filter: function(storyitem), which returns True if given StoryItem should be downloaded
|
||||
:param latest_stamps: Database with the last times each user was scraped
|
||||
:raises LoginRequiredException: If called without being logged in.
|
||||
|
||||
.. versionchanged:: 4.8
|
||||
Add `latest_stamps` parameter.
|
||||
"""
|
||||
|
||||
if not userids:
|
||||
@ -761,7 +803,14 @@ class Instaloader:
|
||||
self.context.log(msg)
|
||||
totalcount = user_story.itemcount
|
||||
count = 1
|
||||
for item in user_story.get_items():
|
||||
stories_to_download = user_story.get_items()
|
||||
if latest_stamps is not None:
|
||||
# pylint:disable=cell-var-from-loop
|
||||
last_scraped = latest_stamps.get_last_story_timestamp(name)
|
||||
stories_to_download = takewhile(lambda s: s.date_utc.replace(tzinfo=timezone.utc) > last_scraped,
|
||||
stories_to_download)
|
||||
scraped_timestamp = datetime.now().astimezone()
|
||||
for item in stories_to_download:
|
||||
if storyitem_filter is not None and not storyitem_filter(item):
|
||||
self.context.log("<{} skipped>".format(item), flush=True)
|
||||
continue
|
||||
@ -771,6 +820,8 @@ class Instaloader:
|
||||
downloaded = self.download_storyitem(item, filename_target if filename_target else name)
|
||||
if fast_update and not downloaded:
|
||||
break
|
||||
if latest_stamps is not None:
|
||||
latest_stamps.set_last_story_timestamp(name, scraped_timestamp)
|
||||
|
||||
def download_storyitem(self, item: StoryItem, target: Union[str, Path]) -> bool:
|
||||
"""Download one user story.
|
||||
@ -1134,25 +1185,49 @@ class Instaloader:
|
||||
|
||||
def download_tagged(self, profile: Profile, fast_update: bool = False,
|
||||
target: Optional[str] = None,
|
||||
post_filter: Optional[Callable[[Post], bool]] = None) -> None:
|
||||
post_filter: Optional[Callable[[Post], bool]] = None,
|
||||
latest_stamps: Optional[LatestStamps] = None) -> None:
|
||||
"""Download all posts where a profile is tagged.
|
||||
|
||||
.. versionadded:: 4.1"""
|
||||
.. versionadded:: 4.1
|
||||
|
||||
.. versionchanged:: 4.8
|
||||
Add `latest_stamps` parameter."""
|
||||
self.context.log("Retrieving tagged posts for profile {}.".format(profile.username))
|
||||
self.posts_download_loop(profile.get_tagged_posts(),
|
||||
posts_to_download: Iterator[Post] = profile.get_tagged_posts()
|
||||
if latest_stamps is not None:
|
||||
last_scraped = latest_stamps.get_last_tagged_timestamp(profile.username)
|
||||
posts_to_download = takewhile(lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped,
|
||||
posts_to_download)
|
||||
scraped_timestamp = datetime.now().astimezone()
|
||||
self.posts_download_loop(posts_to_download,
|
||||
target if target
|
||||
else (Path(_PostPathFormatter.sanitize_path(profile.username)) /
|
||||
_PostPathFormatter.sanitize_path(':tagged')),
|
||||
fast_update, post_filter)
|
||||
if latest_stamps is not None:
|
||||
latest_stamps.set_last_tagged_timestamp(profile.username, scraped_timestamp)
|
||||
|
||||
def download_igtv(self, profile: Profile, fast_update: bool = False,
|
||||
post_filter: Optional[Callable[[Post], bool]] = None) -> None:
|
||||
post_filter: Optional[Callable[[Post], bool]] = None,
|
||||
latest_stamps: Optional[LatestStamps] = None) -> None:
|
||||
"""Download IGTV videos of a profile.
|
||||
|
||||
.. versionadded:: 4.3"""
|
||||
.. versionadded:: 4.3
|
||||
|
||||
.. versionchanged:: 4.8
|
||||
Add `latest_stamps` parameter."""
|
||||
self.context.log("Retrieving IGTV videos for profile {}.".format(profile.username))
|
||||
self.posts_download_loop(profile.get_igtv_posts(), profile.username, fast_update, post_filter,
|
||||
posts_to_download: Iterator[Post] = profile.get_igtv_posts()
|
||||
if latest_stamps is not None:
|
||||
last_scraped = latest_stamps.get_last_igtv_timestamp(profile.username)
|
||||
posts_to_download = takewhile(lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped,
|
||||
posts_to_download)
|
||||
scraped_timestamp = datetime.now().astimezone()
|
||||
self.posts_download_loop(posts_to_download, profile.username, fast_update, post_filter,
|
||||
total_count=profile.igtvcount, owner_profile=profile)
|
||||
if latest_stamps is not None:
|
||||
latest_stamps.set_last_igtv_timestamp(profile.username, scraped_timestamp)
|
||||
|
||||
def _get_id_filename(self, profile_name: str) -> str:
|
||||
if ((format_string_contains_key(self.dirname_pattern, 'profile') or
|
||||
@ -1164,9 +1239,22 @@ class Instaloader:
|
||||
return os.path.join(self.dirname_pattern.format(),
|
||||
'{0}_id'.format(profile_name.lower()))
|
||||
|
||||
def load_profile_id(self, profile_name: str) -> Optional[int]:
    """
    Read the stored ID of a profile from its ID file.

    :param profile_name: Name of the profile; the file location is obtained from
       :meth:`_get_id_filename`.
    :return: The ID as an integer, or None if the file does not exist or its
       content is not a valid integer.

    .. versionadded:: 4.8
    """
    path = self._get_id_filename(profile_name)
    try:
        with open(path, 'rb') as stream:
            raw = stream.read()
        # int() accepts bytes and ignores surrounding whitespace such as the trailing newline.
        return int(raw)
    except (FileNotFoundError, ValueError):
        return None
|
||||
|
||||
def save_profile_id(self, profile: Profile):
|
||||
"""
|
||||
Store ID of profile locally.
|
||||
Store ID of profile on profile directory.
|
||||
|
||||
.. versionadded:: 4.0.6
|
||||
"""
|
||||
@ -1176,13 +1264,18 @@ class Instaloader:
|
||||
text_file.write(str(profile.userid) + "\n")
|
||||
self.context.log("Stored ID {0} for profile {1}.".format(profile.userid, profile.username))
|
||||
|
||||
def check_profile_id(self, profile_name: str) -> Profile:
|
||||
def check_profile_id(self, profile_name: str, latest_stamps: Optional[LatestStamps] = None) -> Profile:
|
||||
"""
|
||||
Consult locally stored ID of profile with given name, check whether ID matches and whether name
|
||||
has changed and return current name of the profile, and store ID of profile.
|
||||
|
||||
:param profile_name: Profile name
|
||||
:param latest_stamps: Database of downloaded data. If present, IDs are retrieved from it,
|
||||
otherwise from the target directory
|
||||
:return: Instance of current profile
|
||||
|
||||
.. versionchanged:: 4.8
|
||||
Add `latest_stamps` parameter.
|
||||
"""
|
||||
profile = None
|
||||
profile_name_not_exists_err = None
|
||||
@ -1190,10 +1283,11 @@ class Instaloader:
|
||||
profile = Profile.from_username(self.context, profile_name)
|
||||
except ProfileNotExistsException as err:
|
||||
profile_name_not_exists_err = err
|
||||
id_filename = self._get_id_filename(profile_name)
|
||||
try:
|
||||
with open(id_filename, 'rb') as id_file:
|
||||
profile_id = int(id_file.read())
|
||||
if latest_stamps is None:
|
||||
profile_id = self.load_profile_id(profile_name)
|
||||
else:
|
||||
profile_id = latest_stamps.get_profile_id(profile_name)
|
||||
if profile_id is not None:
|
||||
if (profile is None) or \
|
||||
(profile_id != profile.userid):
|
||||
if profile is not None:
|
||||
@ -1205,22 +1299,26 @@ class Instaloader:
|
||||
profile_from_id = Profile.from_id(self.context, profile_id)
|
||||
newname = profile_from_id.username
|
||||
self.context.log("Profile {0} has changed its name to {1}.".format(profile_name, newname))
|
||||
if ((format_string_contains_key(self.dirname_pattern, 'profile') or
|
||||
format_string_contains_key(self.dirname_pattern, 'target'))):
|
||||
os.rename(self.dirname_pattern.format(profile=profile_name.lower(),
|
||||
target=profile_name.lower()),
|
||||
self.dirname_pattern.format(profile=newname.lower(),
|
||||
target=newname.lower()))
|
||||
if latest_stamps is None:
|
||||
if ((format_string_contains_key(self.dirname_pattern, 'profile') or
|
||||
format_string_contains_key(self.dirname_pattern, 'target'))):
|
||||
os.rename(self.dirname_pattern.format(profile=profile_name.lower(),
|
||||
target=profile_name.lower()),
|
||||
self.dirname_pattern.format(profile=newname.lower(),
|
||||
target=newname.lower()))
|
||||
else:
|
||||
os.rename('{0}/{1}_id'.format(self.dirname_pattern.format(), profile_name.lower()),
|
||||
'{0}/{1}_id'.format(self.dirname_pattern.format(), newname.lower()))
|
||||
else:
|
||||
os.rename('{0}/{1}_id'.format(self.dirname_pattern.format(), profile_name.lower()),
|
||||
'{0}/{1}_id'.format(self.dirname_pattern.format(), newname.lower()))
|
||||
latest_stamps.rename_profile(profile_name, newname)
|
||||
return profile_from_id
|
||||
# profile exists and profile id matches saved id
|
||||
return profile
|
||||
except (FileNotFoundError, ValueError):
|
||||
pass
|
||||
if profile is not None:
|
||||
self.save_profile_id(profile)
|
||||
if latest_stamps is None:
|
||||
self.save_profile_id(profile)
|
||||
else:
|
||||
latest_stamps.save_profile_id(profile.username, profile.userid)
|
||||
return profile
|
||||
if profile_name_not_exists_err:
|
||||
raise profile_name_not_exists_err
|
||||
@ -1235,7 +1333,8 @@ class Instaloader:
|
||||
fast_update: bool = False,
|
||||
post_filter: Optional[Callable[[Post], bool]] = None,
|
||||
storyitem_filter: Optional[Callable[[Post], bool]] = None,
|
||||
raise_errors: bool = False):
|
||||
raise_errors: bool = False,
|
||||
latest_stamps: Optional[LatestStamps] = None):
|
||||
"""High-level method to download set of profiles.
|
||||
|
||||
:param profiles: Set of profiles to download.
|
||||
@ -1251,11 +1350,15 @@ class Instaloader:
|
||||
:param raise_errors:
|
||||
Whether :exc:`LoginRequiredException` and :exc:`PrivateProfileNotFollowedException` should be raised or
|
||||
caught and printed with :meth:`InstaloaderContext.error_catcher`.
|
||||
:param latest_stamps: :option:`--latest-stamps`.
|
||||
|
||||
.. versionadded:: 4.1
|
||||
|
||||
.. versionchanged:: 4.3
|
||||
Add `igtv` parameter.
|
||||
|
||||
.. versionchanged:: 4.8
|
||||
Add `latest_stamps` parameter.
|
||||
"""
|
||||
|
||||
@contextmanager
|
||||
@ -1274,7 +1377,7 @@ class Instaloader:
|
||||
# Download profile picture
|
||||
if profile_pic:
|
||||
with self.context.error_catcher('Download profile picture of {}'.format(profile_name)):
|
||||
self.download_profilepic(profile)
|
||||
self.download_profilepic_if_new(profile, latest_stamps)
|
||||
|
||||
# Save metadata as JSON if desired.
|
||||
if self.save_metadata:
|
||||
@ -1296,12 +1399,14 @@ class Instaloader:
|
||||
# Download tagged, if requested
|
||||
if tagged:
|
||||
with self.context.error_catcher('Download tagged of {}'.format(profile_name)):
|
||||
self.download_tagged(profile, fast_update=fast_update, post_filter=post_filter)
|
||||
self.download_tagged(profile, fast_update=fast_update, post_filter=post_filter,
|
||||
latest_stamps=latest_stamps)
|
||||
|
||||
# Download IGTV, if requested
|
||||
if igtv:
|
||||
with self.context.error_catcher('Download IGTV of {}'.format(profile_name)):
|
||||
self.download_igtv(profile, fast_update=fast_update, post_filter=post_filter)
|
||||
self.download_igtv(profile, fast_update=fast_update, post_filter=post_filter,
|
||||
latest_stamps=latest_stamps)
|
||||
|
||||
# Download highlights, if requested
|
||||
if highlights:
|
||||
@ -1311,14 +1416,23 @@ class Instaloader:
|
||||
# Iterate over pictures and download them
|
||||
if posts:
|
||||
self.context.log("Retrieving posts from profile {}.".format(profile_name))
|
||||
self.posts_download_loop(profile.get_posts(), profile_name, fast_update, post_filter,
|
||||
posts_to_download: Iterator[Post] = profile.get_posts()
|
||||
if latest_stamps is not None:
|
||||
# pylint:disable=cell-var-from-loop
|
||||
last_scraped = latest_stamps.get_last_post_timestamp(profile_name)
|
||||
posts_to_download = takewhile(lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped,
|
||||
posts_to_download)
|
||||
scraped_timestamp = datetime.now().astimezone()
|
||||
self.posts_download_loop(posts_to_download, profile_name, fast_update, post_filter,
|
||||
total_count=profile.mediacount, owner_profile=profile)
|
||||
if latest_stamps is not None:
|
||||
latest_stamps.set_last_post_timestamp(profile_name, scraped_timestamp)
|
||||
|
||||
if stories and profiles:
|
||||
with self.context.error_catcher("Download stories"):
|
||||
self.context.log("Downloading stories")
|
||||
self.download_stories(userids=list(profiles), fast_update=fast_update, filename_target=None,
|
||||
storyitem_filter=storyitem_filter)
|
||||
storyitem_filter=storyitem_filter, latest_stamps=latest_stamps)
|
||||
|
||||
def download_profile(self, profile_name: Union[str, Profile],
|
||||
profile_pic: bool = True, profile_pic_only: bool = False,
|
||||
|
113
instaloader/lateststamps.py
Normal file
113
instaloader/lateststamps.py
Normal file
@ -0,0 +1,113 @@
|
||||
import configparser
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class LatestStamps:
    """LatestStamps class.

    Convenience class for retrieving and storing data from the :option:`--latest-stamps` file.

    The file is an INI document with one section per profile name. Every setter
    writes the whole database back to disk immediately.

    :param latest_stamps_file: path to file.

    .. versionadded:: 4.8"""
    # Option names used inside each profile section.
    PROFILE_ID = 'profile-id'
    PROFILE_PIC = 'profile-pic'
    POST_TIMESTAMP = 'post-timestamp'
    TAGGED_TIMESTAMP = 'tagged-timestamp'
    IGTV_TIMESTAMP = 'igtv-timestamp'
    STORY_TIMESTAMP = 'story-timestamp'
    # strftime/strptime pattern for stored timestamps; %z requires an aware datetime.
    ISO_FORMAT = '%Y-%m-%dT%H:%M:%S.%f%z'

    def __init__(self, latest_stamps_file):
        self.file = latest_stamps_file
        # Interpolation is disabled so that values containing '%' (e.g. a percent-encoded
        # profile picture basename) are stored and returned verbatim instead of raising.
        self.data = configparser.ConfigParser(interpolation=None)
        # ConfigParser.read() silently ignores a file that does not exist yet.
        self.data.read(latest_stamps_file)

    def _save(self) -> None:
        """Write the database to :attr:`file`, creating its parent directory if needed."""
        import os  # local import: keeps this class self-contained
        dirname = os.path.dirname(self.file)
        if dirname:
            os.makedirs(dirname, exist_ok=True)
        with open(self.file, 'w') as f:
            self.data.write(f)

    def _ensure_section(self, section: str) -> None:
        """Create `section` if it is not present yet."""
        if not self.data.has_section(section):
            self.data.add_section(section)

    def get_profile_id(self, profile_name: str) -> Optional[int]:
        """Returns stored ID of profile, or None if it is missing or not an integer."""
        try:
            return self.data.getint(profile_name, self.PROFILE_ID)
        except (configparser.Error, ValueError):
            return None

    def save_profile_id(self, profile_name: str, profile_id: int) -> None:
        """Stores ID of profile."""
        self._ensure_section(profile_name)
        self.data.set(profile_name, self.PROFILE_ID, str(profile_id))
        self._save()

    def rename_profile(self, old_profile: str, new_profile: str) -> None:
        """Renames a profile, moving all of its stored options to the new section."""
        if old_profile == new_profile:
            # Copying a section onto itself and then removing it would erase the data.
            return
        self._ensure_section(new_profile)
        for option in [self.PROFILE_ID, self.PROFILE_PIC, self.POST_TIMESTAMP,
                       self.TAGGED_TIMESTAMP, self.IGTV_TIMESTAMP, self.STORY_TIMESTAMP]:
            if self.data.has_option(old_profile, option):
                value = self.data.get(old_profile, option)
                self.data.set(new_profile, option, value)
        self.data.remove_section(old_profile)
        self._save()

    def _get_timestamp(self, section: str, key: str) -> datetime:
        """Return the stored timestamp, or the Unix epoch (UTC) if missing or unparsable."""
        try:
            return datetime.strptime(self.data.get(section, key), self.ISO_FORMAT)
        except (configparser.Error, ValueError):
            return datetime.fromtimestamp(0, timezone.utc)

    def _set_timestamp(self, section: str, key: str, timestamp: datetime) -> None:
        """Store `timestamp` under `section`/`key` and save the database."""
        if timestamp.tzinfo is None:
            # A naive datetime would be written without UTC offset and could not be
            # parsed back with ISO_FORMAT; interpret it as local time instead.
            timestamp = timestamp.astimezone()
        self._ensure_section(section)
        self.data.set(section, key, timestamp.strftime(self.ISO_FORMAT))
        self._save()

    def get_last_post_timestamp(self, profile_name: str) -> datetime:
        """Returns timestamp of last download of a profile's posts."""
        return self._get_timestamp(profile_name, self.POST_TIMESTAMP)

    def set_last_post_timestamp(self, profile_name: str, timestamp: datetime) -> None:
        """Sets timestamp of last download of a profile's posts."""
        self._set_timestamp(profile_name, self.POST_TIMESTAMP, timestamp)

    def get_last_tagged_timestamp(self, profile_name: str) -> datetime:
        """Returns timestamp of last download of a profile's tagged posts."""
        return self._get_timestamp(profile_name, self.TAGGED_TIMESTAMP)

    def set_last_tagged_timestamp(self, profile_name: str, timestamp: datetime) -> None:
        """Sets timestamp of last download of a profile's tagged posts."""
        self._set_timestamp(profile_name, self.TAGGED_TIMESTAMP, timestamp)

    def get_last_igtv_timestamp(self, profile_name: str) -> datetime:
        """Returns timestamp of last download of a profile's igtv posts."""
        return self._get_timestamp(profile_name, self.IGTV_TIMESTAMP)

    def set_last_igtv_timestamp(self, profile_name: str, timestamp: datetime) -> None:
        """Sets timestamp of last download of a profile's igtv posts."""
        self._set_timestamp(profile_name, self.IGTV_TIMESTAMP, timestamp)

    def get_last_story_timestamp(self, profile_name: str) -> datetime:
        """Returns timestamp of last download of a profile's stories."""
        return self._get_timestamp(profile_name, self.STORY_TIMESTAMP)

    def set_last_story_timestamp(self, profile_name: str, timestamp: datetime) -> None:
        """Sets timestamp of last download of a profile's stories."""
        self._set_timestamp(profile_name, self.STORY_TIMESTAMP, timestamp)

    def get_profile_pic(self, profile_name: str) -> str:
        """Returns filename of profile's last downloaded profile pic, or "" if none is stored."""
        try:
            return self.data.get(profile_name, self.PROFILE_PIC)
        except configparser.Error:
            return ""

    def set_profile_pic(self, profile_name: str, profile_pic: str) -> None:
        """Sets filename of profile's last downloaded profile pic."""
        self._ensure_section(profile_name)
        self.data.set(profile_name, self.PROFILE_PIC, profile_pic)
        self._save()
|
Loading…
Reference in New Issue
Block a user