diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index 950e6ce..15fa555 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -1010,7 +1010,10 @@ class Instaloader: enabled=self.resume_prefix is not None ) as (is_resuming, start_index): for number, post in enumerate(posts, start=start_index + 1): - if (max_count is not None and number > max_count) or not takewhile(post): + should_stop = not takewhile(post) + if should_stop and post.is_pinned: + continue + if (max_count is not None and number > max_count) or should_stop: break if displayed_count is not None: self.context.log("[{0:{w}d}/{1:{w}d}] ".format(number, displayed_count, @@ -1042,7 +1045,7 @@ class Instaloader: except PostChangedException: post_changed = True continue - if fast_update and not downloaded and not post_changed: + if fast_update and not downloaded and not post_changed and not post.is_pinned: # disengage fast_update for first post when resuming if not is_resuming or number > 0: break diff --git a/instaloader/nodeiterator.py b/instaloader/nodeiterator.py index b5fabae..a49656d 100644 --- a/instaloader/nodeiterator.py +++ b/instaloader/nodeiterator.py @@ -76,7 +76,8 @@ class NodeIterator(Iterator[T]): node_wrapper: Callable[[Dict], T], query_variables: Optional[Dict[str, Any]] = None, query_referer: Optional[str] = None, - first_data: Optional[Dict[str, Any]] = None): + first_data: Optional[Dict[str, Any]] = None, + is_first: Optional[Callable[[T], bool]] = None): self._context = context self._query_hash = query_hash self._edge_extractor = edge_extractor @@ -91,6 +92,7 @@ class NodeIterator(Iterator[T]): else: self._data = self._query() self._first_node: Optional[Dict] = None + self._is_first = is_first def _query(self, after: Optional[str] = None) -> Dict: pagination_variables = {'first': NodeIterator._graphql_page_length} # type: Dict[str, Any] @@ -128,8 +130,12 @@ class NodeIterator(Iterator[T]): self._page_index, self._total_index = page_index, 
total_index raise item = self._node_wrapper(node) - if self._first_node is None: - self._first_node = node + if self._is_first is not None: + if self._is_first(item): + self._first_node = node + else: + if self._first_node is None: + self._first_node = node return item if self._data['page_info']['has_next_page']: query_response = self._query(self._data['page_info']['end_cursor']) @@ -168,7 +174,13 @@ class NodeIterator(Iterator[T]): """ If this iterator has produced any items, returns the first item produced. + It is possible to override what is considered the first item (for example, to consider the + newest item in case items are not in strict chronological order) by passing a callback + function as the `is_first` parameter when creating the class. + .. versionadded:: 4.8 + .. versionchanged:: 4.9.2 + What is considered the first item can be overridden. """ return self._node_wrapper(self._first_node) if self._first_node is not None else None diff --git a/instaloader/structures.py b/instaloader/structures.py index 5efa541..adaa222 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -7,7 +7,7 @@ from contextlib import suppress from datetime import datetime from itertools import islice from pathlib import Path -from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union from unicodedata import normalize from . import __version__ @@ -644,6 +644,13 @@ class Post: loc.get('lat'), loc.get('lng')) return self._location + @property + def is_pinned(self) -> bool: + """True if this Post has been pinned by at least one user. + + .. 
versionadded:: 4.9.2""" + return 'pinned_for_users' in self._node and bool(self._node['pinned_for_users']) + class Profile: """ @@ -970,6 +977,7 @@ class Profile: {'id': self.userid}, 'https://www.instagram.com/{0}/'.format(self.username), self._metadata('edge_owner_to_timeline_media'), + Profile._make_is_newest_checker() ) def get_saved_posts(self) -> NodeIterator[Post]: @@ -1003,6 +1011,7 @@ class Profile: lambda n: Post(self._context, n, self if int(n['owner']['id']) == self.userid else None), {'id': self.userid}, 'https://www.instagram.com/{0}/'.format(self.username), + is_first=Profile._make_is_newest_checker() ) def get_igtv_posts(self) -> NodeIterator[Post]: @@ -1020,8 +1029,22 @@ class Profile: {'id': self.userid}, 'https://www.instagram.com/{0}/channel/'.format(self.username), self._metadata('edge_felix_video_timeline'), + Profile._make_is_newest_checker() ) + @staticmethod + def _make_is_newest_checker() -> Callable[[Post], bool]: + newest_date: Optional[datetime] = None + def is_newest(p: Post) -> bool: + nonlocal newest_date + post_date = p.date_local + if newest_date is None or post_date > newest_date: + newest_date = post_date + return True + else: + return False + return is_newest + def get_followers(self) -> NodeIterator['Profile']: """ Retrieve list of followers of given profile.