From 185244782519ed8ba98e204f22c5039c4a775dd3 Mon Sep 17 00:00:00 2001
From: Alexander Graf <17130992+aandergr@users.noreply.github.com>
Date: Fri, 12 Jan 2024 08:07:54 +0100
Subject: [PATCH] Fix downloading comments

Closes issue #2125 by using an iphone endpoint for posts that have more
than 50 comments.
---
Local change on top of the original commit: _paginated_comments walks the
result pages in a loop instead of recursing once per page, so posts with
very many comment pages cannot hit Python's recursion limit.

 instaloader/nodeiterator.py |  4 ++
 instaloader/structures.py   | 73 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 75 insertions(+), 2 deletions(-)

diff --git a/instaloader/nodeiterator.py b/instaloader/nodeiterator.py
index a5c907c..37544d7 100644
--- a/instaloader/nodeiterator.py
+++ b/instaloader/nodeiterator.py
@@ -184,6 +184,10 @@ class NodeIterator(Iterator[T]):
         """
         return self._node_wrapper(self._first_node) if self._first_node is not None else None
 
+    @staticmethod
+    def page_length() -> int:
+        return NodeIterator._graphql_page_length
+
     def freeze(self) -> FrozenNodeIterator:
         """Freeze the iterator for later resuming."""
         remaining_data = None
diff --git a/instaloader/structures.py b/instaloader/structures.py
index 86d5840..998adf2 100644
--- a/instaloader/structures.py
+++ b/instaloader/structures.py
@@ -545,11 +545,74 @@ class Post:
         except KeyError:
             return self._field('edge_media_to_comment', 'count')
 
+    def _get_comments_via_iphone_endpoint(self) -> Iterable[PostComment]:
+        """
+        Iterate over all comments of the post via an iPhone endpoint.
+
+        .. versionadded:: 4.10.3
+           fallback for :issue:`2125`.
+        """
+        def _query(min_id=None):
+            pagination_params = {"min_id": min_id} if min_id is not None else {}
+            return self._context.get_iphone_json(
+                f"api/v1/media/{self.mediaid}/comments/",
+                {
+                    "can_support_threading": "true",
+                    "permalink_enabled": "false",
+                    **pagination_params,
+                },
+            )
+
+        def _answers(comment_node):
+            def _answer(child_comment):
+                return PostCommentAnswer(
+                    id=int(child_comment["pk"]),
+                    created_at_utc=datetime.utcfromtimestamp(child_comment["created_at"]),
+                    text=child_comment["text"],
+                    owner=Profile.from_iphone_struct(self._context, child_comment["user"]),
+                    likes_count=child_comment["comment_like_count"],
+                )
+
+            child_comment_count = comment_node["child_comment_count"]
+            if child_comment_count == 0:
+                return
+            preview_child_comments = comment_node["preview_child_comments"]
+            if child_comment_count == len(preview_child_comments):
+                yield from (
+                    _answer(child_comment) for child_comment in preview_child_comments
+                )
+                return
+            pk = comment_node["pk"]
+            answers_json = self._context.get_iphone_json(
+                f"api/v1/media/{self.mediaid}/comments/{pk}/child_comments/",
+                {"max_id": ""},
+            )
+            yield from (
+                _answer(child_comment) for child_comment in answers_json["child_comments"]
+            )
+
+        def _paginated_comments(comments_json):
+            # Loop over pages instead of recursing, so the number of pages is not bounded by the recursion limit.
+            while comments_json is not None:
+                for comment_node in comments_json.get("comments", []):
+                    yield PostComment(
+                        id=int(comment_node["pk"]),
+                        created_at_utc=datetime.utcfromtimestamp(comment_node["created_at"]),
+                        text=comment_node["text"],
+                        owner=Profile.from_iphone_struct(self._context, comment_node["user"]),
+                        likes_count=comment_node["comment_like_count"],
+                        answers=_answers(comment_node),
+                    )
+                next_min_id = comments_json.get("next_min_id")
+                comments_json = _query(next_min_id) if next_min_id else None
+
+        return _paginated_comments(_query())
+
     def get_comments(self) -> Iterable[PostComment]:
-        r"""Iterate over all comments of the post.
+        """Iterate over all comments of the post.
 
         Each comment is represented by a PostComment NamedTuple with fields text (string), created_at (datetime),
-        id (int), owner (:class:`Profile`) and answers (:class:`~typing.Iterator`\ [:class:`PostCommentAnswer`])
+        id (int), owner (:class:`Profile`) and answers (:class:`~typing.Iterator` [:class:`PostCommentAnswer`])
         if available.
 
         .. versionchanged:: 4.7
@@ -596,6 +659,12 @@ class Post:
         if self.comments == len(comment_edges) + answers_count:
             # If the Post's metadata already contains all parent comments, don't do GraphQL requests to obtain them
             return [_postcomment(comment['node']) for comment in comment_edges]
+
+        if self.comments > NodeIterator.page_length():
+            # comments pagination via our graphql query does not work reliably anymore (issue #2125), fallback to an
+            # iphone endpoint if needed.
+            return self._get_comments_via_iphone_endpoint()
+
         return NodeIterator(
             self._context,
             '97b41c52301f77ce508f55e66d17620e',