From f6731566cd23e368c10acd8cda6974d7933d524b Mon Sep 17 00:00:00 2001
From: Alexander Graf <17130992+aandergr@users.noreply.github.com>
Date: Sat, 13 Mar 2021 11:17:56 +0100
Subject: [PATCH] Support resuming of downloading comments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: André Koch-Kramer <koch-kramer@web.de>
---
 instaloader/instaloader.py  | 39 ++++++++++++++++++++++++++++++++-----
 instaloader/nodeiterator.py |  4 ++--
 instaloader/structures.py   | 14 +++++++------
 3 files changed, 44 insertions(+), 13 deletions(-)

diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py
index c71acb2..e665100 100644
--- a/instaloader/instaloader.py
+++ b/instaloader/instaloader.py
@@ -335,18 +335,47 @@ class Instaloader:
                             combined_answers.extend(y['answers'])
                         unique_comments_list[-1]['answers'] = get_unique_comments(combined_answers)
             return unique_comments_list
+
+        def get_new_comments(new_comments, start):  # pass comments through; log the count at multiples of 250
+            for position, item in enumerate(new_comments, start + 1):
+                if not position % 250:
+                    self.context.log('{}'.format(position), end='…', flush=True)
+                yield item
+
+        def save_comments(extended_comments):
+            """Write the unique comments that are not also listed as an answer to ``filename`` as JSON."""
+            deduped = get_unique_comments(extended_comments, combine_answers=True)
+            reply_ids = {int(reply['id']) for entry in deduped for reply in entry.get('answers', [])}
+            with open(filename, 'w') as file:
+                file.write(json.dumps([entry for entry in deduped if int(entry['id']) not in reply_ids], indent=4))
+
+        base_filename = filename
         filename += '_comments.json'
         try:
             with open(filename) as fp:
                 comments = json.load(fp)
         except (FileNotFoundError, json.decoder.JSONDecodeError):
             comments = list()
-        comments.extend(_postcomment_asdict(comment) for comment in post.get_comments())
+
+        comments_iterator = post.get_comments()
+        try:
+            with resumable_iteration(
+                    context=self.context,
+                    iterator=comments_iterator,
+                    load=load_structure_from_file,
+                    save=save_structure_to_file,
+                    format_path=lambda magic: "{}_{}_{}.json.xz".format(base_filename, self.resume_prefix, magic),
+                    check_bbd=self.check_resume_bbd,
+                    enabled=self.resume_prefix is not None
+            ) as (_is_resuming, start_index):
+                comments.extend(_postcomment_asdict(comment)
+                                for comment in get_new_comments(comments_iterator, start_index))
+        except (KeyboardInterrupt, AbortDownloadException):
+            if comments:
+                save_comments(comments)
+            raise
         if comments:
-            comments = get_unique_comments(comments, combine_answers=True)
-            answer_ids = set(int(answer['id']) for comment in comments for answer in comment.get('answers', []))
-            with open(filename, 'w') as file:
-                file.write(json.dumps(list(filter(lambda t: int(t['id']) not in answer_ids, comments)), indent=4))
+            save_comments(comments)
             self.context.log('comments', end=' ', flush=True)
 
     def save_caption(self, filename: str, mtime: datetime, caption: str) -> None:
diff --git a/instaloader/nodeiterator.py b/instaloader/nodeiterator.py
index 5ddf0b8..d8d74c3 100644
--- a/instaloader/nodeiterator.py
+++ b/instaloader/nodeiterator.py
@@ -5,7 +5,7 @@ import os
 from contextlib import contextmanager
 from datetime import datetime, timedelta
 from lzma import LZMAError
-from typing import Any, Callable, Dict, Iterator, NamedTuple, Optional, Tuple, TypeVar
+from typing import Any, Callable, Dict, Iterable, Iterator, NamedTuple, Optional, Tuple, TypeVar
 
 from .exceptions import AbortDownloadException, InvalidArgumentException, QueryReturnedBadRequestException
 from .instaloadercontext import InstaloaderContext
@@ -204,7 +204,7 @@ class NodeIterator(Iterator[T]):
 
 @contextmanager
 def resumable_iteration(context: InstaloaderContext,
-                        iterator: Iterator,
+                        iterator: Iterable,
                         load: Callable[[InstaloaderContext, str], Any],
                         save: Callable[[FrozenNodeIterator, str], None],
                         format_path: Callable[[str], str],
diff --git a/instaloader/structures.py b/instaloader/structures.py
index bfb2c77..6cca9e4 100644
--- a/instaloader/structures.py
+++ b/instaloader/structures.py
@@ -4,7 +4,7 @@ import re
 from base64 import b64decode, b64encode
 from collections import namedtuple
 from datetime import datetime
-from typing import Any, Dict, Iterator, List, Optional, Union
+from typing import Any, Dict, Iterable, Iterator, List, Optional, Union
 
 from . import __version__
 from .exceptions import *
@@ -426,12 +426,15 @@ class Post:
         except KeyError:
             return self._field('edge_media_to_comment', 'count')
 
-    def get_comments(self) -> Iterator[PostComment]:
+    def get_comments(self) -> Iterable[PostComment]:
         r"""Iterate over all comments of the post.
 
         Each comment is represented by a PostComment namedtuple with fields text (string), created_at (datetime),
         id (int), owner (:class:`Profile`) and answers (:class:`~typing.Iterator`\ [:class:`PostCommentAnswer`])
         if available.
+
+        .. versionchanged:: 4.7
+           Change return type to ``Iterable``.
         """
         def _postcommentanswer(node):
             return PostCommentAnswer(id=int(node['id']),
@@ -466,16 +469,15 @@ class Post:
                                answers=_postcommentanswers(node))
         if self.comments == 0:
             # Avoid doing additional requests if there are no comments
-            return
+            return []
 
         comment_edges = self._field('edge_media_to_comment', 'edges')
         answers_count = sum([edge['node'].get('edge_threaded_comments', {}).get('count', 0) for edge in comment_edges])
 
         if self.comments == len(comment_edges) + answers_count:
             # If the Post's metadata already contains all parent comments, don't do GraphQL requests to obtain them
-            yield from (_postcomment(comment['node']) for comment in comment_edges)
-            return
-        yield from NodeIterator(
+            return [_postcomment(comment['node']) for comment in comment_edges]
+        return NodeIterator(
             self._context,
             '97b41c52301f77ce508f55e66d17620e',
             lambda d: d['data']['shortcode_media']['edge_media_to_parent_comment'],