mirror of
https://github.com/instaloader/instaloader.git
synced 2024-11-04 17:32:30 +01:00
Support resuming of downloading comments
Co-Authored-By: André Koch-Kramer <koch-kramer@web.de>
This commit is contained in:
parent
cd13211603
commit
f6731566cd
@ -335,18 +335,47 @@ class Instaloader:
|
|||||||
combined_answers.extend(y['answers'])
|
combined_answers.extend(y['answers'])
|
||||||
unique_comments_list[-1]['answers'] = get_unique_comments(combined_answers)
|
unique_comments_list[-1]['answers'] = get_unique_comments(combined_answers)
|
||||||
return unique_comments_list
|
return unique_comments_list
|
||||||
|
|
||||||
|
def get_new_comments(new_comments, start):
|
||||||
|
for idx, comment in enumerate(new_comments, start=start+1):
|
||||||
|
if idx % 250 == 0:
|
||||||
|
self.context.log('{}'.format(idx), end='…', flush=True)
|
||||||
|
yield comment
|
||||||
|
|
||||||
|
def save_comments(extended_comments):
|
||||||
|
unique_comments = get_unique_comments(extended_comments, combine_answers=True)
|
||||||
|
answer_ids = set(int(answer['id']) for comment in unique_comments for answer in comment.get('answers', []))
|
||||||
|
with open(filename, 'w') as file:
|
||||||
|
file.write(json.dumps(list(filter(lambda t: int(t['id']) not in answer_ids, unique_comments)),
|
||||||
|
indent=4))
|
||||||
|
|
||||||
|
base_filename = filename
|
||||||
filename += '_comments.json'
|
filename += '_comments.json'
|
||||||
try:
|
try:
|
||||||
with open(filename) as fp:
|
with open(filename) as fp:
|
||||||
comments = json.load(fp)
|
comments = json.load(fp)
|
||||||
except (FileNotFoundError, json.decoder.JSONDecodeError):
|
except (FileNotFoundError, json.decoder.JSONDecodeError):
|
||||||
comments = list()
|
comments = list()
|
||||||
comments.extend(_postcomment_asdict(comment) for comment in post.get_comments())
|
|
||||||
|
comments_iterator = post.get_comments()
|
||||||
|
try:
|
||||||
|
with resumable_iteration(
|
||||||
|
context=self.context,
|
||||||
|
iterator=comments_iterator,
|
||||||
|
load=load_structure_from_file,
|
||||||
|
save=save_structure_to_file,
|
||||||
|
format_path=lambda magic: "{}_{}_{}.json.xz".format(base_filename, self.resume_prefix, magic),
|
||||||
|
check_bbd=self.check_resume_bbd,
|
||||||
|
enabled=self.resume_prefix is not None
|
||||||
|
) as (_is_resuming, start_index):
|
||||||
|
comments.extend(_postcomment_asdict(comment)
|
||||||
|
for comment in get_new_comments(comments_iterator, start_index))
|
||||||
|
except (KeyboardInterrupt, AbortDownloadException):
|
||||||
if comments:
|
if comments:
|
||||||
comments = get_unique_comments(comments, combine_answers=True)
|
save_comments(comments)
|
||||||
answer_ids = set(int(answer['id']) for comment in comments for answer in comment.get('answers', []))
|
raise
|
||||||
with open(filename, 'w') as file:
|
if comments:
|
||||||
file.write(json.dumps(list(filter(lambda t: int(t['id']) not in answer_ids, comments)), indent=4))
|
save_comments(comments)
|
||||||
self.context.log('comments', end=' ', flush=True)
|
self.context.log('comments', end=' ', flush=True)
|
||||||
|
|
||||||
def save_caption(self, filename: str, mtime: datetime, caption: str) -> None:
|
def save_caption(self, filename: str, mtime: datetime, caption: str) -> None:
|
||||||
|
@ -5,7 +5,7 @@ import os
|
|||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from lzma import LZMAError
|
from lzma import LZMAError
|
||||||
from typing import Any, Callable, Dict, Iterator, NamedTuple, Optional, Tuple, TypeVar
|
from typing import Any, Callable, Dict, Iterable, Iterator, NamedTuple, Optional, Tuple, TypeVar
|
||||||
|
|
||||||
from .exceptions import AbortDownloadException, InvalidArgumentException, QueryReturnedBadRequestException
|
from .exceptions import AbortDownloadException, InvalidArgumentException, QueryReturnedBadRequestException
|
||||||
from .instaloadercontext import InstaloaderContext
|
from .instaloadercontext import InstaloaderContext
|
||||||
@ -204,7 +204,7 @@ class NodeIterator(Iterator[T]):
|
|||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
def resumable_iteration(context: InstaloaderContext,
|
def resumable_iteration(context: InstaloaderContext,
|
||||||
iterator: Iterator,
|
iterator: Iterable,
|
||||||
load: Callable[[InstaloaderContext, str], Any],
|
load: Callable[[InstaloaderContext, str], Any],
|
||||||
save: Callable[[FrozenNodeIterator, str], None],
|
save: Callable[[FrozenNodeIterator, str], None],
|
||||||
format_path: Callable[[str], str],
|
format_path: Callable[[str], str],
|
||||||
|
@ -4,7 +4,7 @@ import re
|
|||||||
from base64 import b64decode, b64encode
|
from base64 import b64decode, b64encode
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Any, Dict, Iterator, List, Optional, Union
|
from typing import Any, Dict, Iterable, Iterator, List, Optional, Union
|
||||||
|
|
||||||
from . import __version__
|
from . import __version__
|
||||||
from .exceptions import *
|
from .exceptions import *
|
||||||
@ -426,12 +426,15 @@ class Post:
|
|||||||
except KeyError:
|
except KeyError:
|
||||||
return self._field('edge_media_to_comment', 'count')
|
return self._field('edge_media_to_comment', 'count')
|
||||||
|
|
||||||
def get_comments(self) -> Iterator[PostComment]:
|
def get_comments(self) -> Iterable[PostComment]:
|
||||||
r"""Iterate over all comments of the post.
|
r"""Iterate over all comments of the post.
|
||||||
|
|
||||||
Each comment is represented by a PostComment namedtuple with fields text (string), created_at (datetime),
|
Each comment is represented by a PostComment namedtuple with fields text (string), created_at (datetime),
|
||||||
id (int), owner (:class:`Profile`) and answers (:class:`~typing.Iterator`\ [:class:`PostCommentAnswer`])
|
id (int), owner (:class:`Profile`) and answers (:class:`~typing.Iterator`\ [:class:`PostCommentAnswer`])
|
||||||
if available.
|
if available.
|
||||||
|
|
||||||
|
.. versionchanged:: 4.7
|
||||||
|
Change return type to ``Iterable``.
|
||||||
"""
|
"""
|
||||||
def _postcommentanswer(node):
|
def _postcommentanswer(node):
|
||||||
return PostCommentAnswer(id=int(node['id']),
|
return PostCommentAnswer(id=int(node['id']),
|
||||||
@ -466,16 +469,15 @@ class Post:
|
|||||||
answers=_postcommentanswers(node))
|
answers=_postcommentanswers(node))
|
||||||
if self.comments == 0:
|
if self.comments == 0:
|
||||||
# Avoid doing additional requests if there are no comments
|
# Avoid doing additional requests if there are no comments
|
||||||
return
|
return []
|
||||||
|
|
||||||
comment_edges = self._field('edge_media_to_comment', 'edges')
|
comment_edges = self._field('edge_media_to_comment', 'edges')
|
||||||
answers_count = sum([edge['node'].get('edge_threaded_comments', {}).get('count', 0) for edge in comment_edges])
|
answers_count = sum([edge['node'].get('edge_threaded_comments', {}).get('count', 0) for edge in comment_edges])
|
||||||
|
|
||||||
if self.comments == len(comment_edges) + answers_count:
|
if self.comments == len(comment_edges) + answers_count:
|
||||||
# If the Post's metadata already contains all parent comments, don't do GraphQL requests to obtain them
|
# If the Post's metadata already contains all parent comments, don't do GraphQL requests to obtain them
|
||||||
yield from (_postcomment(comment['node']) for comment in comment_edges)
|
return [_postcomment(comment['node']) for comment in comment_edges]
|
||||||
return
|
return NodeIterator(
|
||||||
yield from NodeIterator(
|
|
||||||
self._context,
|
self._context,
|
||||||
'97b41c52301f77ce508f55e66d17620e',
|
'97b41c52301f77ce508f55e66d17620e',
|
||||||
lambda d: d['data']['shortcode_media']['edge_media_to_parent_comment'],
|
lambda d: d['data']['shortcode_media']['edge_media_to_parent_comment'],
|
||||||
|
Loading…
Reference in New Issue
Block a user