diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index 2efc299..5bd6e39 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -8,7 +8,7 @@ import string import sys import tempfile from contextlib import contextmanager, suppress -from datetime import datetime +from datetime import datetime, timezone from functools import wraps from io import BytesIO from typing import Callable, Iterator, List, Optional, Any @@ -185,12 +185,17 @@ class Instaloader: self.context.log('json', end=' ', flush=True) def update_comments(self, filename: str, post: Post) -> None: + def _postcomment_asdict(comment): + return {'id': comment.id, + 'created_at': comment.created_at_utc.replace(tzinfo=timezone.utc).timestamp(), + 'text': comment.text, + 'owner': comment.owner._asdict()} filename += '_comments.json' try: comments = json.load(open(filename)) except FileNotFoundError: comments = list() - comments.extend(post.get_comments()) + comments.extend(_postcomment_asdict(comment) for comment in post.get_comments()) if comments: with open(filename, 'w') as file: comments_list = sorted(sorted(list(comments), key=lambda t: t['id']), diff --git a/instaloader/structures.py b/instaloader/structures.py index 4df3fce..c8a6d9c 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -25,6 +25,7 @@ def mediaid_to_shortcode(mediaid: int) -> str: PostSidecarNode = namedtuple('PostSidecarNode', ['is_video', 'display_url', 'video_url']) +PostComment = namedtuple('PostComment', ['id', 'created_at_utc', 'text', 'owner']) PostLocation = namedtuple('PostLocation', ['id', 'name', 'slug', 'has_public_page', 'lat', 'lng']) @@ -273,25 +274,31 @@ class Post: """Comment count""" return self._field('edge_media_to_comment', 'count') - def get_comments(self) -> Iterator[Dict[str, Any]]: + def get_comments(self) -> Iterator[PostComment]: """Iterate over all comments of the post. 
- Each comment is represented by a dictionary having the keys text, created_at, id and owner, which is a - dictionary with keys username, profile_pic_url and id. + Each comment is represented by a PostComment namedtuple with fields text (string), created_at_utc (datetime), + id (int) and owner (:class:`Profile`). """ + def _postcomment(node): + return PostComment(id=int(node['id']), + created_at_utc=datetime.utcfromtimestamp(node['created_at']), + text=node['text'], + owner=Profile(self._context, node['owner'])) if self.comments == 0: # Avoid doing additional requests if there are no comments return comment_edges = self._field('edge_media_to_comment', 'edges') if self.comments == len(comment_edges): # If the Post's metadata already contains all comments, don't do GraphQL requests to obtain them - yield from (comment['node'] for comment in comment_edges) + yield from (_postcomment(comment['node']) for comment in comment_edges) return - yield from self._context.graphql_node_list("33ba35852cb50da46f5b5e889df7d159", - {'shortcode': self.shortcode}, - 'https://www.instagram.com/p/' + self.shortcode + '/', - lambda d: d['data']['shortcode_media']['edge_media_to_comment'], - self._rhx_gis) + yield from (_postcomment(node) for node in + self._context.graphql_node_list("33ba35852cb50da46f5b5e889df7d159", + {'shortcode': self.shortcode}, + 'https://www.instagram.com/p/' + self.shortcode + '/', + lambda d: d['data']['shortcode_media']['edge_media_to_comment'], + self._rhx_gis)) def get_likes(self) -> Iterator['Profile']: """Iterate over all likes of the post. A :class:`Profile` instance of each likee is yielded."""