Download comments

Close #5
2024-10-05 14:57:08 +02:00 · 2017-07-20 22:30:12 +02:00 · 2017-07-20 22:30:12 +02:00 · 169ce1a300
commit 169ce1a300
parent ee8e159d56
3 changed files with 94 additions and 16 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -9,4 +9,4 @@ python:
 install:
 - pip install pylint requests
 script:
- - python3 -m pylint -r n -d bad-whitespace,missing-docstring,too-many-arguments,locally-disabled,line-too-long,too-many-public-methods instaloader
+ - python3 -m pylint -r n -d bad-whitespace,missing-docstring,too-many-arguments,locally-disabled,line-too-long,too-many-public-methods,too-many-lines instaloader
--- a/README.rst
+++ b/README.rst
@ -118,6 +118,9 @@ renames the folder likewise.
                           link. This requires an additional request to the
                           Instagram server for each picture, which is why it is
                           disabled by default.
+--comments                 Download and update comments for each post. This
+                           requires an additional request to the Instagram server
+                           for each post, which is why it is disabled by default.

 When to Stop Downloading
 ^^^^^^^^^^^^^^^^^^^^^^^^
--- a/instaloader.py
+++ b/instaloader.py
@ -249,7 +249,7 @@ class Instaloader:
                if page_info['has_next_page']:
                    resp = tmpsession.get('https://www.instagram.com/graphql/query/',
                                          params={'query_id': 17851374694183129,
-                                                  'variables': '{"id":"' + str(profile_id) + '","first":500},"after":"' +
+                                                  'variables': '{"id":"' + str(profile_id) + '","first":500,"after":"' +
                                                               page_info['end_cursor'] + '"}'})
                    data = resp.json()
                else:
@ -290,7 +290,7 @@ class Instaloader:
                if page_info['has_next_page']:
                    resp = tmpsession.get('https://www.instagram.com/graphql/query/',
                                          params={'query_id': 17874545323001329,
-                                                  'variables': '{"id":"' + str(profile_id) + '","first":500},"after":"' +
+                                                  'variables': '{"id":"' + str(profile_id) + '","first":500,"after":"' +
                                                               page_info['end_cursor'] + '"}'})
                    data = resp.json()
                else:
@ -299,6 +299,38 @@ class Instaloader:
            raise ConnectionException("ConnectionError({0}): unable to gather followees.".format(resp.status_code))
        return followees

+    def get_comments(self, shortcode: str) -> List[Dict[str, Any]]:
+        tmpsession = copy_session(self.session)
+        header = self.default_http_header(empty_session_only=True)
+        del header['Connection']
+        del header['Content-Length']
+        header['authority'] = 'www.instagram.com'
+        header['scheme'] = 'https'
+        header['accept'] = '*/*'
+        header['referer'] = 'https://www.instagram.com/p/' + shortcode + '/'
+        tmpsession.headers = header
+        resp = tmpsession.get('https://www.instagram.com/graphql/query/',
+                              params={'query_id': 17852405266163336,
+                                      'variables': '{"shortcode":"' + shortcode + '","first":500}'})
+        if resp.status_code == 200:
+            data = resp.json()
+            comments = []
+            while True:
+                edge_media_to_comment = data['data']['shortcode_media']['edge_media_to_comment']
+                comments.extend([comment['node'] for comment in edge_media_to_comment['edges']])
+                page_info = edge_media_to_comment['page_info']
+                if page_info['has_next_page']:
+                    resp = tmpsession.get('https://www.instagram.com/graphql/query/',
+                                          params={'query_id': 17852405266163336,
+                                                  'variables': '{"shortcode":"' + shortcode + '","first":500,"after":"'
+                                                               + page_info['end_cursor'] + '"}'})
+                    data = resp.json()
+                else:
+                    break
+        else:
+            raise ConnectionException("ConnectionError({0}): unable to gather comments.".format(resp.status_code))
+        return comments
+
    def download_pic(self, name: str, url: str, date_epoch: float, outputlabel: Optional[str] = None,
                     filename_suffix: Optional[str] = None) -> bool:
        """Downloads and saves picture with given url under given directory with given timestamp.
@ -330,6 +362,32 @@ class Instaloader:
        else:
            raise ConnectionException("File \'" + url + "\' could not be downloaded.")

+    def update_comments(self, name: str, shortcode: str, date_epoch: float) -> None:
+        if self.profile_subdirs:
+            filename = name.lower() + '/' + _epoch_to_string(date_epoch) + '_comments.json'
+        else:
+            filename = name.lower() + '__' + _epoch_to_string(date_epoch) + '_comments.json'
+        try:
+            comments = json.load(open(filename))
+        except FileNotFoundError:
+            comments = list()
+        comments.extend(self.get_comments(shortcode))
+        if comments:
+            with open(filename, 'w') as file:
+                comments_list = sorted(sorted(list(comments), key=lambda t: t['id']),
+                                       key=lambda t: t['created_at'], reverse=True)
+                unique_comments_list = [comments_list[0]]
+                #for comment in comments_list:
+                #    if unique_comments_list[-1]['id'] != comment['id']:
+                #        unique_comments_list.append(comment)
+                #file.write(json.dumps(unique_comments_list, indent=4))
+                #pylint:disable=invalid-name
+                for x, y in zip(comments_list[:-1], comments_list[1:]):
+                    if x['id'] != y['id']:
+                        unique_comments_list.append(y)
+                file.write(json.dumps(unique_comments_list, indent=4))
+            self._log('comments', end=' ', flush=True)
+
    def save_caption(self, name: str, date_epoch: float, caption: str) -> None:
        """Updates picture caption"""
        # pylint:disable=too-many-branches
@ -543,7 +601,7 @@ class Instaloader:
            return location_json["entry_data"]["LocationsPage"][0]["location"]

    def download_node(self, node: Dict[str, Any], name: str,
-                      download_videos: bool = True, geotags: bool = False) -> bool:
+                      download_videos: bool = True, geotags: bool = False, download_comments: bool = False) -> bool:
        """
        Download everything associated with one instagram node, i.e. picture, caption and video.

@ -551,6 +609,7 @@ class Instaloader:
        :param name: Name of profile to which this node belongs
        :param download_videos: True, if videos should be downloaded
        :param geotags: Download geotags
+        :param download_comments: Update comments
        :return: True if something was downloaded, False otherwise, i.e. file was already there
        """
        # pylint:disable=too-many-branches,too-many-locals
@ -601,12 +660,15 @@ class Instaloader:
            location = self.get_location(node_code)
            if location:
                self.save_location(name, location, date)
+        if download_comments:
+            self.update_comments(name, node_code, date)
        self._log()
        return downloaded

    def download_feed_pics(self, max_count: int = None, fast_update: bool = False,
                           filter_func: Optional[Callable[[Dict[str, Dict[str, Any]]], bool]] = None,
-                           download_videos: bool = True, geotags: bool = False) -> None:
+                           download_videos: bool = True, geotags: bool = False,
+                           download_comments: bool = False) -> None:
        """
        Download pictures from the user's feed.

@ -624,6 +686,7 @@ class Instaloader:
        :param filter_func: function(node), which returns True if given picture should not be downloaded
        :param download_videos: True, if videos should be downloaded
        :param geotags: Download geotags
+        :param download_comments: Update comments
        """
        # pylint:disable=too-many-locals
        data = self.get_feed_json()
@ -646,7 +709,8 @@ class Instaloader:
                self._log("[%3i] %s " % (count, name), end="", flush=True)
                count += 1
                downloaded = self.download_node(node, name,
-                                                download_videos=download_videos, geotags=geotags)
+                                                download_videos=download_videos, geotags=geotags,
+                                                download_comments=download_comments)
                if fast_update and not downloaded:
                    return
            if not feed["page_info"]["has_next_page"]:
@ -662,6 +726,7 @@ class Instaloader:
                         max_count: Optional[int] = None,
                         filter_func: Optional[Callable[[Dict[str, Dict[str, Any]]], bool]] = None,
                         fast_update: bool = False, download_videos: bool = True, geotags: bool = False,
+                         download_comments: bool = False,
                         lookup_username: bool = False) -> None:
        """Download pictures of one hashtag.

@ -675,6 +740,7 @@ class Instaloader:
        :param fast_update: If true, abort when first already-downloaded picture is encountered
        :param download_videos: True, if videos should be downloaded
        :param geotags: Download geotags
+        :param download_comments: Update comments
        :param lookup_username: Lookup username to encode it in the downloaded file's path, rather than the hashtag
        """
        data = self.get_hashtag_json(hashtag)
@ -694,7 +760,8 @@ class Instaloader:
                    continue
                count += 1
                downloaded = self.download_node(node, pathname,
-                                                download_videos=download_videos, geotags=geotags)
+                                                download_videos=download_videos, geotags=geotags,
+                                                download_comments=download_comments)
                if fast_update and not downloaded:
                    return
            if data['entry_data']['TagPage'][0]['tag']['media']['page_info']['has_next_page']:
@ -742,7 +809,7 @@ class Instaloader:

    def download(self, name: str,
                 profile_pic_only: bool = False, download_videos: bool = True, geotags: bool = False,
-                 fast_update: bool = False) -> None:
+                 download_comments: bool = False, fast_update: bool = False) -> None:
        """Download one profile"""
        # pylint:disable=too-many-branches,too-many-locals
        # Get profile main page json
@ -785,7 +852,8 @@ class Instaloader:
                self._log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
                count += 1
                downloaded = self.download_node(node, name,
-                                                download_videos=download_videos, geotags=geotags)
+                                                download_videos=download_videos, geotags=geotags,
+                                                download_comments=download_comments)
                if fast_update and not downloaded:
                    return
            data = self.get_json(name, max_id=get_last_id(data))
@ -808,6 +876,7 @@ class Instaloader:
    def download_profiles(self, profilelist: List[str], username: Optional[str] = None, password: Optional[str] = None,
                          sessionfile: Optional[str] = None, max_count: Optional[int] = None,
                          profile_pic_only: bool = False, download_videos: bool = True, geotags: bool = False,
+                          download_comments: bool = False,
                          fast_update: bool = False, hashtag_lookup_username: bool = False) -> None:
        """Download set of profiles and handle sessions"""
        # pylint:disable=too-many-branches,too-many-locals,too-many-statements
@ -835,7 +904,7 @@ class Instaloader:
                    self._log("Retrieving pictures with hashtag {0}".format(pentry))
                    self.download_hashtag(hashtag=pentry[1:], max_count=max_count, fast_update=fast_update,
                                          download_videos=download_videos, geotags=geotags,
-                                          lookup_username=hashtag_lookup_username)
+                                          download_comments=download_comments, lookup_username=hashtag_lookup_username)
                elif pentry[0] == '@':
                    if username is not None:
                        self._log("Retrieving followees of %s..." % pentry[1:])
@ -847,7 +916,8 @@ class Instaloader:
                    if username is not None:
                        self._log("Retrieving pictures from your feed...")
                        self.download_feed_pics(fast_update=fast_update, max_count=max_count,
-                                                download_videos=download_videos, geotags=geotags)
+                                                download_videos=download_videos, geotags=geotags,
+                                                download_comments=download_comments)
                    else:
                        print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr)
                elif pentry == ":feed-liked":
@ -858,7 +928,8 @@ class Instaloader:
                                                not node["likes"]["viewer_has_liked"]
                                                if "likes" in node
                                                else not node["viewer_has_liked"],
-                                                download_videos=download_videos, geotags=geotags)
+                                                download_videos=download_videos, geotags=geotags,
+                                                download_comments=download_comments)
                    else:
                        print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr)
                else:
@ -870,7 +941,7 @@ class Instaloader:
                try:
                    try:
                        self.download(target, profile_pic_only, download_videos,
-                                      geotags, fast_update)
+                                      geotags, download_comments, fast_update)
                    except ProfileNotExistsException as err:
                        if username is not None:
                            self._log(err)
@ -878,7 +949,7 @@ class Instaloader:
                            anonymous_loader = Instaloader(self.sleep, self.quiet, self.shorter_output,
                                                           self.profile_subdirs, self.user_agent)
                            anonymous_loader.download(target, profile_pic_only, download_videos,
-                                                      geotags, fast_update)
+                                                      geotags, download_comments, fast_update)
                        else:
                            raise err
                except NonfatalException as err:
@ -920,6 +991,10 @@ def main():
                             'text file with the location\'s name and a Google Maps link. '
                             'This requires an additional request to the Instagram '
                             'server for each picture, which is why it is disabled by default.')
+    g_what.add_argument('-C', '--comments', action='store_true',
+                        help='Download and update comments for each post. '
+                             'This requires an additional request to the Instagram '
+                             'server for each post, which is why it is disabled by default.')

    g_stop = parser.add_argument_group('When to Stop Downloading',
                                       'If none of these options are given, Instaloader goes through all pictures '
@ -978,8 +1053,8 @@ def main():
                             profile_subdirs=not args.no_profile_subdir, user_agent=args.user_agent)
        loader.download_profiles(args.profile, args.login, args.password, args.sessionfile,
                                 int(args.count) if args.count is not None else None,
-                                 args.profile_pic_only, not args.skip_videos, args.geotags, args.fast_update,
-                                 args.hashtag_username)
+                                 args.profile_pic_only, not args.skip_videos, args.geotags, args.download_comments,
+                                 args.fast_update, args.hashtag_username)
    except InstaloaderException as err:
        raise SystemExit("Fatal error: %s" % err)