[reddit] add ability to load more comments (#15)

The 'extractor.reddit.morecomments' option enables the use of the '/api/morechildren' API endpoint (1) to load even more comments than the usual submission-request provides. Possible values are the booleans 'true' and 'false' (default). Note: this feature comes at the cost of 1 extra API call towards the rate limit for every 100 extra comments. (1) https://www.reddit.com/dev/api/#GET_api_morechildren
2024-11-23 03:02:50 +01:00 · 2017-06-13 18:49:07 +02:00 · 2017-06-13 18:49:07 +02:00 · 56bec79e6a
commit 56bec79e6a
parent 05ed95e5b0
1 changed files with 28 additions and 5 deletions
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@ -108,6 +108,7 @@ class RedditAPI():
    def __init__(self, extractor):
        self.extractor = extractor
        self.comments = extractor.config("comments", 500)
+        self.morecomments = extractor.config("morecomments", False)
        self.refresh_token = extractor.config("refresh-token")
        self.log = extractor.log
        self.session = extractor.session
@ -116,9 +117,10 @@ class RedditAPI():
    def submission(self, submission_id):
        """Fetch the (submission, comments)=-tuple for a submission id"""
        endpoint = "/comments/" + submission_id + "/.json"
+        link_id = "t3_" + submission_id if self.morecomments else None
        submission, comments = self._call(endpoint, {"limit": self.comments})
        return (submission["data"]["children"][0]["data"],
-                self._unfold(comments))
+                self._flatten(comments, link_id))

    def submissions_subreddit(self, subreddit, params):
        """Collect all (submission, comments)-tuples of a subreddit"""
@ -126,6 +128,24 @@ class RedditAPI():
        params["limit"] = 100
        return self._pagination(endpoint, params)

+    def morechildren(self, link_id, children):
+        """Load additional comments from a submission"""
+        endpoint = "/api/morechildren"
+        params = {"link_id": link_id, "api_type": "json"}
+        index, done = 0, False
+        while not done:
+            if len(children) - index < 100:
+                done = True
+            params["children"] = ",".join(children[index:index + 100])
+            index += 100
+
+            data = self._call(endpoint, params)["json"]
+            for thing in data["data"]["things"]:
+                if thing["kind"] == "more":
+                    children.extend(thing["data"]["children"])
+                else:
+                    yield thing["data"]
+
    def authenticate(self):
        """Authenticate the application by requesting an access token"""
        access_token = self._authenticate_impl(self.refresh_token)
@ -190,15 +210,18 @@ class RedditAPI():
                return
            params["after"] = data["after"]

-    @staticmethod
-    def _unfold(comments):
-        # TODO: order?
+    def _flatten(self, comments, link_id=None):
+        extra = []
        queue = comments["data"]["children"]
        while queue:
-            comment = queue.pop()
+            comment = queue.pop(0)
            if comment["kind"] == "more":
+                if link_id:
+                    extra.extend(comment["data"]["children"])
                continue
            comment = comment["data"]
            yield comment
            if comment["replies"]:
                queue += comment["replies"]["data"]["children"]
+        if link_id and extra:
+            yield from self.morechildren(link_id, extra)