[tumblr] implement 'pagination' option (#5880)

restore pagination behavior from before de670bd7de
2024-11-25 04:02:32 +01:00 · 2024-07-23 20:31:04 +02:00 · 2024-07-23 20:31:04 +02:00 · 540eaa5add
commit 540eaa5add
parent 7b445ec255
2 changed files with 52 additions and 10 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@ -3735,6 +3735,23 @@ Description
    use an extra HTTP request to find the URL to its full-resolution version.
 extractor.tumblr.pagination
 ---------------------------
 Type
    ``string``
 Default
    ``"offset"``
 Description
    Controls how to paginate over blog posts.

    * ``"api"``: ``next`` parameter provided by the API
      (potentially misses posts due to a
      `bug <https://github.com/tumblr/docs/issues/76>`__
      in Tumblr's API)
    * ``"before"``: ``before`` parameter set from the timestamp
      of the last post of the previous page
    * ``"offset"``: ``offset`` parameter increased by the number
      of posts requested per page
 extractor.tumblr.ratelimit
 --------------------------
 Type
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@ -386,7 +386,7 @@ class TumblrAPI(oauth.OAuth1API):
    def posts(self, blog, params):
        """Retrieve published posts"""
        params["offset"] = self.extractor.config("offset")
-        params["limit"] = "50"
+        params["limit"] = 50
        params["reblog_info"] = "true"
        params["type"] = self.posts_type
        params["before"] = self.before
@ -398,8 +398,14 @@ class TumblrAPI(oauth.OAuth1API):
    def likes(self, blog):
        """Retrieve liked posts"""
        endpoint = "/v2/blog/{}/likes".format(blog)
        params = {"limit": "50", "before": self.before}
-        return self._pagination(blog, "/likes", params, key="liked_posts")
+        while True:
            posts = self._call(endpoint, params)["liked_posts"]
            if not posts:
                return
            yield from posts
            params["before"] = posts[-1]["liked_timestamp"]
    def _call(self, endpoint, params, **kwargs):
        url = self.ROOT + endpoint
@ -474,6 +480,7 @@ class TumblrAPI(oauth.OAuth1API):
        if self.api_key:
            params["api_key"] = self.api_key
        strategy = self.extractor.config("pagination")
        while True:
            data = self._call(endpoint, params)
@ -481,13 +488,31 @@ class TumblrAPI(oauth.OAuth1API):
                self.BLOG_CACHE[blog] = data["blog"]
                cache = False
-            yield from data[key]
+            posts = data[key]
            yield from posts
-            try:
+            if strategy == "api":
-                endpoint = data["_links"]["next"]["href"]
+                try:
-            except KeyError:
+                    endpoint = data["_links"]["next"]["href"]
-                return
+                except KeyError:
                    return
-            params = None
+                params = None
-            if self.api_key:
+                if self.api_key:
-                endpoint += "&api_key=" + self.api_key
+                    endpoint += "&api_key=" + self.api_key
            elif strategy == "before":
                if not posts:
                    return
                timestamp = posts[-1]["timestamp"] + 1
                if params["before"] and timestamp >= params["before"]:
                    return
                params["before"] = timestamp
                params["offset"] = None
            else:  # offset
                params["offset"] = \
                    text.parse_int(params["offset"]) + params["limit"]
                params["before"] = None
                if params["offset"] >= data["total_posts"]:
                    return