diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 54da76a1..b8f58be2 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -934,7 +934,7 @@ Consider all listed sites to potentially be NSFW.
Tumblr |
https://www.tumblr.com/ |
- Days, Likes, Posts, Tag Searches, User Profiles |
+ Days, Likes, Posts, Search Results, Tag Searches, User Profiles |
OAuth |
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 73455d2f..5dcd3374 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -21,8 +21,8 @@ BASE_PATTERN = (
r"([\w-]+\.tumblr\.com)))"
)
-POST_TYPES = frozenset((
- "text", "quote", "link", "answer", "video", "audio", "photo", "chat"))
+POST_TYPES = frozenset(("text", "quote", "link", "answer", "video",
+ "audio", "photo", "chat", "search"))
class TumblrExtractor(Extractor):
@@ -83,14 +83,21 @@ class TumblrExtractor(Extractor):
return
if post["type"] not in self.types:
continue
- if not blog:
- blog = self.api.info(self.blog)
- blog["uuid"] = self.blog
- if self.avatar:
- url = self.api.avatar(self.blog)
- yield Message.Directory, {"blog": blog}
- yield self._prepare_avatar(url, post.copy(), blog)
+ if "blog" in post:
+ blog = post["blog"]
+ self.blog = blog["name"] + ".tumblr.com"
+ else:
+ if not blog:
+ blog = self.api.info(self.blog)
+ blog["uuid"] = self.blog
+
+ if self.avatar:
+ url = self.api.avatar(self.blog)
+ yield Message.Directory, {"blog": blog}
+ yield self._prepare_avatar(url, post.copy(), blog)
+
+ post["blog"] = blog
reblog = "reblogged_from_id" in post
if reblog and self._skip_reblog(post):
@@ -99,7 +106,6 @@ class TumblrExtractor(Extractor):
if "trail" in post:
del post["trail"]
- post["blog"] = blog
post["date"] = text.parse_timestamp(post["timestamp"])
posts = []
@@ -349,6 +355,19 @@ class TumblrLikesExtractor(TumblrExtractor):
return self.api.likes(self.blog)
+class TumblrSearchExtractor(TumblrExtractor):
+    """Extractor for Tumblr search results"""
+    subcategory = "search"
+    pattern = (BASE_PATTERN + r"/search/([^/?#]+)"
+               r"(?:/([^/?#]+)(?:/([^/?#]+))?)?(?:/?\?([^#]+))?")
+    example = "https://www.tumblr.com/search/QUERY"
+
+    def posts(self):
+        term, mode, kind, qs = self.groups[3:]  # skip BASE_PATTERN groups
+        options = text.parse_query(qs)
+        return self.api.search(text.unquote(term), options, mode, kind)
+
+
class TumblrAPI(oauth.OAuth1API):
"""Interface for the Tumblr API v2
@@ -394,7 +413,8 @@ class TumblrAPI(oauth.OAuth1API):
if self.before and params["offset"]:
self.log.warning("'offset' and 'date-max' cannot be used together")
- return self._pagination(blog, "/posts", params, cache=True)
+ endpoint = "/v2/blog/{}/posts".format(blog)
+ return self._pagination(endpoint, params, blog=blog, cache=True)
def likes(self, blog):
"""Retrieve liked posts"""
@@ -410,6 +430,20 @@ class TumblrAPI(oauth.OAuth1API):
yield from posts
params["before"] = posts[-1]["liked_timestamp"]
+    def search(self, query, params, mode="top", post_type=None):
+        """Retrieve results for 'query'; a falsy 'mode' means 'top'"""
+        endpoint = "/v2/timeline/search"
+
+        params["limit"] = "50"
+        params["days"] = params.pop("t", None)
+        params["query"] = query
+        params["mode"] = mode or "top"  # URL without a mode yields None
+        params["reblog_info"] = "true" if self.extractor.reblogs else "false"
+        if post_type:
+            params["post_type_filter"] = post_type
+
+        return self._pagination(endpoint, params)
+
def _call(self, endpoint, params, **kwargs):
url = self.ROOT + endpoint
kwargs["params"] = params
@@ -478,20 +512,28 @@ class TumblrAPI(oauth.OAuth1API):
raise exception.StopExtraction(data)
- def _pagination(self, blog, endpoint, params, key="posts", cache=False):
- endpoint = "/v2/blog/{}{}".format(blog, endpoint)
+ def _pagination(self, endpoint, params,
+ blog=None, key="posts", cache=False):
if self.api_key:
params["api_key"] = self.api_key
strategy = self.extractor.config("pagination")
+ if not strategy and "offset" not in params:
+ strategy = "api"
+
while True:
data = self._call(endpoint, params)
- if cache:
- self.BLOG_CACHE[blog] = data["blog"]
- cache = False
+ if "timeline" in data:
+ data = data["timeline"]
+ posts = data["elements"]
+
+ else:
+ if cache:
+ self.BLOG_CACHE[blog] = data["blog"]
+ cache = False
+ posts = data[key]
- posts = data[key]
yield from posts
if strategy == "api":
diff --git a/test/results/tumblr.py b/test/results/tumblr.py
index 4d0d6abc..50b67676 100644
--- a/test/results/tumblr.py
+++ b/test/results/tumblr.py
@@ -360,4 +360,21 @@ __tests__ = (
"#class" : tumblr.TumblrLikesExtractor,
},
+{
+ "#url" : "https://www.tumblr.com/search/nathan fielder",
+ "#category": ("", "tumblr", "search"),
+ "#class" : tumblr.TumblrSearchExtractor,
+},
+
+{
+ "#url" : "https://www.tumblr.com/search/nathan fielder/recent/quote?src=typed_query",
+ "#category": ("", "tumblr", "search"),
+ "#class" : tumblr.TumblrSearchExtractor,
+},
+
+{
+ "#url" : "https://www.tumblr.com/search/nathan%20fielder?t=90",
+ "#category": ("", "tumblr", "search"),
+ "#class" : tumblr.TumblrSearchExtractor,
+},
)