1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-21 18:22:30 +01:00

merge #6394: [tumblr] add 'search' extractor

This commit is contained in:
Mike Fährmann 2024-11-08 08:17:46 +01:00
commit 6205e255f4
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
3 changed files with 77 additions and 18 deletions

View File

@ -934,7 +934,7 @@ Consider all listed sites to potentially be NSFW.
<tr>
<td>Tumblr</td>
<td>https://www.tumblr.com/</td>
<td>Days, Likes, Posts, Tag Searches, User Profiles</td>
<td>Days, Likes, Posts, Search Results, Tag Searches, User Profiles</td>
<td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
</tr>
<tr>

View File

@ -21,8 +21,8 @@ BASE_PATTERN = (
r"([\w-]+\.tumblr\.com)))"
)
# Previous definition of the known post types (superseded by the
# assignment that follows it).
POST_TYPES = frozenset((
"text", "quote", "link", "answer", "video", "audio", "photo", "chat"))
# Current definition: the same post types plus "search".
POST_TYPES = frozenset(("text", "quote", "link", "answer", "video",
"audio", "photo", "chat", "search"))
class TumblrExtractor(Extractor):
@ -83,14 +83,21 @@ class TumblrExtractor(Extractor):
return
if post["type"] not in self.types:
continue
if not blog:
blog = self.api.info(self.blog)
blog["uuid"] = self.blog
if self.avatar:
url = self.api.avatar(self.blog)
yield Message.Directory, {"blog": blog}
yield self._prepare_avatar(url, post.copy(), blog)
if "blog" in post:
blog = post["blog"]
self.blog = blog["name"] + ".tumblr.com"
else:
if not blog:
blog = self.api.info(self.blog)
blog["uuid"] = self.blog
if self.avatar:
url = self.api.avatar(self.blog)
yield Message.Directory, {"blog": blog}
yield self._prepare_avatar(url, post.copy(), blog)
post["blog"] = blog
reblog = "reblogged_from_id" in post
if reblog and self._skip_reblog(post):
@ -99,7 +106,6 @@ class TumblrExtractor(Extractor):
if "trail" in post:
del post["trail"]
post["blog"] = blog
post["date"] = text.parse_timestamp(post["timestamp"])
posts = []
@ -349,6 +355,19 @@ class TumblrLikesExtractor(TumblrExtractor):
return self.api.likes(self.blog)
class TumblrSearchExtractor(TumblrExtractor):
    """Extractor for the results of a Tumblr search query"""
    subcategory = "search"
    example = "https://www.tumblr.com/search/QUERY"
    pattern = (BASE_PATTERN + r"/search/([^/?#]+)"
               r"(?:/([^/?#]+)(?:/([^/?#]+))?)?(?:/?\?([^#]+))?")

    def posts(self):
        """Return the search results for the URL's search term

        The last four match groups are, in order: the search term,
        the optional mode and post type path segments, and the
        optional URL query string.
        """
        term, mode, post_type, query_string = self.groups[3:]
        extra_params = text.parse_query(query_string)
        search_term = text.unquote(term)
        return self.api.search(search_term, extra_params, mode, post_type)
class TumblrAPI(oauth.OAuth1API):
"""Interface for the Tumblr API v2
@ -394,7 +413,8 @@ class TumblrAPI(oauth.OAuth1API):
if self.before and params["offset"]:
self.log.warning("'offset' and 'date-max' cannot be used together")
return self._pagination(blog, "/posts", params, cache=True)
endpoint = "/v2/blog/{}/posts".format(blog)
return self._pagination(endpoint, params, blog=blog, cache=True)
def likes(self, blog):
"""Retrieve liked posts"""
@ -410,6 +430,20 @@ class TumblrAPI(oauth.OAuth1API):
yield from posts
params["before"] = posts[-1]["liked_timestamp"]
def search(self, query, params, mode="top", post_type=None):
    """Retrieve search results

    Builds the query parameters for the '/v2/timeline/search'
    endpoint and hands them to self._pagination().

    query      -- search term, sent as the 'query' parameter
    params     -- dict of additional query parameters; it is modified
                  in place. A 't' entry is moved to 'days'
                  ('days' is None when 't' is absent).
    mode       -- value of the 'mode' parameter; a falsy value
                  (e.g. None for a URL without a mode path segment)
                  falls back to "top"
    post_type  -- optional value for 'post_type_filter'; the
                  parameter is only set when this is truthy

    Returns the result of self._pagination(endpoint, params).
    """
    endpoint = "/v2/timeline/search"

    params["limit"] = "50"
    params["days"] = params.pop("t", None)
    params["query"] = query
    # Callers may pass mode=None explicitly, which would otherwise
    # bypass the "top" default declared in the signature.
    params["mode"] = mode or "top"
    params["reblog_info"] = "true" if self.extractor.reblogs else "false"

    if post_type:
        params["post_type_filter"] = post_type

    return self._pagination(endpoint, params)
def _call(self, endpoint, params, **kwargs):
url = self.ROOT + endpoint
kwargs["params"] = params
@ -478,20 +512,28 @@ class TumblrAPI(oauth.OAuth1API):
raise exception.StopExtraction(data)
def _pagination(self, blog, endpoint, params, key="posts", cache=False):
endpoint = "/v2/blog/{}{}".format(blog, endpoint)
def _pagination(self, endpoint, params,
blog=None, key="posts", cache=False):
if self.api_key:
params["api_key"] = self.api_key
strategy = self.extractor.config("pagination")
if not strategy and "offset" not in params:
strategy = "api"
while True:
data = self._call(endpoint, params)
if cache:
self.BLOG_CACHE[blog] = data["blog"]
cache = False
if "timeline" in data:
data = data["timeline"]
posts = data["elements"]
else:
if cache:
self.BLOG_CACHE[blog] = data["blog"]
cache = False
posts = data[key]
posts = data[key]
yield from posts
if strategy == "api":

View File

@ -360,4 +360,21 @@ __tests__ = (
"#class" : tumblr.TumblrLikesExtractor,
},
{
"#url" : "https://www.tumblr.com/search/nathan fielder",
"#category": ("", "tumblr", "search"),
"#class" : tumblr.TumblrSearchExtractor,
},
{
"#url" : "https://www.tumblr.com/search/nathan fielder/recent/quote?src=typed_query",
"#category": ("", "tumblr", "search"),
"#class" : tumblr.TumblrSearchExtractor,
},
{
"#url" : "https://www.tumblr.com/search/nathan%20fielder?t=90",
"#category": ("", "tumblr", "search"),
"#class" : tumblr.TumblrSearchExtractor,
},
)