mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-21 18:22:30 +01:00
merge #6394: [tumblr] add 'search' extractor
This commit is contained in:
commit
6205e255f4
@ -934,7 +934,7 @@ Consider all listed sites to potentially be NSFW.
|
||||
<tr>
|
||||
<td>Tumblr</td>
|
||||
<td>https://www.tumblr.com/</td>
|
||||
<td>Days, Likes, Posts, Tag Searches, User Profiles</td>
|
||||
<td>Days, Likes, Posts, Search Results, Tag Searches, User Profiles</td>
|
||||
<td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
@ -21,8 +21,8 @@ BASE_PATTERN = (
|
||||
r"([\w-]+\.tumblr\.com)))"
|
||||
)
|
||||
|
||||
POST_TYPES = frozenset((
|
||||
"text", "quote", "link", "answer", "video", "audio", "photo", "chat"))
|
||||
POST_TYPES = frozenset(("text", "quote", "link", "answer", "video",
|
||||
"audio", "photo", "chat", "search"))
|
||||
|
||||
|
||||
class TumblrExtractor(Extractor):
|
||||
@ -83,14 +83,21 @@ class TumblrExtractor(Extractor):
|
||||
return
|
||||
if post["type"] not in self.types:
|
||||
continue
|
||||
if not blog:
|
||||
blog = self.api.info(self.blog)
|
||||
blog["uuid"] = self.blog
|
||||
|
||||
if self.avatar:
|
||||
url = self.api.avatar(self.blog)
|
||||
yield Message.Directory, {"blog": blog}
|
||||
yield self._prepare_avatar(url, post.copy(), blog)
|
||||
if "blog" in post:
|
||||
blog = post["blog"]
|
||||
self.blog = blog["name"] + ".tumblr.com"
|
||||
else:
|
||||
if not blog:
|
||||
blog = self.api.info(self.blog)
|
||||
blog["uuid"] = self.blog
|
||||
|
||||
if self.avatar:
|
||||
url = self.api.avatar(self.blog)
|
||||
yield Message.Directory, {"blog": blog}
|
||||
yield self._prepare_avatar(url, post.copy(), blog)
|
||||
|
||||
post["blog"] = blog
|
||||
|
||||
reblog = "reblogged_from_id" in post
|
||||
if reblog and self._skip_reblog(post):
|
||||
@ -99,7 +106,6 @@ class TumblrExtractor(Extractor):
|
||||
|
||||
if "trail" in post:
|
||||
del post["trail"]
|
||||
post["blog"] = blog
|
||||
post["date"] = text.parse_timestamp(post["timestamp"])
|
||||
posts = []
|
||||
|
||||
@ -349,6 +355,19 @@ class TumblrLikesExtractor(TumblrExtractor):
|
||||
return self.api.likes(self.blog)
|
||||
|
||||
|
||||
class TumblrSearchExtractor(TumblrExtractor):
|
||||
"""Extractor for a Tumblr search"""
|
||||
subcategory = "search"
|
||||
pattern = (BASE_PATTERN + r"/search/([^/?#]+)"
|
||||
r"(?:/([^/?#]+)(?:/([^/?#]+))?)?(?:/?\?([^#]+))?")
|
||||
example = "https://www.tumblr.com/search/QUERY"
|
||||
|
||||
def posts(self):
|
||||
_, _, _, search, mode, post_type, query = self.groups
|
||||
params = text.parse_query(query)
|
||||
return self.api.search(text.unquote(search), params, mode, post_type)
|
||||
|
||||
|
||||
class TumblrAPI(oauth.OAuth1API):
|
||||
"""Interface for the Tumblr API v2
|
||||
|
||||
@ -394,7 +413,8 @@ class TumblrAPI(oauth.OAuth1API):
|
||||
if self.before and params["offset"]:
|
||||
self.log.warning("'offset' and 'date-max' cannot be used together")
|
||||
|
||||
return self._pagination(blog, "/posts", params, cache=True)
|
||||
endpoint = "/v2/blog/{}/posts".format(blog)
|
||||
return self._pagination(endpoint, params, blog=blog, cache=True)
|
||||
|
||||
def likes(self, blog):
|
||||
"""Retrieve liked posts"""
|
||||
@ -410,6 +430,20 @@ class TumblrAPI(oauth.OAuth1API):
|
||||
yield from posts
|
||||
params["before"] = posts[-1]["liked_timestamp"]
|
||||
|
||||
def search(self, query, params, mode="top", post_type=None):
|
||||
"""Retrieve search results"""
|
||||
endpoint = "/v2/timeline/search"
|
||||
|
||||
params["limit"] = "50"
|
||||
params["days"] = params.pop("t", None)
|
||||
params["query"] = query
|
||||
params["mode"] = mode
|
||||
params["reblog_info"] = "true" if self.extractor.reblogs else "false"
|
||||
if post_type:
|
||||
params["post_type_filter"] = post_type
|
||||
|
||||
return self._pagination(endpoint, params)
|
||||
|
||||
def _call(self, endpoint, params, **kwargs):
|
||||
url = self.ROOT + endpoint
|
||||
kwargs["params"] = params
|
||||
@ -478,20 +512,28 @@ class TumblrAPI(oauth.OAuth1API):
|
||||
|
||||
raise exception.StopExtraction(data)
|
||||
|
||||
def _pagination(self, blog, endpoint, params, key="posts", cache=False):
|
||||
endpoint = "/v2/blog/{}{}".format(blog, endpoint)
|
||||
def _pagination(self, endpoint, params,
|
||||
blog=None, key="posts", cache=False):
|
||||
if self.api_key:
|
||||
params["api_key"] = self.api_key
|
||||
|
||||
strategy = self.extractor.config("pagination")
|
||||
if not strategy and "offset" not in params:
|
||||
strategy = "api"
|
||||
|
||||
while True:
|
||||
data = self._call(endpoint, params)
|
||||
|
||||
if cache:
|
||||
self.BLOG_CACHE[blog] = data["blog"]
|
||||
cache = False
|
||||
if "timeline" in data:
|
||||
data = data["timeline"]
|
||||
posts = data["elements"]
|
||||
|
||||
else:
|
||||
if cache:
|
||||
self.BLOG_CACHE[blog] = data["blog"]
|
||||
cache = False
|
||||
posts = data[key]
|
||||
|
||||
posts = data[key]
|
||||
yield from posts
|
||||
|
||||
if strategy == "api":
|
||||
|
@ -360,4 +360,21 @@ __tests__ = (
|
||||
"#class" : tumblr.TumblrLikesExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.tumblr.com/search/nathan fielder",
|
||||
"#category": ("", "tumblr", "search"),
|
||||
"#class" : tumblr.TumblrSearchExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.tumblr.com/search/nathan fielder/recent/quote?src=typed_query",
|
||||
"#category": ("", "tumblr", "search"),
|
||||
"#class" : tumblr.TumblrSearchExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.tumblr.com/search/nathan%20fielder?t=90",
|
||||
"#category": ("", "tumblr", "search"),
|
||||
"#class" : tumblr.TumblrSearchExtractor,
|
||||
},
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user