1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2025-01-31 19:51:34 +01:00

[wallhaven] extract 'search[tags]' and 'search[tag_id]' metadata

(#6772)
This commit is contained in:
Mike Fährmann 2025-01-06 17:18:04 +01:00
parent 270aaea8ab
commit 46b6b71159
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
2 changed files with 38 additions and 4 deletions

View File

@ -54,7 +54,7 @@ class WallhavenExtractor(Extractor):
class WallhavenSearchExtractor(WallhavenExtractor):
"""Extractor for search results on wallhaven.cc"""
subcategory = "search"
directory_fmt = ("{category}", "{search[q]}")
directory_fmt = ("{category}", "{search[tags]}")
archive_fmt = "s_{search[q]}_{id}"
pattern = r"(?:https?://)?wallhaven\.cc/search(?:/?\?([^#]+))?"
example = "https://wallhaven.cc/search?q=QUERY"
@ -64,7 +64,7 @@ class WallhavenSearchExtractor(WallhavenExtractor):
self.params = text.parse_query(match.group(1))
def wallpapers(self):
# Return the result of `self.api.search(...)` for this extractor's
# query parameters (`self.params`).
# NOTE(review): two consecutive `return` statements follow; as written,
# only the first one is reachable. They differ only in whether a copy of
# `self.params` or the dict itself is handed to the API -- presumably a
# before/after pair from a diff view; confirm which line is current.
return self.api.search(self.params.copy())
return self.api.search(self.params)
def metadata(self):
# Expose the query parameters under the "search" key. The dict object
# itself is returned (no copy), so later changes to `self.params` are
# visible through the returned mapping.
return {"search": self.params}
@ -141,7 +141,7 @@ class WallhavenUploadsExtractor(WallhavenExtractor):
def wallpapers(self):
# Search with a query consisting of "@" followed by `self.username`.
params = {"q": "@" + self.username}
# NOTE(review): two consecutive `return` statements follow; as written,
# only the first one is reachable. They differ only in whether a copy of
# `params` or the dict itself is passed -- presumably a before/after pair
# from a diff view; confirm which line is current.
return self.api.search(params.copy())
return self.api.search(params)
def metadata(self):
# Provide the username as the only metadata field.
return {"username": self.username}
@ -215,20 +215,35 @@ class WallhavenAPI():
def _pagination(self, endpoint, params=None, metadata=None):
# Generator: call `self._call(endpoint, params)` repeatedly and yield the
# entries of each response's "data" list, advancing the "page" parameter
# until the response's "meta" reports the last page.
#
# endpoint: passed unchanged to `self._call`.
# params:   optional dict of request parameters. When given, requests use
#           a copy of it (so the "page" key is only added to the copy),
#           while the caller's dict receives "tags" and "tag_id" entries
#           taken from the response's meta["query"].
# metadata: when None, the value of `self.extractor.config("metadata")` is
#           used; when truthy, each entry is replaced by the result of
#           `self.info(str(entry["id"]))`.
#
# NOTE(review): indentation is not preserved in this view, so the nesting
# described in the comments below is inferred from statement order.
if params is None:
params_ptr = None
params = {}
else:
# Keep a reference to the caller's dict for the write-back below and
# work on a copy for the actual requests.
params_ptr = params
params = params.copy()
if metadata is None:
metadata = self.extractor.config("metadata")
while True:
data = self._call(endpoint, params)
meta = data.get("meta")
if params_ptr is not None:
if meta and "query" in meta:
query = meta["query"]
# A dict-valued "query" supplies its "tag" and "id" entries; any other
# value is stored as-is under "tags" with "tag_id" set to 0.
if isinstance(query, dict):
params_ptr["tags"] = query.get("tag")
params_ptr["tag_id"] = query.get("id")
else:
params_ptr["tags"] = query
params_ptr["tag_id"] = 0
# Drop the reference so the write-back is not repeated on later pages
# (presumably at the level of the `params_ptr is not None` check, i.e.
# attempted for the first response only -- confirm against the repository).
params_ptr = None
if metadata:
for wp in data["data"]:
yield self.info(str(wp["id"]))
else:
yield from data["data"]
# NOTE(review): repeats the `data.get("meta")` lookup made right after the
# request; looks redundant here -- possibly a removed line from the diff.
meta = data.get("meta")
# Stop when there is no usable "meta" or the current page is the last one.
if not meta or meta["current_page"] >= meta["last_page"]:
return
params["page"] = meta["current_page"] + 1

View File

@ -12,6 +12,14 @@ __tests__ = (
"#url" : "https://wallhaven.cc/search?q=touhou",
"#category": ("", "wallhaven", "search"),
"#class" : wallhaven.WallhavenSearchExtractor,
"#pattern" : r"https://w\.wallhaven\.cc/full/\w\w/wallhaven-\w+\.\w+",
"#range" : "1-10",
"search": {
"q" : "touhou",
"tags" : "touhou",
"tag_id": 0,
},
},
{
@ -20,6 +28,17 @@ __tests__ = (
"#class" : wallhaven.WallhavenSearchExtractor,
"#pattern" : r"https://w\.wallhaven\.cc/full/\w\w/wallhaven-\w+\.\w+",
"#count" : "<= 30",
"search": {
"categories": "111",
"order" : "asc",
"page" : "3",
"purity" : "100",
"sorting" : "date_added",
"q" : "id:87",
"tags" : "Fujibayashi Kyou",
"tag_id" : 87,
},
},
{