mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-25 04:02:32 +01:00
[booru] add base classes for tag/pool/post extractors
This commit is contained in:
parent
900577b013
commit
74199422a8
@ -22,7 +22,8 @@ class BooruExtractor(Extractor):
|
||||
def __init__(self):
|
||||
Extractor.__init__(self)
|
||||
self.page = "page"
|
||||
self.headers = self.params = {}
|
||||
self.params = {"limit": 50}
|
||||
self.headers = {}
|
||||
|
||||
def items(self):
|
||||
yield Message.Version, 1
|
||||
@ -50,12 +51,12 @@ class BooruExtractor(Extractor):
|
||||
"""Collect metadata for extractor-job"""
|
||||
# Override this method in derived classes
|
||||
return {
|
||||
"category": self.info["category"],
|
||||
"category": self.category,
|
||||
}
|
||||
|
||||
def get_file_metadata(self, data):
|
||||
"""Collect metadata for a downloadable file"""
|
||||
data["category"] = self.info["category"]
|
||||
data["category"] = self.category
|
||||
return text.nameext_from_url(self.get_file_url(data), data)
|
||||
|
||||
def get_file_url(self, data):
|
||||
@ -75,10 +76,10 @@ class JSONBooruExtractor(BooruExtractor):
|
||||
self.request(self.api_url, verify=True, params=self.params,
|
||||
headers=self.headers).text
|
||||
)
|
||||
if len(images) == 0:
|
||||
return
|
||||
for data in images:
|
||||
yield data
|
||||
if len(images) < self.params["limit"]:
|
||||
return
|
||||
self.update_page()
|
||||
|
||||
|
||||
@ -90,8 +91,56 @@ class XMLBooruExtractor(BooruExtractor):
|
||||
root = ET.fromstring(
|
||||
self.request(self.api_url, verify=True, params=self.params).text
|
||||
)
|
||||
if len(root) == 0:
|
||||
return
|
||||
for item in root:
|
||||
yield item.attrib
|
||||
if len(root) < self.params["limit"]:
|
||||
return
|
||||
self.update_page()
|
||||
|
||||
|
||||
class BooruTagExtractor(BooruExtractor):
|
||||
"""Extract images based on search-tags"""
|
||||
|
||||
directory_fmt = ["{category}", "{tags}"]
|
||||
filename_fmt = "{category}_{id}_{md5}.{extension}"
|
||||
|
||||
def __init__(self, match):
|
||||
BooruExtractor.__init__(self)
|
||||
self.tags = text.unquote(match.group(1))
|
||||
self.params["tags"] = self.tags
|
||||
|
||||
def get_job_metadata(self):
|
||||
return {
|
||||
"category": self.category,
|
||||
"tags": self.tags,
|
||||
}
|
||||
|
||||
|
||||
class BooruPoolExtractor(BooruExtractor):
|
||||
"""Extract image-pools"""
|
||||
|
||||
directory_fmt = ["{category}", "pool", "{pool}"]
|
||||
filename_fmt = "{category}_{id}_{md5}.{extension}"
|
||||
|
||||
def __init__(self, match):
|
||||
BooruExtractor.__init__(self)
|
||||
self.pool = match.group(1)
|
||||
self.params["tags"] = "pool:" + self.pool
|
||||
|
||||
def get_job_metadata(self):
|
||||
return {
|
||||
"category": self.category,
|
||||
"pool": self.pool,
|
||||
}
|
||||
|
||||
|
||||
class BooruPostExtractor(BooruExtractor):
|
||||
"""Extract single images"""
|
||||
|
||||
directory_fmt = ["{category}"]
|
||||
filename_fmt = "{category}_{id}_{md5}.{extension}"
|
||||
|
||||
def __init__(self, match):
|
||||
BooruExtractor.__init__(self)
|
||||
self.post = match.group(1)
|
||||
self.params["tags"] = "id:" + self.post
|
||||
|
Loading…
Reference in New Issue
Block a user