mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 10:42:34 +01:00
[gelbooru] tag-splitting for non-api mode
This commit is contained in:
parent
2eefaa99a3
commit
1d43cbbf52
@ -621,6 +621,8 @@ extractor.3dbooru.tags
|
||||
----------------------
|
||||
extractor.e621.tags
|
||||
-------------------
|
||||
extractor.gelbooru.tags
|
||||
-----------------------
|
||||
extractor.konachan.tags
|
||||
-----------------------
|
||||
extractor.rule34.tags
|
||||
|
@ -31,10 +31,7 @@ class BooruExtractor(SharedConfigExtractor):
|
||||
def __init__(self, match):
|
||||
super().__init__()
|
||||
self.params = {}
|
||||
self.prepare = None
|
||||
|
||||
if self.post_url and self.config("tags", False):
|
||||
self.prepare = self._extended_tags
|
||||
self.extags = self.post_url and self.config("tags", False)
|
||||
|
||||
def skip(self, num):
|
||||
pages = num // self.per_page
|
||||
@ -62,8 +59,8 @@ class BooruExtractor(SharedConfigExtractor):
|
||||
if url.startswith("/"):
|
||||
url = text.urljoin(self.api_url, url)
|
||||
image.update(data)
|
||||
if self.prepare:
|
||||
self.prepare(image)
|
||||
if self.extags:
|
||||
self.extended_tags(image)
|
||||
yield Message.Url, url, text.nameext_from_url(url, image)
|
||||
|
||||
if len(images) < self.per_page:
|
||||
@ -89,17 +86,16 @@ class BooruExtractor(SharedConfigExtractor):
|
||||
"""Collect metadata for extractor-job"""
|
||||
return {}
|
||||
|
||||
def _extended_tags(self, image):
|
||||
def extended_tags(self, image, page=None):
|
||||
"""Retrieve extended tag information"""
|
||||
url = self.post_url.format(image["id"])
|
||||
page = self.request(url).text
|
||||
tag_html = text.extract(page, '<ul id="tag-', '</ul>')[0]
|
||||
|
||||
if not page:
|
||||
url = self.post_url.format(image["id"])
|
||||
page = self.request(url).text
|
||||
tags = collections.defaultdict(list)
|
||||
tags_html = text.extract(page, '<ul id="tag-', '</ul>')[0]
|
||||
pattern = re.compile(r"tag-type-([^\"' ]+).*?[?;]tags=([^\"']+)", re.S)
|
||||
for tag_type, tag_name in pattern.findall(tag_html):
|
||||
for tag_type, tag_name in pattern.findall(tags_html):
|
||||
tags[tag_type].append(text.unquote(tag_name))
|
||||
|
||||
for key, value in tags.items():
|
||||
image["tags_" + key] = " ".join(value)
|
||||
|
||||
@ -185,7 +181,7 @@ class GelbooruPoolMixin(PoolMixin):
|
||||
|
||||
return {
|
||||
"pool": text.parse_int(self.pool),
|
||||
"pool_name": text.unescape(name or ""),
|
||||
"pool_name": text.unescape(name),
|
||||
"count": len(self.posts),
|
||||
}
|
||||
|
||||
|
@ -68,6 +68,8 @@ class GelbooruExtractor(booru.XmlParserMixin,
|
||||
data["rating"] = (data["rating"] or "?")[0].lower()
|
||||
data["tags"] = " ".join(
|
||||
[tag.replace(" ", "_") for tag in data["tags"].split(", ")])
|
||||
if self.extags:
|
||||
self.extended_tags(data, page)
|
||||
return data
|
||||
|
||||
|
||||
|
@ -68,7 +68,7 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
|
||||
pattern = [r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to"
|
||||
r"/Comic/[^/?&#]+/[^/?&#]+\?id=(\d+)"]
|
||||
test = [("http://readcomiconline.to/Comic/W-i-t-c-h/Issue-130?id=22289", {
|
||||
"url": "a45c77f8fbde66091fe2346d6341f9cf3c6b1bc5",
|
||||
"url": "2bbab6ec4fbc05d269cca420a82a9b5acda28682",
|
||||
"keyword": "c6de1c9c8a307dc4be56783c4ac6f1338ffac6fc",
|
||||
})]
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user