1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 18:53:21 +01:00

[gelbooru_v02] implement 'notes' extraction

same code as for 'moebooru' works here as well
This commit is contained in:
Mike Fährmann 2022-11-04 12:11:43 +01:00
parent 942bc84962
commit 88954aa2e4
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

View File

@ -110,6 +110,23 @@ class GelbooruV02Extractor(booru.BooruExtractor):
for key, value in tags.items():
post["tags_" + key] = " ".join(value)
def _notes(self, post, page):
    """Collect the note boxes found in 'page' and store them in 'post'.

    Nothing is written to 'post' when the region between
    'id="note-container"' and '<img ' cannot be extracted from 'page'.
    Otherwise post["notes"] is set to a list holding one dict per
    'class="note-box"' fragment, with the integer keys "width", "height",
    "y", "x", "id" and the cleaned-up text under "body".

    Raises ValueError (from int()) if one of the numeric fields of a
    fragment is not an integer literal.
    """
    container = text.extract(page, 'id="note-container"', "<img ")[0]
    if not container:
        return

    # register the list up front, exactly like the result is built in place
    collected = []
    post["notes"] = collected

    # whatever precedes the first 'class="note-box"' marker is not a note
    fragments = container.split('class="note-box"')[1:]
    for fragment in fragments:
        # NOTE(review): the callable returned by text.extract_from()
        # appears to advance through 'fragment' with every call, so the
        # lookups below have to stay in this exact order -- confirm
        # against the 'text' module
        read = text.extract_from(fragment)

        # numeric values are read up to the next "p"
        # (presumably the start of a 'px' unit suffix)
        width = int(read("width:", "p"))
        height = int(read("height:", "p"))
        top = int(read("top:", "p"))
        left = int(read("left:", "p"))
        note_id = int(read('id="note-body-', '"'))

        raw_body = read(">", "</div>")
        body = text.unescape(text.remove_html(raw_body))

        collected.append({
            "width" : width,
            "height": height,
            "y"     : top,
            "x"     : left,
            "id"    : note_id,
            "body"  : body,
        })
INSTANCES = {
"realbooru": {
@ -285,15 +302,81 @@ class GelbooruV02PostExtractor(GelbooruV02Extractor):
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)"
test = (
("https://rule34.xxx/index.php?page=post&s=view&id=1995545", {
"content": "97e4bbf86c3860be18de384d02d544251afe1d45",
"options": (("tags", True),),
("https://rule34.xxx/index.php?page=post&s=view&id=863", {
"pattern": r"https://api-cdn\.rule34\.xxx/images"
r"/1/6aafbdb3e22f3f3b412ea2cf53321317a37063f3\.jpg",
"content": ("a43f418aa350039af0d11cae501396a33bbe2201",
"67b516295950867e1c1ab6bc13b35d3b762ed2a3"),
"options": (("tags", True), ("notes", True)),
"keyword": {
"tags_artist": "danraku",
"tags_character": "kashima_(kantai_collection)",
"tags_copyright": "kantai_collection",
"tags_artist": "reverse_noise yamu_(reverse_noise)",
"tags_character": "hong_meiling",
"tags_copyright": "touhou",
"tags_general": str,
"tags_metadata": str,
"tags_metadata": "censored translated",
"notes": [
{
"body": "It feels angry, I'm losing myself... "
"It won't calm down!",
"height": 65,
"id": 93586,
"width": 116,
"x": 22,
"y": 333,
},
{
"body": "REPUTATION OF RAGE",
"height": 272,
"id": 93587,
"width": 199,
"x": 78,
"y": 442,
},
],
},
}),
("https://hypnohub.net/index.php?page=post&s=view&id=1439", {
"pattern": r"https://hypnohub\.net/images"
r"/90/24/90245c3c5250c2a8173255d3923a010b\.jpg",
"content": "5987c5d2354f22e5fa9b7ee7ce4a6f7beb8b2b71",
"options": (("tags", True), ("notes", True)),
"keyword": {
"tags_artist": "brokenteapot",
"tags_character": "hsien-ko",
"tags_copyright": "capcom darkstalkers",
"tags_general": str,
"tags_metadata": "dialogue text translated",
"notes": [
{
"body": "Master Master Master "
"Master Master Master",
"height": 83,
"id": 10577,
"width": 129,
"x": 259,
"y": 20,
},
{
"body": "Response Response Response "
"Response Response Response",
"height": 86,
"id": 10578,
"width": 125,
"x": 126,
"y": 20,
},
{
"body": "Obedience Obedience Obedience "
"Obedience Obedience Obedience",
"height": 80,
"id": 10579,
"width": 98,
"x": 20,
"y": 20,
},
],
},
}),
("https://safebooru.org/index.php?page=post&s=view&id=1169132", {
@ -316,11 +399,6 @@ class GelbooruV02PostExtractor(GelbooruV02Extractor):
"url": "5a6ebe07bfff8e6d27f7c30b5480f27abcb577d2",
"content": "1c3831b6fbaa4686e3c79035b5d98460b1c85c43",
}),
("https://hypnohub.net/index.php?page=post&s=view&id=73964", {
"pattern": r"https://hypnohub\.net/images/7a/37"
r"/7a37c0ba372f35767fb10c904a398831\.png",
"content": "02d5f5a8396b621a6efc04c5f8ef1b7225dfc6ee",
}),
)
def __init__(self, match):