diff --git a/gallery_dl/extractor/agnph.py b/gallery_dl/extractor/agnph.py index 7bceb9a6..653b73f1 100644 --- a/gallery_dl/extractor/agnph.py +++ b/gallery_dl/extractor/agnph.py @@ -12,6 +12,8 @@ from . import booru from .. import text from xml.etree import ElementTree +import collections +import re BASE_PATTERN = r"(?:https?://)?agn\.ph" @@ -22,6 +24,17 @@ class AgnphExtractor(booru.BooruExtractor): page_start = 1 per_page = 45 + TAG_TYPES = { + "a": "artist", + "b": "copyright", + "c": "character", + "d": "species", + "m": "general", + } + + def _init(self): + self.cookies.set("confirmed_age", "true", domain="agn.ph") + def _prepare(self, post): post["date"] = text.parse_timestamp(post["created_at"]) post["status"] = post["status"].strip() @@ -50,6 +63,23 @@ class AgnphExtractor(booru.BooruExtractor): params["page"] += 1 + def _html(self, post): + url = "{}/gallery/post/show/{}/".format(self.root, post["id"]) + return self.request(url).text + + def _tags(self, post, page): + tag_container = text.extr( + page, '