From 46a3f666dffb43ef68f46f9f9ac39dc9d5d7ec73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 22 Oct 2016 18:48:09 +0200 Subject: [PATCH] [whentai] add single image extractor --- gallery_dl/extractor/whentai.py | 38 +++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/gallery_dl/extractor/whentai.py b/gallery_dl/extractor/whentai.py index 7a19755a..92962c1d 100644 --- a/gallery_dl/extractor/whentai.py +++ b/gallery_dl/extractor/whentai.py @@ -64,3 +64,41 @@ class WhentaiUserExtractor(Extractor): "title": title, } data["from"] = imageid + + +class WhentaiImageExtractor(Extractor): + """Extractor for single images from whentai.com""" + category = "whentai" + subcategory = "image" + directory_fmt = ["{category}", "{user}"] + filename_fmt = "{category}_{image-id:>05}_{title}.{extension}" + pattern = [r"(?:https?://)?(?:www\.)?whentai\.com/view/(\d+)"] + + def __init__(self, match): + Extractor.__init__(self) + self.imageid = match.group(1) + self.url = "http://whentai.com/view/" + self.imageid + self.session.headers["Referer"] = self.url + + def items(self): + data = self.get_image_metadata() + url = self.get_image_url(data["user"]) + yield Message.Version, 1 + yield Message.Directory, data + yield Message.Url, url, data + + def get_image_url(self, user): + data = {"type": "image", "cnt": "1", "paid": "0", "post": "1", + "from": str(int(self.imageid) + 1), "author": user.replace("_", " ")} + page = self.request("http://whentai.com/ajax/getuploadslist", + method="POST", data=data).text + return text.extract(page, 'src="', '"')[0].replace("/t2", "/") + + def get_image_metadata(self): + """Collect url and metadata for image""" + page = self.request(self.url).text + return text.extract_all(page, ( + ("title" , '
  • \n', '
  • '), + ("user-id", '/users/', '/'), + ("user" , '', '"'), + ), values={"image-id": self.imageid})[0]