diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 84497eb3..69ac8652 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -10,7 +10,6 @@ from .common import Extractor, Message from .. import config, text -import re import os.path import time import random @@ -27,7 +26,7 @@ info = { class ExhentaiExtractor(Extractor): - api_url = "http://exhentai.org/api.php" + api_url = "http://exhentai.org/api.php" def __init__(self, match): Extractor.__init__(self) @@ -64,39 +63,43 @@ class ExhentaiExtractor(Extractor): image["name"] = name image["extension"] = ext[1:] if "/fullimg.php" in image[urlkey]: - time.sleep( random.uniform(1, 2) ) + time.sleep(random.uniform(1, 2)) yield Message.Url, image[urlkey], image def get_job_metadata(self, page): - title , pos = text.extract(page, '

', '

') - title_jp, pos = text.extract(page, '

', '

', pos) - date , pos = text.extract(page, '>Posted:', '', pos) - language, pos = text.extract(page, '>Language:', '', pos) - size , pos = text.extract(page, '>File Size:', ' ', pos) - url , pos = text.extract(page, 'hentai.org/s/', '"', pos) - return { - "category": info["category"], - "gallery-id": self.gid, + """Collect metadata for extractor-job""" + data = { + "category" : info["category"], + "gallery-id" : self.gid, "gallery-token": self.token, - "title": title, - "title-jp": title_jp, - "date": date, - "language": language, - "size": size, - }, "http://exhentai.org/s/" + url + } + data, _ = text.extract_all(page, ( + ("title" , '

', '

'), + ("title_jp", '

', '

'), + ("date" , '>Posted:', ''), + ("language", '>Language:', ''), + ("size" , '>File Size:', ' '), + ("count" , '>Length:', ' '), + ("url" , 'hentai.org/s/', '"'), + ), values=data) + url = "http://exhentai.org/s/" + data["url"] + del data["url"] + return data, url def get_images(self, url): - time.sleep( random.uniform(3, 6) ) + """Collect url and metadata for all images in this gallery""" + time.sleep(random.uniform(3, 6)) page = self.request(url).text - data = {} - _ , pos = text.extract(page, '
', ' :: ', pos) - data["origurl"] , pos = text.extract(page, 'http://exhentai.org/fullimg.php', '"', pos) - data["gid"] , pos = text.extract(page, 'var gid=' , ';', pos) - data["startkey"], pos = text.extract(page, 'var startkey="', '";', pos) - data["showkey"] , pos = text.extract(page, 'var showkey="' , '";', pos) + data, pos = text.extract_all(page, ( + (None , '