diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 84497eb3..69ac8652 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -10,7 +10,6 @@
from .common import Extractor, Message
from .. import config, text
-import re
import os.path
import time
import random
@@ -27,7 +26,7 @@ info = {
class ExhentaiExtractor(Extractor):
- api_url = "http://exhentai.org/api.php"
+ api_url = "http://exhentai.org/api.php"
def __init__(self, match):
Extractor.__init__(self)
@@ -64,39 +63,43 @@ class ExhentaiExtractor(Extractor):
image["name"] = name
image["extension"] = ext[1:]
if "/fullimg.php" in image[urlkey]:
- time.sleep( random.uniform(1, 2) )
+ time.sleep(random.uniform(1, 2))
yield Message.Url, image[urlkey], image
def get_job_metadata(self, page):
- title , pos = text.extract(page, '
', '
')
- title_jp, pos = text.extract(page, '', '
', pos)
- date , pos = text.extract(page, '>Posted:', ' | ', pos)
- language, pos = text.extract(page, '>Language:', ' | ', pos)
- size , pos = text.extract(page, '>File Size:', ' ', pos)
- url , pos = text.extract(page, 'hentai.org/s/', '"', pos)
- return {
- "category": info["category"],
- "gallery-id": self.gid,
+ """Collect metadata for the extractor job; returns (metadata, url under http://exhentai.org/s/)"""
+ data = {
+ "category" : info["category"],
+ "gallery-id" : self.gid,
"gallery-token": self.token,
- "title": title,
- "title-jp": title_jp,
- "date": date,
- "language": language,
- "size": size,
- }, "http://exhentai.org/s/" + url
+ }
+ data, _ = text.extract_all(page, (
+ ("title" , '', ''),
+ ("title_jp", '', ''),
+ ("date" , '>Posted: | ', ' | '),
+ ("language", '>Language:', ' | '),
+ ("size" , '>File Size:', ' '),
+ ("count" , '>Length: | ', ' '),
+ ("url" , 'hentai.org/s/', '"'),
+ ), values=data)
+ url = "http://exhentai.org/s/" + data["url"]
+ del data["url"]  # the url is returned separately, so drop it from the metadata
+ return data, url
def get_images(self, url):
- time.sleep( random.uniform(3, 6) )
+ """Collect url and metadata for all images in this gallery"""
+ time.sleep(random.uniform(3, 6))
page = self.request(url).text
- data = {}
- _ , pos = text.extract(page, '', ' :: ', pos)
- data["origurl"] , pos = text.extract(page, 'http://exhentai.org/fullimg.php', '"', pos)
- data["gid"] , pos = text.extract(page, 'var gid=' , ';', pos)
- data["startkey"], pos = text.extract(page, 'var startkey="', '";', pos)
- data["showkey"] , pos = text.extract(page, 'var showkey="' , '";', pos)
+ data, pos = text.extract_all(page, (
+ (None , ' ', ' :: '),
+ ("origurl" , 'http://exhentai.org/fullimg.php', '"'),
+ ("gid" , 'var gid=', ';'),
+ ("startkey", 'var startkey="', '";'),
+ ("showkey" , 'var showkey="', '";'),
+ ))
if data["origurl"]:
data["origurl"] = "http://exhentai.org/fullimg.php" + text.unescape(data["origurl"])
else:
@@ -111,10 +114,7 @@ class ExhentaiExtractor(Extractor):
"showkey": data["showkey"],
}
while True:
- time.sleep( random.uniform(3, 6) )
- # page = safe_request(
- # self.session, self.api_url, method="POST", json=request
- # ).json
+ time.sleep(random.uniform(3, 6))
page = self.session.post(self.api_url, json=request).json()
data["imgkey"] , pos = text.extract(page["i3"], "'", "'")
data["url"] , pos = text.extract(page["i3"], ' |