mirror of https://github.com/mikf/gallery-dl.git, synced 2024-11-22 02:32:33 +01:00
[exhentai] use text.extract_all
This commit is contained in:
parent 1fa6a99f18
commit 353ac1e00b
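
The commit replaces chains of positional text.extract calls with single text.extract_all calls that scan the page once and collect the results into a dict. The following is a rough sketch of the semantics implied by the new call sites, not gallery-dl's actual implementation; the handling of a None key, the values parameter, and the assumed (None, pos) failure case are all inferred from how the diff uses them:

    def extract(txt, begin, end, pos=0):
        """Return the text between 'begin' and 'end', plus the offset after 'end'."""
        try:
            first = txt.index(begin, pos) + len(begin)
            last = txt.index(end, first)
            return txt[first:last], last + len(end)
        except ValueError:
            # assumed failure behavior: keep the old position and return None
            return None, pos

    def extract_all(txt, rules, pos=0, values=None):
        """Apply (key, begin, end) rules in sequence, collecting results in 'values'."""
        data = values if values is not None else {}
        for key, begin, end in rules:
            result, pos = extract(txt, begin, end, pos)
            if key is not None:  # a None key only advances the scan position
                data[key] = result
        return data, pos

With the values parameter, get_job_metadata can pre-seed the dict with category, gallery-id and gallery-token and let extract_all fill in the scraped fields, instead of threading pos through six separate extract calls.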
@@ -10,7 +10,6 @@

 from .common import Extractor, Message
 from .. import config, text
-import re
 import os.path
 import time
 import random
@@ -27,7 +26,7 @@ info = {

 class ExhentaiExtractor(Extractor):

-    api_url = "http://exhentai.org/api.php"
+    api_url = "http://exhentai.org/api.php"

     def __init__(self, match):
         Extractor.__init__(self)
@@ -64,39 +63,43 @@ class ExhentaiExtractor(Extractor):
             image["name"] = name
             image["extension"] = ext[1:]
             if "/fullimg.php" in image[urlkey]:
-                time.sleep( random.uniform(1, 2) )
+                time.sleep(random.uniform(1, 2))
             yield Message.Url, image[urlkey], image

     def get_job_metadata(self, page):
-        title   , pos = text.extract(page, '<h1 id="gn">', '</h1>')
-        title_jp, pos = text.extract(page, '<h1 id="gj">', '</h1>', pos)
-        date    , pos = text.extract(page, '>Posted:</td><td class="gdt2">', '</td>', pos)
-        language, pos = text.extract(page, '>Language:</td><td class="gdt2">', '</td>', pos)
-        size    , pos = text.extract(page, '>File Size:</td><td class="gdt2">', ' ', pos)
-        url     , pos = text.extract(page, 'hentai.org/s/', '"', pos)
-        return {
-            "category": info["category"],
-            "gallery-id": self.gid,
+        """Collect metadata for extractor-job"""
+        data = {
+            "category"     : info["category"],
+            "gallery-id"   : self.gid,
             "gallery-token": self.token,
-            "title": title,
-            "title-jp": title_jp,
-            "date": date,
-            "language": language,
-            "size": size,
-        }, "http://exhentai.org/s/" + url
+        }
+        data, _ = text.extract_all(page, (
+            ("title"   , '<h1 id="gn">', '</h1>'),
+            ("title_jp", '<h1 id="gj">', '</h1>'),
+            ("date"    , '>Posted:</td><td class="gdt2">', '</td>'),
+            ("language", '>Language:</td><td class="gdt2">', '</td>'),
+            ("size"    , '>File Size:</td><td class="gdt2">', ' '),
+            ("count"   , '>Length:</td><td class="gdt2">', ' '),
+            ("url"     , 'hentai.org/s/', '"'),
+        ), values=data)
+        url = "http://exhentai.org/s/" + data["url"]
+        del data["url"]
+        return data, url

     def get_images(self, url):
-        time.sleep( random.uniform(3, 6) )
+        """Collect url and metadata for all images in this gallery"""
+        time.sleep(random.uniform(3, 6))
         page = self.request(url).text
-        data = {}
-        _               , pos = text.extract(page, '<div id="i3"><a onclick="return load_image(', '')
-        data["imgkey"]  , pos = text.extract(page, "'", "'", pos)
-        data["url"]     , pos = text.extract(page, '<img id="img" src="', '"', pos)
-        data["title"]   , pos = text.extract(page, '<div id="i4"><div>', ' :: ', pos)
-        data["origurl"] , pos = text.extract(page, 'http://exhentai.org/fullimg.php', '"', pos)
-        data["gid"]     , pos = text.extract(page, 'var gid=' , ';', pos)
-        data["startkey"], pos = text.extract(page, 'var startkey="', '";', pos)
-        data["showkey"] , pos = text.extract(page, 'var showkey="' , '";', pos)
+        data, pos = text.extract_all(page, (
+            (None      , '<div id="i3"><a onclick="return load_image(', ''),
+            ("imgkey"  , "'", "'"),
+            ("url"     , '<img id="img" src="', '"'),
+            ("title"   , '<div id="i4"><div>', ' :: '),
+            ("origurl" , 'http://exhentai.org/fullimg.php', '"'),
+            ("gid"     , 'var gid=', ';'),
+            ("startkey", 'var startkey="', '";'),
+            ("showkey" , 'var showkey="', '";'),
+        ))
         if data["origurl"]:
             data["origurl"] = "http://exhentai.org/fullimg.php" + text.unescape(data["origurl"])
         else:
@@ -111,10 +114,7 @@ class ExhentaiExtractor(Extractor):
             "showkey": data["showkey"],
         }
         while True:
-            time.sleep( random.uniform(3, 6) )
-            # page = safe_request(
-                # self.session, self.api_url, method="POST", json=request
-            # ).json
+            time.sleep(random.uniform(3, 6))
             page = self.session.post(self.api_url, json=request).json()
             data["imgkey"] , pos = text.extract(page["i3"], "'", "'")
             data["url"]    , pos = text.extract(page["i3"], '<img id="img" src="', '"', pos)