mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-26 04:32:51 +01:00
[nhentai] add extractor for search results
This commit is contained in:
parent
52d41c41e7
commit
f39153b6e9
@ -51,7 +51,7 @@ MangaDex https://mangadex.org/ Chapters, Manga
|
||||
Mangapanda https://www.mangapanda.com/ Chapters, Manga
|
||||
MangaPark https://mangapark.me/ Chapters, Manga
|
||||
Mangareader https://www.mangareader.net/ Chapters, Manga
|
||||
nhentai https://nhentai.net/ Galleries
|
||||
nhentai https://nhentai.net/ Galleries, Search Results
|
||||
Niconico Seiga http://seiga.nicovideo.jp Images from Users, individual Images Required
|
||||
nijie https://nijie.info/ Images from Users, individual Images Required
|
||||
Nyafuu Archive https://archive.nyafuu.org/ Threads
|
||||
|
@ -9,47 +9,18 @@
|
||||
"""Extract images from https://nhentai.net/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util
|
||||
|
||||
|
||||
class NhentaiGalleryExtractor(Extractor):
|
||||
"""Extractor for image galleries from nhentai.net"""
|
||||
class NHentaiExtractor(Extractor):
|
||||
"""Base class for nhentai extractors"""
|
||||
category = "nhentai"
|
||||
subcategory = "gallery"
|
||||
directory_fmt = ["{category}", "{gallery_id} {title}"]
|
||||
filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
|
||||
archive_fmt = "{gallery_id}_{num}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?nhentai\.net/g/(\d+)"]
|
||||
test = [("https://nhentai.net/g/147850/", {
|
||||
"url": "5179dbf0f96af44005a0ff705a0ad64ac26547d0",
|
||||
"keyword": "2f94976e657f3043a89997e22f4de8e1b22d9175",
|
||||
})]
|
||||
root = "https://nhentai.net"
|
||||
media_url = "https://i.nhentai.net"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self)
|
||||
self.gid = match.group(1)
|
||||
|
||||
def items(self):
|
||||
ginfo = self.get_gallery_info()
|
||||
data = self.get_job_metadata(ginfo)
|
||||
urlfmt = "https://i.nhentai.net/galleries/{}/{{}}.{{}}".format(
|
||||
data["media_id"])
|
||||
extdict = {"j": "jpg", "p": "png", "g": "gif"}
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
for data["num"], image in enumerate(ginfo["images"]["pages"], 1):
|
||||
ext = extdict.get(image["t"], "jpg")
|
||||
data["width"] = image["w"]
|
||||
data["height"] = image["h"]
|
||||
data["extension"] = ext
|
||||
yield Message.Url, urlfmt.format(data["num"], ext), data
|
||||
|
||||
def get_gallery_info(self):
|
||||
"""Extract and return gallery-info"""
|
||||
url = "https://nhentai.net/api/gallery/" + self.gid
|
||||
return self.request(url).json()
|
||||
|
||||
def get_job_metadata(self, ginfo):
|
||||
"""Collect metadata for extractor-job"""
|
||||
@staticmethod
|
||||
def transform_to_metadata(ginfo):
|
||||
"""Transform an nhentai API response into compatible metadata"""
|
||||
title_en = ginfo["title"].get("english", "")
|
||||
title_ja = ginfo["title"].get("japanese", "")
|
||||
return {
|
||||
@ -62,3 +33,79 @@ class NhentaiGalleryExtractor(Extractor):
|
||||
"title_en": title_en,
|
||||
"title_ja": title_ja,
|
||||
}
|
||||
|
||||
|
||||
class NhentaiGalleryExtractor(NHentaiExtractor):
    """Extractor for a single image gallery on nhentai.net"""
    subcategory = "gallery"
    directory_fmt = ["{category}", "{gallery_id} {title}"]
    filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
    archive_fmt = "{gallery_id}_{num}"
    pattern = [r"(?:https?://)?nhentai\.net/g/(\d+)"]
    test = [("https://nhentai.net/g/147850/", {
        "url": "5179dbf0f96af44005a0ff705a0ad64ac26547d0",
        "keyword": "2f94976e657f3043a89997e22f4de8e1b22d9175",
    })]

    def __init__(self, match):
        NHentaiExtractor.__init__(self)
        # first capture group of 'pattern' is the numeric gallery ID
        self.gid = match.group(1)

    def items(self):
        """Yield version, directory and one URL message per gallery page"""
        gallery = self.get_gallery_info(self.gid)
        metadata = self.transform_to_metadata(gallery)

        # fill in host and media ID now; the escaped placeholders
        # remain for page number and file extension
        page_url = "{}/galleries/{}/{{}}.{{}}".format(
            self.media_url, metadata["media_id"])
        # single-letter type codes used in the API's page entries
        extensions = {"j": "jpg", "p": "png", "g": "gif"}

        yield Message.Version, 1
        yield Message.Directory, metadata

        # the same metadata dict is updated in place for every page
        pages = gallery["images"]["pages"]
        for num, page in enumerate(pages, 1):
            metadata["num"] = num
            extension = extensions.get(page["t"], "jpg")
            metadata["width"] = page["w"]
            metadata["height"] = page["h"]
            metadata["extension"] = extension
            yield Message.Url, page_url.format(num, extension), metadata

    def get_gallery_info(self, gallery_id):
        """Request the API entry for 'gallery_id' and return it as JSON"""
        api_url = "{}/api/gallery/{}".format(self.root, gallery_id)
        response = self.request(api_url)
        return response.json()
|
||||
|
||||
|
||||
class NhentaiSearchExtractor(NHentaiExtractor):
    """Extractor for search results on nhentai.net"""
    category = "nhentai"
    subcategory = "search"
    pattern = [r"(?:https?://)?nhentai\.net/search/?\?(.*)"]

    def __init__(self, match):
        NHentaiExtractor.__init__(self)
        # everything after the '?' of the search URL
        self.params = text.parse_query(match.group(1))

        # move the value of 'q' over to 'query'
        # (presumably the name the API endpoint expects -- verify)
        if "q" in self.params:
            self.params["query"] = self.params.pop("q")

    def items(self):
        """Yield a queue message for every gallery in the search results"""
        yield Message.Version, 1

        results = self._pagination("galleries/search", self.params)
        for gallery in results:
            gallery_url = "{}/g/{}/".format(self.root, gallery["id"])
            metadata = self.transform_to_metadata(gallery)
            yield Message.Queue, gallery_url, metadata

    def _pagination(self, endpoint, params):
        """Yield all results of an API endpoint, page after page

        Starts at the page given in 'params' (default: 1) and updates
        params["page"] in place while advancing.
        Stops after the last page or when the API reports an error.
        """
        api_url = "{}/api/{}".format(self.root, endpoint)
        params["page"] = util.safe_int(params.get("page"), 1)

        while True:
            # fatal=False: error responses are inspected below
            response = self.request(api_url, params=params, fatal=False)
            payload = response.json()

            if "error" in payload:
                self.log.error('API request failed: "%s"', payload["error"])
                break

            yield from payload["result"]

            if params["page"] >= payload["num_pages"]:
                break
            params["page"] += 1
|
||||
|
Loading…
Reference in New Issue
Block a user