1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-23 03:02:50 +01:00

[hentainexus] add search extractor (#256)

This commit is contained in:
Mike Fährmann 2019-05-15 17:25:46 +02:00
parent c02f12ce2f
commit 4891f4a328
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
2 changed files with 39 additions and 2 deletions

View File

@ -36,7 +36,7 @@ Hentai Foundry https://www.hentai-foundry.com/ |hentaifoundry-C|
Hentai2Read https://hentai2read.com/ Chapters, Manga
HentaiFox https://hentaifox.com/ Galleries, Search Results
HentaiHere https://hentaihere.com/ Chapters, Manga
Hentainexus https://hentainexus.com/ Galleries
Hentainexus https://hentainexus.com/ Galleries, Search Results
Hitomi.la https://hitomi.la/ Galleries
Hypnohub https://hypnohub.net/ Pools, Popular Images, Posts, Tag-Searches
Idol Complex https://idol.sankakucomplex.com/ Pools, Posts, Tag-Searches Optional

View File

@ -8,7 +8,7 @@
"""Extractors for https://hentainexus.com/"""
from .common import GalleryExtractor
from .common import GalleryExtractor, Extractor, Message
from .. import text, util
import json
@ -58,3 +58,40 @@ class HentainexusGalleryExtractor(GalleryExtractor):
base = extr('"', '"')
return [(base + img, None) for img in json.loads(imgs)]
class HentainexusSearchExtractor(Extractor):
    """Extractor for search results on hentainexus.com"""
    category = "hentainexus"
    subcategory = "search"
    root = "https://hentainexus.com"
    # Group 1 captures the optional "q=..." part of the query string.
    # NOTE(review): an optional "/page/N" segment is accepted but not
    # captured, and items() always begins at "/" - confirm this is intended.
    pattern = (r"(?i)(?:https?://)?(?:www\.)?hentainexus\.com"
               r"(?:/page/\d+)?/?(?:\?(q=[^/?#]+))?$")
    test = (
        ("https://hentainexus.com/?q=tag:%22heart+pupils%22%20tag:group", {
            "pattern": HentainexusGalleryExtractor.pattern,
            "count": ">= 50",
        }),
        ("https://hentainexus.com/page/3?q=tag:%22heart+pupils%22"),
    )

    def __init__(self, match):
        """Store the query parameters parsed from the matched URL."""
        super().__init__(match)
        query = match.group(1)
        self.params = text.parse_query(query)

    def items(self):
        """Yield a Queue message for every gallery link on each result page.

        Result pages are requested one after another, following the
        'pagination-next' link, until a page no longer provides one.
        """
        yield Message.Version, 1

        query_params = self.params
        next_path = "/"

        while next_path:
            response = self.request(
                self.root + next_path, params=query_params)
            find = text.extract_from(response.text)

            # Scan the page for gallery links; a falsy result ends the scan
            # (presumably 'find' returns an empty value when the marker is
            # missing - verify against text.extract_from).
            gallery_id = find('<a href="/view/', '"')
            while gallery_id:
                yield Message.Queue, self.root + "/view/" + gallery_id, {}
                gallery_id = find('<a href="/view/', '"')

            # A falsy path here terminates the outer loop.
            next_path = find('class="pagination-next" href="', '"')