diff --git a/gallery_dl/extractor/hentaifox.py b/gallery_dl/extractor/hentaifox.py index c7dc8f87..417c095b 100644 --- a/gallery_dl/extractor/hentaifox.py +++ b/gallery_dl/extractor/hentaifox.py @@ -12,16 +12,20 @@ from .common import GalleryExtractor, Extractor, Message from .. import text -class HentaifoxGalleryExtractor(GalleryExtractor): - """Extractor for image galleries on hentaifox.com""" +class HentaifoxBase(): + """Base class for hentaifox extractors""" category = "hentaifox" + root = "https://hentaifox.com" + + +class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor): + """Extractor for image galleries on hentaifox.com""" pattern = r"(?:https?://)?(?:www\.)?hentaifox\.com(/gallery/(\d+))" test = ("https://hentaifox.com/gallery/56622/", { "pattern": r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg", "count": 24, - "keyword": "d0df47e073e32a7752236ab151949c3820f9d81e", + "keyword": "38f8517605feb6854d48833297da6b05c6541b69", }) - root = "https://hentaifox.com" def __init__(self, match): GalleryExtractor.__init__(self, match) @@ -30,7 +34,7 @@ class HentaifoxGalleryExtractor(GalleryExtractor): def metadata(self, page): title, pos = text.extract(page, "

", "

") data = text.extract_all(page, ( - ("parodies" , ">Parodies:" , ""), + ("parody" , ">Parodies:" , ""), ("characters", ">Characters:", ""), ("tags" , ">Tags:" , ""), ("artist" , ">Artists:" , ""), @@ -39,9 +43,10 @@ class HentaifoxGalleryExtractor(GalleryExtractor): ), pos)[0] for key, value in data.items(): - data[key] = text.remove_html(value).replace(" , ", ", ") + data[key] = text.split_html(value)[::2] data["gallery_id"] = text.parse_int(self.gallery_id) data["title"] = text.unescape(title) + data["type"] = data["type"][0] if data["type"] else "" data["language"] = "English" data["lang"] = "en" return data @@ -53,9 +58,8 @@ class HentaifoxGalleryExtractor(GalleryExtractor): ] -class HentaifoxSearchExtractor(Extractor): +class HentaifoxSearchExtractor(HentaifoxBase, Extractor): """Extractor for search results and listings on hentaifox.com""" - category = "hentaifox" subcategory = "search" pattern = (r"(?:https?://)?(?:www\.)?hentaifox\.com" r"(/(?:parody|tag|artist|character|search)/[^/?%#]+)") @@ -76,7 +80,6 @@ class HentaifoxSearchExtractor(Extractor): }, }), ) - root = "https://hentaifox.com" def __init__(self, match): Extractor.__init__(self, match) diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index 4de7c938..5a518beb 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -20,7 +20,7 @@ class HitomiGalleryExtractor(GalleryExtractor): test = ( ("https://hitomi.la/galleries/867789.html", { "url": "cb759868d090fe0e2655c3e29ebf146054322b6d", - "keyword": "52951edb50163180eb669a78aef0bab0522d32b7", + "keyword": "07536afc5696cb4983a4831ab4c70c1d155f875c", }), ("https://hitomi.la/galleries/1036181.html", { # "aa" subdomain for gallery-id ending in 1 (#142) @@ -30,8 +30,8 @@ class HitomiGalleryExtractor(GalleryExtractor): ) def __init__(self, match): - self.gid = text.parse_int(match.group(1)) - url = "https://hitomi.la/galleries/{}.html".format(self.gid) + self.gallery_id = text.parse_int(match.group(1)) + url = "https://hitomi.la/galleries/{}.html".format(self.gallery_id) GalleryExtractor.__init__(self, match, url) def metadata(self, page): @@ -49,23 +49,22 @@ class HitomiGalleryExtractor(GalleryExtractor): lang = None if lang == "N/A" else text.remove_html(lang) return { - "gallery_id": self.gid, - "title": text.unescape(" ".join(title.split())), - "artist": self._prepare(artist), - "group": self._prepare(group), - "type": text.remove_html(gtype).capitalize(), - "lang": util.language_to_code(lang), - "language": lang, - "date": date, - "series": self._prepare(series), + "gallery_id": self.gallery_id, + "title" : text.unescape(title.strip()), + "artist" : self._prepare(artist), + "group" : self._prepare(group), + "parody" : self._prepare(series), "characters": self._prepare(chars), - "tags": self._prepare(tags), + "tags" : self._prepare(tags), + "type" : text.remove_html(gtype).capitalize(), + "lang" : util.language_to_code(lang), + "language" : lang, + "date" : date, } def images(self, page): # see https://ltn.hitomi.la/common.js - frontends = 2 - offset = self.gid % frontends if self.gid % 10 != 1 else 0 + offset = self.gallery_id % 2 if self.gallery_id % 10 != 1 else 0 subdomain = chr(97 + offset) + "a" base = "https://" + subdomain + ".hitomi.la/galleries/" @@ -78,10 +77,7 @@ class HitomiGalleryExtractor(GalleryExtractor): @staticmethod def _prepare(value): - if not value or "