mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 18:53:21 +01:00
adjust metadata types for GalleryExtractors
This commit is contained in:
parent
13e0f2a78f
commit
26c4365baa
@ -12,16 +12,20 @@ from .common import GalleryExtractor, Extractor, Message
|
||||
from .. import text
|
||||
|
||||
|
||||
class HentaifoxGalleryExtractor(GalleryExtractor):
|
||||
"""Extractor for image galleries on hentaifox.com"""
|
||||
class HentaifoxBase():
|
||||
"""Base class for hentaifox extractors"""
|
||||
category = "hentaifox"
|
||||
root = "https://hentaifox.com"
|
||||
|
||||
|
||||
class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor):
|
||||
"""Extractor for image galleries on hentaifox.com"""
|
||||
pattern = r"(?:https?://)?(?:www\.)?hentaifox\.com(/gallery/(\d+))"
|
||||
test = ("https://hentaifox.com/gallery/56622/", {
|
||||
"pattern": r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg",
|
||||
"count": 24,
|
||||
"keyword": "d0df47e073e32a7752236ab151949c3820f9d81e",
|
||||
"keyword": "38f8517605feb6854d48833297da6b05c6541b69",
|
||||
})
|
||||
root = "https://hentaifox.com"
|
||||
|
||||
def __init__(self, match):
|
||||
GalleryExtractor.__init__(self, match)
|
||||
@ -30,7 +34,7 @@ class HentaifoxGalleryExtractor(GalleryExtractor):
|
||||
def metadata(self, page):
|
||||
title, pos = text.extract(page, "<h1>", "</h1>")
|
||||
data = text.extract_all(page, (
|
||||
("parodies" , ">Parodies:" , "</a></span>"),
|
||||
("parody" , ">Parodies:" , "</a></span>"),
|
||||
("characters", ">Characters:", "</a></span>"),
|
||||
("tags" , ">Tags:" , "</a></span>"),
|
||||
("artist" , ">Artists:" , "</a></span>"),
|
||||
@ -39,9 +43,10 @@ class HentaifoxGalleryExtractor(GalleryExtractor):
|
||||
), pos)[0]
|
||||
|
||||
for key, value in data.items():
|
||||
data[key] = text.remove_html(value).replace(" , ", ", ")
|
||||
data[key] = text.split_html(value)[::2]
|
||||
data["gallery_id"] = text.parse_int(self.gallery_id)
|
||||
data["title"] = text.unescape(title)
|
||||
data["type"] = data["type"][0] if data["type"] else ""
|
||||
data["language"] = "English"
|
||||
data["lang"] = "en"
|
||||
return data
|
||||
@ -53,9 +58,8 @@ class HentaifoxGalleryExtractor(GalleryExtractor):
|
||||
]
|
||||
|
||||
|
||||
class HentaifoxSearchExtractor(Extractor):
|
||||
class HentaifoxSearchExtractor(HentaifoxBase, Extractor):
|
||||
"""Extractor for search results and listings on hentaifox.com"""
|
||||
category = "hentaifox"
|
||||
subcategory = "search"
|
||||
pattern = (r"(?:https?://)?(?:www\.)?hentaifox\.com"
|
||||
r"(/(?:parody|tag|artist|character|search)/[^/?%#]+)")
|
||||
@ -76,7 +80,6 @@ class HentaifoxSearchExtractor(Extractor):
|
||||
},
|
||||
}),
|
||||
)
|
||||
root = "https://hentaifox.com"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -20,7 +20,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
|
||||
test = (
|
||||
("https://hitomi.la/galleries/867789.html", {
|
||||
"url": "cb759868d090fe0e2655c3e29ebf146054322b6d",
|
||||
"keyword": "52951edb50163180eb669a78aef0bab0522d32b7",
|
||||
"keyword": "07536afc5696cb4983a4831ab4c70c1d155f875c",
|
||||
}),
|
||||
("https://hitomi.la/galleries/1036181.html", {
|
||||
# "aa" subdomain for gallery-id ending in 1 (#142)
|
||||
@ -30,8 +30,8 @@ class HitomiGalleryExtractor(GalleryExtractor):
|
||||
)
|
||||
|
||||
def __init__(self, match):
|
||||
self.gid = text.parse_int(match.group(1))
|
||||
url = "https://hitomi.la/galleries/{}.html".format(self.gid)
|
||||
self.gallery_id = text.parse_int(match.group(1))
|
||||
url = "https://hitomi.la/galleries/{}.html".format(self.gallery_id)
|
||||
GalleryExtractor.__init__(self, match, url)
|
||||
|
||||
def metadata(self, page):
|
||||
@ -49,23 +49,22 @@ class HitomiGalleryExtractor(GalleryExtractor):
|
||||
lang = None if lang == "N/A" else text.remove_html(lang)
|
||||
|
||||
return {
|
||||
"gallery_id": self.gid,
|
||||
"title": text.unescape(" ".join(title.split())),
|
||||
"artist": self._prepare(artist),
|
||||
"group": self._prepare(group),
|
||||
"type": text.remove_html(gtype).capitalize(),
|
||||
"lang": util.language_to_code(lang),
|
||||
"language": lang,
|
||||
"date": date,
|
||||
"series": self._prepare(series),
|
||||
"gallery_id": self.gallery_id,
|
||||
"title" : text.unescape(title.strip()),
|
||||
"artist" : self._prepare(artist),
|
||||
"group" : self._prepare(group),
|
||||
"parody" : self._prepare(series),
|
||||
"characters": self._prepare(chars),
|
||||
"tags": self._prepare(tags),
|
||||
"tags" : self._prepare(tags),
|
||||
"type" : text.remove_html(gtype).capitalize(),
|
||||
"lang" : util.language_to_code(lang),
|
||||
"language" : lang,
|
||||
"date" : date,
|
||||
}
|
||||
|
||||
def images(self, page):
|
||||
# see https://ltn.hitomi.la/common.js
|
||||
frontends = 2
|
||||
offset = self.gid % frontends if self.gid % 10 != 1 else 0
|
||||
offset = self.gallery_id % 2 if self.gallery_id % 10 != 1 else 0
|
||||
subdomain = chr(97 + offset) + "a"
|
||||
base = "https://" + subdomain + ".hitomi.la/galleries/"
|
||||
|
||||
@ -78,10 +77,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _prepare(value):
|
||||
if not value or "<ul " not in value:
|
||||
return ""
|
||||
value = ", ".join(text.extract_iter(
|
||||
value, '.html">', '<'))
|
||||
return string.capwords(
|
||||
text.unescape(value)
|
||||
)
|
||||
return [
|
||||
text.unescape(string.capwords(v))
|
||||
for v in text.extract_iter(value or "", '.html">', '<')
|
||||
]
|
||||
|
@ -32,6 +32,7 @@ class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor):
|
||||
"title_ja" : str,
|
||||
"gallery_id": 147850,
|
||||
"media_id" : 867789,
|
||||
"count" : 16,
|
||||
"date" : 1446050915,
|
||||
"scanlator" : "",
|
||||
"artist" : ["morris"],
|
||||
@ -40,8 +41,8 @@ class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor):
|
||||
"characters": list,
|
||||
"tags" : list,
|
||||
"type" : "manga",
|
||||
"language" : ["translated", "english"],
|
||||
"lang" : "en",
|
||||
"language" : "English",
|
||||
"width" : int,
|
||||
"height" : int,
|
||||
},
|
||||
@ -63,12 +64,11 @@ class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor):
|
||||
for tag in data["tags"]:
|
||||
info[tag["type"]].append(tag["name"])
|
||||
|
||||
language = ""
|
||||
for language in info["language"]:
|
||||
if language != "translated":
|
||||
lang = util.language_to_code(language)
|
||||
language = language.capitalize()
|
||||
break
|
||||
else:
|
||||
lang = ""
|
||||
|
||||
return {
|
||||
"title" : title_en or title_ja,
|
||||
@ -84,8 +84,8 @@ class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor):
|
||||
"characters": info["character"],
|
||||
"tags" : info["tag"],
|
||||
"type" : info["category"][0] if info["category"] else "",
|
||||
"language" : info["language"],
|
||||
"lang" : lang,
|
||||
"lang" : util.language_to_code(language),
|
||||
"language" : language,
|
||||
}
|
||||
|
||||
def images(self, _):
|
||||
|
@ -20,24 +20,24 @@ class PururinGalleryExtractor(GalleryExtractor):
|
||||
test = ("https://pururin.io/gallery/38661/iowant-2", {
|
||||
"pattern": r"https://cdn.pururin.io/assets/images/data/38661/\d+\.jpg",
|
||||
"keyword": {
|
||||
"artist": "Shoda Norihiro",
|
||||
"title" : "Iowant 2!!",
|
||||
"title_en" : "Iowant 2!!",
|
||||
"title_jp" : "",
|
||||
"gallery_id": 38661,
|
||||
"count" : 19,
|
||||
"artist" : ["Shoda Norihiro"],
|
||||
"group" : ["Obsidian Order"],
|
||||
"parody" : ["Kantai Collection"],
|
||||
"characters": ["Iowa", "Teitoku"],
|
||||
"tags" : list,
|
||||
"type" : "Doujinshi",
|
||||
"collection": "",
|
||||
"convention": "C92",
|
||||
"count": 19,
|
||||
"extension": "jpg",
|
||||
"gallery_id": 38661,
|
||||
"group": "Obsidian Order",
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
"parody": "Kantai Collection",
|
||||
"rating": float,
|
||||
"scanlator": "",
|
||||
"tags": list,
|
||||
"title": "Iowant 2!!",
|
||||
"title_jp": str,
|
||||
"type": "Doujinshi",
|
||||
"uploader": "demo"
|
||||
"rating" : float,
|
||||
"uploader" : "demo",
|
||||
"scanlator" : "",
|
||||
"lang" : "en",
|
||||
"language" : "English",
|
||||
}
|
||||
})
|
||||
root = "https://pururin.io"
|
||||
@ -74,18 +74,19 @@ class PururinGalleryExtractor(GalleryExtractor):
|
||||
self._ext = info["image_extension"]
|
||||
self._cnt = info["total_pages"]
|
||||
|
||||
for key in ("tags", "characters"):
|
||||
for key in ("artist", "group", "parody", "tags", "characters"):
|
||||
data[key] = [
|
||||
text.unescape(item)
|
||||
for item in text.extract_iter(data[key], 'title="', '"')
|
||||
]
|
||||
for key in ("artist", "group", "parody", "type", "collection",
|
||||
"language", "scanlator", "convention"):
|
||||
for key in ("type", "collection", "language", "scanlator",
|
||||
"convention"):
|
||||
data[key] = text.unescape(text.extract(
|
||||
data[key], 'title="', '"')[0] or "")
|
||||
|
||||
data["gallery_id"] = text.parse_int(self.gallery_id)
|
||||
data["title"] = info["title"]
|
||||
data["title"] = info["title"] or info.get("j_title") or ""
|
||||
data["title_en"] = info["title"]
|
||||
data["title_jp"] = info.get("j_title") or ""
|
||||
data["uploader"] = text.remove_html(data["uploader"])
|
||||
data["rating"] = text.parse_float(data["rating"])
|
||||
|
@ -23,7 +23,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
|
||||
(("https://original-work.simply-hentai.com"
|
||||
"/amazon-no-hiyaku-amazon-elixir"), {
|
||||
"url": "258289249990502c3138719cb89e995a60861e49",
|
||||
"keyword": "468a0a3db4fc6ad7fcae0facefb9753831c0404d",
|
||||
"keyword": "18ab9defca53dbb2aeb7965193e93e0ea125b76b",
|
||||
}),
|
||||
("https://www.simply-hentai.com/notfound", {
|
||||
"exception": exception.GalleryDLException,
|
||||
@ -55,14 +55,14 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
|
||||
|
||||
return {
|
||||
"gallery_id": text.parse_int(gid),
|
||||
"title": text.unescape(title),
|
||||
"series": text.remove_html(series),
|
||||
"characters": ", ".join(text.split_html(chars)),
|
||||
"tags": text.split_html(tags),
|
||||
"artist": ", ".join(text.split_html(artist)),
|
||||
"lang": util.language_to_code(lang),
|
||||
"language": lang,
|
||||
"date": text.remove_html(date),
|
||||
"title" : text.unescape(title),
|
||||
"artist" : text.split_html(artist),
|
||||
"parody" : text.split_html(series),
|
||||
"characters": text.split_html(chars),
|
||||
"tags" : text.split_html(tags),
|
||||
"lang" : util.language_to_code(lang),
|
||||
"language" : lang,
|
||||
"date" : text.remove_html(date),
|
||||
}
|
||||
|
||||
def images(self, _):
|
||||
|
@ -48,24 +48,24 @@ class TsuminoGalleryExtractor(TsuminoBase, GalleryExtractor):
|
||||
("https://www.tsumino.com/Book/Info/40996", {
|
||||
"url": "84bf30a86623039fc87855680fada884dc8a1ddd",
|
||||
"keyword": {
|
||||
"artist": "Itou Life",
|
||||
"characters": "Carmilla, Gudako, Gudao, Lancelot, Nightingale",
|
||||
"collection": "",
|
||||
"count": 42,
|
||||
"date": "2018 June 29",
|
||||
"title" : r"re:Shikoshiko Daisuki Nightingale \+ Kaijou",
|
||||
"title_en" : r"re:Shikoshiko Daisuki Nightingale \+ Kaijou",
|
||||
"title_jp" : "シコシコ大好きナイチンゲール + 会場限定おまけ本",
|
||||
"gallery_id": 40996,
|
||||
"group": "Itou Life",
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
"page": int,
|
||||
"parodies": "Fate/Grand Order",
|
||||
"rating": float,
|
||||
"tags": str,
|
||||
"thumbnail": "http://www.tsumino.com/Image/Thumb/40996",
|
||||
"title": r"re:Shikoshiko Daisuki Nightingale \+ Kaijou Gentei",
|
||||
"title_jp": "シコシコ大好きナイチンゲール + 会場限定おまけ本",
|
||||
"type": "Doujinshi",
|
||||
"uploader": "sehki"
|
||||
"date" : "2018 June 29",
|
||||
"count" : 42,
|
||||
"collection": "",
|
||||
"artist" : ["Itou Life"],
|
||||
"group" : ["Itou Life"],
|
||||
"parody" : ["Fate/Grand Order"],
|
||||
"characters": list,
|
||||
"tags" : list,
|
||||
"type" : "Doujinshi",
|
||||
"rating" : float,
|
||||
"uploader" : "sehki",
|
||||
"lang" : "en",
|
||||
"language" : "English",
|
||||
"thumbnail" : "http://www.tsumino.com/Image/Thumb/40996",
|
||||
},
|
||||
}),
|
||||
("https://www.tsumino.com/Read/View/45834"),
|
||||
@ -81,6 +81,8 @@ class TsuminoGalleryExtractor(TsuminoBase, GalleryExtractor):
|
||||
title, pos = extr(page, '"og:title" content="', '"')
|
||||
thumb, pos = extr(page, '"og:image" content="', '"', pos)
|
||||
title_en, _, title_jp = text.unescape(title).partition("/")
|
||||
title_en = title_en.strip()
|
||||
title_jp = title_jp.strip()
|
||||
|
||||
uploader , pos = extr(page, 'id="Uploader">' , '</div>', pos)
|
||||
date , pos = extr(page, 'id="Uploaded">' , '</div>', pos)
|
||||
@ -95,19 +97,20 @@ class TsuminoGalleryExtractor(TsuminoBase, GalleryExtractor):
|
||||
|
||||
return {
|
||||
"gallery_id": text.parse_int(self.gallery_id),
|
||||
"title": title_en.strip(),
|
||||
"title_jp": title_jp.strip(),
|
||||
"title": title_en or title_jp,
|
||||
"title_en": title_en,
|
||||
"title_jp": title_jp,
|
||||
"thumbnail": thumb,
|
||||
"uploader": text.remove_html(uploader),
|
||||
"date": date.strip(),
|
||||
"rating": text.parse_float(rating.partition(" ")[0]),
|
||||
"type": text.remove_html(gtype),
|
||||
"collection": text.remove_html(collection),
|
||||
"group": text.remove_html(group),
|
||||
"artist": ", ".join(text.split_html(artist)),
|
||||
"parodies": ", ".join(text.split_html(parody)),
|
||||
"characters": ", ".join(text.split_html(character)),
|
||||
"tags": ", ".join(text.split_html(tags)),
|
||||
"group": text.split_html(group),
|
||||
"artist": text.split_html(artist),
|
||||
"parody": text.split_html(parody),
|
||||
"characters": text.split_html(character),
|
||||
"tags": text.split_html(tags),
|
||||
"language": "English",
|
||||
"lang": "en",
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user