1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 18:53:21 +01:00

[horne] add support for horne.red (#2700)

This commit is contained in:
Mike Fährmann 2022-06-25 14:24:46 +02:00
parent 7af4d2047b
commit 241e82e18d
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
5 changed files with 171 additions and 64 deletions

View File

@@ -541,12 +541,6 @@ Consider all sites to be NSFW unless otherwise known.
<td>individual Images, User Profiles</td> <td>individual Images, User Profiles</td>
<td><a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a></td> <td><a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a></td>
</tr> </tr>
<tr>
<td>nijie</td>
<td>https://nijie.info/</td>
<td>Doujin, Favorites, Illustrations, individual Images, Nuita History, User Profiles</td>
<td>Required</td>
</tr>
<tr> <tr>
<td>Nozomi.la</td> <td>Nozomi.la</td>
<td>https://nozomi.la/</td> <td>https://nozomi.la/</td>
@@ -1004,6 +998,22 @@ Consider all sites to be NSFW unless otherwise known.
<td></td> <td></td>
</tr> </tr>
<tr>
<td colspan="4"><strong>Nijie Instances</strong></td>
</tr>
<tr>
<td>nijie</td>
<td>https://nijie.info/</td>
<td>Doujin, Favorites, Illustrations, individual Images, Nuita History, User Profiles</td>
<td>Required</td>
</tr>
<tr>
<td>horne</td>
<td>https://horne.red/</td>
<td>Doujin, Favorites, Illustrations, individual Images, Nuitas, User Profiles</td>
<td>Required</td>
</tr>
<tr> <tr>
<td colspan="4"><strong>Philomena Instances</strong></td> <td colspan="4"><strong>Philomena Instances</strong></td>
</tr> </tr>

View File

@@ -6,31 +6,31 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extractors for https://nijie.info/""" """Extractors for nijie instances"""
from .common import Extractor, Message, AsynchronousMixin from .common import BaseExtractor, Message, AsynchronousMixin
from .. import text, exception from .. import text, exception
from ..cache import cache from ..cache import cache
BASE_PATTERN = r"(?:https?://)?(?:www\.)?nijie\.info" class NijieExtractor(AsynchronousMixin, BaseExtractor):
class NijieExtractor(AsynchronousMixin, Extractor):
"""Base class for nijie extractors""" """Base class for nijie extractors"""
category = "nijie" basecategory = "Nijie"
directory_fmt = ("{category}", "{user_id}") directory_fmt = ("{category}", "{user_id}")
filename_fmt = "{image_id}_p{num}.{extension}" filename_fmt = "{image_id}_p{num}.{extension}"
archive_fmt = "{image_id}_{num}" archive_fmt = "{image_id}_{num}"
cookiedomain = "nijie.info"
cookienames = ("nemail", "nlogin")
root = "https://nijie.info"
view_url = "https://nijie.info/view.php?id="
popup_url = "https://nijie.info/view_popup.php?id="
def __init__(self, match): def __init__(self, match):
Extractor.__init__(self, match) self._init_category(match)
self.user_id = text.parse_int(match.group(1)) self.cookiedomain = "." + self.root.rpartition("/")[2]
self.cookienames = (self.category + "_tok",)
if self.category == "horne":
self._extract_data = self._extract_data_horne
BaseExtractor.__init__(self, match)
self.user_id = text.parse_int(match.group(match.lastindex))
self.user_name = None self.user_name = None
self.session.headers["Referer"] = self.root + "/" self.session.headers["Referer"] = self.root + "/"
@@ -39,13 +39,21 @@ class NijieExtractor(AsynchronousMixin, Extractor):
for image_id in self.image_ids(): for image_id in self.image_ids():
response = self.request(self.view_url + image_id, fatal=False) url = "{}/view.php?id={}".format(self.root, image_id)
response = self.request(url, fatal=False)
if response.status_code >= 400: if response.status_code >= 400:
continue continue
page = response.text page = response.text
data = self._extract_data(page) data = self._extract_data(page)
data["image_id"] = text.parse_int(image_id) data["image_id"] = text.parse_int(image_id)
if self.user_name:
data["user_id"] = self.user_id
data["user_name"] = self.user_name
else:
data["user_id"] = data["artist_id"]
data["user_name"] = data["artist_name"]
yield Message.Directory, data yield Message.Directory, data
for image in self._extract_images(page): for image in self._extract_images(page):
@@ -68,24 +76,41 @@ class NijieExtractor(AsynchronousMixin, Extractor):
"description": text.unescape(extr( "description": text.unescape(extr(
'"description": "', '"').replace("&amp;", "&")), '"description": "', '"').replace("&amp;", "&")),
"date" : text.parse_datetime(extr( "date" : text.parse_datetime(extr(
'"datePublished": "', '"') + "+0900", '"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y", 9),
"%a %b %d %H:%M:%S %Y%z"), "artist_id" : text.parse_int(extr('/members.php?id=', '"')),
"artist_id" : text.parse_int(extr(
'"sameAs": "https://nijie.info/members.php?id=', '"')),
"artist_name": keywords[1], "artist_name": keywords[1],
"tags" : keywords[2:-1], "tags" : keywords[2:-1],
} }
data["user_id"] = data["artist_id"] return data
data["user_name"] = data["artist_name"]
@staticmethod
def _extract_data_horne(page):
"""Extract image metadata from 'page'"""
extr = text.extract_from(page)
keywords = text.unescape(extr(
'name="keywords" content="', '" />')).split(",")
data = {
"title" : keywords[0].strip(),
"description": text.unescape(extr(
'property="og:description" content="', '"')),
"artist_id" : text.parse_int(extr('members.php?id=', '"')),
"artist_name": keywords[1],
"tags" : keywords[2:-1],
"date" : text.parse_datetime(extr(
"itemprop='datePublished' content=", "<").rpartition(">")[2],
"%Y-%m-%d %H:%M:%S", 9),
}
return data return data
@staticmethod @staticmethod
def _extract_images(page): def _extract_images(page):
"""Extract image URLs from 'page'""" """Extract image URLs from 'page'"""
images = text.extract_iter(page, '<a href="./view_popup.php', '</a>') images = text.extract_iter(page, "/view_popup.php", "</a>")
for num, image in enumerate(images): for num, image in enumerate(images):
url = "https:" + text.extract(image, 'src="', '"')[0] src = text.extract(image, 'src="', '"')[0]
url = url.replace("/__rs_l120x120/", "/") if not src:
continue
url = ("https:" + src).replace("/__rs_l120x120/", "/")
yield text.nameext_from_url(url, { yield text.nameext_from_url(url, {
"num": num, "num": num,
"url": url, "url": url,
@@ -112,7 +137,7 @@ class NijieExtractor(AsynchronousMixin, Extractor):
data = {"email": username, "password": password, "save": "on"} data = {"email": username, "password": password, "save": "on"}
response = self.request(url, method="POST", data=data) response = self.request(url, method="POST", data=data)
if "//nijie.info/login.php" in response.text: if "/login.php" in response.text:
raise exception.AuthenticationError() raise exception.AuthenticationError()
return self.session.cookies return self.session.cookies
@@ -132,12 +157,27 @@ class NijieExtractor(AsynchronousMixin, Extractor):
params["p"] += 1 params["p"] += 1
BASE_PATTERN = NijieExtractor.update({
"nijie": {
"root": "https://nijie.info",
"pattern": r"(?:www\.)?nijie\.info",
},
"horne": {
"root": "https://horne.red",
"pattern": r"(?:www\.)?horne\.red",
},
})
class NijieUserExtractor(NijieExtractor): class NijieUserExtractor(NijieExtractor):
"""Extractor for nijie user profiles""" """Extractor for nijie user profiles"""
subcategory = "user" subcategory = "user"
cookiedomain = None cookiedomain = None
pattern = BASE_PATTERN + r"/members\.php\?id=(\d+)" pattern = BASE_PATTERN + r"/members\.php\?id=(\d+)"
test = ("https://nijie.info/members.php?id=44",) test = (
("https://nijie.info/members.php?id=44"),
("https://horne.red/members.php?id=58000"),
)
def items(self): def items(self):
fmt = "{}/{{}}.php?id={}".format(self.root, self.user_id).format fmt = "{}/{{}}.php?id={}".format(self.root, self.user_id).format
@@ -172,6 +212,25 @@ class NijieIllustrationExtractor(NijieExtractor):
"user_name": "ED", "user_name": "ED",
}, },
}), }),
("https://horne.red/members_illust.php?id=58000", {
"pattern": r"https://pic\.nijie\.net/\d+/horne/\d+/\d+/\d+"
r"/illust/\d+_\d+_[0-9a-f]+_[0-9a-f]+\.png",
"range": "1-20",
"count": 20,
"keyword": {
"artist_id": 58000,
"artist_name": "のえるわ",
"date": "type:datetime",
"description": str,
"image_id": int,
"num": int,
"tags": list,
"title": str,
"url": str,
"user_id": 58000,
"user_name": "のえるわ",
},
}),
("https://nijie.info/members_illust.php?id=43", { ("https://nijie.info/members_illust.php?id=43", {
"exception": exception.NotFoundError, "exception": exception.NotFoundError,
}), }),
@@ -182,34 +241,47 @@ class NijieIllustrationExtractor(NijieExtractor):
class NijieDoujinExtractor(NijieExtractor): class NijieDoujinExtractor(NijieExtractor):
"""Extractor for doujin entries of a nijie-user""" """Extractor for doujin entries of a nijie user"""
subcategory = "doujin" subcategory = "doujin"
pattern = BASE_PATTERN + r"/members_dojin\.php\?id=(\d+)" pattern = BASE_PATTERN + r"/members_dojin\.php\?id=(\d+)"
test = ("https://nijie.info/members_dojin.php?id=6782", { test = (
("https://nijie.info/members_dojin.php?id=6782", {
"count": ">= 18", "count": ">= 18",
"keyword": { "keyword": {
"user_id" : 6782, "user_id" : 6782,
"user_name": "ジョニー@アビオン村", "user_name": "ジョニー@アビオン村",
}, },
}) }),
("https://horne.red/members_dojin.php?id=58000"),
)
def image_ids(self): def image_ids(self):
return self._pagination("members_dojin") return self._pagination("members_dojin")
class NijieFavoriteExtractor(NijieExtractor): class NijieFavoriteExtractor(NijieExtractor):
"""Extractor for all favorites/bookmarks of a nijie-user""" """Extractor for all favorites/bookmarks of a nijie user"""
subcategory = "favorite" subcategory = "favorite"
directory_fmt = ("{category}", "bookmarks", "{user_id}") directory_fmt = ("{category}", "bookmarks", "{user_id}")
archive_fmt = "f_{user_id}_{image_id}_{num}" archive_fmt = "f_{user_id}_{image_id}_{num}"
pattern = BASE_PATTERN + r"/user_like_illust_view\.php\?id=(\d+)" pattern = BASE_PATTERN + r"/user_like_illust_view\.php\?id=(\d+)"
test = ("https://nijie.info/user_like_illust_view.php?id=44", { test = (
("https://nijie.info/user_like_illust_view.php?id=44", {
"count": ">= 16", "count": ">= 16",
"keyword": { "keyword": {
"user_id" : 44, "user_id" : 44,
"user_name": "ED", "user_name": "ED",
}, },
}) }),
("https://horne.red/user_like_illust_view.php?id=58000", {
"range": "1-5",
"count": 5,
"keyword": {
"user_id" : 58000,
"user_name": "のえるわ",
},
}),
)
def image_ids(self): def image_ids(self):
return self._pagination("user_like_illust_view") return self._pagination("user_like_illust_view")
@@ -227,14 +299,17 @@ class NijieNuitaExtractor(NijieExtractor):
directory_fmt = ("{category}", "nuita", "{user_id}") directory_fmt = ("{category}", "nuita", "{user_id}")
archive_fmt = "n_{user_id}_{image_id}_{num}" archive_fmt = "n_{user_id}_{image_id}_{num}"
pattern = BASE_PATTERN + r"/history_nuita\.php\?id=(\d+)" pattern = BASE_PATTERN + r"/history_nuita\.php\?id=(\d+)"
test = ("https://nijie.info/history_nuita.php?id=728995", { test = (
("https://nijie.info/history_nuita.php?id=728995", {
"range": "1-10", "range": "1-10",
"count": 10, "count": 10,
"keyword": { "keyword": {
"user_id" : 728995, "user_id" : 728995,
"user_name": "", "user_name": "",
}, },
}) }),
("https://horne.red/history_nuita.php?id=58000"),
)
def image_ids(self): def image_ids(self):
return self._pagination("history_nuita") return self._pagination("history_nuita")
@@ -252,7 +327,7 @@ class NijieNuitaExtractor(NijieExtractor):
class NijieImageExtractor(NijieExtractor): class NijieImageExtractor(NijieExtractor):
"""Extractor for a work/image from nijie.info""" """Extractor for a nijie work/image"""
subcategory = "image" subcategory = "image"
pattern = BASE_PATTERN + r"/view(?:_popup)?\.php\?id=(\d+)" pattern = BASE_PATTERN + r"/view(?:_popup)?\.php\?id=(\d+)"
test = ( test = (
@@ -265,11 +340,26 @@ class NijieImageExtractor(NijieExtractor):
"count": 0, "count": 0,
}), }),
("https://nijie.info/view_popup.php?id=70720"), ("https://nijie.info/view_popup.php?id=70720"),
("https://horne.red/view.php?id=8716", {
"count": 4,
"keyword": {
"artist_id": 58000,
"artist_name": "のえるわ",
"date": "dt:2018-02-04 14:47:24",
"description": "ノエル「そんなことしなくても、"
"言ってくれたら咥えるのに・・・♡」",
"image_id": 8716,
"tags": ["男の娘", "フェラ", "オリキャラ", "うちのこ"],
"title": "ノエル「いまどきそんな、恵方巻ネタなんてやらなくても・・・」",
"user_id": 58000,
"user_name": "のえるわ",
},
}),
) )
def __init__(self, match): def __init__(self, match):
NijieExtractor.__init__(self, match) NijieExtractor.__init__(self, match)
self.image_id = match.group(1) self.image_id = match.group(match.lastindex)
def image_ids(self): def image_ids(self):
return (self.image_id,) return (self.image_id,)

View File

@@ -47,6 +47,7 @@ CATEGORY_MAP = {
"hentaihere" : "HentaiHere", "hentaihere" : "HentaiHere",
"hentaiimg" : "Hentai Image", "hentaiimg" : "Hentai Image",
"hitomi" : "Hitomi.la", "hitomi" : "Hitomi.la",
"horne" : "horne",
"idolcomplex" : "Idol Complex", "idolcomplex" : "Idol Complex",
"illusioncardsbooru": "Illusion Game Cards", "illusioncardsbooru": "Illusion Game Cards",
"imagebam" : "ImageBam", "imagebam" : "ImageBam",
@@ -270,6 +271,7 @@ AUTH_MAP = {
"fantia" : _COOKIES, "fantia" : _COOKIES,
"flickr" : _OAUTH, "flickr" : _OAUTH,
"furaffinity" : _COOKIES, "furaffinity" : _COOKIES,
"horne" : "Required",
"idolcomplex" : "Supported", "idolcomplex" : "Supported",
"imgbb" : "Supported", "imgbb" : "Supported",
"inkbunny" : "Supported", "inkbunny" : "Supported",

View File

@@ -89,7 +89,7 @@ class TestCookiedict(unittest.TestCase):
self.assertEqual(sorted(cookies.values()), sorted(self.cdict.values())) self.assertEqual(sorted(cookies.values()), sorted(self.cdict.values()))
def test_domain(self): def test_domain(self):
for category in ["exhentai", "idolcomplex", "nijie"]: for category in ["exhentai", "idolcomplex", "nijie", "horne"]:
extr = _get_extractor(category) extr = _get_extractor(category)
cookies = extr.session.cookies cookies = extr.session.cookies
for key in self.cdict: for key in self.cdict:
@@ -107,7 +107,8 @@ class TestCookieLogin(unittest.TestCase):
extr_cookies = { extr_cookies = {
"exhentai" : ("ipb_member_id", "ipb_pass_hash"), "exhentai" : ("ipb_member_id", "ipb_pass_hash"),
"idolcomplex": ("login", "pass_hash"), "idolcomplex": ("login", "pass_hash"),
"nijie" : ("nemail", "nlogin"), "nijie" : ("nijie_tok",),
"horne" : ("horne_tok",),
} }
for category, cookienames in extr_cookies.items(): for category, cookienames in extr_cookies.items():
cookies = {name: "value" for name in cookienames} cookies = {name: "value" for name in cookienames}
@@ -199,10 +200,13 @@ class TestCookieUtils(unittest.TestCase):
def _get_extractor(category): def _get_extractor(category):
for extr in extractor.extractors(): URLS = {
if extr.category == category and hasattr(extr, "_login_impl"): "exhentai" : "https://exhentai.org/g/1200119/d55c44d3d0/",
url = next(extr._get_tests())[0] "idolcomplex": "https://idol.sankakucomplex.com/post/show/1",
return extr.from_url(url) "nijie" : "https://nijie.info/view.php?id=1",
"horne" : "https://horne.red/view.php?id=1",
}
return extractor.find(URLS[category])
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -312,6 +312,7 @@ def setup_test_config():
config.set(("extractor", "nijie") , "username", email) config.set(("extractor", "nijie") , "username", email)
config.set(("extractor", "seiga") , "username", email) config.set(("extractor", "seiga") , "username", email)
config.set(("extractor", "horne") , "username", email2)
config.set(("extractor", "pinterest") , "username", email2) config.set(("extractor", "pinterest") , "username", email2)
config.set(("extractor", "pinterest") , "username", None) # login broken config.set(("extractor", "pinterest") , "username", None) # login broken