mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-23 11:12:40 +01:00
359e31e462
Static images are now only available in WebP format over the 'w' subdomain. GIFs also got their own 'g' subdomain.
245 lines
8.2 KiB
Python
245 lines
8.2 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright 2019-2023 Mike Fährmann
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
# published by the Free Software Foundation.
|
|
|
|
"""Extractors for https://nozomi.la/"""
|
|
|
|
from .common import Extractor, Message
|
|
from .. import text
|
|
|
|
|
|
def decode_nozomi(n):
    """Decode the content of a '.nozomi' file into integer post IDs

    'n' is an indexable sequence of byte values (e.g. a 'bytes' object).
    Every group of 4 bytes is combined into one big-endian unsigned
    integer, and the integers are yielded in order.

    Trailing bytes that do not form a complete 4-byte group (for example
    from a truncated response) are ignored instead of raising an
    IndexError in the middle of the iteration.
    """
    # stop early enough that 'i+3' is always a valid index
    for i in range(0, len(n) - 3, 4):
        yield (n[i] << 24) + (n[i+1] << 16) + (n[i+2] << 8) + n[i+3]
|
|
|
|
|
|
class NozomiExtractor(Extractor):
    """Base class for nozomi extractors"""
    category = "nozomi"
    root = "https://nozomi.la"
    filename_fmt = "{postid} {dataid}.{extension}"
    archive_fmt = "{dataid}"

    def items(self):
        """Yield one Directory message per post and one Url message per image

        Posts whose JSON request answers with a status code >= 400 are
        logged and skipped.
        """
        post_url_fmt = "https://j.nozomi.la/post/{}/{}/{}.json"
        file_url_fmt = "https://{}.nozomi.la/{}/{}/{}.{}"

        extra = self.metadata()
        headers = self.session.headers
        headers["Origin"] = self.root
        headers["Referer"] = self.root + "/"

        for post_id in self.posts():
            post_id = str(post_id)

            # JSON path: last digit / the two digits before it / full ID
            response = self.request(
                post_url_fmt.format(post_id[-1], post_id[-3:-1], post_id),
                fatal=False)

            if response.status_code >= 400:
                self.log.warning(
                    "Skipping post %s ('%s %s')",
                    post_id, response.status_code, response.reason)
                continue

            post = response.json()

            # flatten tag objects into plain lists of display names;
            # 'general' tags are exposed under the 'tags' key
            post["tags"] = self._list(post.get("general"))
            for key in ("artist", "copyright", "character"):
                post[key] = self._list(post.get(key))

            # a missing, empty, or otherwise unparsable 'date' becomes None
            try:
                date = text.parse_datetime(
                    post["date"] + ":00", "%Y-%m-%d %H:%M:%S%z")
            except Exception:
                date = None
            post["date"] = date

            post.update(extra)

            # keep the image list for the loop below, but remove the raw
            # fields from the metadata that gets emitted
            images = post.pop("imageurls")
            post.pop("general", None)
            post.pop("imageurl", None)

            yield Message.Directory, post

            for num, image in enumerate(images, 1):
                post["num"] = num

                dataid = image["dataid"]
                post["filename"] = dataid
                post["dataid"] = dataid

                is_video = bool(image.get("is_video"))
                post["is_video"] = is_video

                # videos are served from 'v', GIFs from 'g',
                # and everything else as WebP from 'w'
                extension = image["type"]
                if is_video:
                    subdomain = "v"
                elif extension == "gif":
                    subdomain = "g"
                else:
                    subdomain, extension = "w", "webp"

                post["extension"] = extension
                file_url = file_url_fmt.format(
                    subdomain, dataid[-1], dataid[-3:-1], dataid, extension)
                post["url"] = file_url
                yield Message.Url, file_url, post

    def posts(self):
        """Yield post IDs from the '.nozomi' file named by 'self.nozomi'

        The file is requested in 256-byte ranges (64 IDs of 4 bytes each),
        starting at the range that corresponds to page 'self.pnum'.
        """
        chunk_size = 256
        url = "https://n.nozomi.la" + self.nozomi
        start = (text.parse_int(self.pnum, 1) - 1) * chunk_size

        while True:
            end = start + chunk_size
            byte_range = "bytes={}-{}".format(start, end - 1)
            response = self.request(url, headers={"Range": byte_range})
            yield from decode_nozomi(response.content)

            # the part after the last '/' of 'Content-Range' is compared
            # against the next start offset; 'end' is passed as fallback
            # value, which ends the loop when that part is not usable
            # (presumably parse_int() returns its default then)
            total = response.headers.get(
                "Content-Range", "").rpartition("/")[2]
            if text.parse_int(total, end) <= end:
                return
            start = end

    def metadata(self):
        """Return additional metadata that gets merged into every post"""
        return {}

    @staticmethod
    def _list(src):
        """Return the 'tagname_display' value of each entry in 'src'

        Falsy input (None, empty list) results in an empty tuple.
        """
        if not src:
            return ()
        return [entry["tagname_display"] for entry in src]
|
|
|
|
|
|
class NozomiPostExtractor(NozomiExtractor):
    """Extractor for individual posts on nozomi.la"""
    subcategory = "post"
    pattern = r"(?:https?://)?nozomi\.la/post/(\d+)"
    test = (
        ("https://nozomi.la/post/3649262.html", {
            "url": "e5525e717aec712843be8b88592d6406ae9e60ba",
            "pattern": r"https://w\.nozomi\.la/2/15/aaa9f7c632cde1e1a5baaff3fb"
                       r"6a6d857ec73df7fdc5cf5a358caf604bf73152\.webp",
            "content": "6d62c4a7fea50c0a89d499603c4e7a2b4b9bffa8",
            "keyword": {
                "artist": ["hammer (sunset beach)"],
                "character": ["patchouli knowledge"],
                "copyright": ["touhou"],
                "dataid": "re:aaa9f7c632cde1e1a5baaff3fb6a6d857ec73df7fdc5",
                "date": "dt:2016-07-26 02:32:03",
                "extension": "webp",
                "filename": str,
                "height": 768,
                "is_video": False,
                "postid": 3649262,
                "tags": list,
                "type": "jpg",
                "url": str,
                "width": 1024,
            },
        }),
        # multiple images per post
        ("https://nozomi.la/post/25588032.html", {
            "url": "fb956ccedcf2cf509739d26e2609e910244aa56c",
            "keyword": "516ca5cbd0d2a46a8ce26679d6e08de5ac42184b",
            "count": 7,
        }),
        # empty 'date' (#1163)
        ("https://nozomi.la/post/130309.html", {
            "keyword": {"date": None},
        }),
        # gif
        ("https://nozomi.la/post/1647.html", {
            "pattern": r"https://g\.nozomi\.la/a/f0/d1b06469e00d72e4f6346209c1"
                       r"49db459d76b58a074416c260ed93cc31fa9f0a\.gif",
            "content": "952efb78252bbc9fb56df2e8fafb68d5e6364181",
        }),
        # video
        ("https://nozomi.la/post/2269847.html", {
            "pattern": r"https://v\.nozomi\.la/d/0e/ff88398862669783691b31519f"
                       r"2bea3a35c24b6e62e3ba2d89b4409e41c660ed\.webm",
            "content": "57065e6c16da7b1c7098a63b36fb0c6c6f1b9bca",
        }),
    )

    def __init__(self, match):
        """Store the numeric post ID captured by 'pattern'"""
        super().__init__(match)
        self.post_id = match.group(1)

    def posts(self):
        """Return a 1-tuple containing only the requested post ID"""
        return (self.post_id,)
|
|
|
|
|
|
class NozomiIndexExtractor(NozomiExtractor):
    """Extractor for the nozomi.la index"""
    subcategory = "index"
    pattern = (r"(?:https?://)?nozomi\.la/"
               r"(?:(index(?:-Popular)?)-(\d+)\.html)?(?:$|#|\?)")
    test = (
        "https://nozomi.la/",
        "https://nozomi.la/index-2.html",
        "https://nozomi.la/index-Popular-33.html",
    )

    def __init__(self, match):
        """Derive the '.nozomi' file path and page number from the URL"""
        super().__init__(match)
        index, pnum = match.groups()
        self.pnum = pnum
        # a bare root URL has no index name; fall back to "index"
        name = index or "index"
        self.nozomi = "/{}.nozomi".format(name)
|
|
|
|
|
|
class NozomiTagExtractor(NozomiExtractor):
    """Extractor for posts from tag searches on nozomi.la"""
    subcategory = "tag"
    directory_fmt = ("{category}", "{search_tags}")
    archive_fmt = "t_{search_tags}_{dataid}"
    pattern = r"(?:https?://)?nozomi\.la/tag/([^/?#]+)-(\d+)\."
    test = ("https://nozomi.la/tag/3:1_aspect_ratio-1.html", {
        "pattern": r"^https://[wgv]\.nozomi\.la/\w/\w\w/\w+\.\w+$",
        "count": ">= 25",
        "range": "1-25",
    })

    def __init__(self, match):
        """Store the unquoted tag, page number, and '.nozomi' file path"""
        super().__init__(match)
        quoted, pnum = match.groups()
        self.pnum = pnum
        self.tags = text.unquote(quoted)
        self.nozomi = "/nozomi/{}.nozomi".format(self.tags)

    def metadata(self):
        """Return the searched tag as 'search_tags' metadata"""
        return {"search_tags": self.tags}
|
|
|
|
|
|
class NozomiSearchExtractor(NozomiExtractor):
    """Extractor for search results on nozomi.la"""
    subcategory = "search"
    directory_fmt = ("{category}", "{search_tags:J }")
    archive_fmt = "t_{search_tags}_{dataid}"
    pattern = r"(?:https?://)?nozomi\.la/search\.html\?q=([^&#]+)"
    test = ("https://nozomi.la/search.html?q=hibiscus%203:4_ratio#1", {
        "count": ">= 5",
    })

    def __init__(self, match):
        """Split the unquoted 'q' query value into a list of tags"""
        super().__init__(match)
        query = text.unquote(match.group(1))
        self.tags = query.split()

    def metadata(self):
        """Return the list of searched tags as 'search_tags' metadata"""
        return {"search_tags": self.tags}

    def posts(self):
        """Return the IDs of all posts matching the search tags

        The ID sets of all regular tags are intersected, then the IDs of
        every tag prefixed with '-' are removed.  Without any regular tag,
        the IDs from the general index are used as starting point.
        The result is sorted in descending order; an empty result is
        returned as an empty tuple.
        """
        def fetch_ids(path):
            # download a '.nozomi' file and decode the post IDs in it
            url = "https://j.nozomi.la/" + path + ".nozomi"
            return decode_nozomi(self.request(url).content)

        included = []
        excluded = []
        for tag in self.tags:
            bucket = excluded if tag[0] == "-" else included
            bucket.append(tag.replace("/", ""))

        matches = None
        for tag in included:
            ids = fetch_ids("nozomi/" + tag)
            if matches is None:
                matches = set(ids)
            else:
                matches.intersection_update(ids)

        if matches is None:
            matches = set(fetch_ids("index"))

        for tag in excluded:
            # strip the leading '-' to get the actual tag name
            matches.difference_update(fetch_ids("nozomi/" + tag[1:]))

        if not matches:
            return ()
        return sorted(matches, reverse=True)
|