# Mirror of https://github.com/mikf/gallery-dl.git (synced 2024-11-23 11:12:40 +01:00)
# File: gallery-dl/gallery_dl/extractor/nozomi.py (190 lines, 6.0 KiB, Python)

# -*- coding: utf-8 -*-
# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://nozomi.la/"""
from .common import Extractor, Message
from .. import text
def decode_nozomi(n):
    """Yield the unsigned 32-bit big-endian integers packed in 'n'

    'n' is a bytes-like object holding a sequence of 4-byte words.
    An incomplete trailing word (fewer than 4 bytes, e.g. from a
    truncated response) is ignored instead of raising an IndexError
    in the middle of iteration.
    """
    # only walk over complete 4-byte words
    end = len(n) - len(n) % 4
    for i in range(0, end, 4):
        yield int.from_bytes(n[i:i+4], "big")
class NozomiExtractor(Extractor):
    """Base class for nozomi extractors"""
    category = "nozomi"
    root = "https://nozomi.la"
    filename_fmt = "{postid} {dataid}.{extension}"
    archive_fmt = "{dataid}"

    def _init(self):
        """Send the site root as 'Origin' header with every request"""
        self.session.headers["Origin"] = self.root

    def items(self):
        """Yield one Directory message per post and one Url per image"""
        extra = self.metadata()

        for post_id in map(str, self.posts()):
            # the JSON path is built from the trailing digits of the post ID
            json_url = "https://j.nozomi.la/post/{}/{}/{}.json".format(
                post_id[-1], post_id[-3:-1], post_id)
            response = self.request(json_url, fatal=False)

            status = response.status_code
            if status >= 400:
                self.log.warning(
                    "Skipping post %s ('%s %s')",
                    post_id, status, response.reason)
                continue

            post = response.json()

            # flatten tag objects into lists of display names
            post["tags"] = self._list(post.get("general"))
            for kind in ("artist", "copyright", "character"):
                post[kind] = self._list(post.get(kind))

            # ':00' is appended before parsing, presumably to complete
            # the UTC offset for '%z'; any failure results in 'None'
            try:
                date = text.parse_datetime(
                    post["date"] + ":00", "%Y-%m-%d %H:%M:%S%z")
            except Exception:
                date = None
            post["date"] = date

            post.update(extra)

            # keep the image list, but drop raw fields from the metadata
            images = post.pop("imageurls")
            post.pop("general", None)
            post.pop("imageurl", None)

            yield Message.Directory, post

            for num, image in enumerate(images, 1):
                post["num"] = num

                dataid = image["dataid"]
                post["filename"] = dataid
                post["dataid"] = dataid

                is_video = bool(image.get("is_video"))
                post["is_video"] = is_video

                extension = image["type"]
                if is_video:
                    subdomain = "v"
                elif extension == "gif":
                    subdomain = "g"
                else:
                    # everything else is requested as WebP
                    subdomain, extension = "w", "webp"
                post["extension"] = extension

                file_url = "https://{}.nozomi.la/{}/{}/{}.{}".format(
                    subdomain, dataid[-1], dataid[-3:-1], dataid, extension)
                post["url"] = file_url
                yield Message.Url, file_url, post

    def posts(self):
        """Yield post IDs from the '.nozomi' file named by 'self.nozomi'

        The file is fetched in 256-byte ranges (64 IDs per request),
        starting at the page given by 'self.pnum', until the total size
        reported in 'Content-Range' has been reached.
        """
        url = "https://n.nozomi.la" + self.nozomi
        chunk = 256
        offset = chunk * (text.parse_int(self.pnum, 1) - 1)

        while True:
            byte_range = "bytes={}-{}".format(offset, offset + chunk - 1)
            response = self.request(url, headers={"Range": byte_range})
            yield from decode_nozomi(response.content)

            offset += chunk
            # 'Content-Range: bytes START-END/TOTAL' -> TOTAL
            content_range = response.headers.get("Content-Range", "")
            total = content_range.rpartition("/")[2]
            # a missing or unparsable total stops after this range
            if text.parse_int(total, offset) <= offset:
                return

    def metadata(self):
        """Return additional metadata to merge into every post"""
        return {}

    @staticmethod
    def _list(src):
        """Return the 'tagname_display' values of all entries in 'src'"""
        if not src:
            return ()
        return [entry["tagname_display"] for entry in src]
class NozomiPostExtractor(NozomiExtractor):
    """Extractor for individual posts on nozomi.la"""
    subcategory = "post"
    pattern = r"(?:https?://)?nozomi\.la/post/(\d+)"
    example = "https://nozomi.la/post/12345.html"

    def __init__(self, match):
        """Remember the numeric post ID captured from the URL"""
        NozomiExtractor.__init__(self, match)
        (post_id,) = match.groups()
        self.post_id = post_id

    def posts(self):
        """Return the single post ID this extractor was created for"""
        return (self.post_id,)
class NozomiIndexExtractor(NozomiExtractor):
    """Extractor for the nozomi.la index"""
    subcategory = "index"
    pattern = (r"(?:https?://)?nozomi\.la/"
               r"(?:(index(?:-Popular)?)-(\d+)\.html)?(?:$|#|\?)")
    example = "https://nozomi.la/index-1.html"

    def __init__(self, match):
        """Select the index file and start page from the URL

        A URL without an explicit 'index…-N.html' part uses the
        default 'index' file; 'pnum' is then 'None'.
        """
        NozomiExtractor.__init__(self, match)
        name = match.group(1)
        self.pnum = match.group(2)
        self.nozomi = "/" + (name or "index") + ".nozomi"
class NozomiTagExtractor(NozomiExtractor):
    """Extractor for posts from tag searches on nozomi.la"""
    subcategory = "tag"
    directory_fmt = ("{category}", "{search_tags}")
    archive_fmt = "t_{search_tags}_{dataid}"
    pattern = r"(?:https?://)?nozomi\.la/tag/([^/?#]+)-(\d+)\."
    example = "https://nozomi.la/tag/TAG-1.html"

    def __init__(self, match):
        """Store the unquoted tag, the page number, and the index path"""
        NozomiExtractor.__init__(self, match)
        self.pnum = match.group(2)
        self.tags = text.unquote(match.group(1))
        self.nozomi = "/nozomi/" + self.tags + ".nozomi"

    def metadata(self):
        """Expose the searched tag as 'search_tags'"""
        return {"search_tags": self.tags}
# (blame timestamp from web view: 2019-10-14 23:49:46 +02:00)
class NozomiSearchExtractor(NozomiExtractor):
    """Extractor for search results on nozomi.la"""
    subcategory = "search"
    directory_fmt = ("{category}", "{search_tags:J }")
    archive_fmt = "t_{search_tags}_{dataid}"
    pattern = r"(?:https?://)?nozomi\.la/search\.html\?q=([^&#]+)"
    example = "https://nozomi.la/search.html?q=QUERY"

    def __init__(self, match):
        """Split the unquoted query string into a list of tags"""
        NozomiExtractor.__init__(self, match)
        query = text.unquote(match.group(1))
        self.tags = query.split()

    def metadata(self):
        """Expose the list of searched tags as 'search_tags'"""
        return {"search_tags": self.tags}

    def posts(self):
        """Return the IDs matching all search tags, highest ID first

        Tags starting with '-' exclude posts; all other tags must be
        present. Without any positive tag, the main index is used as
        the starting set.
        """
        def fetch(path):
            url = "https://j.nozomi.la/" + path + ".nozomi"
            return decode_nozomi(self.request(url).content)

        # sort tags into required and excluded ones ('/' gets removed)
        positive = []
        negative = []
        for tag in self.tags:
            cleaned = tag.replace("/", "")
            if tag[0] == "-":
                negative.append(cleaned)
            else:
                positive.append(cleaned)

        # intersect the ID sets of all required tags
        if positive:
            matches = set(fetch("nozomi/" + positive[0]))
            for tag in positive[1:]:
                matches.intersection_update(fetch("nozomi/" + tag))
        else:
            matches = set(fetch("index"))

        # remove everything carrying an excluded tag (strip leading '-')
        for tag in negative:
            matches.difference_update(fetch("nozomi/" + tag[1:]))

        if not matches:
            return ()
        return sorted(matches, reverse=True)