mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-21 18:22:30 +01:00
[rule34vault] update
- implement 'tags' categorization
- don't use 'totalCount' for pagination end
- update tests
This commit is contained in:
parent: d5fa1d6aba
commit: 7c0d2ca07d
@ -8,6 +8,7 @@
|
||||
|
||||
from .booru import BooruExtractor
|
||||
from .. import text
|
||||
import collections
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?rule34vault\.com"
|
||||
|
||||
@ -19,11 +20,19 @@ class Rule34vaultExtractor(BooruExtractor):
|
||||
filename_fmt = "{category}_{id}.{extension}"
|
||||
per_page = 100
|
||||
|
||||
TAG_TYPES = {
|
||||
1: "general",
|
||||
2: "copyright",
|
||||
4: "character",
|
||||
8: "artist",
|
||||
}
|
||||
|
||||
def _file_url(self, post):
|
||||
post_id = post["id"]
|
||||
extension = "jpg" if post["type"] == 0 else "mp4"
|
||||
return "{}/posts/{}/{}/{}.{}".format(
|
||||
post["file_url"] = url = "{}/posts/{}/{}/{}.{}".format(
|
||||
self.root_cdn, post_id // 1000, post_id, post_id, extension)
|
||||
return url
|
||||
|
||||
def _prepare(self, post):
|
||||
post.pop("files", None)
|
||||
@ -36,6 +45,13 @@ class Rule34vaultExtractor(BooruExtractor):
|
||||
if "tags" not in post:
|
||||
post.update(self._fetch_post(post["id"]))
|
||||
|
||||
tags = collections.defaultdict(list)
|
||||
for tag in post["tags"]:
|
||||
tags[tag["type"]].append(tag["value"])
|
||||
types = self.TAG_TYPES
|
||||
for type, values in tags.items():
|
||||
post["tags_" + types[type]] = values
|
||||
|
||||
def _fetch_post(self, post_id):
|
||||
url = "{}/api/v2/post/{}".format(self.root, post_id)
|
||||
return self.request(url).json()
|
||||
@ -45,19 +61,19 @@ class Rule34vaultExtractor(BooruExtractor):
|
||||
|
||||
if params is None:
|
||||
params = {}
|
||||
params["CountTotal"] = True
|
||||
params["CountTotal"] = False
|
||||
params["Skip"] = self.page_start * self.per_page
|
||||
params["take"] = self.per_page
|
||||
threshold = self.per_page
|
||||
|
||||
while True:
|
||||
data = self.request(url, method="POST", json=params).json()
|
||||
|
||||
yield from data["items"]
|
||||
|
||||
if params["Skip"] + params["take"] > data["totalCount"]:
|
||||
if len(data["items"]) < threshold:
|
||||
return
|
||||
if "cursor" in data:
|
||||
params["cursor"] = data["cursor"]
|
||||
params["cursor"] = data.get("cursor")
|
||||
params["Skip"] += params["take"]
|
||||
|
||||
|
||||
@ -65,7 +81,7 @@ class Rule34vaultPostExtractor(Rule34vaultExtractor):
|
||||
subcategory = "post"
|
||||
archive_fmt = "{id}"
|
||||
pattern = BASE_PATTERN + r"/post/(\d+)"
|
||||
example = "https://rule34vault.com/post/399437"
|
||||
example = "https://rule34vault.com/post/12345"
|
||||
|
||||
def posts(self):
|
||||
return (self._fetch_post(self.groups[0]),)
|
||||
@ -76,7 +92,7 @@ class Rule34vaultPlaylistExtractor(Rule34vaultExtractor):
|
||||
directory_fmt = ("{category}", "{playlist_id}")
|
||||
archive_fmt = "p_{playlist_id}_{id}"
|
||||
pattern = BASE_PATTERN + r"/playlists/view/(\d+)"
|
||||
example = "https://rule34vault.com/playlists/view/2"
|
||||
example = "https://rule34vault.com/playlists/view/12345"
|
||||
|
||||
def metadata(self):
|
||||
return {"playlist_id": self.groups[0]}
|
||||
@ -90,7 +106,7 @@ class Rule34vaultTagExtractor(Rule34vaultExtractor):
|
||||
subcategory = "tag"
|
||||
directory_fmt = ("{category}", "{search_tags}")
|
||||
archive_fmt = "t_{search_tags}_{id}"
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+)$"
|
||||
pattern = BASE_PATTERN + r"/(?!p(?:ost|laylists)/)([^/?#]+)"
|
||||
example = "https://rule34vault.com/TAG"
|
||||
|
||||
def metadata(self):
|
||||
|
@ -16,21 +16,6 @@ __tests__ = (
|
||||
"#count" : 10,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://rule34vault.com/post/486545",
|
||||
"#class": rule34vault.Rule34vaultPostExtractor,
|
||||
"#pattern" : r"https://r34xyz\.b-cdn.net/posts/486/486545/486545\.jpg",
|
||||
"#sha1_content": "8f53c4c9d049842d23b51fb3cf8ce11bcbe21f07",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://rule34vault.com/post/382937",
|
||||
"#comment": "video",
|
||||
"#class" : rule34vault.Rule34vaultPostExtractor,
|
||||
"#pattern" : r"https://r34xyz\.b-cdn.net/posts/382/382937/382937\.mp4",
|
||||
"#sha1_content": "b962e3e2304139767c3792508353e6e83a85a2af",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://rule34vault.com/playlists/view/20164",
|
||||
"#class": rule34vault.Rule34vaultPlaylistExtractor,
|
||||
@ -38,4 +23,68 @@ __tests__ = (
|
||||
"#count" : 55,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://rule34vault.com/post/280517",
|
||||
"#comment": "image",
|
||||
"#class" : rule34vault.Rule34vaultPostExtractor,
|
||||
"#options": {"tags": True},
|
||||
"#pattern" : "https://r34xyz.b-cdn.net/posts/280/280517/280517.jpg",
|
||||
"#sha1_content": "1e19d601b4a79c06e6f885a83a5003e7e2a17057",
|
||||
|
||||
"created" : "2023-09-01T11:57:57.317331Z",
|
||||
"date" : "dt:2023-09-01 11:57:57",
|
||||
"extension" : "jpg",
|
||||
"file_url" : "https://r34xyz.b-cdn.net/posts/280/280517/280517.jpg",
|
||||
"filename" : "280517",
|
||||
"height" : 1152,
|
||||
"id" : 280517,
|
||||
"likes" : range(3, 100),
|
||||
"posted" : "2023-09-01T12:01:41.008547Z",
|
||||
"status" : 2,
|
||||
"type" : 0,
|
||||
"uploaderId": 20678,
|
||||
"views" : range(90, 999),
|
||||
"width" : 768,
|
||||
"data": {
|
||||
"sources": [
|
||||
"https://trynectar.ai/view/87c98fc8-e4f3-447c-a0d3-024b1890580a",
|
||||
],
|
||||
},
|
||||
"tags": [
|
||||
"ai generated",
|
||||
"demon slayer",
|
||||
"kamado nezuko",
|
||||
"school uniform",
|
||||
"sfw",
|
||||
],
|
||||
"tags_character": [
|
||||
"kamado nezuko",
|
||||
],
|
||||
"tags_copyright": [
|
||||
"demon slayer",
|
||||
],
|
||||
"tags_general": [
|
||||
"ai generated",
|
||||
"school uniform",
|
||||
"sfw",
|
||||
],
|
||||
"uploader": {
|
||||
"created" : "2023-07-24T04:33:36.734495Z",
|
||||
"data" : None,
|
||||
"displayName" : "quick1e",
|
||||
"emailVerified": False,
|
||||
"id" : 20678,
|
||||
"role" : 1,
|
||||
"userName" : "quick1e",
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://rule34vault.com/post/382937",
|
||||
"#comment": "video",
|
||||
"#class" : rule34vault.Rule34vaultPostExtractor,
|
||||
"#urls" : "https://r34xyz.b-cdn.net/posts/382/382937/382937.mp4",
|
||||
"#sha1_content": "b962e3e2304139767c3792508353e6e83a85a2af",
|
||||
},
|
||||
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user