1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-21 18:22:30 +01:00

[rule34vault] update

- implement 'tags' categorization
- don't use 'totalCount' for pagination end
- update tests
This commit is contained in:
Mike Fährmann 2024-11-03 09:59:25 +01:00
parent d5fa1d6aba
commit 7c0d2ca07d
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
2 changed files with 88 additions and 23 deletions

View File

@ -8,6 +8,7 @@
from .booru import BooruExtractor
from .. import text
import collections
BASE_PATTERN = r"(?:https?://)?rule34vault\.com"
@ -19,11 +20,19 @@ class Rule34vaultExtractor(BooruExtractor):
filename_fmt = "{category}_{id}.{extension}"
per_page = 100
# Lookup table from the integer "type" field of a tag entry to a category
# name. The tag-grouping loop in this class reads it as `types[type]` and
# stores each group of tag values under the post key "tags_" + <name>
# (e.g. "tags_general", "tags_artist").
# NOTE(review): that lookup is a plain dict subscription, so a tag whose
# type id is not listed here would raise KeyError -- confirm these four ids
# are the only ones the API returns.
TAG_TYPES = {
1: "general",
2: "copyright",
4: "character",
8: "artist",
}
def _file_url(self, post):
post_id = post["id"]
extension = "jpg" if post["type"] == 0 else "mp4"
return "{}/posts/{}/{}/{}.{}".format(
post["file_url"] = url = "{}/posts/{}/{}/{}.{}".format(
self.root_cdn, post_id // 1000, post_id, post_id, extension)
return url
def _prepare(self, post):
post.pop("files", None)
@ -36,6 +45,13 @@ class Rule34vaultExtractor(BooruExtractor):
if "tags" not in post:
post.update(self._fetch_post(post["id"]))
tags = collections.defaultdict(list)
for tag in post["tags"]:
tags[tag["type"]].append(tag["value"])
types = self.TAG_TYPES
for type, values in tags.items():
post["tags_" + types[type]] = values
# Fetch the data of a single post.
# Builds the URL "<self.root>/api/v2/post/<post_id>", requests it through
# self.request(), and returns the result of calling .json() on the response.
# The caller visible in this class passes the result to post.update(), so the
# return value is presumably a dict describing the post -- confirm against
# the API.
def _fetch_post(self, post_id):
url = "{}/api/v2/post/{}".format(self.root, post_id)
return self.request(url).json()
@ -45,19 +61,19 @@ class Rule34vaultExtractor(BooruExtractor):
if params is None:
params = {}
params["CountTotal"] = True
params["CountTotal"] = False
params["Skip"] = self.page_start * self.per_page
params["take"] = self.per_page
threshold = self.per_page
while True:
data = self.request(url, method="POST", json=params).json()
yield from data["items"]
if params["Skip"] + params["take"] > data["totalCount"]:
if len(data["items"]) < threshold:
return
if "cursor" in data:
params["cursor"] = data["cursor"]
params["cursor"] = data.get("cursor")
params["Skip"] += params["take"]
@ -65,7 +81,7 @@ class Rule34vaultPostExtractor(Rule34vaultExtractor):
subcategory = "post"
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/post/(\d+)"
example = "https://rule34vault.com/post/399437"
example = "https://rule34vault.com/post/12345"
# Return the posts for a single-post URL: a one-element tuple holding the
# result of self._fetch_post() for self.groups[0].
# Presumably self.groups holds the capture groups of `pattern`, making
# groups[0] the numeric post ID matched by r"/post/(\d+)" -- verify in the
# base extractor.
def posts(self):
return (self._fetch_post(self.groups[0]),)
@ -76,7 +92,7 @@ class Rule34vaultPlaylistExtractor(Rule34vaultExtractor):
directory_fmt = ("{category}", "{playlist_id}")
archive_fmt = "p_{playlist_id}_{id}"
pattern = BASE_PATTERN + r"/playlists/view/(\d+)"
example = "https://rule34vault.com/playlists/view/2"
example = "https://rule34vault.com/playlists/view/12345"
# Return playlist-level metadata: a dict with the single key "playlist_id"
# set to self.groups[0]. This is the key that directory_fmt and archive_fmt
# of this class reference as "{playlist_id}".
# Presumably groups[0] is the ID captured by r"/playlists/view/(\d+)", i.e.
# a string rather than an int -- verify in the base extractor.
def metadata(self):
return {"playlist_id": self.groups[0]}
@ -90,7 +106,7 @@ class Rule34vaultTagExtractor(Rule34vaultExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
pattern = BASE_PATTERN + r"/([^/?#]+)$"
pattern = BASE_PATTERN + r"/(?!p(?:ost|laylists)/)([^/?#]+)"
example = "https://rule34vault.com/TAG"
def metadata(self):

View File

@ -16,21 +16,6 @@ __tests__ = (
"#count" : 10,
},
{
"#url" : "https://rule34vault.com/post/486545",
"#class": rule34vault.Rule34vaultPostExtractor,
"#pattern" : r"https://r34xyz\.b-cdn.net/posts/486/486545/486545\.jpg",
"#sha1_content": "8f53c4c9d049842d23b51fb3cf8ce11bcbe21f07",
},
{
"#url" : "https://rule34vault.com/post/382937",
"#comment": "video",
"#class" : rule34vault.Rule34vaultPostExtractor,
"#pattern" : r"https://r34xyz\.b-cdn.net/posts/382/382937/382937\.mp4",
"#sha1_content": "b962e3e2304139767c3792508353e6e83a85a2af",
},
{
"#url" : "https://rule34vault.com/playlists/view/20164",
"#class": rule34vault.Rule34vaultPlaylistExtractor,
@ -38,4 +23,68 @@ __tests__ = (
"#count" : 55,
},
{
"#url" : "https://rule34vault.com/post/280517",
"#comment": "image",
"#class" : rule34vault.Rule34vaultPostExtractor,
"#options": {"tags": True},
"#pattern" : "https://r34xyz.b-cdn.net/posts/280/280517/280517.jpg",
"#sha1_content": "1e19d601b4a79c06e6f885a83a5003e7e2a17057",
"created" : "2023-09-01T11:57:57.317331Z",
"date" : "dt:2023-09-01 11:57:57",
"extension" : "jpg",
"file_url" : "https://r34xyz.b-cdn.net/posts/280/280517/280517.jpg",
"filename" : "280517",
"height" : 1152,
"id" : 280517,
"likes" : range(3, 100),
"posted" : "2023-09-01T12:01:41.008547Z",
"status" : 2,
"type" : 0,
"uploaderId": 20678,
"views" : range(90, 999),
"width" : 768,
"data": {
"sources": [
"https://trynectar.ai/view/87c98fc8-e4f3-447c-a0d3-024b1890580a",
],
},
"tags": [
"ai generated",
"demon slayer",
"kamado nezuko",
"school uniform",
"sfw",
],
"tags_character": [
"kamado nezuko",
],
"tags_copyright": [
"demon slayer",
],
"tags_general": [
"ai generated",
"school uniform",
"sfw",
],
"uploader": {
"created" : "2023-07-24T04:33:36.734495Z",
"data" : None,
"displayName" : "quick1e",
"emailVerified": False,
"id" : 20678,
"role" : 1,
"userName" : "quick1e",
},
},
{
"#url" : "https://rule34vault.com/post/382937",
"#comment": "video",
"#class" : rule34vault.Rule34vaultPostExtractor,
"#urls" : "https://r34xyz.b-cdn.net/posts/382/382937/382937.mp4",
"#sha1_content": "b962e3e2304139767c3792508353e6e83a85a2af",
},
)