1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 02:32:33 +01:00

[rule34xyz] add support (#1078, #4960)

This commit is contained in:
Mike Fährmann 2024-11-03 10:12:26 +01:00
parent 7c0d2ca07d
commit d787c0c4ea
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
5 changed files with 259 additions and 0 deletions

View File

@ -793,6 +793,12 @@ Consider all listed sites to potentially be NSFW.
<td>Posts, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>Rule 34 XYZ</td>
<td>https://rule34.xyz/</td>
<td>Playlists, Posts, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>Saint</td>
<td>https://saint2.su/</td>

View File

@ -140,6 +140,7 @@ modules = [
"redgifs",
"rule34us",
"rule34vault",
"rule34xyz",
"saint",
"sankaku",
"sankakucomplex",

View File

@ -0,0 +1,125 @@
# -*- coding: utf-8 -*-
# Copyright 2024 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://rule34.xyz/"""
from .booru import BooruExtractor
from .. import text
import collections
# Common URL prefix (optional http/https scheme + rule34.xyz host) that the
# extractor classes in this module prepend to their own path patterns.
BASE_PATTERN = r"(?:https?://)?rule34\.xyz"
class Rule34xyzExtractor(BooruExtractor):
    # Shared base for the rule34.xyz extractors: site constants plus the
    # API helpers (single-post fetch, pagination) the subclasses rely on.
    category = "rule34xyz"
    root = "https://rule34.xyz"
    root_cdn = "https://rule34xyz.b-cdn.net"
    filename_fmt = "{category}_{id}.{extension}"
    per_page = 60

    # Numeric tag type (as found in 'tagsWithType') -> 'tags_<name>' suffix
    TAG_TYPES = {
        0: "general",
        1: "copyright",
        2: "character",
        3: "artist",
    }

    def _file_url(self, post):
        """Build post["files"] and choose the download URL.

        Removes 'imageLinks' from 'post' and stores it as a
        {type: url} mapping under 'files'. File types 10, 40 and 41
        are tried in that order; type 2 is the mandatory fallback,
        so a post without any of them raises KeyError.
        """
        files = {}
        for link in post.pop("imageLinks"):
            files[link["type"]] = link["url"]
        post["files"] = files

        for file_type in (10, 40, 41):
            url = files.get(file_type)
            if url:
                break
        else:
            url = files[2]

        post["file_url"] = url
        return url

    def _prepare(self, post):
        """Strip bulky fields from 'post' and add a parsed 'date'."""
        for key in ("filesPreview", "tagsWithType"):
            post.pop(key, None)

        created = post["created"]
        post["date"] = text.parse_datetime(created, "%Y-%m-%dT%H:%M:%S.%f")

    def _tags(self, post, _):
        """Add one 'tags_<category>' list per tag type present in 'post'.

        When 'tagsWithType' is missing or None, the post is fetched
        again through the single-post API and merged into 'post' first.
        The second argument is unused.
        """
        if post.get("tagsWithType") is None:
            post.update(self._fetch_post(post["id"]))

        # group tag values by their numeric type, keeping first-seen order
        grouped = {}
        for entry in post["tagsWithType"]:
            grouped.setdefault(entry["type"], []).append(entry["value"])

        names = self.TAG_TYPES
        for tag_type, values in grouped.items():
            post["tags_" + names[tag_type]] = values

    def _fetch_post(self, post_id):
        """Return the decoded JSON of the API entry for 'post_id'."""
        response = self.request(
            "{}/api/post/{}".format(self.root, post_id))
        return response.json()

    def _pagination(self, endpoint, params=None):
        """Yield all items of an API listing, one page after another.

        'params' (if given) is updated in place with the paging and
        include options. Iteration ends after the first page that
        holds fewer than 'per_page' items.
        """
        url = "{}/api{}".format(self.root, endpoint)
        page_size = self.per_page

        if params is None:
            params = {}
        params.update((
            ("IncludeLinks", "true"),
            ("IncludeTags", "true"),
            ("OrderBy", "0"),
            ("Skip", self.page_start * page_size),
            ("Take", page_size),
            ("DisableTotal", "true"),
        ))

        while True:
            items = self.request(url, params=params).json()["items"]
            yield from items

            # a short page means there is nothing left to request
            if len(items) < page_size:
                break
            params["Skip"] += params["Take"]
class Rule34xyzPostExtractor(Rule34xyzExtractor):
    # Handles a single post URL of the form /post/<numeric id>
    subcategory = "post"
    archive_fmt = "{id}"
    pattern = BASE_PATTERN + r"/post/(\d+)"
    example = "https://rule34.xyz/post/12345"

    def posts(self):
        """Return a 1-tuple holding the post named in the URL."""
        post_id = self.groups[0]
        post = self._fetch_post(post_id)
        return (post,)
class Rule34xyzPlaylistExtractor(Rule34xyzExtractor):
    # Handles playlist URLs of the form /playlists/view/<numeric id>
    subcategory = "playlist"
    directory_fmt = ("{category}", "{playlist_id}")
    archive_fmt = "p_{playlist_id}_{id}"
    pattern = BASE_PATTERN + r"/playlists/view/(\d+)"
    example = "https://rule34.xyz/playlists/view/12345"

    def metadata(self):
        """Return the playlist ID from the URL as 'playlist_id'."""
        playlist_id = self.groups[0]
        return {"playlist_id": playlist_id}

    def posts(self):
        """Return a generator over the playlist's items."""
        query = {"PlaylistId": self.groups[0]}
        return self._pagination("/playlist-item", query)
class Rule34xyzTagExtractor(Rule34xyzExtractor):
    # Handles tag-search URLs: a single path segment directly after the host
    subcategory = "tag"
    directory_fmt = ("{category}", "{search_tags}")
    archive_fmt = "t_{search_tags}_{id}"
    pattern = BASE_PATTERN + r"/([^/?#]+)$"
    example = "https://rule34.xyz/TAG"

    def metadata(self):
        """Decode the tag from the URL, store it, and return it.

        The path segment is unquoted and its underscores are replaced
        by spaces; the result is kept in 'self.tags' for posts().
        """
        tags = text.unquote(self.groups[0]).replace("_", " ")
        self.tags = tags
        return {"search_tags": tags}

    def posts(self):
        """Return a generator over the search results for 'self.tags'."""
        query = {"Tag": self.tags}
        return self._pagination("/post/search", query)

View File

@ -120,6 +120,7 @@ CATEGORY_MAP = {
"rule34hentai" : "Rule34Hentai",
"rule34us" : "Rule 34",
"rule34vault" : "R34 Vault",
"rule34xyz" : "Rule 34 XYZ",
"sankaku" : "Sankaku Channel",
"sankakucomplex" : "Sankaku Complex",
"seiga" : "Niconico Seiga",

126
test/results/rule34xyz.py Normal file
View File

@ -0,0 +1,126 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import rule34xyz
# Expected results for the rule34xyz extractors; one dict per test URL.
# NOTE(review): keys starting with "#" look like directives for the test
# runner (URL, extractor class, options, expected URLs/counts) while the
# remaining keys are expected metadata values - confirm against the harness.
__tests__ = (
# Tag search for "sfw", limited to the first 150 results
{
"#url" : "https://rule34.xyz/sfw",
"#class": rule34xyz.Rule34xyzTagExtractor,
"#pattern": r"https://rule34(\.xyz|xyz\.b-cdn\.net)/posts/\d+/\d+/\d+\.(pic|mov\d*)\.(jpg|mp4)",
"#range" : "1-150",
"#count" : 150,
"search_tags": "sfw",
},
# Playlist 119
{
"#url" : "https://rule34.xyz/playlists/view/119",
"#class": rule34xyz.Rule34xyzPlaylistExtractor,
"#pattern": r"https://rule34(\.xyz|xyz\.b-cdn\.net)/posts/\d+/\d+/\d+\.(pic|mov\d*)\.(jpg|mp4)",
"#count" : 64,
"playlist_id": "119",
},
# Single image post with the "tags" option enabled, so the per-category
# "tags_*" lists are expected in addition to the flat "tags" list
{
"#url" : "https://rule34.xyz/post/3613851",
"#comment": "image",
"#class" : rule34xyz.Rule34xyzPostExtractor,
"#options" : {"tags": True},
"#urls" : "https://rule34xyz.b-cdn.net/posts/3613/3613851/3613851.pic.jpg",
"#sha1_content": "4d7146db258fd5b1645a1a5fc01550d102f495e1",
"attributes": 1,
"comments" : 0,
"created" : "2023-03-29T06:00:59.136819",
"date" : "dt:2023-03-29 06:00:59",
"duration" : None,
"error" : None,
"extension" : "jpg",
"file_url" : "https://rule34xyz.b-cdn.net/posts/3613/3613851/3613851.pic.jpg",
"filename" : "3613851.pic",
"id" : 3613851,
# NOTE(review): range(...) values presumably mean "any value inside this
# range", since these counters change over time - confirm
"likes" : range(3, 100),
"posted" : "2023-03-29T06:01:07.900161",
"type" : 0,
"uploaderId": 9741,
"views" : range(200, 2000),
"status" : 2,
"files" : dict,
"sources": [
"https://twitter.com/DesireDelta13/status/1636502494292373505?t=OrmlnC85cELyY5BPmBy9Hw&s=19",
],
"tags": [
"doki doki literature club",
"doki doki takeover",
"friday night funkin",
"friday night funkin mod",
"yuri (doki doki literature club)",
"desiredelta",
"1girls",
"big breasts",
"clothed",
"clothed female",
"female",
"female focus",
"female only",
"holding microphone",
"holding object",
"long hair",
"long purple hair",
"looking at viewer",
"microphone",
"open hand",
"open mouth",
"purple background",
"purple hair",
"solo",
"solo female",
"solo focus",
"sweater",
"white outline",
"jpeg",
"safe for work",
"sfw",
],
"tags_artist": [
"desiredelta",
],
"tags_character": [
"yuri (doki doki literature club)",
],
"tags_copyright": [
"doki doki literature club",
"friday night funkin",
"friday night funkin mod",
],
"tags_general": list,
"uploader": {
"avatarUrl" : None,
"bookmarks" : 0,
"certified" : True,
"created" : "2021-04-03T08:29:51.373823",
"email" : "agent.rulexxx-uploader@z.com",
"id" : 9741,
"isSystemAccount": True,
"name" : "agent.rulexxx-uploader",
"role" : 2,
"uploadedPosts" : range(100000, 999999),
"webId" : None,
},
},
# Single video post; only the file URL and content hash are checked
{
"#url" : "https://rule34.xyz/post/3571567",
"#comment": "video",
"#class" : rule34xyz.Rule34xyzPostExtractor,
"#urls" : "https://rule34xyz.b-cdn.net/posts/3571/3571567/3571567.mov720.mp4",
"#sha1_content": "c0a5e7e887774f91527f00e6142c435a3c482c1f",
},
)