[inkbunny] add 'user' and 'post' extractors (#283)

2025-01-31 19:51:34 +01:00 · 2020-07-22 22:19:53 +02:00 · 2020-07-22 22:19:53 +02:00 · 87202b8d74
commit 87202b8d74
parent b62ea72533
6 changed files with 215 additions and 2 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,5 +1,7 @@
 # Changelog

+## Unreleased
+
 ## 1.14.3 - 2020-07-18
 ### Additions
 - [8muses] support `comics.8muses.com` URLs
--- a/docs/supportedsites.rst
+++ b/docs/supportedsites.rst
@ -59,6 +59,7 @@ ImgBB                https://imgbb.com/                  Albums, individual Imag
 imgbox               https://imgbox.com/                 Galleries, individual Images
 imgth                https://imgth.com/                  Galleries
 imgur                https://imgur.com/                  |imgur-C|
+Inkbunny             https://inkbunny.net/               Posts, User Profiles
 Instagram            https://www.instagram.com/          |instagram-C|                                      Optional
 Issuu                https://issuu.com/                  Publications, User Profiles
 Jaimini's Box        https://jaiminisbox.com/reader/     Chapters, Manga
--- a/gallery_dl/extractor/init.py
+++ b/gallery_dl/extractor/init.py
@ -54,6 +54,7 @@ modules = [
    "imgbox",
    "imgth",
    "imgur",
+    "inkbunny",
    "instagram",
    "issuu",
    "kabeuchi",
--- a/gallery_dl/extractor/inkbunny.py
+++ b/gallery_dl/extractor/inkbunny.py
@ -0,0 +1,209 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://inkbunny.net/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+from ..cache import cache
+
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?inkbunny\.net"
+
+
+class InkbunnyExtractor(Extractor):
+    """Base class for inkbunny extractors"""
+    category = "inkbunny"
+    directory_fmt = ("{category}", "{post[username]!l}")
+    filename_fmt = "{post[submission_id]} {file_id} {post[title]}.{extension}"
+    archive_fmt = "{file_id}"
+    root = "https://inkbunny.net"
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.item = match.group(1)
+
+    def items(self):
+        to_bool = ("deleted", "digitalsales", "favorite", "forsale",
+                   "friends_only", "guest_block", "hidden", "printsales",
+                   "public", "scraps")
+
+        for post in self.posts():
+            post["date"] = text.parse_datetime(
+                post["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z")
+            post["tags"] = [kw["keyword_name"] for kw in post["keywords"]]
+            files = post["files"]
+
+            for key in to_bool:
+                post[key] = (post[key] == "t")
+
+            del post["keywords"]
+            del post["files"]
+
+            yield Message.Directory, {"post": post}
+            for file in files:
+                file["post"] = post
+                file["deleted"] = (file["deleted"] == "t")
+                file["date"] = text.parse_datetime(
+                    file["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z")
+                text.nameext_from_url(file["file_name"], file)
+                yield Message.Url, file["file_url_full"], file
+
+
+class InkbunnyUserExtractor(InkbunnyExtractor):
+    """Extractor for inkbunny user profile"""
+    subcategory = "user"
+    pattern = BASE_PATTERN + r"/(?!s/)([^/?&#]+)"
+    test = ("https://inkbunny.net/soina", {
+        #  "pattern": "",
+        "range": "20-50",
+        "keyword": {
+            "date": "type:datetime",
+            "deleted": bool,
+            "file_id": "re:[0-9]+",
+            "filename": r"re:[0-9]+_soina_\w+",
+            "full_file_md5": "re:[0-9a-f]{32}",
+            "mimetype": str,
+            "submission_file_order": "re:[0-9]+",
+            "submission_id": "re:[0-9]+",
+            "user_id": "20969",
+            "post": {
+                "comments_count": "re:[0-9]+",
+                "date": "type:datetime",
+                "deleted": bool,
+                "digitalsales": bool,
+                "favorite": bool,
+                "favorites_count": "re:[0-9]+",
+                "forsale": bool,
+                "friends_only": bool,
+                "guest_block": bool,
+                "hidden": bool,
+                "pagecount": "re:[0-9]+",
+                "pools": list,
+                "pools_count": int,
+                "printsales": bool,
+                "public": bool,
+                "rating_id": "re:[0-9]+",
+                "rating_name": str,
+                "ratings": list,
+                "scraps": bool,
+                "submission_id": "re:[0-9]+",
+                "tags": list,
+                "title": str,
+                "type_name": str,
+                "user_id": "20969",
+                "username": "soina",
+                "views": str,
+            },
+        },
+    })
+
+    def posts(self):
+        api = InkbunnyAPI(self)
+        return api.search(username=self.item)
+
+
+class InkbunnyPostExtractor(InkbunnyExtractor):
+    """Extractor for individual Inkbunny posts"""
+    subcategory = "post"
+    pattern = BASE_PATTERN + r"/s/(\d+)"
+    test = (
+        ("https://inkbunny.net/s/1829715", {
+            "url": "5967eadf1fcfa7214744f5f814717ab73d14dfd3",
+            "content": "cf69d8dddf0822a12b4eef1f4b2258bd600b36c8",
+        }),
+        ("https://inkbunny.net/s/2044094", {
+            "count": 4,
+        }),
+    )
+
+    def posts(self):
+        api = InkbunnyAPI(self)
+        return api.detail(self.item)
+
+
+class InkbunnyAPI():
+    """Interface for the Inkunny API
+
+    Ref: https://wiki.inkbunny.net/wiki/API
+    """
+
+    def __init__(self, extractor):
+        self.extractor = extractor
+        self.session_id = None
+
+    def detail(self, submission_ids):
+        """Get full details about submissions with the given IDs"""
+        params = {"submission_ids": submission_ids}
+        return self._call("submissions", params)["submissions"]
+
+    def search(self, username):
+        """Perform a search"""
+        params = {"username": username}
+        return self._pagination_search(params)
+
+    def set_allowed_ratings(self, nudity=True, sexual=True,
+                            violence=True, strong_violence=True):
+        """Change allowed submission ratings"""
+        params = {
+            "tag[2]": "yes" if nudity else "no",
+            "tag[3]": "yes" if violence else "no",
+            "tag[4]": "yes" if sexual else "no",
+            "tag[5]": "yes" if strong_violence else "no",
+        }
+        self._call("userrating", params)
+
+    def authenticate(self):
+        username, password = self.extractor._get_auth_info()
+        if username:
+            self.session_id = self._authenticate_impl(username, password)
+        else:
+            self.session_id = self._authenticate_impl("guest", "")
+            self.set_allowed_ratings()
+
+    @cache(maxage=360*24*3600, keyarg=1)
+    def _authenticate_impl(self, username, password):
+        self.extractor.log.info("Logging in as %s", username)
+
+        url = "https://inkbunny.net/api_login.php"
+        data = {"username": username, "password": password}
+        response = self.extractor.request(url, method="POST", data=data)
+
+        data = response.json()
+        if "sid" not in data:
+            raise exception.AuthenticationError(data.get("error_message"))
+        return data["sid"]
+
+    def _call(self, endpoint, params):
+        if not self.session_id:
+            self.authenticate()
+
+        url = "https://inkbunny.net/api_" + endpoint + ".php"
+        params["sid"] = self.session_id
+        response = self.extractor.request(url, params=params)
+
+        if response.status_code < 400:
+            return response.json()
+
+    def _pagination_search(self, params):
+        params["get_rid"] = "yes"
+        params["submission_ids_only"] = "yes"
+
+        while True:
+            data = self._call("search", params)
+            yield from self.detail(
+                ",".join(s["submission_id"] for s in data["submissions"]))
+
+            if data["page"] >= data["pages_count"]:
+                return
+            if "get_rid" in params:
+                del params["get_rid"]
+                params["rid"] = data["rid"]
+                params["page"] = 2
+            else:
+                params["page"] += 1
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@ -6,4 +6,4 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-__version__ = "1.14.3"
+__version__ = "1.14.4-dev"
--- a/test/test_results.py
+++ b/test/test_results.py
@ -317,7 +317,7 @@ def setup_test_config():
    config.set(("extractor", "mangoxo")   , "password", "5zbQF10_5u25259Ma")

    for category in ("danbooru", "instagram", "twitter", "subscribestar",
-                     "e621"):
+                     "e621", "inkbunny"):
        config.set(("extractor", category), "username", None)

    config.set(("extractor", "mastodon.social"), "access-token",