# -*- coding: utf-8 -*-

# Copyright 2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://inkbunny.net/"""

from .common import Extractor, Message
from .. import text, exception
from ..cache import cache


BASE_PATTERN = r"(?:https?://)?(?:www\.)?inkbunny\.net"

# NOTE(review): this module references `InkbunnyAPI` (see
# InkbunnyExtractor.__init__), but no definition of it survives in the
# text this file was restored from. It has to be recovered from version
# control together with the truncated region flagged further below.


class InkbunnyExtractor(Extractor):
    """Base class for inkbunny extractors"""
    category = "inkbunny"
    directory_fmt = ("{category}", "{username!l}")
    filename_fmt = "{submission_id} {file_id} {title}.{extension}"
    archive_fmt = "{file_id}"
    root = "https://inkbunny.net"

    def __init__(self, match):
        """Initialize the extractor and attach an API object to it."""
        Extractor.__init__(self, match)
        self.api = InkbunnyAPI(self)

    def items(self):
        """Yield a Directory message per post and a Url message per file.

        Subclasses supply the submissions through ``posts()``. Each post
        dict is normalized in place (parsed date, flat tag and rating
        lists, boolean flags) and then reused as the metadata dict for
        every one of its files.
        """
        self.api.authenticate()

        # Post fields that are compared against "t" and stored as booleans.
        to_bool = ("deleted", "favorite", "friends_only", "guest_block",
                   "hidden", "public", "scraps")

        for post in self.posts():
            # "00" is appended before parsing with %z; presumably the
            # timestamps end in a two-digit UTC offset -- TODO confirm.
            post["date"] = text.parse_datetime(
                post["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z")
            post["tags"] = [kw["keyword_name"] for kw in post["keywords"]]
            post["ratings"] = [r["name"] for r in post["ratings"]]
            files = post["files"]

            for key in to_bool:
                if key in post:
                    post[key] = (post[key] == "t")

            # The raw lists are dropped from the metadata once they have
            # been flattened / captured above.
            del post["keywords"]
            del post["files"]

            yield Message.Directory, post
            for post["num"], file in enumerate(files, 1):
                # File fields overwrite same-named post fields, so
                # "deleted" and "date" are recomputed from the file.
                post.update(file)
                post["deleted"] = (file["deleted"] == "t")
                post["date"] = text.parse_datetime(
                    file["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z")
                text.nameext_from_url(file["file_name"], post)

                url = file["file_url_full"]
                if "/private_files/" in url:
                    # Private file URLs get the session ID appended.
                    url += "?sid=" + self.api.session_id
                yield Message.Url, url, post


class InkbunnyUserExtractor(InkbunnyExtractor):
    """Extractor for inkbunny user profiles"""
    subcategory = "user"
    pattern = BASE_PATTERN + r"/(?!s/)(gallery/|scraps/)?(\w+)(?:$|[/?#])"
    test = (
        ("https://inkbunny.net/soina", {
            "pattern": r"https://[\w.]+\.metapix\.net/files/full"
                       r"/\d+/\d+_soina_.+",
            "range": "20-50",
            "keyword": {
                "date"         : "type:datetime",
                "deleted"      : bool,
                "file_id"      : "re:[0-9]+",
                "filename"     : r"re:[0-9]+_soina_\w+",
                "full_file_md5": "re:[0-9a-f]{32}",
                "mimetype"     : str,
                "submission_id": "re:[0-9]+",
                "user_id"      : "20969",
                "comments_count" : "re:[0-9]+",
                "favorite"       : bool,
                "favorites_count": "re:[0-9]+",
                "friends_only"   : bool,
                "guest_block"    : bool,
                "hidden"         : bool,
                "pagecount"      : "re:[0-9]+",
                "pools"          : list,
                "pools_count"    : int,
                "public"         : bool,
                "rating_id"      : "re:[0-9]+",
                "rating_name"    : str,
                "ratings"        : list,
                "scraps"         : bool,
                "tags"           : list,
                "title"          : str,
                "type_name"      : str,
                "username"       : "soina",
                "views"          : str,
            },
        }),
        ("https://inkbunny.net/gallery/soina", {
            "range": "1-25",
            "keyword": {"scraps": False},
        }),
        ("https://inkbunny.net/scraps/soina", {
            "range": "1-25",
            "keyword": {"scraps": True},
        }),
    )

    def __init__(self, match):
        """Derive the subcategory and scraps filter from the URL prefix."""
        kind, self.user = match.groups()
        if not kind:
            # Plain profile URL: no scraps filter value is set.
            self.scraps = None
        elif kind[0] == "g":
            self.subcategory = "gallery"
            self.scraps = "no"
        else:
            self.subcategory = "scraps"
            self.scraps = "only"
        InkbunnyExtractor.__init__(self, match)

    def posts(self):
        """Return the search results for this user's submissions."""
        orderby = self.config("orderby")
        params = {
            "username": self.user,
            "scraps"  : self.scraps,
            "orderby" : orderby,
        }
        if orderby and orderby.startswith("unread_"):
            params["unread_submissions"] = "yes"
        return self.api.search(params)


class InkbunnyPoolExtractor(InkbunnyExtractor):
    """Extractor for inkbunny pools"""
    subcategory = "pool"
    pattern = (BASE_PATTERN + r"/(?:"
               r"poolview_process\.php\?pool_id=(\d+)|"
               r"submissionsviewall\.php\?([^#]+&mode=pool&[^#]+))")
    test = (
        ("https://inkbunny.net/poolview_process.php?pool_id=28985", {
            "count": 9,
        }),
        ("https://inkbunny.net/submissionsviewall.php?rid=ffffffffff"
         "&mode=pool&pool_id=28985&page=1&orderby=pool_order&random=no"),
    )

    def __init__(self, match):
        """Take pool ID and ordering from either supported URL form."""
        InkbunnyExtractor.__init__(self, match)
        pid = match.group(1)
        if pid:
            self.pool_id = pid
            self.orderby = "pool_order"
        else:
            # submissionsviewall.php form: values come from the query string.
            params = text.parse_query(match.group(2))
            self.pool_id = params.get("pool_id")
            self.orderby = params.get("orderby", "pool_order")

    def posts(self):
        """Return the search results for the submissions in this pool."""
        params = {
            "pool_id": self.pool_id,
            "orderby": self.orderby,
        }
        return self.api.search(params)


class InkbunnyFavoriteExtractor(InkbunnyExtractor):
    """Extractor for inkbunny user favorites"""
    subcategory = "favorite"
    pattern = (BASE_PATTERN + r"/(?:"
               r"userfavorites_process\.php\?favs_user_id=(\d+)|"
               r"submissionsviewall\.php\?([^#]+&mode=userfavs&[^#]+))")
    test = (
        ("https://inkbunny.net/userfavorites_process.php?favs_user_id=20969", {
            "pattern": r"https://[\w.]+\.metapix\.net/files/full"
                       r"/\d+/\d+_\w+_.+",
            "range": "20-50",
        }),
        ("https://inkbunny.net/submissionsviewall.php?rid=ffffffffff"
         "&mode=userfavs&random=no&orderby=fav_datetime&page=1&user_id=20969"),
    )

    def __init__(self, match):
        """Take user ID and ordering from either supported URL form."""
        InkbunnyExtractor.__init__(self, match)
        uid = match.group(1)
        if uid:
            # Short URL form: ordering comes from the "orderby" option.
            self.user_id = uid
            self.orderby = self.config("orderby", "fav_datetime")
        else:
            # submissionsviewall.php form: values come from the query string.
            params = text.parse_query(match.group(2))
            self.user_id = params.get("user_id")
            self.orderby = params.get("orderby", "fav_datetime")

    def posts(self):
        """Return the search results for this user's favorites."""
        params = {
            "favs_user_id": self.user_id,
            "orderby"     : self.orderby,
        }
        if self.orderby and self.orderby.startswith("unread_"):
            params["unread_submissions"] = "yes"
        return self.api.search(params)


class InkbunnySearchExtractor(InkbunnyExtractor):
    """Extractor for inkbunny search results"""
    subcategory = "search"
    pattern = (BASE_PATTERN +
               r"/submissionsviewall\.php\?([^#]+&mode=search&[^#]+)")
    test = (("https://inkbunny.net/submissionsviewall.php?rid=ffffffffff"
             "&mode=search&page=1&orderby=create_datetime&text=cute"
             "&stringtype=and&keywords=yes&title=yes&description=no&artist="
             "&favsby=&type=&days=&keyword_id=&user_id=&random=&md5="), {
        "range": "1-10",
        "count": 10,
    })

    def __init__(self, match):
        """Parse the URL's query string into search parameters."""
        InkbunnyExtractor.__init__(self, match)
        self.params = text.parse_query(match.group(1))
        # The "rid" value taken from the URL is not forwarded.
        self.params.pop("rid", None)

    def posts(self):
        """Return the search results for the parsed query parameters."""
        return self.api.search(self.params)


class InkbunnyFollowingExtractor(InkbunnyExtractor):
    """Extractor for inkbunny user watches"""
    subcategory = "following"
    pattern = (BASE_PATTERN + r"/(?:"
               r"watchlist_process\.php\?mode=watching&user_id=(\d+)|"
               r"usersviewall\.php\?([^#]+&mode=watching&[^#]+))")
    test = (
        (("https://inkbunny.net/watchlist_process.php"
          "?mode=watching&user_id=20969"), {
            "pattern": InkbunnyUserExtractor.pattern,
            "count": ">= 90",
        }),
        ("https://inkbunny.net/usersviewall.php?rid=ffffffffff"
         "&mode=watching&page=1&user_id=20969&orderby=added&namesonly="),
    )

    def __init__(self, match):
        """Take the user ID from either supported URL form."""
        InkbunnyExtractor.__init__(self, match)
        self.user_id = match.group(1) or \
            text.parse_query(match.group(2)).get("user_id")

    def items(self):
        """Request the watchlist page and set up pagination state.

        NOTE(review): the remainder of this method is missing from the
        text this file was restored from (see the comment in the body);
        it raises NotImplementedError until the original is recovered.
        """
        url = self.root + "/watchlist_process.php"
        params = {"mode": "watching", "user_id": self.user_id}

        with self.request(url, params=params) as response:
            # Continue from the final response URL: its path becomes the
            # base URL and its query string the pagination parameters.
            url, _, params = response.url.partition("?")
            page = response.text

        params = text.parse_query(params)
        params["page"] = text.parse_int(params.get("page"), 1)
        data = {"_extractor": InkbunnyUserExtractor}

        # NOTE(review): SOURCE is truncated from here on. Everything after
        # the opening quote of a string literal is gone -- it looks as if
        # an HTML-tag stripper removed the text from a "<" up to a later
        # ">". The surviving text of the loop is exactly:
        #
        #     while True:
        #         cnt = 0
        #         for user in text.extract_iter(
        #                 page, '
        #
        # The lost arguments and loop body are deliberately not guessed.
        # Failing loudly is safer than running a truncated `while True`.
        raise NotImplementedError(
            "InkbunnyFollowingExtractor.items() is truncated in this file")


# NOTE(review): the gap described in InkbunnyFollowingExtractor.items()
# also swallowed whatever was defined between that method and the fragment
# below -- including, presumably, the `InkbunnyAPI` class. Only this tail
# of a pagination loop survives; its enclosing method and the left-hand
# side of the first comparison are lost. It is kept verbatim so that
# nothing is dropped, and must be restored from version control:
#
#     ... >= data["pages_count"]:
#         return
#     if "get_rid" in params:
#         del params["get_rid"]
#         params["rid"] = data["rid"]
#     params["page"] += 1


@cache(maxage=360*24*3600, keyarg=1)
def _authenticate_impl(api, username, password):
    """Log in with the given credentials and return the session ID.

    The result is cached for 360 days (``maxage``); ``keyarg=1``
    presumably keys the cache on ``username`` -- TODO confirm against
    the cache decorator.

    Raises:
        exception.AuthenticationError: the login response has no "sid".
    """
    api.extractor.log.info("Logging in as %s", username)

    url = "https://inkbunny.net/api_login.php"
    data = {"username": username, "password": password}
    data = api.extractor.request(url, method="POST", data=data).json()

    if "sid" not in data:
        raise exception.AuthenticationError(data.get("error_message"))
    return data["sid"]