From 93eca64a73439827c2ee2e46910b4f88340782c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 20 Sep 2024 17:21:17 +0200 Subject: [PATCH] [civitai] add initial support (#3706, #3787, #4129, #5995) --- docs/configuration.rst | 30 +++- docs/supportedsites.md | 8 +- gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/civitai.py | 229 +++++++++++++++++++++++++++++++ test/results/civitai.py | 91 ++++++++++++ 5 files changed, 357 insertions(+), 2 deletions(-) create mode 100644 gallery_dl/extractor/civitai.py create mode 100644 test/results/civitai.py diff --git a/docs/configuration.rst b/docs/configuration.rst index ceaf78bc..964779f6 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -1501,7 +1501,7 @@ Type Default ``["image", "video", "download", "gallery"]`` Description - Determines the type and order of files to be downloaded. + Determines the type and order of files to download. Available types are ``image``, @@ -1510,6 +1510,34 @@ Description ``gallery``. +extractor.civitai.api-key +------------------------- +Type + ``string`` +Description + The API Key value generated in your + `User Account Settings `__ + to make authorized API requests. + + See `API/Authorization `__ + for details. + + +extractor.civitai.files +----------------------- +Type + ``list`` of ``strings`` +Default + ``["image"]`` +Description + Determines the type and order of files to download. + + Available types are + ``model``, + ``image``, + ``gallery``. + + extractor.cohost.asks --------------------- Type diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d303f599..4e319879 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -157,10 +157,16 @@ Consider all listed sites to potentially be NSFW. Articles, Creators, Followed Users, Recent Images + + Civitai + https://www.civitai.com/ + Models, Search Results, Tag Searches, User Profiles + + cohost! 
https://cohost.org/ - Posts, Tag Searches, User Profiles + Likes, Posts, Tag Searches, User Profiles
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 34b78c85..826771c2 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -36,6 +36,7 @@ modules = [
     "catbox",
     "chevereto",
     "cien",
+    "civitai",
     "cohost",
     "comicvine",
     "cyberdrop",
diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py
new file mode 100644
index 00000000..6787724d
--- /dev/null
+++ b/gallery_dl/extractor/civitai.py
@@ -0,0 +1,229 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.civitai.com/"""
+
+from .common import Extractor, Message
+from .. import text
+import itertools
+import re
+
+# URL prefix shared by every pattern below; the scheme part is optional
+BASE_PATTERN = r"(?:https?://)?civitai\.com"
+
+
+class CivitaiExtractor(Extractor):
+    """Base class for civitai extractors"""
+    category = "civitai"
+    root = "https://civitai.com"
+    request_interval = (0.5, 1.5)
+
+    def _init(self):
+        """Attach a CivitaiAPI instance bound to this extractor"""
+        self.api = CivitaiAPI(self)
+
+    def items(self):
+        """Queue the model page of every model returned by self.models()
+
+        models() is not defined on this class; the tag, search and user
+        extractors in this module each provide it.
+        """
+        # presumably tells the consumer which extractor class should
+        # handle the queued URLs - confirm against the Message.Queue users
+        data = {"_extractor": CivitaiModelExtractor}
+        for model in self.models():
+            url = "{}/models/{}".format(self.root, model["id"])
+            yield Message.Queue, url, data
+
+
+class CivitaiModelExtractor(CivitaiExtractor):
+    """Extractor for the files of one model (optionally one version)"""
+    subcategory = "model"
+    directory_fmt = ("{category}", "{user[username]}",
+                     "{model[id]}{model[name]:? //}",
+                     "{version[id]}{version[name]:? //}")
+    archive_fmt = "{file[hash]}"
+    # group 1: model ID, group 2: optional 'modelVersionId' query value
+    pattern = BASE_PATTERN + r"/models/(\d+)(?:/?\?modelVersionId=(\d+))?"
+    example = "https://civitai.com/models/12345/TITLE"
+
+    def items(self):
+        """Yield a directory message per version, then one URL per file
+
+        Every file dict is updated with the 'model', 'version' and
+        'user' metadata before it is yielded.
+        """
+        model_id, version_id = self.groups
+        # substitution for a '/width=<digits>/' segment in image URLs;
+        # used by the image and gallery helpers below
+        self._sub = re.compile(r"/width=\d*/").sub
+
+        model = self.api.model(model_id)
+        creator = model["creator"]
+        versions = model["modelVersions"]
+        # keep the nested creator / version data out of the 'model' dict;
+        # they are exposed separately as 'user' and 'version'
+        del model["creator"]
+        del model["modelVersions"]
+
+        if version_id:
+            version_id = int(version_id)
+            # look for the requested version among the model's versions
+            for version in versions:
+                if version["id"] == version_id:
+                    break
+            else:
+                # loop ended without a match: request the version itself
+                version = self.api.model_version(version_id)
+            # only the requested version is processed
+            versions = (version,)
+
+        for version in versions:
+            version["date"] = text.parse_datetime(
+                version["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
+
+            data = {
+                "model"  : model,
+                "version": version,
+                "user"   : creator,
+            }
+
+            yield Message.Directory, data
+            for file in self._extract_files(model, version):
+                file.update(data)
+                yield Message.Url, file["url"], file
+
+    def _extract_files(self, model, version):
+        """Return the file dicts selected by the 'files' config value
+
+        Without a 'files' setting only the version's images are
+        returned. Otherwise the value is a list of type names, or a
+        comma-separated string of them, processed in the given order.
+        """
+        filetypes = self.config("files")
+        if filetypes is None:
+            return self._extract_files_image(model, version)
+
+        # keys are the names left after trailing "s" characters are
+        # stripped, which is why "gallerie" (from "galleries") is listed
+        generators = {
+            "model"   : self._extract_files_model,
+            "image"   : self._extract_files_image,
+            "gallery" : self._extract_files_gallery,
+            "gallerie": self._extract_files_gallery,
+        }
+        if isinstance(filetypes, str):
+            filetypes = filetypes.split(",")
+
+        # the lookups run lazily while the chain is consumed; a name
+        # missing from the table raises KeyError at that point
+        return itertools.chain.from_iterable(
+            generators[ft.rstrip("s")](model, version)
+            for ft in filetypes
+        )
+
+    def _extract_files_model(self, model, version):
+        """Return one dict per entry of version["files"]
+
+        'num' counts from 1, 'extension' is always "bin" and 'url' is
+        the entry's "downloadUrl".
+        """
+        return [
+            {
+                "num"      : num,
+                "file"     : file,
+                "filename" : file["name"],
+                "extension": "bin",
+                "url"      : file["downloadUrl"],
+                # the header value is None when no api-key is configured
+                "_http_headers" : {
+                    "Authorization": self.api.headers.get("Authorization")},
+                "_http_validate": self._validate_file_model,
+            }
+            for num, file in enumerate(version["files"], 1)
+        ]
+
+    def _extract_files_image(self, model, version):
+        """Return one dict per entry of version["images"]
+
+        'url' is the image URL with its '/width=<digits>/' segment
+        replaced by '/w/'. The dict is passed through
+        text.nameext_from_url() together with the unmodified URL
+        (presumably to add filename and extension - confirm).
+        """
+        return [
+            text.nameext_from_url(file["url"], {
+                "num" : num,
+                "file": file,
+                "url" : self._sub("/w/", file["url"]),
+            })
+            for num, file in enumerate(version["images"], 1)
+        ]
+
+    def _extract_files_gallery(self, model, version):
+        """Yield one dict per image the API lists for this version
+
+        Images are requested with the model ID and the version ID as
+        parameters; the dicts have the same layout as those built by
+        _extract_files_image().
+        """
+        params = {
+            "modelId"       : model["id"],
+            "modelVersionId": version["id"],
+        }
+
+        for num, file in enumerate(self.api.images(params), 1):
+            yield text.nameext_from_url(file["url"], {
+                "num" : num,
+                "file": file,
+                "url" : self._sub("/w/", file["url"]),
+            })
+
+    def _validate_file_model(self, response):
+        """Return False for an HTML response, True otherwise
+
+        For an HTML response a warning about the missing 'api-key' is
+        logged, quoting the page's alert text when one is found.
+        """
+        if response.headers.get("Content-Type", "").startswith("text/html"):
+            # NOTE(review): the end marker reads as an empty string in
+            # this copy; it may have been lost - confirm with the original
+            alert = text.extr(
+                response.text, 'mantine-Alert-message">', "")
+            if alert:
+                msg = "\"{}\" - 'api-key' required".format(
+                    text.remove_html(alert))
+            else:
+                msg = "'api-key' required to download this file"
+            self.log.warning(msg)
+            return False
+        return True
+
+
+class CivitaiTagExtractor(CivitaiExtractor):
+    """Extractor for the models listed under a tag"""
+    subcategory = "tag"
+    pattern = BASE_PATTERN + r"/tag/([^?/#]+)"
+    example = "https://civitai.com/tag/TAG"
+
+    def models(self):
+        """Return the API's models for the unquoted tag from the URL"""
+        tag = text.unquote(self.groups[0])
+        return self.api.models({"tag": tag})
+
+
+class CivitaiSearchExtractor(CivitaiExtractor):
+    """Extractor for model search URLs"""
+    subcategory = "search"
+    pattern = BASE_PATTERN + r"/search/models\?([^#]+)"
+    example = "https://civitai.com/search/models?query=QUERY"
+
+    def models(self):
+        """Return the API's models for the URL's query parameters"""
+        # the parsed query string is handed to the API as-is
+        params = text.parse_query(self.groups[0])
+        return self.api.models(params)
+
+
+class CivitaiUserExtractor(CivitaiExtractor):
+    """Extractor for the models of a user"""
+    subcategory = "user"
+    pattern = BASE_PATTERN + r"/user/([^/?#]+)(?:/models)?/?(?:$|\?|#)"
+    example = "https://civitai.com/user/USER/models"
+
+    def models(self):
+        """Return the API's models for the unquoted username"""
+        params = {"username": text.unquote(self.groups[0])}
+        return self.api.models(params)
+
+
+class CivitaiAPI():
+    """Interface for the Civitai Public REST API
+
+    https://developer.civitai.com/docs/api/public-rest
+    """
+
+    def __init__(self, extractor):
+        """Set up the API root URL and request headers
+
+        An 'Authorization: Bearer ...' header is added only when the
+        extractor has an 'api-key' config value.
+        """
+        self.extractor = extractor
+        self.root = extractor.root + "/api"
+        self.headers = {"Content-Type": "application/json"}
+
+        api_key = extractor.config("api-key")
+        if api_key:
+            extractor.log.debug("Using api_key authentication")
+            self.headers["Authorization"] = "Bearer " + api_key
+
+    def images(self, params):
+        """Yield the items of all '/v1/images' result pages"""
+        endpoint = "/v1/images"
+        return self._pagination(endpoint, params)
+
+    def model(self, model_id):
+        """Return the decoded JSON response for a single model"""
+        endpoint = "/v1/models/{}".format(model_id)
+        return self._call(endpoint)
+
+    def model_version(self, model_version_id):
+        """Return the decoded JSON response for a single model version"""
+        endpoint = "/v1/model-versions/{}".format(model_version_id)
+        return self._call(endpoint)
+
+    def models(self, params):
+        """Yield the items of all '/v1/models' result pages"""
+        return self._pagination("/v1/models", params)
+
+    def _call(self, endpoint, params=None):
+        """Request 'endpoint' and return the decoded JSON response
+
+        An endpoint starting with "/" is appended to the API root;
+        anything else is used unchanged as the request URL.
+        """
+        if endpoint[0] == "/":
+            url = self.root + endpoint
+        else:
+            url = endpoint
+
+        response = self.extractor.request(
+            url, params=params, headers=self.headers)
+        return response.json()
+
+    def _pagination(self, endpoint, params):
+        """Yield the 'items' of each page, following 'nextPage' links
+
+        Stops once a response carries no metadata["nextPage"] entry.
+        """
+        while True:
+            data = self._call(endpoint, params)
+            yield from data["items"]
+
+            try:
+                endpoint = data["metadata"]["nextPage"]
+            except KeyError:
+                return
+            # params are only sent with the first request; presumably
+            # the 'nextPage' URL already carries the query - confirm
+            params = None
diff --git a/test/results/civitai.py b/test/results/civitai.py
new file mode 100644
index 00000000..c28f4fc5
--- /dev/null
+++ b/test/results/civitai.py
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import civitai
+
+
+# Result definitions for the civitai extractors. Each entry names an
+# input URL ("#url"), a category triple ("#category"), the extractor
+# class for that URL ("#class"), a list of file URLs ("#urls") and
+# metadata values; presumably all of this is read by the test runner.
+__tests__ = (
+# model URL without a version ID
+{
+    "#url"     : "https://civitai.com/models/703211/maid-classic",
+    "#category": ("", "civitai", "model"),
+    "#class"   : civitai.CivitaiModelExtractor,
+    "#urls"    : [
+        "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/5c4efa68-bb58-47c5-a716-98cd0f51f047/w/26962950.jpeg",
+        "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/69bf3279-df2c-4ec8-b795-479e9cd3db1b/w/26962948.jpeg",
+        "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/2dd1dc69-45a6-4beb-b36b-2e2bc65e3cda/w/26962957.jpeg",
+        "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/52b6efa7-801c-4901-90b4-fa3964d23480/w/26887862.jpeg",
+        "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/c4d3bcd5-0e23-4f4e-9f34-d13b2f2bf14c/w/26887856.jpeg",
+        "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/68568d22-c4f3-45cb-ac32-82f1cedf968f/w/26887852.jpeg",
+    ],
+
+    # NOTE(review): the "description" value below is split over several
+    # lines in this copy, which is not a valid string literal; markup
+    # looks to have been lost - confirm against the original file
+    "model"  : {
+        "allowCommercialUse": ["RentCivit"],
+        "allowDerivatives": True,
+        "allowDifferentLicense": True,
+        "allowNoCredit": True,
+        "cosmetic"   : None,
+        "description": "

The strength of Lora is recommended to be around 1.0.

",
+        "id"         : 703211,
+        "minor"      : False,
+        "name"       : "メイド クラシック/maid classic",
+        "nsfw"       : False,
+        "nsfwLevel"  : 1,
+        "poi"        : False,
+        "stats"      : dict,
+        "tags"       : ["clothing"],
+        "type"       : "LORA"
+    },
+    "user"   : {
+        "image"   : None,
+        "username": "bolero537"
+    },
+    "file"   : dict,
+    "version": dict,
+    "num"    : range(1, 3),
+},
+
+# same model, restricted to one version via 'modelVersionId'
+{
+    "#url"     : "https://civitai.com/models/703211?modelVersionId=786644",
+    "#category": ("", "civitai", "model"),
+    "#class"   : civitai.CivitaiModelExtractor,
+    "#urls"    : [
+        "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/52b6efa7-801c-4901-90b4-fa3964d23480/w/26887862.jpeg",
+        "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/c4d3bcd5-0e23-4f4e-9f34-d13b2f2bf14c/w/26887856.jpeg",
+        "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/68568d22-c4f3-45cb-ac32-82f1cedf968f/w/26887852.jpeg",
+    ],
+
+    "version": {
+        "availability": "Public",
+        "baseModel"   : "Pony",
+        "createdAt"   : "2024-08-30T15:28:47.661Z",
+        "date"        : "dt:2024-08-30 15:28:47",
+        "downloadUrl" : "https://civitai.com/api/download/models/786644",
+        "files"       : list,
+        "id"          : 786644,
+        "images"      : list,
+        "index"       : 1,
+        "name"        : "v1.0 pony",
+        "nsfwLevel"   : 1,
+        "publishedAt" : "2024-08-30T15:39:17.674Z",
+        "stats"       : dict,
+        "status"      : "Published",
+        "trainedWords": [
+            "maid",
+            "madi apron",
+            "maid headdress",
+            "long sleeves",
+        ],
+    },
+    "user"   : {
+        "image"   : None,
+        "username": "bolero537"
+    },
+    "file"   : dict,
+    "model"  : dict,
+    # NOTE(review): the model extractor numbers files from 1 and three
+    # URLs are listed above, so "num" can presumably be 3, which
+    # range(1, 3) does not contain - confirm how ranges are matched
+    "num"    : range(1, 3),
+},
+
+)