diff --git a/docs/configuration.rst b/docs/configuration.rst index e830066f..022e3c52 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -1058,7 +1058,8 @@ Description extractor.*.postprocessors -------------------------- Type - ``list`` of |Postprocessor Configuration|_ objects + * |Postprocessor Configuration|_ object + * ``list`` of |Postprocessor Configuration|_ objects Example .. code:: json @@ -2363,6 +2364,18 @@ Description * ``"hitomi"``: Download the corresponding gallery from ``hitomi.la`` +extractor.exhentai.tags +----------------------- +Type + ``bool`` +Default + ``false`` +Description + Group ``tags`` by type and + provide them as ``tags_<type>`` metadata fields, + for example ``tags_artist`` or ``tags_character``. + + extractor.fanbox.comments ------------------------- Type @@ -3032,6 +3045,18 @@ Description | ``"780"``, ``"980"``, ``"1280"``, ``"1600"``, ``"0"`` (original) +extractor.koharu.tags +--------------------- +Type + ``bool`` +Default + ``false`` +Description + Group ``tags`` by type and + provide them as ``tags_<type>`` metadata fields, + for example ``tags_artist`` or ``tags_character``. + + extractor.lolisafe.domain ------------------------- Type @@ -5156,8 +5181,9 @@ Type Default ``false`` Description - Categorize tags by their respective types - and provide them as ``tags_<type>`` metadata fields. + Group ``tags`` by type and + provide them as ``tags_<type>`` metadata fields, + for example ``tags_artist`` or ``tags_character``. Note: This requires 1 additional HTTP request per post. diff --git a/gallery_dl/extractor/koharu.py b/gallery_dl/extractor/koharu.py index 7c04dcb1..b60157ed 100644 --- a/gallery_dl/extractor/koharu.py +++ b/gallery_dl/extractor/koharu.py @@ -11,6 +11,7 @@ from .common import GalleryExtractor, Extractor, Message from .. 
import text, exception from ..cache import cache +import collections BASE_PATTERN = ( r"(?i)(?:https?://)?(" @@ -108,14 +109,24 @@ class KoharuGalleryExtractor(KoharuExtractor, GalleryExtractor): url = "{}/books/detail/{}/{}".format( self.root_api, self.groups[1], self.groups[2]) self.data = data = self.request(url, headers=self.headers).json() + data["date"] = text.parse_timestamp(data["created_at"] // 1000) tags = [] - for tag in data["tags"]: + types = self.TAG_TYPES + tags_data = data["tags"] + + for tag in tags_data: name = tag["name"] namespace = tag.get("namespace", 0) - tags.append(self.TAG_TYPES[namespace] + ":" + name) + tags.append(types[namespace] + ":" + name) data["tags"] = tags - data["date"] = text.parse_timestamp(data["created_at"] // 1000) + + if self.config("tags", False): + tags = collections.defaultdict(list) + for tag in tags_data : + tags[tag.get("namespace", 0)].append(tag["name"]) + for type, values in tags.items(): + data["tags_" + types[type]] = values try: if self.cbz: diff --git a/test/results/koharu.py b/test/results/koharu.py index 8ba22146..0aed25f6 100644 --- a/test/results/koharu.py +++ b/test/results/koharu.py @@ -12,6 +12,7 @@ __tests__ = ( "#url" : "https://niyaniya.moe/g/14216/6c67076fdd45", "#category": ("", "koharu", "gallery"), "#class" : koharu.KoharuGalleryExtractor, + "#options" : {"tags": True}, "#pattern" : r"https://kisakisexo.xyz/download/59896/a4fbd1828229/f47639c6abaf1903dd69c36a3d961da84741a1831aa07a2906ce9c74156a5d75\?v=1721626410802&w=0", "#count" : 1, @@ -43,6 +44,36 @@ __tests__ = ( "other:uncensored", "other:vanilla", ], + "tags_artist": [ + "ouchi kaeru", + ], + "tags_female": [ + "busty", + ], + "tags_general": [ + "beach", + "booty", + "dark skin", + "fingering", + "handjob", + "light hair", + "nakadashi", + "outdoors", + "ponytail", + "swimsuit", + "x-ray", + ], + "tags_language": [ + "english", + "translated", + ], + "tags_magazine": [ + "comic kairakuten 2024-08", + ], + "tags_other": [ + "uncensored", + 
"vanilla", + ], "title" : "[Ouchi Kaeru] Summer Business (Comic Kairakuten 2024-08)", "updated_at": 1721626410802, },