[kissgoddess] add 'gallery' and 'model' extractors

(closes #1052, #2304)
2024-11-25 20:22:36 +01:00 · 2022-02-20 04:45:37 +01:00 · 2022-02-20 04:45:37 +01:00 · fdfdc1b614
commit fdfdc1b614
parent 79a461a2c1
4 changed files with 88 additions and 0 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -385,6 +385,12 @@ Consider all sites to be NSFW unless otherwise known.
    <td>Soundtracks</td>
    <td></td>
 </tr>
 <tr>
    <td>Kiss Goddess</td>
    <td>https://kissgoddess.com/</td>
    <td>Galleries, Models</td>
    <td></td>
 </tr>
 <tr>
    <td>Kohlchan</td>
    <td>https://kohlchan.net/</td>
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -67,6 +67,7 @@ modules = [
    "keenspot",
    "kemonoparty",
    "khinsider",
    "kissgoddess",
    "kohlchan",
    "komikcast",
    "lightroom",
--- a/gallery_dl/extractor/kissgoddess.py
+++ b/gallery_dl/extractor/kissgoddess.py
@@ -0,0 +1,80 @@
 # -*- coding: utf-8 -*-
 # Copyright 2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 """Extractors for https://kissgoddess.com/"""
 from .common import GalleryExtractor, Extractor, Message
 from .. import text, exception
 class KissgoddessGalleryExtractor(GalleryExtractor):
    """Extractor for image galleries on kissgoddess.com

    A gallery is spread over several HTML pages: the first one is
    '/album/<id>.html', follow-up pages are '/album/<id>_<n>.html'
    with <n> counting up from 2.
    """
    category = "kissgoddess"
    root = "https://kissgoddess.com"
    pattern = r"(?:https?://)?(?:www\.)?kissgoddess\.com/album/(\d+)"
    test = ("https://kissgoddess.com/album/18285.html", {
        "pattern": r"https://pic\.kissgoddess\.com"
                   r"/gallery/16473/18285/s/\d+\.jpg",
        "count": 8,
        "keyword": {
            "gallery_id": 18285,
            "title": "[Young Champion Extra] 2016.02 No.03 菜乃花 安枝瞳 葉月あや",
        },
    })

    def __init__(self, match):
        """Remember the album ID from the URL and build the gallery URL"""
        self.gallery_id = match.group(1)
        url = "{}/album/{}.html".format(self.root, self.gallery_id)
        GalleryExtractor.__init__(self, match, url)

    def metadata(self, page):
        """Return 'gallery_id' and 'title' for the gallery

        'title' is the text of the page's <title> element with its
        trailing ' | ...' part removed. A page without a <title>
        element results in an empty title instead of an exception.
        """
        # A page lacking '<title>' does not produce a string here
        # (presumably None - verify against text.extract), so fall back
        # to '' before calling any str method on it.
        title = text.extract(page, '<title>', "<")[0] or ""

        # Only strip the suffix when the separator is actually present;
        # otherwise rpartition() would leave nothing of the title.
        head, separator, _ = title.rpartition(" | ")
        if separator:
            title = head

        return {
            "gallery_id": text.parse_int(self.gallery_id),
            "title"     : title,
        }

    def images(self, page):
        """Yield an (url, None) tuple for every image of the gallery

        Starts with the already fetched first 'page' and then requests
        the numbered follow-up pages one after another. Iteration ends
        as soon as requesting a follow-up page raises an HttpError.
        """
        pnum = 1
        while page:
            for url in text.extract_iter(page, "<img src='", "'"):
                yield url, None

            # follow-up pages are numbered starting at 2
            pnum += 1
            url = "{}/album/{}_{}.html".format(
                self.root, self.gallery_id, pnum)
            try:
                page = self.request(url).text
            except exception.HttpError:
                # no such page: the previous one was the last
                return
 class KissgoddessModelExtractor(Extractor):
    """Extractor for all galleries of a model on kissgoddess.com

    Fetches the model's '/people/<name>.html' page and queues every
    album linked from it for KissgoddessGalleryExtractor.
    """
    category = "kissgoddess"
    subcategory = "model"
    root = "https://kissgoddess.com"
    pattern = r"(?:https?://)?(?:www\.)?kissgoddess\.com/people/([^./?#]+)"
    test = ("https://kissgoddess.com/people/aya-hazuki.html", {
        "pattern": KissgoddessGalleryExtractor.pattern,
        "count": ">= 7",
    })

    def __init__(self, match):
        """Keep the model name captured from the URL"""
        Extractor.__init__(self, match)
        self.model = match.group(1)

    def items(self):
        """Yield a Message.Queue entry for each album on the model page"""
        listing = self.request(
            "{}/people/{}.html".format(self.root, self.model)).text

        # the same dict is handed out with every queued album URL
        queue_data = {"_extractor": KissgoddessGalleryExtractor}

        albums = text.extract_iter(
            listing, 'thumb"><a href="/album/', '"')
        for album in albums:
            # 'album' is the part of the link following '/album/'
            yield (Message.Queue,
                   "{}/album/{}".format(self.root, album),
                   queue_data)
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -58,6 +58,7 @@ CATEGORY_MAP = {
    "joyreactor"     : "JoyReactor",
    "kabeuchi"       : "かべうち",
    "kireicake"      : "Kirei Cake",
    "kissgoddess"    : "Kiss Goddess",
    "lineblog"       : "LINE BLOG",
    "livedoor"       : "livedoor Blog",
    "omgmiamiswimwear": "Omg Miami Swimwear",