[koharu] add 'favorite' extractor (#5893)

2024-11-22 10:42:34 +01:00 · 2024-07-31 23:29:34 +02:00 · 2024-07-31 23:29:34 +02:00 · c372242a06
commit c372242a06
parent 2bf76461ce
4 changed files with 97 additions and 34 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@ -385,6 +385,7 @@ Type
 Default
    * ``"0.5-1.5"``
        ``[Danbooru]``, ``[E621]``, ``[foolfuuka]:search``, ``itaku``,
        ``koharu``,
        ``newgrounds``, ``[philomena]``, ``pixiv:novel``, ``plurk``,
        ``poipiku``, ``pornpics``, ``soundgasm``, ``urlgalleries``,
        ``vk``, ``zerochan``
@ -438,6 +439,7 @@ Description
    * ``imgbb``
    * ``inkbunny``
    * ``kemonoparty``
    * ``koharu``
    * ``mangadex``
    * ``mangoxo``
    * ``pillowfort``
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@ -478,7 +478,7 @@ Consider all listed sites to potentially be NSFW.
 <tr>
    <td>Koharu</td>
    <td>https://koharu.to/</td>
-    <td>Galleries, Search Results</td>
+    <td>Favorites, Galleries, Search Results</td>
    <td></td>
 </tr>
 <tr>
--- a/gallery_dl/extractor/koharu.py
+++ b/gallery_dl/extractor/koharu.py
@ -10,15 +10,53 @@
 from .common import GalleryExtractor, Extractor, Message
 from .. import text, exception
 from ..cache import cache
 BASE_PATTERN = r"(?i)(?:https?://)?(?:koharu|anchira)\.to"
-class KoharuGalleryExtractor(GalleryExtractor):
+class KoharuExtractor(Extractor):
-    """Extractor for koharu galleries"""
+    """Base class for koharu extractors"""
    category = "koharu"
    root = "https://koharu.to"
    root_api = "https://api.koharu.to"
    request_interval = (0.5, 1.5)
    def _init(self):
        self.headers = {
            "Accept" : "*/*",
            "Referer": self.root + "/",
            "Origin" : self.root,
        }
    def _pagination(self, endpoint, params):
        url_api = self.root_api + endpoint
        while True:
            data = self.request(
                url_api, params=params, headers=self.headers).json()
            try:
                entries = data["entries"]
            except KeyError:
                return
            for entry in entries:
                url = "{}/g/{}/{}".format(
                    self.root, entry["id"], entry["public_key"])
                entry["_extractor"] = KoharuGalleryExtractor
                yield Message.Queue, url, entry
            try:
                if data["limit"] * data["page"] >= data["total"]:
                    return
            except Exception:
                pass
            params["page"] += 1
 class KoharuGalleryExtractor(KoharuExtractor, GalleryExtractor):
    """Extractor for koharu galleries"""
    filename_fmt = "{num:>03}.{extension}"
    directory_fmt = ("{category}", "{id} {title}")
    archive_fmt = "{id}_{num}"
@ -130,46 +168,47 @@ class KoharuGalleryExtractor(GalleryExtractor):
        return fmt
-class KoharuSearchExtractor(Extractor):
+class KoharuSearchExtractor(KoharuExtractor):
    """Extractor for koharu search results"""
    category = "koharu"
    subcategory = "search"
    root = "https://koharu.to"
    root_api = "https://api.koharu.to"
    request_interval = (1.0, 2.0)
    pattern = BASE_PATTERN + r"/\?([^#]*)"
    example = "https://koharu.to/?s=QUERY"
    def _init(self):
        self.headers = {
            "Accept" : "*/*",
            "Referer": self.root + "/",
            "Origin" : self.root,
        }
    def items(self):
        """Return the paginated results of the "/books" API endpoint.

        The query string captured from the URL (``self.groups[0]``) is
        forwarded as request parameters. "page" is converted with
        ``text.parse_int(..., 1)``, i.e. it falls back to 1 (parse_int's
        second argument) when absent.
        """
        # The previous inline request loop was unreachable after the
        # 'return' below; paging is handled entirely by _pagination().
        params = text.parse_query(self.groups[0])
        params["page"] = text.parse_int(params.get("page"), 1)
        return self._pagination("/books", params)
-            try:
+class KoharuFavoriteExtractor(KoharuExtractor):
-                entries = data["entries"]
+    """Extractor for koharu favorites"""
-            except KeyError:
+    subcategory = "favorite"
    pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
    example = "https://koharu.to/favorites"
    def items(self):
        """Log in, then return the paginated "/favorites" API results."""
        self.login()

        query = text.parse_query(self.groups[0])
        page = text.parse_int(query.get("page"), 1)
        query["page"] = page
        return self._pagination("/favorites", query)
    def login(self):
        # Authenticate with the credentials from self._get_auth_info() and
        # store the resulting token in self.headers["Authorization"].
        # Without a username, exception.AuthenticationError is raised.
        username, password = self._get_auth_info()
        if username:
            # the value returned by _login_impl() is sent as a bearer token
            self.headers["Authorization"] = \
                "Bearer " + self._login_impl(username, password)
            return
-            for entry in entries:
+        raise exception.AuthenticationError("Username and password required")
                url = "{}/g/{}/{}/".format(
                    self.root, entry["id"], entry["public_key"])
                entry["_extractor"] = KoharuGalleryExtractor
                yield Message.Queue, url, entry
-            try:
+    @cache(maxage=28*86400, keyarg=1)
-                if data["limit"] * data["page"] >= data["total"]:
+    def _login_impl(self, username, password):
-                    return
+        self.log.info("Logging in as %s", username)
-            except Exception:
+
-                pass
+        url = "https://auth.koharu.to/login"
-            params["page"] += 1
+        data = {"uname": username, "passwd": password}
        # POST the credentials to the auth endpoint and return the "session"
        # value of its JSON response (login() uses it as the bearer token).
        # NOTE(review): the @cache decorator presumably keeps the result for
        # 28 days keyed on the username (keyarg=1) - confirm against ..cache
        response = self.request(
            url, method="POST", headers=self.headers, data=data)
        return response.json()["session"]
--- a/test/results/koharu.py
+++ b/test/results/koharu.py
@ -85,4 +85,26 @@ __tests__ = (
    "#count"   : ">= 50",
 },
 {
    "#url"     : "https://koharu.to/favorites",
    "#category": ("", "koharu", "favorite"),
    "#class"   : koharu.KoharuFavoriteExtractor,
    "#pattern" : koharu.KoharuGalleryExtractor.pattern,
    "#auth"    : True,
    "#urls"    : [
        "https://koharu.to/g/14216/6c67076fdd45",
    ],
 },
 {
    "#url"     : "https://koharu.to/favorites?cat=6&sort=4",
    "#category": ("", "koharu", "favorite"),
    "#class"   : koharu.KoharuFavoriteExtractor,
    "#pattern" : koharu.KoharuGalleryExtractor.pattern,
    "#auth"    : True,
    "#urls"    : [
        "https://koharu.to/g/14216/6c67076fdd45",
    ],
 },
 )