[nijie] support /history_nuita.php listings (closes #2541)

2024-11-22 02:32:33 +01:00 · 2022-05-01 17:45:38 +02:00 · 2022-05-01 17:45:38 +02:00 · d11e2191ae
commit d11e2191ae
parent 4aca29b7b4
4 changed files with 47 additions and 10 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -1648,7 +1648,7 @@ Description


 extractor.nijie.include
----------------------------
+-----------------------
 Type
    ``string`` or ``list`` of ``strings``
 Default
@@ -1658,7 +1658,7 @@ Description
    when processing a user profile.

    Possible values are
-    ``"illustration"``, ``"doujin"``, ``"favorite"``.
+    ``"illustration"``, ``"doujin"``, ``"favorite"``, ``"nuita"``.

    You can use ``"all"`` instead of listing all values separately.

--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -538,7 +538,7 @@ Consider all sites to be NSFW unless otherwise known.
 <tr>
    <td>nijie</td>
    <td>https://nijie.info/</td>
-    <td>Doujin, Favorites, Illustrations, individual Images, User Profiles</td>
+    <td>Doujin, Favorites, Illustrations, individual Images, Nuita History, User Profiles</td>
    <td>Required</td>
 </tr>
 <tr>
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -91,6 +91,10 @@ class NijieExtractor(AsynchronousMixin, Extractor):
                "url": url,
            })

+    @staticmethod
+    def _extract_user_name(page):
+        return text.unescape(text.extract(page, "<br />", "<")[0] or "")
+
    def login(self):
        """Login and obtain session cookies"""
        if not self._check_cookies(self.cookienames):
@@ -119,9 +123,8 @@ class NijieExtractor(AsynchronousMixin, Extractor):
        while True:
            page = self.request(url, params=params, notfound="artist").text

-            if not self.user_name:
-                self.user_name = text.unescape(text.extract(
-                    page, '<br />', '<')[0] or "")
+            if self.user_name is None:
+                self.user_name = self._extract_user_name(page)
            yield from text.extract_iter(page, 'illust_id="', '"')

            if '<a rel="next"' not in page:
@@ -137,11 +140,12 @@ class NijieUserExtractor(NijieExtractor):
    test = ("https://nijie.info/members.php?id=44",)

    def items(self):
-        base = "{}/{{}}.php?id={}".format(self.root, self.user_id)
+        fmt = "{}/{{}}.php?id={}".format(self.root, self.user_id).format
        return self._dispatch_extractors((
-            (NijieIllustrationExtractor, base.format("members_illust")),
-            (NijieDoujinExtractor      , base.format("members_dojin")),
-            (NijieFavoriteExtractor    , base.format("user_like_illust_view")),
+            (NijieIllustrationExtractor, fmt("members_illust")),
+            (NijieDoujinExtractor      , fmt("members_dojin")),
+            (NijieFavoriteExtractor    , fmt("user_like_illust_view")),
+            (NijieNuitaExtractor       , fmt("history_nuita")),
        ), ("illustration", "doujin"))


@@ -217,6 +221,36 @@ class NijieFavoriteExtractor(NijieExtractor):
        return data


+class NijieNuitaExtractor(NijieExtractor):
+    """Extractor for a nijie user's 抜いた list"""
+    subcategory = "nuita"
+    directory_fmt = ("{category}", "nuita", "{user_id}")
+    archive_fmt = "n_{user_id}_{image_id}_{num}"
+    pattern = BASE_PATTERN + r"/history_nuita\.php\?id=(\d+)"
+    test = ("https://nijie.info/history_nuita.php?id=728995", {
+        "range": "1-10",
+        "count": 10,
+        "keyword": {
+            "user_id"  : 728995,
+            "user_name": "莚",
+        },
+    })
+
+    def image_ids(self):
+        return self._pagination("history_nuita")
+
+    def _extract_data(self, page):
+        data = NijieExtractor._extract_data(page)
+        data["user_id"] = self.user_id
+        data["user_name"] = self.user_name
+        return data
+
+    @staticmethod
+    def _extract_user_name(page):
+        return text.unescape(text.extract(
+            page, "<title>", "さんの抜いた")[0] or "")
+
+
 class NijieImageExtractor(NijieExtractor):
    """Extractor for a work/image from nijie.info"""
    subcategory = "image"
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -183,6 +183,9 @@ SUBCATEGORY_MAP = {
    "mangadex": {
        "feed" : "Followed Feed",
    },
+    "nijie": {
+        "nuita" : "Nuita History",
+    },
    "pinterest": {
        "board": "",
        "pinit": "pin.it Links",