[pixiv] support unlisted artworks (#5162)

2024-11-22 02:32:33 +01:00 · 2024-10-05 17:08:45 +02:00 · 2024-10-05 17:08:45 +02:00 · 7f945c44f5
commit 7f945c44f5
parent c5be50fdaa
4 changed files with 68 additions and 36 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -700,7 +700,7 @@ Consider all listed sites to potentially be NSFW.
 <tr>
    <td>Pixiv</td>
    <td>https://www.pixiv.net/</td>
-    <td>Artworks, Avatars, Backgrounds, Favorites, Follows, pixiv.me Links, Novels, Novel Bookmarks, Novel Series, pixivision, Rankings, Search Results, Series, Sketch, User Profiles, individual Images</td>
+    <td>Artworks, Avatars, Backgrounds, Favorites, Follows, pixiv.me Links, Novels, Novel Bookmarks, Novel Series, pixivision, Rankings, Search Results, Series, Sketch, Unlisted Works, User Profiles, individual Images</td>
    <td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
 </tr>
 <tr>
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -91,11 +91,10 @@ class PixivExtractor(Extractor):
        del work["image_urls"]
        del work["meta_pages"]

-        files = []
        if work["type"] == "ugoira":
            if self.load_ugoira:
                try:
-                    self._extract_ugoira(work, files)
+                    return self._extract_ugoira(work)
                except exception.StopExtraction as exc:
                    self.log.warning(
                        "Unable to retrieve Ugoira metatdata (%s - %s)",
@@ -106,24 +105,27 @@ class PixivExtractor(Extractor):
            if url == self.sanity_url:
                if self.sanity_workaround:
                    self.log.warning("%s: 'sanity_level' warning", work["id"])
-                    self._extract_ajax(work, files)
+                    body = self._request_ajax("/illust/" + str(work["id"]))
+                    return self._extract_ajax(work, body)
                else:
                    self.log.warning(
                        "%s: Unable to download work ('sanity_level' warning)",
                        work["id"])
            else:
-                files.append({"url": url})
+                return ({"url": url},)

        else:
-            for num, img in enumerate(meta_pages):
-                files.append({
+            return [
+                {
                    "url"   : img["image_urls"]["original"],
                    "suffix": "_p{:02}".format(num),
-                })
+                }
+                for num, img in enumerate(meta_pages)
+            ]

-        return files
+        return ()

-    def _extract_ugoira(self, work, files):
+    def _extract_ugoira(self, work):
        ugoira = self.api.ugoira_metadata(work["id"])
        url = ugoira["zip_urls"]["medium"]
        work["frames"] = frames = ugoira["frames"]
@@ -142,26 +144,30 @@ class PixivExtractor(Extractor):
                except exception.HttpError:
                    pass
            else:
-                return self.log.warning(
+                self.log.warning(
                    "Unable to find Ugoira frame URLs (%s)", work["id"])

-            for num in range(len(frames)):
-                url = "{}{}.{}".format(base, num, ext)
-                files.append(text.nameext_from_url(url, {
-                    "url": url,
-                    "num": num,
+            return [
+                {
+                    "url": "{}{}.{}".format(base, num, ext),
                    "suffix": "_p{:02}".format(num),
                    "_ugoira_frame_index": num,
-                }))
+                }
+                for num in range(len(frames))
+            ]
        else:
-            files.append({
-                "url": url.replace("_ugoira600x600", "_ugoira1920x1080", 1),
-            })
+            url = url.replace("_ugoira600x600", "_ugoira1920x1080", 1)
+            return ({"url": url},)

-    def _extract_ajax(self, work, files):
-        url = "{}/ajax/illust/{}".format(self.root, work["id"])
+    def _request_ajax(self, endpoint):
+        url = "{}/ajax{}".format(self.root, endpoint)
        data = self.request(url, headers=self.headers_web).json()
-        body = data["body"]
+        return data["body"]
+
+    def _extract_ajax(self, work, body):
+        url = self._extract_ajax_url(body)
+        if not url:
+            return ()

        for key_app, key_ajax in (
            ("title"            , "illustTitle"),
@@ -198,21 +204,18 @@ class PixivExtractor(Extractor):
                translated_name = None
            tags.append({"name": name, "translated_name": translated_name})

-        url = self._extract_ajax_url(body)
-        if not url:
-            return
-
        work["page_count"] = count = body["pageCount"]
        if count == 1:
-            files.append({"url": url})
-        else:
-            base, _, ext = url.rpartition("_p0.")
-            for num in range(count):
-                url = "{}_p{}.{}".format(base, num, ext)
-                files.append({
-                    "url"   : url,
-                    "suffix": "_p{:02}".format(num),
-                })
+            return ({"url": url},)
+
+        base, _, ext = url.rpartition("_p0.")
+        return [
+            {
+                "url"   : "{}_p{}.{}".format(base, num, ext),
+                "suffix": "_p{:02}".format(num),
+            }
+            for num in range(count)
+        ]

    def _extract_ajax_url(self, body):
        try:
@@ -432,6 +435,22 @@ class PixivWorkExtractor(PixivExtractor):
        return works


+class PixivUnlistedExtractor(PixivExtractor):
+    """Extractor for unlisted pixiv illustrations"""
+    subcategory = "unlisted"
+    pattern = BASE_PATTERN + r"/(?:en/)?artworks/unlisted/(\w+)"
+    example = "https://www.pixiv.net/en/artworks/unlisted/a1b2c3d4e5f6g7h8i9j0"
+
+    def _extract_files(self, work):
+        body = self._request_ajax("/illust/unlisted/" + work["id"])
+        work["id_unlisted"] = work["id"]
+        work["id"] = text.parse_int(body["illustId"])
+        return self._extract_ajax(work, body)
+
+    def works(self):
+        return ({"id": self.groups[0], "user": {"id": 1}},)
+
+
 class PixivFavoriteExtractor(PixivExtractor):
    """Extractor for all favorites/bookmarks of a pixiv user"""
    subcategory = "favorite"
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -194,6 +194,9 @@ SUBCATEGORY_MAP = {
    "bluesky": {
        "posts": "",
    },
+    "boosty": {
+        "feed": "Subscriptions Feed",
+    },
    "civitai": {
        "tag-models": "Tag Searches (Models)",
        "tag-images": "Tag Searches (Images)",
@@ -270,6 +273,7 @@ SUBCATEGORY_MAP = {
        "novel-user": "",
        "pixivision": "pixivision",
        "sketch": "Sketch",
+        "unlisted": "Unlisted Works",
        "work": "individual Images",
    },
    "poringa": {
--- a/test/results/pixiv.py
+++ b/test/results/pixiv.py
@@ -308,6 +308,15 @@ __tests__ = (
    "#class"   : pixiv.PixivWorkExtractor,
 },

+{
+    "#url"     : "https://www.pixiv.net/en/artworks/unlisted/eE3fTYaROT9IsZmep386",
+    "#class"   : pixiv.PixivUnlistedExtractor,
+    "#urls"    : "https://i.pximg.net/img-original/img/2020/10/15/00/46/12/85017704-149014193e4d3e23a6b8bd5e38b51ed4_p0.png",
+
+    "id"         : 85017704,
+    "id_unlisted": "eE3fTYaROT9IsZmep386",
+},
+
 {
    "#url"     : "https://www.pixiv.net/en/users/173530/bookmarks/artworks",
    "#category": ("", "pixiv", "favorite"),