[pixiv] support unlisted artworks (#5162)

2024-11-22 02:32:33 +01:00 · 2024-10-05 17:08:45 +02:00 · 2024-10-05 17:08:45 +02:00 · 7f945c44f5
commit 7f945c44f5
parent c5be50fdaa
4 changed files with 68 additions and 36 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@ -700,7 +700,7 @@ Consider all listed sites to potentially be NSFW.
 <tr>
    <td>Pixiv</td>
    <td>https://www.pixiv.net/</td>
-    <td>Artworks, Avatars, Backgrounds, Favorites, Follows, pixiv.me Links, Novels, Novel Bookmarks, Novel Series, pixivision, Rankings, Search Results, Series, Sketch, User Profiles, individual Images</td>
+    <td>Artworks, Avatars, Backgrounds, Favorites, Follows, pixiv.me Links, Novels, Novel Bookmarks, Novel Series, pixivision, Rankings, Search Results, Series, Sketch, Unlisted Works, User Profiles, individual Images</td>
    <td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
 </tr>
 <tr>
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@ -91,11 +91,10 @@ class PixivExtractor(Extractor):
        del work["image_urls"]
        del work["meta_pages"]
        files = []
        if work["type"] == "ugoira":
            if self.load_ugoira:
                try:
-                    self._extract_ugoira(work, files)
+                    return self._extract_ugoira(work)
                except exception.StopExtraction as exc:
                    self.log.warning(
                        "Unable to retrieve Ugoira metatdata (%s - %s)",
@ -106,24 +105,27 @@ class PixivExtractor(Extractor):
            if url == self.sanity_url:
                if self.sanity_workaround:
                    self.log.warning("%s: 'sanity_level' warning", work["id"])
-                    self._extract_ajax(work, files)
+                    body = self._request_ajax("/illust/" + str(work["id"]))
                    return self._extract_ajax(work, body)
                else:
                    self.log.warning(
                        "%s: Unable to download work ('sanity_level' warning)",
                        work["id"])
            else:
-                files.append({"url": url})
+                return ({"url": url},)
        else:
-            for num, img in enumerate(meta_pages):
+            return [
-                files.append({
+                {
                    "url"   : img["image_urls"]["original"],
                    "suffix": "_p{:02}".format(num),
-                })
+                }
                for num, img in enumerate(meta_pages)
            ]
-        return files
+        return ()
-    def _extract_ugoira(self, work, files):
+    def _extract_ugoira(self, work):
        ugoira = self.api.ugoira_metadata(work["id"])
        url = ugoira["zip_urls"]["medium"]
        work["frames"] = frames = ugoira["frames"]
@ -142,26 +144,30 @@ class PixivExtractor(Extractor):
                except exception.HttpError:
                    pass
            else:
-                return self.log.warning(
+                self.log.warning(
                    "Unable to find Ugoira frame URLs (%s)", work["id"])
-            for num in range(len(frames)):
+            return [
-                url = "{}{}.{}".format(base, num, ext)
+                {
-                files.append(text.nameext_from_url(url, {
+                    "url": "{}{}.{}".format(base, num, ext),
                    "url": url,
                    "num": num,
                    "suffix": "_p{:02}".format(num),
                    "_ugoira_frame_index": num,
-                }))
+                }
                for num in range(len(frames))
            ]
        else:
-            files.append({
+            url = url.replace("_ugoira600x600", "_ugoira1920x1080", 1)
-                "url": url.replace("_ugoira600x600", "_ugoira1920x1080", 1),
+            return ({"url": url},)
            })
-    def _extract_ajax(self, work, files):
+    def _request_ajax(self, endpoint):
-        url = "{}/ajax/illust/{}".format(self.root, work["id"])
+        url = "{}/ajax{}".format(self.root, endpoint)
        data = self.request(url, headers=self.headers_web).json()
-        body = data["body"]
+        return data["body"]
    def _extract_ajax(self, work, body):
        url = self._extract_ajax_url(body)
        if not url:
            return ()
        for key_app, key_ajax in (
            ("title"            , "illustTitle"),
@ -198,21 +204,18 @@ class PixivExtractor(Extractor):
                translated_name = None
            tags.append({"name": name, "translated_name": translated_name})
        url = self._extract_ajax_url(body)
        if not url:
            return
        work["page_count"] = count = body["pageCount"]
        if count == 1:
-            files.append({"url": url})
+            return ({"url": url},)
-        else:
+
-            base, _, ext = url.rpartition("_p0.")
+        base, _, ext = url.rpartition("_p0.")
-            for num in range(count):
+        return [
-                url = "{}_p{}.{}".format(base, num, ext)
+            {
-                files.append({
+                "url"   : "{}_p{}.{}".format(base, num, ext),
-                    "url"   : url,
+                "suffix": "_p{:02}".format(num),
-                    "suffix": "_p{:02}".format(num),
+            }
-                })
+            for num in range(count)
        ]
    def _extract_ajax_url(self, body):
        try:
@ -432,6 +435,22 @@ class PixivWorkExtractor(PixivExtractor):
        return works
 class PixivUnlistedExtractor(PixivExtractor):
    """Extractor for unlisted pixiv illustrations

    Matches artwork URLs of the form '/artworks/unlisted/<token>'
    (optionally prefixed with '/en') and resolves the work through
    the '/illust/unlisted/<token>' AJAX endpoint.
    """
    subcategory = "unlisted"
    # the single capture group is the alphanumeric token of the unlisted work
    pattern = BASE_PATTERN + r"/(?:en/)?artworks/unlisted/(\w+)"
    example = "https://www.pixiv.net/en/artworks/unlisted/a1b2c3d4e5f6g7h8i9j0"
    # Return the file entries for 'work' as produced by _extract_ajax().
    # 'work' is modified in place: the unlisted token moves to
    # "id_unlisted" and "id" is replaced by the value of 'illustId'.
    def _extract_files(self, work):
        # at this point work["id"] still holds the token from the URL
        body = self._request_ajax("/illust/unlisted/" + work["id"])
        # keep the URL token available under its own key before overwriting "id"
        work["id_unlisted"] = work["id"]
        work["id"] = text.parse_int(body["illustId"])
        return self._extract_ajax(work, body)
    # Return a one-element tuple holding a stub work dict for the matched URL.
    def works(self):
        # "id" is presumably the (\w+) token captured by 'pattern'; the user
        # id of 1 looks like a placeholder to be filled in from the AJAX
        # response later -- TODO confirm against _extract_ajax()
        return ({"id": self.groups[0], "user": {"id": 1}},)
 class PixivFavoriteExtractor(PixivExtractor):
    """Extractor for all favorites/bookmarks of a pixiv user"""
    subcategory = "favorite"
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@ -194,6 +194,9 @@ SUBCATEGORY_MAP = {
    "bluesky": {
        "posts": "",
    },
    "boosty": {
        "feed": "Subscriptions Feed",
    },
    "civitai": {
        "tag-models": "Tag Searches (Models)",
        "tag-images": "Tag Searches (Images)",
@ -270,6 +273,7 @@ SUBCATEGORY_MAP = {
        "novel-user": "",
        "pixivision": "pixivision",
        "sketch": "Sketch",
        "unlisted": "Unlisted Works",
        "work": "individual Images",
    },
    "poringa": {
--- a/test/results/pixiv.py
+++ b/test/results/pixiv.py
@ -308,6 +308,15 @@ __tests__ = (
    "#class"   : pixiv.PixivWorkExtractor,
 },
 {
    "#url"     : "https://www.pixiv.net/en/artworks/unlisted/eE3fTYaROT9IsZmep386",
    "#class"   : pixiv.PixivUnlistedExtractor,
    "#urls"    : "https://i.pximg.net/img-original/img/2020/10/15/00/46/12/85017704-149014193e4d3e23a6b8bd5e38b51ed4_p0.png",
    "id"         : 85017704,
    "id_unlisted": "eE3fTYaROT9IsZmep386",
 },
 {
    "#url"     : "https://www.pixiv.net/en/users/173530/bookmarks/artworks",
    "#category": ("", "pixiv", "favorite"),