From 7f945c44f5c16c26abfff29dd754f8b7a19d4bfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 5 Oct 2024 17:08:45 +0200 Subject: [PATCH] [pixiv] support unlisted artworks (#5162) --- docs/supportedsites.md | 2 +- gallery_dl/extractor/pixiv.py | 89 +++++++++++++++++++++-------------- scripts/supportedsites.py | 4 ++ test/results/pixiv.py | 9 ++++ 4 files changed, 68 insertions(+), 36 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d5f8d723..fbee25e9 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -700,7 +700,7 @@ Consider all listed sites to potentially be NSFW. Pixiv https://www.pixiv.net/ - Artworks, Avatars, Backgrounds, Favorites, Follows, pixiv.me Links, Novels, Novel Bookmarks, Novel Series, pixivision, Rankings, Search Results, Series, Sketch, User Profiles, individual Images + Artworks, Avatars, Backgrounds, Favorites, Follows, pixiv.me Links, Novels, Novel Bookmarks, Novel Series, pixivision, Rankings, Search Results, Series, Sketch, Unlisted Works, User Profiles, individual Images OAuth diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 31d12912..8642d0e1 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -91,11 +91,10 @@ class PixivExtractor(Extractor): del work["image_urls"] del work["meta_pages"] - files = [] if work["type"] == "ugoira": if self.load_ugoira: try: - self._extract_ugoira(work, files) + return self._extract_ugoira(work) except exception.StopExtraction as exc: self.log.warning( "Unable to retrieve Ugoira metatdata (%s - %s)", @@ -106,24 +105,27 @@ class PixivExtractor(Extractor): if url == self.sanity_url: if self.sanity_workaround: self.log.warning("%s: 'sanity_level' warning", work["id"]) - self._extract_ajax(work, files) + body = self._request_ajax("/illust/" + str(work["id"])) + return self._extract_ajax(work, body) else: self.log.warning( "%s: Unable to download work ('sanity_level' warning)", work["id"]) else: - files.append({"url": url}) + return ({"url": url},) else: - for num, img in enumerate(meta_pages): - files.append({ + return [ + { "url" : img["image_urls"]["original"], "suffix": "_p{:02}".format(num), - }) + } + for num, img in enumerate(meta_pages) + ] - return files + return () - def _extract_ugoira(self, work, files): + def _extract_ugoira(self, work): ugoira = self.api.ugoira_metadata(work["id"]) url = ugoira["zip_urls"]["medium"] work["frames"] = frames = ugoira["frames"] @@ -142,26 +144,30 @@ class PixivExtractor(Extractor): except exception.HttpError: pass else: - return self.log.warning( + self.log.warning( "Unable to find Ugoira frame URLs (%s)", work["id"]) - for num in range(len(frames)): - url = "{}{}.{}".format(base, num, ext) - files.append(text.nameext_from_url(url, { - "url": url, - "num": num, + return [ + { + "url": "{}{}.{}".format(base, num, ext), "suffix": "_p{:02}".format(num), "_ugoira_frame_index": num, - })) + } + for num in range(len(frames)) + ] else: - files.append({ - "url": url.replace("_ugoira600x600", "_ugoira1920x1080", 1), - }) + url = url.replace("_ugoira600x600", "_ugoira1920x1080", 1) + return ({"url": url},) - def _extract_ajax(self, work, files): - url = "{}/ajax/illust/{}".format(self.root, work["id"]) + def _request_ajax(self, endpoint): + url = "{}/ajax{}".format(self.root, endpoint) data = self.request(url, headers=self.headers_web).json() - body = data["body"] + return data["body"] + + def _extract_ajax(self, work, body): + url = self._extract_ajax_url(body) + if not url: + return () for key_app, key_ajax in ( ("title" , "illustTitle"), @@ -198,21 +204,18 @@ class PixivExtractor(Extractor): translated_name = None tags.append({"name": name, "translated_name": translated_name}) - url = self._extract_ajax_url(body) - if not url: - return - work["page_count"] = count = body["pageCount"] if count == 1: - files.append({"url": url}) - else: - base, _, ext = url.rpartition("_p0.") - for num in range(count): - url = "{}_p{}.{}".format(base, num, ext) - files.append({ - "url" : url, - "suffix": "_p{:02}".format(num), - }) + return ({"url": url},) + + base, _, ext = url.rpartition("_p0.") + return [ + { + "url" : "{}_p{}.{}".format(base, num, ext), + "suffix": "_p{:02}".format(num), + } + for num in range(count) + ] def _extract_ajax_url(self, body): try: @@ -432,6 +435,22 @@ class PixivWorkExtractor(PixivExtractor): return works +class PixivUnlistedExtractor(PixivExtractor): + """Extractor for a unlisted pixiv illustrations""" + subcategory = "unlisted" + pattern = BASE_PATTERN + r"/(?:en/)?artworks/unlisted/(\w+)" + example = "https://www.pixiv.net/en/artworks/unlisted/a1b2c3d4e5f6g7h8i9j0" + + def _extract_files(self, work): + body = self._request_ajax("/illust/unlisted/" + work["id"]) + work["id_unlisted"] = work["id"] + work["id"] = text.parse_int(body["illustId"]) + return self._extract_ajax(work, body) + + def works(self): + return ({"id": self.groups[0], "user": {"id": 1}},) + + class PixivFavoriteExtractor(PixivExtractor): """Extractor for all favorites/bookmarks of a pixiv user""" subcategory = "favorite" diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 70ae097a..ff33ccfa 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -194,6 +194,9 @@ SUBCATEGORY_MAP = { "bluesky": { "posts": "", }, + "boosty": { + "feed": "Subscriptions Feed", + }, "civitai": { "tag-models": "Tag Searches (Models)", "tag-images": "Tag Searches (Images)", @@ -270,6 +273,7 @@ SUBCATEGORY_MAP = { "novel-user": "", "pixivision": "pixivision", "sketch": "Sketch", + "unlisted": "Unlisted Works", "work": "individual Images", }, "poringa": { diff --git a/test/results/pixiv.py b/test/results/pixiv.py index 02fdde8c..182c522e 100644 --- a/test/results/pixiv.py +++ b/test/results/pixiv.py @@ -308,6 +308,15 @@ __tests__ = ( "#class" : pixiv.PixivWorkExtractor, }, +{ + "#url" : "https://www.pixiv.net/en/artworks/unlisted/eE3fTYaROT9IsZmep386", + "#class" : pixiv.PixivUnlistedExtractor, + "#urls" : "https://i.pximg.net/img-original/img/2020/10/15/00/46/12/85017704-149014193e4d3e23a6b8bd5e38b51ed4_p0.png", + + "id" : 85017704, + "id_unlisted": "eE3fTYaROT9IsZmep386", +}, + { "#url" : "https://www.pixiv.net/en/users/173530/bookmarks/artworks", "#category": ("", "pixiv", "favorite"),