[artstation] add album extractor (#80)

2024-11-22 18:53:21 +01:00 · 2018-03-17 16:23:35 +01:00 · 2018-03-17 16:23:35 +01:00 · 40ca562d7b
commit 40ca562d7b
parent 7121eeae8b
2 changed files with 80 additions and 17 deletions
--- a/docs/supportedsites.rst
+++ b/docs/supportedsites.rst
@ -10,17 +10,17 @@ Site                 URL                                 Capabilities
 arch.b4k.co          https://arch.b4k.co/                Threads
 Archive of Sins      https://archiveofsins.com/          Threads
 Archived.Moe         https://archived.moe/               Threads
-ArtStation           https://www.artstation.com/         Images from Users, individual Images, Likes
+ArtStation           https://www.artstation.com/         |Images from Use-1|
 Danbooru             https://danbooru.donmai.us/         Pools, Popular Images, Posts, Tag-Searches
 Desuarchive          https://desuarchive.org/            Threads
-DeviantArt           https://www.deviantart.com/         |Collections, De-1|                                Optional (OAuth)
+DeviantArt           https://www.deviantart.com/         |Collections, De-2|                                Optional (OAuth)
 Doki Reader          https://kobato.hologfx.com/         Chapters, Manga
 Dynasty Reader       https://dynasty-scans.com/          Chapters
 e621                 https://e621.net/                   Pools, Popular Images, Posts, Tag-Searches
 ExHentai             https://exhentai.org/               Galleries                                          Optional
 Fallen Angels Scans  https://www.fascans.com/            Chapters, Manga
 Fireden              https://boards.fireden.net/         Threads
-Flickr               https://www.flickr.com/             |Images from Use-2|                                Optional (OAuth)
+Flickr               https://www.flickr.com/             |Images from Use-3|                                Optional (OAuth)
 Futaba Channel       https://www.2chan.net/              Threads
 Gelbooru             https://gelbooru.com/               Pools, Posts, Tag-Searches
 Gfycat               https://gfycat.com/                 individual Images
@ -57,7 +57,7 @@ nijie                https://nijie.info/                 Images from Users, indi
 Nyafuu Archive       https://archive.nyafuu.org/         Threads
 Pawoo                https://pawoo.net                   Images from Users, Images from Statuses
 Pinterest            https://www.pinterest.com           Boards, Pins, pin.it Links
-Pixiv                https://www.pixiv.net/              |Images from Use-3|                                Required
+Pixiv                https://www.pixiv.net/              |Images from Use-4|                                Required
 PowerManga           https://powermanga.org/             Chapters, Manga
 Pure Mashiro         http://reader.puremashiro.moe/      Chapters, Manga
 Read Comic Online    http://readcomiconline.to/          Comic-Issues, Comics
@ -91,6 +91,7 @@ Turboimagehost       https://turboimagehost.com/         individual Images
 ==================== =================================== ================================================== ================

 .. |http://www.thes-0| replace:: http://www.thespectrum.net/manga_scans/
-.. |Collections, De-1| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals
-.. |Images from Use-2| replace:: Images from Users, Albums, Favorites, Galleries, Groups, individual Images, Search Results
-.. |Images from Use-3| replace:: Images from Users, Bookmarks, Favorites, pixiv.me Links, Rankings, Individual Images
+.. |Images from Use-1| replace:: Images from Users, Albums, individual Images, Likes
+.. |Collections, De-2| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals
+.. |Images from Use-3| replace:: Images from Users, Albums, Favorites, Galleries, Groups, individual Images, Search Results
+.. |Images from Use-4| replace:: Images from Users, Bookmarks, Favorites, pixiv.me Links, Rankings, Individual Images
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@ -17,8 +17,8 @@ import string
 class ArtstationExtractor(Extractor):
    """Base class for artstation extractors"""
    category = "artstation"
-    directory_fmt = ["{category}", "{username}"]
    filename_fmt = "{category}_{id}_{asset[id]}_{title}.{extension}"
+    directory_fmt = ["{category}", "{userinfo[username]}"]
    archive_fmt = "{asset[id]}"
    root = "https://www.artstation.com"
    per_page = 50
@ -29,18 +29,15 @@ class ArtstationExtractor(Extractor):
        self.external = self.config("external", False)

    def items(self):
-        userinfo = None
+        data = self.metadata()
        yield Message.Version, 1
+        yield Message.Directory, data

        for project_id in self.projects():
            for asset in self.get_project_assets(project_id):
-                if not userinfo:
-                    userinfo = self.get_user_info(
-                        self.user or asset["user"]["username"])
-                    yield Message.Directory, userinfo
-
                adict = asset["asset"]
-                asset["userinfo"] = userinfo
+                if data:
+                    asset.update(data)

                if adict["has_image"]:
                    url = adict["image_url"]
@ -51,6 +48,10 @@ class ArtstationExtractor(Extractor):
                    url = text.extract(adict["player_embedded"], '"', '"')[0]
                    yield Message.Queue, url, asset

+    def metadata(self):
+        """Return general metadata"""
+        return {"userinfo": self.get_user_info(self.user)}
+
    def projects(self):
        """Return an iterable containing all relevant project IDs"""

@ -111,7 +112,7 @@ class ArtstationUserExtractor(ArtstationExtractor):
    """Extractor for all projects of an artstation user"""
    subcategory = "user"
    pattern = [r"(?:https?://)?(?:www\.)?artstation\.com"
-               r"/(?!artwork|projects)([^/?&#]+)/?$",
+               r"/(?!artwork|projects)([^/?&#]+)(?:/albums/all)?/?$",
               r"(?:https?://)?((?!www)\w+)\.artstation\.com"
               r"(?:/(?:projects/?)?)?$"]
    test = [
@ -120,6 +121,7 @@ class ArtstationUserExtractor(ArtstationExtractor):
                       r"/images/images/\d+/\d+/\d+/large/[^/]+",
            "count": ">= 6",
        }),
+        ("https://www.artstation.com/gaerikim/albums/all/", None),
        ("https://gaerikim.artstation.com/", None),
        ("https://gaerikim.artstation.com/projects/", None),
    ]
@ -129,10 +131,55 @@ class ArtstationUserExtractor(ArtstationExtractor):
        return self._pagination(url)


+class ArtstationAlbumExtractor(ArtstationExtractor):
+    """Extractor for all projects in an artstation album"""
+    subcategory = "album"
+    directory_fmt = ["{category}", "{userinfo[username]}", "Albums",
+                     "{album[id]} - {album[title]}"]
+    archive_fmt = "a_{album[id]}_{asset[id]}"
+    pattern = [r"(?:https?://)?(?:www\.)?artstation\.com"
+               r"/(?!artwork|projects)([^/?&#]+)/albums/(\d+)",
+               r"(?:https?://)?((?!www)\w+)\.artstation\.com"
+               r"/albums/(\d+)"]
+    test = [
+        ("https://www.artstation.com/huimeiye/albums/770899", {
+            "count": 2,
+        }),
+        ("https://www.artstation.com/huimeiye/albums/770898", {
+            "exception": exception.NotFoundError,
+        }),
+        ("https://huimeiye.artstation.com/albums/770899", None),
+    ]
+
+    def __init__(self, match):
+        ArtstationExtractor.__init__(self, match)
+        self.album_id = util.safe_int(match.group(2))
+
+    def metadata(self):
+        userinfo = self.get_user_info(self.user)
+        album = None
+
+        for album in userinfo["albums_with_community_projects"]:
+            if album["id"] == self.album_id:
+                break
+        else:
+            raise exception.NotFoundError("album")
+
+        return {
+            "userinfo": userinfo,
+            "album": album
+        }
+
+    def projects(self):
+        url = "{}/users/{}/projects.json?album_id={}".format(
+            self.root, self.user, self.album_id)
+        return self._pagination(url)
+
+
 class ArtstationLikesExtractor(ArtstationExtractor):
    """Extractor for liked projects of an artstation user"""
    subcategory = "likes"
-    directory_fmt = ["{category}", "{username}", "Likes"]
+    directory_fmt = ["{category}", "{userinfo[username]}", "Likes"]
    archive_fmt = "f_{userinfo[id]}_{asset[id]}"
    pattern = [r"(?:https?://)?(?:www\.)?artstation\.com"
               r"/(?!artwork|projects)([^/?&#]+)/likes/?"]
@ -175,6 +222,21 @@ class ArtstationImageExtractor(ArtstationExtractor):
    def __init__(self, match):
        ArtstationExtractor.__init__(self)
        self.project_id = match.group(1)
+        self.assets = None
+
+    def metadata(self):
+        self.assets = [
+            asset.copy()
+            for asset in ArtstationExtractor.get_project_assets(
+                self, self.project_id
+            )
+        ]
+
+        self.user = self.assets[0]["user"]["username"]
+        return ArtstationExtractor.metadata(self)

    def projects(self):
        return (self.project_id,)
+
+    def get_project_assets(self, project_id):
+        return self.assets