From 40ca562d7bcf3bf7dfff29944d1872dc849d8672 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 17 Mar 2018 16:23:35 +0100 Subject: [PATCH] [artstation] add album extractor (#80) --- docs/supportedsites.rst | 15 +++--- gallery_dl/extractor/artstation.py | 82 ++++++++++++++++++++++++++---- 2 files changed, 80 insertions(+), 17 deletions(-) diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index bf645f3b..89bd06b6 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -10,17 +10,17 @@ Site URL Capabilities arch.b4k.co https://arch.b4k.co/ Threads Archive of Sins https://archiveofsins.com/ Threads Archived.Moe https://archived.moe/ Threads -ArtStation https://www.artstation.com/ Images from Users, individual Images, Likes +ArtStation https://www.artstation.com/ |Images from Use-1| Danbooru https://danbooru.donmai.us/ Pools, Popular Images, Posts, Tag-Searches Desuarchive https://desuarchive.org/ Threads -DeviantArt https://www.deviantart.com/ |Collections, De-1| Optional (OAuth) +DeviantArt https://www.deviantart.com/ |Collections, De-2| Optional (OAuth) Doki Reader https://kobato.hologfx.com/ Chapters, Manga Dynasty Reader https://dynasty-scans.com/ Chapters e621 https://e621.net/ Pools, Popular Images, Posts, Tag-Searches ExHentai https://exhentai.org/ Galleries Optional Fallen Angels Scans https://www.fascans.com/ Chapters, Manga Fireden https://boards.fireden.net/ Threads -Flickr https://www.flickr.com/ |Images from Use-2| Optional (OAuth) +Flickr https://www.flickr.com/ |Images from Use-3| Optional (OAuth) Futaba Channel https://www.2chan.net/ Threads Gelbooru https://gelbooru.com/ Pools, Posts, Tag-Searches Gfycat https://gfycat.com/ individual Images @@ -57,7 +57,7 @@ nijie https://nijie.info/ Images from Users, indi Nyafuu Archive https://archive.nyafuu.org/ Threads Pawoo https://pawoo.net Images from Users, Images from Statuses Pinterest https://www.pinterest.com Boards, Pins, pin.it Links -Pixiv https://www.pixiv.net/ |Images from Use-3| Required +Pixiv https://www.pixiv.net/ |Images from Use-4| Required PowerManga https://powermanga.org/ Chapters, Manga Pure Mashiro http://reader.puremashiro.moe/ Chapters, Manga Read Comic Online http://readcomiconline.to/ Comic-Issues, Comics @@ -91,6 +91,7 @@ Turboimagehost https://turboimagehost.com/ individual Images ==================== =================================== ================================================== ================ .. |http://www.thes-0| replace:: http://www.thespectrum.net/manga_scans/ -.. |Collections, De-1| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals -.. |Images from Use-2| replace:: Images from Users, Albums, Favorites, Galleries, Groups, individual Images, Search Results -.. |Images from Use-3| replace:: Images from Users, Bookmarks, Favorites, pixiv.me Links, Rankings, Individual Images +.. |Images from Use-1| replace:: Images from Users, Albums, individual Images, Likes +.. |Collections, De-2| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals +.. |Images from Use-3| replace:: Images from Users, Albums, Favorites, Galleries, Groups, individual Images, Search Results +.. |Images from Use-4| replace:: Images from Users, Bookmarks, Favorites, pixiv.me Links, Rankings, Individual Images diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py index 6a316bb2..d4bcca50 100644 --- a/gallery_dl/extractor/artstation.py +++ b/gallery_dl/extractor/artstation.py @@ -17,8 +17,8 @@ import string class ArtstationExtractor(Extractor): """Base class for artstation extractors""" category = "artstation" - directory_fmt = ["{category}", "{username}"] filename_fmt = "{category}_{id}_{asset[id]}_{title}.{extension}" + directory_fmt = ["{category}", "{userinfo[username]}"] archive_fmt = "{asset[id]}" root = "https://www.artstation.com" per_page = 50 @@ -29,18 +29,15 @@ class ArtstationExtractor(Extractor): self.external = self.config("external", False) def items(self): - userinfo = None + data = self.metadata() yield Message.Version, 1 + yield Message.Directory, data for project_id in self.projects(): for asset in self.get_project_assets(project_id): - if not userinfo: - userinfo = self.get_user_info( - self.user or asset["user"]["username"]) - yield Message.Directory, userinfo - adict = asset["asset"] - asset["userinfo"] = userinfo + if data: + asset.update(data) if adict["has_image"]: url = adict["image_url"] @@ -51,6 +48,10 @@ class ArtstationExtractor(Extractor): url = text.extract(adict["player_embedded"], '"', '"')[0] yield Message.Queue, url, asset + def metadata(self): + """Return general metadata""" + return {"userinfo": self.get_user_info(self.user)} + def projects(self): """Return an iterable containing all relevant project IDs""" @@ -111,7 +112,7 @@ class ArtstationUserExtractor(ArtstationExtractor): """Extractor for all projects of an artstation user""" subcategory = "user" pattern = [r"(?:https?://)?(?:www\.)?artstation\.com" - r"/(?!artwork|projects)([^/?&#]+)/?$", + r"/(?!artwork|projects)([^/?&#]+)(?:/albums/all)?/?$", r"(?:https?://)?((?!www)\w+)\.artstation\.com" r"(?:/(?:projects/?)?)?$"] test = [ @@ -120,6 +121,7 @@ class ArtstationUserExtractor(ArtstationExtractor): r"/images/images/\d+/\d+/\d+/large/[^/]+", "count": ">= 6", }), + ("https://www.artstation.com/gaerikim/albums/all/", None), ("https://gaerikim.artstation.com/", None), ("https://gaerikim.artstation.com/projects/", None), ] @@ -129,10 +131,55 @@ class ArtstationUserExtractor(ArtstationExtractor): return self._pagination(url) +class ArtstationAlbumExtractor(ArtstationExtractor): + """Extractor for all projects of an artstation user""" + subcategory = "album" + directory_fmt = ["{category}", "{userinfo[username]}", "Albums", + "{album[id]} - {album[title]}"] + archive_fmt = "a_{album[id]}_{asset[id]}" + pattern = [r"(?:https?://)?(?:www\.)?artstation\.com" + r"/(?!artwork|projects)([^/?&#]+)/albums/(\d+)", + r"(?:https?://)?((?!www)\w+)\.artstation\.com" + r"/albums/(\d+)"] + test = [ + ("https://www.artstation.com/huimeiye/albums/770899", { + "count": 2, + }), + ("https://www.artstation.com/huimeiye/albums/770898", { + "exception": exception.NotFoundError, + }), + ("https://huimeiye.artstation.com/albums/770899", None), + ] + + def __init__(self, match): + ArtstationExtractor.__init__(self, match) + self.album_id = util.safe_int(match.group(2)) + + def metadata(self): + userinfo = self.get_user_info(self.user) + album = None + + for album in userinfo["albums_with_community_projects"]: + if album["id"] == self.album_id: + break + else: + raise exception.NotFoundError("album") + + return { + "userinfo": userinfo, + "album": album + } + + def projects(self): + url = "{}/users/{}/projects.json?album_id={}".format( + self.root, self.user, self.album_id) + return self._pagination(url) + + class ArtstationLikesExtractor(ArtstationExtractor): """Extractor for liked projects of an artstation user""" subcategory = "likes" - directory_fmt = ["{category}", "{username}", "Likes"] + directory_fmt = ["{category}", "{userinfo[username]}", "Likes"] archive_fmt = "f_{userinfo[id]}_{asset[id]}" pattern = [r"(?:https?://)?(?:www\.)?artstation\.com" r"/(?!artwork|projects)([^/?&#]+)/likes/?"] @@ -175,6 +222,21 @@ class ArtstationImageExtractor(ArtstationExtractor): def __init__(self, match): ArtstationExtractor.__init__(self) self.project_id = match.group(1) + self.assets = None + + def metadata(self): + self.assets = [ + asset.copy() + for asset in ArtstationExtractor.get_project_assets( + self, self.project_id + ) + ] + + self.user = self.assets[0]["user"]["username"] + return ArtstationExtractor.metadata(self) def projects(self): return (self.project_id,) + + def get_project_assets(self, project_id): + return self.assets