diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index eb64b447..fa509106 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -79,7 +79,7 @@ SmugMug https://www.smugmug.com/ |Albums, individ-5| The /b/ Archive https://thebarchive.com/ Threads Tumblr https://www.tumblr.com/ Images from Users, Likes, Posts, Tag-Searches Optional (OAuth) Twitter https://twitter.com/ Media Timelines, Timelines, Tweets -Wallhaven https://alpha.wallhaven.cc/ individual Images, Search Results +Wallhaven https://alpha.wallhaven.cc/ individual Images, Search Results Optional Warosu https://warosu.org/ Threads World Three http://www.slide.world-three.org/ Chapters, Manga XVideos https://www.xvideos.com/ Images from Users, Galleries diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py index cd70ef4f..1c6746eb 100644 --- a/gallery_dl/extractor/wallhaven.py +++ b/gallery_dl/extractor/wallhaven.py @@ -9,7 +9,8 @@ """Extract images from https://alpha.wallhaven.cc/""" from .common import Extractor, Message -from .. import text +from .. 
import text, exception
+from ..cache import cache
 
 
 class WallhavenExtractor(Extractor):
@@ -18,6 +19,35 @@ class WallhavenExtractor(Extractor):
     filename_fmt = "{category}_{id}_{width}x{height}.{extension}"
     root = "https://alpha.wallhaven.cc"
 
+    def login(self):
+        """Log in if a username is available and store the returned cookie"""
+        username, password = self._get_auth_info()
+        if username:  # without a username no login is attempted
+            cookie = self._login_impl(username, password)
+            self.session.cookies.set_cookie(cookie)
+
+    @cache(maxage=365*24*60*60, keyarg=1)  # presumably one year, in seconds
+    def _login_impl(self, username, password):
+        """Log in and return the 'remember_*' cookie; raise on failure"""
+        self.log.info("Logging in as %s", username)
+
+        url = "{}/auth/login".format(self.root)
+        page = self.request(url).text
+        pos = page.index('name="_token"')  # raises ValueError if absent
+
+        data = {
+            "username": username,
+            "password": password,
+            "_token": text.extract(page, 'value="', '"', pos)[0]
+        }
+        response = self.request(
+            url, method="POST", data=data, allow_redirects=False)
+
+        for cookie in response.cookies:
+            if cookie.name.startswith("remember_"):  # taken as success
+                return cookie
+        raise exception.AuthenticationError()  # no 'remember_*' cookie found
+
     def get_wallpaper_data(self, wallpaper_id):
         """Extract url and metadata for a wallpaper"""
         url = "{}/wallpaper/{}".format(self.root, wallpaper_id)
@@ -61,9 +91,10 @@ class WallhavenSearchExtractor(WallhavenExtractor):
     archive_fmt = "s_{search[q]}_{id}"
     pattern = [r"(?:https?://)?alpha\.wallhaven\.cc/search\?([^/?#]+)"]
     test = [
-        ("https://alpha.wallhaven.cc/search?q=id%3A87", {
-            "url": "0a8ba15e6eb94178a8720811c4bdcca0e20d537a",
-            "keyword": "7e5840cff08ca53cab1963002c4c1c5868f16020",
+        ("https://alpha.wallhaven.cc/search?q=touhou", None),
+        (("https://alpha.wallhaven.cc/search?q=id%3A87"
+          "&categories=111&purity=100&sorting=relevance&order=desc&page=3"), {
+            "url": "1b9b6d97b9670e32ef5dd6942a095549ab543d91",
             "range": (1, 3),
         }),
     ]
@@ -74,6 +105,7 @@ class WallhavenSearchExtractor(WallhavenExtractor):
         self.params = text.parse_query(match.group(1))
 
     def items(self):
+        self.login()  # runs before the first message is yielded
         yield Message.Version, 1
         yield
Message.Directory, {"search": self.params} @@ -127,6 +159,10 @@ class WallhavenImageExtractor(WallhavenExtractor): "favorites": int, }, }), + # NSFW + ("https://alpha.wallhaven.cc/wallpaper/8536", { + "url": "8431c6f1eec3a6f113980eeec9dfcb707de7ddcf", + }), ("https://whvn.cc/8114", None), ] @@ -135,6 +171,7 @@ class WallhavenImageExtractor(WallhavenExtractor): self.wallpaper_id = match.group(1) def items(self): + self.login() url, data = self.get_wallpaper_data(self.wallpaper_id) yield Message.Version, 1 yield Message.Directory, data diff --git a/scripts/build_supportedsites.py b/scripts/build_supportedsites.py index 98bbb793..7161b487 100755 --- a/scripts/build_supportedsites.py +++ b/scripts/build_supportedsites.py @@ -96,6 +96,7 @@ AUTH_MAP = { "seiga" : "Required", "smugmug" : "Optional (OAuth)", "tumblr" : "Optional (OAuth)", + "wallhaven" : "Optional", } IGNORE_LIST = (