2018-09-17 21:19:25 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2019-02-08 13:45:40 +01:00
|
|
|
# Copyright 2018-2019 Mike Fährmann
|
2018-09-17 21:19:25 +02:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
|
|
|
"""Extract images from https://alpha.wallhaven.cc/"""
|
|
|
|
|
|
|
|
from .common import Extractor, Message
|
2018-09-19 15:15:14 +02:00
|
|
|
from .. import text, exception
|
|
|
|
from ..cache import cache
|
2018-09-17 21:19:25 +02:00
|
|
|
|
|
|
|
|
|
|
|
class WallhavenExtractor(Extractor):
    """Base class for wallhaven extractors"""
    # Shared settings for every wallhaven extractor in this module.
    category = "wallhaven"
    filename_fmt = "{category}_{id}_{width}x{height}.{extension}"
    root = "https://alpha.wallhaven.cc"

    def login(self):
        """Login and set necessary cookies

        Nothing happens when _get_auth_info() yields no username.
        """
        username, password = self._get_auth_info()
        if not username:
            return
        cookies = self._login_impl(username, password)
        self._update_cookies(cookies)

    # maxage is one year's worth of seconds; keyarg=1 presumably keys the
    # cache on 'username' -- confirm against the cache module.
    @cache(maxage=365*24*60*60, keyarg=1)
    def _login_impl(self, username, password):
        """Submit the login form and return the resulting session cookie

        Returns a single-entry dict mapping the name of the first cookie
        whose name starts with "remember_" to its value.

        Raises exception.AuthenticationError when the login POST was not
        redirected or when no such cookie is found.
        """
        self.log.info("Logging in as %s", username)
        login_url = "{}/auth/login".format(self.root)
        page = self.request(login_url).text

        # the '_token' value from the login page has to be sent back
        # together with the credentials
        token_pos = page.index('name="_token"')
        token = text.extract(page, 'value="', '"', token_pos)[0]

        form = {
            "username": username,
            "password": password,
            "_token": token,
        }
        response = self.request(login_url, method="POST", data=form)

        redirects = response.history
        if redirects:
            # only the first response of the redirect chain is inspected
            for cookie in redirects[0].cookies:
                if cookie.name.startswith("remember_"):
                    return {cookie.name: cookie.value}

        raise exception.AuthenticationError()

    def get_wallpaper_data(self, wallpaper_id):
        """Extract url and metadata for a wallpaper

        Returns a (url, metadata) tuple, where 'url' is the og:image
        address joined onto the site root and 'metadata' is a dict of
        values scraped from the wallpaper's page.
        """
        page_url = "{}/wallpaper/{}".format(self.root, wallpaper_id)
        page = self.request(page_url).text

        # Every lookup starts from the position returned by the previous
        # one, so the order of these calls must follow the page layout.
        extr = text.extract
        title, pos = extr(page, 'name="title" content="', '"')
        image_url, pos = extr(
            page, 'property="og:image" content="', '"', pos)
        resolution, pos = extr(
            page, '<h3 class="showcase-resolution"', '<', pos)
        colors, pos = extr(page, '<ul ', '</ul>', pos)
        uploader, pos = extr(page, 'alt="', '"', pos)
        date, pos = extr(page, 'datetime="', '"', pos)
        category, pos = extr(page, 'Category</dt><dd>', '<', pos)
        size, pos = extr(page, 'Size</dt><dd>', '<', pos)
        views, pos = extr(page, 'Views</dt><dd>', '<', pos)
        favs, pos = extr(page, 'Favorites</dt><dd>', '</dt>', pos)

        # 'resolution' still contains the rest of the opening <h3 ...> tag;
        # keep only what follows the last '>' and split it at the 'x'
        dimensions = resolution.rpartition(">")[2]
        width, _, height = dimensions.partition("x")

        image_url = text.urljoin(self.root, image_url)
        metadata = {
            "id": text.parse_int(wallpaper_id),
            "width": text.parse_int(width),
            "height": text.parse_int(height),
            "colors": list(text.extract_iter(colors, '#', '"')),
            # the part of the title before " | " is a "#tag, #tag" list
            "tags": title.rpartition(" | ")[0].lstrip("#").split(", #"),
            "uploader": text.unescape(uploader),
            "wh_category": category,
            "date": date,
            "size": size,
            # drop thousands separators before parsing
            "views": text.parse_int(views.replace(",", "")),
            # strip markup and keep the text before the first space
            "favorites": text.parse_int(
                text.remove_html(favs).partition(" ")[0]),
        }
        return image_url, metadata
|
|
|
|
|
|
|
|
|
|
|
|
class WallhavenSearchExtractor(WallhavenExtractor):
    """Extractor for search results on wallhaven.cc"""
    subcategory = "search"
    directory_fmt = ("{category}", "{search[q]}")
    archive_fmt = "s_{search[q]}_{id}"
    # group 1 captures the query string following 'search?'
    pattern = r"(?:https?://)?alpha\.wallhaven\.cc/search\?([^/?#]+)"
    test = (
        "https://alpha.wallhaven.cc/search?q=touhou",
        (("https://alpha.wallhaven.cc/search?q=id%3A87"
          "&categories=111&purity=100&sorting=date_added&order=asc&page=3"), {
            "url": "29b54803e3fae5e337fdd29d47d51302d78bec9a",
            "range": "1-3",
        }),
    )
    # a result page with fewer IDs than this is treated as the last one
    per_page = 24

    def __init__(self, match):
        """Store the parsed query string of the matched search URL"""
        super().__init__()
        self.params = text.parse_query(match.group(1))

    def items(self):
        """Log in, then yield one Url message per search result"""
        self.login()
        yield Message.Version, 1
        yield Message.Directory, {"search": self.params}

        for wallpaper_id in self.wallpapers():
            url, metadata = self.get_wallpaper_data(wallpaper_id)
            metadata["search"] = self.params
            yield Message.Url, url, metadata

    def wallpapers(self):
        """Yield wallpaper IDs from search results"""
        search_url = "{}/search".format(self.root)
        headers = {
            "Referer": search_url,
            "X-Requested-With": "XMLHttpRequest",
        }

        # work on a copy so that self.params keeps its original values;
        # iteration always starts at page 1, whatever the URL specified
        query = self.params.copy()
        query["page"] = 1

        while True:
            response = self.request(
                search_url, params=query, headers=headers)
            found = list(text.extract_iter(
                response.text, 'data-wallpaper-id="', '"'))
            yield from found

            # a short page means there is nothing left to fetch
            if len(found) < self.per_page:
                break
            query["page"] += 1
|
|
|
|
|
|
|
|
|
|
|
|
class WallhavenImageExtractor(WallhavenExtractor):
    """Extractor for individual wallpaper on wallhaven.cc"""
    subcategory = "image"
    archive_fmt = "{id}"
    # accepts full wallpaper URLs as well as whvn.cc short links;
    # group 1 captures the numeric wallpaper ID
    pattern = (r"(?:https?://)?(?:alpha\.wallhaven\.cc/wallpaper"
               r"|whvn\.cc)/(\d+)")
    test = (
        ("https://alpha.wallhaven.cc/wallpaper/8114", {
            "pattern": "https://[^.]+.wallhaven.cc/[^/]+/full/[^-]+-8114.jpg",
            "content": "497212679383a465da1e35bd75873240435085a2",
            "keyword": {
                "id": 8114,
                "width": 1920,
                "height": 1200,
                "colors": list,
                "tags": list,
                "uploader": "AksumkA",
                "date": "2014-08-31T06:17:19+00:00",
                "wh_category": "Anime",
                "size": "272.3 KiB",
                "views": int,
                "favorites": int,
            },
        }),
        # NSFW
        ("https://alpha.wallhaven.cc/wallpaper/8536", {
            "url": "8431c6f1eec3a6f113980eeec9dfcb707de7ddcf",
        }),
        "https://whvn.cc/8114",
    )

    def __init__(self, match):
        """Remember the wallpaper ID captured from the matched URL"""
        super().__init__()
        self.wallpaper_id = match.group(1)

    def items(self):
        """Log in, then yield the messages for this single wallpaper"""
        self.login()
        # the page is fetched before the first message is yielded
        image_url, metadata = self.get_wallpaper_data(self.wallpaper_id)
        yield Message.Version, 1
        yield Message.Directory, metadata
        yield Message.Url, image_url, metadata
|