2015-10-04 04:13:50 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2017-02-23 20:58:39 +01:00
|
|
|
# Copyright 2015-2017 Mike Fährmann
|
2015-10-04 04:13:50 +02:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
|
|
|
"""Extract images from http://www.deviantart.com/"""
|
|
|
|
|
2017-01-12 21:08:49 +01:00
|
|
|
from .common import Extractor, Message
|
|
|
|
from .. import text, exception
|
|
|
|
from ..cache import cache
|
2017-03-08 16:40:20 +01:00
|
|
|
import time
|
2017-04-03 18:23:13 +02:00
|
|
|
import re
|
2015-10-04 04:13:50 +02:00
|
|
|
|
2017-01-12 21:08:49 +01:00
|
|
|
|
2017-04-03 14:56:47 +02:00
|
|
|
class DeviantartExtractor(Extractor):
    """Base class for deviantart extractors

    Subclasses provide the Deviation-objects to process by overriding
    deviations(); items() converts them into extractor messages.
    """
    category = "deviantart"
    directory_fmt = ["{category}", "{username}"]
    filename_fmt = "{category}_{index}_{title}.{extension}"

    def __init__(self):
        Extractor.__init__(self)
        self.api = DeviantartAPI(self)
        # start offset; increased by skip()
        self.offset = 0

    def skip(self, num):
        """Advance the start offset by 'num' and return 'num' unchanged"""
        self.offset += num
        return num

    def items(self):
        """Yield the messages for all deviations that have a 'content' entry

        A single Directory message, built from a copy of the first usable
        deviation's 'author' dict, is emitted before the first Url message.
        """
        first = True
        yield Message.Version, 1
        for deviation in self.deviations():
            # objects without 'content' provide no source URL to yield
            if "content" not in deviation:
                continue
            if first:
                first = False
                yield Message.Directory, deviation["author"].copy()
            self.prepare(deviation)
            yield Message.Url, deviation["content"]["src"], deviation

    def deviations(self):
        """Return an iterable containing all relevant Deviation-objects"""
        return []

    @staticmethod
    def prepare(deviation):
        """Adjust the contents of a Deviation-object

        Removes the 'stats' entry (if there is one), stores the part of
        'url' after its last '-' as 'index' and lets
        text.nameext_from_url() process the content URL together with
        the deviation dict.
        """
        # tolerate objects without 'stats' instead of raising KeyError
        deviation.pop("stats", None)
        deviation["index"] = deviation["url"].rsplit("-", 1)[1]
        text.nameext_from_url(deviation["content"]["src"], deviation)
|
|
|
|
|
2017-04-03 14:56:47 +02:00
|
|
|
|
|
|
|
class DeviantartUserExtractor(DeviantartExtractor):
    """Extractor for all works from an artist on deviantart.com"""
    subcategory = "user"
    pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com(?:/gallery)?/?$"]
    test = [
        ("http://shimoda7.deviantart.com/gallery/", {
            "url": "63bfa8efba199e27181943c9060f6770f91a8441",
            "keyword": "ca77ad61f387be7dabb61eb322b5185bccec69ea",
        }),
    ]

    def __init__(self, match):
        """Store the username captured by the first pattern group"""
        super().__init__()
        self.user = match.group(1)

    def deviations(self):
        """Return the user's gallery entries, starting at the skip offset"""
        username, start = self.user, self.offset
        return self.api.gallery_all(username, start)
|
2016-11-06 10:44:50 +01:00
|
|
|
|
|
|
|
|
2017-04-03 14:56:47 +02:00
|
|
|
class DeviantartImageExtractor(DeviantartExtractor):
    """Extractor for single images from deviantart.com"""
    subcategory = "image"
    pattern = [r"(?:https?://)?([^\.]+\.deviantart\.com/art/.+-\d+)"]
    test = [
        (
            ("http://shimoda7.deviantart.com/art/"
             "For-the-sake-of-a-memory-10073852"),
            {
                "url": "71345ce3bef5b19bd2a56d7b96e6b5ddba747c2e",
                "keyword": "65f3c66cc1c9cf33757a71b86688fde4549fb045",
                "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
            },
        ),
        (
            "https://zzz.deviantart.com/art/zzz-1234567890",
            {"exception": exception.NotFoundError},
        ),
    ]

    def __init__(self, match):
        """Build the https page URL from the matched host and path"""
        super().__init__()
        self.url = "https://" + match.group(1)

    def deviations(self):
        """Return a 1-tuple holding the Deviation-object of the image page

        The deviation ID is taken from the page text between
        '//deviation/' and the next double quote. NotFoundError is raised
        if the page request did not return status 200 or no ID was found.
        """
        page = self.session.get(self.url)
        dev_id = text.extract(page.text, '//deviation/', '"')[0]
        if page.status_code == 200 and dev_id:
            return (self.api.deviation(dev_id),)
        raise exception.NotFoundError("image")
|
2017-01-12 21:08:49 +01:00
|
|
|
|
|
|
|
|
2017-04-03 18:23:13 +02:00
|
|
|
class DeviantartFavouritesExtractor(DeviantartExtractor):
    """Extractor for an artist's favourites from deviantart.com"""
    subcategory = "favourites"
    directory_fmt = ["{category}", "{subcategory}",
                     "{collection[owner]} - {collection[title]}"]
    pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com/favourites"
               r"(?:/(\d+)/([^/?]+))?"]
    test = [
        ("http://h3813067.deviantart.com/favourites/", {
            "url": "71345ce3bef5b19bd2a56d7b96e6b5ddba747c2e",
            "keyword": "35a275b0f737aa9bd1f32ba13604d6e9a7054a14",
            "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
        }),
        ("http://rosuuri.deviantart.com/favourites/58951174/Useful", {
            "url": "9e8d971c80db099b95d1c785399e2bc6eb96cd07",
            "keyword": "cf65309a880799a4a82a7b2f0389e5bc88f5730f",
        }),
    ]

    def __init__(self, match):
        """Store user, folder ID and folder name taken from the URL

        URLs without a folder part select the folder named "Featured".
        """
        DeviantartExtractor.__init__(self)
        self.user, self.favid, self.favname = match.groups()
        if not self.favname:
            self.favname = "Featured"
        # metadata attached to every deviation by prepare()
        self.collection = {
            "owner": self.user,
            "title": self.favname,
            "index": self.favid or 0,
        }

    def items(self):
        """Yield a Directory and an Url message for each usable deviation"""
        yield Message.Version, 1
        for deviation in self.deviations():
            # objects without 'content' provide no source URL to yield
            if "content" not in deviation:
                continue
            self.prepare(deviation)
            yield Message.Directory, deviation
            yield Message.Url, deviation["content"]["src"], deviation

    def deviations(self):
        """Return the deviations of the collection folder named in the URL

        Each '-' in the URL's folder name is allowed to stand for any
        single character of the real folder name; all other characters
        have to match literally. The first matching folder is used and
        its name becomes the collection title.

        Raises exception.NotFoundError if no folder matches.
        """
        # escape the literal parts, so regex metacharacters in the URL
        # fragment can neither break compilation nor alter the match
        parts = self.favname.split("-")
        regex = re.compile(".".join(map(re.escape, parts)) + "$")
        for folder in self.api.collections_folders(self.user):
            if regex.match(folder["name"]):
                self.collection["title"] = folder["name"]
                return self.api.collections_folderid(
                    self.user, folder["folderid"], self.offset)
        raise exception.NotFoundError("collection")

    def prepare(self, deviation):
        """Apply the base-class adjustments and attach collection metadata"""
        DeviantartExtractor.prepare(deviation)
        deviation["collection"] = self.collection
|
|
|
|
|
|
|
|
|
2017-01-12 21:08:49 +01:00
|
|
|
class DeviantartAPI():
    """Minimal interface for the deviantart API"""

    def __init__(self, extractor, client_id="5388",
                 client_secret="76b08c69cfb27f26d6161f9ab6d061a1"):
        """Reuse the extractor's session and logger for all API traffic

        Note that the 'dA-minor-version' header is set on the extractor's
        own session object, not on a copy of it.
        """
        self.session = extractor.session
        self.session.headers["dA-minor-version"] = "20160316"
        self.log = extractor.log
        self.client_id = client_id
        self.client_secret = client_secret
        # seconds to sleep before each API request; adjusted by _call()
        self.delay = 0

    def deviation(self, deviation_id):
        """Query and return info about a single Deviation"""
        endpoint = "deviation/" + deviation_id
        return self._call(endpoint)

    def gallery_all(self, username, offset=0):
        """Yield all Deviation-objects of a specific user"""
        endpoint = "gallery/all"
        params = {"username": username, "offset": offset, "limit": 10}
        return self._pagination(endpoint, params)

    def collections_folders(self, username, offset=0):
        """Yield all collection folders of a specific user"""
        endpoint = "collections/folders"
        params = {"username": username, "offset": offset, "limit": 10}
        return self._pagination(endpoint, params)

    def collections_folderid(self, username, folder_id, offset=0):
        """Yield all Deviation-objects contained in a collection folder"""
        endpoint = "collections/" + folder_id
        params = {"username": username, "offset": offset, "limit": 10}
        return self._pagination(endpoint, params)

    def authenticate(self):
        """Authenticate the application by requesting an access token

        The resulting value is stored as the session's 'Authorization'
        header.
        """
        access_token = self._authenticate_impl(
            self.client_id, self.client_secret
        )
        self.session.headers["Authorization"] = access_token

    # NOTE(review): presumably caches the result per client_id for
    # 3600 seconds -- confirm against the 'cache' decorator
    @cache(maxage=3600, keyarg=1)
    def _authenticate_impl(self, client_id, client_secret):
        """Actual authenticate implementation

        Requests a token with the 'client_credentials' grant and returns
        it prefixed with "Bearer ". Raises exception.AuthenticationError
        if the response status is not 200.
        """
        url = "https://www.deviantart.com/oauth2/token"
        data = {
            "grant_type": "client_credentials",
            "client_id": client_id,
            "client_secret": client_secret,
        }
        response = self.session.post(url, data=data)
        if response.status_code != 200:
            raise exception.AuthenticationError()
        return "Bearer " + response.json()["access_token"]

    def _call(self, endpoint, params=None):
        """Call an API endpoint

        Sleeps for 'self.delay' seconds (if non-zero) and authenticates
        before every request. A 429 response increases the delay by one
        second and retries without an upper limit; any other non-200
        response sets the delay to one second and counts as a failed
        attempt. The third failed attempt raises an Exception carrying
        the response text.

        Returns the decoded JSON body, or an empty dict if the body is
        not valid JSON.
        """
        url = "https://www.deviantart.com/api/v1/oauth2/" + endpoint
        tries = 1
        while True:
            if self.delay:
                time.sleep(self.delay)

            self.authenticate()
            response = self.session.get(url, params=params)

            if response.status_code == 200:
                break
            elif response.status_code == 429:
                self.delay += 1
                self.log.debug("rate limit (delay: %d)", self.delay)
            else:
                self.delay = 1
                self.log.debug("http status code %d (%d/3)",
                               response.status_code, tries)
                tries += 1
                if tries > 3:
                    raise Exception(response.text)
        try:
            return response.json()
        except ValueError:
            return {}

    def _pagination(self, endpoint, params=None):
        """Yield the 'results' of all pages of an API endpoint

        'params' is updated in place with the 'next_offset' of each page.
        Iteration ends when a page does not report 'has_more', or, after
        logging an error, when a response contains no 'results'.
        """
        # the offset gets written back below, so a real dict is required
        if params is None:
            params = {}
        while True:
            data = self._call(endpoint, params)
            if "results" in data:
                yield from data["results"]
                # a missing 'has_more' field is treated as the last page
                if not data.get("has_more"):
                    return
                params["offset"] = data["next_offset"]
            else:
                self.log.error("Unexpected API response: %s", data)
                return
|