2015-10-04 04:13:50 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2018-01-30 22:49:16 +01:00
|
|
|
# Copyright 2015-2018 Mike Fährmann
|
2015-10-04 04:13:50 +02:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
2017-04-20 13:20:41 +02:00
|
|
|
"""Extract images from https://www.deviantart.com/"""
|
2015-10-04 04:13:50 +02:00
|
|
|
|
2017-01-12 21:08:49 +01:00
|
|
|
from .common import Extractor, Message
|
2017-09-24 15:59:25 +02:00
|
|
|
from .. import text, util, exception
|
2017-07-12 09:47:01 +02:00
|
|
|
from ..cache import cache, memcache
|
2017-05-13 21:42:29 +02:00
|
|
|
import itertools
|
2017-05-13 15:34:20 +02:00
|
|
|
import datetime
|
2017-03-08 16:40:20 +01:00
|
|
|
import time
|
2017-04-03 18:23:13 +02:00
|
|
|
import re
|
2015-10-04 04:13:50 +02:00
|
|
|
|
2017-01-12 21:08:49 +01:00
|
|
|
|
2017-04-03 14:56:47 +02:00
|
|
|
class DeviantartExtractor(Extractor):
    """Base class for deviantart extractors"""
    category = "deviantart"
    directory_fmt = ["{category}", "{author[username]!l}"]
    filename_fmt = "{category}_{index}_{title}.{extension}"

    def __init__(self, match=None):
        """Create the API helper and read the 'flat'/'original' options.

        'match' is an optional regex match object; when given, its first
        group is stored as the user name.
        """
        Extractor.__init__(self)
        self.api = DeviantartAPI(self)
        self.offset = 0
        self.flat = self.config("flat", True)
        self.original = self.config("original", True)
        self.user = match.group(1) if match else None
        self.group = False

    def skip(self, num):
        """Advance the start offset by 'num' and return 'num'"""
        self.offset += num
        return num

    def items(self):
        """Yield extractor messages for everything deviations() returns"""
        if self.user:
            # a falsy profile result marks this name as a group
            self.group = not self.api.user_profile(self.user)
        if self.group:
            self.subcategory = "group-" + self.subcategory

        yield Message.Version, 1
        for deviation in self.deviations():
            # tuples are (url, data) pairs that get queued instead of
            # being processed as Deviation-objects
            if isinstance(deviation, tuple):
                url, data = deviation
                yield Message.Queue, url, data
                continue

            self.prepare(deviation)
            yield Message.Directory, deviation

            if "content" in deviation:
                content = deviation["content"]
                # fetch the download info only if the original file is
                # wanted, available, and differs in size from 'content'
                if (self.original and deviation["is_downloadable"] and
                        content["filesize"] != deviation["download_filesize"]):
                    content.update(
                        self.api.deviation_download(deviation["deviationid"]))
                yield self.commit(deviation, content)

            if "videos" in deviation:
                # pick the entry with the highest 'quality' value; the last
                # character is dropped before the integer conversion
                # (presumably a unit suffix such as "720p" -- confirm)
                video = max(deviation["videos"],
                            key=lambda x: util.safe_int(x["quality"][:-1]))
                yield self.commit(deviation, video)

            if "flash" in deviation:
                yield self.commit(deviation, deviation["flash"])

            if "excerpt" in deviation:
                journal = self.api.deviation_content(deviation["deviationid"])
                yield self.commit_journal(deviation, journal)

    def deviations(self):
        """Return an iterable containing all relevant Deviation-objects"""
        return []

    def prepare(self, deviation):
        """Adjust the contents of a Deviation-object"""
        for key in ("stats", "preview", "is_favourited", "allows_comments"):
            if key in deviation:
                del deviation[key]
        try:
            # the index is whatever follows the last "-" of the URL
            deviation["index"] = deviation["url"].rpartition("-")[2]
        except KeyError:
            deviation["index"] = 0

        if self.user:
            deviation["username"] = self.user
        # expose the deviation's own category under a separate key
        # (presumably because "category" is also used for the extractor
        # category in format strings -- confirm)
        deviation["da_category"] = deviation["category"]

    @staticmethod
    def commit(deviation, target):
        """Return a Message.Url tuple for 'target' (a dict with a 'src' URL)

        Stores a copy of 'target', extended with name and extension info,
        in deviation["target"] and sets deviation["extension"].
        """
        url = target["src"]
        deviation["target"] = text.nameext_from_url(url, target.copy())
        deviation["extension"] = deviation["target"]["extension"]
        if url.startswith("http:"):
            # upgrade plain http URLs to https
            url = "https:" + url[5:]
        return Message.Url, url, deviation

    @staticmethod
    def commit_journal(deviation, journal):
        """Build a standalone HTML page for a journal entry

        Returns a Message.Url tuple whose 'url' element is the rendered
        JOURNAL_TEMPLATE text; deviation["extension"] is set to "htm".
        """
        title = text.escape(deviation["title"])
        url = deviation["url"]
        thumbs = deviation["thumbs"]
        html = journal["html"]
        date = datetime.datetime.utcfromtimestamp(deviation["published_time"])
        shadow = SHADOW_TEMPLATE.format_map(thumbs[0]) if thumbs else ""

        if "css" in journal:
            css, cls = journal["css"], "withskin"
        else:
            css, cls = "", "journal-green"

        # only the first 250 characters are searched for the custom header
        if html.find('<div class="boxtop journaltop">', 0, 250) != -1:
            needle = '<div class="boxtop journaltop">'
            header = HEADER_CUSTOM_TEMPLATE.format(
                title=title, url=url, date=str(date),
            )
        else:
            needle = '<div usr class="gr">'
            catlist = deviation["category_path"].split("/")
            # one breadcrumb link per path component; accumulate() yields
            # the cumulative path ("a", "a/b", "a/b/c", ...) for each link
            categories = " / ".join(
                ('<span class="crumb"><a href="https://www.deviantart.com/{}/"'
                 '><span>{}</span></a></span>').format(cpath, cat.capitalize())
                for cat, cpath in zip(
                    catlist,
                    itertools.accumulate(catlist, lambda t, c: t + "/" + c)
                )
            )
            header = HEADER_TEMPLATE.format(
                title=title,
                url=url,
                # cut at the first "/" found from index 8 onwards,
                # i.e. after the "https://" prefix
                userurl=url[:url.find("/", 8)],
                username=deviation["author"]["username"],
                date=str(date),
                categories=categories,
            )

        html = JOURNAL_TEMPLATE.format(
            title=title,
            # replace only the first occurrence of the header marker
            html=html.replace(needle, header, 1),
            shadow=shadow,
            css=css,
            cls=cls,
        )

        deviation["extension"] = "htm"
        return Message.Url, html, deviation

    @staticmethod
    def _find_folder(folders, name):
        """Return the first folder whose name matches 'name'

        Every "-" in 'name' matches one or more non-word characters, and
        leading/trailing non-word characters in the folder name are
        ignored.  Raises exception.NotFoundError if nothing matches.
        """
        # Escape each literal part so that regex metacharacters in 'name'
        # (e.g. parentheses) are matched literally instead of being
        # interpreted as pattern syntax.
        pattern = (
            r"[^\w]*" +
            r"[^\w]+".join(re.escape(part) for part in name.split("-")) +
            r"[^\w]*$"
        )
        for folder in folders:
            if re.match(pattern, folder["name"]):
                return folder
        raise exception.NotFoundError("folder")

    def _folder_urls(self, folders, category):
        """Return a list of (url, folder) tuples, one per folder"""
        url = "https://{}.deviantart.com/{}/0/".format(self.user, category)
        return [(url + folder["name"], folder) for folder in folders]
|
2017-07-12 17:05:31 +02:00
|
|
|
|
2017-05-10 16:45:45 +02:00
|
|
|
|
|
|
|
class DeviantartGalleryExtractor(DeviantartExtractor):
    """Extractor for all deviations from an artist's gallery"""
    subcategory = "gallery"
    archive_fmt = "g_{username}_{index}.{extension}"
    pattern = [r"(?:https?://)?([^.]+)\.deviantart\.com"
               r"(?:/(?:gallery/?(?:\?catpath=/)?)?)?$"]
    test = [
        ("http://shimoda7.deviantart.com/gallery/", {
            "url": "2b80b212717da6971b92670de15a29f68429a067",
            "keyword": "15897b5090af460c814cdd3e5702de10517fc4cc",
        }),
        ("https://yakuzafc.deviantart.com/", {
            "url": "fa6ecb2c3aa78872f762d43f7809b7f0580debc1",
            "keyword": "b29746bac291d8c8e339f0256a2bd7bb3ebe1741",
        }),
        ("http://shimoda7.deviantart.com/gallery/?catpath=/", None),
    ]

    def deviations(self):
        """Return all gallery deviations, or one queue entry per folder

        Folder URLs are returned for groups and whenever the 'flat'
        option is disabled.
        """
        if self.group or not self.flat:
            gallery_folders = self.api.gallery_folders(self.user)
            return self._folder_urls(gallery_folders, "gallery")
        return self.api.gallery_all(self.user, self.offset)
|
2016-11-06 10:44:50 +01:00
|
|
|
|
|
|
|
|
2017-07-03 21:57:10 +02:00
|
|
|
class DeviantartFolderExtractor(DeviantartExtractor):
    """Extractor for deviations inside an artist's gallery folder"""
    subcategory = "folder"
    directory_fmt = ["{category}", "{folder[owner]}", "{folder[title]}"]
    archive_fmt = "F_{folder[uuid]}_{index}.{extension}"
    pattern = [r"(?:https?://)?([^.]+)\.deviantart\.com"
               r"/gallery/(\d+)/([^/?&#]+)"]
    test = [
        ("http://shimoda7.deviantart.com/gallery/722019/Miscellaneous", {
            "url": "12c331eeff84bd47350af5a199cecc187ae03832",
            "keyword": "efc16f7aff0d070e7eb6394f080b790b1613609d",
        }),
        ("http://majestic-da.deviantart.com/gallery/63419606/CHIBI-KAWAII", {
            "url": "2ea2a3df9591c26568b09291acb453fb87ce9920",
            "keyword": "42ecdc1a4d7441628f4bcfbe4d2e683a3a4361e2",
            "options": (("original", False),),
        }),
    ]

    def __init__(self, match):
        """Store user, folder index and folder name taken from the URL"""
        DeviantartExtractor.__init__(self, match)
        user, folder_index, folder_name = match.groups()
        self.user = user
        self.fname = folder_name
        self.folder = {"owner": user, "index": folder_index}

    def deviations(self):
        """Look up the folder by name and return its deviations"""
        available = self.api.gallery_folders(self.user)
        found = self._find_folder(available, self.fname)
        folder_id = found["folderid"]
        self.folder["title"] = found["name"]
        self.folder["uuid"] = folder_id
        return self.api.gallery(self.user, folder_id, self.offset)

    def prepare(self, deviation):
        """Apply the base adjustments and attach the folder metadata"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["folder"] = self.folder
|
|
|
|
|
|
|
|
|
2017-05-10 16:45:45 +02:00
|
|
|
class DeviantartDeviationExtractor(DeviantartExtractor):
    """Extractor for single deviations"""
    subcategory = "deviation"
    archive_fmt = "{index}.{extension}"
    pattern = [(r"(?:https?://)?([^.]+\.deviantart\.com/"
                r"(?:art|journal)/[^/?&#]+-\d+)"),
               (r"(?:https?://)?(sta\.sh/[a-z0-9]+)")]
    test = [
        (("http://shimoda7.deviantart.com/art/"
          "For-the-sake-of-a-memory-10073852"), {
            "url": "eef0c01b3808c535ea673e7b3654ab5209b910b7",
            "keyword": "344b558dead0da0031ba8d1dffff06f13bbb8561",
            "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
        }),
        ("https://zzz.deviantart.com/art/zzz-1234567890", {
            "exception": exception.NotFoundError,
        }),
        ("http://sta.sh/01ijs78ebagf", {
            "url": "35c0cd0e51494a1e01bddf5414a0d1585cd9fb0e",
            "keyword": "225008b7d218d2cd1ac5d5bad3d74e3cc171a1cb",
        }),
        ("http://sta.sh/abcdefghijkl", {
            "exception": exception.NotFoundError,
        }),
        (("https://myria-moon.deviantart.com/art/"
          "Aime-Moi-part-en-vadrouille-261986576"), {
            "pattern": (r"https?://s3\.amazonaws\.com/origin-orig\."
                        r"deviantart\.net/a383/f/2013/135/e/7/[^.]+\.jpg\?"),
        }),
    ]

    def __init__(self, match):
        """Store the matched page address as an https URL

        The base initializer is called without the match object, so no
        user name is set for this extractor.
        """
        DeviantartExtractor.__init__(self)
        self.url = "https://" + match.group(1)

    def deviations(self):
        """Fetch the page, extract the deviation ID and query the API

        Raises exception.NotFoundError if the page request did not
        return status 200 or no ID could be found in the page text.
        """
        page = self.request(self.url, fatal=False)
        deviation_id, _ = text.extract(page.text, '//deviation/', '"')
        if not (page.status_code == 200 and deviation_id):
            raise exception.NotFoundError("image")
        return (self.api.deviation(deviation_id),)
|
2017-01-12 21:08:49 +01:00
|
|
|
|
|
|
|
|
2017-04-20 13:20:41 +02:00
|
|
|
class DeviantartFavoriteExtractor(DeviantartExtractor):
    """Extractor for an artist's favorites"""
    subcategory = "favorite"
    directory_fmt = ["{category}", "{username}", "Favourites"]
    archive_fmt = "f_{username}_{index}.{extension}"
    pattern = [r"(?:https?://)?([^.]+)\.deviantart\.com"
               r"/favourites/?(?:\?catpath=/)?$"]
    test = [
        ("http://h3813067.deviantart.com/favourites/", {
            "url": "eef0c01b3808c535ea673e7b3654ab5209b910b7",
            "keyword": "4478a3fa7cf9c72947cd927e8b54dbec3db9d0b2",
            "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
        }),
        ("http://h3813067.deviantart.com/favourites/?catpath=/", None),
    ]

    def deviations(self):
        """Return the deviations of all collection folders

        With the 'flat' option disabled, one (url, folder) queue entry
        per collection folder is returned instead.
        """
        collection_folders = self.api.collections_folders(self.user)
        if not self.flat:
            return self._folder_urls(collection_folders, "favourites")

        per_folder = [
            self.api.collections(self.user, entry["folderid"])
            for entry in collection_folders
        ]
        return itertools.chain.from_iterable(per_folder)
|
2017-07-10 18:14:40 +02:00
|
|
|
|
|
|
|
|
|
|
|
class DeviantartCollectionExtractor(DeviantartExtractor):
    """Extractor for a single favorite collection"""
    subcategory = "collection"
    directory_fmt = ["{category}", "{collection[owner]}",
                     "Favourites", "{collection[title]}"]
    archive_fmt = "C_{collection[uuid]}_{index}.{extension}"
    pattern = [r"(?:https?://)?([^.]+)\.deviantart\.com"
               r"/favourites/(\d+)/([^/?&#]+)"]
    test = [(("https://pencilshadings.deviantart.com"
              "/favourites/70595441/3D-Favorites"), {
        "url": "742f92199d5bc6a89cda6ec6133d46c7a523824d",
        "keyword": "d258ca05424b3586c4feec940158bca00acdf511",
        "options": (("original", False),),
    })]

    def __init__(self, match):
        """Store user, collection index and collection name from the URL"""
        DeviantartExtractor.__init__(self, match)
        user, collection_index, collection_name = match.groups()
        self.user = user
        self.cname = collection_name
        self.collection = {"owner": user, "index": collection_index}

    def deviations(self):
        """Look up the collection by name and return its deviations"""
        available = self.api.collections_folders(self.user)
        found = self._find_folder(available, self.cname)
        folder_id = found["folderid"]
        self.collection["title"] = found["name"]
        self.collection["uuid"] = folder_id
        return self.api.collections(self.user, folder_id, self.offset)

    def prepare(self, deviation):
        """Apply the base adjustments and attach the collection metadata"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["collection"] = self.collection
|
|
|
|
|
|
|
|
|
2017-05-10 17:21:33 +02:00
|
|
|
class DeviantartJournalExtractor(DeviantartExtractor):
    """Extractor for an artist's journals"""
    subcategory = "journal"
    directory_fmt = ["{category}", "{username}", "Journal"]
    archive_fmt = "j_{username}_{index}.{extension}"
    pattern = [r"(?:https?://)?([^.]+)\.deviantart\.com"
               r"/(?:journal|blog)/?(?:\?catpath=/)?$"]
    test = [
        ("https://angrywhitewanker.deviantart.com/journal/", {
            "url": "2a7dba8f18e0d7cb791cd8c78e35376f98933f9e",
            "keyword": "57a92f1ccdba4acfd4f264963fc41fc37aec4de3",
        }),
        ("http://shimoda7.deviantart.com/journal/?catpath=/", None),
    ]

    def deviations(self):
        """Return the user's journal entries, starting at the skip offset"""
        journals = self.api.browse_user_journals(self.user, self.offset)
        return journals
|
|
|
|
|
|
|
|
|
2017-01-12 21:08:49 +01:00
|
|
|
class DeviantartAPI():
    """Minimal interface for the deviantart API"""
    # default OAuth client credentials; both can be overridden through the
    # 'client-id' and 'client-secret' config options
    CLIENT_ID = "5388"
    CLIENT_SECRET = "76b08c69cfb27f26d6161f9ab6d061a1"

    def __init__(self, extractor):
        """Borrow session, logger and config values from 'extractor'"""
        self.session = extractor.session
        self.log = extractor.log
        self.headers = {}
        # back-off level; requests sleep for 2 ** (delay-1) seconds
        self.delay = 0

        # the API parameter is sent as the string "true" or "false";
        # non-string config values are converted by truthiness
        self.mature = extractor.config("mature", "true")
        if not isinstance(self.mature, str):
            self.mature = "true" if self.mature else "false"

        self.refresh_token = extractor.config("refresh-token")
        self.client_id = extractor.config("client-id", self.CLIENT_ID)
        self.client_secret = extractor.config(
            "client-secret", self.CLIENT_SECRET)

    def browse_user_journals(self, username, offset=0):
        """Yield all journal entries of a specific user"""
        endpoint = "browse/user/journals"
        params = {"username": username, "offset": offset, "limit": 50,
                  "mature_content": self.mature, "featured": "false"}
        return self._pagination(endpoint, params)

    def collections(self, username, folder_id, offset=0):
        """Yield all Deviation-objects contained in a collection folder"""
        endpoint = "collections/" + folder_id
        params = {"username": username, "offset": offset, "limit": 24,
                  "mature_content": self.mature}
        return self._pagination(endpoint, params)

    @memcache(keyarg=1)
    def collections_folders(self, username, offset=0):
        """Return a list of all collection folders of a specific user"""
        endpoint = "collections/folders"
        params = {"username": username, "offset": offset, "limit": 50,
                  "mature_content": self.mature}
        return self._pagination_list(endpoint, params)

    def deviation(self, deviation_id):
        """Query and return info about a single Deviation"""
        endpoint = "deviation/" + deviation_id
        return self._call(endpoint)

    def deviation_content(self, deviation_id):
        """Get extended content of a single Deviation"""
        endpoint = "deviation/content"
        params = {"deviationid": deviation_id}
        return self._call(endpoint, params)

    def deviation_download(self, deviation_id):
        """Get the original file download (if allowed)"""
        endpoint = "deviation/download/" + deviation_id
        params = {"mature_content": self.mature}
        return self._call(endpoint, params)

    def gallery(self, username, folder_id="", offset=0):
        """Yield all Deviation-objects contained in a gallery folder"""
        endpoint = "gallery/" + folder_id
        params = {"username": username, "offset": offset, "limit": 24,
                  "mature_content": self.mature, "mode": "newest"}
        return self._pagination(endpoint, params)

    def gallery_all(self, username, offset=0):
        """Yield all Deviation-objects of a specific user"""
        endpoint = "gallery/all"
        params = {"username": username, "offset": offset, "limit": 24,
                  "mature_content": self.mature}
        return self._pagination(endpoint, params)

    @memcache(keyarg=1)
    def gallery_folders(self, username, offset=0):
        """Return a list of all gallery folders of a specific user"""
        endpoint = "gallery/folders"
        params = {"username": username, "offset": offset, "limit": 50,
                  "mature_content": self.mature}
        return self._pagination_list(endpoint, params)

    @memcache(keyarg=1)
    def user_profile(self, username):
        """Get user profile information

        Returns None if the API answers with an error status other
        than 429.
        """
        endpoint = "user/profile/" + username
        return self._call(endpoint, expect_error=True)

    def authenticate(self):
        """Authenticate the application by requesting an access token"""
        access_token = self._authenticate_impl(self.refresh_token)
        self.headers["Authorization"] = access_token

    @cache(maxage=3590, keyarg=1)
    def _authenticate_impl(self, refresh_token):
        """Actual authenticate implementation

        Uses the 'refresh_token' grant when a refresh token is given and
        the 'client_credentials' grant otherwise.  Returns the value for
        the Authorization header; raises exception.AuthenticationError
        if the token endpoint does not answer with status 200.
        """
        url = "https://www.deviantart.com/oauth2/token"
        if refresh_token:
            self.log.info("Refreshing access token")
            data = {"grant_type": "refresh_token",
                    "refresh_token": _refresh_token_cache(refresh_token)}
        else:
            self.log.info("Requesting public access token")
            data = {"grant_type": "client_credentials"}

        auth = (self.client_id, self.client_secret)
        response = self.session.post(url, data=data, auth=auth)
        if response.status_code != 200:
            raise exception.AuthenticationError()

        data = response.json()
        if refresh_token:
            # store the refresh token from this response under the
            # originally configured token as cache key
            _refresh_token_cache.invalidate(refresh_token)
            _refresh_token_cache(refresh_token, data["refresh_token"])
        return "Bearer " + data["access_token"]

    def _call(self, endpoint, params=None, expect_error=False):
        """Call an API endpoint

        Retries until the API answers with status 200, sleeping
        2 ** (delay-1) seconds before each request while a delay is
        active.  With 'expect_error' set, any error status other than
        429 makes the call return None instead of retrying.  Returns the
        decoded JSON response, or an empty dict if decoding fails.
        """
        url = "https://www.deviantart.com/api/v1/oauth2/" + endpoint

        while True:
            if self.delay > 0:
                time.sleep(2 ** (self.delay-1))

            self.authenticate()
            response = self.session.get(
                url, headers=self.headers, params=params)

            if response.status_code == 200:
                # lower the delay again after a success, but never
                # below a level of 2 once it has been raised that far
                if self.delay > 2:
                    self.delay -= 1
                break

            if response.status_code == 429:
                msg = "Rate limit reached"
            elif expect_error:
                return None
            else:
                msg = "API responded with {} {}".format(
                    response.status_code, response.reason)

            self.delay += 1
            self.log.warning(
                "%s. Using %ds delay.", msg, 2 ** (self.delay-1))
            self.log.debug(response.text)

        try:
            return response.json()
        except ValueError:
            self.log.error("Failed to parse API response")
            return {}

    def _pagination(self, endpoint, params=None):
        """Yield the 'results' of every page of a paginated endpoint

        'params' is updated in place with the next offset between
        requests.  Iteration stops after logging an error if a response
        has no 'results' entry.
        """
        if params is None:
            # the offset of the next page is stored in 'params', so a
            # real dict is needed even if the caller supplied none
            params = {}

        while True:
            data = self._call(endpoint, params)
            if "results" not in data:
                self.log.error("Unexpected API response: %s", data)
                return
            yield from data["results"]
            if not data["has_more"]:
                return
            params["offset"] = data["next_offset"]

    def _pagination_list(self, endpoint, params=None):
        """Return all results of a paginated endpoint as a list"""
        return list(self._pagination(endpoint, params))
|
|
|
|
|
2017-05-10 16:45:45 +02:00
|
|
|
|
2017-12-18 13:14:24 +01:00
|
|
|
@cache(maxage=365*24*60*60, keyarg=0)
def _refresh_token_cache(original_token, new_token=None):
    """Return 'new_token' if it is truthy, 'original_token' otherwise

    The result is cached with the first argument as key.
    """
    if new_token:
        return new_token
    return original_token
|
|
|
|
|
|
|
|
|
2017-05-13 21:42:29 +02:00
|
|
|
SHADOW_TEMPLATE = """
|
|
|
|
<span class="shadow">
|
|
|
|
<img src="{src}" class="smshadow" width="{width}" height="{height}">
|
|
|
|
</span>
|
|
|
|
<br><br>
|
|
|
|
"""
|
|
|
|
|
2017-05-13 15:34:20 +02:00
|
|
|
HEADER_TEMPLATE = """<div usr class="gr">
|
|
|
|
<div class="metadata">
|
|
|
|
<h2><a href="{url}">{title}</a></h2>
|
|
|
|
<ul>
|
|
|
|
<li class="author">
|
|
|
|
by <span class="name"><span class="username-with-symbol u">
|
|
|
|
<a class="u regular username" href="{userurl}">{username}</a>\
|
|
|
|
<span class="user-symbol regular"></span></span></span>,
|
|
|
|
<span>{date}</span>
|
|
|
|
</li>
|
|
|
|
<li class="category">
|
|
|
|
{categories}
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
</div>
|
|
|
|
"""
|
|
|
|
|
2017-05-15 15:58:06 +02:00
|
|
|
HEADER_CUSTOM_TEMPLATE = """<div class='boxtop journaltop'>
|
|
|
|
<h2>
|
2017-08-16 12:13:42 +02:00
|
|
|
<img src="https://st.deviantart.net/minish/gruzecontrol/icons/journal.gif\
|
2017-05-19 19:22:39 +02:00
|
|
|
?2" style="vertical-align:middle" alt=""/>
|
2017-05-15 15:58:06 +02:00
|
|
|
<a href="{url}">{title}</a>
|
|
|
|
</h2>
|
|
|
|
Journal Entry: <span>{date}</span>
|
|
|
|
"""
|
|
|
|
|
2017-05-12 14:10:25 +02:00
|
|
|
JOURNAL_TEMPLATE = """text:<!DOCTYPE html>
|
2017-05-10 16:45:45 +02:00
|
|
|
<html>
|
|
|
|
<head>
|
|
|
|
<meta charset="utf-8">
|
|
|
|
<title>{title}</title>
|
2017-08-16 12:13:42 +02:00
|
|
|
<link rel="stylesheet" href="https://st.deviantart.net/\
|
2017-05-10 16:45:45 +02:00
|
|
|
css/deviantart-network_lc.css?3843780832">
|
2017-08-16 12:13:42 +02:00
|
|
|
<link rel="stylesheet" href="https://st.deviantart.net/\
|
2017-05-10 16:45:45 +02:00
|
|
|
css/group_secrets_lc.css?3250492874">
|
2017-08-16 12:13:42 +02:00
|
|
|
<link rel="stylesheet" href="https://st.deviantart.net/\
|
2017-05-10 16:45:45 +02:00
|
|
|
css/v6core_lc.css?4246581581">
|
2017-08-16 12:13:42 +02:00
|
|
|
<link rel="stylesheet" href="https://st.deviantart.net/\
|
2017-05-10 16:45:45 +02:00
|
|
|
css/sidebar_lc.css?1490570941">
|
2017-08-16 12:13:42 +02:00
|
|
|
<link rel="stylesheet" href="https://st.deviantart.net/\
|
2017-05-10 16:45:45 +02:00
|
|
|
css/writer_lc.css?3090682151">
|
2017-08-16 12:13:42 +02:00
|
|
|
<link rel="stylesheet" href="https://st.deviantart.net/\
|
2017-05-10 16:45:45 +02:00
|
|
|
css/v6loggedin_lc.css?3001430805">
|
|
|
|
<style>{css}</style>
|
2017-08-16 12:13:42 +02:00
|
|
|
<link rel="stylesheet" href="https://st.deviantart.net/\
|
2017-05-10 16:45:45 +02:00
|
|
|
roses/cssmin/core.css?1488405371919" >
|
2017-08-16 12:13:42 +02:00
|
|
|
<link rel="stylesheet" href="https://st.deviantart.net/\
|
2017-05-10 16:45:45 +02:00
|
|
|
roses/cssmin/peeky.css?1487067424177" >
|
2017-08-16 12:13:42 +02:00
|
|
|
<link rel="stylesheet" href="https://st.deviantart.net/\
|
2017-05-10 16:45:45 +02:00
|
|
|
roses/cssmin/desktop.css?1491362542749" >
|
|
|
|
</head>
|
|
|
|
<body id="deviantART-v7" class="bubble no-apps loggedout w960 deviantart">
|
|
|
|
<div id="output">
|
|
|
|
<div class="dev-page-container bubbleview">
|
|
|
|
<div class="dev-page-view view-mode-normal">
|
|
|
|
<div class="dev-view-main-content">
|
|
|
|
<div class="dev-view-deviation">
|
2017-05-13 21:42:29 +02:00
|
|
|
{shadow}
|
2017-05-10 16:45:45 +02:00
|
|
|
<div class="journal-wrapper tt-a">
|
|
|
|
<div class="journal-wrapper2">
|
2017-05-13 15:34:20 +02:00
|
|
|
<div class="journal {cls} journalcontrol">
|
2017-05-10 16:45:45 +02:00
|
|
|
{html}
|
|
|
|
</div>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
|
|
</body>
|
|
|
|
</html>
|
|
|
|
"""
|