# -*- coding: utf-8 -*-
# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract images from https://www.deviantart.com/"""
from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache, memcache
import collections
import itertools
import mimetypes
import math
import time
import re
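# matches both new-style "www.deviantart.com/<name>" profile URLs and
# old-style "<name>.deviantart.com" subdomains; the user or group name
# ends up in capture group 1 or 2 respectively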
BASE_PATTERN = (
r"(?:https?://)?(?:"
r"(?:www\.)?deviantart\.com/([\w-]+)|"
r"(?!www\.)([\w-]+)\.deviantart\.com)"
)
class DeviantartExtractor(Extractor):
"""Base class for deviantart extractors using the OAuth API"""
category = "deviantart"
directory_fmt = ("{category}", "{author[username]!l}")
filename_fmt = "{category}_{index}_{title}.{extension}"
root = "https://www.deviantart.com"
def __init__(self, match=None):
Extractor.__init__(self, match)
self.offset = 0
self.flat = self.config("flat", True)
self.extra = self.config("extra", False)
self.quality = self.config("quality", "100")
self.original = self.config("original", True)
self.user = match.group(1) or match.group(2)
self.group = False
self.api = DeviantartAPI(self)
if self.quality:
self.quality = "q_{}".format(self.quality)
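# the "original" option is either a boolean (always fetch the download
# target) or the string "image" (only use the download target when it is
# itself an image file)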
if self.original != "image":
self._update_content = self._update_content_default
else:
self._update_content = self._update_content_image
self.original = True
self.commit_journal = {
"html": self._commit_journal_html,
"text": self._commit_journal_text,
}.get(self.config("journals", "html"))
def skip(self, num):
self.offset += num
return num
def items(self):
if self.user:
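# a name without an accessible user profile is assumed to be a group;
# group galleries are handled by queueing their folder URLs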
self.group = not self.api.user_profile(self.user)
if self.group:
self.subcategory = "group-" + self.subcategory
yield Message.Version, 1
for deviation in self.deviations():
if isinstance(deviation, tuple):
url, data = deviation
yield Message.Queue, url, data
continue
self.prepare(deviation)
yield Message.Directory, deviation
if "content" in deviation:
content = deviation["content"]
if self.original and deviation["is_downloadable"] and \
text.ext_from_url(content["src"]) != "gif":
self._update_content(deviation, content)
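# wixmp-hosted media: older uploads can be fetched at higher resolution
# through the /intermediary/ URL form, and the JPEG quality parameter
# is replaced with the configured "quality" value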
if content["src"].startswith("https://images-wixmp-"):
if deviation["index"] <= 790677560:
# https://github.com/r888888888/danbooru/issues/4069
content["src"] = re.sub(
r"(/f/[^/]+/[^/]+)/v\d+/.*",
r"/intermediary\1", content["src"])
if self.quality:
content["src"] = re.sub(
r"q_\d+", self.quality, content["src"])
yield self.commit(deviation, content)
elif deviation["is_downloadable"]:
content = self.api.deviation_download(deviation["deviationid"])
yield self.commit(deviation, content)
if "videos" in deviation:
video = max(deviation["videos"],
key=lambda x: text.parse_int(x["quality"][:-1]))
yield self.commit(deviation, video)
if "flash" in deviation:
yield self.commit(deviation, deviation["flash"])
if "excerpt" in deviation and self.commit_journal:
journal = self.api.deviation_content(deviation["deviationid"])
yield self.commit_journal(deviation, journal)
if self.extra:
for match in DeviantartStashExtractor.pattern.finditer(
deviation.get("description", "")):
deviation["_extractor"] = DeviantartStashExtractor
yield Message.Queue, match.group(0), deviation
def deviations(self):
"""Return an iterable containing all relevant Deviation-objects"""
def prepare(self, deviation):
"""Adjust the contents of a Deviation-object"""
try:
deviation["index"] = text.parse_int(
deviation["url"].rpartition("-")[2])
except KeyError:
deviation["index"] = 0
if self.user:
deviation["username"] = self.user
deviation["da_category"] = deviation["category"]
deviation["published_time"] = text.parse_int(
deviation["published_time"])
deviation["date"] = text.parse_timestamp(
deviation["published_time"])
# filename metadata
alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
sub = re.compile(r"\W").sub
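# rebuild DeviantArt's legacy filename scheme:
# <title>_by_<author>-d<index encoded in base 36>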
deviation["filename"] = "".join((
sub("_", deviation["title"].lower()), "_by_",
sub("_", deviation["author"]["username"].lower()), "-d",
util.bencode(deviation["index"], alphabet),
))
@staticmethod
def commit(deviation, target):
url = target["src"]
target = target.copy()
target["filename"] = deviation["filename"]
deviation["target"] = target
deviation["extension"] = target["extension"] = text.ext_from_url(url)
return Message.Url, url, deviation
def _commit_journal_html(self, deviation, journal):
title = text.escape(deviation["title"])
url = deviation["url"]
thumbs = deviation["thumbs"]
html = journal["html"]
shadow = SHADOW_TEMPLATE.format_map(thumbs[0]) if thumbs else ""
if "css" in journal:
css, cls = journal["css"], "withskin"
else:
css, cls = "", "journal-green"
if html.find('<div class="boxtop journaltop">', 0, 250) != -1:
needle = '<div class="boxtop journaltop">'
header = HEADER_CUSTOM_TEMPLATE.format(
title=title, url=url, date=deviation["date"],
)
else:
needle = '<div usr class="gr">'
catlist = deviation["category_path"].split("/")
categories = " / ".join(
('<span class="crumb"><a href="{}/{}/"><span>{}</span></a>'
'</span>').format(self.root, cpath, cat.capitalize())
for cat, cpath in zip(
catlist,
itertools.accumulate(catlist, lambda t, c: t + "/" + c)
)
)
username = deviation["author"]["username"]
urlname = deviation.get("username") or username.lower()
header = HEADER_TEMPLATE.format(
title=title,
url=url,
userurl="{}/{}/".format(self.root, urlname),
username=username,
date=deviation["date"],
categories=categories,
)
html = JOURNAL_TEMPLATE_HTML.format(
title=title,
html=html.replace(needle, header, 1),
shadow=shadow,
css=css,
cls=cls,
)
deviation["extension"] = "htm"
return Message.Url, html, deviation
@staticmethod
def _commit_journal_text(deviation, journal):
content = "\n".join(
text.unescape(text.remove_html(txt))
for txt in journal["html"].rpartition("<script")[0].split("<br />")
)
txt = JOURNAL_TEMPLATE_TEXT.format(
title=deviation["title"],
username=deviation["author"]["username"],
date=deviation["date"],
content=content,
)
deviation["extension"] = "txt"
return Message.Url, txt, deviation
@staticmethod
def _find_folder(folders, name):
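# folder names in URLs are slugified; compare them case-insensitively
# and treat "-" as any run of non-word characters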
pattern = re.compile(r"(?i)\W*" + name.replace("-", r"\W+") + r"\W*$")
for folder in folders:
if pattern.match(folder["name"]):
return folder
raise exception.NotFoundError("folder")
def _folder_urls(self, folders, category):
url = "{}/{}/{}/0/".format(self.root, self.user, category)
return [(url + folder["name"], folder) for folder in folders]
def _update_content_default(self, deviation, content):
content.update(self.api.deviation_download(deviation["deviationid"]))
def _update_content_image(self, deviation, content):
data = self.api.deviation_download(deviation["deviationid"])
url = data["src"].partition("?")[0]
mtype = mimetypes.guess_type(url, False)[0]
if mtype and mtype.startswith("image/"):
content.update(data)
class DeviantartGalleryExtractor(DeviantartExtractor):
"""Extractor for all deviations from an artist's gallery"""
subcategory = "gallery"
archive_fmt = "g_{username}_{index}.{extension}"
pattern = BASE_PATTERN + r"(?:/(?:gallery/?(?:\?catpath=/)?)?)?$"
test = (
("https://www.deviantart.com/shimoda7/gallery/", {
"pattern": r"https://(s3.amazonaws.com/origin-(img|orig)"
r".deviantart.net/|images-wixmp-\w+.wixmp.com/)",
"count": ">= 30",
"keyword": {
"allows_comments": bool,
"author": {
"type": "regular",
"usericon": str,
"userid": "9AE51FC7-0278-806C-3FFF-F4961ABF9E2B",
"username": "shimoda7",
},
"category_path": str,
"content": {
"filesize": int,
"height": int,
"src": str,
"transparency": bool,
"width": int,
},
"da_category": str,
"date": "type:datetime",
"deviationid": str,
"?download_filesize": int,
"extension": str,
"index": int,
"is_deleted": bool,
"is_downloadable": bool,
"is_favourited": bool,
"is_mature": bool,
"preview": {
"height": int,
"src": str,
"transparency": bool,
"width": int,
},
"published_time": int,
"stats": {
"comments": int,
"favourites": int,
},
"target": dict,
"thumbs": list,
"title": str,
"url": r"re:https://www.deviantart.com/shimoda7/art/[^/]+-\d+",
"username": "shimoda7",
},
}),
# group
("https://www.deviantart.com/yakuzafc", {
"pattern": r"https://www.deviantart.com/yakuzafc/gallery/0/",
"count": ">= 15",
}),
# 'folders' option (#276)
("https://www.deviantart.com/justatest235723", {
"count": 2,
"options": (("metadata", 1), ("folders", 1), ("original", 0)),
"keyword": {
"description": str,
"folders": list,
"is_watching": bool,
"license": str,
"tags": list,
},
}),
("https://www.deviantart.com/shimoda8/gallery/", {
"exception": exception.NotFoundError,
}),
# old-style URLs
("https://www.deviantart.com/shimoda7/gallery/?catpath=/"),
("https://shimoda7.deviantart.com/gallery/"),
("https://yakuzafc.deviantart.com/"),
("https://shimoda7.deviantart.com/gallery/?catpath=/"),
)
def deviations(self):
if self.flat and not self.group:
return self.api.gallery_all(self.user, self.offset)
folders = self.api.gallery_folders(self.user)
return self._folder_urls(folders, "gallery")
class DeviantartFolderExtractor(DeviantartExtractor):
"""Extractor for deviations inside an artist's gallery folder"""
subcategory = "folder"
directory_fmt = ("{category}", "{folder[owner]}", "{folder[title]}")
archive_fmt = "F_{folder[uuid]}_{index}.{extension}"
pattern = BASE_PATTERN + r"/gallery/(\d+)/([^/?&#]+)"
test = (
# user
("https://www.deviantart.com/shimoda7/gallery/722019/Miscellaneous", {
"count": 5,
"options": (("original", False),),
}),
# group
("https://www.deviantart.com/yakuzafc/gallery/37412168/Crafts", {
"count": ">= 4",
"options": (("original", False),),
}),
("https://shimoda7.deviantart.com/gallery/722019/Miscellaneous"),
("https://yakuzafc.deviantart.com/gallery/37412168/Crafts"),
)
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
self.fname = match.group(4)
self.folder = {"owner": self.user, "index": match.group(3)}
def deviations(self):
folders = self.api.gallery_folders(self.user)
folder = self._find_folder(folders, self.fname)
self.folder["title"] = folder["name"]
self.folder["uuid"] = folder["folderid"]
return self.api.gallery(self.user, folder["folderid"], self.offset)
def prepare(self, deviation):
DeviantartExtractor.prepare(self, deviation)
deviation["folder"] = self.folder
class DeviantartStashExtractor(DeviantartExtractor):
"""Extractor for sta.sh-ed deviations"""
subcategory = "stash"
archive_fmt = "{index}.{extension}"
pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)"
test = (
("https://sta.sh/022c83odnaxc", {
"pattern": r"https://s3.amazonaws.com/origin-orig.deviantart.net",
"count": 1,
}),
# multiple stash items
("https://sta.sh/21jf51j7pzl2", {
"pattern": pattern,
"count": 4,
}),
# downloadable, but no "content" field (#307)
("https://sta.sh/024t4coz16mi", {
"count": 1,
}),
("https://sta.sh/abcdefghijkl", {
"exception": exception.HttpError,
}),
)
skip = Extractor.skip
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
self.user = None
self.stash_id = match.group(1)
def deviations(self):
url = "https://sta.sh/" + self.stash_id
page = self.request(url).text
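# single sta.sh items embed a "//deviation/<id>" reference; folder pages
# do not, so their linked items are queued for this extractor instead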
deviation_id = text.extract(page, '//deviation/', '"')[0]
if deviation_id:
yield self.api.deviation(deviation_id)
else:
data = {"_extractor": DeviantartStashExtractor}
page = text.extract(
page, '<div id="stash-body"', '<div class="footer"')[0]
for url in text.extract_iter(page, '<a href="', '"'):
yield url, data
class DeviantartFavoriteExtractor(DeviantartExtractor):
"""Extractor for an artist's favorites"""
subcategory = "favorite"
directory_fmt = ("{category}", "{username}", "Favourites")
archive_fmt = "f_{username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/favourites/?(?:\?catpath=/)?$"
test = (
("https://www.deviantart.com/h3813067/favourites/", {
"options": (("metadata", True), ("flat", False)), # issue #271
"count": 1,
}),
("https://www.deviantart.com/h3813067/favourites/", {
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
}),
("https://www.deviantart.com/h3813067/favourites/?catpath=/"),
("https://h3813067.deviantart.com/favourites/"),
("https://h3813067.deviantart.com/favourites/?catpath=/"),
)
def deviations(self):
folders = self.api.collections_folders(self.user)
if self.flat:
return itertools.chain.from_iterable(
self.api.collections(self.user, folder["folderid"])
for folder in folders
)
return self._folder_urls(folders, "favourites")
class DeviantartCollectionExtractor(DeviantartExtractor):
"""Extractor for a single favorite collection"""
subcategory = "collection"
directory_fmt = ("{category}", "{collection[owner]}",
"Favourites", "{collection[title]}")
archive_fmt = "C_{collection[uuid]}_{index}.{extension}"
pattern = BASE_PATTERN + r"/favourites/(\d+)/([^/?&#]+)"
test = (
(("https://www.deviantart.com/pencilshadings"
"/favourites/70595441/3D-Favorites"), {
"count": ">= 20",
"options": (("original", False),),
}),
("https://pencilshadings.deviantart.com"
"/favourites/70595441/3D-Favorites"),
)
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
_, _, cid, self.cname = match.groups()
self.collection = {"owner": self.user, "index": cid}
def deviations(self):
folders = self.api.collections_folders(self.user)
folder = self._find_folder(folders, self.cname)
self.collection["title"] = folder["name"]
self.collection["uuid"] = folder["folderid"]
return self.api.collections(self.user, folder["folderid"], self.offset)
def prepare(self, deviation):
DeviantartExtractor.prepare(self, deviation)
deviation["collection"] = self.collection
class DeviantartJournalExtractor(DeviantartExtractor):
"""Extractor for an artist's journals"""
subcategory = "journal"
directory_fmt = ("{category}", "{username}", "Journal")
archive_fmt = "j_{username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/(?:journal|blog)/?(?:\?catpath=/)?$"
test = (
("https://www.deviantart.com/angrywhitewanker/journal/", {
"url": "38db2a0d3a587a7e0f9dba7ff7d274610ebefe44",
}),
("https://www.deviantart.com/angrywhitewanker/journal/", {
"url": "b2a8e74d275664b1a4acee0fca0a6fd33298571e",
"options": (("journals", "text"),),
}),
("https://www.deviantart.com/angrywhitewanker/journal/", {
"count": 0,
"options": (("journals", "none"),),
}),
("https://www.deviantart.com/shimoda7/journal/?catpath=/"),
("https://shimoda7.deviantart.com/journal/"),
("https://shimoda7.deviantart.com/journal/?catpath=/"),
)
def deviations(self):
return self.api.browse_user_journals(self.user, self.offset)
class DeviantartPopularExtractor(DeviantartExtractor):
"""Extractor for popular deviations"""
subcategory = "popular"
directory_fmt = ("{category}", "Popular",
"{popular[range]}", "{popular[search]}")
archive_fmt = "P_{popular[range]}_{popular[search]}_{index}.{extension}"
pattern = (r"(?:https?://)?www\.deviantart\.com"
r"((?:/\w+)*)/(?:popular-([^/?&#]+))/?(?:\?([^#]*))?")
test = (
("https://www.deviantart.com/popular-24-hours/?q=tree+house", {
"options": (("original", False),),
}),
("https://www.deviantart.com/artisan/popular-all-time/?q=tree"),
)
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
self.search_term = self.time_range = self.category_path = None
self.user = ""
path, trange, query = match.groups()
if path:
self.category_path = path.lstrip("/")
if trange:
self.time_range = trange.replace("-", "").replace("hours", "hr")
if query:
self.search_term = text.parse_query(query).get("q")
self.popular = {
"search": self.search_term or "",
"range": trange or "24-hours",
"path": self.category_path,
}
def deviations(self):
return self.api.browse_popular(
self.search_term, self.time_range, self.category_path, self.offset)
def prepare(self, deviation):
DeviantartExtractor.prepare(self, deviation)
deviation["popular"] = self.popular
class DeviantartExtractorV2(Extractor):
"""Base class for deviantart extractors using the NAPI"""
category = "deviantart"
directory_fmt = ("{category}", "{author[username]!l}")
filename_fmt = "{category}_{index}_{title}.{extension}"
root = "https://www.deviantart.com"
def __init__(self, match=None):
Extractor.__init__(self, match)
self.offset = 0
self.extra = self.config("extra", False)
self.quality = self.config("quality", "100")
self.user = match.group(1) or match.group(2)
if self.quality:
self.quality = "q_{}".format(self.quality)
def items(self):
url = (
self.root + "/_napi/da-browse/shared_api/deviation/extended_fetch"
)
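# extended_fetch is part of DeviantArt's internal web API ("NAPI"); it
# returns the deviation together with its "extended" metadata, media
# "files", and comment counts, and requires no OAuth token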
params = {
"deviationid" : None,
"username" : None,
"type" : None,
"include_session": "false",
}
headers = {
"Referer": self.root,
}
yield Message.Version, 1
for deviation in self.deviations():
params["deviationid"] = deviation["deviationId"]
params["username"] = deviation["author"]["username"]
params["type"] = "journal" if deviation["isJournal"] else "art"
data = self.request(url, params=params, headers=headers).json()
if "deviation" not in data:
self.log.warning("Skipping %s", params["deviationid"])
continue
deviation = self._extract(data)
yield Message.Directory, deviation
yield Message.Url, deviation["target"]["src"], deviation
if self.extra:
for match in DeviantartStashExtractor.pattern.finditer(
deviation["description"]):
deviation["_extractor"] = DeviantartStashExtractor
yield Message.Queue, match.group(0), deviation
def _extract(self, data):
deviation = data["deviation"]
extended = deviation["extended"]
files = deviation["files"]
del deviation["extended"]
del deviation["files"]
# prepare deviation metadata
deviation["description"] = extended.get("description", "")
deviation["username"] = self.user.lower()
deviation["stats"] = extended["stats"]
deviation["stats"]["comments"] = data["comments"]["total"]
deviation["index"] = deviation["deviationId"]
deviation["tags"] = [t["name"] for t in extended.get("tags") or ()]
deviation["date"] = text.parse_datetime(
deviation["publishedTime"])
deviation["category_path"] = "/".join(
extended[key]["displayNameEn"]
for key in ("typeFacet", "contentFacet", "categoryFacet")
if key in extended
)
# extract download target
target = files[-1]
if target["type"] == "gif":
pass
elif target["type"] == "video":
# select largest video
target = max(
files, key=lambda x: text.parse_int(x.get("quality", "")[:-1]))
elif target["type"] == "flash":
if target["src"].startswith("https://sandbox.deviantart.com"):
# extract SWF file from "sandbox"
target["src"] = text.extract(
self.request(target["src"]).text,
'id="sandboxembed" src="', '"',
)[0]
elif "download" in extended:
target = extended["download"]
target["src"] = target["url"]
del target["url"]
# url rewrites
if target["src"].startswith("https://images-wixmp-"):
if deviation["index"] <= 790677560:
# https://github.com/r888888888/danbooru/issues/4069
target["src"] = re.sub(
r"(/f/[^/]+/[^/]+)/v\d+/.*",
r"/intermediary\1", target["src"])
if self.quality:
target["src"] = re.sub(
r"q_\d+", self.quality, target["src"])
# filename and extension metadata
alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
sub = re.compile(r"\W").sub
deviation["filename"] = target["filename"] = "".join((
sub("_", deviation["title"].lower()), "_by_",
sub("_", deviation["author"]["username"].lower()), "-d",
util.bencode(deviation["index"], alphabet),
))
deviation["extension"] = target["extension"] = (
text.ext_from_url(target["src"])
)
deviation["target"] = target
return deviation
class DeviantartDeviationExtractor(DeviantartExtractorV2):
"""Extractor for single deviations"""
subcategory = "deviation"
archive_fmt = "{index}.{extension}"
pattern = BASE_PATTERN + r"/(art|journal)/(?:[^/?&#]+-)?(\d+)"
test = (
(("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), {
"options": (("original", 0),),
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
}),
("https://www.deviantart.com/zzz/art/zzz-1234567890", {
"count": 0,
}),
(("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576"), {
"pattern": (r"https://www.deviantart.com/download/261986576"
r"/[\w-]+\.jpg\?token=\w+&ts=\d+"),
}),
# wixmp URL rewrite
(("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), {
"pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
r"/intermediary/f/[^/]+/[^.]+\.jpg$")
}),
# wixmp URL rewrite v2 (#369)
(("https://www.deviantart.com/josephbiwald/art/Destiny-2-804940104"), {
"pattern": r"https://images-wixmp-\w+\.wixmp\.com/.*,q_100,"
}),
# non-download URL for GIFs (#242)
(("https://www.deviantart.com/skatergators/art/COM-Moni-781571783"), {
"pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
r"/f/[^/]+/[^.]+\.gif\?token="),
}),
# external URLs from description (#302)
(("https://www.deviantart.com/uotapo/art/INANAKI-Memo-590297498"), {
"options": (("extra", 1), ("original", 0)),
"pattern": r"https?://sta\.sh/\w+$",
"range": "2-",
"count": 4,
}),
# video
("https://www.deviantart.com/chi-u/art/-VIDEO-Brushes-330774593", {
"url": "3b6e6e761d2d393fa61a4dc3ed6e7db51b14d07b",
"keyword": {
"target": {
"duration": 306,
"extension": "mp4",
"filename": r"re:_video____brushes_\w+_by_chi_u-d5gxnb5",
"filesize": 9963639,
"quality": "1080p",
"src": str,
"type": "video",
},
}
}),
# archive
("https://www.deviantart.com/itsvenue/art/-brush-pngs-14-763300948", {
"pattern": r"https://.+deviantart.com/download/763300948/.*\.rar",
}),
# swf
("https://www.deviantart.com/ikatxfruti/art/Bang-Bang-528130222", {
"pattern": r"https://images-wixmp-.*wixmp.com/f/.*\.swf",
}),
# old-style URLs
("https://shimoda7.deviantart.com"
"/art/For-the-sake-of-a-memory-10073852"),
("https://myria-moon.deviantart.com"
"/art/Aime-Moi-part-en-vadrouille-261986576"),
("https://zzz.deviantart.com/art/zzz-1234567890"),
)
skip = Extractor.skip
def __init__(self, match):
DeviantartExtractorV2.__init__(self, match)
self.type = match.group(3)
self.deviation_id = match.group(4)
def deviations(self):
return ({
"deviationId": self.deviation_id,
"author" : {"username": self.user},
"isJournal" : self.type == "journal",
},)
class DeviantartScrapsExtractor(DeviantartExtractorV2):
"""Extractor for an artist's scraps"""
subcategory = "scraps"
directory_fmt = ("{category}", "{username}", "Scraps")
archive_fmt = "s_{username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b"
test = (
("https://www.deviantart.com/shimoda7/gallery/scraps", {
"count": 12,
}),
("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps"),
("https://shimoda7.deviantart.com/gallery/?catpath=scraps"),
)
def deviations(self):
url = self.root + "/_napi/da-user-profile/api/gallery/contents"
params = {
"username" : self.user,
"offset" : self.offset,
"limit" : "24",
"scraps_folder": "true",
}
headers = {
"Referer": "{}/{}/gallery/scraps".format(self.root, self.user),
}
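# page through the scraps folder until the API reports no more results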
while True:
data = self.request(url, params=params, headers=headers).json()
for obj in data["results"]:
yield obj["deviation"]
if not data["hasMore"]:
return
params["offset"] = data["nextOffset"]
class DeviantartAPI():
"""Minimal interface for the DeviantArt API
Ref: https://www.deviantart.com/developers/http/v1/20160316
"""
CLIENT_ID = "5388"
CLIENT_SECRET = "76b08c69cfb27f26d6161f9ab6d061a1"
def __init__(self, extractor):
self.extractor = extractor
self.log = extractor.log
self.headers = {}
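# the wait time between API calls is stored as an exponent: the actual
# delay is 2**self.delay seconds (-1 disables waiting entirely); it is
# raised on 429 responses and lowered again down to delay_min afterwards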
delay = extractor.config("wait-min", 0)
self.delay = math.ceil(math.log2(delay)) if delay >= 1 else -1
self.delay_min = max(2, self.delay)
self.mature = extractor.config("mature", "true")
if not isinstance(self.mature, str):
self.mature = "true" if self.mature else "false"
self.folders = extractor.config("folders", False)
self.metadata = extractor.extra or extractor.config("metadata", False)
self.refresh_token = extractor.config("refresh-token")
self.client_id = extractor.config("client-id", self.CLIENT_ID)
self.client_secret = extractor.config(
"client-secret", self.CLIENT_SECRET)
def browse_popular(self, query=None, timerange=None,
category_path=None, offset=0):
"""Yield popular deviations"""
endpoint = "browse/popular"
params = {"q": query, "offset": offset, "limit": 120,
"timerange": timerange, "category_path": category_path,
"mature_content": self.mature}
return self._pagination(endpoint, params)
def browse_user_journals(self, username, offset=0):
"""Yield all journal entries of a specific user"""
endpoint = "browse/user/journals"
params = {"username": username, "offset": offset, "limit": 50,
"mature_content": self.mature, "featured": "false"}
return self._pagination(endpoint, params)
def collections(self, username, folder_id, offset=0):
"""Yield all Deviation-objects contained in a collection folder"""
endpoint = "collections/" + folder_id
params = {"username": username, "offset": offset, "limit": 24,
"mature_content": self.mature}
return self._pagination(endpoint, params)
@memcache(keyarg=1)
def collections_folders(self, username, offset=0):
"""Yield all collection folders of a specific user"""
endpoint = "collections/folders"
params = {"username": username, "offset": offset, "limit": 50,
"mature_content": self.mature}
return self._pagination_folders(endpoint, params)
def deviation(self, deviation_id):
"""Query and return info about a single Deviation"""
endpoint = "deviation/" + deviation_id
deviation = self._call(endpoint)
if self.metadata:
self._metadata((deviation,))
if self.folders:
self._folders((deviation,))
return deviation
def deviation_content(self, deviation_id):
"""Get extended content of a single Deviation"""
endpoint = "deviation/content"
params = {"deviationid": deviation_id}
return self._call(endpoint, params)
def deviation_download(self, deviation_id):
"""Get the original file download (if allowed)"""
endpoint = "deviation/download/" + deviation_id
params = {"mature_content": self.mature}
return self._call(endpoint, params)
def deviation_metadata(self, deviations):
""" Fetch deviation metadata for a set of deviations"""
endpoint = "deviation/metadata?" + "&".join(
"deviationids[{}]={}".format(num, deviation["deviationid"])
for num, deviation in enumerate(deviations)
)
params = {"mature_content": self.mature}
return self._call(endpoint, params)["metadata"]
def gallery(self, username, folder_id="", offset=0, extend=True):
"""Yield all Deviation-objects contained in a gallery folder"""
endpoint = "gallery/" + folder_id
params = {"username": username, "offset": offset, "limit": 24,
"mature_content": self.mature, "mode": "newest"}
return self._pagination(endpoint, params, extend)
def gallery_all(self, username, offset=0):
"""Yield all Deviation-objects of a specific user"""
endpoint = "gallery/all"
params = {"username": username, "offset": offset, "limit": 24,
"mature_content": self.mature}
return self._pagination(endpoint, params)
@memcache(keyarg=1)
def gallery_folders(self, username, offset=0):
"""Yield all gallery folders of a specific user"""
endpoint = "gallery/folders"
params = {"username": username, "offset": offset, "limit": 50,
"mature_content": self.mature}
return self._pagination_folders(endpoint, params)
@memcache(keyarg=1)
def user_profile(self, username):
"""Get user profile information"""
endpoint = "user/profile/" + username
return self._call(endpoint, fatal=False)
def authenticate(self, refresh_token):
"""Authenticate the application by requesting an access token"""
self.headers["Authorization"] = self._authenticate_impl(refresh_token)
@cache(maxage=3600, keyarg=1)
def _authenticate_impl(self, refresh_token):
"""Actual authenticate implementation"""
url = "https://www.deviantart.com/oauth2/token"
if refresh_token:
self.log.info("Refreshing private access token")
data = {"grant_type": "refresh_token",
"refresh_token": _refresh_token_cache(refresh_token)}
else:
self.log.info("Requesting public access token")
data = {"grant_type": "client_credentials"}
auth = (self.client_id, self.client_secret)
response = self.extractor.request(
url, method="POST", data=data, auth=auth)
data = response.json()
if response.status_code != 200:
raise exception.AuthenticationError('"{} ({})"'.format(
data.get("error_description"), data.get("error")))
if refresh_token:
_refresh_token_cache.update(refresh_token, data["refresh_token"])
return "Bearer " + data["access_token"]
def _call(self, endpoint, params=None, fatal=True, public=True):
"""Call an API endpoint"""
url = "https://www.deviantart.com/api/v1/oauth2/" + endpoint
while True:
if self.delay >= 0:
time.sleep(2 ** self.delay)
self.authenticate(None if public else self.refresh_token)
response = self.extractor.request(
url, headers=self.headers, params=params, fatal=None)
data = response.json()
status = response.status_code
if 200 <= status < 400:
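# gradually reduce the wait time again after successful requests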
if self.delay > self.delay_min:
self.delay -= 1
return data
if not fatal:
return None
if data.get("error_description") == "User not found.":
raise exception.NotFoundError("user or group")
self.log.debug(response.text)
msg = "API responded with {} {}".format(
status, response.reason)
if status == 429:
self.delay += 1
self.log.warning("%s. Using %ds delay.", msg, 2 ** self.delay)
else:
self.log.error(msg)
return data
def _pagination(self, endpoint, params, extend=True):
public = True
while True:
data = self._call(endpoint, params, public=public)
if "results" not in data:
self.log.error("Unexpected API response: %s", data)
return
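# fewer results than requested can mean some deviations are hidden from
# the public token; retry the request with the private token if available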
if (public and self.refresh_token and
len(data["results"]) < params["limit"]):
self.log.debug("Switching to private access token")
public = False
continue
if extend:
if self.metadata:
self._metadata(data["results"])
if self.folders:
self._folders(data["results"])
yield from data["results"]
if not data["has_more"]:
return
params["offset"] = data["next_offset"]
def _pagination_folders(self, endpoint, params):
result = []
result.extend(self._pagination(endpoint, params, False))
return result
def _metadata(self, deviations):
"""Add extended metadata to each deviation object"""
for deviation, metadata in zip(
deviations, self.deviation_metadata(deviations)):
deviation.update(metadata)
deviation["tags"] = [t["tag_name"] for t in deviation["tags"]]
return deviations
def _folders(self, deviations):
"""Add a list of all containing folders to each deviation object"""
for deviation in deviations:
deviation["folders"] = self._folders_map(
deviation["author"]["username"])[deviation["deviationid"]]
@memcache(keyarg=1)
def _folders_map(self, username):
"""Generate a deviation_id -> folders mapping for 'username'"""
self.log.info("Collecting folder information for '%s'", username)
folders = self.gallery_folders(username)
# add parent names to folders, but ignore "Featured" as parent
fmap = {}
featured = folders[0]["folderid"]
for folder in folders:
if folder["parent"] and folder["parent"] != featured:
folder["name"] = fmap[folder["parent"]] + "/" + folder["name"]
fmap[folder["folderid"]] = folder["name"]
# map deviationids to folder names
dmap = collections.defaultdict(list)
for folder in folders:
for deviation in self.gallery(
username, folder["folderid"], 0, False):
dmap[deviation["deviationid"]].append(folder["name"])
return dmap
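# DeviantArt rotates refresh tokens: every token refresh returns a new
# one, which is cached here so later runs can keep authenticating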
@cache(maxage=10*365*24*3600, keyarg=0)
def _refresh_token_cache(original_token, new_token=None):
return new_token or original_token
SHADOW_TEMPLATE = """
<span class="shadow">
<img src="{src}" class="smshadow" width="{width}" height="{height}">
</span>
<br><br>
"""
HEADER_TEMPLATE = """<div usr class="gr">
<div class="metadata">
<h2><a href="{url}">{title}</a></h2>
<ul>
<li class="author">
by <span class="name"><span class="username-with-symbol u">
<a class="u regular username" href="{userurl}">{username}</a>\
<span class="user-symbol regular"></span></span></span>,
<span>{date}</span>
</li>
<li class="category">
{categories}
</li>
</ul>
</div>
"""
HEADER_CUSTOM_TEMPLATE = """<div class='boxtop journaltop'>
<h2>
<img src="https://st.deviantart.net/minish/gruzecontrol/icons/journal.gif\
?2" style="vertical-align:middle" alt=""/>
<a href="{url}">{title}</a>
</h2>
Journal Entry: <span>{date}</span>
"""
JOURNAL_TEMPLATE_HTML = """text:<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{title}</title>
<link rel="stylesheet" href="https://st.deviantart.net/\
css/deviantart-network_lc.css?3843780832">
<link rel="stylesheet" href="https://st.deviantart.net/\
css/group_secrets_lc.css?3250492874">
<link rel="stylesheet" href="https://st.deviantart.net/\
css/v6core_lc.css?4246581581">
<link rel="stylesheet" href="https://st.deviantart.net/\
css/sidebar_lc.css?1490570941">
<link rel="stylesheet" href="https://st.deviantart.net/\
css/writer_lc.css?3090682151">
<link rel="stylesheet" href="https://st.deviantart.net/\
css/v6loggedin_lc.css?3001430805">
<style>{css}</style>
<link rel="stylesheet" href="https://st.deviantart.net/\
roses/cssmin/core.css?1488405371919" >
<link rel="stylesheet" href="https://st.deviantart.net/\
roses/cssmin/peeky.css?1487067424177" >
<link rel="stylesheet" href="https://st.deviantart.net/\
roses/cssmin/desktop.css?1491362542749" >
</head>
<body id="deviantART-v7" class="bubble no-apps loggedout w960 deviantart">
<div id="output">
<div class="dev-page-container bubbleview">
<div class="dev-page-view view-mode-normal">
<div class="dev-view-main-content">
<div class="dev-view-deviation">
{shadow}
<div class="journal-wrapper tt-a">
<div class="journal-wrapper2">
<div class="journal {cls} journalcontrol">
{html}
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</body>
</html>
"""
JOURNAL_TEMPLATE_TEXT = """text:{title}
by {username}, {date}
{content}
"""