1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-25 12:12:34 +01:00

[deviantart] add single image extractor

This commit is contained in:
Mike Fährmann 2015-12-06 21:13:57 +01:00
parent e4a661fd6b
commit 7d965a2897
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

View File

@ -8,13 +8,14 @@
"""Extract images from http://www.deviantart.com/"""
from .common import AsynchronousExtractor, Message
from .common import Extractor, AsynchronousExtractor, Message
from .. import text
import re
class DeviantArtExtractor(AsynchronousExtractor):
class DeviantArtUserExtractor(AsynchronousExtractor):
"""Extract all works of an artist on deviantart"""
category = "deviantart"
subcategory = "user"
directory_fmt = ["{category}", "{artist}"]
filename_fmt = "{category}_{index}_{title}.{extension}"
pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com(?:/gallery)?/?$"]
@ -90,3 +91,35 @@ class DeviantArtExtractor(AsynchronousExtractor):
"""Extract a HTML attribute and apply a regex to it"""
txt, _ = text.extract(txt, ' %s="' % attr, '"')
return re.match(pattern, txt)
class DeviantArtImageExtractor(Extractor):
    """Extractor for one individual image page on deviantart"""
    category = "deviantart"
    subcategory = "image"
    directory_fmt = ["{category}", "{artist}"]
    filename_fmt = "{category}_{index}_{title}.{extension}"
    # the only capture group is the run of digits ending an /art/ URL
    pattern = [r"(?:https?://)?[^\.]+\.deviantart\.com/art/.+-(\d+)$"]

    def __init__(self, match):
        """Store the matched URL and the digits captured from its end"""
        Extractor.__init__(self)
        # group 0 is the complete match, group 1 the trailing digits
        self.url, self.index = match.group(0, 1)

    def items(self):
        """Yield the version, directory and url messages for the image"""
        html = self.request(self.url).text

        # (key, begin-marker, end-marker) triples handed to extract_all;
        # presumably each value is the page text between the two markers
        # -- TODO confirm against text.extract_all
        fields = (
            ('title', '"og:title" content="', '"'),
            ('image', '"og:image" content="', '"'),
            ('width', '"og:image:width" content="', '"'),
            ('height', '"og:image:height" content="', '"'),
            ('url', '"og:url" content="', '"'),
            ('description', '"og:description" content="', '"'),
            ('date', '<span class="cc-time"><a title="', '"'),
        )
        seed = {'category': self.category, "index": self.index}
        metadata = text.extract_all(html, fields, values=seed)[0]

        # artist is taken from the "url" value: the part after "//" and
        # before the first following "."
        metadata["artist"] = text.extract(metadata["url"], "//", ".")[0]
        # NOTE(review): the 8 looks like a start offset into the raw date
        # string, with the result cut at the next ", " -- confirm
        metadata["date"] = text.extract(metadata["date"], "", ", ", 8)[0]
        # presumably adds name/extension entries derived from the image
        # url to metadata -- verify against text.nameext_from_url
        text.nameext_from_url(metadata["image"], metadata)

        yield Message.Version, 1
        yield Message.Directory, metadata
        yield Message.Url, metadata["image"], metadata