mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 10:42:34 +01:00
[deviantart] fix issue with small images
This commit is contained in:
parent
3ebd126b35
commit
e4a661fd6b
@ -13,11 +13,11 @@ from .. import text
|
||||
import re
|
||||
|
||||
class DeviantArtExtractor(AsynchronousExtractor):
|
||||
|
||||
"""Extract all works of an artist on deviantart"""
|
||||
category = "deviantart"
|
||||
directory_fmt = ["{category}", "{artist}"]
|
||||
filename_fmt = "{category}_{index}_{title}.{extension}"
|
||||
pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com/gallery/.*"]
|
||||
pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com(?:/gallery)?/?$"]
|
||||
|
||||
def __init__(self, match):
|
||||
AsynchronousExtractor.__init__(self)
|
||||
@ -57,39 +57,36 @@ class DeviantArtExtractor(AsynchronousExtractor):
|
||||
|
||||
def get_image_metadata(self, image):
|
||||
"""Collect metadata for an image"""
|
||||
match = self.extract_data(image, 'title',
|
||||
tmatch = self.extract_data(image, 'title',
|
||||
r'(.+) by (.+), ([A-Z][a-z]{2} \d+, \d{4}) in')
|
||||
if image.startswith(" ismature"):
|
||||
# adult image
|
||||
url, _ = text.extract(image, 'href="', '"')
|
||||
page = self.request(url).text
|
||||
_ , pos = text.extract(page, ' class="dev-content-normal "', '')
|
||||
url , pos = text.extract(page, ' src="', '"', pos)
|
||||
index , pos = text.extract(page, ' data-embed-id="', '"', pos)
|
||||
width , pos = text.extract(page, ' width="', '"', pos)
|
||||
height, pos = text.extract(page, ' height="', '"', pos)
|
||||
hmatch = self.extract_data(image, 'href', r'[^"]+-(\d+)')
|
||||
|
||||
url, pos = text.extract(image, ' data-super-full-img="', '"', tmatch.end())
|
||||
if url:
|
||||
width , pos = text.extract(image, ' data-super-full-width="', '"', pos)
|
||||
height, pos = text.extract(image, ' data-super-full-height="', '"', pos)
|
||||
else:
|
||||
# normal image
|
||||
index = self.extract_data(image, 'href', r'[^"]+-(\d+)').group(1)
|
||||
url, pos = text.extract(image, ' data-super-full-img="', '"', match.end())
|
||||
url, pos = text.extract(image, ' data-super-img="', '"', pos)
|
||||
if url:
|
||||
width , pos = text.extract(image, ' data-super-full-width="', '"', pos)
|
||||
height, pos = text.extract(image, ' data-super-full-height="', '"', pos)
|
||||
else:
|
||||
url , pos = text.extract(image, ' data-super-img="', '"', pos)
|
||||
width , pos = text.extract(image, ' data-super-width="', '"', pos)
|
||||
height, pos = text.extract(image, ' data-super-height="', '"', pos)
|
||||
data = {
|
||||
"index": index,
|
||||
"title": match.group(1),
|
||||
"artist": match.group(2),
|
||||
"date": match.group(3),
|
||||
else:
|
||||
page = self.request(hmatch.group(0)).text
|
||||
_ , pos = text.extract(page, ' class="dev-content-normal "', '')
|
||||
url , pos = text.extract(page, ' src="', '"', pos)
|
||||
width , pos = text.extract(page, ' width="', '"', pos)
|
||||
height, pos = text.extract(page, ' height="', '"', pos)
|
||||
return url, text.nameext_from_url(url, {
|
||||
"index": hmatch.group(1),
|
||||
"title": text.unescape(tmatch.group(1)),
|
||||
"artist": tmatch.group(2),
|
||||
"date": tmatch.group(3),
|
||||
"width": width,
|
||||
"height": height,
|
||||
}
|
||||
return url, text.nameext_from_url(url, data)
|
||||
})
|
||||
|
||||
@staticmethod
def extract_data(txt, attr, pattern):
    """Extract a HTML attribute and apply a regex to it

    Looks for the first occurrence of ' <attr>="' in 'txt', takes the
    text up to the next double quote as the attribute value and matches
    'pattern' against the beginning of that value (re.match semantics).

    Returns the resulting match object, or None if the attribute is
    missing or its value does not match 'pattern'.
    """
    value, _ = text.extract(txt, ' %s="' % attr, '"')
    # NOTE(review): assumes text.extract() yields None as its first item
    # when the attribute is not present -- confirm against the text module.
    # re.match() raises TypeError for a None subject, so report a missing
    # attribute the same way as a non-matching one instead.
    if value is None:
        return None
    return re.match(pattern, value)
|
||||
|
Loading…
Reference in New Issue
Block a user