# -*- coding: utf-8 -*- # Copyright 2015 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extract images from http://www.deviantart.com/""" from .common import AsynchronousExtractor, Message from .. import text import os.path import re info = { "category": "deviantart", "extractor": "DeviantArtExtractor", "directory": ["{category}", "{artist}"], "filename": "{category}_{index}_{title}.{extension}", "pattern": [ r"(?:https?://)?([^\.]+)\.deviantart\.com/gallery/.*", ], } class DeviantArtExtractor(AsynchronousExtractor): def __init__(self, match, config): AsynchronousExtractor.__init__(self, config) self.session.cookies["agegate_state"] = "1" self.artist = match.group(1) def items(self): metadata = self.get_job_metadata() yield Message.Version, 1 yield Message.Directory, metadata for url, data in self.get_works(): data.update(metadata) yield Message.Url, url, data def get_works(self): """Yield all work-items for a deviantart-artist""" url = "http://{}.deviantart.com/gallery/".format(self.artist) params = {"catpath": "/", "offset": 0} while True: page = self.request(url, params=params).text _, pos = text.extract(page, '