From 30d6e284b08e7ffc1f9ec19a5cc2e03f1f9f880f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 21 Aug 2019 23:47:17 +0200 Subject: [PATCH] [deviantart] use NAPI for artworks and scraps (#392) TODO: - journal downloads - test for all media types --- gallery_dl/extractor/deviantart.py | 346 ++++++++++++++++++----------- 1 file changed, 212 insertions(+), 134 deletions(-) diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index aa05fb19..d91797f5 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -27,7 +27,7 @@ BASE_PATTERN = ( class DeviantartExtractor(Extractor): - """Base class for deviantart extractors""" + """Base class for deviantart extractors using the OAuth API""" category = "deviantart" directory_fmt = ("{category}", "{author[username]!l}") filename_fmt = "{category}_{index}_{title}.{extension}" @@ -232,14 +232,6 @@ class DeviantartExtractor(Extractor): if mtype and mtype.startswith("image/"): content.update(data) - def _html_request(self, url, **kwargs): - cookies = {"userinfo": ( - '__167217c8e6aac1a3331f;{"username":"","uniqueid":"ab2e8b184471bf0' - 'e3f8ed3ee7a3220aa","vd":"Bc7vEx,BdC7Fy,A,J,A,,B,A,B,BdC7Fy,BdC7XU' - ',J,J,A,BdC7XU,13,A,B,A,,A,A,B,A,A,,A","attr":56}' - )} - return self.request(url, cookies=cookies, **kwargs) - class DeviantartGalleryExtractor(DeviantartExtractor): """Extractor for all deviations from an artist's gallery""" @@ -367,73 +359,6 @@ class DeviantartFolderExtractor(DeviantartExtractor): deviation["folder"] = self.folder -class DeviantartDeviationExtractor(DeviantartExtractor): - """Extractor for single deviations""" - subcategory = "deviation" - archive_fmt = "{index}.{extension}" - pattern = BASE_PATTERN + r"/((?:art|journal)/[^/?&#]+-\d+)" - test = ( - (("https://www.deviantart.com/shimoda7/art/" - "For-the-sake-of-a-memory-10073852"), { - "options": (("original", 0),), - "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e", - }), - ("https://www.deviantart.com/zzz/art/zzz-1234567890", { - "exception": exception.NotFoundError, - }), - (("https://www.deviantart.com/myria-moon/art/" - "Aime-Moi-part-en-vadrouille-261986576"), { - "pattern": (r"https?://s3\.amazonaws\.com/origin-orig\." - r"deviantart\.net/a383/f/2013/135/e/7/[^.]+\.jpg\?"), - }), - # wixmp URL rewrite - (("https://www.deviantart.com/citizenfresh/art/" - "Hverarond-14-the-beauty-of-the-earth-789295466"), { - "pattern": (r"https://images-wixmp-\w+\.wixmp\.com" - r"/intermediary/f/[^/]+/[^.]+\.jpg$") - }), - # wixmp URL rewrite v2 (#369) - (("https://www.deviantart.com/josephbiwald/art/" - "Destiny-2-Warmind-Secondary-Keyart-804940104"), { - "pattern": r"https://images-wixmp-\w+\.wixmp\.com/.*,q_100," - }), - # non-download URL for GIFs (#242) - (("https://www.deviantart.com/skatergators/art/" - "COM-Monique-Model-781571783"), { - "pattern": (r"https://images-wixmp-\w+\.wixmp\.com" - r"/f/[^/]+/[^.]+\.gif\?token="), - }), - # external URLs from description (#302) - (("https://www.deviantart.com/uotapo/art/" - "INANAKI-Memorial-Humane7-590297498"), { - "options": (("extra", 1), ("original", 0)), - "pattern": r"https?://sta\.sh/\w+$", - "range": "2-", - "count": 4, - }), - # old-style URLs - ("https://shimoda7.deviantart.com" - "/art/For-the-sake-of-a-memory-10073852"), - ("https://myria-moon.deviantart.com" - "/art/Aime-Moi-part-en-vadrouille-261986576"), - ("https://zzz.deviantart.com/art/zzz-1234567890"), - ) - - skip = Extractor.skip - - def __init__(self, match): - DeviantartExtractor.__init__(self, match) - self.path = match.group(3) - - def deviations(self): - url = "{}/{}/{}".format(self.root, self.user, self.path) - response = self._html_request(url, fatal=False) - deviation_id = text.extract(response.text, '//deviation/', '"')[0] - if response.status_code >= 400 or not deviation_id: - raise exception.NotFoundError("image") - return (self.api.deviation(deviation_id),) - - class DeviantartStashExtractor(DeviantartExtractor): """Extractor for sta.sh-ed deviations""" subcategory = "stash" @@ -570,64 +495,6 @@ class DeviantartJournalExtractor(DeviantartExtractor): return self.api.browse_user_journals(self.user, self.offset) -class DeviantartScrapsExtractor(DeviantartExtractor): - """Extractor for an artist's scraps""" - subcategory = "scraps" - directory_fmt = ("{category}", "{username}", "Scraps") - archive_fmt = "s_{username}_{index}.{extension}" - pattern = BASE_PATTERN + r"/gallery/\?catpath=scraps\b" - test = ( - ("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps", { - "count": 12, - "options": (("original", False),), - }), - ("https://shimoda7.deviantart.com/gallery/?catpath=scraps"), - ) - - def deviations(self): - url = "{}/{}/gallery/?catpath=scraps".format(self.root, self.user) - page = self._html_request(url).text - csrf, pos = text.extract(page, '"csrf":"', '"') - iid , pos = text.extract(page, '"requestid":"', '"', pos) - - params = { - "iid": iid + "-jz6wpfib-1.1", - "mp": 0, - } - headers = { - "Referer": url, - } - data = { - "username": self.user, - "offset": self.offset, - "limit": "24", - "catpath": "scraps", - "_csrf": csrf, - "dapiIid": params["iid"] - } - url = "https://www.deviantart.com/dapi/v1/gallery/0" - - while True: - content = self.request( - url, method="POST", params=params, - headers=headers, data=data).json()["content"] - - for item in content["results"]: - if item["html"].startswith('