From 3510faaed5d75209bf33879ff2a8d7a6178dc98f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 10 Nov 2015 00:55:01 +0100 Subject: [PATCH] [sankaku] always use correct file-url --- gallery_dl/extractor/sankaku.py | 46 +++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index 0b719663..0835e29b 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -8,7 +8,7 @@ """Extract images from https://chan.sankakucomplex.com/""" -from .common import Extractor, Message +from .common import AsynchronousExtractor, Message from .. import text import os.path @@ -22,24 +22,25 @@ info = { ], } -class SankakuExtractor(Extractor): +class SankakuExtractor(AsynchronousExtractor): url = "https://chan.sankakucomplex.com/" def __init__(self, match): - Extractor.__init__(self) + AsynchronousExtractor.__init__(self) self.tags = text.unquote(match.group(1)) self.session.headers["User-Agent"] = ( "Mozilla/5.0 Gecko/20100101 Firefox/40.0" ) def items(self): - yield Message.Version, 1 data = self.get_job_metadata() + yield Message.Version, 1 + yield Message.Headers, self.session.headers yield Message.Directory, data for image in self.get_images(): - data.update(image) - yield Message.Url, image["file-url"], data + image.update(data) + yield Message.Url, image["file-url"], image def get_job_metadata(self): """Collect metadata for extractor-job""" @@ -49,33 +50,40 @@ class SankakuExtractor(Extractor): } def get_images(self): - image = {} params = { "tags": self.tags, "page": 1, } while True: - pos = 0 count = 0 page = self.request(self.url, params=params).text + pos = text.extract(page, '