From c0efea339e906ede37188ee2e6e8a5d753d70476 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 4 Nov 2015 00:03:48 +0100 Subject: [PATCH] [imagebam] rewrite/fix --- gallery_dl/extractor/imagebam.py | 85 +++++++++++++++----------------- 1 file changed, 40 insertions(+), 45 deletions(-) diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py index 809eaa1a..ec65430d 100644 --- a/gallery_dl/extractor/imagebam.py +++ b/gallery_dl/extractor/imagebam.py @@ -10,14 +10,15 @@ from .common import AsynchronousExtractor, Message from .. import text +import os.path info = { "category": "imagebam", "extractor": "ImagebamExtractor", - "directory": ["{category}", "{title} - {key}"], - "filename": "{num:>03}-{name}", + "directory": ["{category}", "{title} - {gallery-key}"], + "filename": "{num:>03}-{filename}", "pattern": [ - r"(?:https?://)?(?:www\.)?imagebam\.com/(gallery)/([^/]+).*", + r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+).*", ], } @@ -27,54 +28,48 @@ class ImagebamExtractor(AsynchronousExtractor): def __init__(self, match): AsynchronousExtractor.__init__(self) - self.match = match - self.num = 0 - self.metadata = {} + self.gkey = match.group(1) def items(self): - self.num = 0 - self.metadata = self.get_job_metadata() + data = self.get_job_metadata() + data["num"] = 0 yield Message.Version, 1 - yield Message.Directory, self.metadata - - next_url = self.metadata["first-url"] - done = False - while not done: - # get current page - page = self.request(self.url_base + next_url).text - - # get url for next page - next_url, pos = text.extract(page, "next image" we are done - if not page.startswith(">next image", pos): - done = True - - # get image url - img_url, pos = text.extract(page, 'onclick="scale(this);" src="', '"', pos) - - yield Message.Url, img_url, self.get_file_metadata(img_url) + yield Message.Directory, data + for image_url, image_id in self.get_images(data["first-url"]): + data["id"] = image_id + data["filename"] = text.unquote(text.filename_from_url(image_url)) + name, ext = os.path.splitext(data["filename"]) + data["num"] += 1 + data["name"] = name + data["extension"] = ext[1:] + yield Message.Url, image_url, data.copy() def get_job_metadata(self): """Collect metadata for extractor-job""" - gallery_key = self.match.group(2) - page = self.request(self.url_base + "/gallery/" + gallery_key).text - _ , pos = text.extract(page, " ", " <", pos) - count, pos = text.extract(page, "'>", " images", pos) - url , pos = text.extract(page, " ", " <"), + ("count" , "'>", " images"), + ("first-url", "', '') + if pos == 0: + done = True + else: + url, pos = text.extract(page, ' href="', '"', pos-70) + image_id , pos = text.extract(page, '