From bcb23e44cee25be618dc52b3bb6b020455784931 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Sat, 21 Nov 2015 04:19:23 +0100
Subject: [PATCH] [powermanga] transfer code from redhawkscans

---
 gallery_dl/extractor/powermanga.py | 46 +++++++++++++++++++++++++++---
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/gallery_dl/extractor/powermanga.py b/gallery_dl/extractor/powermanga.py
index 40ebc909..ce66fa09 100644
--- a/gallery_dl/extractor/powermanga.py
+++ b/gallery_dl/extractor/powermanga.py
@@ -8,7 +8,11 @@
 
 """Extract manga pages from http://powermanga.org/"""
 
-from .redhawkscans import RedHawkScansExtractor
+from .common import Extractor, Message
+from .. import text, iso639_1
+import os.path
+import json
+import re
 
 info = {
     "category": "powermanga",
@@ -21,10 +25,44 @@ info = {
     ],
 }
 
-class PowerMangaExtractor(RedHawkScansExtractor):
+class PowerMangaExtractor(Extractor):
 
     def __init__(self, match):
-        RedHawkScansExtractor.__init__(self, match)
-        extra = "er" if "://reader" in match.string else ""
+        Extractor.__init__(self)
         self.category = info["category"]
+        self.part = match.group(1)
+        self.lang = match.group(2)
+        extra = "er" if "://reader" in match.string else ""
         self.url_base = "https://read" + extra + ".powermanga.org/read/"
+
+    def items(self):
+        yield Message.Version, 1
+        data, pages = self.get_job_metadata()
+        yield Message.Directory, data
+        for page_index, page_data in enumerate(pages, 1):
+            name, ext = os.path.splitext(page_data["filename"])
+            page_data.update(data)
+            page_data["page"] = page_index
+            page_data["name"] = name
+            page_data["extension"] = ext[1:]
+            yield Message.Url, "https" + page_data["url"][4:], page_data
+
+    def get_job_metadata(self):
+        """Collect metadata for extractor-job"""
+        response = self.request(self.url_base + self.part)
+        response.encoding = "utf-8"
+        page = response.text
+        _        , pos = text.extract(page, '<h1 class="tbtitle dnone">', '')
+        manga    , pos = text.extract(page, 'title="', '"', pos)
+        chapter  , pos = text.extract(page, '">', '</a>', pos)
+        json_data, pos = text.extract(page, 'var pages = ', ';', pos)
+        match = re.match(r"(\w+ (\d+)([^:+]*)(?:: (.*))?|[^:]+)", chapter)
+        return {
+            "category": self.category,
+            "manga": text.unescape(manga),
+            "chapter": match.group(2) or match.group(1),
+            "chapter-minor": match.group(3) or "",
+            "lang": self.lang,
+            "language": iso639_1.code_to_language(self.lang),
+            "title": text.unescape(match.group(4) or ""),
+        }, json.loads(json_data)
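
A rough, standalone sketch of what the chapter-parsing regular expression in
get_job_metadata() yields; the sample labels below are invented for illustration
and are not taken from powermanga.org.

    import re

    # Same pattern as used in get_job_metadata() above.
    pattern = re.compile(r"(\w+ (\d+)([^:+]*)(?:: (.*))?|[^:]+)")

    # Hypothetical chapter labels, chosen only to show the three cases the
    # extractor handles: plain chapter, decimal chapter, and one-shot.
    for label in ("Chapter 25: A Title", "Chapter 25.5: A Title", "Oneshot"):
        m = pattern.match(label)
        chapter = m.group(2) or m.group(1)   # falls back to the whole label
        minor   = m.group(3) or ""           # e.g. ".5" for decimal chapters
        title   = m.group(4) or ""
        print(chapter, repr(minor), repr(title))

    # Expected output:
    # 25 '' 'A Title'
    # 25 '.5' 'A Title'
    # Oneshot '' ''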