# -*- coding: utf-8 -*- # Copyright 2015, 2016 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extract manga-chapters and entire manga from https://www.mangamint.com/""" from .common import Extractor, Message from .. import text, exception import re class MangamintExtractor(Extractor): """Base class for mangamint extractors""" category = "mangamint" directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor}"] filename_fmt = ("{manga}_c{chapter:>03}{chapter-minor}_" "{page:>03}.{extension}") url_base = "https://www.mangamint.com" def __init__(self, match): Extractor.__init__(self) self.url = self.url_base + match.group(1) class MangamintMangaExtractor(MangamintExtractor): """Extractor for mangas from mangamint.com""" subcategory = "manga" pattern = [(r"(?:https?://)?(?:www\.)?mangamint\.com" r"(/manga/[^/\?]+)")] test = [ ("www.mangamint.com/manga/mushishi-manga", { "url": "df7a1f4224d23e392ec09d4c7bbd4fbc873327d0", }), ("https://www.mangamint.com/manga/mushishi", { "exception": exception.NotFoundError, }), ] def items(self): yield Message.Version, 1 for chapter in self.get_chapters(): yield Message.Queue, self.url_base + chapter def get_chapters(self): """Return a list of all chapter urls""" params = {"page": 0} chapters = [] while True: response = self.session.get(self.url, params=params) if response.status_code == 404: raise exception.NotFoundError("manga") page = response.text table, pos = text.extract( page, '', '
') chapters.extend(text.extract_iter(table, '', '<') chid , pos = text.extract(page, 'id="node-', '"', pos) match = re.match(r"(.+) (\d+)([^ ]*)$", manga) return { "manga": match.group(1), "chapter": match.group(2), "chapter-minor": match.group(3), "chapter-id": chid, "lang": "en", "language": "English", } def get_image_urls(self, page): """Extract list of all image-urls for a manga chapter""" params = { "manga_page": 0, "form_id": "select_similar_node_widget", } e = text.extract params["select_node"] , pos = e(page, r'"identifier":"node\/', '"') _ , pos = e(page, '>All pages<', '', pos) params["howmany"] , pos = e(page, 'value="', '"', pos-25) _ , pos = e(page, 'name="form_build_id"', '', pos) params["form_build_id"], pos = e(page, 'value="', '"', pos) url = self.url_base + "/many/callback" page = self.request(url, method="post", data=params).json()["data"] return list(text.extract_iter(page, r'