# -*- coding: utf-8 -*-

# Copyright 2015, 2016 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extract manga-chapters and entire manga from https://www.mangamint.com/"""

from .common import Extractor, Message
from .. import text, exception
import re


class MangamintExtractor(Extractor):
    """Base class for mangamint extractors.

    Provides the category name, the directory/filename format templates
    and the site root shared by the mangamint extractors, and resolves
    the absolute URL of the matched resource.
    """

    category = "mangamint"
    # Format templates; the placeholders name the metadata keys they expect
    # ("category", "manga", "chapter", "chapter-minor", "page", "extension").
    # NOTE(review): presumably consumed by the Extractor framework -- confirm.
    directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor}"]
    filename_fmt = ("{manga}_c{chapter:>03}{chapter-minor}_"
                    "{page:>03}.{extension}")
    url_base = "https://www.mangamint.com"

    def __init__(self, match):
        """Initialize the extractor and store the absolute target URL.

        ``match`` is a regular-expression match object whose first group
        holds the site-relative path; it is appended to ``url_base``.
        """
        Extractor.__init__(self)
        self.url = self.url_base + match.group(1)


# NOTE(review): the original text had all of its newlines collapsed and is
# truncated in the middle of ``MangamintMangaExtractor.get_chapters``.  The
# incomplete class is preserved verbatim on the comment line below instead of
# being reconstructed; restore its line breaks once the rest of the method is
# available.
# class MangamintMangaExtractor(MangamintExtractor): """Extractor for mangas from mangamint.com""" subcategory = "manga" pattern = [(r"(?:https?://)?(?:www\.)?mangamint\.com" r"(/manga/[^/\?]+)")] test = [ ("www.mangamint.com/manga/mushishi-manga", { "url": "df7a1f4224d23e392ec09d4c7bbd4fbc873327d0", }), ("https://www.mangamint.com/manga/mushishi", { "exception": exception.NotFoundError, }), ] def items(self): yield Message.Version, 1 for chapter in self.get_chapters(): yield Message.Queue, self.url_base + chapter def get_chapters(self): """Return a list of all chapter urls""" params = {"page": 0} chapters = [] while True: response = self.session.get(self.url, params=params) if response.status_code == 404: raise exception.NotFoundError("manga") page = response.text table, pos = text.extract( page, '