From d1673d912acdbe9a22d000e54bddece8acc82cdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 26 Nov 2015 03:06:08 +0100 Subject: [PATCH] [mangahere] add chapter-extractor --- gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/mangahere.py | 69 +++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 gallery_dl/extractor/mangahere.py diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 53d4c2f6..aad5006d 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -31,6 +31,7 @@ modules = [ "imgur", "kissmanga", "konachan", + "mangahere", "mangamint", "mangapanda", "mangareader", diff --git a/gallery_dl/extractor/mangahere.py b/gallery_dl/extractor/mangahere.py new file mode 100644 index 00000000..2df004b1 --- /dev/null +++ b/gallery_dl/extractor/mangahere.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- + +# Copyright 2015 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extract manga pages from http://www.mangahere.co/""" + +from .common import AsynchronousExtractor, Message +from .. import text +import re + +class MangaHereExtractor(AsynchronousExtractor): + + category = "mangahere" + directory_fmt = ["{category}", "{manga}", "c{chapter:>03}"] + filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}" + pattern = [r"(?:https?://)?(?:www\.)?mangahere\.co/manga/([^/]+(?:/v0*(\d+))?/c0*(\d+))"] + url_fmt = "http://www.mangahere.co/manga/{}/{}.html" + + def __init__(self, match): + AsynchronousExtractor.__init__(self) + self.part = match.group(1) + self.volume = match.group(2) + self.chapter = match.group(3) + + def items(self): + page = self.request(self.url_fmt.format(self.part, 1)).text + data = self.get_job_metadata(page) + yield Message.Version, 1 + yield Message.Directory, data.copy() + for i, url in zip(range(int(data["count"])), (self.get_image_urls(page))): + data["page"] = i+1 + text.nameext_from_url(url, data) + yield Message.Url, url, data.copy() + + def get_job_metadata(self, page): + """Collect metadata for extractor-job""" + manga, pos = text.extract(page, '', '') + chid , pos = text.extract(page, 'a.mhcdn.net/store/manga/', '/', pos) + _ , pos = text.extract(page, '