[readcomiconline] add comic-issue and comic extractor

2024-11-22 10:42:34 +01:00 · 2016-11-14 18:29:45 +01:00 · 2016-11-14 18:29:45 +01:00 · b634ace39e
commit b634ace39e
parent 99440ca51a
4 changed files with 70 additions and 1 deletions
--- a/README.rst
+++ b/README.rst
@@ -52,6 +52,8 @@ Supported Sites
    mangahere.co, mangamint.com, mangapanda.com, mangapark.me, mangareader.net,
    mangashare.com, mangastream.com, powermanga.org, raw.senmanga.com,
    reader.sensescans.com, thespectrum.net
+* Comic:
+    readcomiconline.to
 * Hentai:
    exhentai.org, hbrowse.com, hentai2read.com,
    hentaibox.net, hentaihere.com, hitomi.la, luscious.net, nhentai.net
--- a/gallery_dl/cloudflare.py
+++ b/gallery_dl/cloudflare.py
@@ -19,7 +19,8 @@ def bypass_ddos_protection(session, url):
    session.cookies = solve_challenge(session, url)
    return session

-@cache(maxage=24*60*60, keyarg=1)
+# TODO: this is only a temporary workaround for readcomiconline.to
+@cache(maxage=30*60, keyarg=1)
 def solve_challenge(session, url):
    session.headers["Referer"] = url
    page = session.get(url).text
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -50,6 +50,7 @@ modules = [
    "nijie",
    "pinterest",
    "powermanga",
+    "readcomiconline",
    "rule34",
    "safebooru",
    "sankaku",
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2016 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extract comic-issues and entire comics from http://readcomiconline.to/"""
+
+from .common import Extractor
+from .. import text
+from . import kissmanga
+import re
+
+class ReadcomiconlineExtractor(Extractor):
+    """Base class for readcomiconline extractors; stores the matched URL"""
+    category = "readcomiconline"
+    directory_fmt = ["{category}", "{comic}", "{issue:>03}"]
+    filename_fmt = "{comic}_{issue:>03}_{page:>03}.{extension}"
+    url_base = "http://readcomiconline.to"  # scheme and host of the site
+
+    def __init__(self, match):
+        Extractor.__init__(self)
+        self.url = match.group(0)  # whole text matched by the pattern
+
+
+class ReadcomiconlineComicExtractor(ReadcomiconlineExtractor,
+                                    kissmanga.KissmangaMangaExtractor):
+    """Extractor for whole comics (all issues) from readcomiconline.to"""
+    subcategory = "comic"
+    pattern = [r"(?:https?://)?(?:www\.)?readcomiconline\.to/Comic/[^/]+/?$"]
+    test = [("http://readcomiconline.to/Comic/W-i-t-c-h", {
+        "url": "c5a530538a30b176916e30cbe223a93d83cb2691",
+    })]
+
+    def get_chapters(self):
+        """Return an iterator over all chapter urls, in reverse page order"""
+        page = self.request(self.url).text
+        return reversed(list(  # materialize so reversed() has a sequence
+            text.extract_iter(page, '                <li><a href="', '"')
+        ))
+
+
+class ReadcomiconlineIssueExtractor(ReadcomiconlineExtractor,
+                                    kissmanga.KissmangaChapterExtractor):
+    """Extractor for single comic-issues from readcomiconline.to"""
+    subcategory = "issue"
+    pattern = [r"(?:https?://)?(?:www\.)?readcomiconline\.to/Comic/.+/.+\?id=\d+"]
+    test = [("http://readcomiconline.to/Comic/W-i-t-c-h/Issue-130?id=22289", {
+        "url": "dd1659d9eb5f6ebb421e66316c98d71682a44c2d",
+        "keyword": "bc2f937893c1204ba40e0293e86f0a8943be1304",
+    })]
+
+    def get_job_metadata(self, page):
+        """Return job metadata: comic and issue from page, language fixed"""
+        comic, pos = text.extract(page, "   - Read\r\n    ", "\r\n")
+        iinfo, pos = text.extract(page, "    ", "\r\n", pos)
+        match = re.match(r"(?:Issue )?#(\d+)|(.+)", iinfo)
+        return {  # NOTE(review): match is None if iinfo is empty -- confirm
+            "comic": comic,
+            "issue": match.group(1) or match.group(2),  # digits or raw text
+            "lang": "en",  # hard-coded
+            "language": "English",  # hard-coded
+        }