[hentaicafe] add chapter and manga extractors (#101)

2024-11-25 12:12:34 +01:00 · 2018-09-05 21:08:40 +02:00 · 2018-09-05 21:08:40 +02:00 · 3ecea4cf36
commit 3ecea4cf36
parent 41249f3ead
4 changed files with 69 additions and 0 deletions
--- a/docs/supportedsites.rst
+++ b/docs/supportedsites.rst
@ -27,6 +27,7 @@ Futaba Channel       https://www.2chan.net/              Threads
 Gelbooru             https://gelbooru.com/               Pools, Posts, Tag-Searches
 Gfycat               https://gfycat.com/                 individual Images
 HBrowse              http://www.hbrowse.com/             Chapters, Manga
+Hentai Cafe          https://hentai.cafe/                Chapters, Manga
 Hentai Foundry       https://www.hentai-foundry.com/     Images from Users, individual Images
 Hentai2Read          https://hentai2read.com/            Chapters, Manga
 HentaiHere           https://hentaihere.com/             Chapters, Manga
--- a/gallery_dl/extractor/init.py
+++ b/gallery_dl/extractor/init.py
@ -34,6 +34,7 @@ modules = [
    "gfycat",
    "hbrowse",
    "hentai2read",
+    "hentaicafe",
    "hentaifoundry",
    "hentaihere",
    "hitomi",
--- a/gallery_dl/extractor/hentaicafe.py
+++ b/gallery_dl/extractor/hentaicafe.py
@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2018 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://hentai.cafe/"""
+
+from . import foolslide
+from .. import text
+import re
+
+
+class HentaicafeChapterExtractor(foolslide.FoolslideChapterExtractor):
+    """Extractor for manga-chapters from hentai.cafe"""
+    category = "hentaicafe"
+    # URL pattern built by the shared foolslide helper for this host;
+    # the "www." prefix is optional.
+    pattern = foolslide.chapter_pattern(r"(?:www\.)?hentai\.cafe/manga")
+    # Sample chapter URL with expected result hashes
+    # NOTE(review): presumably consumed by the project's test-suite - confirm
+    test = [("https://hentai.cafe/manga/read/saitom-box/en/0/1/", {
+        "url": "8c6a8c56875ba3ed7ab0a74a64f9960077767fc2",
+        "keyword": "1b24a3e8625b89d160d01ce3eb5e5eb12fbbf648",
+    })]
+
+    def get_metadata(self, page):
+        """Build chapter metadata from the <title> of a chapter page
+
+        The unescaped title text is split at the first " :: " into the
+        manga name and a chapter string. Both values are passed, together
+        with this extractor's URL, to parse_chapter_url(), whose result
+        is returned.
+        """
+        info = text.unescape(text.extract(page, '<title>', '</title>')[0])
+        # partition() yields an empty chapter_string if " :: " is absent
+        manga, _, chapter_string = info.partition(" :: ")
+        return self.parse_chapter_url(self.url, {
+            "manga": manga,
+            # drop trailing spaces and colons left over from the title
+            "chapter_string": chapter_string.rstrip(" :"),
+        })
+
+
+class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor):
+    """Extractor for manga from hentai.cafe"""
+    category = "hentaicafe"
+    # Matches exactly one path segment below the site root, optionally
+    # prefixed by "/manga/series"; scheme and "www." are optional.
+    # Group 1 captures the URL without its scheme.
+    pattern = [r"(?:https?://)?((?:www\.)?hentai\.cafe"
+               r"(?:/manga/series)?/[^/?&#]+/?$)"]
+    # Sample URLs with expected result hashes
+    # NOTE(review): presumably consumed by the project's test-suite - confirm
+    test = [
+        # single chapter
+        ("https://hentai.cafe/hazuki-yuuto-summer-blues/", {
+            "url": "f8e24a07d6fbb7c6a6ec5ad8ad8faf2436f8751b",
+        }),
+        # multi-chapter
+        ("https://hentai.cafe/saitom-saitom-box/", {
+            "url": "ca3e8a91531fd6acd863d93ac3afbd8ead06a076",
+        }),
+        # foolslide URL
+        ("https://hentai.cafe/manga/series/saitom-box/", {
+            "url": "ca3e8a91531fd6acd863d93ac3afbd8ead06a076",
+            "keyword": "46012b857eb1a1394bc55c0efe7aa4e7f704d10d",
+        }),
+    ]
+    # NOTE(review): presumably disables an order reversal done by the base
+    # class; chapters() reverses foolslide listings itself - confirm
+    reverse = False
+
+    def chapters(self, page):
+        """Return the chapter entries found on a manga page
+
+        For foolslide series URLs (containing "/manga/series/") the
+        base-class result is reversed in place and returned. For any
+        other page, the href of every <a> element whose class attribute
+        starts with "x-btn" is returned as a (url, {}) tuple.
+        """
+        if "/manga/series/" in self.url:
+            chapters = foolslide.FoolslideMangaExtractor.chapters(self, page)
+            chapters.reverse()
+            return chapters
+
+        # non-foolslide page: collect link targets, with empty metadata
+        return [
+            (url, {})
+            for url in re.findall(
+                r'<a +class="x-btn[^"]*" +href="([^"]+)"', page)
+        ]
--- a/scripts/build_supportedsites.py
+++ b/scripts/build_supportedsites.py
@ -22,6 +22,7 @@ CATEGORY_MAP = {
    "fallenangels"   : "Fallen Angels Scans",
    "hbrowse"        : "HBrowse",
    "hentai2read"    : "Hentai2Read",
+    "hentaicafe"     : "Hentai Cafe",
    "hentaifoundry"  : "Hentai Foundry",
    "hentaihere"     : "HentaiHere",
    "hitomi"         : "Hitomi.la",