merge #3189: [tcbscans] add 'chapter' and 'manga' extractors

2025-02-01 03:51:42 +01:00 · 2023-01-06 13:42:27 +01:00 · 2023-01-06 13:42:27 +01:00 · d1dd52349a
commit d1dd52349a
parent ac78712618 e8541a131d
4 changed files with 114 additions and 0 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@ -799,6 +799,12 @@ Consider all sites to be NSFW unless otherwise known.
    <td>Episodes, Series</td>
    <td>Supported</td>
 </tr>
+<tr>
+    <td>TCB Scans</td>
+    <td>https://onepiecechapters.com/</td>
+    <td>Chapters, Manga</td>
+    <td></td>
+</tr>
 <tr>
    <td>Telegraph</td>
    <td>https://telegra.ph/</td>
--- a/gallery_dl/extractor/init.py
+++ b/gallery_dl/extractor/init.py
@ -136,6 +136,7 @@ modules = [
    "speakerdeck",
    "subscribestar",
    "tapas",
+    "tcbscans",
    "telegraph",
    "toyhouse",
    "tsumino",
--- a/gallery_dl/extractor/tcbscans.py
+++ b/gallery_dl/extractor/tcbscans.py
@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://onepiecechapters.com/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text
+
+
+class TcbscansChapterExtractor(ChapterExtractor):
+    category = "tcbscans"
+    pattern = (r"(?:https?://)?onepiecechapters\.com"
+               r"(/chapters/\d+/[^/?#]+)")
+    root = "https://onepiecechapters.com"
+    test = (
+        (("https://onepiecechapters.com"
+          "/chapters/4708/chainsaw-man-chapter-108"), {
+            "pattern": (r"https://cdn\.[^/]+"
+                        r"/(file|attachments/[^/]+)/[^/]+/[^.]+\.\w+"),
+            "count"  : 17,
+            "keyword": {
+                "manga": "Chainsaw Man",
+                "chapter": 108,
+                "chapter_minor": "",
+                "lang": "en",
+                "language": "English",
+            },
+        }),
+        ("https://onepiecechapters.com/chapters/4716/one-piece-chapter-1065", {
+            "pattern": (r"https://cdn\.[^/]+"
+                        r"/(file|attachments/[^/]+)/[^/]+/[^.]+\.\w+"),
+            "count"  : 18,
+            "keyword": {
+                "manga": "One Piece",
+                "chapter": 1065,
+                "chapter_minor": "",
+                "lang": "en",
+                "language": "English",
+            },
+        }),
+        (("https://onepiecechapters.com/"
+          "chapters/44/ace-novel-manga-adaptation-chapter-1")),
+    )
+
+    def images(self, page):
+        return [
+            (url, None)
+            for url in text.extract_iter(
+                page, '<img class="fixed-ratio-content" src="', '"')
+        ]
+
+    def metadata(self, page):
+        manga, _, chapter = text.extr(
+            page, 'font-bold mt-8">', "</h1>").rpartition(" - Chapter ")
+        chapter, sep, minor = chapter.partition(".")
+        return {
+            "manga": text.unescape(manga),
+            "chapter": text.parse_int(chapter),
+            "chapter_minor": sep + minor,
+            "lang": "en", "language": "English",
+        }
+
+
+class TcbscansMangaExtractor(MangaExtractor):
+    category = "tcbscans"
+    chapterclass = TcbscansChapterExtractor
+    pattern = (r"(?:https?://)?onepiecechapters\.com"
+               r"(/mangas/\d+/[^/?#]+)")
+    root = "https://onepiecechapters.com"
+    test = (
+        ("https://onepiecechapters.com/mangas/13/chainsaw-man", {
+            "pattern": TcbscansChapterExtractor.pattern,
+            "range"  : "1-50",
+            "count"  : 50,
+        }),
+        ("https://onepiecechapters.com/mangas/4/jujutsu-kaisen", {
+            "pattern": TcbscansChapterExtractor.pattern,
+            "range"  : "1-50",
+            "count"  : 50,
+        }),
+        ("https://onepiecechapters.com/mangas/15/hunter-x-hunter"),
+    )
+
+    def chapters(self, page):
+        data = {
+            "manga": text.unescape(text.extr(
+                page, 'class="my-3 font-bold text-3xl">', "</h1>")),
+            "lang": "en", "language": "English",
+        }
+
+        results = []
+        page = text.extr(page, 'class="col-span-2"', 'class="order-1')
+        for chapter in text.extract_iter(page, "<a", "</a>"):
+            url = text.extr(chapter, 'href="', '"')
+            data["title"] = text.unescape(text.extr(
+                chapter, 'text-gray-500">', "</div>"))
+            chapter = text.extr(
+                chapter, 'font-bold">', "</div>").rpartition(" Chapter ")[2]
+            chapter, sep, minor = chapter.partition(".")
+            data["chapter"] = text.parse_int(chapter)
+            data["chapter_minor"] = sep + minor
+            results.append((self.root + url, data.copy()))
+        return results
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@ -104,6 +104,7 @@ CATEGORY_MAP = {
    "speakerdeck"    : "Speaker Deck",
    "subscribestar"  : "SubscribeStar",
    "tbib"           : "The Big ImageBoard",
+    "tcbscans"       : "TCB Scans",
    "thatpervert"    : "ThatPervert",
    "thebarchive"    : "The /b/ Archive",
    "thecollection"  : "The /co/llection",