2018-03-05 18:37:21 +01:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2021-01-21 21:35:42 +01:00
|
|
|
# Copyright 2018-2021 Mike Fährmann
|
2018-03-05 18:37:21 +01:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
2020-01-14 09:49:57 +01:00
|
|
|
"""Extractors for https://mangadex.org/"""
|
2018-03-05 18:37:21 +01:00
|
|
|
|
2018-08-10 16:26:10 +02:00
|
|
|
from .common import Extractor, Message
|
2020-12-04 20:56:30 +01:00
|
|
|
from .. import text, util, exception
|
2019-03-14 22:21:49 +01:00
|
|
|
from ..cache import memcache
|
2018-03-05 18:37:21 +01:00
|
|
|
|
|
|
|
|
2018-08-10 16:26:10 +02:00
|
|
|
class MangadexExtractor(Extractor):
    """Common base for all mangadex extractors

    Provides the site and API roots, the language-code translation
    table, and the helper methods that query the '/v2/' API endpoints.
    """
    category = "mangadex"
    root = "https://mangadex.org"
    api_root = "https://api.mangadex.org"

    # translate mangadex language codes into ISO 639-1 codes
    iso639_map = {"br": "pt", "ct": "ca", "gb": "en", "vn": "vi"}

    def chapter_data(self, chapter_id):
        """Return the API 'data' object for the chapter 'chapter_id'"""
        endpoint = "{}/v2/chapter/{}".format(self.api_root, chapter_id)
        response = self.request(endpoint)
        return response.json()["data"]

    @memcache(keyarg=1)
    def manga_data(self, manga_id):
        """Return the API 'data' object for the manga 'manga_id'"""
        endpoint = "{}/v2/manga/{}".format(self.api_root, manga_id)
        response = self.request(endpoint)
        return response.json()["data"]

    def manga_chapters(self, manga_id):
        """Yield the API chapter objects belonging to 'manga_id'

        The "groups" list of every chapter is rewritten in place, so
        that it holds group names instead of group IDs.
        """
        endpoint = "{}/v2/manga/{}/chapters".format(self.api_root, manga_id)
        payload = self.request(endpoint).json()["data"]

        # lookup table: group ID -> group name
        group_names = {}
        for entry in payload["groups"]:
            group_names[entry["id"]] = entry["name"]

        for chapter in payload["chapters"]:
            members = chapter["groups"]
            # slice assignment keeps the very same list object
            members[:] = [group_names[gid] for gid in members]
            yield chapter
|
2018-08-10 16:26:10 +02:00
|
|
|
|
|
|
|
|
|
|
|
class MangadexChapterExtractor(MangadexExtractor):
    """Extractor for the pages of a single mangadex.org chapter"""
    subcategory = "chapter"
    directory_fmt = (
        "{category}", "{manga}",
        "{volume:?v/ />02}c{chapter:>03}{chapter_minor}{title:?: //}",
    )
    filename_fmt = (
        "{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}"
    )
    archive_fmt = "{chapter_id}_{page}"
    pattern = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)/chapter/(\d+)"
    test = (
        ("https://mangadex.org/chapter/122094", {
            "keyword": "89d1b24b4baa1fb737d32711d9f2ade6ea426987",
            #  "content": "50383a4c15124682057b197d40261641a98db514",
        }),
        # oneshot
        ("https://mangadex.cc/chapter/138086", {
            "count": 64,
            "keyword": "c53a0e4c12250578a4e630281085875e59532c03",
        }),
        # MANGA Plus (#1154)
        ("https://mangadex.org/chapter/1122815", {
            "exception": exception.HttpError,
        }),
    )

    def __init__(self, match):
        """Store the chapter ID captured by the first group of 'pattern'"""
        super().__init__(match)
        self.chapter_id = match.group(1)

    def items(self):
        """Yield one Directory message and one Url message per page

        Raises exception.StopExtraction when the chapter data returned
        by the API has no "server" entry.
        """
        info = self.chapter_data(self.chapter_id)

        if "server" not in info:
            if info["status"] != "external":
                raise exception.StopExtraction(
                    "No download server available.")
            raise exception.StopExtraction(
                "Chapter is not available on MangaDex and can be read on "
                "the official publisher's website at %s.", info["pages"])

        manga = self.manga_data(info["mangaId"])

        # split e.g. "12.5" into "12" and ".5"
        number, dot, minor = info["chapter"].partition(".")
        code = info["language"]
        lang = self.iso639_map.get(code, code)

        # a server path starting with "/" is relative to the site root
        primary = info["server"] + info["hash"] + "/"
        if primary.startswith("/"):
            primary = text.urljoin(self.root, primary)

        secondary = (
            info["serverFallback"] + info["hash"] + "/"
            if "serverFallback" in info else None
        )

        metadata = {
            "manga": text.unescape(manga["title"]),
            "manga_id": manga["id"],
            "artist": manga["artist"],
            "author": manga["author"],
            "title": text.unescape(info["title"]),
            "volume": text.parse_int(info["volume"]),
            "chapter": text.parse_int(number),
            "chapter_minor": dot + minor,
            "chapter_id": info["id"],
            "group": [group["name"] for group in info["groups"]],
            "date": text.parse_timestamp(info["timestamp"]),
            "lang": lang,
            "language": util.code_to_language(lang),
            "count": len(info["pages"]),
        }

        yield Message.Directory, metadata

        # the same metadata dict is updated and re-used for every page
        for pagenum, filename in enumerate(info["pages"], 1):
            metadata["page"] = pagenum
            if secondary:
                metadata["_fallback"] = (secondary + filename,)
            yield (
                Message.Url,
                primary + filename,
                text.nameext_from_url(filename, metadata),
            )
|
2018-03-05 18:37:21 +01:00
|
|
|
|
|
|
|
|
2018-08-10 16:26:10 +02:00
|
|
|
class MangadexMangaExtractor(MangadexExtractor):
    """Extractor for all chapters of a manga from mangadex.org"""
    subcategory = "manga"
    categorytransfer = True
    pattern = (
        r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)"
        r"/(?:title|manga)/(\d+)"
    )
    test = (
        ("https://mangadex.org/manga/2946/souten-no-koumori", {
            "pattern": r"https://mangadex.org/chapter/\d+",
            "keyword": {
                "manga": "Souten no Koumori",
                "manga_id": 2946,
                "title": "re:One[Ss]hot",
                "volume": 0,
                "chapter": 0,
                "chapter_minor": "",
                "chapter_id": int,
                "group": list,
                "date": "type:datetime",
                "lang": str,
                "language": str,
            },
        }),
        ("https://mangadex.cc/manga/13318/dagashi-kashi/chapters/2/", {
            "count": ">= 100",
        }),
        ("https://mangadex.org/title/13004/yorumori-no-kuni-no-sora-ni", {
            "count": 0,
        }),
    )

    def __init__(self, match):
        """Store the manga ID captured by the first group of 'pattern'"""
        super().__init__(match)
        self.manga_id = match.group(1)

    def items(self):
        """Yield a Version message, then a Queue message per chapter"""
        yield Message.Version, 1
        for metadata in self.chapters():
            chapter_url = "{}/chapter/{}".format(
                self.root, metadata["chapter_id"])
            yield Message.Queue, chapter_url, metadata

    def chapters(self):
        """Build the chapter-metadata dicts and return them as sorted list

        Entries are ordered by ("chapter", "chapter_minor"); the order
        is reversed when the 'chapter-reverse' option is set.
        """
        manga = self.manga_data(self.manga_id)

        def position(entry):
            return entry["chapter"], entry["chapter_minor"]

        entries = []
        for cdata in self.manga_chapters(self.manga_id):
            # split e.g. "12.5" into "12" and ".5"
            number, dot, minor = cdata["chapter"].partition(".")
            code = cdata["language"]
            lang = self.iso639_map.get(code, code)

            entries.append({
                "manga": text.unescape(manga["title"]),
                "manga_id": text.parse_int(self.manga_id),
                "artist": manga["artist"],
                "author": manga["author"],
                "title": text.unescape(cdata["title"]),
                "volume": text.parse_int(cdata["volume"]),
                "chapter": text.parse_int(number),
                "chapter_minor": dot + minor,
                "chapter_id": text.parse_int(cdata["id"]),
                "group": cdata["groups"],
                "date": text.parse_timestamp(cdata["timestamp"]),
                "lang": lang,
                "language": util.code_to_language(lang),
                "_extractor": MangadexChapterExtractor,
            })

        return sorted(
            entries,
            key=position,
            reverse=self.config("chapter-reverse", False),
        )
|