# -*- coding: utf-8 -*-

# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://mangadex.org/"""

from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache, memcache
from collections import defaultdict

BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)"


class MangadexExtractor(Extractor):
    """Base class for mangadex extractors"""
    category = "mangadex"
    directory_fmt = (
        "{category}", "{manga}",
        "{volume:?v/ />02}c{chapter:>03}{chapter_minor}{title:?: //}")
    filename_fmt = (
        "{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}")
    archive_fmt = "{chapter_id}_{page}"
    root = "https://mangadex.org"
    _cache = {}

    def __init__(self, match):
        Extractor.__init__(self, match)
        self.uuid = match.group(1)

    def _init(self):
        self.session.headers["User-Agent"] = util.USERAGENT
        self.api = MangadexAPI(self)

    def items(self):
        for chapter in self.chapters():
            uuid = chapter["id"]
            data = self._transform(chapter)
            data["_extractor"] = MangadexChapterExtractor
            self._cache[uuid] = data
            yield Message.Queue, self.root + "/chapter/" + uuid, data
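
    # Metadata is cached under the chapter UUID so that
    # MangadexChapterExtractor.items() can pop it later without requesting
    # the same chapter from the API a second time.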

    def _transform(self, chapter):
        relationships = defaultdict(list)
        for item in chapter["relationships"]:
            relationships[item["type"]].append(item)
        manga = self.api.manga(relationships["manga"][0]["id"])
        for item in manga["relationships"]:
            relationships[item["type"]].append(item)

        cattributes = chapter["attributes"]
        mattributes = manga["attributes"]

        lang = cattributes.get("translatedLanguage")
        if lang:
            lang = lang.partition("-")[0]

        if cattributes["chapter"]:
            chnum, sep, minor = cattributes["chapter"].partition(".")
        else:
            chnum, sep, minor = 0, "", ""

        data = {
            "manga"   : (mattributes["title"].get("en") or
                         next(iter(mattributes["title"].values()))),
            "manga_id": manga["id"],
            "title"   : cattributes["title"],
            "volume"  : text.parse_int(cattributes["volume"]),
            "chapter" : text.parse_int(chnum),
            "chapter_minor": sep + minor,
            "chapter_id": chapter["id"],
            "date"    : text.parse_datetime(cattributes["publishAt"]),
            "lang"    : lang,
            "language": util.code_to_language(lang),
            "count"   : cattributes["pages"],
            "_external_url": cattributes.get("externalUrl"),
        }

        data["artist"] = [artist["attributes"]["name"]
                          for artist in relationships["artist"]]
        data["author"] = [author["attributes"]["name"]
                          for author in relationships["author"]]
        data["group"] = [group["attributes"]["name"]
                         for group in relationships["scanlation_group"]]

        data["status"] = mattributes["status"]
        data["tags"] = [tag["attributes"]["name"]["en"]
                        for tag in mattributes["tags"]]

        return data
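
# For reference, a rough sketch of the chapter object _transform() consumes,
# reconstructed from the fields accessed above (values are illustrative,
# not real API output):
#
#   {
#       "id": "<chapter uuid>",
#       "attributes": {
#           "title": "...", "volume": "3", "chapter": "12.5",
#           "translatedLanguage": "en-us", "publishAt": "<ISO 8601 datetime>",
#           "pages": 20, "externalUrl": None,
#       },
#       "relationships": [
#           {"type": "manga", "id": "<manga uuid>"},
#           {"type": "scanlation_group", "attributes": {"name": "..."}},
#       ],
#   }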


class MangadexChapterExtractor(MangadexExtractor):
    """Extractor for manga chapters from mangadex.org"""
    subcategory = "chapter"
    pattern = BASE_PATTERN + r"/chapter/([0-9a-f-]+)"
    example = ("https://mangadex.org/chapter"
               "/01234567-89ab-cdef-0123-456789abcdef")

    def items(self):
        try:
            data = self._cache.pop(self.uuid)
        except KeyError:
            chapter = self.api.chapter(self.uuid)
            data = self._transform(chapter)

        if data.get("_external_url") and not data["count"]:
            raise exception.StopExtraction(
                "Chapter %s%s is not available on MangaDex and can instead be "
                "read on the official publisher's website at %s.",
                data["chapter"], data["chapter_minor"], data["_external_url"])

        yield Message.Directory, data

        server = self.api.athome_server(self.uuid)
        chapter = server["chapter"]
        base = "{}/data/{}/".format(server["baseUrl"], chapter["hash"])
        enum = util.enumerate_reversed if self.config(
            "page-reverse") else enumerate
        for data["page"], page in enum(chapter["data"], 1):
            text.nameext_from_url(page, data)
            yield Message.Url, base + page, data


class MangadexMangaExtractor(MangadexExtractor):
    """Extractor for manga from mangadex.org"""
    subcategory = "manga"
    pattern = BASE_PATTERN + r"/(?:title|manga)/(?!feed$)([0-9a-f-]+)"
    example = ("https://mangadex.org/title"
               "/01234567-89ab-cdef-0123-456789abcdef")

    def chapters(self):
        return self.api.manga_feed(self.uuid)


class MangadexFeedExtractor(MangadexExtractor):
    """Extractor for chapters from your Followed Feed"""
    subcategory = "feed"
    pattern = BASE_PATTERN + r"/title/feed$()"
    example = "https://mangadex.org/title/feed"

    def chapters(self):
        return self.api.user_follows_manga_feed()


class MangadexListExtractor(MangadexExtractor):
    """Extractor for mangadex lists"""
    subcategory = "list"
    pattern = (BASE_PATTERN +
               r"/list/([0-9a-f-]+)(?:/[^/?#]*)?(?:\?tab=(\w+))?")
    example = ("https://mangadex.org/list"
               "/01234567-89ab-cdef-0123-456789abcdef/NAME")

    def __init__(self, match):
        MangadexExtractor.__init__(self, match)
        if match.group(2) == "feed":
            self.subcategory = "list-feed"
        else:
            self.items = self._items_titles

    def chapters(self):
        return self.api.list_feed(self.uuid)

    def _items_titles(self):
        data = {"_extractor": MangadexMangaExtractor}
        for item in self.api.list(self.uuid)["relationships"]:
            if item["type"] == "manga":
                url = "{}/title/{}".format(self.root, item["id"])
                yield Message.Queue, url, data
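
# A list URL ending in "?tab=feed" is treated as a chapter feed; any other
# list URL enumerates the manga it contains and queues each title for
# MangadexMangaExtractor (see __init__ above).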


class MangadexAPI():
    """Interface for the MangaDex API v5

    https://api.mangadex.org/docs/
    """

    def __init__(self, extr):
        self.extractor = extr
        self.headers = {}

        self.username, self.password = extr._get_auth_info()
        if not self.username:
            self.authenticate = util.noop

        server = extr.config("api-server")
        self.root = ("https://api.mangadex.org" if server is None
                     else text.ensure_http_scheme(server).rstrip("/"))
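
    # Typical usage from an extractor (a sketch; "uuid" stands for any
    # MangaDex UUID string):
    #
    #   api = MangadexAPI(self)
    #   manga = api.manga(uuid)               # memcached per manga UUID
    #   for chapter in api.manga_feed(uuid):  # pagination handled internally
    #       ...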

    def athome_server(self, uuid):
        return self._call("/at-home/server/" + uuid)

    def chapter(self, uuid):
        params = {"includes[]": ("scanlation_group",)}
        return self._call("/chapter/" + uuid, params)["data"]

    def list(self, uuid):
        return self._call("/list/" + uuid)["data"]

    def list_feed(self, uuid):
        return self._pagination("/list/" + uuid + "/feed")

    @memcache(keyarg=1)
    def manga(self, uuid):
        params = {"includes[]": ("artist", "author")}
        return self._call("/manga/" + uuid, params)["data"]

    def manga_feed(self, uuid):
        order = "desc" if self.extractor.config("chapter-reverse") else "asc"
        params = {
            "order[volume]" : order,
            "order[chapter]": order,
        }
        return self._pagination("/manga/" + uuid + "/feed", params)

    def user_follows_manga_feed(self):
        params = {"order[publishAt]": "desc"}
        return self._pagination("/user/follows/manga/feed", params)

    def authenticate(self):
        self.headers["Authorization"] = \
            self._authenticate_impl(self.username, self.password)

    @cache(maxage=900, keyarg=1)
    def _authenticate_impl(self, username, password):
        refresh_token = _refresh_token_cache(username)
        if refresh_token:
            self.extractor.log.info("Refreshing access token")
            url = self.root + "/auth/refresh"
            data = {"token": refresh_token}
        else:
            self.extractor.log.info("Logging in as %s", username)
            url = self.root + "/auth/login"
            data = {"username": username, "password": password}

        data = self.extractor.request(
            url, method="POST", json=data, fatal=None).json()
        if data.get("result") != "ok":
            raise exception.AuthenticationError()

        if refresh_token != data["token"]["refresh"]:
            _refresh_token_cache.update(username, data["token"]["refresh"])
        return "Bearer " + data["token"]["session"]

    def _call(self, endpoint, params=None):
        url = self.root + endpoint

        while True:
            self.authenticate()
            response = self.extractor.request(
                url, params=params, headers=self.headers, fatal=None)

            if response.status_code < 400:
                return response.json()
            if response.status_code == 429:
                until = response.headers.get("X-RateLimit-Retry-After")
                self.extractor.wait(until=until)
                continue

            msg = ", ".join('{title}: {detail}'.format_map(error)
                            for error in response.json()["errors"])
            raise exception.StopExtraction(
                "%s %s (%s)", response.status_code, response.reason, msg)

    def _pagination(self, endpoint, params=None):
        if params is None:
            params = {}

        config = self.extractor.config
        ratings = config("ratings")
        if ratings is None:
            ratings = ("safe", "suggestive", "erotica", "pornographic")

        lang = config("lang")
        if isinstance(lang, str) and "," in lang:
            lang = lang.split(",")

        params["contentRating[]"] = ratings
        params["translatedLanguage[]"] = lang
        params["includes[]"] = ("scanlation_group",)
        params["offset"] = 0

        api_params = config("api-parameters")
        if api_params:
            params.update(api_params)

        while True:
            data = self._call(endpoint, params)
            yield from data["data"]

            params["offset"] = data["offset"] + data["limit"]
            if params["offset"] >= data["total"]:
                return
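
    # Offset pagination by example: with limit=100 and total=250, the loop
    # requests offsets 0, 100, and 200, then stops once 200 + 100 >= 250.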


@cache(maxage=28*86400, keyarg=0)
def _refresh_token_cache(username):
    return None