# -*- coding: utf-8 -*-

# Copyright 2015-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://mangapark.net/"""

from .common import ChapterExtractor, MangaExtractor
from .. import text, exception
import json
import re


class MangaparkBase():
    """Base class for mangapark extractors"""
    category = "mangapark"
    root_fmt = "https://mangapark.{}"

    @staticmethod
    def parse_chapter_path(path, data):
        """Get volume/chapter information from url-path of a chapter"""
        data["volume"], data["chapter_minor"] = 0, ""
        for part in path.split("/")[1:]:
            key, value = part[0], part[1:]
            if key == "c":
                chapter, dot, minor = value.partition(".")
                data["chapter"] = text.parse_int(chapter)
                data["chapter_minor"] = dot + minor
            elif key == "i":
                data["chapter_id"] = text.parse_int(value)
            elif key == "v":
                data["volume"] = text.parse_int(value)
            elif key == "s":
                data["stream"] = text.parse_int(value)
            elif key == "e":
                data["chapter_minor"] = "v" + value

    @staticmethod
    def parse_chapter_title(title, data):
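        # Fallback used when the URL path carries no chapter number:
        # matches title forms like "Vol.7 Ch.70: ..." or "chapter 001.2"
        # (illustrative examples); the volume part is optional.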
        match = re.search(r"(?i)(?:vol(?:ume)?[ .]*(\d+) )?"
                          r"ch(?:apter)?[ .]*(\d+)(\.\w+)?", title)
        if match:
            vol, ch, data["chapter_minor"] = match.groups()
            data["volume"] = text.parse_int(vol)
            data["chapter"] = text.parse_int(ch)
class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
    """Extractor for manga-chapters from mangapark.net"""
    pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
               r"/manga/([^?#]+/i\d+)")
    test = (
        ("https://mangapark.net/manga/gosu/i811653/c055/1", {
            "count": 50,
            "keyword": "8344bdda8cd8414e7729a4e148379f147e3437da",
        }),
        (("https://mangapark.net/manga"
          "/ad-astra-per-aspera-hata-kenjirou/i662051/c001.2/1"), {
            "count": 40,
            "keyword": "2bb3a8f426383ea13f17ff5582f3070d096d30ac",
        }),
        (("https://mangapark.net/manga"
          "/gekkan-shoujo-nozaki-kun/i2067426/v7/c70/1"), {
            "count": 15,
            "keyword": "edc14993c4752cee3a76e09b2f024d40d854bfd1",
        }),
        ("https://mangapark.me/manga/gosu/i811615/c55/1"),
        ("https://mangapark.com/manga/gosu/i811615/c55/1"),
    )

    def __init__(self, match):
        tld, self.path = match.groups()
        self.root = self.root_fmt.format(tld)
        # "zoom=2" appears to request the full-size image view
        # (an assumption inferred from this extractor's use of it)
        url = "{}/manga/{}?zoom=2".format(self.root, self.path)
        ChapterExtractor.__init__(self, match, url)

    def metadata(self, page):
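        # Chapter metadata is embedded in the page as inline JavaScript
        # variables ("var _manga_id = '...'" etc.) plus a few HTML
        # elements; extract_all() scans for these markers sequentially.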
        data = text.extract_all(page, (
            ("manga_id"  , "var _manga_id = '", "'"),
            ("chapter_id", "var _book_id = '", "'"),
            ("stream"    , "var _stream = '", "'"),
            ("path"      , "var _book_link = '", "'"),
            ("manga"     , "<h2>", "</h2>"),
            ("title"     , "</a>", "<"),
        ), values={"lang": "en", "language": "English"})[0]

        if not data["path"]:
            raise exception.NotFoundError("chapter")

        self.parse_chapter_path(data["path"], data)
        if "chapter" not in data:
            self.parse_chapter_title(data["title"], data)

        # the last word of the <h2> heading is the entry type (e.g. "Manga")
        data["manga"], _, data["type"] = data["manga"].rpartition(" ")
        data["manga"] = text.unescape(data["manga"])
        data["title"] = data["title"].partition(": ")[2]
        for key in ("manga_id", "chapter_id", "stream"):
            data[key] = text.parse_int(data[key])

        return data

    def images(self, page):
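        # "_load_pages" is a JSON array of page objects; judging from the
        # keys used below, each item looks like
        # {"u": <image url>, "w": <width>, "h": <height>}.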
        data = json.loads(text.extract(page, "var _load_pages =", ";")[0])
        return [
            (text.urljoin(self.root, item["u"]), {
                "width": text.parse_int(item["w"]),
                "height": text.parse_int(item["h"]),
            })
            for item in data
        ]


class MangaparkMangaExtractor(MangaparkBase, MangaExtractor):
    """Extractor for manga from mangapark.net"""
    chapterclass = MangaparkChapterExtractor
    pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
               r"(/manga/[^/?#]+)/?$")
    test = (
        ("https://mangapark.net/manga/aria", {
            "url": "9b62883c25c8de471f8ab43651e1448536c4ce3f",
            "keyword": "eb4a9b273c69acf31efa731eba713e1cfa14bab6",
        }),
        ("https://mangapark.me/manga/aria"),
        ("https://mangapark.com/manga/aria"),
    )

    def __init__(self, match):
        self.root = self.root_fmt.format(match.group(1))
        MangaExtractor.__init__(self, match, self.root + match.group(2))

    def chapters(self, page):
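        # A manga page lists one or more "streams" (different uploads or
        # versions, inferred from the markup parsed below); each lives in
        # a <div id="stream_N"> and contains its chapters as <li> elements.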
        results = []
        data = {"lang": "en", "language": "English"}
        data["manga"] = text.unescape(
            text.extract(page, '<title>', ' Manga - ')[0])

        for stream in page.split('<div id="stream_')[1:]:
            data["stream"] = text.parse_int(text.extract(stream, '', '"')[0])

            for chapter in text.extract_iter(stream, '<li ', '</li>'):
                path  , pos = text.extract(chapter, 'href="', '"')
                title1, pos = text.extract(chapter, '>', '<', pos)
                title2, pos = text.extract(chapter, '>: </span>', '<', pos)
                count , pos = text.extract(chapter, ' of ', ' ', pos)

                # path starts with "/manga/<title>"; skipping the first
                # 8 characters and letting split("/")[1:] drop the rest of
                # the title leaves only the key/value segments
                self.parse_chapter_path(path[8:], data)
                if "chapter" not in data:
                    self.parse_chapter_title(title1, data)

                if title2:
                    data["title"] = title2.strip()
                else:
                    data["title"] = title1.partition(":")[2].strip()

                data["count"] = text.parse_int(count)
                results.append((self.root + path, data.copy()))
                # remove "chapter" so the title fallback above triggers
                # again if the next path carries no chapter number
                data.pop("chapter", None)

        return results
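

# Minimal usage sketch (gallery-dl command line; chapter and manga URLs
# taken from the test cases above):
#
#   $ gallery-dl "https://mangapark.net/manga/gosu/i811653/c055/1"
#   $ gallery-dl "https://mangapark.net/manga/aria"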