# -*- coding: utf-8 -*-

# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
|
|
|
|
|
2019-02-03 23:54:17 +01:00
|
|
|
"""Extractors for FoOlSlide based sites"""
|
2016-10-23 17:51:12 +02:00
|
|
|
|
2021-01-26 18:50:32 +01:00
|
|
|
from .common import BaseExtractor, Message
|
2019-03-07 22:55:26 +01:00
|
|
|
from .. import text, util
|
2017-04-11 21:03:40 +02:00
|
|
|
|
|
|
|
|
2021-01-26 18:50:32 +01:00
|
|
|
class FoolslideExtractor(BaseExtractor):
    """Common base for all FoOlSlide-powered site extractors"""
    basecategory = "foolslide"

    def __init__(self, match):
        BaseExtractor.__init__(self, match)
        # the last capture group of the pattern holds the
        # site-relative gallery path
        self.gallery_url = self.root + match.group(match.lastindex)

    def request(self, url):
        # FoOlSlide sites gate mature content behind a POST form;
        # always send 'adult=true' to get past it
        kwargs = {
            "encoding": "utf-8",
            "method": "POST",
            "data": {"adult": "true"},
        }
        return BaseExtractor.request(self, url, **kwargs)

    @staticmethod
    def parse_chapter_url(url, data):
        """Fill 'data' with metadata parsed from a '/read/...' chapter URL

        Expected path layout: /read/<manga>/<lang>/<volume>/<chapter>[/<minor>]
        Also derives 'title' from the previously stored 'chapter_string'.
        """
        parts = url.partition("/read/")[2].rstrip("/").split("/")
        code = parts[1].partition("-")[0]
        data["lang"] = code
        data["language"] = util.code_to_language(code)
        data["volume"] = text.parse_int(parts[2])
        data["chapter"] = text.parse_int(parts[3])
        if len(parts) >= 5:
            data["chapter_minor"] = "." + parts[4]
        else:
            data["chapter_minor"] = ""
        data["title"] = data["chapter_string"].partition(":")[2].strip()
        return data
|
|
|
|
|
|
|
|
|
2021-01-26 18:50:32 +01:00
|
|
|
# Combined URL base pattern for all registered FoOlSlide instances.
# NOTE(review): update() is inherited from BaseExtractor and presumably
# merges instance definitions into a regex; no default instances are
# declared here — confirm whether sites are registered elsewhere.
BASE_PATTERN = FoolslideExtractor.update({
})
|
|
|
|
|
|
|
|
|
|
|
|
class FoolslideChapterExtractor(FoolslideExtractor):
    """Base class for chapter extractors for FoOlSlide based sites"""
    subcategory = "chapter"
    directory_fmt = ("{category}", "{manga}", "{chapter_string}")
    filename_fmt = (
        "{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
    # each page entry carries a numeric 'id' used for archive dedup
    archive_fmt = "{id}"
    pattern = BASE_PATTERN + r"(/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"
    example = "https://read.powermanga.org/read/MANGA/en/0/123/"

    def items(self):
        """Yield one Directory message, then one Url message per page"""
        page = self.request(self.gallery_url).text
        data = self.metadata(page)
        imgs = self.images(page)

        data["count"] = len(imgs)
        # NOTE(review): raises IndexError for a chapter with no images
        data["chapter_id"] = text.parse_int(imgs[0]["chapter_id"])

        yield Message.Directory, data
        # 'page-reverse' flips iteration order; enumerate_reversed
        # presumably still assigns 1-based page numbers — see util
        enum = util.enumerate_reversed if self.config(
            "page-reverse") else enumerate
        for data["page"], image in enum(imgs, 1):
            try:
                url = image["url"]
                # strip keys that must not leak into the metadata dict;
                # deletions run in order, so a missing key skips the rest
                del image["url"]
                del image["chapter_id"]
                del image["thumb_url"]
            except KeyError:
                # some sites omit 'thumb_url' or other keys; note that a
                # missing 'url' would surface as a NameError below
                pass
            for key in ("height", "id", "size", "width"):
                image[key] = text.parse_int(image[key])
            data.update(image)
            text.nameext_from_url(data["filename"], data)
            yield Message.Url, url, data

    def metadata(self, page):
        """Extract manga name and chapter string from a chapter page"""
        extr = text.extract_from(page)
        # advance past the title header before reading the two
        # consecutive title="..." attributes (manga, then chapter)
        extr('<h1 class="tbtitle dnone">', '')
        return self.parse_chapter_url(self.gallery_url, {
            "manga" : text.unescape(extr('title="', '"')).strip(),
            "chapter_string": text.unescape(extr('title="', '"')),
        })

    def images(self, page):
        """Return the image list embedded as 'var pages = ...' JSON"""
        return util.json_loads(text.extr(page, "var pages = ", ";"))
|
2017-04-11 21:03:40 +02:00
|
|
|
|
|
|
|
|
2021-01-26 18:50:32 +01:00
|
|
|
class FoolslideMangaExtractor(FoolslideExtractor):
    """Base class for manga extractors for FoOlSlide based sites"""
    subcategory = "manga"
    categorytransfer = True
    pattern = BASE_PATTERN + r"(/series/[^/?#]+)"
    example = "https://read.powermanga.org/series/MANGA/"

    def items(self):
        """Queue every chapter of this manga for the chapter extractor"""
        page = self.request(self.gallery_url).text

        chapters = self.chapters(page)
        # the site lists newest first; restore chronological order
        # unless 'chapter-reverse' is enabled
        if not self.config("chapter-reverse", False):
            chapters.reverse()

        for url, data in chapters:
            data["_extractor"] = FoolslideChapterExtractor
            yield Message.Queue, url, data

    def chapters(self, page):
        """Collect (url, metadata) pairs for all listed chapters"""
        extract = text.extract_from(page)
        manga = text.unescape(extract('<h1 class="title">', '</h1>')).strip()
        author = extract('<b>Author</b>: ', '<br')
        artist = extract('<b>Artist</b>: ', '<br')

        chapters = []
        while True:
            url = extract('<div class="title"><a href="', '"')
            if not url:
                break
            # the two title="..." attributes following the link hold
            # the chapter string and the scanlation group
            info = {
                "manga": manga, "author": author, "artist": artist,
                "chapter_string": extract('title="', '"'),
                "group" : extract('title="', '"'),
            }
            chapters.append((url, self.parse_chapter_url(url, info)))
        return chapters
|