mirror of https://github.com/mikf/gallery-dl.git
change Chapter and MangaExtractor classes
- unify and simplify constructors
- rename get_metadata and get_images to just metadata() and images()
- rename self.url to chapter_url and manga_url
parent 4b1880fa5e
commit 580baef72c
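In practice, the unified constructors mean a subclass no longer needs its own __init__ just to build the request URL: ChapterExtractor derives chapter_url (and MangaExtractor derives manga_url) from root plus the pattern's first capture group, and subclasses only override metadata() and images() (formerly get_metadata() and get_images()). A minimal sketch of a hypothetical extractor written against the new interface — the "example" site, its URL, and its markup are illustrative only and not part of this commit:

from .common import ChapterExtractor
from .. import text


class ExampleChapterExtractor(ChapterExtractor):
    """Hypothetical extractor demonstrating the unified interface"""
    category = "example"
    root = "https://example.org"
    # group(1) is the chapter path; the base __init__ now builds
    # self.chapter_url = url or self.root + match.group(1)
    pattern = r"(?:https?://)?example\.org(/chapters/[^/?&#]+)"

    def metadata(self, page):   # was get_metadata()
        title = text.extract(page, "<title>", "</title>")[0]
        return {"title": title, "lang": "en", "language": "English"}

    def images(self, page):     # was get_images()
        return [
            (url, None)
            for url in text.extract_iter(page, '<img src="', '"')
        ]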
gallery_dl/extractor/common.py
@@ -207,16 +207,17 @@ class ChapterExtractor(Extractor):
         "{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
     archive_fmt = (
         "{manga}_{chapter}{chapter_minor}_{page}")
+    root = ""
 
-    def __init__(self, match, url):
+    def __init__(self, match, url=None):
         Extractor.__init__(self, match)
-        self.url = url
+        self.chapter_url = url or self.root + match.group(1)
 
     def items(self):
         self.login()
-        page = self.request(self.url).text
-        data = self.get_metadata(page)
-        imgs = self.get_images(page)
+        page = self.request(self.chapter_url).text
+        data = self.metadata(page)
+        imgs = self.images(page)
 
         if "count" in data:
             images = zip(
@@ -240,10 +241,10 @@ class ChapterExtractor(Extractor):
     def login(self):
         """Login and set necessary cookies"""
 
-    def get_metadata(self, page):
+    def metadata(self, page):
         """Return a dict with general metadata"""
 
-    def get_images(self, page):
+    def images(self, page):
         """Return a list of all (image-url, metadata)-tuples"""
 
 
@@ -251,19 +252,19 @@ class MangaExtractor(Extractor):
 
     subcategory = "manga"
     categorytransfer = True
-    scheme = "http"
-    root = ""
     reverse = True
+    root = ""
 
     def __init__(self, match, url=None):
         Extractor.__init__(self, match)
-        self.url = url or self.scheme + "://" + match.group(1)
+        self.manga_url = url or self.root + match.group(1)
 
         if self.config("chapter-reverse", False):
             self.reverse = not self.reverse
 
     def items(self):
-        page = self.request(self.url).text
+        self.login()
+        page = self.request(self.manga_url).text
 
         chapters = self.chapters(page)
         if self.reverse:
@@ -273,6 +274,9 @@ class MangaExtractor(Extractor):
         for chapter, data in chapters:
             yield Message.Queue, chapter, data
 
+    def login(self):
+        """Login and set necessary cookies"""
+
     def chapters(self, page):
         """Return a list of all (chapter-url, metadata)-tuples"""
 
gallery_dl/extractor/dynastyscans.py
@@ -10,14 +10,14 @@
 
 from .common import ChapterExtractor
 from .. import text
-import re
 import json
+import re
 
 
 class DynastyscansChapterExtractor(ChapterExtractor):
     """Extractor for manga-chapters from dynasty-scans.com"""
     category = "dynastyscans"
-    pattern = r"(?:https?://)?(?:www\.)?dynasty-scans\.com/chapters/([^/]+)"
+    pattern = r"(?:https?://)?(?:www\.)?dynasty-scans\.com(/chapters/[^/?&#]+)"
     test = (
         (("http://dynasty-scans.com/chapters/"
           "hitoribocchi_no_oo_seikatsu_ch33"), {
@@ -32,13 +32,7 @@ class DynastyscansChapterExtractor(ChapterExtractor):
     )
     root = "https://dynasty-scans.com"
 
-    def __init__(self, match):
-        self.chaptername = match.group(1)
-        url = self.root + "/chapters/" + self.chaptername
-        ChapterExtractor.__init__(self, match, url)
-
-    def get_metadata(self, page):
-        """Collect metadata for extractor-job"""
+    def metadata(self, page):
         info , pos = text.extract(page, "<h3 id='chapter-title'><b>", "</b>")
         author, pos = text.extract(page, " by ", "</a>", pos)
         group , pos = text.extract(page, '"icon-print"></i> ', '</span>', pos)
@@ -64,8 +58,7 @@ class DynastyscansChapterExtractor(ChapterExtractor):
         "language": "English",
     }
 
-    def get_images(self, page):
-        """Extract list of all image-urls for a manga chapter"""
+    def images(self, page):
         data = text.extract(page, "var pages = ", ";\n")[0]
         return [
             (self.root + img["image"], None)
gallery_dl/extractor/fallenangels.py
@@ -38,7 +38,7 @@ class FallenangelsChapterExtractor(ChapterExtractor):
             self.version, self.manga, self.chapter)
         ChapterExtractor.__init__(self, match, url)
 
-    def get_metadata(self, page):
+    def metadata(self, page):
         lang = "vi" if self.version == "truyen" else "en"
         data = {
             "chapter": self.chapter,
@@ -52,7 +52,7 @@ class FallenangelsChapterExtractor(ChapterExtractor):
         ), values=data)[0]
 
     @staticmethod
-    def get_images(page):
+    def images(page):
         return [
             (img["page_image"], None)
             for img in json.loads(
@@ -65,7 +65,6 @@ class FallenangelsMangaExtractor(MangaExtractor):
     """Extractor for manga from fascans.com"""
     category = "fallenangels"
     pattern = r"(?:https?://)?((manga|truyen)\.fascans\.com/manga/[^/]+)/?$"
-    scheme = "https"
     test = (
         ("http://manga.fascans.com/manga/trinity-seven", {
             "url": "92699a250ff7d5adcf4b06e6a45b0c05f3426643",
@@ -78,8 +77,9 @@ class FallenangelsMangaExtractor(MangaExtractor):
     )
 
     def __init__(self, match):
-        MangaExtractor.__init__(self, match)
+        url = "https://" + match.group(1)
         self.lang = "vi" if match.group(2) == "truyen" else "en"
+        MangaExtractor.__init__(self, match, url)
 
     def chapters(self, page):
         language = util.code_to_language(self.lang)
gallery_dl/extractor/foolslide.py
@@ -8,7 +8,8 @@
 
 """Extractors for FoOlSlide based sites"""
 
-from .common import Extractor, MangaExtractor, Message, SharedConfigMixin
+from .common import (
+    Extractor, ChapterExtractor, MangaExtractor, Message, SharedConfigMixin)
 from .. import text, util, config
 import base64
 import json
@@ -35,24 +36,17 @@ class FoolslideBase(SharedConfigMixin):
         return data
 
 
-class FoolslideChapterExtractor(FoolslideBase, Extractor):
+class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor):
     """Base class for chapter extractors for FoOlSlide based sites"""
-    subcategory = "chapter"
     directory_fmt = (
         "{category}", "{manga}", "{chapter_string}")
     filename_fmt = (
         "{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}")
     archive_fmt = "{id}"
     decode = "default"
 
-    def __init__(self, match):
-        Extractor.__init__(self, match)
-        self.url = self.root + match.group(1)
-
     def items(self):
-        page = self.request(self.url).text
-        data = self.get_metadata(page)
-        imgs = self.get_images(page)
+        page = self.request(self.chapter_url).text
+        data = self.metadata(page)
+        imgs = self.images(page)
 
         data["count"] = len(imgs)
         data["chapter_id"] = text.parse_int(imgs[0]["chapter_id"])
@@ -73,20 +67,18 @@ class FoolslideChapterExtractor(FoolslideBase, Extractor):
             text.nameext_from_url(data["filename"], data)
             yield Message.Url, url, data
 
-    def get_metadata(self, page):
-        """Collect metadata for extractor-job"""
+    def metadata(self, page):
         _ , pos = text.extract(page, '<h1 class="tbtitle dnone">', '')
         manga , pos = text.extract(page, 'title="', '"', pos)
         chapter, pos = text.extract(page, 'title="', '"', pos)
         chapter = text.unescape(chapter)
-        return self.parse_chapter_url(self.url, {
+        return self.parse_chapter_url(self.chapter_url, {
             "manga": text.unescape(manga).strip(),
             "title": chapter.partition(":")[2].strip(),
             "chapter_string": chapter,
         })
 
-    def get_images(self, page):
-        """Return a list of all images in this chapter"""
+    def images(self, page):
         if self.decode == "base64":
             base64_data = text.extract(page, 'atob("', '"')[0].encode()
             data = base64.b64decode(base64_data).decode()
@@ -101,11 +93,7 @@ class FoolslideChapterExtractor(FoolslideBase, Extractor):
 class FoolslideMangaExtractor(FoolslideBase, MangaExtractor):
     """Base class for manga extractors for FoOlSlide based sites"""
 
-    def __init__(self, match):
-        MangaExtractor.__init__(self, match, self.root + match.group(1))
-
     def chapters(self, page):
-        """Return a list of all chapter urls"""
         manga , pos = text.extract(page, '<h1 class="title">', '</h1>')
         author, pos = text.extract(page, '<b>Author</b>: ', '<br', pos)
         artist, pos = text.extract(page, '<b>Artist</b>: ', '<br', pos)
gallery_dl/extractor/hbrowse.py
@@ -42,9 +42,39 @@ class HbrowseBase():
         return data
 
 
+class HbrowseChapterExtractor(HbrowseBase, ChapterExtractor):
+    """Extractor for manga-chapters from hbrowse.com"""
+    directory_fmt = ("{category}", "{manga_id} {manga}", "c{chapter:>05}")
+    filename_fmt = ("{category}_{manga_id}_{chapter:>05}_"
+                    "{page:>03}.{extension}")
+    archive_fmt = "{manga_id}_{chapter}_{page}"
+    pattern = r"(?:https?://)?(?:www\.)?hbrowse\.com(/(\d+)/c(\d+))"
+    test = ("https://www.hbrowse.com/10363/c00000", {
+        "url": "6feefbc9f4b98e20d8425ddffa9dd111791dc3e6",
+        "keyword": "95ec73a58aeac57f4dd20f0fa0c2812b045a30e8",
+        "content": "44578ebbe176c2c27434966aef22945787e2781e",
+    })
+
+    def __init__(self, match):
+        self.path, self.gid, self.chapter = match.groups()
+        self.path += "/"
+        ChapterExtractor.__init__(self, match)
+
+    def metadata(self, page):
+        return self.parse_page(page, {
+            "manga_id": text.parse_int(self.gid),
+            "chapter": text.parse_int(self.chapter)
+        })
+
+    def images(self, page):
+        base = self.root + "/data" + self.path
+        json_data = text.extract(page, ';list = ', ',"zzz"')[0] + "]"
+        return [(base + name, None) for name in json.loads(json_data)]
+
+
 class HbrowseMangaExtractor(HbrowseBase, MangaExtractor):
     """Extractor for manga from hbrowse.com"""
-    pattern = r"(?:https?://)?((?:www\.)?hbrowse\.com/\d+)/?$"
+    pattern = r"(?:https?://)?(?:www\.)?hbrowse\.com(/\d+)/?$"
     reverse = False
     test = ("https://www.hbrowse.com/10363", {
         "url": "b89682bfb86c11d2af0dc47463804ec3ac4aadd6",
@@ -55,7 +85,7 @@ class HbrowseMangaExtractor(HbrowseBase, MangaExtractor):
         results = []
         data = self.parse_page(page, {
             "manga_id": text.parse_int(
-                self.url.rstrip("/").rpartition("/")[2])
+                self.manga_url.rstrip("/").rpartition("/")[2])
         })
 
         pos = 0
@@ -68,33 +98,3 @@ class HbrowseMangaExtractor(HbrowseBase, MangaExtractor):
             data["chapter"] = text.parse_int(url.rpartition("/")[2][1:])
             data["title"] = title
             results.append((text.urljoin(self.root, url), data.copy()))
-
-
-class HbrowseChapterExtractor(HbrowseBase, ChapterExtractor):
-    """Extractor for manga-chapters from hbrowse.com"""
-    directory_fmt = ("{category}", "{manga_id} {manga}", "c{chapter:>05}")
-    filename_fmt = ("{category}_{manga_id}_{chapter:>05}_"
-                    "{page:>03}.{extension}")
-    archive_fmt = "{manga_id}_{chapter}_{page}"
-    pattern = r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/c(\d+)"
-    test = ("https://www.hbrowse.com/10363/c00000", {
-        "url": "6feefbc9f4b98e20d8425ddffa9dd111791dc3e6",
-        "keyword": "95ec73a58aeac57f4dd20f0fa0c2812b045a30e8",
-        "content": "44578ebbe176c2c27434966aef22945787e2781e",
-    })
-
-    def __init__(self, match):
-        self.gid, self.chapter = match.groups()
-        self.path = "/{}/c{}/".format(self.gid, self.chapter)
-        ChapterExtractor.__init__(self, match, self.root + self.path)
-
-    def get_metadata(self, page):
-        return self.parse_page(page, {
-            "manga_id": text.parse_int(self.gid),
-            "chapter": text.parse_int(self.chapter)
-        })
-
-    def get_images(self, page):
-        base = self.root + "/data" + self.path
-        json_data = text.extract(page, ';list = ', ',"zzz"')[0] + "]"
-        return [(base + name, None) for name in json.loads(json_data)]
gallery_dl/extractor/hentai2read.py
@@ -10,21 +10,65 @@
 
 from .common import ChapterExtractor, MangaExtractor
 from .. import text
-import re
 import json
+import re
 
 
-class Hentai2readMangaExtractor(MangaExtractor):
-    """Extractor for hmanga from hentai2read.com"""
+class Hentai2readBase():
+    """Base class for hentai2read extractors"""
     category = "hentai2read"
-    scheme = "https"
-    pattern = r"(?:https?://)?(?:www\.)?(hentai2read\.com/[^/]+/?)$"
+    root = "https://hentai2read.com"
+
+
+class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor):
+    """Extractor for a single manga chapter from hentai2read.com"""
+    archive_fmt = "{chapter_id}_{page}"
+    pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?&#]+/(\d+))"
+    test = ("https://hentai2read.com/amazon_elixir/1/", {
+        "url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
+        "keyword": "9845105898d28c6a540cffdea60a1a20fab52431",
+    })
+
+    def __init__(self, match):
+        self.chapter = match.group(2)
+        ChapterExtractor.__init__(self, match)
+
+    def metadata(self, page):
+        title, pos = text.extract(page, "<title>", "</title>")
+        manga_id, pos = text.extract(page, 'data-mid="', '"', pos)
+        chapter_id, pos = text.extract(page, 'data-cid="', '"', pos)
+        match = re.match(r"Reading (.+) \(([^)]+)\) Hentai(?: by (.+))? - "
+                         r"(\d+): (.+) . Page 1 ", title)
+        return {
+            "manga": match.group(1),
+            "manga_id": text.parse_int(manga_id),
+            "chapter": text.parse_int(self.chapter),
+            "chapter_id": text.parse_int(chapter_id),
+            "type": match.group(2),
+            "author": match.group(3),
+            "title": match.group(5),
+            "lang": "en",
+            "language": "English",
+        }
+
+    @staticmethod
+    def images(page):
+        images = text.extract(page, "'images' : ", ",\n")[0]
+        return [
+            ("https://hentaicdn.com/hentai" + part, None)
+            for part in json.loads(images)
+        ]
+
+
+class Hentai2readMangaExtractor(Hentai2readBase, MangaExtractor):
+    """Extractor for hmanga from hentai2read.com"""
+    pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?&#]+)/?$"
     test = (
-        ("http://hentai2read.com/amazon_elixir/", {
+        ("https://hentai2read.com/amazon_elixir/", {
             "url": "273073752d418ec887d7f7211e42b832e8c403ba",
             "keyword": "13c1ce7e15cbb941f01c843b0e89adc993d939ac",
         }),
-        ("http://hentai2read.com/oshikage_riot/", {
+        ("https://hentai2read.com/oshikage_riot/", {
             "url": "6595f920a3088a15c2819c502862d45f8eb6bea6",
             "keyword": "675c7b7a4fa52cf569c283553bd16b4200a5cd36",
         }),
@@ -54,46 +98,3 @@ class Hentai2readMangaExtractor(MangaExtractor):
             "chapter": text.parse_int(chapter),
             "title": title, "lang": "en", "language": "English",
         }))
-
-
-class Hentai2readChapterExtractor(ChapterExtractor):
-    """Extractor for a single manga chapter from hentai2read.com"""
-    category = "hentai2read"
-    archive_fmt = "{chapter_id}_{page}"
-    pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com/([^/]+)/(\d+)"
-    test = ("http://hentai2read.com/amazon_elixir/1/", {
-        "url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
-        "keyword": "9845105898d28c6a540cffdea60a1a20fab52431",
-    })
-
-    def __init__(self, match):
-        url_title, self.chapter = match.groups()
-        url = "https://hentai2read.com/{}/{}/".format(url_title, self.chapter)
-        ChapterExtractor.__init__(self, match, url)
-
-    def get_metadata(self, page):
-        title, pos = text.extract(page, "<title>", "</title>")
-        manga_id, pos = text.extract(page, 'data-mid="', '"', pos)
-        chapter_id, pos = text.extract(page, 'data-cid="', '"', pos)
-        match = re.match(r"Reading (.+) \(([^)]+)\) Hentai(?: by (.+))? - "
-                         r"(\d+): (.+) . Page 1 ", title)
-        return {
-            "manga": match.group(1),
-            "manga_id": text.parse_int(manga_id),
-            "chapter": text.parse_int(self.chapter),
-            "chapter_id": text.parse_int(chapter_id),
-            "type": match.group(2),
-            "author": match.group(3),
-            "title": match.group(5),
-            "lang": "en",
-            "language": "English",
-        }
-
-    @staticmethod
-    def get_images(page):
-        """Extract and return a list of all image-urls"""
-        images = text.extract(page, "'images' : ", ",\n")[0]
-        return [
-            ("https://hentaicdn.com/hentai" + part, None)
-            for part in json.loads(images)
-        ]
gallery_dl/extractor/hentaicafe.py
@@ -25,10 +25,10 @@ class HentaicafeChapterExtractor(foolslide.FoolslideChapterExtractor):
     })
     root = "https://hentai.cafe"
 
-    def get_metadata(self, page):
+    def metadata(self, page):
         info = text.unescape(text.extract(page, '<title>', '</title>')[0])
         manga, _, chapter_string = info.partition(" :: ")
-        return self.parse_chapter_url(self.url, {
+        return self.parse_chapter_url(self.chapter_url, {
             "manga": manga,
             "chapter_string": chapter_string.rstrip(" :"),
         })
@@ -58,7 +58,7 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor):
     reverse = False
 
     def chapters(self, page):
-        if "/manga/series/" in self.url:
+        if "/manga/series/" in self.manga_url:
             chapters = foolslide.FoolslideMangaExtractor.chapters(self, page)
             chapters.reverse()
             return chapters
gallery_dl/extractor/hentaifox.py
@@ -19,7 +19,7 @@ class HentaifoxGalleryExtractor(ChapterExtractor):
     filename_fmt = "{category}_{gallery_id}_{page:>03}.{extension}"
     directory_fmt = ("{category}", "{gallery_id} {title}")
     archive_fmt = "{gallery_id}_{page}"
-    pattern = r"(?:https?://)?(?:www\.)?hentaifox\.com/gallery/(\d+)"
+    pattern = r"(?:https?://)?(?:www\.)?hentaifox\.com(/gallery/(\d+))"
     test = ("https://hentaifox.com/gallery/56622/", {
         "pattern": r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg",
         "count": 24,
@@ -28,11 +28,10 @@ class HentaifoxGalleryExtractor(ChapterExtractor):
     root = "https://hentaifox.com"
 
     def __init__(self, match):
-        self.gallery_id = match.group(1)
-        url = "{}/gallery/{}".format(self.root, self.gallery_id)
-        ChapterExtractor.__init__(self, match, url)
+        ChapterExtractor.__init__(self, match)
+        self.gallery_id = match.group(2)
 
-    def get_metadata(self, page):
+    def metadata(self, page):
         title, pos = text.extract(page, "<h1>", "</h1>")
         data = text.extract_all(page, (
             ("parodies" , ">Parodies:" , "</a></span>"),
@@ -51,7 +50,7 @@ class HentaifoxGalleryExtractor(ChapterExtractor):
         data["lang"] = "en"
         return data
 
-    def get_images(self, page):
+    def images(self, page):
         return [
             (text.urljoin(self.root, url.replace("t.", ".")), None)
             for url in text.extract_iter(page, 'data-src="', '"')
gallery_dl/extractor/hentaihere.py
@@ -14,11 +14,55 @@ import json
 import re
 
 
-class HentaihereMangaExtractor(MangaExtractor):
-    """Extractor for hmanga from hentaihere.com"""
+class HentaihereBase():
+    """Base class for hentaihere extractors"""
     category = "hentaihere"
-    pattern = r"(?:https?://)?(?:www\.)?(hentaihere\.com/m/S\d+)/?$"
-    scheme = "https"
+    root = "https://hentaihere.com"
+
+
+class HentaihereChapterExtractor(HentaihereBase, ChapterExtractor):
+    """Extractor for a single manga chapter from hentaihere.com"""
+    archive_fmt = "{chapter_id}_{page}"
+    pattern = r"(?:https?://)?(?:www\.)?hentaihere\.com/m/S(\d+)/(\d+)"
+    test = ("https://hentaihere.com/m/S13812/1/1/", {
+        "url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
+        "keyword": "e9382a9be337abce3db2b1132e85751379dc05c5",
+    })
+
+    def __init__(self, match):
+        self.manga_id, self.chapter = match.groups()
+        url = "{}/m/S{}/{}/1".format(self.root, self.manga_id, self.chapter)
+        ChapterExtractor.__init__(self, match, url)
+
+    def metadata(self, page):
+        title = text.extract(page, "<title>", "</title>")[0]
+        chapter_id = text.extract(page, 'report/C', '"')[0]
+        pattern = r"Page 1 \| (.+) \(([^)]+)\) - Chapter \d+: (.+) by (.+) at "
+        match = re.match(pattern, title)
+        return {
+            "manga": match.group(1),
+            "manga_id": text.parse_int(self.manga_id),
+            "chapter": text.parse_int(self.chapter),
+            "chapter_id": text.parse_int(chapter_id),
+            "type": match.group(2),
+            "title": match.group(3),
+            "author": match.group(4),
+            "lang": "en",
+            "language": "English",
+        }
+
+    @staticmethod
+    def images(page):
+        images = text.extract(page, "var rff_imageList = ", ";")[0]
+        return [
+            ("https://hentaicdn.com/hentai" + part, None)
+            for part in json.loads(images)
+        ]
+
+
+class HentaihereMangaExtractor(HentaihereBase, MangaExtractor):
+    """Extractor for hmanga from hentaihere.com"""
+    pattern = r"(?:https?://)?(?:www\.)?hentaihere\.com(/m/S\d+)/?$"
     test = (
         ("https://hentaihere.com/m/S13812", {
             "url": "d1ba6e28bb2162e844f8559c2b2725ba0a093559",
@@ -33,7 +77,7 @@ class HentaihereMangaExtractor(MangaExtractor):
     def chapters(self, page):
         results = []
         manga_id = text.parse_int(
-            self.url.rstrip("/").rpartition("/")[2][1:])
+            self.manga_url.rstrip("/").rpartition("/")[2][1:])
         manga, pos = text.extract(
             page, '<span itemprop="name">', '</span>')
         mtype, pos = text.extract(
@@ -54,45 +98,3 @@ class HentaihereMangaExtractor(MangaExtractor):
             "chapter": text.parse_int(chapter),
             "title": title, "lang": "en", "language": "English",
         }))
-
-
-class HentaihereChapterExtractor(ChapterExtractor):
-    """Extractor for a single manga chapter from hentaihere.com"""
-    category = "hentaihere"
-    archive_fmt = "{chapter_id}_{page}"
-    pattern = r"(?:https?://)?(?:www\.)?hentaihere\.com/m/S(\d+)/(\d+)"
-    test = ("https://hentaihere.com/m/S13812/1/1/", {
-        "url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
-        "keyword": "e9382a9be337abce3db2b1132e85751379dc05c5",
-    })
-
-    def __init__(self, match):
-        self.manga_id, self.chapter = match.groups()
-        url = "https://hentaihere.com/m/S{}/{}/1".format(
-            self.manga_id, self.chapter)
-        ChapterExtractor.__init__(self, match, url)
-
-    def get_metadata(self, page):
-        title = text.extract(page, "<title>", "</title>")[0]
-        chapter_id = text.extract(page, 'report/C', '"')[0]
-        pattern = r"Page 1 \| (.+) \(([^)]+)\) - Chapter \d+: (.+) by (.+) at "
-        match = re.match(pattern, title)
-        return {
-            "manga": match.group(1),
-            "manga_id": text.parse_int(self.manga_id),
-            "chapter": text.parse_int(self.chapter),
-            "chapter_id": text.parse_int(chapter_id),
-            "type": match.group(2),
-            "title": match.group(3),
-            "author": match.group(4),
-            "lang": "en",
-            "language": "English",
-        }
-
-    @staticmethod
-    def get_images(page):
-        images = text.extract(page, "var rff_imageList = ", ";")[0]
-        return [
-            ("https://hentaicdn.com/hentai" + part, None)
-            for part in json.loads(images)
-        ]
gallery_dl/extractor/hitomi.py
@@ -38,8 +38,9 @@ class HitomiGalleryExtractor(ChapterExtractor):
         url = "https://hitomi.la/galleries/{}.html".format(self.gid)
         ChapterExtractor.__init__(self, match, url)
 
-    def get_metadata(self, page, extr=text.extract):
+    def metadata(self, page):
         pos = page.index('<h1><a href="/reader/')
+        extr = text.extract
         title , pos = extr(page, '.html">', '<', pos)
         artist, pos = extr(page, '<h2>', '</h2>', pos)
         group , pos = extr(page, '<td>Group</td><td>', '</td>', pos)
@@ -65,7 +66,7 @@ class HitomiGalleryExtractor(ChapterExtractor):
             "tags": self._prepare(tags),
         }
 
-    def get_images(self, page):
+    def images(self, page):
         # see https://ltn.hitomi.la/common.js
         frontends = 2
         offset = self.gid % frontends if self.gid % 10 != 1 else 0
gallery_dl/extractor/kissmanga.py
@@ -67,41 +67,6 @@ class KissmangaBase():
         return data
 
 
-class KissmangaMangaExtractor(KissmangaBase, MangaExtractor):
-    """Extractor for manga from kissmanga.com"""
-    pattern = (r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com"
-               r"(/Manga/[^/?&#]+/?)$")
-    test = (
-        ("https://kissmanga.com/Manga/Dropout", {
-            "url": "9e3a6f715b229aa3fafa42a1d5da5d65614cb532",
-            "keyword": "32b09711c28b481845acc32e3bb6054cfc90224d",
-        }),
-        ("https://kissmanga.com/manga/feng-shen-ji"),  # lowercase
-    )
-
-    def __init__(self, match):
-        MangaExtractor.__init__(self, match, self.root + match.group(1))
-
-    def chapters(self, page):
-        results = []
-        manga, pos = text.extract(page, ' class="barTitle">', '\ninformation')
-        page , pos = text.extract(page, ' class="listing">', '</table>', pos)
-        manga = manga.strip()
-        needle = '" title="Read ' + manga + ' '
-        manga = text.unescape(manga)
-
-        for item in text.extract_iter(page, '<a href="', ' online">'):
-            url, _, chapter = item.partition(needle)
-            data = {
-                "manga": manga, "chapter_string": chapter,
-                "chapter_id": text.parse_int(url.rpartition("=")[2]),
-                "lang": "en", "language": "English",
-            }
-            self.parse_chapter_string(data)
-            results.append((self.root + url, data))
-        return results
-
-
 class KissmangaChapterExtractor(KissmangaBase, ChapterExtractor):
     """Extractor for manga-chapters from kissmanga.com"""
     pattern = (r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com"
@@ -127,11 +92,11 @@ class KissmangaChapterExtractor(KissmangaBase, ChapterExtractor):
     )
 
     def __init__(self, match):
-        ChapterExtractor.__init__(self, match, self.root + match.group(1))
+        ChapterExtractor.__init__(self, match)
         self.chapter_id = match.group(2)
         self.session.headers["Referer"] = self.root
 
-    def get_metadata(self, page):
+    def metadata(self, page):
         title = text.extract(page, "<title>", "</title>")[0].strip()
         manga, cinfo = title.split("\n")[1:3]
         data = {
@@ -143,7 +108,7 @@ class KissmangaChapterExtractor(KissmangaBase, ChapterExtractor):
         }
         return self.parse_chapter_string(data)
 
-    def get_images(self, page):
+    def images(self, page):
         self.session.headers["Referer"] = None
         try:
             key = self.build_aes_key(page)
@@ -211,3 +176,35 @@ class KissmangaChapterExtractor(KissmangaBase, ChapterExtractor):
             pos = script.index(var)
         lst = text.extract(script, "=", ";", pos)[0]
         return ast.literal_eval(lst.strip())[int(idx)]
+
+
+class KissmangaMangaExtractor(KissmangaBase, MangaExtractor):
+    """Extractor for manga from kissmanga.com"""
+    pattern = (r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com"
+               r"(/Manga/[^/?&#]+/?)$")
+    test = (
+        ("https://kissmanga.com/Manga/Dropout", {
+            "url": "9e3a6f715b229aa3fafa42a1d5da5d65614cb532",
+            "keyword": "32b09711c28b481845acc32e3bb6054cfc90224d",
+        }),
+        ("https://kissmanga.com/manga/feng-shen-ji"),  # lowercase
+    )
+
+    def chapters(self, page):
+        results = []
+        manga, pos = text.extract(page, ' class="barTitle">', '\ninformation')
+        page , pos = text.extract(page, ' class="listing">', '</table>', pos)
+        manga = manga.strip()
+        needle = '" title="Read ' + manga + ' '
+        manga = text.unescape(manga)
+
+        for item in text.extract_iter(page, '<a href="', ' online">'):
+            url, _, chapter = item.partition(needle)
+            data = {
+                "manga": manga, "chapter_string": chapter,
+                "chapter_id": text.parse_int(url.rpartition("=")[2]),
+                "lang": "en", "language": "English",
+            }
+            self.parse_chapter_string(data)
+            results.append((self.root + url, data))
+        return results
gallery_dl/extractor/komikcast.py
@@ -16,7 +16,6 @@ import re
 class KomikcastBase():
     """Base class for komikcast extractors"""
     category = "komikcast"
-    scheme = "https"
     root = "https://komikcast.com"
 
     request = cloudflare.request_func
@@ -62,15 +61,12 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
         }),
     )
 
-    def __init__(self, match):
-        ChapterExtractor.__init__(self, match, self.root + match.group(1))
-
-    def get_metadata(self, page):
+    def metadata(self, page):
         info = text.extract(page, '<b>', "</b>")[0]
         return self.parse_chapter_string(info)
 
     @staticmethod
-    def get_images(page):
+    def images(page):
         readerarea = text.extract(
             page, '<div id="readerarea">', '<div class="navig">')[0]
         return [
@@ -90,8 +86,8 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
 
 class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
     """Extractor for manga from komikcast.com"""
-    pattern = (r"(?:https?://)?(?:www\.)?(komikcast\.com"
-               r"/(?:komik/)?[^/?&#]+/?)$")
+    pattern = (r"(?:https?://)?(?:www\.)?komikcast\.com"
+               r"(/(?:komik/)?[^/?&#]+)/?$")
     test = (
         ("https://komikcast.com/komik/090-eko-to-issho/", {
             "url": "dc798d107697d1f2309b14ca24ca9dba30c6600f",
gallery_dl/extractor/mangadex.py
@@ -66,8 +66,8 @@ class MangadexChapterExtractor(MangadexExtractor):
         self.data = None
 
     def items(self):
-        data = self.get_metadata()
-        imgs = self.get_images()
+        data = self.metadata()
+        imgs = self.images()
         data["count"] = len(imgs)
 
         yield Message.Version, 1
@@ -75,7 +75,7 @@ class MangadexChapterExtractor(MangadexExtractor):
         for data["page"], url in enumerate(imgs, 1):
             yield Message.Url, url, text.nameext_from_url(url, data)
 
-    def get_metadata(self):
+    def metadata(self):
         """Return a dict with general metadata"""
         cdata = self.chapter_data(self.chapter_id)
         mdata = self.manga_data(cdata["manga_id"])
@@ -98,7 +98,7 @@ class MangadexChapterExtractor(MangadexExtractor):
             "language": cdata["lang_name"],
         }
 
-    def get_images(self):
+    def images(self):
         """Return a list of all image URLs"""
         base = self.data["server"] + self.data["hash"] + "/"
         if base.startswith("/"):
gallery_dl/extractor/mangafox.py
@@ -31,7 +31,7 @@ class MangafoxChapterExtractor(ChapterExtractor):
         self.urlbase = self.root + base
         ChapterExtractor.__init__(self, match, self.urlbase + "/1.html")
 
-    def get_metadata(self, page):
+    def metadata(self, page):
         manga, pos = text.extract(page, "<title>", "</title>")
         count, pos = text.extract(
             page, ">", "<", page.find("</select>", pos) - 20)
@@ -49,7 +49,7 @@ class MangafoxChapterExtractor(ChapterExtractor):
             "cid": text.parse_int(cid),
         }
 
-    def get_images(self, page):
+    def images(self, page):
         pnum = 1
         while True:
             url, pos = text.extract(page, '<img src="', '"')
gallery_dl/extractor/mangahere.py
@@ -21,10 +21,72 @@ class MangahereBase():
     url_fmt = mobile_root + "/manga/{}/{}.html"
 
 
+class MangahereChapterExtractor(MangahereBase, ChapterExtractor):
+    """Extractor for manga-chapters from mangahere.cc"""
+    pattern = (r"(?:https?://)?(?:www\.|m\.)?mangahere\.c[co]/manga/"
+               r"([^/]+(?:/v0*(\d+))?/c([^/?&#]+))")
+    test = (
+        ("https://www.mangahere.cc/manga/dongguo_xiaojie/c004.2/", {
+            "keyword": "6407556817bd1fd2bdc8dee3fd2a718f5724ddc0",
+            "content": "708d475f06893b88549cbd30df1e3f9428f2c884",
+        }),
+        ("http://www.mangahere.co/manga/dongguo_xiaojie/c003.2/"),
+        ("http://m.mangahere.co/manga/dongguo_xiaojie/c003.2/"),
+    )
+
+    def __init__(self, match):
+        self.part, self.volume, self.chapter = match.groups()
+        url = self.url_fmt.format(self.part, 1)
+        ChapterExtractor.__init__(self, match, url)
+
+    def metadata(self, page):
+        pos = page.index("</select>")
+        count , pos = text.extract(page, ">", "<", pos - 20)
+        manga_id , pos = text.extract(page, "series_id = ", ";", pos)
+        chapter_id, pos = text.extract(page, "chapter_id = ", ";", pos)
+        manga , pos = text.extract(page, '"name":"', '"', pos)
+        chapter, dot, minor = self.chapter.partition(".")
+
+        return {
+            "manga": text.unescape(manga),
+            "manga_id": text.parse_int(manga_id),
+            "title": self._get_title(),
+            "volume": text.parse_int(self.volume),
+            "chapter": text.parse_int(chapter),
+            "chapter_minor": dot + minor,
+            "chapter_id": text.parse_int(chapter_id),
+            "count": text.parse_int(count),
+            "lang": "en",
+            "language": "English",
+        }
+
+    def images(self, page):
+        pnum = 1
+
+        while True:
+            url, pos = text.extract(page, '<img src="', '"')
+            yield url, None
+            url, pos = text.extract(page, ' src="', '"', pos)
+            yield url, None
+            pnum += 2
+            page = self.request(self.url_fmt.format(self.part, pnum)).text
+
+    def _get_title(self):
+        url = "{}/manga/{}/".format(self.root, self.part)
+        page = self.request(url).text
+
+        try:
+            pos = page.index(self.part) + len(self.part)
+            pos = page.index(self.part, pos) + len(self.part)
+            return text.extract(page, ' title="', '"', pos)[0]
+        except ValueError:
+            return ""
+
+
 class MangahereMangaExtractor(MangahereBase, MangaExtractor):
     """Extractor for manga from mangahere.cc"""
     pattern = (r"(?:https?://)?(?:www\.|m\.)?mangahere\.c[co]"
-               r"/manga/([^/]+)/?(?:#.*)?$")
+               r"(/manga/[^/]+)/?(?:#.*)?$")
     test = (
         ("https://www.mangahere.cc/manga/aria/", {
             "url": "23ad9256f7392de5973b79a36f6875e9fdcb7563",
@@ -38,10 +100,6 @@ class MangahereMangaExtractor(MangahereBase, MangaExtractor):
         ("https://m.mangahere.co/manga/aria/"),
     )
 
-    def __init__(self, match):
-        url = "{}/manga/{}/".format(self.root, match.group(1))
-        MangaExtractor.__init__(self, match, url)
-
     def chapters(self, page):
         results = []
         manga, pos = text.extract(page, '<meta name="og:title" content="', '"')
@@ -77,67 +135,3 @@ class MangahereMangaExtractor(MangahereBase, MangaExtractor):
                 "lang": "en",
                 "language": "English",
             }))
-
-
-class MangahereChapterExtractor(MangahereBase, ChapterExtractor):
-    """Extractor for manga-chapters from mangahere.cc"""
-    pattern = (r"(?:https?://)?(?:www\.|m\.)?mangahere\.c[co]/manga/"
-               r"([^/]+(?:/v0*(\d+))?/c([^/?&#]+))")
-    test = (
-        ("https://www.mangahere.cc/manga/dongguo_xiaojie/c004.2/", {
-            "keyword": "6407556817bd1fd2bdc8dee3fd2a718f5724ddc0",
-            "content": "708d475f06893b88549cbd30df1e3f9428f2c884",
-        }),
-        ("http://www.mangahere.co/manga/dongguo_xiaojie/c003.2/"),
-        ("http://m.mangahere.co/manga/dongguo_xiaojie/c003.2/"),
-    )
-
-    def __init__(self, match):
-        self.part, self.volume, self.chapter = match.groups()
-        url = self.url_fmt.format(self.part, 1)
-        ChapterExtractor.__init__(self, match, url)
-
-    def get_metadata(self, page):
-        """Collect metadata for extractor-job"""
-        pos = page.index("</select>")
-        count , pos = text.extract(page, ">", "<", pos - 20)
-        manga_id , pos = text.extract(page, "series_id = ", ";", pos)
-        chapter_id, pos = text.extract(page, "chapter_id = ", ";", pos)
-        manga , pos = text.extract(page, '"name":"', '"', pos)
-        chapter, dot, minor = self.chapter.partition(".")
-
-        return {
-            "manga": text.unescape(manga),
-            "manga_id": text.parse_int(manga_id),
-            "title": self._get_title(),
-            "volume": text.parse_int(self.volume),
-            "chapter": text.parse_int(chapter),
-            "chapter_minor": dot + minor,
-            "chapter_id": text.parse_int(chapter_id),
-            "count": text.parse_int(count),
-            "lang": "en",
-            "language": "English",
-        }
-
-    def get_images(self, page):
-        """Yield all image-urls for this chapter"""
-        pnum = 1
-
-        while True:
-            url, pos = text.extract(page, '<img src="', '"')
-            yield url, None
-            url, pos = text.extract(page, ' src="', '"', pos)
-            yield url, None
-            pnum += 2
-            page = self.request(self.url_fmt.format(self.part, pnum)).text
-
-    def _get_title(self):
-        url = "{}/manga/{}/".format(self.root, self.part)
-        page = self.request(url).text
-
-        try:
-            pos = page.index(self.part) + len(self.part)
-            pos = page.index(self.part, pos) + len(self.part)
-            return text.extract(page, ' title="', '"', pos)[0]
-        except ValueError:
-            return ""
gallery_dl/extractor/mangapanda.py
@@ -17,15 +17,6 @@ class MangapandaBase():
     root = "https://www.mangapanda.com"
 
 
-class MangapandaMangaExtractor(MangapandaBase, MangareaderMangaExtractor):
-    """Extractor for manga from mangapanda.com"""
-    pattern = r"(?:https?://)?((?:www\.)?mangapanda\.com/[^/?&#]+)/?$"
-    test = ("https://www.mangapanda.com/mushishi", {
-        "url": "357f965732371cac1990fee8b480f62e29141a42",
-        "keyword": "031b3ea085921c552de017ecbb9b906e462229c9",
-    })
-
-
 class MangapandaChapterExtractor(MangapandaBase, MangareaderChapterExtractor):
     """Extractor for manga-chapters from mangapanda.com"""
     pattern = r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/?&#]+)/(\d+))"
@@ -33,3 +24,12 @@ class MangapandaChapterExtractor(MangapandaBase, MangareaderChapterExtractor):
         "url": "1f633f776e950531ba9b1e81965316458e785261",
         "keyword": "32b5e84017c2bf5f122b339ecf40899e41f18cc9",
     })
+
+
+class MangapandaMangaExtractor(MangapandaBase, MangareaderMangaExtractor):
+    """Extractor for manga from mangapanda.com"""
+    pattern = r"(?:https?://)?(?:www\.)?mangapanda\.com(/[^/?&#]+)/?$"
+    test = ("https://www.mangapanda.com/mushishi", {
+        "url": "357f965732371cac1990fee8b480f62e29141a42",
+        "keyword": "031b3ea085921c552de017ecbb9b906e462229c9",
+    })
gallery_dl/extractor/mangapark.py
@@ -38,6 +38,68 @@ class MangaparkBase():
             data["chapter_minor"] = "v" + value
 
 
+class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
+    """Extractor for manga-chapters from mangapark.me"""
+    pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
+               r"/manga/([^?&#]+/i\d+)")
+    test = (
+        ("https://mangapark.me/manga/gosu/i811615/c55/1", {
+            "count": 50,
+            "keyword": "a18e07119b3317d7e795ef37ee69ce0bbb806350",
+        }),
+        (("https://mangapark.me/manga"
+          "/ad-astra-per-aspera-hata-kenjirou/i662054/c001.2/1"), {
+            "count": 40,
+            "keyword": "3f286631279e2017ce87c1b8db05d7b3f15e2971",
+        }),
+        ("https://mangapark.me/manga/gekkan-shoujo-nozaki-kun/i655476/c70/1", {
+            "count": 15,
+            "keyword": "3abb13e6d1ea7f8808b0ec415270b3afac97f98b",
+        }),
+        ("https://mangapark.net/manga/gosu/i811615/c55/1"),
+        ("https://mangapark.com/manga/gosu/i811615/c55/1"),
+    )
+
+    def __init__(self, match):
+        tld, self.path = match.groups()
+        self.root = self.root_fmt.format(tld)
+        url = "{}/manga/{}?zoom=2".format(self.root, self.path)
+        ChapterExtractor.__init__(self, match, url)
+
+    def metadata(self, page):
+        data = text.extract_all(page, (
+            ("manga_id"  , "var _manga_id = '", "'"),
+            ("chapter_id", "var _book_id = '", "'"),
+            ("stream"    , "var _stream = '", "'"),
+            ("path"      , "var _book_link = '", "'"),
+            ("manga"     , "<h2>", "</h2>"),
+            ("title"     , "</a>", "<"),
+        ), values={"lang": "en", "language": "English"})[0]
+
+        if not data["path"]:
+            raise exception.NotFoundError("chapter")
+        self.parse_chapter_path(data["path"], data)
+
+        data["manga"], _, data["type"] = data["manga"].rpartition(" ")
+        data["manga"] = text.unescape(data["manga"])
+        data["title"] = data["title"].partition(": ")[2]
+        for key in ("manga_id", "chapter_id", "stream"):
+            data[key] = text.parse_int(data[key])
+
+        return data
+
+    def images(self, page):
+        data = json.loads(text.extract(
+            page, "var _load_pages =", ";")[0] or "[]")
+        return [
+            (text.urljoin(self.root, item["u"]), {
+                "width": text.parse_int(item["w"]),
+                "height": text.parse_int(item["h"]),
+            })
+            for item in data
+        ]
+
+
 class MangaparkMangaExtractor(MangaparkBase, MangaExtractor):
     """Extractor for manga from mangapark.me"""
     pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
@@ -75,65 +137,3 @@ class MangaparkMangaExtractor(MangaparkBase, MangaExtractor):
             results.append((self.root + path, data.copy()))
 
         return results
-
-
-class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
-    """Extractor for manga-chapters from mangapark.me"""
-    pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
-               r"/manga/([^?&#]+/i\d+)")
-    test = (
-        ("https://mangapark.me/manga/gosu/i811615/c55/1", {
-            "count": 50,
-            "keyword": "a18e07119b3317d7e795ef37ee69ce0bbb806350",
-        }),
-        (("https://mangapark.me/manga"
-          "/ad-astra-per-aspera-hata-kenjirou/i662054/c001.2/1"), {
-            "count": 40,
-            "keyword": "3f286631279e2017ce87c1b8db05d7b3f15e2971",
-        }),
-        ("https://mangapark.me/manga/gekkan-shoujo-nozaki-kun/i655476/c70/1", {
-            "count": 15,
-            "keyword": "3abb13e6d1ea7f8808b0ec415270b3afac97f98b",
-        }),
-        ("https://mangapark.net/manga/gosu/i811615/c55/1"),
-        ("https://mangapark.com/manga/gosu/i811615/c55/1"),
-    )
-
-    def __init__(self, match):
-        tld, self.path = match.groups()
-        self.root = self.root_fmt.format(tld)
-        url = "{}/manga/{}?zoom=2".format(self.root, self.path)
-        ChapterExtractor.__init__(self, match, url)
-
-    def get_metadata(self, page):
-        data = text.extract_all(page, (
-            ("manga_id"  , "var _manga_id = '", "'"),
-            ("chapter_id", "var _book_id = '", "'"),
-            ("stream"    , "var _stream = '", "'"),
-            ("path"      , "var _book_link = '", "'"),
-            ("manga"     , "<h2>", "</h2>"),
-            ("title"     , "</a>", "<"),
-        ), values={"lang": "en", "language": "English"})[0]
-
-        if not data["path"]:
-            raise exception.NotFoundError("chapter")
-        self.parse_chapter_path(data["path"], data)
-
-        data["manga"], _, data["type"] = data["manga"].rpartition(" ")
-        data["manga"] = text.unescape(data["manga"])
-        data["title"] = data["title"].partition(": ")[2]
-        for key in ("manga_id", "chapter_id", "stream"):
-            data[key] = text.parse_int(data[key])
-
-        return data
-
-    def get_images(self, page):
-        data = json.loads(text.extract(
-            page, "var _load_pages =", ";")[0] or "[]")
-        return [
-            (text.urljoin(self.root, item["u"]), {
-                "width": text.parse_int(item["w"]),
-                "height": text.parse_int(item["h"]),
-            })
-            for item in data
-        ]
gallery_dl/extractor/mangareader.py
@@ -32,31 +32,6 @@ class MangareaderBase():
         return data
 
 
-class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
-    """Extractor for manga from mangareader.net"""
-    pattern = r"(?:https?://)?((?:www\.)?mangareader\.net/[^/?&#]+)/?$"
-    reverse = False
-    test = ("https://www.mangareader.net/mushishi", {
-        "url": "bc203b858b4ad76e5d77e39118a7be0350e357da",
-        "keyword": "031b3ea085921c552de017ecbb9b906e462229c9",
-    })
-
-    def chapters(self, page):
-        results = []
-        data = self.parse_page(page, {"lang": "en", "language": "English"})
-
-        needle = '<div class="chico_manga"></div>\n<a href="'
-        pos = page.index('<div id="chapterlist">')
-        while True:
-            url, pos = text.extract(page, needle, '"', pos)
-            if not url:
-                return results
-            data["title"], pos = text.extract(page, '</a> : ', '</td>', pos)
-            data["date"] , pos = text.extract(page, '<td>', '</td>', pos)
-            data["chapter"] = text.parse_int(url.rpartition("/")[2])
-            results.append((self.root + url, data.copy()))
-
-
 class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
     """Extractor for manga-chapters from mangareader.net"""
     archive_fmt = "{manga}_{chapter}_{page}"
@@ -68,11 +43,10 @@ class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
     })
 
     def __init__(self, match):
-        self.part, self.url_title, self.chapter = match.groups()
-        ChapterExtractor.__init__(self, match, self.root + self.part)
+        path, self.url_title, self.chapter = match.groups()
+        ChapterExtractor.__init__(self, match, self.root + path)
 
-    def get_metadata(self, chapter_page):
-        """Collect metadata for extractor-job"""
+    def metadata(self, chapter_page):
         page = self.request(self.root + self.url_title).text
         data = self.parse_page(page, {
             "chapter": text.parse_int(self.chapter),
@@ -88,7 +62,7 @@ class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
         )
         return data
 
-    def get_images(self, page):
+    def images(self, page):
         while True:
             next_url, image_url, image_data = self.get_image_metadata(page)
             yield image_url, image_data
@@ -117,3 +91,28 @@ class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
             "width": text.parse_int(width),
             "height": text.parse_int(height),
         }
+
+
+class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
+    """Extractor for manga from mangareader.net"""
+    pattern = r"(?:https?://)?(?:www\.)?mangareader\.net(/[^/?&#]+)/?$"
+    reverse = False
+    test = ("https://www.mangareader.net/mushishi", {
+        "url": "bc203b858b4ad76e5d77e39118a7be0350e357da",
+        "keyword": "031b3ea085921c552de017ecbb9b906e462229c9",
+    })
+
+    def chapters(self, page):
+        results = []
+        data = self.parse_page(page, {"lang": "en", "language": "English"})
+
+        needle = '<div class="chico_manga"></div>\n<a href="'
+        pos = page.index('<div id="chapterlist">')
+        while True:
+            url, pos = text.extract(page, needle, '"', pos)
+            if not url:
+                return results
+            data["title"], pos = text.extract(page, '</a> : ', '</td>', pos)
+            data["date"] , pos = text.extract(page, '<td>', '</td>', pos)
+            data["chapter"] = text.parse_int(url.rpartition("/")[2])
+            results.append((self.root + url, data.copy()))
gallery_dl/extractor/mangastream.py
@@ -16,8 +16,8 @@ class MangastreamChapterExtractor(ChapterExtractor):
     """Extractor for manga-chapters from mangastream.com"""
     category = "mangastream"
     archive_fmt = "{chapter_id}_{page}"
-    pattern = (r"(?:https?://)?(?:www\.)?(?:readms\.net|mangastream\.com)/"
-               r"r(?:ead)?/([^/]*/([^/]+)/(\d+))")
+    pattern = (r"(?:https?://)?(?:www\.)?(?:readms\.net|mangastream\.com)"
+               r"/r(?:ead)?/([^/]*/([^/]+)/(\d+))")
     test = (
         ("https://readms.net/r/onepunch_man/087/4874/1"),
         ("https://mangastream.com/r/onepunch_man/087/4874/1"),
@@ -29,7 +29,7 @@ class MangastreamChapterExtractor(ChapterExtractor):
         url = "{}/r/{}".format(self.root, self.part)
         ChapterExtractor.__init__(self, match, url)
 
-    def get_metadata(self, page):
+    def metadata(self, page):
         manga, pos = text.extract(
             page, '<span class="hidden-xs hidden-sm">', "<")
         pos = page.find(self.part, pos)
@@ -45,7 +45,7 @@ class MangastreamChapterExtractor(ChapterExtractor):
             "language": "English",
         }
 
-    def get_images(self, page):
+    def images(self, page):
         while True:
             pos = page.index(' class="page"')
             next_url = text.extract(page, ' href="', '"', pos)[0]
gallery_dl/extractor/ngomik.py
@@ -17,17 +17,13 @@ class NgomikChapterExtractor(ChapterExtractor):
     category = "ngomik"
     root = "http://ngomik.in"
     pattern = (r"(?:https?://)?(?:www\.)?ngomik\.in"
-               r"/([^/?&#]+-chapter-[^/?&#]+)")
+               r"(/[^/?&#]+-chapter-[^/?&#]+)")
     test = ("https://www.ngomik.in/14-sai-no-koi-chapter-1-6/", {
         "url": "8e67fdf751bbc79bc6f4dead7675008ddb8e32a4",
        "keyword": "7cc913ed2b9018afbd3336755d28b8252d83044c",
     })
 
-    def __init__(self, match):
-        url = "{}/{}".format(self.root, match.group(1))
-        ChapterExtractor.__init__(self, match, url)
-
-    def get_metadata(self, page):
+    def metadata(self, page):
         info = text.extract(page, '<title>', "</title>")[0]
         manga, _, chapter = info.partition(" Chapter ")
         chapter, sep, minor = chapter.partition(" ")[0].partition(".")
@@ -41,7 +37,7 @@ class NgomikChapterExtractor(ChapterExtractor):
         }
 
     @staticmethod
-    def get_images(page):
+    def images(page):
         readerarea = text.extract(page, 'id="readerarea"', 'class="chnav"')[0]
         return [
             (text.unescape(url), None)
gallery_dl/extractor/readcomiconline.py
@@ -24,6 +24,42 @@ class ReadcomiconlineBase():
     request = cloudflare.request_func
 
 
+class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
+    """Extractor for comic-issues from readcomiconline.to"""
+    subcategory = "issue"
+    pattern = (r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to"
+               r"(/Comic/[^/?&#]+/[^/?&#]+\?id=(\d+))")
+    test = ("https://readcomiconline.to/Comic/W-i-t-c-h/Issue-130?id=22289", {
+        "url": "2bbab6ec4fbc05d269cca420a82a9b5acda28682",
+        "keyword": "c6de1c9c8a307dc4be56783c4ac6f1338ffac6fc",
+    })
+
+    def __init__(self, match):
+        ChapterExtractor.__init__(self, match)
+        self.issue_id = match.group(2)
+
+    def metadata(self, page):
+        comic, pos = text.extract(page, " - Read\r\n ", "\r\n")
+        iinfo, pos = text.extract(page, " ", "\r\n", pos)
+        match = re.match(r"(?:Issue )?#(\d+)|(.+)", iinfo)
+        return {
+            "comic": comic,
+            "issue": match.group(1) or match.group(2),
+            "issue_id": text.parse_int(self.issue_id),
+            "lang": "en",
+            "language": "English",
+        }
+
+    def images(self, page):
+        self.session.headers["Referer"] = None
+        return [
+            (url, None)
+            for url in text.extract_iter(
+                page, 'lstImages.push("', '"'
+            )
+        ]
+
+
 class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
     """Extractor for comics from readcomiconline.to"""
     subcategory = "comic"
@@ -40,9 +76,6 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
         }),
     )
 
-    def __init__(self, match):
-        MangaExtractor.__init__(self, match, self.root + match.group(1))
-
     def chapters(self, page):
         results = []
         comic, pos = text.extract(page, ' class="barTitle">', '<')
@@ -63,39 +96,3 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
                 "lang": "en", "language": "English",
             }))
         return results
-
-
-class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
-    """Extractor for comic-issues from readcomiconline.to"""
-    subcategory = "issue"
-    pattern = (r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to"
-               r"(/Comic/[^/?&#]+/[^/?&#]+\?id=(\d+))")
-    test = ("https://readcomiconline.to/Comic/W-i-t-c-h/Issue-130?id=22289", {
-        "url": "2bbab6ec4fbc05d269cca420a82a9b5acda28682",
-        "keyword": "c6de1c9c8a307dc4be56783c4ac6f1338ffac6fc",
-    })
-
-    def __init__(self, match):
-        ChapterExtractor.__init__(self, match, self.root + match.group(1))
-        self.issue_id = match.group(2)
-
-    def get_metadata(self, page):
-        comic, pos = text.extract(page, " - Read\r\n ", "\r\n")
-        iinfo, pos = text.extract(page, " ", "\r\n", pos)
-        match = re.match(r"(?:Issue )?#(\d+)|(.+)", iinfo)
-        return {
-            "comic": comic,
-            "issue": match.group(1) or match.group(2),
-            "issue_id": text.parse_int(self.issue_id),
-            "lang": "en",
-            "language": "English",
-        }
-
-    def get_images(self, page):
-        self.session.headers["Referer"] = None
-        return [
-            (url, None)
-            for url in text.extract_iter(
-                page, 'lstImages.push("', '"'
-            )
-        ]
gallery_dl/extractor/senmanga.py
@@ -41,14 +41,14 @@ class SenmangaChapterExtractor(Extractor):
         self.session.headers["Referer"] = self.chapter_url
 
     def items(self):
-        data = self.get_job_metadata()
+        data = self.metadata()
         yield Message.Version, 1
         yield Message.Directory, data
         for data["page"] in range(1, data["count"]+1):
             data["extension"] = None
             yield Message.Url, self.img_url + str(data["page"]), data
 
-    def get_job_metadata(self):
+    def metadata(self):
         """Collect metadata for extractor-job"""
         page = self.request(self.chapter_url).text
         self.session.cookies.clear()
gallery_dl/extractor/simplyhentai.py
@@ -42,7 +42,7 @@ class SimplyhentaiGalleryExtractor(ChapterExtractor):
         ChapterExtractor.__init__(self, match, url)
         self.session.headers["Referer"] = url
 
-    def get_metadata(self, page):
+    def metadata(self, page):
         extr = text.extract
         title , pos = extr(page, '<meta property="og:title" content="', '"')
         if not title:
@@ -68,9 +68,10 @@ class SimplyhentaiGalleryExtractor(ChapterExtractor):
             "date": text.remove_html(date),
         }
 
-    def get_images(self, _):
+    def images(self, _):
+        url = self.chapter_url + "/all-pages"
         headers = {"Accept": "application/json"}
-        images = self.request(self.url + "/all-pages", headers=headers).json()
+        images = self.request(url, headers=headers).json()
         return [
             (urls["full"], {"image_id": text.parse_int(image_id)})
             for image_id, urls in sorted(images.items())
gallery_dl/extractor/tsumino.py
@@ -80,7 +80,7 @@ class TsuminoGalleryExtractor(TsuminoBase, ChapterExtractor):
         url = "{}/Book/Info/{}".format(self.root, self.gallery_id)
         ChapterExtractor.__init__(self, match, url)
 
-    def get_metadata(self, page):
+    def metadata(self, page):
         extr = text.extract
         title, pos = extr(page, '"og:title" content="', '"')
         thumb, pos = extr(page, '"og:image" content="', '"', pos)
@@ -116,9 +116,9 @@ class TsuminoGalleryExtractor(TsuminoBase, ChapterExtractor):
             "lang": "en",
         }
 
-    def get_images(self, page):
+    def images(self, page):
         url = "{}/Read/Load/?q={}".format(self.root, self.gallery_id)
-        headers = {"Referer": self.url}
+        headers = {"Referer": self.chapter_url}
         response = self.request(url, headers=headers, expect=(404,))
 
         if response.status_code == 404: