')[0]
return [
@@ -90,8 +86,8 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
"""Extractor for manga from komikcast.com"""
- pattern = (r"(?:https?://)?(?:www\.)?(komikcast\.com"
- r"/(?:komik/)?[^/?]+/?)$")
+ pattern = (r"(?:https?://)?(?:www\.)?komikcast\.com"
+ r"(/(?:komik/)?[^/?]+)/?$")
test = (
("https://komikcast.com/komik/090-eko-to-issho/", {
"url": "dc798d107697d1f2309b14ca24ca9dba30c6600f",
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index d32bded1..78fb2bdb 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -66,8 +66,8 @@ class MangadexChapterExtractor(MangadexExtractor):
self.data = None
def items(self):
- data = self.get_metadata()
- imgs = self.get_images()
+ data = self.metadata()
+ imgs = self.images()
data["count"] = len(imgs)
yield Message.Version, 1
@@ -75,7 +75,7 @@ class MangadexChapterExtractor(MangadexExtractor):
for data["page"], url in enumerate(imgs, 1):
yield Message.Url, url, text.nameext_from_url(url, data)
- def get_metadata(self):
+ def metadata(self):
"""Return a dict with general metadata"""
cdata = self.chapter_data(self.chapter_id)
mdata = self.manga_data(cdata["manga_id"])
@@ -98,7 +98,7 @@ class MangadexChapterExtractor(MangadexExtractor):
"language": cdata["lang_name"],
}
- def get_images(self):
+ def images(self):
"""Return a list of all image URLs"""
base = self.data["server"] + self.data["hash"] + "/"
if base.startswith("/"):
diff --git a/gallery_dl/extractor/mangafox.py b/gallery_dl/extractor/mangafox.py
index f1291096..a57eb954 100644
--- a/gallery_dl/extractor/mangafox.py
+++ b/gallery_dl/extractor/mangafox.py
@@ -31,7 +31,7 @@ class MangafoxChapterExtractor(ChapterExtractor):
self.urlbase = self.root + base
ChapterExtractor.__init__(self, match, self.urlbase + "/1.html")
- def get_metadata(self, page):
+ def metadata(self, page):
manga, pos = text.extract(page, "
", "")
count, pos = text.extract(
page, ">", "<", page.find("", pos) - 20)
@@ -49,7 +49,7 @@ class MangafoxChapterExtractor(ChapterExtractor):
"cid": text.parse_int(cid),
}
- def get_images(self, page):
+ def images(self, page):
pnum = 1
while True:
url, pos = text.extract(page, '
")
+ count , pos = text.extract(page, ">", "<", pos - 20)
+ manga_id , pos = text.extract(page, "series_id = ", ";", pos)
+ chapter_id, pos = text.extract(page, "chapter_id = ", ";", pos)
+ manga , pos = text.extract(page, '"name":"', '"', pos)
+ chapter, dot, minor = self.chapter.partition(".")
+
+ return {
+ "manga": text.unescape(manga),
+ "manga_id": text.parse_int(manga_id),
+ "title": self._get_title(),
+ "volume": text.parse_int(self.volume),
+ "chapter": text.parse_int(chapter),
+ "chapter_minor": dot + minor,
+ "chapter_id": text.parse_int(chapter_id),
+ "count": text.parse_int(count),
+ "lang": "en",
+ "language": "English",
+ }
+
+ def images(self, page):
+ pnum = 1
+
+ while True:
+ url, pos = text.extract(page, '
")
- count , pos = text.extract(page, ">", "<", pos - 20)
- manga_id , pos = text.extract(page, "series_id = ", ";", pos)
- chapter_id, pos = text.extract(page, "chapter_id = ", ";", pos)
- manga , pos = text.extract(page, '"name":"', '"', pos)
- chapter, dot, minor = self.chapter.partition(".")
-
- return {
- "manga": text.unescape(manga),
- "manga_id": text.parse_int(manga_id),
- "title": self._get_title(),
- "volume": text.parse_int(self.volume),
- "chapter": text.parse_int(chapter),
- "chapter_minor": dot + minor,
- "chapter_id": text.parse_int(chapter_id),
- "count": text.parse_int(count),
- "lang": "en",
- "language": "English",
- }
-
- def get_images(self, page):
- """Yield all image-urls for this chapter"""
- pnum = 1
-
- while True:
- url, pos = text.extract(page, '
", ""),
+ ("title" , "", "<"),
+ ), values={"lang": "en", "language": "English"})[0]
+
+ if not data["path"]:
+ raise exception.NotFoundError("chapter")
+ self.parse_chapter_path(data["path"], data)
+
+ data["manga"], _, data["type"] = data["manga"].rpartition(" ")
+ data["manga"] = text.unescape(data["manga"])
+ data["title"] = data["title"].partition(": ")[2]
+ for key in ("manga_id", "chapter_id", "stream"):
+ data[key] = text.parse_int(data[key])
+
+ return data
+
+ def images(self, page):
+ data = json.loads(text.extract(
+ page, "var _load_pages =", ";")[0] or "[]")
+ return [
+ (text.urljoin(self.root, item["u"]), {
+ "width": text.parse_int(item["w"]),
+ "height": text.parse_int(item["h"]),
+ })
+ for item in data
+ ]
+
+
class MangaparkMangaExtractor(MangaparkBase, MangaExtractor):
"""Extractor for manga from mangapark.me"""
pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
@@ -75,65 +137,3 @@ class MangaparkMangaExtractor(MangaparkBase, MangaExtractor):
results.append((self.root + path, data.copy()))
return results
-
-
-class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
- """Extractor for manga-chapters from mangapark.me"""
- pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
- r"/manga/([^?]+/i\d+)")
- test = (
- ("https://mangapark.me/manga/gosu/i811615/c55/1", {
- "count": 50,
- "keyword": "a18e07119b3317d7e795ef37ee69ce0bbb806350",
- }),
- (("https://mangapark.me/manga"
- "/ad-astra-per-aspera-hata-kenjirou/i662054/c001.2/1"), {
- "count": 40,
- "keyword": "3f286631279e2017ce87c1b8db05d7b3f15e2971",
- }),
- ("https://mangapark.me/manga/gekkan-shoujo-nozaki-kun/i655476/c70/1", {
- "count": 15,
- "keyword": "3abb13e6d1ea7f8808b0ec415270b3afac97f98b",
- }),
- ("https://mangapark.net/manga/gosu/i811615/c55/1"),
- ("https://mangapark.com/manga/gosu/i811615/c55/1"),
- )
-
- def __init__(self, match):
- tld, self.path = match.groups()
- self.root = self.root_fmt.format(tld)
- url = "{}/manga/{}?zoom=2".format(self.root, self.path)
- ChapterExtractor.__init__(self, match, url)
-
- def get_metadata(self, page):
- data = text.extract_all(page, (
- ("manga_id" , "var _manga_id = '", "'"),
- ("chapter_id", "var _book_id = '", "'"),
- ("stream" , "var _stream = '", "'"),
- ("path" , "var _book_link = '", "'"),
- ("manga" , "
", "
"),
- ("title" , "", "<"),
- ), values={"lang": "en", "language": "English"})[0]
-
- if not data["path"]:
- raise exception.NotFoundError("chapter")
- self.parse_chapter_path(data["path"], data)
-
- data["manga"], _, data["type"] = data["manga"].rpartition(" ")
- data["manga"] = text.unescape(data["manga"])
- data["title"] = data["title"].partition(": ")[2]
- for key in ("manga_id", "chapter_id", "stream"):
- data[key] = text.parse_int(data[key])
-
- return data
-
- def get_images(self, page):
- data = json.loads(text.extract(
- page, "var _load_pages =", ";")[0] or "[]")
- return [
- (text.urljoin(self.root, item["u"]), {
- "width": text.parse_int(item["w"]),
- "height": text.parse_int(item["h"]),
- })
- for item in data
- ]
diff --git a/gallery_dl/extractor/mangareader.py b/gallery_dl/extractor/mangareader.py
index ed922116..93f087bc 100644
--- a/gallery_dl/extractor/mangareader.py
+++ b/gallery_dl/extractor/mangareader.py
@@ -32,31 +32,6 @@ class MangareaderBase():
return data
-class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
- """Extractor for manga from mangareader.net"""
- pattern = r"(?:https?://)?((?:www\.)?mangareader\.net/[^/?]+)/?$"
- reverse = False
- test = ("https://www.mangareader.net/mushishi", {
- "url": "bc203b858b4ad76e5d77e39118a7be0350e357da",
- "keyword": "031b3ea085921c552de017ecbb9b906e462229c9",
- })
-
- def chapters(self, page):
- results = []
- data = self.parse_page(page, {"lang": "en", "language": "English"})
-
- needle = '
\n
')
- while True:
- url, pos = text.extract(page, needle, '"', pos)
- if not url:
- return results
- data["title"], pos = text.extract(page, ' : ', '', pos)
- data["date"] , pos = text.extract(page, '
', ' | ', pos)
- data["chapter"] = text.parse_int(url.rpartition("/")[2])
- results.append((self.root + url, data.copy()))
-
-
class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
"""Extractor for manga-chapters from mangareader.net"""
archive_fmt = "{manga}_{chapter}_{page}"
@@ -68,11 +43,10 @@ class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
})
def __init__(self, match):
- self.part, self.url_title, self.chapter = match.groups()
- ChapterExtractor.__init__(self, match, self.root + self.part)
+ path, self.url_title, self.chapter = match.groups()
+ ChapterExtractor.__init__(self, match, self.root + path)
- def get_metadata(self, chapter_page):
- """Collect metadata for extractor-job"""
+ def metadata(self, chapter_page):
page = self.request(self.root + self.url_title).text
data = self.parse_page(page, {
"chapter": text.parse_int(self.chapter),
@@ -88,7 +62,7 @@ class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
)
return data
- def get_images(self, page):
+ def images(self, page):
while True:
next_url, image_url, image_data = self.get_image_metadata(page)
yield image_url, image_data
@@ -117,3 +91,28 @@ class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
"width": text.parse_int(width),
"height": text.parse_int(height),
}
+
+
+class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
+ """Extractor for manga from mangareader.net"""
+ pattern = r"(?:https?://)?(?:www\.)?mangareader\.net(/[^/?]+)/?$"
+ reverse = False
+ test = ("https://www.mangareader.net/mushishi", {
+ "url": "bc203b858b4ad76e5d77e39118a7be0350e357da",
+ "keyword": "031b3ea085921c552de017ecbb9b906e462229c9",
+ })
+
+ def chapters(self, page):
+ results = []
+ data = self.parse_page(page, {"lang": "en", "language": "English"})
+
+ needle = '
\n
')
+ while True:
+ url, pos = text.extract(page, needle, '"', pos)
+ if not url:
+ return results
+ data["title"], pos = text.extract(page, ' : ', '', pos)
+ data["date"] , pos = text.extract(page, '
', ' | ', pos)
+ data["chapter"] = text.parse_int(url.rpartition("/")[2])
+ results.append((self.root + url, data.copy()))
diff --git a/gallery_dl/extractor/mangastream.py b/gallery_dl/extractor/mangastream.py
index 57a878fc..7ff0239c 100644
--- a/gallery_dl/extractor/mangastream.py
+++ b/gallery_dl/extractor/mangastream.py
@@ -16,8 +16,8 @@ class MangastreamChapterExtractor(ChapterExtractor):
"""Extractor for manga-chapters from mangastream.com"""
category = "mangastream"
archive_fmt = "{chapter_id}_{page}"
- pattern = (r"(?:https?://)?(?:www\.)?(?:readms\.net|mangastream\.com)/"
- r"r(?:ead)?/([^/]*/([^/]+)/(\d+))")
+ pattern = (r"(?:https?://)?(?:www\.)?(?:readms\.net|mangastream\.com)"
+ r"/r(?:ead)?/([^/]*/([^/]+)/(\d+))")
test = (
("https://readms.net/r/onepunch_man/087/4874/1"),
("https://mangastream.com/r/onepunch_man/087/4874/1"),
@@ -29,7 +29,7 @@ class MangastreamChapterExtractor(ChapterExtractor):
url = "{}/r/{}".format(self.root, self.part)
ChapterExtractor.__init__(self, match, url)
- def get_metadata(self, page):
+ def metadata(self, page):
manga, pos = text.extract(
page, '
', "<")
pos = page.find(self.part, pos)
@@ -45,7 +45,7 @@ class MangastreamChapterExtractor(ChapterExtractor):
"language": "English",
}
- def get_images(self, page):
+ def images(self, page):
while True:
pos = page.index(' class="page"')
next_url = text.extract(page, ' href="', '"', pos)[0]
diff --git a/gallery_dl/extractor/ngomik.py b/gallery_dl/extractor/ngomik.py
index 1f315821..da042756 100644
--- a/gallery_dl/extractor/ngomik.py
+++ b/gallery_dl/extractor/ngomik.py
@@ -17,17 +17,13 @@ class NgomikChapterExtractor(ChapterExtractor):
category = "ngomik"
root = "http://ngomik.in"
pattern = (r"(?:https?://)?(?:www\.)?ngomik\.in"
- r"/([^/?]+-chapter-[^/?]+)")
+ r"(/[^/?]+-chapter-[^/?]+)")
test = ("https://www.ngomik.in/14-sai-no-koi-chapter-1-6/", {
"url": "8e67fdf751bbc79bc6f4dead7675008ddb8e32a4",
"keyword": "7cc913ed2b9018afbd3336755d28b8252d83044c",
})
- def __init__(self, match):
- url = "{}/{}".format(self.root, match.group(1))
- ChapterExtractor.__init__(self, match, url)
-
- def get_metadata(self, page):
+ def metadata(self, page):
info = text.extract(page, '', "")[0]
manga, _, chapter = info.partition(" Chapter ")
chapter, sep, minor = chapter.partition(" ")[0].partition(".")
@@ -41,7 +37,7 @@ class NgomikChapterExtractor(ChapterExtractor):
}
@staticmethod
- def get_images(page):
+ def images(page):
readerarea = text.extract(page, 'id="readerarea"', 'class="chnav"')[0]
return [
(text.unescape(url), None)
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index f03936d7..10deb60d 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -24,6 +24,42 @@ class ReadcomiconlineBase():
request = cloudflare.request_func
+class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
+ """Extractor for comic-issues from readcomiconline.to"""
+ subcategory = "issue"
+ pattern = (r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to"
+ r"(/Comic/[^/?]+/[^/?]+\?id=(\d+))")
+ test = ("https://readcomiconline.to/Comic/W-i-t-c-h/Issue-130?id=22289", {
+ "url": "2bbab6ec4fbc05d269cca420a82a9b5acda28682",
+ "keyword": "c6de1c9c8a307dc4be56783c4ac6f1338ffac6fc",
+ })
+
+ def __init__(self, match):
+ ChapterExtractor.__init__(self, match)
+ self.issue_id = match.group(2)
+
+ def metadata(self, page):
+ comic, pos = text.extract(page, " - Read\r\n ", "\r\n")
+ iinfo, pos = text.extract(page, " ", "\r\n", pos)
+ match = re.match(r"(?:Issue )?#(\d+)|(.+)", iinfo)
+ return {
+ "comic": comic,
+ "issue": match.group(1) or match.group(2),
+ "issue_id": text.parse_int(self.issue_id),
+ "lang": "en",
+ "language": "English",
+ }
+
+ def images(self, page):
+ self.session.headers["Referer"] = None
+ return [
+ (url, None)
+ for url in text.extract_iter(
+ page, 'lstImages.push("', '"'
+ )
+ ]
+
+
class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
"""Extractor for comics from readcomiconline.to"""
subcategory = "comic"
@@ -40,9 +76,6 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
}),
)
- def __init__(self, match):
- MangaExtractor.__init__(self, match, self.root + match.group(1))
-
def chapters(self, page):
results = []
comic, pos = text.extract(page, ' class="barTitle">', '<')
@@ -63,39 +96,3 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
"lang": "en", "language": "English",
}))
return results
-
-
-class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
- """Extractor for comic-issues from readcomiconline.to"""
- subcategory = "issue"
- pattern = (r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to"
- r"(/Comic/[^/?]+/[^/?]+\?id=(\d+))")
- test = ("https://readcomiconline.to/Comic/W-i-t-c-h/Issue-130?id=22289", {
- "url": "2bbab6ec4fbc05d269cca420a82a9b5acda28682",
- "keyword": "c6de1c9c8a307dc4be56783c4ac6f1338ffac6fc",
- })
-
- def __init__(self, match):
- ChapterExtractor.__init__(self, match, self.root + match.group(1))
- self.issue_id = match.group(2)
-
- def get_metadata(self, page):
- comic, pos = text.extract(page, " - Read\r\n ", "\r\n")
- iinfo, pos = text.extract(page, " ", "\r\n", pos)
- match = re.match(r"(?:Issue )?#(\d+)|(.+)", iinfo)
- return {
- "comic": comic,
- "issue": match.group(1) or match.group(2),
- "issue_id": text.parse_int(self.issue_id),
- "lang": "en",
- "language": "English",
- }
-
- def get_images(self, page):
- self.session.headers["Referer"] = None
- return [
- (url, None)
- for url in text.extract_iter(
- page, 'lstImages.push("', '"'
- )
- ]
diff --git a/gallery_dl/extractor/senmanga.py b/gallery_dl/extractor/senmanga.py
index a1734783..736173f3 100644
--- a/gallery_dl/extractor/senmanga.py
+++ b/gallery_dl/extractor/senmanga.py
@@ -41,14 +41,14 @@ class SenmangaChapterExtractor(Extractor):
self.session.headers["Referer"] = self.chapter_url
def items(self):
- data = self.get_job_metadata()
+ data = self.metadata()
yield Message.Version, 1
yield Message.Directory, data
for data["page"] in range(1, data["count"]+1):
data["extension"] = None
yield Message.Url, self.img_url + str(data["page"]), data
- def get_job_metadata(self):
+ def metadata(self):
"""Collect metadata for extractor-job"""
page = self.request(self.chapter_url).text
self.session.cookies.clear()
diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py
index 389cd089..e638a49a 100644
--- a/gallery_dl/extractor/simplyhentai.py
+++ b/gallery_dl/extractor/simplyhentai.py
@@ -42,7 +42,7 @@ class SimplyhentaiGalleryExtractor(ChapterExtractor):
ChapterExtractor.__init__(self, match, url)
self.session.headers["Referer"] = url
- def get_metadata(self, page):
+ def metadata(self, page):
extr = text.extract
title , pos = extr(page, '