mirror of
https://github.com/mikf/gallery-dl.git
synced 2025-01-31 11:41:35 +01:00
update extractor class hierarchies
- let the GalleryExtractor class inherit directly from Extractor
- make ChapterExtractor a subclass of GalleryExtractor
- change enumeration field names of GalleryExtractors to 'num'
This commit is contained in:
parent
7ebd984e8d
commit
1693d97bd3
@ -21,12 +21,12 @@ class AdultempireGalleryExtractor(GalleryExtractor):
|
||||
test = (
|
||||
("https://www.adultempire.com/5998/gallery.html", {
|
||||
"range": "1",
|
||||
"keyword": "25c8171f5623678491a0d7bdf38a7a6ebfa4a361",
|
||||
"keyword": "5b3266e69801db0d78c22181da23bc102886e027",
|
||||
"content": "5c6beb31e5e3cdc90ee5910d5c30f9aaec977b9e",
|
||||
}),
|
||||
("https://www.adultdvdempire.com/5683/gallery.html", {
|
||||
"url": "b12cd1a65cae8019d837505adb4d6a2c1ed4d70d",
|
||||
"keyword": "9634eb16cc6dbf347eb9dcdd9b2a499dfd04d167",
|
||||
"keyword": "8d448d79c4ac5f5b10a3019d5b5129ddb43655e5",
|
||||
}),
|
||||
)
|
||||
|
||||
@ -55,4 +55,4 @@ class AdultempireGalleryExtractor(GalleryExtractor):
|
||||
if len(urls) < 24:
|
||||
return
|
||||
params["page"] += 1
|
||||
page = self.request(self.chapter_url, params=params).text
|
||||
page = self.request(self.gallery_url, params=params).text
|
||||
|
@ -249,24 +249,21 @@ class Extractor():
|
||||
yield test
|
||||
|
||||
|
||||
class ChapterExtractor(Extractor):
|
||||
class GalleryExtractor(Extractor):
|
||||
|
||||
subcategory = "chapter"
|
||||
directory_fmt = (
|
||||
"{category}", "{manga}",
|
||||
"{volume:?v/ />02}c{chapter:>03}{chapter_minor:?//}{title:?: //}")
|
||||
filename_fmt = (
|
||||
"{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
|
||||
archive_fmt = (
|
||||
"{manga}_{chapter}{chapter_minor}_{page}")
|
||||
subcategory = "gallery"
|
||||
filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
|
||||
directory_fmt = ("{category}", "{gallery_id} {title}")
|
||||
archive_fmt = "{gallery_id}_{num}"
|
||||
enum = "num"
|
||||
|
||||
def __init__(self, match, url=None):
|
||||
Extractor.__init__(self, match)
|
||||
self.chapter_url = url or self.root + match.group(1)
|
||||
self.gallery_url = self.root + match.group(1) if url is None else url
|
||||
|
||||
def items(self):
|
||||
self.login()
|
||||
page = self.request(self.chapter_url).text
|
||||
page = self.request(self.gallery_url).text
|
||||
data = self.metadata(page)
|
||||
imgs = self.images(page)
|
||||
|
||||
@ -284,7 +281,7 @@ class ChapterExtractor(Extractor):
|
||||
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
for data["page"], (url, imgdata) in images:
|
||||
for data[self.enum], (url, imgdata) in images:
|
||||
if imgdata:
|
||||
data.update(imgdata)
|
||||
yield Message.Url, url, text.nameext_from_url(url, data)
|
||||
@ -299,6 +296,19 @@ class ChapterExtractor(Extractor):
|
||||
"""Return a list of all (image-url, metadata)-tuples"""
|
||||
|
||||
|
||||
class ChapterExtractor(GalleryExtractor):
|
||||
|
||||
subcategory = "chapter"
|
||||
directory_fmt = (
|
||||
"{category}", "{manga}",
|
||||
"{volume:?v/ />02}c{chapter:>03}{chapter_minor:?//}{title:?: //}")
|
||||
filename_fmt = (
|
||||
"{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
|
||||
archive_fmt = (
|
||||
"{manga}_{chapter}{chapter_minor}_{page}")
|
||||
enum = "page"
|
||||
|
||||
|
||||
class MangaExtractor(Extractor):
|
||||
|
||||
subcategory = "manga"
|
||||
@ -333,14 +343,6 @@ class MangaExtractor(Extractor):
|
||||
"""Return a list of all (chapter-url, metadata)-tuples"""
|
||||
|
||||
|
||||
class GalleryExtractor(ChapterExtractor):
|
||||
|
||||
subcategory = "gallery"
|
||||
filename_fmt = "{category}_{gallery_id}_{page:>03}.{extension}"
|
||||
directory_fmt = ("{category}", "{gallery_id} {title}")
|
||||
archive_fmt = "{gallery_id}_{page}"
|
||||
|
||||
|
||||
class AsynchronousMixin():
|
||||
"""Run info extraction in a separate thread"""
|
||||
|
||||
|
@ -44,14 +44,13 @@ class FoolslideBase(SharedConfigMixin):
|
||||
|
||||
class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor):
|
||||
"""Base class for chapter extractors for FoOlSlide based sites"""
|
||||
directory_fmt = (
|
||||
"{category}", "{manga}", "{chapter_string}")
|
||||
directory_fmt = ("{category}", "{manga}", "{chapter_string}")
|
||||
archive_fmt = "{id}"
|
||||
pattern_fmt = r"(/read/[^/?&#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"
|
||||
decode = "default"
|
||||
|
||||
def items(self):
|
||||
page = self.request(self.chapter_url).text
|
||||
page = self.request(self.gallery_url).text
|
||||
data = self.metadata(page)
|
||||
imgs = self.images(page)
|
||||
|
||||
@ -77,7 +76,7 @@ class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor):
|
||||
def metadata(self, page):
|
||||
extr = text.extract_from(page)
|
||||
extr('<h1 class="tbtitle dnone">', '')
|
||||
return self.parse_chapter_url(self.chapter_url, {
|
||||
return self.parse_chapter_url(self.gallery_url, {
|
||||
"manga" : text.unescape(extr('title="', '"')).strip(),
|
||||
"chapter_string": text.unescape(extr('title="', '"')),
|
||||
})
|
||||
|
@ -42,7 +42,7 @@ class FuskatorGalleryExtractor(GalleryExtractor):
|
||||
|
||||
def metadata(self, page):
|
||||
headers = {
|
||||
"Referer" : self.chapter_url,
|
||||
"Referer" : self.gallery_url,
|
||||
"X-Requested-With": "XMLHttpRequest",
|
||||
}
|
||||
auth = self.request(
|
||||
|
@ -31,10 +31,10 @@ class HentaicafeChapterExtractor(foolslide.FoolslideChapterExtractor):
|
||||
info = text.unescape(text.extract(page, '<title>', '</title>')[0])
|
||||
manga, _, chapter_string = info.partition(" :: ")
|
||||
|
||||
data = self._data(self.chapter_url.split("/")[5])
|
||||
data = self._data(self.gallery_url.split("/")[5])
|
||||
data["manga"] = manga
|
||||
data["chapter_string"] = chapter_string.rstrip(" :")
|
||||
return self.parse_chapter_url(self.chapter_url, data)
|
||||
return self.parse_chapter_url(self.gallery_url, data)
|
||||
|
||||
@memcache(keyarg=1)
|
||||
def _data(self, manga):
|
||||
|
@ -24,7 +24,7 @@ class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor):
|
||||
test = ("https://hentaifox.com/gallery/56622/", {
|
||||
"pattern": r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg",
|
||||
"count": 24,
|
||||
"keyword": "38f8517605feb6854d48833297da6b05c6541b69",
|
||||
"keyword": "903ebe227d85e484460382fc6cbab42be7a244d5",
|
||||
})
|
||||
|
||||
def __init__(self, match):
|
||||
|
@ -22,7 +22,7 @@ class HentainexusGalleryExtractor(GalleryExtractor):
|
||||
test = (
|
||||
("https://hentainexus.com/view/5688", {
|
||||
"url": "746d0043e20030f1171aae5ea113176607302517",
|
||||
"keyword": "b05986369fbaf29cfa08b118960d92c49e59524b",
|
||||
"keyword": "9512cf5f258130e5f75de9954d7a13217c2405e7",
|
||||
}),
|
||||
("https://hentainexus.com/read/5688"),
|
||||
)
|
||||
|
@ -21,7 +21,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
|
||||
test = (
|
||||
("https://hitomi.la/galleries/867789.html", {
|
||||
"pattern": r"https://aa.hitomi.la/galleries/867789/\d+.jpg",
|
||||
"keyword": "d097a8db8e810045131b4510c41714004f9eff3a",
|
||||
"keyword": "6701f8f588f119ef84cd29bdf99a399417b0a6a2",
|
||||
"count": 16,
|
||||
}),
|
||||
("https://hitomi.la/galleries/1401410.html", {
|
||||
@ -89,7 +89,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
|
||||
base = "https://" + subdomain + ".hitomi.la/galleries/"
|
||||
|
||||
# set Referer header before image downloads (#239)
|
||||
self.session.headers["Referer"] = self.chapter_url
|
||||
self.session.headers["Referer"] = self.gallery_url
|
||||
|
||||
# handle Game CG galleries with scenes (#321)
|
||||
scenes = text.extract(page, "var scene_indexes = [", "]")[0]
|
||||
|
@ -17,14 +17,14 @@ class NsfwalbumAlbumExtractor(GalleryExtractor):
|
||||
category = "nsfwalbum"
|
||||
subcategory = "album"
|
||||
root = "https://nsfwalbum.com"
|
||||
filename_fmt = "{album_id}_{page:>03}_{id}.{extension}"
|
||||
filename_fmt = "{album_id}_{num:>03}_{id}.{extension}"
|
||||
directory_fmt = ("{category}", "{album_id} {title}")
|
||||
archive_fmt = "{id}"
|
||||
pattern = r"(?:https?://)?(?:www\.)?nsfwalbum\.com(/album/(\d+))"
|
||||
test = ("https://nsfwalbum.com/album/401611", {
|
||||
"range": "1-5",
|
||||
"url": "b0481fc7fad5982da397b6359fbed8421b8ba284",
|
||||
"keyword": "fc1ad4ebcd6d4cf32da15203120112b8bcf12eec",
|
||||
"keyword": "e98f9b0d473c00000831618d0235863b1dd78294",
|
||||
})
|
||||
|
||||
def __init__(self, match):
|
||||
|
@ -23,7 +23,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
|
||||
(("https://original-work.simply-hentai.com"
|
||||
"/amazon-no-hiyaku-amazon-elixir"), {
|
||||
"url": "21613585ae5ec2f69ea579e9713f536fceab5bd5",
|
||||
"keyword": "bf75f9ff0fb60756b1b9b92403526a72d9178d23",
|
||||
"keyword": "9e87a0973553b2922ddee37958b8f5d87910af72",
|
||||
}),
|
||||
("https://www.simply-hentai.com/notfound", {
|
||||
"exception": exception.GalleryDLException,
|
||||
@ -43,7 +43,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
|
||||
extr = text.extract_from(page)
|
||||
split = text.split_html
|
||||
|
||||
self.chapter_url = extr('<link rel="canonical" href="', '"')
|
||||
self.gallery_url = extr('<link rel="canonical" href="', '"')
|
||||
title = extr('<meta property="og:title" content="', '"')
|
||||
if not title:
|
||||
raise exception.NotFoundError("gallery")
|
||||
@ -63,7 +63,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
|
||||
return data
|
||||
|
||||
def images(self, _):
|
||||
url = self.chapter_url + "/all-pages"
|
||||
url = self.gallery_url + "/all-pages"
|
||||
headers = {"Accept": "application/json"}
|
||||
images = self.request(url, headers=headers).json()
|
||||
return [
|
||||
|
@ -109,7 +109,7 @@ class TsuminoGalleryExtractor(TsuminoBase, GalleryExtractor):
|
||||
|
||||
def images(self, page):
|
||||
url = "{}/Read/Index/{}?page=1".format(self.root, self.gallery_id)
|
||||
headers = {"Referer": self.chapter_url}
|
||||
headers = {"Referer": self.gallery_url}
|
||||
response = self.request(url, headers=headers, fatal=False)
|
||||
|
||||
if "/Auth/" in response.url:
|
||||
|
@ -6,4 +6,4 @@
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
__version__ = "1.10.7-dev"
|
||||
__version__ = "1.11.0-dev"
|
||||
|
@ -27,6 +27,8 @@ TRAVIS_SKIP = {
|
||||
# temporary issues, etc.
|
||||
BROKEN = {
|
||||
"8chan",
|
||||
"hentaifoundry",
|
||||
"luscious",
|
||||
"mangapark",
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user