mirror of
https://github.com/mikf/gallery-dl.git
synced 2025-01-31 19:51:34 +01:00
remove explicit (sub)category keywords
This commit is contained in:
parent
a347d50ef5
commit
19c2d4ff6f
@ -69,7 +69,6 @@ class BatotoChapterExtractor(AsynchronousExtractor):
|
||||
manga, pos = extr(page, "document.title = '", " - ", pos)
|
||||
match = re.match(r"(Vol.(\d+) )?Ch\.([^:]+)(: (.+))?", cinfo)
|
||||
return {
|
||||
"category": self.category,
|
||||
"token": self.token,
|
||||
"manga": text.unescape(manga),
|
||||
"volume": match.group(2) or "",
|
||||
|
@ -55,13 +55,10 @@ class BooruExtractor(Extractor):
|
||||
def get_job_metadata(self):
|
||||
"""Collect metadata for extractor-job"""
|
||||
# Override this method in derived classes
|
||||
return {
|
||||
"category": self.category,
|
||||
}
|
||||
return {}
|
||||
|
||||
def get_file_metadata(self, data):
|
||||
"""Collect metadata for a downloadable file"""
|
||||
data["category"] = self.category
|
||||
return text.nameext_from_url(self.get_file_url(data), data)
|
||||
|
||||
def get_file_url(self, data):
|
||||
@ -114,10 +111,7 @@ class BooruTagExtractor(BooruExtractor):
|
||||
self.params["tags"] = self.tags
|
||||
|
||||
def get_job_metadata(self):
|
||||
return {
|
||||
"category": self.category,
|
||||
"tags": self.tags,
|
||||
}
|
||||
return {"tags": self.tags}
|
||||
|
||||
|
||||
class BooruPoolExtractor(BooruExtractor):
|
||||
@ -131,10 +125,7 @@ class BooruPoolExtractor(BooruExtractor):
|
||||
self.params["tags"] = "pool:" + self.pool
|
||||
|
||||
def get_job_metadata(self):
|
||||
return {
|
||||
"category": self.category,
|
||||
"pool": self.pool,
|
||||
}
|
||||
return {"pool": self.pool}
|
||||
|
||||
|
||||
class BooruPostExtractor(BooruExtractor):
|
||||
|
@ -21,7 +21,6 @@ class ChanExtractor(Extractor):
|
||||
def __init__(self, board, thread):
|
||||
Extractor.__init__(self)
|
||||
self.metadata = {
|
||||
"category": self.category,
|
||||
"board": board,
|
||||
"thread": thread,
|
||||
}
|
||||
|
@ -30,10 +30,6 @@ class ChronosImageExtractor(Extractor):
|
||||
self.token = match.group(1)
|
||||
|
||||
def items(self):
|
||||
data = {
|
||||
"category": self.category,
|
||||
"token": self.token,
|
||||
}
|
||||
params = {
|
||||
"op": "view",
|
||||
"id": self.token,
|
||||
@ -44,7 +40,7 @@ class ChronosImageExtractor(Extractor):
|
||||
data=params).text
|
||||
url , pos = text.extract(page, '<br><img src="', '"')
|
||||
filename, pos = text.extract(page, ' alt="', '"', pos)
|
||||
text.nameext_from_url(filename, data)
|
||||
data = text.nameext_from_url(filename, {"token": self.token})
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
yield Message.Url, url, data
|
||||
|
@ -57,10 +57,7 @@ class DeviantartUserExtractor(AsynchronousExtractor):
|
||||
|
||||
def get_job_metadata(self):
|
||||
"""Collect metadata for extractor-job"""
|
||||
return {
|
||||
"category": self.category,
|
||||
"artist": self.artist,
|
||||
}
|
||||
return {"artist": self.artist}
|
||||
|
||||
def get_image_metadata(self, image):
|
||||
"""Collect metadata for an image"""
|
||||
@ -127,7 +124,7 @@ class DeviantartImageExtractor(Extractor):
|
||||
('description', '"og:description" content="', '"'),
|
||||
(None , '<span class="tt-w">', ''),
|
||||
('date' , 'title="', '"'),
|
||||
), values={'category': self.category, "index": self.index})[0]
|
||||
), values={"index": self.index})[0]
|
||||
data["description"] = text.unescape(text.unescape(data["description"]))
|
||||
data["artist"] = text.extract(data["url"], "//", ".")[0]
|
||||
data["date"] = text.extract(data["date"], ", ", " in ", len(data["title"]))[0]
|
||||
|
@ -45,7 +45,6 @@ class DoujinmodeChapterExtractor(Extractor):
|
||||
count, pos = text.extract(page, ' class="manga-count">', '</span>')
|
||||
title, pos = text.extract(page, '<h2>', ' Images List</h2>', pos)
|
||||
return {
|
||||
"category": self.category,
|
||||
"gallery-id": self.gid,
|
||||
"title": text.unescape(title),
|
||||
"count": count,
|
||||
|
@ -61,7 +61,6 @@ class DynastyscansChapterExtractor(Extractor):
|
||||
info
|
||||
)
|
||||
return {
|
||||
"category": self.category,
|
||||
"manga": text.unescape(match.group(1)),
|
||||
"chapter": match.group(2) or "",
|
||||
"title": text.unescape(match.group(3) or ""),
|
||||
|
@ -71,7 +71,6 @@ class ExhentaiGalleryExtractor(Extractor):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {
|
||||
"category" : self.category,
|
||||
"gallery-id" : self.gid,
|
||||
"gallery-token": self.token,
|
||||
}
|
||||
|
@ -68,7 +68,6 @@ class HbrowseChapterExtractor(Extractor):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {
|
||||
"category": self.category,
|
||||
'gallery-id': self.gid,
|
||||
"chapter": int(self.chapter[1:]),
|
||||
}
|
||||
|
@ -78,7 +78,6 @@ class Hentai2readChapterExtractor(Extractor):
|
||||
title = text.extract(page, "<title>", "</title>")[0]
|
||||
match = re.match(r"Reading (?:(.+) dj - )?(.+) Hentai - \d+: ", title)
|
||||
return {
|
||||
"category": self.category,
|
||||
"gallery-id": images[0].split("/")[-3],
|
||||
"chapter": self.chapter,
|
||||
"count": len(images),
|
||||
|
@ -44,7 +44,7 @@ class HentaiboxChapterExtractor(Extractor):
|
||||
("title" , 'content="Read or Download ', ' hentai manga from'),
|
||||
("series" , ' the series ', ' with ' + self.count),
|
||||
("language", ' translated pages to ', '.'),
|
||||
), values={"category": self.category, "count": self.count})[0]
|
||||
), values={"count": self.count})[0]
|
||||
data["lang"] = iso639_1.language_to_code(data["language"])
|
||||
return data
|
||||
|
||||
|
@ -60,7 +60,6 @@ class HentaifoundryUserExtractor(Extractor):
|
||||
token, pos = text.extract(page, 'hidden" value="', '"')
|
||||
count, pos = text.extract(page, 'class="active" >Pictures (', ')', pos)
|
||||
return {
|
||||
"category": self.category,
|
||||
"artist": self.artist,
|
||||
"count": count,
|
||||
}, token
|
||||
@ -136,7 +135,6 @@ class HentaifoundryImageExtractor(Extractor):
|
||||
title, pos = text.extract(page, 'Pictures</a> » <span>', '<')
|
||||
url , pos = text.extract(page, '//pictures.hentai-foundry.com', '"', pos)
|
||||
data = {
|
||||
"category": self.category,
|
||||
"artist": self.artist,
|
||||
"index": self.index,
|
||||
"title": text.unescape(title),
|
||||
|
@ -61,7 +61,6 @@ class HitomiGalleryExtractor(Extractor):
|
||||
series, pos = text.extract(page, '.html">', '</a>', pos)
|
||||
lang = lang.capitalize()
|
||||
return {
|
||||
"category": self.category,
|
||||
"gallery-id": self.gid,
|
||||
"title": " ".join(title.split()),
|
||||
"artist": string.capwords(artist),
|
||||
|
@ -44,17 +44,12 @@ class ImagebamGalleryExtractor(AsynchronousExtractor):
|
||||
"""Collect metadata for extractor-job"""
|
||||
url = self.url_base + "/gallery/" + self.gkey
|
||||
page = self.request(url, encoding="utf-8").text
|
||||
data = {
|
||||
"category": self.category,
|
||||
"gallery-key": self.gkey,
|
||||
}
|
||||
data, _ = text.extract_all(page, (
|
||||
return text.extract_all(page, (
|
||||
(None , "<img src='/img/icons/photos.png'", ""),
|
||||
("title" , "'> ", " <"),
|
||||
("count" , "'>", " images"),
|
||||
("first-url", "<a href='http://www.imagebam.com", "'"),
|
||||
), values=data)
|
||||
return data
|
||||
), values={"gallery-key": self.gkey})[0]
|
||||
|
||||
def get_images(self, url):
|
||||
"""Yield all image-urls and -ids for a gallery"""
|
||||
@ -71,7 +66,6 @@ class ImagebamGalleryExtractor(AsynchronousExtractor):
|
||||
yield image_url, image_id
|
||||
|
||||
|
||||
|
||||
class ImagebamImageExtractor(Extractor):
|
||||
"""Extractor for single images from imagebam.com"""
|
||||
category = "imagebam"
|
||||
@ -90,10 +84,9 @@ class ImagebamImageExtractor(Extractor):
|
||||
self.token = match.group(1)
|
||||
|
||||
def items(self):
|
||||
data = {"category": self.category, "token": self.token}
|
||||
page = self.request("http://www.imagebam.com/image/" + self.token).text
|
||||
url = text.extract(page, 'property="og:image" content="', '"')[0]
|
||||
text.nameext_from_url(url, data)
|
||||
url = text.extract(page, 'property="og:image" content="', '"')[0]
|
||||
data = text.nameext_from_url(url, {"token": self.token})
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
yield Message.Url, url, data
|
||||
|
@ -48,7 +48,7 @@ class ImagefapGalleryExtractor(Extractor):
|
||||
("title" , '<title>Porn pics of ', ' (Page 1)</title>'),
|
||||
("uploader", '>Uploaded by ', '</font>'),
|
||||
("count" , ' 1 of ', ' pics"'),
|
||||
), values={"category": self.category, "gallery-id": self.gid})
|
||||
), values={"gallery-id": self.gid})
|
||||
self.image_id = text.extract(page, 'id="img_ed_', '"', pos)[0]
|
||||
data["title"] = text.unescape(data["title"])
|
||||
return data
|
||||
@ -101,7 +101,6 @@ class ImagefapImageExtractor(Extractor):
|
||||
"""Collect metadata for extractor-job"""
|
||||
parts = info["contentUrl"].rsplit("/", 3)
|
||||
return text.nameext_from_url(parts[3], {
|
||||
"category": self.category,
|
||||
"title": text.unescape(info["name"]),
|
||||
"section": info["section"],
|
||||
"uploader": info["author"],
|
||||
|
@ -34,7 +34,6 @@ class ImagetwistImageExtractor(Extractor):
|
||||
filename, pos = text.extract(page, ' alt="', '"', pos)
|
||||
userid , pos = text.extract(url , '/', '/', 29)
|
||||
data = {
|
||||
"category": self.category,
|
||||
"token": self.token,
|
||||
"user": userid,
|
||||
}
|
||||
|
@ -47,7 +47,6 @@ class ImgboxGalleryExtractor(AsynchronousExtractor):
|
||||
title = text.extract(page, "<h1>", "</h1>")[0]
|
||||
parts = title.rsplit(" - ", maxsplit=1)
|
||||
return {
|
||||
"category": self.category,
|
||||
"gallery-key": self.key,
|
||||
"title": text.unescape(parts[0]),
|
||||
"count": parts[1][:-7],
|
||||
@ -91,8 +90,7 @@ class ImgboxImageExtractor(Extractor):
|
||||
page = self.request("http://imgbox.com/" + self.key).text
|
||||
url , pos = text.extract(page, 'src="http://i.', '"')
|
||||
filename, pos = text.extract(page, ' title="', '"', pos)
|
||||
data = {"category": self.category, "image-key": self.key}
|
||||
text.nameext_from_url(filename, data)
|
||||
data = text.nameext_from_url(filename, {"image-key": self.key})
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
yield Message.Url, "http://i." + url, data
|
||||
|
@ -30,12 +30,11 @@ class ImgcandyImageExtractor(Extractor):
|
||||
self.token, self.filename = match.groups()
|
||||
|
||||
def items(self):
|
||||
data = {"category": self.category, "token": self.token}
|
||||
params = {"imgContinue": "Continue+to+image+...+"}
|
||||
page = self.request("http://imgcandy.net/img-" + self.token + ".html",
|
||||
method="post", data=params).text
|
||||
url = text.extract(page, "<img class='centred' src='", "'")[0]
|
||||
text.nameext_from_url(self.filename or url, data)
|
||||
data = text.nameext_from_url(self.filename or url, {"token": self.token})
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
yield Message.Url, url, data
|
||||
|
@ -61,8 +61,6 @@ class ImgchiliImageExtractor(ImgchiliExtractor):
|
||||
parts = name2.split("in the gallery ")
|
||||
name = parts[0] if not parts[0].endswith("...") else name1
|
||||
return text.nameext_from_url(name, {
|
||||
"category": self.category,
|
||||
"subcategory": self.subcategory,
|
||||
"image-id": self.match.group(1),
|
||||
"title": text.unescape(parts[-1]) if len(parts) > 1 else ""
|
||||
})
|
||||
@ -86,8 +84,6 @@ class ImgchiliAlbumExtractor(ImgchiliExtractor):
|
||||
def get_job_metadata(self, page):
|
||||
title = text.extract(page, "<h1>", "</h1>")[0]
|
||||
return {
|
||||
"category": self.category,
|
||||
"subcategory": self.subcategory,
|
||||
"title": text.unescape(title),
|
||||
"key": self.match.group(1),
|
||||
}
|
||||
|
@ -61,4 +61,4 @@ class ImgthGalleryExtractor(Extractor):
|
||||
("date" , 'created on ', ' by <'),
|
||||
(None , 'href="/users/', ''),
|
||||
("user" , '>', '<'),
|
||||
), values={"category": self.category, "gallery-id": self.gid})[0]
|
||||
), values={"gallery-id": self.gid})[0]
|
||||
|
@ -29,11 +29,10 @@ class ImgtrexImageExtractor(Extractor):
|
||||
self.token = match.group(1)
|
||||
|
||||
def items(self):
|
||||
data = {"category": self.category, "token": self.token}
|
||||
page = self.request("http://imgtrex.com/" + self.token).text
|
||||
filename, pos = text.extract(page, '<title>ImgTrex: ', '</title>')
|
||||
url , pos = text.extract(page, '<br>\n<img src="', '"', pos)
|
||||
text.nameext_from_url(filename, data)
|
||||
data = text.nameext_from_url(filename, {"token": self.token})
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
yield Message.Url, url, data
|
||||
|
@ -43,16 +43,12 @@ class ImgurAlbumExtractor(Extractor):
|
||||
def get_job_metadata(self):
|
||||
"""Collect metadata for extractor-job"""
|
||||
page = self.request("https://imgur.com/a/" + self.album).text
|
||||
data = {
|
||||
"category": self.category,
|
||||
"album-key": self.album,
|
||||
}
|
||||
text.extract_all(page, (
|
||||
data = text.extract_all(page, (
|
||||
('title', '<meta property="og:title" content="', '"'),
|
||||
('count', '"num_images":"', '"'),
|
||||
('date' , '"datetime":"', ' '),
|
||||
('time' , '', '"'),
|
||||
), values=data)
|
||||
), values={"album-key": self.album})[0]
|
||||
data["title"] = text.unescape(data["title"])
|
||||
return data
|
||||
|
||||
|
@ -30,12 +30,12 @@ class ImgytImageExtractor(Extractor):
|
||||
self.token = match.group(1)
|
||||
|
||||
def items(self):
|
||||
data = {"category": self.category, "token": self.token}
|
||||
params = {"imgContinue": "Continue+to+image+...+"}
|
||||
page = self.request("https://img.yt/img-" + self.token + ".html",
|
||||
method="post", data=params).text
|
||||
url , pos = text.extract(page, "<img class='centred' src='", "'")
|
||||
filename, pos = text.extract(page, " alt='", "'", pos)
|
||||
data = {"token": self.token}
|
||||
text.nameext_from_url(filename + splitext(url)[1], data)
|
||||
if url.startswith("http:"):
|
||||
url = "https:" + url[5:]
|
||||
|
@ -45,7 +45,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
|
||||
("size" , "Total Filesize: <b>", "</b>"),
|
||||
("date" , "Date added: <b>", "</b>"),
|
||||
("type" , "Album type: <b>", "</b>"),
|
||||
), values={"category": self.category})[0]
|
||||
))[0]
|
||||
|
||||
def get_album_tracks(self, page):
|
||||
"""Collect url and metadata for all tracks of a soundtrack"""
|
||||
|
@ -81,7 +81,6 @@ class KissmangaChapterExtractor(KissmangaExtractor):
|
||||
r"(?:Vol.0*(\d+) )?(?:Ch.)?0*(\d+)(?:\.0*(\d+))?(?:: (.+))?", cinfo)
|
||||
chminor = match.group(3)
|
||||
return {
|
||||
"category": self.category,
|
||||
"manga": manga,
|
||||
"volume": match.group(1) or "",
|
||||
"chapter": match.group(2),
|
||||
|
@ -50,7 +50,7 @@ class LusciousAlbumExtractor(Extractor):
|
||||
(None , '<p>Language:', ''),
|
||||
("language", '\n ', ' '),
|
||||
("artist" , 'rtist: ', '\n'),
|
||||
), values={"category": self.category, "gallery-id": self.gid})[0]
|
||||
), values={"gallery-id": self.gid})[0]
|
||||
data["lang"] = iso639_1.language_to_code(data["language"])
|
||||
return data
|
||||
|
||||
|
@ -76,7 +76,6 @@ class MangahereChapterExtractor(AsynchronousExtractor):
|
||||
count, pos = text.extract(page, '>', '<', pos-30)
|
||||
manga = re.match(r"(.+) \d+(\.\d+)? - Read .+ Chapter \d+(\.\d+)? Online", manga).group(1)
|
||||
return {
|
||||
"category": self.category,
|
||||
"manga": text.unescape(manga),
|
||||
# "title": TODO,
|
||||
"volume": self.volume or "",
|
||||
|
@ -80,7 +80,6 @@ class MangamintChapterExtractor(Extractor):
|
||||
chid , pos = text.extract(page, r'"identifier":"node\/', '"', pos)
|
||||
match = re.match(r"(.+) (\d+)(\.\d+)?$", manga)
|
||||
return {
|
||||
"category": self.category,
|
||||
"manga": match.group(1),
|
||||
"chapter": match.group(2),
|
||||
"chapter-minor": match.group(3) or "",
|
||||
|
@ -80,7 +80,6 @@ class MangaparkChapterExtractor(Extractor):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {
|
||||
"category": self.category,
|
||||
"version": self.version,
|
||||
"volume": self.volume or "",
|
||||
"chapter": self.chapter,
|
||||
|
@ -74,7 +74,6 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
|
||||
"""Collect metadata for extractor-job"""
|
||||
page = self.request(self.url_base + self.url_title).text
|
||||
data = {
|
||||
"category": self.category,
|
||||
"chapter": self.chapter,
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
|
@ -67,7 +67,6 @@ class MangashareChapterExtractor(AsynchronousExtractor):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {
|
||||
"category": self.category,
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
}
|
||||
|
@ -46,7 +46,6 @@ class MangastreamChapterExtractor(AsynchronousExtractor):
|
||||
title, pos = text.extract(page, ' - ', '<', pos)
|
||||
count, pos = text.extract(page, 'Last Page (', ')', pos)
|
||||
data = {
|
||||
"category": self.category,
|
||||
"manga": manga,
|
||||
"chapter": text.unquote(self.chapter),
|
||||
"chapter-id": self.ch_id,
|
||||
|
@ -57,7 +57,6 @@ class NhentaiGalleryExtractor(Extractor):
|
||||
title_en = ginfo["title"].get("english", "")
|
||||
title_ja = ginfo["title"].get("japanese", "")
|
||||
return {
|
||||
"category": self.category,
|
||||
"gallery-id": self.gid,
|
||||
"upload-date": ginfo["upload_date"],
|
||||
"media-id": ginfo["media_id"],
|
||||
|
@ -40,10 +40,7 @@ class NijieExtractor(AsynchronousExtractor):
|
||||
|
||||
def get_job_metadata(self):
|
||||
"""Collect metadata for extractor-job"""
|
||||
return {
|
||||
"category": self.category,
|
||||
"artist-id": self.artist_id,
|
||||
}
|
||||
return {"artist-id": self.artist_id}
|
||||
|
||||
def get_image_ids(self):
|
||||
"""Collect all image-ids for a specific artist"""
|
||||
|
@ -26,8 +26,6 @@ class PinterestExtractor(Extractor):
|
||||
img = pin["image"]["original"]
|
||||
url = img["url"]
|
||||
data = {
|
||||
"category": self.category,
|
||||
"subcategory": self.subcategory,
|
||||
"pin-id": pin["id"],
|
||||
"note": pin["note"],
|
||||
"width": img["width"],
|
||||
@ -90,8 +88,6 @@ class PinterestBoardExtractor(PinterestExtractor):
|
||||
def data_from_board(self, board):
|
||||
"""Get metadata from a board-object"""
|
||||
data = {
|
||||
"category": self.category,
|
||||
"subcategory": self.subcategory,
|
||||
"user": self.user,
|
||||
"board-id": board["id"],
|
||||
"board": board["name"],
|
||||
|
@ -92,7 +92,6 @@ class PixivUserExtractor(Extractor):
|
||||
"""Prepare a work-dictionary with additional keywords"""
|
||||
user = work["user"]
|
||||
url = work["image_urls"]["large"]
|
||||
work["category"] = self.category
|
||||
work["artist-id"] = user["id"]
|
||||
work["artist-name"] = user["name"]
|
||||
work["artist-nick"] = user["account"]
|
||||
@ -130,7 +129,6 @@ class PixivUserExtractor(Extractor):
|
||||
if not user:
|
||||
user = self.api.user(self.artist_id)["response"][0]
|
||||
return {
|
||||
"category": self.category,
|
||||
"artist-id": user["id"],
|
||||
"artist-name": user["name"],
|
||||
"artist-nick": user["account"],
|
||||
|
@ -63,7 +63,6 @@ class PowermangaChapterExtractor(Extractor):
|
||||
json_data, pos = text.extract(page, 'var pages = ', ';', pos)
|
||||
match = re.match(r"(\w+ (\d+)([^:+]*)(?:: (.*))?|[^:]+)", chapter)
|
||||
return {
|
||||
"category": self.category,
|
||||
"manga": text.unescape(manga),
|
||||
"chapter": match.group(2) or match.group(1),
|
||||
"chapter-minor": match.group(3) or "",
|
||||
|
@ -42,10 +42,7 @@ class SankakuTagExtractor(AsynchronousExtractor):
|
||||
|
||||
def get_job_metadata(self):
|
||||
"""Collect metadata for extractor-job"""
|
||||
return {
|
||||
"category": self.category,
|
||||
"tags": self.tags,
|
||||
}
|
||||
return {"tags": self.tags}
|
||||
|
||||
def get_images(self):
|
||||
params = {
|
||||
|
@ -44,10 +44,7 @@ class SeigaImageExtractor(Extractor):
|
||||
|
||||
def get_job_metadata(self):
|
||||
"""Collect metadata for extractor-job"""
|
||||
return {
|
||||
"category": self.category,
|
||||
"image-id": self.image_id,
|
||||
}
|
||||
return {"image-id": self.image_id}
|
||||
|
||||
def get_image_url(self, image_id):
|
||||
"""Get url for an image with id 'image_id'"""
|
||||
|
@ -52,7 +52,6 @@ class SenmangaChapterExtractor(Extractor):
|
||||
manga, pos = text.extract(title, '| Raw | ', ' | Chapter ')
|
||||
chapter, pos = text.extract(title, '', ' | Page ', pos)
|
||||
return {
|
||||
"category": self.category,
|
||||
"manga": text.unescape(manga.replace("-", " ")),
|
||||
"chapter": chapter,
|
||||
"count": count,
|
||||
|
@ -82,7 +82,6 @@ class SpectrumnexusChapterExtractor(AsynchronousExtractor):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {
|
||||
"category": self.category,
|
||||
"chapter": self.chapter or "",
|
||||
"volume": self.volume or "",
|
||||
"identifier": self.identifier.replace("+", " "),
|
||||
|
@ -47,7 +47,6 @@ class TumblrUserExtractor(Extractor):
|
||||
def get_job_metadata(self, image_data):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = next(image_data)
|
||||
data["category"] = self.category
|
||||
data["user"] = self.user
|
||||
del data["cname"]
|
||||
del data["description"]
|
||||
|
@ -30,15 +30,11 @@ class TurboimagehostImageExtractor(Extractor):
|
||||
|
||||
def items(self):
|
||||
page = self.request("http://www.turboimagehost.com/p/" + self.part).text
|
||||
data = {
|
||||
"category": self.category,
|
||||
"token": self.token,
|
||||
}
|
||||
text.extract_all(page, (
|
||||
data = text.extract_all(page, (
|
||||
('width' , 'var imWidth = ', ';'),
|
||||
('height', 'var imHeight = ', ';'),
|
||||
('url' , '<a href="http://www.turboimagehost.com"><img src="', '"'),
|
||||
), values=data)
|
||||
), values={"token": self.token})[0]
|
||||
text.nameext_from_url(data["url"], data)
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
|
Loading…
x
Reference in New Issue
Block a user