1
0
Mirror of https://github.com/mikf/gallery-dl.git, synced 2025-01-31 19:51:34 +01:00

remove explicit (sub)category keywords

This commit is contained in:
Mike Fährmann 2016-09-25 14:22:07 +02:00
parent a347d50ef5
commit 19c2d4ff6f
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
43 changed files with 26 additions and 104 deletions

View File

@ -69,7 +69,6 @@ class BatotoChapterExtractor(AsynchronousExtractor):
manga, pos = extr(page, "document.title = '", " - ", pos)
match = re.match(r"(Vol.(\d+) )?Ch\.([^:]+)(: (.+))?", cinfo)
return {
"category": self.category,
"token": self.token,
"manga": text.unescape(manga),
"volume": match.group(2) or "",

View File

@ -55,13 +55,10 @@ class BooruExtractor(Extractor):
def get_job_metadata(self):
"""Collect metadata for extractor-job"""
# Override this method in derived classes
return {
"category": self.category,
}
return {}
def get_file_metadata(self, data):
"""Collect metadata for a downloadable file"""
data["category"] = self.category
return text.nameext_from_url(self.get_file_url(data), data)
def get_file_url(self, data):
@ -114,10 +111,7 @@ class BooruTagExtractor(BooruExtractor):
self.params["tags"] = self.tags
def get_job_metadata(self):
return {
"category": self.category,
"tags": self.tags,
}
return {"tags": self.tags}
class BooruPoolExtractor(BooruExtractor):
@ -131,10 +125,7 @@ class BooruPoolExtractor(BooruExtractor):
self.params["tags"] = "pool:" + self.pool
def get_job_metadata(self):
return {
"category": self.category,
"pool": self.pool,
}
return {"pool": self.pool}
class BooruPostExtractor(BooruExtractor):

View File

@ -21,7 +21,6 @@ class ChanExtractor(Extractor):
def __init__(self, board, thread):
Extractor.__init__(self)
self.metadata = {
"category": self.category,
"board": board,
"thread": thread,
}

View File

@ -30,10 +30,6 @@ class ChronosImageExtractor(Extractor):
self.token = match.group(1)
def items(self):
data = {
"category": self.category,
"token": self.token,
}
params = {
"op": "view",
"id": self.token,
@ -44,7 +40,7 @@ class ChronosImageExtractor(Extractor):
data=params).text
url , pos = text.extract(page, '<br><img src="', '"')
filename, pos = text.extract(page, ' alt="', '"', pos)
text.nameext_from_url(filename, data)
data = text.nameext_from_url(filename, {"token": self.token})
yield Message.Version, 1
yield Message.Directory, data
yield Message.Url, url, data

View File

@ -57,10 +57,7 @@ class DeviantartUserExtractor(AsynchronousExtractor):
def get_job_metadata(self):
"""Collect metadata for extractor-job"""
return {
"category": self.category,
"artist": self.artist,
}
return {"artist": self.artist}
def get_image_metadata(self, image):
"""Collect metadata for an image"""
@ -127,7 +124,7 @@ class DeviantartImageExtractor(Extractor):
('description', '"og:description" content="', '"'),
(None , '<span class="tt-w">', ''),
('date' , 'title="', '"'),
), values={'category': self.category, "index": self.index})[0]
), values={"index": self.index})[0]
data["description"] = text.unescape(text.unescape(data["description"]))
data["artist"] = text.extract(data["url"], "//", ".")[0]
data["date"] = text.extract(data["date"], ", ", " in ", len(data["title"]))[0]

View File

@ -45,7 +45,6 @@ class DoujinmodeChapterExtractor(Extractor):
count, pos = text.extract(page, ' class="manga-count">', '</span>')
title, pos = text.extract(page, '<h2>', ' Images List</h2>', pos)
return {
"category": self.category,
"gallery-id": self.gid,
"title": text.unescape(title),
"count": count,

View File

@ -61,7 +61,6 @@ class DynastyscansChapterExtractor(Extractor):
info
)
return {
"category": self.category,
"manga": text.unescape(match.group(1)),
"chapter": match.group(2) or "",
"title": text.unescape(match.group(3) or ""),

View File

@ -71,7 +71,6 @@ class ExhentaiGalleryExtractor(Extractor):
def get_job_metadata(self, page):
"""Collect metadata for extractor-job"""
data = {
"category" : self.category,
"gallery-id" : self.gid,
"gallery-token": self.token,
}

View File

@ -68,7 +68,6 @@ class HbrowseChapterExtractor(Extractor):
def get_job_metadata(self, page):
"""Collect metadata for extractor-job"""
data = {
"category": self.category,
'gallery-id': self.gid,
"chapter": int(self.chapter[1:]),
}

View File

@ -78,7 +78,6 @@ class Hentai2readChapterExtractor(Extractor):
title = text.extract(page, "<title>", "</title>")[0]
match = re.match(r"Reading (?:(.+) dj - )?(.+) Hentai - \d+: ", title)
return {
"category": self.category,
"gallery-id": images[0].split("/")[-3],
"chapter": self.chapter,
"count": len(images),

View File

@ -44,7 +44,7 @@ class HentaiboxChapterExtractor(Extractor):
("title" , 'content="Read or Download ', ' hentai manga from'),
("series" , ' the series ', ' with ' + self.count),
("language", ' translated pages to ', '.'),
), values={"category": self.category, "count": self.count})[0]
), values={"count": self.count})[0]
data["lang"] = iso639_1.language_to_code(data["language"])
return data

View File

@ -60,7 +60,6 @@ class HentaifoundryUserExtractor(Extractor):
token, pos = text.extract(page, 'hidden" value="', '"')
count, pos = text.extract(page, 'class="active" >Pictures (', ')', pos)
return {
"category": self.category,
"artist": self.artist,
"count": count,
}, token
@ -136,7 +135,6 @@ class HentaifoundryImageExtractor(Extractor):
title, pos = text.extract(page, 'Pictures</a> &raquo; <span>', '<')
url , pos = text.extract(page, '//pictures.hentai-foundry.com', '"', pos)
data = {
"category": self.category,
"artist": self.artist,
"index": self.index,
"title": text.unescape(title),

View File

@ -61,7 +61,6 @@ class HitomiGalleryExtractor(Extractor):
series, pos = text.extract(page, '.html">', '</a>', pos)
lang = lang.capitalize()
return {
"category": self.category,
"gallery-id": self.gid,
"title": " ".join(title.split()),
"artist": string.capwords(artist),

View File

@ -44,17 +44,12 @@ class ImagebamGalleryExtractor(AsynchronousExtractor):
"""Collect metadata for extractor-job"""
url = self.url_base + "/gallery/" + self.gkey
page = self.request(url, encoding="utf-8").text
data = {
"category": self.category,
"gallery-key": self.gkey,
}
data, _ = text.extract_all(page, (
return text.extract_all(page, (
(None , "<img src='/img/icons/photos.png'", ""),
("title" , "'> ", " <"),
("count" , "'>", " images"),
("first-url", "<a href='http://www.imagebam.com", "'"),
), values=data)
return data
), values={"gallery-key": self.gkey})[0]
def get_images(self, url):
"""Yield all image-urls and -ids for a gallery"""
@ -71,7 +66,6 @@ class ImagebamGalleryExtractor(AsynchronousExtractor):
yield image_url, image_id
class ImagebamImageExtractor(Extractor):
"""Extractor for single images from imagebam.com"""
category = "imagebam"
@ -90,10 +84,9 @@ class ImagebamImageExtractor(Extractor):
self.token = match.group(1)
def items(self):
data = {"category": self.category, "token": self.token}
page = self.request("http://www.imagebam.com/image/" + self.token).text
url = text.extract(page, 'property="og:image" content="', '"')[0]
text.nameext_from_url(url, data)
url = text.extract(page, 'property="og:image" content="', '"')[0]
data = text.nameext_from_url(url, {"token": self.token})
yield Message.Version, 1
yield Message.Directory, data
yield Message.Url, url, data

View File

@ -48,7 +48,7 @@ class ImagefapGalleryExtractor(Extractor):
("title" , '<title>Porn pics of ', ' (Page 1)</title>'),
("uploader", '>Uploaded by ', '</font>'),
("count" , ' 1 of ', ' pics"'),
), values={"category": self.category, "gallery-id": self.gid})
), values={"gallery-id": self.gid})
self.image_id = text.extract(page, 'id="img_ed_', '"', pos)[0]
data["title"] = text.unescape(data["title"])
return data
@ -101,7 +101,6 @@ class ImagefapImageExtractor(Extractor):
"""Collect metadata for extractor-job"""
parts = info["contentUrl"].rsplit("/", 3)
return text.nameext_from_url(parts[3], {
"category": self.category,
"title": text.unescape(info["name"]),
"section": info["section"],
"uploader": info["author"],

View File

@ -34,7 +34,6 @@ class ImagetwistImageExtractor(Extractor):
filename, pos = text.extract(page, ' alt="', '"', pos)
userid , pos = text.extract(url , '/', '/', 29)
data = {
"category": self.category,
"token": self.token,
"user": userid,
}

View File

@ -47,7 +47,6 @@ class ImgboxGalleryExtractor(AsynchronousExtractor):
title = text.extract(page, "<h1>", "</h1>")[0]
parts = title.rsplit(" - ", maxsplit=1)
return {
"category": self.category,
"gallery-key": self.key,
"title": text.unescape(parts[0]),
"count": parts[1][:-7],
@ -91,8 +90,7 @@ class ImgboxImageExtractor(Extractor):
page = self.request("http://imgbox.com/" + self.key).text
url , pos = text.extract(page, 'src="http://i.', '"')
filename, pos = text.extract(page, ' title="', '"', pos)
data = {"category": self.category, "image-key": self.key}
text.nameext_from_url(filename, data)
data = text.nameext_from_url(filename, {"image-key": self.key})
yield Message.Version, 1
yield Message.Directory, data
yield Message.Url, "http://i." + url, data

View File

@ -30,12 +30,11 @@ class ImgcandyImageExtractor(Extractor):
self.token, self.filename = match.groups()
def items(self):
data = {"category": self.category, "token": self.token}
params = {"imgContinue": "Continue+to+image+...+"}
page = self.request("http://imgcandy.net/img-" + self.token + ".html",
method="post", data=params).text
url = text.extract(page, "<img class='centred' src='", "'")[0]
text.nameext_from_url(self.filename or url, data)
data = text.nameext_from_url(self.filename or url, {"token": self.token})
yield Message.Version, 1
yield Message.Directory, data
yield Message.Url, url, data

View File

@ -61,8 +61,6 @@ class ImgchiliImageExtractor(ImgchiliExtractor):
parts = name2.split("in the gallery ")
name = parts[0] if not parts[0].endswith("...") else name1
return text.nameext_from_url(name, {
"category": self.category,
"subcategory": self.subcategory,
"image-id": self.match.group(1),
"title": text.unescape(parts[-1]) if len(parts) > 1 else ""
})
@ -86,8 +84,6 @@ class ImgchiliAlbumExtractor(ImgchiliExtractor):
def get_job_metadata(self, page):
title = text.extract(page, "<h1>", "</h1>")[0]
return {
"category": self.category,
"subcategory": self.subcategory,
"title": text.unescape(title),
"key": self.match.group(1),
}

View File

@ -61,4 +61,4 @@ class ImgthGalleryExtractor(Extractor):
("date" , 'created on ', ' by <'),
(None , 'href="/users/', ''),
("user" , '>', '<'),
), values={"category": self.category, "gallery-id": self.gid})[0]
), values={"gallery-id": self.gid})[0]

View File

@ -29,11 +29,10 @@ class ImgtrexImageExtractor(Extractor):
self.token = match.group(1)
def items(self):
data = {"category": self.category, "token": self.token}
page = self.request("http://imgtrex.com/" + self.token).text
filename, pos = text.extract(page, '<title>ImgTrex: ', '</title>')
url , pos = text.extract(page, '<br>\n<img src="', '"', pos)
text.nameext_from_url(filename, data)
data = text.nameext_from_url(filename, {"token": self.token})
yield Message.Version, 1
yield Message.Directory, data
yield Message.Url, url, data

View File

@ -43,16 +43,12 @@ class ImgurAlbumExtractor(Extractor):
def get_job_metadata(self):
"""Collect metadata for extractor-job"""
page = self.request("https://imgur.com/a/" + self.album).text
data = {
"category": self.category,
"album-key": self.album,
}
text.extract_all(page, (
data = text.extract_all(page, (
('title', '<meta property="og:title" content="', '"'),
('count', '"num_images":"', '"'),
('date' , '"datetime":"', ' '),
('time' , '', '"'),
), values=data)
), values={"album-key": self.album})[0]
data["title"] = text.unescape(data["title"])
return data

View File

@ -30,12 +30,12 @@ class ImgytImageExtractor(Extractor):
self.token = match.group(1)
def items(self):
data = {"category": self.category, "token": self.token}
params = {"imgContinue": "Continue+to+image+...+"}
page = self.request("https://img.yt/img-" + self.token + ".html",
method="post", data=params).text
url , pos = text.extract(page, "<img class='centred' src='", "'")
filename, pos = text.extract(page, " alt='", "'", pos)
data = {"token": self.token}
text.nameext_from_url(filename + splitext(url)[1], data)
if url.startswith("http:"):
url = "https:" + url[5:]

View File

@ -45,7 +45,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
("size" , "Total Filesize: <b>", "</b>"),
("date" , "Date added: <b>", "</b>"),
("type" , "Album type: <b>", "</b>"),
), values={"category": self.category})[0]
))[0]
def get_album_tracks(self, page):
"""Collect url and metadata for all tracks of a soundtrack"""

View File

@ -81,7 +81,6 @@ class KissmangaChapterExtractor(KissmangaExtractor):
r"(?:Vol.0*(\d+) )?(?:Ch.)?0*(\d+)(?:\.0*(\d+))?(?:: (.+))?", cinfo)
chminor = match.group(3)
return {
"category": self.category,
"manga": manga,
"volume": match.group(1) or "",
"chapter": match.group(2),

View File

@ -50,7 +50,7 @@ class LusciousAlbumExtractor(Extractor):
(None , '<p>Language:', ''),
("language", '\n ', ' '),
("artist" , 'rtist: ', '\n'),
), values={"category": self.category, "gallery-id": self.gid})[0]
), values={"gallery-id": self.gid})[0]
data["lang"] = iso639_1.language_to_code(data["language"])
return data

View File

@ -76,7 +76,6 @@ class MangahereChapterExtractor(AsynchronousExtractor):
count, pos = text.extract(page, '>', '<', pos-30)
manga = re.match(r"(.+) \d+(\.\d+)? - Read .+ Chapter \d+(\.\d+)? Online", manga).group(1)
return {
"category": self.category,
"manga": text.unescape(manga),
# "title": TODO,
"volume": self.volume or "",

View File

@ -80,7 +80,6 @@ class MangamintChapterExtractor(Extractor):
chid , pos = text.extract(page, r'"identifier":"node\/', '"', pos)
match = re.match(r"(.+) (\d+)(\.\d+)?$", manga)
return {
"category": self.category,
"manga": match.group(1),
"chapter": match.group(2),
"chapter-minor": match.group(3) or "",

View File

@ -80,7 +80,6 @@ class MangaparkChapterExtractor(Extractor):
def get_job_metadata(self, page):
"""Collect metadata for extractor-job"""
data = {
"category": self.category,
"version": self.version,
"volume": self.volume or "",
"chapter": self.chapter,

View File

@ -74,7 +74,6 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
"""Collect metadata for extractor-job"""
page = self.request(self.url_base + self.url_title).text
data = {
"category": self.category,
"chapter": self.chapter,
"lang": "en",
"language": "English",

View File

@ -67,7 +67,6 @@ class MangashareChapterExtractor(AsynchronousExtractor):
def get_job_metadata(self, page):
"""Collect metadata for extractor-job"""
data = {
"category": self.category,
"lang": "en",
"language": "English",
}

View File

@ -46,7 +46,6 @@ class MangastreamChapterExtractor(AsynchronousExtractor):
title, pos = text.extract(page, ' - ', '<', pos)
count, pos = text.extract(page, 'Last Page (', ')', pos)
data = {
"category": self.category,
"manga": manga,
"chapter": text.unquote(self.chapter),
"chapter-id": self.ch_id,

View File

@ -57,7 +57,6 @@ class NhentaiGalleryExtractor(Extractor):
title_en = ginfo["title"].get("english", "")
title_ja = ginfo["title"].get("japanese", "")
return {
"category": self.category,
"gallery-id": self.gid,
"upload-date": ginfo["upload_date"],
"media-id": ginfo["media_id"],

View File

@ -40,10 +40,7 @@ class NijieExtractor(AsynchronousExtractor):
def get_job_metadata(self):
"""Collect metadata for extractor-job"""
return {
"category": self.category,
"artist-id": self.artist_id,
}
return {"artist-id": self.artist_id}
def get_image_ids(self):
"""Collect all image-ids for a specific artist"""

View File

@ -26,8 +26,6 @@ class PinterestExtractor(Extractor):
img = pin["image"]["original"]
url = img["url"]
data = {
"category": self.category,
"subcategory": self.subcategory,
"pin-id": pin["id"],
"note": pin["note"],
"width": img["width"],
@ -90,8 +88,6 @@ class PinterestBoardExtractor(PinterestExtractor):
def data_from_board(self, board):
"""Get metadata from a board-object"""
data = {
"category": self.category,
"subcategory": self.subcategory,
"user": self.user,
"board-id": board["id"],
"board": board["name"],

View File

@ -92,7 +92,6 @@ class PixivUserExtractor(Extractor):
"""Prepare a work-dictionary with additional keywords"""
user = work["user"]
url = work["image_urls"]["large"]
work["category"] = self.category
work["artist-id"] = user["id"]
work["artist-name"] = user["name"]
work["artist-nick"] = user["account"]
@ -130,7 +129,6 @@ class PixivUserExtractor(Extractor):
if not user:
user = self.api.user(self.artist_id)["response"][0]
return {
"category": self.category,
"artist-id": user["id"],
"artist-name": user["name"],
"artist-nick": user["account"],

View File

@ -63,7 +63,6 @@ class PowermangaChapterExtractor(Extractor):
json_data, pos = text.extract(page, 'var pages = ', ';', pos)
match = re.match(r"(\w+ (\d+)([^:+]*)(?:: (.*))?|[^:]+)", chapter)
return {
"category": self.category,
"manga": text.unescape(manga),
"chapter": match.group(2) or match.group(1),
"chapter-minor": match.group(3) or "",

View File

@ -42,10 +42,7 @@ class SankakuTagExtractor(AsynchronousExtractor):
def get_job_metadata(self):
"""Collect metadata for extractor-job"""
return {
"category": self.category,
"tags": self.tags,
}
return {"tags": self.tags}
def get_images(self):
params = {

View File

@ -44,10 +44,7 @@ class SeigaImageExtractor(Extractor):
def get_job_metadata(self):
"""Collect metadata for extractor-job"""
return {
"category": self.category,
"image-id": self.image_id,
}
return {"image-id": self.image_id}
def get_image_url(self, image_id):
"""Get url for an image with id 'image_id'"""

View File

@ -52,7 +52,6 @@ class SenmangaChapterExtractor(Extractor):
manga, pos = text.extract(title, '| Raw | ', ' | Chapter ')
chapter, pos = text.extract(title, '', ' | Page ', pos)
return {
"category": self.category,
"manga": text.unescape(manga.replace("-", " ")),
"chapter": chapter,
"count": count,

View File

@ -82,7 +82,6 @@ class SpectrumnexusChapterExtractor(AsynchronousExtractor):
def get_job_metadata(self, page):
"""Collect metadata for extractor-job"""
data = {
"category": self.category,
"chapter": self.chapter or "",
"volume": self.volume or "",
"identifier": self.identifier.replace("+", " "),

View File

@ -47,7 +47,6 @@ class TumblrUserExtractor(Extractor):
def get_job_metadata(self, image_data):
"""Collect metadata for extractor-job"""
data = next(image_data)
data["category"] = self.category
data["user"] = self.user
del data["cname"]
del data["description"]

View File

@ -30,15 +30,11 @@ class TurboimagehostImageExtractor(Extractor):
def items(self):
page = self.request("http://www.turboimagehost.com/p/" + self.part).text
data = {
"category": self.category,
"token": self.token,
}
text.extract_all(page, (
data = text.extract_all(page, (
('width' , 'var imWidth = ', ';'),
('height', 'var imHeight = ', ';'),
('url' , '<a href="http://www.turboimagehost.com"><img src="', '"'),
), values=data)
), values={"token": self.token})[0]
text.nameext_from_url(data["url"], data)
yield Message.Version, 1
yield Message.Directory, data