mirror of
https://github.com/mikf/gallery-dl.git
synced 2025-01-31 19:51:34 +01:00
change keyword names to valid Python identifiers
This commit mainly consists of replacing the minus signs ('-') in keyword names with underscores ('_'), so that the names are valid Python identifiers and can be used directly in filter expressions. For example, 'gallery-id' was renamed to 'gallery_id'. (It is theoretically possible to access any variable, regardless of its name, with 'locals()["NAME"]', but that seems too convoluted when just 'NAME' could be enough.)
This commit is contained in:
parent
81877bb5f6
commit
6f30cf4c64
@ -16,12 +16,12 @@ class FutabaThreadExtractor(Extractor):
|
||||
"""Extractor for images from threads on www.2chan.net"""
|
||||
category = "2chan"
|
||||
subcategory = "thread"
|
||||
directory_fmt = ["{category}", "{board-name}", "{thread}"]
|
||||
directory_fmt = ["{category}", "{board_name}", "{thread}"]
|
||||
pattern = [r"(?:https?://)?(([^.]+)\.2chan\.net/([^/]+)/res/(\d+))"]
|
||||
urlfmt = "https://{server}.2chan.net/{board}/src/{filename}"
|
||||
test = [("http://dec.2chan.net/70/res/947.htm", {
|
||||
"url": "c5c12b80b290e224b6758507b3bb952044f4595b",
|
||||
"keyword": "e1295c0a96f733898e92742bcc1a4c4b320e3748",
|
||||
"keyword": "4bd22e7a9c3636faecd6ea7082509e8655e10dd0",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@ -49,7 +49,7 @@ class FutabaThreadExtractor(Extractor):
|
||||
"server": self.server,
|
||||
"title": title,
|
||||
"board": self.board,
|
||||
"board-name": boardname[:-4],
|
||||
"board_name": boardname[:-4],
|
||||
"thread": self.thread,
|
||||
}
|
||||
|
||||
|
@ -84,7 +84,7 @@ class DeviantartExtractor(Extractor):
|
||||
deviation["username"] = self.user
|
||||
author = deviation["author"]
|
||||
author["urlname"] = author["username"].lower()
|
||||
deviation["da-category"] = deviation["category"]
|
||||
deviation["da_category"] = deviation["category"]
|
||||
|
||||
@staticmethod
|
||||
def commit(deviation, target):
|
||||
@ -170,7 +170,7 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
|
||||
test = [
|
||||
("http://shimoda7.deviantart.com/gallery/", {
|
||||
"url": "f95b222d939c1e6aa8b9aabe89eaa2d364f06d38",
|
||||
"keyword": "9342c2a7a2bd6eb9f4a6ea539d04d75248ebe05f",
|
||||
"keyword": "af266348c6bf2bfb9219f94e505ff346db9b6321",
|
||||
}),
|
||||
("https://yakuzafc.deviantart.com/", {
|
||||
"url": "fa6ecb2c3aa78872f762d43f7809b7f0580debc1",
|
||||
@ -195,11 +195,11 @@ class DeviantartFolderExtractor(DeviantartExtractor):
|
||||
test = [
|
||||
("http://shimoda7.deviantart.com/gallery/722019/Miscellaneous", {
|
||||
"url": "1ee23a0bd8f7099d375afe8a29ea1a3bf394ba1e",
|
||||
"keyword": "a0d7093148b9bab8ee0efa6213139efd99f23394",
|
||||
"keyword": "96f4a766180d9eac96c5904a5c9cb29613401aac",
|
||||
}),
|
||||
("http://majestic-da.deviantart.com/gallery/63419606/CHIBI-KAWAII", {
|
||||
"url": "1df6f4312f124b0ad9f2a905c8f9e94e89c84370",
|
||||
"keyword": "b651f5d540aaaf7974fa7e181e4cc54151a65e9e",
|
||||
"keyword": "3eb4e7f10c49ab23d783a825bd934dbf98e8ff88",
|
||||
}),
|
||||
]
|
||||
|
||||
@ -229,7 +229,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
|
||||
(("http://shimoda7.deviantart.com/art/"
|
||||
"For-the-sake-of-a-memory-10073852"), {
|
||||
"url": "393dc581ca9e6938dbf0a3db8e9eea6243eb35f4",
|
||||
"keyword": "5f58ecdce9b9ebb51f65d0e24e0f7efe00a74a55",
|
||||
"keyword": "e9373cda1d3e2a5b6ece82325f8f8e08b202ce10",
|
||||
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
|
||||
}),
|
||||
("https://zzz.deviantart.com/art/zzz-1234567890", {
|
||||
@ -237,7 +237,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
|
||||
}),
|
||||
("http://sta.sh/01ijs78ebagf", {
|
||||
"url": "3a15ed9201e665172b1daece8ef6d42f6a7ad3d5",
|
||||
"keyword": "00246726d49f51ab35ea88d66467067f05b10bc9",
|
||||
"keyword": "8213960fd651e1c8e2622a514d2482078341948e",
|
||||
}),
|
||||
("http://sta.sh/abcdefghijkl", {
|
||||
"exception": exception.NotFoundError,
|
||||
@ -265,7 +265,7 @@ class DeviantartFavoriteExtractor(DeviantartExtractor):
|
||||
test = [
|
||||
("http://h3813067.deviantart.com/favourites/", {
|
||||
"url": "393dc581ca9e6938dbf0a3db8e9eea6243eb35f4",
|
||||
"keyword": "c7d0a3bacc1e4c5625dda703e25affe047cbbc3f",
|
||||
"keyword": "76b34daf4bde433897ef1f230b036182da118413",
|
||||
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
|
||||
}),
|
||||
("http://h3813067.deviantart.com/favourites/?catpath=/", None),
|
||||
@ -291,7 +291,7 @@ class DeviantartCollectionExtractor(DeviantartExtractor):
|
||||
r"/favourites/(\d+)/([^/?&#]+)"]
|
||||
test = [("http://rosuuri.deviantart.com/favourites/58951174/Useful", {
|
||||
"url": "f0c12581060aab9699289817b39804d9eb88f675",
|
||||
"keyword": "2778b4abaac240ff6fb1d630d7b04b8e983ef9c4",
|
||||
"keyword": "868ff18de9e3dd2af6ae95b0fb73d6556f051e17",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@ -319,7 +319,7 @@ class DeviantartJournalExtractor(DeviantartExtractor):
|
||||
test = [
|
||||
("https://angrywhitewanker.deviantart.com/journal/", {
|
||||
"url": "6474f49fbb4d01637ff0762708953252a52dc9c1",
|
||||
"keyword": "5306515383a7ec26b22a2de42045718e6d630f25",
|
||||
"keyword": "5b29c0fb9af34d35f069bfe8fd448c8ca0dd8a9d",
|
||||
}),
|
||||
("http://shimoda7.deviantart.com/journal/?catpath=/", None),
|
||||
]
|
||||
|
@ -20,12 +20,12 @@ class ExhentaiGalleryExtractor(Extractor):
|
||||
"""Extractor for image galleries from exhentai.org"""
|
||||
category = "exhentai"
|
||||
subcategory = "gallery"
|
||||
directory_fmt = ["{category}", "{gallery-id}"]
|
||||
filename_fmt = "{gallery-id}_{num:>04}_{image-token}_{name}.{extension}"
|
||||
directory_fmt = ["{category}", "{gallery_id}"]
|
||||
filename_fmt = "{gallery_id}_{num:>04}_{image_token}_{name}.{extension}"
|
||||
pattern = [r"(?:https?://)?(g\.e-|e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})"]
|
||||
test = [
|
||||
("https://exhentai.org/g/960460/4f0e369d82/", {
|
||||
"keyword": "623f8c86c9fe38e964682dd4309b96922655b900",
|
||||
"keyword": "d837276b02c4e91e96c1b40fe4415cbb73b56577",
|
||||
"content": "493d759de534355c9f55f8e365565b62411de146",
|
||||
}),
|
||||
("https://exhentai.org/g/960461/4f0e369d82/", {
|
||||
@ -85,8 +85,8 @@ class ExhentaiGalleryExtractor(Extractor):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {
|
||||
"gallery-id" : self.gid,
|
||||
"gallery-token": self.token,
|
||||
"gallery_id" : self.gid,
|
||||
"gallery_token": self.token,
|
||||
}
|
||||
text.extract_all(page, (
|
||||
("title" , '<h1 id="gn">', '</h1>'),
|
||||
@ -94,7 +94,7 @@ class ExhentaiGalleryExtractor(Extractor):
|
||||
("date" , '>Posted:</td><td class="gdt2">', '</td>'),
|
||||
("language" , '>Language:</td><td class="gdt2">', ' '),
|
||||
("size" , '>File Size:</td><td class="gdt2">', ' '),
|
||||
("size-units", '', '<'),
|
||||
("size_units", '', '<'),
|
||||
("count" , '>Length:</td><td class="gdt2">', ' '),
|
||||
), values=data)
|
||||
data["lang"] = util.language_to_code(data["language"])
|
||||
@ -132,7 +132,7 @@ class ExhentaiGalleryExtractor(Extractor):
|
||||
|
||||
return url, text.nameext_from_url(data["url"], {
|
||||
"num": 1,
|
||||
"image-token": data["startkey"],
|
||||
"image_token": data["startkey"],
|
||||
})
|
||||
|
||||
def images_from_api(self):
|
||||
@ -165,7 +165,7 @@ class ExhentaiGalleryExtractor(Extractor):
|
||||
|
||||
yield url, text.nameext_from_url(imgurl, {
|
||||
"num": request["page"],
|
||||
"image-token": imgkey
|
||||
"image_token": imgkey
|
||||
})
|
||||
request["imgkey"] = nextkey
|
||||
|
||||
|
@ -31,13 +31,13 @@ class HbrowseChapterExtractor(Extractor):
|
||||
"""Extractor for manga-chapters from hbrowse.com"""
|
||||
category = "hbrowse"
|
||||
subcategory = "chapter"
|
||||
directory_fmt = ["{category}", "{gallery-id} {title}", "c{chapter:>05}"]
|
||||
filename_fmt = ("{category}_{gallery-id}_{chapter:>05}_"
|
||||
directory_fmt = ["{category}", "{gallery_id} {title}", "c{chapter:>05}"]
|
||||
filename_fmt = ("{category}_{gallery_id}_{chapter:>05}_"
|
||||
"{num:>03}.{extension}")
|
||||
pattern = [r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/(c\d+)"]
|
||||
test = [("http://www.hbrowse.com/10363/c00000", {
|
||||
"url": "634f4800858913f097bc3b62a8fedaf74b5254bd",
|
||||
"keyword": "c7dc22a10699dee5cf466406fecee6ffa2e6277e",
|
||||
"keyword": "f0f96cefda19e5aee1a19454f63ffe3a425602ab",
|
||||
"content": "44578ebbe176c2c27434966aef22945787e2781e",
|
||||
})]
|
||||
url_base = "http://www.hbrowse.com"
|
||||
@ -59,14 +59,14 @@ class HbrowseChapterExtractor(Extractor):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {
|
||||
'gallery-id': self.gid,
|
||||
'gallery_id': self.gid,
|
||||
"chapter": int(self.chapter[1:]),
|
||||
}
|
||||
return text.extract_all(page, (
|
||||
('title' , '<td class="listLong">', '</td>'),
|
||||
(None , '<td class="listLong">', ''),
|
||||
('artist' , '>', '<'),
|
||||
('count-total', '<td class="listLong">', ' '),
|
||||
('count_total', '<td class="listLong">', ' '),
|
||||
(None , '<td class="listLong">', ''),
|
||||
('origin' , '>', '<'),
|
||||
), values=data)[0]
|
||||
|
@ -42,7 +42,7 @@ class Hentai2readChapterExtractor(hentaicdn.HentaicdnChapterExtractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?hentai2read\.com/([^/]+)/(\d+)"]
|
||||
test = [("http://hentai2read.com/amazon_elixir/1/", {
|
||||
"url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
|
||||
"keyword": "fc79e4c70d61ae476aea2b63a75324e3d96f4497",
|
||||
"keyword": "a159017295546e2647d80a4a4165c702662abe1e",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@ -57,7 +57,7 @@ class Hentai2readChapterExtractor(hentaicdn.HentaicdnChapterExtractor):
|
||||
match = re.match(r"Reading (.+) \(([^)]+)\) Hentai(?: by (.+))? - "
|
||||
r"(\d+): (.+) . Page 1 ", title)
|
||||
return {
|
||||
"manga-id": images[0].split("/")[-3],
|
||||
"manga_id": images[0].split("/")[-3],
|
||||
"chapter": self.chapter,
|
||||
"count": len(images),
|
||||
"manga": match.group(1),
|
||||
|
@ -16,8 +16,8 @@ import json
|
||||
class HentaicdnChapterExtractor(Extractor):
|
||||
"""Base class for extractors for a single manga chapter"""
|
||||
subcategory = "chapter"
|
||||
directory_fmt = ["{category}", "{manga-id} {title}"]
|
||||
filename_fmt = ("{category}_{manga-id}_{chapter:>02}_"
|
||||
directory_fmt = ["{category}", "{manga_id} {title}"]
|
||||
filename_fmt = ("{category}_{manga_id}_{chapter:>02}_"
|
||||
"{num:>03}.{extension}")
|
||||
url = ""
|
||||
|
||||
|
@ -40,7 +40,7 @@ class HentaihereChapterExtractor(hentaicdn.HentaicdnChapterExtractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?hentaihere\.com/m/S(\d+)/(\d+)"]
|
||||
test = [("https://hentaihere.com/m/S13812/1/1/", {
|
||||
"url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
|
||||
"keyword": "fd6e515ccf073e3b57d39c5cb472692858bddb88",
|
||||
"keyword": "7b31d19668b353f7be73b330a52ec6a7e56d23ea",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@ -55,7 +55,7 @@ class HentaihereChapterExtractor(hentaicdn.HentaicdnChapterExtractor):
|
||||
pattern = r"Page 1 \| (.+) \(([^)]+)\) - Chapter \d+: (.+) by (.+) at "
|
||||
match = re.match(pattern, title)
|
||||
return {
|
||||
"manga-id": self.gid,
|
||||
"manga_id": self.gid,
|
||||
"manga": match.group(1),
|
||||
"type": match.group(2),
|
||||
"chapter": self.chapter,
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015,2016 Mike Fährmann
|
||||
# Copyright 2015-2017 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -17,12 +17,12 @@ class HitomiGalleryExtractor(Extractor):
|
||||
"""Extractor for image galleries from hitomi.la"""
|
||||
category = "hitomi"
|
||||
subcategory = "gallery"
|
||||
directory_fmt = ["{category}", "{gallery-id} {title}"]
|
||||
filename_fmt = "{category}_{gallery-id}_{num:>03}_{name}.{extension}"
|
||||
directory_fmt = ["{category}", "{gallery_id} {title}"]
|
||||
filename_fmt = "{category}_{gallery_id}_{num:>03}_{name}.{extension}"
|
||||
pattern = [r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)\.html"]
|
||||
test = [("https://hitomi.la/galleries/867789.html", {
|
||||
"url": "e42a47dfadda93e4bf37e82b1dc9ad29edfa9130",
|
||||
"keyword": "03a64d67584afd7b8ad96ecb47acae08ea14d90f",
|
||||
"keyword": "c007cd41229d727b2ced3b364350561444738351",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@ -37,8 +37,7 @@ class HitomiGalleryExtractor(Extractor):
|
||||
data["count"] = len(images)
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
for num, url in enumerate(images, 1):
|
||||
data["num"] = num
|
||||
for data["num"], url in enumerate(images, 1):
|
||||
yield Message.Url, url, text.nameext_from_url(url, data)
|
||||
|
||||
def get_job_metadata(self, page):
|
||||
@ -63,7 +62,7 @@ class HitomiGalleryExtractor(Extractor):
|
||||
series, pos = text.extract(page, '.html">', '</a>', pos)
|
||||
lang = lang.capitalize()
|
||||
return {
|
||||
"gallery-id": self.gid,
|
||||
"gallery_id": self.gid,
|
||||
"title": " ".join(title.split()),
|
||||
"artist": string.capwords(artist),
|
||||
"group": string.capwords(group),
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014-2016 Mike Fährmann
|
||||
# Copyright 2014-2017 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -16,13 +16,13 @@ class ImagebamGalleryExtractor(AsynchronousExtractor):
|
||||
"""Extractor for image galleries from imagebam.com"""
|
||||
category = "imagebam"
|
||||
subcategory = "gallery"
|
||||
directory_fmt = ["{category}", "{title} - {gallery-key}"]
|
||||
directory_fmt = ["{category}", "{title} - {gallery_key}"]
|
||||
filename_fmt = "{num:>03}-{filename}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+).*"]
|
||||
test = [(("http://www.imagebam.com/"
|
||||
"gallery/adz2y0f9574bjpmonaismyrhtjgvey4o"), {
|
||||
"url": "d7a4483b6d5ebba81950a349aad58ae034c60eda",
|
||||
"keyword": "e4a9395dbd06d4af3172a6a61c90601bc47ee18c",
|
||||
"keyword": "0ab7bef5cf995d9229dc900dc508311cefb32306",
|
||||
"content": "596e6bfa157f2c7169805d50075c2986549973a8",
|
||||
})]
|
||||
url_base = "http://www.imagebam.com"
|
||||
@ -32,12 +32,12 @@ class ImagebamGalleryExtractor(AsynchronousExtractor):
|
||||
self.gkey = match.group(1)
|
||||
|
||||
def items(self):
|
||||
data = self.get_job_metadata()
|
||||
data["num"] = 0
|
||||
data, url = self.get_job_metadata()
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
for image_url, image_id in self.get_images(data["first-url"]):
|
||||
data["id"] = image_id
|
||||
data["num"] = 0
|
||||
for image_url, image_id in self.get_images(url):
|
||||
data["image_id"] = image_id
|
||||
data["num"] += 1
|
||||
text.nameext_from_url(image_url, data)
|
||||
yield Message.Url, image_url, data.copy()
|
||||
@ -46,12 +46,14 @@ class ImagebamGalleryExtractor(AsynchronousExtractor):
|
||||
"""Collect metadata for extractor-job"""
|
||||
url = self.url_base + "/gallery/" + self.gkey
|
||||
page = self.request(url, encoding="utf-8").text
|
||||
return text.extract_all(page, (
|
||||
data, pos = text.extract_all(page, (
|
||||
(None , "<img src='/img/icons/photos.png'", ""),
|
||||
("title" , "'> ", " <"),
|
||||
("count" , "'>", " images"),
|
||||
("first-url", "<a href='http://www.imagebam.com", "'"),
|
||||
), values={"gallery-key": self.gkey})[0]
|
||||
), values={"gallery_key": self.gkey})
|
||||
url, pos = text.extract(
|
||||
page, "<a href='http://www.imagebam.com", "'", pos)
|
||||
return data, url
|
||||
|
||||
def get_images(self, url):
|
||||
"""Yield all image-urls and -ids for a gallery"""
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2016 Mike Fährmann
|
||||
# Copyright 2016-2017 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -17,19 +17,19 @@ class ImagefapGalleryExtractor(Extractor):
|
||||
"""Extractor for image galleries from imagefap.com"""
|
||||
category = "imagefap"
|
||||
subcategory = "gallery"
|
||||
directory_fmt = ["{category}", "{gallery-id} {title}"]
|
||||
filename_fmt = "{category}_{gallery-id}_{name}.{extension}"
|
||||
directory_fmt = ["{category}", "{gallery_id} {title}"]
|
||||
filename_fmt = "{category}_{gallery_id}_{name}.{extension}"
|
||||
pattern = [(r"(?:https?://)?(?:www\.)?imagefap\.com/"
|
||||
r"(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)")]
|
||||
test = [
|
||||
("http://www.imagefap.com/gallery/6318447", {
|
||||
"url": "f63e6876df83a40e1a98dad70e46952dd9edb7a7",
|
||||
"keyword": "715f99ad154c4cf608afc7cd77dd1e896030646a",
|
||||
"keyword": "275857d113bb007245de705ae7bd0dff7d677874",
|
||||
"content": "38e50699db9518ae68648c45ecdd6be614efc324",
|
||||
}),
|
||||
("http://www.imagefap.com/gallery/5486966", {
|
||||
"url": "eace9b33be99f87f3382c87bd915cf495a865d6e",
|
||||
"keyword": "0f14b5547adb9ffda6a6ac8ded15fc2b44d23c4a",
|
||||
"keyword": "b84da0543c2d1f848bf5e4c2950dd4f4543a1e0c",
|
||||
}),
|
||||
]
|
||||
|
||||
@ -55,7 +55,7 @@ class ImagefapGalleryExtractor(Extractor):
|
||||
("title" , '<title>Porn pics of ', ' (Page 1)</title>'),
|
||||
("uploader", '>Uploaded by ', '</font>'),
|
||||
("count" , ' 1 of ', ' pics"'),
|
||||
), values={"gallery-id": self.gid})
|
||||
), values={"gallery_id": self.gid})
|
||||
self.image_id = text.extract(page, 'id="img_ed_', '"', pos)[0]
|
||||
data["title"] = text.unescape(data["title"])
|
||||
return data
|
||||
@ -74,7 +74,7 @@ class ImagefapGalleryExtractor(Extractor):
|
||||
return
|
||||
num += 1
|
||||
_, imgid, name = imgurl.rsplit("/", 2)
|
||||
data = {"image-id": imgid, "num": num}
|
||||
data = {"image_id": imgid, "num": num}
|
||||
yield imgurl, text.nameext_from_url(name, data)
|
||||
params["idx"] += 24
|
||||
|
||||
@ -83,12 +83,12 @@ class ImagefapImageExtractor(Extractor):
|
||||
"""Extractor for single images from imagefap.com"""
|
||||
category = "imagefap"
|
||||
subcategory = "image"
|
||||
directory_fmt = ["{category}", "{gallery-id} {title}"]
|
||||
filename_fmt = "{category}_{gallery-id}_{name}.{extension}"
|
||||
directory_fmt = ["{category}", "{gallery_id} {title}"]
|
||||
filename_fmt = "{category}_{gallery_id}_{name}.{extension}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?imagefap\.com/photo/(\d+)"]
|
||||
test = [("http://www.imagefap.com/photo/1616331218/", {
|
||||
"url": "8a05c0ccdcf84e63c962803bc41d247628c549ea",
|
||||
"keyword": "c9880c6731b3fdc6d98d25dbff56f4342c11683e",
|
||||
"keyword": "c5023841c72b88949786c231f472f51453103185",
|
||||
"content": "964b8c62c9d5c2a039a2fccf1b1e10aaf7a18a96",
|
||||
})]
|
||||
|
||||
@ -113,8 +113,8 @@ class ImagefapImageExtractor(Extractor):
|
||||
"date": info["datePublished"],
|
||||
"width": info["width"],
|
||||
"height": info["height"],
|
||||
"gallery-id": parts[1],
|
||||
"image-id": parts[2],
|
||||
"gallery_id": parts[1],
|
||||
"image_id": parts[2],
|
||||
})
|
||||
|
||||
def load_json(self):
|
||||
@ -136,8 +136,8 @@ class ImagefapUserExtractor(Extractor):
|
||||
"""Extractor for all galleries from a user at imagefap.com"""
|
||||
category = "imagefap"
|
||||
subcategory = "user"
|
||||
directory_fmt = ["{category}", "{gallery-id} {title}"]
|
||||
filename_fmt = "{category}_{gallery-id}_{name}.{extension}"
|
||||
directory_fmt = ["{category}", "{gallery_id} {title}"]
|
||||
filename_fmt = "{category}_{gallery_id}_{name}.{extension}"
|
||||
pattern = [(r"(?:https?://)?(?:www\.)?imagefap\.com/"
|
||||
r"profile(?:\.php\?user=|/)([^/]+)"),
|
||||
(r"(?:https?://)?(?:www\.)?imagefap\.com/"
|
||||
@ -161,7 +161,7 @@ class ImagefapUserExtractor(Extractor):
|
||||
yield Message.Queue, "http://www.imagefap.com/gallery/" + gallery
|
||||
|
||||
def get_gallery_ids(self):
|
||||
"""Yield all gallery-ids of a specific user"""
|
||||
"""Yield all gallery_ids of a specific user"""
|
||||
folders = self.get_gallery_folders()
|
||||
url = "http://www.imagefap.com/ajax_usergallery_folder.php"
|
||||
params = {"userid": self.user_id}
|
||||
@ -171,7 +171,7 @@ class ImagefapUserExtractor(Extractor):
|
||||
yield from text.extract_iter(page, '<a href="/gallery/', '"')
|
||||
|
||||
def get_gallery_folders(self):
|
||||
"""Create a list of all folder-ids of a specific user"""
|
||||
"""Create a list of all folder_ids of a specific user"""
|
||||
if self.user:
|
||||
url = "http://www.imagefap.com/profile/" + self.user + "/galleries"
|
||||
else:
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014, 2015 Mike Fährmann
|
||||
# Copyright 2014-2017 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -17,13 +17,13 @@ class ImgboxGalleryExtractor(AsynchronousExtractor):
|
||||
"""Extractor for image galleries from imgbox.com"""
|
||||
category = "imgbox"
|
||||
subcategory = "gallery"
|
||||
directory_fmt = ["{category}", "{title} - {gallery-key}"]
|
||||
directory_fmt = ["{category}", "{title} - {gallery_key}"]
|
||||
filename_fmt = "{num:>03}-{filename}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?imgbox\.com/g/([A-Za-z0-9]{10})"]
|
||||
test = [
|
||||
("https://imgbox.com/g/JaX5V5HX7g", {
|
||||
"url": "6eafdeebaf0774238dddc9227e2ba315e40e9b7c",
|
||||
"keyword": "cebd7f6868cf84ff492341c936cb6dbe5cde4682",
|
||||
"keyword": "abe510221e1dc8c804296be25adf1498fb93f892",
|
||||
"content": "d20307dc8511ac24d688859c55abf2e2cc2dd3cc",
|
||||
}),
|
||||
("https://imgbox.com/g/JaX5V5HX7h", {
|
||||
@ -56,7 +56,7 @@ class ImgboxGalleryExtractor(AsynchronousExtractor):
|
||||
title = text.extract(page, "<h1>", "</h1>")[0]
|
||||
title, _, count = title.rpartition(" - ")
|
||||
return {
|
||||
"gallery-key": self.key,
|
||||
"gallery_key": self.key,
|
||||
"title": text.unescape(title),
|
||||
"count": count[:-7],
|
||||
}
|
||||
@ -66,7 +66,7 @@ class ImgboxGalleryExtractor(AsynchronousExtractor):
|
||||
return text.extract_all(page, (
|
||||
("num" , '</a> ', ' of '),
|
||||
(None , 'class="image-container"', ''),
|
||||
("image-key", 'alt="', '"'),
|
||||
("image_key", 'alt="', '"'),
|
||||
("filename" , ' title="', '"'),
|
||||
), values=self.metadata.copy())[0]
|
||||
|
||||
@ -86,7 +86,7 @@ class ImgboxImageExtractor(Extractor):
|
||||
test = [
|
||||
("https://imgbox.com/qHhw7lpG", {
|
||||
"url": "b9556dc307edf88e016fbced6d354702bc236070",
|
||||
"keyword": "ff0524dba869a4b3292d7d4f72f5da4024b4f002",
|
||||
"keyword": "a5cdcdf6e784bb186ed65a0cd7978ae2d0e17a12",
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
}),
|
||||
("https://imgbox.com/qHhw7lpH", {
|
||||
@ -104,7 +104,7 @@ class ImgboxImageExtractor(Extractor):
|
||||
if not url:
|
||||
raise exception.NotFoundError("image")
|
||||
filename, pos = text.extract(page, ' title="', '"', pos)
|
||||
data = text.nameext_from_url(filename, {"image-key": self.key})
|
||||
data = text.nameext_from_url(filename, {"image_key": self.key})
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
yield Message.Url, "https://i." + url, data
|
||||
|
@ -48,7 +48,7 @@ class ImgchiliImageExtractor(ImgchiliExtractor):
|
||||
test = [(("http://imgchili.net/show/89427/"
|
||||
"89427136_test___quot;___gt;.png"), {
|
||||
"url": "b93d92a6b58eb30a7ff6f9729cb748d25fea0c86",
|
||||
"keyword": "376c4584dfae7d7d2e88687d4ee9618bbfd0a35c",
|
||||
"keyword": "9c584f848766e4cc71d9e7f5f1f849e296ec05ae",
|
||||
})]
|
||||
|
||||
def get_job_metadata(self, page):
|
||||
@ -59,7 +59,7 @@ class ImgchiliImageExtractor(ImgchiliExtractor):
|
||||
parts = name2.split("in the gallery ")
|
||||
name = parts[0] if not parts[0].endswith("...") else name1
|
||||
return text.nameext_from_url(name, {
|
||||
"image-id": self.match.group(1),
|
||||
"image_id": self.match.group(1),
|
||||
"title": text.unescape(parts[-1]) if len(parts) > 1 else ""
|
||||
})
|
||||
|
||||
@ -75,7 +75,7 @@ class ImgchiliAlbumExtractor(ImgchiliExtractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?imgchili\.net/album/([^/]+)"]
|
||||
test = [("http://imgchili.net/album/7a3824c59f77c8d39b260f9168d4b49b", {
|
||||
"url": "995e32b62c36d48b02ef4c7a7a19463924391e2a",
|
||||
"keyword": "2d065bd7f822de4c0b7598679f2730e0082a617e",
|
||||
"keyword": "ae0c56cfd1fe032e5bc22f1188767b2a923ae25e",
|
||||
})]
|
||||
|
||||
def get_job_metadata(self, page):
|
||||
@ -95,5 +95,5 @@ class ImgchiliAlbumExtractor(ImgchiliExtractor):
|
||||
return
|
||||
imgid, pos = text.extract(page, ' alt="', '_', pos)
|
||||
name , pos = text.extract(page, '<strong>', '</strong>', pos)
|
||||
data = text.nameext_from_url(name, {"image-id": imgid, "num": num})
|
||||
data = text.nameext_from_url(name, {"image_id": imgid, "num": num})
|
||||
yield "http://i" + url + data["extension"], data
|
||||
|
@ -16,12 +16,12 @@ class ImgthGalleryExtractor(Extractor):
|
||||
"""Extractor for image galleries from imgth.com"""
|
||||
category = "imgth"
|
||||
subcategory = "gallery"
|
||||
directory_fmt = ["{category}", "{gallery-id} {title}"]
|
||||
filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
|
||||
directory_fmt = ["{category}", "{gallery_id} {title}"]
|
||||
filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
|
||||
pattern = [r"(?:https?://)?imgth\.com/gallery/(\d+)"]
|
||||
test = [("http://imgth.com/gallery/37/wallpaper-anime", {
|
||||
"url": "4ae1d281ca2b48952cf5cca57e9914402ad72748",
|
||||
"keyword": "3f268fcc18d49ac3799a8f25cc08053e90891955",
|
||||
"keyword": "e62d14f20ded393d28c2789fcc34ea2c30bc6a7c",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@ -62,4 +62,4 @@ class ImgthGalleryExtractor(Extractor):
|
||||
("date" , 'created on ', ' by <'),
|
||||
(None , 'href="/users/', ''),
|
||||
("user" , '>', '<'),
|
||||
), values={"gallery-id": self.gid})[0]
|
||||
), values={"gallery_id": self.gid})[0]
|
||||
|
@ -25,8 +25,8 @@ class KissmangaExtractor(Extractor):
|
||||
"""Base class for kissmanga extractors"""
|
||||
category = "kissmanga"
|
||||
directory_fmt = ["{category}", "{manga}",
|
||||
"c{chapter:>03}{chapter-minor} - {title}"]
|
||||
filename_fmt = ("{manga}_c{chapter:>03}{chapter-minor}_"
|
||||
"c{chapter:>03}{chapter_minor} - {title}"]
|
||||
filename_fmt = ("{manga}_c{chapter:>03}{chapter_minor}_"
|
||||
"{page:>03}.{extension}")
|
||||
root = "http://kissmanga.com"
|
||||
|
||||
@ -60,15 +60,15 @@ class KissmangaChapterExtractor(KissmangaExtractor):
|
||||
test = [
|
||||
("http://kissmanga.com/Manga/Dropout/Ch-000---Oneshot-?id=145847", {
|
||||
"url": "4136bcd1c6cecbca8cc2bc965d54f33ef0a97cc0",
|
||||
"keyword": "ab332093a4f2e473a468235bfd624cbe3b19fd7f",
|
||||
"keyword": "97cc9e513953e20d6309648df57a52a7ced59ae0",
|
||||
}),
|
||||
("http://kissmanga.com/Manga/Urban-Tales/a?id=256717", {
|
||||
"url": "de074848f6c1245204bb9214c12bcc3ecfd65019",
|
||||
"keyword": "013aad80e578c6ccd2e1fe47cdc27c12a64f6db2",
|
||||
"keyword": "3d96653188b761752c38b60d6e397e2ace0ea04c",
|
||||
}),
|
||||
("http://kissmanga.com/Manga/Monster/Monster-79?id=7608", {
|
||||
"url": "6abec8178f35fe7846586280ca9e38eacc32452c",
|
||||
"keyword": "ca7a07ecfd9525c0f825dc747f520306611d6af9",
|
||||
"keyword": "2ae18e456a4a7e4a2889af49d5f2e9c10fbc45e6",
|
||||
}),
|
||||
("http://kissmanga.com/mAnGa/mOnStEr/Monster-79?id=7608", None),
|
||||
]
|
||||
@ -95,7 +95,7 @@ class KissmangaChapterExtractor(KissmangaExtractor):
|
||||
"manga": manga,
|
||||
"volume": match.group(1) or "",
|
||||
"chapter": match.group(2) or match.group(5),
|
||||
"chapter-minor": "."+chminor if chminor else "",
|
||||
"chapter_minor": "."+chminor if chminor else "",
|
||||
"title": match.group(4) or "",
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2016 Mike Fährmann
|
||||
# Copyright 2016-2017 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -16,20 +16,20 @@ class LusciousAlbumExtractor(AsynchronousExtractor):
|
||||
"""Extractor for image albums from luscious.net"""
|
||||
category = "luscious"
|
||||
subcategory = "album"
|
||||
directory_fmt = ["{category}", "{gallery-id} {title}"]
|
||||
filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
|
||||
directory_fmt = ["{category}", "{gallery_id} {title}"]
|
||||
filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
|
||||
pattern = [(r"(?:https?://)?(?:www\.)?luscious\.net/"
|
||||
r"(?:c/[^/]+/)?(?:pictures/album|albums)/([^/]+_(\d+))")]
|
||||
test = [
|
||||
(("https://luscious.net/c/hentai_manga/albums/"
|
||||
"okinami-no-koigokoro_277031/view/"), {
|
||||
"url": "7e4984a271a1072ac6483e4228a045895aff86f3",
|
||||
"keyword": "8533c72ff85578240cf7594eb617d907bebf87ab",
|
||||
"keyword": "76e099479b180420fd5cf820f00c52fe07fda884",
|
||||
"content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3",
|
||||
}),
|
||||
("https://luscious.net/albums/virgin-killer-sweater_282582/", {
|
||||
"url": "01e2d7dd6eecea0152610f2446a6b1d60519c8bd",
|
||||
"keyword": "6c8750df7f38ff4e15cabc9a3a2e876b84a328d6",
|
||||
"keyword": "02624ff1097260e2a3c1b220afc92ea4c6b109b3",
|
||||
}),
|
||||
("https://luscious.net/albums/okinami-no-koigokoro_277031/", None),
|
||||
]
|
||||
@ -58,7 +58,7 @@ class LusciousAlbumExtractor(AsynchronousExtractor):
|
||||
(None , '<p>Section:', ''),
|
||||
("section" , '>', '<'),
|
||||
("language", '<p>Language:', ' '),
|
||||
), values={"gallery-id": self.gid})[0]
|
||||
), values={"gallery_id": self.gid})[0]
|
||||
data["lang"] = util.language_to_code(data["language"])
|
||||
try:
|
||||
data["artist"] = text.extract(data["tags"], "rtist: ", ",")[0]
|
||||
@ -84,6 +84,6 @@ class LusciousAlbumExtractor(AsynchronousExtractor):
|
||||
"num": num,
|
||||
"name": name,
|
||||
"extension": iurl.rpartition(".")[2],
|
||||
"image-id": imgid,
|
||||
"image_id": imgid,
|
||||
}
|
||||
num += 1
|
||||
|
@ -17,14 +17,14 @@ class MangafoxChapterExtractor(AsynchronousExtractor):
|
||||
"""Extractor for manga-chapters from mangafox.me"""
|
||||
category = "mangafox"
|
||||
subcategory = "chapter"
|
||||
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor}"]
|
||||
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter_minor}"]
|
||||
# Filename template for downloaded pages. The field names must match the
# metadata keys produced by this extractor; the minor chapter part is stored
# under "chapter_minor" (underscore, not hyphen), the same key used by
# directory_fmt.
filename_fmt = ("{manga}_c{chapter:>03}{chapter_minor}_"
"{page:>03}.{extension}")
|
||||
pattern = [(r"(?:https?://)?(?:www\.)?(mangafox\.me/manga/"
|
||||
r"[^/]+/(v\d+/)?c\d+[^/]*)")]
|
||||
test = [(("http://mangafox.me/manga/kidou_keisatsu_patlabor/"
|
||||
"v05/c006.2/1.html"), {
|
||||
"keyword": "3bae0396e96868f5f24dff5e547a6bbfcbed7282",
|
||||
"keyword": "ef2757d6136ef6b02eafe12d98a05f189fe8b2ba",
|
||||
"content": "5c50c252dcf12ffecf68801f4db8a2167265f66c",
|
||||
})]
|
||||
|
||||
@ -59,7 +59,7 @@ class MangafoxChapterExtractor(AsynchronousExtractor):
|
||||
match = re.match(r"(v0*(\d+)/)?c0*(\d+)(.*)", data["chapter"])
|
||||
data["volume"] = match.group(2) or ""
|
||||
data["chapter"] = match.group(3)
|
||||
data["chapter-minor"] = match.group(4) or ""
|
||||
data["chapter_minor"] = match.group(4) or ""
|
||||
data["manga"] = data["manga"].rpartition(" ")[0]
|
||||
return data
|
||||
|
||||
|
@ -32,13 +32,13 @@ class MangahereChapterExtractor(AsynchronousExtractor):
|
||||
"""Extractor for manga-chapters from mangahere.co"""
|
||||
category = "mangahere"
|
||||
subcategory = "chapter"
|
||||
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor}"]
|
||||
filename_fmt = ("{manga}_c{chapter:>03}{chapter-minor}_"
|
||||
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter_minor}"]
|
||||
filename_fmt = ("{manga}_c{chapter:>03}{chapter_minor}_"
|
||||
"{page:>03}.{extension}")
|
||||
pattern = [(r"(?:https?://)?(?:www\.)?mangahere\.co/manga/"
|
||||
r"([^/]+(?:/v0*(\d+))?/c0*(\d+)(\.\d+)?)")]
|
||||
test = [("http://www.mangahere.co/manga/dongguo_xiaojie/c003.2/", {
|
||||
"keyword": "d3fe470e934a9f02ed00d4391b1743970eae82fa",
|
||||
"keyword": "8cb9f9512b68d2cdcbea2419592b9247304c149b",
|
||||
"content": "dd8454469429c6c717cbc3cad228e76ef8c6e420",
|
||||
})]
|
||||
url_fmt = "http://www.mangahere.co/manga/{}/{}.html"
|
||||
@ -74,8 +74,8 @@ class MangahereChapterExtractor(AsynchronousExtractor):
|
||||
# "title": TODO,
|
||||
"volume": self.volume or "",
|
||||
"chapter": self.chapter,
|
||||
"chapter-minor": self.chminor or "",
|
||||
"chapter-id": chid,
|
||||
"chapter_minor": self.chminor or "",
|
||||
"chapter_id": chid,
|
||||
"count": count,
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
|
@ -35,5 +35,5 @@ class MangapandaChapterExtractor(MangapandaBase, MangareaderChapterExtractor):
|
||||
]
|
||||
test = [("http://www.mangapanda.com/red-storm/2", {
|
||||
"url": "4bf4ddf6c50105ec8a37675495ab80c46608275d",
|
||||
"keyword": "89c712f7ed255ec9c1d8e84dcb5a160b6cb4498c",
|
||||
"keyword": "16c881a09f5cf0f2c1c374665b667d1f26b20c22",
|
||||
})]
|
||||
|
@ -32,24 +32,24 @@ class MangaparkChapterExtractor(Extractor):
|
||||
category = "mangapark"
|
||||
subcategory = "chapter"
|
||||
directory_fmt = ["{category}", "{manga}",
|
||||
"c{chapter:>03}{chapter-minor} - {title}"]
|
||||
filename_fmt = ("{manga}_c{chapter:>03}{chapter-minor}_"
|
||||
"c{chapter:>03}{chapter_minor} - {title}"]
|
||||
filename_fmt = ("{manga}_c{chapter:>03}{chapter_minor}_"
|
||||
"{page:>03}.{extension}")
|
||||
pattern = [(r"(?:https?://)?(?:www\.)?mangapark\.me/manga/"
|
||||
r"([^/]+/s(\d+)(?:/v([^/]+))?/c(\d+)(?:([^/]+)|/e(\d+))?)")]
|
||||
test = [
|
||||
("http://mangapark.me/manga/gosu/s2/c55", {
|
||||
"count": 50,
|
||||
"keyword": "bd97ca24ef344b44292910384215ef3f1005ea2e",
|
||||
"keyword": "b2216c0b8621a86be51eced72a2a61ba9f47e11e",
|
||||
}),
|
||||
(("http://mangapark.me/manga/"
|
||||
"ad-astra-per-aspera-hata-kenjirou/s1/c1.2"), {
|
||||
"count": 40,
|
||||
"keyword": "f28eb26b4966bebda0e761f241c2dd49e505ce13",
|
||||
"keyword": "257d90d582b8f259380a61a0774ff2c4ae5b3986",
|
||||
}),
|
||||
("http://mangapark.me/manga/gekkan-shoujo-nozaki-kun/s2/c70/e2/1", {
|
||||
"count": 15,
|
||||
"keyword": "34aa6ca3bdf5078f839cbf68ff68e39728cf248b",
|
||||
"keyword": "cf5329984c062058133e12ee3fe4a53fb734ed4c",
|
||||
}),
|
||||
]
|
||||
|
||||
@ -80,13 +80,13 @@ class MangaparkChapterExtractor(Extractor):
|
||||
"version": self.version,
|
||||
"volume": self.volume or "",
|
||||
"chapter": self.chapter,
|
||||
"chapter-minor": self.chminor or "",
|
||||
"chapter_minor": self.chminor or "",
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
}
|
||||
data = text.extract_all(page, (
|
||||
("manga-id" , "var _manga_id = '", "'"),
|
||||
("chapter-id", "var _book_id = '", "'"),
|
||||
("manga_id" , "var _manga_id = '", "'"),
|
||||
("chapter_id", "var _book_id = '", "'"),
|
||||
("manga" , "<h2>", "</h2>"),
|
||||
("title" , "</a>", "<"),
|
||||
(None , 'target="_blank" href="', ''),
|
||||
|
@ -45,7 +45,7 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
|
||||
test = [(("http://www.mangareader.net/"
|
||||
"karate-shoukoushi-kohinata-minoru/11"), {
|
||||
"url": "84ffaab4c027ef9022695c53163c3aeabd07ca58",
|
||||
"keyword": "09b4ad57a082eb371dec027ccfc8ed1157c6eac6",
|
||||
"keyword": "05ef372e80257726166f78625cb78a09e6d9b1d1",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@ -77,14 +77,14 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
|
||||
(None, '<td class="propertytitle">Name:', ''),
|
||||
("manga", '<h2 class="aname">', '</h2>'),
|
||||
(None, '<td class="propertytitle">Year of Release:', ''),
|
||||
('manga-release', '<td>', '</td>'),
|
||||
('release', '<td>', '</td>'),
|
||||
(None, '<td class="propertytitle">Author:', ''),
|
||||
('author', '<td>', '</td>'),
|
||||
(None, '<td class="propertytitle">Artist:', ''),
|
||||
('artist', '<td>', '</td>'),
|
||||
(None, '<div id="readmangasum">', ''),
|
||||
('title', ' ' + self.chapter + '</a> : ', '</td>'),
|
||||
('chapter-date', '<td>', '</td>'),
|
||||
('date', '<td>', '</td>'),
|
||||
), values=data)
|
||||
data, _ = text.extract_all(chapter_page, (
|
||||
(None, '<select id="pageMenu"', ''),
|
||||
|
@ -17,12 +17,12 @@ class NhentaiGalleryExtractor(Extractor):
|
||||
"""Extractor for image galleries from nhentai.net"""
|
||||
category = "nhentai"
|
||||
subcategory = "gallery"
|
||||
directory_fmt = ["{category}", "{gallery-id} {title}"]
|
||||
filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
|
||||
directory_fmt = ["{category}", "{gallery_id} {title}"]
|
||||
filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?nhentai\.net/g/(\d+)"]
|
||||
test = [("http://nhentai.net/g/147850/", {
|
||||
"url": "5179dbf0f96af44005a0ff705a0ad64ac26547d0",
|
||||
"keyword": "574e36436a1c01c82e5779207e44e4e78d0e1726",
|
||||
"keyword": "82751294e75fc203b019ffd94d8c1f94a5b86494",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@ -33,7 +33,7 @@ class NhentaiGalleryExtractor(Extractor):
|
||||
ginfo = self.get_gallery_info()
|
||||
data = self.get_job_metadata(ginfo)
|
||||
urlfmt = "{}galleries/{}/{{}}.{{}}".format(
|
||||
ginfo["media_url"], data["media-id"])
|
||||
ginfo["media_url"], data["media_id"])
|
||||
extdict = {"j": "jpg", "p": "png", "g": "gif"}
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
@ -60,12 +60,12 @@ class NhentaiGalleryExtractor(Extractor):
|
||||
title_en = ginfo["title"].get("english", "")
|
||||
title_ja = ginfo["title"].get("japanese", "")
|
||||
return {
|
||||
"gallery-id": self.gid,
|
||||
"upload-date": ginfo["upload_date"],
|
||||
"media-id": ginfo["media_id"],
|
||||
"gallery_id": self.gid,
|
||||
"upload_date": ginfo["upload_date"],
|
||||
"media_id": ginfo["media_id"],
|
||||
"scanlator": ginfo["scanlator"],
|
||||
"count": ginfo["num_pages"],
|
||||
"title": title_en or title_ja,
|
||||
"title-en": title_en,
|
||||
"title-ja": title_ja,
|
||||
"title_en": title_en,
|
||||
"title_ja": title_ja,
|
||||
}
|
||||
|
@ -16,8 +16,8 @@ from ..cache import cache
|
||||
class NijieExtractor(AsynchronousExtractor):
|
||||
"""Base class for nijie extractors"""
|
||||
category = "nijie"
|
||||
directory_fmt = ["{category}", "{artist-id}"]
|
||||
filename_fmt = "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}"
|
||||
directory_fmt = ["{category}", "{artist_id}"]
|
||||
filename_fmt = "{category}_{artist_id}_{image_id}_p{index:>02}.{extension}"
|
||||
cookiedomain = "nijie.info"
|
||||
popup_url = "https://nijie.info/view_popup.php?id="
|
||||
|
||||
@ -39,7 +39,7 @@ class NijieExtractor(AsynchronousExtractor):
|
||||
|
||||
def get_job_metadata(self):
|
||||
"""Collect metadata for extractor-job"""
|
||||
return {"artist-id": self.artist_id}
|
||||
return {"artist_id": self.artist_id}
|
||||
|
||||
def get_image_ids(self):
|
||||
"""Collect all image-ids for a specific artist"""
|
||||
@ -58,7 +58,7 @@ class NijieExtractor(AsynchronousExtractor):
|
||||
yield "https://pic" + url, text.nameext_from_url(url, {
|
||||
"count": len(images),
|
||||
"index": index,
|
||||
"image-id": image_id,
|
||||
"image_id": image_id,
|
||||
})
|
||||
|
||||
def login(self):
|
||||
@ -87,7 +87,7 @@ class NijieUserExtractor(NijieExtractor):
|
||||
test = [
|
||||
("https://nijie.info/members_illust.php?id=44", {
|
||||
"url": "585d821df4716b1098660a0be426d01db4b65f2a",
|
||||
"keyword": "7a2dbf8fc0dfdb2af208ecdb8ec7f3186bdc31ab",
|
||||
"keyword": "804d3a9bb8205048ac0d1fe8eec39266b50f1e8e",
|
||||
}),
|
||||
("https://nijie.info/members_illust.php?id=43", {
|
||||
"exception": exception.NotFoundError,
|
||||
@ -119,7 +119,7 @@ class NijieImageExtractor(NijieExtractor):
|
||||
test = [
|
||||
("https://nijie.info/view.php?id=70720", {
|
||||
"url": "a10d4995645b5f260821e32c60a35f73546c2699",
|
||||
"keyword": "e454c2bad9b636b90d569881bf4fe8438506e0d2",
|
||||
"keyword": "4ecfd46460761b7a89fdba815eece10e917032c2",
|
||||
"content": "d85e3ea896ed5e4da0bca2390ad310a4df716ca6",
|
||||
}),
|
||||
("https://nijie.info/view.php?id=70724", {
|
||||
|
@ -15,7 +15,7 @@ from .. import text, exception
|
||||
class PinterestExtractor(Extractor):
|
||||
"""Base class for pinterest extractors"""
|
||||
category = "pinterest"
|
||||
filename_fmt = "{category}_{pin-id}.{extension}"
|
||||
filename_fmt = "{category}_{pin_id}.{extension}"
|
||||
|
||||
def __init__(self):
|
||||
Extractor.__init__(self)
|
||||
@ -26,7 +26,7 @@ class PinterestExtractor(Extractor):
|
||||
img = pin["image"]["original"]
|
||||
url = img["url"]
|
||||
data = {
|
||||
"pin-id": pin["id"],
|
||||
"pin_id": pin["id"],
|
||||
"note": pin["note"],
|
||||
"width": img["width"],
|
||||
"height": img["height"],
|
||||
@ -41,7 +41,7 @@ class PinterestPinExtractor(PinterestExtractor):
|
||||
test = [
|
||||
("https://www.pinterest.com/pin/858146903966145189/", {
|
||||
"url": "7abf2be76bf03d452feacf6e000b040fc2706b80",
|
||||
"keyword": "e1a2ce625ece86f0b31f0ae94a3af3d72e6454b9",
|
||||
"keyword": "5aac8028244b865824c61667f6cadd51e8765853",
|
||||
"content": "d3e24bc9f7af585e8c23b9136956bd45a4d9b947",
|
||||
}),
|
||||
("https://www.pinterest.com/pin/858146903966145188/", {
|
||||
@ -70,7 +70,7 @@ class PinterestBoardExtractor(PinterestExtractor):
|
||||
test = [
|
||||
("https://www.pinterest.com/g1952849/test-/", {
|
||||
"url": "705ee521630a5d613b0449d694a5345e684572a9",
|
||||
"keyword": "2815716747f84fa0a4047d29d71df8ae96a0e177",
|
||||
"keyword": "1650dd31c4dedd940cef399135e485400625ec0b",
|
||||
"content": "30897fb5d5616765bb2c9c26cb84f54499424fb4",
|
||||
}),
|
||||
("https://www.pinterest.com/g1952848/test/", {
|
||||
@ -99,7 +99,7 @@ class PinterestBoardExtractor(PinterestExtractor):
|
||||
"""Get metadata from a board-object"""
|
||||
data = {
|
||||
"user": self.user,
|
||||
"board-id": board["id"],
|
||||
"board_id": board["id"],
|
||||
"board": board["name"],
|
||||
"count": board["counts"]["pins"],
|
||||
}
|
||||
|
@ -39,7 +39,7 @@ class SankakuTagExtractor(AsynchronousExtractor):
|
||||
yield Message.Directory, data
|
||||
for image in self.get_images():
|
||||
image.update(data)
|
||||
yield Message.Url, image["file-url"], image
|
||||
yield Message.Url, image["file_url"], image
|
||||
|
||||
def get_job_metadata(self):
|
||||
"""Collect metadata for extractor-job"""
|
||||
@ -75,7 +75,7 @@ class SankakuTagExtractor(AsynchronousExtractor):
|
||||
height , pos = text.extract(page, '', ' ', pos)
|
||||
data = text.nameext_from_url(image_url, {
|
||||
"id": image_id,
|
||||
"file-url": "https:" + text.unescape(image_url),
|
||||
"file_url": "https:" + text.unescape(image_url),
|
||||
"width": width,
|
||||
"height": height,
|
||||
})
|
||||
|
@ -27,7 +27,7 @@ class SeigaExtractor(Extractor):
|
||||
for image in self.get_images():
|
||||
data.update(image)
|
||||
data["extension"] = None
|
||||
url = self.get_image_url(image["image-id"])
|
||||
url = self.get_image_url(image["image_id"])
|
||||
yield Message.Url, url, data
|
||||
|
||||
def get_metadata(self):
|
||||
@ -68,17 +68,17 @@ class SeigaExtractor(Extractor):
|
||||
class SeigaUserExtractor(SeigaExtractor):
|
||||
"""Extractor for images of a user from seiga.nicovideo.jp"""
|
||||
subcategory = "user"
|
||||
directory_fmt = ["{category}", "{user-id}"]
|
||||
filename_fmt = "{category}_{user-id}_{image-id}.{extension}"
|
||||
directory_fmt = ["{category}", "{user_id}"]
|
||||
filename_fmt = "{category}_{user_id}_{image_id}.{extension}"
|
||||
pattern = [(r"(?:https?://)?(?:www\.|seiga\.)?nicovideo\.jp/"
|
||||
r"user/illust/(\d+)")]
|
||||
test = [
|
||||
("http://seiga.nicovideo.jp/user/illust/39537793", {
|
||||
"keyword": "15e4158164f9309c75c0f97169e0026b13a642ed",
|
||||
"keyword": "a716bf534b4191dc58ddbff51494b72a9cf58285",
|
||||
}),
|
||||
("http://seiga.nicovideo.jp/user/illust/79433", {
|
||||
"url": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
|
||||
"keyword": "82b330a4d1e8a2cd47ee934a0a40829232b49cdc",
|
||||
"keyword": "187b77728381d072466af7f7ebcc479a0830ce25",
|
||||
}),
|
||||
]
|
||||
|
||||
@ -87,10 +87,10 @@ class SeigaUserExtractor(SeigaExtractor):
|
||||
self.user_id = match.group(1)
|
||||
|
||||
def get_metadata(self):
|
||||
return {"user-id": self.user_id}
|
||||
return {"user_id": self.user_id}
|
||||
|
||||
def get_images(self):
|
||||
keymap = {0: "image-id", 2: "title", 3: "description",
|
||||
keymap = {0: "image_id", 2: "title", 3: "description",
|
||||
7: "summary", 8: "genre", 18: "date"}
|
||||
url = "http://seiga.nicovideo.jp/api/user/data?id=" + self.user_id
|
||||
response = self.request(url)
|
||||
@ -114,14 +114,14 @@ class SeigaUserExtractor(SeigaExtractor):
|
||||
class SeigaImageExtractor(SeigaExtractor):
|
||||
"""Extractor for single images from seiga.nicovideo.jp"""
|
||||
subcategory = "image"
|
||||
filename_fmt = "{category}_{image-id}.{extension}"
|
||||
filename_fmt = "{category}_{image_id}.{extension}"
|
||||
pattern = [(r"(?:https?://)?(?:www\.|seiga\.)?nicovideo\.jp/"
|
||||
r"(?:seiga/im|image/source/)(\d+)"),
|
||||
(r"(?:https?://)?lohas\.nicoseiga\.jp/"
|
||||
r"(?:priv|o)/[^/]+/\d+/(\d+)")]
|
||||
test = [
|
||||
("http://seiga.nicovideo.jp/seiga/im5977527", {
|
||||
"keyword": "3b61d2fc26efb74547f47c522051cf3596ff6b62",
|
||||
"keyword": "6ff7564b35890e333ff7413cb633ddb58339912f",
|
||||
"content": "d9202292012178374d57fb0126f6124387265297",
|
||||
}),
|
||||
("http://seiga.nicovideo.jp/seiga/im123", {
|
||||
@ -134,4 +134,4 @@ class SeigaImageExtractor(SeigaExtractor):
|
||||
self.image_id = match.group(1)
|
||||
|
||||
def get_images(self):
|
||||
return ({"image-id": self.image_id},)
|
||||
return ({"image_id": self.image_id},)
|
||||
|
@ -22,7 +22,7 @@ class TumblrUserExtractor(Extractor):
|
||||
pattern = [r"(?:https?://)?([^.]+)\.tumblr\.com(?:/page/\d+)?/?$"]
|
||||
test = [("http://demo.tumblr.com/", {
|
||||
"url": "5c113da25a605b7449de8ca1606eec5502b4dc9f",
|
||||
"keyword": "d2cf142bcaf1cbea29291f8c8ccb5f582962d8be",
|
||||
"keyword": "8f1b06c2a0a562b10df3e62ab2a8156e3da1855b",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@ -42,6 +42,8 @@ class TumblrUserExtractor(Extractor):
|
||||
image.update(data)
|
||||
image = text.nameext_from_url(url, image)
|
||||
image["hash"] = text.extract(image["name"], "_", "_")[0]
|
||||
image = {key.replace("-", "_"): value
|
||||
for key, value in image.items()}
|
||||
yield Message.Url, url, image
|
||||
|
||||
def get_job_metadata(self, image_data):
|
||||
@ -100,7 +102,7 @@ class TumblrPostExtractor(TumblrUserExtractor):
|
||||
pattern = [r"(?:https?://)?([^.]+)\.tumblr\.com/post/(\d+)"]
|
||||
test = [("http://demo.tumblr.com/post/459265350", {
|
||||
"url": "5c113da25a605b7449de8ca1606eec5502b4dc9f",
|
||||
"keyword": "ce211deb8e3936e2202f3f82f38375fd14781b79",
|
||||
"keyword": "4d5bc44bf8ec334fdaf78696edf215574fa6d998",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@ -114,7 +116,7 @@ class TumblrTagExtractor(TumblrUserExtractor):
|
||||
pattern = [r"(?:https?://)?([^.]+)\.tumblr\.com/tagged/(.+)"]
|
||||
test = [("http://demo.tumblr.com/tagged/Times Square", {
|
||||
"url": "5c113da25a605b7449de8ca1606eec5502b4dc9f",
|
||||
"keyword": "f36901e86c5d20affbe66f78c6b5717d34466fc4",
|
||||
"keyword": "b0465d131ecb097633127b79805432dacae06d14",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
|
@ -17,18 +17,18 @@ class TwitterTweetExtractor(Extractor):
|
||||
category = "twitter"
|
||||
subcategory = "tweet"
|
||||
directory_fmt = ["{category}", "{user}"]
|
||||
filename_fmt = "{tweet-id}_{num}.{extension}"
|
||||
filename_fmt = "{tweet_id}_{num}.{extension}"
|
||||
pattern = [r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com/"
|
||||
r"(([^/]+)/status/(\d+))"]
|
||||
test = [
|
||||
("https://twitter.com/PicturesEarth/status/672897688871018500", {
|
||||
"url": "d9e68d41301d2fe382eb27711dea28366be03b1a",
|
||||
"keyword": "3cd8e27026a2112008985b1b53f5e4baf4616177",
|
||||
"keyword": "7a6eac2bc88bbf16d0671ebb38e31f708d940ee8",
|
||||
"content": "a1f2f04cb2d8df24b1afa7a39910afda23484342",
|
||||
}),
|
||||
("https://twitter.com/perrypumas/status/894001459754180609", {
|
||||
"url": "c8a262a9698cb733fb27870f5a8f75faf77d79f6",
|
||||
"keyword": "8438551b34caf2f580ba23f6014509c8dd5e1e0f",
|
||||
"keyword": "334cd0c1f85c3e66923b44740f17407ce444931e",
|
||||
}),
|
||||
]
|
||||
|
||||
@ -54,7 +54,7 @@ class TwitterTweetExtractor(Extractor):
|
||||
"""Collect metadata for extractor-job"""
|
||||
return {
|
||||
"user": self.user,
|
||||
"tweet-id": self.tid,
|
||||
"tweet_id": self.tid,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
|
@ -22,11 +22,11 @@ class WarosuThreadExtractor(Extractor):
|
||||
test = [
|
||||
("https://warosu.org/jp/thread/16656025", {
|
||||
"url": "889d57246ed67e491e5b8f7f124e50ea7991e770",
|
||||
"keyword": "dab56209e31634b44eb99a2cdd85fa922c726b4f",
|
||||
"keyword": "65607b4630d87767465a5985c81cfa594913c073",
|
||||
}),
|
||||
("https://warosu.org/jp/thread/16658073", {
|
||||
"url": "4500cf3184b067424fd9883249bd543c905fbecd",
|
||||
"keyword": "084369b27b8cfc08a2276e00a4be6ffd7b1e5088",
|
||||
"keyword": "d88ea2280201a7b04256c852733faff7272d7d11",
|
||||
"content": "d48df0a701e6599312bfff8674f4aa5d4fb8db1c",
|
||||
}),
|
||||
]
|
||||
@ -59,7 +59,7 @@ class WarosuThreadExtractor(Extractor):
|
||||
title = text.extract(page, 'filetitle" itemprop="name">', '<')[0]
|
||||
return {
|
||||
"board": self.board,
|
||||
"board-name": boardname.rpartition(" - ")[2],
|
||||
"board_name": boardname.rpartition(" - ")[2],
|
||||
"thread": self.thread,
|
||||
"title": title,
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user