2019-04-16 18:16:48 +02:00
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
|
|
"""Generate a reStructuredText document with all supported sites"""
|
2017-01-15 21:31:21 +01:00
|
|
|
|
|
|
|
|
|
import sys
|
2019-02-20 19:25:41 +01:00
|
|
|
|
import collections
|
2017-01-15 21:31:21 +01:00
|
|
|
|
|
2019-04-16 18:16:48 +02:00
|
|
|
|
import util
|
|
|
|
|
from gallery_dl import extractor
|
2017-01-15 21:31:21 +01:00
|
|
|
|
|
|
|
|
|
|
2017-04-20 16:56:50 +02:00
|
|
|
|
# Display names for extractor categories, keyed by category name.
# category_text() looks a category up here and falls back to the
# capitalized category name for anything that is not listed.
CATEGORY_MAP = {
    "2chan"          : "Futaba Channel",
    "35photo"        : "35PHOTO",
    "adultempire"    : "Adult Empire",
    "archivedmoe"    : "Archived.Moe",
    "archiveofsins"  : "Archive of Sins",
    "artstation"     : "ArtStation",
    "b4k"            : "arch.b4k.co",
    "bcy"            : "半次元",
    "bobx"           : "BobX",
    "deviantart"     : "DeviantArt",
    "dokireader"     : "Doki Reader",
    "dynastyscans"   : "Dynasty Reader",
    "e621"           : "e621",
    "e-hentai"       : "E-Hentai",
    "exhentai"       : "ExHentai",
    "fallenangels"   : "Fallen Angels Scans",
    "fashionnova"    : "Fashion Nova",
    "furaffinity"    : "Fur Affinity",
    "hbrowse"        : "HBrowse",
    "hentai2read"    : "Hentai2Read",
    "hentaicafe"     : "Hentai Cafe",
    "hentaifoundry"  : "Hentai Foundry",
    "hentaifox"      : "HentaiFox",
    "hentaihand"     : "HentaiHand",
    "hentaihere"     : "HentaiHere",
    "hitomi"         : "Hitomi.la",
    "idolcomplex"    : "Idol Complex",
    "imagebam"       : "ImageBam",
    "imagefap"       : "ImageFap",
    "imgbb"          : "ImgBB",
    "imgbox"         : "imgbox",
    "imgth"          : "imgth",
    "imgur"          : "imgur",
    "jaiminisbox"    : "Jaimini's Box",
    "kabeuchi"       : "かべうち",
    "kireicake"      : "Kirei Cake",
    "kissmanga"      : "KissManga",
    "lineblog"       : "LINE BLOG",
    "livedoor"       : "livedoor Blog",
    "mangadex"       : "MangaDex",
    "mangafox"       : "Manga Fox",
    "mangahere"      : "Manga Here",
    "mangapark"      : "MangaPark",
    "mangastream"    : "Manga Stream",
    "myportfolio"    : "Adobe Portfolio",
    "nhentai"        : "nhentai",
    "nijie"          : "nijie",
    "nozomi"         : "Nozomi.la",
    "nsfwalbum"      : "NSFWalbum.com",
    "nyafuu"         : "Nyafuu Archive",
    "paheal"         : "rule #34",
    "powermanga"     : "PowerManga",
    "readcomiconline": "Read Comic Online",
    "rbt"            : "RebeccaBlackTech",
    "rule34"         : "Rule 34",
    "sankaku"        : "Sankaku Channel",
    "sankakucomplex" : "Sankaku Complex",
    "seaotterscans"  : "Sea Otter Scans",
    "seiga"          : "Niconico Seiga",
    "senmanga"       : "Sen Manga",
    "sensescans"     : "Sense-Scans",
    "sexcom"         : "Sex.com",
    "simplyhentai"   : "Simply Hentai",
    "slickpic"       : "SlickPic",
    "slideshare"     : "SlideShare",
    "smugmug"        : "SmugMug",
    "thebarchive"    : "The /b/ Archive",
    "vanillarock"    : "もえぴりあ",
    "vsco"           : "VSCO",
    "wikiart"        : "WikiArt.org",
    "worldthree"     : "World Three",
    "xhamster"       : "xHamster",
    "xvideos"        : "XVideos",
    "yuki"           : "yuki.la 4chan archive",
}
|
|
|
|
|
|
|
|
|
|
# Labels for extractor subcategories, consulted by subcategory_text().
#
# String values are generic labels keyed by subcategory name.
# Dict values are keyed by *category* name and hold labels that apply
# to that category only; they are checked before the generic ones.
# An empty label makes the "Capabilities" column in COLUMNS skip
# the entry.
SUBCATEGORY_MAP = {
    # generic labels
    "doujin" : "Doujin",
    "gallery": "Galleries",
    "image"  : "individual Images",
    "issue"  : "Comic Issues",
    "manga"  : "Manga",
    "popular": "Popular Images",
    "recent" : "Recent Images",
    "search" : "Search Results",
    "status" : "Images from Statuses",
    "tag"    : "Tag Searches",
    "user"   : "User Profiles",
    "following"    : "",
    "related-pin"  : "related Pins",
    "related-board": "",

    # per-category labels
    "artstation": {
        "artwork": "Artwork Listings",
    },
    "deviantart": {
        "stash": "Sta.sh",
    },
    "instagram": {
        "saved": "Saved Posts",
    },
    "newgrounds": {
        "art"  : "Art",
        "audio": "Audio",
        "media": "Media Files",
    },
    "pinterest": {
        "pinit": "pin.it Links",
    },
    "pixiv": {
        "me"  : "pixiv.me Links",
        "work": "individual Images",
    },
    "smugmug": {
        "path": "Images from Users and Folders",
    },
    "twitter": {
        "media": "Media Timelines",
    },
    "wikiart": {
        "artists": "Artist Listings",
    },
}
|
|
|
|
|
|
2017-06-15 21:06:20 +02:00
|
|
|
|
# Text for the "Authentication" table column, keyed by category name.
# Categories without an entry get an empty cell (see COLUMNS).
AUTH_MAP = {
    "danbooru"   : "Optional",
    "deviantart" : "Optional (OAuth)",
    "e621"       : "Optional",
    "e-hentai"   : "Optional",
    "exhentai"   : "Optional",
    "flickr"     : "Optional (OAuth)",
    "idolcomplex": "Optional",
    "imgbb"      : "Optional",
    "instagram"  : "Optional",
    "mangoxo"    : "Optional",
    "newgrounds" : "Optional",
    "nijie"      : "Required",
    "pixiv"      : "Required",
    "reddit"     : "Optional (OAuth)",
    "sankaku"    : "Optional",
    "seiga"      : "Required",
    "smugmug"    : "Optional (OAuth)",
    "tsumino"    : "Optional",
    "tumblr"     : "Optional (OAuth)",
    "twitter"    : "Optional",
    # reStructuredText hyperlink into configuration.rst
    "wallhaven"  : ("Optional (`API Key "
                    "<configuration.rst#extractorwallhavenapi-key>`__)"),
}
|
|
|
|
|
|
|
|
|
|
# Categories that build_extractor_list() leaves out of the table.
IGNORE_LIST = (
    "directlink",
    "oauth",
    "recursive",
    "test",
)
|
|
|
|
|
|
2017-04-20 16:56:50 +02:00
|
|
|
|
|
2019-02-20 19:25:41 +01:00
|
|
|
|
def domain(cls):
    """Return the web-domain related to an extractor class

    The following sources are tried in order; the first one that
    yields a result is used:

    1. the last word of the docstring of the module defining 'cls',
       if that word starts with "http"
    2. the class' non-empty 'root' attribute plus a trailing slash
    3. the last word of the class docstring, prefixed with "https://"
       or "http://" depending on the class' 'https' attribute
    4. scheme and host of the first URL from 'cls._get_tests()'

    An empty string is returned if none of them is available.
    Missing or empty docstrings are skipped instead of raising.
    """
    # the module may have no docstring at all (None)
    module = sys.modules.get(cls.__module__)
    words = (getattr(module, "__doc__", None) or "").split()
    if words and words[-1].startswith("http"):
        return words[-1]

    root = getattr(cls, "root", None)
    if root:
        return root + "/"

    if hasattr(cls, "https"):
        words = (cls.__doc__ or "").split()
        if words:
            scheme = "https" if cls.https else "http"
            return "{}://{}/".format(scheme, words[-1])

    test = next(cls._get_tests(), None)
    if test:
        url = test[0]
        # start at index 8 to get past the "//" of "http://" / "https://"
        end = url.find("/", 8)
        if end < 0:
            # URL has no path; slicing with find()'s -1 would
            # truncate it to an empty string
            return url + "/"
        return url[:end + 1]

    return ""
|
2017-04-20 16:56:50 +02:00
|
|
|
|
|
|
|
|
|
|
2019-02-20 19:25:41 +01:00
|
|
|
|
def category_text(cls):
    """Return the display name for the category of an extractor class

    A non-empty entry in CATEGORY_MAP takes precedence; otherwise the
    category name itself is returned in capitalized form.
    """
    category = cls.category
    name = CATEGORY_MAP.get(category)
    if name:
        return name
    return category.capitalize()
|
2018-09-28 12:39:05 +02:00
|
|
|
|
|
2017-04-20 16:56:50 +02:00
|
|
|
|
|
2019-02-20 19:25:41 +01:00
|
|
|
|
def subcategory_text(cls):
    """Return a human-readable representation of a subcategory

    Lookup order:

    1. the per-category dict stored in SUBCATEGORY_MAP under the
       class' category
    2. the generic string label stored in SUBCATEGORY_MAP under the
       class' subcategory
    3. the capitalized subcategory name, with an "s" appended unless
       it already ends in one

    The result may be an empty string, either because the matching
    label is empty or because the class has no subcategory.
    """
    c, sc = cls.category, cls.subcategory

    # SUBCATEGORY_MAP holds string labels and per-category dicts side
    # by side, so check the value type: a category that happens to
    # share its name with a generic label must not be treated as a
    # dict of overrides ...
    overrides = SUBCATEGORY_MAP.get(c)
    if isinstance(overrides, dict) and sc in overrides:
        return overrides[sc]

    # ... and a subcategory that shares its name with a category
    # must not return that category's dict as label
    label = SUBCATEGORY_MAP.get(sc)
    if isinstance(label, str):
        return label

    # nothing to pluralize; avoids producing a lone "s"
    if not sc:
        return ""

    sc = sc.capitalize()
    return sc if sc.endswith("s") else sc + "s"
|
2017-04-20 16:56:50 +02:00
|
|
|
|
|
|
|
|
|
|
2019-02-20 19:25:41 +01:00
|
|
|
|
def category_key(cls):
    """Return the key used to sort extractor classes by category

    The key is the lowercased display name from category_text().
    Classes whose module name ends in ".imagehosts" get a "zz" prefix
    (presumably to move them towards the end of the sorted table).
    """
    prefix = "zz" if cls.__module__.endswith(".imagehosts") else ""
    return prefix + category_text(cls).lower()
|
2017-04-20 16:56:50 +02:00
|
|
|
|
|
|
|
|
|
|
2019-02-20 19:25:41 +01:00
|
|
|
|
def subcategory_key(cls):
    """Return the key used to sort extractor classes by subcategory

    This is the subcategory name itself, except for "issue", which is
    mapped to "A" and therefore sorts before any lowercase name.
    """
    subcategory = cls.subcategory
    return "A" if subcategory == "issue" else subcategory
|
2017-07-15 15:01:30 +02:00
|
|
|
|
|
|
|
|
|
|
2019-02-20 19:25:41 +01:00
|
|
|
|
def build_extractor_list():
    """Generate a sorted list of lists of extractor classes

    Every inner list holds the extractor classes of one category,
    ordered by subcategory_key(); the outer list is ordered by
    category_key() of each inner list's first element. Classes without
    a category or with a category from IGNORE_LIST are left out.
    """
    extractors = collections.defaultdict(list)

    # get lists of extractor classes grouped by category
    for extr in extractor.extractors():
        if extr.category and extr.category not in IGNORE_LIST:
            extractors[extr.category].append(extr)

    # sort extractor lists with the same category
    for extrlist in extractors.values():
        extrlist.sort(key=subcategory_key)

    # ugly hack to add e-hentai.org:
    # derive an "e-hentai" variant from every "exhentai" class.
    # Use .get() here, since indexing a defaultdict inserts an empty
    # list for a missing key, and an empty list would make the
    # 'lst[0]' lookup in the final sort fail.
    eh = []
    for extr in extractors.get("exhentai", ()):
        class eh_extr(extr):
            category = "e-hentai"
            root = "https://e-hentai.org"
        eh.append(eh_extr)
    if eh:
        extractors["e-hentai"] = eh

    # sort lists by category
    return sorted(
        extractors.values(),
        key=lambda lst: category_key(lst[0]),
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Table layout: one (title, width, formatter) entry per column.
# A formatter receives the list of extractor classes of one category
# and returns the text for that row's cell.
COLUMNS = (
    ("Site", 20, lambda extrs: category_text(extrs[0])),
    ("URL", 35, lambda extrs: domain(extrs[0])),
    # comma-separated subcategory labels; empty labels are dropped
    ("Capabilities", 50, lambda extrs: ", ".join(
        filter(None, map(subcategory_text, extrs)))),
    ("Authentication", 16, lambda extrs: AUTH_MAP.get(extrs[0].category, "")),
)
|
2019-01-09 14:21:19 +01:00
|
|
|
|
|
|
|
|
|
|
2019-02-20 19:25:41 +01:00
|
|
|
|
def write_output(fobj, columns, extractors):
    """Write the table of supported sites as reStructuredText

    Arguments:
        fobj: object whose write() method receives the output
        columns: sequence of (title, width, formatter) entries; a
            formatter is called with one entry of 'extractors' and
            returns the text for that cell
        extractors: iterable of extractor lists, one per table row

    Cell text that is longer than its column width is replaced by a
    substitution reference, whose definition is written below the
    table.
    """
    emit = fobj.write
    replacements = []

    def pad(output, col, category=None):
        """Fit a cell value into the fixed width of column 'col'"""
        width = col[1]
        if not isinstance(output, str):
            output = col[2](output)

        if len(output) > width:
            # named after the category and the first letter
            # of the column title
            ref = "|{}-{}|".format(category, col[0][0])
            replacements.append((ref, output))
            output = ref

        return output.ljust(width)

    # caption
    emit("Supported Sites\n")
    emit("===============\n")
    emit("Unless otherwise known, assume all sites to be NSFW\n\n")

    # table head
    ruler = " ".join(["=" * col[1] for col in columns]) + "\n"
    emit(ruler)
    titles = [pad(col[0], col) for col in columns]
    emit(" ".join(titles).strip() + "\n")
    emit(ruler)

    # table body
    for extrlist in extractors:
        cells = [
            pad(col[2](extrlist), col, extrlist[0].category)
            for col in columns
        ]
        emit(" ".join(cells).strip())
        emit("\n")

    # table bottom
    emit(ruler)
    emit("\n")

    # substitutions
    for ref, text in replacements:
        emit(".. {} replace:: {}\n".format(ref, text))
|
2017-04-20 16:56:50 +02:00
|
|
|
|
|
2017-01-15 21:31:21 +01:00
|
|
|
|
|
|
|
|
|
# Name of the output file inside the "docs" directory: the first
# command-line argument if given, the default name otherwise.
outfile = sys.argv[1] if len(sys.argv) > 1 else "supportedsites.rst"

# Write UTF-8 explicitly: CATEGORY_MAP contains non-ASCII site names,
# which the platform's default encoding may be unable to represent.
with open(util.path("docs", outfile), "w", encoding="utf-8") as file:
    write_output(file, COLUMNS, build_extractor_list())
|