2019-04-16 18:16:48 +02:00
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
2021-03-12 03:56:54 +01:00
|
|
|
|
"""Generate a Markdown document listing all supported sites"""
|
2017-01-15 21:31:21 +01:00
|
|
|
|
|
2021-01-22 21:02:12 +01:00
|
|
|
|
import os
|
2017-01-15 21:31:21 +01:00
|
|
|
|
import sys
|
2019-02-20 19:25:41 +01:00
|
|
|
|
import collections
|
2017-01-15 21:31:21 +01:00
|
|
|
|
|
2019-04-16 18:16:48 +02:00
|
|
|
|
import util
|
|
|
|
|
from gallery_dl import extractor
|
2017-01-15 21:31:21 +01:00
|
|
|
|
|
|
|
|
|
|
2017-04-20 16:56:50 +02:00
|
|
|
|
# Display names for extractor categories whose human-readable site name
# cannot be derived by simply capitalizing the category identifier.
CATEGORY_MAP = {
    "2chan": "Futaba Channel",
    "35photo": "35PHOTO",
    "adultempire": "Adult Empire",
    "allgirlbooru": "All girl",
    "archivedmoe": "Archived.Moe",
    "archiveofsins": "Archive of Sins",
    "artstation": "ArtStation",
    "aryion": "Eka's Portal",
    "atfbooru": "ATFBooru",
    "b4k": "arch.b4k.co",
    "baraag": "baraag",
    "bbc": "BBC",
    "bcy": "半次元",
    "comicvine": "Comic Vine",
    "deviantart": "DeviantArt",
    "drawfriends": "Draw Friends",
    "dynastyscans": "Dynasty Reader",
    "e621": "e621",
    "erome": "EroMe",
    "e-hentai": "E-Hentai",
    "exhentai": "ExHentai",
    "fallenangels": "Fallen Angels Scans",
    "fanbox": "pixivFANBOX",
    "fashionnova": "Fashion Nova",
    "furaffinity": "Fur Affinity",
    "hbrowse": "HBrowse",
    "hentai2read": "Hentai2Read",
    "hentaicosplays": "Hentai Cosplay",
    "hentaifoundry": "Hentai Foundry",
    "hentaifox": "HentaiFox",
    "hentaihand": "HentaiHand",
    "hentaihere": "HentaiHere",
    "hentaiimg": "Hentai Image",
    "hitomi": "Hitomi.la",
    "idolcomplex": "Idol Complex",
    "illusioncardsbooru": "Illusion Game Cards",
    "imagebam": "ImageBam",
    "imagefap": "ImageFap",
    "imgbb": "ImgBB",
    "imgbox": "imgbox",
    "imagechest": "ImageChest",
    "imgth": "imgth",
    "imgur": "imgur",
    "joyreactor": "JoyReactor",
    "kabeuchi": "かべうち",
    "kireicake": "Kirei Cake",
    "lineblog": "LINE BLOG",
    "livedoor": "livedoor Blog",
    "omgmiamiswimwear": "Omg Miami Swimwear",
    "mangadex": "MangaDex",
    "mangafox": "Manga Fox",
    "mangahere": "Manga Here",
    "mangakakalot": "MangaKakalot",
    "manganelo": "Manganato",
    "mangapark": "MangaPark",
    "mangasee": "MangaSee",
    "mastodon.social": "mastodon.social",
    "myhentaigallery": "My Hentai Gallery",
    "myportfolio": "Adobe Portfolio",
    "naverwebtoon": "NaverWebtoon",
    "nhentai": "nhentai",
    "nijie": "nijie",
    "nozomi": "Nozomi.la",
    "nsfwalbum": "NSFWalbum.com",
    "nyafuu": "Nyafuu Archive",
    "paheal": "rule #34",
    "photovogue": "PhotoVogue",
    "pornimagesxxx": "Porn Image",
    "pornreactor": "PornReactor",
    "powermanga": "PowerManga",
    "readcomiconline": "Read Comic Online",
    "rbt": "RebeccaBlackTech",
    "redgifs": "RedGIFs",
    "rule34": "Rule 34",
    "rule34us": "Rule 34",
    "sankaku": "Sankaku Channel",
    "sankakucomplex": "Sankaku Complex",
    "seiga": "Niconico Seiga",
    "seisoparty": "Seiso",
    "senmanga": "Sen Manga",
    "sensescans": "Sense-Scans",
    "sexcom": "Sex.com",
    "simplyhentai": "Simply Hentai",
    "slickpic": "SlickPic",
    "slideshare": "SlideShare",
    "smugmug": "SmugMug",
    "speakerdeck": "Speaker Deck",
    "subscribestar": "SubscribeStar",
    "tbib": "The Big ImageBoard",
    "thatpervert": "ThatPervert",
    "thebarchive": "The /b/ Archive",
    "thecollection": "The /co/llection",
    "theloudbooru": "The Loud Booru",
    "tumblrgallery": "TumblrGallery",
    "vanillarock": "もえぴりあ",
    "vidyart": "/v/idyart",
    "vk": "VK",
    "vsco": "VSCO",
    "wakarimasen": "Wakarimasen Archive",
    "webtoons": "Webtoon",
    "wikiart": "WikiArt.org",
    "xhamster": "xHamster",
    "xvideos": "XVideos",
    "yandere": "yande.re",
    "zzzz": "ZzZz",
}
|
|
|
|
|
|
|
|
|
|
# Display text for extractor subcategories.
#
# String values apply to a subcategory on any site; dict values are keyed by
# category name and hold site-specific overrides. An empty string means the
# subcategory is left out of the "Capabilities" column.
SUBCATEGORY_MAP = {
    # generic subcategories
    "doujin": "Doujin",
    "gallery": "Galleries",
    "image": "individual Images",
    "index": "Site Index",
    "issue": "Comic Issues",
    "manga": "Manga",
    "popular": "Popular Images",
    "recent": "Recent Images",
    "search": "Search Results",
    "status": "Images from Statuses",
    "tag": "Tag Searches",
    "user": "User Profiles",
    "watch": "Watches",
    "following": "",
    "related-pin": "related Pins",
    "related-board": "",

    # site-specific overrides
    "artstation": {
        "artwork": "Artwork Listings",
    },
    "atfbooru": {
        "favorite": "",
    },
    "danbooru": {
        "favorite": "",
    },
    "desktopography": {
        "site": "",
    },
    "deviantart": {
        "stash": "Sta.sh",
        "watch-posts": "",
    },
    "fanbox": {
        "redirect": "",
    },
    "hentaifoundry": {
        "story": "",
    },
    "instagram": {
        "posts": "",
        "saved": "Saved Posts",
        "tagged": "Tagged Posts",
    },
    "kemonoparty": {
        "discord": "Discord Servers",
        "discord-server": "",
    },
    "mangadex": {
        "feed": "Followed Feed",
    },
    "newgrounds": {
        "art": "Art",
        "audio": "Audio",
        "media": "Media Files",
    },
    "pinterest": {
        "board": "",
        "pinit": "pin.it Links",
    },
    "pixiv": {
        "me": "pixiv.me Links",
        "pixivision": "pixivision",
        "sketch": "Sketch",
        "work": "individual Images",
    },
    "sankaku": {
        "books": "Book Searches",
    },
    "sexcom": {
        "pins": "User Pins",
    },
    "smugmug": {
        "path": "Images from Users and Folders",
    },
    "twitter": {
        "media": "Media Timelines",
        "replies": "",
        "list-members": "List Members",
    },
    "wallhaven": {
        "collections": "",
    },
    "weasyl": {
        "journals": "",
        "submissions": "",
    },
    "wikiart": {
        "artists": "Artist Listings",
    },
}
|
|
|
|
|
|
2021-03-12 03:56:54 +01:00
|
|
|
|
# Section headings for groups of extractors that share a base category
# (several site instances running the same software).
BASE_MAP = {
    "foolfuuka": "FoolFuuka 4chan Archives",
    "foolslide": "FoOlSlide Instances",
    "gelbooru_v01": "Gelbooru Beta 0.1.11",
    "gelbooru_v02": "Gelbooru Beta 0.2",
    "lolisafe": "lolisafe and chibisafe",
    "moebooru": "Moebooru and MyImouto",
}
|
|
|
|
|
|
2021-03-12 03:03:48 +01:00
|
|
|
|
_OAUTH = '<a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a>'
|
|
|
|
|
_COOKIES = '<a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a>'
|
|
|
|
|
_APIKEY_DB = \
|
|
|
|
|
'<a href="configuration.rst#extractorderpibooruapi-key">API Key</a>'
|
|
|
|
|
_APIKEY_WH = \
|
|
|
|
|
'<a href="configuration.rst#extractorwallhavenapi-key">API Key</a>'
|
|
|
|
|
_APIKEY_WY = \
|
|
|
|
|
'<a href="configuration.rst#extractorweasylapi-key">API Key</a>'
|
2020-07-14 16:18:21 +02:00
|
|
|
|
|
2017-06-15 21:06:20 +02:00
|
|
|
|
# Content of the "Authentication" column per category; categories that are
# not listed here get an empty cell.
AUTH_MAP = {
    "aryion": "Supported",
    "atfbooru": "Supported",
    "baraag": _OAUTH,
    "danbooru": "Supported",
    "derpibooru": _APIKEY_DB,
    "deviantart": _OAUTH,
    "e621": "Supported",
    "e-hentai": "Supported",
    "exhentai": "Supported",
    "fanbox": _COOKIES,
    "fantia": _COOKIES,
    "flickr": _OAUTH,
    "furaffinity": _COOKIES,
    "idolcomplex": "Supported",
    "imgbb": "Supported",
    "inkbunny": "Supported",
    "instagram": "Supported",
    "kemonoparty": "Supported",
    "mangadex": "Supported",
    "mangoxo": "Supported",
    "mastodon.social": _OAUTH,
    "newgrounds": "Supported",
    "nijie": "Required",
    "patreon": _COOKIES,
    "pawoo": _OAUTH,
    "pillowfort": "Supported",
    "pinterest": _COOKIES,
    "pixiv": _OAUTH,
    "ponybooru": "API Key",
    "reddit": _OAUTH,
    "sankaku": "Supported",
    "seiga": "Required",
    "seisoparty": "Supported",
    "smugmug": _OAUTH,
    "subscribestar": "Supported",
    "tapas": "Supported",
    "tsumino": "Supported",
    "tumblr": _OAUTH,
    "twitter": "Supported",
    "wallhaven": _APIKEY_WH,
    "weasyl": _APIKEY_WY,
}
|
|
|
|
|
|
|
|
|
|
# Extractor categories that are left out of the generated document.
IGNORE_LIST = (
    "directlink", "oauth", "recursive",
    "test", "ytdl", "generic",
)
|
|
|
|
|
|
2017-04-20 16:56:50 +02:00
|
|
|
|
|
2019-02-20 19:25:41 +01:00
|
|
|
|
def domain(cls):
    """Return the web-domain related to an extractor class

    The following sources are tried in order and the first usable one wins:

    1. the last word of the docstring of the module defining 'cls',
       if it starts with "http"
    2. a non-empty 'cls.root' attribute
    3. 'cls.https' (scheme selection) combined with the last word of
       the class docstring
    4. the scheme and host part of the URL of the first test
       returned by 'cls._get_tests()'

    An empty string is returned when none of them yields a result.
    """
    try:
        url = sys.modules[cls.__module__].__doc__.split()[-1]
    except (KeyError, AttributeError, IndexError):
        # module not in sys.modules, no docstring, or an empty docstring
        url = ""
    if url.startswith("http"):
        return url

    root = getattr(cls, "root", None)
    if root:
        return root + "/"

    if hasattr(cls, "https"):
        # fall through to the test URLs when there is no class docstring
        # to take the host name from
        words = (cls.__doc__ or "").split()
        if words:
            scheme = "https" if cls.https else "http"
            return "{}://{}/".format(scheme, words[-1])

    test = next(cls._get_tests(), None)
    if test:
        url = test[0]
        # start searching at index 8 to skip the "//" of the URL scheme
        end = url.find("/", 8)
        if end < 0:
            # URL consists of scheme and host only
            return url + "/"
        return url[:end + 1]

    return ""
|
2017-04-20 16:56:50 +02:00
|
|
|
|
|
|
|
|
|
|
2021-02-20 22:31:21 +01:00
|
|
|
|
def category_text(c):
    """Return a human-readable representation of a category

    Uses the entry for 'c' in CATEGORY_MAP when there is a non-empty one
    and the capitalized category identifier otherwise.
    """
    label = CATEGORY_MAP.get(c)
    if label:
        return label
    return c.capitalize()
|
2018-09-28 12:39:05 +02:00
|
|
|
|
|
2017-04-20 16:56:50 +02:00
|
|
|
|
|
2021-02-20 22:31:21 +01:00
|
|
|
|
def subcategory_text(c, sc):
    """Return a human-readable representation of a subcategory

    Lookup order: the entry for category 'c' in SUBCATEGORY_MAP, then the
    generic entry for 'sc', then the capitalized subcategory name with a
    trailing "s" appended if it does not already end in one.
    """
    # entry for this category, if any
    overrides = SUBCATEGORY_MAP.get(c, ())
    if sc in overrides:
        return overrides[sc]

    # generic entry shared by all sites
    if sc in SUBCATEGORY_MAP:
        return SUBCATEGORY_MAP[sc]

    # fallback: pluralized, capitalized identifier
    text = sc.capitalize()
    if text.endswith("s"):
        return text
    return text + "s"
|
2017-04-20 16:56:50 +02:00
|
|
|
|
|
|
|
|
|
|
2021-02-20 22:31:21 +01:00
|
|
|
|
def category_key(c):
    """Generate sorting keys by category

    'c' is indexed at position 0 to get the category identifier; the key
    is the lowercase form of its display text.
    """
    category = c[0]
    return category_text(category).lower()
|
2017-04-20 16:56:50 +02:00
|
|
|
|
|
|
|
|
|
|
2021-02-20 22:31:21 +01:00
|
|
|
|
def subcategory_key(sc):
    """Generate sorting keys by subcategory

    "issue" is mapped to "A" so that it sorts before any lowercase
    subcategory name; every other value is its own key.
    """
    if sc == "issue":
        return "A"
    return sc
|
2017-07-15 15:01:30 +02:00
|
|
|
|
|
|
|
|
|
|
2019-02-20 19:25:41 +01:00
|
|
|
|
def build_extractor_list():
    """Collect the subcategories and domains of all extractor classes

    Returns a '(categories, domains)' tuple:

    categories
        maps a base category name ("" for extractors that have a category
        of their own) to a dict of category -> sorted subcategory list
    domains
        maps each category to its URL, ending in "/"
    """
    categories = collections.defaultdict(
        lambda: collections.defaultdict(list))
    default = categories[""]
    domains = {}

    for extr in extractor._list_classes():
        category = extr.category
        if category in IGNORE_LIST:
            continue

        if not category:
            # no category of its own: register every listed instance
            # under the extractor's base category
            base = categories[extr.basecategory]
            for category, root in extr.instances:
                base[category].append(extr.subcategory)
                if category in domains:
                    continue
                if not root:
                    # use domain from first matching test
                    root = next(
                        (url[:url.index("/", 8)]
                         for url, _ in extr._get_tests()
                         if extr.from_url(url).category == category),
                        root,
                    )
                domains[category] = root + "/"
            continue

        default[category].append(extr.subcategory)
        if category not in domains:
            domains[category] = domain(extr)

    # sort subcategory lists
    for base in categories.values():
        for subcategories in base.values():
            subcategories.sort(key=subcategory_key)

    # add e-hentai.org
    default["e-hentai"] = default["exhentai"]
    domains["e-hentai"] = domains["exhentai"].replace("x", "-")

    # add hentai-cosplays sister sites (hentai-img, porn-images-xxx)
    sister_sites = (
        ("hentaiimg", "https://hentai-img.com/"),
        ("pornimagesxxx", "https://porn-images-xxx.com/"),
    )
    for name, url in sister_sites:
        default[name] = default["hentaicosplays"]
        domains[name] = url

    return categories, domains
|
2019-02-20 19:25:41 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# define table columns
|
|
|
|
|
# Each column is a (header text, width, content function) tuple; the content
# function is called as f(category, subcategories, domain) and returns the
# text of one table cell.
COLUMNS = (
    ("Site", 20, lambda c, scs, d: category_text(c)),
    ("URL", 35, lambda c, scs, d: d),
    ("Capabilities", 50, lambda c, scs, d: ", ".join(
        # subcategories with empty display text are left out
        filter(None, (subcategory_text(c, sc) for sc in scs)))),
    ("Authentication", 16, lambda c, scs, d: AUTH_MAP.get(c, "")),
)
|
2019-01-09 14:21:19 +01:00
|
|
|
|
|
|
|
|
|
|
2021-03-12 03:03:48 +01:00
|
|
|
|
def generate_output(columns, categories, domains):
    """Render the supported-sites document and return it as a string

    columns
        sequence of column definitions; item 0 of each is the header text
        and item 2 a function called as f(category, subcategories, domain)
        that returns the cell content
    categories
        mapping of base category name -> mapping of
        category -> list of subcategories
    domains
        mapping of category -> URL; must contain every listed category
    """
    thead = ["<tr>"]
    thead.extend(" <th>" + column[0] + "</th>" for column in columns)
    thead.append("</tr>")

    tbody = []
    append = tbody.append
    # section headings span the whole table, however many columns it has
    colspan = str(len(columns))

    for name, base in categories.items():

        if name and base:
            # named group of instances: emit a heading row and keep the
            # categories in their original order
            name = BASE_MAP.get(name) or (name.capitalize() + " Instances")
            append('\n<tr>\n <td colspan="' + colspan + '"><strong>' +
                   name + '</strong></td>\n</tr>')
            clist = base.items()
        else:
            clist = sorted(base.items(), key=category_key)

        for category, subcategories in clist:
            url = domains[category]
            append("<tr>")
            for column in columns:
                content = column[2](category, subcategories, url)
                append(" <td>" + content + "</td>")
            append("</tr>")

    TEMPLATE = """# Supported Sites

<!-- auto-generated by {} -->
Consider all sites to be NSFW unless otherwise known.

<table>
<thead valign="bottom">
{}
</thead>
<tbody valign="top">
{}
</tbody>
</table>
"""
    return TEMPLATE.format(
        # last two components of this script's path, joined with "/"
        "/".join(os.path.normpath(__file__).split(os.sep)[-2:]),
        "\n".join(thead),
        "\n".join(tbody),
    )
|
2017-04-20 16:56:50 +02:00
|
|
|
|
|
2017-01-15 21:31:21 +01:00
|
|
|
|
|
2021-02-20 22:31:21 +01:00
|
|
|
|
categories, domains = build_extractor_list()
outfile = sys.argv[1] if len(sys.argv) > 1 else "supportedsites.md"

# Write as UTF-8 explicitly: CATEGORY_MAP contains non-ASCII site names,
# which cannot be encoded by every platform's default locale encoding.
with open(util.path("docs", outfile), "w", encoding="utf-8") as fp:
    fp.write(generate_output(COLUMNS, categories, domains))
|