gallery-dl/scripts/supportedsites.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""Generate a reStructuredText document with all supported sites"""

import sys
import collections

import util
from gallery_dl import extractor


# Display names for extractor categories, keyed by 'category' string.
# category_text() falls back to 'category.capitalize()' for any category
# that is not listed here, so only names that need different spelling,
# casing, or spacing have to be added.
CATEGORY_MAP = {
    "2chan"          : "Futaba Channel",
    "35photo"        : "35PHOTO",
    "adultempire"    : "Adult Empire",
    "archivedmoe"    : "Archived.Moe",
    "archiveofsins"  : "Archive of Sins",
    "artstation"     : "ArtStation",
    "b4k"            : "arch.b4k.co",
    "bcy"            : "半次元",
    "bobx"           : "BobX",
    "deviantart"     : "DeviantArt",
    "dokireader"     : "Doki Reader",
    "dynastyscans"   : "Dynasty Reader",
    "e621"           : "e621",
    "e-hentai"       : "E-Hentai",
    "exhentai"       : "ExHentai",
    "fallenangels"   : "Fallen Angels Scans",
    "fashionnova"    : "Fashion Nova",
    "furaffinity"    : "Fur Affinity",
    "hbrowse"        : "HBrowse",
    "hentai2read"    : "Hentai2Read",
    "hentaicafe"     : "Hentai Cafe",
    "hentaifoundry"  : "Hentai Foundry",
    "hentaifox"      : "HentaiFox",
    "hentaihand"     : "HentaiHand",
    "hentaihere"     : "HentaiHere",
    "hitomi"         : "Hitomi.la",
    "idolcomplex"    : "Idol Complex",
    "imagebam"       : "ImageBam",
    "imagefap"       : "ImageFap",
    "imgbb"          : "ImgBB",
    "imgbox"         : "imgbox",
    "imgth"          : "imgth",
    "imgur"          : "imgur",
    "jaiminisbox"    : "Jaimini's Box",
    "kabeuchi"       : "かべうち",
    "kireicake"      : "Kirei Cake",
    "kissmanga"      : "KissManga",
    "lineblog"       : "LINE BLOG",
    "livedoor"       : "livedoor Blog",
    "mangadex"       : "MangaDex",
    "mangafox"       : "Manga Fox",
    "mangahere"      : "Manga Here",
    "mangapark"      : "MangaPark",
    "mangastream"    : "Manga Stream",
    "myportfolio"    : "Adobe Portfolio",
    "nhentai"        : "nhentai",
    "nijie"          : "nijie",
    "nozomi"         : "Nozomi.la",
    "nsfwalbum"      : "NSFWalbum.com",
    "nyafuu"         : "Nyafuu Archive",
    "paheal"         : "rule #34",
    "powermanga"     : "PowerManga",
    "readcomiconline": "Read Comic Online",
    "rbt"            : "RebeccaBlackTech",
    "rule34"         : "Rule 34",
    "sankaku"        : "Sankaku Channel",
    "sankakucomplex" : "Sankaku Complex",
    "seaotterscans"  : "Sea Otter Scans",
    "seiga"          : "Niconico Seiga",
    "senmanga"       : "Sen Manga",
    "sensescans"     : "Sense-Scans",
    "sexcom"         : "Sex.com",
    "simplyhentai"   : "Simply Hentai",
    "slickpic"       : "SlickPic",
    "slideshare"     : "SlideShare",
    "smugmug"        : "SmugMug",
    "thebarchive"    : "The /b/ Archive",
    "vanillarock"    : "もえぴりあ",
    "vsco"           : "VSCO",
    "wikiart"        : "WikiArt.org",
    "worldthree"     : "World Three",
    "xhamster"       : "xHamster",
    "xvideos"        : "XVideos",
    "yuki"           : "yuki.la 4chan archive",
}

# Display names for extractor subcategories, used by subcategory_text().
#
# This dict holds two kinds of entries:
#   - 'subcategory' -> text: generic names valid for every category
#   - 'category' -> {'subcategory' -> text}: per-category overrides,
#     which subcategory_text() looks up before the generic names
#
# An empty string hides the subcategory: the "Capabilities" column
# in COLUMNS skips every extractor whose text is empty.
# Subcategories without any entry get an auto-generated plural name.
SUBCATEGORY_MAP = {
    "doujin" : "Doujin",
    "gallery": "Galleries",
    "image"  : "individual Images",
    "issue"  : "Comic Issues",
    "manga"  : "Manga",
    "popular": "Popular Images",
    "recent" : "Recent Images",
    "search" : "Search Results",
    "status" : "Images from Statuses",
    "tag"    : "Tag Searches",
    "user"   : "User Profiles",
    "following"    : "",
    "related-pin"  : "related Pins",
    "related-board": "",

    "artstation": {
        "artwork": "Artwork Listings",
    },
    "deviantart": {
        "stash": "Sta.sh",
    },
    "instagram": {
        "saved": "Saved Posts",
    },
    "newgrounds": {
        "art"  : "Art",
        "audio": "Audio",
        "media": "Media Files",
    },
    "pinterest": {
        "pinit": "pin.it Links",
    },
    "pixiv": {
        "me"  : "pixiv.me Links",
        "work": "individual Images",
    },
    "smugmug": {
        "path": "Images from Users and Folders",
    },
    "twitter": {
        "media": "Media Timelines",
    },
    "wikiart": {
        "artists": "Artist Listings",
    },
}

# Content of the "Authentication" column, keyed by extractor category.
# Categories that are not listed here get an empty cell (see COLUMNS).
AUTH_MAP = {
    "danbooru"   : "Optional",
    "deviantart" : "Optional (OAuth)",
    "e621"       : "Optional",
    "e-hentai"   : "Optional",
    "exhentai"   : "Optional",
    "flickr"     : "Optional (OAuth)",
    "idolcomplex": "Optional",
    "imgbb"      : "Optional",
    "instagram"  : "Optional",
    "mangoxo"    : "Optional",
    "newgrounds" : "Optional",
    "nijie"      : "Required",
    "pixiv"      : "Required",
    "reddit"     : "Optional (OAuth)",
    "sankaku"    : "Optional",
    "seiga"      : "Required",
    "smugmug"    : "Optional (OAuth)",
    "tsumino"    : "Optional",
    "tumblr"     : "Optional (OAuth)",
    "twitter"    : "Optional",
    # reStructuredText hyperlink; longer than the column width, so
    # write_output() moves it into a substitution definition
    "wallhaven"  : ("Optional (`API Key "
                    "<configuration.rst#extractorwallhavenapi-key>`__)"),
}

# Extractor categories that build_extractor_list() leaves out of the table
IGNORE_LIST = (
    "directlink",
    "oauth",
    "recursive",
    "test",
)


def domain(cls):
    """Return the web-domain related to an extractor class

    The following sources are tried in order; the first usable one wins:
      1. the last word of the defining module's docstring,
         if it starts with "http"
      2. the class' non-empty 'root' attribute (plus a trailing slash)
      3. for classes with an 'https' attribute: the last word of the
         class docstring, prefixed with the matching URL scheme
      4. scheme and host of the first test URL from 'cls._get_tests()'

    Returns an empty string if none of them yields a result.
    Missing or empty docstrings are skipped instead of raising.
    """
    module = sys.modules.get(cls.__module__)
    words = (getattr(module, "__doc__", None) or "").split()
    if words and words[-1].startswith("http"):
        return words[-1]

    root = getattr(cls, "root", None)
    if root:
        return root + "/"

    if hasattr(cls, "https"):
        words = (cls.__doc__ or "").split()
        if words:
            scheme = "https" if cls.https else "http"
            return "{}://{}/".format(scheme, words[-1])

    test = next(cls._get_tests(), None)
    if test:
        url = test[0]
        # start searching at index 8 (the length of "https://")
        # to skip the slashes of the URL scheme
        end = url.find("/", 8)
        if end < 0:
            # URL consists of scheme and host only
            return url + "/"
        return url[:end + 1]

    return ""


def category_text(cls):
    """Return the display name of an extractor class' category"""
    category = cls.category
    label = CATEGORY_MAP.get(category)
    if label:
        return label
    # no (non-empty) custom name: derive one from the category itself
    return category.capitalize()


def subcategory_text(cls):
    """Return the display name of an extractor class' subcategory"""
    category, subcategory = cls.category, cls.subcategory

    # names specific to this category take precedence
    specific = SUBCATEGORY_MAP.get(category)
    if specific is not None and subcategory in specific:
        return specific[subcategory]

    # generic names shared by all categories
    if subcategory in SUBCATEGORY_MAP:
        return SUBCATEGORY_MAP[subcategory]

    # fallback: capitalized subcategory in (naive) plural form
    plural = subcategory.capitalize()
    if not plural.endswith("s"):
        plural += "s"
    return plural


def category_key(cls):
    """Return the key used to sort table rows by category"""
    # classes from an '.imagehosts' module get a "zz" prefix,
    # which sorts them after the other categories
    prefix = "zz" if cls.__module__.endswith(".imagehosts") else ""
    return prefix + category_text(cls).lower()


def subcategory_key(cls):
    """Return the key used to sort extractors of the same category"""
    subcategory = cls.subcategory
    # "issue" is replaced by "A", which sorts before any lowercase name
    return "A" if subcategory == "issue" else subcategory


def build_extractor_list():
    """Generate a sorted list of lists of extractor classes

    Each inner list holds all extractor classes of one category, sorted
    by subcategory_key(); the outer list is sorted by category_key().
    Classes without a category, or with a category in IGNORE_LIST,
    are left out.
    """
    extractors = collections.defaultdict(list)

    # get lists of extractor classes grouped by category
    for extr in extractor.extractors():
        if not extr.category or extr.category in IGNORE_LIST:
            continue
        extractors[extr.category].append(extr)

    # sort extractor lists with the same category
    for extrlist in extractors.values():
        extrlist.sort(key=subcategory_key)

    # ugly hack to add e-hentai.org:
    # derive an "e-hentai" variant from every "exhentai" extractor class
    #
    # Use .get() instead of indexing so that a missing "exhentai" category
    # does not insert an empty list into the defaultdict; an empty list
    # would make the 'lst[0]' lookup of the final sort raise IndexError.
    eh = []
    for extr in extractors.get("exhentai", ()):
        class eh_extr(extr):
            category = "e-hentai"
            root = "https://e-hentai.org"
        eh.append(eh_extr)
    if eh:
        extractors["e-hentai"] = eh

    # sort lists by category
    return sorted(
        extractors.values(),
        key=lambda lst: category_key(lst[0]),
    )


# Table columns as (title, width, formatter) tuples.
# Each formatter receives the list of extractor classes of one category
# and returns the text for that category's cell.
COLUMNS = (
    ("Site", 20, lambda extrs: category_text(extrs[0])),
    ("URL" , 35, lambda extrs: domain(extrs[0])),
    # extractors with an empty subcategory text are not listed
    ("Capabilities", 50,
     lambda extrs: ", ".join(filter(None, map(subcategory_text, extrs)))),
    ("Authentication", 16,
     lambda extrs: AUTH_MAP.get(extrs[0].category, "")),
)


def write_output(fobj, columns, extractors):
    """Write a reStructuredText table of supported sites to 'fobj'

    'columns' is a sequence of (title, width, formatter) tuples and
    'extractors' a sequence of extractor class lists, one per table row.
    Cell content wider than its column is replaced by a substitution
    reference, whose definition gets written after the table.
    """
    substitutions = []

    def pad(output, col, category=None):
        """Fit 'output' into the width of column 'col'"""
        width = col[1]
        if not isinstance(output, str):
            output = col[2](output)

        if len(output) > width:
            # reference name: '|<category>-<first letter of column title>|'
            ref = "|{}-{}|".format(category, col[0][0])
            substitutions.append((ref, output))
            output = ref

        return output.ljust(width)

    write = fobj.write
    separator = " ".join("=" * col[1] for col in columns) + "\n"

    # caption
    write("Supported Sites\n")
    write("===============\n")
    write("Unless otherwise known, assume all sites to be NSFW\n\n")

    # table head
    write(separator)
    header = " ".join(pad(col[0], col) for col in columns)
    write(header.strip() + "\n")
    write(separator)

    # table body
    for extrlist in extractors:
        category = extrlist[0].category
        cells = [
            pad(col[2](extrlist), col, category)
            for col in columns
        ]
        write(" ".join(cells).strip())
        write("\n")

    # table bottom
    write(separator)
    write("\n")

    # substitutions
    for ref, text in substitutions:
        write(".. {} replace:: {}\n".format(ref, text))


# The output file name can be given as first command-line argument;
# its final location is determined by util.path("docs", outfile).
outfile = sys.argv[1] if len(sys.argv) > 1 else "supportedsites.rst"

# Write as UTF-8 explicitly: the table contains non-ASCII site names
# (e.g. "半次元"), which the locale's default encoding may not be able
# to represent, causing a UnicodeEncodeError while writing.
with open(util.path("docs", outfile), "w", encoding="utf-8") as file:
    write_output(file, COLUMNS, build_extractor_list())
-												update/cleanup Python dev scripts

- put common code in its own util.py file
- same Python3 shebang for all scripts
- add file docstrings
- fix format string replacement fields in man page template

											
										
										
											2019-04-16 18:16:48 +02:00
+								#!/usr/bin/env python3
 								# -*- coding: utf-8 -*-
 								"""Generate a reStructuredText document with all supported sites"""
-												add a script to automatically build a list of supported sites

											
										
										
											2017-01-15 21:31:21 +01:00
 								import sys
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
+								import collections
-												add a script to automatically build a list of supported sites

											
										
										
											2017-01-15 21:31:21 +01:00
-												update/cleanup Python dev scripts

- put common code in its own util.py file
- same Python3 shebang for all scripts
- add file docstrings
- fix format string replacement fields in man page template

											
										
										
											2019-04-16 18:16:48 +02:00
+								import util
 								from gallery_dl import extractor
-												add a script to automatically build a list of supported sites

											
										
										
											2017-01-15 21:31:21 +01:00
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								CATEGORY_MAP = {
-												update supportedsites.rst

											
										
										
											2017-07-15 15:01:30 +02:00
+								    "2chan"          : "Futaba Channel",
-												[35photo] add user-, genre-, and image-extractors (#162)

											
										
										
											2019-03-18 01:11:30 +01:00
+								    "35photo"        : "35PHOTO",
-												[adultempire] add gallery extractor (closes #340)

											
										
										
											2019-07-21 22:29:57 +02:00
+								    "adultempire"    : "Adult Empire",
-												update supportedsites.rst

											
										
										
											2017-07-15 15:01:30 +02:00
+								    "archivedmoe"    : "Archived.Moe",
 								    "archiveofsins"  : "Archive of Sins",
-												check supportedsites.rst in release script

											
										
										
											2018-03-17 15:35:38 +01:00
+								    "artstation"     : "ArtStation",
-												[foolfuuka] add support for more sites (#18)

- https://arch.b4k.co
- https://archive.whatisthisimnotgoodwithcomputers.com
- https://archive.yeet.net

Notes:
- The name "whatisthisimnotgoodwithcomputers" is way too long ...
- archive.yeet.net is out of date and also blocked by 4chan servers
  - newest threads are 2 weeks old
  - using "https://archive.yeet.net" as Referer header results in
    "403 Forbidden" when accessing 4chan

											
										
										
											2017-09-16 21:11:44 +02:00
+								    "b4k"            : "arch.b4k.co",
-												[bcy] add user and post extractors (#592)

											
										
										
											2020-02-08 23:25:53 +01:00
+								    "bcy"            : "半次元",
-												[bobx] add gallery and model extractors

											
										
										
											2018-09-13 20:13:12 +02:00
+								    "bobx"           : "BobX",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "deviantart"     : "DeviantArt",
 								    "dokireader"     : "Doki Reader",
 								    "dynastyscans"   : "Dynasty Reader",
 								    "e621"           : "e621",
-												have e-hentai and exhentai on supportedsites.rst (#365)

											
										
										
											2019-08-03 11:42:28 +02:00
+								    "e-hentai"       : "E-Hentai",
 								    "exhentai"       : "ExHentai",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "fallenangels"   : "Fallen Angels Scans",
-												[shopify] add generic collection and product extractors (#175)

with fashionnova.com  as a default domain

											
										
										
											2019-03-05 22:33:37 +01:00
+								    "fashionnova"    : "Fashion Nova",
-												[furaffinity] add extractors (#284)

											
										
										
											2020-02-11 19:51:24 +01:00
+								    "furaffinity"    : "Fur Affinity",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "hbrowse"        : "HBrowse",
 								    "hentai2read"    : "Hentai2Read",
-												[hentaicafe] add chapter and manga extractors (#101)

											
										
										
											2018-09-05 21:08:40 +02:00
+								    "hentaicafe"     : "Hentai Cafe",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "hentaifoundry"  : "Hentai Foundry",
-												[hentaifox] add chapter extractor (#160)

											
										
										
											2019-01-28 18:00:32 +01:00
+								    "hentaifox"      : "HentaiFox",
-												[hentaihand] add extractors (closes #605)

											
										
										
											2020-02-18 23:49:59 +01:00
+								    "hentaihand"     : "HentaiHand",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "hentaihere"     : "HentaiHere",
 								    "hitomi"         : "Hitomi.la",
-												[idolcomplex] add support for idol.sankakucomplex.com

											
										
										
											2018-01-09 17:52:12 +01:00
+								    "idolcomplex"    : "Idol Complex",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "imagebam"       : "ImageBam",
 								    "imagefap"       : "ImageFap",
-												[imgbb] add album extractor (#361)

											
										
										
											2019-07-30 23:02:21 +02:00
+								    "imgbb"          : "ImgBB",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "imgbox"         : "imgbox",
 								    "imgth"          : "imgth",
 								    "imgur"          : "imgur",
 								    "jaiminisbox"    : "Jaimini's Box",
-												[kabeuchi] add 'user' extractor (closes #561)

											
										
										
											2020-03-13 16:45:42 +01:00
+								    "kabeuchi"       : "かべうち",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "kireicake"      : "Kirei Cake",
 								    "kissmanga"      : "KissManga",
-												[lineblog] add blog and post extractors (closes #404)

											
										
										
											2019-09-06 21:58:13 +02:00
+								    "lineblog"       : "LINE BLOG",
-												[livedoor] add blog- and post-extractors (#190)

											
										
										
											2019-04-06 16:10:29 +02:00
+								    "livedoor"       : "livedoor Blog",
-												[mangadex] general improvements

- support >100 chapter entries per manga
- custom archive ID format
- detect non-existing chapters

											
										
										
											2018-03-06 14:15:15 +01:00
+								    "mangadex"       : "MangaDex",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "mangafox"       : "Manga Fox",
 								    "mangahere"      : "Manga Here",
 								    "mangapark"      : "MangaPark",
 								    "mangastream"    : "Manga Stream",
-												[myportfolio] add user and gallery extractors (#95)

											
										
										
											2018-07-19 18:56:45 +02:00
+								    "myportfolio"    : "Adobe Portfolio",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "nhentai"        : "nhentai",
 								    "nijie"          : "nijie",
-												[nozomi] add post and tag extractors (#388)

											
										
										
											2019-10-13 22:10:32 +02:00
+								    "nozomi"         : "Nozomi.la",
-												[nsfwalbum] add album extractor (closes #287)

											
										
										
											2019-06-22 22:43:09 +02:00
+								    "nsfwalbum"      : "NSFWalbum.com",
-												update supportedsites.rst

											
										
										
											2017-07-15 15:01:30 +02:00
+								    "nyafuu"         : "Nyafuu Archive",
-												[paheal] add tag- and post-extractors (closes #69)

											
										
										
											2018-01-15 16:39:05 +01:00
+								    "paheal"         : "rule #34",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "powermanga"     : "PowerManga",
 								    "readcomiconline": "Read Comic Online",
-												update supportedsites.rst

											
										
										
											2017-07-24 10:50:40 +02:00
+								    "rbt"            : "RebeccaBlackTech",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "rule34"         : "Rule 34",
 								    "sankaku"        : "Sankaku Channel",
-												[sankakucomplex] move article extractor to its own module (#258)

											
										
										
											2019-05-27 23:49:23 +02:00
+								    "sankakucomplex" : "Sankaku Complex",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "seaotterscans"  : "Sea Otter Scans",
 								    "seiga"          : "Niconico Seiga",
 								    "senmanga"       : "Sen Manga",
 								    "sensescans"     : "Sense-Scans",
-												[sexcom] add pin and board extractors (#147)

											
										
										
											2019-04-23 22:10:39 +02:00
+								    "sexcom"         : "Sex.com",
-												[simplyhentai] add gallery extractor (#89)

											
										
										
											2018-05-27 15:25:04 +02:00
+								    "simplyhentai"   : "Simply Hentai",
-												[slickpic] add album extractor (#249)

											
										
										
											2019-06-09 21:59:22 +02:00
+								    "slickpic"       : "SlickPic",
-												[slideshare] improve metadata; flake8

- added 'views' and 'published' keywords
- fixed longer titles and descriptions

											
										
										
											2017-12-13 21:15:05 +01:00
+								    "slideshare"     : "SlideShare",
-												[smugmug] added image and album extractor

just some initial code that still requires a lot of work ...

TODO:
- folders
- old-style albums (which are nearly all of them ...)
- images from users
- OAuth

It could also happen that the API credentials used will become invalid
whenever my 14 day trial period ends (7 days remaining), but that
would just require users to supply their own.

											
										
										
											2018-04-29 21:27:25 +02:00
+								    "smugmug"        : "SmugMug",
-												update supportedsites.rst

											
										
										
											2017-07-24 10:50:40 +02:00
+								    "thebarchive"    : "The /b/ Archive",
-												[vanillarock] add post and tag extractors (closes #254)

											
										
										
											2019-06-23 22:02:54 +02:00
+								    "vanillarock"    : "もえぴりあ",
-												[vsco] add user extractor (#331)

											
										
										
											2019-07-22 22:15:36 +02:00
+								    "vsco"           : "VSCO",
-												[wikiart] add extractors (#179)

for
- artists:          https://www.wikiart.org/en/thomas-cole
- artist-listings:  https://www.wikiart.org/en/artists-by-century/12
- artwork-listings: https://www.wikiart.org/en/paintings-by-media/grisaille

											
										
										
											2019-04-02 17:34:57 +02:00
+								    "wikiart"        : "WikiArt.org",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "worldthree"     : "World Three",
-												[xhamster] add gallery & user extractor (#281)

											
										
										
											2019-06-04 22:23:32 +02:00
+								    "xhamster"       : "xHamster",
-												[xvideos] add user profile extractor (#45)

											
										
										
											2017-11-02 17:28:35 +01:00
+								    "xvideos"        : "XVideos",
-												[yuki] add thread extractor (closes #111)

											
										
										
											2018-09-28 12:46:39 +02:00
+								    "yuki"           : "yuki.la 4chan archive",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								}
 								SUBCATEGORY_MAP = {
-												[nijie] add favorites extractor

adds support for 'https://nijie.info/user_like_illust_view.php?id=...'

											
										
										
											2018-03-31 18:54:25 +02:00
+								    "doujin" : "Doujin",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "gallery": "Galleries",
 								    "image"  : "individual Images",
-												remove dashes from subcategory names in supportedsites.rst

											
										
										
											2020-03-24 02:00:50 +01:00
+								    "issue"  : "Comic Issues",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "manga"  : "Manga",
-												[booru] add extractors for "Popular" images

											
										
										
											2017-08-24 21:24:51 +02:00
+								    "popular": "Popular Images",
-												[hentaifoundry] add 'popular' and 'recent' extractors

for "Popular Pictures" and "Recent Pictures" listings

											
										
										
											2018-09-22 21:28:16 +02:00
+								    "recent" : "Recent Images",
-												add 'Authentication' column to supportedsites.rst

											
										
										
											2017-06-15 21:06:20 +02:00
+								    "search" : "Search Results",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "status" : "Images from Statuses",
-												remove dashes from subcategory names in supportedsites.rst

											
										
										
											2020-03-24 02:00:50 +01:00
+								    "tag"    : "Tag Searches",
-												change text representation of user extractors to "User Profiles"

											
										
										
											2019-09-22 22:21:48 +02:00
+								    "user"   : "User Profiles",
-												fix auto-generation of supportedsites.rst

											
										
										
											2020-01-13 22:29:42 +01:00
+								    "following"    : "",
-												[pinterest] add extractors for related pins

Related pins can not be accessed by adding a "#related" fragment
to the end of a Pinterest URL, for example:
- https://www.pinterest.com/pin/858146903966145189/#related
- https://www.pinterest.com/g1952849/test-/#related

There are no explicit real URLs for related pins,
using an option to enable them results in "clunky" code,
and a custom "related:<URL>" scheme doesn't feel right either.

											
										
										
											2018-08-15 21:28:27 +02:00
+								    "related-pin"  : "related Pins",
 								    "related-board": "",
-												fix auto-generation of supportedsites.rst

											
										
										
											2020-01-13 22:29:42 +01:00
 								    "artstation": {
 								        "artwork": "Artwork Listings",
 								    },
 								    "deviantart": {
 								        "stash": "Sta.sh",
 								    },
-												[instagram] use 'itertools.chain()'

											
										
										
											2020-03-16 22:57:30 +01:00
+								    "instagram": {
 								        "saved": "Saved Posts",
 								    },
-												fix auto-generation of supportedsites.rst

											
										
										
											2020-01-13 22:29:42 +01:00
+								    "newgrounds": {
 								        "art"  : "Art",
 								        "audio": "Audio",
 								        "media": "Media Files",
 								    },
 								    "pinterest": {
 								        "pinit": "pin.it Links",
 								    },
 								    "pixiv": {
 								        "me"  : "pixiv.me Links",
 								        "work": "individual Images",
 								    },
 								    "smugmug": {
 								        "path": "Images from Users and Folders",
 								    },
 								    "twitter": {
 								        "media": "Media Timelines",
 								    },
 								    "wikiart": {
 								        "artists": "Artist Listings",
 								    },
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								}
-												add 'Authentication' column to supportedsites.rst

											
										
										
											2017-06-15 21:06:20 +02:00
+								AUTH_MAP = {
-												update build_supportedsites.py

											
										
										
											2019-01-09 14:21:19 +01:00
+								    "danbooru"   : "Optional",
-												[idolcomplex] add support for idol.sankakucomplex.com

											
										
										
											2018-01-09 17:52:12 +01:00
+								    "deviantart" : "Optional (OAuth)",
-												[e621] document username & password support (#640)

											
										
										
											2020-03-14 01:13:14 +01:00
+								    "e621"       : "Optional",
-												have e-hentai and exhentai on supportedsites.rst (#365)

											
										
										
											2019-08-03 11:42:28 +02:00
+								    "e-hentai"   : "Optional",
-												[idolcomplex] add support for idol.sankakucomplex.com

											
										
										
											2018-01-09 17:52:12 +01:00
+								    "exhentai"   : "Optional",
 								    "flickr"     : "Optional (OAuth)",
 								    "idolcomplex": "Optional",
-												[imgbb] add user extractor + login support (#361)

											
										
										
											2019-08-01 21:39:20 +02:00
+								    "imgbb"      : "Optional",
-												[instagram] implement login support (#195)

											
										
										
											2019-06-26 23:54:38 +02:00
+								    "instagram"  : "Optional",
-												[mangoxo] add login support (#184)

A very recent change: It is now only possible to see more
than the first 5 images of an album if you are logged in.

											
										
										
											2019-04-09 16:54:15 +02:00
+								    "mangoxo"    : "Optional",
-												[newgrounds] implement login support (#394)

											
										
										
											2019-11-15 23:54:07 +01:00
+								    "newgrounds" : "Optional",
-												[idolcomplex] add support for idol.sankakucomplex.com

											
										
										
											2018-01-09 17:52:12 +01:00
+								    "nijie"      : "Required",
 								    "pixiv"      : "Required",
 								    "reddit"     : "Optional (OAuth)",
 								    "sankaku"    : "Optional",
 								    "seiga"      : "Required",
-												[smugmug] add OAuth support

											
										
										
											2018-05-10 18:58:05 +02:00
+								    "smugmug"    : "Optional (OAuth)",
-												[tsumino] add login capabilities (#161)

											
										
										
											2019-01-30 17:58:48 +01:00
+								    "tsumino"    : "Optional",
-												[tumblr] add support for OAuth authentication (#65)

											
										
										
											2018-01-11 14:11:37 +01:00
+								    "tumblr"     : "Optional (OAuth)",
-												[twitter] add login support (#214)

											
										
										
											2019-04-07 23:06:57 +02:00
+								    "twitter"    : "Optional",
-												[docs] Fix inconsistency about which sites have optional authentication (#359)

* [docs] Fix inconsistency about which sites have optional authentication

* update authentication docs

											
										
										
											2019-07-29 18:22:31 +02:00
+								    "wallhaven"  : ("Optional (`API Key "
 								                    "<configuration.rst#extractorwallhavenapi-key>`__)"),
-												add 'Authentication' column to supportedsites.rst

											
										
										
											2017-06-15 21:06:20 +02:00
+								}
 								IGNORE_LIST = (
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
+								    "directlink",
-												add 'Authentication' column to supportedsites.rst

											
										
										
											2017-06-15 21:06:20 +02:00
+								    "oauth",
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
+								    "recursive",
 								    "test",
-												add 'Authentication' column to supportedsites.rst

											
										
										
											2017-06-15 21:06:20 +02:00
+								)
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
def domain(cls):
    """Return the web-domain related to an extractor class

    The first of the following heuristics that yields a result is used:

    1. the last word of the docstring of the module defining 'cls',
       if that word starts with "http"
    2. 'cls.root' (if present and truthy) plus a trailing slash
    3. a scheme selected by 'cls.https' combined with the last word
       of the class docstring
    4. scheme and host of the first test URL from 'cls._get_tests()'

    An empty string is returned if none of them applies.
    """
    # a module without docstring has '__doc__ = None'; treat it like an
    # empty docstring instead of failing on 'None.split()'
    module_doc = sys.modules[cls.__module__].__doc__
    words = module_doc.split() if module_doc else ()
    if words and words[-1].startswith("http"):
        return words[-1]

    if hasattr(cls, "root") and cls.root:
        return cls.root + "/"

    if hasattr(cls, "https"):
        words = cls.__doc__.split() if cls.__doc__ else ()
        if words:
            scheme = "https" if cls.https else "http"
            return "{}://{}/".format(scheme, words[-1])

    test = next(cls._get_tests(), None)
    if test:
        url = test[0]
        # start searching at index 8 to skip the slashes of "https://"
        end = url.find("/", 8)
        # str.find() returns -1 for URLs without a path; keep the whole
        # URL in that case instead of slicing it down to ""
        return url[:end + 1] if end >= 0 else url + "/"

    return ""
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
def category_text(cls):
    """Return a human-readable representation of a category"""
    name = cls.category
    # prefer the explicit display name; a missing or empty entry falls
    # back to the capitalized category identifier
    label = CATEGORY_MAP.get(name)
    if label:
        return label
    return name.capitalize()
-												update build_supportedsites.py script

											
										
										
											2018-09-28 12:39:05 +02:00
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
def subcategory_text(cls):
    """Return a human-readable representation of a subcategory"""
    category = cls.category
    subcategory = cls.subcategory

    # entries keyed by category hold per-category overrides
    overrides = SUBCATEGORY_MAP[category] if category in SUBCATEGORY_MAP else ()
    if subcategory in overrides:
        return overrides[subcategory]

    # entries keyed by subcategory apply to all categories
    if subcategory in SUBCATEGORY_MAP:
        return SUBCATEGORY_MAP[subcategory]

    # default: capitalized subcategory name with an "s" appended,
    # unless it already ends with one
    text = subcategory.capitalize()
    if text.endswith("s"):
        return text
    return text + "s"
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
def category_key(cls):
    """Generate sorting keys by category"""
    text = category_text(cls).lower()
    # classes from an '.imagehosts' module get a "zz" prefix,
    # which moves them towards the end when sorting
    from_imagehosts = cls.__module__.endswith(".imagehosts")
    return "zz" + text if from_imagehosts else text
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
def subcategory_key(cls):
    """Generate sorting keys by subcategory"""
    # "issue" is mapped to "A", which sorts before any lowercase name
    return "A" if cls.subcategory == "issue" else cls.subcategory
-												update supportedsites.rst

											
										
										
											2017-07-15 15:01:30 +02:00
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
def build_extractor_list():
    """Generate a sorted list of lists of extractor classes

    Extractor classes from 'extractor.extractors()' are grouped by their
    'category'; classes with a falsy category or one listed in
    IGNORE_LIST are skipped. Each group is ordered by 'subcategory_key'
    and the groups themselves by 'category_key' of their first element.
    """
    extractors = collections.defaultdict(list)

    # get lists of extractor classes grouped by category
    for extr in extractor.extractors():
        if not extr.category or extr.category in IGNORE_LIST:
            continue
        extractors[extr.category].append(extr)

    # sort extractor lists with the same category
    for extrlist in extractors.values():
        extrlist.sort(key=subcategory_key)

    # ugly hack to add e-hentai.org:
    # derive an "e-hentai" variant from every "exhentai" class
    # Use .get() instead of indexing, since indexing a defaultdict would
    # insert an empty list for a missing "exhentai" category and the
    # 'lst[0]' access in the sort key below would then raise IndexError.
    eh = []
    for extr in extractors.get("exhentai", ()):
        class eh_extr(extr):
            category = "e-hentai"
            root = "https://e-hentai.org"
        eh.append(eh_extr)
    if eh:
        extractors["e-hentai"] = eh

    # sort lists by category
    return sorted(
        extractors.values(),
        key=lambda lst: category_key(lst[0]),
    )
 								# define table columns
 								COLUMNS = (
 								    ("Site", 20,
 								     lambda x: category_text(x[0])),
 								    ("URL" , 35,
 								     lambda x: domain(x[0])),
 								    ("Capabilities", 50,
 								     lambda x: ", ".join(subcategory_text(extr) for extr in x
 								                         if subcategory_text(extr))),
 								    ("Authentication", 16,
 								     lambda x: AUTH_MAP.get(x[0].category, "")),
 								)
-												update build_supportedsites.py

											
										
										
											2019-01-09 14:21:19 +01:00
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
def write_output(fobj, columns, extractors):
    """Write a reStructuredText table of supported sites to 'fobj'

    fobj:       object with a write() method accepting strings
    columns:    iterable of (heading, width, function) tuples; 'function'
                receives one element of 'extractors' and returns the
                cell text
    extractors: iterable of lists of extractor classes; the 'category' of
                each list's first element is used to name substitutions

    Cell texts longer than their column width are replaced with a
    "|category-X|" substitution reference (X being the first character
    of the column heading) and the matching "replace::" definitions are
    written after the table.
    """
    subs = []

    def pad(output, col, category=None):
        """Fit 'output' into column 'col' and pad it with spaces"""
        size = col[1]
        if not isinstance(output, str):
            output = col[2](output)

        if len(output) > size:
            sub = "|{}-{}|".format(category, col[0][0])
            subs.append((sub, output))
            output = sub

        return output.ljust(size)

    w = fobj.write

    # caption
    w("Supported Sites\n")
    w("===============\n")
    w("Unless otherwise known, assume all sites to be NSFW\n\n")

    # table head
    sep = " ".join("=" * c[1] for c in columns) + "\n"
    w(sep)
    # only remove trailing padding: leading spaces are what keeps
    # the cells aligned with their columns
    w(" ".join(pad(c[0], c) for c in columns).rstrip() + "\n")
    w(sep)

    # table body
    for lst in extractors:
        category = lst[0].category
        row = " ".join(pad(col[2](lst), col, category) for col in columns)
        w(row.rstrip())
        w("\n")

    # table bottom
    w(sep)
    w("\n")

    # substitutions
    for sub, value in subs:
        w(".. {} replace:: {}\n".format(sub, value))
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
-												add a script to automatically build a list of supported sites

											
										
										
											2017-01-15 21:31:21 +01:00
 								outfile = sys.argv[1] if len(sys.argv) > 1 else "supportedsites.rst"
-												update/cleanup Python dev scripts

- put common code in its own util.py file
- same Python3 shebang for all scripts
- add file docstrings
- fix format string replacement fields in man page template

											
										
										
											2019-04-16 18:16:48 +02:00
+								with open(util.path("docs", outfile), "w") as file:
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
+								    write_output(file, COLUMNS, build_extractor_list())