gallery-dl/scripts/supportedsites.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""Generate a reStructuredText document with all supported sites"""

import sys
import collections

import util
from gallery_dl import extractor


# Display names for extractor categories, keyed by category name.
# category_text() looks a category up here and falls back to the
# capitalized category name when there is no (non-empty) entry.
CATEGORY_MAP = {
    "2chan"          : "Futaba Channel",
    "35photo"        : "35PHOTO",
    "archivedmoe"    : "Archived.Moe",
    "archiveofsins"  : "Archive of Sins",
    "artstation"     : "ArtStation",
    "b4k"            : "arch.b4k.co",
    "bobx"           : "BobX",
    "deviantart"     : "DeviantArt",
    "dokireader"     : "Doki Reader",
    "dynastyscans"   : "Dynasty Reader",
    "e621"           : "e621",
    "exhentai"       : "ExHentai",
    "fallenangels"   : "Fallen Angels Scans",
    "fashionnova"    : "Fashion Nova",
    "hbrowse"        : "HBrowse",
    "hentai2read"    : "Hentai2Read",
    "hentaicafe"     : "Hentai Cafe",
    "hentaifoundry"  : "Hentai Foundry",
    "hentaifox"      : "HentaiFox",
    "hentaihere"     : "HentaiHere",
    "hitomi"         : "Hitomi.la",
    "idolcomplex"    : "Idol Complex",
    "imagebam"       : "ImageBam",
    "imagefap"       : "ImageFap",
    "imgbox"         : "imgbox",
    "imgth"          : "imgth",
    "imgur"          : "imgur",
    "jaiminisbox"    : "Jaimini's Box",
    "kireicake"      : "Kirei Cake",
    "kissmanga"      : "KissManga",
    "livedoor"       : "livedoor Blog",
    "mangadex"       : "MangaDex",
    "mangafox"       : "Manga Fox",
    "mangahere"      : "Manga Here",
    "mangapark"      : "MangaPark",
    "mangastream"    : "Manga Stream",
    "myportfolio"    : "Adobe Portfolio",
    "nhentai"        : "nhentai",
    "nijie"          : "nijie",
    "nsfwalbum"      : "NSFWalbum.com",
    "nyafuu"         : "Nyafuu Archive",
    "paheal"         : "rule #34",
    "powermanga"     : "PowerManga",
    "readcomiconline": "Read Comic Online",
    "rbt"            : "RebeccaBlackTech",
    "rule34"         : "Rule 34",
    "sankaku"        : "Sankaku Channel",
    "sankakucomplex" : "Sankaku Complex",
    "seaotterscans"  : "Sea Otter Scans",
    "seiga"          : "Niconico Seiga",
    "senmanga"       : "Sen Manga",
    "sensescans"     : "Sense-Scans",
    "sexcom"         : "Sex.com",
    "simplyhentai"   : "Simply Hentai",
    "slickpic"       : "SlickPic",
    "slideshare"     : "SlideShare",
    "smugmug"        : "SmugMug",
    "thebarchive"    : "The /b/ Archive",
    "vanillarock"    : "もえぴりあ",
    "wikiart"        : "WikiArt.org",
    "worldthree"     : "World Three",
    "xhamster"       : "xHamster",
    "xvideos"        : "XVideos",
    "yaplog"         : "yaplog!",
    "yuki"           : "yuki.la 4chan archive",
}

# Display text for extractor subcategories, keyed by subcategory name.
# subcategory_text() returns these values as-is; subcategories without an
# entry are capitalized and given a trailing "s" instead.
# An empty string makes the "Capabilities" column skip that subcategory.
SUBCATEGORY_MAP = {
    "artwork": "Artwork Listings",
    "artists": "",
    "doujin" : "Doujin",
    "gallery": "Galleries",
    "image"  : "individual Images",
    "issue"  : "Comic-Issues",
    "manga"  : "Manga",
    "me"     : "pixiv.me Links",
    "media"  : "Media Timelines",
    "path"   : "Images from Users and Folders",
    "pinit"  : "pin.it Links",
    "popular": "Popular Images",
    "recent" : "Recent Images",
    "search" : "Search Results",
    "stash"  : "Sta.sh",
    "status" : "Images from Statuses",
    "tag"    : "Tag-Searches",
    "user"   : "Images from Users",
    "work"   : "Individual Images",
    "related-pin"  : "related Pins",
    "related-board": "",
}

# Text shown in the "Authentication" column, keyed by category name.
# Categories without an entry get an empty cell.
AUTH_MAP = {
    "danbooru"   : "Optional",
    "deviantart" : "Optional (OAuth)",
    "exhentai"   : "Optional",
    "flickr"     : "Optional (OAuth)",
    "idolcomplex": "Optional",
    "instagram"  : "Optional",
    "luscious"   : "Optional",
    "mangoxo"    : "Optional",
    "nijie"      : "Required",
    "pixiv"      : "Required",
    "reddit"     : "Optional (OAuth)",
    "sankaku"    : "Optional",
    "seiga"      : "Required",
    "smugmug"    : "Optional (OAuth)",
    "tsumino"    : "Optional",
    "tumblr"     : "Optional (OAuth)",
    "twitter"    : "Optional",
}

# Categories that build_extractor_list() leaves out of the generated table.
IGNORE_LIST = (
    "directlink",
    "oauth",
    "recursive",
    "test",
)


def domain(cls):
    """Return the web-domain related to an extractor class

    The following sources are tried in order and the first usable one wins:

    1. the last word of the docstring of the module defining 'cls',
       if it starts with "http"
    2. the class' non-empty 'root' attribute plus a trailing "/"
    3. if the class has an 'https' attribute: the last word of the class
       docstring, prefixed with "https://" or "http://" depending on the
       truthiness of 'cls.https'
    4. scheme and host of the URL of the first test returned by
       'cls._get_tests()'

    Missing or empty docstrings are skipped instead of raising an exception.
    An empty string is returned if none of the sources yields a result.
    """
    # module docstring; tolerate modules without one
    module = sys.modules.get(cls.__module__)
    words = (getattr(module, "__doc__", None) or "").split()
    if words and words[-1].startswith("http"):
        return words[-1]

    root = getattr(cls, "root", None)
    if root:
        return root + "/"

    if hasattr(cls, "https"):
        words = (cls.__doc__ or "").split()
        if words:
            scheme = "https" if cls.https else "http"
            return "{}://{}/".format(scheme, words[-1])

    test = next(cls._get_tests(), None)
    if test:
        url = test[0]
        # start searching at offset 8 to skip the slashes of the
        # "http://" / "https://" prefix and find the one ending the host
        end = url.find("/", 8)
        if end < 0:
            # no path component: the whole URL is scheme + host
            return url + "/"
        return url[:end + 1]

    return ""


def category_text(cls):
    """Return the display name for the category of an extractor class

    Uses the CATEGORY_MAP entry when there is a non-empty one and the
    capitalized category name otherwise.
    """
    name = cls.category
    label = CATEGORY_MAP.get(name)
    if label:
        return label
    return name.capitalize()


def subcategory_text(cls):
    """Return the display text for the subcategory of an extractor class

    Subcategories listed in SUBCATEGORY_MAP use the text stored there
    (which may be empty); all others are capitalized and get an "s"
    appended unless they already end with one.
    """
    name = cls.subcategory
    try:
        return SUBCATEGORY_MAP[name]
    except KeyError:
        pass

    text = name.capitalize()
    if text.endswith("s"):
        return text
    return text + "s"


def category_key(cls):
    """Return the key used to sort extractor classes by category

    The key is the lowercased display name; classes from a module whose
    name ends in ".imagehosts" get a "zz" prefix, which moves them
    towards the end of the sorted list.
    """
    text = category_text(cls).lower()
    prefix = "zz" if cls.__module__.endswith(".imagehosts") else ""
    return prefix + text


def subcategory_key(cls):
    """Return the key used to sort extractor classes by subcategory

    "user" and "issue" are mapped to "A", which sorts before lowercase
    subcategory names; every other subcategory is its own key.
    """
    name = cls.subcategory
    return "A" if name == "user" or name == "issue" else name


def build_extractor_list():
    """Return the extractor classes as a sorted list of per-category lists

    Classes with an empty category or a category in IGNORE_LIST are left
    out.  Each inner list holds the classes of one category ordered by
    subcategory_key(); the outer list is ordered by category_key() of
    each inner list's first class.
    """
    grouped = collections.defaultdict(list)

    # collect classes per category, dropping ignored ones
    for cls in extractor.extractors():
        category = cls.category
        if category and category not in IGNORE_LIST:
            grouped[category].append(cls)

    # order the classes inside each category
    for group in grouped.values():
        group.sort(key=subcategory_key)

    # order the categories themselves
    groups = list(grouped.values())
    groups.sort(key=lambda group: category_key(group[0]))
    return groups


# table layout: one (header, width, cell-function) tuple per column;
# each cell-function receives the list of extractor classes of one category
COLUMNS = (
    ("Site", 20, lambda extrs: category_text(extrs[0])),
    ("URL", 35, lambda extrs: domain(extrs[0])),
    # subcategories with an empty display text are left out
    ("Capabilities", 50, lambda extrs: ", ".join(
        filter(None, map(subcategory_text, extrs)))),
    ("Authentication", 16, lambda extrs: AUTH_MAP.get(
        extrs[0].category, "")),
)


def write_output(fobj, columns, extractors):
    """Write the "Supported Sites" table as reStructuredText to 'fobj'

    'columns' is a sequence of (header, width, cell-function) tuples and
    'extractors' a sequence of extractor-class lists, one per table row.
    A cell whose text is longer than its column width is replaced by a
    "|<category>-<first letter of header>|" substitution reference; the
    matching "replace::" definitions are written after the table.
    """
    substitutions = []

    def pad(output, col, category=None):
        width = col[1]
        text = output if isinstance(output, str) else col[2](output)

        if len(text) > width:
            # too wide for the column: emit a reference, define it later
            reference = "|{}-{}|".format(category, col[0][0])
            substitutions.append((reference, text))
            text = reference

        return text.ljust(width)

    def write_row(cells):
        fobj.write(" ".join(cells).strip() + "\n")

    ruler = " ".join("=" * col[1] for col in columns) + "\n"

    # caption
    fobj.write("Supported Sites\n")
    fobj.write("===============\n")

    # table head
    fobj.write(ruler)
    write_row(pad(col[0], col) for col in columns)
    fobj.write(ruler)

    # table body
    for group in extractors:
        write_row(
            pad(col[2](group), col, group[0].category)
            for col in columns
        )

    # table bottom
    fobj.write(ruler)
    fobj.write("\n")

    # substitutions
    for reference, text in substitutions:
        fobj.write(".. {} replace:: {}\n".format(reference, text))


# output file name: first command-line argument if given, otherwise
# "supportedsites.rst"; it is passed to util.path() together with "docs"
outfile = sys.argv[1] if len(sys.argv) > 1 else "supportedsites.rst"
# write UTF-8 explicitly: the table contains non-ASCII site names
# (e.g. "もえぴりあ"), which the platform's default encoding might not
# be able to encode
with open(util.path("docs", outfile), "w", encoding="utf-8") as file:
    write_output(file, COLUMNS, build_extractor_list())
update/cleanup Python dev scripts - put common code in its own util.py file - same Python3 shebang for all scripts - add file docstrings - fix format string replacement fields in man page template 2019-04-16 18:16:48 +02:00			`#!/usr/bin/env python3`
			`# -- coding: utf-8 --`

			`"""Generate a reStructuredText document with all supported sites"""`
add a script to automatically build a list of supported sites 2017-01-15 21:31:21 +01:00
			`import sys`
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`import collections`
add a script to automatically build a list of supported sites 2017-01-15 21:31:21 +01:00
update/cleanup Python dev scripts - put common code in its own util.py file - same Python3 shebang for all scripts - add file docstrings - fix format string replacement fields in man page template 2019-04-16 18:16:48 +02:00			`import util`
			`from gallery_dl import extractor`
add a script to automatically build a list of supported sites 2017-01-15 21:31:21 +01:00

improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00			`CATEGORY_MAP = {`
update supportedsites.rst 2017-07-15 15:01:30 +02:00			`"2chan" : "Futaba Channel",`
[35photo] add user-, genre-, and image-extractors (#162) 2019-03-18 01:11:30 +01:00			`"35photo" : "35PHOTO",`
update supportedsites.rst 2017-07-15 15:01:30 +02:00			`"archivedmoe" : "Archived.Moe",`
			`"archiveofsins" : "Archive of Sins",`
check supportedsites.rst in release script 2018-03-17 15:35:38 +01:00			`"artstation" : "ArtStation",`
[foolfuuka] add support for more sites (#18) - https://arch.b4k.co - https://archive.whatisthisimnotgoodwithcomputers.com - https://archive.yeet.net Notes: - The name "whatisthisimnotgoodwithcomputers" is way too long ... - archive.yeet.net is out of date and also blocked by 4chan servers - newest threads are 2 weeks old - using "https://archive.yeet.net" as Referer header results in "403 Forbidden" when accessing 4chan 2017-09-16 21:11:44 +02:00			`"b4k" : "arch.b4k.co",`
[bobx] add gallery and model extractors 2018-09-13 20:13:12 +02:00			`"bobx" : "BobX",`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00			`"deviantart" : "DeviantArt",`
			`"dokireader" : "Doki Reader",`
			`"dynastyscans" : "Dynasty Reader",`
			`"e621" : "e621",`
			`"exhentai" : "ExHentai",`
			`"fallenangels" : "Fallen Angels Scans",`
[shopify] add generic collection and product extractors (#175) with fashionnova.com as a default domain 2019-03-05 22:33:37 +01:00			`"fashionnova" : "Fashion Nova",`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00			`"hbrowse" : "HBrowse",`
			`"hentai2read" : "Hentai2Read",`
[hentaicafe] add chapter and manga extractors (#101) 2018-09-05 21:08:40 +02:00			`"hentaicafe" : "Hentai Cafe",`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00			`"hentaifoundry" : "Hentai Foundry",`
[hentaifox] add chapter extractor (#160) 2019-01-28 18:00:32 +01:00			`"hentaifox" : "HentaiFox",`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00			`"hentaihere" : "HentaiHere",`
			`"hitomi" : "Hitomi.la",`
[idolcomplex] add support for idol.sankakucomplex.com 2018-01-09 17:52:12 +01:00			`"idolcomplex" : "Idol Complex",`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00			`"imagebam" : "ImageBam",`
			`"imagefap" : "ImageFap",`
			`"imgbox" : "imgbox",`
			`"imgth" : "imgth",`
			`"imgur" : "imgur",`
			`"jaiminisbox" : "Jaimini's Box",`
			`"kireicake" : "Kirei Cake",`
			`"kissmanga" : "KissManga",`
[livedoor] add blog- and post-extractors (#190) 2019-04-06 16:10:29 +02:00			`"livedoor" : "livedoor Blog",`
[mangadex] general improvements - support >100 chapter entries per manga - custom archive ID format - detect non-existing chapters 2018-03-06 14:15:15 +01:00			`"mangadex" : "MangaDex",`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00			`"mangafox" : "Manga Fox",`
			`"mangahere" : "Manga Here",`
			`"mangapark" : "MangaPark",`
			`"mangastream" : "Manga Stream",`
[myportfolio] add user and gallery extractors (#95) 2018-07-19 18:56:45 +02:00			`"myportfolio" : "Adobe Portfolio",`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00			`"nhentai" : "nhentai",`
			`"nijie" : "nijie",`
[nsfwalbum] add album extractor (closes #287) 2019-06-22 22:43:09 +02:00			`"nsfwalbum" : "NSFWalbum.com",`
update supportedsites.rst 2017-07-15 15:01:30 +02:00			`"nyafuu" : "Nyafuu Archive",`
[paheal] add tag- and post-extractors (closes #69) 2018-01-15 16:39:05 +01:00			`"paheal" : "rule #34",`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00			`"powermanga" : "PowerManga",`
			`"readcomiconline": "Read Comic Online",`
update supportedsites.rst 2017-07-24 10:50:40 +02:00			`"rbt" : "RebeccaBlackTech",`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00			`"rule34" : "Rule 34",`
			`"sankaku" : "Sankaku Channel",`
[sankakucomplex] move article extractor to its own module (#258) 2019-05-27 23:49:23 +02:00			`"sankakucomplex" : "Sankaku Complex",`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00			`"seaotterscans" : "Sea Otter Scans",`
			`"seiga" : "Niconico Seiga",`
			`"senmanga" : "Sen Manga",`
			`"sensescans" : "Sense-Scans",`
[sexcom] add pin and board extractors (#147) 2019-04-23 22:10:39 +02:00			`"sexcom" : "Sex.com",`
[simplyhentai] add gallery extractor (#89) 2018-05-27 15:25:04 +02:00			`"simplyhentai" : "Simply Hentai",`
[slickpic] add album extractor (#249) 2019-06-09 21:59:22 +02:00			`"slickpic" : "SlickPic",`
[slideshare] improve metadata; flake8 - added 'views' and 'published' keywords - fixed longer titles and descriptions 2017-12-13 21:15:05 +01:00			`"slideshare" : "SlideShare",`
[smugmug] added image and album extractor just some initial code that still requires a lot of work ... TODO: - folders - old-style albums (which are nearly all of them ...) - images from users - OAuth It could also happen that the API credentials used will become invalid whenever my 14 day trial period ends (7 days remaining), but that would just require users to supply their own. 2018-04-29 21:27:25 +02:00			`"smugmug" : "SmugMug",`
update supportedsites.rst 2017-07-24 10:50:40 +02:00			`"thebarchive" : "The /b/ Archive",`
[vanillarock] add post and tag extractors (closes #254) 2019-06-23 22:02:54 +02:00			`"vanillarock" : "もえぴりあ",`
[wikiart] add extractors (#179) for - artists: https://www.wikiart.org/en/thomas-cole - artist-listings: https://www.wikiart.org/en/artists-by-century/12 - artwork-listings: https://www.wikiart.org/en/paintings-by-media/grisaille 2019-04-02 17:34:57 +02:00			`"wikiart" : "WikiArt.org",`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00			`"worldthree" : "World Three",`
[xhamster] add gallery & user extractor (#281) 2019-06-04 22:23:32 +02:00			`"xhamster" : "xHamster",`
[xvideos] add user profile extractor (#45) 2017-11-02 17:28:35 +01:00			`"xvideos" : "XVideos",`
[yaplog] add user- and post-extractors (#190) 2019-04-04 17:56:56 +02:00			`"yaplog" : "yaplog!",`
[yuki] add thread extractor (closes #111) 2018-09-28 12:46:39 +02:00			`"yuki" : "yuki.la 4chan archive",`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00			`}`

			`SUBCATEGORY_MAP = {`
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`"artwork": "Artwork Listings",`
[wikiart] add extractors (#179) for - artists: https://www.wikiart.org/en/thomas-cole - artist-listings: https://www.wikiart.org/en/artists-by-century/12 - artwork-listings: https://www.wikiart.org/en/paintings-by-media/grisaille 2019-04-02 17:34:57 +02:00			`"artists": "",`
[nijie] add favorites extractor adds support for 'https://nijie.info/user_like_illust_view.php?id=...' 2018-03-31 18:54:25 +02:00			`"doujin" : "Doujin",`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00			`"gallery": "Galleries",`
			`"image" : "individual Images",`
			`"issue" : "Comic-Issues",`
			`"manga" : "Manga",`
update extractor docstrings 2017-06-28 17:39:07 +02:00			`"me" : "pixiv.me Links",`
[behance] add user extractor 2018-08-31 17:40:44 +02:00			`"media" : "Media Timelines",`
release version 1.3.5 2018-05-04 10:03:20 +02:00			`"path" : "Images from Users and Folders",`
add simple imagehosts to list of supported sites 2017-04-23 17:08:45 +02:00			`"pinit" : "pin.it Links",`
[booru] add extractors for "Popular" images 2017-08-24 21:24:51 +02:00			`"popular": "Popular Images",`
[hentaifoundry] add 'popular' and 'recent' extractors for "Popular Pictures" and "Recent Pictures" listings 2018-09-22 21:28:16 +02:00			`"recent" : "Recent Images",`
add 'Authentication' column to supportedsites.rst 2017-06-15 21:06:20 +02:00			`"search" : "Search Results",`
[deviantart] add separate 'sta.sh' extractor (#113) - supports multiple stashed deviations per page - explicitly mentions sta.sh support on supportedsites.rst 2018-12-26 18:50:55 +01:00			`"stash" : "Sta.sh",`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00			`"status" : "Images from Statuses",`
			`"tag" : "Tag-Searches",`
			`"user" : "Images from Users",`
			`"work" : "Individual Images",`
[pinterest] add extractors for related pins Related pins can not be accessed by adding a "#related" fragment to the end of a Pinterest URL, for example: - https://www.pinterest.com/pin/858146903966145189/#related - https://www.pinterest.com/g1952849/test-/#related There are no explicit real URLs for related pins, using an option to enable them results in "clunky" code, and a custom "related:<URL>" scheme doesn't feel right either. 2018-08-15 21:28:27 +02:00			`"related-pin" : "related Pins",`
			`"related-board": "",`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00			`}`

add 'Authentication' column to supportedsites.rst 2017-06-15 21:06:20 +02:00			`AUTH_MAP = {`
update build_supportedsites.py 2019-01-09 14:21:19 +01:00			`"danbooru" : "Optional",`
[idolcomplex] add support for idol.sankakucomplex.com 2018-01-09 17:52:12 +01:00			`"deviantart" : "Optional (OAuth)",`
			`"exhentai" : "Optional",`
			`"flickr" : "Optional (OAuth)",`
			`"idolcomplex": "Optional",`
[instagram] implement login support (#195) 2019-06-26 23:54:38 +02:00			`"instagram" : "Optional",`
[luscious] add login capabilities (closes #159) 2019-01-28 17:14:15 +01:00			`"luscious" : "Optional",`
[mangoxo] add login support (#184) A very recent change: It is now only possible to see more than the first 5 images of an album if you are logged in. 2019-04-09 16:54:15 +02:00			`"mangoxo" : "Optional",`
[idolcomplex] add support for idol.sankakucomplex.com 2018-01-09 17:52:12 +01:00			`"nijie" : "Required",`
			`"pixiv" : "Required",`
			`"reddit" : "Optional (OAuth)",`
			`"sankaku" : "Optional",`
			`"seiga" : "Required",`
[smugmug] add OAuth support 2018-05-10 18:58:05 +02:00			`"smugmug" : "Optional (OAuth)",`
[tsumino] add login capabilities (#161) 2019-01-30 17:58:48 +01:00			`"tsumino" : "Optional",`
[tumblr] add support for OAuth authentication (#65) 2018-01-11 14:11:37 +01:00			`"tumblr" : "Optional (OAuth)",`
[twitter] add login support (#214) 2019-04-07 23:06:57 +02:00			`"twitter" : "Optional",`
add 'Authentication' column to supportedsites.rst 2017-06-15 21:06:20 +02:00			`}`

			`IGNORE_LIST = (`
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`"directlink",`
add 'Authentication' column to supportedsites.rst 2017-06-15 21:06:20 +02:00			`"oauth",`
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`"recursive",`
			`"test",`
add 'Authentication' column to supportedsites.rst 2017-06-15 21:06:20 +02:00			`)`

improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`def domain(cls):`
			`"""Return the web-domain related to an extractor class"""`
			`url = sys.modules[cls.__module__].__doc__.split()[-1]`
			`if url.startswith("http"):`
			`return url`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`if hasattr(cls, "root") and cls.root:`
			`return cls.root + "/"`
update build_supportedsites.py script 2018-09-28 12:39:05 +02:00
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`if hasattr(cls, "https"):`
			`scheme = "https" if cls.https else "http"`
			`netloc = cls.__doc__.split()[-1]`
			`return "{}://{}/".format(scheme, netloc)`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`test = next(cls._get_tests(), None)`
			`if test:`
			`url = test[0]`
			`return url[:url.find("/", 8)+1]`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`return ""`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00

simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`def category_text(cls):`
			`"""Return a human-readable representation of a category"""`
			`c = cls.category`
			`return CATEGORY_MAP.get(c) or c.capitalize()`
update build_supportedsites.py script 2018-09-28 12:39:05 +02:00
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`def subcategory_text(cls):`
			`"""Return a human-readable representation of a subcategory"""`
			`sc = cls.subcategory`
			`if sc in SUBCATEGORY_MAP:`
			`return SUBCATEGORY_MAP[sc]`
			`sc = sc.capitalize()`
			`return sc if sc.endswith("s") else sc + "s"`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00

simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`def category_key(cls):`
			`"""Generate sorting keys by category"""`
			`key = category_text(cls).lower()`
			`if cls.__module__.endswith(".imagehosts"):`
			`key = "zz" + key`
			`return key`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00

simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`def subcategory_key(cls):`
			`"""Generate sorting keys by subcategory"""`
			`if cls.subcategory in ("user", "issue"):`
			`return "A"`
			`return cls.subcategory`
update supportedsites.rst 2017-07-15 15:01:30 +02:00

simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`def build_extractor_list():`
			`"""Generate a sorted list of lists of extractor classes"""`
			`extractors = collections.defaultdict(list)`
add simple imagehosts to list of supported sites 2017-04-23 17:08:45 +02:00
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`# get lists of extractor classes grouped by category`
			`for extr in extractor.extractors():`
update build_supportedsites.py 2019-01-09 14:21:19 +01:00			`if not extr.category or extr.category in IGNORE_LIST:`
add 'Authentication' column to supportedsites.rst 2017-06-15 21:06:20 +02:00			`continue`
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`extractors[extr.category].append(extr)`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`# sort extractor lists with the same category`
			`for extrlist in extractors.values():`
			`extrlist.sort(key=subcategory_key)`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`# sort lists by category`
			`return sorted(`
			`extractors.values(),`
			`key=lambda lst: category_key(lst[0]),`
			`)`


			`# define table columns`
			`COLUMNS = (`
			`("Site", 20,`
			`lambda x: category_text(x[0])),`
			`("URL" , 35,`
			`lambda x: domain(x[0])),`
			`("Capabilities", 50,`
			`lambda x: ", ".join(subcategory_text(extr) for extr in x`
			`if subcategory_text(extr))),`
			`("Authentication", 16,`
			`lambda x: AUTH_MAP.get(x[0].category, "")),`
			`)`
update build_supportedsites.py 2019-01-09 14:21:19 +01:00

simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`def write_output(fobj, columns, extractors):`
add mastodon/foolslide/foolfuuka examples to example config 2019-02-05 16:17:25 +01:00
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`def pad(output, col, category=None):`
			`size = col[1]`
			`output = output if isinstance(output, str) else col[2](output)`
update handling of extractor URL patterns When loading extractor classes during 'extractor.find(…)', their 'pattern' attribute will be replaced with a compiled version of itself. 2019-02-08 20:08:16 +01:00
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`if len(output) > size:`
			`sub = "\|{}-{}\|".format(category, col[0][0])`
			`subs.append((sub, output))`
			`output = sub`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`return output + " " * (size - len(output))`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`w = fobj.write`
			`subs = []`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`# caption`
			`w("Supported Sites\n")`
			`w("===============\n")`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`# table head`
			`sep = " ".join("=" * c[1] for c in columns) + "\n"`
			`w(sep)`
			`w(" ".join(pad(c[0], c) for c in columns).strip() + "\n")`
			`w(sep)`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`# table body`
			`for lst in extractors:`
			`w(" ".join(`
			`pad(col[2](lst), col, lst[0].category)`
			`for col in columns`
			`).strip())`
			`w("\n")`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`# table bottom`
			`w(sep)`
			`w("\n")`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`# substitutions`
			`for sub, value in subs:`
			`w(".. {} replace:: {}\n".format(sub, value))`
improve supportedsites.rst and build script 2017-04-20 16:56:50 +02:00
add a script to automatically build a list of supported sites 2017-01-15 21:31:21 +01:00
			`outfile = sys.argv[1] if len(sys.argv) > 1 else "supportedsites.rst"`
update/cleanup Python dev scripts - put common code in its own util.py file - same Python3 shebang for all scripts - add file docstrings - fix format string replacement fields in man page template 2019-04-16 18:16:48 +02:00			`with open(util.path("docs", outfile), "w") as file:`
simplify build_supportedsites.py 2019-02-20 19:25:41 +01:00			`write_output(file, COLUMNS, build_extractor_list())`