gallery-dl/scripts/supportedsites.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""Generate a Markdown document listing all supported sites"""

import os
import sys
import collections

import util
from gallery_dl import extractor


# Display names for categories, keyed by category identifier.
# category_text() falls back to the capitalized identifier for any
# category that is not listed here, so only names that differ from
# that default need an entry.
CATEGORY_MAP = {
    "2chan"          : "Futaba Channel",
    "35photo"        : "35PHOTO",
    "adultempire"    : "Adult Empire",
    "allgirlbooru"   : "All girl",
    "archivedmoe"    : "Archived.Moe",
    "archiveofsins"  : "Archive of Sins",
    "artstation"     : "ArtStation",
    "aryion"         : "Eka's Portal",
    "b4k"            : "arch.b4k.co",
    "baraag"         : "baraag",
    "bcy"            : "半次元",
    "bobx"           : "BobX",
    "deviantart"     : "DeviantArt",
    "dokireader"     : "Doki Reader",
    "drawfriends"    : "Draw Friends",
    "dynastyscans"   : "Dynasty Reader",
    "e621"           : "e621",
    "erome"          : "EroMe",
    "e-hentai"       : "E-Hentai",
    "exhentai"       : "ExHentai",
    "fallenangels"   : "Fallen Angels Scans",
    "fanbox"         : "pixivFANBOX",
    "fashionnova"    : "Fashion Nova",
    "furaffinity"    : "Fur Affinity",
    "hbrowse"        : "HBrowse",
    "hentai2read"    : "Hentai2Read",
    "hentaicafe"     : "Hentai Cafe",
    "hentaicosplays" : "Hentai Cosplay",
    "hentaifoundry"  : "Hentai Foundry",
    "hentaifox"      : "HentaiFox",
    "hentaihand"     : "HentaiHand",
    "hentaihere"     : "HentaiHere",
    "hentaiimg"      : "Hentai Image",
    "hitomi"         : "Hitomi.la",
    "idolcomplex"    : "Idol Complex",
    "illusioncardsbooru": "Illusion Game Cards",
    "imagebam"       : "ImageBam",
    "imagefap"       : "ImageFap",
    "imgbb"          : "ImgBB",
    "imgbox"         : "imgbox",
    "imagechest"     : "ImageChest",
    "imgth"          : "imgth",
    "imgur"          : "imgur",
    "jaiminisbox"    : "Jaimini's Box",
    "kabeuchi"       : "かべうち",
    "kireicake"      : "Kirei Cake",
    "kissmanga"      : "KissManga",
    "lineblog"       : "LINE BLOG",
    "livedoor"       : "livedoor Blog",
    "omgmiamiswimwear": "Omg Miami Swimwear",
    "mangadex"       : "MangaDex",
    "mangafox"       : "Manga Fox",
    "mangahere"      : "Manga Here",
    "mangakakalot"   : "MangaKakalot",
    "manganelo"      : "Manganato",
    "mangapark"      : "MangaPark",
    "mangastream"    : "Manga Stream",
    "mastodon.social": "mastodon.social",
    "myhentaigallery": "My Hentai Gallery",
    "myportfolio"    : "Adobe Portfolio",
    "naverwebtoon"   : "NaverWebtoon",
    "nhentai"        : "nhentai",
    "nijie"          : "nijie",
    "nozomi"         : "Nozomi.la",
    "nsfwalbum"      : "NSFWalbum.com",
    "nyafuu"         : "Nyafuu Archive",
    "paheal"         : "rule #34",
    "photovogue"     : "PhotoVogue",
    "pornimagesxxx"  : "Porn Image",
    "powermanga"     : "PowerManga",
    "readcomiconline": "Read Comic Online",
    "rbt"            : "RebeccaBlackTech",
    "redgifs"        : "RedGIFs",
    "rule34"         : "Rule 34",
    "sankaku"        : "Sankaku Channel",
    "sankakucomplex" : "Sankaku Complex",
    "seaotterscans"  : "Sea Otter Scans",
    "seiga"          : "Niconico Seiga",
    "senmanga"       : "Sen Manga",
    "sensescans"     : "Sense-Scans",
    "sexcom"         : "Sex.com",
    "simplyhentai"   : "Simply Hentai",
    "slickpic"       : "SlickPic",
    "slideshare"     : "SlideShare",
    "smugmug"        : "SmugMug",
    "speakerdeck"    : "Speaker Deck",
    "subscribestar"  : "SubscribeStar",
    "tbib"           : "The Big ImageBoard",
    "thebarchive"    : "The /b/ Archive",
    "thecollection"  : "The /co/llection",
    "theloudbooru"   : "The Loud Booru",
    "tumblrgallery"  : "TumblrGallery",
    "vanillarock"    : "もえぴりあ",
    "vidyart"        : "/v/idyart",
    "vk"             : "VK",
    "vsco"           : "VSCO",
    "wakarimasen"    : "Wakarimasen Archive",
    "webtoons"       : "Webtoon",
    "wikiart"        : "WikiArt.org",
    "worldthree"     : "World Three",
    "xhamster"       : "xHamster",
    "xvideos"        : "XVideos",
    "yandere"        : "yande.re",
}

# Display labels for subcategories, consulted by subcategory_text().
#
# The mapping holds two kinds of entries in one key space:
# - str values are generic labels, keyed by subcategory name
# - dict values are per-category overrides, keyed by category name,
#   and take precedence over the generic labels
#
# An empty string hides the subcategory: the "Capabilities" column
# in COLUMNS drops every label that is falsy.
SUBCATEGORY_MAP = {
    # generic labels (subcategory -> label)
    "doujin" : "Doujin",
    "gallery": "Galleries",
    "image"  : "individual Images",
    "index"  : "Site Index",
    "issue"  : "Comic Issues",
    "manga"  : "Manga",
    "popular": "Popular Images",
    "recent" : "Recent Images",
    "search" : "Search Results",
    "status" : "Images from Statuses",
    "tag"    : "Tag Searches",
    "user"   : "User Profiles",
    "watch"  : "Watches",
    "following"    : "",
    "related-pin"  : "related Pins",
    "related-board": "",

    # per-category overrides (category -> {subcategory -> label})
    "artstation": {
        "artwork": "Artwork Listings",
    },
    "deviantart": {
        "stash": "Sta.sh",
        "watch-posts": "",
    },
    "hentaifoundry": {
        "story": "",
    },
    "instagram": {
        "posts": "",
        "saved": "Saved Posts",
        "tagged": "Tagged Posts",
    },
    "mangadex": {
        "feed" : "Followed Feed",
    },
    "newgrounds": {
        "art"  : "Art",
        "audio": "Audio",
        "media": "Media Files",
    },
    "pinterest": {
        "board": "",
        "pinit": "pin.it Links",
    },
    "pixiv": {
        "me"  : "pixiv.me Links",
        "work": "individual Images",
    },
    "sankaku": {
        "books": "Book Searches",
    },
    "smugmug": {
        "path": "Images from Users and Folders",
    },
    "twitter": {
        "media": "Media Timelines",
        "list-members": "List Members",
    },
    "wallhaven": {
        "collections": "",
    },
    "weasyl": {
        "journals"   : "",
        "submissions": "",
    },
    "wikiart": {
        "artists": "Artist Listings",
    },
}

# Section headings for groups of categories that share a basecategory.
# generate_output() falls back to "<Basecategory> Instances" for any
# basecategory that is not listed here.
BASE_MAP = {
    "foolfuuka"   : "FoolFuuka 4chan Archives",
    "foolslide"   : "FoOlSlide Instances",
    "gelbooru_v01": "Gelbooru Beta 0.1.11",
    "gelbooru_v02": "Gelbooru Beta 0.2",
    "moebooru"    : "Moebooru and MyImouto",
}

# HTML link snippets reused as values in AUTH_MAP below
_OAUTH = '<a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a>'
_COOKIES = '<a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a>'
_APIKEY_DB = \
    '<a href="configuration.rst#extractorderpibooruapi-key">API Key</a>'
_APIKEY_WH = \
    '<a href="configuration.rst#extractorwallhavenapi-key">API Key</a>'
_APIKEY_WY = \
    '<a href="configuration.rst#extractorweasylapi-key">API Key</a>'

# Content of the "Authentication" column, keyed by category.
# Values are inserted into the table cell as-is (plain text or HTML);
# categories that are not listed get an empty cell.
AUTH_MAP = {
    "aryion"         : "Supported",
    "baraag"         : _OAUTH,
    "danbooru"       : "Supported",
    "derpibooru"     : _APIKEY_DB,
    "deviantart"     : _OAUTH,
    "e621"           : "Supported",
    "e-hentai"       : "Supported",
    "exhentai"       : "Supported",
    "fanbox"         : _COOKIES,
    "fantia"         : _COOKIES,
    "flickr"         : _OAUTH,
    "furaffinity"    : _COOKIES,
    "idolcomplex"    : "Supported",
    "imgbb"          : "Supported",
    "inkbunny"       : "Supported",
    "instagram"      : "Supported",
    "mangadex"       : "Supported",
    "mangoxo"        : "Supported",
    "mastodon.social": _OAUTH,
    "newgrounds"     : "Supported",
    "nijie"          : "Required",
    "patreon"        : _COOKIES,
    "pawoo"          : _OAUTH,
    "pillowfort"     : "Supported",
    "pinterest"      : "Supported",
    "pixiv"          : _OAUTH,
    "ponybooru"      : "API Key",
    "reddit"         : _OAUTH,
    "sankaku"        : "Supported",
    "seiga"          : "Required",
    "smugmug"        : _OAUTH,
    "subscribestar"  : "Supported",
    "tapas"          : "Supported",
    "tsumino"        : "Supported",
    "tumblr"         : _OAUTH,
    "twitter"        : "Supported",
    "wallhaven"      : _APIKEY_WH,
    "weasyl"         : _APIKEY_WY,
}

# Categories that build_extractor_list() skips, i.e. extractors
# that do not get a row in the generated table.
IGNORE_LIST = (
    "directlink",
    "oauth",
    "recursive",
    "test",
)


def domain(cls):
    """Return the web-domain related to an extractor class

    The following sources are tried in order and the first hit wins:

    1. the last word of the docstring of the module defining 'cls',
       if it starts with "http"
    2. the class's non-empty 'root' attribute plus a trailing "/"
    3. for classes with an 'https' attribute and a docstring:
       the scheme selected by that flag and the last word of the
       class docstring as host
    4. scheme and host of the first test URL from cls._get_tests()

    An empty string is returned if none of them yields a result.
    """
    try:
        url = sys.modules[cls.__module__].__doc__.split()[-1]
    except (KeyError, AttributeError, IndexError):
        # module not loaded, no module docstring, or a blank docstring
        url = ""
    if url.startswith("http"):
        return url

    root = getattr(cls, "root", None)
    if root:
        return root + "/"

    if hasattr(cls, "https"):
        # a missing or blank class docstring falls through to the
        # test URL instead of raising
        words = (cls.__doc__ or "").split()
        if words:
            scheme = "https" if cls.https else "http"
            return "{}://{}/".format(scheme, words[-1])

    test = next(cls._get_tests(), None)
    if test:
        url = test[0]
        # searching from index 8 skips the "//" of the URL scheme
        end = url.find("/", 8)
        if end < 0:
            # URL without a path: find() returned -1, and slicing with
            # it would yield an empty string
            return url + "/"
        return url[:end+1]

    return ""


def category_text(c):
    """Return the display name of a category

    Uses the non-empty entry from CATEGORY_MAP if there is one
    and the capitalized category identifier otherwise.
    """
    label = CATEGORY_MAP.get(c)
    if label:
        return label
    return c.capitalize()


def subcategory_text(c, sc):
    """Return a human-readable representation of a subcategory

    Lookup order:
    1. the override table for category 'c' in SUBCATEGORY_MAP
    2. the generic label for 'sc' in SUBCATEGORY_MAP
    3. the capitalized subcategory name in plural form

    The result can be an empty string, which callers treat as
    "do not list this subcategory".
    """
    # SUBCATEGORY_MAP stores generic labels (str) and per-category
    # override tables (dict) in the same key space. Checking the value
    # type keeps a category that is named like a generic subcategory
    # from being indexed as if it were an override table, and keeps a
    # subcategory that is named like a category from returning a dict.
    overrides = SUBCATEGORY_MAP.get(c)
    if isinstance(overrides, dict) and sc in overrides:
        return overrides[sc]

    label = SUBCATEGORY_MAP.get(sc)
    if isinstance(label, str):
        return label

    sc = sc.capitalize()
    return sc if sc.endswith("s") else sc + "s"


def category_key(c):
    """Return the sort key for a (category, ...) item

    Items are ordered by the lowercased display name of their category.
    """
    name = c[0]
    return category_text(name).lower()


def subcategory_key(sc):
    """Return the sort key for a subcategory

    "issue" is mapped to "A"; every other name is its own key.
    """
    if sc == "issue":
        return "A"
    return sc


def build_extractor_list():
    """Collect the subcategories and domains of all extractor classes

    Returns a (categories, domains) tuple:
    - 'categories' maps a basecategory name to a dict that maps
      category names to sorted lists of subcategory names;
      extractors with a category of their own are stored under ""
    - 'domains' maps category names to their URL
    """
    categories = collections.defaultdict(
        lambda: collections.defaultdict(list))
    default = categories[""]
    domains = {}

    for extr in extractor._list_classes():
        name = extr.category
        if name in IGNORE_LIST:
            continue

        if not name:
            # no category of its own:
            # add one entry per (category, root) pair in 'instances'
            group = categories[extr.basecategory]
            for name, root in extr.instances:
                group[name].append(extr.subcategory)
                if name not in domains:
                    domains[name] = root + "/"
            continue

        default[name].append(extr.subcategory)
        if name not in domains:
            domains[name] = domain(extr)

    # bring every subcategory list into its display order
    for group in categories.values():
        for names in group.values():
            names.sort(key=subcategory_key)

    # e-hentai.org shares the subcategory list of exhentai
    default["e-hentai"] = default["exhentai"]
    domains["e-hentai"] = domains["exhentai"].replace("x", "-")

    # hentai-cosplays sister sites share its subcategory list
    for alias, url in (
        ("hentaiimg"    , "https://hentai-img.com/"),
        ("pornimagesxxx", "https://porn-images-xxx.com/"),
    ):
        default[alias] = default["hentaicosplays"]
        domains[alias] = url

    return categories, domains


# Table columns as (heading, number, content function) tuples.
# Each content function is called with (category, subcategories, domain)
# and returns the text of one table cell.
# NOTE(review): the number is not read anywhere in this file;
# presumably a column width - confirm before relying on it.
COLUMNS = (
    ("Site", 20,
     lambda c, scs, d: category_text(c)),
    ("URL" , 35,
     lambda c, scs, d: d),
    ("Capabilities", 50,
     # labels that are empty strings are left out of the listing
     lambda c, scs, d: ", ".join(filter(None, (
         subcategory_text(c, sc) for sc in scs)))),
    ("Authentication", 16,
     lambda c, scs, d: AUTH_MAP.get(c, "")),
)


def generate_output(columns, categories, domains):
    """Render the list of supported sites as a Markdown document

    columns:    sequence of tuples whose first element is the column
                heading and whose third element is a function that is
                called with (category, subcategories, domain) and
                returns the content of one table cell
    categories: maps basecategory names to dicts that map category
                names to their subcategories
    domains:    maps category names to their URL

    Returns the complete document as a string. Cell contents are
    inserted as-is and are therefore expected to be valid HTML.
    """
    thead = ["<tr>"]
    thead.extend("    <th>" + column[0] + "</th>" for column in columns)
    thead.append("</tr>")

    tbody = []
    append = tbody.append

    # section headings span the whole table, however many columns
    # the caller passed
    colspan = str(len(columns))

    for name, base in categories.items():

        if name and base:
            name = BASE_MAP.get(name) or (name.capitalize() + " Instances")
            append('\n<tr>\n    <td colspan="' + colspan + '"><strong>' +
                   name + '</strong></td>\n</tr>')

        clist = sorted(base.items(), key=category_key)
        for category, subcategories in clist:
            # one lookup per row; the name 'url' avoids shadowing
            # the module-level domain() function
            url = domains[category]
            append("<tr>")
            for column in columns:
                content = column[2](category, subcategories, url)
                append("    <td>" + content + "</td>")
            append("</tr>")

    TEMPLATE = """# Supported Sites

<!-- auto-generated by {} -->
Consider all sites to be NSFW unless otherwise known.

<table>
<thead valign="bottom">
{}
</thead>
<tbody valign="top">
{}
</tbody>
</table>
"""
    return TEMPLATE.format(
        # "<directory>/<filename>" of this script
        "/".join(os.path.normpath(__file__).split(os.sep)[-2:]),
        "\n".join(thead),
        "\n".join(tbody),
    )


# Build the table and write it to docs/<outfile>; the file name can be
# given as first command-line argument.
categories, domains = build_extractor_list()
outfile = sys.argv[1] if len(sys.argv) > 1 else "supportedsites.md"
# The document contains non-ASCII site names, so the encoding is set
# explicitly instead of relying on the platform's locale default.
with open(util.path("docs", outfile), "w", encoding="utf-8") as fp:
    fp.write(generate_output(COLUMNS, categories, domains))
-												update/cleanup Python dev scripts

- put common code in its own util.py file
- same Python3 shebang for all scripts
- add file docstrings
- fix format string replacement fields in man page template

											
										
										
											2019-04-16 18:16:48 +02:00
+								#!/usr/bin/env python3
 								# -*- coding: utf-8 -*-
-												categorize sites in supportedsites.md by basecategory

											
										
										
											2021-03-12 03:56:54 +01:00
+								"""Generate a Markdown document listing all supported sites"""
-												add a script to automatically build a list of supported sites

											
										
										
											2017-01-15 21:31:21 +01:00
-												add a 'generated by …' comment to supportedsites.rst

											
										
										
											2021-01-22 21:02:12 +01:00
+								import os
-												add a script to automatically build a list of supported sites

											
										
										
											2017-01-15 21:31:21 +01:00
+								import sys
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
+								import collections
-												add a script to automatically build a list of supported sites

											
										
										
											2017-01-15 21:31:21 +01:00
-												update/cleanup Python dev scripts

- put common code in its own util.py file
- same Python3 shebang for all scripts
- add file docstrings
- fix format string replacement fields in man page template

											
										
										
											2019-04-16 18:16:48 +02:00
+								import util
 								from gallery_dl import extractor
-												add a script to automatically build a list of supported sites

											
										
										
											2017-01-15 21:31:21 +01:00
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								CATEGORY_MAP = {
-												update supportedsites.rst

											
										
										
											2017-07-15 15:01:30 +02:00
+								    "2chan"          : "Futaba Channel",
-												[35photo] add user-, genre-, and image-extractors (#162)

											
										
										
											2019-03-18 01:11:30 +01:00
+								    "35photo"        : "35PHOTO",
-												[adultempire] add gallery extractor (closes #340)

											
										
										
											2019-07-21 22:29:57 +02:00
+								    "adultempire"    : "Adult Empire",
-												[gelbooru_v01] support some more boorus by default

- https://drawfriends.booru.org/
- https://vidyart.booru.org/
- https://tlb.booru.org/

											
										
										
											2021-03-13 17:48:10 +01:00
+								    "allgirlbooru"   : "All girl",
-												update supportedsites.rst

											
										
										
											2017-07-15 15:01:30 +02:00
+								    "archivedmoe"    : "Archived.Moe",
 								    "archiveofsins"  : "Archive of Sins",
-												check supportedsites.rst in release script

											
										
										
											2018-03-17 15:35:38 +01:00
+								    "artstation"     : "ArtStation",
-												[aryion] add gallery and post extractors (#390, #673)

											
										
										
											2020-04-08 21:38:02 +02:00
+								    "aryion"         : "Eka's Portal",
-												[foolfuuka] add support for more sites (#18)

- https://arch.b4k.co
- https://archive.whatisthisimnotgoodwithcomputers.com
- https://archive.yeet.net

Notes:
- The name "whatisthisimnotgoodwithcomputers" is way too long ...
- archive.yeet.net is out of date and also blocked by 4chan servers
  - newest threads are 2 weeks old
  - using "https://archive.yeet.net" as Referer header results in
    "403 Forbidden" when accessing 4chan

											
										
										
											2017-09-16 21:11:44 +02:00
+								    "b4k"            : "arch.b4k.co",
-												[mastodon] add access tokens for mastodon.social and baraag.net

(closes #665)

											
										
										
											2020-04-02 22:34:32 +02:00
+								    "baraag"         : "baraag",
-												[bcy] add user and post extractors (#592)

											
										
										
											2020-02-08 23:25:53 +01:00
+								    "bcy"            : "半次元",
-												[bobx] add gallery and model extractors

											
										
										
											2018-09-13 20:13:12 +02:00
+								    "bobx"           : "BobX",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "deviantart"     : "DeviantArt",
 								    "dokireader"     : "Doki Reader",
-												[gelbooru_v01] support some more boorus by default

- https://drawfriends.booru.org/
- https://vidyart.booru.org/
- https://tlb.booru.org/

											
										
										
											2021-03-13 17:48:10 +01:00
+								    "drawfriends"    : "Draw Friends",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "dynastyscans"   : "Dynasty Reader",
 								    "e621"           : "e621",
-												[erome] add extractors for albums, users, searches (closes #409)

											
										
										
											2021-02-07 22:58:19 +01:00
+								    "erome"          : "EroMe",
-												have e-hentai and exhentai on supportedsites.rst (#365)

											
										
										
											2019-08-03 11:42:28 +02:00
+								    "e-hentai"       : "E-Hentai",
 								    "exhentai"       : "ExHentai",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "fallenangels"   : "Fallen Angels Scans",
-												update fanbox entry in supportedsites.md

											
										
										
											2021-04-25 19:44:19 +02:00
+								    "fanbox"         : "pixivFANBOX",
-												[shopify] add generic collection and product extractors (#175)

with fashionnova.com  as a default domain

											
										
										
											2019-03-05 22:33:37 +01:00
+								    "fashionnova"    : "Fashion Nova",
-												[furaffinity] add extractors (#284)

											
										
										
											2020-02-11 19:51:24 +01:00
+								    "furaffinity"    : "Fur Affinity",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "hbrowse"        : "HBrowse",
 								    "hentai2read"    : "Hentai2Read",
-												[hentaicafe] add chapter and manga extractors (#101)

											
										
										
											2018-09-05 21:08:40 +02:00
+								    "hentaicafe"     : "Hentai Cafe",
-												[hentaicosplays] Add extractor (#1473)


											
										
										
											2021-04-18 20:28:00 +02:00
+								    "hentaicosplays" : "Hentai Cosplay",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "hentaifoundry"  : "Hentai Foundry",
-												[hentaifox] add chapter extractor (#160)

											
										
										
											2019-01-28 18:00:32 +01:00
+								    "hentaifox"      : "HentaiFox",
-												[hentaihand] add extractors (closes #605)

											
										
										
											2020-02-18 23:49:59 +01:00
+								    "hentaihand"     : "HentaiHand",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "hentaihere"     : "HentaiHere",
-												[hentaicosplays] Add extractor (#1473)


											
										
										
											2021-04-18 20:28:00 +02:00
+								    "hentaiimg"      : "Hentai Image",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "hitomi"         : "Hitomi.la",
-												[idolcomplex] add support for idol.sankakucomplex.com

											
										
										
											2018-01-09 17:52:12 +01:00
+								    "idolcomplex"    : "Idol Complex",
-												categorize sites in supportedsites.md by basecategory

											
										
										
											2021-03-12 03:56:54 +01:00
+								    "illusioncardsbooru": "Illusion Game Cards",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "imagebam"       : "ImageBam",
 								    "imagefap"       : "ImageFap",
-												[imgbb] add album extractor (#361)

											
										
										
											2019-07-30 23:02:21 +02:00
+								    "imgbb"          : "ImgBB",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "imgbox"         : "imgbox",
-												[imagechest] Add new extractor for ImageChest (#750)

* [imagechest] Add new extractor for ImageChest

* [imagechest] Fix flake8 compliance issues
											
										
										
											2020-05-18 19:02:56 +02:00
+								    "imagechest"     : "ImageChest",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "imgth"          : "imgth",
 								    "imgur"          : "imgur",
 								    "jaiminisbox"    : "Jaimini's Box",
-												[kabeuchi] add 'user' extractor (closes #561)

											
										
										
											2020-03-13 16:45:42 +01:00
+								    "kabeuchi"       : "かべうち",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "kireicake"      : "Kirei Cake",
 								    "kissmanga"      : "KissManga",
-												[lineblog] add blog and post extractors (closes #404)

											
										
										
											2019-09-06 21:58:13 +02:00
+								    "lineblog"       : "LINE BLOG",
-												[livedoor] add blog- and post-extractors (#190)

											
										
										
											2019-04-06 16:10:29 +02:00
+								    "livedoor"       : "livedoor Blog",
-												[shopify] support omgmiamiswimwear.com (closes #1280)

											
										
										
											2021-04-13 03:05:23 +02:00
+								    "omgmiamiswimwear": "Omg Miami Swimwear",
-												[mangadex] general improvements

- support >100 chapter entries per manga
- custom archive ID format
- detect non-existing chapters

											
										
										
											2018-03-06 14:15:15 +01:00
+								    "mangadex"       : "MangaDex",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "mangafox"       : "Manga Fox",
 								    "mangahere"      : "Manga Here",
-												[mangakakalot] Added extractors for MangaKakalot (#876)


											
										
										
											2020-07-13 21:20:09 +02:00
+								    "mangakakalot"   : "MangaKakalot",
-												[manganelo] update domain to 'manganato.com'

											
										
										
											2021-05-28 17:52:30 +02:00
+								    "manganelo"      : "Manganato",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "mangapark"      : "MangaPark",
 								    "mangastream"    : "Manga Stream",
-												[mastodon] add access tokens for mastodon.social and baraag.net

(closes #665)

											
										
										
											2020-04-02 22:34:32 +02:00
+								    "mastodon.social": "mastodon.social",
-												[myhentaigallery] update and fix extraction (#1001)

- extract more metadata
- match "/show/" URLs
- complete test results
- fix missing images for lines starting with " <img"
- fix missing comma in supportedsites.py

											
										
										
											2020-09-17 18:06:12 +02:00
+								    "myhentaigallery": "My Hentai Gallery",
-												[myportfolio] add user and gallery extractors (#95)

											
										
										
											2018-07-19 18:56:45 +02:00
+								    "myportfolio"    : "Adobe Portfolio",
-												Support naver webtoon (#1331)

* Support naver webtoon (WIP)

* Apply patch

* Change filename format

* Fill test results

* Fill test result
											
										
										
											2021-03-03 15:21:13 +01:00
+								    "naverwebtoon"   : "NaverWebtoon",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "nhentai"        : "nhentai",
 								    "nijie"          : "nijie",
-												[nozomi] add post and tag extractors (#388)

											
										
										
											2019-10-13 22:10:32 +02:00
+								    "nozomi"         : "Nozomi.la",
-												[nsfwalbum] add album extractor (closes #287)

											
										
										
											2019-06-22 22:43:09 +02:00
+								    "nsfwalbum"      : "NSFWalbum.com",
-												update supportedsites.rst

											
										
										
											2017-07-15 15:01:30 +02:00
+								    "nyafuu"         : "Nyafuu Archive",
-												[paheal] add tag- and post-extractors (closes #69)

											
										
										
											2018-01-15 16:39:05 +01:00
+								    "paheal"         : "rule #34",
-												update supportedsites.rst

											
										
										
											2021-01-22 19:41:50 +01:00
+								    "photovogue"     : "PhotoVogue",
-												[hentaicosplays] Add extractor (#1473)


											
										
										
											2021-04-18 20:28:00 +02:00
+								    "pornimagesxxx"  : "Porn Image",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "powermanga"     : "PowerManga",
 								    "readcomiconline": "Read Comic Online",
-												update supportedsites.rst

											
										
										
											2017-07-24 10:50:40 +02:00
+								    "rbt"            : "RebeccaBlackTech",
-												[redgifs] add image extractor (#724)

											
										
										
											2020-05-10 00:31:42 +02:00
+								    "redgifs"        : "RedGIFs",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "rule34"         : "Rule 34",
 								    "sankaku"        : "Sankaku Channel",
-												[sankakucomplex] move article extractor to its own module (#258)

											
										
										
											2019-05-27 23:49:23 +02:00
+								    "sankakucomplex" : "Sankaku Complex",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "seaotterscans"  : "Sea Otter Scans",
 								    "seiga"          : "Niconico Seiga",
 								    "senmanga"       : "Sen Manga",
 								    "sensescans"     : "Sense-Scans",
-												[sexcom] add pin and board extractors (#147)

											
										
										
											2019-04-23 22:10:39 +02:00
+								    "sexcom"         : "Sex.com",
-												[simplyhentai] add gallery extractor (#89)

											
										
										
											2018-05-27 15:25:04 +02:00
+								    "simplyhentai"   : "Simply Hentai",
-												[slickpic] add album extractor (#249)

											
										
										
											2019-06-09 21:59:22 +02:00
+								    "slickpic"       : "SlickPic",
-												[slideshare] improve metadata; flake8

- added 'views' and 'published' keywords
- fixed longer titles and descriptions

											
										
										
											2017-12-13 21:15:05 +01:00
+								    "slideshare"     : "SlideShare",
-												[smugmug] added image and album extractor

just some initial code that still requires a lot of work ...

TODO:
- folders
- old-style albums (which are nearly all of them ...)
- images from users
- OAuth

It could also happen that the API credentials used will become invalid
whenever my 14 day trial period ends (7 days remaining), but that
would just require users to supply their own.

											
										
										
											2018-04-29 21:27:25 +02:00
+								    "smugmug"        : "SmugMug",
-												[speakerdeck] Add a new extractor for speakerdeck.com (#726)


											
										
										
											2020-05-01 22:32:22 +02:00
+								    "speakerdeck"    : "Speaker Deck",
-												update supportedsites.py (#889, #893)

- mention optional auth access for more sites
- link to OAuth and Cookies sections in README

											
										
										
											2020-07-14 16:18:21 +02:00
+								    "subscribestar"  : "SubscribeStar",
-												categorize sites in supportedsites.md by basecategory

											
										
										
											2021-03-12 03:56:54 +01:00
+								    "tbib"           : "The Big ImageBoard",
-												update supportedsites.rst

											
										
										
											2017-07-24 10:50:40 +02:00
+								    "thebarchive"    : "The /b/ Archive",
-												categorize sites in supportedsites.md by basecategory

											
										
										
											2021-03-12 03:56:54 +01:00
+								    "thecollection"  : "The /co/llection",
-												[gelbooru_v01] support some more boorus by default

- https://drawfriends.booru.org/
- https://vidyart.booru.org/
- https://tlb.booru.org/

											
										
										
											2021-03-13 17:48:10 +01:00
+								    "theloudbooru"   : "The Loud Booru",
-												support `tumblrgallery.xyz` (#1298)

* support `tumblrgallery.xyz`

* fix format issues

* Refactor and add post and search page support

* Fix warnings

* Few improvements

* Better file names

* Fix linting errors

* move id closer to the beginning of the file name

Co-authored-by: topozorra <none>
											
										
										
											2021-03-03 15:20:47 +01:00
+								    "tumblrgallery"  : "TumblrGallery",
-												[vanillarock] add post and tag extractors (closes #254)

											
										
										
											2019-06-23 22:02:54 +02:00
+								    "vanillarock"    : "もえぴりあ",
-												[gelbooru_v01] support some more boorus by default

- https://drawfriends.booru.org/
- https://vidyart.booru.org/
- https://tlb.booru.org/

											
										
										
											2021-03-13 17:48:10 +01:00
+								    "vidyart"        : "/v/idyart",
-												[vk] initial support for albums (#474)

											
										
										
											2021-03-23 18:48:01 +01:00
+								    "vk"             : "VK",
-												[vsco] add user extractor (#331)

											
										
										
											2019-07-22 22:15:36 +02:00
+								    "vsco"           : "VSCO",
-												[foolfuuka] support 'archive.wakarimasen.moe' (closes #1595)

											
										
										
											2021-06-02 15:45:43 +02:00
+								    "wakarimasen"    : "Wakarimasen Archive",
-												[webtoons] Add a new extractor for webtoons.com (#761)

The webtoons extractor can extract episode and entire comic (all
episodes) from webtoons.com.

All the logic of the extractors should be trivial except for a couple
of kludges needed:

 - `ageGatePass' cookie is always set to avoid possible redirect and stop of
    extraction, especially in the comic extractor
 - The image URLs returned by the episode extractor could not be fetched
   directly and the `Referer:' HTTP header needs to be passed to fetch them

Close #593.
											
										
										
											2020-05-18 19:04:20 +02:00
+								    "webtoons"       : "Webtoon",
-												[wikiart] add extractors (#179)

for
- artists:          https://www.wikiart.org/en/thomas-cole
- artist-listings:  https://www.wikiart.org/en/artists-by-century/12
- artwork-listings: https://www.wikiart.org/en/paintings-by-media/grisaille

											
										
										
											2019-04-02 17:34:57 +02:00
+								    "wikiart"        : "WikiArt.org",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "worldthree"     : "World Three",
-												[xhamster] add gallery & user extractor (#281)

											
										
										
											2019-06-04 22:23:32 +02:00
+								    "xhamster"       : "xHamster",
-												[xvideos] add user profile extractor (#45)

											
										
										
											2017-11-02 17:28:35 +01:00
+								    "xvideos"        : "XVideos",
-												categorize sites in supportedsites.md by basecategory

											
										
										
											2021-03-12 03:56:54 +01:00
+								    "yandere"        : "yande.re",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								}
 								SUBCATEGORY_MAP = {
-												[nijie] add favorites extractor

adds support for 'https://nijie.info/user_like_illust_view.php?id=...'

											
										
										
											2018-03-31 18:54:25 +02:00
+								    "doujin" : "Doujin",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "gallery": "Galleries",
 								    "image"  : "individual Images",
-												[nozomi] support '/index-N.html' URLs (closes #1365)

and '/index-Popular-N.html'

											
										
										
											2021-03-11 01:06:47 +01:00
+								    "index"  : "Site Index",
-												remove dashes from subcategory names in supportedsites.rst

											
										
										
											2020-03-24 02:00:50 +01:00
+								    "issue"  : "Comic Issues",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "manga"  : "Manga",
-												[booru] add extractors for "Popular" images

											
										
										
											2017-08-24 21:24:51 +02:00
+								    "popular": "Popular Images",
-												[hentaifoundry] add 'popular' and 'recent' extractors

for "Popular Pictures" and "Recent Pictures" listings

											
										
										
											2018-09-22 21:28:16 +02:00
+								    "recent" : "Recent Images",
-												add 'Authentication' column to supportedsites.rst

											
										
										
											2017-06-15 21:06:20 +02:00
+								    "search" : "Search Results",
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								    "status" : "Images from Statuses",
-												remove dashes from subcategory names in supportedsites.rst

											
										
										
											2020-03-24 02:00:50 +01:00
+								    "tag"    : "Tag Searches",
-												change text representation of user extractors to "User Profiles"

											
										
										
											2019-09-22 22:21:48 +02:00
+								    "user"   : "User Profiles",
-												[deviantart] add 'watch' extractor (#794)

											
										
										
											2021-03-17 22:50:02 +01:00
+								    "watch"  : "Watches",
-												fix auto-generation of supportedsites.rst

											
										
										
											2020-01-13 22:29:42 +01:00
+								    "following"    : "",
-												[pinterest] add extractors for related pins

Related pins can not be accessed by adding a "#related" fragment
to the end of a Pinterest URL, for example:
- https://www.pinterest.com/pin/858146903966145189/#related
- https://www.pinterest.com/g1952849/test-/#related

There are no explicit real URLs for related pins,
using an option to enable them results in "clunky" code,
and a custom "related:<URL>" scheme doesn't feel right either.

											
										
										
											2018-08-15 21:28:27 +02:00
+								    "related-pin"  : "related Pins",
 								    "related-board": "",
-												fix auto-generation of supportedsites.rst

											
										
										
											2020-01-13 22:29:42 +01:00
 								    "artstation": {
 								        "artwork": "Artwork Listings",
 								    },
 								    "deviantart": {
 								        "stash": "Sta.sh",
-												[deviantart] add support for posts from watched users (#794)

											
										
										
											2021-03-22 19:20:08 +01:00
+								        "watch-posts": "",
-												fix auto-generation of supportedsites.rst

											
										
										
											2020-01-13 22:29:42 +01:00
+								    },
-												[hentaifoundry] add support for stories (closes #734)

											
										
										
											2020-09-27 02:22:23 +02:00
+								    "hentaifoundry": {
 								        "story": "",
 								    },
-												[instagram] use 'itertools.chain()'

											
										
										
											2020-03-16 22:57:30 +01:00
+								    "instagram": {
-												[instagram] add 'include' option (closes #1180)

Split the functionality of the old 'user' extractor into separate
'posts' and 'highlights' extractors, which respond to virtual URLs
('/<user>/posts' and '/<user>/highlights')

											
										
										
											2020-12-20 23:20:32 +01:00
+								        "posts": "",
-												[instagram] use 'itertools.chain()'

											
										
										
											2020-03-16 22:57:30 +01:00
+								        "saved": "Saved Posts",
-												update supportedsites.md entry for Instagram

											
										
										
											2021-04-22 22:57:06 +02:00
+								        "tagged": "Tagged Posts",
-												[instagram] use 'itertools.chain()'

											
										
										
											2020-03-16 22:57:30 +01:00
+								    },
-												[mangadex] add extractor for a user's followed feed (#1535)

											
										
										
											2021-06-08 02:45:36 +02:00
+								    "mangadex": {
 								        "feed" : "Followed Feed",
 								    },
-												fix auto-generation of supportedsites.rst

											
										
										
											2020-01-13 22:29:42 +01:00
+								    "newgrounds": {
 								        "art"  : "Art",
 								        "audio": "Audio",
 								        "media": "Media Files",
 								    },
 								    "pinterest": {
-												[pinterest] add support for getting all boards of a user

(#1205)

											
										
										
											2020-12-29 16:57:03 +01:00
+								        "board": "",
-												fix auto-generation of supportedsites.rst

											
										
										
											2020-01-13 22:29:42 +01:00
+								        "pinit": "pin.it Links",
 								    },
 								    "pixiv": {
 								        "me"  : "pixiv.me Links",
 								        "work": "individual Images",
 								    },
-												[sankaku] add support for book searches (closes #1204)

											
										
										
											2020-12-29 17:36:37 +01:00
+								    "sankaku": {
 								        "books": "Book Searches",
 								    },
-												fix auto-generation of supportedsites.rst

											
										
										
											2020-01-13 22:29:42 +01:00
+								    "smugmug": {
 								        "path": "Images from Users and Folders",
 								    },
 								    "twitter": {
 								        "media": "Media Timelines",
-												[twitter] add 'list-members' extractor (closes #1096)

											
										
										
											2020-11-13 06:47:45 +01:00
+								        "list-members": "List Members",
-												fix auto-generation of supportedsites.rst

											
										
										
											2020-01-13 22:29:42 +01:00
+								    },
-												[wallhaven] add 'collections' extractor (#1351)

											
										
										
											2021-03-02 01:32:26 +01:00
+								    "wallhaven": {
 								        "collections": "",
-												fix auto-generation of supportedsites.rst

											
										
										
											2020-01-13 22:29:42 +01:00
+								    },
-												[weasyl] update and simplify

- simplify 'pattern' regexps
- parse 'posted_at' as 'date'
- use unaltered 'title' ({title!l:R /_/} to lowercase and replace spaces)

											
										
										
											2020-09-25 23:43:11 +02:00
+								    "weasyl": {
 								        "journals"   : "",
 								        "submissions": "",
 								    },
-												[wallhaven] add 'collections' extractor (#1351)

											
										
										
											2021-03-02 01:32:26 +01:00
+								    "wikiart": {
 								        "artists": "Artist Listings",
 								    },
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
+								}
-												categorize sites in supportedsites.md by basecategory

											
										
										
											2021-03-12 03:56:54 +01:00
# Section headings for groups of sites that share a base category.
BASE_MAP = {
    "foolfuuka": "FoolFuuka 4chan Archives",
    "foolslide": "FoOlSlide Instances",
    "gelbooru_v01": "Gelbooru Beta 0.1.11",
    "gelbooru_v02": "Gelbooru Beta 0.2",
    "moebooru": "Moebooru and MyImouto",
}
-												update docs/supportedsites

- use Markdown with inline HTML instead of reStructuredText
- move file from docs/supportedsites.rst to docs/supportedsites.md
- update Makefile, README, etc

											
										
										
											2021-03-12 03:03:48 +01:00
+								_OAUTH = '<a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a>'
 								_COOKIES = '<a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a>'
 								_APIKEY_DB = \
 								    '<a href="configuration.rst#extractorderpibooruapi-key">API Key</a>'
 								_APIKEY_WH = \
 								    '<a href="configuration.rst#extractorwallhavenapi-key">API Key</a>'
 								_APIKEY_WY = \
 								    '<a href="configuration.rst#extractorweasylapi-key">API Key</a>'
-												update supportedsites.py (#889, #893)

- mention optional auth access for more sites
- link to OAuth and Cookies sections in README

											
										
										
											2020-07-14 16:18:21 +02:00
-												add 'Authentication' column to supportedsites.rst

											
										
										
											2017-06-15 21:06:20 +02:00
# Content of the "Authentication" column, keyed by category name.
# Categories without an entry get an empty cell.
AUTH_MAP = {
    "aryion": "Supported",
    "baraag": _OAUTH,
    "danbooru": "Supported",
    "derpibooru": _APIKEY_DB,
    "deviantart": _OAUTH,
    "e621": "Supported",
    "e-hentai": "Supported",
    "exhentai": "Supported",
    "fanbox": _COOKIES,
    "fantia": _COOKIES,
    "flickr": _OAUTH,
    "furaffinity": _COOKIES,
    "idolcomplex": "Supported",
    "imgbb": "Supported",
    "inkbunny": "Supported",
    "instagram": "Supported",
    "mangadex": "Supported",
    "mangoxo": "Supported",
    "mastodon.social": _OAUTH,
    "newgrounds": "Supported",
    "nijie": "Required",
    "patreon": _COOKIES,
    "pawoo": _OAUTH,
    "pillowfort": "Supported",
    "pinterest": "Supported",
    "pixiv": _OAUTH,
    "ponybooru": "API Key",
    "reddit": _OAUTH,
    "sankaku": "Supported",
    "seiga": "Required",
    "smugmug": _OAUTH,
    "subscribestar": "Supported",
    "tapas": "Supported",
    "tsumino": "Supported",
    "tumblr": _OAUTH,
    "twitter": "Supported",
    "wallhaven": _APIKEY_WH,
    "weasyl": _APIKEY_WY,
}
 								IGNORE_LIST = (
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
+								    "directlink",
-												add 'Authentication' column to supportedsites.rst

											
										
										
											2017-06-15 21:06:20 +02:00
+								    "oauth",
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
+								    "recursive",
 								    "test",
-												add 'Authentication' column to supportedsites.rst

											
										
										
											2017-06-15 21:06:20 +02:00
+								)
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
def domain(cls):
    """Return the web-domain related to an extractor class

    The following sources are tried in order:
      1. the last word of the defining module's docstring, if it
         starts with "http"
      2. the class' 'root' attribute, with a trailing slash appended
      3. the class' 'https' flag combined with the last word of the
         class docstring
      4. scheme and host of the first test URL from cls._get_tests()

    An empty string is returned if none of them yields a result.
    """
    try:
        url = sys.modules[cls.__module__].__doc__.split()[-1]
    except (KeyError, AttributeError, IndexError):
        # module not registered, no docstring, or a blank docstring
        url = ""
    if url.startswith("http"):
        return url

    root = getattr(cls, "root", None)
    if root:
        return root + "/"

    if hasattr(cls, "https"):
        scheme = "https" if cls.https else "http"
        netloc = cls.__doc__.split()[-1]
        return "{}://{}/".format(scheme, netloc)

    test = next(cls._get_tests(), None)
    if test:
        url = test[0]
        # start searching behind the scheme ("https://" is 8 characters)
        end = url.find("/", 8)
        if end < 0:
            # URL consists of scheme and host only; slicing with the
            # -1 from find() would yield an empty string
            return url + "/"
        return url[:end+1]

    return ""
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
-												fix supportedsites.py

											
										
										
											2021-02-20 22:31:21 +01:00
def category_text(c):
    """Return the display name of category 'c'

    Uses the entry in CATEGORY_MAP if there is a non-empty one,
    and the capitalized category name otherwise.
    """
    text = CATEGORY_MAP.get(c)
    if text:
        return text
    return c.capitalize()
-												update build_supportedsites.py script

											
										
										
											2018-09-28 12:39:05 +02:00
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
-												fix supportedsites.py

											
										
										
											2021-02-20 22:31:21 +01:00
def subcategory_text(c, sc):
    """Return the display label of subcategory 'sc' of category 'c'

    Lookup order: site-specific entry in SUBCATEGORY_MAP[c], generic
    entry in SUBCATEGORY_MAP, and finally the capitalized subcategory
    name in plural form.
    """
    # site-specific override
    if sc in SUBCATEGORY_MAP.get(c, ()):
        return SUBCATEGORY_MAP[c][sc]

    # generic label
    if sc in SUBCATEGORY_MAP:
        return SUBCATEGORY_MAP[sc]

    # fallback: capitalize and pluralize
    label = sc.capitalize()
    if label.endswith("s"):
        return label
    return label + "s"
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
-												fix supportedsites.py

											
										
										
											2021-02-20 22:31:21 +01:00
def category_key(c):
    """Return the sort key for a (category, subcategories) pair

    The key is the lowercased display name of the category.
    """
    name = c[0]
    return category_text(name).lower()
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
-												fix supportedsites.py

											
										
										
											2021-02-20 22:31:21 +01:00
def subcategory_key(sc):
    """Return the sort key for subcategory 'sc'

    "issue" is mapped to "A", which sorts before any lowercase name.
    """
    if sc == "issue":
        return "A"
    return sc
-												update supportedsites.rst

											
										
										
											2017-07-15 15:01:30 +02:00
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
def build_extractor_list():
    """Collect categories, subcategories, and domains of all extractors

    Returns a (categories, domains) tuple:
      categories  maps base category name -> category name -> sorted list
                  of subcategory names; sites without a base category are
                  stored under the "" key
      domains     maps category name -> URL
    """
    categories = collections.defaultdict(
        lambda: collections.defaultdict(list))
    default = categories[""]
    domains = {}

    for extr in extractor._list_classes():
        category = extr.category
        if category in IGNORE_LIST:
            continue

        if not category:
            # class without its own category: one entry per instance
            base = categories[extr.basecategory]
            for instance, root in extr.instances:
                base[instance].append(extr.subcategory)
                domains.setdefault(instance, root + "/")
            continue

        default[category].append(extr.subcategory)
        if category not in domains:
            domains[category] = domain(extr)

    # sort subcategory lists
    for base in categories.values():
        for subcategories in base.values():
            subcategories.sort(key=subcategory_key)

    # add e-hentai.org (shares the subcategory list of exhentai)
    default["e-hentai"] = default["exhentai"]
    domains["e-hentai"] = domains["exhentai"].replace("x", "-")

    # add hentai-cosplays sister sites (hentai-img, porn-images-xxx)
    sister_sites = (
        ("hentaiimg", "https://hentai-img.com/"),
        ("pornimagesxxx", "https://porn-images-xxx.com/"),
    )
    for category, url in sister_sites:
        default[category] = default["hentaicosplays"]
        domains[category] = url

    return categories, domains
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
 								# define table columns
 								COLUMNS = (
 								    ("Site", 20,
-												fix supportedsites.py

											
										
										
											2021-02-20 22:31:21 +01:00
+								     lambda c, scs, d: category_text(c)),
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
+								    ("URL" , 35,
-												fix supportedsites.py

											
										
										
											2021-02-20 22:31:21 +01:00
+								     lambda c, scs, d: d),
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
+								    ("Capabilities", 50,
-												fix supportedsites.py

											
										
										
											2021-02-20 22:31:21 +01:00
+								     lambda c, scs, d: ", ".join(subcategory_text(c, sc) for sc in scs
 								                                 if subcategory_text(c, sc))),
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
+								    ("Authentication", 16,
-												fix supportedsites.py

											
										
										
											2021-02-20 22:31:21 +01:00
+								     lambda c, scs, d: AUTH_MAP.get(c, "")),
-												simplify build_supportedsites.py

											
										
										
											2019-02-20 19:25:41 +01:00
+								)
-												update build_supportedsites.py

											
										
										
											2019-01-09 14:21:19 +01:00
-												update docs/supportedsites

- use Markdown with inline HTML instead of reStructuredText
- move file from docs/supportedsites.rst to docs/supportedsites.md
- update Makefile, README, etc

											
										
										
											2021-03-12 03:03:48 +01:00
def generate_output(columns, categories, domains):
    """Return the content of the supported-sites document as a string

    columns     sequence of (title, width, function) tuples; 'function'
                is called with (category, subcategories, domain) and
                returns the text of one cell
    categories  mapping of base category -> category -> subcategories
    domains     mapping of category -> URL
    """
    thead = ["<tr>"]
    thead.extend("    <th>" + column[0] + "</th>" for column in columns)
    thead.append("</tr>")

    tbody = []
    append = tbody.append
    # group headings span the whole table, however many columns it has
    colspan = str(len(columns))

    for name, base in categories.items():

        if name and base:
            name = BASE_MAP.get(name) or (name.capitalize() + " Instances")
            append('\n<tr>\n    <td colspan="' + colspan + '"><strong>' +
                   name + '</strong></td>\n</tr>')

        for category, subcategories in sorted(
                base.items(), key=category_key):
            # one lookup per row; named 'url' to avoid shadowing domain()
            url = domains[category]
            append("<tr>")
            for column in columns:
                content = column[2](category, subcategories, url)
                append("    <td>" + content + "</td>")
            append("</tr>")

    TEMPLATE = """# Supported Sites
<!-- auto-generated by {} -->
Consider all sites to be NSFW unless otherwise known.
<table>
<thead valign="bottom">
{}
</thead>
<tbody valign="top">
{}
</tbody>
</table>
"""
    return TEMPLATE.format(
        "/".join(os.path.normpath(__file__).split(os.sep)[-2:]),
        "\n".join(thead),
        "\n".join(tbody),
    )
-												improve supportedsites.rst and build script

											
										
										
											2017-04-20 16:56:50 +02:00
-												add a script to automatically build a list of supported sites

											
										
										
											2017-01-15 21:31:21 +01:00
-												fix supportedsites.py

											
										
										
											2021-02-20 22:31:21 +01:00
categories, domains = build_extractor_list()
outfile = sys.argv[1] if len(sys.argv) > 1 else "supportedsites.md"

# The document contains non-ASCII site names, so write it as UTF-8
# instead of relying on the locale's default encoding.
with open(util.path("docs", outfile), "w", encoding="utf-8") as fp:
    fp.write(generate_output(COLUMNS, categories, domains))