mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-23 11:12:40 +01:00
e47952ac14
* add extractors for fantia and fanbox * appease linter * make docstrings unique * [fantia] refactor post extraction * [fantia] capitalize * [fantia] improve regex pattern * code style * capitalize * [fanbox] use BASE_PATTERN for url regexes * [fanbox] refactor metadata and post extraction * [fanbox] improve url base pattern * [fanbox] accept creator page links ending with /posts * [fanbox] more tests * [fantia] improved pagination * [fanbox] misc. code logic improvements * [fantia] finish restructuring pagination code * [fanbox] avoid making a request for each individual post when processing a creator page * [fanbox] support embedded videos * [fanbox] fix errors * [fanbox] document extractor.fanbox.videos * [fanbox] handle "article" and "entry" post types, all embeds * [fanbox] fix downloading of embedded fanbox posts
206 lines
3.7 KiB
Python
206 lines
3.7 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright 2015-2021 Mike Fährmann
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
# published by the Free Software Foundation.
|
|
|
|
import re
|
|
|
|
# Names of the extractor modules in this package.  The order is significant:
# modules are imported lazily in exactly this sequence, and the classes they
# define are tried in that order when looking for a match, so the entries
# after the alphabetical section come last.
modules = [
    # site-specific modules, alphabetical
    "2chan", "35photo", "3dbooru", "4chan", "500px", "8kun", "8muses",
    "adultempire", "architizer", "artstation", "aryion",
    "bcy", "behance", "blogger",
    "cyberdrop",
    "danbooru", "deviantart", "dynastyscans",
    "e621", "erome", "exhentai",
    "fallenangels", "fanbox", "fantia", "flickr", "furaffinity", "fuskator",
    "gelbooru", "gelbooru_v01", "gelbooru_v02", "gfycat",
    "hbrowse", "hentai2read", "hentaicosplays", "hentaifoundry",
    "hentaifox", "hentaihand", "hentaihere", "hiperdex", "hitomi",
    "idolcomplex", "imagebam", "imagechest", "imagefap", "imgbb", "imgbox",
    "imgth", "imgur", "inkbunny", "instagram", "issuu",
    "kabeuchi", "keenspot", "kemonoparty", "khinsider", "komikcast",
    "lineblog", "livedoor", "luscious",
    "mangadex", "mangafox", "mangahere", "mangakakalot", "manganelo",
    "mangapark", "mangoxo", "myhentaigallery", "myportfolio",
    "naver", "naverwebtoon", "newgrounds", "ngomik", "nhentai", "nijie",
    "nozomi", "nsfwalbum",
    "paheal", "patreon", "philomena", "photobucket", "photovogue", "piczel",
    "pillowfort", "pinterest", "pixiv", "pixnet", "plurk", "pornhub",
    "pururin",
    "reactor", "readcomiconline", "reddit", "redgifs",
    "sankaku", "sankakucomplex", "seiga", "senmanga", "sexcom",
    "simplyhentai", "slickpic", "slideshare", "smugmug", "speakerdeck",
    "subscribestar",
    "tapas", "tsumino", "tumblr", "tumblrgallery", "twitter",
    "unsplash",
    "vanillarock", "vk", "vsco",
    "wallhaven", "warosu", "weasyl", "webtoons", "weibo", "wikiart",
    "xhamster", "xvideos",
    # remaining modules, deliberately kept out of the alphabetical order
    "booru", "moebooru", "foolfuuka", "foolslide", "mastodon", "shopify",
    "imagehosts", "directlink", "recursive", "oauth", "test",
]
|
|
|
|
|
|
def find(url):
    """Return an extractor instance suitable for 'url', or None

    Extractor classes are tried in the order produced by _list_classes().
    The first class whose compiled 'pattern' matches 'url' is instantiated
    with the resulting match object and returned.
    """
    for extractor_class in _list_classes():
        hit = extractor_class.pattern.match(url)
        if not hit:
            continue
        return extractor_class(hit)
    return None
|
|
|
|
|
|
def add(cls):
    """Register the extractor class 'cls' and return it

    The 'pattern' attribute of 'cls' is replaced by its compiled form
    before the class is appended to the list of known extractors.
    """
    compiled = re.compile(cls.pattern)
    cls.pattern = compiled
    _cache.append(cls)
    return cls
|
|
|
|
|
|
def add_module(module):
    """Register every extractor class defined in 'module'

    The classes are collected with _get_classes(), their 'pattern'
    attributes are compiled in place, and all of them are appended to the
    list of known extractors.  Returns the list of collected classes.
    """
    found = _get_classes(module)
    for extractor_class in found:
        extractor_class.pattern = re.compile(extractor_class.pattern)
    _cache.extend(found)
    return found
|
|
|
|
|
|
def extractors():
    """Return a list of all available extractor classes, sorted by name"""
    ordered = list(_list_classes())
    ordered.sort(key=lambda extractor_class: extractor_class.__name__)
    return ordered
|
|
|
|
|
|
# --------------------------------------------------------------------
|
|
# internals
|
|
|
|
# Extractor classes registered so far (filled by add() and add_module()).
_cache = []
# One shared iterator over 'modules': _list_classes() resumes from wherever
# the previous, possibly abandoned, iteration stopped.
_module_iter = iter(modules)
|
|
|
|
|
|
def _list_classes():
    """Yield all available extractor classes

    Classes already stored in '_cache' are yielded first.  After that, the
    modules still pending in '_module_iter' are imported one at a time and
    the classes registered by add_module() are yielded.  Once the module
    iterator is exhausted, the module-level name '_list_classes' is rebound
    to a function that simply returns '_cache'.
    """
    yield from _cache

    namespace = globals()
    for name in _module_iter:
        # level=1 requests an import relative to the current package
        imported = __import__(name, namespace, None, (), 1)
        yield from add_module(imported)

    # every module has been imported: later calls can return the cache as is
    namespace["_list_classes"] = lambda: _cache
|
|
|
|
|
|
def _get_classes(module):
    """Return a list of all extractor classes in a module

    An object from the module's namespace is selected when it has a
    'pattern' attribute and its '__module__' equals the module's name,
    which leaves out objects that were imported from elsewhere.
    """
    module_name = module.__name__
    return [
        cls for cls in module.__dict__.values() if (
            hasattr(cls, "pattern") and
            # Not every object exposes '__module__'; treat a missing
            # attribute as "not defined here" instead of letting an
            # AttributeError abort the whole scan.
            getattr(cls, "__module__", None) == module_name
        )
    ]
|