2015-06-28 22:53:52 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
2014-10-12 21:56:44 +02:00
|
|
|
|
2018-01-07 21:42:28 +01:00
|
|
|
# Copyright 2015-2018 Mike Fährmann
|
2015-06-28 22:53:52 +02:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
|
|
|
import re
|
|
|
|
import importlib
|
|
|
|
|
|
|
|
# Names of the extractor submodules of this package.
#
# _list_patterns() imports them lazily and in exactly this order, and find()
# returns the first extractor whose pattern matches, so the position of a
# name in this list is also its matching priority.
modules = [
    "2chan",
    "3dbooru",
    "4chan",
    "4plebs",
    "8chan",
    "archivedmoe",
    "archiveofsins",
    "b4k",
    "danbooru",
    "desuarchive",
    "deviantart",
    "dokireader",
    "dynastyscans",
    "e621",
    "exhentai",
    "fallenangels",
    "fireden",
    "flickr",
    "gelbooru",
    "gfycat",
    "gomanga",
    "hbrowse",
    "hentai2read",
    "hentaifoundry",
    "hentaihere",
    "hitomi",
    "idolcomplex",
    "imagebam",
    "imagefap",
    "imgbox",
    "imgchili",
    "imgth",
    "imgur",
    "jaiminisbox",
    "khinsider",
    "kireicake",
    "kissmanga",
    "komikcast",
    "konachan",
    "loveisover",
    "luscious",
    "mangafox",
    "mangahere",
    "mangapanda",
    "mangapark",
    "mangareader",
    "mangastream",
    "nhentai",
    "nijie",
    "nyafuu",
    "paheal",
    "pawoo",
    "pinterest",
    "pixiv",
    "powermanga",
    "puremashiro",
    "readcomiconline",
    "rebeccablacktech",
    "reddit",
    "rule34",
    "safebooru",
    "sankaku",
    "seaotterscans",
    "seiga",
    "senmanga",
    "sensescans",
    "slideshare",
    "spectrumnexus",
    "thebarchive",
    "tumblr",
    "twitter",
    "warosu",
    "whatisthisimnotgoodwithcomputers",
    "worldthree",
    "yandere",
    "yeet",
    "xvideos",
    # NOTE(review): the remaining names are not in alphabetical order;
    # presumably generic/utility extractors that are kept last on purpose
    # so that the site-specific modules above match first -- confirm
    # before reordering.
    "imagehosts",
    "directlink",
    "recursive",
    "oauth",
    "test",
]
|
|
|
|
|
2017-02-01 00:53:19 +01:00
|
|
|
|
2015-10-05 17:52:50 +02:00
|
|
|
def find(url):
    """Return an extractor instance for 'url', or None if nothing matches

    The (pattern, class) pairs are tried in the order _list_patterns()
    yields them; the first class whose pattern matches and which is not
    currently blacklisted is instantiated with the match object.
    """
    for regex, extractor in _list_patterns():
        hit = regex.match(url)
        if not hit:
            continue
        if extractor in _blacklist:
            continue
        return extractor(hit)
    return None
|
2015-06-28 22:53:52 +02:00
|
|
|
|
2017-02-01 00:53:19 +01:00
|
|
|
|
2018-02-02 00:01:41 +01:00
|
|
|
def add(klass):
    """Add 'klass' to the list of available extractors

    Every regular expression in 'klass.pattern' is compiled and stored
    together with 'klass', the same way add_module() registers the
    classes it discovers.
    """
    # the patterns live in the 'pattern' attribute; iterating 'klass'
    # directly fails for ordinary classes
    _cache.extend(
        (re.compile(pattern), klass)
        for pattern in klass.pattern
    )
|
|
|
|
|
|
|
|
|
|
|
|
def add_module(module):
    """Register all extractor classes found in 'module'

    Each pattern of each class returned by _get_classes() is compiled and
    paired with its class. The new (pattern, class) tuples are appended
    to the cache and also returned as a list.
    """
    entries = []
    for klass in _get_classes(module):
        for pattern in klass.pattern:
            entries.append((re.compile(pattern), klass))
    _cache.extend(entries)
    return entries
|
|
|
|
|
|
|
|
|
2015-12-12 15:58:07 +01:00
|
|
|
def extractors():
    """Return a list of all available extractor classes, sorted by name"""
    # a class shows up once per pattern in _list_patterns(); deduplicate
    unique = {klass for _, klass in _list_patterns()}
    return sorted(unique, key=lambda klass: klass.__name__)
|
2015-12-12 15:58:07 +01:00
|
|
|
|
2017-02-01 00:53:19 +01:00
|
|
|
|
2017-05-24 12:32:44 +02:00
|
|
|
class blacklist():
    """Context Manager to blacklist extractor modules

    While the context is active, find() skips every blacklisted class.

    Arguments:
        categories: container of category names; every known extractor
            class whose 'category' attribute is in it gets blacklisted
        extractors: optional iterable of additional extractor classes to
            blacklist; it is copied and never modified
    """

    def __init__(self, categories, extractors=None):
        # copy, so the appends below never leak into the caller's list
        self.extractors = list(extractors) if extractors else []
        for _, klass in _list_patterns():
            if klass.category in categories:
                self.extractors.append(klass)
        # classes this instance has put into the shared blacklist
        self._added = set()

    def __enter__(self):
        # remember only the entries that are new, so that leaving this
        # context does not drop entries owned by an enclosing one
        self._added = set(self.extractors) - _blacklist
        _blacklist.update(self._added)

    def __exit__(self, etype, value, traceback):
        # undo exactly what __enter__ added instead of clearing everything
        _blacklist.difference_update(self._added)
        self._added = set()
|
|
|
|
|
|
|
|
|
2015-06-28 22:53:52 +02:00
|
|
|
# --------------------------------------------------------------------
|
|
|
|
# internals
|
|
|
|
|
|
|
|
# (compiled pattern, extractor class) tuples registered so far; filled by
# add() and add_module() and yielded first by _list_patterns()
_cache = []
# extractor classes that find() currently skips; modified by the
# 'blacklist' context manager
_blacklist = set()
# one shared iterator over 'modules': _list_patterns() consumes names from
# it, so each module name is taken (and its module imported) at most once
_module_iter = iter(modules)
|
|
|
|
|
2017-02-01 00:53:19 +01:00
|
|
|
|
2015-06-28 22:53:52 +02:00
|
|
|
def _list_patterns():
    """Yield all available (pattern, class) tuples

    Entries already in the cache come first. After that, the modules not
    yet taken from the shared module iterator are imported one by one and
    their freshly registered entries are yielded.
    """
    for entry in _cache:
        yield entry

    for name in _module_iter:
        # relative import of the submodule from this package
        module = importlib.import_module("." + name, __package__)
        yield from add_module(module)
|
2015-11-20 19:54:07 +01:00
|
|
|
|
2017-02-01 00:53:19 +01:00
|
|
|
|
2015-11-20 19:54:07 +01:00
|
|
|
def _get_classes(module):
    """Return a list of all extractor classes in a module

    An object counts if it has a 'pattern' attribute and its '__module__'
    equals the module's name, i.e. it was not merely imported into it.
    """
    found = []
    for obj in module.__dict__.values():
        if not hasattr(obj, "pattern"):
            continue
        if obj.__module__ != module.__name__:
            continue
        found.append(obj)
    return found
|