From 26bb9d62de998d1b259d479aacfcdef41db80b05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 28 Jun 2015 22:53:52 +0200 Subject: [PATCH] move and rework extractor-loading code --- gallery_dl/download.py | 68 +------------------------------- gallery_dl/extractor/__init__.py | 53 +++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 66 deletions(-) diff --git a/gallery_dl/download.py b/gallery_dl/download.py index 96ababa5..5366cba2 100644 --- a/gallery_dl/download.py +++ b/gallery_dl/download.py @@ -8,9 +8,8 @@ import os import sys -import re import importlib - +from . import extractor from .extractor.common import Message class DownloadManager(): @@ -19,7 +18,6 @@ class DownloadManager(): self.opts = opts self.config = config self.modules = {} - self.extractors = ExtractorFinder(config) def add(self, url): job = DownloadJob(self, url) @@ -45,7 +43,7 @@ class DownloadJob(): def __init__(self, mngr, url): self.mngr = mngr - self.extractor, self.info = mngr.extractors.get_for_url(url) + self.extractor, self.info = extractor.find(url, mngr.config) if self.extractor is None: return self.directory = mngr.get_base_directory() @@ -140,65 +138,3 @@ class DownloadJob(): if tries == 0: print("\r", end="") print("\r\033[1;32m", path, "\033[0m", sep="") - - -class ExtractorFinder(): - - def __init__(self, config): - self.config = config - - def get_for_url(self, url): - """Get an extractor-instance suitable for 'url'""" - name, match = self.find_pattern_match(url) - if match: - module = importlib.import_module(".extractor." 
+ name, __package__) - klass = getattr(module, module.info["extractor"]) - return klass(match, self.config), module.info - else: - print("no suitable extractor found") - return None, None - - def find_pattern_match(self, url): - """Find a pattern, that matches 'url', and return the (category,match) tuple""" - for category in self.config: - for key, value in self.config[category].items(): - if key.startswith("regex"): - match = re.match(value, url) - if match: - return category, match - for category, info in self.extractor_metadata(): - for pattern in info["pattern"]: - match = re.match(pattern, url) - if match: - return category, match - return None, None - - def extractor_metadata(self): - """Yield all extractor-name, -metadata tuples""" - path = os.path.join(os.path.dirname(__file__), "extractor") - for name in os.listdir(path): - extractor_path = os.path.join(path, name) - info = self.get_info_dict(extractor_path) - if info is not None: - yield os.path.splitext(name)[0], info - - @staticmethod - def get_info_dict(extractor_path): - """Get info-/metadata-dictionary for an extractor""" - try: - with open(extractor_path) as file: - for _ in range(30): - line = next(file) - if line.startswith("info ="): - break - else: - return None - - info = [line[6:]] - for line in file: - info.append(line) - if line.startswith("}"): - break - except (StopIteration, OSError): - return None - return eval("".join(info)) diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 8b137891..d2086806 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -1 +1,54 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. 
import re
import importlib

# Names of the extractor submodules of this package, in the order in which
# their patterns are tried by find().
modules = [
    "pixiv",
    "exhentai",
    "gelbooru",
    "3dbooru",
    "4chan",
    "8chan",
    "batoto",
    "danbooru",
    "e621",
    "imagebam",
    "imgbox",
    "imgchili",
    "mangareader",
    "nijie",
    "redhawkscans",
    "yandere",
]


def find(url, config):
    """Find an extractor suitable for handling the given url.

    Every known pattern is tried with re.match() against 'url'. On the
    first hit the extractor class of the owning module is instantiated
    with the match object and 'config'.

    Returns a (extractor-instance, module-info-dict) tuple, or
    (None, None) if no pattern matches.
    """
    for pattern, module, klass in _list_patterns():
        match = re.match(pattern, url)
        if match:
            return klass(match, config), module.info
    # Always return a 2-tuple: callers unpack the result into two names,
    # so a bare None would raise a TypeError for unsupported urls.
    return None, None

# --------------------------------------------------------------------
# internals

# (pattern, module, klass) tuples of all extractor modules imported so far
_cache = []
# Shared iterator over the names of the modules that are not yet imported;
# it is advanced lazily by _list_patterns() and never reset.
_module_iter = iter(modules)


def _list_patterns():
    """Yield all available (pattern, module, klass) tuples.

    Entries of already imported modules are served from the cache first.
    The remaining modules are then imported one at a time, so a consumer
    that stops early never triggers the import of later modules.
    """
    for entry in _cache:
        yield entry
    for module_name in _module_iter:
        module = importlib.import_module("." + module_name, __package__)
        # module.info["extractor"] holds the name of the extractor class
        klass = getattr(module, module.info["extractor"])
        entries = [
            (pattern, module, klass)
            for pattern in module.info["pattern"]
        ]
        # Cache every pattern of this module *before* yielding any of them.
        # The consumer may abandon this generator after the first hit, and
        # _module_iter has already moved past this module, so patterns not
        # cached here would be lost for all subsequent lookups.
        _cache.extend(entries)
        for entry in entries:
            yield entry