From 652d60a5cdcc23c2a96b86467ce6f08dd50bef9b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Thu, 9 Apr 2015 16:13:00 +0200
Subject: [PATCH] implement new extractor-module selection

---
 gallery_dl/download.py | 89 ++++++++++++++++++++++--------------------
 1 file changed, 47 insertions(+), 42 deletions(-)

diff --git a/gallery_dl/download.py b/gallery_dl/download.py
index 04f0ca5b..f73cbb72 100644
--- a/gallery_dl/download.py
+++ b/gallery_dl/download.py
@@ -9,7 +9,6 @@
 import os
 import sys
 import re
-import sqlite3
 import importlib
 
 from .extractor.common import Message
@@ -133,51 +132,57 @@ class ExtractorFinder():
 
     def __init__(self, config):
         self.config = config
-        self.match_list = list()
-        if "database" in config["general"]:
-            path = os.path.expanduser(config["general"]["database"])
-            conn = sqlite3.connect(path)
-            self.load_from_database(conn)
-        self.load_from_config(config)
 
     def get_for_url(self, url):
-        # TODO: implement general case
-        module = importlib.import_module(".extractor.8chan", __package__)
-        for pattern in module.info["pattern"]:
-            match = re.match(pattern, url)
-            if match:
-                klass = getattr(module, module.info["extractor"])
-                return klass(match, self.config), module.info
-        print("pattern mismatch")
-        sys.exit()
+        name, match = self.find_pattern_match(url)
+        if match:
+            module = importlib.import_module(".extractor." + name, __package__)
+            klass = getattr(module, module.info["extractor"])
+            return klass(match, self.config), module.info
+        else:
+            print("pattern mismatch")
+            return None
 
-    def match(self, url):
-        for category, regex in self.match_list:
-            match = regex.match(url)
-            if match:
-                module = importlib.import_module("."+category, __package__)
-                return module.Extractor(match, self.config)
+    def find_pattern_match(self, url):
+        for category in self.config:
+            for key, value in self.config[category].items():
+                if(key.startswith("regex")):
+                    print(value)
+                    match = re.match(value, url)
+                    if match:
+                        return category, match
+        for name, info in self.extractor_metadata():
+            for pattern in info["pattern"]:
+                print(pattern)
+                match = re.match(pattern, url)
+                if match:
+                    return name, match
         return None
 
-    def load_from_database(self, db):
-        query = (
-            "SELECT regex.re, category.name "
-            "FROM   regex JOIN category "
-            "ON     regex.category_id = category.id"
-        )
-        for row in db.execute(query):
-            self.add_match(row[1], row[0])
+    def extractor_metadata(self):
+        path = os.path.join(os.path.dirname(__file__), "extractor")
+        for name in os.listdir(path):
+            extractor_path = os.path.join(path, name)
+            info = self.get_info_dict(extractor_path)
+            if info is not None:
+                yield os.path.splitext(name)[0], info
 
-    def load_from_config(self, conf):
-        for category in conf:
-            for key, value in conf[category].items():
-                if(key.startswith("regex")):
-                    self.add_match(category, value)
-
-    def add_match(self, category, regex):
+    @staticmethod
+    def get_info_dict(extractor_path):
         try:
-            # print(category, regex)
-            self.match_list.append( (category, re.compile(regex)) )
-        except:
-            print("[Warning] [{0}] failed to compile regular expression '{1}'"
-                  .format(category, regex))
+            with open(extractor_path) as f:
+                for index in range(30):
+                    line = next(f)
+                    if line.startswith("info ="):
+                        break
+                else:
+                    return None
+
+                info = [line[6:]]
+                for line in f:
+                    info.append(line)
+                    if line.startswith("}"):
+                        break
+        except (StopIteration, OSError):
+            return None
+        return eval("".join(info))