update download-infrastructure

2024-11-21 18:22:30 +01:00 · 2015-04-08 01:51:48 +02:00 · 2015-04-08 01:51:48 +02:00 · 0abbee3710
commit 0abbee3710
parent 513808d156
1 changed files with 95 additions and 26 deletions
--- a/gallery_dl/download.py
+++ b/gallery_dl/download.py
@ -6,14 +6,13 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-""" """
-
 import os
 import sys
 import re
 import sqlite3
 import importlib

+from extractor.common import Message

 class DownloadManager():

@ -21,35 +20,94 @@ class DownloadManager():
        self.opts = opts
        self.conf = conf
        self.downloaders = {}
+        self.extractors = ExtractorFinder(conf)

-    def add(self, extr):
+    def add(self, url):
+        job = DownloadJob(self, url)
+        job.run()
+
+    def get_downloader_module(self, scheme):
+        """Return a downloader module suitable for 'scheme'"""
+        module = self.downloaders.get(scheme)
+        if module is None:
+            module = importlib.import_module(".downloader."+scheme, __package__)
+            self.downloaders[scheme] = module
+        return module
+
+    def get_base_directory(self):
        if self.opts.dest:
-            dest = self.opts.dest
-        elif extr.category in self.conf:
-            dest = self.conf[extr.category].get("destination", "/tmp/")
+            return self.opts.dest
        else:
-            dest = self.conf["general"].get("destination", "/tmp/")
-        dest = os.path.join(dest, extr.category, extr.directory)
-        os.makedirs(dest, exist_ok=True)
+            return self.conf["general"].get("destination", "/tmp/")

-        for url, filename in extr:
-            path = os.path.join(dest, filename)
-            if os.path.exists(path):
-                self.print_skip(path)
-                continue
-            dl = self.get_downloader(extr, url)
-            self.print_start(path)
-            tries = dl.download(url, path)
-            self.print_success(path, tries)

-    def get_downloader(self, extr, url):
-        end   = url.find("://")
-        proto = url[:end] if end != -1 else "http"
-        if proto not in self.downloaders:
-            # import downloader
-            module = importlib.import_module("."+proto, __package__)
-            self.downloaders[proto] = module.Downloader
-        return self.downloaders[proto](extr)
+class DownloadJob():
+
+    def __init__(self, mngr, url):
+        self.mngr = mngr
+        self.extractor, self.info = mngr.extractors.get_for_url(url)
+        self.directory = mngr.get_base_directory()
+        self.downloaders = {}
+
+    def run(self):
+        """Execute/Run the download job"""
+        if self.extractor is None:
+            return # TODO: error msg
+
+        for msg in self.extractor:
+            print(msg)
+            print(type(msg))
+            if msg[0] == Message.Url:
+                self.download(msg)
+
+            elif msg[0] == Message.Directory:
+                self.set_directory(msg)
+
+            elif msg[0] == Message.Version:
+                if msg[1] != 1:
+                    raise "unsupported message-version ({}, {})".format(
+                        self.info.category, msg[1]
+                    )
+                # TODO: support for multiple message versions
+
+    def download(self, msg):
+        """Download the resource specified in 'msg'"""
+        _, url, metadata = msg
+        filename = self.info["filename"].format(**metadata)
+        path = os.path.join(self.directory, filename)
+        if os.path.exists(path):
+            self.print_skip(path)
+            return
+        dl = self.get_downloader(url)
+        self.print_start(path)
+        tries = dl.download(url, path)
+        self.print_success(path, tries)
+
+    def set_directory(self, msg):
+        """Set and create the target directory for downloads"""
+        path = []
+        for segment in self.info["directory"]:
+            path.append(segment.format(**msg[1]))
+        self.directory = os.path.join(
+            self.mngr.get_base_directory(),
+            *path
+        )
+        os.makedirs(self.directory, exist_ok=True)
+
+    def get_downloader(self, url):
+        """Return, and possibly construct, a downloader suitable for 'url'"""
+        pos = url.find(":")
+        scheme = url[:pos] if pos != -1 else "http"
+        if scheme == "https":
+            scheme = "http"
+
+        downloader = self.downloaders.get(scheme)
+        if downloader is None:
+            module = self.mngr.get_downloader_module(scheme)
+            downloader = module.Downloader(self.extractor)
+            self.downloaders[scheme] = downloader
+
+        return downloader

    @staticmethod
    def print_start(path):
@ -78,6 +136,17 @@ class ExtractorFinder():
            self.load_from_database(conn)
        self.load_from_config(config)

+    def get_for_url(self, url):
+        # TODO: implement general case
+        module = importlib.import_module(".extractor.8chan", __package__)
+        for pattern in module.info["pattern"]:
+            match = re.match(pattern, url)
+            if match:
+                klass = getattr(module, module.info["extractor"])
+                return klass(match, self.config), module.info
+        print("pattern mismatch")
+        sys.exit()
+
    def match(self, url):
        for category, regex in self.match_list:
            match = regex.match(url)