Merge branch 'config'

2024-11-22 02:32:33 +01:00 · 2015-10-05 17:04:43 +02:00 · 2015-10-05 17:04:43 +02:00 · 88442ad26d
commit 88442ad26d
parent ae535ba0c1 5ae3dd84ba
24 changed files with 248 additions and 107 deletions
--- a/18
+++ b/18
@ -1,18 +0,0 @@
-[pixiv]
-username = XXXXX
-password = XXXXX
-
-[exhentai-cookies]
-ipb_member_id = XXXXX
-ipb_pass_hash = XXXXX
-
-[nijie-cookies]
-NIJIEIJIEID = XXXXX
-nijie_email_hash = XXXXX
-nijie_login_hash = XXXXX
-
-[danbooru]
-regex0 = d(?:anbooru)?[.:-_](\w.+)
-
-[gelbooru]
-regex0 = g(?:elbooru)?[.:-_](\w.+)
--- a/config.json
+++ b/config.json
@ -0,0 +1,39 @@
+{
+    "base-directory": "/tmp/",
+    "extractor":
+    {
+        "pixiv":
+        {
+            "directory": ["{category}", "{artist-id}"],
+            "username": "XXX",
+            "password": "XXX"
+        },
+        "nijie":
+        {
+            "cookies":
+            {
+                "NIJIEIJIEID": "XXX",
+                "nijie_email_hash": "XXX",
+                "nijie_login_hash": "XXX"
+            }
+        },
+        "4chan":
+        {
+            "directory": ["{category}", "{board}", "{thread} - {title}"]
+        },
+        "danbooru":
+        {
+            "pattern": ["d(?:anbooru)?[.:-_](\\w.+)"],
+            "filename": "{category}_{id:>07}_{md5}.{extension}"
+        },
+        "gelbooru":
+        {
+            "pattern": ["g(?:elbooru)?[.:-_](\\w.+)"],
+            "filename": "{category}_{id:>07}_{md5}.{extension}"
+        },
+        "e621":
+        {
+            "pattern": ["e(?:621)?[.:-_](\\w.+)"]
+        }
+    }
+}
--- a/gallery_dl/init.py
+++ b/gallery_dl/init.py
@ -17,9 +17,7 @@ __email__      = "mike_faehrmann@web.de"
 import os
 import sys
 import argparse
-import configparser
-
-from .download import DownloadManager
+from . import config, download

 def parse_cmdline_options():
    parser = argparse.ArgumentParser(
@ -41,18 +39,10 @@ def parse_cmdline_options():
    )
    return parser.parse_args()

-def parse_config_file(path):
-    config = configparser.ConfigParser(
-        interpolation=None,
-    )
-    config.optionxform = lambda opt: opt
-    config.read(os.path.expanduser(path))
-    return config
-
 def main():
+    config.load()
    opts = parse_cmdline_options()
-    conf = parse_config_file(opts.config)
-    dlmgr = DownloadManager(opts, conf)
+    dlmgr = download.DownloadManager(opts)

    try:
        for url in opts.urls:
--- a/gallery_dl/config.py
+++ b/gallery_dl/config.py
@ -0,0 +1,88 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2015 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Global configuration module"""
+
+import sys
+import json
+import os.path
+import platform
+
+# --------------------------------------------------------------------
+# public interface
+
+def load(*files):
+    """Load JSON configuration files and merge them into the global configuration"""
+    configfiles = files or _default_configs  # no arguments: fall back to the platform default paths
+    for conf in configfiles:
+        try:
+            path = os.path.expanduser(conf)
+            with open(path) as file:
+                confdict = json.load(file)
+            _config.update(confdict)  # shallow merge: a later file replaces whole top-level keys
+        except FileNotFoundError:
+            continue  # a missing config file is skipped without any message
+        except json.decoder.JSONDecodeError as exception:
+            print("Error while loading '", path, "':", sep="", file=sys.stderr)
+            print(exception, file=sys.stderr)
+
+def clear():
+    """Reset configuration to an empty state"""
+    globals()["_config"] = {}  # rebinds the module-level name to a new dict instead of mutating the old one
+
+def get(keys, default=None):
+    """Get the value at the key path 'keys' or a default value if it doesn't exist"""
+    conf = _config
+    try:
+        for k in keys:
+            conf = conf[k]  # descend one nesting level per key
+        return conf
+    except (KeyError, AttributeError):  # NOTE(review): indexing a non-dict intermediate value (e.g. a str) raises TypeError, which is not caught here
+        return default
+
+def interpolate(keys, default=None):
+    """Get the value at the key path 'keys', falling back to the value of the last key found on an enclosing level"""
+    conf = _config
+    try:
+        for k in keys:
+            default = conf.get(keys[-1], default)  # remember this level's value for the final key as the fallback
+            conf = conf[k]
+        return conf
+    except (KeyError, AttributeError):  # AttributeError: 'conf' is not a dict and has no .get()
+        return default
+
+def set(keys, value):
+    """Set the value at the key path 'keys' for this session"""
+    conf = _config
+    for k in keys[:-1]:
+        try:
+            conf = conf[k]
+        except KeyError:
+            temp = {}  # create the missing intermediate level and keep descending into it
+            conf[k] = temp
+            conf = temp
+    conf[keys[-1]] = value
+
+
+# --------------------------------------------------------------------
+# internals
+
+_config = {}  # global configuration dict; filled by load() and set(), replaced by clear()

+if platform.system() == "Windows":
+    _default_configs = [  # "~" is expanded by os.path.expanduser() in load()
+        r"~\.config\gallery-dl.conf",
+        r"~\.gallery-dl.conf",
+    ]
+else:
+    _default_configs = [  # load() reads these in order, so later files override earlier ones
+        "/etc/gallery-dl.conf",
+        "~/.config/gallery/config.json",
+        "~/.config/gallery-dl.conf",
+        "~/.gallery-dl.conf",
+    ]
--- a/gallery_dl/download.py
+++ b/gallery_dl/download.py
@ -12,14 +12,14 @@ import re
 import importlib

 from .extractor.common import Message
+from . import config

 class DownloadManager():

-    def __init__(self, opts, config):
+    def __init__(self, opts):
        self.opts = opts
-        self.config = config
        self.modules = {}
-        self.extractors = ExtractorFinder(config)
+        self.extractors = ExtractorFinder()

    def add(self, url):
        job = DownloadJob(self, url)
@ -38,7 +38,7 @@ class DownloadManager():
        if self.opts.dest:
            return self.opts.dest
        else:
-            return self.config.get("general", "destination", fallback="/tmp/")
+            return config.get(("base-directory",), default="/tmp/")


 class DownloadJob():
@ -50,16 +50,14 @@ class DownloadJob():
            return
        self.directory = mngr.get_base_directory()
        self.downloaders = {}
-        self.filename_fmt = mngr.config.get(
-            self.info["category"], "filename",
-            fallback=self.info["filename"]
+        self.filename_fmt = config.get(
+            ("extractor", self.info["category"], "filename"),
+            default=self.info["filename"]
+        )
+        segments = config.get(
+            ("extractor", self.info["category"], "directory"),
+            default=self.info["directory"]
        )
-        try:
-            segments = mngr.config.get(
-                self.info["category"], "directory"
-            ).split("/")
-        except Exception:
-            segments = self.info["directory"]
        self.directory_fmt = os.path.join(*segments)

    def run(self):
@ -114,13 +112,11 @@ class DownloadJob():
        scheme = url[:pos] if pos != -1 else "http"
        if scheme == "https":
            scheme = "http"
-
        downloader = self.downloaders.get(scheme)
        if downloader is None:
            module = self.mngr.get_downloader_module(scheme)
            downloader = module.Downloader()
            self.downloaders[scheme] = downloader
-
        return downloader

    @staticmethod
@ -144,28 +140,25 @@ class DownloadJob():

 class ExtractorFinder():

-    def __init__(self, config):
-        self.config = config
-
    def get_for_url(self, url):
        """Get an extractor-instance suitable for 'url'"""
        name, match = self.find_pattern_match(url)
        if match:
            module = importlib.import_module(".extractor." + name, __package__)
            klass = getattr(module, module.info["extractor"])
-            return klass(match, self.config), module.info
+            return klass(match), module.info
        else:
            print("no suitable extractor found")
            return None, None

    def find_pattern_match(self, url):
-        """Find a pattern, that matches 'url', and return the (category,match) tuple"""
-        for category in self.config:
-            for key, value in self.config[category].items():
-                if key.startswith("regex"):
-                    match = re.match(value, url)
-                    if match:
-                        return category, match
+        """Find a pattern that matches 'url' and return the (category,match) tuple"""
+        for category in config.get(("extractor",)):
+            patterns = config.get(("extractor", category, "pattern"), default=[])
+            for pattern in patterns:
+                match = re.match(pattern, url)
+                if match:
+                    return category, match
        for category, info in self.extractor_metadata():
            for pattern in info["pattern"]:
                match = re.match(pattern, url)
--- a/gallery_dl/extractor/3dbooru.py
+++ b/gallery_dl/extractor/3dbooru.py
@ -22,8 +22,8 @@ info = {

 class ThreeDeeBooruExtractor(JSONBooruExtractor):

-    def __init__(self, match, config):
-        JSONBooruExtractor.__init__(self, match, config, info)
+    def __init__(self, match):
+        JSONBooruExtractor.__init__(self, match, info)
        self.api_url = "http://behoimi.org/post/index.json"
        self.headers = {
            "Referer": "http://behoimi.org/post/show/",
--- a/gallery_dl/extractor/4chan.py
+++ b/gallery_dl/extractor/4chan.py
@ -25,8 +25,8 @@ class FourChanExtractor(ChanExtractor):
    api_url = "https://a.4cdn.org/{board}/thread/{thread}.json"
    file_url = "https://i.4cdn.org/{board}/{tim}{ext}"

-    def __init__(self, match, config):
+    def __init__(self, match):
        ChanExtractor.__init__(
-            self, config, info["category"],
+            self, info["category"],
            match.group(1), match.group(2)
        )
--- a/gallery_dl/extractor/8chan.py
+++ b/gallery_dl/extractor/8chan.py
@ -25,8 +25,8 @@ class InfinityChanExtractor(ChanExtractor):
    api_url = "https://8ch.net/{board}/res/{thread}.json"
    file_url = "https://media.8ch.net/{board}/src/{tim}{ext}"

-    def __init__(self, match, config):
+    def __init__(self, match):
        ChanExtractor.__init__(
-            self, config, info["category"],
+            self, info["category"],
            match.group(1), match.group(2)
        )
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@ -27,8 +27,8 @@ class BatotoExtractor(AsynchronousExtractor):

    url_base = "http://bato.to/read/_/"

-    def __init__(self, match, config):
-        AsynchronousExtractor.__init__(self, config)
+    def __init__(self, match):
+        AsynchronousExtractor.__init__(self)
        self.chapter_id = match.group(1)

    def items(self):
--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@ -19,8 +19,8 @@ class BooruExtractor(SequentialExtractor):

    api_url = ""

-    def __init__(self, match, config, info):
-        SequentialExtractor.__init__(self, config)
+    def __init__(self, match, info):
+        SequentialExtractor.__init__(self)
        self.info = info
        self.tags = text.unquote(match.group(1))
        self.page = "page"
--- a/gallery_dl/extractor/chan.py
+++ b/gallery_dl/extractor/chan.py
@ -10,15 +10,14 @@

 from .common import SequentialExtractor, Message
 from .. import text
-import re

 class ChanExtractor(SequentialExtractor):

    api_url = ""
    file_url = ""

-    def __init__(self, config, category, board, thread):
-        SequentialExtractor.__init__(self, config)
+    def __init__(self, category, board, thread):
+        SequentialExtractor.__init__(self)
        self.metadata = {
            "category": category,
            "board": board,
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@ -12,7 +12,7 @@ import time
 import queue
 import requests
 import threading
-import html.parser
+from .. import config


 class Message():
@ -47,15 +47,15 @@ class Extractor():

 class SequentialExtractor(Extractor):

-    def __init__(self, _):
+    def __init__(self):
        Extractor.__init__(self)


 class AsynchronousExtractor(Extractor):

-    def __init__(self, config):
+    def __init__(self):
        Extractor.__init__(self)
-        queue_size = int(config.get("general", "queue-size", fallback=5))
+        queue_size = int(config.get(("queue-size",), default=5))
        self.__queue = queue.Queue(maxsize=queue_size)
        self.__thread = threading.Thread(target=self.async_items, daemon=True)

--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@ -22,6 +22,6 @@ info = {

 class DanbooruExtractor(JSONBooruExtractor):

-    def __init__(self, match, config):
-        JSONBooruExtractor.__init__(self, match, config, info)
+    def __init__(self, match):
+        JSONBooruExtractor.__init__(self, match, info)
        self.api_url = "https://danbooru.donmai.us/posts.json"
--- a/gallery_dl/extractor/e621.py
+++ b/gallery_dl/extractor/e621.py
@ -23,6 +23,6 @@ info = {

 class E621Extractor(JSONBooruExtractor):

-    def __init__(self, match, config):
-        JSONBooruExtractor.__init__(self, match, config, info)
+    def __init__(self, match):
+        JSONBooruExtractor.__init__(self, match, info)
        self.api_url = "https://e621.net/post/index.json"
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@ -22,8 +22,8 @@ info = {

 class GelbooruExtractor(XMLBooruExtractor):

-    def __init__(self, match, config):
-        XMLBooruExtractor.__init__(self, match, config, info)
+    def __init__(self, match):
+        XMLBooruExtractor.__init__(self, match, info)
        self.api_url = "http://gelbooru.com/"
        self.params = {"page":"dapi", "s":"post", "q":"index", "tags":self.tags}

--- a/gallery_dl/extractor/imagebam.py
+++ b/gallery_dl/extractor/imagebam.py
@ -25,8 +25,8 @@ class ImagebamExtractor(AsynchronousExtractor):

    url_base = "http://www.imagebam.com"

-    def __init__(self, match, config):
-        AsynchronousExtractor.__init__(self, config)
+    def __init__(self, match):
+        AsynchronousExtractor.__init__(self)
        self.match = match
        self.num = 0
        self.metadata = {}
--- a/gallery_dl/extractor/imgbox.py
+++ b/gallery_dl/extractor/imgbox.py
@ -26,8 +26,8 @@ class ImgboxExtractor(AsynchronousExtractor):

    url_base = "http://imgbox.com"

-    def __init__(self, match, config):
-        AsynchronousExtractor.__init__(self, config)
+    def __init__(self, match):
+        AsynchronousExtractor.__init__(self)
        self.key = match.group(1)
        self.metadata = {}

--- a/gallery_dl/extractor/imgchili.py
+++ b/gallery_dl/extractor/imgchili.py
@ -24,8 +24,8 @@ info = {

 class ImgchiliExtractor(SequentialExtractor):

-    def __init__(self, match, config):
-        SequentialExtractor.__init__(self, config)
+    def __init__(self, match):
+        SequentialExtractor.__init__(self)
        self.match = match
        self.num = 0

--- a/gallery_dl/extractor/mangareader.py
+++ b/gallery_dl/extractor/mangareader.py
@ -28,8 +28,8 @@ class MangaReaderExtractor(AsynchronousExtractor):

    url_base = "http://www.mangareader.net"

-    def __init__(self, match, config):
-        AsynchronousExtractor.__init__(self, config)
+    def __init__(self, match):
+        AsynchronousExtractor.__init__(self)
        self.part = match.group(1)

    def items(self):
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@ -9,7 +9,7 @@
 """Extract images from https://nijie.info/"""

 from .common import AsynchronousExtractor, Message
-from ..text  import filename_from_url
+from .. import config, text
 import re

 info = {
@ -26,8 +26,8 @@ class NijieExtractor(AsynchronousExtractor):

    popup_url = "https://nijie.info/view_popup.php?id="

-    def __init__(self, match, config):
-        AsynchronousExtractor.__init__(self, config)
+    def __init__(self, match):
+        AsynchronousExtractor.__init__(self)
        self.artist_id = match.group(1)
        self.artist_url = (
            "https://nijie.info/members_illust.php?id="
@ -36,7 +36,9 @@ class NijieExtractor(AsynchronousExtractor):
        self.session.headers["Referer"] = self.artist_url
        self.session.cookies["R18"] = "1"
        self.session.cookies["nijie_referer"] = "nijie.info"
-        self.session.cookies.update(config["nijie-cookies"])
+        self.session.cookies.update(
+            config.get(("extractor", info["category"], "cookies"))
+        )

    def items(self):
        data = self.get_job_metadata()
@ -56,19 +58,19 @@ class NijieExtractor(AsynchronousExtractor):

    def get_image_ids(self):
        """Collect all image-ids for a specific artist"""
-        text = self.request(self.artist_url).text
+        page = self.request(self.artist_url).text
        regex = r'<a href="/view\.php\?id=(\d+)"'
-        return [m.group(1) for m in re.finditer(regex, text)]
+        return [m.group(1) for m in re.finditer(regex, page)]

    def get_image_data(self, image_id):
        """Get URL and metadata for images specified by 'image_id'"""
-        text = self.request(self.popup_url + image_id).text
-        matches = re.findall('<img src="([^"]+)"', text)
+        page = self.request(self.popup_url + image_id).text
+        matches = re.findall('<img src="([^"]+)"', page)
        for index, url in enumerate(matches):
            yield "https:" + url, {
                "count": len(matches),
                "index": index,
                "image-id": image_id,
-                "name" : filename_from_url(url),
+                "name" : text.filename_from_url(url),
                "extension": url[url.rfind(".")+1:],
            }
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@ -9,7 +9,7 @@
 """Extract images and ugoira from http://www.pixiv.net/"""

 from .common import SequentialExtractor, Message
-from .. import text
+from .. import config, text
 import re
 import json

@ -29,16 +29,15 @@ class PixivExtractor(SequentialExtractor):
    member_url = "http://www.pixiv.net/member_illust.php"
    illust_url = "http://www.pixiv.net/member_illust.php?mode=medium"

-    def __init__(self, match, config):
-        SequentialExtractor.__init__(self, config)
-        self.config = config
+    def __init__(self, match):
+        SequentialExtractor.__init__(self)
        self.artist_id = match.group(1)
        self.api = PixivAPI(self.session)

    def items(self):
        self.api.login(
-            self.config.get("pixiv", "username"),
-            self.config.get("pixiv", "password"),
+            config.get(("extractor", "pixiv", "username")),
+            config.get(("extractor", "pixiv", "password")),
        )
        metadata = self.get_job_metadata()

--- a/gallery_dl/extractor/redhawkscans.py
+++ b/gallery_dl/extractor/redhawkscans.py
@ -28,8 +28,8 @@ class RedHawkScansExtractor(SequentialExtractor):

    url_base = "https://manga.redhawkscans.com/reader/read/"

-    def __init__(self, match, config):
-        SequentialExtractor.__init__(self, config)
+    def __init__(self, match):
+        SequentialExtractor.__init__(self)
        self.part = match.group(1)

    def items(self):
--- a/gallery_dl/extractor/yandere.py
+++ b/gallery_dl/extractor/yandere.py
@ -22,6 +22,6 @@ info = {

 class YandereExtractor(JSONBooruExtractor):

-    def __init__(self, match, config):
-        JSONBooruExtractor.__init__(self, match, config, info)
+    def __init__(self, match):
+        JSONBooruExtractor.__init__(self, match, info)
        self.api_url = "https://yande.re/post.json"
--- a/test/test_config.py
+++ b/test/test_config.py
@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2015 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+import unittest
+import gallery_dl.config as config
+import os
+import tempfile
+
+class TestConfig(unittest.TestCase):

+    def setUp(self):  # write a small JSON config to a temporary file and load it
+        fd, self._configfile = tempfile.mkstemp()
+        with os.fdopen(fd, "w") as file:
+            file.write('{"a": "1", "b": {"c": "text"}}')
+        config.load(self._configfile)

+    def tearDown(self):  # reset the global config and delete the temporary file
+        config.clear()
+        os.remove(self._configfile)

+    def test_get(self):
+        self.assertEqual(config.get(["a"]), "1")
+        self.assertEqual(config.get(["b", "c"]), "text")
+        self.assertEqual(config.get(["d"]), None)  # missing key without explicit default
+        self.assertEqual(config.get(["e", "f", "g"], 123), 123)  # missing path returns the supplied default

+    def test_set(self):
+        config.set(["b", "c"], [1, 2, 3])  # overwrite an existing nested value
+        config.set(["e", "f", "g"], value=234)  # intermediate levels "e" and "f" do not exist yet
+        self.assertEqual(config.get(["b", "c"]), [1, 2, 3])
+        self.assertEqual(config.get(["e", "f", "g"]), 234)

+    def test_interpolate(self):
+        self.assertEqual(config.interpolate(["a"]), "1")
+        self.assertEqual(config.interpolate(["b", "a"]), "1")  # "a" is missing in "b"; the top-level "a" is used
+        self.assertEqual(config.interpolate(["b", "c"], "2"), "text")  # an existing value wins over the default
+        self.assertEqual(config.interpolate(["b", "d"], "2"), "2")  # "d" exists on no level; the default is returned
+        config.set(["d"], 123)
+        self.assertEqual(config.interpolate(["b", "d"], "2"), 123)  # the top-level "d" now serves as the fallback
+        self.assertEqual(config.interpolate(["d", "d"], "2"), 123)  # "d" is not a dict; the top-level value found first is returned
+
+
+if __name__ == '__main__':
+    unittest.main()