From c8e5b2e89eaffec912b37a56cfa42482cc2105b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 29 Jun 2015 23:09:35 +0200 Subject: [PATCH 01/15] base class for futaba-chan boards with api --- gallery_dl/extractor/chan.py | 47 ++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 gallery_dl/extractor/chan.py diff --git a/gallery_dl/extractor/chan.py b/gallery_dl/extractor/chan.py new file mode 100644 index 00000000..cb336774 --- /dev/null +++ b/gallery_dl/extractor/chan.py @@ -0,0 +1,47 @@ + +# -*- coding: utf-8 -*- + +# Copyright 2015 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Base classes for extractors for different Futaba Channel boards""" + +from .common import SequentialExtractor, Message + +class ChanExtractor(SequentialExtractor): + + api_url = "" + file_url = "" + + def __init__(self, config, category, board, thread): + SequentialExtractor.__init__(self, config) + self.metadata = { + "category": category, + "board": board, + "thread": thread, + } + + def items(self): + yield Message.Version, 1 + posts = self.request(self.api_url.format(**self.metadata)).json()["posts"] + self.metadata["title"] = self.get_thread_title(posts[0]) + yield Message.Directory, self.metadata + for post in posts: + if "filename" not in post: + continue + post.update(self.metadata) + yield Message.Url, self.file_url.format(**post), post + + @staticmethod + def get_thread_title(post): + """Return thread title from first post""" + if "sub" in post: + return post["sub"] + com = post["com"] + pos = com.find("
") + if pos == -1: + return com + return com[:min(pos, 50)] From c9ef181b3cd9890b8697a382c75270d8b87006a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 29 Jun 2015 23:14:35 +0200 Subject: [PATCH 02/15] [4chan] use api --- gallery_dl/extractor/4chan.py | 58 ++++++----------------------------- 1 file changed, 9 insertions(+), 49 deletions(-) diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py index 7d6c826b..74d1c867 100644 --- a/gallery_dl/extractor/4chan.py +++ b/gallery_dl/extractor/4chan.py @@ -8,65 +8,25 @@ """Extract image- and video-urls from threads on https://www.4chan.org/""" -from .common import SequentialExtractor, Message -from urllib.parse import unquote -import re +from .chan import ChanExtractor info = { "category": "4chan", "extractor": "FourChanExtractor", "directory": ["{category}", "{board}-{thread-id}"], - "filename": "{timestamp}-{name}", + "filename": "{time}-{filename}{ext}", "pattern": [ r"(?:https?://)?boards\.4chan\.org/([^/]+)/thread/(\d+).*", ], } -class FourChanExtractor(SequentialExtractor): +class FourChanExtractor(ChanExtractor): - url_fmt = "https://boards.4chan.org/{0}/res/{1}.html" - regex = ( - r'(?P[^<]+) ' - r'\((?P[^,]+), (?P\d+)x(?P\d+)\)' - ) + api_url = "https://a.4cdn.org/{board}/thread/{thread}.json" + file_url = "https://i.4cdn.org/{board}/{tim}{ext}" def __init__(self, match, config): - SequentialExtractor.__init__(self, config) - self.match = match - self.metadata = None - - def items(self): - yield Message.Version, 1 - - url = self.url_fmt.format(*self.match.groups()) - text = self.request(url).text - self.metadata = self.get_job_metadata(text) - - yield Message.Directory, self.metadata - for match in re.finditer(self.regex, text): - yield Message.Url, self.get_file_url(match), self.get_file_metadata(match) - - def get_job_metadata(self, text): - """Collect metadata for extractor-job""" - board, thread_id = self.match.groups() - title, _ = self.extract(text, '"description" content="', ' - "/') - return { - "category": info["category"], - "board": board, - "thread-id": thread_id, - "title": unquote(title), - } - - def get_file_metadata(self, match): - """Collect metadata for a downloadable file""" - data = self.metadata - data.update(match.groupdict(default="")) - data["name"] = unquote(data["orig_name"] or data["name"]) - return data - - @staticmethod - def get_file_url(match): - """Extract download-url from 'match'""" - return "https:" + match.group("url") + ChanExtractor.__init__( + self, config, info["category"], + match.group(1), match.group(2) + ) From 1998ec9b131dce38b652af06447ff667f01839bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 7 Sep 2015 13:48:16 +0200 Subject: [PATCH 03/15] [pixiv] update user-agent to newest version --- gallery_dl/extractor/pixiv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 0c51cd40..71674899 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -146,7 +146,7 @@ class PixivAPI(): self.session = session self.session.headers.update({ "Referer": "http://www.pixiv.net/", - "User-Agent": "PixivIOSApp/5.1.1", + "User-Agent": "PixivIOSApp/5.8.0", # "Authorization": "Bearer 8mMXXWT9iuwdJvsVIvQsFYDwuZpRCMePeyagSh30ZdU", }) From d8ef128e74a83e48bad3e8a35b7af8c9e13382a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 7 Sep 2015 13:49:47 +0200 Subject: [PATCH 04/15] [4chan] update default filename 
and directory --- gallery_dl/extractor/4chan.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py index 74d1c867..028ab7de 100644 --- a/gallery_dl/extractor/4chan.py +++ b/gallery_dl/extractor/4chan.py @@ -13,8 +13,8 @@ from .chan import ChanExtractor info = { "category": "4chan", "extractor": "FourChanExtractor", - "directory": ["{category}", "{board}-{thread-id}"], - "filename": "{time}-{filename}{ext}", + "directory": ["{category}", "{board}-{thread}"], + "filename": "{tim}-{filename}{ext}", "pattern": [ r"(?:https?://)?boards\.4chan\.org/([^/]+)/thread/(\d+).*", ], From d7e0d81bddcbef1b9697c8d221bbec805c77ab8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 7 Sep 2015 16:32:20 +0200 Subject: [PATCH 05/15] [8chan] use api --- gallery_dl/extractor/8chan.py | 62 +++++++---------------------------- gallery_dl/extractor/chan.py | 13 ++++---- 2 files changed, 18 insertions(+), 57 deletions(-) diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py index d56e5e6f..559951fa 100644 --- a/gallery_dl/extractor/8chan.py +++ b/gallery_dl/extractor/8chan.py @@ -8,65 +8,25 @@ """Extract image- and video-urls from threads on https://8ch.net/""" -from .common import SequentialExtractor, Message -from urllib.parse import unquote -import re +from .chan import ChanExtractor info = { "category": "8chan", "extractor": "InfinityChanExtractor", - "directory": ["{category}", "{board}-{thread-id}"], - "filename": "{timestamp}-{name}", + "directory": ["{category}", "{board}-{thread}"], + "filename": "{tim}-{filename}{ext}", "pattern": [ - r"(?:https?://)?(?:www\.)?(?:8chan\.co|8ch\.net)/([^/]+/res/\d+).*", + r"(?:https?://)?(?:www\.)?8ch\.net/([^/]+)/res/(\d+).*", ], } -class InfinityChanExtractor(SequentialExtractor): +class InfinityChanExtractor(ChanExtractor): - url_base = "https://8ch.net" - url_fmt = url_base + "/{board}/res/{thread-id}.html" - regex = ( - r'>File: ([^<]+)\.[^<]+<.*?' 
- r'([^<]+)<' - ) + api_url = "https://8ch.net/{board}/res/{thread}.json" + file_url = "https://media.8ch.net/{board}/src/{tim}{ext}" def __init__(self, match, config): - SequentialExtractor.__init__(self, config) - self.match = match - - def items(self): - yield Message.Version, 1 - - metadata = self.get_job_metadata() - yield Message.Directory, metadata - - url = self.url_fmt.format(**metadata) - text = self.request(url).text - for match in re.finditer(self.regex, text): - yield Message.Url, self.get_file_url(match), self.get_file_metadata(match) - - def get_job_metadata(self): - """Collect metadata for extractor-job""" - board, _, thread_id = self.match.group(1).split("/") - return { - "category": info["category"], - "board": board, - "thread-id": thread_id, - } - - @staticmethod - def get_file_metadata(match): - """Collect metadata for a downloadable file""" - return { - "timestamp": match.group(2), - "name": unquote(match.group(4) or match.group(5)), - } - - def get_file_url(self, match): - """Extract download-url from 'match'""" - url = match.group(1) - if url.startswith("/"): - url = self.url_base + url - return url - + ChanExtractor.__init__( + self, config, info["category"], + match.group(1), match.group(2) + ) diff --git a/gallery_dl/extractor/chan.py b/gallery_dl/extractor/chan.py index cb336774..2f943068 100644 --- a/gallery_dl/extractor/chan.py +++ b/gallery_dl/extractor/chan.py @@ -1,4 +1,3 @@ - # -*- coding: utf-8 -*- # Copyright 2015 Mike Fährmann @@ -10,6 +9,7 @@ """Base classes for extractors for different Futaba Channel boards""" from .common import SequentialExtractor, Message +import re class ChanExtractor(SequentialExtractor): @@ -34,14 +34,15 @@ class ChanExtractor(SequentialExtractor): continue post.update(self.metadata) yield Message.Url, self.file_url.format(**post), post + if "extra_files" in post: + for file in post["extra_files"]: + post.update(file) + yield Message.Url, self.file_url.format(**post), post @staticmethod def get_thread_title(post): """Return thread title from first post""" if "sub" in post: return post["sub"] - com = post["com"] - pos = com.find("
") - if pos == -1: - return com - return com[:min(pos, 50)] + com = re.sub("<[^>]+?>", "", post["com"]) + return " ".join(com.split())[:50] From bc22f2bd3ab3d1f0ec55ca9b3993e84701716f70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 1 Oct 2015 14:55:55 +0200 Subject: [PATCH 06/15] update .gitignore --- .gitignore | 55 +++++++++++++++++++++++- gallery_dl.egg-info/PKG-INFO | 23 ---------- gallery_dl.egg-info/SOURCES.txt | 35 --------------- gallery_dl.egg-info/dependency_links.txt | 1 - gallery_dl.egg-info/entry_points.txt | 3 -- gallery_dl.egg-info/requires.txt | 1 - gallery_dl.egg-info/top_level.txt | 1 - 7 files changed, 54 insertions(+), 65 deletions(-) delete mode 100644 gallery_dl.egg-info/PKG-INFO delete mode 100644 gallery_dl.egg-info/SOURCES.txt delete mode 100644 gallery_dl.egg-info/dependency_links.txt delete mode 100644 gallery_dl.egg-info/entry_points.txt delete mode 100644 gallery_dl.egg-info/requires.txt delete mode 100644 gallery_dl.egg-info/top_level.txt diff --git a/.gitignore b/.gitignore index 12c84c56..ba746605 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,57 @@ +# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ build/ -dist/ \ No newline at end of file +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO deleted file mode 100644 index 74452230..00000000 --- a/gallery_dl.egg-info/PKG-INFO +++ /dev/null @@ -1,23 +0,0 @@ -Metadata-Version: 1.1 -Name: gallery-dl -Version: 0.2 -Summary: gallery- and image downloader -Home-page: https://github.com/mikf/gallery-dl -Author: Mike Fährmann -Author-email: mike_faehrmann@web.de -License: GPLv2 -Description: download image galleries from several image hosting platforms -Platform: UNKNOWN -Classifier: Development Status :: 3 - Alpha -Classifier: Environment :: Console -Classifier: Intended Audience :: End Users/Desktop -Classifier: License :: OSI Approved :: GNU General Public License v2 (GPLv2) -Classifier: Operating System :: POSIX -Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.2 -Classifier: Programming Language :: Python :: 3.3 -Classifier: Programming Language :: Python :: 3.4 -Classifier: Programming Language :: Python :: 3 :: Only -Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search -Classifier: Topic :: Multimedia -Classifier: Topic :: Multimedia :: Graphics diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt deleted file mode 100644 index 05ff4da8..00000000 --- a/gallery_dl.egg-info/SOURCES.txt +++ /dev/null @@ -1,35 +0,0 @@ -setup.py -bin/gallery-dl -gallery_dl/__init__.py -gallery_dl/download.py -gallery_dl.egg-info/PKG-INFO -gallery_dl.egg-info/SOURCES.txt -gallery_dl.egg-info/dependency_links.txt -gallery_dl.egg-info/entry_points.txt -gallery_dl.egg-info/requires.txt 
-gallery_dl.egg-info/top_level.txt -gallery_dl/downloader/__init__.py -gallery_dl/downloader/common.py -gallery_dl/downloader/http.py -gallery_dl/downloader/https.py -gallery_dl/downloader/text.py -gallery_dl/extractor/3dbooru.py -gallery_dl/extractor/4chan.py -gallery_dl/extractor/8chan.py -gallery_dl/extractor/__init__.py -gallery_dl/extractor/batoto.py -gallery_dl/extractor/booru.py -gallery_dl/extractor/common.py -gallery_dl/extractor/danbooru.py -gallery_dl/extractor/e621.py -gallery_dl/extractor/exhentai.py -gallery_dl/extractor/gelbooru.py -gallery_dl/extractor/imagebam.py -gallery_dl/extractor/imgbox.py -gallery_dl/extractor/imgchili.py -gallery_dl/extractor/mangareader.py -gallery_dl/extractor/nijie.py -gallery_dl/extractor/pixiv.py -gallery_dl/extractor/redhawkscans.py -gallery_dl/extractor/sankaku.py -gallery_dl/extractor/yandere.py \ No newline at end of file diff --git a/gallery_dl.egg-info/dependency_links.txt b/gallery_dl.egg-info/dependency_links.txt deleted file mode 100644 index 8b137891..00000000 --- a/gallery_dl.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/gallery_dl.egg-info/entry_points.txt b/gallery_dl.egg-info/entry_points.txt deleted file mode 100644 index 53cf5106..00000000 --- a/gallery_dl.egg-info/entry_points.txt +++ /dev/null @@ -1,3 +0,0 @@ -[console_scripts] -gallery-dl = gallery_dl:main - diff --git a/gallery_dl.egg-info/requires.txt b/gallery_dl.egg-info/requires.txt deleted file mode 100644 index d48cd089..00000000 --- a/gallery_dl.egg-info/requires.txt +++ /dev/null @@ -1 +0,0 @@ -requests >= 2.0 diff --git a/gallery_dl.egg-info/top_level.txt b/gallery_dl.egg-info/top_level.txt deleted file mode 100644 index 9e5039cb..00000000 --- a/gallery_dl.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -gallery_dl From c5801c9770d1da7a83e3b0a2fc527dbf1b3b06f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 3 Oct 2015 12:53:45 +0200 Subject: [PATCH 07/15] combine text related functions in new module --- gallery_dl/text.py | 60 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 gallery_dl/text.py diff --git a/gallery_dl/text.py b/gallery_dl/text.py new file mode 100644 index 00000000..47fd7258 --- /dev/null +++ b/gallery_dl/text.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- + +# Copyright 2015 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. 
+ +"""Collection of functions that work in strings/text""" + +import re +import html.parser +import urllib.parse +import platform + +def remove_html(text): + """Remove html-tags from a string""" + return " ".join(re.sub("<[^>]+?>", " ", text).split()) + +def filename_from_url(url): + """Extract the last part of an url to use as a filename""" + try: + path = urllib.parse.urlparse(url).path + pos = path.rindex("/") + return path[pos+1:] + except ValueError: + return url + +def clean_path_windows(path): + """Remove illegal characters from a path-segment (Windows)""" + return re.sub(r'[<>:"\\/|?*]', "_", path) + +def clean_path_posix(path): + """Remove illegal characters from a path-segment (Posix)""" + return path.replace("/", "_") + +def extract(txt, begin, end, pos=0): + try: + first = txt.index(begin, pos) + len(begin) + last = txt.index(end, first) + return txt[first:last], last+len(end) + except ValueError: + return None, pos + +def extract_all(txt, begin, end, pos=0): + try: + first = txt.index(begin, pos) + last = txt.index(end, first + len(begin)) + len(end) + return txt[first:last], last + except ValueError: + return None, pos + +if platform.system() == "Windows": + clean_path = clean_path_windows +else: + clean_path = clean_path_posix + +unquote = urllib.parse.unquote + +unescape = html.parser.HTMLParser().unescape From 2962bf36f606d97ee0f3f9ca65227cf26c852ac6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 3 Oct 2015 14:51:13 +0200 Subject: [PATCH 08/15] add tests for text-module --- setup.py | 1 + test/__init__.py | 0 test/test_text.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+) create mode 100644 test/__init__.py create mode 100644 test/test_text.py diff --git a/setup.py b/setup.py index db9adead..fe2d134e 100644 --- a/setup.py +++ b/setup.py @@ -46,4 +46,5 @@ setup( "Topic :: Multimedia", "Topic :: Multimedia :: Graphics", ], + test_suite='test', ) diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/test_text.py b/test/test_text.py new file mode 100644 index 00000000..91e0097e --- /dev/null +++ b/test/test_text.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2015 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +import unittest +import gallery_dl.text as text + +class TestText(unittest.TestCase): + + def test_remove_html(self): + cases = ( + "Hello World.", + " Hello World. ", + "Hello
World.", + "
HelloWorld.
" + ) + result = "Hello World." + for case in cases: + self.assertEqual(text.remove_html(case), result) + + def test_filename_from_url(self): + cases = ( + "http://example.org/v2/filename.ext", + "http://example.org/v2/filename.ext?param=value#fragment", + "example.org/filename.ext", + "/filename.ext", + "filename.ext", + ) + result = "filename.ext" + for case in cases: + self.assertEqual(text.filename_from_url(case), result) + + def test_clean_path(self): + cases = { + "Hello World." : ("Hello World.", "Hello World."), + "Hello/World/.": ("Hello_World_.", "Hello_World_."), + r':|"World\*?': ( + '_Hello____World___', r':|"World\*?' + ), + } + for case, result in cases.items(): + self.assertEqual(text.clean_path_windows(case), result[0]) + self.assertEqual(text.clean_path_posix (case), result[1]) + +if __name__ == '__main__': + unittest.main() From 42b8e81a680628dc0a4b36fc9a329fb25a9e5010 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 3 Oct 2015 15:43:02 +0200 Subject: [PATCH 09/15] rewrite extractors to use text-module --- gallery_dl/extractor/batoto.py | 26 ++++++++++++-------------- gallery_dl/extractor/booru.py | 12 +++++------- gallery_dl/extractor/chan.py | 4 ++-- gallery_dl/extractor/common.py | 24 ------------------------ gallery_dl/extractor/imagebam.py | 25 ++++++++++++------------- gallery_dl/extractor/imgbox.py | 17 +++++++++-------- gallery_dl/extractor/imgchili.py | 7 +++---- gallery_dl/extractor/mangareader.py | 12 +++++------- gallery_dl/extractor/nijie.py | 6 +++--- gallery_dl/extractor/pixiv.py | 14 +++++++------- gallery_dl/extractor/redhawkscans.py | 17 ++++++++--------- 11 files changed, 66 insertions(+), 98 deletions(-) diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py index ac363052..65bc7c3d 100644 --- a/gallery_dl/extractor/batoto.py +++ b/gallery_dl/extractor/batoto.py @@ -8,10 +8,8 @@ """Extract manga pages from http://bato.to/""" -from .common import AsynchronousExtractor -from .common import Message -from .common import filename_from_url, unescape -from urllib.parse import unquote +from .common import AsynchronousExtractor, Message +from .. import text import os.path import re @@ -44,13 +42,13 @@ class BatotoExtractor(AsynchronousExtractor): def get_page_metadata(self, page_url): """Collect next url and metadata for one manga-page""" page = self.request(page_url).text - _ , pos = self.extract(page, 'selected="selected"', '') - title, pos = self.extract(page, ': ', '<', pos) - _ , pos = self.extract(page, 'selected="selected"', '', pos) - trans, pos = self.extract(page, '>', '<', pos) - _ , pos = self.extract(page, '
', '<', pos) + _ , pos = text.extract(page, '
(.+) - (?:vol (\d+) )?" r"ch (\d+)[^ ]+ Page (\d+) | Batoto!", @@ -60,18 +58,18 @@ class BatotoExtractor(AsynchronousExtractor): r"(.+) - ([^ ]+)", trans ) - filename = unquote(filename_from_url(image)) + filename = text.unquote(text.filename_from_url(image)) name, ext = os.path.splitext(filename) return url, { "category": info["category"], "chapter-id": self.chapter_id, - "manga": unescape(mmatch.group(1)), + "manga": text.unescape(mmatch.group(1)), "volume": mmatch.group(2) or "", "chapter": mmatch.group(3), "page": mmatch.group(4), "group": tmatch.group(1), "language": tmatch.group(2), - "title": unescape(title), + "title": text.unescape(title), "image-url": image, "name": name, "extension": ext[1:], diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py index 88600397..f72bc789 100644 --- a/gallery_dl/extractor/booru.py +++ b/gallery_dl/extractor/booru.py @@ -8,15 +8,13 @@ """Base classes for extractors for danbooru and co""" -from .common import SequentialExtractor -from .common import Message -from .common import filename_from_url +from .common import SequentialExtractor, Message +from .. import text import xml.etree.ElementTree as ET import json import os.path import urllib.parse - class BooruExtractor(SequentialExtractor): api_url = "" @@ -24,7 +22,7 @@ class BooruExtractor(SequentialExtractor): def __init__(self, match, config, info): SequentialExtractor.__init__(self, config) self.info = info - self.tags = urllib.parse.unquote(match.group(1)) + self.tags = text.unquote(match.group(1)) self.page = "page" self.params = {"tags": self.tags} self.headers = {} @@ -58,8 +56,8 @@ class BooruExtractor(SequentialExtractor): def get_file_metadata(self, data): """Collect metadata for a downloadable file""" data["category"] = self.info["category"] - data["name"] = urllib.parse.unquote( - filename_from_url(self.get_file_url(data)) + data["name"] = text.unquote( + text.filename_from_url(self.get_file_url(data)) ) data["extension"] = os.path.splitext(data["name"])[1][1:] return data diff --git a/gallery_dl/extractor/chan.py b/gallery_dl/extractor/chan.py index 2f943068..2d2b6fb4 100644 --- a/gallery_dl/extractor/chan.py +++ b/gallery_dl/extractor/chan.py @@ -9,6 +9,7 @@ """Base classes for extractors for different Futaba Channel boards""" from .common import SequentialExtractor, Message +from .. 
import text import re class ChanExtractor(SequentialExtractor): @@ -44,5 +45,4 @@ class ChanExtractor(SequentialExtractor): """Return thread title from first post""" if "sub" in post: return post["sub"] - com = re.sub("<[^>]+?>", "", post["com"]) - return " ".join(com.split())[:50] + return text.remove_html(post["com"])[:50] diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index cb8e91ca..b364d870 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -44,24 +44,6 @@ class Extractor(): "Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0" ) - @staticmethod - def extract(txt, begin, end, pos=0): - try: - first = txt.index(begin, pos) + len(begin) - last = txt.index(end, first) - return txt[first:last], last+len(end) - except ValueError: - return None, pos - - @staticmethod - def extract_all(txt, begin, end, pos=0): - try: - first = txt.index(begin, pos) - last = txt.index(end, first + len(begin)) + len(end) - return txt[first:last], last - except ValueError: - return None, pos - class SequentialExtractor(Extractor): @@ -123,9 +105,3 @@ def safe_request(session, url, method="GET", *args, **kwargs): # everything ok -- proceed to download return r - -def filename_from_url(url): - pos = url.rfind("/") - return url[pos+1:] - -unescape = html.parser.HTMLParser().unescape diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py index f8886a7a..c89721f2 100644 --- a/gallery_dl/extractor/imagebam.py +++ b/gallery_dl/extractor/imagebam.py @@ -8,9 +8,8 @@ """Extract images from galleries at http://www.imagebam.com/""" -from .common import AsynchronousExtractor -from .common import Message -from .common import filename_from_url +from .common import AsynchronousExtractor, Message +from .. import text info = { "category": "imagebam", @@ -42,28 +41,28 @@ class ImagebamExtractor(AsynchronousExtractor): done = False while not done: # get current page - text = self.request(self.url_base + next_url).text + page = self.request(self.url_base + next_url).text # get url for next page - next_url, pos = self.extract(text, "next image" we are done - if not text.startswith(">next image", pos): + if not page.startswith(">next image", pos): done = True # get image url - img_url, pos = self.extract(text, 'onclick="scale(this);" src="', '"', pos) + img_url, pos = text.extract(page, 'onclick="scale(this);" src="', '"', pos) yield Message.Url, img_url, self.get_file_metadata(img_url) def get_job_metadata(self): """Collect metadata for extractor-job""" gallery_key = self.match.group(2) - text = self.request(self.url_base + "/gallery/" + gallery_key).text - _ , pos = self.extract(text, " ", " <", pos) - count, pos = self.extract(text, "'>", " images", pos) - url , pos = self.extract(text, " ", " <", pos) + count, pos = text.extract(page, "'>", " images", pos) + url , pos = text.extract(page, "', page):
-            text = self.request(self.url_base + match.group(1)).text
-            yield Message.Url, self.get_file_url(text), self.get_file_metadata(text)
+            imgpage = self.request(self.url_base + match.group(1)).text
+            yield Message.Url, self.get_file_url(imgpage), self.get_file_metadata(imgpage)
 
     def get_job_metadata(self, page):
           ', ' of ') - data["image-key"], pos = self.extract(text, '/i.imgbox.com/', '?download', pos) - data["name"] , pos = self.extract(text, ' title="', '"', pos) + data["num"] , pos = text.extract(page, '   ', ' of ') + data["image-key"], pos = text.extract(page, '/i.imgbox.com/', '?download', pos) + data["name"] , pos = text.extract(page, ' title="', '"', pos) return data - def get_file_url(self, text): + def get_file_url(self, page): """Extract download-url""" base = "http://i.imgbox.com/" - path, _ = self.extract(text, base, '"') + path, _ = text.extract(page, base, '"') return base + path diff --git a/gallery_dl/extractor/imgchili.py b/gallery_dl/extractor/imgchili.py index 40932912..9e591e57 100644 --- a/gallery_dl/extractor/imgchili.py +++ b/gallery_dl/extractor/imgchili.py @@ -8,9 +8,8 @@ """Extract images from albums at http://imgchili.net/""" -from .common import SequentialExtractor -from .common import Message -from .common import filename_from_url +from .common import SequentialExtractor, Message +from .. import text import re info = { @@ -42,7 +41,7 @@ class ImgchiliExtractor(SequentialExtractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" - title = self.extract(page, "
<h1>", "</h1>")[0]
+        title = text.extract(page, "<h1>", "</h1>
")[0] return { "category": info["category"], "title": title, diff --git a/gallery_dl/extractor/mangareader.py b/gallery_dl/extractor/mangareader.py index 62575308..60ed473a 100644 --- a/gallery_dl/extractor/mangareader.py +++ b/gallery_dl/extractor/mangareader.py @@ -8,10 +8,8 @@ """Extract manga pages from http://www.mangareader.net/""" -from .common import AsynchronousExtractor -from .common import Message -from .common import unescape, filename_from_url -from urllib.parse import unquote +from .common import AsynchronousExtractor, Message +from .. import text import os.path import re @@ -47,7 +45,7 @@ class MangaReaderExtractor(AsynchronousExtractor): def get_page_metadata(self, page_url): """Collect next url, image-url and metadata for one manga-page""" page = self.request(page_url).text - extr = self.extract + extr = text.extract width = None descr, pos = extr(page, '', '') - manga , pos = self.extract(page, 'title="', '"', pos) - chapter , pos = self.extract(page, '">', '', pos) - json_data, pos = self.extract(page, 'var pages = ', ';\r\n', pos) + _ , pos = text.extract(page, '

', '') + manga , pos = text.extract(page, 'title="', '"', pos) + chapter , pos = text.extract(page, '">', '', pos) + json_data, pos = text.extract(page, 'var pages = ', ';\r\n', pos) match = re.match(r"(Chapter (\d+)([^:+]*)(?:: (.*))?|[^:]+)", chapter) return { "category": info["category"], - "manga": unescape(manga), + "manga": text.unescape(manga), "chapter": match.group(2) or match.group(1), "chapter-minor": match.group(3) or "", "language": "English", - "title": unescape(match.group(4) or ""), + "title": text.unescape(match.group(4) or ""), }, json.loads(json_data) From 9986a5ffb50f3ffc5e9d91f6f8fe36fdf278d131 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 3 Oct 2015 20:23:55 +0200 Subject: [PATCH 10/15] json-based config module --- gallery_dl/config.py | 90 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 gallery_dl/config.py diff --git a/gallery_dl/config.py b/gallery_dl/config.py new file mode 100644 index 00000000..549e40e9 --- /dev/null +++ b/gallery_dl/config.py @@ -0,0 +1,90 @@ +# -*- coding: utf-8 -*- + +# Copyright 2015 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Global configuration module""" + +import sys +import json +import os.path +import platform + +# -------------------------------------------------------------------- +# public interface + +def load(*files): + """Load JSON configuration files""" + configfiles = files or _default_configs + for conf in configfiles: + try: + path = os.path.expanduser(conf) + with open(path) as file: + confdict = json.load(file) + _config.update(confdict) + except FileNotFoundError: + continue + except json.decoder.JSONDecodeError as exception: + print("Error while loading '", path, "':", sep="", file=sys.stderr) + print(exception, file=sys.stderr) + +def clear(): + """Reset configuration to en empty state""" + globals()["_config"] = {} + +def get(key, default=None): + """Get the value of property 'key' or a default-value if it doenst exist""" + conf = _config + try: + for k in key.split("."): + conf = conf[k] + return conf + except (KeyError, AttributeError): + return default + +def interpolate(key, default=None): + """Interpolate the value of 'key'""" + conf = _config + keys = key.split(".") + try: + for k in keys: + default = conf.get(keys[-1], default) + conf = conf[k] + return conf + except (KeyError, AttributeError): + return default + +def set(key, value): + """Set the value of property 'key' for this session""" + conf = _config + keys = key.split(".") + for k in keys[:-1]: + try: + conf = conf[k] + except KeyError: + temp = {} + conf[k] = temp + conf = temp + conf[keys[-1]] = value + + +# -------------------------------------------------------------------- +# internals + +_config = {} + +if platform.system() == "Windows": + _default_configs = [ + r"~\.config\gallery-dl.conf", + r"~\.gallery-dl.conf", + ] +else: + _default_configs = [ + "/etc/gallery-dl.conf", + "~/.config/gallery/config.json", + "~/.config/gallery-dl.conf", + "~/.gallery-dl.conf", + ] From 7ac106096f8da32aa20bc2c4f18730d88345786a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 3 Oct 2015 20:24:28 +0200 Subject: [PATCH 11/15] add tests for config-module --- test/test_config.py | 49 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 test/test_config.py diff 
--git a/test/test_config.py b/test/test_config.py new file mode 100644 index 00000000..3aaeb42c --- /dev/null +++ b/test/test_config.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2015 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +import unittest +import gallery_dl.config as config +import os +import tempfile + +class TestConfig(unittest.TestCase): + + def setUp(self): + fd, self._configfile = tempfile.mkstemp() + with os.fdopen(fd, "w") as file: + file.write('{"a": "1", "b": {"c": "text"}}') + config.load(self._configfile) + + def tearDown(self): + config.clear() + os.remove(self._configfile) + + def test_get(self): + self.assertEqual(config.get("a"), "1") + self.assertEqual(config.get("b.c"), "text") + self.assertEqual(config.get("d"), None) + self.assertEqual(config.get("e.f.g", 123), 123) + + def test_set(self): + config.set("b.c", [1, 2, 3]) + config.set("e.f.g", 234) + self.assertEqual(config.get("b.c"), [1, 2, 3]) + self.assertEqual(config.get("e.f.g"), 234) + + def test_interpolate(self): + self.assertEqual(config.interpolate("a"), "1") + self.assertEqual(config.interpolate("b.a"), "1") + self.assertEqual(config.interpolate("b.c", "2"), "text") + self.assertEqual(config.interpolate("b.d", "2"), "2") + config.set("d", 123) + self.assertEqual(config.interpolate("b.d", "2"), 123) + self.assertEqual(config.interpolate("d.d", "2"), 123) + +if __name__ == '__main__': + unittest.main() From 2026223ed10d1461ca7327b7b8aaa347b72c5b1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 5 Oct 2015 12:42:42 +0200 Subject: [PATCH 12/15] change argument format for config-calls --- gallery_dl/config.py | 10 ++++------ test/test_config.py | 30 +++++++++++++++--------------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/gallery_dl/config.py b/gallery_dl/config.py index 549e40e9..9a02f307 100644 --- a/gallery_dl/config.py +++ b/gallery_dl/config.py @@ -35,20 +35,19 @@ def clear(): """Reset configuration to en empty state""" globals()["_config"] = {} -def get(key, default=None): +def get(keys, default=None): """Get the value of property 'key' or a default-value if it doenst exist""" conf = _config try: - for k in key.split("."): + for k in keys: conf = conf[k] return conf except (KeyError, AttributeError): return default -def interpolate(key, default=None): +def interpolate(keys, default=None): """Interpolate the value of 'key'""" conf = _config - keys = key.split(".") try: for k in keys: default = conf.get(keys[-1], default) @@ -57,10 +56,9 @@ def interpolate(key, default=None): except (KeyError, AttributeError): return default -def set(key, value): +def set(keys, value): """Set the value of property 'key' for this session""" conf = _config - keys = key.split(".") for k in keys[:-1]: try: conf = conf[k] diff --git a/test/test_config.py b/test/test_config.py index 3aaeb42c..f8017626 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -25,25 +25,25 @@ class TestConfig(unittest.TestCase): os.remove(self._configfile) def test_get(self): - self.assertEqual(config.get("a"), "1") - self.assertEqual(config.get("b.c"), "text") - self.assertEqual(config.get("d"), None) - self.assertEqual(config.get("e.f.g", 123), 123) + self.assertEqual(config.get(["a"]), "1") + self.assertEqual(config.get(["b", "c"]), "text") + self.assertEqual(config.get(["d"]), None) + 
self.assertEqual(config.get(["e", "f", "g"], 123), 123) def test_set(self): - config.set("b.c", [1, 2, 3]) - config.set("e.f.g", 234) - self.assertEqual(config.get("b.c"), [1, 2, 3]) - self.assertEqual(config.get("e.f.g"), 234) + config.set(["b", "c"], [1, 2, 3]) + config.set(["e", "f", "g"], value=234) + self.assertEqual(config.get(["b", "c"]), [1, 2, 3]) + self.assertEqual(config.get(["e", "f", "g"]), 234) def test_interpolate(self): - self.assertEqual(config.interpolate("a"), "1") - self.assertEqual(config.interpolate("b.a"), "1") - self.assertEqual(config.interpolate("b.c", "2"), "text") - self.assertEqual(config.interpolate("b.d", "2"), "2") - config.set("d", 123) - self.assertEqual(config.interpolate("b.d", "2"), 123) - self.assertEqual(config.interpolate("d.d", "2"), 123) + self.assertEqual(config.interpolate(["a"]), "1") + self.assertEqual(config.interpolate(["b", "a"]), "1") + self.assertEqual(config.interpolate(["b", "c"], "2"), "text") + self.assertEqual(config.interpolate(["b", "d"], "2"), "2") + config.set(["d"], 123) + self.assertEqual(config.interpolate(["b", "d"], "2"), 123) + self.assertEqual(config.interpolate(["d", "d"], "2"), 123) if __name__ == '__main__': unittest.main() From 608d3193a9f8137e4d5f116251dde23aa8e358c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 5 Oct 2015 13:26:38 +0200 Subject: [PATCH 13/15] use new config-module in downloader --- gallery_dl/__init__.py | 16 +++------------- gallery_dl/download.py | 39 +++++++++++++++++---------------------- 2 files changed, 20 insertions(+), 35 deletions(-) diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index aed11666..b0cebaed 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -17,9 +17,7 @@ __email__ = "mike_faehrmann@web.de" import os import sys import argparse -import configparser - -from .download import DownloadManager +from . import config, download def parse_cmdline_options(): parser = argparse.ArgumentParser( @@ -41,18 +39,10 @@ def parse_cmdline_options(): ) return parser.parse_args() -def parse_config_file(path): - config = configparser.ConfigParser( - interpolation=None, - ) - config.optionxform = lambda opt: opt - config.read(os.path.expanduser(path)) - return config - def main(): + config.load() opts = parse_cmdline_options() - conf = parse_config_file(opts.config) - dlmgr = DownloadManager(opts, conf) + dlmgr = download.DownloadManager(opts) try: for url in opts.urls: diff --git a/gallery_dl/download.py b/gallery_dl/download.py index 96ababa5..7fdfacfd 100644 --- a/gallery_dl/download.py +++ b/gallery_dl/download.py @@ -12,14 +12,14 @@ import re import importlib from .extractor.common import Message +from . 
import config class DownloadManager(): - def __init__(self, opts, config): + def __init__(self, opts): self.opts = opts - self.config = config self.modules = {} - self.extractors = ExtractorFinder(config) + self.extractors = ExtractorFinder() def add(self, url): job = DownloadJob(self, url) @@ -38,7 +38,7 @@ class DownloadManager(): if self.opts.dest: return self.opts.dest else: - return self.config.get("general", "destination", fallback="/tmp/") + return config.get(("base-directory",), default="/tmp/") class DownloadJob(): @@ -50,16 +50,14 @@ class DownloadJob(): return self.directory = mngr.get_base_directory() self.downloaders = {} - self.filename_fmt = mngr.config.get( - self.info["category"], "filename", - fallback=self.info["filename"] + self.filename_fmt = config.get( + ("extractor", self.info["category"], "filename"), + default=self.info["filename"] + ) + segments = config.get( + ("extractor", self.info["category"], "directory"), + default=self.info["directory"] ) - try: - segments = mngr.config.get( - self.info["category"], "directory" - ).split("/") - except Exception: - segments = self.info["directory"] self.directory_fmt = os.path.join(*segments) def run(self): @@ -144,26 +142,23 @@ class DownloadJob(): class ExtractorFinder(): - def __init__(self, config): - self.config = config - def get_for_url(self, url): """Get an extractor-instance suitable for 'url'""" name, match = self.find_pattern_match(url) if match: module = importlib.import_module(".extractor." + name, __package__) klass = getattr(module, module.info["extractor"]) - return klass(match, self.config), module.info + return klass(match, {}), module.info else: print("no suitable extractor found") return None, None def find_pattern_match(self, url): - """Find a pattern, that matches 'url', and return the (category,match) tuple""" - for category in self.config: - for key, value in self.config[category].items(): - if key.startswith("regex"): - match = re.match(value, url) + """Find a pattern that matches 'url' and return the (category,match) tuple""" + for category in config.get(("extractor",)): + patterns = config.get(("extractor", category, "pattern"), default=[]) + for pattern in patterns: + match = re.match(pattern, url) if match: return category, match for category, info in self.extractor_metadata(): From 3c13548f29502398b1cf785ecc44c3df57a696a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 5 Oct 2015 15:35:48 +0200 Subject: [PATCH 14/15] rewrite extractors to use config-module --- gallery_dl/download.py | 10 ++++------ gallery_dl/extractor/3dbooru.py | 4 ++-- gallery_dl/extractor/4chan.py | 4 ++-- gallery_dl/extractor/8chan.py | 4 ++-- gallery_dl/extractor/batoto.py | 4 ++-- gallery_dl/extractor/booru.py | 4 ++-- gallery_dl/extractor/chan.py | 5 ++--- gallery_dl/extractor/common.py | 8 ++++---- gallery_dl/extractor/danbooru.py | 4 ++-- gallery_dl/extractor/e621.py | 4 ++-- gallery_dl/extractor/gelbooru.py | 4 ++-- gallery_dl/extractor/imagebam.py | 4 ++-- gallery_dl/extractor/imgbox.py | 4 ++-- gallery_dl/extractor/imgchili.py | 4 ++-- gallery_dl/extractor/mangareader.py | 4 ++-- gallery_dl/extractor/nijie.py | 20 +++++++++++--------- gallery_dl/extractor/pixiv.py | 11 +++++------ gallery_dl/extractor/redhawkscans.py | 4 ++-- gallery_dl/extractor/yandere.py | 4 ++-- 19 files changed, 54 insertions(+), 56 deletions(-) diff --git a/gallery_dl/download.py b/gallery_dl/download.py index 7fdfacfd..f1ba96d6 100644 --- a/gallery_dl/download.py +++ b/gallery_dl/download.py @@ -112,13 +112,11 @@ 
class DownloadJob(): scheme = url[:pos] if pos != -1 else "http" if scheme == "https": scheme = "http" - downloader = self.downloaders.get(scheme) if downloader is None: module = self.mngr.get_downloader_module(scheme) downloader = module.Downloader() self.downloaders[scheme] = downloader - return downloader @staticmethod @@ -148,7 +146,7 @@ class ExtractorFinder(): if match: module = importlib.import_module(".extractor." + name, __package__) klass = getattr(module, module.info["extractor"]) - return klass(match, {}), module.info + return klass(match), module.info else: print("no suitable extractor found") return None, None @@ -158,9 +156,9 @@ class ExtractorFinder(): for category in config.get(("extractor",)): patterns = config.get(("extractor", category, "pattern"), default=[]) for pattern in patterns: - match = re.match(pattern, url) - if match: - return category, match + match = re.match(pattern, url) + if match: + return category, match for category, info in self.extractor_metadata(): for pattern in info["pattern"]: match = re.match(pattern, url) diff --git a/gallery_dl/extractor/3dbooru.py b/gallery_dl/extractor/3dbooru.py index a17b954c..665c1e01 100644 --- a/gallery_dl/extractor/3dbooru.py +++ b/gallery_dl/extractor/3dbooru.py @@ -22,8 +22,8 @@ info = { class ThreeDeeBooruExtractor(JSONBooruExtractor): - def __init__(self, match, config): - JSONBooruExtractor.__init__(self, match, config, info) + def __init__(self, match): + JSONBooruExtractor.__init__(self, match, info) self.api_url = "http://behoimi.org/post/index.json" self.headers = { "Referer": "http://behoimi.org/post/show/", diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py index 028ab7de..9aab90a2 100644 --- a/gallery_dl/extractor/4chan.py +++ b/gallery_dl/extractor/4chan.py @@ -25,8 +25,8 @@ class FourChanExtractor(ChanExtractor): api_url = "https://a.4cdn.org/{board}/thread/{thread}.json" file_url = "https://i.4cdn.org/{board}/{tim}{ext}" - def __init__(self, match, config): + def __init__(self, match): ChanExtractor.__init__( - self, config, info["category"], + self, info["category"], match.group(1), match.group(2) ) diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py index 559951fa..43d34de5 100644 --- a/gallery_dl/extractor/8chan.py +++ b/gallery_dl/extractor/8chan.py @@ -25,8 +25,8 @@ class InfinityChanExtractor(ChanExtractor): api_url = "https://8ch.net/{board}/res/{thread}.json" file_url = "https://media.8ch.net/{board}/src/{tim}{ext}" - def __init__(self, match, config): + def __init__(self, match): ChanExtractor.__init__( - self, config, info["category"], + self, info["category"], match.group(1), match.group(2) ) diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py index 65bc7c3d..640df8ac 100644 --- a/gallery_dl/extractor/batoto.py +++ b/gallery_dl/extractor/batoto.py @@ -27,8 +27,8 @@ class BatotoExtractor(AsynchronousExtractor): url_base = "http://bato.to/read/_/" - def __init__(self, match, config): - AsynchronousExtractor.__init__(self, config) + def __init__(self, match): + AsynchronousExtractor.__init__(self) self.chapter_id = match.group(1) def items(self): diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py index f72bc789..14629fd6 100644 --- a/gallery_dl/extractor/booru.py +++ b/gallery_dl/extractor/booru.py @@ -19,8 +19,8 @@ class BooruExtractor(SequentialExtractor): api_url = "" - def __init__(self, match, config, info): - SequentialExtractor.__init__(self, config) + def __init__(self, match, info): + 
SequentialExtractor.__init__(self) self.info = info self.tags = text.unquote(match.group(1)) self.page = "page" diff --git a/gallery_dl/extractor/chan.py b/gallery_dl/extractor/chan.py index 2d2b6fb4..c6389314 100644 --- a/gallery_dl/extractor/chan.py +++ b/gallery_dl/extractor/chan.py @@ -10,15 +10,14 @@ from .common import SequentialExtractor, Message from .. import text -import re class ChanExtractor(SequentialExtractor): api_url = "" file_url = "" - def __init__(self, config, category, board, thread): - SequentialExtractor.__init__(self, config) + def __init__(self, category, board, thread): + SequentialExtractor.__init__(self) self.metadata = { "category": category, "board": board, diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index b364d870..4d5b96a9 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -12,7 +12,7 @@ import time import queue import requests import threading -import html.parser +from .. import config class Message(): @@ -47,15 +47,15 @@ class Extractor(): class SequentialExtractor(Extractor): - def __init__(self, _): + def __init__(self): Extractor.__init__(self) class AsynchronousExtractor(Extractor): - def __init__(self, config): + def __init__(self): Extractor.__init__(self) - queue_size = int(config.get("general", "queue-size", fallback=5)) + queue_size = int(config.get(("queue-size",), default=5)) self.__queue = queue.Queue(maxsize=queue_size) self.__thread = threading.Thread(target=self.async_items, daemon=True) diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 3e94cd65..5024020f 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -22,6 +22,6 @@ info = { class DanbooruExtractor(JSONBooruExtractor): - def __init__(self, match, config): - JSONBooruExtractor.__init__(self, match, config, info) + def __init__(self, match): + JSONBooruExtractor.__init__(self, match, info) self.api_url = "https://danbooru.donmai.us/posts.json" diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py index 3851e447..af4971e8 100644 --- a/gallery_dl/extractor/e621.py +++ b/gallery_dl/extractor/e621.py @@ -23,6 +23,6 @@ info = { class E621Extractor(JSONBooruExtractor): - def __init__(self, match, config): - JSONBooruExtractor.__init__(self, match, config, info) + def __init__(self, match): + JSONBooruExtractor.__init__(self, match, info) self.api_url = "https://e621.net/post/index.json" diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py index a95ed82e..87244904 100644 --- a/gallery_dl/extractor/gelbooru.py +++ b/gallery_dl/extractor/gelbooru.py @@ -22,8 +22,8 @@ info = { class GelbooruExtractor(XMLBooruExtractor): - def __init__(self, match, config): - XMLBooruExtractor.__init__(self, match, config, info) + def __init__(self, match): + XMLBooruExtractor.__init__(self, match, info) self.api_url = "http://gelbooru.com/" self.params = {"page":"dapi", "s":"post", "q":"index", "tags":self.tags} diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py index c89721f2..809eaa1a 100644 --- a/gallery_dl/extractor/imagebam.py +++ b/gallery_dl/extractor/imagebam.py @@ -25,8 +25,8 @@ class ImagebamExtractor(AsynchronousExtractor): url_base = "http://www.imagebam.com" - def __init__(self, match, config): - AsynchronousExtractor.__init__(self, config) + def __init__(self, match): + AsynchronousExtractor.__init__(self) self.match = match self.num = 0 self.metadata = {} diff --git 
a/gallery_dl/extractor/imgbox.py b/gallery_dl/extractor/imgbox.py index 3de51f27..f466c96a 100644 --- a/gallery_dl/extractor/imgbox.py +++ b/gallery_dl/extractor/imgbox.py @@ -26,8 +26,8 @@ class ImgboxExtractor(AsynchronousExtractor): url_base = "http://imgbox.com" - def __init__(self, match, config): - AsynchronousExtractor.__init__(self, config) + def __init__(self, match): + AsynchronousExtractor.__init__(self) self.key = match.group(1) self.metadata = {} diff --git a/gallery_dl/extractor/imgchili.py b/gallery_dl/extractor/imgchili.py index 9e591e57..8d164764 100644 --- a/gallery_dl/extractor/imgchili.py +++ b/gallery_dl/extractor/imgchili.py @@ -24,8 +24,8 @@ info = { class ImgchiliExtractor(SequentialExtractor): - def __init__(self, match, config): - SequentialExtractor.__init__(self, config) + def __init__(self, match): + SequentialExtractor.__init__(self) self.match = match self.num = 0 diff --git a/gallery_dl/extractor/mangareader.py b/gallery_dl/extractor/mangareader.py index 60ed473a..57fd3efc 100644 --- a/gallery_dl/extractor/mangareader.py +++ b/gallery_dl/extractor/mangareader.py @@ -28,8 +28,8 @@ class MangaReaderExtractor(AsynchronousExtractor): url_base = "http://www.mangareader.net" - def __init__(self, match, config): - AsynchronousExtractor.__init__(self, config) + def __init__(self, match): + AsynchronousExtractor.__init__(self) self.part = match.group(1) def items(self): diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index afeefd60..7c309fbf 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -9,7 +9,7 @@ """Extract images from https://nijie.info/""" from .common import AsynchronousExtractor, Message -from ..text import filename_from_url +from .. import config, text import re info = { @@ -26,8 +26,8 @@ class NijieExtractor(AsynchronousExtractor): popup_url = "https://nijie.info/view_popup.php?id=" - def __init__(self, match, config): - AsynchronousExtractor.__init__(self, config) + def __init__(self, match): + AsynchronousExtractor.__init__(self) self.artist_id = match.group(1) self.artist_url = ( "https://nijie.info/members_illust.php?id=" @@ -36,7 +36,9 @@ class NijieExtractor(AsynchronousExtractor): self.session.headers["Referer"] = self.artist_url self.session.cookies["R18"] = "1" self.session.cookies["nijie_referer"] = "nijie.info" - self.session.cookies.update(config["nijie-cookies"]) + self.session.cookies.update( + config.get(("extractor", info["category"], "cookies")) + ) def items(self): data = self.get_job_metadata() @@ -56,19 +58,19 @@ class NijieExtractor(AsynchronousExtractor): def get_image_ids(self): """Collect all image-ids for a specific artist""" - text = self.request(self.artist_url).text + page = self.request(self.artist_url).text regex = r' Date: Mon, 5 Oct 2015 15:55:11 +0200 Subject: [PATCH 15/15] change example-config to json --- config | 18 ------------------ config.json | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 18 deletions(-) delete mode 100644 config create mode 100644 config.json diff --git a/config b/config deleted file mode 100644 index dec8b374..00000000 --- a/config +++ /dev/null @@ -1,18 +0,0 @@ -[pixiv] -username = XXXXX -password = XXXXX - -[exhentai-cookies] -ipb_member_id = XXXXX -ipb_pass_hash = XXXXX - -[nijie-cookies] -NIJIEIJIEID = XXXXX -nijie_email_hash = XXXXX -nijie_login_hash = XXXXX - -[danbooru] -regex0 = d(?:anbooru)?[.:-_](\w.+) - -[gelbooru] -regex0 = g(?:elbooru)?[.:-_](\w.+) diff --git a/config.json b/config.json new 
file mode 100644 index 00000000..deba0cef --- /dev/null +++ b/config.json @@ -0,0 +1,39 @@ +{ + "base-directory": "/tmp/", + "extractor": + { + "pixiv": + { + "directory": ["{category}", "{artist-id}"], + "username": "XXX", + "password": "XXX" + }, + "nijie": + { + "cookies": + { + "NIJIEIJIEID": "XXX", + "nijie_email_hash": "XXX", + "nijie_login_hash": "XXX" + } + }, + "4chan": + { + "directory": ["{category}", "{board}", "{thread} - {title}"] + }, + "danbooru": + { + "pattern": ["d(?:anbooru)?[.:-_](\\w.+)"], + "filename": "{category}_{id:>07}_{md5}.{extension}" + }, + "gelbooru": + { + "pattern": ["g(?:elbooru)?[.:-_](\\w.+)"], + "filename": "{category}_{id:>07}_{md5}.{extension}" + }, + "e621": + { + "pattern": ["e(?:621)?[.:-_](\\w.+)"] + } + } +}
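
A short usage sketch, not part of any patch above: it illustrates how the config module added in the
earlier patches resolves values from a file like this example config.json. The explicit file path,
the fallback format string and the pixiv username below are assumptions made purely for illustration.

    from gallery_dl import config

    # Load the example file explicitly; calling load() without arguments
    # would try the default locations defined in gallery_dl/config.py.
    config.load("config.json")

    # Plain lookup with a fallback, as DownloadManager does for the
    # base directory.
    base = config.get(("base-directory",), default="/tmp/")

    # Per-extractor lookup, as DownloadJob does for filename formats
    # (the fallback string here is made up for this example).
    fmt = config.get(("extractor", "danbooru", "filename"),
                     default="{category}_{id}.{extension}")

    # Session-only override; nothing is written back to the file.
    config.set(("extractor", "pixiv", "username"), "my-account")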