1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 02:32:33 +01:00

Merge branch 'config'

This commit is contained in:
Mike Fährmann 2015-10-05 17:04:43 +02:00
commit 88442ad26d
24 changed files with 248 additions and 107 deletions

18
config
View File

@ -1,18 +0,0 @@
[pixiv]
username = XXXXX
password = XXXXX
[exhentai-cookies]
ipb_member_id = XXXXX
ipb_pass_hash = XXXXX
[nijie-cookies]
NIJIEIJIEID = XXXXX
nijie_email_hash = XXXXX
nijie_login_hash = XXXXX
[danbooru]
regex0 = d(?:anbooru)?[.:-_](\w.+)
[gelbooru]
regex0 = g(?:elbooru)?[.:-_](\w.+)

39
config.json Normal file
View File

@ -0,0 +1,39 @@
{
"base-directory": "/tmp/",
"extractor":
{
"pixiv":
{
"directory": ["{category}", "{artist-id}"],
"username": "XXX",
"password": "XXX"
},
"nijie":
{
"cookies":
{
"NIJIEIJIEID": "XXX",
"nijie_email_hash": "XXX",
"nijie_login_hash": "XXX"
}
},
"4chan":
{
"directory": ["{category}", "{board}", "{thread} - {title}"]
},
"danbooru":
{
"pattern": ["d(?:anbooru)?[.:-_](\\w.+)"],
"filename": "{category}_{id:>07}_{md5}.{extension}"
},
"gelbooru":
{
"pattern": ["g(?:elbooru)?[.:-_](\\w.+)"],
"filename": "{category}_{id:>07}_{md5}.{extension}"
},
"e621":
{
"pattern": ["e(?:621)?[.:-_](\\w.+)"]
}
}
}

View File

@ -17,9 +17,7 @@ __email__ = "mike_faehrmann@web.de"
import os
import sys
import argparse
import configparser
from .download import DownloadManager
from . import config, download
def parse_cmdline_options():
parser = argparse.ArgumentParser(
@ -41,18 +39,10 @@ def parse_cmdline_options():
)
return parser.parse_args()
def parse_config_file(path):
config = configparser.ConfigParser(
interpolation=None,
)
config.optionxform = lambda opt: opt
config.read(os.path.expanduser(path))
return config
def main():
config.load()
opts = parse_cmdline_options()
conf = parse_config_file(opts.config)
dlmgr = DownloadManager(opts, conf)
dlmgr = download.DownloadManager(opts)
try:
for url in opts.urls:

88
gallery_dl/config.py Normal file
View File

@ -0,0 +1,88 @@
# -*- coding: utf-8 -*-
# Copyright 2015 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Global configuration module"""
import sys
import json
import os.path
import platform
# --------------------------------------------------------------------
# public interface
def load(*files):
    """Load JSON configuration files into the global configuration.

    Every given path is user-expanded, parsed as JSON and merged into the
    module-level configuration with ``dict.update``.  This is a shallow
    merge: a top-level key from a later file replaces the same key from
    an earlier one.  Without arguments the default locations listed in
    ``_default_configs`` are tried in order.

    Files that do not exist are skipped silently.  Files that cannot be
    decoded or parsed, or whose top-level JSON value is not an object,
    are reported on stderr and skipped.
    """
    for conf in files or _default_configs:
        path = os.path.expanduser(conf)
        try:
            # JSON text is UTF-8; do not depend on the locale's encoding
            with open(path, encoding="utf-8") as file:
                confdict = json.load(file)
        except FileNotFoundError:
            continue
        except (json.decoder.JSONDecodeError, UnicodeDecodeError) as exception:
            print("Error while loading '", path, "':",
                  sep="", file=sys.stderr)
            print(exception, file=sys.stderr)
            continue
        if not isinstance(confdict, dict):
            # dict.update() would fail with an unrelated TypeError/ValueError
            print("Error while loading '", path, "':",
                  sep="", file=sys.stderr)
            print("top-level JSON value is not an object", file=sys.stderr)
            continue
        _config.update(confdict)
def clear():
    """Reset the configuration to an empty state.

    The module-level name is rebound to a brand-new dict; the previous
    dict object itself is left unmodified.
    """
    global _config
    _config = {}
def get(keys, default=None):
    """Return the value found by following 'keys', or 'default'.

    'keys' is an iterable of keys that is applied one after another,
    starting at the top-level configuration dict.  An empty 'keys'
    returns the whole configuration.

    'default' is returned when the path cannot be followed to its end:
    a key is missing, an index is out of range, or an intermediate value
    is not subscriptable with the given key (for example a string or a
    number where a nested dict was expected).
    """
    conf = _config
    try:
        for k in keys:
            conf = conf[k]
    except (KeyError, IndexError, TypeError, AttributeError):
        # TypeError covers subscripting scalars/strings with a name;
        # AttributeError is kept for backward compatibility
        return default
    return conf
def interpolate(keys, default=None):
    """Look up 'keys', falling back to the last key on outer levels.

    The configuration is walked along 'keys'.  Before each step down,
    the current level is asked for an entry named like the final key;
    if it has one, that entry becomes the new fallback.  The value at
    the end of the path is returned when the whole path resolves,
    otherwise the most recently found fallback (initially 'default').
    """
    node = _config
    fallback = default
    try:
        for name in keys:
            fallback = node.get(keys[-1], fallback)
            node = node[name]
    except (KeyError, AttributeError):
        return fallback
    else:
        return node
def set(keys, value):
    """Store 'value' under the path 'keys' for the current session.

    All keys except the last one select nested dicts, which are created
    on demand when missing; the last key receives 'value'.
    """
    node = _config
    for name in keys[:-1]:
        try:
            child = node[name]
        except KeyError:
            # missing level: insert a fresh dict and descend into it
            child = node[name] = {}
        node = child
    node[keys[-1]] = value
# --------------------------------------------------------------------
# internals
# global configuration; filled by load() and modified by set()
_config = {}

# configuration file locations that load() tries when called without
# arguments; the list depends on the platform reported at import time
_default_configs = (
    [
        r"~\.config\gallery-dl.conf",
        r"~\.gallery-dl.conf",
    ]
    if platform.system() == "Windows" else
    [
        "/etc/gallery-dl.conf",
        "~/.config/gallery/config.json",
        "~/.config/gallery-dl.conf",
        "~/.gallery-dl.conf",
    ]
)

View File

@ -12,14 +12,14 @@ import re
import importlib
from .extractor.common import Message
from . import config
class DownloadManager():
def __init__(self, opts, config):
def __init__(self, opts):
self.opts = opts
self.config = config
self.modules = {}
self.extractors = ExtractorFinder(config)
self.extractors = ExtractorFinder()
def add(self, url):
job = DownloadJob(self, url)
@ -38,7 +38,7 @@ class DownloadManager():
if self.opts.dest:
return self.opts.dest
else:
return self.config.get("general", "destination", fallback="/tmp/")
return config.get(("base-directory",), default="/tmp/")
class DownloadJob():
@ -50,16 +50,14 @@ class DownloadJob():
return
self.directory = mngr.get_base_directory()
self.downloaders = {}
self.filename_fmt = mngr.config.get(
self.info["category"], "filename",
fallback=self.info["filename"]
self.filename_fmt = config.get(
("extractor", self.info["category"], "filename"),
default=self.info["filename"]
)
segments = config.get(
("extractor", self.info["category"], "directory"),
default=self.info["directory"]
)
try:
segments = mngr.config.get(
self.info["category"], "directory"
).split("/")
except Exception:
segments = self.info["directory"]
self.directory_fmt = os.path.join(*segments)
def run(self):
@ -114,13 +112,11 @@ class DownloadJob():
scheme = url[:pos] if pos != -1 else "http"
if scheme == "https":
scheme = "http"
downloader = self.downloaders.get(scheme)
if downloader is None:
module = self.mngr.get_downloader_module(scheme)
downloader = module.Downloader()
self.downloaders[scheme] = downloader
return downloader
@staticmethod
@ -144,28 +140,25 @@ class DownloadJob():
class ExtractorFinder():
def __init__(self, config):
self.config = config
def get_for_url(self, url):
"""Get an extractor-instance suitable for 'url'"""
name, match = self.find_pattern_match(url)
if match:
module = importlib.import_module(".extractor." + name, __package__)
klass = getattr(module, module.info["extractor"])
return klass(match, self.config), module.info
return klass(match), module.info
else:
print("no suitable extractor found")
return None, None
def find_pattern_match(self, url):
"""Find a pattern, that matches 'url', and return the (category,match) tuple"""
for category in self.config:
for key, value in self.config[category].items():
if key.startswith("regex"):
match = re.match(value, url)
if match:
return category, match
"""Find a pattern that matches 'url' and return the (category,match) tuple"""
for category in config.get(("extractor",)):
patterns = config.get(("extractor", category, "pattern"), default=[])
for pattern in patterns:
match = re.match(pattern, url)
if match:
return category, match
for category, info in self.extractor_metadata():
for pattern in info["pattern"]:
match = re.match(pattern, url)

View File

@ -22,8 +22,8 @@ info = {
class ThreeDeeBooruExtractor(JSONBooruExtractor):
def __init__(self, match, config):
JSONBooruExtractor.__init__(self, match, config, info)
def __init__(self, match):
JSONBooruExtractor.__init__(self, match, info)
self.api_url = "http://behoimi.org/post/index.json"
self.headers = {
"Referer": "http://behoimi.org/post/show/",

View File

@ -25,8 +25,8 @@ class FourChanExtractor(ChanExtractor):
api_url = "https://a.4cdn.org/{board}/thread/{thread}.json"
file_url = "https://i.4cdn.org/{board}/{tim}{ext}"
def __init__(self, match, config):
def __init__(self, match):
ChanExtractor.__init__(
self, config, info["category"],
self, info["category"],
match.group(1), match.group(2)
)

View File

@ -25,8 +25,8 @@ class InfinityChanExtractor(ChanExtractor):
api_url = "https://8ch.net/{board}/res/{thread}.json"
file_url = "https://media.8ch.net/{board}/src/{tim}{ext}"
def __init__(self, match, config):
def __init__(self, match):
ChanExtractor.__init__(
self, config, info["category"],
self, info["category"],
match.group(1), match.group(2)
)

View File

@ -27,8 +27,8 @@ class BatotoExtractor(AsynchronousExtractor):
url_base = "http://bato.to/read/_/"
def __init__(self, match, config):
AsynchronousExtractor.__init__(self, config)
def __init__(self, match):
AsynchronousExtractor.__init__(self)
self.chapter_id = match.group(1)
def items(self):

View File

@ -19,8 +19,8 @@ class BooruExtractor(SequentialExtractor):
api_url = ""
def __init__(self, match, config, info):
SequentialExtractor.__init__(self, config)
def __init__(self, match, info):
SequentialExtractor.__init__(self)
self.info = info
self.tags = text.unquote(match.group(1))
self.page = "page"

View File

@ -10,15 +10,14 @@
from .common import SequentialExtractor, Message
from .. import text
import re
class ChanExtractor(SequentialExtractor):
api_url = ""
file_url = ""
def __init__(self, config, category, board, thread):
SequentialExtractor.__init__(self, config)
def __init__(self, category, board, thread):
SequentialExtractor.__init__(self)
self.metadata = {
"category": category,
"board": board,

View File

@ -12,7 +12,7 @@ import time
import queue
import requests
import threading
import html.parser
from .. import config
class Message():
@ -47,15 +47,15 @@ class Extractor():
class SequentialExtractor(Extractor):
def __init__(self, _):
def __init__(self):
Extractor.__init__(self)
class AsynchronousExtractor(Extractor):
def __init__(self, config):
def __init__(self):
Extractor.__init__(self)
queue_size = int(config.get("general", "queue-size", fallback=5))
queue_size = int(config.get(("queue-size",), default=5))
self.__queue = queue.Queue(maxsize=queue_size)
self.__thread = threading.Thread(target=self.async_items, daemon=True)

View File

@ -22,6 +22,6 @@ info = {
class DanbooruExtractor(JSONBooruExtractor):
def __init__(self, match, config):
JSONBooruExtractor.__init__(self, match, config, info)
def __init__(self, match):
JSONBooruExtractor.__init__(self, match, info)
self.api_url = "https://danbooru.donmai.us/posts.json"

View File

@ -23,6 +23,6 @@ info = {
class E621Extractor(JSONBooruExtractor):
def __init__(self, match, config):
JSONBooruExtractor.__init__(self, match, config, info)
def __init__(self, match):
JSONBooruExtractor.__init__(self, match, info)
self.api_url = "https://e621.net/post/index.json"

View File

@ -22,8 +22,8 @@ info = {
class GelbooruExtractor(XMLBooruExtractor):
def __init__(self, match, config):
XMLBooruExtractor.__init__(self, match, config, info)
def __init__(self, match):
XMLBooruExtractor.__init__(self, match, info)
self.api_url = "http://gelbooru.com/"
self.params = {"page":"dapi", "s":"post", "q":"index", "tags":self.tags}

View File

@ -25,8 +25,8 @@ class ImagebamExtractor(AsynchronousExtractor):
url_base = "http://www.imagebam.com"
def __init__(self, match, config):
AsynchronousExtractor.__init__(self, config)
def __init__(self, match):
AsynchronousExtractor.__init__(self)
self.match = match
self.num = 0
self.metadata = {}

View File

@ -26,8 +26,8 @@ class ImgboxExtractor(AsynchronousExtractor):
url_base = "http://imgbox.com"
def __init__(self, match, config):
AsynchronousExtractor.__init__(self, config)
def __init__(self, match):
AsynchronousExtractor.__init__(self)
self.key = match.group(1)
self.metadata = {}

View File

@ -24,8 +24,8 @@ info = {
class ImgchiliExtractor(SequentialExtractor):
def __init__(self, match, config):
SequentialExtractor.__init__(self, config)
def __init__(self, match):
SequentialExtractor.__init__(self)
self.match = match
self.num = 0

View File

@ -28,8 +28,8 @@ class MangaReaderExtractor(AsynchronousExtractor):
url_base = "http://www.mangareader.net"
def __init__(self, match, config):
AsynchronousExtractor.__init__(self, config)
def __init__(self, match):
AsynchronousExtractor.__init__(self)
self.part = match.group(1)
def items(self):

View File

@ -9,7 +9,7 @@
"""Extract images from https://nijie.info/"""
from .common import AsynchronousExtractor, Message
from ..text import filename_from_url
from .. import config, text
import re
info = {
@ -26,8 +26,8 @@ class NijieExtractor(AsynchronousExtractor):
popup_url = "https://nijie.info/view_popup.php?id="
def __init__(self, match, config):
AsynchronousExtractor.__init__(self, config)
def __init__(self, match):
AsynchronousExtractor.__init__(self)
self.artist_id = match.group(1)
self.artist_url = (
"https://nijie.info/members_illust.php?id="
@ -36,7 +36,9 @@ class NijieExtractor(AsynchronousExtractor):
self.session.headers["Referer"] = self.artist_url
self.session.cookies["R18"] = "1"
self.session.cookies["nijie_referer"] = "nijie.info"
self.session.cookies.update(config["nijie-cookies"])
self.session.cookies.update(
config.get(("extractor", info["category"], "cookies"))
)
def items(self):
data = self.get_job_metadata()
@ -56,19 +58,19 @@ class NijieExtractor(AsynchronousExtractor):
def get_image_ids(self):
"""Collect all image-ids for a specific artist"""
text = self.request(self.artist_url).text
page = self.request(self.artist_url).text
regex = r'<a href="/view\.php\?id=(\d+)"'
return [m.group(1) for m in re.finditer(regex, text)]
return [m.group(1) for m in re.finditer(regex, page)]
def get_image_data(self, image_id):
"""Get URL and metadata for images specified by 'image_id'"""
text = self.request(self.popup_url + image_id).text
matches = re.findall('<img src="([^"]+)"', text)
page = self.request(self.popup_url + image_id).text
matches = re.findall('<img src="([^"]+)"', page)
for index, url in enumerate(matches):
yield "https:" + url, {
"count": len(matches),
"index": index,
"image-id": image_id,
"name" : filename_from_url(url),
"name" : text.filename_from_url(url),
"extension": url[url.rfind(".")+1:],
}

View File

@ -9,7 +9,7 @@
"""Extract images and ugoira from http://www.pixiv.net/"""
from .common import SequentialExtractor, Message
from .. import text
from .. import config, text
import re
import json
@ -29,16 +29,15 @@ class PixivExtractor(SequentialExtractor):
member_url = "http://www.pixiv.net/member_illust.php"
illust_url = "http://www.pixiv.net/member_illust.php?mode=medium"
def __init__(self, match, config):
SequentialExtractor.__init__(self, config)
self.config = config
def __init__(self, match):
SequentialExtractor.__init__(self)
self.artist_id = match.group(1)
self.api = PixivAPI(self.session)
def items(self):
self.api.login(
self.config.get("pixiv", "username"),
self.config.get("pixiv", "password"),
config.get(("extractor", "pixiv", "username")),
config.get(("extractor", "pixiv", "password")),
)
metadata = self.get_job_metadata()

View File

@ -28,8 +28,8 @@ class RedHawkScansExtractor(SequentialExtractor):
url_base = "https://manga.redhawkscans.com/reader/read/"
def __init__(self, match, config):
SequentialExtractor.__init__(self, config)
def __init__(self, match):
SequentialExtractor.__init__(self)
self.part = match.group(1)
def items(self):

View File

@ -22,6 +22,6 @@ info = {
class YandereExtractor(JSONBooruExtractor):
def __init__(self, match, config):
JSONBooruExtractor.__init__(self, match, config, info)
def __init__(self, match):
JSONBooruExtractor.__init__(self, match, info)
self.api_url = "https://yande.re/post.json"

49
test/test_config.py Normal file
View File

@ -0,0 +1,49 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2015 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
import unittest
import gallery_dl.config as config
import os
import tempfile
class TestConfig(unittest.TestCase):
    """Tests for the get/set/interpolate functions of gallery_dl.config."""

    def setUp(self):
        # write a small JSON fixture to a temporary file and load it
        handle, self._configfile = tempfile.mkstemp()
        with os.fdopen(handle, "w") as stream:
            stream.write('{"a": "1", "b": {"c": "text"}}')
        config.load(self._configfile)

    def tearDown(self):
        # drop the loaded values, then delete the temporary file
        config.clear()
        os.remove(self._configfile)

    def test_get(self):
        # (arguments for config.get, expected result)
        cases = (
            ((["a"],), "1"),
            ((["b", "c"],), "text"),
            ((["d"],), None),
            ((["e", "f", "g"], 123), 123),
        )
        for args, expected in cases:
            self.assertEqual(config.get(*args), expected)

    def test_set(self):
        # overwrite an existing nested value and create a new nested path
        config.set(["b", "c"], [1, 2, 3])
        config.set(["e", "f", "g"], value=234)
        for path, expected in ((["b", "c"], [1, 2, 3]),
                               (["e", "f", "g"], 234)):
            self.assertEqual(config.get(path), expected)

    def test_interpolate(self):
        interp = config.interpolate
        self.assertEqual(interp(["a"]), "1")
        self.assertEqual(interp(["b", "a"]), "1")
        self.assertEqual(interp(["b", "c"], "2"), "text")
        self.assertEqual(interp(["b", "d"], "2"), "2")
        # once a top-level "d" exists it is used as the fallback
        config.set(["d"], 123)
        self.assertEqual(interp(["b", "d"], "2"), 123)
        self.assertEqual(interp(["d", "d"], "2"), 123)
# run the tests when this file is executed directly as a script
if __name__ == "__main__":
    unittest.main()