2015-04-05 17:15:27 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2018-01-27 01:05:17 +01:00
|
|
|
# Copyright 2014-2018 Mike Fährmann
|
2015-04-05 17:15:27 +02:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
2016-10-04 14:33:50 +02:00
|
|
|
from __future__ import unicode_literals, print_function
|
|
|
|
|
2017-01-30 19:40:15 +01:00
|
|
|
__author__ = "Mike Fährmann"
|
|
|
|
__copyright__ = "Copyright 2014-2017 Mike Fährmann"
|
|
|
|
__license__ = "GPLv2"
|
2014-10-12 21:56:44 +02:00
|
|
|
__maintainer__ = "Mike Fährmann"
|
2017-01-30 19:40:15 +01:00
|
|
|
__email__ = "mike_faehrmann@web.de"
|
2014-10-12 21:56:44 +02:00
|
|
|
|
2016-08-06 13:40:49 +02:00
|
|
|
import sys
|
2016-10-04 14:33:50 +02:00
|
|
|
|
|
|
|
# Refuse to run on interpreters older than Python 3.3 and exit with a
# readable message instead of failing later on.
if sys.version_info < (3, 3):
    print("Python 3.3+ required", file=sys.stderr)
    sys.exit(1)
|
|
|
|
|
2018-02-07 21:47:27 +01:00
|
|
|
import json
|
2017-03-07 23:50:19 +01:00
|
|
|
import logging
|
2018-01-26 18:51:51 +01:00
|
|
|
from . import version, config, option, extractor, job, util, exception
|
2014-10-12 21:56:44 +02:00
|
|
|
|
2017-03-27 11:59:27 +02:00
|
|
|
__version__ = version.__version__
|
2017-03-11 01:47:57 +01:00
|
|
|
log = logging.getLogger("gallery-dl")
|
|
|
|
|
implement logging options
Standard logging to stderr, logfiles, and unsupported URL files (which
are now handled through the logging module) can now be configured by
setting their respective option keys (log, logfile, unsupportedfile)
to a dict and specifying the following options;
- format:
format string for logging messages
available keys: see [1]
default: "[{name}][{levelname}] {message}"
- format-date:
format string for {asctime} fields in logging messages
available keys: see [2]
default: "%Y-%m-%d %H:%M:%S"
- level:
the lowercase name of the minimum level at which messages are logged;
available levels are debug, info, warning, error, critical
default: "info"
- path:
path of the file to be written to
- mode:
'mode' argument when opening the specified file
can be either "w" to truncate the file or "a" to append to it (see [3])
If 'output.log', '.logfile', or '.unsupportedfile' is a string, it will
be interpreted, as it has been, as the filepath
(or as format string for .log)
[1] https://docs.python.org/3/library/logging.html#logrecord-attributes
[2] https://docs.python.org/3/library/time.html#time.strftime
[3] https://docs.python.org/3/library/functions.html#open
2018-05-01 17:54:52 +02:00
|
|
|
LOG_FORMAT = "[{name}][{levelname}] {message}"
|
|
|
|
LOG_FORMAT_DATE = "%Y-%m-%d %H:%M:%S"
|
|
|
|
LOG_LEVEL = logging.INFO
|
|
|
|
|
|
|
|
def initialize_logging(loglevel):
    """Set up basic logging before any config files have been loaded.

    Renames the five standard log levels to their lowercase names and
    attaches a stream handler, filtered at 'loglevel', to the root logger.
    The root logger itself is set to NOTSET.
    """
    # use lowercase level names ("debug", "info", ...)
    for levelno in (logging.DEBUG, logging.INFO, logging.WARNING,
                    logging.ERROR, logging.CRITICAL):
        logging.addLevelName(levelno, logging.getLevelName(levelno).lower())

    # basic stream handler with the default '{'-style format
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter(LOG_FORMAT, LOG_FORMAT_DATE, "{"))
    stream_handler.setLevel(loglevel)

    root_logger = logging.getLogger()
    root_logger.setLevel(logging.NOTSET)
    root_logger.addHandler(stream_handler)
|
2017-03-07 23:50:19 +01:00
|
|
|
|
|
|
|
|
implement logging options
Standard logging to stderr, logfiles, and unsupported URL files (which
are now handled through the logging module) can now be configured by
setting their respective option keys (log, logfile, unsupportedfile)
to a dict and specifying the following options;
- format:
format string for logging messages
available keys: see [1]
default: "[{name}][{levelname}] {message}"
- format-date:
format string for {asctime} fields in logging messages
available keys: see [2]
default: "%Y-%m-%d %H:%M:%S"
- level:
the lowercase name of the minimum level at which messages are logged;
available levels are debug, info, warning, error, critical
default: "info"
- path:
path of the file to be written to
- mode:
'mode' argument when opening the specified file
can be either "w" to truncate the file or "a" to append to it (see [3])
If 'output.log', '.logfile', or '.unsupportedfile' is a string, it will
be interpreted, as it has been, as the filepath
(or as format string for .log)
[1] https://docs.python.org/3/library/logging.html#logrecord-attributes
[2] https://docs.python.org/3/library/time.html#time.strftime
[3] https://docs.python.org/3/library/functions.html#open
2018-05-01 17:54:52 +02:00
|
|
|
def setup_logging_handler(key, fmt=LOG_FORMAT, lvl=LOG_LEVEL):
    """Create a file logging handler from the 'output.<key>' config value.

    The config value is either a dict (keys: path, mode, encoding, level,
    format, format-date) or any other non-empty value, which is then used
    as the file path.  'fmt' and 'lvl' are the fallbacks for a missing
    'format' / 'level' entry.

    Returns the configured logging.FileHandler, or None when the option
    is unset/empty or the handler could not be created (a warning is
    logged in the latter case).
    """
    settings = config.interpolate(("output", key))
    if not settings:
        return None
    if not isinstance(settings, dict):
        settings = {"path": settings}

    try:
        handler = logging.FileHandler(
            util.expand_path(settings.get("path")),
            settings.get("mode", "w"),
            settings.get("encoding", "utf-8"),
        )
    except (OSError, ValueError) as exc:
        log.warning("%s: %s", key, exc)
        return None
    except TypeError as exc:
        log.warning("%s: missing or invalid path (%s)", key, exc)
        return None

    handler.setFormatter(logging.Formatter(
        settings.get("format", fmt),
        settings.get("format-date", LOG_FORMAT_DATE),
        "{",
    ))
    handler.setLevel(settings.get("level", lvl))
    return handler
|
|
|
|
|
|
|
|
|
|
|
|
def configure_logging_handler(key, handler):
    """Apply the 'output.<key>' config value to an existing handler.

    A plain string is taken as the format string; a dict may provide
    'level', 'format' and 'format-date'.  Nothing happens when the
    option is unset or empty.
    """
    settings = config.interpolate(("output", key))
    if not settings:
        return
    if isinstance(settings, str):
        settings = {"format": settings}

    # the configured level only applies while the handler is still at
    # the default level
    if handler.level == LOG_LEVEL and "level" in settings:
        handler.setLevel(settings["level"])

    if "format" not in settings and "format-date" not in settings:
        return
    handler.setFormatter(logging.Formatter(
        settings.get("format", LOG_FORMAT),
        settings.get("format-date", LOG_FORMAT_DATE),
        "{",
    ))
|
|
|
|
|
|
|
|
|
2018-04-04 17:30:42 +02:00
|
|
|
def replace_std_streams(errors="replace"):
    """Replace standard streams and set their error handlers to 'errors'

    sys.stdout, sys.stdin and sys.stderr are each rebuilt as a new object
    of the same class around the existing binary buffer, carrying over
    the stream's 'newlines' and 'line_buffering' values.

    Streams that are None are left untouched instead of raising
    AttributeError on 'stream.buffer'.
    """
    for name in ("stdout", "stdin", "stderr"):
        stream = getattr(sys, name)
        if stream is None:
            # no stream attached; there is nothing to wrap
            continue
        setattr(sys, name, stream.__class__(
            stream.buffer,
            errors=errors,
            newline=stream.newlines,
            line_buffering=stream.line_buffering,
        ))
|
|
|
|
|
|
|
|
|
2017-06-09 20:12:15 +02:00
|
|
|
def progress(urls, pformat):
    """Yield every item of 'urls', printing a progress line before each.

    'pformat' is a str.format_map() template with the fields {current},
    {total} and {url}; passing True selects the default template.
    The progress lines are written to sys.stderr.
    """
    template = "[{current}/{total}] {url}" if pformat is True else pformat
    fields = {"total": len(urls)}
    for index, url in enumerate(urls, 1):
        fields["current"] = index
        fields["url"] = url
        print(template.format_map(fields), file=sys.stderr)
        yield url
|
|
|
|
|
|
|
|
|
2017-09-08 17:52:00 +02:00
|
|
|
def prepare_range(rangespec, target):
    """Parse 'rangespec' and store it as the range setting for 'target'.

    Does nothing for an empty 'rangespec'.  The parsed and optimized
    range is written to the ("_", target, "range") config key; if it
    comes out empty, a warning is logged instead.
    """
    if not rangespec:
        return
    ranges = util.optimize_range(util.parse_range(rangespec))
    if not ranges:
        log.warning("invalid/empty %s range", target)
        return
    config.set(("_", target, "range"), ranges)
|
|
|
|
|
|
|
|
|
|
|
|
def prepare_filter(filterexpr, target):
    """Compile 'filterexpr' and store it as the filter for 'target'.

    Does nothing for an empty 'filterexpr'.  The expression is compiled
    in "eval" mode and the resulting code object is written to the
    ("_", target, "filter") config key.  SyntaxError, ValueError and
    TypeError are logged as warnings rather than raised.
    """
    if not filterexpr:
        return
    try:
        codeobj = compile(filterexpr, "<{} filter>".format(target), "eval")
        config.set(("_", target, "filter"), codeobj)
    except (SyntaxError, ValueError, TypeError) as exc:
        log.warning(exc)
|
|
|
|
|
|
|
|
|
2018-02-07 21:47:27 +01:00
|
|
|
def parse_inputfile(file):
    """Yield URLs (with optional config settings) read from an input file.

    Every line is stripped of surrounding whitespace and then handled
    according to its first character:

    - Empty lines and lines starting with '#' are skipped.
    - Lines starting with '-' are '<key> = <value>' option specs, where
      'key' is a dot-separated option name and 'value' must be valid
      JSON.  These options apply to the next URL only.
    - Lines starting with '-G' are option specs as well, but are meant
      to stay in effect for all following URLs (Global).
    - Any other line is treated as a URL.

    A URL preceded by option specs is yielded as util.ExtendedUrl; all
    other URLs are yielded as plain strings.  Malformed option specs are
    reported with a warning and ignored.

    Example input file:

    # settings global options
    -G base-directory = "/tmp/"
    -G skip = false

    # setting local options for the next URL
    -filename="spaces_are_optional.jpg"
    -skip = true

    https://example.org/

    # next URL uses default filename and 'skip' is false.
    https://example.com/index.htm
    """
    global_opts = []
    local_opts = []

    for raw in file:
        entry = raw.strip()

        # blank line or comment
        if not entry or entry.startswith("#"):
            continue

        if not entry.startswith("-"):
            # URL: attach the options collected so far, then start over
            if global_opts or local_opts:
                yield util.ExtendedUrl(entry, global_opts, local_opts)
                global_opts = []
                local_opts = []
            else:
                yield entry
            continue

        # option spec: '-G' selects the global list, a bare '-' the local one
        if entry[1:2] == "G":
            target, spec = global_opts, entry[2:]
        else:
            target, spec = local_opts, entry[1:]

        key, sep, value = spec.partition("=")
        if not sep:
            log.warning("input file: invalid <key>=<value> pair: %s", spec)
            continue

        try:
            value = json.loads(value.strip())
        except ValueError as exc:
            log.warning("input file: unable to parse '%s': %s", value, exc)
            continue

        target.append((key.strip().split("."), value))
|
|
|
|
|
|
|
|
|
2014-10-12 21:56:44 +02:00
|
|
|
def main():
    """Command-line entry point.

    Parses the command-line arguments, loads the configuration, sets up
    the logging handlers and then either lists modules/extractors or runs
    the selected job type for every URL given on the command line or in
    the input file.

    KeyboardInterrupt and broken-pipe errors are handled here; any other
    IOError is re-raised.
    """
    try:
        if sys.stdout.encoding.lower() != "utf-8":
            replace_std_streams()

        parser = option.build_parser()
        args = parser.parse_args()
        initialize_logging(args.loglevel)

        # configuration
        if args.load_config:
            config.load()
        if args.cfgfiles:
            config.load(*args.cfgfiles, strict=True)
        if args.yamlfiles:
            config.load(*args.yamlfiles, format="yaml", strict=True)
        for key, value in args.options:
            config.set(key, value)
        config.set(("_",), {})

        # stream logging handler
        configure_logging_handler("log", logging.getLogger().handlers[0])

        # file logging handler
        handler = setup_logging_handler("logfile", lvl=args.loglevel)
        if handler:
            logging.getLogger().addHandler(handler)

        # loglevels
        if args.loglevel >= logging.ERROR:
            config.set(("output", "mode"), "null")
        elif args.loglevel <= logging.DEBUG:
            import platform
            import requests
            log.debug("Version %s", __version__)
            log.debug("Python %s - %s",
                      platform.python_version(), platform.platform())
            try:
                log.debug("requests %s - urllib3 %s",
                          requests.__version__,
                          requests.packages.urllib3.__version__)
            except AttributeError:
                pass

        if args.list_modules:
            for module_name in extractor.modules:
                print(module_name)
        elif args.list_extractors:
            for extr in extractor.extractors():
                if not extr.__doc__:
                    continue
                print(extr.__name__)
                print(extr.__doc__)
                print("Category:", extr.category,
                      "- Subcategory:", extr.subcategory)
                if hasattr(extr, "test") and extr.test:
                    print("Example :", extr.test[0][0])
                print()
        else:
            if not args.urls and not args.inputfile:
                parser.error(
                    "The following arguments are required: URL\n"
                    "Use 'gallery-dl --help' to get a list of all options.")

            if args.list_urls:
                jobtype = job.UrlJob
                jobtype.maxdepth = args.list_urls
            elif args.list_keywords:
                jobtype = job.KeywordJob
            elif args.list_data:
                jobtype = job.DataJob
            elif args.simulate:
                jobtype = job.SimulationJob
            else:
                jobtype = job.DownloadJob

            urls = args.urls
            if args.inputfile:
                try:
                    if args.inputfile == "-":
                        file = sys.stdin
                    else:
                        file = open(args.inputfile)
                    # the context manager closes the file even when
                    # parsing raises (e.g. on a decoding error)
                    with file:
                        urls += parse_inputfile(file)
                except OSError as exc:
                    log.warning("input file: %s", exc)

            # unsupported file logging handler
            handler = setup_logging_handler("unsupportedfile", fmt="{message}")
            if handler:
                ulog = logging.getLogger("unsupported")
                ulog.addHandler(handler)
                ulog.propagate = False
                job.Job.ulog = ulog

            prepare_range(args.image_range, "image")
            prepare_range(args.chapter_range, "chapter")
            prepare_filter(args.image_filter, "image")
            prepare_filter(args.chapter_filter, "chapter")

            # progress output only for more than one URL and when the
            # loglevel is below ERROR
            pformat = config.get(("output", "progress"), True)
            if pformat and len(urls) > 1 and args.loglevel < logging.ERROR:
                urls = progress(urls, pformat)

            for url in urls:
                try:
                    log.debug("Starting %s for '%s'", jobtype.__name__, url)
                    if isinstance(url, util.ExtendedUrl):
                        # global options are set directly on the config;
                        # local options only apply inside the 'with' block
                        for key, value in url.gconfig:
                            config.set(key, value)
                        with config.apply(url.lconfig):
                            jobtype(url.value).run()
                    else:
                        jobtype(url).run()
                except exception.NoExtractorError:
                    log.error("No suitable extractor found for '%s'", url)

    except KeyboardInterrupt:
        print("\nKeyboardInterrupt", file=sys.stderr)
    except BrokenPipeError:
        pass
    except IOError as exc:
        import errno
        # ignore broken pipes, re-raise everything else
        if exc.errno != errno.EPIPE:
            raise
|