2015-04-05 17:15:27 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2022-02-13 22:39:26 +01:00
|
|
|
# Copyright 2014-2022 Mike Fährmann
|
2015-04-05 17:15:27 +02:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
2021-06-12 00:20:59 +02:00
|
|
|
import sys
|
|
|
|
import logging
|
|
|
|
from . import version, config, option, output, extractor, job, util, exception
|
2016-10-04 14:33:50 +02:00
|
|
|
|
2017-01-30 19:40:15 +01:00
|
|
|
# Package metadata, exposed for tooling and ``gallery_dl.__version__`` access.
__author__ = "Mike Fährmann"
__copyright__ = "Copyright 2014-2022 Mike Fährmann"
__license__ = "GPLv2"
__maintainer__ = "Mike Fährmann"
__email__ = "mike_faehrmann@web.de"
# Single source of truth for the version string lives in the 'version' module
__version__ = version.__version__
|
2018-04-04 17:30:42 +02:00
|
|
|
|
|
|
|
|
2017-06-09 20:12:15 +02:00
|
|
|
def progress(urls, pformat):
    """Yield each URL from *urls*, writing a progress line to stderr first.

    *pformat* is either ``True`` (use the default indicator format) or a
    custom format string with ``{current}``, ``{total}`` and ``{url}``
    placeholders; a trailing newline is appended to custom formats.
    """
    if pformat is True:
        template = "[{current}/{total}] {url}\n"
    else:
        template = pformat + "\n"

    info = {"total": len(urls)}
    counter = 0
    for url in urls:
        counter += 1
        info["current"] = counter
        info["url"] = url
        output.stderr_write(template.format_map(info))
        yield url
|
|
|
|
|
|
|
|
|
2014-10-12 21:56:44 +02:00
|
|
|
def main():
    """Command-line entry point.

    Parses command-line arguments, layers them on top of the loaded
    configuration files, then either handles an informational action
    (--list-modules, --list-extractors, --clear-cache) or runs one job
    per URL. Returns a bit-OR of the job return codes (0 on full
    success), or 1 when interrupted by a broken pipe (EPIPE).
    """
    try:
        # Re-wrap the standard streams as UTF-8 if the console uses
        # another encoding (only when stdout actually exists).
        if sys.stdout and sys.stdout.encoding.lower() != "utf-8":
            output.replace_std_streams()

        parser = option.build_parser()
        args = parser.parse_args()
        log = output.initialize_logging(args.loglevel)

        # configuration
        # Order matters: default config files first, then explicit
        # --config/--config-yaml files, then individual -o overrides.
        if args.load_config:
            config.load()
        if args.cfgfiles:
            config.load(args.cfgfiles, strict=True)
        if args.yamlfiles:
            config.load(args.yamlfiles, strict=True, fmt="yaml")
        if args.filename:
            filename = args.filename
            if filename == "/O":
                # NOTE(review): "(unknown)" here looks like template-
                # substitution corruption of the original format string
                # (upstream uses "{filename}.{extension}") — confirm.
                filename = "(unknown).{extension}"
            elif filename.startswith("\\f"):
                # "\f" prefix marks the rest as a format-string expression
                filename = "\f" + filename[2:]
            config.set((), "filename", filename)
        if args.directory:
            # -d sets the base directory and disables the per-extractor
            # subdirectory structure (empty tuple).
            config.set((), "base-directory", args.directory)
            config.set((), "directory", ())
        if args.postprocessors:
            config.set((), "postprocessors", args.postprocessors)
        if args.abort:
            config.set((), "skip", "abort:" + str(args.abort))
        if args.terminate:
            config.set((), "skip", "terminate:" + str(args.terminate))
        if args.cookies_from_browser:
            # Syntax: BROWSER[+KEYRING][:PROFILE]
            browser, _, profile = args.cookies_from_browser.partition(":")
            browser, _, keyring = browser.partition("+")
            config.set((), "cookies", (browser, profile, keyring))
        for opts in args.options:
            config.set(*opts)

        # signals
        # Ignore the signals named in "signals-ignore" (string or list);
        # unknown names are warned about instead of raising.
        signals = config.get((), "signals-ignore")
        if signals:
            import signal
            if isinstance(signals, str):
                signals = signals.split(",")
            for signal_name in signals:
                signal_num = getattr(signal, signal_name, None)
                if signal_num is None:
                    log.warning("signal '%s' is not defined", signal_name)
                else:
                    signal.signal(signal_num, signal.SIG_IGN)

        # enable ANSI escape sequences on Windows
        if util.WINDOWS and config.get(("output",), "ansi"):
            from ctypes import windll, wintypes, byref
            kernel32 = windll.kernel32
            mode = wintypes.DWORD()

            for handle_id in (-11, -12):  # stdout and stderr
                handle = kernel32.GetStdHandle(handle_id)
                kernel32.GetConsoleMode(handle, byref(mode))
                # 0x4 == ENABLE_VIRTUAL_TERMINAL_PROCESSING
                if not mode.value & 0x4:
                    mode.value |= 0x4
                    kernel32.SetConsoleMode(handle, mode)

            output.ANSI = True

        # extractor modules
        # Restrict the set of extractor modules to load (string or list).
        modules = config.get(("extractor",), "modules")
        if modules is not None:
            if isinstance(modules, str):
                modules = modules.split(",")
            extractor.modules = modules
            extractor._module_iter = iter(modules)

        # format string separator
        separator = config.get((), "format-separator")
        if separator:
            from . import formatter
            formatter._SEPARATOR = separator

        # loglevels
        output.configure_logging(args.loglevel)
        if args.loglevel >= logging.ERROR:
            # Errors-only: silence regular download output entirely
            config.set(("output",), "mode", "null")
        elif args.loglevel <= logging.DEBUG:
            # Verbose mode: dump environment details useful in bug reports
            import platform
            import requests

            extra = ""
            if getattr(sys, "frozen", False):
                extra = " - Executable"
            else:
                git_head = util.git_head()
                if git_head:
                    extra = " - Git HEAD: " + git_head

            log.debug("Version %s%s", __version__, extra)
            log.debug("Python %s - %s",
                      platform.python_version(), platform.platform())
            try:
                # requests may unbundle urllib3; tolerate a missing attribute
                log.debug("requests %s - urllib3 %s",
                          requests.__version__,
                          requests.packages.urllib3.__version__)
            except AttributeError:
                pass

            log.debug("Configuration Files %s", config._files)

        if args.list_modules:
            # Trailing "" yields a final newline from the join below
            extractor.modules.append("")
            sys.stdout.write("\n".join(extractor.modules))

        elif args.list_extractors:
            write = sys.stdout.write
            fmt = "{}\n{}\nCategory: {} - Subcategory: {}{}\n\n".format

            for extr in extractor.extractors():
                # Undocumented extractor classes are skipped
                if not extr.__doc__:
                    continue
                # First registered test (if any) supplies an example URL
                test = next(extr._get_tests(), None)
                write(fmt(
                    extr.__name__, extr.__doc__,
                    extr.category, extr.subcategory,
                    "\nExample : " + test[0] if test else "",
                ))

        elif args.clear_cache:
            from . import cache
            log = logging.getLogger("cache")
            cnt = cache.clear(args.clear_cache)

            if cnt is None:
                log.error("Database file not available")
            else:
                log.info(
                    "Deleted %d %s from '%s'",
                    cnt, "entry" if cnt == 1 else "entries", cache._path(),
                )
        else:
            # Normal operation: at least one URL source is required
            if not args.urls and not args.inputfiles:
                parser.error(
                    "The following arguments are required: URL\n"
                    "Use 'gallery-dl --help' to get a list of all options.")

            if args.list_urls:
                jobtype = job.UrlJob
                # -g/-G repetition count controls recursion depth
                jobtype.maxdepth = args.list_urls
                if config.get(("output",), "fallback", True):
                    jobtype.handle_url = \
                        staticmethod(jobtype.handle_url_fallback)
            else:
                jobtype = args.jobtype or job.DownloadJob

            # Collect URLs: positional arguments plus any --input-file
            # entries ("-" reads from stdin). Unreadable files only warn.
            urls = args.urls
            if args.inputfiles:
                for inputfile in args.inputfiles:
                    try:
                        if inputfile == "-":
                            if sys.stdin:
                                urls += util.parse_inputfile(sys.stdin, log)
                            else:
                                log.warning(
                                    "input file: stdin is not readable")
                        else:
                            with open(inputfile, encoding="utf-8") as file:
                                urls += util.parse_inputfile(file, log)
                    except OSError as exc:
                        log.warning("input file: %s", exc)

            # unsupported file logging handler
            # Unsupported URLs are routed through a dedicated,
            # non-propagating logger attached to every Job.
            handler = output.setup_logging_handler(
                "unsupportedfile", fmt="{message}")
            if handler:
                ulog = logging.getLogger("unsupported")
                ulog.addHandler(handler)
                ulog.propagate = False
                job.Job.ulog = ulog

            # Progress indicator only for multiple URLs and when output
            # is not suppressed by the loglevel.
            pformat = config.get(("output",), "progress", True)
            if pformat and len(urls) > 1 and args.loglevel < logging.ERROR:
                urls = progress(urls, pformat)

            # Run one job per URL; return codes are OR-ed together so a
            # single failure is preserved in the exit status.
            retval = 0
            for url in urls:
                try:
                    log.debug("Starting %s for '%s'", jobtype.__name__, url)
                    if isinstance(url, util.ExtendedUrl):
                        # Apply URL-scoped config: global options stick,
                        # local options only apply for this job.
                        for opts in url.gconfig:
                            config.set(*opts)
                        with config.apply(url.lconfig):
                            retval |= jobtype(url.value).run()
                    else:
                        retval |= jobtype(url).run()
                except exception.TerminateExtraction:
                    pass
                except exception.NoExtractorError:
                    log.error("Unsupported URL '%s'", url)
                    retval |= 64  # flag "unsupported URL" in the exit code
            return retval

    except KeyboardInterrupt:
        sys.exit("\nKeyboardInterrupt")
    except BrokenPipeError:
        pass
    except OSError as exc:
        import errno
        # A closed pipe (e.g. piping into 'head') is not an error;
        # anything else is re-raised.
        if exc.errno != errno.EPIPE:
            raise
        return 1
|