# -*- coding: utf-8 -*-

# Copyright 2014-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

import sys
|
|
|
|
import logging
|
|
|
|
from . import version, config, option, output, extractor, job, util, exception
|
2016-10-04 14:33:50 +02:00
|
|
|
|
2017-01-30 19:40:15 +01:00
|
|
|
# Package metadata, exposed as module-level dunder attributes.
__author__ = "Mike Fährmann"
__copyright__ = "Copyright 2014-2023 Mike Fährmann"
__license__ = "GPLv2"
__maintainer__ = "Mike Fährmann"
__email__ = "mike_faehrmann@web.de"
# Re-exported from the sibling 'version' module imported above.
__version__ = version.__version__
|
2018-04-04 17:30:42 +02:00
|
|
|
|
|
|
|
|
2017-06-09 20:12:15 +02:00
|
|
|
def progress(urls, pformat):
    """Yield each item of 'urls' after writing a progress line for it.

    'pformat' is either True, which selects the built-in
    "[current/total] url" layout, or a format string that may use the
    {current}, {total}, and {url} fields. A newline is appended to a
    custom format string. Progress lines go through output.stderr_write().
    """
    if pformat is True:
        template = "[{current}/{total}] {url}\n"
    else:
        template = pformat
        template += "\n"

    # 'total' needs len(), so 'urls' has to be a sized container
    fields = {"total": len(urls)}
    for position, item in enumerate(urls, 1):
        fields["current"] = position
        fields["url"] = item
        output.stderr_write(template.format_map(fields))
        yield item
|
|
|
|
|
|
|
|
|
2014-10-12 21:56:44 +02:00
|
|
|
def main():
    """Run the command-line interface.

    Parses the command-line arguments, layers them on top of the loaded
    configuration, sets up output streams, ignored signals, logging, and
    extractor modules, and then performs the selected action: listing
    modules, listing extractors, clearing the cache, initializing a
    configuration file, or running one job per input URL.

    When jobs are run, the return value is the bitwise OR of all job
    results, with 64 OR-ed in for every URL that raised NoExtractorError.
    The config-init action returns the result of config.initialize().
    A KeyboardInterrupt ends the process through sys.exit().
    """
    try:
        parser = option.build_parser()
        args = parser.parse_args()
        log = output.initialize_logging(args.loglevel)

        # configuration
        # config files are loaded first, so the command-line values
        # applied through config.set() below are written on top of them
        if args.config_load:
            config.load()
        if args.configs_json:
            config.load(args.configs_json, strict=True)
        if args.configs_yaml:
            import yaml
            config.load(args.configs_yaml, strict=True, loads=yaml.safe_load)
        if args.configs_toml:
            # use 'tomllib' when it can be imported, else fall back to 'toml'
            try:
                import tomllib as toml
            except ImportError:
                import toml
            config.load(args.configs_toml, strict=True, loads=toml.loads)
        if args.filename:
            fname = args.filename
            if fname == "/O":
                # shortcut for "{filename}.{extension}"
                fname = "{filename}.{extension}"
            elif fname.startswith("\\f"):
                # turn a literal backslash + 'f' into a form feed character
                fname = "\f" + fname[2:]
            config.set((), "filename", fname)
        if args.directory:
            config.set((), "base-directory", args.directory)
            config.set((), "directory", ())
        if args.postprocessors:
            config.set((), "postprocessors", args.postprocessors)
        if args.abort:
            config.set((), "skip", "abort:" + str(args.abort))
        if args.terminate:
            # written after 'abort', so 'terminate' wins if both are given
            config.set((), "skip", "terminate:" + str(args.terminate))
        if args.cookies_from_browser:
            # accepted shape, as split below:
            #   BROWSER[/DOMAIN][+KEYRING][:PROFILE][::CONTAINER]
            browser, _, profile = args.cookies_from_browser.partition(":")
            browser, _, keyring = browser.partition("+")
            browser, _, domain = browser.partition("/")
            if profile.startswith(":"):
                # "BROWSER::CONTAINER" - container without a profile
                container = profile[1:]
                profile = None
            else:
                profile, _, container = profile.partition("::")
            config.set((), "cookies", (
                browser, profile, keyring, container, domain))
        if args.options_pp:
            config.set((), "postprocessor-options", args.options_pp)
        for opts in args.options:
            config.set(*opts)

        output.configure_standard_streams()

        # signals
        ignored = config.get((), "signals-ignore")
        if ignored:
            import signal
            if isinstance(ignored, str):
                ignored = ignored.split(",")
            for name in ignored:
                signum = getattr(signal, name, None)
                if signum is not None:
                    signal.signal(signum, signal.SIG_IGN)
                else:
                    log.warning("signal '%s' is not defined", name)

        # enable ANSI escape sequences on Windows
        if util.WINDOWS and config.get(("output",), "ansi"):
            from ctypes import windll, wintypes, byref
            kernel32 = windll.kernel32
            mode = wintypes.DWORD()

            for std_id in (-11, -12):  # stdout and stderr
                handle = kernel32.GetStdHandle(std_id)
                kernel32.GetConsoleMode(handle, byref(mode))
                # NOTE(review): 0x4 is presumably
                # ENABLE_VIRTUAL_TERMINAL_PROCESSING - confirm
                if mode.value & 0x4:
                    continue
                mode.value |= 0x4
                kernel32.SetConsoleMode(handle, mode)

            output.ANSI = True

        # format string separator
        sep = config.get((), "format-separator")
        if sep:
            from . import formatter
            formatter._SEPARATOR = sep

        # eval globals
        path = config.get((), "globals")
        if path:
            # copy every module-level name of that file into util.GLOBALS
            util.GLOBALS.update(util.import_file(path).__dict__)

        # loglevels
        output.configure_logging(args.loglevel)
        if args.loglevel >= logging.ERROR:
            config.set(("output",), "mode", "null")
        elif args.loglevel <= logging.DEBUG:
            import platform
            import requests

            if util.EXECUTABLE:
                suffix = " - Executable"
            else:
                head = util.git_head()
                suffix = " - Git HEAD: " + head if head else ""

            log.debug("Version %s%s", __version__, suffix)
            log.debug("Python %s - %s",
                      platform.python_version(), platform.platform())
            try:
                log.debug("requests %s - urllib3 %s",
                          requests.__version__,
                          requests.packages.urllib3.__version__)
            except AttributeError:
                # skip this line if one of the attributes does not exist
                pass

            log.debug("Configuration Files %s", config._files)

        # extractor modules
        enabled = config.get(("extractor",), "modules")
        if enabled is not None:
            if isinstance(enabled, str):
                enabled = enabled.split(",")
            extractor.modules = enabled

        # external modules
        if args.extractor_sources:
            module_sources = args.extractor_sources
            # a falsy entry selects the internal modules in the loop below
            module_sources.append(None)
        else:
            module_sources = config.get(("extractor",), "module-sources")

        if module_sources:
            import os
            loaders = []

            for source in module_sources:
                if not source:
                    loaders.append(extractor._modules_internal())
                    continue
                path = util.expand_path(source)
                try:
                    files = os.listdir(path)
                    loaders.append(extractor._modules_path(path, files))
                except Exception as exc:
                    log.warning("Unable to load modules from %s (%s: %s)",
                                path, exc.__class__.__name__, exc)

            if not loaders:
                extractor._module_iter = ()
            elif len(loaders) == 1:
                extractor._module_iter = iter(loaders[0])
            else:
                import itertools
                extractor._module_iter = itertools.chain(*loaders)

        if args.list_modules:
            # the appended empty string makes the joined output end with "\n"
            extractor.modules.append("")
            sys.stdout.write("\n".join(extractor.modules))

        elif args.list_extractors:
            write = sys.stdout.write
            fmt = "{}\n{}\nCategory: {} - Subcategory: {}{}\n\n".format

            for extr in extractor.extractors():
                # extractors without a docstring are not listed
                if not extr.__doc__:
                    continue
                example = next(extr._get_tests(), None)
                hint = "\nExample : " + example[0] if example else ""
                write(fmt(
                    extr.__name__, extr.__doc__,
                    extr.category, extr.subcategory,
                    hint,
                ))

        elif args.clear_cache:
            from . import cache
            cache_log = logging.getLogger("cache")
            count = cache.clear(args.clear_cache)

            if count is None:
                cache_log.error("Database file not available")
            else:
                noun = "entry" if count == 1 else "entries"
                cache_log.info(
                    "Deleted %d %s from '%s'",
                    count, noun, cache._path(),
                )

        elif args.config_init:
            return config.initialize()

        else:
            if not (args.urls or args.inputfiles):
                parser.error(
                    "The following arguments are required: URL\n"
                    "Use 'gallery-dl --help' to get a list of all options.")

            if args.list_urls:
                jobtype = job.UrlJob
                jobtype.maxdepth = args.list_urls
                if config.get(("output",), "fallback", True):
                    jobtype.handle_url = \
                        staticmethod(jobtype.handle_url_fallback)
            else:
                jobtype = args.jobtype or job.DownloadJob

            # URLs read from input files are added to args.urls itself
            urls = args.urls
            if args.inputfiles:
                for inputfile in args.inputfiles:
                    try:
                        if inputfile != "-":
                            with open(inputfile, encoding="utf-8") as file:
                                urls += util.parse_inputfile(file, log)
                        elif sys.stdin:
                            # "-" stands for standard input
                            urls += util.parse_inputfile(sys.stdin, log)
                        else:
                            log.warning(
                                "input file: stdin is not readable")
                    except OSError as exc:
                        log.warning("input file: %s", exc)

            # unsupported file logging handler
            handler = output.setup_logging_handler(
                "unsupportedfile", fmt="{message}")
            if handler:
                ulog = logging.getLogger("unsupported")
                ulog.addHandler(handler)
                ulog.propagate = False
                job.Job.ulog = ulog

            # progress lines are only written for more than one URL
            # and below ERROR log level
            pformat = config.get(("output",), "progress", True)
            if pformat and len(urls) > 1 and args.loglevel < logging.ERROR:
                urls = progress(urls, pformat)
            else:
                urls = iter(urls)

            status = 0
            url = next(urls, None)

            while url is not None:
                try:
                    log.debug("Starting %s for '%s'", jobtype.__name__, url)
                    if isinstance(url, util.ExtendedUrl):
                        # 'gconfig' entries are set globally and stay set,
                        # 'lconfig' is applied only while this job runs
                        for opts in url.gconfig:
                            config.set(*opts)
                        with config.apply(url.lconfig):
                            status |= jobtype(url.value).run()
                    else:
                        status |= jobtype(url).run()
                except exception.TerminateExtraction:
                    pass
                except exception.RestartExtraction:
                    log.debug("Restarting '%s'", url)
                    # 'continue' skips the next() call below,
                    # so the same URL is processed again
                    continue
                except exception.NoExtractorError:
                    log.error("Unsupported URL '%s'", url)
                    status |= 64

                url = next(urls, None)

            return status

    except KeyboardInterrupt:
        sys.exit("\nKeyboardInterrupt")
    except BrokenPipeError:
        pass
    except OSError as exc:
        import errno
        # only EPIPE is tolerated here; any other OSError is re-raised
        if exc.errno != errno.EPIPE:
            raise
    # NOTE(review): the original indentation of this statement could not be
    # recovered from the source view. At function level it is reached by
    # every path that leaves the 'try' statement without its own 'return',
    # including the list and clear-cache actions - confirm.
    return 1
|