1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-25 20:22:36 +01:00
gallery-dl/gallery_dl/__init__.py

308 lines
11 KiB
Python
Raw Normal View History

2015-04-05 17:15:27 +02:00
# -*- coding: utf-8 -*-
# Copyright 2014-2023 Mike Fährmann
2015-04-05 17:15:27 +02:00
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
2021-06-12 00:20:59 +02:00
import sys
import logging
from . import version, config, option, output, extractor, job, util, exception
2016-10-04 14:33:50 +02:00
2017-01-30 19:40:15 +01:00
# package metadata
__author__ = "Mike Fährmann"
__maintainer__ = __author__
__copyright__ = "Copyright 2014-2023 Mike Fährmann"
__license__ = "GPLv2"
__email__ = "mike_faehrmann@web.de"
# the version string is maintained in the 'version' submodule
__version__ = version.__version__
def progress(urls, pformat):
    """Yield every URL in 'urls', announcing each one on stderr first

    'pformat' is either True, which selects the default
    "[{current}/{total}] {url}" layout, or a custom format string using
    the replacement fields {current}, {total}, and {url}.
    A newline is appended to the format in both cases.
    """
    template = "[{current}/{total}] {url}" if pformat is True else pformat
    template += "\n"
    total = len(urls)

    for index, url in enumerate(urls, 1):
        fields = {"total": total, "current": index, "url": url}
        output.stderr_write(template.format_map(fields))
        yield url
2014-10-12 21:56:44 +02:00
def main():
    """Run the command-line interface and return an exit status

    Parses the command-line arguments, layers them on top of the loaded
    configuration, and then performs exactly one action: list modules,
    list extractors, clear the cache, initialize a config file, or run
    a job for every given URL.

    Returns the OR-ed results of all job runs (with 64 OR-ed in for
    every unsupported URL) when URLs were processed, the result of
    config.initialize() for the config-init action, and 1 in all other
    cases. Exits through sys.exit() on KeyboardInterrupt.
    """
    try:
        parser = option.build_parser()
        args = parser.parse_args()
        log = output.initialize_logging(args.loglevel)

        # configuration
        if args.config_load:
            config.load()
        if args.configs_json:
            config.load(args.configs_json, strict=True)
        if args.configs_yaml:
            import yaml
            config.load(args.configs_yaml, strict=True, loads=yaml.safe_load)
        if args.configs_toml:
            # prefer the standard library parser when it is available
            try:
                import tomllib as toml
            except ImportError:
                import toml
            config.load(args.configs_toml, strict=True, loads=toml.loads)
        if args.filename:
            filename = args.filename
            if filename == "/O":
                # shortcut for the "original" filename
                filename = "{filename}.{extension}"
            elif filename.startswith("\\f"):
                # a literal backslash-f prefix becomes a form feed character
                filename = "\f" + filename[2:]
            config.set((), "filename", filename)
        if args.directory:
            config.set((), "base-directory", args.directory)
            config.set((), "directory", ())
        if args.postprocessors:
            config.set((), "postprocessors", args.postprocessors)
        if args.abort:
            config.set((), "skip", "abort:" + str(args.abort))
        if args.terminate:
            config.set((), "skip", "terminate:" + str(args.terminate))
        if args.cookies_from_browser:
            # split 'BROWSER[/DOMAIN][+KEYRING][:PROFILE][::CONTAINER]'
            browser, _, profile = args.cookies_from_browser.partition(":")
            browser, _, keyring = browser.partition("+")
            browser, _, domain = browser.partition("/")
            if profile.startswith(":"):
                # 'BROWSER::CONTAINER' - container without a profile
                container = profile[1:]
                profile = None
            else:
                profile, _, container = profile.partition("::")
            config.set((), "cookies", (
                browser, profile, keyring, container, domain))
        if args.options_pp:
            config.set((), "postprocessor-options", args.options_pp)
        # explicit options are applied last and override the values above
        for opts in args.options:
            config.set(*opts)

        output.configure_standard_streams()

        # signals
        signals = config.get((), "signals-ignore")
        if signals:
            import signal
            if isinstance(signals, str):
                signals = signals.split(",")
            for signal_name in signals:
                signal_num = getattr(signal, signal_name, None)
                if signal_num is None:
                    log.warning("signal '%s' is not defined", signal_name)
                else:
                    signal.signal(signal_num, signal.SIG_IGN)

        # enable ANSI escape sequences on Windows
        if util.WINDOWS and config.get(("output",), "ansi"):
            from ctypes import windll, wintypes, byref
            kernel32 = windll.kernel32
            mode = wintypes.DWORD()

            for handle_id in (-11, -12):  # stdout and stderr
                handle = kernel32.GetStdHandle(handle_id)
                kernel32.GetConsoleMode(handle, byref(mode))
                # only change the console mode if flag 0x4 is not yet set
                if not mode.value & 0x4:
                    mode.value |= 0x4
                    kernel32.SetConsoleMode(handle, mode)

            output.ANSI = True

        # format string separator
        separator = config.get((), "format-separator")
        if separator:
            from . import formatter
            formatter._SEPARATOR = separator

        # eval globals
        path = config.get((), "globals")
        if path:
            util.GLOBALS.update(util.import_file(path).__dict__)

        # loglevels
        output.configure_logging(args.loglevel)
        if args.loglevel >= logging.ERROR:
            config.set(("output",), "mode", "null")
        elif args.loglevel <= logging.DEBUG:
            import platform
            import requests

            extra = ""
            if util.EXECUTABLE:
                extra = " - Executable"
            else:
                git_head = util.git_head()
                if git_head:
                    extra = " - Git HEAD: " + git_head

            log.debug("Version %s%s", __version__, extra)
            log.debug("Python %s - %s",
                      platform.python_version(), platform.platform())
            try:
                log.debug("requests %s - urllib3 %s",
                          requests.__version__,
                          requests.packages.urllib3.__version__)
            except AttributeError:
                # 'requests.packages.urllib3' is not always available
                pass

            log.debug("Configuration Files %s", config._files)

        # extractor modules
        modules = config.get(("extractor",), "modules")
        if modules is not None:
            if isinstance(modules, str):
                modules = modules.split(",")
            extractor.modules = modules

        # external modules
        if args.extractor_sources:
            sources = args.extractor_sources
            # a trailing None entry selects the internal modules (see below)
            sources.append(None)
        else:
            sources = config.get(("extractor",), "module-sources")

        if sources:
            import os
            modules = []

            for source in sources:
                if source:
                    path = util.expand_path(source)
                    try:
                        files = os.listdir(path)
                        modules.append(extractor._modules_path(path, files))
                    except Exception as exc:
                        log.warning("Unable to load modules from %s (%s: %s)",
                                    path, exc.__class__.__name__, exc)
                else:
                    # falsy entries stand for the built-in modules
                    modules.append(extractor._modules_internal())

            if len(modules) > 1:
                import itertools
                extractor._module_iter = itertools.chain(*modules)
            elif not modules:
                extractor._module_iter = ()
            else:
                extractor._module_iter = iter(modules[0])

        if args.list_modules:
            # Write one module name per line, each followed by a newline.
            # The list itself is left untouched: it is shared module state
            # (and possibly the user's config value), so adding a dummy ""
            # entry just to get the final newline would leak into it.
            sys.stdout.write(
                "".join(name + "\n" for name in extractor.modules))

        elif args.list_extractors:
            write = sys.stdout.write
            fmt = "{}\n{}\nCategory: {} - Subcategory: {}{}\n\n".format

            for extr in extractor.extractors():
                # extractors without a docstring are not listed
                if not extr.__doc__:
                    continue
                test = next(extr._get_tests(), None)
                write(fmt(
                    extr.__name__, extr.__doc__,
                    extr.category, extr.subcategory,
                    "\nExample : " + test[0] if test else "",
                ))

        elif args.clear_cache:
            from . import cache
            log = logging.getLogger("cache")
            cnt = cache.clear(args.clear_cache)

            if cnt is None:
                log.error("Database file not available")
            else:
                log.info(
                    "Deleted %d %s from '%s'",
                    cnt, "entry" if cnt == 1 else "entries", cache._path(),
                )

        elif args.config_init:
            return config.initialize()

        else:
            if not args.urls and not args.inputfiles:
                parser.error(
                    "The following arguments are required: URL\n"
                    "Use 'gallery-dl --help' to get a list of all options.")

            if args.list_urls:
                jobtype = job.UrlJob
                jobtype.maxdepth = args.list_urls
                if config.get(("output",), "fallback", True):
                    jobtype.handle_url = \
                        staticmethod(jobtype.handle_url_fallback)
            else:
                jobtype = args.jobtype or job.DownloadJob

            # collect URLs from the command line and from all input files
            urls = args.urls
            if args.inputfiles:
                for inputfile in args.inputfiles:
                    try:
                        if inputfile == "-":
                            if sys.stdin:
                                urls += util.parse_inputfile(sys.stdin, log)
                            else:
                                log.warning(
                                    "input file: stdin is not readable")
                        else:
                            with open(inputfile, encoding="utf-8") as file:
                                urls += util.parse_inputfile(file, log)
                    except OSError as exc:
                        # an unreadable input file is skipped, not fatal
                        log.warning("input file: %s", exc)

            # unsupported file logging handler
            handler = output.setup_logging_handler(
                "unsupportedfile", fmt="{message}")
            if handler:
                ulog = logging.getLogger("unsupported")
                ulog.addHandler(handler)
                ulog.propagate = False
                job.Job.ulog = ulog

            # show a progress indicator only when there are several URLs
            pformat = config.get(("output",), "progress", True)
            if pformat and len(urls) > 1 and args.loglevel < logging.ERROR:
                urls = progress(urls, pformat)
            else:
                urls = iter(urls)

            retval = 0
            url = next(urls, None)

            # a 'while' loop instead of 'for', so that a restart can
            # process the same URL again without advancing the iterator
            while url is not None:
                try:
                    log.debug("Starting %s for '%s'", jobtype.__name__, url)
                    if isinstance(url, util.ExtendedUrl):
                        # global options stay in effect for later URLs,
                        # local ones only while this job is running
                        for opts in url.gconfig:
                            config.set(*opts)
                        with config.apply(url.lconfig):
                            retval |= jobtype(url.value).run()
                    else:
                        retval |= jobtype(url).run()
                except exception.TerminateExtraction:
                    pass
                except exception.RestartExtraction:
                    log.debug("Restarting '%s'", url)
                    continue
                except exception.NoExtractorError:
                    log.error("Unsupported URL '%s'", url)
                    retval |= 64

                url = next(urls, None)

            return retval

    except KeyboardInterrupt:
        sys.exit("\nKeyboardInterrupt")
    except BrokenPipeError:
        pass
    except OSError as exc:
        import errno
        # only a broken pipe is tolerated; re-raise everything else
        if exc.errno != errno.EPIPE:
            raise
    # NOTE(review): reached after a tolerated broken pipe, and also by the
    # list-modules, list-extractors, and clear-cache branches above, which
    # do not return on their own - confirm that status 1 is intended there
    return 1