Add basic prometheus metrics for backup status.

This requires that `prometheus-node-exporter` is installed.

If `reportprom` is set, then during the reporting phase, metrics will be written
to `${prom_textfile_dir}/backupninja.prom`, which will then be available for
scraping by prometheus.

The metrics are labelled with the hostname and report the number of actions run
and the number of warnings, errors, fatals, and halts produced in the backup run.

These are written to the prometheus-node-exporter directory in an atomic way.
This commit is contained in:
Micah Anderson 2021-06-23 13:52:23 -04:00
parent 5cc80402dd
commit fa2528a083
No known key found for this signature in database
GPG Key ID: 2D2C65DDB27446E5
2 changed files with 53 additions and 2 deletions

View File

@ -13,6 +13,10 @@
# 1 -- Fatal errors (only)
loglevel = 4
# Write Prometheus metrics describing the backup status (default = no).
# Requires `prometheus-node-exporter` to be installed; the metrics are written
# to `backupninja.prom` inside `prom_textfile_dir`.
reportprom = false
# send a summary of the backup status to
# this email address:
reportemail = root

View File

@ -513,6 +513,8 @@ setfile $conffile
# Read the global settings. Where a second argument is given it looks like
# the fallback used when the setting is absent -- TODO confirm against getconf.
getconf configdirectory @CFGDIR@/backup.d
getconf scriptdirectory @datadir@
getconf reportdirectory
# Prometheus reporting: on/off switch and the directory that receives the
# generated backupninja.prom file.
getconf reportprom
getconf prom_textfile_dir /var/lib/prometheus/node-exporter
# Remaining report settings (no fallback value passed here).
getconf reportemail
getconf reporthost
getconf reportspace
@ -601,10 +603,10 @@ for file in $files; do
fi
done
## mail the messages to the report address
## reporting
if [ $actions_run == 0 ]; then doit=0
elif [ "$reportemail" == "" ]; then doit=0
elif [ "$reportemail" == "" -a "$reportprom" == "" ]; then doit=0
elif [ $fatals != 0 ]; then doit=1
elif [ $errors != 0 ]; then doit=1
elif [ $halts != 0 ]; then doit=1
@ -614,6 +616,51 @@ else doit=0
fi
if [ $doit == 1 ]; then
#######################################
# Publish the counters of this backup run as Prometheus metrics in
# ${prom_textfile_dir}/backupninja.prom.
#
# The file is first written under a temporary name in the same directory and
# then renamed, so a reader never sees a half-written file.
#
# Globals read:    reportprom, prom_textfile_dir
# Globals written: hostname
#                  warnings, errors, fatals, halts, actions_run
#                  (an empty value is replaced by 0)
#                  errors is incremented when the metrics cannot be published
# Calls:           debug, error (defined elsewhere in this script)
# Returns:         0 when reporting is disabled or the file was published,
#                  1 when publishing failed.
#######################################
report_prometheus_metrics() {
   local output_file tmp_file

   # Only a non-empty value that is not an explicit negative enables
   # reporting; a bare "non-empty" test would treat "reportprom = false"
   # as enabled.
   case "${reportprom:-}" in
      ''|no|No|NO|false|False|FALSE|0) return 0 ;;
   esac

   if [ ! -d "$prom_textfile_dir" ]; then
      error "$prom_textfile_dir does not exist!"
      errors=$(( ${errors:-0} + 1 ))
      return 1
   fi

   debug "reporting to prometheus"
   hostname=$(hostname)

   # set some defaults, so there aren't empty strings in the metrics
   : "${warnings:=0}" "${errors:=0}" "${fatals:=0}" "${halts:=0}" "${actions_run:=0}"

   output_file="${prom_textfile_dir}/backupninja.prom"
   tmp_file="${output_file}.$$"

   # Remove the temporary file if the script ends or is signalled before the
   # rename. The path is expanded (and shell-quoted) now, because tmp_file is
   # local and will not exist when the trap fires.
   # NOTE(review): this replaces any EXIT/INT/TERM handler installed earlier.
   trap "rm -f -- $(printf '%q' "$tmp_file") 2>/dev/null" EXIT INT TERM

   if cat > "$tmp_file" <<EOF
# HELP backupninja_warnings Number of warnings reported by Backupninja
# TYPE backupninja_warnings gauge
# HELP backupninja_errors Number of errors reported by Backupninja
# TYPE backupninja_errors gauge
# HELP backupninja_fatals Number of fatals reported by Backupninja
# TYPE backupninja_fatals gauge
# HELP backupninja_halts Number of halts reported by Backupninja
# TYPE backupninja_halts gauge
# HELP backupninja_actions Number of actions run by Backupninja
# TYPE backupninja_actions gauge
backupninja_warnings{host="$hostname"} $warnings
backupninja_errors{host="$hostname"} $errors
backupninja_fatals{host="$hostname"} $fatals
backupninja_halts{host="$hostname"} $halts
backupninja_actions{host="$hostname"} $actions_run
EOF
   then
      # Set the final permissions before the rename so the published file is
      # readable from the moment it appears under its real name.
      if chmod 0644 "$tmp_file" && mv -f "$tmp_file" "$output_file"; then
         return 0
      fi
   fi

   # Writing, chmod or rename failed: clean up and count it as an error.
   rm -f -- "$tmp_file" 2>/dev/null
   error "could not write metrics to ${prom_textfile_dir}!"
   errors=$(( errors + 1 ))
   return 1
}

# Failures are already logged through error() and counted in $errors, so the
# return status is deliberately not acted upon here.
report_prometheus_metrics || :
if [ -x "$(which mail 2>/dev/null)" ]; then
debug "send report to $reportemail"
hostname=`hostname`