SurvivorLibraryScrape/validate_pdfs.sh

20 lines
819 B
Bash

#!/bin/bash
# Requires the `pdfinfo` command, which is provided by the `poppler-utils` package.
# On Debian/Ubuntu the package is named `poppler-utils`.
# The list of broken PDFs is appended to the `validate.log` file.
# Most of the code comes from a Super User answer: https://superuser.com/a/580895
# I highly recommend manually verifying the PDFs that are considered "broken" by this script.
# When I ran this script, it found 19 "broken" PDFs.
# 1 of them wasn't broken. The other 18 were.
# 1 of the remaining 18 was just a corrupted download,
# but the other 17 were actually broken and didn't even work when attempting to load from survivorlibrary.com
#######################################
# Check every *.pdf in the current directory with `pdfinfo` and append one
# timestamped line per unreadable file to ./validate.log.
# Globals:   none
# Arguments: none
# Outputs:   appends "[YYYY-MM-DD HH:MM:SS] <file> is broken" to validate.log;
#            writes an error to stderr if `pdfinfo` is unavailable
# Returns:   0 after scanning (even if broken PDFs were found),
#            1 if `pdfinfo` cannot be found
#######################################
validate_pdfs() {
  local pdf stamp

  # Without pdfinfo every file would fail the check and be logged as broken,
  # so refuse to run rather than produce a misleading log.
  if ! command -v pdfinfo > /dev/null 2>&1; then
    printf '%s\n' "pdfinfo not found; install poppler-utils" >&2
    return 1
  fi

  for pdf in *.pdf; do
    # If nothing matches, the glob stays as the literal string "*.pdf";
    # skip it instead of logging a file that does not exist. Dangling
    # symlinks are kept (-L) so they are still reported as broken.
    [[ -e "$pdf" || -L "$pdf" ]] || continue

    # The "./" prefix keeps a name starting with "-" from being read as an
    # option. Output is discarded: only the exit status matters here.
    if ! pdfinfo "./$pdf" > /dev/null 2>&1; then
      stamp="$(date +"%Y-%m-%d %H:%M:%S")"
      printf '[%s] %s is broken\n' "$stamp" "$pdf" >> validate.log
    fi
  done

  return 0
}

validate_pdfs