SurvivorLibraryScrape/get_pages_with_pdfs.sh
2022-02-03 15:53:39 +01:00

14 lines
466 B
Bash

#!/bin/bash
#
# Downloads every page URL listed in survivorlibrary_pages.txt (one URL per
# line) into the pages/ directory. Each file is named after the sixth
# '/'-separated field of its URL, with "%20" replaced by "_" and ".html"
# appended. Downloads are best-effort: a failed URL is reported on stderr and
# the loop moves on to the next one.

set -u

readonly OUTPUT_DIR="pages"
readonly PAGES_FILE="survivorlibrary_pages.txt"
readonly REFERER="http://www.survivorlibrary.com/library-download.html"
readonly USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:97.0) Gecko/20100101 Firefox/97.0"
# Pause between requests; no unit suffix so non-GNU sleep accepts it too.
readonly DELAY_SECONDS="1.5"

#######################################
# Derives the local file name for a page URL.
# Arguments: $1 - page URL
# Outputs:   "<field>.html" on stdout, where <field> is the sixth
#            '/'-separated field of the URL with every "%20" turned into "_"
# Returns:   0 on success, 1 if the URL has no non-empty sixth field
#######################################
page_file_name() {
  local url=$1
  local encoded_space='%20'
  local -a fields=()

  # A non-whitespace IFS keeps empty fields (e.g. the one between the two
  # slashes of "http://"), so index 5 is the sixth field.
  IFS='/' read -r -a fields <<< "$url"

  local segment=${fields[5]-}
  [[ -n "$segment" ]] || return 1

  # The pattern is quoted so the leading '%' is matched literally.
  printf '%s.html\n' "${segment//"$encoded_space"/_}"
}

#######################################
# Fetches every URL in PAGES_FILE into OUTPUT_DIR, printing each target path.
# Globals:   OUTPUT_DIR, PAGES_FILE, REFERER, USER_AGENT, DELAY_SECONDS (read)
# Outputs:   one target file path per URL on stdout; diagnostics on stderr
# Returns:   1 if the output directory or the URL list is unusable, else 0
#######################################
main() {
  if ! mkdir -p -- "${OUTPUT_DIR}"; then
    printf 'error: cannot create output directory %s\n' "${OUTPUT_DIR}" >&2
    return 1
  fi
  if [[ ! -r "${PAGES_FILE}" ]]; then
    printf 'error: cannot read URL list %s\n' "${PAGES_FILE}" >&2
    return 1
  fi

  local url name file_path
  # The list is read on fd 3 so nothing in the loop body can consume it via
  # stdin. The "|| [[ -n ... ]]" keeps a final line lacking a newline.
  while read -r url <&3 || [[ -n "$url" ]]; do
    url=${url%$'\r'}              # tolerate CRLF line endings
    [[ -n "$url" ]] || continue   # ignore blank lines

    if ! name=$(page_file_name "$url"); then
      printf 'warning: skipping %s (no sixth path field to name the file)\n' "$url" >&2
      continue
    fi

    file_path="${OUTPUT_DIR}/${name}"
    printf '%s\n' "${file_path}"

    # -f: fail on HTTP errors, -sS: quiet but still print errors,
    # -L: follow redirects, -C -: resume a partially downloaded file.
    curl -fsSL -o "${file_path}" \
      -H "Referer: ${REFERER}" \
      -H "User-Agent: ${USER_AGENT}" \
      -C - "$url" \
      || printf 'warning: download failed for %s\n' "$url" >&2

    sleep "${DELAY_SECONDS}"
  done 3< "${PAGES_FILE}"

  return 0
}

main "$@"