#!/bin/bash
#
# Run the scraper for one site/keyword pair, archive every artifact found in
# the working directory under a timestamped name, then move the artifacts into
# the site's "last" directory so the working directory is clean for the next
# run.
#
# Usage: <this-script> -s|--site SITE -k|--keyword KEYWORD
#
# After /data/scripts/gather/SITE.py has run, these files must exist in
# /data/scripts (presumably written by the scraper -- confirm against it):
#   SITE.json  SITE-blacklist.json  SITE.csv
#   SITE-page.html  SITE-snippet.html  SITE-scroll.png
#
# Exit status: 0 on success, 2 on bad usage, non-zero on any failed step.

set -euo pipefail

readonly SCRIPTS_DIR=/data/scripts
readonly ARCHIVE_ROOT=/data/data/archive

# One timestamp for the whole run so every archived artifact shares a name.
NOW=$(date +"%Y-%m-%d_%H-%M-%S")
readonly NOW

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the usage line to stderr and abort with the conventional status 2.
usage() {
  printf 'Usage: %s -s|--site SITE -k|--keyword KEYWORD\n' "${0##*/}" >&2
  exit 2
}

SITE=
KEYWORD=

# Parse with `while`/`shift` on $1. Iterating `for arg in "$@"` while shifting
# inside the loop reads `$2` relative to the shifted list, not to the current
# loop word, and picks up the wrong value as soon as the flags are not the
# first arguments.
while (( $# > 0 )); do
  case "$1" in
    -s|--site)
      (( $# >= 2 )) || die "option $1 requires a value"
      SITE=$2
      shift 2
      ;;
    -k|--keyword)
      (( $# >= 2 )) || die "option $1 requires a value"
      KEYWORD=$2
      shift 2
      ;;
    *)
      # Unknown arguments were silently skipped before; keep accepting them
      # but make the fact visible.
      printf 'warning: ignoring unrecognized argument: %s\n' "$1" >&2
      shift
      ;;
  esac
done

# Both values are interpolated into paths / passed to the scraper; refuse to
# continue with empty ones instead of operating on ".py" and "archive//...".
[[ -n "$SITE" && -n "$KEYWORD" ]] || usage

cd "$SCRIPTS_DIR" || die "cannot cd to $SCRIPTS_DIR"
echo "$PWD"

# Fail early if selenium is not importable; also logs its version.
python3 -c "import selenium; print(selenium.__version__)"

# Older, per-site entry points kept for reference:
# python3 /data/scripts/target/scraper.py -k gitlab
# python3 /data/scripts/linkedin/scraper.py -k gitlab

scraper="$SCRIPTS_DIR/gather/$SITE.py"
[[ -f "$scraper" ]] || die "no scraper for site '$SITE': $scraper not found"

echo "Searching $SITE for $KEYWORD"
python3 "$scraper" -k "$KEYWORD"
echo "List Generated"

archive="$ARCHIVE_ROOT/$SITE"
# Make sure every destination exists so a first run for a new site does not
# fail halfway through archiving.
mkdir -p -- "$archive/json" "$archive/csv" "$archive/html" "$archive/png" "$archive/last"

echo "Archiving raw JSON"
cp -- "${SITE}.json" "$archive/json/${NOW}.json"
cp -- "${SITE}-blacklist.json" "$archive/json/blacklist-${NOW}.json"
echo "Archived $archive/json/${NOW}.json"

echo "Archiving raw CSV"
cp -- "${SITE}.csv" "$archive/csv/${NOW}.csv"
echo "Archived $archive/csv/${NOW}.csv"

echo "Archiving HTML"
cp -- "${SITE}-page.html" "$archive/html/page-${NOW}.html"
cp -- "${SITE}-snippet.html" "$archive/html/snippet-${NOW}.html"
echo "Archived $archive/html/page-${NOW}.html"
echo "Archived $archive/html/snippet-${NOW}.html"

echo "Archiving screenshots"
cp -- "${SITE}-scroll.png" "$archive/png/scroll-${NOW}.png"
# Report the path that was actually written (the file name carries "scroll-").
echo "Archived $archive/png/scroll-${NOW}.png"

# Move the working copies into "last/" (overwriting the previous run's files)
# so the working directory is empty of artifacts afterwards.
echo "Cleanslating"
for artifact in \
  "${SITE}-scroll.png" \
  "${SITE}-page.html" \
  "${SITE}-snippet.html" \
  "${SITE}.csv" \
  "${SITE}.json" \
  "${SITE}-blacklist.json"; do
  mv -- "$artifact" "$archive/last/$artifact"
done
echo "Cleanslated"

# Disabled datasette/sqlite-utils import steps, kept for reference:
# cat target.json | sqlite-utils insert --alter /data/data/datasette/zip.db $NOW -
# echo "Adding to master table"
# sqlite-utils insert /data/data/datasette/"$SITE".db "$NOW" "$SITE".json --alter --truncate --pk=id
# echo "Table added"
# sqlite-utils analyze-tables /data/data/datasette/target.db --save