#!/bin/bash
#
# Archive the staged scrape output for one site, then move the staged files
# into that site's "last" directory so the staging area is left empty.
#
# Usage:
#   <script> -s|--site SITE [-k|--keyword KEYWORD]
#
# Options:
#   -s, --site SITE        Name of the site directory under
#                          /data/data/staging and /data/data/archive.
#   -k, --keyword KEYWORD  Search keyword. Only consumed by the scraper
#                          invocation that is currently commented out below.
#
# Files handled (all relative to /data/data/staging/SITE):
#   jobs.json, jobs-blacklist.json, jobs.csv, jobs-blacklist.csv
#
# Exit status: 2 on a usage error; otherwise the script stops at the first
# failing command because of `set -e`.
set -e

# Timestamp used as the file name of every archived copy made by this run.
NOW=$(date +"%Y-%m-%d_%H-%M-%S")

# Keep any value inherited from the environment as the default, which is what
# happened before whenever the matching flag was not passed.
SITE="${SITE:-}"
KEYWORD="${KEYWORD:-}"

# Walk the live positional parameters. Iterating over a snapshot of "$@" while
# also calling `shift` lets "$2" drift away from the flag being examined as
# soon as an unrecognised argument precedes it.
while [[ $# -gt 0 ]]; do
  case "$1" in
    -s|--site)
      if [[ $# -lt 2 ]]; then
        echo "Missing value for $1" >&2
        exit 2
      fi
      SITE="$2"
      shift 2 # Remove argument name and value from processing
      ;;
    -k|--keyword)
      if [[ $# -lt 2 ]]; then
        echo "Missing value for $1" >&2
        exit 2
      fi
      KEYWORD="$2"
      shift 2 # Remove argument name and value from processing
      ;;
    *)
      # Unrecognised arguments are skipped rather than rejected.
      shift
      ;;
  esac
done

# Every path below embeds SITE; refuse to run with an empty value instead of
# pointing sudo cp/mv at /data/data/staging//...
if [[ -z "$SITE" ]]; then
  echo "Usage: ${0##*/} -s|--site SITE [-k|--keyword KEYWORD]" >&2
  exit 2
fi

STAGING_DIR="/data/data/staging/$SITE"
ARCHIVE_DIR="/data/data/archive/$SITE"

cd /data/scripts
echo "$PWD"

# Scraper step (disabled):
# python3 -c "import selenium; print(selenium.__version__)"
# python3 /data/scripts/target/scraper.py -k gitlab
# python3 /data/scripts/linkedin/scraper.py -k gitlab
# echo "Searching $SITE for $KEYWORD"
# python3 /data/scripts/gather/"$SITE".py -k "$KEYWORD"
# echo "List Generated"

echo "Archiving raw JSON"
sudo cp "$STAGING_DIR/jobs.json" "$ARCHIVE_DIR/json/whitelist/$NOW.json"
sudo cp "$STAGING_DIR/jobs-blacklist.json" "$ARCHIVE_DIR/json/blacklist/$NOW.json"
echo "Archived $ARCHIVE_DIR/json/whitelist/$NOW.json"
echo "Archived $ARCHIVE_DIR/json/blacklist/$NOW.json"

echo "Archiving raw CSV"
sudo cp "$STAGING_DIR/jobs.csv" "$ARCHIVE_DIR/csv/whitelist/$NOW.csv"
sudo cp "$STAGING_DIR/jobs-blacklist.csv" "$ARCHIVE_DIR/csv/blacklist/$NOW.csv"
echo "Archived $ARCHIVE_DIR/csv/whitelist/$NOW.csv"
echo "Archived $ARCHIVE_DIR/csv/blacklist/$NOW.csv"

# HTML archiving (disabled):
# echo "Archiving HTML"
# cp "$SITE"-page.html /data/data/archive/"$SITE"/html/page-"$NOW".html
# cp "$SITE"-snippet.html /data/data/archive/"$SITE"/html/snippet-"$NOW".html
# echo "Archived /data/data/archive/$SITE/html/page-$NOW.html"
# echo "Archived /data/data/archive/$SITE/html/snippet-$NOW.html"

# Screenshot archiving (disabled):
# echo "Archiving screenshots"
# cp "$SITE"-scroll.png /data/data/archive/"$SITE"/png/scroll-"$NOW".png
# echo "Archived /data/data/archive/$SITE/png/$NOW.png"

echo "Cleanslating"
# mv "$SITE"-scroll.png /data/data/archive/"$SITE"/last/"$SITE"-scroll.png
# mv "$SITE"-page.html /data/data/archive/"$SITE"/last/"$SITE"-page.html
# mv "$SITE"-snippet.html /data/data/archive/"$SITE"/last/"$SITE"-snippet.html
# Move (not copy) the staged files so the staging directory no longer holds
# them; each move replaces the previous run's file of the same name in "last".
sudo mv "$STAGING_DIR/jobs.json" "$ARCHIVE_DIR/last/jobs.json"
sudo mv "$STAGING_DIR/jobs.csv" "$ARCHIVE_DIR/last/jobs.csv"
sudo mv "$STAGING_DIR/jobs-blacklist.json" "$ARCHIVE_DIR/last/jobs-blacklist.json"
sudo mv "$STAGING_DIR/jobs-blacklist.csv" "$ARCHIVE_DIR/last/jobs-blacklist.csv"
echo "Cleanslated"

# Datasette import (disabled):
# cat target.json | sqlite-utils insert --alter /data/data/datasette/zip.db $NOW -
# echo "Adding to master table"
# sqlite-utils insert /data/data/datasette/"$SITE".db "$NOW" "$SITE".json --alter --truncate --pk=id
# echo "Table added"
# sqlite-utils analyze-tables /data/data/datasette/target.db --save