Apache Airflow DAGs with backend configuration bundle.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

73 lines
2.8 KiB

#!/bin/bash
#
# Command-line handling for the scrape archiving script.
#
# Options:
#   -s, --site SITE        site name; later used to build the staging and
#                          archive paths
#   -k, --keyword KEYWORD  search keyword; only referenced by the gather step
#                          that is currently commented out
#
# Any other argument is ignored.
set -e

# Timestamp of this run, used as the file name of the archived snapshots.
NOW=$(date +"%Y-%m-%d_%H-%M-%S")

#######################################
# Parse the command line into SITE and KEYWORD.
#
# The positional parameters are consumed with a while/shift loop so that "$1"
# is always the argument under inspection and "$2" is always its value.
# (Iterating over a snapshot of "$@" while shifting lets the two drift apart:
# an unrecognised leading argument made a flag read itself as its value.)
#
# Globals:   SITE (written), KEYWORD (written)
# Arguments: the script's command-line arguments
# Outputs:   an error message on stderr when a flag has no value
# Returns:   0 on success, 1 when a flag is missing its value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -s|--site)
        if [[ $# -lt 2 ]]; then
          echo "error: $1 requires a value" >&2
          return 1
        fi
        SITE="$2"
        shift 2 # drop the flag and its value
        ;;
      -k|--keyword)
        if [[ $# -lt 2 ]]; then
          echo "error: $1 requires a value" >&2
          return 1
        fi
        KEYWORD="$2"
        shift 2 # drop the flag and its value
        ;;
      *)
        # Unrecognised arguments are skipped rather than rejected, so callers
        # that pass extra arguments keep working.
        shift
        ;;
    esac
  done
}

# With `set -e` active, a parse failure aborts the script here.
parse_args "$@"
# ---------------------------------------------------------------------------
# Archive the staged scrape output for "$SITE", then empty the staging area.
#
# Reads:   SITE (site name from -s|--site) and NOW (timestamp of this run).
# Inputs:  /data/data/staging/$SITE/jobs.{json,csv}
#          /data/data/staging/$SITE/jobs-blacklist.{json,csv}
# Outputs: /data/data/archive/$SITE/<fmt>/whitelist/$NOW.<fmt>
#          /data/data/archive/$SITE/<fmt>/blacklist/$NOW.<fmt>
#          /data/data/archive/$SITE/last/<original file name>
#
# The script runs under `set -e`, so the first failing cp/mv aborts the run.
# ---------------------------------------------------------------------------

# Without a site every path below would collapse to ".../staging//jobs.json";
# stop before sudo touches anything.
if [[ -z "${SITE:-}" ]]; then
  echo "error: no site specified (use -s|--site <name>)" >&2
  exit 1
fi

staging_dir="/data/data/staging/$SITE"
archive_dir="/data/data/archive/$SITE"

#######################################
# Copy the whitelist and blacklist file of one format into the timestamped
# archive and report the paths that were actually written.
# Globals:   staging_dir, archive_dir, NOW (all read)
# Arguments: $1 - file extension / format directory name ("json" or "csv")
# Outputs:   one "Archived <path>" line per copied file on stdout
#######################################
archive_format() {
  local ext=$1
  local whitelist_copy="$archive_dir/$ext/whitelist/$NOW.$ext"
  local blacklist_copy="$archive_dir/$ext/blacklist/$NOW.$ext"

  # NOTE(review): sudo is carried over from the original commands; presumably
  # the archive tree is not writable by the invoking user - confirm.
  sudo cp "$staging_dir/jobs.$ext" "$whitelist_copy"
  sudo cp "$staging_dir/jobs-blacklist.$ext" "$blacklist_copy"
  echo "Archived $whitelist_copy"
  echo "Archived $blacklist_copy"
}

cd /data/scripts
echo "$PWD"

# Disabled: gather step that ran the per-site scraper.
# python3 -c "import selenium; print(selenium.__version__)"
# python3 /data/scripts/target/scraper.py -k gitlab
# python3 /data/scripts/linkedin/scraper.py -k gitlab
# echo "Searching $SITE for $KEYWORD"
# python3 /data/scripts/gather/"$SITE".py -k "$KEYWORD"
# echo "List Generated"

echo "Archiving raw JSON"
archive_format json

echo "Archiving raw CSV"
archive_format csv

# Disabled: archiving of HTML pages and screenshots.
# echo "Archiving HTML"
# cp "$SITE"-page.html /data/data/archive/"$SITE"/html/page-"$NOW".html
# cp "$SITE"-snippet.html /data/data/archive/"$SITE"/html/snippet-"$NOW".html
# echo "Archived /data/data/archive/$SITE/html/page-$NOW.html"
# echo "Archived /data/data/archive/$SITE/html/snippet-$NOW.html"
# echo "Archiving screenshots"
# cp "$SITE"-scroll.png /data/data/archive/"$SITE"/png/scroll-"$NOW".png
# echo "Archived /data/data/archive/$SITE/png/$NOW.png"

echo "Cleanslating"
# Disabled: moving the HTML/screenshot artefacts to last/.
# mv "$SITE"-scroll.png /data/data/archive/"$SITE"/last/"$SITE"-scroll.png
# mv "$SITE"-page.html /data/data/archive/"$SITE"/last/"$SITE"-page.html
# mv "$SITE"-snippet.html /data/data/archive/"$SITE"/last/"$SITE"-snippet.html

# Move (not copy) the staged files so the staging directory is left empty;
# last/ always holds the most recent set under the original file names.
for staged_file in jobs.json jobs.csv jobs-blacklist.json jobs-blacklist.csv; do
  sudo mv "$staging_dir/$staged_file" "$archive_dir/last/$staged_file"
done
echo "Cleanslated"

# Disabled: loading the results into the datasette SQLite databases.
# cat target.json | sqlite-utils insert --alter /data/data/datasette/zip.db $NOW -
# echo "Adding to master table"
# sqlite-utils insert /data/data/datasette/"$SITE".db "$NOW" "$SITE".json --alter --truncate --pk=id
# echo "Table added"
# sqlite-utils analyze-tables /data/data/datasette/target.db --save