You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
73 lines
2.8 KiB
73 lines
2.8 KiB
#!/bin/bash
#
# Archive one site's staged scraper output, then move it out of staging.
#
# Usage: <script> -s|--site SITE [-k|--keyword KEYWORD]
#
# For the given SITE this script:
#   1. copies jobs.{json,csv} and jobs-blacklist.{json,csv} from
#      /data/data/staging/SITE/ into timestamped files under
#      /data/data/archive/SITE/{json,csv}/{whitelist,blacklist}/
#   2. moves those same staged files into /data/data/archive/SITE/last/
#
# KEYWORD is parsed, but it is only referenced by the scraper invocation
# that is currently commented out below.

set -euo pipefail

NOW=$(date +"%Y-%m-%d_%H-%M-%S")
SITE=""
KEYWORD=""

# Print a usage message to stderr and exit with status 2.
usage() {
  printf 'Usage: %s -s|--site SITE [-k|--keyword KEYWORD]\n' "${0##*/}" >&2
  exit 2
}

# Print an error message to stderr and exit with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Parse the command line.
# A `while` loop over "$1" is used (rather than `for arg in "$@"`) because
# `shift` does not affect the word list of a running `for` loop; mixing the
# two lets "$2" drift away from the argument currently being examined.
while (( $# > 0 )); do
  case "$1" in
    -s|--site)
      (( $# >= 2 )) || die "option $1 requires a value"
      SITE="$2"
      shift 2 # Remove argument name and value from processing
      ;;
    -k|--keyword)
      (( $# >= 2 )) || die "option $1 requires a value"
      KEYWORD="$2"
      shift 2 # Remove argument name and value from processing
      ;;
    *)
      # Unrecognised arguments were never rejected by this script; keep
      # accepting them, but report them instead of skipping silently.
      printf 'Ignoring unrecognised argument: %s\n' "$1" >&2
      shift
      ;;
  esac
done

# Every path below is built from SITE; refuse to run without it so that no
# sudo cp/mv is attempted on /data/data/staging//... by accident.
[[ -n "$SITE" ]] || usage

STAGING_DIR="/data/data/staging/$SITE"
ARCHIVE_DIR="/data/data/archive/$SITE"

cd /data/scripts

echo "$PWD"

# python3 -c "import selenium; print(selenium.__version__)"

# python3 /data/scripts/target/scraper.py -k gitlab
# python3 /data/scripts/linkedin/scraper.py -k gitlab
# echo "Searching $SITE for $KEYWORD"
# python3 /data/scripts/gather/"$SITE".py -k "$KEYWORD"
# echo "List Generated"

echo "Archiving raw JSON"
sudo cp "$STAGING_DIR/jobs.json" "$ARCHIVE_DIR/json/whitelist/$NOW.json"
sudo cp "$STAGING_DIR/jobs-blacklist.json" "$ARCHIVE_DIR/json/blacklist/$NOW.json"
# Report the paths that were actually written.
echo "Archived $ARCHIVE_DIR/json/whitelist/$NOW.json"
echo "Archived $ARCHIVE_DIR/json/blacklist/$NOW.json"

echo "Archiving raw CSV"
sudo cp "$STAGING_DIR/jobs.csv" "$ARCHIVE_DIR/csv/whitelist/$NOW.csv"
sudo cp "$STAGING_DIR/jobs-blacklist.csv" "$ARCHIVE_DIR/csv/blacklist/$NOW.csv"
echo "Archived $ARCHIVE_DIR/csv/whitelist/$NOW.csv"
echo "Archived $ARCHIVE_DIR/csv/blacklist/$NOW.csv"

# echo "Archiving HTML"
# cp "$SITE"-page.html /data/data/archive/"$SITE"/html/page-"$NOW".html
# cp "$SITE"-snippet.html /data/data/archive/"$SITE"/html/snippet-"$NOW".html
# echo "Archived /data/data/archive/$SITE/html/page-$NOW.html"
# echo "Archived /data/data/archive/$SITE/html/snippet-$NOW.html"

# echo "Archiving screenshots"
# cp "$SITE"-scroll.png /data/data/archive/"$SITE"/png/scroll-"$NOW".png
# echo "Archived /data/data/archive/$SITE/png/$NOW.png"

echo "Cleanslating"
# mv "$SITE"-scroll.png /data/data/archive/"$SITE"/last/"$SITE"-scroll.png
# mv "$SITE"-page.html /data/data/archive/"$SITE"/last/"$SITE"-page.html
# mv "$SITE"-snippet.html /data/data/archive/"$SITE"/last/"$SITE"-snippet.html
# Move (not copy) the staged files so they no longer sit in staging; each
# move overwrites the same fixed file name under last/.
sudo mv "$STAGING_DIR/jobs.json" "$ARCHIVE_DIR/last/jobs.json"
sudo mv "$STAGING_DIR/jobs.csv" "$ARCHIVE_DIR/last/jobs.csv"
sudo mv "$STAGING_DIR/jobs-blacklist.json" "$ARCHIVE_DIR/last/jobs-blacklist.json"
sudo mv "$STAGING_DIR/jobs-blacklist.csv" "$ARCHIVE_DIR/last/jobs-blacklist.csv"
echo "Cleanslated"

# cat target.json | sqlite-utils insert --alter /data/data/datasette/zip.db $NOW -
# echo "Adding to master table"
# sqlite-utils insert /data/data/datasette/"$SITE".db "$NOW" "$SITE".json --alter --truncate --pk=id
# echo "Table added"

# sqlite-utils analyze-tables /data/data/datasette/target.db --save