You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
70 lines
2.3 KiB
70 lines
2.3 KiB
#!/bin/bash
#
# Run the scraper for one site, then archive the files found afterwards in
# /data/scripts.
#
# Usage:
#   <script> -s|--site SITE [-k|--keyword KEYWORD]
#
# Steps:
#   1. cd to /data/scripts and print the installed selenium version.
#   2. Run /data/scripts/gather/SITE.py -k KEYWORD.
#   3. Copy SITE.json, SITE-blacklist.json, SITE.csv, SITE-page.html,
#      SITE-snippet.html and SITE-scroll.png (presumably written by the
#      scraper into the working directory -- confirm against the scraper)
#      to timestamped names under /data/data/archive/SITE/{json,csv,html,png}.
#   4. Move the originals into /data/data/archive/SITE/last/.
#
# SITE and KEYWORD may also be inherited from the environment; command-line
# flags override them. Arguments that are not recognised are ignored.
#
# Exit status: 2 on a usage error, otherwise the status of the first command
# that fails (the script runs under `set -e`).

set -euo pipefail

readonly SCRIPTS_DIR=/data/scripts
readonly ARCHIVE_ROOT=/data/data/archive

# Timestamp shared by every archived file of this run.
NOW=$(date +"%Y-%m-%d_%H-%M-%S")
readonly NOW

# Default to values inherited from the environment so `set -u` does not trip
# and callers that export SITE/KEYWORD keep working.
SITE="${SITE:-}"
KEYWORD="${KEYWORD:-}"

# Print a message on stderr and exit with the usage-error status.
die() {
  printf '%s\n' "$*" >&2
  exit 2
}

# Parse -s/--site and -k/--keyword into the SITE and KEYWORD globals.
# A while/shift loop keeps "$1"/"$2" in sync with the argument under
# inspection; iterating over a snapshot of "$@" while shifting does not.
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      -s|--site)
        (( $# >= 2 )) || die "option $1 requires a value"
        SITE="$2"
        shift 2
        ;;
      -k|--keyword)
        (( $# >= 2 )) || die "option $1 requires a value"
        KEYWORD="$2"
        shift 2
        ;;
      *)
        # Unrecognised arguments are skipped rather than rejected.
        shift
        ;;
    esac
  done
}

main() {
  parse_args "$@"

  # SITE is interpolated into every path below; refuse to continue without it.
  [[ -n "$SITE" ]] || die "usage: ${0##*/} -s|--site SITE [-k|--keyword KEYWORD]"

  local archive_dir="$ARCHIVE_ROOT/$SITE"

  cd "$SCRIPTS_DIR"

  echo "$PWD"

  python3 -c "import selenium; print(selenium.__version__)"

  # python3 /data/scripts/target/scraper.py -k gitlab
  # python3 /data/scripts/linkedin/scraper.py -k gitlab
  printf '%s\n' "Searching $SITE for $KEYWORD"
  python3 "$SCRIPTS_DIR/gather/$SITE.py" -k "$KEYWORD"
  echo "List Generated"

  # Make sure the destination tree exists so a first run for a new site does
  # not fail after the scrape has already completed.
  mkdir -p -- "$archive_dir"/{json,csv,html,png,last}

  # `--` keeps a SITE value that starts with '-' from being read as an option.
  echo "Archiving raw JSON"
  cp -- "$SITE.json" "$archive_dir/json/$NOW.json"
  cp -- "$SITE-blacklist.json" "$archive_dir/json/blacklist-$NOW.json"
  printf '%s\n' "Archived $archive_dir/json/$NOW.json"

  echo "Archiving raw CSV"
  cp -- "$SITE.csv" "$archive_dir/csv/$NOW.csv"
  printf '%s\n' "Archived $archive_dir/csv/$NOW.csv"

  echo "Archiving HTML"
  cp -- "$SITE-page.html" "$archive_dir/html/page-$NOW.html"
  cp -- "$SITE-snippet.html" "$archive_dir/html/snippet-$NOW.html"
  printf '%s\n' "Archived $archive_dir/html/page-$NOW.html"
  printf '%s\n' "Archived $archive_dir/html/snippet-$NOW.html"

  echo "Archiving screenshots"
  cp -- "$SITE-scroll.png" "$archive_dir/png/scroll-$NOW.png"
  # Report the path that was actually written (scroll-<timestamp>.png).
  printf '%s\n' "Archived $archive_dir/png/scroll-$NOW.png"

  # Move the working copies out of the scripts directory into last/.
  echo "Cleanslating"
  mv -- "$SITE-scroll.png" "$archive_dir/last/$SITE-scroll.png"
  mv -- "$SITE-page.html" "$archive_dir/last/$SITE-page.html"
  mv -- "$SITE-snippet.html" "$archive_dir/last/$SITE-snippet.html"
  mv -- "$SITE.csv" "$archive_dir/last/$SITE.csv"
  mv -- "$SITE.json" "$archive_dir/last/$SITE.json"
  mv -- "$SITE-blacklist.json" "$archive_dir/last/$SITE-blacklist.json"
  echo "Cleanslated"
}

main "$@"

# Disabled datasette import steps, kept for reference:
# cat target.json | sqlite-utils insert --alter /data/data/datasette/zip.db $NOW -
# echo "Adding to master table"
# sqlite-utils insert /data/data/datasette/"$SITE".db "$NOW" "$SITE".json --alter --truncate --pk=id
# echo "Table added"

# sqlite-utils analyze-tables /data/data/datasette/target.db --save