import csv
import json
import logging as log
from datetime import date

import numpy as np
import pdfkit
import requests

def soup2json(soup):
    """Append each weatherForecast <area> tag's attributes to nea.json, one JSON object per line."""
    with open("nea.json", "a+") as fs:
        for area in soup.find("weatherForecast").find_all("area"):
            # json.dumps emits valid JSON lines; the old str(area.attrs) wrote Python dict reprs
            fs.write(json.dumps(area.attrs) + "\n")

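# Usage sketch: this file never builds the soup itself, so the parser choice is
# an assumption. An XML parser preserves the camelCase <weatherForecast> tag
# (html.parser would lowercase it).
#   from bs4 import BeautifulSoup
#   soup = BeautifulSoup(xml_text, "xml")  # xml_text: hypothetical NEA forecast payload
#   soup2json(soup)
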
def random_wait():
    """Return a wait duration in seconds, drawn from a normal distribution (mean 5.0, sd 1.0)."""
    moment = np.random.normal(5.0, 1.0)
    print(moment)
    return moment

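# Usage sketch: nothing in this file calls random_wait(), so pairing it with
# time.sleep between scraper requests is an assumption. A normal draw can be
# negative, hence the clamp.
#   import time
#   time.sleep(max(0.0, random_wait()))
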
def pretty_date():
    """Return today's date as an MM_DD_YYYY string."""
    today = date.today()
    search_date = today.strftime("%m_%d_%Y")
    return search_date

# Clean all non-alphanumeric characters (restored from commented-out code).
def strip(string):
    """Drop words containing '#', then keep only alphanumerics, spaces, periods, and commas."""
    words = string.split()
    words = [word for word in words if "#" not in word]
    string = " ".join(words)
    clean = ""
    for c in string:
        if c.isalnum() or c in (" ", ".", ","):
            clean += c
    return clean

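# Example (illustrative input, not taken from this repo's scrapers):
#   strip("Senior #Python Engineer (Remote)")  # -> "Senior Engineer Remote"
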
def generate_json(job_objects, filename):
    """Serialize scraped job objects to a JSON file."""
    try:
        with open(filename, "w") as outfile:
            json.dump(job_objects, outfile, indent=4)
        log.info("Created %s!", filename)
    except (OSError, TypeError):
        # OSError: file not writable; TypeError: object not JSON-serializable
        log.error("Failed to create %s!", filename)

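# Usage sketch (the field names are assumptions; see the CSV header comment in
# generate_csv for the columns the scrapers actually track):
#   generate_json([{"title": "Data Engineer", "employer": "Acme"}], "jobs.json")
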
def generate_csv(datas, csv_object_header, filename):
    """Write one CSV row per scraped job under the given header row."""
    try:
        with open(
            filename,
            "w+",
            newline="",
            encoding="utf-8",
        ) as save_file:
            writer = csv.writer(save_file)
            writer.writerow(csv_object_header)
            # Header the callers historically passed in:
            # data_listing_version_key, data_job_id, data_location, data_posted_on,
            # title, job_url, geo_lat, geo_lng, apply_method, external_apply,
            # employer, job_snippet, status, search_keyword, search_window,
            # pull_date, job_site, blacklisted_title_ind
            for data in datas:
                writer.writerow(data)
        log.info("Exported %s", filename)
    except OSError:
        log.error("Could not generate %s", filename)

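# Usage sketch (three illustrative columns; real callers pass the full header
# commented above):
#   generate_csv(
#       [["job123", "indeed", "python"]],
#       ["data_job_id", "job_site", "search_keyword"],
#       "jobs.csv",
#   )
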
def blacklisted_title_check(title, employer, blackListTitles):
    """Return True if any blacklisted phrase appears in the job title.

    `employer` is accepted for call-site compatibility but is not checked here.
    """
    for blacklist in blackListTitles:
        if blacklist in title:
            return True
    return False

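# Usage sketch (the blacklist terms are illustrative):
#   blacklisted_title_check("Senior Sales Engineer", "Acme", ["Sales", "Recruiter"])  # -> True
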
# Restored from commented-out scratch code; fetches raw HTML for a URL.
def getdata(url):
    r = requests.get(url)
    return r.text

# Original ad-hoc demo, left disabled (it additionally needs
# `from bs4 import BeautifulSoup`):
# htmldata = getdata("https://www.geeksforgeeks.org/how-to-automate-an-excel-sheet-in-python/?ref=feed")
# soup = BeautifulSoup(htmldata, "html.parser")
# for p in soup.find_all("p"):
#     print(p.get_text())

def urltopdf(url, filename):
    """Render a web page to a PDF file via pdfkit (requires the wkhtmltopdf binary on PATH)."""
    pdfkit.from_url(url, filename)

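# Usage sketch (the output name is illustrative):
#   urltopdf("https://example.com", f"page_{pretty_date()}.pdf")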