"""Utility helpers for a job-scraping pipeline.

Provides JSON/CSV export of scraped job postings, a humanized random wait,
date formatting for filenames, a title blacklist check, a BeautifulSoup
dump helper, and a URL-to-PDF capture wrapper.
"""

import csv
import json
import logging as log
import random
import time
from datetime import date
from time import sleep

import numpy as np
import pdfkit


def soup2json(soup):
    """Append the attrs dict of every ``<area>`` tag under the
    ``<weatherForecast>`` node of *soup* to ``nea.json``, one per line.

    NOTE(review): the original body referenced a global ``soup`` that is
    never defined in this file; the parameter restores the signature shown
    in the file's own commented-out draft of this function.
    """
    with open("nea.json", "a+") as fs:
        for area in soup.find("weatherForecast").find_all("area"):
            fs.write(str(area.attrs) + "\n")


def random_wait():
    """Return a wait duration in seconds drawn from N(mean=5.0, sd=1.0).

    The sampled value is printed for operator visibility and returned;
    callers are expected to pass it to ``time.sleep``.
    """
    moment = np.random.normal(5.0, 1.0)
    print(moment)
    return moment


def pretty_date():
    """Return today's date formatted ``MM_DD_YYYY`` (e.g. ``01_31_2024``),
    suitable for embedding in filenames."""
    return date.today().strftime("%m_%d_%Y")


def generate_json(job_objects, filename):
    """Serialize *job_objects* to *filename* as pretty-printed JSON.

    Best-effort: failures are logged, never raised, so a failed export
    does not abort the scrape.
    """
    try:
        with open(filename, "w") as outfile:
            json.dump(job_objects, outfile, indent=4)
        log.info("Created jobs.json!")
    except (OSError, TypeError, ValueError):
        # Narrowed from a bare ``except:`` — catch file errors and
        # unserializable data without swallowing KeyboardInterrupt/SystemExit.
        log.error("Failed to create jobs.json!")


def generate_csv(datas, csv_object_header, filename):
    """Write *csv_object_header* then every row of *datas* to *filename*.

    Best-effort: failures are logged, never raised.
    """
    try:
        with open(filename, "w+", newline="", encoding="utf-8") as save_file:
            writer = csv.writer(save_file)
            writer.writerow(csv_object_header)
            # One C-level call replaces the original per-row Python loop.
            writer.writerows(datas)
        log.info("Exported jobs.csv")
    except (OSError, csv.Error, TypeError):
        # Narrowed from a bare ``except:``.
        log.error("couldnt generate csv")


def blacklisted_title_check(title, employer, blackListTitles):
    """Return True when any entry of *blackListTitles* is a substring of
    *title*, else False.

    *employer* is unused here but retained so existing call sites keep
    working unchanged.
    """
    return any(blacklist in title for blacklist in blackListTitles)


def urltopdf(url, filename):
    """Render the page at *url* to a PDF saved as *filename* via pdfkit."""
    pdfkit.from_url(url, filename)