Notebooks >> Scripts
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

85 lines
2.3 KiB

import csv
import glob
import json
import logging as log
import os

import pandas as pd
def combine_csv(
    source_dir="/data/data/csv/ziprecruiter/jobs",
    output_path="/data/data/csv/ziprecruiter/append/combined_csv.csv",
):
    """Concatenate every ``*.csv`` file in *source_dir* into a single CSV.

    The files are read with ``pandas.read_csv``, stacked with
    ``pandas.concat`` and written to *output_path* without the index
    column, encoded as ``utf-8-sig``.

    Args:
        source_dir: Directory that is searched (non-recursively) for
            ``*.csv`` files.
        output_path: Destination of the combined CSV file.

    Returns:
        None. When no CSV file is found a warning is logged and nothing
        is written.
    """
    extension = "csv"
    # Build an absolute pattern instead of calling os.chdir(): changing the
    # process-wide working directory would break every relative path used
    # elsewhere in the program.
    pattern = os.path.join(source_dir, "*.{}".format(extension))
    # glob returns files in arbitrary order; sort so the output is stable.
    all_filenames = sorted(glob.glob(pattern))

    if not all_filenames:
        # pd.concat raises ValueError on an empty list.
        log.warning("No %s files found in %s; nothing to combine", extension, source_dir)
        return

    # Combine all files in the list.
    combined_csv = pd.concat([pd.read_csv(name) for name in all_filenames])
    # Export to CSV.
    combined_csv.to_csv(output_path, index=False, encoding="utf-8-sig")
def page_html_grab(filename, source_data):
    """Dump page HTML text to *filename* on a best-effort basis.

    Args:
        filename: Path of the file to (over)write.
        source_data: Text to write into the file.

    Returns:
        None. Failures are logged with their traceback, never raised.
    """
    try:
        # The context manager closes the handle even when write() fails.
        # UTF-8 is used explicitly so the dump does not depend on the
        # platform's locale encoding.
        with open(filename, "w", encoding="utf-8") as html_file:
            html_file.write(source_data)
    except Exception:
        # Stay best-effort, but let KeyboardInterrupt/SystemExit propagate
        # and keep the cause in the log.
        log.error("HTML not dumped", exc_info=True)
    else:
        log.debug("HTML dumped")
def snippet_html_grab(filename, snippet):
    """Dump the prettified form of *snippet* to *filename*, best-effort.

    Args:
        filename: Path of the file to (over)write.
        snippet: Object exposing a ``prettify()`` method that returns the
            text to write.

    Returns:
        None. Failures are logged with their traceback, never raised.
    """
    try:
        # Render first so a failing prettify() does not truncate the target.
        snippet_html = snippet.prettify()
        # The context manager closes the handle even when write() fails.
        # UTF-8 is used explicitly so the dump does not depend on the
        # platform's locale encoding.
        with open(filename, "w", encoding="utf-8") as snippet_file:
            snippet_file.write(snippet_html)
    except Exception:
        # Stay best-effort, but let KeyboardInterrupt/SystemExit propagate
        # and keep the cause in the log.
        log.error("Snippet not dumped", exc_info=True)
    else:
        log.debug("Snippet dumped")
def generate_json(json_data, filename):
    """Serialize *json_data* as indented JSON into *filename*, best-effort.

    Args:
        json_data: Object passed to ``json.dump``.
        filename: Path of the file to (over)write.

    Returns:
        None. Failures are logged with their traceback, never raised.
    """
    # Payload goes to the debug log rather than stdout.
    log.debug("JSON payload: %s", json_data)
    try:
        with open(filename, "w", encoding="utf-8") as outfile:
            json.dump(json_data, outfile, indent=4)
    except Exception:
        # %-style args avoid the TypeError that "..." + filename raised for
        # non-str paths inside the handler itself.
        log.error("Failed to create %s", filename, exc_info=True)
    else:
        log.info("Created %s", filename)
def generate_csv(datas, csv_object_header, filename):
    """Write a header row followed by *datas* to a new CSV file, best-effort.

    Args:
        datas: Iterable of rows, each a sequence of field values.
        csv_object_header: Sequence of column names written as first row.
        filename: Path of the file to (over)write, encoded as UTF-8.

    Returns:
        None. Failures are logged with their traceback, never raised.
    """
    try:
        # newline="" lets the csv module control line endings itself.
        with open(filename, "w", newline="", encoding="utf-8") as save_file:
            writer = csv.writer(save_file)
            writer.writerow(csv_object_header)
            writer.writerows(datas)
    except Exception:
        # Stay best-effort, but let KeyboardInterrupt/SystemExit propagate
        # and keep the cause in the log.
        log.error("Couldn't create %s", filename, exc_info=True)
    else:
        log.info("Succesfully created %s", filename)
def append_csv(datas, filename, csv_object_header=None):
    """Append *datas* as rows to the CSV file *filename*, best-effort.

    Existing content is kept: the file is opened in append mode, not
    truncated.

    Args:
        datas: Iterable of rows, each a sequence of field values.
        filename: Path of the CSV file to extend (created when missing),
            encoded as UTF-8.
        csv_object_header: Optional sequence of column names. It is
            written only when the file is new or empty, so the header is
            never repeated.

    Returns:
        None. Failures are logged with their traceback, never raised.
    """
    try:
        # newline="" lets the csv module control line endings itself.
        with open(filename, "a", newline="", encoding="utf-8") as save_file:
            writer = csv.writer(save_file)
            # In append mode the stream starts at end-of-file, so position
            # 0 means there is no existing content.
            if csv_object_header is not None and save_file.tell() == 0:
                writer.writerow(csv_object_header)
            writer.writerows(datas)
    except Exception:
        # Stay best-effort, but let KeyboardInterrupt/SystemExit propagate
        # and keep the cause in the log.
        log.error("Couldn't append %s", filename, exc_info=True)
    else:
        log.info("Succesfully appended %s", filename)
def read_html(file, output):
    """Return the full text of ``./data/page.html``, decoded as UTF-8.

    NOTE(review): both arguments are ignored. The path is hard-coded and
    resolved against the current working directory; *file* and *output*
    are kept only so existing call sites keep working.

    Args:
        file: Unused.
        output: Unused.

    Returns:
        The contents of ``./data/page.html`` as a string.
    """
    with open("./data/page.html", encoding="utf-8") as page_file:
        return page_file.read()