You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
85 lines
2.3 KiB
85 lines
2.3 KiB
import csv
import glob
import json
import logging as log
import os

import pandas as pd
|
def combine_csv(
    source_dir="/data/data/csv/ziprecruiter/jobs",
    output_path="/data/data/csv/ziprecruiter/append/combined_csv.csv",
):
    """Concatenate every ``*.csv`` file in *source_dir* into a single CSV file.

    Args:
        source_dir: Directory scanned (non-recursively) for ``*.csv`` files.
        output_path: Path of the combined file. It is written without the
            index column and encoded as ``utf-8-sig``.

    Returns:
        None. When *source_dir* contains no CSV files a warning is logged and
        no output file is written.

    The process working directory is left untouched; files are located through
    an absolute glob pattern instead of ``os.chdir``.
    """
    # Escape the directory so glob metacharacters in it are taken literally.
    pattern = os.path.join(glob.escape(source_dir), "*.csv")
    # glob order depends on the filesystem; sort for a reproducible row order.
    all_filenames = sorted(glob.glob(pattern))

    if not all_filenames:
        # pd.concat raises ValueError when given an empty list.
        log.warning("No CSV files found in %s; nothing to combine", source_dir)
        return

    # Combine all files in the list.
    combined_csv = pd.concat([pd.read_csv(name) for name in all_filenames])

    # Export to CSV.
    combined_csv.to_csv(output_path, index=False, encoding="utf-8-sig")
|
|
|
|
|
def page_html_grab(filename, source_data):
    """Write *source_data* to *filename* as UTF-8 text, on a best-effort basis.

    Args:
        filename: Path of the file to create or overwrite.
        source_data: Text to write.

    Returns:
        None. Failures are logged with a traceback and never raised.
    """
    try:
        # The context manager closes the handle even when write() fails.
        with open(filename, "w", encoding="utf-8") as html_file:
            html_file.write(source_data)
    except Exception:
        # Best-effort dump: report the cause but do not interrupt the caller.
        # KeyboardInterrupt/SystemExit are deliberately not caught.
        log.exception("HTML not dumped")
    else:
        log.debug("HTML dumped")
|
|
|
|
|
def snippet_html_grab(filename, snippet):
    """Write the prettified form of *snippet* to *filename* as UTF-8 text.

    Args:
        filename: Path of the file to create or overwrite.
        snippet: Object exposing a ``prettify()`` method that returns the text
            to write (presumably a BeautifulSoup node; verify against callers).

    Returns:
        None. Failures, including a failing ``prettify()`` call, are logged
        with a traceback and never raised.
    """
    try:
        snippet_html = snippet.prettify()
        # The context manager closes the handle even when write() fails.
        with open(filename, "w", encoding="utf-8") as html_file:
            html_file.write(snippet_html)
    except Exception:
        # Best-effort dump: report the cause but do not interrupt the caller.
        log.exception("Snippet not dumped")
    else:
        log.debug("Snippet dumped")
|
|
|
def generate_json(json_data, filename):
    """Serialize *json_data* as indented JSON and write it to *filename*.

    Args:
        json_data: Any object accepted by ``json.dumps``.
        filename: Path of the file to create or overwrite.

    Returns:
        None. Failures are logged with a traceback and never raised.
    """
    # Diagnostic dump goes through logging rather than stdout.
    log.debug("%s", json_data)
    try:
        # Serialize before opening the file so unserializable data cannot
        # leave a truncated or half-written file behind.
        payload = json.dumps(json_data, indent=4)
        with open(filename, "w", encoding="utf-8") as outfile:
            outfile.write(payload)
    except Exception:
        # Best-effort write: report the cause but do not interrupt the caller.
        log.exception("Failed to create %s", filename)
    else:
        log.info("Created %s", filename)
|
|
|
|
|
def generate_csv(datas, csv_object_header, filename):
    """Create *filename* as a CSV file holding a header row and data rows.

    Args:
        datas: Iterable of rows, each an iterable of cell values.
        csv_object_header: Iterable of column names written as the first row.
        filename: Path of the file to create or overwrite.

    Returns:
        None. Failures are logged with a traceback and never raised.
    """
    try:
        # newline="" lets the csv module control line endings itself.
        with open(filename, "w", newline="", encoding="utf-8") as save_file:
            writer = csv.writer(save_file)
            writer.writerow(csv_object_header)
            writer.writerows(datas)
    except Exception:
        # Best-effort write: report the cause but do not interrupt the caller.
        log.exception("Couldn't create %s", filename)
    else:
        log.info("Succesfully created %s", filename)
|
|
|
def append_csv(datas, filename, csv_object_header=None):
    """Append rows to the CSV file *filename*, creating it when missing.

    Args:
        datas: Iterable of rows, each an iterable of cell values.
        filename: Path of the CSV file to extend.
        csv_object_header: Optional iterable of column names. It is written
            only when the file does not exist yet or is empty, so repeated
            calls never duplicate the header.

    Returns:
        None. Failures are logged with a traceback and never raised.
    """
    try:
        needs_header = csv_object_header is not None and (
            not os.path.exists(filename) or os.path.getsize(filename) == 0
        )
        # Mode "a" keeps existing content; "w+" would truncate the file.
        with open(filename, "a", newline="", encoding="utf-8") as save_file:
            writer = csv.writer(save_file)
            if needs_header:
                writer.writerow(csv_object_header)
            writer.writerows(datas)
    except Exception:
        # Best-effort write: report the cause but do not interrupt the caller.
        log.exception("Couldn't append %s", filename)
    else:
        log.info("Succesfully appended %s", filename)
|
|
|
|
|
|
|
|
|
def read_html(file=None, output=None):
    """Return the UTF-8 decoded text of an HTML file.

    Args:
        file: Path of the file to read. When it is not a path, or does not
            name an existing file, ``./data/page.html`` is read instead, which
            is the only file this function read before the argument was
            honoured.
        output: Unused; kept so existing two-argument calls keep working.

    Returns:
        The full file content as a string.

    Raises:
        OSError: If the selected file cannot be opened.
    """
    default_path = "./data/page.html"
    is_path = isinstance(file, (str, bytes, os.PathLike))
    # Fall back to the historical location so callers that passed a
    # placeholder argument still get the page they used to get.
    source = file if is_path and os.path.isfile(file) else default_path
    with open(source, encoding="utf-8") as html_file:
        return html_file.read()
|
|
|