"""File helpers: dump HTML/JSON/CSV artifacts to disk and merge CSV files."""

import csv
import glob
import json
import logging as log
import os

import pandas as pd

# Path that read_html() falls back to when it is not given a usable path.
_DEFAULT_HTML_PATH = "./data/page.html"


def combine_csv(
    source_dir="/data/data/csv/ziprecruiter/jobs",
    output_path="/data/data/csv/ziprecruiter/append/combined_csv.csv",
):
    """Concatenate every ``*.csv`` file in *source_dir* into one CSV file.

    The files are read with pandas in sorted filename order (so the output is
    deterministic), concatenated, and written to *output_path* without the
    index column, encoded as ``utf-8-sig``.

    The process working directory is left untouched: the directory is globbed
    through an absolute pattern rather than by ``os.chdir``.

    Args:
        source_dir: Directory that is searched (non-recursively) for CSV files.
        output_path: Destination of the combined CSV file.

    Raises:
        ValueError: If *source_dir* contains no ``*.csv`` files.
    """
    # glob.escape keeps characters such as "[" in the directory name literal.
    pattern = os.path.join(glob.escape(source_dir), "*.csv")
    csv_files = sorted(glob.glob(pattern))
    if not csv_files:
        # pd.concat([]) would raise ValueError too, just with a vaguer message.
        raise ValueError("No CSV files found in {}".format(source_dir))

    frames = [pd.read_csv(path) for path in csv_files]
    combined = pd.concat(frames)
    combined.to_csv(output_path, index=False, encoding="utf-8-sig")


def page_html_grab(filename, source_data: str) -> None:
    """Write *source_data* to *filename*, replacing any existing content.

    Best effort: a failure is logged (with traceback) and not re-raised.

    Args:
        filename: Path of the file to write.
        source_data: Text to write; encoded as UTF-8, which is the encoding
            read_html() reads with.
    """
    try:
        with open(filename, "w", encoding="utf-8") as html_file:
            html_file.write(source_data)
    except Exception:
        log.exception("HTML not dumped")
    else:
        log.debug("HTML dumped")


def snippet_html_grab(filename, snippet) -> None:
    """Write ``snippet.prettify()`` to *filename*, replacing existing content.

    Best effort: a failure is logged (with traceback) and not re-raised.

    Args:
        filename: Path of the file to write.
        snippet: Any object exposing ``prettify()`` that returns text
            (presumably a BeautifulSoup element; confirm against callers).
    """
    try:
        pretty_html = snippet.prettify()
        with open(filename, "w", encoding="utf-8") as html_file:
            html_file.write(pretty_html)
    except Exception:
        log.exception("Snippet not dumped")
    else:
        log.debug("Snippet dumped")


def generate_json(json_data, filename) -> None:
    """Print *json_data* and serialize it to *filename* as indented JSON.

    Best effort: a failure is logged (with traceback) and not re-raised.

    Args:
        json_data: JSON-serializable object.
        filename: Path of the file to write.
    """
    try:
        # Kept from the original: callers may rely on the stdout echo.
        print(json_data)
        with open(filename, "w", encoding="utf-8") as outfile:
            json.dump(json_data, outfile, indent=4)
    except Exception:
        log.exception("Failed to create %s", filename)
    else:
        log.info("Created %s", filename)


def generate_csv(datas, csv_object_header, filename) -> None:
    """Create *filename* as a CSV file holding a header row and data rows.

    Any existing file is overwritten. Best effort: a failure is logged (with
    traceback) and not re-raised.

    Args:
        datas: Iterable of rows; each row is an iterable of cell values.
        csv_object_header: Iterable of column names, written as the first row.
        filename: Path of the file to write.
    """
    try:
        # newline="" lets the csv module control line endings itself.
        with open(filename, "w", newline="", encoding="utf-8") as save_file:
            writer = csv.writer(save_file)
            writer.writerow(csv_object_header)
            writer.writerows(datas)
    except Exception:
        log.exception("Couldn't create %s", filename)
    else:
        log.info("Succesfully created %s", filename)


def append_csv(datas, filename, csv_object_header=None) -> None:
    """Append the rows in *datas* to the CSV file *filename*.

    The file is created if it does not exist; existing rows are preserved.
    Best effort: a failure is logged (with traceback) and not re-raised.

    Args:
        datas: Iterable of rows; each row is an iterable of cell values.
        filename: Path of the file to append to.
        csv_object_header: Optional iterable of column names. It is written
            only when the file is missing or empty, so repeated calls never
            duplicate the header.
    """
    try:
        needs_header = csv_object_header is not None and (
            not os.path.exists(filename) or os.path.getsize(filename) == 0
        )
        with open(filename, "a", newline="", encoding="utf-8") as save_file:
            writer = csv.writer(save_file)
            if needs_header:
                writer.writerow(csv_object_header)
            writer.writerows(datas)
    except Exception:
        log.exception("Couldn't append %s", filename)
    else:
        log.info("Succesfully appended %s", filename)


def read_html(file=_DEFAULT_HTML_PATH, output=None):
    """Return the UTF-8 decoded text content of an HTML file.

    Args:
        file: Path of the file to read. When it is not a usable path (for
            example ``None`` or an empty string), ``./data/page.html`` is read
            instead, which is the only file this function used to read.
        output: Ignored; accepted only so existing two-argument calls keep
            working.

    Returns:
        The full file content as a string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    if isinstance(file, os.PathLike) or (isinstance(file, str) and file):
        path = file
    else:
        path = _DEFAULT_HTML_PATH

    with open(path, encoding="utf-8") as html_file:
        return html_file.read()