"""LinkedIn "Easy Apply" automation bot driven through a remote Selenium session.

Run as a script: reads ``config.yaml`` from the current directory, logs in to
LinkedIn and walks the job-search results for every position/location pair,
recording each visited job in a CSV file.
"""
import csv
import itertools
import logging
import os
import platform
import random
import re
import time
# from webdriver_manager.chrome import ChromeDriverManager
from datetime import datetime, timedelta
from urllib.request import urlopen

# import pyautogui
import pandas as pd
import yaml
from bs4 import BeautifulSoup
# import mouseinfo
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

os.environ["DISPLAY"] = ":0"

log = logging.getLogger(__name__)

# NOTE(review): `options` and `capabilities` were referenced below but never
# defined anywhere in this file, so the module failed with NameError on import.
# Plain Chrome defaults are assumed here -- confirm against the Selenium grid.
options = Options()
capabilities = DesiredCapabilities.CHROME.copy()

# The hub address can be overridden with SELENIUM_REMOTE_URL; the default is
# the address that used to be hard-coded.
browser = webdriver.Remote(
    command_executor=os.environ.get(
        "SELENIUM_REMOTE_URL", "http://192.168.1.101:4444/wd/hub"
    ),
    options=options,
    desired_capabilities=capabilities,
)


def setupLogger():
    """Attach a console handler to the module logger (safe to call repeatedly).

    NOTE(review): this function was called by ``EasyApplyBot`` but not defined
    in the file; this is a minimal stand-in -- replace with the project's real
    logging setup if one exists.
    """
    if not log.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(
            logging.Formatter("%(asctime)s::%(name)s::%(levelname)s::%(message)s")
        )
        log.addHandler(handler)
    log.setLevel(logging.DEBUG)


class EasyApplyBot:
    """Logs in to LinkedIn and applies to jobs that offer "Easy Apply"."""

    setupLogger()
    # MAX_SEARCH_TIME is 10 hours by default, feel free to modify it
    MAX_SEARCH_TIME = 10 * 60 * 60
    # Upper bound on position/location pairs searched by one start_apply() call.
    MAX_COMBOS = 501

    def __init__(
        self,
        username,
        password,
        uploads=None,
        filename="output.csv",
        blacklist=None,
        blackListTitles=None,
    ):
        """Load previously applied job IDs, bind the shared browser and log in.

        Args:
            username: LinkedIn login name.
            password: LinkedIn password.
            uploads: mapping of a label (matched against the upload field's
                surrounding text) to the file path to upload.
            filename: CSV file that records every visited job.
            blacklist: link texts to skip in the search results.
            blackListTitles: words that disqualify a job when found in the
                page title.
        """
        log.info("Welcome to Easy Apply Bot")
        dirpath = os.getcwd()
        log.info("current directory is : " + dirpath)

        # Fresh containers per instance: mutable defaults in the signature
        # would be shared between all instances.
        self.uploads = {} if uploads is None else uploads
        past_ids = self.get_appliedIDs(filename)
        self.appliedJobIDs = past_ids if past_ids is not None else []
        self.filename = filename
        self.options = self.browser_options()
        self.browser = browser
        self.wait = WebDriverWait(self.browser, 30)
        self.blacklist = [] if blacklist is None else blacklist
        self.blackListTitles = [] if blackListTitles is None else blackListTitles
        self.start_linkedin(username, password)

    def browser_options(self):
        """Return the options object the shared remote session was created with.

        NOTE(review): this method was called from ``__init__`` but missing from
        the file; returning the module-level ``options`` is an assumption.
        """
        return options

    def get_appliedIDs(self, filename):
        """Return job IDs recorded in *filename* during the last two days.

        Returns:
            A list of job IDs, or ``None`` when the CSV cannot be read or
            parsed (for example on the very first run).
        """
        try:
            df = pd.read_csv(
                filename,
                header=None,
                names=["timestamp", "jobID", "job", "company", "attempted", "result"],
                lineterminator="\n",
                encoding="utf-8",
            )

            df["timestamp"] = pd.to_datetime(
                df["timestamp"], format="%Y-%m-%d %H:%M:%S"
            )
            df = df[df["timestamp"] > (datetime.now() - timedelta(days=2))]
            jobIDs = list(df.jobID)
            log.info(f"{len(jobIDs)} jobIDs found")
            return jobIDs
        except Exception as e:
            # Best effort: a missing or malformed history file must not stop
            # the bot.
            log.info(
                str(e) + " jobIDs could not be loaded from CSV {}".format(filename)
            )
            return None

    def start_linkedin(self, username, password):
        """Open the LinkedIn login page and submit the credentials."""
        log.info("Logging in.....Please wait :) ")
        self.browser.get(
            "https://www.linkedin.com/login?trk=guest_homepage-basic_nav-header-signin"
        )
        try:
            user_field = self.browser.find_element(By.ID, "username")
            pw_field = self.browser.find_element(By.ID, "password")
            login_button = self.browser.find_element(
                By.CSS_SELECTOR, ".btn__primary--large"
            )
            user_field.send_keys(username)
            user_field.send_keys(Keys.TAB)
            time.sleep(2)
            pw_field.send_keys(password)
            time.sleep(2)
            login_button.click()
            time.sleep(3)
        except (NoSuchElementException, TimeoutException):
            # find_element raises NoSuchElementException (not TimeoutException)
            # when a field is missing, so both must be handled here.
            log.info(
                "TimeoutException! Username/password field or login button not found"
            )

    def fill_data(self):
        """Shrink the browser window and move it to position (2000, 2000)."""
        self.browser.set_window_size(0, 0)
        self.browser.set_window_position(2000, 2000)

    def start_apply(self, positions, locations):
        """Run a job search for every position/location pair in random order.

        Duplicate pairs are searched once. At most ``MAX_COMBOS`` pairs are
        processed per call.
        """
        self.fill_data()

        # De-duplicate first: the former "sample until every pair was seen"
        # loop never terminated when either list contained duplicates.
        combos = list(dict.fromkeys(itertools.product(positions, locations)))
        random.shuffle(combos)

        for position, location in combos[: self.MAX_COMBOS]:
            log.info(f"Applying to {position}: {location}")
            self.applications_loop(position, "&location=" + location)

        # self.finish_apply() --> this does seem to cause more harm than good,
        # since it closes the browser which we usually don't want, other
        # conditions will stop the loop and just break out

    def applications_loop(self, position, location):
        """Page through the search results of one query and apply to each job.

        Args:
            position: search keywords.
            location: location query fragment, already prefixed with
                ``&location=``.

        The loop ends when a results page contains no job cards or when
        ``MAX_SEARCH_TIME`` seconds have elapsed.
        """
        count_application = 0
        count_job = 0
        jobs_per_page = 0
        start_time = time.time()

        log.info("Looking for jobs.. Please wait..")

        self.browser.set_window_position(0, 0)
        self.browser.maximize_window()
        self.browser, _ = self.next_jobs_page(position, location, jobs_per_page)
        log.info("Looking for jobs.. Please wait..")

        while time.time() - start_time < self.MAX_SEARCH_TIME:
            log.info(
                f"{(self.MAX_SEARCH_TIME - (time.time() - start_time)) // 60} minutes left in this search"
            )

            # sleep to make sure everything loads, add random to make us look human.
            randoTime = random.uniform(3.5, 4.9)
            log.debug(f"Sleeping for {round(randoTime, 1)}")
            time.sleep(randoTime)
            self.load_page(sleep=1)

            # LinkedIn displays the search results in a scrollable container
            # (a div) on the left side, we have to scroll to its bottom
            scrollresults = self.browser.find_element(
                By.CLASS_NAME, "jobs-search-results"
            )
            # Selenium only detects visible elements; if we scroll to the bottom
            # too fast, only 8-9 results will be loaded into IDs list
            for offset in range(300, 3000, 100):
                self.browser.execute_script(
                    "arguments[0].scrollTo(0, {})".format(offset), scrollresults
                )

            time.sleep(1)

            # get job links
            links = self.browser.find_elements(By.XPATH, "//div[@data-job-id]")

            if len(links) == 0:
                break

            # get job ID of each job link
            IDs = set()
            for link in links:
                children = link.find_elements(By.XPATH, ".//a[@data-control-name]")
                for child in children:
                    if child.text not in self.blacklist:
                        temp = link.get_attribute("data-job-id")
                        IDs.add(int(temp.split(":")[-1]))

            # remove already applied jobs
            jobIDs = [x for x in IDs if x not in self.appliedJobIDs]

            # it assumed that 25 jobs are listed in the results window
            if len(jobIDs) == 0 and len(IDs) > 23:
                jobs_per_page = jobs_per_page + 25
                count_job = 0
                self.avoid_lock()
                self.browser, jobs_per_page = self.next_jobs_page(
                    position, location, jobs_per_page
                )

            # loop over IDs to apply
            for jobID in jobIDs:
                count_job += 1
                self.get_job_page(jobID)

                # get easy apply button
                button = self.get_easy_apply_button()

                # word filter to skip positions not wanted
                if button is not False:
                    # Use the instance's list: the bare name `blackListTitles`
                    # only existed as a global when the file ran as a script.
                    if any(
                        word in self.browser.title for word in self.blackListTitles
                    ):
                        log.info(
                            "skipping this application, a blacklisted keyword was found in the job position"
                        )
                        string_easy = "* Contains blacklisted keyword"
                        result = False
                    else:
                        string_easy = "* has Easy Apply Button"
                        log.info("Clicking the EASY apply button")
                        button.click()
                        time.sleep(3)
                        result = self.send_resume()
                        count_application += 1
                else:
                    log.info("The button does not exist.")
                    string_easy = "* Doesn't have Easy Apply Button"
                    result = False

                position_number = str(count_job + jobs_per_page)
                log.info(
                    f"\nPosition {position_number}:\n {self.browser.title} \n {string_easy} \n"
                )

                self.write_to_file(button, jobID, self.browser.title, result)
                # Remember the job for the rest of this run so a later search
                # does not visit it again.
                self.appliedJobIDs.append(jobID)

                # sleep every 20 applications
                if count_application != 0 and count_application % 20 == 0:
                    sleepTime = random.randint(500, 900)
                    log.info(
                        f"""********count_application: {count_application}************\n\n
                                    Time for a nap - see you in:{int(sleepTime / 60)} min
                                ****************************************\n\n"""
                    )
                    time.sleep(sleepTime)

                # go to new page if all jobs are done
                if count_job == len(jobIDs):
                    jobs_per_page = jobs_per_page + 25
                    count_job = 0
                    log.info(
                        """****************************************\n\n
                        Going to next jobs page, YEAAAHHH!!
                        ****************************************\n\n"""
                    )
                    self.avoid_lock()
                    self.browser, jobs_per_page = self.next_jobs_page(
                        position, location, jobs_per_page
                    )
            # NOTE(review): when every job on a short (fewer than 24 results)
            # page was already applied to, nothing above advances the page, so
            # this loop re-scans the same page until MAX_SEARCH_TIME expires.
            # if len(jobIDs) == 0 or i == (len(jobIDs) - 1):
            #     break

    def write_to_file(self, button, jobID, browserTitle, result):
        """Append one row describing the visited job to the output CSV.

        Row layout: timestamp, jobID, job, company, attempted, result. The job
        and company names are parsed from a page title of the form
        ``"<job> | <company> | ..."``; a part that is missing is written empty.
        """

        def re_extract(text, pattern):
            target = re.search(pattern, text)
            if target:
                target = target.group(1)
            return target

        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        attempted = button is not False
        title_parts = browserTitle.split(" | ")
        job = re_extract(title_parts[0], r"\(?\d?\)?\s?(\w.*)")
        # Titles without a " | " separator have no company part.
        company = (
            re_extract(title_parts[1], r"(\w.*)") if len(title_parts) > 1 else None
        )

        toWrite = [timestamp, jobID, job, company, attempted, result]
        # newline="" is what the csv module expects from its file object;
        # utf-8 matches the encoding get_appliedIDs reads with.
        with open(self.filename, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(toWrite)

    def get_job_page(self, jobID):
        """Navigate to the job's page and return it parsed (see load_page)."""
        job = "https://www.linkedin.com/jobs/view/" + str(jobID)
        self.browser.get(job)
        self.job_page = self.load_page(sleep=0.5)
        return self.job_page

    def get_easy_apply_button(self):
        """Return the first apply-button element on the page, or ``False``."""
        try:
            buttons = self.browser.find_elements(
                By.XPATH, '//button[contains(@class, "jobs-apply")]/span[1]'
            )
        except Exception:
            # Best effort, as before, but no longer swallowing
            # KeyboardInterrupt/SystemExit like the former bare `except:`.
            log.debug("Could not look up the Easy Apply button", exc_info=True)
            return False
        return buttons[0] if buttons else False

    def send_resume(self):
        """Step through the Easy Apply dialog until it is submitted or stuck.

        Returns:
            ``True`` when a "Submit application" button was clicked, ``False``
            when no usable button was found or a form validation error was
            shown.

        Raises:
            Exception: any error raised while driving the dialog is logged and
                re-raised.
        """

        def is_present(button_locator):
            return (
                len(self.browser.find_elements(button_locator[0], button_locator[1]))
                > 0
            )

        try:
            time.sleep(random.uniform(1.5, 2.5))
            next_locater = (
                By.CSS_SELECTOR,
                "button[aria-label='Continue to next step']",
            )
            review_locater = (
                By.CSS_SELECTOR,
                "button[aria-label='Review your application']",
            )
            submit_locater = (
                By.CSS_SELECTOR,
                "button[aria-label='Submit application']",
            )
            error_locator = (
                By.CSS_SELECTOR,
                "p[data-test-form-element-error-message='true']",
            )
            upload_locator = (By.CSS_SELECTOR, "input[name='file']")

            # Tried in this order on every step of the dialog.
            buttons = [next_locater, review_locater, submit_locater]

            submitted = False
            while True:
                # Upload Cover Letter if possible
                if is_present(upload_locator):
                    input_buttons = self.browser.find_elements(
                        upload_locator[0], upload_locator[1]
                    )
                    for input_button in input_buttons:
                        parent = input_button.find_element(By.XPATH, "..")
                        sibling = parent.find_element(By.XPATH, "preceding-sibling::*")
                        grandparent = sibling.find_element(By.XPATH, "..")
                        sibling_text = sibling.text.lower()
                        gparent_text = grandparent.text.lower()
                        for key, path in self.uploads.items():
                            # Compare case-insensitively against both texts;
                            # the grandparent check used the un-lowered key.
                            wanted = key.lower()
                            if wanted in sibling_text or wanted in gparent_text:
                                input_button.send_keys(path)

                    # input_button[0].send_keys(self.cover_letter_loctn)
                    time.sleep(random.uniform(4.5, 6.5))

                # Click Next or submit button if possible
                button = None
                for button_locator in buttons:
                    if is_present(button_locator):
                        button = self.wait.until(
                            EC.element_to_be_clickable(button_locator)
                        )

                    if is_present(error_locator):
                        for element in self.browser.find_elements(
                            error_locator[0], error_locator[1]
                        ):
                            text = element.text
                            if "Please enter a valid answer" in text:
                                # A required answer is missing: do not click.
                                button = None
                                break
                    if button:
                        button.click()
                        time.sleep(random.uniform(1.5, 2.5))
                        if button_locator is submit_locater:
                            submitted = True
                        break
                if button is None:
                    log.info("Could not complete submission")
                    break
                elif submitted:
                    log.info("Application Submitted")
                    break

                time.sleep(random.uniform(1.5, 2.5))

        except Exception as e:
            log.info(e)
            log.info("cannot apply to this job")
            raise

        return submitted

    def load_page(self, sleep=1):
        """Scroll down the page in steps and return it parsed by BeautifulSoup.

        Args:
            sleep: seconds to wait once the scrolling is done. For any value
                other than 1 the page is also scrolled back to the top and
                three times ``sleep`` is waited in addition.
        """
        scroll_page = 0
        while scroll_page < 4000:
            self.browser.execute_script("window.scrollTo(0," + str(scroll_page) + " );")
            scroll_page += 200
        time.sleep(sleep)

        if sleep != 1:
            self.browser.execute_script("window.scrollTo(0,0);")
            time.sleep(sleep * 3)

        page = BeautifulSoup(self.browser.page_source, "lxml")
        return page

    def avoid_lock(self):
        """Placeholder for the disabled mouse/keyboard activity; only prints."""
        print("avoid_lock")
        # x, _ = pyautogui.position()
        # pyautogui.moveTo(x + 200, pyautogui.position().y, duration=1.0)
        # pyautogui.moveTo(x, pyautogui.position().y, duration=0.5)
        # pyautogui.keyDown('ctrl')
        # pyautogui.press('esc')
        # pyautogui.keyUp('ctrl')
        # time.sleep(0.5)
        # pyautogui.press('esc')

    def next_jobs_page(self, position, location, jobs_per_page):
        """Open the search results page that starts at offset *jobs_per_page*.

        Returns:
            The tuple ``(self.browser, jobs_per_page)``.
        """
        self.browser.get(
            "https://www.linkedin.com/jobs/search/?f_LF=f_AL&keywords="
            + position
            + location
            + "&start="
            + str(jobs_per_page)
        )
        self.avoid_lock()
        log.info("Lock avoided.")
        self.load_page()
        return (self.browser, jobs_per_page)

    def finish_apply(self):
        """Close the browser window."""
        self.browser.close()


def main():
    """Read ``config.yaml``, validate it and start the bot.

    Raises:
        ValueError: when positions, locations, username or password are
            missing, or an upload entry has no path.
        Exception: when ``uploads`` is a list instead of a mapping.
    """
    with open("config.yaml", "r") as stream:
        # An empty file parses to None; treat it as an empty mapping.
        parameters = yaml.safe_load(stream) or {}

    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    locations = [loc for loc in parameters.get("locations") or [] if loc is not None]
    positions = [pos for pos in parameters.get("positions") or [] if pos is not None]
    if not positions:
        raise ValueError("config.yaml must list at least one position")
    if not locations:
        raise ValueError("config.yaml must list at least one location")
    if parameters.get("username") is None:
        raise ValueError("config.yaml must define a username")
    if parameters.get("password") is None:
        raise ValueError("config.yaml must define a password")

    if isinstance(parameters.get("uploads"), list):
        raise Exception(
            "uploads read from the config file appear to be in list format"
            + " while should be dict. Try removing '-' from line containing"
            + " filename & path"
        )

    log.info(
        {k: v for k, v in parameters.items() if k not in ["username", "password"]}
    )

    # `output_filename` is expected to be a list; a plain string is accepted
    # too (iterating it would otherwise pick its first character).
    configured = parameters.get("output_filename") or ["output.csv"]
    if isinstance(configured, str):
        configured = [configured]
    candidates = [f for f in configured if f is not None]
    output_filename = candidates[0] if candidates else "output.csv"

    # Keys that are present but empty in the YAML load as None.
    blacklist = parameters.get("blacklist") or []
    blackListTitles = parameters.get("blackListTitles") or []
    uploads = parameters.get("uploads") or {}

    for key, path in uploads.items():
        if path is None:
            raise ValueError(f"upload entry {key!r} has no file path")

    bot = EasyApplyBot(
        parameters["username"],
        parameters["password"],
        uploads=uploads,
        filename=output_filename,
        blacklist=blacklist,
        blackListTitles=blackListTitles,
    )

    bot.start_apply(positions, locations)


if __name__ == "__main__":
    main()