You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
528 lines
19 KiB
528 lines
19 KiB
import csv
import itertools
import logging
import os
import platform
import random
import re
import time
from datetime import datetime, timedelta
from urllib.request import urlopen

import pandas as pd
import pyautogui
import yaml
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
|
|
|
# Module-wide logger; its level and console handler are attached by setupLogger().
log = logging.getLogger(__name__)

# Shared Chrome session, created as a side effect of importing this module.
# ChromeDriverManager().install() supplies the chromedriver location.
# NOTE(review): the driver path is passed positionally; newer Selenium releases
# may expect a Service object instead - confirm against the pinned version.
driver = webdriver.Chrome(ChromeDriverManager().install())
|
|
|
|
|
def setupLogger():
    """Configure file and console logging for this module.

    Creates ``./logs`` if it is missing, points the root logger at a new
    timestamped ``applyJobs.log`` file inside it, sets this module's logger
    to DEBUG and attaches a console handler to it.

    Calling the function again does not add a second console handler.
    """
    logger = logging.getLogger(__name__)
    dt = datetime.strftime(datetime.now(), "%m_%d_%y %H_%M_%S ")

    os.makedirs("./logs", exist_ok=True)

    logging.basicConfig(
        filename=("./logs/" + str(dt) + "applyJobs.log"),
        filemode="w",
        format="%(asctime)s::%(name)s::%(levelname)s::%(message)s",
        # Only the timestamp layout belongs here; a "./logs/" prefix would be
        # printed verbatim in front of every timestamp.
        datefmt="%d-%b-%y %H:%M:%S",
    )
    logger.setLevel(logging.DEBUG)

    # Guard against stacking handlers (and duplicated console lines) when
    # the function runs more than once.
    if not logger.handlers:
        c_handler = logging.StreamHandler()
        c_handler.setLevel(logging.DEBUG)
        c_format = logging.Formatter(
            "%(asctime)s - %(levelname)s - %(message)s", "%H:%M:%S"
        )
        c_handler.setFormatter(c_format)
        logger.addHandler(c_handler)
|
|
|
|
|
class EasyApplyBot:
    """Automates LinkedIn "Easy Apply" job applications through Selenium.

    Constructing an instance logs in to LinkedIn using the module-level
    ``driver``. ``start_apply`` then searches every position/location
    combination, opens each job found, clicks through the Easy Apply dialog
    and appends one row per job to a CSV file.
    """

    # Logging is configured when this class body executes, i.e. at import time.
    setupLogger()
    # MAX_SEARCH_TIME is 10 hours by default, feel free to modify it
    MAX_SEARCH_TIME = 10 * 60 * 60

    def __init__(
        self,
        username,
        password,
        uploads=None,
        filename="output.csv",
        blacklist=None,
        blackListTitles=None,
    ):
        """Load previous results, store settings and log in.

        Args:
            username: LinkedIn login name.
            password: LinkedIn password.
            uploads: Mapping of a label to look for next to a file input
                to the file path to upload there. Defaults to empty.
            filename: CSV file that results are read from and appended to.
            blacklist: Link texts to skip when collecting job IDs
                (see ``_collect_job_ids``). Defaults to empty.
            blackListTitles: Words that, when found in the job page title,
                cause the job to be skipped. Defaults to empty.
        """
        log.info("Welcome to Easy Apply Bot")
        dirpath = os.getcwd()
        log.info("current directory is : " + dirpath)

        # None stands in for "empty" so instances never share one mutable
        # default container.
        self.uploads = {} if uploads is None else uploads
        past_ids = self.get_appliedIDs(filename)
        self.appliedJobIDs = past_ids if past_ids is not None else []
        self.filename = filename
        # NOTE(review): these options are only stored here; the shared
        # module-level driver is created before this point without them.
        self.options = self.browser_options()
        self.browser = driver
        self.wait = WebDriverWait(self.browser, 30)
        self.blacklist = [] if blacklist is None else blacklist
        self.blackListTitles = [] if blackListTitles is None else blackListTitles
        self.start_linkedin(username, password)

    def get_appliedIDs(self, filename):
        """Return the job IDs recorded in ``filename`` during the last two days.

        Returns:
            A list of job IDs, or ``None`` when the file cannot be read or
            parsed (the reason is logged).
        """
        try:
            df = pd.read_csv(
                filename,
                header=None,
                names=["timestamp", "jobID", "job", "company", "attempted", "result"],
                lineterminator="\n",
                encoding="utf-8",
            )

            df["timestamp"] = pd.to_datetime(
                df["timestamp"], format="%Y-%m-%d %H:%M:%S"
            )
            # Older rows are dropped, so those jobs become eligible again.
            df = df[df["timestamp"] > (datetime.now() - timedelta(days=2))]
            jobIDs = list(df["jobID"])
            log.info(f"{len(jobIDs)} jobIDs found")
            return jobIDs
        except Exception as e:
            # Deliberately broad: a missing or malformed results file must
            # not prevent the bot from starting.
            log.info(
                str(e) + "   jobIDs could not be loaded from CSV {}".format(filename)
            )
            return None

    def browser_options(self):
        """Build the Chrome options object used by this bot."""
        options = Options()
        options.add_argument("--start-maximized")
        options.add_argument("--ignore-certificate-errors")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-extensions")

        # Disable webdriver flags or you will be easily detectable
        options.add_argument("--disable-blink-features")
        options.add_argument("--disable-blink-features=AutomationControlled")

        return options

    def start_linkedin(self, username, password):
        """Open the LinkedIn login page and submit the credentials.

        A missing form element is logged rather than raised.
        """
        log.info("Logging in.....Please wait :) ")
        self.browser.get(
            "https://www.linkedin.com/login?trk=guest_homepage-basic_nav-header-signin"
        )
        try:
            user_field = self.browser.find_element(By.ID, "username")
            pw_field = self.browser.find_element(By.ID, "password")
            login_button = self.browser.find_element(
                By.CSS_SELECTOR, ".btn__primary--large"
            )
            user_field.send_keys(username)
            user_field.send_keys(Keys.TAB)
            time.sleep(2)
            pw_field.send_keys(password)
            time.sleep(2)
            login_button.click()
            time.sleep(3)
        except (NoSuchElementException, TimeoutException) as e:
            # find_element reports a missing element with
            # NoSuchElementException, so both types must be handled.
            log.info(
                f"{type(e).__name__}! Username/password field or login button not found"
            )

    def fill_data(self):
        """Shrink the browser window and move it to position (2000, 2000)."""
        self.browser.set_window_size(0, 0)
        self.browser.set_window_position(2000, 2000)

    def start_apply(self, positions, locations):
        """Run a job search for every position/location pair, in random order.

        Args:
            positions: Search keywords, one search per entry and location.
            locations: Location names, one search per entry and position.
        """
        self.fill_data()

        # De-duplicate first: retrying random picks until the number of
        # distinct pairs reaches len(positions) * len(locations) can never
        # finish when either list contains a repeated entry.
        combos = list(dict.fromkeys(itertools.product(positions, locations)))
        random.shuffle(combos)

        for searched, (position, location) in enumerate(combos, start=1):
            log.info(f"Applying to {position}: {location}")
            self.applications_loop(position, "&location=" + location)
            # Safety cap on the number of searches in one run.
            if searched > 500:
                break

        # self.finish_apply() --> this does seem to cause more harm than good, since it closes the browser which we usually don't want, other conditions will stop the loop and just break out

    def _collect_job_ids(self):
        """Scroll the results pane and collect job IDs from the current page.

        Returns:
            ``None`` when the page shows no job cards at all, otherwise the
            set of integer job IDs whose card has at least one link whose
            text is not in ``self.blacklist``.
        """
        # LinkedIn displays the search results in a scrollable <div> on the left side, we have to scroll to its bottom
        scrollresults = self.browser.find_element(By.CLASS_NAME, "jobs-search-results")
        # Selenium only detects visible elements; if we scroll to the bottom too fast, only 8-9 results will be loaded into IDs list
        for offset in range(300, 3000, 100):
            self.browser.execute_script(
                "arguments[0].scrollTo(0, {})".format(offset), scrollresults
            )

        time.sleep(1)

        links = self.browser.find_elements(By.XPATH, "//div[@data-job-id]")
        if not links:
            return None

        job_ids = set()
        for link in links:
            children = link.find_elements(By.XPATH, ".//a[@data-control-name]")
            if any(child.text not in self.blacklist for child in children):
                raw_id = link.get_attribute("data-job-id")
                # Keep only the part after the last ':' of the attribute.
                job_ids.add(int(raw_id.split(":")[-1]))
        return job_ids

    def applications_loop(self, position, location):
        """Page through one search and apply to every job not yet handled.

        Args:
            position: Search keywords.
            location: Location fragment already prefixed with ``&location=``.

        The loop ends when ``MAX_SEARCH_TIME`` is exceeded, when a page shows
        no job cards, or when a partial page has nothing new to apply to.
        """
        count_application = 0
        count_job = 0
        jobs_per_page = 0
        start_time = time.time()

        log.info("Looking for jobs.. Please wait..")

        self.browser.set_window_position(0, 0)
        self.browser.maximize_window()
        self.browser, _ = self.next_jobs_page(position, location, jobs_per_page)
        log.info("Looking for jobs.. Please wait..")

        while time.time() - start_time < self.MAX_SEARCH_TIME:
            log.info(
                f"{(self.MAX_SEARCH_TIME - (time.time() - start_time)) // 60} minutes left in this search"
            )

            # sleep to make sure everything loads, add random to make us look human.
            randoTime = random.uniform(3.5, 4.9)
            log.debug(f"Sleeping for {round(randoTime, 1)}")
            time.sleep(randoTime)
            self.load_page(sleep=1)

            IDs = self._collect_job_ids()
            if IDs is None:
                break

            # remove already applied jobs
            jobIDs = [x for x in IDs if x not in self.appliedJobIDs]

            if not jobIDs:
                # it assumed that 25 jobs are listed in the results window
                if len(IDs) <= 23:
                    # A partial page with nothing new is the end of the
                    # results; re-scraping it would only repeat until
                    # MAX_SEARCH_TIME runs out.
                    break
                jobs_per_page = jobs_per_page + 25
                count_job = 0
                self.avoid_lock()
                self.browser, jobs_per_page = self.next_jobs_page(
                    position, location, jobs_per_page
                )
                continue

            # loop over IDs to apply
            for jobID in jobIDs:
                count_job += 1
                self.get_job_page(jobID)

                # get easy apply button
                button = self.get_easy_apply_button()

                if button is not False:
                    # word filter to skip positions not wanted; uses the
                    # instance setting, not a module-level global.
                    if any(
                        word in self.browser.title for word in self.blackListTitles
                    ):
                        log.info(
                            "skipping this application, a blacklisted keyword was found in the job position"
                        )
                        string_easy = "* Contains blacklisted keyword"
                        result = False
                    else:
                        string_easy = "* has Easy Apply Button"
                        log.info("Clicking the EASY apply button")
                        button.click()
                        time.sleep(3)
                        result = self.send_resume()
                        count_application += 1
                else:
                    log.info("The button does not exist.")
                    string_easy = "* Doesn't have Easy Apply Button"
                    result = False

                position_number = str(count_job + jobs_per_page)
                log.info(
                    f"\nPosition {position_number}:\n {self.browser.title} \n {string_easy} \n"
                )

                self.write_to_file(button, jobID, self.browser.title, result)

                # sleep every 20 applications
                if count_application != 0 and count_application % 20 == 0:
                    sleepTime = random.randint(500, 900)
                    log.info(
                        f"********count_application: {count_application}************\n\n\n"
                        f"Time for a nap - see you in:{int(sleepTime / 60)} min\n"
                        "****************************************\n\n"
                    )
                    time.sleep(sleepTime)

                # go to new page if all jobs are done
                if count_job == len(jobIDs):
                    jobs_per_page = jobs_per_page + 25
                    count_job = 0
                    log.info(
                        "****************************************\n\n\n"
                        "Going to next jobs page, YEAAAHHH!!\n"
                        "****************************************\n\n"
                    )
                    self.avoid_lock()
                    self.browser, jobs_per_page = self.next_jobs_page(
                        position, location, jobs_per_page
                    )

    def write_to_file(self, button, jobID, browserTitle, result):
        """Append one result row for ``jobID`` to the CSV file.

        Args:
            button: The Easy Apply element, or ``False`` when none was found.
            jobID: Job identifier.
            browserTitle: Page title, split on ``" | "`` into job and
                company parts.
            result: Outcome reported for the application attempt.
        """

        def re_extract(text, pattern):
            """Return the first capture group of ``pattern`` in ``text``, or None."""
            match = re.search(pattern, text)
            return match.group(1) if match else None

        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        attempted = button is not False
        title_parts = browserTitle.split(" | ")
        job = re_extract(title_parts[0], r"\(?\d?\)?\s?(\w.*)")
        # Titles without a " | " separator have no company part.
        company = (
            re_extract(title_parts[1], r"(\w.*)") if len(title_parts) > 1 else None
        )

        toWrite = [timestamp, jobID, job, company, attempted, result]
        # newline="" is what the csv module expects of its file object;
        # UTF-8 matches the encoding get_appliedIDs reads with.
        with open(self.filename, "a", newline="", encoding="utf-8") as f:
            csv.writer(f).writerow(toWrite)

    def get_job_page(self, jobID):
        """Open the job's page, scroll through it and return the parsed page."""
        job = "https://www.linkedin.com/jobs/view/" + str(jobID)
        self.browser.get(job)
        self.job_page = self.load_page(sleep=0.5)
        return self.job_page

    def get_easy_apply_button(self):
        """Return the first apply-button element on the page, or ``False``."""
        try:
            buttons = self.browser.find_elements(
                By.XPATH, '//button[contains(@class, "jobs-apply")]/span[1]'
            )
        except Exception:
            # Best effort: any lookup failure counts as "no button", but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return False
        return buttons[0] if buttons else False

    def send_resume(self):
        """Click through the Easy Apply dialog until it is submitted or stuck.

        On each step, matching files from ``self.uploads`` are attached to
        file inputs, then the next/review/submit button is clicked.

        Returns:
            ``True`` when a submit button was clicked, ``False`` when no
            usable button was found or a validation error blocked the step.

        Raises:
            Exception: Any error raised while driving the dialog is logged
                and re-raised.
        """

        def is_present(locator):
            """Report whether at least one element matches ``locator``."""
            return len(self.browser.find_elements(locator[0], locator[1])) > 0

        try:
            time.sleep(random.uniform(1.5, 2.5))
            next_locator = (
                By.CSS_SELECTOR,
                "button[aria-label='Continue to next step']",
            )
            review_locator = (
                By.CSS_SELECTOR,
                "button[aria-label='Review your application']",
            )
            submit_locator = (
                By.CSS_SELECTOR,
                "button[aria-label='Submit application']",
            )
            error_locator = (
                By.CSS_SELECTOR,
                "p[data-test-form-element-error-message='true']",
            )
            upload_locator = (By.CSS_SELECTOR, "input[name='file']")

            # (locator, clicking it submits the application); tried in order.
            steps = (
                (next_locator, False),
                (review_locator, False),
                (submit_locator, True),
            )

            submitted = False
            while True:

                # Upload Cover Letter if possible
                if is_present(upload_locator):
                    input_buttons = self.browser.find_elements(
                        upload_locator[0], upload_locator[1]
                    )
                    for input_button in input_buttons:
                        parent = input_button.find_element(By.XPATH, "..")
                        sibling = parent.find_element(By.XPATH, "preceding-sibling::*")
                        grandparent = sibling.find_element(By.XPATH, "..")
                        sibling_text = sibling.text.lower()
                        gparent_text = grandparent.text.lower()
                        for key, path in self.uploads.items():
                            # Lower-case the key for both comparisons so
                            # the match is case-insensitive throughout.
                            wanted = key.lower()
                            if wanted in sibling_text or wanted in gparent_text:
                                input_button.send_keys(path)

                    time.sleep(random.uniform(4.5, 6.5))

                # Click Next or submit button if possible
                button = None
                for button_locator, is_submit in steps:
                    if is_present(button_locator):
                        button = self.wait.until(
                            EC.element_to_be_clickable(button_locator)
                        )

                    # A form validation error means this step cannot be
                    # completed automatically.
                    if is_present(error_locator):
                        for element in self.browser.find_elements(
                            error_locator[0], error_locator[1]
                        ):
                            if "Please enter a valid answer" in element.text:
                                button = None
                                break
                    if button:
                        button.click()
                        time.sleep(random.uniform(1.5, 2.5))
                        if is_submit:
                            submitted = True
                        break
                if button is None:
                    log.info("Could not complete submission")
                    break
                elif submitted:
                    log.info("Application Submitted")
                    break

                time.sleep(random.uniform(1.5, 2.5))

        except Exception as e:
            log.info(e)
            log.info("cannot apply to this job")
            raise

        return submitted

    def load_page(self, sleep=1):
        """Scroll down the page in steps and return it parsed by BeautifulSoup.

        Args:
            sleep: Seconds to pause after each scroll step.
        """
        for scroll_page in range(0, 4000, 200):
            self.browser.execute_script("window.scrollTo(0," + str(scroll_page) + " );")
            time.sleep(sleep)

        # Any non-default pause also scrolls back to the top afterwards.
        if sleep != 1:
            self.browser.execute_script("window.scrollTo(0,0);")
            time.sleep(sleep * 3)

        return BeautifulSoup(self.browser.page_source, "lxml")

    def avoid_lock(self):
        """Move the mouse and press keys through pyautogui.

        NOTE(review): presumably meant to keep the screen from locking -
        confirm.
        """
        x, _ = pyautogui.position()
        pyautogui.moveTo(x + 200, pyautogui.position().y, duration=1.0)
        pyautogui.moveTo(x, pyautogui.position().y, duration=0.5)
        pyautogui.keyDown("ctrl")
        pyautogui.press("esc")
        pyautogui.keyUp("ctrl")
        time.sleep(0.5)
        pyautogui.press("esc")

    def next_jobs_page(self, position, location, jobs_per_page):
        """Load the search results page that starts at offset ``jobs_per_page``.

        Args:
            position: Search keywords.
            location: Location fragment already prefixed with ``&location=``.
            jobs_per_page: Value for the ``start`` query parameter.

        Returns:
            Tuple ``(browser, jobs_per_page)``.
        """
        self.browser.get(
            "https://www.linkedin.com/jobs/search/?f_LF=f_AL&keywords="
            + position
            + location
            + "&start="
            + str(jobs_per_page)
        )
        self.avoid_lock()
        log.info("Lock avoided.")
        self.load_page()
        return (self.browser, jobs_per_page)

    def finish_apply(self):
        """Close the current browser window."""
        self.browser.close()
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: read config.yaml, validate it, then run the bot.

    with open("config.yaml", "r") as stream:
        # A yaml.YAMLError from a malformed file propagates to the caller.
        parameters = yaml.safe_load(stream)

    assert len(parameters["positions"]) > 0
    assert len(parameters["locations"]) > 0
    assert parameters["username"] is not None
    assert parameters["password"] is not None

    if isinstance(parameters.get("uploads"), list):
        raise Exception(
            "uploads read from the config file appear to be in list format"
            + " while should be dict. Try removing '-' from line containing"
            + " filename & path"
        )

    # Log the effective settings without the credentials.
    log.info(
        {
            k: parameters[k]
            for k in parameters.keys()
            if k not in ["username", "password"]
        }
    )

    # output_filename may be a list (possibly with empty entries), a plain
    # string, or missing/empty. A string must not be iterated: that would
    # pick its first character as the file name.
    raw_output = parameters.get("output_filename") or ["output.csv"]
    if isinstance(raw_output, str):
        raw_output = [raw_output]
    output_filename = next((f for f in raw_output if f is not None), "output.csv")

    # A key that is present but left empty in YAML loads as None.
    blacklist = parameters.get("blacklist") or []
    blackListTitles = parameters.get("blackListTitles") or []

    uploads = parameters.get("uploads") or {}
    for key in uploads:
        assert uploads[key] is not None

    bot = EasyApplyBot(
        parameters["username"],
        parameters["password"],
        uploads=uploads,
        filename=output_filename,
        blacklist=blacklist,
        blackListTitles=blackListTitles,
    )

    locations = [l for l in parameters["locations"] if l is not None]
    positions = [p for p in parameters["positions"] if p is not None]
    bot.start_apply(positions, locations)
|
|
|
|
|
waltlabs.io |
|
spring tx |
|
|
|
|