import csv
import logging
import os
import platform
import random
import re
import time
from datetime import datetime, timedelta
from urllib.request import urlopen
import pandas as pd
import pyautogui
import yaml
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
log = logging.getLogger(__name__)
driver = webdriver.Chrome(ChromeDriverManager().install())
def setupLogger():
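    """Create ./logs if needed and configure file plus console logging with a timestamped log file."""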
dt = datetime.strftime(datetime.now(), "%m_%d_%y %H_%M_%S ")
if not os.path.isdir("./logs"):
os.mkdir("./logs")
# TODO need to check if there is a log dir available or not
logging.basicConfig(
filename=("./logs/" + str(dt) + "applyJobs.log"),
filemode="w",
format="%(asctime)s::%(name)s::%(levelname)s::%(message)s",
        datefmt="%d-%b-%y %H:%M:%S",
)
log.setLevel(logging.DEBUG)
c_handler = logging.StreamHandler()
c_handler.setLevel(logging.DEBUG)
c_format = logging.Formatter(
"%(asctime)s - %(levelname)s - %(message)s", "%H:%M:%S"
)
c_handler.setFormatter(c_format)
log.addHandler(c_handler)
class EasyApplyBot:
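    """Automates applying to LinkedIn 'Easy Apply' job postings with Selenium."""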
setupLogger()
# MAX_SEARCH_TIME is 10 hours by default, feel free to modify it
MAX_SEARCH_TIME = 10 * 60 * 60
def __init__(
self,
username,
password,
uploads={},
filename="output.csv",
blacklist=[],
blackListTitles=[],
):
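        """Store the configuration, load recently applied job IDs, prepare the browser, and log in to LinkedIn."""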
log.info("Welcome to Easy Apply Bot")
dirpath = os.getcwd()
log.info("current directory is : " + dirpath)
self.uploads = uploads
past_ids = self.get_appliedIDs(filename)
        self.appliedJobIDs = past_ids if past_ids is not None else []
self.filename = filename
self.options = self.browser_options()
self.browser = driver
self.wait = WebDriverWait(self.browser, 30)
self.blacklist = blacklist
self.blackListTitles = blackListTitles
self.start_linkedin(username, password)
def get_appliedIDs(self, filename):
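        """Return job IDs applied to in the last two days, read from the output CSV, or None if it cannot be read."""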
try:
df = pd.read_csv(
filename,
header=None,
names=["timestamp", "jobID", "job", "company", "attempted", "result"],
lineterminator="\n",
encoding="utf-8",
)
df["timestamp"] = pd.to_datetime(
df["timestamp"], format="%Y-%m-%d %H:%M:%S"
)
df = df[df["timestamp"] > (datetime.now() - timedelta(days=2))]
jobIDs = list(df.jobID)
log.info(f"{len(jobIDs)} jobIDs found")
return jobIDs
except Exception as e:
            log.info(
                f"Could not load applied job IDs from CSV {filename}: {e}"
            )
return None
def browser_options(self):
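        """Return Chrome options tuned to reduce automation detection."""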
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--ignore-certificate-errors")
options.add_argument("--no-sandbox")
options.add_argument("--disable-extensions")
# Disable webdriver flags or you will be easily detectable
options.add_argument("--disable-blink-features")
options.add_argument("--disable-blink-features=AutomationControlled")
# extras
return options
def start_linkedin(self, username, password):
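        """Open the LinkedIn login page and sign in with the given username and password."""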
log.info("Logging in.....Please wait :) ")
self.browser.get(
"https://www.linkedin.com/login?trk=guest_homepage-basic_nav-header-signin"
)
try:
user_field = self.browser.find_element_by_id("username")
pw_field = self.browser.find_element_by_id("password")
login_button = self.browser.find_element_by_css_selector(
".btn__primary--large"
)
user_field.send_keys(username)
user_field.send_keys(Keys.TAB)
time.sleep(2)
pw_field.send_keys(password)
time.sleep(2)
login_button.click()
time.sleep(3)
        except (TimeoutException, NoSuchElementException):
log.info(
"TimeoutException! Username/password field or login button not found"
)
def fill_data(self):
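        """Shrink the browser window and move it off-screen."""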
self.browser.set_window_size(0, 0)
self.browser.set_window_position(2000, 2000)
def start_apply(self, positions, locations):
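        """Apply to every unique (position, location) combination, picked in random order."""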
start = time.time()
self.fill_data()
combos = []
while len(combos) < len(positions) * len(locations):
            position = random.choice(positions)
            location = random.choice(locations)
combo = (position, location)
if combo not in combos:
combos.append(combo)
log.info(f"Applying to {position}: {location}")
location = "&location=" + location
self.applications_loop(position, location)
if len(combos) > 500:
break
# self.finish_apply() --> this does seem to cause more harm than good, since it closes the browser which we usually don't want, other conditions will stop the loop and just break out
def applications_loop(self, position, location):
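        """Search and apply to Easy Apply jobs for one position/location until MAX_SEARCH_TIME runs out or no more results are found."""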
count_application = 0
count_job = 0
jobs_per_page = 0
start_time = time.time()
log.info("Looking for jobs.. Please wait..")
self.browser.set_window_position(0, 0)
self.browser.maximize_window()
self.browser, _ = self.next_jobs_page(position, location, jobs_per_page)
log.info("Looking for jobs.. Please wait..")
while time.time() - start_time < self.MAX_SEARCH_TIME:
log.info(
f"{(self.MAX_SEARCH_TIME - (time.time() - start_time)) // 60} minutes left in this search"
)
# sleep to make sure everything loads, add random to make us look human.
randoTime = random.uniform(3.5, 4.9)
log.debug(f"Sleeping for {round(randoTime, 1)}")
time.sleep(randoTime)
self.load_page(sleep=1)
            # LinkedIn displays the search results in a scrollable panel
            # on the left side; we have to scroll to its bottom.
scrollresults = self.browser.find_element_by_class_name(
"jobs-search-results"
)
# Selenium only detects visible elements; if we scroll to the bottom too fast, only 8-9 results will be loaded into IDs list
for i in range(300, 3000, 100):
self.browser.execute_script(
"arguments[0].scrollTo(0, {})".format(i), scrollresults
)
time.sleep(1)
# get job links
links = self.browser.find_elements_by_xpath("//div[@data-job-id]")
if len(links) == 0:
break
# get job ID of each job link
IDs = []
for link in links:
children = link.find_elements_by_xpath(".//a[@data-control-name]")
for child in children:
if child.text not in self.blacklist:
temp = link.get_attribute("data-job-id")
jobID = temp.split(":")[-1]
IDs.append(int(jobID))
IDs = set(IDs)
# remove already applied jobs
before = len(IDs)
jobIDs = [x for x in IDs if x not in self.appliedJobIDs]
after = len(jobIDs)
            # it is assumed that 25 jobs are listed per results page
if len(jobIDs) == 0 and len(IDs) > 23:
jobs_per_page = jobs_per_page + 25
count_job = 0
self.avoid_lock()
self.browser, jobs_per_page = self.next_jobs_page(
position, location, jobs_per_page
)
# loop over IDs to apply
for i, jobID in enumerate(jobIDs):
count_job += 1
self.get_job_page(jobID)
# get easy apply button
button = self.get_easy_apply_button()
# word filter to skip positions not wanted
if button is not False:
                    if any(word in self.browser.title for word in self.blackListTitles):
log.info(
"skipping this application, a blacklisted keyword was found in the job position"
)
string_easy = "* Contains blacklisted keyword"
result = False
else:
string_easy = "* has Easy Apply Button"
log.info("Clicking the EASY apply button")
button.click()
time.sleep(3)
result = self.send_resume()
count_application += 1
else:
log.info("The button does not exist.")
string_easy = "* Doesn't have Easy Apply Button"
result = False
position_number = str(count_job + jobs_per_page)
log.info(
f"\nPosition {position_number}:\n {self.browser.title} \n {string_easy} \n"
)
self.write_to_file(button, jobID, self.browser.title, result)
# sleep every 20 applications
if count_application != 0 and count_application % 20 == 0:
sleepTime = random.randint(500, 900)
log.info(
f"""********count_application: {count_application}************\n\n
Time for a nap - see you in:{int(sleepTime / 60)} min
****************************************\n\n"""
)
time.sleep(sleepTime)
# go to new page if all jobs are done
if count_job == len(jobIDs):
jobs_per_page = jobs_per_page + 25
count_job = 0
log.info(
"""****************************************\n\n
Going to next jobs page, YEAAAHHH!!
****************************************\n\n"""
)
self.avoid_lock()
self.browser, jobs_per_page = self.next_jobs_page(
position, location, jobs_per_page
)
# if len(jobIDs) == 0 or i == (len(jobIDs) - 1):
# break
def write_to_file(self, button, jobID, browserTitle, result):
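        """Append a row describing this application attempt to the output CSV."""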
def re_extract(text, pattern):
target = re.search(pattern, text)
if target:
target = target.group(1)
return target
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        attempted = button is not False
job = re_extract(browserTitle.split(" | ")[0], r"\(?\d?\)?\s?(\w.*)")
company = re_extract(browserTitle.split(" | ")[1], r"(\w.*)")
toWrite = [timestamp, jobID, job, company, attempted, result]
with open(self.filename, "a") as f:
writer = csv.writer(f)
writer.writerow(toWrite)
def get_job_page(self, jobID):
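        """Open the job's page by ID and return the parsed page source."""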
job = "https://www.linkedin.com/jobs/view/" + str(jobID)
self.browser.get(job)
self.job_page = self.load_page(sleep=0.5)
return self.job_page
def get_easy_apply_button(self):
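        """Return the Easy Apply button element, or False if it cannot be found."""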
try:
button = self.browser.find_elements_by_xpath(
'//button[contains(@class, "jobs-apply")]/span[1]'
)
EasyApplyButton = button[0]
        except IndexError:
            # find_elements returned an empty list: no Easy Apply button on this page
            EasyApplyButton = False
return EasyApplyButton
def send_resume(self):
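        """Step through the Easy Apply dialog, uploading files and clicking Next/Review/Submit; return True if the application was submitted."""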
def is_present(button_locator):
return (
len(self.browser.find_elements(button_locator[0], button_locator[1]))
> 0
)
try:
time.sleep(random.uniform(1.5, 2.5))
            next_locator = (
                By.CSS_SELECTOR,
                "button[aria-label='Continue to next step']",
            )
            review_locator = (
                By.CSS_SELECTOR,
                "button[aria-label='Review your application']",
            )
            submit_locator = (
                By.CSS_SELECTOR,
                "button[aria-label='Submit application']",
            )
submit_application_locator = (
By.CSS_SELECTOR,
"button[aria-label='Submit application']",
)
error_locator = (
By.CSS_SELECTOR,
"p[data-test-form-element-error-message='true']",
)
upload_locator = (By.CSS_SELECTOR, "input[name='file']")
submitted = False
while True:
# Upload Cover Letter if possible
if is_present(upload_locator):
input_buttons = self.browser.find_elements(
upload_locator[0], upload_locator[1]
)
for input_button in input_buttons:
parent = input_button.find_element(By.XPATH, "..")
sibling = parent.find_element(By.XPATH, "preceding-sibling::*")
grandparent = sibling.find_element(By.XPATH, "..")
for key in self.uploads.keys():
sibling_text = sibling.text
gparent_text = grandparent.text
if (
key.lower() in sibling_text.lower()
                                or key.lower() in gparent_text.lower()
):
input_button.send_keys(self.uploads[key])
# input_button[0].send_keys(self.cover_letter_loctn)
time.sleep(random.uniform(4.5, 6.5))
                # Click the Next / Review / Submit button if one is present
button = None
                buttons = [
                    next_locator,
                    review_locator,
                    submit_locator,
                    submit_application_locator,
                ]
for i, button_locator in enumerate(buttons):
if is_present(button_locator):
button = self.wait.until(
EC.element_to_be_clickable(button_locator)
)
if is_present(error_locator):
for element in self.browser.find_elements(
error_locator[0], error_locator[1]
):
text = element.text
if "Please enter a valid answer" in text:
button = None
break
if button:
button.click()
time.sleep(random.uniform(1.5, 2.5))
if i in (2, 3):
submitted = True
break
                if button is None:
log.info("Could not complete submission")
break
elif submitted:
log.info("Application Submitted")
break
time.sleep(random.uniform(1.5, 2.5))
except Exception as e:
log.info(e)
log.info("cannot apply to this job")
            raise e
return submitted
def load_page(self, sleep=1):
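        """Scroll down the page so lazy-loaded content renders, then return it as a BeautifulSoup object."""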
scroll_page = 0
while scroll_page < 4000:
self.browser.execute_script("window.scrollTo(0," + str(scroll_page) + " );")
scroll_page += 200
time.sleep(sleep)
if sleep != 1:
self.browser.execute_script("window.scrollTo(0,0);")
time.sleep(sleep * 3)
page = BeautifulSoup(self.browser.page_source, "lxml")
return page
def avoid_lock(self):
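        """Jiggle the mouse and send Ctrl+Esc followed by Esc to keep the machine from idling or locking."""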
x, _ = pyautogui.position()
pyautogui.moveTo(x + 200, pyautogui.position().y, duration=1.0)
pyautogui.moveTo(x, pyautogui.position().y, duration=0.5)
pyautogui.keyDown("ctrl")
pyautogui.press("esc")
pyautogui.keyUp("ctrl")
time.sleep(0.5)
pyautogui.press("esc")
def next_jobs_page(self, position, location, jobs_per_page):
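        """Load the next page of Easy Apply search results for the given position and location."""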
self.browser.get(
"https://www.linkedin.com/jobs/search/?f_LF=f_AL&keywords="
+ position
+ location
+ "&start="
+ str(jobs_per_page)
)
self.avoid_lock()
log.info("Lock avoided.")
self.load_page()
return (self.browser, jobs_per_page)
def finish_apply(self):
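        """Close the browser window."""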
self.browser.close()
if __name__ == "__main__":
with open("config.yaml", "r") as stream:
try:
parameters = yaml.safe_load(stream)
except yaml.YAMLError as exc:
raise exc
assert len(parameters["positions"]) > 0
assert len(parameters["locations"]) > 0
assert parameters["username"] is not None
assert parameters["password"] is not None
if "uploads" in parameters.keys() and type(parameters["uploads"]) == list:
raise Exception(
"uploads read from the config file appear to be in list format"
+ " while should be dict. Try removing '-' from line containing"
+ " filename & path"
)
log.info(
{
k: parameters[k]
for k in parameters.keys()
if k not in ["username", "password"]
}
)
output_filename = [
        f for f in parameters.get("output_filename", ["output.csv"]) if f is not None
]
output_filename = output_filename[0] if len(output_filename) > 0 else "output.csv"
blacklist = parameters.get("blacklist", [])
blackListTitles = parameters.get("blackListTitles", [])
uploads = (
        {} if parameters.get("uploads", {}) is None else parameters.get("uploads", {})
)
for key in uploads.keys():
        assert uploads[key] is not None
bot = EasyApplyBot(
parameters["username"],
parameters["password"],
uploads=uploads,
filename=output_filename,
blacklist=blacklist,
blackListTitles=blackListTitles,
)
    locations = [l for l in parameters["locations"] if l is not None]
    positions = [p for p in parameters["positions"] if p is not None]
bot.start_apply(positions, locations)