Notebooks >> Scripts
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

100 lines
3.6 KiB

def deep_links(self, search_keyword="devops", search_window="5"):
    """Scrape one hard-coded LinkedIn job posting via a remote Chrome session.

    Opens a Chrome session on the remote WebDriver hub at
    ``192.168.1.101:4444`` (routed through the proxy in ``PROXY``), prints
    the public IP reported by api.ipify.org, loads the job posting, expands
    the description if a "show more" button is present, saves a screenshot
    to ``linkedin-deep.png`` and prints the description, the job-criteria
    list and the external apply link.

    Args:
        search_keyword: Currently unused; kept for interface compatibility.
        search_window: Currently unused; kept for interface compatibility.

    Returns:
        None. All results are written to stdout. A field that cannot be
        found in the page is printed as ``None``.
    """
    PROXY = "192.168.1.101:8889"

    chrome_options = Options()
    # Same flags, same order as before; add "--headless" here to run
    # without a visible browser window.
    for flag in (
        "--disable-infobars",
        "--disable-extensions",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--incognito",
        "--start-maximized",
        "--disable-blink-features",
        "--verbose",
        "--disable-gpu",
        "--no-default-browser-check",  # Overrides default choices
        "--no-first-run",
        "--disable-default-apps",
        "--disable-blink-features=AutomationControlled",
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36",
    ):
        chrome_options.add_argument(flag)
    chrome_options.page_load_strategy = "normal"

    # Take the timestamp once so the video and the log of a single run
    # carry exactly the same name stem.
    started = datetime.datetime.now()
    capabilities = {
        "browserName": "chrome",
        "browserVersion": "latest",
        "pageLoadStrategy": "normal",
        "javascriptEnabled": True,
        "selenoid:options": {
            "enableVNC": True,
            "enableVideo": True,
            "enableLog": True,
            "videoName": f"Linkedin-{started}.mp4",
            "logName": f"Linkedin-{started}.log",
            "name": "Chrome",
        },
        "proxy": {
            "httpProxy": PROXY,
            "ftpProxy": PROXY,
            "sslProxy": PROXY,
            "proxyType": "MANUAL",
        },
    }
    url = "https://www.linkedin.com/jobs/view/linux-systems-kubernetes-docker-at-motion-recruitment-2438784506"

    driver = webdriver.Remote(
        command_executor="http://192.168.1.101:4444/wd/hub",
        options=chrome_options,
        desired_capabilities=capabilities,
    )
    try:
        # WebDriver.get() returns None, so the response body has to be read
        # from the loaded page rather than from the call's return value.
        driver.get("https://api.ipify.org")
        ip = driver.find_element(By.TAG_NAME, "body").text
        print("My public IP address is: {}".format(ip))

        driver.maximize_window()
        # NOTE(review): the page is requested twice, as in the original;
        # presumably a workaround for a redirect on first load -- confirm
        # before removing the second call.
        driver.get(url)
        driver.get(url)
        time.sleep(random_wait())

        # find_elements returns an empty list instead of raising, so a page
        # without the "show more" button is still scraped.
        more_buttons = driver.find_elements(
            By.CSS_SELECTOR, ".show-more-less-html__button--more"
        )
        if more_buttons:
            more_buttons[0].click()

        source_data = driver.page_source
        driver.save_screenshot("linkedin-deep.png")
    finally:
        # quit() ends the whole remote session (close() only closes the
        # current window) and runs even when a step above fails, so the
        # browser on the hub is never left behind.
        driver.quit()

    soup = BeautifulSoup(source_data, "lxml")

    # soup.find() returns None when nothing matches; guard every lookup so
    # one missing element does not abort the whole report.
    description_node = soup.find("section", class_="description")
    description = description_node.text if description_node is not None else None

    features_node = soup.find("ul", class_="job-criteria__list")
    features = features_node.text if features_node is not None else None

    apply_node = soup.find("a", class_="apply-button--link")
    external_link = apply_node.get("href") if apply_node is not None else None

    print(description)
    print(features)
    print(external_link)
    print("Soup pulled and browser session ended")