def deep_links(self, search_keyword="devops", search_window="5"):
    """Scrape a single LinkedIn job posting through a remote Chrome session.

    Starts a Chrome session on the remote WebDriver hub (routed through the
    hard-coded proxy), prints the public IP seen through that proxy, opens
    the job posting, expands the collapsed description, saves a screenshot
    to ``linkedin-deep.png`` and prints the description, the job-criteria
    list and the external apply link.

    Args:
        search_keyword: Currently unused; kept for interface compatibility.
        search_window: Currently unused; kept for interface compatibility.

    Returns:
        None. Results are printed to stdout.

    Raises:
        selenium.common.exceptions.WebDriverException: If the remote session
            cannot be created, or an expected element (e.g. the "show more"
            button) is not present on the page.
        AttributeError: If the description section, the criteria list or the
            apply link is missing from the page (``soup.find`` returns
            ``None`` in that case).
    """
    proxy = "192.168.1.101:8889"

    chrome_options = Options()
    # Flags are applied in this exact order; the user-agent pins a desktop
    # Chrome identity.
    for argument in (
        "--disable-infobars",
        "--disable-extensions",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--incognito",
        "--start-maximized",
        "--disable-blink-features",
        "--verbose",
        "--disable-gpu",
        "--no-default-browser-check",  # Overrides default choices
        "--no-first-run",
        "--disable-default-apps",
        "--disable-blink-features=AutomationControlled",
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36",
    ):
        chrome_options.add_argument(argument)
    chrome_options.page_load_strategy = "normal"

    # Take the timestamp once so the video and the log of the same session
    # share an identical name stem.
    started_at = datetime.datetime.now()
    capabilities = {
        "browserName": "chrome",
        "browserVersion": "latest",
        "pageLoadStrategy": "normal",
        "javascriptEnabled": True,
        "selenoid:options": {
            "enableVNC": True,
            "enableVideo": True,
            "enableLog": True,
            "videoName": f"Linkedin-{started_at}.mp4",
            "logName": f"Linkedin-{started_at}.log",
            "name": "Chrome",
        },
        "proxy": {
            "httpProxy": proxy,
            "ftpProxy": proxy,
            "sslProxy": proxy,
            "proxyType": "MANUAL",
        },
    }

    url = "https://www.linkedin.com/jobs/view/linux-systems-kubernetes-docker-at-motion-recruitment-2438784506"

    driver = webdriver.Remote(
        command_executor="http://192.168.1.101:4444/wd/hub",
        options=chrome_options,
        desired_capabilities=capabilities,
    )
    try:
        # WebDriver.get() returns None, so the response has to be read back
        # from the rendered page rather than from the call's return value.
        driver.get("https://api.ipify.org")
        ip = driver.find_element(By.TAG_NAME, "body").text
        print(f"My public IP address is: {ip}")

        driver.maximize_window()
        # NOTE(review): the page is loaded twice, as in the original code;
        # presumably the first load is redirected - confirm before removing.
        driver.get(url)
        driver.get(url)
        time.sleep(random_wait())

        # Expand the truncated job description before capturing the page.
        driver.find_element(
            By.CSS_SELECTOR, ".show-more-less-html__button--more"
        ).click()

        source_data = driver.page_source
        driver.save_screenshot("linkedin-deep.png")
    finally:
        # quit() ends the remote session even when a step above fails;
        # close() would only close the current window.
        driver.quit()

    soup = BeautifulSoup(source_data, "lxml")
    description = soup.find("section", class_="description").text
    features = soup.find("ul", class_="job-criteria__list").text
    external_link = soup.find("a", class_="apply-button--link").get("href")

    print(description)
    print(features)
    print(external_link)
    print("Soup pulled and browser session ended")