You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
100 lines
3.6 KiB
100 lines
3.6 KiB
def deep_links(self, search_keyword="devops", search_window="5"):
    """Open one LinkedIn job posting in a remote Chrome session and print its details.

    The method starts a remote WebDriver session against the hub at
    ``192.168.1.101:4444`` (routed through the proxy at ``192.168.1.101:8889``),
    prints the public IP reported by api.ipify.org, loads a hard-coded job
    URL, expands the description, saves a screenshot to
    ``linkedin-deep.png`` and prints the description, the job-criteria list
    and the external apply link parsed from the page source.

    Args:
        search_keyword: Currently unused by this method.
        search_window: Currently unused by this method.

    Returns:
        None. All results are written to stdout.

    Raises:
        Any exception raised by Selenium while driving the browser (for
        example when the "show more" button cannot be located) propagates
        to the caller; the remote session is released first.
    """
    proxy = "192.168.1.101:8889"
    url = "https://www.linkedin.com/jobs/view/linux-systems-kubernetes-docker-at-motion-recruitment-2438784506"

    chrome_options = Options()
    # Order is preserved from the original list: the bare
    # "--disable-blink-features" comes before the "=AutomationControlled" form.
    for argument in (
        "--disable-infobars",
        "--disable-extensions",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--incognito",
        "--start-maximized",
        "--disable-blink-features",
        "--verbose",
        "--disable-gpu",
        "--no-default-browser-check",  # Overrides default choices
        "--no-first-run",
        "--disable-default-apps",
        "--disable-blink-features=AutomationControlled",
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36",
    ):
        chrome_options.add_argument(argument)
    chrome_options.page_load_strategy = "normal"

    # One timestamp for both artifacts so the video and the log of a session
    # carry the same suffix.
    session_stamp = datetime.datetime.now()
    capabilities = {
        "browserName": "chrome",
        "browserVersion": "latest",
        "pageLoadStrategy": "normal",
        "javascriptEnabled": True,
        "selenoid:options": {
            "enableVNC": True,
            "enableVideo": True,
            "enableLog": True,
            "videoName": f"Linkedin-{session_stamp}.mp4",
            "logName": f"Linkedin-{session_stamp}.log",
            "name": "Chrome",
        },
        "proxy": {
            "httpProxy": proxy,
            "ftpProxy": proxy,
            "sslProxy": proxy,
            "proxyType": "MANUAL",
        },
    }

    # NOTE(review): `desired_capabilities` is kept as in the original call;
    # confirm it is still accepted by the Selenium version this project pins.
    driver = webdriver.Remote(
        command_executor="http://192.168.1.101:4444/wd/hub",
        options=chrome_options,
        desired_capabilities=capabilities,
    )

    try:
        # `driver.get()` returns None, so the response text has to be read
        # from the rendered page rather than from the call's return value.
        driver.get("https://api.ipify.org")
        ip = driver.find_element(By.TAG_NAME, "body").text
        print("My public IP address is: {}".format(ip))

        driver.maximize_window()
        # NOTE(review): the page is loaded twice, as in the original code;
        # confirm whether the second load is a deliberate workaround.
        driver.get(url)
        driver.get(url)

        time.sleep(random_wait())

        # Expand the collapsed job description before capturing the source.
        show_more = driver.find_element(
            By.CSS_SELECTOR, ".show-more-less-html__button--more"
        )
        show_more.click()

        source_data = driver.page_source
        driver.save_screenshot("linkedin-deep.png")
    finally:
        # Always release the remote session, even when a step above fails.
        driver.quit()

    soup = BeautifulSoup(source_data, "lxml")

    # `soup.find` returns None when nothing matches; report None for a
    # missing section instead of failing on attribute access.
    description_node = soup.find("section", class_="description")
    description = description_node.text if description_node is not None else None

    features_node = soup.find("ul", class_="job-criteria__list")
    features = features_node.text if features_node is not None else None

    apply_node = soup.find("a", class_="apply-button--link")
    external_link = apply_node.get("href") if apply_node is not None else None

    print(description)
    print(features)
    print(external_link)

    print("Soup pulled and browser session ended")
|
|