Notebooks >> Scripts
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

163 lines
6.1 KiB

import datetime
import json
import time
import requests
from bs4 import BeautifulSoup
from random_user_agent.params import OperatingSystem, SoftwareName
from random_user_agent.user_agent import UserAgent
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
def rand_ua():
    """Return a randomly chosen user-agent string.

    The rotator is restricted to Chrome software on Windows or Linux and is
    built with ``limit=100``.
    """
    rotator = UserAgent(
        software_names=[SoftwareName.CHROME.value],
        operating_systems=[
            OperatingSystem.WINDOWS.value,
            OperatingSystem.LINUX.value,
        ],
        limit=100,
    )
    return rotator.get_random_user_agent()
# https://github.com/GoogleChrome/chrome-launcher/blob/master/docs/chrome-flags-for-tools.md
def browser_options():
    """Build the Chrome ``Options`` used for sessions with a random user agent.

    A user agent is obtained from ``rand_ua()``, printed to stdout, and passed
    to Chrome together with a fixed list of command-line switches. The page
    load strategy is set to ``"normal"``.

    Returns:
        The configured ``Options`` instance.
    """
    ua_string = rand_ua()
    print(ua_string)

    # Switches are applied in this exact order. Headless mode is intentionally
    # not enabled here.
    switches = (
        "--disable-infobars",
        "--disable-extensions",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--incognito",
        "--start-maximized",
        "--disable-blink-features",
        "--verbose",
        "--disable-gpu",
        "--no-default-browser-check",
        "--no-first-run",
        "--disable-default-apps",
        "--disable-blink-features=AutomationControlled",
        f"user-agent={ua_string}",
    )

    opts = Options()
    for switch in switches:
        opts.add_argument(switch)
    opts.page_load_strategy = "normal"
    return opts
# chrome_options.add_argument("--enable-logging=stderr")
# chrome_options.add_argument("--log-path=chromedriver.log")
# chrome_options.add_argument("--remote-debugging-port=9222")
# def geoLocationTest():
# driver = webdriver.Chrome()
# Map_coordinates = dict({
# "latitude": 41.8781,
# "longitude": -87.6298,
# "accuracy": 100
# })
# driver.execute_cdp_cmd("Emulation.setGeolocationOverride", Map_coordinates)
# driver.get("<your site url>")
# chrome_options.add_argument(
# 'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36')
# chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
# chrome_options.add_argument('--log-path=/opt/selenoid/logs/chrome.log')
# # chrome_options.page_load_strategy = 'normal'
# # chrome_options.javascript_enabled = 'true'
# # chrome_options.set_capability("platformName", "Windows XP")
# # chrome_options.set_capability("javascriptEnabled", "True")
def browser_capabilities(module: str, proxy: str = "192.168.1.101:8889") -> dict:
    """Return the capabilities dict for a proxied Chrome session on Selenoid.

    Args:
        module: Label for the session; used as the Selenoid session ``name``
            and as the prefix of the video and log file names.
        proxy: ``host:port`` used for the HTTP, FTP and SSL proxy entries.
            Defaults to the address that was previously hard-coded.

    Returns:
        A dict with browser, ``selenoid:options``, ``proxy`` and
        ``loggingPrefs`` settings.
    """
    # Take the timestamp once so the video and log files of one session share
    # the same stem; two separate now() calls can differ by microseconds.
    stamp = datetime.datetime.now()
    capabilities = {
        "browserName": "chrome",
        "browserVersion": "latest",
        "pageLoadStrategy": "normal",
        "javascriptEnabled": True,
        "selenoid:options": {
            "enableVNC": True,
            "enableVideo": True,
            "enableLog": True,
            "videoName": f"{module}_{stamp}.mp4",
            "logName": f"{module}_{stamp}.log",
            "name": module,
        },
        "proxy": {
            "httpProxy": proxy,
            "ftpProxy": proxy,
            "sslProxy": proxy,
            "proxyType": "MANUAL",
            "autodetect": False,
        },
        "loggingPrefs": {
            "driver": "FINE",
            "server": "FINE",
            "browser": "FINE",
        },
    }
    return capabilities
def example_capabilities(module):
    """Return a sample capabilities dict for a Chrome 88 session on Linux.

    The values are fixed literals. The original used JSON-style
    ``false``/``true``, which are undefined names in Python; they are now
    proper booleans, and the dict is returned instead of being discarded.

    Args:
        module: Unused; kept so the signature matches the other
            ``*_capabilities`` helpers.

    Returns:
        The example capabilities dict.
    """
    capabilities = {
        "acceptInsecureCerts": False,
        "browserName": "chrome",
        "browserVersion": "88.0.4324.96",
        "chrome": {
            "chromedriverVersion": "88.0.4324.96 (68dba2d8a0b149a1d3afac56fa74648032bcf46b-refs/branch-heads/4324@{#1784})",
            "userDataDir": "/tmp/.com.google.Chrome.CN7yQ2",
        },
        "goog:chromeOptions": {"debuggerAddress": "localhost:38551"},
        "networkConnectionEnabled": False,
        "pageLoadStrategy": "normal",
        "platformName": "linux",
        "proxy": {},
        "setWindowRect": True,
        "strictFileInteractability": False,
        "timeouts": {"implicit": 0, "pageLoad": 300000, "script": 30000},
        "unhandledPromptBehavior": "dismiss and notify",
        "webauthn:extension:largeBlob": True,
        "webauthn:virtualAuthenticators": True,
    }
    return capabilities
def nonanon_options():
    """Build Chrome ``Options`` with a fixed (non-randomized) user agent.

    Returns:
        The configured ``Options`` instance.
    """
    options = Options()
    options.add_argument("--start-maximized")
    options.add_argument("--ignore-certificate-errors")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-extensions")
    # Disable webdriver flags or you will be easily detectable
    options.add_argument("--disable-blink-features")
    options.add_argument("--disable-blink-features=AutomationControlled")
    # The user-agent string must carry the "user-agent=" prefix, as in
    # browser_options(); passed bare, it is not applied as a user-agent
    # override.
    options.add_argument(
        "user-agent=Mozilla/5.0 (X11; CrOS x86_64 13597.105.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.208 Safari/537.36"
    )
    return options
def nonanon_capabilities(module):
    """Return the capabilities dict for a Chrome session on Selenoid, no proxy.

    Args:
        module: Label for the session; used as the Selenoid session ``name``
            and as the prefix of the video and log file names.

    Returns:
        A dict with browser settings and ``selenoid:options``.
    """
    # Take the timestamp once so the video and log files of one session share
    # the same stem; two separate now() calls can differ by microseconds.
    stamp = datetime.datetime.now()
    capabilities = {
        "browserName": "chrome",
        "browserVersion": "latest",
        "pageLoadStrategy": "normal",
        "javascriptEnabled": True,
        "selenoid:options": {
            "enableVNC": True,
            "enableVideo": True,
            "enableLog": True,
            "videoName": f"{module}_{stamp}.mp4",
            "logName": f"{module}_{stamp}.log",
            "name": module,
        },
    }
    return capabilities