Apache Airflow DAGs with backend configuration bundle.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

95 lines
3.1 KiB

import logging
import logging as log
import os
import time
import docker
from airflow.hooks.base_hook import BaseHook
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
class SeleniumHook(BaseHook):
    """
    Creates a Selenium Docker container on the host and controls the
    browser by sending commands to the remote server.

    Expected call order: ``create_container()``, ``create_driver()``,
    ``run_script(...)``, ``remove_container()``. The attributes
    ``container``, ``container_ip``, ``local_downloads``, ``sel_downloads``
    and ``driver`` only exist after the method that sets them has run.
    """

    def __init__(self):
        # NOTE(review): the parent initialiser is not called here, as in the
        # original code; BaseHook.__init__ presumably differs between Airflow
        # versions -- confirm before adding a super().__init__() call.
        logging.info("initialised hook")

    def create_container(self):
        """
        Creates the selenium docker container.

        Runs a detached ``selenium/standalone-chrome`` container on the
        ``container_bridge`` network and records its IP address on that
        network.

        Sets ``self.local_downloads`` (``<cwd>/downloads`` on the host),
        ``self.sel_downloads`` (download directory inside the container),
        ``self.container`` and ``self.container_ip``.
        """
        logging.info("creating_container")
        cwd = os.getcwd()
        self.local_downloads = os.path.join(cwd, "downloads")
        self.sel_downloads = "/home/seluser/downloads"
        volumes = [
            # Files downloaded by the browser land in the host directory.
            "{}:{}".format(self.local_downloads, self.sel_downloads),
            # Presumably shares host shared memory with Chrome -- verify
            # against the selenium image documentation.
            "/dev/shm:/dev/shm",
        ]
        client = docker.from_env()
        container = client.containers.run(
            "selenium/standalone-chrome",
            volumes=volumes,
            network="container_bridge",
            detach=True,
        )
        self.container = container
        # Inspect through the same client that started the container so the
        # environment-based connection settings used by from_env() also
        # apply here (a fresh docker.APIClient() would ignore them).
        details = client.api.inspect_container(container.id)
        networks = details["NetworkSettings"]["Networks"]
        self.container_ip = networks["container_bridge"]["IPAddress"]

    def create_driver(self, timeout=None, poll_interval=10):
        """
        Creates and configures the remote Selenium webdriver.

        Retries the connection to the container's WebDriver endpoint until
        it succeeds, then enables file downloads in headless Chrome and
        stores the driver on ``self.driver``. Requires ``create_container``
        to have set ``self.container_ip`` and ``self.sel_downloads``.

        :param timeout: maximum number of seconds to keep retrying, or
            ``None`` (default) to retry indefinitely.
        :param poll_interval: seconds to sleep between attempts.
        :raises Exception: the last connection error, re-raised once
            ``timeout`` has elapsed.
        """
        logging.info("creating driver")
        options = Options()
        options.add_argument("--headless")
        options.add_argument("--window-size=1920x1080")
        chrome_driver = "{}:4444/wd/hub".format(self.container_ip)
        # For a local server use 'http://127.0.0.1' as the host instead.

        deadline = None if timeout is None else time.monotonic() + timeout
        # Wait for the remote server to come up, unless the timeout expires.
        while True:
            try:
                driver = webdriver.Remote(
                    command_executor=chrome_driver,
                    desired_capabilities=DesiredCapabilities.CHROME,
                    options=options,
                )
            except Exception:
                # Only Exception is caught so that KeyboardInterrupt and
                # SystemExit can still stop the wait loop.
                if deadline is not None and time.monotonic() >= deadline:
                    raise
                logging.info(
                    "remote not ready, sleeping for %s seconds.", poll_interval
                )
                time.sleep(poll_interval)
            else:
                logging.info("remote ready")
                break

        # Enable downloads in headless chrome.
        driver.command_executor._commands["send_command"] = (
            "POST",
            "/session/$sessionId/chromium/send_command",
        )
        params = {
            "cmd": "Page.setDownloadBehavior",
            "params": {"behavior": "allow", "downloadPath": self.sel_downloads},
        }
        driver.execute("send_command", params)
        self.driver = driver

    def remove_container(self):
        """
        This removes the Selenium container.

        The container stored by ``create_container`` is removed with
        ``force=True``.
        """
        self.container.remove(force=True)
        logging.info("Removed container: %s", self.container.id)

    def run_script(self, script, args):
        """
        This is a wrapper around the python script which sends commands to
        the docker container. The first variable of the script must be the
        web driver.

        :param script: callable invoked as ``script(self.driver, *args)``.
        :param args: iterable of extra positional arguments for ``script``.
        """
        script(self.driver, *args)