Apache Airflow DAGs with backend configuration bundle.
"""
Code that goes along with the Airflow tutorial located at:
http://airflow.readthedocs.org/en/latest/tutorial.html
"""
from datetime import timedelta

from airflow import DAG
# Operators; we need this to operate!
from airflow.operators.bash import BashOperator
from airflow.operators.dummy import DummyOperator
from airflow.utils.dates import days_ago

default_args = {
    "owner": "donaldrich",
    "depends_on_past": False,
    "email": ["email@gmail.com"],
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 1,
    "retry_delay": timedelta(minutes=15),
}
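
# These defaults are passed to every task in the DAG; an argument set directly
# on a task (e.g. the commented-out retries=3 below) overrides its counterpart
# here. start_date is set once on the DAG itself rather than per task, which
# avoids the original conflict between a 2016 default and days_ago(2).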

script_path = "/data/scripts"
data_path = "/data/data/archive"  # currently unused; kept from earlier variants

dag = DAG(
    "zip_docker",
    default_args=default_args,
    description="A simple tutorial DAG",
    schedule_interval=None,
    start_date=days_ago(2),
    tags=["zip", "docker"],
)
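
# With schedule_interval=None the DAG never runs on a schedule; it has to be
# triggered manually, e.g. from the UI or with the Airflow 2 CLI:
#   airflow dags trigger zip_docker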

with dag:
    start = DummyOperator(task_id="start")
    pull1 = BashOperator(
        task_id="pull_chrome",
        bash_command="sudo docker pull selenoid/chrome:latest",
        # retries=3,
    )
    pull2 = BashOperator(
        task_id="pull_recorder",
        bash_command="sudo docker pull selenoid/video-recorder:latest-release",
        # retries=3,
    )
    scrape = BashOperator(
        task_id="scrape_listings",
        bash_command=f"sh {script_path}/scrape.sh -s target -k docker",
        # retries=3,
    )
    cleanup = BashOperator(
        task_id="cleanup",
        bash_command=f"sh {script_path}/post-scrape.sh -s target -k devops",
        # retries=3,
    )
    end = DummyOperator(task_id="end")

    start >> [pull1, pull2] >> scrape >> cleanup >> end
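
# A minimal sketch for exercising the DAG locally (assuming an Airflow 2
# installation that has picked up this file from its DAGS_FOLDER; both
# commands run tasks in-process, without a scheduler):
#   airflow tasks test zip_docker pull_chrome 2021-01-01
#   airflow dags test zip_docker 2021-01-01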

# Earlier variant of the scrape/cleanup tasks, kept for reference:
# scrape = BashOperator(
#     task_id="scrape_listings",
#     bash_command="python3 " + script_path + '/gather/zip.py -k "devops"',
#     # retries=3,
# )
# cleanup = BashOperator(
#     task_id="cleanup",
#     bash_command=script_path + "/post-scrape.sh -s zip -k devops",
#     # retries=3,
# )
# init >> [pre1, pre2]
# init >> pre2
# pre2 >> scrape
# scrape >> cleanup
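
# A small, optional convenience (an assumption, not part of the original repo):
# running this file directly confirms the DAG parses and lists its task ids,
# without needing a scheduler or webserver.
if __name__ == "__main__":
    print(f"{dag.dag_id}: {[task.task_id for task in dag.tasks]}")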