"""Airflow DAG ``zip_docker``.

Pulls the Selenoid browser/recorder Docker images in parallel, then runs a
scrape script followed by a post-scrape cleanup script.

Task graph::

    start >> [pull_chrome, pull_recorder] >> scrape_listings >> cleanup >> end

Based on the Airflow tutorial:
http://airflow.readthedocs.org/en/latest/tutorial.html
"""
from datetime import datetime, timedelta

from airflow import DAG

# Operators; we need this to operate!
from airflow.operators.bash import BashOperator
# NOTE(review): `dummy_operator` and `days_ago` are deprecated import paths in
# Airflow 2.x (use `airflow.operators.empty.EmptyOperator` and a static
# `pendulum` datetime). Left unchanged to avoid altering import-time behavior
# on the currently deployed Airflow version — confirm before migrating.
from airflow.operators.dummy_operator import DummyOperator
from airflow.utils.dates import days_ago

# Task-level defaults applied to every operator in this DAG.
default_args = {
    "owner": "donaldrich",
    "depends_on_past": False,
    # NOTE(review): this start_date conflicts with the DAG-level
    # `start_date=days_ago(2)` below; per Airflow precedence the value in
    # default_args wins at the task level. Kept as-is to preserve current
    # behavior — confirm which one is intended.
    "start_date": datetime(2016, 7, 13),
    "email": ["email@gmail.com"],
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 1,
    "retry_delay": timedelta(minutes=15),
}

# Filesystem layout on the worker host.
script_path = "/data/scripts"
data_path = "/data/data/archive"  # currently unused; kept for external references

dag = DAG(
    "zip_docker",
    default_args=default_args,
    description="A simple tutorial DAG",
    schedule_interval=None,  # manual / externally-triggered only
    start_date=days_ago(2),
    tags=["zip", "docker"],
)

with dag:
    # Sentinel tasks marking the DAG boundaries; no work performed.
    start = DummyOperator(task_id="start")

    # Pre-pull the two Selenoid images so the scrape step never blocks on a
    # cold image download. Both pulls run in parallel.
    pull1 = BashOperator(
        task_id="pull_chrome",
        bash_command="sudo docker pull selenoid/chrome:latest",
    )

    pull2 = BashOperator(
        task_id="pull_recorder",
        bash_command="sudo docker pull selenoid/video-recorder:latest-release",
    )

    # Run the scrape against the "target" site for the "docker" keyword.
    scrape = BashOperator(
        task_id="scrape_listings",
        bash_command=f"sh {script_path}/scrape.sh -s target -k docker",
    )

    # Post-process/clean up after the scrape.
    # NOTE(review): keyword here is "devops" while the scrape uses "docker" —
    # looks intentional per the original, but worth confirming.
    cleanup = BashOperator(
        task_id="cleanup",
        bash_command=f"sh {script_path}/post-scrape.sh -s target -k devops",
    )

    end = DummyOperator(task_id="end")

    start >> [pull1, pull2] >> scrape >> cleanup >> end