You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
93 lines
2.2 KiB
93 lines
2.2 KiB
""" |
|
Code that goes along with the Airflow tutorial located at:

http://airflow.readthedocs.org/en/latest/tutorial.html
"""

from datetime import datetime, timedelta

from airflow import DAG

# Operators; we need this to operate!
from airflow.operators.bash import BashOperator
from airflow.operators.dummy_operator import DummyOperator
from airflow.utils.dates import days_ago

# Defaults handed to the DAG constructor as ``default_args``.
default_args = {
    "owner": "donaldrich",
    "depends_on_past": False,
    # NOTE(review): the DAG constructor in this file also passes its own
    # start_date; confirm which of the two is the intended one.
    "start_date": datetime(2016, 7, 13),
    "email": ["email@gmail.com"],
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 1,
    "retry_delay": timedelta(minutes=15),
}

# Directory holding the helper scripts; the bash commands in this file
# invoke scripts under this same location.
script_path = "/data/scripts"
# Archive directory; not referenced by any active code visible in this file.
data_path = "/data/data/archive"

# DAG object for the "zip_docker" pipeline. ``schedule_interval=None`` means
# no schedule is attached to it.
# NOTE(review): the description string looks inherited from the Airflow
# tutorial; confirm whether it should describe this pipeline instead.
dag = DAG(
    "zip_docker",
    default_args=default_args,
    description="A simple tutorial DAG",
    schedule_interval=None,
    start_date=days_ago(2),
    tags=["zip", "docker"],
)

# Task definitions. Operators created inside ``with dag:`` are attached to
# the DAG by the context manager, so no explicit ``dag=`` argument is needed.
with dag:
    # No-op marker for the beginning of the pipeline.
    start = DummyOperator(task_id="start")

    # Pull the Selenoid Chrome image.
    pull1 = BashOperator(
        task_id="pull_chrome",
        bash_command="sudo docker pull selenoid/chrome:latest",
    )

    # Pull the Selenoid video-recorder image.
    pull2 = BashOperator(
        task_id="pull_recorder",
        bash_command="sudo docker pull selenoid/video-recorder:latest-release",
    )

    # Run the scrape script; the path is built from ``script_path`` so the
    # scripts directory is defined in exactly one place.
    scrape = BashOperator(
        task_id="scrape_listings",
        bash_command=f"sh {script_path}/scrape.sh -s target -k docker",
    )

    # Run the post-scrape script once scraping has finished.
    cleanup = BashOperator(
        task_id="cleanup",
        bash_command=f"sh {script_path}/post-scrape.sh -s target -k devops",
    )

    # No-op marker for the end of the pipeline.
    end = DummyOperator(task_id="end")

    # Both pulls are downstream of ``start`` and upstream of ``scrape``;
    # the remaining tasks run strictly in sequence.
    start >> [pull1, pull2] >> scrape >> cleanup >> end

# scrape = BashOperator(
#     task_id="scrape_listings",
#     bash_command="python3 " + script_path + '/gather/zip.py -k "devops"',
#     # retries=3,
#     # dag=dag
# )

# cleanup = BashOperator(
#     task_id="cleanup",
#     bash_command=script_path + "/post-scrape.sh -s zip -k devops",
#     # retries=3,
#     # dag=dag
# )

# init >> [pre1,pre2]

# init >> pre2

# pre2 >> scrape

# scrape >> cleanup