# Apache Airflow DAGs w/ backend configuration bundle.
version: "3.7"

# Shared base definition merged into every airflow-* service below.
x-airflow-common: &airflow-common
  image: donaldrich/airflow:latest
  # build: .
  environment: &airflow-common-env
    AIRFLOW__CORE__STORE_SERIALIZED_DAGS: "True"
    AIRFLOW__CORE__STORE_DAG_CODE: "True"
    AIRFLOW__CORE__EXECUTOR: "CeleryExecutor"
    AIRFLOW__CORE__SQL_ALCHEMY_CONN: "postgresql+psycopg2://airflow:airflow@postgres-dev:5432/airflow"
    AIRFLOW__CELERY__RESULT_BACKEND: "db+postgresql://airflow:airflow@postgres-dev:5432/airflow"
    AIRFLOW__CELERY__BROKER_URL: "redis://:@redis:6379/0"
    AIRFLOW__CORE__FERNET_KEY: ""
    AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: "false"
    AIRFLOW__CORE__LOAD_EXAMPLES: "false"
    # AIRFLOW__CORE__PARALLELISM: 4
    # AIRFLOW__CORE__DAG_CONCURRENCY: 4
    # AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG: 4
    AIRFLOW_UID: "1000"
    AIRFLOW_GID: "0"
    _AIRFLOW_DB_UPGRADE: "true"
    _AIRFLOW_WWW_USER_CREATE: "true"
    _AIRFLOW_WWW_USER_USERNAME: "{{ user }}"
    _AIRFLOW_WWW_USER_PASSWORD: "{{ password }}"
    # $$ escapes the dollar sign so Compose passes $PYTHONPATH through to the container.
    PYTHONPATH: "/data:$$PYTHONPATH"
  user: "1000:0"
  volumes:
    - "./airflow:/opt/airflow"
    - "./data:/data"
    - "/var/run/docker.sock:/var/run/docker.sock"
    # - "/usr/bin/docker:/bin/docker:ro"
  networks:
    - proxy
    - backend
services:
  # One-shot bootstrap: migrates the metadata DB and creates the admin user.
  airflow-init:
    <<: *airflow-common
    container_name: airflow-init
    environment:
      <<: *airflow-common-env
    # depends_on:
    #   - airflow-db
    command: bash -c "airflow db init && airflow db upgrade && airflow users create --role Admin --username {{ user }} --email {{ email }} --firstname Don --lastname Aldrich --password {{ password }}"
airflow-webserver:
<<: *airflow-common
container_name: airflow-webserver
hostname: airflow-webserver
command:
webserver
# command: >
# bash -c 'if [[ -z "$$AIRFLOW__API__AUTH_BACKEND" ]] && [[ $$(pip show -f apache-airflow | grep basic_auth.py) ]];
# then export AIRFLOW__API__AUTH_BACKEND=airflow.api.auth.backend.basic_auth ;
# else export AIRFLOW__API__AUTH_BACKEND=airflow.api.auth.backend.default ; fi &&
# { airflow create_user "$$@" || airflow users create "$$@" ; } &&
# { airflow sync_perm || airflow sync-perm ;} &&
# airflow webserver' -- -r Admin -u admin -e admin@example.com -f admin -l user -p admin
healthcheck:
test: ["CMD", "curl", "--fail", "http://localhost:8080/health"]
interval: 10s
timeout: 10s
retries: 5
restart: always
privileged: true
depends_on:
- airflow-scheduler
environment:
<<: *airflow-common-env
labels:
traefik.http.services.airflow.loadbalancer.server.port: "8080"
traefik.enable: "true"
traefik.http.routers.airflow.entrypoints: "https"
traefik.http.routers.airflow.tls.certResolver: "cloudflare"
traefik.http.routers.airflow.rule: "Host(`airflow.{{ domain }}.com`)"
traefik.http.routers.airflow.middlewares: "ip-whitelist@file"
traefik.http.routers.airflow.service: "airflow"
airflow-scheduler:
<<: *airflow-common
command: scheduler
container_name: airflow-scheduler
hostname: airflow-scheduler
restart: always
depends_on:
- airflow-init
environment:
<<: *airflow-common-env
# # entrypoint: sh -c '/app/scripts/wait-for postgres:5432 -- airflow db init && airflow scheduler'
airflow-worker:
<<: *airflow-common
command: celery worker
restart: always
container_name: airflow-worker
hostname: airflow-worker
airflow-queue:
<<: *airflow-common
command: celery flower
container_name: airflow-queue
hostname: airflow-queue
ports:
- 5555:5555
healthcheck:
test: ["CMD", "curl", "--fail", "http://localhost:5555/"]
interval: 10s
timeout: 10s
retries: 5
restart: always
dbt:
image: fishtownanalytics/dbt:0.19.1
container_name: dbt
volumes:
- "/home/{{ user }}/projects/jobfunnel:/data"
- "/home/{{ user }}/projects/jobfunnel/dbt:/usr/app"
# - "dbt-db:/var/lib/postgresql/data"
ports:
- "8081"
networks:
- proxy
- backend
command: docs serve --project-dir /data/transform
working_dir: "/data/transform"
environment:
DBT_SCHEMA: dbt
DBT_RAW_DATA_SCHEMA: dbt_raw_data
DBT_PROFILES_DIR: "/data/transform/profile"
# DBT_PROJECT_DIR: "/data/transform"
# DBT_PROFILES_DIR: "/data"
# DBT_POSTGRES_PASSWORD: dbt
# DBT_POSTGRES_USER : dbt
# DBT_POSTGRES_DB : dbt
# DBT_DBT_SCHEMA: dbt
# DBT_DBT_RAW_DATA_SCHEMA: dbt_raw_data
# DBT_POSTGRES_HOST: dbt-db
meltano:
image: meltano/meltano:latest
container_name: meltano
volumes:
- "/home/{{ user }}/projects/jobfunnel:/project"
ports:
- "5000:5000"
networks:
- proxy
- backend
# environment:
# MELTANO_UI_SERVER_NAME: "etl.{{ domain }}.com"
# MELTANO_DATABASE_URI: "postgresql://meltano:meltano@meltano-db:5432/meltano"
# MELTANO_WEBAPP_POSTGRES_URL: localhost
# MELTANO_WEBAPP_POSTGRES_DB=meltano
# MELTANO_WEBAPP_POSTGRES_USER=meltano
# MELTANO_WEBAPP_POSTGRES_PASSWORD=meltano
# MELTANO_WEBAPP_POSTGRES_PORT=5501
# MELTANO_WEBAPP_LOG_PATH="/tmp/meltano.log"
# MELTANO_WEBAPP_API_URL="http://localhost:5000"
# LOG_PATH="/tmp/meltano.log"
# API_URL="http://localhost:5000"
# MELTANO_MODEL_DIR="./model"
# MELTANO_TRANSFORM_DIR="./transform"
# MELTANO_UI_SESSION_COOKIE_DOMAIN: "etl.{{ domain }}.com"
# MELTANO_UI_SESSION_COOKIE_SECURE: "true"
labels:
traefik.http.services.meltano.loadbalancer.server.port: "5000"
traefik.enable: "true"
traefik.http.routers.meltano.entrypoints: "https"
traefik.http.routers.meltano.tls.certResolver: "cloudflare"
traefik.http.routers.meltano.rule: "Host(`etl.{{ domain }}.com`)"
traefik.http.routers.meltano.middlewares: "secured@file,ip-whitelist@file"
traefik.http.routers.meltano.service: "meltano"
volumes:
  tmp_airflow:
    driver: local
  airflow-db:
    driver: local
  dbt-db:
    driver: local
# Both networks are created outside this file (e.g. by the Traefik stack).
networks:
  proxy:
    external: true
  backend:
    external: true