-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.yaml
More file actions
170 lines (161 loc) · 4.68 KB
/
docker-compose.yaml
File metadata and controls
170 lines (161 loc) · 4.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# Local stack: Airflow (LocalExecutor) + Spark standalone (1 master, 2 workers)
# + PostgreSQL + pgAdmin.
#
# Host ports published by this file:
#   8080 -> pgAdmin          8081 -> Airflow webserver
#   8090 -> Spark master UI  8091 / 8092 -> Spark worker UIs
#   7077 -> Spark master     5432 -> PostgreSQL
version: '3'

# Settings shared by every Airflow container; merged into services via `<<:`.
x-airflow-common: &airflow-common
  image: apache/airflow:2.7.1
  environment: &airflow-common-env
    AIRFLOW__CORE__EXECUTOR: LocalExecutor
    # Must stay in sync with the POSTGRES_* values of the `pgdatabase` service.
    AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://root:root@pgdatabase:5432/project1
    AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
    AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
    AIRFLOW__LOGGING__LOGGING_LEVEL: 'INFO'
    # Overridable from the shell or an .env file; the fallback is a placeholder
    # and should be replaced outside of local development.
    AIRFLOW__WEBSERVER__SECRET_KEY: "${AIRFLOW__WEBSERVER__SECRET_KEY:-your-secret-key}"
    AIRFLOW__WEBSERVER__ENABLE_PROXY_FIX: 'true'
  volumes:
    - ./dags:/opt/airflow/dags
    - ./logs:/opt/airflow/logs
    - ./plugins:/opt/airflow/plugins
    - ./config:/opt/airflow/config
    - ./lab:/opt/airflow/lab
    - ./spark-apps:/opt/airflow/spark-apps
    - ./spark-data:/opt/airflow/data

services:
  # One-shot initialisation container; the long-running Airflow services wait
  # for it to exit successfully before they start.
  airflow-init:
    <<: *airflow-common
    command: version
    environment:
      <<: *airflow-common-env
      _AIRFLOW_DB_UPGRADE: 'true'
      _AIRFLOW_WWW_USER_CREATE: 'true'
      _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-admin}
      _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-admin}
      # NOTE(review): set only on this container; presumably the image built
      # for the webserver/scheduler already ships the Spark provider - confirm.
      _PIP_ADDITIONAL_REQUIREMENTS: apache-airflow-providers-apache-spark==4.1.5
    depends_on:
      # Do not start until PostgreSQL passes its healthcheck.
      pgdatabase:
        condition: service_healthy

  airflow-webserver:
    <<: *airflow-common
    # NOTE(review): `build` combined with the inherited `image` tags the local
    # build as apache/airflow:2.7.1 - confirm this is intended.
    build:
      context: .
    command: bash -c "airflow db upgrade && airflow webserver"
    ports:
      - "8081:8080"
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:8080/health"]
      interval: 30s
      timeout: 30s
      retries: 3
    restart: always
    depends_on:
      # Wait for initialisation to finish, not merely for the container to start.
      airflow-init:
        condition: service_completed_successfully
    # Volumes are inherited from x-airflow-common.

  airflow-scheduler:
    <<: *airflow-common
    build:
      context: .
    command: bash -c "airflow db upgrade && airflow scheduler"
    healthcheck:
      # `$$` escapes Compose interpolation so the container shell expands HOSTNAME.
      test: ["CMD-SHELL", 'airflow jobs check --job-type SchedulerJob --hostname "$${HOSTNAME}"']
      interval: 30s
      timeout: 30s
      retries: 3
    restart: always
    depends_on:
      airflow-init:
        condition: service_completed_successfully
    # Volumes are inherited from x-airflow-common.

  # Spark services
  spark-master:
    image: bitnami/spark:3.3.0
    environment:
      - SPARK_MODE=master
      - SPARK_RPC_AUTHENTICATION_ENABLED=no
      - SPARK_RPC_ENCRYPTION_ENABLED=no
      - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
      - SPARK_SSL_ENABLED=no
    ports:
      - "8090:8080"
      - "7077:7077"
    volumes:
      - ./spark-data:/opt/spark/data
      - ./spark-apps:/opt/spark/apps
      # Mounted at the same path as in the Airflow containers.
      - ./lab:/opt/airflow/lab

  spark-worker-1:
    image: bitnami/spark:3.3.0
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark-master:7077
      - SPARK_WORKER_MEMORY=2G
      - SPARK_WORKER_CORES=2
      - SPARK_RPC_AUTHENTICATION_ENABLED=no
      - SPARK_RPC_ENCRYPTION_ENABLED=no
      - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
      - SPARK_SSL_ENABLED=no
    ports:
      - "8091:8081"
    volumes:
      - ./spark-data:/opt/spark/data
      - ./spark-apps:/opt/spark/apps
      - ./lab:/opt/airflow/lab
    depends_on:
      - spark-master

  spark-worker-2:
    image: bitnami/spark:3.3.0
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark-master:7077
      - SPARK_WORKER_MEMORY=2G
      - SPARK_WORKER_CORES=2
      - SPARK_RPC_AUTHENTICATION_ENABLED=no
      - SPARK_RPC_ENCRYPTION_ENABLED=no
      - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
      - SPARK_SSL_ENABLED=no
    ports:
      - "8092:8081"
    volumes:
      - ./spark-data:/opt/spark/data
      - ./spark-apps:/opt/spark/apps
      - ./lab:/opt/airflow/lab
    depends_on:
      - spark-master

  # PostgreSQL service (database referenced by AIRFLOW__DATABASE__SQL_ALCHEMY_CONN)
  pgdatabase:
    image: postgres:13
    environment:
      - POSTGRES_USER=root
      - POSTGRES_PASSWORD=root
      - POSTGRES_DB=project1
    volumes:
      - postgres_data:/var/lib/postgresql/data
    ports:
      - "5432:5432"
    healthcheck:
      # Name the database explicitly; without -d the check targets a database
      # named after the user ("root"), which this file never creates.
      test: ["CMD", "pg_isready", "-U", "root", "-d", "project1"]
      interval: 5s
      retries: 5

  pgadmin:
    image: dpage/pgadmin4
    environment:
      - PGADMIN_DEFAULT_EMAIL=admin@admin.com
      - PGADMIN_DEFAULT_PASSWORD=root
    ports:
      - "8080:80"
    volumes:
      - pgadmin_data:/var/lib/pgadmin  # persistent storage for pgAdmin
    depends_on:
      - pgdatabase

volumes:
  postgres_data:
    name: airflow_postgres_data
  # pgAdmin data volume
  pgadmin_data:
    name: airflow_pgadmin_data
  # NOTE(review): not referenced by any service in this file; kept so existing
  # setups are unaffected.
  postgres-db-volume: {}