From 033b573c4afa458fa74af9ce61f47b5c10295769 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Wed, 24 Sep 2025 12:28:15 +0200 Subject: [PATCH 1/4] chore: Update uvicorn to fix worker timeout issue --- docs/modules/airflow/pages/troubleshooting/index.adoc | 9 ++++----- rust/operator-binary/src/env_vars.rs | 11 ----------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/docs/modules/airflow/pages/troubleshooting/index.adoc b/docs/modules/airflow/pages/troubleshooting/index.adoc index aa2ef848..526487e0 100644 --- a/docs/modules/airflow/pages/troubleshooting/index.adoc +++ b/docs/modules/airflow/pages/troubleshooting/index.adoc @@ -29,16 +29,15 @@ See e.g. https://github.com/minio/minio/issues/20845[this MinIO issue] for detai == Setting API Workers In Airflow the webserver (called the API Server in Airflow 3.x+) can use multiple workers. -This is determined by the environment variable `+AIRFLOW__API__WORKERS+` and is set by default to `4` in Airflow 2.x and `1` in Airflow 3.x+. -The reason for this difference is that Airflow uses a backend library to manage child processes and in 3.x+ this library can cause child processes to be killed if a hard-coded startup timeout is exceeded. -For most cases with Airflow 3.x+ a default of `1` should be sufficient, but if you run into performance issues and would like to add more workers, you can either modulate multiple worker processes at the level of webserver, keeping the default of a single worker per webserver: +This is determined by the environment variable `+AIRFLOW__API__WORKERS+` and is set by default to `4`. 
+For most cases the default should work without problems, but if you run into performance issues and would like to add more workers, you can either add webserver replicas, keeping the default number of workers for each one: [source,yaml] ---- webservers: roleGroups: default: - replicas: 2 # add a replica (with a single worker) + replicas: 2 # add a replica (with the default number of workers) ---- or change the environment variable using `envOverrides`: @@ -47,7 +46,7 @@ or change the environment variable using `envOverrides`: ---- webservers: envOverrides: - AIRFLOW__API__WORKERS: 2 # something other than the default of 1 + AIRFLOW__API__WORKERS: 6 # something other than the default ---- TIP: Our strong recommendation is to increase the webserver replicas, with each webserver running a single worker, as this removes the risk of running into timeouts or memory issues. diff --git a/rust/operator-binary/src/env_vars.rs b/rust/operator-binary/src/env_vars.rs index 9c60f4fb..439fa705 100644 --- a/rust/operator-binary/src/env_vars.rs +++ b/rust/operator-binary/src/env_vars.rs @@ -507,17 +507,6 @@ fn add_version_specific_env_vars( ..Default::default() }, ); - // Airflow 3.x uses fast-api as a backend: newer versions of uvicorn can - // cause issues with child processes. See discussion here: . - // This will be considered as part of this issue: . - env.insert( - "AIRFLOW__API__WORKERS".into(), - EnvVar { - name: "AIRFLOW__API__WORKERS".into(), - value: Some("1".into()), - ..Default::default() - }, - ); } } else { env.insert( From 497642d1b83c4756212b2f0155022b08b3c585e9 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Wed, 24 Sep 2025 12:31:22 +0200 Subject: [PATCH 2/4] changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 09d16dc6..24763155 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ ### Changed - Use internal secrets for secret- and jwt-keys ([#686]). 
+- Update uvicorn version and revert to default number of API workers ([#690]). ### Fixed @@ -31,6 +32,7 @@ [#685]: https://github.com/stackabletech/airflow-operator/pull/685 [#686]: https://github.com/stackabletech/airflow-operator/pull/686 [#687]: https://github.com/stackabletech/airflow-operator/pull/687 +[#690]: https://github.com/stackabletech/airflow-operator/pull/690 ## [25.7.0] - 2025-07-23 From 3ba3259e6034c449b24152d2467a52b1952f8722 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Wed, 24 Sep 2025 15:53:02 +0200 Subject: [PATCH 3/4] bumped resources for overrides test --- tests/templates/kuttl/overrides/10-install-airflow.yaml.j2 | 2 +- tests/templates/kuttl/overrides/20-install-airflow2.yaml.j2 | 2 +- tests/templates/kuttl/overrides/21-assert.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/templates/kuttl/overrides/10-install-airflow.yaml.j2 b/tests/templates/kuttl/overrides/10-install-airflow.yaml.j2 index efc2c61a..7ae53a0c 100644 --- a/tests/templates/kuttl/overrides/10-install-airflow.yaml.j2 +++ b/tests/templates/kuttl/overrides/10-install-airflow.yaml.j2 @@ -86,7 +86,7 @@ spec: min: 400m max: 800m memory: - limit: 2Gi + limit: 3Gi podOverrides: spec: containers: diff --git a/tests/templates/kuttl/overrides/20-install-airflow2.yaml.j2 b/tests/templates/kuttl/overrides/20-install-airflow2.yaml.j2 index da6e5d5a..96edce21 100644 --- a/tests/templates/kuttl/overrides/20-install-airflow2.yaml.j2 +++ b/tests/templates/kuttl/overrides/20-install-airflow2.yaml.j2 @@ -32,7 +32,7 @@ spec: - name: airflow resources: limits: - cpu: 810m + cpu: 2000m config: resources: cpu: diff --git a/tests/templates/kuttl/overrides/21-assert.yaml b/tests/templates/kuttl/overrides/21-assert.yaml index 93a70e03..0045c59b 100644 --- a/tests/templates/kuttl/overrides/21-assert.yaml +++ b/tests/templates/kuttl/overrides/21-assert.yaml @@ -4,5 +4,5 @@ kind: TestAssert timeout: 30 commands: - script: | - kubectl -n $NAMESPACE get cm 
airflow-kubernetes-executor-pod-template -o json | jq -r '.data."airflow_executor_pod_template.yaml"' | yq -e '.spec.containers.[0].resources.limits | select (.cpu == "750m")' + kubectl -n $NAMESPACE get cm airflow-kubernetes-executor-pod-template -o json | jq -r '.data."airflow_executor_pod_template.yaml"' | yq -e '.spec.containers.[0].resources.limits | select (.cpu == "2000m")' kubectl -n $NAMESPACE get cm airflow-kubernetes-executor-pod-template -o json | jq -r '.data."airflow_executor_pod_template.yaml"' | yq -e '.spec.containers[] | select (.name == "base") | .env[] | select (.name == "AIRFLOW__METRICS__STATSD_ON" and .value == "False")' From ecbe53ccf2ea667e5a67bc558a6a52d1a1977320 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 25 Sep 2025 11:45:16 +0200 Subject: [PATCH 4/4] fix test asserts --- tests/templates/kuttl/overrides/11-assert.yaml | 4 ++++ tests/templates/kuttl/overrides/21-assert.yaml | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/templates/kuttl/overrides/11-assert.yaml b/tests/templates/kuttl/overrides/11-assert.yaml index 33645a7a..2ac0f892 100644 --- a/tests/templates/kuttl/overrides/11-assert.yaml +++ b/tests/templates/kuttl/overrides/11-assert.yaml @@ -7,6 +7,8 @@ commands: # Test envOverrides # - script: | + set -eu + kubectl -n $NAMESPACE get sts airflow-celery-webserver-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "airflow") | .env[] | select (.name == "COMMON_VAR" and .value == "group-value")' kubectl -n $NAMESPACE get sts airflow-celery-webserver-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "airflow") | .env[] | select (.name == "GROUP_VAR" and .value == "group-value")' kubectl -n $NAMESPACE get sts airflow-celery-webserver-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "airflow") | .env[] | select (.name == "ROLE_VAR" and .value == "role-value")' @@ -14,6 +16,8 @@ commands: - script: | + set -eu + 
kubectl -n $NAMESPACE get sts airflow-celery-worker-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "airflow") | .env[] | select (.name == "COMMON_VAR" and .value == "group-value")' kubectl -n $NAMESPACE get sts airflow-celery-worker-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "airflow") | .env[] | select (.name == "GROUP_VAR" and .value == "group-value")' kubectl -n $NAMESPACE get sts airflow-celery-worker-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "airflow") | .env[] | select (.name == "ROLE_VAR" and .value == "role-value")' diff --git a/tests/templates/kuttl/overrides/21-assert.yaml b/tests/templates/kuttl/overrides/21-assert.yaml index 0045c59b..dd789262 100644 --- a/tests/templates/kuttl/overrides/21-assert.yaml +++ b/tests/templates/kuttl/overrides/21-assert.yaml @@ -4,5 +4,7 @@ kind: TestAssert timeout: 30 commands: - script: | - kubectl -n $NAMESPACE get cm airflow-kubernetes-executor-pod-template -o json | jq -r '.data."airflow_executor_pod_template.yaml"' | yq -e '.spec.containers.[0].resources.limits | select (.cpu == "2000m")' + set -eu + + kubectl -n $NAMESPACE get cm airflow-kubernetes-executor-pod-template -o json | jq -r '.data."airflow_executor_pod_template.yaml"' | yq -e '.spec.containers.[0].resources.limits | select (.cpu == "750m")' kubectl -n $NAMESPACE get cm airflow-kubernetes-executor-pod-template -o json | jq -r '.data."airflow_executor_pod_template.yaml"' | yq -e '.spec.containers[] | select (.name == "base") | .env[] | select (.name == "AIRFLOW__METRICS__STATSD_ON" and .value == "False")'