diff --git a/CHANGELOG.md b/CHANGELOG.md index 65dcc281..08a078b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Added + +- Add a flag to determine if database initialization steps should be executed ([#669]). + ### Fixed - Don't panic on invalid authorization config. Previously, a missing OPA ConfigMap would crash the operator ([#667]). @@ -9,6 +13,7 @@ [#667]: https://github.com/stackabletech/airflow-operator/pull/667 [#668]: https://github.com/stackabletech/airflow-operator/pull/668 +[#669]: https://github.com/stackabletech/airflow-operator/pull/669 ## [25.7.0] - 2025-07-23 diff --git a/deploy/helm/airflow-operator/crds/crds.yaml b/deploy/helm/airflow-operator/crds/crds.yaml index f09f4b9f..79612fca 100644 --- a/deploy/helm/airflow-operator/crds/crds.yaml +++ b/deploy/helm/airflow-operator/crds/crds.yaml @@ -591,6 +591,16 @@ spec: - repo type: object type: array + databaseInitialization: + default: + enabled: true + description: Settings related to the database initialization routines (which are always executed by default). + properties: + enabled: + default: true + description: 'Whether to execute the database initialization routines (a combination of database initialization, upgrade and migration depending on the Airflow version). Defaults to true to be backwards-compatible. WARNING: setting this to false is *unsupported* as subsequent updates to the Airflow cluster may result in broken behaviour due to inconsistent metadata! Do not change the default unless you know what you are doing!' + type: boolean + type: object exposeConfig: default: false description: for internal use only - not for production use. 
diff --git a/docs/modules/airflow/pages/usage-guide/db-init.adoc b/docs/modules/airflow/pages/usage-guide/db-init.adoc
new file mode 100644
index 00000000..6e10661e
--- /dev/null
+++ b/docs/modules/airflow/pages/usage-guide/db-init.adoc
@@ -0,0 +1,26 @@
+= Database initialization
+:description: Configure whether Airflow runs its database initialization routines on start-up.
+
+By default, the Airflow scheduler runs database initialization routines (checking and/or creating the metadata schema and creating an admin user) on start-up.
+These routines are idempotent, so running them on every start-up is safe and adds only minimal overhead.
+However, if these steps should be skipped, a running Airflow cluster can be patched with a resource like this to deactivate the initialization:
+
+[source,yaml]
+----
+---
+apiVersion: airflow.stackable.tech/v1alpha1
+kind: AirflowCluster
+metadata:
+  name: airflow
+spec:
+  clusterConfig:
+    databaseInitialization:
+      enabled: false # <1>
+----
+<1> Turn off the initialization routines by setting `spec.clusterConfig.databaseInitialization.enabled` to `false`.
+
+NOTE: The field `databaseInitialization.enabled` is `true` by default to be backwards-compatible.
+A fresh Airflow cluster cannot be created with this field set to `false`, because the metadata schema and the admin user would then never be created, leaving the Airflow database without required metadata.
+
+WARNING: Setting `databaseInitialization.enabled` to `false` is an unsupported operation, as subsequent updates to a running Airflow cluster can result in broken behaviour due to inconsistent metadata.
+Only set `databaseInitialization.enabled` to `false` if you know what you are doing!
diff --git a/docs/modules/airflow/partials/nav.adoc b/docs/modules/airflow/partials/nav.adoc index 7327e034..864760d7 100644 --- a/docs/modules/airflow/partials/nav.adoc +++ b/docs/modules/airflow/partials/nav.adoc @@ -3,6 +3,7 @@ ** xref:airflow:getting_started/first_steps.adoc[] * xref:airflow:required-external-components.adoc[] * xref:airflow:usage-guide/index.adoc[] +** xref:airflow:usage-guide/db-init.adoc[] ** xref:airflow:usage-guide/mounting-dags.adoc[] ** xref:airflow:usage-guide/applying-custom-resources.adoc[] ** xref:airflow:usage-guide/listenerclass.adoc[] diff --git a/rust/operator-binary/src/airflow_controller.rs b/rust/operator-binary/src/airflow_controller.rs index e6354935..5404e524 100644 --- a/rust/operator-binary/src/airflow_controller.rs +++ b/rust/operator-binary/src/airflow_controller.rs @@ -951,8 +951,11 @@ fn build_server_rolegroup_statefulset( .context(GracefulShutdownSnafu)?; let mut airflow_container_args = Vec::new(); - airflow_container_args - .extend(airflow_role.get_commands(authentication_config, resolved_product_image)); + airflow_container_args.extend(airflow_role.get_commands( + airflow, + authentication_config, + resolved_product_image, + )); airflow_container .image_from_product_image(resolved_product_image) diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index e4b357ff..bb55141a 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -251,6 +251,10 @@ pub mod versioned { #[serde(default)] pub load_examples: bool, + /// Settings related to the database initialization routines (which are always executed by default). + #[serde(default)] + pub database_initialization: DatabaseInitializationConfig, + /// Name of the Vector aggregator [discovery ConfigMap](DOCS_BASE_URL_PLACEHOLDER/concepts/service_discovery). /// It must contain the key `ADDRESS` with the address of the Vector aggregator. 
/// Follow the [logging tutorial](DOCS_BASE_URL_PLACEHOLDER/tutorials/logging-vector-aggregator) @@ -268,7 +272,6 @@ pub mod versioned { #[schemars(schema_with = "raw_object_list_schema")] pub volume_mounts: Vec, } - // TODO: move generic version to op-rs? #[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] #[serde(rename_all = "camelCase")] @@ -282,6 +285,28 @@ pub mod versioned { } } +#[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct DatabaseInitializationConfig { + /// Whether to execute the database initialization routines (a combination of database initialization, upgrade and migration depending on the Airflow version). Defaults to true to be backwards-compatible. + /// WARNING: setting this to false is *unsupported* as subsequent updates to the Airflow cluster may result in broken behaviour due to inconsistent metadata! + /// Do not change the default unless you know what you are doing! + #[serde(default = "default_db_init")] + pub enabled: bool, +} + +impl Default for DatabaseInitializationConfig { + fn default() -> Self { + Self { + enabled: default_db_init(), + } + } +} + +pub fn default_db_init() -> bool { + true +} + impl Default for v1alpha1::WebserverRoleConfig { fn default() -> Self { v1alpha1::WebserverRoleConfig { @@ -547,6 +572,7 @@ impl AirflowRole { /// if authentication is enabled. 
pub fn get_commands( &self, + airflow: &v1alpha1::AirflowCluster, auth_config: &AirflowClientAuthenticationDetailsResolved, resolved_product_image: &ResolvedProductImage, ) -> Vec { @@ -576,21 +602,30 @@ impl AirflowRole { "airflow api-server &".to_string(), ]); } - AirflowRole::Scheduler => command.extend(vec![ - "airflow db migrate".to_string(), - "airflow users create \ - --username \"$ADMIN_USERNAME\" \ - --firstname \"$ADMIN_FIRSTNAME\" \ - --lastname \"$ADMIN_LASTNAME\" \ - --email \"$ADMIN_EMAIL\" \ - --password \"$ADMIN_PASSWORD\" \ - --role \"Admin\"" - .to_string(), - "prepare_signal_handlers".to_string(), - container_debug_command(), - "airflow dag-processor &".to_string(), - "airflow scheduler &".to_string(), - ]), + AirflowRole::Scheduler => { + if airflow.spec.cluster_config.database_initialization.enabled { + tracing::info!("Database initialization has been enabled."); + command.extend(vec![ + "airflow db migrate".to_string(), + "airflow users create \ + --username \"$ADMIN_USERNAME\" \ + --firstname \"$ADMIN_FIRSTNAME\" \ + --lastname \"$ADMIN_LASTNAME\" \ + --email \"$ADMIN_EMAIL\" \ + --password \"$ADMIN_PASSWORD\" \ + --role \"Admin\"" + .to_string(), + ]); + } else { + tracing::info!("Database initialization routines have been skipped!") + } + command.extend(vec![ + "prepare_signal_handlers".to_string(), + container_debug_command(), + "airflow dag-processor &".to_string(), + "airflow scheduler &".to_string(), + ]); + } AirflowRole::Worker => command.extend(vec![ "prepare_signal_handlers".to_string(), container_debug_command(), @@ -608,22 +643,31 @@ impl AirflowRole { "airflow webserver &".to_string(), ]); } - AirflowRole::Scheduler => command.extend(vec![ - // Database initialization is limited to the scheduler, see https://github.com/stackabletech/airflow-operator/issues/259 - "airflow db init".to_string(), - "airflow db upgrade".to_string(), - "airflow users create \ - --username \"$ADMIN_USERNAME\" \ - --firstname \"$ADMIN_FIRSTNAME\" \ - 
--lastname \"$ADMIN_LASTNAME\" \ - --email \"$ADMIN_EMAIL\" \ - --password \"$ADMIN_PASSWORD\" \ - --role \"Admin\"" - .to_string(), - "prepare_signal_handlers".to_string(), - container_debug_command(), - "airflow scheduler &".to_string(), - ]), + AirflowRole::Scheduler => { + if airflow.spec.cluster_config.database_initialization.enabled { + tracing::info!("Database initialization has been enabled."); + command.extend(vec![ + // Database initialization is limited to the scheduler, see https://github.com/stackabletech/airflow-operator/issues/259 + "airflow db init".to_string(), + "airflow db upgrade".to_string(), + "airflow users create \ + --username \"$ADMIN_USERNAME\" \ + --firstname \"$ADMIN_FIRSTNAME\" \ + --lastname \"$ADMIN_LASTNAME\" \ + --email \"$ADMIN_EMAIL\" \ + --password \"$ADMIN_PASSWORD\" \ + --role \"Admin\"" + .to_string(), + ]); + } else { + tracing::info!("Database initialization routines have been skipped!") + } + command.extend(vec![ + "prepare_signal_handlers".to_string(), + container_debug_command(), + "airflow scheduler &".to_string(), + ]); + } AirflowRole::Worker => command.extend(vec![ "prepare_signal_handlers".to_string(), container_debug_command(), @@ -981,5 +1025,7 @@ mod tests { assert_eq!("KubernetesExecutor", cluster.spec.executor.to_string()); assert!(cluster.spec.cluster_config.load_examples); assert!(cluster.spec.cluster_config.expose_config); + // defaults to true + assert!(cluster.spec.cluster_config.database_initialization.enabled); } } diff --git a/tests/templates/kuttl/cluster-operation/09-assert.yaml b/tests/templates/kuttl/cluster-operation/09-assert.yaml new file mode 100644 index 00000000..82f31db8 --- /dev/null +++ b/tests/templates/kuttl/cluster-operation/09-assert.yaml @@ -0,0 +1,8 @@ +--- +# For this assert we expect the database operation to be logged +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 30 +commands: +- script: | + kubectl -n $NAMESPACE logs airflow-scheduler-default-0 | grep "Database 
migrating done!" diff --git a/tests/templates/kuttl/cluster-operation/30-restart-airflow.yaml.j2 b/tests/templates/kuttl/cluster-operation/30-restart-airflow.yaml.j2 index 5af77035..7b90ad6b 100644 --- a/tests/templates/kuttl/cluster-operation/30-restart-airflow.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/30-restart-airflow.yaml.j2 @@ -25,6 +25,8 @@ spec: vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} credentialsSecret: test-airflow-credentials + databaseInitialization: + enabled: false webservers: roleConfig: listenerClass: external-unstable diff --git a/tests/templates/kuttl/cluster-operation/31-assert.yaml b/tests/templates/kuttl/cluster-operation/31-assert.yaml new file mode 100644 index 00000000..9c152c5f --- /dev/null +++ b/tests/templates/kuttl/cluster-operation/31-assert.yaml @@ -0,0 +1,8 @@ +--- +# For this step we expect the database operation to NOT be logged +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 30 +commands: +- script: | + kubectl -n $NAMESPACE logs airflow-scheduler-default-0 | grep -q "Database migrating done!" && exit 1 || exit 0