2 changes: 2 additions & 0 deletions charts/azimuth-chat/azimuth-ui.schema.yaml
@@ -25,6 +25,8 @@ sortOrder:
- /azimuth-llm/ui/appSettings/model_instruction
- /azimuth-llm/ui/appSettings/page_title
- /azimuth-llm/api/image/version
- /azimuth-llm/api/vllmOmniEnabled
- /azimuth-llm/api/omniModelType
- /azimuth-llm/ui/appSettings/llm_params/temperature
- /azimuth-llm/ui/appSettings/llm_params/max_tokens
- /azimuth-llm/ui/appSettings/llm_params/frequency_penalty
12 changes: 12 additions & 0 deletions charts/azimuth-chat/values.schema.json
@@ -48,6 +48,18 @@
"type": "string",
"title": "Node Group",
"description": "(Optional) Require that the LLM runs on a particular node group. Must match a node group name in the cluster."
},
"vllmOmniEnabled": {
"type": "boolean",
"title": "Use vLLM Omni",
"description": "Deploy vLLM Omni as backend. By default, this will still only work with standard LLMs unless 'Use Omni-style Model' is also enabled.",
"default": false
},
"omniModelType": {
"type": "boolean",
"title": "Use Omni-style Model",
"description": "Enable when using Omni-style models. This only works with vLLM Omni. NOTE: This is an experimental feature. The web interface only supports text-based interactions at this time.",
"default": false
}
}
},
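A minimal sketch of how these two new options might be set when deploying this chart. The `azimuth-llm` subchart key is inferred from the sortOrder paths above; the values shown are illustrative, not defaults:

```yaml
# Hypothetical values override for the azimuth-chat chart.
# The "azimuth-llm" subchart key is inferred from the sortOrder paths above.
azimuth-llm:
  api:
    vllmOmniEnabled: true  # deploy the vllm/vllm-omni image as the backend
    omniModelType: true    # pass --omni to "vllm serve" for Omni-style models
```

The same pair of options applies to the azimuth-image-analysis chart below, which adds an identical schema.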
2 changes: 2 additions & 0 deletions charts/azimuth-image-analysis/azimuth-ui.schema.yaml
@@ -24,6 +24,8 @@ sortOrder:
- /azimuth-llm/api/azimuthNodeGroupSelector
- /azimuth-llm/ui/appSettings/page_title
- /azimuth-llm/api/image/version
- /azimuth-llm/api/vllmOmniEnabled
- /azimuth-llm/api/omniModelType
- /azimuth-llm/ui/appSettings/llm_params/temperature
- /azimuth-llm/ui/appSettings/llm_params/max_tokens
- /azimuth-llm/ui/appSettings/llm_params/frequency_penalty
12 changes: 12 additions & 0 deletions charts/azimuth-image-analysis/values.schema.json
@@ -48,6 +48,18 @@
"type": "string",
"title": "Node Group",
"description": "(Optional) Require that the VLM runs on a particular node group. Must match a node group name in the cluster."
},
"vllmOmniEnabled": {
"type": "boolean",
"title": "Use vLLM Omni",
"description": "Deploy vLLM Omni as backend. By default, this will still only work with standard LLMs unless 'Use Omni-style Model' is also enabled.",
"default": false
},
"omniModelType": {
"type": "boolean",
"title": "Use Omni-style Model",
"description": "Enable when using Omni-style models. This only works with vLLM Omni. NOTE: This is an experimental feature. The web interface only supports text-based interactions at this time.",
"default": false
}
}
},
2 changes: 2 additions & 0 deletions charts/azimuth-llm/azimuth-ui.schema.yaml
@@ -22,6 +22,8 @@ sortOrder:
- /huggingface/model
- /huggingface/token
- /api/azimuthNodeGroupSelector
- /api/vllmOmniEnabled
- /api/omniModelType
- /ui/appSettings/model_instruction
- /ui/appSettings/page_title
- /api/image/version
10 changes: 10 additions & 0 deletions charts/azimuth-llm/templates/api/deployment.yml
@@ -23,9 +23,19 @@ spec:
image: "ghcr.io/stackhpc/vllm-cpu:{{ .Values.api.image.version }}"
{{- else if .Values.api.intelXPUsEnabled }}
image: "ghcr.io/stackhpc/vllm-xpu:{{ .Values.api.image.version }}"
{{- else if .Values.api.vllmOmniEnabled }}
image: "vllm/vllm-omni:{{ .Values.api.image.version }}"
{{- else }}
image: "vllm/vllm-openai:{{ .Values.api.image.version }}"
{{- end }}
{{- if .Values.api.vllmOmniEnabled }}
command:
- vllm
- serve
{{- if .Values.api.omniModelType }}
- --omni
{{- end }}
{{- end }}
ports:
- name: api
containerPort: 8000
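Because of the if/else chain above, the Omni image is only selected when neither the CPU branch nor the Intel XPU branch matches first. A sketch of what this template might render with both new flags enabled and the default image version; the exact rendered indentation and the mechanism that supplies the model name (presumably args defined elsewhere in the template) are assumptions:

```yaml
# Hypothetical rendered container spec, assuming api.vllmOmniEnabled: true,
# api.omniModelType: true, and api.image.version: v0.11.0.
image: "vllm/vllm-omni:v0.11.0"
command:
  - vllm
  - serve
  - --omni   # only emitted when omniModelType is true
  # the model to serve is presumably appended via args elsewhere in the template
ports:
  - name: api
    containerPort: 8000
```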
16 changes: 14 additions & 2 deletions charts/azimuth-llm/values.schema.json
@@ -36,15 +36,27 @@
"version": {
"type": "string",
"title": "Backend vLLM version",
"description": "The vLLM version to use as a backend. Must be a version tag from [this list](https://github.com/vllm-project/vllm/tags)",
"description": "The vLLM version to use as a backend. Must be a version tag from [this list](https://github.com/vllm-project/vllm/tags) when using vLLM or [this list](https://github.com/vllm-project/vllm-omni/tags) when using vLLM Omni.",
"default": "v0.11.0"
}
}
},
"vllmOmniEnabled": {
"type": "boolean",
"title": "Use vLLM Omni",
"description": "Deploy vLLM Omni as backend. By default, this will still only work with standard LLMs unless 'Use Omni-style Model' is also enabled.",
"default": false
},
"omniModelType": {
"type": "boolean",
"title": "Use Omni-style Model",
"description": "Enable when using Omni-style models. This only works with vLLM Omni. NOTE: This is an experimental feature. The web interface only supports text-based interactions at this time.",
"default": false
},
"azimuthNodeGroupSelector": {
"type": "string",
"title": "Node Group",
"description": "The node group to deploy the API backend to. Must match a node group label in the cluster. Leave empty to use any available node."
"description": "The node group to deploy the API backend to. Must match a node group label in the cluster. Leave empty to use any available node."
}
}
},
6 changes: 6 additions & 0 deletions charts/azimuth-llm/values.yaml
@@ -35,6 +35,7 @@ api:
image:
# Defaults to vllm/vllm-openai when api.gpus > 0,
# ghcr.io/stackhpc/vllm-xpu when api.gpus > 0 and intelXPUsEnabled is true,
# vllm/vllm-omni when vllmOmniEnabled is true,
# or ghcr.io/stackhpc/vllm-cpu when api.gpus == 0
repository:
version: v0.11.0
@@ -83,6 +84,10 @@ api:
gpus: 1
# Whether pods should request Intel GPUs as opposed to the default Nvidia GPUs
intelXPUsEnabled: false
# Whether to use the vLLM Omni image
vllmOmniEnabled: false
# Whether to use Omni-style models
omniModelType: false
# The update strategy to use for the deployment
# See https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#updating-a-deployment
# NOTE: The following RollingUpdate strategy offers a zero-downtime update but requires additional GPU worker nodes.
@@ -91,6 +96,7 @@ api:
# rollingUpdate:
# maxSurge: 1
# maxUnavailable: 0
updateStrategy:
type: Recreate
# The value of the vLLM backend's max_model_len argument (if the model's default is not suitable)
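As a usage sketch, the new defaults above could be overridden in a values file passed to the azimuth-llm chart directly; the version shown is a placeholder, not a recommendation:

```yaml
# Hypothetical values override for the azimuth-llm chart itself.
api:
  vllmOmniEnabled: true   # switch the image to vllm/vllm-omni
  omniModelType: true     # experimental; the web UI is text-only for now
  image:
    # When vllmOmniEnabled is true, this must be a tag from the
    # vllm-project/vllm-omni repository, per the schema description above.
    version: v0.11.0
```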