2 changes: 2 additions & 0 deletions charts/azimuth-chat/azimuth-ui.schema.yaml
@@ -25,6 +25,8 @@ sortOrder:
- /azimuth-llm/ui/appSettings/model_instruction
- /azimuth-llm/ui/appSettings/page_title
- /azimuth-llm/api/image/version
- /azimuth-llm/api/vllmOmniEnabled
- /azimuth-llm/api/omniModelType
- /azimuth-llm/ui/appSettings/llm_params/temperature
- /azimuth-llm/ui/appSettings/llm_params/max_tokens
- /azimuth-llm/ui/appSettings/llm_params/frequency_penalty
12 changes: 12 additions & 0 deletions charts/azimuth-chat/values.schema.json
@@ -48,6 +48,18 @@
"type": "string",
"title": "Node Group",
"description": "(Optional) Require that the LLM runs on a particular node group. Must match a node group name in the cluster."
},
"vllmOmniEnabled": {
"type": "boolean",
"title": "Use vLLM Omni",
"description": "Deploy vLLM Omni as backend. By default, this will still only work with standard LLMs unless 'Use Omni-style Model' is also enabled.",
"default": false
},
"omniModelType": {
"type": "boolean",
"title": "Use Omni-style Model",
"description": "Enable when using Omni-style models. This only works with vLLM Omni. NOTE: This is an experimental feature. The web interface only supports text-based interactions at this time.",
"default": false
}
}
},
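A minimal sketch of how these two new options might be set when deploying this chart. The `azimuth-llm` subchart key is inferred from the sortOrder paths above; the values shown are illustrative, not defaults:

```yaml
# Hypothetical values override for the azimuth-chat chart.
# The "azimuth-llm" subchart key is inferred from the sortOrder paths above.
azimuth-llm:
  api:
    vllmOmniEnabled: true  # deploy the vllm/vllm-omni image as the backend
    omniModelType: true    # pass --omni to "vllm serve" for Omni-style models
```

The same pair of options applies to the azimuth-image-analysis chart below, which adds an identical schema.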
2 changes: 2 additions & 0 deletions charts/azimuth-image-analysis/azimuth-ui.schema.yaml
@@ -24,6 +24,8 @@ sortOrder:
- /azimuth-llm/api/azimuthNodeGroupSelector
- /azimuth-llm/ui/appSettings/page_title
- /azimuth-llm/api/image/version
- /azimuth-llm/api/vllmOmniEnabled
- /azimuth-llm/api/omniModelType
- /azimuth-llm/ui/appSettings/llm_params/temperature
- /azimuth-llm/ui/appSettings/llm_params/max_tokens
- /azimuth-llm/ui/appSettings/llm_params/frequency_penalty
12 changes: 12 additions & 0 deletions charts/azimuth-image-analysis/values.schema.json
@@ -48,6 +48,18 @@
"type": "string",
"title": "Node Group",
"description": "(Optional) Require that the VLM runs on a particular node group. Must match a node group name in the cluster."
},
"vllmOmniEnabled": {
"type": "boolean",
"title": "Use vLLM Omni",
"description": "Deploy vLLM Omni as backend. By default, this will still only work with standard LLMs unless 'Use Omni-style Model' is also enabled.",
"default": false
},
"omniModelType": {
"type": "boolean",
"title": "Use Omni-style Model",
"description": "Enable when using Omni-style models. This only works with vLLM Omni. NOTE: This is an experimental feature. The web interface only supports text-based interactions at this time.",
"default": false
}
}
},
2 changes: 2 additions & 0 deletions charts/azimuth-llm/azimuth-ui.schema.yaml
@@ -22,6 +22,8 @@ sortOrder:
- /huggingface/model
- /huggingface/token
- /api/azimuthNodeGroupSelector
- /api/vllmOmniEnabled
- /api/omniModelType
- /ui/appSettings/model_instruction
- /ui/appSettings/page_title
- /api/image/version
10 changes: 10 additions & 0 deletions charts/azimuth-llm/templates/api/deployment.yml
@@ -23,9 +23,19 @@ spec:
image: "ghcr.io/stackhpc/vllm-cpu:{{ .Values.api.image.version }}"
{{- else if .Values.api.intelXPUsEnabled }}
image: "ghcr.io/stackhpc/vllm-xpu:{{ .Values.api.image.version }}"
{{- else if .Values.api.vllmOmniEnabled }}
image: "vllm/vllm-omni:{{ .Values.api.image.version }}"
{{- else }}
image: "vllm/vllm-openai:{{ .Values.api.image.version }}"
{{- end }}
{{- if .Values.api.vllmOmniEnabled }}
command:
- vllm
- serve
{{- if .Values.api.omniModelType }}
- --omni
{{- end }}
{{- end }}
ports:
- name: api
containerPort: 8000
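Because of the if/else chain above, the Omni image is only selected when neither the CPU branch nor the Intel XPU branch matches first. A sketch of what this template might render with both new flags enabled and the default image version; the exact rendered indentation and the mechanism that supplies the model name (presumably args defined elsewhere in the template) are assumptions:

```yaml
# Hypothetical rendered container spec, assuming api.vllmOmniEnabled: true,
# api.omniModelType: true, and api.image.version: v0.11.0.
image: "vllm/vllm-omni:v0.11.0"
command:
  - vllm
  - serve
  - --omni   # only emitted when omniModelType is true
  # the model to serve is presumably appended via args elsewhere in the template
ports:
  - name: api
    containerPort: 8000
```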
16 changes: 14 additions & 2 deletions charts/azimuth-llm/values.schema.json
@@ -36,15 +36,27 @@
"version": {
"type": "string",
"title": "Backend vLLM version",
"description": "The vLLM version to use as a backend. Must be a version tag from [this list](https://github.com/vllm-project/vllm/tags)",
"description": "The vLLM version to use as a backend. Must be a version tag from [this list](https://github.com/vllm-project/vllm/tags) when using vLLM or [this list](https://github.com/vllm-project/vllm-omni/tags) when using vLLM Omni.",
"default": "v0.11.0"
}
}
},
"vllmOmniEnabled": {
"type": "boolean",
"title": "Use vLLM Omni",
"description": "Deploy vLLM Omni as backend. By default, this will still only work with standard LLMs unless 'Use Omni-style Model' is also enabled.",
"default": false
},
"omniModelType": {
"type": "boolean",
"title": "Use Omni-style Model",
"description": "Enable when using Omni-style models. This only works with vLLM Omni. NOTE: This is an experimental feature. The web interface only supports text-based interactions at this time.",
"default": false
},
"azimuthNodeGroupSelector": {
"type": "string",
"title": "Node Group",
"description": "The node group to deploy the API backend to. Must match a node group label in the cluster. Leave empty to use any available node."
"description": "The node group to deploy the API backend to. Must match a node group label in the cluster. Leave empty to use any available node."
}
}
},
6 changes: 6 additions & 0 deletions charts/azimuth-llm/values.yaml
@@ -35,6 +35,7 @@ api:
image:
# Defaults to vllm/vllm-openai when api.gpus > 0,
# ghcr.io/stackhpc/vllm-xpu when api.gpus > 0 and intelXPUsEnabled is true,
# vllm/vllm-omni when vllmOmniEnabled is true,
# or ghcr.io/stackhpc/vllm-cpu when api.gpus == 0
repository:
version: v0.11.0
@@ -83,6 +84,10 @@ api:
gpus: 1
# Whether pods should request Intel GPUs as opposed to the default Nvidia GPUs
intelXPUsEnabled: false
# Whether to use the vLLM Omni image
vllmOmniEnabled: false
# Whether to use Omni-style models
omniModelType: false
# The update strategy to use for the deployment
# See https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#updating-a-deployment
# NOTE: The following RollingUpdate strategy offers a zero-downtime update but requires additional GPU worker nodes.
@@ -91,6 +96,7 @@ api:
# rollingUpdate:
# maxSurge: 1
# maxUnavailable: 0
updateStrategy:
type: Recreate
# The value of the vLLM backend's max_model_len argument (if the model's default is not suitable)
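As a usage sketch, the new defaults above could be overridden in a values file passed to the azimuth-llm chart directly; the version shown is a placeholder, not a recommendation:

```yaml
# Hypothetical values override for the azimuth-llm chart itself.
api:
  vllmOmniEnabled: true   # switch the image to vllm/vllm-omni
  omniModelType: true     # experimental; the web UI is text-only for now
  image:
    # When vllmOmniEnabled is true, this must be a tag from the
    # vllm-project/vllm-omni repository, per the schema description above.
    version: v0.11.0
```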