Description
Hi team, I'm trying to update the `runtime_flags` values in my Docker Compose file, and `docker compose up` now fails whenever `runtime_flags` is populated with any values (I know it expects an array of strings). I haven't changed the compose file since I was last working on it yesterday evening, yet now I cannot get it to come up unless I completely comment out the `runtime_flags` portion of my model configs. Please see my compose file below. I've looked through the Model Runner logs via Docker Desktop, but they don't show any useful context as to why the compose up is failing. Is there something I need to fix, or is there potentially an issue with the compose parsing logic for models?
```yaml
services:
  ollama-webui:
    image: ghcr.io/open-webui/open-webui:latest
    container_name: open-webui
    restart: always
    ports:
      - 3021:8080/tcp
    environment:
      - WEB_SCRAPER_URL=http://host.docker.internal:3030/scrape
      - GOOGLE_DRIVE_CLIENT_ID=${GOOGLE_DRIVE_CLIENT_ID}
      - GOOGLE_DRIVE_API_KEY=${GOOGLE_DRIVE_API_KEY}
      - GOOGLE_REDIRECT_URI=${GOOGLE_REDIRECT_URI}
    volumes:
      - open-webui:/app/backend/data # Persistent storage for UI data
    models:
      - gpt-oss
      - qwen3-vl
      - qwen3-vl-30b
      - mistral3
      - devstral2-24b

models:
  gpt-oss:
    model: ai/gpt-oss:20B-F16
    context_size: 65536
    runtime_flags:
      - "--n-gpu-layers"
      - "99"
      - "--no-mmap"
      - "--flash-attn"
      - "on"
  qwen3-vl:
    model: ai/qwen3-vl:latest
    context_size: 16384 # 32768
    runtime_flags:
      - "--n-gpu-layers"
      - "99"
      - "--jinja"
      - "--top-p"
      - "0.8"
      - "--top-k"
      - "20"
      - "--temp"
      - "0.7"
      - "--min-p"
      - "0.0"
      - "--presence-penalty"
      - "1.5"
      - "--no-mmap"
      - "--flash-attn"
      - "on"
      - "--cache-type-k"
      - "q8_0"
      - "--cache-type-v"
      - "q8_0"
  qwen3-vl-30b:
    model: localai/qwen3vl-30b-65k:latest # huggingface.co/unsloth/qwen3-vl-30b-a3b-instruct-gguf:q4_0
    context_size: 81920 # 81920 | 131072
    runtime_flags:
      - "--n-gpu-layers"
      - "99"
      - "--jinja"
      - "--no-mmap"
      - "--top-p"
      - "0.8"
      - "--top-k"
      - "20"
      - "--temp"
      - "0.7"
      - "--presence-penalty"
      - "1.5"
      - "--n-predict"
      - "32768"
      - "--no-mmap"
      - "--flash-attn"
      - "on"
      - "--cache-type-k"
      - "q8_0"
      - "--cache-type-v"
      - "q8_0"
  mistral3:
    model: ai/ministral3:14B-UD-Q8_K_XL
    context_size: 65536
    runtime_flags:
      - "--threads"
      - "-1"
      - "--n-gpu-layers"
      - "99"
      - "--jinja"
      # - "--top-p"
      # - "0.95"
      # - "--temp"
      # - "0.7"
      - "--no-mmap"
      - "--flash-attn"
      - "on"
      - "--cache-type-k"
      - "q8_0"
      - "--cache-type-v"
      - "q8_0"
      # - "--batch-size"
      # - "1024"
      # - "--ctx-size"
      # - "49152"
  devstral2-24b:
    model: localai/devstral-small-2-24b # huggingface.co/unsloth/devstral-small-2-24b-instruct-2512-gguf:q4_0
    context_size: 131072 # 81920 | 131072
    runtime_flags:
      - "--n-gpu-layers"
      - "99"
      - "--n-predict"
      - "32768"
      - "--no-mmap"
      - "--flash-attn"
      - "on"
      - "--cache-type-k"
      - "q8_0"
      - "--cache-type-v"
      - "q8_0"

volumes:
  open-webui:
```
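In case a smaller repro is useful: the failure doesn't seem tied to any particular model or flag, since any populated `runtime_flags` list triggers it and commenting the list out lets `docker compose up` succeed. A stripped-down reduction of the file above (I haven't run this exact file standalone, but it matches the failing shape) would be:

```yaml
# Minimal reduction of the compose file above (untested as a standalone file).
# Per the behavior described, compose up fails while the runtime_flags list
# is present and succeeds once it is commented out.
services:
  app:
    image: ghcr.io/open-webui/open-webui:latest
    models:
      - gpt-oss

models:
  gpt-oss:
    model: ai/gpt-oss:20B-F16
    context_size: 65536
    runtime_flags:        # <- failing portion
      - "--n-gpu-layers"
      - "99"
```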
Error message whenever `runtime_flags` is populated:
```text
[+] Running 0/5
 - mistral3       Configuring   0.0s
 - devstral2-24b  Configuring   0.0s
 - qwen3-vl       Configuring   0.0s
 - qwen3-vl-30b   Configuring   0.0s
 - gpt-oss        Configuring   0.0s
exit status 1
```
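Happy to gather more detail on my end if it helps narrow this down; for example (standard Docker CLI commands, nothing model-runner-specific assumed):

```sh
# Report the engine and compose plugin versions in use
docker version
docker compose version

# Parse and print the fully resolved compose file without starting anything;
# if the parser itself is choking on the models/runtime_flags section, the
# problem might surface here too
docker compose config
```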