From a0290fff7426c274f14a40f36524c6984f08e08b Mon Sep 17 00:00:00 2001 From: darthcav <7068084+darthcav@users.noreply.github.com> Date: Thu, 21 May 2026 13:57:50 +0200 Subject: [PATCH 1/2] feat(windows): add Vulkan GPU detection for Intel Arc and other non-CUDA GPUs CanUseGPU on Windows only checked NVIDIA CUDA (amd64) and Qualcomm Adreno OpenCL (arm64), so Intel Arc and other Vulkan-capable GPUs were silently ignored and fell back to the CPU llama.cpp variant. Add hasVulkanCapableGPU (PCI-based, excludes NVIDIA and Adreno which are handled by their own backends) and hasVulkan (probes vulkan-1.dll, mirroring the existing OpenCL.dll probe). Update CanUseGPU to call hasVulkan on amd64 when no CUDA GPU is found, and wire up a "vulkan" variant in ensureLatestLlamaCpp with priority CUDA > Vulkan > CPU. A TODO marks the point where a "vulkan" image variant of docker/docker-model-backend-llamacpp needs to be published to Docker Hub to complete the end-to-end fix. Fixes #925 Co-Authored-By: Claude Sonnet 4.6 --- .../backends/llamacpp/download_windows.go | 15 +++++- .../backends/llamacpp/gpuinfo_windows.go | 47 ++++++++++++++++++- 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/pkg/inference/backends/llamacpp/download_windows.go b/pkg/inference/backends/llamacpp/download_windows.go index c60574c71..63a3ee460 100644 --- a/pkg/inference/backends/llamacpp/download_windows.go +++ b/pkg/inference/backends/llamacpp/download_windows.go @@ -15,7 +15,7 @@ func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger, llamaCppPath, vendoredServerStoragePath string, ) error { nvGPUInfoBin := filepath.Join(vendoredServerStoragePath, "com.docker.nv-gpu-info.exe") - var canUseCUDA11, canUseOpenCL bool + var canUseCUDA11, canUseOpenCL, canUseVulkan bool var err error ShouldUseGPUVariantLock.Lock() defer ShouldUseGPUVariantLock.Unlock() @@ -27,6 +27,17 @@ func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger, l.status = 
inference.FormatError(fmt.Sprintf("failed to check CUDA 11 capability: %v", err)) return fmt.Errorf("failed to check CUDA 11 capability: %w", err) } + if !canUseCUDA11 { + // Check for Vulkan-capable GPUs (Intel Arc, AMD, etc.) when CUDA + // is not available. + // TODO: publish a "vulkan" variant of docker/docker-model-backend-llamacpp + // to Docker Hub so this detection selects a Vulkan-optimised build. + canUseVulkan, err = hasVulkan() + if err != nil { + l.status = inference.FormatError(fmt.Sprintf("failed to check Vulkan capability: %v", err)) + return fmt.Errorf("failed to check Vulkan capability: %w", err) + } + } case "arm64": canUseOpenCL, err = hasOpenCL() if err != nil { @@ -39,6 +50,8 @@ func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger, desiredVariant := "cpu" if canUseCUDA11 { desiredVariant = "cuda" + } else if canUseVulkan { + desiredVariant = "vulkan" } else if canUseOpenCL { desiredVariant = "opencl" } diff --git a/pkg/inference/backends/llamacpp/gpuinfo_windows.go b/pkg/inference/backends/llamacpp/gpuinfo_windows.go index e0bb0f646..cec2b84e4 100644 --- a/pkg/inference/backends/llamacpp/gpuinfo_windows.go +++ b/pkg/inference/backends/llamacpp/gpuinfo_windows.go @@ -99,14 +99,59 @@ func hasOpenCL() (bool, error) { return true, nil } +// hasVulkanCapableGPU returns true if at least one GPU that is neither +// NVIDIA (handled via CUDA) nor a Qualcomm Adreno (handled via OpenCL) +// is detected. Intel Arc, AMD, and other Vulkan-capable discrete or +// integrated GPUs fall into this category. 
+func hasVulkanCapableGPU() (bool, error) { + gpus, err := ghw.GPU() + if err != nil { + return false, err + } + for _, gpu := range gpus.GraphicsCards { + vendor := strings.ToLower(gpu.DeviceInfo.Vendor.Name) + product := gpu.DeviceInfo.Product.Name + isNVIDIA := vendor == "nvidia" + isAdreno := strings.Contains(product, "Adreno") || strings.Contains(product, "Qualcomm") + if !isNVIDIA && !isAdreno { + return true, nil + } + } + return false, nil +} + +// hasVulkan returns true when a GPU that is neither NVIDIA nor Adreno is present AND +// the Vulkan runtime library (vulkan-1.dll) is loadable. This mirrors +// the OpenCL.dll probe used by hasOpenCL. +func hasVulkan() (bool, error) { + capable, err := hasVulkanCapableGPU() + if !capable || err != nil { + return false, err + } + h, err := syscall.LoadLibrary("vulkan-1.dll") + if err != nil { + if errors.Is(err, syscall.ERROR_MOD_NOT_FOUND) { + return false, nil + } + return false, fmt.Errorf("unable to load Vulkan DLL: %w", err) + } + syscall.FreeLibrary(h) + return true, nil +} + func CanUseGPU(ctx context.Context, nvGPUInfoBin string) (bool, error) { // We don't ship com.docker.nv-gpu-info.exe on Windows/ARM64 at the moment, - // so skip the CUDA check there for now. The OpenCL check is portable. + // so skip the CUDA check there for now; the Vulkan probe is likewise amd64-only. The OpenCL check is portable. if runtime.GOARCH == "amd64" { haveCUDA11GPU, err := hasCUDA11CapableGPU(ctx, nvGPUInfoBin) if haveCUDA11GPU || err != nil { return haveCUDA11GPU, err } + // No CUDA GPU found: check for Vulkan-capable GPUs (Intel Arc, AMD, etc.). 
+ haveVulkan, err := hasVulkan() + if haveVulkan || err != nil { + return haveVulkan, err + } } return hasOpenCL() } From 1b2038bf3d0602b94c6d98b5f22e1aecd44b15c4 Mon Sep 17 00:00:00 2001 From: darthcav <7068084+darthcav@users.noreply.github.com> Date: Thu, 21 May 2026 14:22:22 +0200 Subject: [PATCH 2/2] fix: add nil guards for DeviceInfo fields in GPU detection functions Co-Authored-By: Claude Sonnet 4.6 --- pkg/inference/backends/llamacpp/gpuinfo_windows.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pkg/inference/backends/llamacpp/gpuinfo_windows.go b/pkg/inference/backends/llamacpp/gpuinfo_windows.go index cec2b84e4..be13693a9 100644 --- a/pkg/inference/backends/llamacpp/gpuinfo_windows.go +++ b/pkg/inference/backends/llamacpp/gpuinfo_windows.go @@ -20,6 +20,9 @@ func hasNVIDIAGPU() (bool, error) { return false, err } for _, gpu := range gpus.GraphicsCards { + if gpu.DeviceInfo == nil || gpu.DeviceInfo.Vendor == nil { + continue + } if strings.ToLower(gpu.DeviceInfo.Vendor.Name) == "nvidia" { return true, nil } @@ -65,6 +68,9 @@ func hasSupportedAdrenoGPU() (bool, error) { return false, err } for _, gpu := range gpus.GraphicsCards { + if gpu.DeviceInfo == nil || gpu.DeviceInfo.Product == nil { + continue + } isAdrenoFamily := strings.Contains(gpu.DeviceInfo.Product.Name, "Adreno") || strings.Contains(gpu.DeviceInfo.Product.Name, "Qualcomm") if isAdrenoFamily { @@ -109,6 +115,9 @@ func hasVulkanCapableGPU() (bool, error) { return false, err } for _, gpu := range gpus.GraphicsCards { + if gpu.DeviceInfo == nil || gpu.DeviceInfo.Vendor == nil || gpu.DeviceInfo.Product == nil { + continue + } vendor := strings.ToLower(gpu.DeviceInfo.Vendor.Name) product := gpu.DeviceInfo.Product.Name isNVIDIA := vendor == "nvidia"