From a0290fff7426c274f14a40f36524c6984f08e08b Mon Sep 17 00:00:00 2001
From: darthcav <7068084+darthcav@users.noreply.github.com>
Date: Thu, 21 May 2026 13:57:50 +0200
Subject: [PATCH 1/2] feat(windows): add Vulkan GPU detection for Intel Arc and
 other non-CUDA GPUs

CanUseGPU on Windows only checked NVIDIA CUDA (amd64) and Qualcomm
Adreno OpenCL (arm64), so Intel Arc and other Vulkan-capable GPUs were
silently ignored and fell back to the CPU llama.cpp variant.

Add hasVulkanCapableGPU (a vendor/product-name filter over the GPUs
reported by ghw; it excludes NVIDIA and Adreno, which are handled by
their own backends) and hasVulkan (which requires such a GPU and then
probes vulkan-1.dll, mirroring the existing OpenCL.dll probe). Update
CanUseGPU to call hasVulkan on amd64 when no CUDA GPU is found, and
wire up a "vulkan" variant in ensureLatestLlamaCpp with priority
CUDA > Vulkan > CPU.

A "vulkan" image variant of docker/docker-model-backend-llamacpp still
has to be published to Docker Hub to complete the end-to-end fix; a TODO
in ensureLatestLlamaCpp marks where that variant is selected.

Fixes #925

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../backends/llamacpp/download_windows.go     | 15 +++++-
 .../backends/llamacpp/gpuinfo_windows.go      | 47 ++++++++++++++++++-
 2 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/pkg/inference/backends/llamacpp/download_windows.go b/pkg/inference/backends/llamacpp/download_windows.go
index c60574c71..63a3ee460 100644
--- a/pkg/inference/backends/llamacpp/download_windows.go
+++ b/pkg/inference/backends/llamacpp/download_windows.go
@@ -15,7 +15,7 @@ func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger,
 	llamaCppPath, vendoredServerStoragePath string,
 ) error {
 	nvGPUInfoBin := filepath.Join(vendoredServerStoragePath, "com.docker.nv-gpu-info.exe")
-	var canUseCUDA11, canUseOpenCL bool
+	var canUseCUDA11, canUseOpenCL, canUseVulkan bool
 	var err error
 	ShouldUseGPUVariantLock.Lock()
 	defer ShouldUseGPUVariantLock.Unlock()
@@ -27,6 +27,17 @@ func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger,
 				l.status = inference.FormatError(fmt.Sprintf("failed to check CUDA 11 capability: %v", err))
 				return fmt.Errorf("failed to check CUDA 11 capability: %w", err)
 			}
+			if !canUseCUDA11 {
+				// Check for Vulkan-capable GPUs (Intel Arc, AMD, etc.) when CUDA
+				// is not available.
+				// TODO: publish a "vulkan" variant of docker/docker-model-backend-llamacpp
+				// to Docker Hub so this detection selects a Vulkan-optimised build.
+				canUseVulkan, err = hasVulkan()
+				if err != nil {
+					l.status = inference.FormatError(fmt.Sprintf("failed to check Vulkan capability: %v", err))
+					return fmt.Errorf("failed to check Vulkan capability: %w", err)
+				}
+			}
 		case "arm64":
 			canUseOpenCL, err = hasOpenCL()
 			if err != nil {
@@ -39,6 +50,8 @@ func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger,
 	desiredVariant := "cpu"
 	if canUseCUDA11 {
 		desiredVariant = "cuda"
+	} else if canUseVulkan {
+		desiredVariant = "vulkan"
 	} else if canUseOpenCL {
 		desiredVariant = "opencl"
 	}
diff --git a/pkg/inference/backends/llamacpp/gpuinfo_windows.go b/pkg/inference/backends/llamacpp/gpuinfo_windows.go
index e0bb0f646..cec2b84e4 100644
--- a/pkg/inference/backends/llamacpp/gpuinfo_windows.go
+++ b/pkg/inference/backends/llamacpp/gpuinfo_windows.go
@@ -99,14 +99,59 @@ func hasOpenCL() (bool, error) {
 	return true, nil
 }
 
+// hasVulkanCapableGPU reports whether ghw lists at least one graphics card
+// whose vendor is not NVIDIA (handled via CUDA) and whose product name does
+// not contain "Adreno" or "Qualcomm" (handled via OpenCL). This is a name
+// filter only; it does not itself verify that the card supports Vulkan.
+func hasVulkanCapableGPU() (bool, error) {
+	gpus, err := ghw.GPU()
+	if err != nil {
+		return false, err
+	}
+	for _, gpu := range gpus.GraphicsCards {
+		vendor := strings.ToLower(gpu.DeviceInfo.Vendor.Name)
+		product := gpu.DeviceInfo.Product.Name
+		isNVIDIA := vendor == "nvidia"
+		isAdreno := strings.Contains(product, "Adreno") || strings.Contains(product, "Qualcomm")
+		if !isNVIDIA && !isAdreno {
+			return true, nil
+		}
+	}
+	return false, nil
+}
+
+// hasVulkan reports whether hasVulkanCapableGPU finds a candidate GPU AND
+// vulkan-1.dll can be loaded. A missing DLL (ERROR_MOD_NOT_FOUND) yields
+// (false, nil); any other load failure is returned as a wrapped error.
+func hasVulkan() (bool, error) {
+	capable, err := hasVulkanCapableGPU()
+	if !capable || err != nil {
+		return false, err
+	}
+	h, err := syscall.LoadLibrary("vulkan-1.dll")
+	if err != nil {
+		if errors.Is(err, syscall.ERROR_MOD_NOT_FOUND) {
+			return false, nil
+		}
+		return false, fmt.Errorf("unable to load Vulkan DLL: %w", err)
+	}
+	syscall.FreeLibrary(h) // probe only: the handle is not used afterwards
+	return true, nil
+}
+
 func CanUseGPU(ctx context.Context, nvGPUInfoBin string) (bool, error) {
 	// We don't ship com.docker.nv-gpu-info.exe on Windows/ARM64 at the moment,
-	// so skip the CUDA check there for now. The OpenCL check is portable.
+	// so skip the CUDA check there for now; the Vulkan fallback below is likewise amd64-only. The OpenCL check is portable.
 	if runtime.GOARCH == "amd64" {
 		haveCUDA11GPU, err := hasCUDA11CapableGPU(ctx, nvGPUInfoBin)
 		if haveCUDA11GPU || err != nil {
 			return haveCUDA11GPU, err
 		}
+		// No CUDA GPU found: check for Vulkan-capable GPUs (Intel Arc, AMD, etc.).
+		haveVulkan, err := hasVulkan()
+		if haveVulkan || err != nil {
+			return haveVulkan, err
+		}
 	}
 	return hasOpenCL()
 }

From 1b2038bf3d0602b94c6d98b5f22e1aecd44b15c4 Mon Sep 17 00:00:00 2001
From: darthcav <7068084+darthcav@users.noreply.github.com>
Date: Thu, 21 May 2026 14:22:22 +0200
Subject: [PATCH 2/2] fix: add nil guards for DeviceInfo fields in GPU
 detection functions

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 pkg/inference/backends/llamacpp/gpuinfo_windows.go | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/pkg/inference/backends/llamacpp/gpuinfo_windows.go b/pkg/inference/backends/llamacpp/gpuinfo_windows.go
index cec2b84e4..be13693a9 100644
--- a/pkg/inference/backends/llamacpp/gpuinfo_windows.go
+++ b/pkg/inference/backends/llamacpp/gpuinfo_windows.go
@@ -20,6 +20,9 @@ func hasNVIDIAGPU() (bool, error) {
 		return false, err
 	}
 	for _, gpu := range gpus.GraphicsCards {
+		if gpu.DeviceInfo == nil || gpu.DeviceInfo.Vendor == nil {
+			continue
+		}
 		if strings.ToLower(gpu.DeviceInfo.Vendor.Name) == "nvidia" {
 			return true, nil
 		}
@@ -65,6 +68,9 @@ func hasSupportedAdrenoGPU() (bool, error) {
 		return false, err
 	}
 	for _, gpu := range gpus.GraphicsCards {
+		if gpu.DeviceInfo == nil || gpu.DeviceInfo.Product == nil {
+			continue
+		}
 		isAdrenoFamily := strings.Contains(gpu.DeviceInfo.Product.Name, "Adreno") ||
 			strings.Contains(gpu.DeviceInfo.Product.Name, "Qualcomm")
 		if isAdrenoFamily {
@@ -109,6 +115,9 @@ func hasVulkanCapableGPU() (bool, error) {
 		return false, err
 	}
 	for _, gpu := range gpus.GraphicsCards {
+		if gpu.DeviceInfo == nil || gpu.DeviceInfo.Vendor == nil || gpu.DeviceInfo.Product == nil {
+			continue
+		}
 		vendor := strings.ToLower(gpu.DeviceInfo.Vendor.Name)
 		product := gpu.DeviceInfo.Product.Name
 		isNVIDIA := vendor == "nvidia"