From dcbc14dec9463319e43caf67b348b7305624efe8 Mon Sep 17 00:00:00 2001 From: Dan Isla Date: Thu, 19 Sep 2019 23:49:41 +0000 Subject: [PATCH] add nvidia-modeset device if present --- pkg/gpu/nvidia/alpha_plugin_test.go | 6 +++++- pkg/gpu/nvidia/beta_plugin_test.go | 6 +++++- pkg/gpu/nvidia/manager.go | 12 ++++++++++-- pkg/gpu/nvidia/multiple_versions_test.go | 6 +++++- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/pkg/gpu/nvidia/alpha_plugin_test.go b/pkg/gpu/nvidia/alpha_plugin_test.go index effac3325..c54461d85 100644 --- a/pkg/gpu/nvidia/alpha_plugin_test.go +++ b/pkg/gpu/nvidia/alpha_plugin_test.go @@ -98,13 +98,16 @@ func TestNvidiaGPUManagerAlphaAPI(t *testing.T) { testNvidiaCtlDevice := path.Join(testDevDir, nvidiaCtlDevice) testNvidiaUVMDevice := path.Join(testDevDir, nvidiaUVMDevice) testNvidiaUVMToolsDevice := path.Join(testDevDir, nvidiaUVMToolsDevice) + testNvidiaModesetDevice := path.Join(testDevDir, nvidiaModesetDevice) os.Create(testNvidiaCtlDevice) os.Create(testNvidiaUVMDevice) os.Create(testNvidiaUVMToolsDevice) - testGpuManager.defaultDevices = []string{testNvidiaCtlDevice, testNvidiaUVMDevice, testNvidiaUVMToolsDevice} + os.Create(testNvidiaModesetDevice) + testGpuManager.defaultDevices = []string{testNvidiaCtlDevice, testNvidiaUVMDevice, testNvidiaUVMToolsDevice, testNvidiaModesetDevice} defer os.Remove(testNvidiaCtlDevice) defer os.Remove(testNvidiaUVMDevice) defer os.Remove(testNvidiaUVMToolsDevice) + defer os.Remove(testNvidiaModesetDevice) gpu1 := path.Join(testDevDir, "nvidia1") gpu2 := path.Join(testDevDir, "nvidia2") @@ -179,6 +182,7 @@ func TestNvidiaGPUManagerAlphaAPI(t *testing.T) { as.Contains(retDevices, testNvidiaCtlDevice) as.Contains(retDevices, testNvidiaUVMDevice) as.Contains(retDevices, testNvidiaUVMToolsDevice) + as.Contains(retDevices, testNvidiaModesetDevice) resp, err = client.Allocate(context.Background(), &pluginapi.AllocateRequest{ DevicesIDs: []string{"nvidia1", "nvidia3"}, }) diff --git a/pkg/gpu/nvidia/beta_plugin_test.go b/pkg/gpu/nvidia/beta_plugin_test.go index 8887867f8..8b650511f 100644 --- a/pkg/gpu/nvidia/beta_plugin_test.go +++ b/pkg/gpu/nvidia/beta_plugin_test.go @@ -45,13 +45,16 @@ func TestNvidiaGPUManagerBetaAPI(t *testing.T) { testNvidiaCtlDevice := path.Join(testDevDir, nvidiaCtlDevice) testNvidiaUVMDevice := path.Join(testDevDir, nvidiaUVMDevice) testNvidiaUVMToolsDevice := path.Join(testDevDir, nvidiaUVMToolsDevice) + testNvidiaModesetDevice := path.Join(testDevDir, nvidiaModesetDevice) os.Create(testNvidiaCtlDevice) os.Create(testNvidiaUVMDevice) os.Create(testNvidiaUVMToolsDevice) - testGpuManager.defaultDevices = []string{testNvidiaCtlDevice, testNvidiaUVMDevice, testNvidiaUVMToolsDevice} + os.Create(testNvidiaModesetDevice) + testGpuManager.defaultDevices = []string{testNvidiaCtlDevice, testNvidiaUVMDevice, testNvidiaUVMToolsDevice, testNvidiaModesetDevice} defer os.Remove(testNvidiaCtlDevice) defer os.Remove(testNvidiaUVMDevice) defer os.Remove(testNvidiaUVMToolsDevice) + defer os.Remove(testNvidiaModesetDevice) gpu1 := path.Join(testDevDir, "nvidia1") gpu2 := path.Join(testDevDir, "nvidia2") @@ -123,6 +126,7 @@ func TestNvidiaGPUManagerBetaAPI(t *testing.T) { as.Contains(retDevices, testNvidiaCtlDevice) as.Contains(retDevices, testNvidiaUVMDevice) as.Contains(retDevices, testNvidiaUVMToolsDevice) + as.Contains(retDevices, testNvidiaModesetDevice) resp, err = client.Allocate(context.Background(), &pluginapi.AllocateRequest{ ContainerRequests: []*pluginapi.ContainerAllocateRequest{ {DevicesIDs: []string{"nvidia1", "nvidia3"}}}}) diff --git a/pkg/gpu/nvidia/manager.go b/pkg/gpu/nvidia/manager.go index 42fc944ab..5eba627e2 100644 --- a/pkg/gpu/nvidia/manager.go +++ b/pkg/gpu/nvidia/manager.go @@ -34,8 +34,11 @@ const ( // If the driver installed correctly, these two devices will be there. nvidiaCtlDevice = "nvidiactl" nvidiaUVMDevice = "nvidia-uvm" - // Optional device. - nvidiaUVMToolsDevice = "nvidia-uvm-tools" + + // Optional devices. + nvidiaUVMToolsDevice = "nvidia-uvm-tools" + nvidiaModesetDevice = "nvidia-modeset" + nvidiaDeviceRE = `^nvidia[0-9]*$` gpuCheckInterval = 10 * time.Second pluginSocketCheckInterval = 1 * time.Second @@ -149,6 +152,11 @@ func (ngm *nvidiaGPUManager) Start() error { ngm.defaultDevices = []string{nvidiaCtlDevicePath, nvidiaUVMDevicePath} + nvidiaModesetDevicePath := path.Join(ngm.devDirectory, nvidiaModesetDevice) + if _, err := os.Stat(nvidiaModesetDevicePath); err == nil { + ngm.defaultDevices = append(ngm.defaultDevices, nvidiaModesetDevicePath) + } + nvidiaUVMToolsDevicePath := path.Join(ngm.devDirectory, nvidiaUVMToolsDevice) if _, err := os.Stat(nvidiaUVMToolsDevicePath); err == nil { ngm.defaultDevices = append(ngm.defaultDevices, nvidiaUVMToolsDevicePath) diff --git a/pkg/gpu/nvidia/multiple_versions_test.go b/pkg/gpu/nvidia/multiple_versions_test.go index d79ab5c8a..4d6216072 100644 --- a/pkg/gpu/nvidia/multiple_versions_test.go +++ b/pkg/gpu/nvidia/multiple_versions_test.go @@ -46,13 +46,16 @@ func TestNvidiaGPUManagerMultuipleAPIs(t *testing.T) { testNvidiaCtlDevice := path.Join(testDevDir, nvidiaCtlDevice) testNvidiaUVMDevice := path.Join(testDevDir, nvidiaUVMDevice) testNvidiaUVMToolsDevice := path.Join(testDevDir, nvidiaUVMToolsDevice) + testNvidiaModesetDevice := path.Join(testDevDir, nvidiaModesetDevice) os.Create(testNvidiaCtlDevice) os.Create(testNvidiaUVMDevice) os.Create(testNvidiaUVMToolsDevice) - testGpuManager.defaultDevices = []string{testNvidiaCtlDevice, testNvidiaUVMDevice, testNvidiaUVMToolsDevice} + os.Create(testNvidiaModesetDevice) + testGpuManager.defaultDevices = []string{testNvidiaCtlDevice, testNvidiaUVMDevice, testNvidiaUVMToolsDevice, testNvidiaModesetDevice} defer os.Remove(testNvidiaCtlDevice) defer os.Remove(testNvidiaUVMDevice) defer os.Remove(testNvidiaUVMToolsDevice) + defer os.Remove(testNvidiaModesetDevice) gpu1 := path.Join(testDevDir, "nvidia1") gpu2 := path.Join(testDevDir, "nvidia2") @@ -125,6 +128,7 @@ func TestNvidiaGPUManagerMultuipleAPIs(t *testing.T) { as.Contains(retDevices, testNvidiaCtlDevice) as.Contains(retDevices, testNvidiaUVMDevice) as.Contains(retDevices, testNvidiaUVMToolsDevice) + as.Contains(retDevices, testNvidiaModesetDevice) resp, err = clientBeta.Allocate(context.Background(), &pluginbeta.AllocateRequest{ ContainerRequests: []*pluginbeta.ContainerAllocateRequest{ {DevicesIDs: []string{"nvidia1", "nvidia3"}}}})