From 6b680aa3e33bf874ffa6f38706b3ac9c672a2794 Mon Sep 17 00:00:00 2001 From: Dan Isla Date: Thu, 19 Sep 2019 23:49:41 +0000 Subject: [PATCH 1/2] add nvidia-modeset device if present --- pkg/gpu/nvidia/alpha_plugin_test.go | 6 +++++- pkg/gpu/nvidia/beta_plugin_test.go | 6 +++++- pkg/gpu/nvidia/manager.go | 12 ++++++++++-- pkg/gpu/nvidia/multiple_versions_test.go | 6 +++++- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/pkg/gpu/nvidia/alpha_plugin_test.go b/pkg/gpu/nvidia/alpha_plugin_test.go index 559c0050e..ffafc86ca 100644 --- a/pkg/gpu/nvidia/alpha_plugin_test.go +++ b/pkg/gpu/nvidia/alpha_plugin_test.go @@ -98,13 +98,16 @@ func TestNvidiaGPUManagerAlphaAPI(t *testing.T) { testNvidiaCtlDevice := path.Join(testDevDir, nvidiaCtlDevice) testNvidiaUVMDevice := path.Join(testDevDir, nvidiaUVMDevice) testNvidiaUVMToolsDevice := path.Join(testDevDir, nvidiaUVMToolsDevice) + testNvidiaModesetDevice := path.Join(testDevDir, nvidiaModesetDevice) os.Create(testNvidiaCtlDevice) os.Create(testNvidiaUVMDevice) os.Create(testNvidiaUVMToolsDevice) - testGpuManager.defaultDevices = []string{testNvidiaCtlDevice, testNvidiaUVMDevice, testNvidiaUVMToolsDevice} + os.Create(testNvidiaModesetDevice) + testGpuManager.defaultDevices = []string{testNvidiaCtlDevice, testNvidiaUVMDevice, testNvidiaUVMToolsDevice, testNvidiaModesetDevice} defer os.Remove(testNvidiaCtlDevice) defer os.Remove(testNvidiaUVMDevice) defer os.Remove(testNvidiaUVMToolsDevice) + defer os.Remove(testNvidiaModesetDevice) gpu1 := path.Join(testDevDir, "nvidia1") gpu2 := path.Join(testDevDir, "nvidia2") @@ -179,6 +182,7 @@ func TestNvidiaGPUManagerAlphaAPI(t *testing.T) { as.Contains(retDevices, testNvidiaCtlDevice) as.Contains(retDevices, testNvidiaUVMDevice) as.Contains(retDevices, testNvidiaUVMToolsDevice) + as.Contains(retDevices, testNvidiaModesetDevice) resp, err = client.Allocate(context.Background(), &pluginapi.AllocateRequest{ DevicesIDs: []string{"nvidia1", "nvidia3"}, }) diff --git a/pkg/gpu/nvidia/beta_plugin_test.go b/pkg/gpu/nvidia/beta_plugin_test.go index 92547c25f..de1438684 100644 --- a/pkg/gpu/nvidia/beta_plugin_test.go +++ b/pkg/gpu/nvidia/beta_plugin_test.go @@ -45,13 +45,16 @@ func TestNvidiaGPUManagerBetaAPI(t *testing.T) { testNvidiaCtlDevice := path.Join(testDevDir, nvidiaCtlDevice) testNvidiaUVMDevice := path.Join(testDevDir, nvidiaUVMDevice) testNvidiaUVMToolsDevice := path.Join(testDevDir, nvidiaUVMToolsDevice) + testNvidiaModesetDevice := path.Join(testDevDir, nvidiaModesetDevice) os.Create(testNvidiaCtlDevice) os.Create(testNvidiaUVMDevice) os.Create(testNvidiaUVMToolsDevice) - testGpuManager.defaultDevices = []string{testNvidiaCtlDevice, testNvidiaUVMDevice, testNvidiaUVMToolsDevice} + os.Create(testNvidiaModesetDevice) + testGpuManager.defaultDevices = []string{testNvidiaCtlDevice, testNvidiaUVMDevice, testNvidiaUVMToolsDevice, testNvidiaModesetDevice} defer os.Remove(testNvidiaCtlDevice) defer os.Remove(testNvidiaUVMDevice) defer os.Remove(testNvidiaUVMToolsDevice) + defer os.Remove(testNvidiaModesetDevice) gpu1 := path.Join(testDevDir, "nvidia1") gpu2 := path.Join(testDevDir, "nvidia2") @@ -123,6 +126,7 @@ func TestNvidiaGPUManagerBetaAPI(t *testing.T) { as.Contains(retDevices, testNvidiaCtlDevice) as.Contains(retDevices, testNvidiaUVMDevice) as.Contains(retDevices, testNvidiaUVMToolsDevice) + as.Contains(retDevices, testNvidiaModesetDevice) resp, err = client.Allocate(context.Background(), &pluginapi.AllocateRequest{ ContainerRequests: []*pluginapi.ContainerAllocateRequest{ {DevicesIDs: []string{"nvidia1", "nvidia3"}}}}) diff --git a/pkg/gpu/nvidia/manager.go b/pkg/gpu/nvidia/manager.go index 713957cd8..d5879cec4 100644 --- a/pkg/gpu/nvidia/manager.go +++ b/pkg/gpu/nvidia/manager.go @@ -38,8 +38,11 @@ const ( // If the driver installed correctly, these two devices will be there. nvidiaCtlDevice = "nvidiactl" nvidiaUVMDevice = "nvidia-uvm" - // Optional device. - nvidiaUVMToolsDevice = "nvidia-uvm-tools" + + // Optional devices. + nvidiaUVMToolsDevice = "nvidia-uvm-tools" + nvidiaModesetDevice = "nvidia-modeset" + nvidiaDeviceRE = `^nvidia[0-9]*$` gpuCheckInterval = 10 * time.Second pluginSocketCheckInterval = 1 * time.Second @@ -207,6 +210,11 @@ func (ngm *nvidiaGPUManager) Start() error { } ngm.defaultDevices = []string{ngm.nvidiaCtlDevicePath, ngm.nvidiaUVMDevicePath} + nvidiaModesetDevicePath := path.Join(ngm.devDirectory, nvidiaModesetDevice) + if _, err := os.Stat(nvidiaModesetDevicePath); err == nil { + ngm.defaultDevices = append(ngm.defaultDevices, nvidiaModesetDevicePath) + } + nvidiaUVMToolsDevicePath := path.Join(ngm.devDirectory, nvidiaUVMToolsDevice) if _, err := os.Stat(nvidiaUVMToolsDevicePath); err == nil { ngm.defaultDevices = append(ngm.defaultDevices, nvidiaUVMToolsDevicePath) diff --git a/pkg/gpu/nvidia/multiple_versions_test.go b/pkg/gpu/nvidia/multiple_versions_test.go index 21c36cf5d..baab3f275 100644 --- a/pkg/gpu/nvidia/multiple_versions_test.go +++ b/pkg/gpu/nvidia/multiple_versions_test.go @@ -46,13 +46,16 @@ func TestNvidiaGPUManagerMultuipleAPIs(t *testing.T) { testNvidiaCtlDevice := path.Join(testDevDir, nvidiaCtlDevice) testNvidiaUVMDevice := path.Join(testDevDir, nvidiaUVMDevice) testNvidiaUVMToolsDevice := path.Join(testDevDir, nvidiaUVMToolsDevice) + testNvidiaModesetDevice := path.Join(testDevDir, nvidiaModesetDevice) os.Create(testNvidiaCtlDevice) os.Create(testNvidiaUVMDevice) os.Create(testNvidiaUVMToolsDevice) - testGpuManager.defaultDevices = []string{testNvidiaCtlDevice, testNvidiaUVMDevice, testNvidiaUVMToolsDevice} + os.Create(testNvidiaModesetDevice) + testGpuManager.defaultDevices = []string{testNvidiaCtlDevice, testNvidiaUVMDevice, testNvidiaUVMToolsDevice, testNvidiaModesetDevice} defer os.Remove(testNvidiaCtlDevice) defer os.Remove(testNvidiaUVMDevice) defer os.Remove(testNvidiaUVMToolsDevice) + defer os.Remove(testNvidiaModesetDevice) gpu1 := path.Join(testDevDir, "nvidia1") gpu2 := path.Join(testDevDir, "nvidia2") @@ -125,6 +128,7 @@ func TestNvidiaGPUManagerMultuipleAPIs(t *testing.T) { as.Contains(retDevices, testNvidiaCtlDevice) as.Contains(retDevices, testNvidiaUVMDevice) as.Contains(retDevices, testNvidiaUVMToolsDevice) + as.Contains(retDevices, testNvidiaModesetDevice) resp, err = clientBeta.Allocate(context.Background(), &pluginbeta.AllocateRequest{ ContainerRequests: []*pluginbeta.ContainerAllocateRequest{ {DevicesIDs: []string{"nvidia1", "nvidia3"}}}}) From 53c0a7f89271b9b6103f0f8af5e1e81e5ad581af Mon Sep 17 00:00:00 2001 From: farbodmg Date: Tue, 29 Jun 2021 23:13:15 -0700 Subject: [PATCH 2/2] fix modeset device tests --- pkg/gpu/nvidia/alpha_plugin_test.go | 2 +- pkg/gpu/nvidia/beta_plugin_test.go | 2 +- pkg/gpu/nvidia/multiple_versions_test.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/gpu/nvidia/alpha_plugin_test.go b/pkg/gpu/nvidia/alpha_plugin_test.go index ffafc86ca..931a9dcfa 100644 --- a/pkg/gpu/nvidia/alpha_plugin_test.go +++ b/pkg/gpu/nvidia/alpha_plugin_test.go @@ -167,7 +167,7 @@ func TestNvidiaGPUManagerAlphaAPI(t *testing.T) { DevicesIDs: []string{"nvidia1"}, }) as.Nil(err) - as.Len(resp.Devices, 4) + as.Len(resp.Devices, 5) as.Len(resp.Mounts, 2) resp, err = client.Allocate(context.Background(), &pluginapi.AllocateRequest{ DevicesIDs: []string{"nvidia1", "nvidia2"}, diff --git a/pkg/gpu/nvidia/beta_plugin_test.go b/pkg/gpu/nvidia/beta_plugin_test.go index de1438684..7587acbd9 100644 --- a/pkg/gpu/nvidia/beta_plugin_test.go +++ b/pkg/gpu/nvidia/beta_plugin_test.go @@ -111,7 +111,7 @@ func TestNvidiaGPUManagerBetaAPI(t *testing.T) { {DevicesIDs: []string{"nvidia1"}}}}) as.Nil(err) as.Len(resp.ContainerResponses, 1) - as.Len(resp.ContainerResponses[0].Devices, 4) + as.Len(resp.ContainerResponses[0].Devices, 5) as.Len(resp.ContainerResponses[0].Mounts, 2) resp, err = client.Allocate(context.Background(), &pluginapi.AllocateRequest{ ContainerRequests: []*pluginapi.ContainerAllocateRequest{ diff --git a/pkg/gpu/nvidia/multiple_versions_test.go b/pkg/gpu/nvidia/multiple_versions_test.go index baab3f275..1607b034f 100644 --- a/pkg/gpu/nvidia/multiple_versions_test.go +++ b/pkg/gpu/nvidia/multiple_versions_test.go @@ -113,7 +113,7 @@ func TestNvidiaGPUManagerMultuipleAPIs(t *testing.T) { {DevicesIDs: []string{"nvidia1"}}}}) as.Nil(err) as.Len(resp.ContainerResponses, 1) - as.Len(resp.ContainerResponses[0].Devices, 4) + as.Len(resp.ContainerResponses[0].Devices, 5) as.Len(resp.ContainerResponses[0].Mounts, 2) resp, err = clientBeta.Allocate(context.Background(), &pluginbeta.AllocateRequest{ ContainerRequests: []*pluginbeta.ContainerAllocateRequest{