diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0b19566..c64bab5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,7 +27,7 @@ jobs: build: strategy: matrix: - go-version: [1.21.x] + go-version: [1.26.x] os: [ubuntu-latest] architecture: [x64] name: Build/Test (${{ matrix.os }}, ${{ matrix.architecture }}, Go ${{ matrix.go-version }}) @@ -50,12 +50,13 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-go@v5 with: - go-version: 1.21.x + go-version: 1.26.x cache: true - name: Run golangci-lint uses: golangci/golangci-lint-action@v4 with: version: latest + install-mode: goinstall args: > -E errcheck -E stylecheck diff --git a/deviceplugin/ccdevice_test.go b/deviceplugin/ccdevice_test.go index d08e4b8..ac1f093 100644 --- a/deviceplugin/ccdevice_test.go +++ b/deviceplugin/ccdevice_test.go @@ -116,7 +116,7 @@ func TestDiscoverTDX(t *testing.T) { Resource: "intel.com/tdx", Type: HardwareAttestation, DevicePaths: []string{"dev/tdx-guest"}, - DeviceLimit: 1, + DeviceLimit: 256, } cdp := constructTestPlugin(t, spec) devices, err := cdp.discoverCcDevices() @@ -124,8 +124,8 @@ func TestDiscoverTDX(t *testing.T) { t.Fatalf("discoverCcDevices failed: %v", err) } - if len(devices) != 1 { - t.Fatalf("Expected 1 device, got %d", len(devices)) + if len(devices) != 256 { + t.Fatalf("Expected 256 devices, got %d", len(devices)) } // Hardware-based should NOT have mounts if len(devices[0].Mounts) != 0 { @@ -138,7 +138,7 @@ func TestDiscoverSEVSNP(t *testing.T) { Resource: "amd.com/sev-snp", Type: HardwareAttestation, DevicePaths: []string{"dev/sev-guest"}, - DeviceLimit: 1, + DeviceLimit: 256, } cdp := constructTestPlugin(t, spec) devices, err := cdp.discoverCcDevices() @@ -146,8 +146,8 @@ func TestDiscoverSEVSNP(t *testing.T) { t.Fatalf("discoverCcDevices failed: %v", err) } - if len(devices) != 1 { - t.Fatalf("Expected 1 device, got %d", len(devices)) + if len(devices) != 256 { + t.Fatalf("Expected 256 devices, got %d", 
len(devices)) } if len(devices[0].Mounts) != 0 { t.Errorf("SEV-SNP should have 0 mounts, got %d", len(devices[0].Mounts)) @@ -188,10 +188,9 @@ func TestRefreshDevices(t *testing.T) { Resource: "intel.com/tdx", Type: HardwareAttestation, DevicePaths: []string{"dev/tdx-guest"}, - DeviceLimit: 1, + DeviceLimit: 256, } cdp := constructTestPlugin(t, spec) - devPath := spec.DevicePaths[0] // 1. Initial Refresh changed, err := cdp.refreshDevices() @@ -199,14 +198,10 @@ func TestRefreshDevices(t *testing.T) { t.Errorf("First refresh: err=%v, changed=%v (want false)", err, changed) } - // 2. Second Refresh (No change) - changed, err = cdp.refreshDevices() - if err != nil || !changed { - t.Errorf("Second refresh: err=%v, changed=%v (want true)", err, changed) + // 2. Remove all devices and refresh + for _, path := range spec.DevicePaths { + os.Remove(path) } - - // 3. Remove device and refresh - os.Remove(devPath) changed, err = cdp.refreshDevices() if err != nil || changed { - t.Errorf("Third refresh (removed): err=%v, changed=%v (want false)", err, changed) + t.Errorf("Second refresh (removed): err=%v, changed=%v (want false)", err, changed) @@ -217,12 +212,13 @@ func TestRefreshDevices(t *testing.T) { } func TestAllocate(t *testing.T) { + limit := 2 spec := &CcDeviceSpec{ Resource: "google.com/cc", Type: SoftwareAttestation, DevicePaths: []string{"dev/tpmrm0"}, MeasurementPaths: []string{"sys/binary_bios_measurements"}, - DeviceLimit: 2, + DeviceLimit: limit, } cdp := constructTestPlugin(t, spec) if _, err := cdp.refreshDevices(); err != nil { @@ -230,26 +226,28 @@ func TestAllocate(t *testing.T) { } ctx := context.Background() - expectedID := getExpectedID(spec.Resource, spec.DeviceLimit, 0) - - req := &v1beta1.AllocateRequest{ - ContainerRequests: []*v1beta1.ContainerAllocateRequest{{ - DevicesIDs: []string{expectedID}, - }}, - } - - resp, err := cdp.Allocate(ctx, req) - if err != nil { - t.Fatalf("Allocate failed: %v", err) - } + // Test allocation for index 0 and 1 + for i := 0; i < limit; i++ { + expectedID := getExpectedID(spec.Resource, limit, i) + 
req := &v1beta1.AllocateRequest{ + ContainerRequests: []*v1beta1.ContainerAllocateRequest{{ + DevicesIDs: []string{expectedID}, + }}, + } - if len(resp.ContainerResponses) != 1 { - t.Fatalf("Expected 1 response, got %d", len(resp.ContainerResponses)) - } + resp, err := cdp.Allocate(ctx, req) + if err != nil { + t.Fatalf("Allocate failed for ID %s: %v", expectedID, err) + } - // Verify the response contains the mount for software attestation - if len(resp.ContainerResponses[0].Mounts) == 0 { - t.Errorf("Expected mount in AllocateResponse for software attestation") + if len(resp.ContainerResponses) != 1 { + t.Fatalf("Expected 1 response, got %d", len(resp.ContainerResponses)) + } + + // Verify the response for every allocated ID still contains the mount for software attestation + if len(resp.ContainerResponses[0].Mounts) == 0 { + t.Errorf("Expected mount in AllocateResponse for software attestation") + } } } @@ -292,7 +290,7 @@ func TestListAndWatch(t *testing.T) { Resource: "intel.com/tdx", Type: HardwareAttestation, DevicePaths: []string{"dev/tdx-guest"}, - DeviceLimit: 1, + DeviceLimit: 256, } cdp := constructTestPlugin(t, spec) stream := listAndWatchServerStub{} @@ -327,3 +325,87 @@ func TestListAndWatch(t *testing.T) { t.Errorf("run group failed: %v", err) } } + +func TestAllocateHardware(t *testing.T) { + limit := 2 + spec := &CcDeviceSpec{ + Resource: "intel.com/tdx", + Type: HardwareAttestation, + DevicePaths: []string{"dev/tdx-guest"}, + DeviceLimit: limit, + } + cdp := constructTestPlugin(t, spec) + if _, err := cdp.refreshDevices(); err != nil { + t.Fatalf("refreshDevices failed: %v", err) + } + + ctx := context.Background() + expectedID := getExpectedID(spec.Resource, limit, 0) + req := &v1beta1.AllocateRequest{ + ContainerRequests: []*v1beta1.ContainerAllocateRequest{{ + DevicesIDs: []string{expectedID}, + }}, + } + + resp, err := cdp.Allocate(ctx, req) + if err != nil { + t.Fatalf("Allocate failed for ID %s: %v", expectedID, err) + } + + if len(resp.ContainerResponses) != 1 { + t.Fatalf("Expected 1 response, got %d", len(resp.ContainerResponses)) + } + + cResp := resp.ContainerResponses[0] + if len(cResp.Devices) != 1 { + t.Fatalf("Expected 1 mounted device, got %d", 
len(cResp.Devices)) + } + if len(cResp.Mounts) != 0 { + t.Fatalf("Expected 0 mounts for hardware attestation, got %d", len(cResp.Mounts)) + } +} + +func TestDiscoverMultiPaths(t *testing.T) { + tmpDir := t.TempDir() + + // Create only one of the multi paths + path1 := filepath.Join(tmpDir, "dev/tdx-guest") + path2 := filepath.Join(tmpDir, "dev/tdx_guest") + + if err := os.MkdirAll(filepath.Dir(path1), 0755); err != nil { + t.Fatalf("failed to create dir: %v", err) + } + if err := os.WriteFile(path2, []byte("test_device"), 0644); err != nil { + t.Fatalf("failed to create mock device: %v", err) + } + + spec := &CcDeviceSpec{ + Resource: "intel.com/tdx", + Type: HardwareAttestation, + DevicePaths: []string{path1, path2}, + DeviceLimit: 1, + } + + cdp := &CcDevicePlugin{ + cds: spec, + ccDevices: make(map[string]CcDevice), + logger: logger, + } + + devices, err := cdp.discoverCcDevices() + if err != nil { + t.Fatalf("discoverCcDevices failed: %v", err) + } + + if len(devices) != 1 { + t.Fatalf("Expected 1 device, got %d", len(devices)) + } + + specs := devices[0].DeviceSpecs + if len(specs) != 1 { + t.Fatalf("Expected 1 mapped device spec, got %d", len(specs)) + } + if specs[0].HostPath != path2 { + t.Errorf("Expected HostPath %q, got %q", path2, specs[0].HostPath) + } +} diff --git a/deviceplugin/plugin.go b/deviceplugin/plugin.go index b29f483..9d1cccc 100644 --- a/deviceplugin/plugin.go +++ b/deviceplugin/plugin.go @@ -156,6 +156,10 @@ Outer: _ = level.Info(p.logger).Log("msg", "waiting for gRPC server to be ready") select { case <-ctx.Done(): + p.grpcServer.Stop() + if err := l.Close(); err != nil { + _ = level.Warn(p.logger).Log("msg", "encountered error while closing the listener during shutdown", "err", err) + } return nil, nil, ctx.Err() case <-t.C: t.Reset(1 * time.Second) diff --git a/go.mod b/go.mod index d2506f9..21bec07 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,6 @@ module github.com/google/cc-device-plugin -go 1.24.0 - -toolchain go1.24.13 +go 1.26 
require ( github.com/go-kit/log v0.2.1 diff --git a/main.go b/main.go index fc04a29..00f2fef 100644 --- a/main.go +++ b/main.go @@ -45,6 +45,8 @@ const ( logLevelWarn = "warn" logLevelError = "error" logLevelNone = "none" + // By default, GKE allows up to 110 Pods per node on Standard clusters. Standard clusters can be configured to allow up to 256 Pods per node. + workloadSharedLimit = 256 ) var ( @@ -68,7 +70,7 @@ func Main() error { Type: deviceplugin.SoftwareAttestation, // Explicitly marked as software DevicePaths: []string{"/dev/tpmrm0"}, MeasurementPaths: []string{"/sys/kernel/security/tpm0/binary_bios_measurements"}, - DeviceLimit: 256, // Allow multiple pods to share the vTPM + DeviceLimit: workloadSharedLimit, }, { // Intel TDX @@ -77,7 +79,7 @@ func Main() error { DevicePaths: []string{"/dev/tdx-guest", "/dev/tdx_guest"}, // Some kernels use different names // TDX does not have a separate measurement file, attestation is done via ioctl. MeasurementPaths: []string{}, - DeviceLimit: 1, // Only one container can use the TDX device at a time per node + DeviceLimit: workloadSharedLimit, }, { // AMD SEV-SNP @@ -86,7 +88,7 @@ func Main() error { DevicePaths: []string{"/dev/sev-guest"}, // SEV-SNP also uses ioctl for attestation. MeasurementPaths: []string{}, - DeviceLimit: 1, // Only one container can use the SEV-SNP device at a time per node + DeviceLimit: workloadSharedLimit, }, }