diff --git a/cmd/ateapi/internal/controlapi/workflow_resume.go b/cmd/ateapi/internal/controlapi/workflow_resume.go index 1401461..4f792f2 100644 --- a/cmd/ateapi/internal/controlapi/workflow_resume.go +++ b/cmd/ateapi/internal/controlapi/workflow_resume.go @@ -187,6 +187,21 @@ func (s *CallAteletRestoreStep) Execute(ctx context.Context, input *ResumeInput, } ateletCtr.Env = append(ateletCtr.Env, ateletEnv) } + if sc := ctr.SecurityContext; sc != nil { + pbsc := &ateletpb.SecurityContext{} + if sc.Capabilities != nil { + pbsc.Capabilities = &ateletpb.Capabilities{ + Add: sc.Capabilities.Add, + } + } + if sc.RunAsUser != nil { + pbsc.RunAsUser = *sc.RunAsUser + } + if sc.RunAsGroup != nil { + pbsc.RunAsGroup = *sc.RunAsGroup + } + ateletCtr.SecurityContext = pbsc + } workloadSpec.Containers = append(workloadSpec.Containers, ateletCtr) } diff --git a/cmd/ateapi/internal/controlapi/workflow_suspend.go b/cmd/ateapi/internal/controlapi/workflow_suspend.go index 9c55652..d32e7bf 100644 --- a/cmd/ateapi/internal/controlapi/workflow_suspend.go +++ b/cmd/ateapi/internal/controlapi/workflow_suspend.go @@ -160,6 +160,21 @@ func (s *CallAteletSuspendStep) Execute(ctx context.Context, input *SuspendInput } ateletCtr.Env = append(ateletCtr.Env, ateletEnv) } + if sc := ctr.SecurityContext; sc != nil { + pbsc := &ateletpb.SecurityContext{} + if sc.Capabilities != nil { + pbsc.Capabilities = &ateletpb.Capabilities{ + Add: sc.Capabilities.Add, + } + } + if sc.RunAsUser != nil { + pbsc.RunAsUser = *sc.RunAsUser + } + if sc.RunAsGroup != nil { + pbsc.RunAsGroup = *sc.RunAsGroup + } + ateletCtr.SecurityContext = pbsc + } req.Spec.Containers = append(req.Spec.Containers, ateletCtr) } _, err = client.Checkpoint(ctx, req) diff --git a/cmd/atelet/main.go b/cmd/atelet/main.go index bd6611d..158c2b1 100644 --- a/cmd/atelet/main.go +++ b/cmd/atelet/main.go @@ -390,6 +390,8 @@ func (s *AteomHerder) Run(ctx context.Context, req *ateletpb.RunRequest) (*atele req.GetSpec().GetPauseImage(), 
[]string{"/pause"}, nil, + nil, // pause container uses the default sandbox cap set only + 0, 0, // pause container always runs as root map[string]string{ "io.kubernetes.cri.container-type": "sandbox", "io.kubernetes.cri.container-name": "pause", @@ -408,6 +410,9 @@ func (s *AteomHerder) Run(ctx context.Context, req *ateletpb.RunRequest) (*atele for _, env := range ctr.GetEnv() { envs = append(envs, fmt.Sprintf("%s=%s", env.GetName(), env.GetValue())) } + capAdds := ctr.GetSecurityContext().GetCapabilities().GetAdd() + runAsUser := ctr.GetSecurityContext().GetRunAsUser() + runAsGroup := ctr.GetSecurityContext().GetRunAsGroup() g.Go(func() error { if err := prepareOCIDirectory( @@ -420,6 +425,8 @@ func (s *AteomHerder) Run(ctx context.Context, req *ateletpb.RunRequest) (*atele ctr.GetImage(), ctr.GetCommand(), envs, + capAdds, + runAsUser, runAsGroup, map[string]string{ "io.kubernetes.cri.container-type": "container", "io.kubernetes.cri.sandbox-id": "pause", @@ -639,6 +646,8 @@ func (s *AteomHerder) Restore(ctx context.Context, req *ateletpb.RestoreRequest) req.GetSpec().GetPauseImage(), []string{"/pause"}, nil, + nil, // pause container uses the default sandbox cap set only + 0, 0, // pause container always runs as root map[string]string{ "io.kubernetes.cri.container-type": "sandbox", "io.kubernetes.cri.container-name": "pause", @@ -657,6 +666,9 @@ func (s *AteomHerder) Restore(ctx context.Context, req *ateletpb.RestoreRequest) for _, env := range ctr.GetEnv() { envs = append(envs, fmt.Sprintf("%s=%s", env.GetName(), env.GetValue())) } + capAdds := ctr.GetSecurityContext().GetCapabilities().GetAdd() + runAsUser := ctr.GetSecurityContext().GetRunAsUser() + runAsGroup := ctr.GetSecurityContext().GetRunAsGroup() g.Go(func() error { if err := prepareOCIDirectory( @@ -669,6 +681,8 @@ func (s *AteomHerder) Restore(ctx context.Context, req *ateletpb.RestoreRequest) ctr.GetImage(), ctr.GetCommand(), envs, + capAdds, + runAsUser, runAsGroup, map[string]string{ 
"io.kubernetes.cri.container-type": "container", "io.kubernetes.cri.sandbox-id": "pause", diff --git a/cmd/atelet/oci.go b/cmd/atelet/oci.go index a2ae14c..0f87cb1 100644 --- a/cmd/atelet/oci.go +++ b/cmd/atelet/oci.go @@ -25,6 +25,7 @@ import ( "os" "path" "path/filepath" + "strings" "github.com/agent-substrate/substrate/internal/ateompath" "github.com/agent-substrate/substrate/internal/memorypullcache" @@ -33,7 +34,48 @@ import ( "go.opentelemetry.io/otel/attribute" ) -func prepareOCIDirectory(ctx context.Context, pullCache *memorypullcache.MemoryPullCache, actorTemplateNamespace, actorTemplateName, actorID, containerName, ref string, args []string, env []string, annotations map[string]string, netns string) error { +// defaultSandboxCapabilities is the unconditional baseline cap set applied +// to every OCI bundle. Callers may add to it via `capAdds`; the pause +// container always uses this set unmodified. +var defaultSandboxCapabilities = []string{ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE", +} + +// resolveCapabilities merges the default sandbox cap set with the +// caller-requested adds, normalising each entry to its `CAP_…` form so +// templates may write either `NET_ADMIN` or `CAP_NET_ADMIN`. Duplicates +// are de-duplicated; ordering is stable (defaults first, then adds in the +// order supplied). 
+func resolveCapabilities(capAdds []string) []string {
+	seen := make(map[string]struct{}, len(defaultSandboxCapabilities)+len(capAdds))
+	out := make([]string, 0, len(defaultSandboxCapabilities)+len(capAdds))
+	for _, c := range defaultSandboxCapabilities {
+		if _, ok := seen[c]; ok {
+			continue
+		}
+		seen[c] = struct{}{}
+		out = append(out, c)
+	}
+	for _, c := range capAdds {
+		c = strings.ToUpper(strings.TrimSpace(c))
+		if c == "" {
+			continue
+		}
+		if !strings.HasPrefix(c, "CAP_") {
+			c = "CAP_" + c
+		}
+		if _, ok := seen[c]; ok {
+			continue
+		}
+		seen[c] = struct{}{}
+		out = append(out, c)
+	}
+	return out
+}
+
+func prepareOCIDirectory(ctx context.Context, pullCache *memorypullcache.MemoryPullCache, actorTemplateNamespace, actorTemplateName, actorID, containerName, ref string, args []string, env []string, capAdds []string, runAsUser, runAsGroup int64, annotations map[string]string, netns string) error {
 	tracer := otel.Tracer("prepareOCIDirectory")
 	ctx, span := tracer.Start(ctx, "prepareOCIDirectory")
 
@@ -69,35 +111,26 @@ func prepareOCIDirectory(ctx context.Context, pullCache *memorypullcache.MemoryP
 	ociSpec := &specs.Spec{
 		Process: &specs.Process{
 			User: specs.User{
-				UID: 0,
-				GID: 0,
+				// NOTE(review): uint32() wraps silently; negative or oversized
+				// IDs must be rejected before they reach this conversion.
+				UID: uint32(runAsUser),
+				GID: uint32(runAsGroup),
 			},
 			Args: args,
 			Env:  envVars,
 			Cwd:  "/",
-			Capabilities: &specs.LinuxCapabilities{
-				Bounding: []string{
-					"CAP_AUDIT_WRITE",
-					"CAP_KILL",
-					"CAP_NET_BIND_SERVICE",
-				},
-				Effective: []string{
-					"CAP_AUDIT_WRITE",
-					"CAP_KILL",
-					"CAP_NET_BIND_SERVICE",
-				},
-				Inheritable: []string{
-					"CAP_AUDIT_WRITE",
-					"CAP_KILL",
-					"CAP_NET_BIND_SERVICE",
-				},
-				Permitted: []string{
-					"CAP_AUDIT_WRITE",
-					"CAP_KILL",
-					"CAP_NET_BIND_SERVICE",
-				},
-				// TODO(gvisor.dev/issue/3166): support ambient capabilities
-			},
+			Capabilities: func() *specs.LinuxCapabilities {
+				caps := resolveCapabilities(capAdds)
+				// Give every set its own backing array, as the literals this
+				// replaces did, so editing one set cannot leak into another.
+				return &specs.LinuxCapabilities{
+					Bounding:    caps,
+					Effective:   append([]string(nil), caps...),
+					Inheritable: append([]string(nil), caps...),
+					Permitted:   append([]string(nil), caps...),
+					// 
TODO(gvisor.dev/issue/3166): support ambient capabilities + } + }(), Rlimits: []specs.POSIXRlimit{ { Type: "RLIMIT_NOFILE", diff --git a/cmd/atelet/oci_test.go b/cmd/atelet/oci_test.go new file mode 100644 index 0000000..44c54be --- /dev/null +++ b/cmd/atelet/oci_test.go @@ -0,0 +1,75 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "reflect" + "testing" +) + +func TestResolveCapabilities(t *testing.T) { + defaults := []string{"CAP_AUDIT_WRITE", "CAP_KILL", "CAP_NET_BIND_SERVICE"} + + cases := []struct { + name string + adds []string + want []string + }{ + { + name: "nil adds yields defaults only", + adds: nil, + want: defaults, + }, + { + name: "empty adds yields defaults only", + adds: []string{}, + want: defaults, + }, + { + name: "prefix-less names normalised and appended", + adds: []string{"NET_ADMIN", "SETUID", "SETGID"}, + want: append(append([]string{}, defaults...), "CAP_NET_ADMIN", "CAP_SETUID", "CAP_SETGID"), + }, + { + name: "already-prefixed names accepted verbatim", + adds: []string{"CAP_NET_ADMIN"}, + want: append(append([]string{}, defaults...), "CAP_NET_ADMIN"), + }, + { + name: "lowercase normalised to uppercase", + adds: []string{"cap_net_admin", "setuid"}, + want: append(append([]string{}, defaults...), "CAP_NET_ADMIN", "CAP_SETUID"), + }, + { + name: "duplicates across defaults and adds collapse", + adds: []string{"CAP_KILL", "NET_ADMIN", "CAP_NET_ADMIN"}, + want: 
append(append([]string{}, defaults...), "CAP_NET_ADMIN"), + }, + { + name: "blank entries ignored", + adds: []string{"", " ", "NET_ADMIN"}, + want: append(append([]string{}, defaults...), "CAP_NET_ADMIN"), + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := resolveCapabilities(tc.adds) + if !reflect.DeepEqual(got, tc.want) { + t.Fatalf("resolveCapabilities(%v) = %v, want %v", tc.adds, got, tc.want) + } + }) + } +} diff --git a/internal/proto/ateletpb/atelet.pb.go b/internal/proto/ateletpb/atelet.pb.go index 404aa0d..96705ec 100644 --- a/internal/proto/ateletpb/atelet.pb.go +++ b/internal/proto/ateletpb/atelet.pb.go @@ -385,13 +385,14 @@ func (x *WorkloadSpec) GetPauseImage() string { } type Container struct { - state protoimpl.MessageState `protogen:"open.v1"` - Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` - Image string `protobuf:"bytes,2,opt,name=image,proto3" json:"image,omitempty"` - Command []string `protobuf:"bytes,3,rep,name=command,proto3" json:"command,omitempty"` - Env []*EnvEntry `protobuf:"bytes,4,rep,name=env,proto3" json:"env,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + state protoimpl.MessageState `protogen:"open.v1"` + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + Image string `protobuf:"bytes,2,opt,name=image,proto3" json:"image,omitempty"` + Command []string `protobuf:"bytes,3,rep,name=command,proto3" json:"command,omitempty"` + Env []*EnvEntry `protobuf:"bytes,4,rep,name=env,proto3" json:"env,omitempty"` + SecurityContext *SecurityContext `protobuf:"bytes,5,opt,name=security_context,json=securityContext,proto3" json:"security_context,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *Container) Reset() { @@ -452,6 +453,13 @@ func (x *Container) GetEnv() []*EnvEntry { return nil } +func (x *Container) GetSecurityContext() *SecurityContext { + if x != nil { + return 
x.SecurityContext + } + return nil +} + type EnvEntry struct { state protoimpl.MessageState `protogen:"open.v1"` Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` @@ -504,6 +512,119 @@ func (x *EnvEntry) GetValue() string { return "" } +// SecurityContext mirrors `pkg/api/v1alpha1.ContainerSecurityContext` over +// the wire. Atelet's OCI bundle builder honours `Capabilities.Add`, +// `run_as_user`, and `run_as_group`. The UID/GID fields are plain int64s +// rather than `optional`: at the proto boundary "unset" and "0" both mean +// root, which is identical to the atelet behaviour, so the extra +// nullability buys nothing. +type SecurityContext struct { + state protoimpl.MessageState `protogen:"open.v1"` + Capabilities *Capabilities `protobuf:"bytes,1,opt,name=capabilities,proto3" json:"capabilities,omitempty"` + RunAsUser int64 `protobuf:"varint,2,opt,name=run_as_user,json=runAsUser,proto3" json:"run_as_user,omitempty"` + RunAsGroup int64 `protobuf:"varint,3,opt,name=run_as_group,json=runAsGroup,proto3" json:"run_as_group,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *SecurityContext) Reset() { + *x = SecurityContext{} + mi := &file_atelet_proto_msgTypes[8] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *SecurityContext) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*SecurityContext) ProtoMessage() {} + +func (x *SecurityContext) ProtoReflect() protoreflect.Message { + mi := &file_atelet_proto_msgTypes[8] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use SecurityContext.ProtoReflect.Descriptor instead. 
+func (*SecurityContext) Descriptor() ([]byte, []int) { + return file_atelet_proto_rawDescGZIP(), []int{8} +} + +func (x *SecurityContext) GetCapabilities() *Capabilities { + if x != nil { + return x.Capabilities + } + return nil +} + +func (x *SecurityContext) GetRunAsUser() int64 { + if x != nil { + return x.RunAsUser + } + return 0 +} + +func (x *SecurityContext) GetRunAsGroup() int64 { + if x != nil { + return x.RunAsGroup + } + return 0 +} + +// Capabilities mirrors `pkg/api/v1alpha1.Capabilities`. Each `add` entry +// is a Linux capability name with or without the `CAP_` prefix +// (e.g. `NET_ADMIN` or `CAP_NET_ADMIN`). +type Capabilities struct { + state protoimpl.MessageState `protogen:"open.v1"` + Add []string `protobuf:"bytes,1,rep,name=add,proto3" json:"add,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *Capabilities) Reset() { + *x = Capabilities{} + mi := &file_atelet_proto_msgTypes[9] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Capabilities) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Capabilities) ProtoMessage() {} + +func (x *Capabilities) ProtoReflect() protoreflect.Message { + mi := &file_atelet_proto_msgTypes[9] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Capabilities.ProtoReflect.Descriptor instead. 
+func (*Capabilities) Descriptor() ([]byte, []int) { + return file_atelet_proto_rawDescGZIP(), []int{9} +} + +func (x *Capabilities) GetAdd() []string { + if x != nil { + return x.Add + } + return nil +} + type RunResponse struct { state protoimpl.MessageState `protogen:"open.v1"` unknownFields protoimpl.UnknownFields @@ -512,7 +633,7 @@ type RunResponse struct { func (x *RunResponse) Reset() { *x = RunResponse{} - mi := &file_atelet_proto_msgTypes[8] + mi := &file_atelet_proto_msgTypes[10] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -524,7 +645,7 @@ func (x *RunResponse) String() string { func (*RunResponse) ProtoMessage() {} func (x *RunResponse) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[8] + mi := &file_atelet_proto_msgTypes[10] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -537,7 +658,7 @@ func (x *RunResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use RunResponse.ProtoReflect.Descriptor instead. 
func (*RunResponse) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{8} + return file_atelet_proto_rawDescGZIP(), []int{10} } type CheckpointRequest struct { @@ -566,7 +687,7 @@ type CheckpointRequest struct { func (x *CheckpointRequest) Reset() { *x = CheckpointRequest{} - mi := &file_atelet_proto_msgTypes[9] + mi := &file_atelet_proto_msgTypes[11] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -578,7 +699,7 @@ func (x *CheckpointRequest) String() string { func (*CheckpointRequest) ProtoMessage() {} func (x *CheckpointRequest) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[9] + mi := &file_atelet_proto_msgTypes[11] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -591,7 +712,7 @@ func (x *CheckpointRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use CheckpointRequest.ProtoReflect.Descriptor instead. func (*CheckpointRequest) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{9} + return file_atelet_proto_rawDescGZIP(), []int{11} } func (x *CheckpointRequest) GetTargetAteomNamespace() string { @@ -658,7 +779,7 @@ type CheckpointResponse struct { func (x *CheckpointResponse) Reset() { *x = CheckpointResponse{} - mi := &file_atelet_proto_msgTypes[10] + mi := &file_atelet_proto_msgTypes[12] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -670,7 +791,7 @@ func (x *CheckpointResponse) String() string { func (*CheckpointResponse) ProtoMessage() {} func (x *CheckpointResponse) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[10] + mi := &file_atelet_proto_msgTypes[12] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -683,7 +804,7 @@ func (x *CheckpointResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use CheckpointResponse.ProtoReflect.Descriptor instead. 
func (*CheckpointResponse) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{10} + return file_atelet_proto_rawDescGZIP(), []int{12} } type RestoreRequest struct { @@ -703,7 +824,7 @@ type RestoreRequest struct { func (x *RestoreRequest) Reset() { *x = RestoreRequest{} - mi := &file_atelet_proto_msgTypes[11] + mi := &file_atelet_proto_msgTypes[13] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -715,7 +836,7 @@ func (x *RestoreRequest) String() string { func (*RestoreRequest) ProtoMessage() {} func (x *RestoreRequest) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[11] + mi := &file_atelet_proto_msgTypes[13] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -728,7 +849,7 @@ func (x *RestoreRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use RestoreRequest.ProtoReflect.Descriptor instead. func (*RestoreRequest) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{11} + return file_atelet_proto_rawDescGZIP(), []int{13} } func (x *RestoreRequest) GetTargetAteomNamespace() string { @@ -795,7 +916,7 @@ type RestoreResponse struct { func (x *RestoreResponse) Reset() { *x = RestoreResponse{} - mi := &file_atelet_proto_msgTypes[12] + mi := &file_atelet_proto_msgTypes[14] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -807,7 +928,7 @@ func (x *RestoreResponse) String() string { func (*RestoreResponse) ProtoMessage() {} func (x *RestoreResponse) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[12] + mi := &file_atelet_proto_msgTypes[14] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -820,7 +941,7 @@ func (x *RestoreResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use RestoreResponse.ProtoReflect.Descriptor instead. 
func (*RestoreResponse) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{12} + return file_atelet_proto_rawDescGZIP(), []int{14} } var File_atelet_proto protoreflect.FileDescriptor @@ -854,15 +975,23 @@ const file_atelet_proto_rawDesc = "" + "containers\x18\x01 \x03(\v2\x11.atelet.ContainerR\n" + "containers\x12\x1f\n" + "\vpause_image\x18\x02 \x01(\tR\n" + - "pauseImage\"s\n" + + "pauseImage\"\xb7\x01\n" + "\tContainer\x12\x12\n" + "\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n" + "\x05image\x18\x02 \x01(\tR\x05image\x12\x18\n" + "\acommand\x18\x03 \x03(\tR\acommand\x12\"\n" + - "\x03env\x18\x04 \x03(\v2\x10.atelet.EnvEntryR\x03env\"4\n" + + "\x03env\x18\x04 \x03(\v2\x10.atelet.EnvEntryR\x03env\x12B\n" + + "\x10security_context\x18\x05 \x01(\v2\x17.atelet.SecurityContextR\x0fsecurityContext\"4\n" + "\bEnvEntry\x12\x12\n" + "\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n" + - "\x05value\x18\x02 \x01(\tR\x05value\"\r\n" + + "\x05value\x18\x02 \x01(\tR\x05value\"\x8d\x01\n" + + "\x0fSecurityContext\x128\n" + + "\fcapabilities\x18\x01 \x01(\v2\x14.atelet.CapabilitiesR\fcapabilities\x12\x1e\n" + + "\vrun_as_user\x18\x02 \x01(\x03R\trunAsUser\x12 \n" + + "\frun_as_group\x18\x03 \x01(\x03R\n" + + "runAsGroup\" \n" + + "\fCapabilities\x12\x10\n" + + "\x03add\x18\x01 \x03(\tR\x03add\"\r\n" + "\vRunResponse\"\xff\x02\n" + "\x11CheckpointRequest\x124\n" + "\x16target_ateom_namespace\x18\x01 \x01(\tR\x14targetAteomNamespace\x12*\n" + @@ -902,7 +1031,7 @@ func file_atelet_proto_rawDescGZIP() []byte { return file_atelet_proto_rawDescData } -var file_atelet_proto_msgTypes = make([]protoimpl.MessageInfo, 13) +var file_atelet_proto_msgTypes = make([]protoimpl.MessageInfo, 15) var file_atelet_proto_goTypes = []any{ (*RunRequest)(nil), // 0: atelet.RunRequest (*GCPAuthenticationConfig)(nil), // 1: atelet.GCPAuthenticationConfig @@ -912,11 +1041,13 @@ var file_atelet_proto_goTypes = []any{ (*WorkloadSpec)(nil), // 5: atelet.WorkloadSpec (*Container)(nil), // 
6: atelet.Container (*EnvEntry)(nil), // 7: atelet.EnvEntry - (*RunResponse)(nil), // 8: atelet.RunResponse - (*CheckpointRequest)(nil), // 9: atelet.CheckpointRequest - (*CheckpointResponse)(nil), // 10: atelet.CheckpointResponse - (*RestoreRequest)(nil), // 11: atelet.RestoreRequest - (*RestoreResponse)(nil), // 12: atelet.RestoreResponse + (*SecurityContext)(nil), // 8: atelet.SecurityContext + (*Capabilities)(nil), // 9: atelet.Capabilities + (*RunResponse)(nil), // 10: atelet.RunResponse + (*CheckpointRequest)(nil), // 11: atelet.CheckpointRequest + (*CheckpointResponse)(nil), // 12: atelet.CheckpointResponse + (*RestoreRequest)(nil), // 13: atelet.RestoreRequest + (*RestoreResponse)(nil), // 14: atelet.RestoreResponse } var file_atelet_proto_depIdxs = []int32{ 4, // 0: atelet.RunRequest.runsc:type_name -> atelet.RunscConfig @@ -927,21 +1058,23 @@ var file_atelet_proto_depIdxs = []int32{ 2, // 5: atelet.RunscConfig.authentication:type_name -> atelet.AuthenticationConfig 6, // 6: atelet.WorkloadSpec.containers:type_name -> atelet.Container 7, // 7: atelet.Container.env:type_name -> atelet.EnvEntry - 4, // 8: atelet.CheckpointRequest.runsc:type_name -> atelet.RunscConfig - 5, // 9: atelet.CheckpointRequest.spec:type_name -> atelet.WorkloadSpec - 4, // 10: atelet.RestoreRequest.runsc:type_name -> atelet.RunscConfig - 5, // 11: atelet.RestoreRequest.spec:type_name -> atelet.WorkloadSpec - 0, // 12: atelet.AteomHerder.Run:input_type -> atelet.RunRequest - 9, // 13: atelet.AteomHerder.Checkpoint:input_type -> atelet.CheckpointRequest - 11, // 14: atelet.AteomHerder.Restore:input_type -> atelet.RestoreRequest - 8, // 15: atelet.AteomHerder.Run:output_type -> atelet.RunResponse - 10, // 16: atelet.AteomHerder.Checkpoint:output_type -> atelet.CheckpointResponse - 12, // 17: atelet.AteomHerder.Restore:output_type -> atelet.RestoreResponse - 15, // [15:18] is the sub-list for method output_type - 12, // [12:15] is the sub-list for method input_type - 12, // [12:12] is 
the sub-list for extension type_name - 12, // [12:12] is the sub-list for extension extendee - 0, // [0:12] is the sub-list for field type_name + 8, // 8: atelet.Container.security_context:type_name -> atelet.SecurityContext + 9, // 9: atelet.SecurityContext.capabilities:type_name -> atelet.Capabilities + 4, // 10: atelet.CheckpointRequest.runsc:type_name -> atelet.RunscConfig + 5, // 11: atelet.CheckpointRequest.spec:type_name -> atelet.WorkloadSpec + 4, // 12: atelet.RestoreRequest.runsc:type_name -> atelet.RunscConfig + 5, // 13: atelet.RestoreRequest.spec:type_name -> atelet.WorkloadSpec + 0, // 14: atelet.AteomHerder.Run:input_type -> atelet.RunRequest + 11, // 15: atelet.AteomHerder.Checkpoint:input_type -> atelet.CheckpointRequest + 13, // 16: atelet.AteomHerder.Restore:input_type -> atelet.RestoreRequest + 10, // 17: atelet.AteomHerder.Run:output_type -> atelet.RunResponse + 12, // 18: atelet.AteomHerder.Checkpoint:output_type -> atelet.CheckpointResponse + 14, // 19: atelet.AteomHerder.Restore:output_type -> atelet.RestoreResponse + 17, // [17:20] is the sub-list for method output_type + 14, // [14:17] is the sub-list for method input_type + 14, // [14:14] is the sub-list for extension type_name + 14, // [14:14] is the sub-list for extension extendee + 0, // [0:14] is the sub-list for field type_name } func init() { file_atelet_proto_init() } @@ -955,7 +1088,7 @@ func file_atelet_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_atelet_proto_rawDesc), len(file_atelet_proto_rawDesc)), NumEnums: 0, - NumMessages: 13, + NumMessages: 15, NumExtensions: 0, NumServices: 1, }, diff --git a/internal/proto/ateletpb/atelet.proto b/internal/proto/ateletpb/atelet.proto index 8a356db..7d79c25 100644 --- a/internal/proto/ateletpb/atelet.proto +++ b/internal/proto/ateletpb/atelet.proto @@ -77,10 +77,11 @@ message WorkloadSpec { } message Container { - string name = 1; - string image = 2; - repeated string 
command = 3;
-  repeated EnvEntry env = 4;
+  string            name             = 1;
+  string            image            = 2;
+  repeated string   command          = 3;
+  repeated EnvEntry env              = 4;
+  SecurityContext   security_context = 5;
 }
 
 message EnvEntry {
@@ -88,6 +89,25 @@ message EnvEntry {
   string value = 2;
 }
 
+// SecurityContext mirrors `pkg/api/v1alpha1.ContainerSecurityContext` over
+// the wire. Atelet's OCI bundle builder honours `Capabilities.Add`,
+// `run_as_user`, and `run_as_group`. The UID/GID fields are plain int64s
+// rather than `optional`: at the proto boundary "unset" and "0" both mean
+// root, which is identical to the atelet behaviour, so the extra
+// nullability buys nothing.
+message SecurityContext {
+  Capabilities capabilities = 1;
+  int64        run_as_user  = 2;
+  int64        run_as_group = 3;
+}
+
+// Capabilities mirrors `pkg/api/v1alpha1.Capabilities`. Each `add` entry
+// is a Linux capability name with or without the `CAP_` prefix
+// (e.g. `NET_ADMIN` or `CAP_NET_ADMIN`).
+message Capabilities {
+  repeated string add = 1;
+}
+
 message RunResponse {
 }
 
diff --git a/manifests/ate-install/generated/ate.dev_actortemplates.yaml b/manifests/ate-install/generated/ate.dev_actortemplates.yaml
index c3fd0ae..4482025 100644
--- a/manifests/ate-install/generated/ate.dev_actortemplates.yaml
+++ b/manifests/ate-install/generated/ate.dev_actortemplates.yaml
@@ -269,6 +269,53 @@ spec:
                         - containerPort
                         type: object
                       type: array
+                    securityContext:
+                      description: |-
+                        SecurityContext holds Substrate-honoured security settings for the
+                        container. Workloads that set up their own network or user
+                        namespaces — for example, a privileged supervisor that hands off to
+                        a less-privileged inner process — may require additional
+                        capabilities (such as `CAP_NET_ADMIN`, `CAP_SETUID`, `CAP_SETGID`)
+                        on top of the small default set (`CAP_AUDIT_WRITE`, `CAP_KILL`,
+                        `CAP_NET_BIND_SERVICE`). Opt-in per container.
+                      properties:
+                        capabilities:
+                          description: Capabilities adjustments applied on top of
+                            the default sandbox set.
+                          properties:
+                            add:
+                              description: |-
+                                Capabilities to grant in addition to the default set. Each entry
+                                is a Linux capability name with or without the `CAP_` prefix
+                                (e.g. `NET_ADMIN` or `CAP_NET_ADMIN`).
+                              items:
+                                type: string
+                              type: array
+                              x-kubernetes-list-type: atomic
+                          type: object
+                        runAsGroup:
+                          description: |-
+                            RunAsGroup is the GID to run the container's process as. Unset
+                            preserves atelet's default of root (GID 0). See `RunAsUser` for
+                            interaction with `Capabilities.Add` / `CAP_SETGID`.
+                            Must be in [0, 2147483647].
+                          format: int64
+                          maximum: 2147483647
+                          minimum: 0
+                          type: integer
+                        runAsUser:
+                          description: |-
+                            RunAsUser is the UID to run the container's process as. Unset
+                            preserves atelet's default of root (UID 0). Workloads that drop
+                            privileges mid-startup (e.g. via setresuid) still need the
+                            matching `CAP_SETUID` in `Capabilities.Add`; this field is what
+                            makes the process *start* at the given UID instead.
+                            Must be in [0, 2147483647].
+                          format: int64
+                          maximum: 2147483647
+                          minimum: 0
+                          type: integer
+                      type: object
                   required:
                   - name
                   type: object
diff --git a/pkg/api/v1alpha1/actortemplate_types.go b/pkg/api/v1alpha1/actortemplate_types.go
index 5106fb5..04ffe58 100644
--- a/pkg/api/v1alpha1/actortemplate_types.go
+++ b/pkg/api/v1alpha1/actortemplate_types.go
@@ -49,6 +49,62 @@ type Container struct {
 
 	// Environment variables to set in the worker replicas.
 	Env []corev1.EnvVar `json:"env,omitempty"`
+
+	// SecurityContext holds Substrate-honoured security settings for the
+	// container. Workloads that set up their own network or user
+	// namespaces — for example, a privileged supervisor that hands off to
+	// a less-privileged inner process — may require additional
+	// capabilities (such as `CAP_NET_ADMIN`, `CAP_SETUID`, `CAP_SETGID`)
+	// on top of the small default set (`CAP_AUDIT_WRITE`, `CAP_KILL`,
+	// `CAP_NET_BIND_SERVICE`). Opt-in per container.
+	//
+	// +optional
+	SecurityContext *ContainerSecurityContext `json:"securityContext,omitempty"`
+}
+
+// ContainerSecurityContext is the Substrate subset of K8s
+// `corev1.SecurityContext`. Substrate intentionally does not expose the
+// full K8s shape because gVisor implements user/group/MAC primitives
+// differently from the host kernel and because the actor lifecycle
+// (checkpoint/restore) constrains what security state can be mutated
+// across the snapshot boundary. Fields here are the ones atelet's OCI
+// bundle builder can honour without violating either constraint.
+type ContainerSecurityContext struct {
+	// Capabilities adjustments applied on top of the default sandbox set.
+	// +optional
+	Capabilities *Capabilities `json:"capabilities,omitempty"`
+
+	// RunAsUser is the UID to run the container's process as. Unset
+	// preserves atelet's default of root (UID 0). Workloads that drop
+	// privileges mid-startup (e.g. via setresuid) still need the
+	// matching `CAP_SETUID` in `Capabilities.Add`; this field is what
+	// makes the process *start* at the given UID instead.
+	// Must be in [0, 2147483647].
+	// +optional
+	// +kubebuilder:validation:Minimum=0
+	// +kubebuilder:validation:Maximum=2147483647
+	RunAsUser *int64 `json:"runAsUser,omitempty"`
+
+	// RunAsGroup is the GID to run the container's process as. Unset
+	// preserves atelet's default of root (GID 0). See `RunAsUser` for
+	// interaction with `Capabilities.Add` / `CAP_SETGID`.
+	// Must be in [0, 2147483647].
+	// +optional
+	// +kubebuilder:validation:Minimum=0
+	// +kubebuilder:validation:Maximum=2147483647
+	RunAsGroup *int64 `json:"runAsGroup,omitempty"`
+}
+
+// Capabilities mirrors `corev1.Capabilities` but keeps the field types
+// primitive so the same shape can ride the `ateletpb` / `ateompb` protos
+// verbatim without a conversion layer.
+type Capabilities struct {
+	// Capabilities to grant in addition to the default set. Each entry
+	// is a Linux capability name with or without the `CAP_` prefix
+	// (e.g. `NET_ADMIN` or `CAP_NET_ADMIN`).
+ // +optional + // +listType=atomic + Add []string `json:"add,omitempty"` } type SnapshotsConfig struct { diff --git a/pkg/api/v1alpha1/actortemplate_types_test.go b/pkg/api/v1alpha1/actortemplate_types_test.go index 217f010..8ec3b94 100644 --- a/pkg/api/v1alpha1/actortemplate_types_test.go +++ b/pkg/api/v1alpha1/actortemplate_types_test.go @@ -49,3 +49,52 @@ func TestActorTemplateDeepCopy(t *testing.T) { t.Errorf("DeepCopy() mismatch (-want +got):\n%s", diff) } } + +func TestContainerSecurityContextDeepCopy(t *testing.T) { + in := &ActorTemplate{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-template", + Namespace: "test-ns", + }, + Spec: ActorTemplateSpec{ + WorkerPoolRef: corev1.ObjectReference{ + Namespace: "test-ns", + Name: "test-pool", + }, + SnapshotsConfig: SnapshotsConfig{ + Location: "gs://test-bucket/test-folder", + }, + Containers: []Container{ + { + Name: "app", + Image: "registry.example/app:test", + SecurityContext: &ContainerSecurityContext{ + Capabilities: &Capabilities{ + Add: []string{"NET_ADMIN", "SETUID", "SETGID"}, + }, + RunAsUser: ptrInt64(1000), + RunAsGroup: ptrInt64(1000), + }, + }, + }, + }, + } + + out := in.DeepCopy() + + if diff := cmp.Diff(in, out); diff != "" { + t.Errorf("DeepCopy() mismatch (-want +got):\n%s", diff) + } + + // Mutating the copy must not bleed into the original. 
+ out.Spec.Containers[0].SecurityContext.Capabilities.Add[0] = "MUTATED" + if got := in.Spec.Containers[0].SecurityContext.Capabilities.Add[0]; got != "NET_ADMIN" { + t.Errorf("DeepCopy did not isolate Capabilities.Add slice: original now %q", got) + } + *out.Spec.Containers[0].SecurityContext.RunAsUser = 9999 + if got := *in.Spec.Containers[0].SecurityContext.RunAsUser; got != 1000 { + t.Errorf("DeepCopy did not isolate RunAsUser pointer: original now %d", got) + } +} + +func ptrInt64(v int64) *int64 { return &v } diff --git a/pkg/api/v1alpha1/zz_generated.deepcopy.go b/pkg/api/v1alpha1/zz_generated.deepcopy.go index a66bb6a..866a06a 100644 --- a/pkg/api/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/api/v1alpha1/zz_generated.deepcopy.go @@ -151,6 +151,26 @@ func (in *AuthenticationConfig) DeepCopy() *AuthenticationConfig { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Capabilities) DeepCopyInto(out *Capabilities) { + *out = *in + if in.Add != nil { + in, out := &in.Add, &out.Add + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Capabilities. +func (in *Capabilities) DeepCopy() *Capabilities { + if in == nil { + return nil + } + out := new(Capabilities) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Container) DeepCopyInto(out *Container) { *out = *in @@ -171,6 +191,11 @@ func (in *Container) DeepCopyInto(out *Container) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.SecurityContext != nil { + in, out := &in.SecurityContext, &out.SecurityContext + *out = new(ContainerSecurityContext) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Container. 
@@ -183,6 +208,36 @@ func (in *Container) DeepCopy() *Container { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ContainerSecurityContext) DeepCopyInto(out *ContainerSecurityContext) { + *out = *in + if in.Capabilities != nil { + in, out := &in.Capabilities, &out.Capabilities + *out = new(Capabilities) + (*in).DeepCopyInto(*out) + } + if in.RunAsUser != nil { + in, out := &in.RunAsUser, &out.RunAsUser + *out = new(int64) + **out = **in + } + if in.RunAsGroup != nil { + in, out := &in.RunAsGroup, &out.RunAsGroup + *out = new(int64) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ContainerSecurityContext. +func (in *ContainerSecurityContext) DeepCopy() *ContainerSecurityContext { + if in == nil { + return nil + } + out := new(ContainerSecurityContext) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *GCPAuthenticationConfig) DeepCopyInto(out *GCPAuthenticationConfig) { *out = *in