Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/runhcs/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ func launchShim(cmd, pidFile, logFile string, args []string, data interface{}) (
}

fullargs = append(fullargs, "--log-format", logFormat)
if logrus.GetLevel() == logrus.DebugLevel {
if logrus.IsLevelEnabled(logrus.DebugLevel) {
fullargs = append(fullargs, "--debug")
}
}
Expand Down
39 changes: 22 additions & 17 deletions internal/annotations/annotations.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
// Do not rely on these annotations to customize production workload behavior.
package annotations

// uVM specific annotations

// uVM annotations.
const (
// UVMHyperVSocketConfigPrefix is the prefix of an annotation to map a [hyper-v socket] service GUID
// to a JSON-encoded string of its [configuration].
Expand All @@ -30,24 +29,15 @@ const (
// [configuration]: https://learn.microsoft.com/en-us/virtualization/api/hcs/schemareference#HvSocketServiceConfig
UVMHyperVSocketConfigPrefix = "io.microsoft.virtualmachine.hv-socket.service-table."

// AdditionalRegistryValues specifies additional registry keys and their values to set in the WCOW UVM.
// The format is a JSON-encoded string of an array containing [HCS RegistryValue] objects.
//
// Registry values will be available under `HKEY_LOCAL_MACHINE` root key.
//
// For example:
//
// "[{\"Key\": {\"Hive\": \"System\", \"Name\": \"registry\\key\\path"}, \"Name\": \"ValueName\", \"Type\": \"String\", \"StringValue\": \"value\"}]"
//
// [HCS RegistryValue]: https://learn.microsoft.com/en-us/virtualization/api/hcs/schemareference#registryvalue
AdditionalRegistryValues = "io.microsoft.virtualmachine.wcow.additional-reg-keys"

// ExtraVSockPorts adds additional ports to the list of ports that the UVM is allowed to use.
ExtraVSockPorts = "io.microsoft.virtualmachine.lcow.extra-vsock-ports"

// UVMConsolePipe is the name of the named pipe that the UVM console is connected to. This works only for non-SNP
// scenario, for SNP use a debugger.
UVMConsolePipe = "io.microsoft.virtualmachine.console.pipe"
)

// LCOW uVM annotations.
const (
// ExtraVSockPorts adds additional ports to the list of ports that the UVM is allowed to use.
ExtraVSockPorts = "io.microsoft.virtualmachine.lcow.extra-vsock-ports"

// NetworkingPolicyBasedRouting toggles on the ability to set policy based routing in the
// guest for LCOW.
Expand All @@ -57,3 +47,18 @@ const (
// LCOW scenarios. Ideally, this annotation should be removed if no issues are found.
NetworkingPolicyBasedRouting = "io.microsoft.virtualmachine.lcow.network.policybasedrouting"
)

// WCOW uVM annotations.
const (
// AdditionalRegistryValues specifies additional registry keys and their values to set in the WCOW UVM.
// The format is a JSON-encoded string of an array containing [HCS RegistryValue] objects.
//
// Registry values will be available under `HKEY_LOCAL_MACHINE` root key.
//
// For example:
//
// "[{\"Key\": {\"Hive\": \"System\", \"Name\": \"registry\\key\\path"}, \"Name\": \"ValueName\", \"Type\": \"String\", \"StringValue\": \"value\"}]"
//
// [HCS RegistryValue]: https://learn.microsoft.com/en-us/virtualization/api/hcs/schemareference#registryvalue
AdditionalRegistryValues = "io.microsoft.virtualmachine.wcow.additional-reg-keys"
)
2 changes: 1 addition & 1 deletion internal/gcs/bridge.go
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ func (brdg *bridge) writeMessage(buf *bytes.Buffer, enc *json.Encoder, typ msgTy
// Update the message header with the size.
binary.LittleEndian.PutUint32(buf.Bytes()[hdrOffSize:], uint32(buf.Len()))

if brdg.log.Logger.GetLevel() > logrus.DebugLevel {
if brdg.log.Logger.IsLevelEnabled(logrus.TraceLevel) {
b := buf.Bytes()[hdrSize:]
switch typ {
// container environment vars are in rpCreate for linux; rpcExecuteProcess for windows
Expand Down
12 changes: 11 additions & 1 deletion internal/gcs/guestconnection_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,19 @@ func TestGcsWaitProcessBridgeTerminated(t *testing.T) {
t.Fatal(err)
}
defer p.Close()

// There is a race condition here. gc.CreateProcess starts an AsyncRPC to wait on
// the created process. However, the AsyncRPC sends the request message on rpcCh
// and returns immediately (after the sendLoop reads that message). The test then
// sometimes ends up canceling the context (which closes the communication pipes)
// before the request message on rpcCh is processes and written on the pipe by
// `sendRPC`. In that case we receive the "bridge write failed" error instead of
// "bridge closed" error. To avoid this we put a small sleep here.
time.Sleep(1 * time.Second)

cancel()
err = p.Wait()
if err == nil || !strings.Contains(err.Error(), "bridge closed") {
if err == nil || (!strings.Contains(err.Error(), "bridge closed") && !strings.Contains(err.Error(), "bridge write")) {
t.Fatal("unexpected: ", err)
}
}
Expand Down
2 changes: 1 addition & 1 deletion internal/guest/bridge/bridge.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ func (b *Bridge) ListenAndServe(bridgeIn io.ReadCloser, bridgeOut io.WriteCloser
trace.StringAttribute("cid", base.ContainerID))

entry := log.G(ctx)
if entry.Logger.GetLevel() > logrus.DebugLevel {
if entry.Logger.IsLevelEnabled(logrus.TraceLevel) {
var err error
var msgBytes []byte
switch header.Type {
Expand Down
2 changes: 1 addition & 1 deletion internal/guest/network/netns.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ func NetNSConfig(ctx context.Context, ifStr string, nsPid int, adapter *guestres
}

// Add some debug logging
if entry.Logger.GetLevel() >= logrus.DebugLevel {
if entry.Logger.IsLevelEnabled(logrus.DebugLevel) {
curNS, _ := netns.Get()
// Refresh link attributes/state
link, _ = netlink.LinkByIndex(link.Attrs().Index)
Expand Down
5 changes: 3 additions & 2 deletions internal/guest/runtime/hcsv2/nvidia_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ import (
const nvidiaDebugFilePath = "nvidia-container.log"
const nvidiaToolBinary = "nvidia-container-cli"

// described here: https://github.com/opencontainers/runtime-spec/blob/39c287c415bf86fb5b7506528d471db5405f8ca8/config.md#posix-platform-hooks
// addNvidiaDeviceHook builds the arguments for nvidia-container-cli and creates the prestart hook
// addNvidiaDeviceHook builds the arguments for nvidia-container-cli and creates the createRuntime [OCI hooks].
//
// [OCI hooks]: https://github.com/opencontainers/runtime-spec/blob/39c287c415bf86fb5b7506528d471db5405f8ca8/config.md#posix-platform-hooks
func addNvidiaDeviceHook(ctx context.Context, spec *oci.Spec, ociBundlePath string) error {
genericHookBinary := "generichook"
genericHookPath, err := exec.LookPath(genericHookBinary)
Expand Down
15 changes: 12 additions & 3 deletions internal/guest/runtime/hcsv2/uvm.go
Original file line number Diff line number Diff line change
Expand Up @@ -841,10 +841,19 @@ func (h *Host) GetProperties(ctx context.Context, containerID string, query prot
// zero out [Blkio] sections, since:
// 1. (Az)CRI (currently) only looks at the CPU and memory sections; and
// 2. it can get very large for containers with many layers
cgroupMetrics.Blkio.Reset()
if cgroupMetrics.GetBlkio() != nil {
cgroupMetrics.Blkio.Reset()
}
// also preemptively zero out [Rdma] and [Network], since they could also grow untenable large
cgroupMetrics.Rdma.Reset()
cgroupMetrics.Network = []*cgroup1stats.NetworkStat{}
if cgroupMetrics.GetRdma() != nil {
cgroupMetrics.Rdma.Reset()
}
if len(cgroupMetrics.GetNetwork()) > 0 {
cgroupMetrics.Network = []*cgroup1stats.NetworkStat{}
}
if logrus.IsLevelEnabled(logrus.TraceLevel) {
log.G(ctx).WithField("stats", log.Format(ctx, cgroupMetrics)).Trace("queried cgroup statistics")
}
properties.Metrics = cgroupMetrics
default:
log.G(ctx).WithField("propertyType", requestedProperty).Warn("unknown or empty property type")
Expand Down
19 changes: 19 additions & 0 deletions internal/guest/runtime/hcsv2/workload_container.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,25 @@ func setupWorkloadContainerSpec(ctx context.Context, sbid, id string, spec *oci.
if err := addNvidiaDeviceHook(ctx, spec, ociBundlePath); err != nil {
return err
}

// The NVIDIA device hook `nvidia-container-cli` adds `rw` permissions for the
// GPU and ctl nodes (`c 195:*`) to the devices allow list, but CUDA apparently also
// needs `rwm` permission for other device nodes (e.g., `c 235`)
//
// Grant `rwm` to all character devices (`c *:* rwm`) to avoid hard coding exact node
// numbers, which are unknown before the driver runs (GPU devices are presented as I2C
// devices initially) or could change with driver implementation.
//
// Note: runc already grants mknod, `c *:* m`, so this really adds `rw` permissions for
// all character devices:
// https://github.com/opencontainers/runc/blob/6bae6cad4759a5b3537d550f43ea37d51c6b518a/libcontainer/specconv/spec_linux.go#L205-L222
spec.Linux.Resources.Devices = append(spec.Linux.Resources.Devices,
oci.LinuxDeviceCgroup{
Allow: true,
Type: "c",
Access: "rwm",
},
)
}
// add other assigned devices to the spec
if err := specGuest.AddAssignedDevice(ctx, spec); err != nil {
Expand Down
16 changes: 14 additions & 2 deletions internal/guest/spec/spec_devices.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@ import (
"strings"
"time"

"github.com/Microsoft/hcsshim/internal/guest/storage/pci"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/opencontainers/runc/libcontainer/devices"
oci "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"

"github.com/Microsoft/hcsshim/internal/guest/storage/pci"
"github.com/Microsoft/hcsshim/internal/log"
)

const (
Expand All @@ -23,13 +25,17 @@ const (
charType = "char"
blockType = "block"

// TODO: consolidate with `internal\uvm\virtual_device.go` and use in both locations
gpuDeviceIDType = "gpu"
vpciDeviceIDTypeLegacy = "vpci"
vpciDeviceIDType = "vpci-instance-id"
)

// AddAssignedDevice goes through the assigned devices that have been enumerated
// on the spec and updates the spec so that the correct device nodes can be mounted
// into the resulting container by the runtime.
//
// GPU devices are skipped, since they are handled in [addNvidiaDeviceHook].
func AddAssignedDevice(ctx context.Context, spec *oci.Spec) error {
// Add an explicit timeout before we try to find the dev nodes so we
// aren't waiting forever.
Expand All @@ -52,6 +58,12 @@ func AddAssignedDevice(ctx context.Context, spec *oci.Spec) error {
for _, dev := range devs {
AddLinuxDeviceToSpec(ctx, dev, spec, true)
}
case gpuDeviceIDType:
default:
log.G(ctx).WithFields(logrus.Fields{
"type": d.IDType,
"id": d.ID,
}).Warn("unknown device type")
}
}

Expand Down
26 changes: 13 additions & 13 deletions internal/hcsoci/devices.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,21 +174,21 @@ func handleAssignedDevicesLCOW(

// assign device into UVM and create corresponding spec windows devices
for _, d := range specDevs {
if uvm.IsValidDeviceType(d.IDType) {
pciID, index := devices.GetDeviceInfoFromPath(d.ID)
vpci, err := vm.AssignDevice(ctx, pciID, index, "")
if err != nil {
return resultDevs, closers, errors.Wrapf(err, "failed to assign device %s, function %d to pod %s", pciID, index, vm.ID())
}
closers = append(closers, vpci)

// update device ID on the spec to the assigned device's resulting vmbus guid so gcs knows which devices to
// map into the container
d.ID = vpci.VMBusGUID
resultDevs = append(resultDevs, d)
} else {
if !uvm.IsValidDeviceType(d.IDType) {
return resultDevs, closers, errors.Errorf("specified device %s has unsupported type %s", d.ID, d.IDType)
}

pciID, index := devices.GetDeviceInfoFromPath(d.ID)
vpci, err := vm.AssignDevice(ctx, pciID, index, "")
if err != nil {
return resultDevs, closers, errors.Wrapf(err, "failed to assign device %s, function %d to pod %s", pciID, index, vm.ID())
}
closers = append(closers, vpci)

// update device ID on the spec to the assigned device's resulting vmbus guid so gcs knows which devices to
// map into the container
d.ID = vpci.VMBusGUID
resultDevs = append(resultDevs, d)
}

return resultDevs, closers, nil
Expand Down
2 changes: 1 addition & 1 deletion internal/oci/annotations.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func ProcessAnnotations(ctx context.Context, s *specs.Spec) error {

// expand annotations
var errs []error
for key, exps := range annotations.AnnotationExpansions {
for key, exps := range annotations.AnnotationExpansionMap() {
// check if annotation is present
if val, ok := s.Annotations[key]; ok {
// ideally, some normalization would occur here (ie, "True" -> "true")
Expand Down
3 changes: 2 additions & 1 deletion internal/oci/annotations_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,8 @@ func TestProccessAnnotations_Expansion(t *testing.T) {
subtest.Fatalf("could not update spec from options: %v", err)
}

for _, k := range annotations.AnnotationExpansions[annotations.DisableUnsafeOperations] {
ae := annotations.AnnotationExpansionMap()
for _, k := range ae[annotations.DisableUnsafeOperations] {
if vv := tt.spec.Annotations[k]; vv != v {
subtest.Fatalf("annotation %q was incorrectly expanded to %q, expected %q", k, vv, v)
}
Expand Down
5 changes: 4 additions & 1 deletion internal/oci/uvm.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,10 @@ func specToUVMCreateOptionsCommon(ctx context.Context, opts *uvm.Options, s *spe
opts.ProcessDumpLocation = ParseAnnotationsString(s.Annotations, annotations.ContainerProcessDumpLocation, opts.ProcessDumpLocation)
opts.NoWritableFileShares = ParseAnnotationsBool(ctx, s.Annotations, annotations.DisableWritableFileShares, opts.NoWritableFileShares)
opts.DumpDirectoryPath = ParseAnnotationsString(s.Annotations, annotations.DumpDirectoryPath, opts.DumpDirectoryPath)

// NUMA settings
opts.MaxProcessorsPerNumaNode = ParseAnnotationsUint32(ctx, s.Annotations, annotations.NumaMaximumProcessorsPerNode, opts.MaxProcessorsPerNumaNode)
opts.MaxSizePerNode = ParseAnnotationsUint64(ctx, s.Annotations, annotations.NumaMaximumSizePerNode, opts.MaxSizePerNode)
opts.MaxMemorySizePerNumaNode = ParseAnnotationsUint64(ctx, s.Annotations, annotations.NumaMaximumMemorySizePerNode, opts.MaxMemorySizePerNumaNode)
opts.PreferredPhysicalNumaNodes = ParseAnnotationCommaSeparatedUint32(ctx, s.Annotations, annotations.NumaPreferredPhysicalNodes,
opts.PreferredPhysicalNumaNodes)
opts.NumaMappedPhysicalNodes = ParseAnnotationCommaSeparatedUint32(ctx, s.Annotations, annotations.NumaMappedPhysicalNodes,
Expand All @@ -278,6 +280,7 @@ func specToUVMCreateOptionsCommon(ctx context.Context, opts *uvm.Options, s *spe
opts.NumaProcessorCounts)
opts.NumaMemoryBlocksCounts = ParseAnnotationCommaSeparatedUint64(ctx, s.Annotations, annotations.NumaCountOfMemoryBlocks,
opts.NumaMemoryBlocksCounts)

maps.Copy(opts.AdditionalHyperVConfig, parseHVSocketServiceTable(ctx, s.Annotations))
}

Expand Down
4 changes: 2 additions & 2 deletions internal/uvm/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ type Options struct {
AdditionalHyperVConfig map[string]hcsschema.HvSocketServiceConfig

// The following options are for implicit vNUMA topology settings.
// MaxSizePerNode is the maximum size of memory per vNUMA node.
MaxSizePerNode uint64
// MaxMemorySizePerNumaNode is the maximum size of memory (in MiB) per vNUMA node.
MaxMemorySizePerNumaNode uint64
// MaxProcessorsPerNumaNode is the maximum number of processors per vNUMA node.
MaxProcessorsPerNumaNode uint32
// PhysicalNumaNodes are the preferred physical NUMA nodes to map to vNUMA nodes.
Expand Down
2 changes: 1 addition & 1 deletion internal/uvm/create_lcow.go
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,7 @@ func makeLCOWDoc(ctx context.Context, opts *OptionsLCOW, uvm *UtilityVM) (_ *hcs
return nil, err
}

numa, numaProcessors, err := prepareVNumaTopology(opts.Options)
numa, numaProcessors, err := prepareVNumaTopology(ctx, opts.Options)
if err != nil {
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion internal/uvm/create_wcow.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ func prepareCommonConfigDoc(ctx context.Context, uvm *UtilityVM, opts *OptionsWC
Weight: uint64(opts.ProcessorWeight),
}

numa, numaProcessors, err := prepareVNumaTopology(opts.Options)
numa, numaProcessors, err := prepareVNumaTopology(ctx, opts.Options)
if err != nil {
return nil, err
}
Expand Down
Loading