From 683b0b0446045e8875c37863c12968bd224dce3c Mon Sep 17 00:00:00 2001
From: Melody Ma
Date: Fri, 18 Mar 2022 00:05:27 +0000
Subject: [PATCH 1/2] add .phony for fastsocket_installer

---
Review notes (text in this section is ignored by git-am):
* fastsocket_installer is appended to the existing .PHONY list instead of
  adding a second, mostly duplicated .PHONY line.
* Makefile now ends with a newline.
* The post-image blob id on the index line predates these changes;
  regenerate the series with git format-patch after applying.

 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 0595cd796..fbe298ec9 100644
--- a/Makefile
+++ b/Makefile
@@ -56,3 +56,3 @@ fastsocket_installer:
 	docker build --pull -t ${REGISTRY}/${FASTSOCKET_INSTALLER_IMAGE}:${TAG} -f fast-socket-installer/Dockerfile .
 
-.PHONY: all format test vet presubmit build container push partition-gpu
+.PHONY: all format test vet presubmit build container push partition-gpu fastsocket_installer

From 57825a7e2b71a90b9e3a2a63f13bed6bf4d532c4 Mon Sep 17 00:00:00 2001
From: Melody Ma
Date: Thu, 24 Mar 2022 18:33:13 +0000
Subject: [PATCH 2/2] Add fast-socket-installer.yaml

---
Review notes (text in this section is ignored by git-am):
* The init script now creates the target directory and quotes the path, so
  cp can no longer write a regular file named "lib64" when it is missing.
* List indentation under "command:" matches the rest of the manifest.
* The manifest now ends with a newline and carries explanatory comments.
* The post-image blob id on the index line predates these changes;
  regenerate the series with git format-patch after applying.

 .../fast-socket-installer.yaml | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 fast-socket-installer/fast-socket-installer.yaml

diff --git a/fast-socket-installer/fast-socket-installer.yaml b/fast-socket-installer/fast-socket-installer.yaml
new file mode 100644
index 000000000..af8024dd1
--- /dev/null
+++ b/fast-socket-installer/fast-socket-installer.yaml
@@ -0,0 +1,71 @@
+# Installs the NCCL Fast Socket plugin (libnccl-net.so) on GKE GPU nodes.
+# An init container copies the library from the installer image into the
+# host's NVIDIA directory; the pause container is the only long-running one.
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: nccl-fastsocket-installer
+  namespace: kube-system
+  labels:
+    k8s-app: nccl-fastsocket-installer
+spec:
+  selector:
+    matchLabels:
+      k8s-app: nccl-fastsocket-installer
+  updateStrategy:
+    type: RollingUpdate
+  template:
+    metadata:
+      labels:
+        name: nccl-fastsocket-installer
+        k8s-app: nccl-fastsocket-installer
+    spec:
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+            # Both labels must be present on the node (expressions are ANDed).
+            - matchExpressions:
+              - key: cloud.google.com/gke-accelerator
+                operator: Exists
+              - key: cloud.google.com/gke-nccl-fastsocket
+                operator: Exists
+      tolerations:
+      # Tolerate every taint so the installer also lands on tainted nodes.
+      - operator: "Exists"
+      # NOTE(review): the init container only copies one file; confirm that
+      # hostNetwork, hostPID and privileged mode are actually required.
+      hostNetwork: true
+      hostPID: true
+      volumes:
+      - name: nvidia-install-dir-host
+        hostPath:
+          path: /home/kubernetes/bin/nvidia
+      initContainers:
+      - image: gcr.io/gke-release/fastsocket-installer@sha256:cb8dca70b5611769fd2e0e8eb9aebf81a89d4378537cff104775c873abf2d9c5
+        name: nccl-fastsocket-installer
+        command:
+        - bash
+        - -c
+        - |
+          set -euo pipefail
+          # Create the target first so cp can never write a plain file
+          # named after the directory, and quote the path against splitting.
+          mkdir -p "${NCCL_INSTALL_DIR}"
+          cp /usr/lib/libnccl-net.so "${NCCL_INSTALL_DIR}/"
+        securityContext:
+          privileged: true
+        resources:
+          limits:
+            memory: 100Mi
+        env:
+        # In-container path; it resolves under the hostPath mount below.
+        - name: NCCL_INSTALL_DIR
+          value: /usr/local/nvidia/lib64
+        volumeMounts:
+        - name: nvidia-install-dir-host
+          mountPath: /usr/local/nvidia
+      containers:
+      # Idle placeholder; all work happens in the init container above.
+      - image: "gcr.io/google-containers/pause:2.0"
+        name: pause