Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions nvidia-driver-installer/centos/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright 2017 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Installer image for the NVIDIA driver on CentOS 7 hosts.
FROM centos:7

# Toolchain made available to the NVIDIA installer that entrypoint.sh runs.
# The yum metadata and cache are removed in the same layer to keep the image
# small.
# NOTE(review): entrypoint.sh also calls curl, lsmod and insmod, none of which
# are installed here; they must be provided by the base image - confirm.
RUN yum update -y && \
    yum install -y gcc make && \
    yum clean all && \
    rm -rf /var/cache/yum

COPY entrypoint.sh /entrypoint.sh

# Exec form: the script is started directly instead of through `/bin/sh -c`,
# so it receives signals itself and its exit status is the container's.
CMD ["/entrypoint.sh"]
25 changes: 25 additions & 0 deletions nvidia-driver-installer/centos/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright 2017 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# `container` and `push` are commands, not files to be produced.
.PHONY: container push

# Image coordinates. TAG and REGISTRY may be overridden from the environment
# or the command line, e.g. `make container TAG=v1`.
TAG ?= latest
REGISTRY ?= gcr.io/google-containers
IMAGE = centos-nvidia-driver-installer

# Build the installer image from this directory, refreshing the base image.
container:
	docker build --pull -t $(REGISTRY)/$(IMAGE):$(TAG) .

# Upload the previously built image to the registry.
push:
	gcloud docker -- push $(REGISTRY)/$(IMAGE):$(TAG)
79 changes: 79 additions & 0 deletions nvidia-driver-installer/centos/daemonset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Copyright 2017 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# DaemonSet that runs the CentOS NVIDIA driver installer as an init container
# on every matching node, then parks the pod on a pause container.
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: nvidia-driver-installer
namespace: kube-system
labels:
k8s-app: nvidia-driver-installer
spec:
selector:
matchLabels:
k8s-app: nvidia-driver-installer
updateStrategy:
type: RollingUpdate
template:
metadata:
labels:
name: nvidia-driver-installer
k8s-app: nvidia-driver-installer
spec:
# Only schedule onto nodes that carry the GKE accelerator label.
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: cloud.google.com/gke-accelerator
operator: Exists
# Allow scheduling onto nodes tainted with nvidia.com/gpu:NoSchedule.
tolerations:
- key: "nvidia.com/gpu"
effect: "NoSchedule"
operator: "Exists"
# Host paths shared with the installer: device nodes, the driver install
# directory, and the whole host root filesystem.
volumes:
- name: dev
hostPath:
path: /dev
- name: nvidia-install-dir-host
hostPath:
path: /home/kubernetes/bin/nvidia
- name: root-mount
hostPath:
path: /
initContainers:
# NOTE(review): "sha256:to-be-build" is a placeholder, not a real digest; it
# has to be replaced with the digest of the pushed image before deploying.
- image: gcr.io/google-containers/centos-nvidia-driver-installer@sha256:to-be-build
name: nvidia-driver-installer
resources:
requests:
cpu: 0.15
# The installer runs privileged.
securityContext:
privileged: true
# These values are read by entrypoint.sh and must stay in sync with the
# hostPath volumes above and the volumeMounts below.
env:
- name: NVIDIA_INSTALL_DIR_HOST
value: /home/kubernetes/bin/nvidia
- name: NVIDIA_INSTALL_DIR_CONTAINER
value: /usr/local/nvidia
- name: ROOT_MOUNT_DIR
value: /root
volumeMounts:
- name: nvidia-install-dir-host
mountPath: /usr/local/nvidia
- name: dev
mountPath: /dev
- name: root-mount
mountPath: /root
# Presumably keeps the pod in Running state once the init container has
# finished, so the DaemonSet does not re-run the installer - confirm.
containers:
- image: "gcr.io/google-containers/pause:2.0"
name: pause
182 changes: 182 additions & 0 deletions nvidia-driver-installer/centos/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
#!/bin/bash
# Copyright 2017 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Abort on the first failing command and on any reference to an unset variable.
set -o errexit -o nounset

# Resolve the installer configuration with tracing switched on, so that every
# effective value shows up in the trace output. Settings written as
# "${NAME:-default}" can be overridden through the environment.
set -x
KERNEL_VERSION="$(uname -r)"
ROOT_MOUNT_DIR="${ROOT_MOUNT_DIR:-/root}"
NVIDIA_INSTALL_DIR_HOST="${NVIDIA_INSTALL_DIR_HOST:-/var/lib/nvidia}"
NVIDIA_INSTALL_DIR_CONTAINER="${NVIDIA_INSTALL_DIR_CONTAINER:-/usr/local/nvidia}"
CACHE_FILE="${NVIDIA_INSTALL_DIR_CONTAINER}/.cache"
NVIDIA_DRIVER_VERSION="${NVIDIA_DRIVER_VERSION:-384.111}"
NVIDIA_DRIVER_DOWNLOAD_URL_DEFAULT="https://us.download.nvidia.com/tesla/${NVIDIA_DRIVER_VERSION}/NVIDIA-Linux-x86_64-${NVIDIA_DRIVER_VERSION}.run"
NVIDIA_DRIVER_DOWNLOAD_URL="${NVIDIA_DRIVER_DOWNLOAD_URL:-$NVIDIA_DRIVER_DOWNLOAD_URL_DEFAULT}"
# The installer is stored locally under the last path component of the URL.
NVIDIA_INSTALLER_RUNFILE="$(basename "${NVIDIA_DRIVER_DOWNLOAD_URL}")"
set +x

#######################################
# Decide whether a previous driver installation can be reused.
# Globals:
#   CACHE_FILE (read), KERNEL_VERSION (read), NVIDIA_DRIVER_VERSION (read),
#   CACHE_KERNEL_VERSION / CACHE_NVIDIA_DRIVER_VERSION (reset, then set by
#   sourcing CACHE_FILE)
# Outputs:
#   Progress messages on stdout.
# Returns:
#   0 if the cache file records the running kernel version and the requested
#   driver version; 1 if the file is missing, incomplete or does not match.
#######################################
check_cached_version() {
  echo "Checking cached version"
  if [[ ! -f "${CACHE_FILE}" ]]; then
    echo "Cache file ${CACHE_FILE} not found."
    return 1
  fi

  # Forget any values inherited from the environment or an earlier call, so
  # that only what the cache file itself defines takes part in the comparison.
  unset CACHE_KERNEL_VERSION CACHE_NVIDIA_DRIVER_VERSION

  # Source the cache file and check if the cached driver matches
  # currently running kernel version and requested driver versions.
  # shellcheck disable=SC1090
  . "${CACHE_FILE}"

  # The ":-" defaults keep a truncated or empty cache file from aborting the
  # script under `set -u`; such a file is simply treated as a mismatch.
  if [[ "${KERNEL_VERSION}" == "${CACHE_KERNEL_VERSION:-}" \
    && "${NVIDIA_DRIVER_VERSION}" == "${CACHE_NVIDIA_DRIVER_VERSION:-}" ]]; then
    echo "Found existing driver installation for kernel version ${KERNEL_VERSION} and driver version ${NVIDIA_DRIVER_VERSION}."
    return 0
  fi
  echo "Cache file ${CACHE_FILE} found but existing versions didn't match."
  return 1
}

#######################################
# Record the kernel and driver versions of the installation just performed.
# Globals:
#   CACHE_FILE (file is overwritten), KERNEL_VERSION (read),
#   NVIDIA_DRIVER_VERSION (read)
# Outputs:
#   A heading followed by the new cache file contents on stdout.
#######################################
update_cached_version() {
  # Written as shell assignments so the file can be sourced later.
  printf 'CACHE_KERNEL_VERSION=%s\nCACHE_NVIDIA_DRIVER_VERSION=%s\n' \
    "${KERNEL_VERSION}" "${NVIDIA_DRIVER_VERSION}" > "${CACHE_FILE}"

  printf '%s\n' "Updated cached version as:"
  cat "${CACHE_FILE}"
}

#######################################
# Register the driver library directory with the container's dynamic linker.
# Globals:
#   NVIDIA_INSTALL_DIR_CONTAINER (read)
# Outputs:
#   Progress messages on stdout; overwrites /etc/ld.so.conf.d/nvidia.conf.
#######################################
update_container_ld_cache() {
  local -r ld_conf="/etc/ld.so.conf.d/nvidia.conf"

  echo "Updating container's ld cache..."
  echo "${NVIDIA_INSTALL_DIR_CONTAINER}/lib64" > "${ld_conf}"
  # Rebuild the linker cache so the new directory is picked up.
  ldconfig
  echo "Updating container's ld cache... DONE."
}

#######################################
# Install the kernel-devel package matching the running kernel.
# Globals:
#   KERNEL_VERSION (read)
# Outputs:
#   Progress messages and yum output on stdout.
# Returns:
#   0 on success; the failing yum command's status otherwise.
#######################################
download_kernel_src() {
  echo "Downloading kernel sources..."
  # The two yum steps are deliberately NOT chained with `&&`: a command that
  # fails on the left side of `&&` is exempt from errexit, so a failed update
  # would otherwise fall through to the DONE message with a zero status.
  yum update -y || return
  yum install -y "kernel-devel-${KERNEL_VERSION}" || return
  echo "Downloading kernel sources... DONE."
}

# Prepare the installation directory and overlay-mount it over the fixed
# locations nvidia-installer writes to, so that the installed files end up
# under ${NVIDIA_INSTALL_DIR_CONTAINER}.
# Globals: NVIDIA_INSTALL_DIR_CONTAINER (read), KERNEL_VERSION (read).
# Side effects: creates directories, mounts three overlay filesystems, writes
# the container ld cache and installs an EXIT trap that unmounts the overlays.
configure_nvidia_installation_dirs() {
echo "Configuring installation directories..."
mkdir -p "${NVIDIA_INSTALL_DIR_CONTAINER}"
# The relative upperdir/workdir paths below are resolved from this directory.
pushd "${NVIDIA_INSTALL_DIR_CONTAINER}"

# nvidia-installer does not provide an option to configure the
# installation path of `nvidia-modprobe` utility and always installs it
# under /usr/bin. The following workaround ensures that
# `nvidia-modprobe` is accessible outside the installer container
# filesystem.
mkdir -p bin bin-workdir
mount -t overlay -o lowerdir=/usr/bin,upperdir=bin,workdir=bin-workdir none /usr/bin

# nvidia-installer does not provide an option to configure the
# installation path of libraries such as libnvidia-ml.so. The following
# workaround ensures that the libs are accessible from outside the
# installer container filesystem.
# NOTE(review): /usr/lib/x86_64-linux-gnu looks like the Debian/Ubuntu library
# location; confirm the installer really places its libraries there on CentOS.
mkdir -p lib64 lib64-workdir
mkdir -p /usr/lib/x86_64-linux-gnu
mount -t overlay -o lowerdir=/usr/lib/x86_64-linux-gnu,upperdir=lib64,workdir=lib64-workdir none /usr/lib/x86_64-linux-gnu

# nvidia-installer does not provide an option to configure the
# installation path of driver kernel modules such as nvidia.ko. The following
# workaround ensures that the modules are accessible from outside the
# installer container filesystem.
mkdir -p drivers drivers-workdir
mkdir -p /lib/modules/${KERNEL_VERSION}/video
mount -t overlay -o lowerdir=/lib/modules/${KERNEL_VERSION}/video,upperdir=drivers,workdir=drivers-workdir none /lib/modules/${KERNEL_VERSION}/video

# Populate ld.so.conf to avoid warning messages in nvidia-installer logs.
update_container_ld_cache

# Install an exit handler to clean up the overlayfs mount points.
# /usr/bin is unmounted lazily: on CentOS /bin is a symlink to /usr/bin, so
# /usr/bin/bash is always in use and a regular unmount would fail.
# The trap text is double-quoted, so ${KERNEL_VERSION} is expanded now, when
# the trap is installed, not when it fires.
trap "{ umount /lib/modules/${KERNEL_VERSION}/video; umount /usr/lib/x86_64-linux-gnu ; umount -l /usr/bin; }" EXIT
popd
echo "Configuring installation directories... DONE."
}

#######################################
# Fetch the NVIDIA installer runfile into the installation directory.
# Globals:
#   NVIDIA_INSTALL_DIR_CONTAINER (read), NVIDIA_DRIVER_DOWNLOAD_URL (read),
#   NVIDIA_INSTALLER_RUNFILE (read; file is created or overwritten)
# Outputs:
#   Progress messages on stdout.
#######################################
download_nvidia_installer() {
  echo "Downloading Nvidia installer..."
  pushd "${NVIDIA_INSTALL_DIR_CONTAINER}"
  # Follow redirects, report errors, and treat HTTP error responses as
  # failures rather than saving the error page as the installer.
  curl --location --show-error --fail \
    "${NVIDIA_DRIVER_DOWNLOAD_URL}" \
    --output "${NVIDIA_INSTALLER_RUNFILE}"
  popd
  echo "Downloading Nvidia installer... DONE."
}

#######################################
# Run the downloaded NVIDIA installer non-interactively.
# Globals:
#   NVIDIA_INSTALL_DIR_CONTAINER (read), NVIDIA_INSTALLER_RUNFILE (read),
#   KERNEL_VERSION (read)
# Outputs:
#   Progress messages on stdout; the installer log is written to
#   ${NVIDIA_INSTALL_DIR_CONTAINER}/nvidia-installer.log.
#######################################
run_nvidia_installer() {
  echo "Running Nvidia installer..."
  pushd "${NVIDIA_INSTALL_DIR_CONTAINER}"
  # NOTE(review): a reviewer commented on --kernel-source-path that "This
  # should be /root/usr/src" (i.e. under the host root mount). The in-container
  # path is kept here because download_kernel_src installs kernel-devel inside
  # the container - confirm which location is intended.
  sh "${NVIDIA_INSTALLER_RUNFILE}" \
    --utility-prefix="${NVIDIA_INSTALL_DIR_CONTAINER}" \
    --opengl-prefix="${NVIDIA_INSTALL_DIR_CONTAINER}" \
    --no-install-compat32-libs \
    --log-file-name="${NVIDIA_INSTALL_DIR_CONTAINER}/nvidia-installer.log" \
    --kernel-source-path="/usr/src/kernels/${KERNEL_VERSION}" \
    --no-drm \
    --silent \
    --accept-license
  popd
  echo "Running Nvidia installer... DONE."
}

#######################################
# Re-activate a previously installed driver: refresh the container ld cache
# and load the nvidia and nvidia_uvm kernel modules unless lsmod already
# lists them.
# Globals:
#   NVIDIA_INSTALL_DIR_CONTAINER (read)
# Outputs:
#   Progress messages on stdout.
#######################################
configure_cached_installation() {
  local module

  echo "Configuring cached driver installation..."
  update_container_ld_cache
  # Order matters: nvidia is handled before nvidia_uvm.
  for module in nvidia nvidia_uvm; do
    if lsmod | grep -q -w "${module}"; then
      continue
    fi
    # Module names use '_' while the .ko file names use '-'.
    insmod "${NVIDIA_INSTALL_DIR_CONTAINER}/drivers/${module//_/-}.ko"
  done
  echo "Configuring cached driver installation... DONE"
}

#######################################
# Smoke-test the installed driver by running its command line tools.
# Globals:
#   NVIDIA_INSTALL_DIR_CONTAINER (read), PATH (prepended to and exported)
# Outputs:
#   Progress messages and the tools' output on stdout.
#######################################
verify_nvidia_installation() {
  echo "Verifying Nvidia installation..."
  # Make the freshly installed tools take precedence on the search path.
  PATH="${NVIDIA_INSTALL_DIR_CONTAINER}/bin:${PATH}"
  export PATH
  nvidia-smi
  # Create unified memory device file.
  nvidia-modprobe -c0 -u
  echo "Verifying Nvidia installation... DONE."
}

#######################################
# Make the host's dynamic linker aware of the driver libraries.
# Globals:
#   NVIDIA_INSTALL_DIR_HOST (read), ROOT_MOUNT_DIR (read)
# Outputs:
#   Progress messages on stdout; may append one line to
#   ${ROOT_MOUNT_DIR}/etc/ld.so.conf and rebuilds the ld cache under
#   ${ROOT_MOUNT_DIR}.
#######################################
update_host_ld_cache() {
  local -r ld_conf="${ROOT_MOUNT_DIR}/etc/ld.so.conf"
  local -r lib_dir="${NVIDIA_INSTALL_DIR_HOST}/lib64"

  echo "Updating host's ld cache..."
  # This function runs on every start, including the cached path, so only
  # append the entry when it is not listed yet; otherwise the host's
  # ld.so.conf would gain a duplicate line on each run. A missing file makes
  # grep fail, which correctly falls through to the append.
  if ! grep -qxF -- "${lib_dir}" "${ld_conf}" 2>/dev/null; then
    echo "${lib_dir}" >> "${ld_conf}"
  fi
  # -r: operate on the host filesystem mounted at ROOT_MOUNT_DIR.
  ldconfig -r "${ROOT_MOUNT_DIR}"
  echo "Updating host's ld cache... DONE."
}

#######################################
# Entry point: reuse a cached driver installation when one matches, otherwise
# perform a full installation; then verify the driver and update the host's
# ld cache.
#######################################
main() {
  if ! check_cached_version; then
    # No usable cache: build and install the driver from scratch.
    download_kernel_src
    configure_nvidia_installation_dirs
    download_nvidia_installer
    run_nvidia_installer
    update_cached_version
  else
    # Cache hit: only re-activate what is already installed.
    configure_cached_installation
  fi

  # Common tail of both paths.
  verify_nvidia_installation
  update_host_ld_cache
}

# Run the installer, forwarding the script's command line arguments unchanged.
main "$@"