diff --git a/flags/flags.go b/flags/flags.go index 23a7dd8..c80323f 100644 --- a/flags/flags.go +++ b/flags/flags.go @@ -9,11 +9,12 @@ import ( ) var ( - ListenAddress = kingpin.Flag("listen", "Listen address - ip:port or :port").Default("0.0.0.0:80").Envar("LISTEN").String() - CgroupRoot = kingpin.Flag("cgroupfs-root", "The mount point of the host cgroupfs root").Default("/sys/fs/cgroup").Envar("CGROUPFS_ROOT").String() - DisableLogParsing = kingpin.Flag("disable-log-parsing", "Disable container log parsing").Default("false").Envar("DISABLE_LOG_PARSING").Bool() - DisablePinger = kingpin.Flag("disable-pinger", "Don't ping upstreams").Default("false").Envar("DISABLE_PINGER").Bool() - DisableL7Tracing = kingpin.Flag("disable-l7-tracing", "Disable L7 tracing").Default("false").Envar("DISABLE_L7_TRACING").Bool() + ListenAddress = kingpin.Flag("listen", "Listen address - ip:port or :port").Default("0.0.0.0:80").Envar("LISTEN").String() + CgroupRoot = kingpin.Flag("cgroupfs-root", "The mount point of the host cgroupfs root").Default("/sys/fs/cgroup").Envar("CGROUPFS_ROOT").String() + DisableLogParsing = kingpin.Flag("disable-log-parsing", "Disable container log parsing").Default("false").Envar("DISABLE_LOG_PARSING").Bool() + DisablePinger = kingpin.Flag("disable-pinger", "Don't ping upstreams").Default("false").Envar("DISABLE_PINGER").Bool() + DisableL7Tracing = kingpin.Flag("disable-l7-tracing", "Disable L7 tracing").Default("false").Envar("DISABLE_L7_TRACING").Bool() + DisableGPUMonitoring = kingpin.Flag("disable-gpu-monitoring", "Disable GPU monitoring (NVML)").Default("false").Envar("DISABLE_GPU_MONITORING").Bool() ContainerAllowlist = kingpin.Flag("container-allowlist", "List of allowed containers (regex patterns)").Envar("CONTAINER_ALLOWLIST").Strings() ContainerDenylist = kingpin.Flag("container-denylist", "List of denied containers (regex patterns)").Envar("CONTAINER_DENYLIST").Strings() diff --git a/gpu/gpu.go b/gpu/gpu.go index f91d690..232718b 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -12,6 +12,7 @@ import ( "time" "github.com/NVIDIA/go-nvml/pkg/nvml" + "github.com/coroot/coroot-node-agent/flags" "github.com/coroot/coroot-node-agent/proc" "github.com/prometheus/client_golang/prometheus" "k8s.io/klog/v2" @@ -91,7 +92,9 @@ func NewCollector() (*Collector, error) { c := &Collector{ ProcessUsageSampleCh: make(chan ProcessUsageSample, 100), } - + if *flags.DisableGPUMonitoring { + return c, nil + } libPath, err := findNvidiaMLLib() if err != nil { klog.Infoln(err) @@ -227,6 +230,9 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) { } func (c *Collector) Close() { + if c.iface == nil { + return + } c.iface.Shutdown() } diff --git a/install.sh b/install.sh index 1b420b2..81735d1 100644 --- a/install.sh +++ b/install.sh @@ -16,7 +16,7 @@ SYSTEMD_SERVICE=${SYSTEM_NAME}.service UNINSTALL_SH=${BIN_DIR}/${SYSTEM_NAME}-uninstall.sh FILE_SERVICE=${SYSTEMD_DIR}/${SYSTEMD_SERVICE} FILE_ENV=${SYSTEMD_DIR}/${SYSTEMD_SERVICE}.env -ENV_VARS="^(LISTEN|CGROUPFS_ROOT|DISABLE_LOG_PARSING|DISABLE_PINGER|DISABLE_L7_TRACING|TRACK_PUBLIC_NETWORK|EPHEMERAL_PORT_RANGE|PROVIDER|REGION|AVAILABILITY_ZONE|INSTANCE_TYPE|INSTANCE_LIFE_CYCLE|LOG_PER_SECOND|LOG_BURST|COLLECTOR_ENDPOINT|API_KEY|METRICS_ENDPOINT|TRACES_ENDPOINT|LOGS_ENDPOINT|PROFILES_ENDPOINT|SCRAPE_INTERVAL|WAL_DIR)" +ENV_VARS="^(LISTEN|CGROUPFS_ROOT|DISABLE_LOG_PARSING|DISABLE_PINGER|DISABLE_L7_TRACING|DISABLE_GPU_MONITORING|TRACK_PUBLIC_NETWORK|EPHEMERAL_PORT_RANGE|PROVIDER|REGION|AVAILABILITY_ZONE|INSTANCE_TYPE|INSTANCE_LIFE_CYCLE|LOG_PER_SECOND|LOG_BURST|COLLECTOR_ENDPOINT|API_KEY|METRICS_ENDPOINT|TRACES_ENDPOINT|LOGS_ENDPOINT|PROFILES_ENDPOINT|SCRAPE_INTERVAL|WAL_DIR)" info() {