Skip to content

Commit 3a2e8e9

Browse files
authored
Cache sysinfo data, and only query for the data we actually use (#20)
* Cache sysinfo data, and only query for the data we actually use * Changelog
1 parent 9875895 commit 3a2e8e9

File tree

5 files changed

+73
-24
lines changed

5 files changed

+73
-24
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,13 @@ All notable changes to this project will be documented in this file.
99
- Downgraded DNS errors to warnings ([#17]).
1010
- All output is now wrapped in a "containerdebug" span ([#18]).
1111

12+
### Fixes
13+
14+
- Reduced memory usage dramatically by limiting and caching fetched information ([#20]).
15+
1216
[#17]: https://github.com/stackabletech/containerdebug/pull/17
1317
[#18]: https://github.com/stackabletech/containerdebug/pull/18
18+
[#20]: https://github.com/stackabletech/containerdebug/pull/20
1419

1520
## [0.1.0] - 2024-12-09
1621

src/main.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ fn main() {
5555
built_info::RUSTC_VERSION,
5656
);
5757

58+
let mut collect_ctx = SystemInformation::init();
59+
5860
let mut next_run = Instant::now();
5961
loop {
6062
let next_run_sleep = next_run.saturating_duration_since(Instant::now());
@@ -63,7 +65,7 @@ fn main() {
6365
}
6466
std::thread::sleep(next_run_sleep);
6567

66-
let system_information = SystemInformation::collect();
68+
let system_information = SystemInformation::collect(&mut collect_ctx);
6769

6870
let serialized = serde_json::to_string_pretty(&system_information).unwrap();
6971
if let Some(output_path) = &opts.output {

src/system_information/mod.rs

Lines changed: 42 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,14 @@ pub mod os;
88
pub mod resources;
99
pub mod user;
1010

11-
#[derive(Debug, Serialize)]
11+
#[derive(Debug, Serialize, Default)]
1212
pub struct SystemInformation {
13-
pub resources: resources::Resources,
14-
pub os: os::OperatingSystem,
15-
pub current_user: ComponentResult<user::User>,
16-
pub disks: Vec<disk::Disk>,
17-
pub network: network::SystemNetworkInfo,
13+
// All fields are optional, to make it easy to disable modules one by one
14+
pub resources: Option<resources::Resources>,
15+
pub os: Option<os::OperatingSystem>,
16+
pub current_user: Option<ComponentResult<user::User>>,
17+
pub disks: Option<Vec<disk::Disk>>,
18+
pub network: Option<network::SystemNetworkInfo>,
1819
// TODO:
1920
// Current time
2021
// SElinux/AppArmor
@@ -32,26 +33,47 @@ pub struct SystemInformation {
3233
// - Users/Groups
3334
}
3435

36+
/// Common data that is cached between [`SystemInformation::collect`] calls.
37+
pub struct CollectContext {
38+
system: sysinfo::System,
39+
}
40+
3541
impl SystemInformation {
36-
#[tracing::instrument(name = "SystemInformation::collect")]
37-
pub fn collect() -> Self {
38-
tracing::info!("Starting data collection");
42+
/// Collects static information that doesn't need to be refreshed.
43+
#[tracing::instrument(name = "SystemInformation::init")]
44+
pub fn init() -> CollectContext {
45+
tracing::info!("initializing");
46+
let mut ctx = CollectContext {
47+
// Each module is responsible for updating the information that it cares about.
48+
system: sysinfo::System::new(),
49+
};
50+
if let Err(err) = user::User::init(&mut ctx.system) {
51+
tracing::error!(
52+
error = &err as &dyn std::error::Error,
53+
"failed to initialize user module, ignoring but this will likely cause collection errors..."
54+
);
55+
}
56+
tracing::info!("init finished");
57+
ctx
58+
}
3959

40-
// Please note that we use "new_all" to ensure that all list of
41-
// components, network interfaces, disks and users are already
42-
// filled!
43-
let sys = sysinfo::System::new_all();
60+
/// Collects a fresh snapshot of system information, reusing the data cached in `ctx`.
61+
#[tracing::instrument(name = "SystemInformation::collect", skip(ctx))]
62+
pub fn collect(ctx: &mut CollectContext) -> Self {
63+
tracing::info!("Starting data collection");
4464

4565
let info = Self {
46-
resources: resources::Resources::collect(&sys),
47-
os: os::OperatingSystem::collect(),
48-
current_user: ComponentResult::report_from_result(
66+
resources: Some(resources::Resources::collect(&mut ctx.system)),
67+
os: Some(os::OperatingSystem::collect()),
68+
current_user: Some(ComponentResult::report_from_result(
4969
"User::collect_current",
50-
user::User::collect_current(&sys),
51-
),
52-
disks: disk::Disk::collect_all(),
53-
network: network::SystemNetworkInfo::collect(),
70+
user::User::collect_current(&ctx.system),
71+
)),
72+
disks: Some(disk::Disk::collect_all()),
73+
network: Some(network::SystemNetworkInfo::collect()),
74+
// ..Default::default()
5475
};
76+
5577
tracing::info!("Data collection finished");
5678
info
5779
}

src/system_information/resources.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use serde::Serialize;
2-
use sysinfo::System;
2+
use sysinfo::{CpuRefreshKind, MemoryRefreshKind, RefreshKind, System};
33

44
#[derive(Debug, Serialize)]
55
pub struct Resources {
@@ -22,10 +22,16 @@ pub struct Resources {
2222

2323
impl Resources {
2424
#[tracing::instrument(name = "Resources::collect", skip(sys))]
25-
pub fn collect(sys: &System) -> Self {
25+
pub fn collect(sys: &mut System) -> Self {
2626
// This style of "declare-then-log-then-merge" becomes a bit verbose,
2727
// but should help keep each log statement local to where that info is collected.
2828

29+
sys.refresh_specifics(
30+
RefreshKind::new()
31+
.with_cpu(CpuRefreshKind::new().with_cpu_usage())
32+
.with_memory(MemoryRefreshKind::everything()),
33+
);
34+
2935
let cpu_count = sys.cpus().len();
3036
let physical_core_count = sys.physical_core_count();
3137
tracing::info!(

src/system_information/user.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use serde::Serialize;
22
use snafu::{OptionExt, ResultExt, Snafu};
3-
use sysinfo::{Gid, Pid, Uid};
3+
use sysinfo::{Gid, Pid, ProcessRefreshKind, Uid, UpdateKind};
44

55
use crate::error::SysinfoError;
66

@@ -21,6 +21,20 @@ pub struct User {
2121
}
2222

2323
impl User {
24+
#[tracing::instrument(name = "User::init", skip(sys))]
25+
pub fn init(sys: &mut sysinfo::System) -> Result<()> {
26+
let pid = sysinfo::get_current_pid()
27+
.map_err(|msg| SysinfoError { msg })
28+
.context(GetCurrentPidSnafu)?;
29+
// The process user is static, and refreshing it on every run leaks memory, so fetch it once and keep the cached value.
30+
sys.refresh_processes_specifics(
31+
sysinfo::ProcessesToUpdate::Some(&[pid]),
32+
false,
33+
ProcessRefreshKind::new().with_user(UpdateKind::OnlyIfNotSet),
34+
);
35+
Ok(())
36+
}
37+
2438
#[tracing::instrument(name = "User::collect_current", skip(sys))]
2539
pub fn collect_current(sys: &sysinfo::System) -> Result<Self> {
2640
let pid = sysinfo::get_current_pid()

0 commit comments

Comments
 (0)