From 62387b6749af6f022640012ef4f2333208fa1af2 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:41:32 -0400 Subject: [PATCH] WS3 foundation: config-advisory facts root recommendations below the 0.5 threshold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit InferenceEngine.BuildStories only let facts with Severity >= 0.5 become story roots. Config facts (DB_CONFIG, future SERVER_CONFIG) score ~0.3 for a standing misconfig (RCSI off, single auto_shrink), so they NEVER rooted a finding on a quiet server — they only surfaced when an incident amplified them past 0.5 (e.g. LCK_M_S reader/writer contention for RCSI). That's why a healthy server with 9 RCSI-off databases produced zero recommendations: a standing misconfiguration is an ADVISORY the operator should see regardless of current load, not a severity-gated incident. Let config-advisory fact keys root at ANY positive severity. The existing severity-ordered `consumed` traversal still suppresses duplicates: a higher-severity incident story that consumes the config fact (CXPACKET → CONFIG_MAXDOP, LCK_M_S → DB_CONFIG) wins; only an un-consumed config fact roots a standalone recommendation — which also resolves the D6 leaf-fact double-display concern for free. Unit-tested both directions (config roots at 0.3; an incident fact at 0.3 does not). This is the WS3 enabler: with it, DB_CONFIG recommendations (and the new SERVER_CONFIG facts to follow) appear on any server that actually has the issue. Co-Authored-By: Claude Opus 4.8 (1M context) --- Dashboard.Tests/InferenceEngineTests.cs | 33 +++++++++++++++++++ .../InferenceEngine.cs | 24 ++++++++++++-- 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/Dashboard.Tests/InferenceEngineTests.cs b/Dashboard.Tests/InferenceEngineTests.cs index 3b4f4179..4f3d9673 100644 --- a/Dashboard.Tests/InferenceEngineTests.cs +++ b/Dashboard.Tests/InferenceEngineTests.cs @@ -47,4 +47,37 @@ public void Graph_CxPacketEdgeDoesNotFire_WhenSosIsLow() var edges = graph.GetActiveEdges("CXPACKET", facts); Assert.DoesNotContain(edges, e => e.Destination == "SOS_SCHEDULER_YIELD"); } + + // WS3: a config-advisory fact (DB_CONFIG/SERVER_CONFIG) roots a standalone recommendation + // at its base severity (e.g. RCSI-off = 0.3), below the 0.5 incident threshold — so a + // standing misconfig surfaces on a quiet, healthy server. An incident fact at the same + // severity does NOT root. + [Fact] + public void ConfigFact_RootsStandalone_BelowMinimumSeverity() + { + var engine = new InferenceEngine(new RelationshipGraph()); + var facts = new List + { + new() { Key = "DB_CONFIG", Source = "config", Value = 1, Severity = 0.3, + Metadata = new Dictionary { ["rcsi_off_count"] = 9 } } + }; + + var stories = engine.BuildStories(facts); + + Assert.Contains(stories, s => s.RootFactKey == "DB_CONFIG"); + } + + [Fact] + public void IncidentFact_BelowMinimumSeverity_DoesNotRoot() + { + var engine = new InferenceEngine(new RelationshipGraph()); + var facts = new List + { + new() { Key = "CPU_SQL_PERCENT", Source = "cpu", Value = 60, Severity = 0.3 } + }; + + var stories = engine.BuildStories(facts); + + Assert.DoesNotContain(stories, s => s.RootFactKey == "CPU_SQL_PERCENT"); + } } diff --git a/PerformanceMonitor.Analysis/InferenceEngine.cs b/PerformanceMonitor.Analysis/InferenceEngine.cs index f18ec1e6..5d52a299 100644 --- a/PerformanceMonitor.Analysis/InferenceEngine.cs +++ b/PerformanceMonitor.Analysis/InferenceEngine.cs @@ -24,6 +24,21 @@ public class InferenceEngine private const double MinimumSeverityThreshold = 0.5; private const int MaxPathDepth = 10; // Safety limit + /// + /// Config-advisory fact keys that root a finding at ANY positive severity, bypassing the + /// MinimumSeverityThreshold. A standing misconfiguration (RCSI off, auto-shrink on, MAXDOP at + /// a silly default) is an advisory the operator should see regardless of current load — unlike + /// an incident fact, which must clear 0.5 to be worth surfacing. The existing severity-ordered + /// consumed traversal still suppresses duplicates: a higher-severity incident story that + /// consumes the config fact (e.g. CXPACKET → CONFIG_MAXDOP, or LCK_M_S → DB_CONFIG) wins, and + /// only an UN-consumed config fact roots a standalone recommendation. + /// + private static readonly HashSet ConfigAdvisoryRootKeys = new(StringComparer.Ordinal) + { + "DB_CONFIG", + "SERVER_CONFIG", + }; + private readonly RelationshipGraph _graph; public InferenceEngine(RelationshipGraph graph) @@ -43,9 +58,14 @@ public List BuildStories(List facts) .ToDictionary(f => f.Key, f => f); var consumed = new HashSet(); - // Process facts in severity order + // Process facts in severity order. Incident facts must clear the 0.5 threshold to root; + // config-advisory facts (DB_CONFIG/SERVER_CONFIG) root at any positive severity so a + // standing misconfig surfaces on a quiet, healthy server (it would otherwise never reach + // 0.5 without contention — e.g. RCSI-off is base 0.3). Severity ordering + `consumed` + // (below) keep an incident story from being shadowed by, or duplicating, its config leaf. var entryPoints = facts - .Where(f => f.Severity >= MinimumSeverityThreshold) + .Where(f => f.Severity >= MinimumSeverityThreshold + || (ConfigAdvisoryRootKeys.Contains(f.Key) && f.Severity > 0)) .OrderByDescending(f => f.Severity) .ToList();