Skip to content

Commit e968e0b

Browse files
authored
feat: add host-level summaries to markdown and json reports (#17)
1 parent 1d3979b commit e968e0b

12 files changed

Lines changed: 550 additions & 3 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ All notable user-visible changes should be recorded here.
88

99
- Added sanitized golden `report.md` / `report.json` regression fixtures to lock report contracts.
1010
- Added conservative parser coverage for `Accepted publickey` plus selected `pam_faillock` / `pam_sss` variants.
11+
- Added compact host-level summaries to Markdown and JSON reports for multi-host inputs.
1112

1213
### Changed
1314

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,9 @@ The CLI writes:
106106
- `report.md`
107107
- `report.json`
108108

109-
into the output directory you provide. If you omit the output directory, the files are written into the current working directory.
109+
into the output directory you provide. If you omit the output directory, the files are written into the current working directory.
110+
111+
When an input spans multiple hostnames, both reports add compact host-level summaries without changing detector thresholds or introducing cross-host correlation logic.
110112

111113
## Sample Output
112114

src/main.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,8 @@ int main(int argc, char* argv[]) {
153153
parsed.quality,
154154
parsed.events,
155155
findings,
156-
parsed.warnings};
156+
parsed.warnings,
157+
app_config.detector.auth_signal_mappings};
157158

158159
loglens::write_reports(report_data, options.output_directory);
159160

src/report.cpp

Lines changed: 235 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,25 @@
44
#include <filesystem>
55
#include <fstream>
66
#include <iomanip>
7+
#include <optional>
78
#include <sstream>
89
#include <string>
910
#include <string_view>
11+
#include <unordered_map>
12+
#include <unordered_set>
1013
#include <vector>
1114

1215
namespace loglens {
1316
namespace {
1417

18+
struct HostSummary {
19+
std::string hostname;
20+
std::size_t parsed_event_count = 0;
21+
std::size_t finding_count = 0;
22+
std::size_t warning_count = 0;
23+
std::vector<std::pair<EventType, std::size_t>> event_counts;
24+
};
25+
1526
std::string escape_json(std::string_view value) {
1627
std::string escaped;
1728
escaped.reserve(value.size());
@@ -125,13 +136,198 @@ std::string format_parse_success_percent(double rate) {
125136
return output.str();
126137
}
127138

139+
// Returns `value` with all leading spaces and horizontal tabs removed.
// The result is empty when `value` holds nothing but that whitespace.
std::string_view trim_left(std::string_view value) {
    const auto first_kept = value.find_first_not_of(" \t");
    value.remove_prefix(first_kept == std::string_view::npos ? value.size() : first_kept);
    return value;
}

// Splits the next whitespace-delimited token off the front of `input`.
//
// Leading spaces/tabs are skipped first. The token runs up to (not including)
// the next space or tab; `input` is advanced past the token and that single
// separator. Returns an empty view when no token is left.
//
// Tabs are accepted as separators so that tokenisation agrees with
// trim_left(), which already treats them as skippable whitespace. Splitting on
// ' ' only would hand back values such as "09:00:00\thost" as one token.
std::string_view consume_token(std::string_view& input) {
    input = trim_left(input);
    if (input.empty()) {
        return {};
    }

    const auto separator = input.find_first_of(" \t");
    if (separator == std::string_view::npos) {
        // Last token on the line: hand it back and leave nothing behind.
        const auto token = input;
        input = {};
        return token;
    }

    const auto token = input.substr(0, separator);
    input.remove_prefix(separator + 1);
    return token;
}
163+
164+
std::optional<std::string> extract_hostname_from_input_line(std::string_view line, InputMode input_mode) {
165+
auto remaining = line;
166+
switch (input_mode) {
167+
case InputMode::SyslogLegacy:
168+
if (consume_token(remaining).empty()
169+
|| consume_token(remaining).empty()
170+
|| consume_token(remaining).empty()) {
171+
return std::nullopt;
172+
}
173+
break;
174+
case InputMode::JournalctlShortFull:
175+
if (consume_token(remaining).empty()
176+
|| consume_token(remaining).empty()
177+
|| consume_token(remaining).empty()
178+
|| consume_token(remaining).empty()) {
179+
return std::nullopt;
180+
}
181+
break;
182+
default:
183+
return std::nullopt;
184+
}
185+
186+
const auto hostname = consume_token(remaining);
187+
if (hostname.empty()) {
188+
return std::nullopt;
189+
}
190+
191+
return std::string(hostname);
192+
}
193+
194+
std::unordered_map<std::size_t, std::string> load_hostnames_by_line(const ReportData& data) {
195+
std::unordered_map<std::size_t, std::string> hostnames_by_line;
196+
if (data.warnings.empty()) {
197+
return hostnames_by_line;
198+
}
199+
200+
std::ifstream input(data.input_path);
201+
if (!input) {
202+
return hostnames_by_line;
203+
}
204+
205+
std::string line;
206+
std::size_t line_number = 0;
207+
while (std::getline(input, line)) {
208+
++line_number;
209+
const auto hostname = extract_hostname_from_input_line(line, data.parse_metadata.input_mode);
210+
if (hostname.has_value()) {
211+
hostnames_by_line.emplace(line_number, *hostname);
212+
}
213+
}
214+
215+
return hostnames_by_line;
216+
}
217+
218+
bool is_matching_finding_signal(const Finding& finding, const AuthSignal& signal) {
219+
if (signal.timestamp < finding.first_seen || signal.timestamp > finding.last_seen) {
220+
return false;
221+
}
222+
223+
switch (finding.type) {
224+
case FindingType::BruteForce:
225+
return signal.counts_as_terminal_auth_failure
226+
&& signal.source_ip == finding.subject;
227+
case FindingType::MultiUserProbing:
228+
if (!signal.counts_as_attempt_evidence || signal.source_ip != finding.subject) {
229+
return false;
230+
}
231+
if (finding.usernames.empty()) {
232+
return true;
233+
}
234+
return std::find(
235+
finding.usernames.begin(),
236+
finding.usernames.end(),
237+
signal.username)
238+
!= finding.usernames.end();
239+
case FindingType::SudoBurst:
240+
return signal.counts_as_sudo_burst_evidence
241+
&& signal.username == finding.subject;
242+
default:
243+
return false;
244+
}
245+
}
246+
247+
std::vector<HostSummary> build_host_summaries(const ReportData& data) {
248+
std::unordered_map<std::string, HostSummary> summaries_by_host;
249+
250+
for (const auto& event : data.events) {
251+
if (event.hostname.empty()) {
252+
continue;
253+
}
254+
255+
auto& summary = summaries_by_host[event.hostname];
256+
summary.hostname = event.hostname;
257+
++summary.parsed_event_count;
258+
}
259+
260+
const auto hostnames_by_line = load_hostnames_by_line(data);
261+
for (const auto& warning : data.warnings) {
262+
const auto hostname_it = hostnames_by_line.find(warning.line_number);
263+
if (hostname_it == hostnames_by_line.end() || hostname_it->second.empty()) {
264+
continue;
265+
}
266+
267+
auto& summary = summaries_by_host[hostname_it->second];
268+
summary.hostname = hostname_it->second;
269+
++summary.warning_count;
270+
}
271+
272+
if (summaries_by_host.size() <= 1) {
273+
return {};
274+
}
275+
276+
std::unordered_map<std::size_t, std::string> hostname_by_event_line;
277+
hostname_by_event_line.reserve(data.events.size());
278+
std::unordered_map<std::string, std::vector<Event>> events_by_host;
279+
events_by_host.reserve(summaries_by_host.size());
280+
281+
for (const auto& event : data.events) {
282+
hostname_by_event_line.emplace(event.line_number, event.hostname);
283+
events_by_host[event.hostname].push_back(event);
284+
}
285+
286+
const auto signals = build_auth_signals(data.events, data.auth_signal_mappings);
287+
for (const auto& finding : data.findings) {
288+
std::unordered_set<std::string> matching_hosts;
289+
for (const auto& signal : signals) {
290+
if (!is_matching_finding_signal(finding, signal)) {
291+
continue;
292+
}
293+
294+
const auto hostname_it = hostname_by_event_line.find(signal.line_number);
295+
if (hostname_it == hostname_by_event_line.end() || hostname_it->second.empty()) {
296+
continue;
297+
}
298+
matching_hosts.insert(hostname_it->second);
299+
}
300+
301+
for (const auto& hostname : matching_hosts) {
302+
++summaries_by_host[hostname].finding_count;
303+
}
304+
}
305+
306+
std::vector<HostSummary> summaries;
307+
summaries.reserve(summaries_by_host.size());
308+
for (auto& [hostname, summary] : summaries_by_host) {
309+
const auto events_it = events_by_host.find(hostname);
310+
if (events_it != events_by_host.end()) {
311+
summary.event_counts = build_event_counts(events_it->second);
312+
}
313+
summaries.push_back(std::move(summary));
314+
}
315+
316+
std::sort(summaries.begin(), summaries.end(), [](const HostSummary& left, const HostSummary& right) {
317+
return left.hostname < right.hostname;
318+
});
319+
320+
return summaries;
321+
}
322+
128323
} // namespace
129324

130325
std::string render_markdown_report(const ReportData& data) {
131326
std::ostringstream output;
132327
const auto findings = sorted_findings(data.findings);
133328
const auto warnings = sorted_warnings(data.warnings);
134329
const auto event_counts = build_event_counts(data.events);
330+
const auto host_summaries = build_host_summaries(data);
135331

136332
output << "# LogLens Report\n\n";
137333
output << "## Summary\n\n";
@@ -149,6 +345,19 @@ std::string render_markdown_report(const ReportData& data) {
149345
output << "- Findings: " << findings.size() << '\n';
150346
output << "- Parser warnings: " << warnings.size() << "\n\n";
151347

348+
if (!host_summaries.empty()) {
349+
output << "## Host Summary\n\n";
350+
output << "| Host | Parsed Events | Findings | Warnings |\n";
351+
output << "| --- | ---: | ---: | ---: |\n";
352+
for (const auto& summary : host_summaries) {
353+
output << "| " << summary.hostname
354+
<< " | " << summary.parsed_event_count
355+
<< " | " << summary.finding_count
356+
<< " | " << summary.warning_count << " |\n";
357+
}
358+
output << '\n';
359+
}
360+
152361
output << "## Findings\n\n";
153362
if (findings.empty()) {
154363
output << "No configured detections matched the analyzed events.\n\n";
@@ -205,6 +414,7 @@ std::string render_json_report(const ReportData& data) {
205414
const auto findings = sorted_findings(data.findings);
206415
const auto warnings = sorted_warnings(data.warnings);
207416
const auto event_counts = build_event_counts(data.events);
417+
const auto host_summaries = build_host_summaries(data);
208418

209419
output << "{\n";
210420
output << " \"tool\": \"LogLens\",\n";
@@ -236,7 +446,31 @@ std::string render_json_report(const ReportData& data) {
236446
output << " {\"event_type\": \"" << to_string(type) << "\", \"count\": " << count << "}";
237447
output << (index + 1 == event_counts.size() ? "\n" : ",\n");
238448
}
239-
output << " ],\n";
449+
output << " ]";
450+
if (!host_summaries.empty()) {
451+
output << ",\n";
452+
output << " \"host_summaries\": [\n";
453+
for (std::size_t host_index = 0; host_index < host_summaries.size(); ++host_index) {
454+
const auto& summary = host_summaries[host_index];
455+
output << " {\n";
456+
output << " \"hostname\": \"" << escape_json(summary.hostname) << "\",\n";
457+
output << " \"parsed_event_count\": " << summary.parsed_event_count << ",\n";
458+
output << " \"finding_count\": " << summary.finding_count << ",\n";
459+
output << " \"warning_count\": " << summary.warning_count << ",\n";
460+
output << " \"event_counts\": [\n";
461+
for (std::size_t event_index = 0; event_index < summary.event_counts.size(); ++event_index) {
462+
const auto& [type, count] = summary.event_counts[event_index];
463+
output << " {\"event_type\": \"" << to_string(type) << "\", \"count\": " << count << "}";
464+
output << (event_index + 1 == summary.event_counts.size() ? "\n" : ",\n");
465+
}
466+
output << " ]\n";
467+
output << " }";
468+
output << (host_index + 1 == host_summaries.size() ? "\n" : ",\n");
469+
}
470+
output << " ],\n";
471+
} else {
472+
output << ",\n";
473+
}
240474
output << " \"findings\": [\n";
241475
for (std::size_t index = 0; index < findings.size(); ++index) {
242476
const auto& finding = findings[index];

src/report.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#pragma once
22

3+
#include "signal.hpp"
34
#include "detector.hpp"
45
#include "parser.hpp"
56

@@ -16,6 +17,7 @@ struct ReportData {
1617
std::vector<Event> events;
1718
std::vector<Finding> findings;
1819
std::vector<ParseWarning> warnings;
20+
AuthSignalConfig auth_signal_mappings;
1921
};
2022

2123
std::string render_markdown_report(const ReportData& data);
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
Wed 2026-03-11 09:00:00 UTC alpha-host sshd[2301]: Failed password for invalid user admin from 203.0.113.10 port 52022 ssh2
2+
Wed 2026-03-11 09:01:05 UTC alpha-host sshd[2302]: Failed password for root from 203.0.113.10 port 52030 ssh2
3+
Wed 2026-03-11 09:02:10 UTC alpha-host sshd[2303]: Failed password for test from 203.0.113.10 port 52040 ssh2
4+
Wed 2026-03-11 09:03:44 UTC alpha-host sshd[2304]: Failed password for guest from 203.0.113.10 port 52050 ssh2
5+
Wed 2026-03-11 09:04:05 UTC alpha-host sshd[2305]: Failed password for invalid user deploy from 203.0.113.10 port 52060 ssh2
6+
Wed 2026-03-11 09:10:10 UTC beta-host sshd[2401]: Accepted publickey for alice from 203.0.113.20 port 52111 ssh2
7+
Wed 2026-03-11 09:11:00 UTC beta-host sudo: alice : TTY=pts/0 ; PWD=/home/alice ; USER=root ; COMMAND=/usr/bin/systemctl restart ssh
8+
Wed 2026-03-11 09:12:10 UTC beta-host sudo: alice : TTY=pts/0 ; PWD=/home/alice ; USER=root ; COMMAND=/usr/bin/journalctl -xe
9+
Wed 2026-03-11 09:14:15 UTC beta-host sudo: alice : TTY=pts/0 ; PWD=/home/alice ; USER=root ; COMMAND=/usr/bin/vi /etc/ssh/sshd_config
10+
Wed 2026-03-11 09:15:12 UTC alpha-host sshd[2306]: Connection closed by authenticating user alice 203.0.113.50 port 52290 [preauth]
11+
Wed 2026-03-11 09:16:18 UTC beta-host sshd[2402]: Timeout, client not responding from 203.0.113.51 port 52291

0 commit comments

Comments
 (0)