Skip to content

Commit 25605f1

Browse files
committed
Fingerprint ordering fix
1 parent 6a92df3 commit 25605f1

File tree

2 files changed

+89
-4
lines changed

2 files changed

+89
-4
lines changed

apps/webapp/app/utils/errorFingerprinting.ts

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -43,20 +43,22 @@ export function normalizeErrorMessage(message: string): string {
4343
.replace(/run_[a-zA-Z0-9]+/g, "<run-id>")
4444
// Task run friendly IDs (task_xxxxx or similar)
4545
.replace(/\b[a-z]+_[a-zA-Z0-9]{8,}\b/g, "<id>")
46-
// Standalone numeric IDs (4+ digits)
47-
.replace(/\b\d{4,}\b/g, "<id>")
46+
// --- Specific patterns must run before generic numeric/path replacements ---
4847
// ISO 8601 timestamps
4948
.replace(/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?Z?/g, "<timestamp>")
5049
// Unix timestamps (10 or 13 digits)
5150
.replace(/\b\d{10,13}\b/g, "<timestamp>")
51+
// URLs (before path regex, which would strip the URL's path component)
52+
.replace(/https?:\/\/[^\s]+/g, "<url>")
53+
// --- Generic replacements ---
54+
// Standalone numeric IDs (4+ digits)
55+
.replace(/\b\d{4,}\b/g, "<id>")
5256
// File paths (Unix style)
5357
.replace(/(?:\/[^\/\s]+){2,}/g, "<path>")
5458
// File paths (Windows style)
5559
.replace(/[A-Z]:\\(?:[^\\]+\\)+[^\\]+/g, "<path>")
5660
// Email addresses
5761
.replace(/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b/g, "<email>")
58-
// URLs
59-
.replace(/https?:\/\/[^\s]+/g, "<url>")
6062
// Memory addresses (0x...)
6163
.replace(/0x[0-9a-fA-F]{8,}/g, "<addr>")
6264
// Quoted strings with dynamic content

apps/webapp/test/errorFingerprinting.test.ts

Lines changed: 83 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,89 @@ describe("normalizeErrorMessage", () => {
118118
const normalized = normalizeErrorMessage(message);
119119
expect(normalized).toBe("Connection timeout");
120120
});
121+
122+
describe("ordering: specific patterns before generic ones", () => {
123+
it("ISO timestamp year should not be consumed by numeric ID regex", () => {
124+
const message = "Deadline was 2025-12-31T23:59:59Z";
125+
expect(normalizeErrorMessage(message)).toBe("Deadline was <timestamp>");
126+
});
127+
128+
it("ISO timestamp without trailing Z should normalize correctly", () => {
129+
const message = "Started at 2024-01-15T08:00:00";
130+
expect(normalizeErrorMessage(message)).toBe("Started at <timestamp>");
131+
});
132+
133+
it("Unix timestamp (10 digits) should not become <id>", () => {
134+
const message = "Token expires 1700000000";
135+
expect(normalizeErrorMessage(message)).toBe("Token expires <timestamp>");
136+
});
137+
138+
it("Unix timestamp (13 digits) should not become <id>", () => {
139+
const message = "Sent at 1700000000000";
140+
expect(normalizeErrorMessage(message)).toBe("Sent at <timestamp>");
141+
});
142+
143+
it("URL path should not be stripped before URL regex runs", () => {
144+
const message = "Webhook failed for https://hooks.example.com/webhook/abc";
145+
expect(normalizeErrorMessage(message)).toBe("Webhook failed for <url>");
146+
});
147+
148+
it("URL with port and path should normalize to <url>", () => {
149+
const message = "Cannot reach http://localhost:8080/health/ready";
150+
expect(normalizeErrorMessage(message)).toBe("Cannot reach <url>");
151+
});
152+
153+
it("URL with query string should normalize to <url>", () => {
154+
const message = "GET https://api.example.com/v2/users?page=1&limit=50 returned 500";
155+
expect(normalizeErrorMessage(message)).toBe("GET <url> returned 500");
156+
});
157+
158+
it("message with both a URL and a timestamp", () => {
159+
const message =
160+
"Request to https://api.example.com/data failed at 2025-06-15T10:30:00Z";
161+
expect(normalizeErrorMessage(message)).toBe(
162+
"Request to <url> failed at <timestamp>"
163+
);
164+
});
165+
166+
it("message with a URL and a unix timestamp", () => {
167+
const message = "Callback to https://example.com/hook timed out after 1700000000";
168+
expect(normalizeErrorMessage(message)).toBe(
169+
"Callback to <url> timed out after <timestamp>"
170+
);
171+
});
172+
173+
it("path-like string that is NOT a URL should still become <path>", () => {
174+
const message = "Cannot read /var/log/app/error.log";
175+
expect(normalizeErrorMessage(message)).toBe("Cannot read <path>");
176+
});
177+
});
178+
179+
describe("fingerprint stability: same error class groups together despite dynamic values", () => {
180+
it("errors differing only in ISO timestamp should share a fingerprint", () => {
181+
const e1 = { type: "TimeoutError", message: "Timed out at 2025-01-01T00:00:00Z" };
182+
const e2 = { type: "TimeoutError", message: "Timed out at 2026-06-15T12:30:00Z" };
183+
expect(calculateErrorFingerprint(e1)).toBe(calculateErrorFingerprint(e2));
184+
});
185+
186+
it("errors differing only in URL path should share a fingerprint", () => {
187+
const e1 = {
188+
type: "FetchError",
189+
message: "Failed to fetch https://api.example.com/users/123",
190+
};
191+
const e2 = {
192+
type: "FetchError",
193+
message: "Failed to fetch https://api.example.com/orders/456",
194+
};
195+
expect(calculateErrorFingerprint(e1)).toBe(calculateErrorFingerprint(e2));
196+
});
197+
198+
it("errors differing only in unix timestamp should share a fingerprint", () => {
199+
const e1 = { type: "ExpiredError", message: "Token expired at 1700000000" };
200+
const e2 = { type: "ExpiredError", message: "Token expired at 1800000000" };
201+
expect(calculateErrorFingerprint(e1)).toBe(calculateErrorFingerprint(e2));
202+
});
203+
});
121204
});
122205

123206
describe("normalizeStackTrace", () => {

0 commit comments

Comments
 (0)