From 966913dc089b10b3ed6c2a37a5346e2f87a4b397 Mon Sep 17 00:00:00 2001 From: arkon Date: Tue, 19 May 2026 10:57:49 +0200 Subject: [PATCH] fix(renderer): WebGL longtask fallback hardening (#89) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three follow-ups from #89 on top of #83's auto-fallback. Splits cleanly along the three sub-items so cherry-picking individual fixes stays possible. 1) Observer disconnect on context loss (the leak) `_installWebGLLongTaskGuard()`'s PerformanceObserver was only disconnected on the trip path. If the GPU context was lost before the trip threshold, the observer outlived the disposed addon and kept firing for every longtask the page emits — minor in absolute terms but matters for the 24+ hour sessions this repo cares about. Extracted `_disposeWebGLObserver()` (idempotent, swallows throws) and called from both the trip path and the `onContextLoss` handler. Documented as the call site for any future terminal-disposal path. 2) Hoisted thresholds to a single constant block `LONGTASK_MS` (200), `LONGTASK_COUNT` (3), `WINDOW_MS` (30000), `GRACE_MS` (5000), and `STICKY_EXPIRY_MS` (7d) are now in `WEBGL_FALLBACK` in constants.js. Replaces inline literals in `app.js:_installWebGLLongTaskGuard` and `terminal-ui.js`'s sticky-expiry check. 3) Tests for trip logic and observer cleanup Split the rolling-window arithmetic into a pure helper `evaluateWebGLLongTaskTrip(recent, entries, now)` so the threshold math is testable without driving a real PerformanceObserver (which can't be invoked deterministically from JS — entries arrive from the platform). `test/webgl-fallback.test.ts` (port 3166) covers: - WEBGL_FALLBACK exposes the documented thresholds - 3 longtasks inside 30s → trip - 3 longtasks spread over 60s → no trip (oldest pruned) - <200ms entries ignored - empty batches still prune stale entries - counts accumulate across batches - `_disposeWebGLObserver` is idempotent, calls disconnect once, swallows throws (driver-quirk defence) Files ----- - src/web/public/constants.js: +WEBGL_FALLBACK, +evaluateWebGLLongTaskTrip (exposed via window.X for tests). - src/web/public/app.js: _initWebGL.onContextLoss now disconnects the observer; _installWebGLLongTaskGuard delegates the trip math to the new helper and uses the hoisted constants; new _disposeWebGLObserver helper. - src/web/public/terminal-ui.js: STICKY_EXPIRY_MS replaces the inline 7-day literal. - test/webgl-fallback.test.ts: 9 Playwright-driven tests on port 3166. Closes #89. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/web/public/app.js | 33 +++-- src/web/public/constants.js | 46 +++++++ src/web/public/terminal-ui.js | 5 +- test/webgl-fallback.test.ts | 218 ++++++++++++++++++++++++++++++++++ 4 files changed, 288 insertions(+), 14 deletions(-) create mode 100644 test/webgl-fallback.test.ts diff --git a/src/web/public/app.js b/src/web/public/app.js index fca8f574..1a9ddde5 100644 --- a/src/web/public/app.js +++ b/src/web/public/app.js @@ -625,6 +625,7 @@ class CodemanApp { console.error('[CRASH-DIAG] WebGL context LOST — falling back to canvas renderer'); _crashDiag.log('WEBGL_LOST'); this._disableWebGLSticky('context-lost'); + this._disposeWebGLObserver(); this._webglAddon?.dispose(); this._webglAddon = null; }); @@ -636,9 +637,10 @@ class CodemanApp { /** * Watch for sustained main-thread stalls that indicate WebGL/GPU trouble. - * After 3 long tasks (>=200ms each) within 30s, dispose the WebGL addon and - * persist a sticky disable so subsequent reloads also use the DOM renderer. - * 5s grace period skips initial-load stalls. Force-re-enable: ?webgl=force. + * After WEBGL_FALLBACK.LONGTASK_COUNT long tasks (>=LONGTASK_MS each) within + * WINDOW_MS, dispose the WebGL addon and persist a sticky disable so + * subsequent reloads also use the DOM renderer. GRACE_MS skips initial-load + * stalls. Force-re-enable: ?webgl=force. */ _installWebGLLongTaskGuard() { if (typeof PerformanceObserver === 'undefined' || this._webglLongTaskObserver) return; @@ -648,19 +650,14 @@ class CodemanApp { this._webglLongTaskObserver = new PerformanceObserver((list) => { if (!this._webglAddon) return; const now = performance.now(); - if (now - installedAt < 5000) return; - for (const entry of list.getEntries()) { - if (entry.duration >= 200) recent.push(entry.startTime); - } - while (recent.length && now - recent[0] > 30000) recent.shift(); - if (recent.length >= 3) { - console.warn(`[CRASH-DIAG] WebGL long-task threshold (${recent.length} stalls/30s) — falling back to canvas renderer`); + if (now - installedAt < WEBGL_FALLBACK.GRACE_MS) return; + if (evaluateWebGLLongTaskTrip(recent, list.getEntries(), now)) { + console.warn(`[CRASH-DIAG] WebGL long-task threshold (${recent.length} stalls/${WEBGL_FALLBACK.WINDOW_MS}ms) — falling back to canvas renderer`); _crashDiag.log(`WEBGL_FALLBACK: ${recent.length}`); this._disableWebGLSticky('long-tasks'); + this._disposeWebGLObserver(); this._webglAddon?.dispose(); this._webglAddon = null; - try { this._webglLongTaskObserver.disconnect(); } catch {} - this._webglLongTaskObserver = null; try { this.terminal.refresh(0, this.terminal.rows - 1); } catch {} } }); @@ -668,6 +665,18 @@ class CodemanApp { } catch { /* longtask not supported */ } } + /** + * Disconnect the WebGL longtask observer. Idempotent. Called from the trip + * path, the onContextLoss handler, and any future terminal-teardown path — + * the observer outlives its addon otherwise, holding a closure reference + * over `this` for every long task the page emits. + */ + _disposeWebGLObserver() { + if (!this._webglLongTaskObserver) return; + try { this._webglLongTaskObserver.disconnect(); } catch {} + this._webglLongTaskObserver = null; + } + _disableWebGLSticky(reason) { try { localStorage.setItem('codeman-webgl-disabled', JSON.stringify({ reason, at: Date.now() })); diff --git a/src/web/public/constants.js b/src/web/public/constants.js index db309f3a..9375853b 100644 --- a/src/web/public/constants.js +++ b/src/web/public/constants.js @@ -71,6 +71,52 @@ const WINDOW_MIN_WIDTH_PX = 200; const WINDOW_MIN_HEIGHT_PX = 200; const WINDOW_DEFAULT_WIDTH_PX = 300; +// WebGL renderer auto-fallback thresholds. +// _installWebGLLongTaskGuard() observes longtask entries and disables WebGL +// after LONGTASK_COUNT stalls of >= LONGTASK_MS within WINDOW_MS. GRACE_MS +// suppresses the noisy initial-load stalls. STICKY_EXPIRY_MS is how long +// localStorage's webgl-disabled marker survives before we retry WebGL on a +// fresh load (driver/Chrome may have been updated). +const WEBGL_FALLBACK = { + LONGTASK_MS: 200, + LONGTASK_COUNT: 3, + WINDOW_MS: 30000, + GRACE_MS: 5000, + STICKY_EXPIRY_MS: 7 * 24 * 60 * 60 * 1000, +}; + +/** + * Pure rolling-window trip evaluator for the WebGL longtask guard. + * Mutates `recent` in place (prunes entries older than `now - WINDOW_MS`) + * and appends each new duration's startTime that meets the threshold. + * Returns true when the count inside the window reaches `LONGTASK_COUNT`. + * + * Exposed on `window` for unit testing — the production guard in app.js + * inlines this same logic in its PerformanceObserver callback. Splitting it + * out keeps the threshold math testable without a real PerformanceObserver. + * + * @param {number[]} recent - mutable array of startTimes inside the window + * @param {{startTime: number, duration: number}[]} entries - new longtask entries + * @param {number} now - performance.now() at evaluation time + * @param {typeof WEBGL_FALLBACK} [config=WEBGL_FALLBACK] - thresholds + * @returns {boolean} true if the rolling window has reached the trip count + */ +function evaluateWebGLLongTaskTrip(recent, entries, now, config = WEBGL_FALLBACK) { + for (const entry of entries) { + if (entry.duration >= config.LONGTASK_MS) recent.push(entry.startTime); + } + while (recent.length && now - recent[0] > config.WINDOW_MS) recent.shift(); + return recent.length >= config.LONGTASK_COUNT; +} + +// Expose for tests. `const` declarations at the top of a non-module script +// are global lexical bindings but not `window` properties, so explicit +// assignment is the test-visible API surface. +if (typeof window !== 'undefined') { + window.WEBGL_FALLBACK = WEBGL_FALLBACK; + window.evaluateWebGLLongTaskTrip = evaluateWebGLLongTaskTrip; +} + // Scheduler API — prioritize terminal writes over background UI updates. // scheduler.postTask('background') defers non-critical work (connection lines, panel renders) // so the main thread stays free for terminal rendering at 60fps. diff --git a/src/web/public/terminal-ui.js b/src/web/public/terminal-ui.js index b991a72f..df931cf6 100644 --- a/src/web/public/terminal-ui.js +++ b/src/web/public/terminal-ui.js @@ -197,8 +197,9 @@ Object.assign(CodemanApp.prototype, { const raw = localStorage.getItem('codeman-webgl-disabled'); if (!raw) return false; const { at } = JSON.parse(raw); - // Auto-expire after 7 days so we retry (driver may have been fixed) - if (Date.now() - at > 7 * 24 * 60 * 60 * 1000) { + // Auto-expire after WEBGL_FALLBACK.STICKY_EXPIRY_MS so we retry + // (driver/Chrome may have been updated). + if (Date.now() - at > WEBGL_FALLBACK.STICKY_EXPIRY_MS) { localStorage.removeItem('codeman-webgl-disabled'); return false; } diff --git a/test/webgl-fallback.test.ts b/test/webgl-fallback.test.ts new file mode 100644 index 00000000..e6d376d8 --- /dev/null +++ b/test/webgl-fallback.test.ts @@ -0,0 +1,218 @@ +/** + * WebGL longtask auto-fallback tests. + * + * Covers the three follow-ups from #89: + * 1. Pure trip-detection helper — rolling-window arithmetic for the + * "N longtasks of >=Xms within Yms" trip condition. Unit-tested + * independently of PerformanceObserver, which can't be driven + * deterministically from JS (entries arrive from the platform). + * 2. Constants are hoisted from inline literals to `WEBGL_FALLBACK` + * in constants.js — assert they exist with the documented values. + * 3. Observer disconnect — _disposeWebGLObserver() is idempotent and + * can be called from the onContextLoss path without the addon + * having been initialised. Mirrors the leak case in the issue: + * "observer outlives its addon" when teardown precedes a trip. + * + * Strategy: load the static app shell in a headless browser and drive + * the helper through page.evaluate(). No real PTY/tmux/WebGL needed — + * the trip math is pure and the dispose path is a couple of property + * mutations, both of which run on any page where app.js loaded. + * + * Port: 3166 (per MEMORY.md, ports 3150+ for tests) + */ + +import { describe, it, expect, beforeAll, afterAll } from 'vitest'; +import { chromium, type Browser, type Page } from 'playwright'; +import { WebServer } from '../src/web/server.js'; + +const PORT = 3166; +const BASE_URL = `http://localhost:${PORT}`; + +describe('WebGL longtask auto-fallback', () => { + let server: WebServer; + let browser: Browser; + let page: Page; + + beforeAll(async () => { + server = new WebServer(PORT, false, true); + await server.start(); + browser = await chromium.launch({ headless: true }); + page = await browser.newPage(); + await page.goto(BASE_URL, { waitUntil: 'domcontentloaded' }); + // Wait for constants.js + app.js to have loaded — both expose globals. + await page.waitForFunction( + () => + typeof (window as { WEBGL_FALLBACK?: unknown }).WEBGL_FALLBACK !== 'undefined' && + typeof (window as { evaluateWebGLLongTaskTrip?: unknown }).evaluateWebGLLongTaskTrip === 'function' && + typeof (window as { app?: unknown }).app !== 'undefined' + ); + }, 60000); + + afterAll(async () => { + if (browser) await browser.close(); + if (server) await server.stop(); + }, 60000); + + describe('constants are hoisted', () => { + it('WEBGL_FALLBACK exposes documented thresholds', async () => { + const cfg = await page.evaluate( + () => (window as unknown as { WEBGL_FALLBACK: Record }).WEBGL_FALLBACK + ); + expect(cfg).toEqual({ + LONGTASK_MS: 200, + LONGTASK_COUNT: 3, + WINDOW_MS: 30000, + GRACE_MS: 5000, + STICKY_EXPIRY_MS: 7 * 24 * 60 * 60 * 1000, + }); + }); + }); + + describe('evaluateWebGLLongTaskTrip — rolling window arithmetic', () => { + type EvalFn = ( + recent: number[], + entries: { startTime: number; duration: number }[], + now: number + ) => { tripped: boolean; recent: number[] }; + + /** Run the pure helper in the page and return the post-call state. */ + const run: EvalFn = async (recent, entries, now) => + page.evaluate( + ({ r, e, n }) => { + const fn = ( + window as unknown as { + evaluateWebGLLongTaskTrip: ( + rec: number[], + ents: { startTime: number; duration: number }[], + now: number + ) => boolean; + } + ).evaluateWebGLLongTaskTrip; + const recent = [...r]; + const tripped = fn(recent, e, n); + return { tripped, recent }; + }, + { r: recent, e: entries, n: now } + ) as unknown as { tripped: boolean; recent: number[] }; + + it('trips when 3 longtasks fall inside the 30s window', async () => { + const entries = [ + { startTime: 1000, duration: 250 }, + { startTime: 5000, duration: 300 }, + { startTime: 10000, duration: 220 }, + ]; + const result = await run([], entries, 12000); + expect(result.tripped).toBe(true); + expect(result.recent).toEqual([1000, 5000, 10000]); + }); + + it('does not trip when 3 longtasks are spread across 60s', async () => { + // 3 longtasks 25s apart — only the most recent two stay inside 30s. + const entries = [ + { startTime: 1000, duration: 250 }, + { startTime: 26000, duration: 250 }, + { startTime: 51000, duration: 250 }, + ]; + const result = await run([], entries, 51100); + expect(result.tripped).toBe(false); + // First entry pruned (1000 is >30s before now=51100); 26000 and 51000 stay. + expect(result.recent).toEqual([26000, 51000]); + }); + + it('ignores entries shorter than 200ms', async () => { + const entries = [ + { startTime: 1000, duration: 199 }, + { startTime: 2000, duration: 100 }, + { startTime: 3000, duration: 50 }, + ]; + const result = await run([], entries, 3500); + expect(result.tripped).toBe(false); + expect(result.recent).toEqual([]); + }); + + it('prunes stale entries even when no new ones arrive', async () => { + // Existing window has 2 stale + 1 fresh; an empty batch should still + // age out the stale ones so the next real batch evaluates correctly. + const recent = [1000, 5000, 40000]; + const result = await run(recent, [], 41000); + expect(result.tripped).toBe(false); + expect(result.recent).toEqual([40000]); + }); + + it('counts entries cumulatively across batches', async () => { + // Two batches of 2 entries each, all inside the window — second + // batch should push the cumulative count to 4 and trip. + const recent: number[] = []; + const first = await run( + recent, + [ + { startTime: 1000, duration: 250 }, + { startTime: 2000, duration: 250 }, + ], + 3000 + ); + expect(first.tripped).toBe(false); + expect(first.recent).toEqual([1000, 2000]); + + const second = await run( + first.recent, + [ + { startTime: 4000, duration: 250 }, + { startTime: 5000, duration: 250 }, + ], + 6000 + ); + expect(second.tripped).toBe(true); + expect(second.recent).toEqual([1000, 2000, 4000, 5000]); + }); + }); + + describe('_disposeWebGLObserver', () => { + it('is idempotent — safe to call when no observer was installed', async () => { + const ok = await page.evaluate(() => { + const app = ( + window as unknown as { app: { _disposeWebGLObserver: () => void; _webglLongTaskObserver: unknown } } + ).app; + app._webglLongTaskObserver = null; + app._disposeWebGLObserver(); + app._disposeWebGLObserver(); + return app._webglLongTaskObserver === null; + }); + expect(ok).toBe(true); + }); + + it('disconnects a stub observer and nulls the reference', async () => { + const result = await page.evaluate(() => { + const app = ( + window as unknown as { app: { _disposeWebGLObserver: () => void; _webglLongTaskObserver: unknown } } + ).app; + let disconnectCalls = 0; + app._webglLongTaskObserver = { + disconnect() { + disconnectCalls++; + }, + } as unknown; + app._disposeWebGLObserver(); + return { disconnectCalls, ref: app._webglLongTaskObserver }; + }); + expect(result.disconnectCalls).toBe(1); + expect(result.ref).toBeNull(); + }); + + it('swallows a throwing disconnect — guards against driver quirks', async () => { + const result = await page.evaluate(() => { + const app = ( + window as unknown as { app: { _disposeWebGLObserver: () => void; _webglLongTaskObserver: unknown } } + ).app; + app._webglLongTaskObserver = { + disconnect() { + throw new Error('synthetic'); + }, + } as unknown; + app._disposeWebGLObserver(); + return app._webglLongTaskObserver; + }); + expect(result).toBeNull(); + }); + }); +});