From dbc82419ee9fcb895533348721fb31d5c2007535 Mon Sep 17 00:00:00 2001 From: Kaushik Gnanaskandan Date: Mon, 8 Jun 2026 05:31:17 -0700 Subject: [PATCH 1/7] =?UTF-8?q?feat(core):=20VerifyResultSchema=20+=20veri?= =?UTF-8?q?fyHistory;=20bump=20schema=201=E2=86=922=20(RFC=200002=20Task?= =?UTF-8?q?=20A=20vendored)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds VerifyResultSchema, ComponentStylesDeltaSchema, BoundingRectDeltaSchema, and VerifyVerdictSchema to @domscribe/core. Annotation gains an optional verifyHistory: VerifyResult[] field so older clients ignore it. ANNOTATION_VERIFY API path added to the API_PATHS constants alongside the existing annotation routes. Schema version bumps 1 → 2; migration is a pure stamp (additive only). Per issue #52 — vendoring Task A pieces because the parallel issue #51 PR is not in flight. Will reconcile when Task A lands. --- .../domscribe-core/src/lib/constants/index.ts | 1 + .../migrations/annotation-migrations.spec.ts | 12 +- .../lib/migrations/annotation-migrations.ts | 15 ++- .../src/lib/types/annotation.ts | 106 +++++++++++++++++- .../storage/file-annotation-storage.spec.ts | 6 +- 5 files changed, 127 insertions(+), 13 deletions(-) diff --git a/packages/domscribe-core/src/lib/constants/index.ts b/packages/domscribe-core/src/lib/constants/index.ts index 1535b1c..bd3ae5c 100644 --- a/packages/domscribe-core/src/lib/constants/index.ts +++ b/packages/domscribe-core/src/lib/constants/index.ts @@ -17,6 +17,7 @@ export const API_PATHS = { ANNOTATION_RESPONSE: `/annotations/:id/response`, ANNOTATION_PROCESS: `/annotations/process`, ANNOTATION_SEARCH: `/annotations/search`, + ANNOTATION_VERIFY: `/annotations/:id/verify`, // Manifest endpoints MANIFEST: `/manifest`, diff --git a/packages/domscribe-core/src/lib/migrations/annotation-migrations.spec.ts b/packages/domscribe-core/src/lib/migrations/annotation-migrations.spec.ts index c519dd8..c54c1be 100644 --- a/packages/domscribe-core/src/lib/migrations/annotation-migrations.spec.ts +++ b/packages/domscribe-core/src/lib/migrations/annotation-migrations.spec.ts @@ -56,15 +56,15 @@ describe('migrateAnnotation', () => { expect(result.metadata.schemaVersion).toBe(ANNOTATION_SCHEMA_VERSION); }); - it('should default to version 1 when metadata has no schemaVersion', () => { - // With ANNOTATION_SCHEMA_VERSION === 1 and no field, readVersion returns 1. - // Since 1 === ANNOTATION_SCHEMA_VERSION, no migration steps run — it just stamps. - const raw = buildRawAnnotation(); - delete (raw['metadata'] as Record)['schemaVersion']; + it('should migrate v1 annotations forward (verifyHistory stays absent — additive only)', () => { + const raw = buildRawAnnotation({ + metadata: { schemaVersion: 1 }, + }); const result = migrateAnnotation(raw); - expect(result.metadata.schemaVersion).toBe(1); + expect(result.metadata.schemaVersion).toBe(ANNOTATION_SCHEMA_VERSION); + expect(result.verifyHistory).toBeUndefined(); }); it('should default to version 1 when metadata is missing entirely', () => { diff --git a/packages/domscribe-core/src/lib/migrations/annotation-migrations.ts b/packages/domscribe-core/src/lib/migrations/annotation-migrations.ts index 6457af3..bbf2476 100644 --- a/packages/domscribe-core/src/lib/migrations/annotation-migrations.ts +++ b/packages/domscribe-core/src/lib/migrations/annotation-migrations.ts @@ -16,12 +16,19 @@ import { /** * Registry of migration functions keyed by the version they migrate FROM. * e.g. migrationSteps[1] migrates v1 → v2. - * - * Currently empty — only v1 exists. When a v2 schema is introduced, add: - * migrationSteps[1] = (data: Record) => { … mutate … }; */ const migrationSteps: Record) => void> = - {}; + { + // v1 → v2: introduce optional `verifyHistory: VerifyResult[]` on the + // Annotation root (RFC 0002). Migration is a pure stamp — older + // annotations had no verify data, so no field needs to be synthesized + // and consumers MUST treat `verifyHistory` as optional. + 1: () => { + // Intentionally no-op: the field is optional and additive. The + // schemaVersion bump alone is sufficient; we keep the slot so + // migrateAnnotation does not throw at version 1. + }, + }; /** * Read `metadata.schemaVersion` from raw JSON, defaulting to 1 for diff --git a/packages/domscribe-core/src/lib/types/annotation.ts b/packages/domscribe-core/src/lib/types/annotation.ts index b83bd57..06c6fcb 100644 --- a/packages/domscribe-core/src/lib/types/annotation.ts +++ b/packages/domscribe-core/src/lib/types/annotation.ts @@ -100,8 +100,12 @@ export const AnnotationIdSchema = z /** * Current annotation schema version. Bump when the Annotation shape changes. + * + * v1 → v2: introduce optional `verifyHistory: VerifyResult[]` on Annotation + * for the RFC 0002 verify_after_edit workflow. Older clients ignore + * the field; the migration is a pure stamp (additive change). */ -export const ANNOTATION_SCHEMA_VERSION = 1; +export const ANNOTATION_SCHEMA_VERSION = 2; export const AnnotationMetadataSchema = z.object({ id: AnnotationIdSchema, @@ -188,6 +192,100 @@ export const AgentResponseSchema = z.object({ message: z.string().optional().describe('Message from the agent'), }); +/** + * Verdict produced by `verify_after_edit` comparing pre/post-edit captures. + * + * match — visual + computed-style + boundingRect within tolerance + * partial — some axes match, some drifted (agent should reconcile) + * no_change — post-edit capture is indistinguishable from pre-edit + * baseline; almost always means the edit did not land in + * the rendered output the user is looking at + * regression — measurable backslide on at least one axis vs. baseline + * + * See RFC 0002 §Decision for the verdict semantics. + */ +export const VerifyVerdictSchema = z.enum([ + 'match', + 'partial', + 'no_change', + 'regression', +]); + +/** + * Per-property delta keyed by CSS property name. Values are the + * `[before, after]` pair; absence of a key means "unchanged". Bounded + * by the StyleCapturer allowlist so the payload stays well under 4 KB. + */ +export const ComponentStylesDeltaSchema = z.record( + z.string(), + z.tuple([z.string(), z.string()]), +); + +/** + * Bounding-rect delta — only the four edges plus dimensions are surfaced, + * matching `BoundingRectSchema`. Each axis is `[before, after]`. Keys + * absent from this record were unchanged. Keys are constrained at the + * comparator level (see `@domscribe/verify`), kept as `string` here so + * the record stays partial without per-key optional bookkeeping. + */ +export const BoundingRectDeltaSchema = z.record( + z.string(), + z.tuple([z.number(), z.number()]), +); + +/** + * Result of `verify_after_edit` — the structured verdict the agent + * reconciles against on retry. Built on RFC 0001's componentStyles surface; + * `screenshotRef` is a relay-blob reference (never the raw bytes — the + * 4 KB-per-element serialization budget assumes screenshots are external). + */ +export const VerifyResultSchema = z.object({ + annotationId: AnnotationIdSchema.describe( + 'Annotation this verify result is bound to', + ), + verdict: VerifyVerdictSchema.describe( + 'Overall verdict — see VerifyVerdictSchema for semantics', + ), + pixelDiffRatio: z + .number() + .min(0) + .max(1) + .describe( + 'Fraction of element-scoped pixels that differ between pre/post screenshots in [0, 1]', + ), + pixelDiffPixels: z + .number() + .int() + .nonnegative() + .describe('Absolute pixel-count diff (companion to pixelDiffRatio)'), + componentStylesDelta: ComponentStylesDeltaSchema.describe( + 'Per-CSS-property [before, after] pairs for properties that changed', + ), + boundingRectDelta: BoundingRectDeltaSchema.describe( + 'Per-axis [before, after] pairs for boundingRect entries that changed', + ), + screenshotRef: z + .string() + .optional() + .describe( + 'Opaque relay-blob reference for the post-edit element screenshot. NEVER raw bytes — fetch via the relay if the agent needs the image.', + ), + capturedAt: z + .string() + .describe('ISO 8601 timestamp when the post-edit capture was taken'), + reason: z + .string() + .optional() + .describe( + 'Human-readable explanation when the verdict is not "match" — surface in agent retry prompts', + ), +}); + +export type VerifyVerdict = z.infer; +export type ComponentStylesDelta = z.infer; +export type BoundingRectDelta = z.infer; +export type VerifyResult = z.infer; + export const AnnotationSchema = z.object({ metadata: AnnotationMetadataSchema.describe('Annotation metadata'), interaction: AnnotationInteractionSchema.describe('User interaction details'), @@ -195,6 +293,12 @@ export const AnnotationSchema = z.object({ agentResponse: AgentResponseSchema.optional().describe( "Agent's response if processed", ), + verifyHistory: z + .array(VerifyResultSchema) + .optional() + .describe( + 'Verify-after-edit results, appended in call order. Optional — older clients ignore. Soft-recommended; not gated by the annotation lifecycle.', + ), }); export const AnnotationSummarySchema = z.object({ diff --git a/packages/domscribe-relay/src/server/services/storage/file-annotation-storage.spec.ts b/packages/domscribe-relay/src/server/services/storage/file-annotation-storage.spec.ts index 20ba4a4..6117a1f 100644 --- a/packages/domscribe-relay/src/server/services/storage/file-annotation-storage.spec.ts +++ b/packages/domscribe-relay/src/server/services/storage/file-annotation-storage.spec.ts @@ -14,6 +14,7 @@ import { import path from 'path'; import { tmpdir } from 'os'; import { + ANNOTATION_SCHEMA_VERSION, AnnotationStatusEnum, type Annotation, type AnnotationStatus, @@ -221,7 +222,8 @@ describe('FileAnnotationStorage', () => { AnnotationStatusEnum.QUEUED, ); expect(result).not.toBeNull(); - expect(result?.metadata.schemaVersion).toBe(1); + // Legacy annotations migrate to the current schema version on read. + expect(result?.metadata.schemaVersion).toBe(ANNOTATION_SCHEMA_VERSION); }); it('should stamp schemaVersion on listByStatus when missing from persisted data', async () => { @@ -231,7 +233,7 @@ describe('FileAnnotationStorage', () => { const results = await storage.listByStatus(AnnotationStatusEnum.QUEUED); expect(results).toHaveLength(1); - expect(results[0].metadata.schemaVersion).toBe(1); + expect(results[0].metadata.schemaVersion).toBe(ANNOTATION_SCHEMA_VERSION); }); }); From 49ac1db098e0e1a3db932c18446b01406a353d5d Mon Sep 17 00:00:00 2001 From: Kaushik Gnanaskandan Date: Mon, 8 Jun 2026 05:31:26 -0700 Subject: [PATCH 2/7] feat(verify): new @domscribe/verify package lifting pixelmatch comparator (RFC 0002 Task A vendored) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit @domscribe/verify is the shared comparator behind verify_after_edit and the RFC 0001 falsifier harness — one implementation grades both CI and the live MCP tool. Pure-TS, Node-friendly, no DOM dep. Three modules: - pixel-diff.ts: lifted from styling/scripts/falsifier.ts verbatim - style-delta.ts: per-CSS-property and per-rect-axis delta with sub-pixel tolerance for boundingRect jitter - verdict.ts: three-axis (pixel + style + rect) derivation of no_change | match | partial | regression with composeReason helper @domscribe/test-fixtures becomes the first re-consumer — falsifier.ts now imports diffPng + MAX_DIFF_RATIO from the package instead of pixelmatch directly. --- packages/domscribe-test-fixtures/package.json | 3 +- .../styling/scripts/falsifier.ts | 56 ++------ packages/domscribe-verify/package.json | 26 ++++ packages/domscribe-verify/project.json | 43 ++++++ packages/domscribe-verify/src/index.ts | 24 ++++ .../src/lib/comparator.spec.ts | 134 ++++++++++++++++++ .../domscribe-verify/src/lib/comparator.ts | 89 ++++++++++++ .../domscribe-verify/src/lib/pixel-diff.ts | 72 ++++++++++ .../src/lib/style-delta.spec.ts | 103 ++++++++++++++ .../domscribe-verify/src/lib/style-delta.ts | 97 +++++++++++++ .../domscribe-verify/src/lib/verdict.spec.ts | 85 +++++++++++ packages/domscribe-verify/src/lib/verdict.ts | 104 ++++++++++++++ packages/domscribe-verify/tsconfig.json | 9 ++ packages/domscribe-verify/tsconfig.lib.json | 39 +++++ packages/domscribe-verify/tsconfig.spec.json | 31 ++++ packages/domscribe-verify/vite.config.ts | 28 ++++ 16 files changed, 900 insertions(+), 43 deletions(-) create mode 100644 packages/domscribe-verify/package.json create mode 100644 packages/domscribe-verify/project.json create mode 100644 packages/domscribe-verify/src/index.ts create mode 100644 packages/domscribe-verify/src/lib/comparator.spec.ts create mode 100644 packages/domscribe-verify/src/lib/comparator.ts create mode 100644 packages/domscribe-verify/src/lib/pixel-diff.ts create mode 100644 packages/domscribe-verify/src/lib/style-delta.spec.ts create mode 100644 packages/domscribe-verify/src/lib/style-delta.ts create mode 100644 packages/domscribe-verify/src/lib/verdict.spec.ts create mode 100644 packages/domscribe-verify/src/lib/verdict.ts create mode 100644 packages/domscribe-verify/tsconfig.json create mode 100644 packages/domscribe-verify/tsconfig.lib.json create mode 100644 packages/domscribe-verify/tsconfig.spec.json create mode 100644 packages/domscribe-verify/vite.config.ts diff --git a/packages/domscribe-test-fixtures/package.json b/packages/domscribe-test-fixtures/package.json index be382e5..0eb6e10 100644 --- a/packages/domscribe-test-fixtures/package.json +++ b/packages/domscribe-test-fixtures/package.json @@ -10,10 +10,9 @@ "test:falsifier:record": "tsx styling/scripts/falsifier.ts --mode=record" }, "devDependencies": { + "@domscribe/verify": "workspace:*", "@playwright/test": "^1.49.0", - "@types/pixelmatch": "^5.2.6", "@types/pngjs": "^6.0.5", - "pixelmatch": "^7.1.0", "playwright": "^1.49.0", "pngjs": "^7.0.0", "tsx": "^4.21.0" diff --git a/packages/domscribe-test-fixtures/styling/scripts/falsifier.ts b/packages/domscribe-test-fixtures/styling/scripts/falsifier.ts index 1ef0d39..c8bd9d0 100644 --- a/packages/domscribe-test-fixtures/styling/scripts/falsifier.ts +++ b/packages/domscribe-test-fixtures/styling/scripts/falsifier.ts @@ -46,8 +46,7 @@ import path from 'node:path'; import { spawn, type ChildProcess } from 'node:child_process'; import { fileURLToPath } from 'node:url'; import { chromium, type Browser, type Page } from 'playwright'; -import pixelmatch from 'pixelmatch'; -import { PNG } from 'pngjs'; +import { diffPng, MAX_DIFF_RATIO } from '@domscribe/verify'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); const STYLING_ROOT = path.resolve(__dirname, '..'); @@ -55,23 +54,12 @@ const BASELINES_ROOT = path.join(STYLING_ROOT, 'baselines'); const ANNOTATIONS_FILE = path.join(STYLING_ROOT, 'annotations.json'); /** - * Pixel-diff tolerance. - * - * PER_PIXEL_THRESHOLD — pixelmatch's `threshold` (color distance per - * pixel below which two pixels are considered equal). 0.1 is the - * library's recommended starting point; we keep it modest so the - * harness catches real visual deltas but tolerates AA jitter. - * - * MAX_DIFF_RATIO — the fraction of total pixels that may differ before - * we call the annotation a fail. The canonical-after path diffs at 0, - * so this is a defensive floor for CI worker AA jitter on text glyphs. - * 0.1% (0.001) is tight enough that two images that happen to share - * a mostly-white background (a real false-positive risk we observed in - * sanity testing) cannot slip through, while still absorbing a few - * pixels of subpixel font rendering noise. + * Pixel-diff tolerance lives in `@domscribe/verify` so the comparator + * stays bit-identical between CI and the live `verify_after_edit` MCP + * tool. See the package for the rationale on PER_PIXEL_THRESHOLD and + * MAX_DIFF_RATIO. We only re-import MAX_DIFF_RATIO for the failure + * reason string below. */ -const PER_PIXEL_THRESHOLD = 0.1; -const MAX_DIFF_RATIO = 0.001; const VIEWPORT = { width: 800, height: 600 }; @@ -270,25 +258,14 @@ async function screenshotRoute( }); } -function loadPng(buf: Buffer): PNG { - return PNG.sync.read(buf); -} - -function diff(a: PNG, b: PNG): { diffPixels: number; ratio: number } { - if (a.width !== b.width || a.height !== b.height) { - return { - diffPixels: a.width * a.height, - ratio: 1, - }; - } - const out = new PNG({ width: a.width, height: a.height }); - const diffPixels = pixelmatch(a.data, b.data, out.data, a.width, a.height, { - threshold: PER_PIXEL_THRESHOLD, - }); - return { - diffPixels, - ratio: diffPixels / (a.width * a.height), - }; +/** + * Pixel diff via the shared `@domscribe/verify` comparator. Kept as a + * thin shim so the rest of the harness reads the same way it did + * before the lift. + */ +function diff(a: Buffer, b: Buffer): { diffPixels: number; ratio: number } { + const r = diffPng(a, b); + return { diffPixels: r.diffPixels, ratio: r.pixelDiffRatio }; } interface BrowserContext { @@ -418,10 +395,7 @@ async function runAgainstSource( try { const actualBuf = await source(page, server, ann); const baselineBuf = fs.readFileSync(baseline); - const { diffPixels, ratio } = diff( - loadPng(actualBuf), - loadPng(baselineBuf), - ); + const { diffPixels, ratio } = diff(actualBuf, baselineBuf); const passed = ratio <= MAX_DIFF_RATIO; results.push({ id: ann.id, diff --git a/packages/domscribe-verify/package.json b/packages/domscribe-verify/package.json new file mode 100644 index 0000000..3c77e6d --- /dev/null +++ b/packages/domscribe-verify/package.json @@ -0,0 +1,26 @@ +{ + "name": "@domscribe/verify", + "version": "0.5.2", + "description": "Pure-TS comparator that grades post-edit captures against pre-edit baselines for the verify_after_edit MCP tool (RFC 0002).", + "type": "module", + "main": "src/index.ts", + "publishConfig": { + "access": "restricted" + }, + "dependencies": { + "@domscribe/core": "workspace:*", + "pixelmatch": "^7.1.0", + "pngjs": "^7.0.0" + }, + "devDependencies": { + "@types/pngjs": "^6.0.5" + }, + "engines": { + "node": ">=20" + }, + "repository": { + "type": "git", + "url": "https://github.com/patchorbit/domscribe.git", + "directory": "packages/domscribe-verify" + } +} diff --git a/packages/domscribe-verify/project.json b/packages/domscribe-verify/project.json new file mode 100644 index 0000000..ba0cab9 --- /dev/null +++ b/packages/domscribe-verify/project.json @@ -0,0 +1,43 @@ +{ + "name": "domscribe-verify", + "$schema": "../../node_modules/nx/schemas/project-schema.json", + "projectType": "library", + "sourceRoot": "packages/domscribe-verify/src", + "tags": ["scope:infra", "type:lib", "type:test"], + "targets": { + "build": { + "executor": "@nx/js:tsc", + "outputs": ["{workspaceRoot}/dist/packages/domscribe-verify"], + "options": { + "rootDir": "packages/domscribe-verify/src", + "outputPath": "dist/packages/domscribe-verify", + "main": "packages/domscribe-verify/src/index.ts", + "tsConfig": "packages/domscribe-verify/tsconfig.lib.json", + "generatePackageJson": true, + "generateExportsField": true, + "assets": ["packages/domscribe-verify/*.md"] + } + }, + "lint": { + "executor": "@nx/eslint:lint", + "outputs": ["{options.outputFile}"], + "options": { + "eslintConfig": "eslint.config.mjs", + "lintFilePatterns": [ + "packages/domscribe-verify/**/*.ts", + "packages/domscribe-verify/**/*.tsx" + ] + } + }, + "test": { + "executor": "@nx/vitest:test", + "outputs": ["{projectRoot}/test-output"], + "options": { + "config": "packages/domscribe-verify/vite.config.ts" + } + }, + "typecheck": { + "executor": "nx:noop" + } + } +} diff --git a/packages/domscribe-verify/src/index.ts b/packages/domscribe-verify/src/index.ts new file mode 100644 index 0000000..34a019f --- /dev/null +++ b/packages/domscribe-verify/src/index.ts @@ -0,0 +1,24 @@ +/** + * @domscribe/verify - Comparator for the verify_after_edit MCP tool. + * + * Lifts the pixel-diff comparator from the RFC 0001 falsifier harness and + * adds componentStyles + boundingRect delta computation plus a three-axis + * verdict derivation. Pure-TS, Node-friendly, no DOM dependency. + * + * @module @domscribe/verify + */ + +export { compare } from './lib/comparator.js'; +export type { CapturePair } from './lib/comparator.js'; +export { + diffPng, + PER_PIXEL_THRESHOLD, + MAX_DIFF_RATIO, +} from './lib/pixel-diff.js'; +export type { PixelDiffResult } from './lib/pixel-diff.js'; +export { + computeComponentStylesDelta, + computeBoundingRectDelta, +} from './lib/style-delta.js'; +export { deriveVerdict, composeReason } from './lib/verdict.js'; +export type { VerdictInputs } from './lib/verdict.js'; diff --git a/packages/domscribe-verify/src/lib/comparator.spec.ts b/packages/domscribe-verify/src/lib/comparator.spec.ts new file mode 100644 index 0000000..cad4e61 --- /dev/null +++ b/packages/domscribe-verify/src/lib/comparator.spec.ts @@ -0,0 +1,134 @@ +import { describe, it, expect } from 'vitest'; +import { PNG } from 'pngjs'; +import { compare } from './comparator.js'; + +function solidPng( + width: number, + height: number, + rgba: [number, number, number, number], +): Buffer { + const png = new PNG({ width, height }); + for (let i = 0; i < png.data.length; i += 4) { + png.data[i] = rgba[0]; + png.data[i + 1] = rgba[1]; + png.data[i + 2] = rgba[2]; + png.data[i + 3] = rgba[3]; + } + return PNG.sync.write(png); +} + +const annotationId = 'ann_A7bCd9Ef_1700000000000'; + +describe('compare', () => { + it('returns no_change when every axis matches', () => { + const buf = solidPng(8, 8, [255, 255, 255, 255]); + const result = compare({ + annotationId, + beforeScreenshot: buf, + afterScreenshot: buf, + beforeStyles: { computed: { color: 'rgb(0, 0, 0)' } }, + afterStyles: { computed: { color: 'rgb(0, 0, 0)' } }, + }); + expect(result.verdict).toBe('no_change'); + expect(result.pixelDiffRatio).toBe(0); + expect(result.componentStylesDelta).toEqual({}); + }); + + it('returns regression when pixel, style, and rect all disagree', () => { + const before = solidPng(8, 8, [255, 255, 255, 255]); + const after = solidPng(8, 8, [0, 0, 0, 255]); + const result = compare({ + annotationId, + beforeScreenshot: before, + afterScreenshot: after, + beforeStyles: { computed: { color: 'red' } }, + afterStyles: { computed: { color: 'blue' } }, + beforeRect: { + x: 0, + y: 0, + width: 100, + height: 50, + top: 0, + right: 100, + bottom: 50, + left: 0, + }, + afterRect: { + x: 0, + y: 0, + width: 120, + height: 50, + top: 0, + right: 120, + bottom: 50, + left: 0, + }, + }); + expect(result.verdict).toBe('regression'); + expect(result.pixelDiffRatio).toBeGreaterThan(0); + expect(result.componentStylesDelta.color).toEqual(['red', 'blue']); + expect(result.boundingRectDelta.width).toEqual([100, 120]); + }); + + it('returns partial when only one axis differs', () => { + const buf = solidPng(8, 8, [255, 255, 255, 255]); + const result = compare({ + annotationId, + beforeScreenshot: buf, + afterScreenshot: buf, + beforeStyles: { computed: { color: 'red' } }, + afterStyles: { computed: { color: 'blue' } }, + }); + expect(result.verdict).toBe('partial'); + }); + + it('passes screenshotRef through without inlining bytes anywhere in the result', () => { + const buf = solidPng(8, 8, [255, 255, 255, 255]); + const result = compare({ + annotationId, + beforeScreenshot: buf, + afterScreenshot: buf, + screenshotRef: 'blob://post-edit/abc123', + }); + expect(result.screenshotRef).toBe('blob://post-edit/abc123'); + const serialized = JSON.stringify(result); + // Screenshot bytes (~hundreds of bytes for an 8x8 PNG) must never + // appear in the serialized VerifyResult. + expect(serialized.length).toBeLessThan(1024); + expect(serialized).not.toContain('base64'); + expect(serialized).not.toContain('PNG'); + }); + + it('stamps capturedAt as an ISO 8601 string', () => { + const buf = solidPng(8, 8, [255, 255, 255, 255]); + const result = compare({ + annotationId, + beforeScreenshot: buf, + afterScreenshot: buf, + }); + expect(() => new Date(result.capturedAt).toISOString()).not.toThrow(); + }); + + it('marks the verdict as changed when the element resized between captures', () => { + const before = solidPng(8, 8, [255, 255, 255, 255]); + const after = solidPng(12, 12, [255, 255, 255, 255]); + const result = compare({ + annotationId, + beforeScreenshot: before, + afterScreenshot: after, + }); + expect(result.pixelDiffRatio).toBe(1); + expect(result.verdict).not.toBe('no_change'); + }); + + it('honors an injected capturedAt for deterministic snapshots', () => { + const buf = solidPng(8, 8, [255, 255, 255, 255]); + const result = compare({ + annotationId, + beforeScreenshot: buf, + afterScreenshot: buf, + capturedAt: '2025-01-01T00:00:00.000Z', + }); + expect(result.capturedAt).toBe('2025-01-01T00:00:00.000Z'); + }); +}); diff --git a/packages/domscribe-verify/src/lib/comparator.ts b/packages/domscribe-verify/src/lib/comparator.ts new file mode 100644 index 0000000..a094462 --- /dev/null +++ b/packages/domscribe-verify/src/lib/comparator.ts @@ -0,0 +1,89 @@ +/** + * Top-level comparator — combines pixel-diff, style-delta, and verdict + * derivation into a single `VerifyResult` constructor. + * + * The comparator is intentionally I/O-free: the caller supplies decoded + * PNG buffers and computed-style snapshots. The relay's verify_after_edit + * handler is responsible for fetching the post-edit screenshot blob and + * passing its bytes here. + * + * @module @domscribe/verify/comparator + */ + +import type { + AnnotationId, + BoundingRect, + ComponentStyles, + VerifyResult, +} from '@domscribe/core'; +import { diffPng } from './pixel-diff.js'; +import { + computeBoundingRectDelta, + computeComponentStylesDelta, +} from './style-delta.js'; +import { composeReason, deriveVerdict } from './verdict.js'; + +export interface CapturePair { + annotationId: AnnotationId; + beforeScreenshot?: Buffer; + afterScreenshot?: Buffer; + beforeStyles?: ComponentStyles; + afterStyles?: ComponentStyles; + beforeRect?: BoundingRect; + afterRect?: BoundingRect; + /** + * Opaque relay-blob reference for the post-edit screenshot. The + * comparator never inlines bytes into the result — the caller passes + * the reference through so the agent can fetch the image if it needs to. + */ + screenshotRef?: string; + /** + * Optional override for the `capturedAt` timestamp; defaults to "now". + * Mostly useful in tests so snapshots are deterministic. + */ + capturedAt?: string; +} + +/** + * Run the comparator and return a `VerifyResult`. When either screenshot + * buffer is absent the pixel-diff axis silently records 0 — the caller + * (typically the relay's verify_after_edit handler) is expected to refuse + * the request if no post-edit capture is available. + */ +export function compare(pair: CapturePair): VerifyResult { + const pixelDiff = + pair.beforeScreenshot && pair.afterScreenshot + ? diffPng(pair.beforeScreenshot, pair.afterScreenshot) + : { diffPixels: 0, pixelDiffRatio: 0, withinTolerance: true }; + + const componentStylesDelta = computeComponentStylesDelta( + pair.beforeStyles, + pair.afterStyles, + ); + const boundingRectDelta = computeBoundingRectDelta( + pair.beforeRect, + pair.afterRect, + ); + + const verdict = deriveVerdict({ + pixelDiff, + componentStylesDelta, + boundingRectDelta, + }); + + return { + annotationId: pair.annotationId, + verdict, + pixelDiffRatio: pixelDiff.pixelDiffRatio, + pixelDiffPixels: pixelDiff.diffPixels, + componentStylesDelta, + boundingRectDelta, + screenshotRef: pair.screenshotRef, + capturedAt: pair.capturedAt ?? new Date().toISOString(), + reason: composeReason(verdict, { + pixelDiff, + componentStylesDelta, + boundingRectDelta, + }), + }; +} diff --git a/packages/domscribe-verify/src/lib/pixel-diff.ts b/packages/domscribe-verify/src/lib/pixel-diff.ts new file mode 100644 index 0000000..e7c337e --- /dev/null +++ b/packages/domscribe-verify/src/lib/pixel-diff.ts @@ -0,0 +1,72 @@ +/** + * Pixel-diff comparator lifted from the RFC 0001 falsifier harness in + * `@domscribe/test-fixtures`. Kept identical here so CI and the live relay + * grade post-edit captures against pre-edit baselines through one + * implementation. + * + * The harness retains its driver (Playwright + Vite preview) — this module + * is the inner comparator only, so we can run it from a Node process that + * already has the screenshot bytes in hand (e.g. when the relay receives + * them from the browser overlay). + * + * Tolerances are the same constants used in the falsifier: + * PER_PIXEL_THRESHOLD = 0.1 — pixelmatch's per-pixel color distance + * MAX_DIFF_RATIO = 0.001 — 0.1% of total pixels may differ before + * we call the verdict a fail + * + * @module @domscribe/verify/pixel-diff + */ + +import pixelmatch from 'pixelmatch'; +import { PNG } from 'pngjs'; + +export const PER_PIXEL_THRESHOLD = 0.1; +export const MAX_DIFF_RATIO = 0.001; + +export interface PixelDiffResult { + diffPixels: number; + pixelDiffRatio: number; + withinTolerance: boolean; +} + +/** + * Decode a PNG buffer. Surfaces as a thin helper so callers can substitute + * in a different encoding later without touching the comparator. + */ +function decode(buf: Buffer): PNG { + return PNG.sync.read(buf); +} + +/** + * Compute the pixel-diff between two PNG buffers using pixelmatch with the + * harness's stable tolerances. + * + * If the dimensions differ we treat the entire image as a diff — there is + * no useful per-pixel comparison when the element changed size. + */ +export function diffPng(beforePng: Buffer, afterPng: Buffer): PixelDiffResult { + const a = decode(beforePng); + const b = decode(afterPng); + + if (a.width !== b.width || a.height !== b.height) { + const diffPixels = Math.max(a.width * a.height, b.width * b.height); + return { + diffPixels, + pixelDiffRatio: 1, + withinTolerance: false, + }; + } + + const out = new PNG({ width: a.width, height: a.height }); + const diffPixels = pixelmatch(a.data, b.data, out.data, a.width, a.height, { + threshold: PER_PIXEL_THRESHOLD, + }); + const total = a.width * a.height; + const pixelDiffRatio = total === 0 ? 0 : diffPixels / total; + + return { + diffPixels, + pixelDiffRatio, + withinTolerance: pixelDiffRatio <= MAX_DIFF_RATIO, + }; +} diff --git a/packages/domscribe-verify/src/lib/style-delta.spec.ts b/packages/domscribe-verify/src/lib/style-delta.spec.ts new file mode 100644 index 0000000..7b26123 --- /dev/null +++ b/packages/domscribe-verify/src/lib/style-delta.spec.ts @@ -0,0 +1,103 @@ +import { describe, it, expect } from 'vitest'; +import { + computeBoundingRectDelta, + computeComponentStylesDelta, +} from './style-delta.js'; + +describe('computeComponentStylesDelta', () => { + it('emits an empty delta when before and after are identical', () => { + const delta = computeComponentStylesDelta( + { computed: { color: 'rgb(0, 0, 0)' } }, + { computed: { color: 'rgb(0, 0, 0)' } }, + ); + expect(delta).toEqual({}); + }); + + it('emits [before, after] pairs only for changed properties', () => { + const delta = computeComponentStylesDelta( + { + computed: { + color: 'rgb(0, 0, 0)', + 'font-size': '16px', + padding: '8px', + }, + }, + { + computed: { + color: 'rgb(0, 0, 0)', + 'font-size': '18px', + padding: '8px', + }, + }, + ); + expect(delta).toEqual({ 'font-size': ['16px', '18px'] }); + }); + + it('records appearance / disappearance with empty-string sentinel', () => { + const delta = computeComponentStylesDelta( + { computed: { 'box-shadow': 'none' } }, + { computed: { color: 'red' } }, + ); + expect(delta).toEqual({ + 'box-shadow': ['none', ''], + color: ['', 'red'], + }); + }); + + it('merges customProperties into the same delta map', () => { + const delta = computeComponentStylesDelta( + { + computed: { color: 'rgb(15, 23, 42)' }, + customProperties: { '--color-fg': 'rgb(15, 23, 42)' }, + }, + { + computed: { color: 'rgb(15, 23, 42)' }, + customProperties: { '--color-fg': 'rgb(255, 255, 255)' }, + }, + ); + expect(delta).toEqual({ + '--color-fg': ['rgb(15, 23, 42)', 'rgb(255, 255, 255)'], + }); + }); + + it('treats undefined inputs as empty (no spurious deltas)', () => { + expect(computeComponentStylesDelta(undefined, undefined)).toEqual({}); + expect( + computeComponentStylesDelta(undefined, { computed: { color: 'red' } }), + ).toEqual({ color: ['', 'red'] }); + }); +}); + +describe('computeBoundingRectDelta', () => { + const baseRect = { + x: 10, + y: 20, + width: 100, + height: 50, + top: 20, + right: 110, + bottom: 70, + left: 10, + }; + + it('returns an empty delta when rects are identical', () => { + expect(computeBoundingRectDelta(baseRect, baseRect)).toEqual({}); + }); + + it('returns an empty delta when both rects are absent', () => { + expect(computeBoundingRectDelta(undefined, undefined)).toEqual({}); + }); + + it('records changes on axes that moved beyond the sub-pixel tolerance', () => { + const after = { ...baseRect, width: 120, right: 130 }; + expect(computeBoundingRectDelta(baseRect, after)).toEqual({ + width: [100, 120], + right: [110, 130], + }); + }); + + it('absorbs sub-pixel jitter (<=0.5px) on any single axis', () => { + const after = { ...baseRect, x: 10.4 }; + expect(computeBoundingRectDelta(baseRect, after)).toEqual({}); + }); +}); diff --git a/packages/domscribe-verify/src/lib/style-delta.ts b/packages/domscribe-verify/src/lib/style-delta.ts new file mode 100644 index 0000000..721d7a1 --- /dev/null +++ b/packages/domscribe-verify/src/lib/style-delta.ts @@ -0,0 +1,97 @@ +/** + * Style and bounding-rect delta computation for VerifyResult. + * + * Pure functions — no I/O, no DOM. Consumed by both the CI falsifier + * harness and the relay's verify_after_edit handler so the verdict + * semantics live in exactly one place. + * + * @module @domscribe/verify/style-delta + */ + +import type { + BoundingRect, + BoundingRectDelta, + ComponentStyles, + ComponentStylesDelta, +} from '@domscribe/core'; + +/** + * Per-coordinate tolerance for boundingRect (pixels). Sub-pixel deltas are + * indistinguishable visually; treating them as "changed" would inflate the + * partial-verdict rate on browsers that report `getBoundingClientRect` + * coordinates as fractional even when the layout hasn't actually moved. + */ +const BOUNDING_RECT_TOLERANCE_PX = 0.5; + +/** + * Compute the per-CSS-property delta between two `ComponentStyles` snapshots. + * + * Only entries that actually changed (or appeared / disappeared) are + * included. Custom properties (`--*`) are merged into the same delta map + * so the agent sees a single shape regardless of where a token-vs-value + * change originated. + */ +export function computeComponentStylesDelta( + before: ComponentStyles | undefined, + after: ComponentStyles | undefined, +): ComponentStylesDelta { + const delta: Record = {}; + + const beforeComputed = before?.computed ?? {}; + const afterComputed = after?.computed ?? {}; + const beforeCustom = before?.customProperties ?? {}; + const afterCustom = after?.customProperties ?? {}; + + diffStringMap(beforeComputed, afterComputed, delta); + diffStringMap(beforeCustom, afterCustom, delta); + + return delta; +} + +function diffStringMap( + before: Record, + after: Record, + out: Record, +): void { + const keys = new Set([...Object.keys(before), ...Object.keys(after)]); + for (const key of keys) { + const a = before[key] ?? ''; + const b = after[key] ?? ''; + if (a !== b) { + out[key] = [a, b]; + } + } +} + +/** + * Compute the per-axis delta between two boundingRects, with sub-pixel + * tolerance to absorb the fractional values browsers can produce even on + * a layout that didn't actually shift. + */ +export function computeBoundingRectDelta( + before: BoundingRect | undefined, + after: BoundingRect | undefined, +): BoundingRectDelta { + if (!before || !after) { + return {}; + } + const axes: (keyof BoundingRect)[] = [ + 'x', + 'y', + 'width', + 'height', + 'top', + 'right', + 'bottom', + 'left', + ]; + const delta: BoundingRectDelta = {}; + for (const axis of axes) { + const a = before[axis]; + const b = after[axis]; + if (Math.abs(a - b) > BOUNDING_RECT_TOLERANCE_PX) { + delta[axis] = [a, b]; + } + } + return delta; +} diff --git a/packages/domscribe-verify/src/lib/verdict.spec.ts b/packages/domscribe-verify/src/lib/verdict.spec.ts new file mode 100644 index 0000000..c5c6e80 --- /dev/null +++ b/packages/domscribe-verify/src/lib/verdict.spec.ts @@ -0,0 +1,85 @@ +import { describe, it, expect } from 'vitest'; +import { composeReason, deriveVerdict } from './verdict.js'; + +const matchingPixel = { + diffPixels: 0, + pixelDiffRatio: 0, + withinTolerance: true, +}; +const drifted = { + diffPixels: 4000, + pixelDiffRatio: 0.02, + withinTolerance: false, +}; + +describe('deriveVerdict', () => { + it('emits no_change when every axis matches', () => { + expect( + deriveVerdict({ + pixelDiff: matchingPixel, + componentStylesDelta: {}, + boundingRectDelta: {}, + }), + ).toBe('no_change'); + }); + + it('emits regression when every axis disagrees with the baseline', () => { + expect( + deriveVerdict({ + pixelDiff: drifted, + componentStylesDelta: { color: ['red', 'blue'] }, + boundingRectDelta: { width: [100, 120] }, + }), + ).toBe('regression'); + }); + + it('emits partial when only some axes changed', () => { + expect( + deriveVerdict({ + pixelDiff: drifted, + componentStylesDelta: {}, + boundingRectDelta: {}, + }), + ).toBe('partial'); + expect( + deriveVerdict({ + pixelDiff: matchingPixel, + componentStylesDelta: { color: ['red', 'blue'] }, + boundingRectDelta: {}, + }), + ).toBe('partial'); + }); +}); + +describe('composeReason', () => { + it('returns an explanation for the no_change verdict', () => { + const reason = composeReason('no_change', { + pixelDiff: matchingPixel, + componentStylesDelta: {}, + boundingRectDelta: {}, + }); + expect(reason).toMatch(/did not land/i); + }); + + it('summarises the axes that drifted for partial / regression verdicts', () => { + const reason = composeReason('regression', { + pixelDiff: drifted, + componentStylesDelta: { color: ['red', 'blue'] }, + boundingRectDelta: { width: [100, 120] }, + }); + expect(reason).toContain('pixel-diff'); + expect(reason).toContain('CSS'); + expect(reason).toContain('boundingRect'); + }); + + it('omits axes with no delta from the summary', () => { + const reason = composeReason('partial', { + pixelDiff: matchingPixel, + componentStylesDelta: { color: ['red', 'blue'] }, + boundingRectDelta: {}, + }); + expect(reason).not.toContain('pixel-diff'); + expect(reason).not.toContain('boundingRect'); + expect(reason).toContain('1 CSS property changed'); + }); +}); diff --git a/packages/domscribe-verify/src/lib/verdict.ts b/packages/domscribe-verify/src/lib/verdict.ts new file mode 100644 index 0000000..a01a8ca --- /dev/null +++ b/packages/domscribe-verify/src/lib/verdict.ts @@ -0,0 +1,104 @@ +/** + * Verdict derivation — combines pixel diff, computed-style delta, and + * boundingRect delta into a single `VerifyVerdict`. + * + * The semantics map directly to RFC 0002 §Decision: + * no_change — every axis says the post-edit capture is indistinguishable + * from the baseline (the agent's edit did not land in the + * rendered output) + * match — pixel diff within tolerance AND no style/rect deltas + * partial — at least one axis says "changed" AND at least one axis + * says "matched"; the agent should reconcile what landed + * against intent + * regression — every axis says "changed" by more than tolerance with no + * indication of progress (reserved for the strictest case; + * v1 conservatively only emits this when ALL three axes + * disagree with the baseline — the agent shifted output but + * not toward the intent) + * + * @module @domscribe/verify/verdict + */ + +import type { VerifyVerdict } from '@domscribe/core'; +import type { PixelDiffResult } from './pixel-diff.js'; +import type { BoundingRectDelta, ComponentStylesDelta } from '@domscribe/core'; + +export interface VerdictInputs { + pixelDiff: PixelDiffResult; + componentStylesDelta: ComponentStylesDelta; + boundingRectDelta: BoundingRectDelta; +} + +/** + * Three-axis decision. Each axis answers a different question: + * pixel diff → did the rendered output change visually? + * style delta → did the computed CSS change? + * rect delta → did the layout box change? + * + * "Match" means within tolerance / no entries. "Change" is the complement. + */ +function axisVerdicts(inputs: VerdictInputs): { + pixelChanged: boolean; + stylesChanged: boolean; + rectChanged: boolean; +} { + return { + pixelChanged: !inputs.pixelDiff.withinTolerance, + stylesChanged: Object.keys(inputs.componentStylesDelta).length > 0, + rectChanged: Object.keys(inputs.boundingRectDelta).length > 0, + }; +} + +export function deriveVerdict(inputs: VerdictInputs): VerifyVerdict { + const { pixelChanged, stylesChanged, rectChanged } = axisVerdicts(inputs); + + const anyChanged = pixelChanged || stylesChanged || rectChanged; + const allChanged = pixelChanged && stylesChanged && rectChanged; + + if (!anyChanged) { + // Truly indistinguishable — the agent's edit did not surface in the + // captured output. We emit `no_change` rather than `match` so the + // agent does not treat a no-op as a success on retry. + return 'no_change'; + } + + if (allChanged) { + return 'regression'; + } + + // Mixed signal — some axes match, some changed. The agent's + // reconciliation hint lives in the deltas themselves. + return 'partial'; +} + +/** + * Compose the human-readable `reason` field for a VerifyResult. Concise so + * it can be inlined into an agent retry prompt without blowing the budget. + */ +export function composeReason( + verdict: VerifyVerdict, + inputs: VerdictInputs, +): string | undefined { + if (verdict === 'no_change') { + return 'Post-edit capture is indistinguishable from the pre-edit baseline on every measured axis. The agent edit likely did not land in the rendered output.'; + } + const fragments: string[] = []; + if (inputs.pixelDiff.pixelDiffRatio > 0) { + fragments.push( + `pixel-diff ${(inputs.pixelDiff.pixelDiffRatio * 100).toFixed(3)}%`, + ); + } + const styleCount = Object.keys(inputs.componentStylesDelta).length; + if (styleCount > 0) { + fragments.push( + `${styleCount} CSS ${styleCount === 1 ? 'property' : 'properties'} changed`, + ); + } + const rectCount = Object.keys(inputs.boundingRectDelta).length; + if (rectCount > 0) { + fragments.push( + `${rectCount} boundingRect ${rectCount === 1 ? 'axis' : 'axes'} changed`, + ); + } + return fragments.length > 0 ? fragments.join('; ') : undefined; +} diff --git a/packages/domscribe-verify/tsconfig.json b/packages/domscribe-verify/tsconfig.json new file mode 100644 index 0000000..c823650 --- /dev/null +++ b/packages/domscribe-verify/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": "../../tsconfig.base.json", + "files": [], + "include": [], + "references": [ + { "path": "./tsconfig.lib.json" }, + { "path": "./tsconfig.spec.json" } + ] +} diff --git a/packages/domscribe-verify/tsconfig.lib.json b/packages/domscribe-verify/tsconfig.lib.json new file mode 100644 index 0000000..d54c94c --- /dev/null +++ b/packages/domscribe-verify/tsconfig.lib.json @@ -0,0 +1,39 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "tsBuildInfoFile": "../../dist/packages/domscribe-verify/tsconfig.tsbuildinfo", + "lib": [ + "es2024", + "ESNext.Array", + "ESNext.Collection", + "ESNext.Iterator", + "ESNext.Promise" + ], + "target": "es2024", + "module": "nodenext", + "moduleResolution": "nodenext" + }, + "include": ["src/**/*.ts"], + "exclude": [ + "vite.config.ts", + "vite.config.mts", + "vitest.config.ts", + "vitest.config.mts", + "src/**/*.test.ts", + "src/**/*.spec.ts", + "src/**/*.test.tsx", + "src/**/*.spec.tsx", + "**/dist", + "**/build", + "**/coverage", + "**/generated", + "**/tmp", + "**/test-output", + "**/.nx" + ], + "references": [ + { + "path": "../domscribe-core/tsconfig.lib.json" + } + ] +} diff --git a/packages/domscribe-verify/tsconfig.spec.json b/packages/domscribe-verify/tsconfig.spec.json new file mode 100644 index 0000000..fb4d6e4 --- /dev/null +++ b/packages/domscribe-verify/tsconfig.spec.json @@ -0,0 +1,31 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "types": [ + "vitest/globals", + "vitest/importMeta", + "vite/client", + "vitest", + "node" + ], + "tsBuildInfoFile": "../../dist/packages/domscribe-verify/tsconfig.spec.tsbuildinfo", + "module": "nodenext", + "moduleResolution": "nodenext" + }, + "include": [ + "vite.config.ts", + "vite.config.mts", + "vitest.config.ts", + "vitest.config.mts", + "src/**/*.test.ts", + "src/**/*.spec.ts", + "src/**/*.test.tsx", + "src/**/*.spec.tsx", + "src/**/*.d.ts" + ], + "references": [ + { + "path": "./tsconfig.lib.json" + } + ] +} diff --git a/packages/domscribe-verify/vite.config.ts b/packages/domscribe-verify/vite.config.ts new file mode 100644 index 0000000..4d21373 --- /dev/null +++ b/packages/domscribe-verify/vite.config.ts @@ -0,0 +1,28 @@ +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + name: '@domscribe/verify', + watch: false, + globals: true, + environment: 'node', + include: ['src/**/*.{test,spec}.{js,mjs,cjs,ts,mts,cts,jsx,tsx}'], + reporters: ['default'], + outputFile: './test-output/vitest/report.json', + coverage: { + enabled: true, + provider: 'v8', + reporter: ['text', 'json-summary'], + reportsDirectory: './test-output/vitest/coverage', + thresholds: { + lines: 0.8, + functions: 0.8, + branches: 0.7, + statements: 0.8, + }, + }, + typecheck: { + tsconfig: './tsconfig.spec.json', + }, + }, +}); From a159172c1b5f6ff9677405ae7b1298a4610b92bd Mon Sep 17 00:00:00 2001 From: Kaushik Gnanaskandan Date: Mon, 8 Jun 2026 05:31:34 -0700 Subject: [PATCH 3/7] =?UTF-8?q?feat(runtime):=20ScreenshotCapturer=20for?= =?UTF-8?q?=20element-scoped=20capture=20(RFC=200002=20=C2=A7B1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds ScreenshotCapturer alongside StyleCapturer/PropsCapturer/StateCapturer. Driver-injected (the overlay does the actual pixel grab); the capturer is the runtime-side contract. Constraints honored per RFC 0002 §B1: - Per-capture payload ≤200 KB (SCREENSHOT_MAX_BYTES, enforced) - Returns a relay-blob reference; bytes never inlined into the annotation - Default JPEG quality 0.85 to land mid-density UI well under the cap Cardinal-rule unit test asserts that even with a long screenshotRef and a maximum byteSize, the serialized CaptureResult stays under 512 bytes. --- .../src/capture/screenshot-capturer.spec.ts | 205 +++++++++++++++++ .../src/capture/screenshot-capturer.ts | 211 ++++++++++++++++++ packages/domscribe-runtime/src/index.ts | 13 ++ 3 files changed, 429 insertions(+) create mode 100644 packages/domscribe-runtime/src/capture/screenshot-capturer.spec.ts create mode 100644 packages/domscribe-runtime/src/capture/screenshot-capturer.ts diff --git a/packages/domscribe-runtime/src/capture/screenshot-capturer.spec.ts b/packages/domscribe-runtime/src/capture/screenshot-capturer.spec.ts new file mode 100644 index 0000000..fd36d29 --- /dev/null +++ b/packages/domscribe-runtime/src/capture/screenshot-capturer.spec.ts @@ -0,0 +1,205 @@ +/** + * Tests for ScreenshotCapturer. + * + * The capturer is driver-injected (the overlay implements the actual pixel + * grab) — these tests exercise the runtime contract: request shape, + * blob-reference passthrough, byte-cap enforcement, and the cardinal rule + * that raw bytes never appear in the returned result. + * + * @module @domscribe/runtime/capture/screenshot-capturer.spec + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { + ScreenshotCapturer, + SCREENSHOT_MAX_BYTES, + type ScreenshotCaptureRequest, + type ScreenshotCaptureResult, + type ScreenshotDriver, +} from './screenshot-capturer.js'; + +function makeElement( + rect: Partial = {}, + devicePixelRatio = 2, +): HTMLElement { + const fullRect: DOMRect = { + x: 10, + y: 20, + width: 200, + height: 100, + top: 20, + right: 210, + bottom: 120, + left: 10, + toJSON: () => ({}), + ...rect, + } as DOMRect; + const win = { devicePixelRatio } as unknown as Window; + const element = { + getBoundingClientRect: () => fullRect, + ownerDocument: { defaultView: win }, + } as unknown as HTMLElement; + return element; +} + +function driverReturning( + result: ScreenshotCaptureResult, +): ScreenshotDriver & { calls: ScreenshotCaptureRequest[] } { + const calls: ScreenshotCaptureRequest[] = []; + return { + calls, + async capture(req) { + calls.push(req); + return result; + }, + }; +} + +describe('ScreenshotCapturer', () => { + beforeEach(() => { + vi.restoreAllMocks(); + }); + + it('forwards the element bounding rect, scale, format, and budget to the driver', async () => { + const driver = driverReturning({ + screenshotRef: 'blob://post-edit/abc', + byteSize: 4096, + format: 'jpeg', + width: 400, + height: 200, + }); + const capturer = new ScreenshotCapturer(driver); + + const result = await capturer.capture(makeElement()); + + expect(result.success).toBe(true); + expect(driver.calls).toHaveLength(1); + expect(driver.calls[0]).toEqual({ + rect: { x: 10, y: 20, width: 200, height: 100 }, + scale: 2, + format: 'jpeg', + quality: 0.85, + maxBytes: SCREENSHOT_MAX_BYTES, + }); + }); + + it('returns the screenshotRef from the driver as an opaque string', async () => { + const driver = driverReturning({ + screenshotRef: 'blob://post-edit/xyz789', + byteSize: 8192, + format: 'jpeg', + width: 200, + height: 100, + }); + const capturer = new ScreenshotCapturer(driver); + + const result = await capturer.capture(makeElement()); + + expect(result.success).toBe(true); + expect(result.data?.screenshotRef).toBe('blob://post-edit/xyz789'); + }); + + it('rejects captures with zero-dimension bounding rects', async () => { + const driver = driverReturning({ + screenshotRef: 'blob://noop', + byteSize: 0, + format: 'jpeg', + width: 0, + height: 0, + }); + const capturer = new ScreenshotCapturer(driver); + + const result = await capturer.capture(makeElement({ width: 0, height: 0 })); + + expect(result.success).toBe(false); + expect(result.error?.message).toMatch(/zero width or height/); + }); + + it('fails when the overlay returns a payload over the byte cap', async () => { + const driver = driverReturning({ + screenshotRef: 'blob://oversize', + byteSize: SCREENSHOT_MAX_BYTES + 1, + format: 'jpeg', + width: 1000, + height: 1000, + }); + const capturer = new ScreenshotCapturer(driver); + + const result = await capturer.capture(makeElement()); + + expect(result.success).toBe(false); + expect(result.error?.message).toMatch(/exceeds maxBytes/); + }); + + it('honors a custom maxBytes from options', async () => { + const driver = driverReturning({ + screenshotRef: 'blob://small', + byteSize: 1500, + format: 'jpeg', + width: 200, + height: 100, + }); + const capturer = new ScreenshotCapturer(driver, { maxBytes: 1024 }); + + const result = await capturer.capture(makeElement()); + expect(result.success).toBe(false); + expect(result.error?.message).toMatch(/exceeds maxBytes 1024/); + }); + + it('falls back to scale=1 when devicePixelRatio is unavailable', async () => { + const driver = driverReturning({ + screenshotRef: 'blob://x', + byteSize: 1, + format: 'jpeg', + width: 1, + height: 1, + }); + const capturer = new ScreenshotCapturer(driver); + + await capturer.capture(makeElement({}, NaN)); + + expect(driver.calls[0].scale).toBe(1); + }); + + it('NEVER inlines raw bytes — the returned CaptureResult must be serializable inside the 4 KB annotation budget', async () => { + // Even at maximum byteSize, the SERIALIZED CaptureResult should be a + // tiny metadata-only payload — the screenshot lives behind the + // blob reference, not in the result. + const driver = driverReturning({ + screenshotRef: 'blob://post-edit/' + 'x'.repeat(48), + byteSize: SCREENSHOT_MAX_BYTES, + format: 'jpeg', + width: 1920, + height: 1080, + }); + const capturer = new ScreenshotCapturer(driver); + + const result = await capturer.capture(makeElement()); + + expect(result.success).toBe(true); + const serialized = JSON.stringify({ + success: result.success, + data: result.data, + }); + // Pad the budget heavily — the real cap is the per-annotation 4 KB + // serialization budget from RFC 0001; metadata for one screenshot + // should be well under 1 KB even with a long ref. + expect(serialized.length).toBeLessThan(512); + expect(serialized).not.toMatch(/data:image/i); + expect(serialized).not.toMatch(/base64/i); + }); + + it('surfaces driver exceptions as a CaptureResult error', async () => { + const driver: ScreenshotDriver = { + async capture() { + throw new Error('overlay timed out'); + }, + }; + const capturer = new ScreenshotCapturer(driver); + + const result = await capturer.capture(makeElement()); + + expect(result.success).toBe(false); + expect(result.error?.message).toMatch(/screenshot/); + }); +}); diff --git a/packages/domscribe-runtime/src/capture/screenshot-capturer.ts b/packages/domscribe-runtime/src/capture/screenshot-capturer.ts new file mode 100644 index 0000000..82713a7 --- /dev/null +++ b/packages/domscribe-runtime/src/capture/screenshot-capturer.ts @@ -0,0 +1,211 @@ +/** + * ScreenshotCapturer — element-scoped image capture for verify_after_edit. + * + * Issues a relay-side capture request for the picked element's bounding box. + * The actual pixel-grab lives in the browser overlay (it has access to + * `html2canvas` / the experimental `ImageCapture` API depending on platform); + * this module is the runtime-side contract: it returns a structured + * `ScreenshotCaptureResult` whose `screenshotRef` is an opaque relay-blob + * reference. We never inline image bytes into the annotation payload — the + * RFC 0001 per-element 4 KB serialization budget assumes screenshots live + * outside the annotation. + * + * @module @domscribe/runtime/capture/screenshot-capturer + */ + +import type { CaptureResult } from './types.js'; +import { ContextCaptureError } from '../errors/index.js'; + +/** + * Soft cap for the encoded screenshot payload that the relay accepts on the + * blob-upload endpoint. RFC 0002 §B1 fixes this at 200 KB so a high-DPR + * fullscreen capture cannot starve other annotation traffic. + */ +export const SCREENSHOT_MAX_BYTES = 200 * 1024; + +/** + * Default JPEG quality when the browser-side encoder supports format hints. + * 0.85 balances pixel-diff fidelity (we still need to detect 0.1% drift + * downstream) against payload size — well-formed mid-density UI captures + * land at 60–120 KB at this setting in our overlay smoke tests. + */ +export const SCREENSHOT_DEFAULT_QUALITY = 0.85; + +export interface ScreenshotCaptureOptions { + /** + * Maximum encoded bytes the relay will accept for a single capture. + * The capturer surfaces this as a contract; the actual enforcement runs + * in the overlay's encoder loop (it can re-encode at lower quality if + * the first attempt exceeds the cap). + * @default 204800 (200 KB) + */ + maxBytes?: number; + /** + * Optional fixed device-pixel scale. When unset the capturer uses the + * caller's `devicePixelRatio` (the overlay reads `window.devicePixelRatio`). + */ + scale?: number; + /** + * Encoder format hint — JPEG is preferred over PNG for screenshots + * because it consistently lands well under the 200 KB cap on dense UI. + * @default 'jpeg' + */ + format?: 'jpeg' | 'png'; + /** + * JPEG quality in [0, 1]. Ignored when `format === 'png'`. + * @default 0.85 + */ + quality?: number; + /** + * Enable debug logging. + * @default false + */ + debug?: boolean; +} + +/** + * Capture request handed to the overlay. The overlay turns this into pixels + * and resolves with a relay-blob reference. + * + * Shape lives in the runtime so adapters (`@domscribe/react`, `-vue`, etc.) + * can synthesize requests without depending on overlay internals. + */ +export interface ScreenshotCaptureRequest { + /** Bounding rect of the picked element in viewport coordinates. */ + rect: { x: number; y: number; width: number; height: number }; + scale: number; + format: 'jpeg' | 'png'; + quality: number; + maxBytes: number; +} + +/** + * Result of a successful screenshot capture. The agent never receives the + * raw bytes through this surface — only an opaque blob reference it can + * resolve through the relay if it needs the image. + */ +export interface ScreenshotCaptureResult { + /** Opaque relay-blob reference, e.g. "blob://post-edit//". */ + screenshotRef: string; + /** Encoded byte size — useful for telemetry; ≤ `maxBytes`. */ + byteSize: number; + /** Format the overlay actually encoded to. */ + format: 'jpeg' | 'png'; + /** Pixel dimensions of the encoded capture. */ + width: number; + height: number; +} + +/** + * Driver contract — implemented by the overlay (browser context). The + * runtime never imports the overlay directly; the driver is injected at + * construction time so the capturer remains test-pure and adapter-agnostic. + */ +export interface ScreenshotDriver { + capture(request: ScreenshotCaptureRequest): Promise; +} + +const DEFAULT_FORMAT: 'jpeg' | 'png' = 'jpeg'; + +export class ScreenshotCapturer { + private readonly maxBytes: number; + private readonly fixedScale: number | undefined; + private readonly format: 'jpeg' | 'png'; + private readonly quality: number; + private readonly debug: boolean; + + constructor( + private readonly driver: ScreenshotDriver, + options: ScreenshotCaptureOptions = {}, + ) { + this.maxBytes = options.maxBytes ?? SCREENSHOT_MAX_BYTES; + this.fixedScale = options.scale; + this.format = options.format ?? DEFAULT_FORMAT; + this.quality = options.quality ?? SCREENSHOT_DEFAULT_QUALITY; + this.debug = options.debug ?? false; + } + + /** + * Capture an element-scoped screenshot. + * + * Returns `CaptureResult` — on failure the result + * is `{ success: false, error }` so the caller (annotation flow) can + * choose to surface or swallow. The verify_after_edit MCP handler treats + * a missing capture as a "no post-edit data" condition and rejects the + * call with a clear error instead of returning a partial verdict. + */ + async capture( + element: HTMLElement, + ): Promise> { + try { + const rect = element.getBoundingClientRect(); + if (rect.width === 0 || rect.height === 0) { + return { + success: false, + error: new ContextCaptureError( + 'ScreenshotCapturer: element has zero width or height — refusing to capture', + ), + }; + } + + const scale = this.fixedScale ?? this.detectScale(element); + const request: ScreenshotCaptureRequest = { + rect: { + x: rect.x, + y: rect.y, + width: rect.width, + height: rect.height, + }, + scale, + format: this.format, + quality: this.quality, + maxBytes: this.maxBytes, + }; + + const result = await this.driver.capture(request); + + if (result.byteSize > this.maxBytes) { + return { + success: false, + error: new ContextCaptureError( + `ScreenshotCapturer: overlay returned ${result.byteSize} bytes; exceeds maxBytes ${this.maxBytes}`, + ), + }; + } + + if (this.debug) { + console.log( + '[domscribe-runtime][screenshot-capturer] Captured screenshot', + { + ref: result.screenshotRef, + bytes: result.byteSize, + format: result.format, + width: result.width, + height: result.height, + }, + ); + } + + return { success: true, data: result }; + } catch (error) { + const err = new ContextCaptureError( + 'Failed to capture element screenshot', + error instanceof Error ? error : undefined, + ); + if (this.debug) { + console.error('[domscribe-runtime][screenshot-capturer]', err); + } + return { success: false, error: err }; + } + } + + /** + * Read the device-pixel ratio from the element's owner window, falling + * back to 1 in jsdom-style environments. + */ + private detectScale(element: HTMLElement): number { + const win = element.ownerDocument?.defaultView; + const dpr = (win as { devicePixelRatio?: number } | null)?.devicePixelRatio; + return typeof dpr === 'number' && dpr > 0 ? dpr : 1; + } +} diff --git a/packages/domscribe-runtime/src/index.ts b/packages/domscribe-runtime/src/index.ts index c3c141d..c32661a 100644 --- a/packages/domscribe-runtime/src/index.ts +++ b/packages/domscribe-runtime/src/index.ts @@ -27,3 +27,16 @@ export { STYLE_CAPTURE_ALLOWLIST, } from './capture/style-capturer.js'; export type { StyleCaptureOptions } from './capture/style-capturer.js'; + +// Screenshot capture (RFC 0002 — verify_after_edit) +export { + ScreenshotCapturer, + SCREENSHOT_MAX_BYTES, + SCREENSHOT_DEFAULT_QUALITY, +} from './capture/screenshot-capturer.js'; +export type { + ScreenshotCaptureOptions, + ScreenshotCaptureRequest, + ScreenshotCaptureResult, + ScreenshotDriver, +} from './capture/screenshot-capturer.js'; From 4585ced2f719683aaa962e153328e4665ff3166f Mon Sep 17 00:00:00 2001 From: Kaushik Gnanaskandan Date: Mon, 8 Jun 2026 05:31:47 -0700 Subject: [PATCH 4/7] =?UTF-8?q?feat(relay):=20verify=5Fafter=5Fedit=20HTTP?= =?UTF-8?q?=20route=20+=20service=20+=20client=20(RFC=200002=20=C2=A7B2)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires the verify_after_edit endpoint: POST /api/v1/annotations/:id/verify Request: { postEdit: { componentStyles?, boundingRect?, screenshotRef? } } Response: { success, result: VerifyResult, annotationId } AnnotationService.verifyAfterEdit reads the pre-edit baseline from context.runtimeContext.componentStyles + interaction.boundingRect, runs the @domscribe/verify comparator, and appends the VerifyResult to verifyHistory. RelayHttpClient.verifyAnnotation typesafely calls the new endpoint. NO lifecycle gate — annotation_update_status accepts PROCESSED with or without a verify call. Integration test asserts this. --- packages/domscribe-relay/package.json | 1 + .../src/client/relay-http-client.ts | 34 +++ packages/domscribe-relay/src/schema.ts | 50 ++++ .../src/server/handlers/annotation-handler.ts | 10 + .../src/server/routes/index.ts | 1 + .../routes/v1/annotation-verify.route.spec.ts | 276 ++++++++++++++++++ .../routes/v1/annotation-verify.route.ts | 119 ++++++++ .../src/server/services/annotation-service.ts | 60 ++++ 8 files changed, 551 insertions(+) create mode 100644 packages/domscribe-relay/src/server/routes/v1/annotation-verify.route.spec.ts create mode 100644 packages/domscribe-relay/src/server/routes/v1/annotation-verify.route.ts diff --git a/packages/domscribe-relay/package.json b/packages/domscribe-relay/package.json index b48995f..8da2d72 100644 --- a/packages/domscribe-relay/package.json +++ b/packages/domscribe-relay/package.json @@ -24,6 +24,7 @@ "@clack/prompts": "^1.1.0", "@domscribe/core": "workspace:*", "@domscribe/manifest": "workspace:*", + "@domscribe/verify": "workspace:*", "@fastify/cors": "^10.0.0", "@fastify/websocket": "^11.0.0", "@modelcontextprotocol/sdk": "^1.0.0", diff --git a/packages/domscribe-relay/src/client/relay-http-client.ts b/packages/domscribe-relay/src/client/relay-http-client.ts index 0295862..0b69f8b 100644 --- a/packages/domscribe-relay/src/client/relay-http-client.ts +++ b/packages/domscribe-relay/src/client/relay-http-client.ts @@ -12,6 +12,8 @@ import { AnnotationInteraction, AnnotationStatus, API_PATHS, + BoundingRect, + ComponentStyles, DomscribeError, DomscribeErrorCode, InteractionMode, @@ -37,6 +39,8 @@ import { AnnotationUpdateResponseResponseSchema, AnnotationUpdateStatusResponse, AnnotationUpdateStatusResponseSchema, + AnnotationVerifyResponse, + AnnotationVerifyResponseSchema, HealthResponse, HealthResponseSchema, ManifestBatchResolveResponse, @@ -218,6 +222,36 @@ export class RelayHttpClient { return AnnotationUpdateResponseResponseSchema.parse(await response.json()); } + /** + * Grade a post-edit capture against the annotation's pre-edit baseline + * via the relay's verify_after_edit endpoint (RFC 0002). + * + * `postEdit.screenshotRef` is an opaque blob reference managed by the + * overlay; raw image bytes never traverse this client. + */ + async verifyAnnotation( + annotationId: AnnotationId, + postEdit: { + componentStyles?: ComponentStyles; + boundingRect?: BoundingRect; + screenshotRef?: string; + }, + ): Promise { + const apiPath = `${API_PATHS.BASE.replace(':version', 'v1')}${API_PATHS.ANNOTATION_VERIFY.replace(':id', annotationId)}`; + const url = new URL(apiPath, this.baseUrl); + const response = await fetch(url.toString(), { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ postEdit }), + }); + if (!response.ok) { + throw await this.parseError(response); + } + return AnnotationVerifyResponseSchema.parse(await response.json()); + } + async updateAnnotationStatus( annotationId: AnnotationId, status: AnnotationStatus, diff --git a/packages/domscribe-relay/src/schema.ts b/packages/domscribe-relay/src/schema.ts index 33fde0b..db7d33b 100644 --- a/packages/domscribe-relay/src/schema.ts +++ b/packages/domscribe-relay/src/schema.ts @@ -13,6 +13,8 @@ import { AnnotationIdSchema, AnnotationInteractionSchema, AnnotationSchema, + BoundingRectSchema, + ComponentStylesSchema, InteractionModeSchema, ManifestEntryIdSchema, ManifestEntrySchema, @@ -21,6 +23,7 @@ import { SourcePositionSchema, AnnotationStatusSchema, AnnotationSummarySchema, + VerifyResultSchema, } from '@domscribe/core'; /* ============================= @@ -181,6 +184,53 @@ export type AnnotationUpdateResponseResponse = z.infer< typeof AnnotationUpdateResponseResponseSchema >; +/* ============================= + * Annotation - Verify After Edit (RFC 0002) + * ============================= */ +export const AnnotationVerifyRequestParamsSchema = z.object({ + id: AnnotationIdSchema.describe('The annotation ID to verify'), +}); +export type AnnotationVerifyRequestParams = z.infer< + typeof AnnotationVerifyRequestParamsSchema +>; + +export const AnnotationVerifyRequestBodySchema = z.object({ + postEdit: z + .object({ + componentStyles: ComponentStylesSchema.optional().describe( + 'Post-edit ComponentStyles snapshot from the runtime StyleCapturer', + ), + boundingRect: BoundingRectSchema.optional().describe( + 'Post-edit boundingRect from the picked element', + ), + screenshotRef: z + .string() + .optional() + .describe( + 'Opaque relay-blob reference for the post-edit screenshot (NEVER raw bytes)', + ), + }) + .describe( + "Post-edit capture as observed by the overlay. The relay grades this against the annotation's pre-edit baseline.", + ), +}); +export type AnnotationVerifyRequestBody = z.infer< + typeof AnnotationVerifyRequestBodySchema +>; + +export const AnnotationVerifyResponseSchema = z.object({ + success: z.boolean().describe('Whether the verify result was recorded'), + result: VerifyResultSchema.describe( + 'Structured verify verdict and per-axis deltas', + ), + annotationId: AnnotationIdSchema.describe( + 'Annotation ID that was verified (echoed for client convenience)', + ), +}); +export type AnnotationVerifyResponse = z.infer< + typeof AnnotationVerifyResponseSchema +>; + /* ============================= * Annotation - Update Status * ============================= */ diff --git a/packages/domscribe-relay/src/server/handlers/annotation-handler.ts b/packages/domscribe-relay/src/server/handlers/annotation-handler.ts index a1b9e9e..f957cea 100644 --- a/packages/domscribe-relay/src/server/handlers/annotation-handler.ts +++ b/packages/domscribe-relay/src/server/handlers/annotation-handler.ts @@ -14,6 +14,7 @@ import { AnnotationUpdateResponseRoute, AnnotationUpdateStatusRoute, AnnotationProcessRoute, + AnnotationVerifyRoute, } from '../routes/index.js'; import { registerRoute } from '../routes/route.interface.js'; @@ -107,4 +108,13 @@ export function registerAnnotationHandlers( app, annotationService, }); + + /** + * POST /api/v1/annotations/:id/verify + * Grade a post-edit capture against the pre-edit baseline (RFC 0002) + */ + registerRoute(AnnotationVerifyRoute, { + app, + annotationService, + }); } diff --git a/packages/domscribe-relay/src/server/routes/index.ts b/packages/domscribe-relay/src/server/routes/index.ts index a1a601b..1e5b1cf 100644 --- a/packages/domscribe-relay/src/server/routes/index.ts +++ b/packages/domscribe-relay/src/server/routes/index.ts @@ -11,6 +11,7 @@ export { AnnotationProcessRoute } from './v1/annotation-process.route.js'; export { AnnotationSearchRoute } from './v1/annotation-search.route.js'; export { AnnotationUpdateResponseRoute } from './v1/annotation-update-response.route.js'; export { AnnotationUpdateStatusRoute } from './v1/annotation-update-status.route.js'; +export { AnnotationVerifyRoute } from './v1/annotation-verify.route.js'; export { AnnotationDeleteRoute } from './v1/annotation-delete.route.js'; export { AnnotationPatchRoute } from './v1/annotation-patch.route.js'; diff --git a/packages/domscribe-relay/src/server/routes/v1/annotation-verify.route.spec.ts b/packages/domscribe-relay/src/server/routes/v1/annotation-verify.route.spec.ts new file mode 100644 index 0000000..7f19a2a --- /dev/null +++ b/packages/domscribe-relay/src/server/routes/v1/annotation-verify.route.spec.ts @@ -0,0 +1,276 @@ +/** + * Integration tests for POST /api/v1/annotations/:id/verify + * + * Exercises the full annotation -> respond -> verify chain through the + * real AnnotationService + the lifted @domscribe/verify comparator. + * + * One scenario uses the RFC 0001 styling-fixture annotation "A001" as the + * shape of a real pre/post-edit capture (padding 16px -> 32px). Cross- + * package fixture bytes don't load at unit-test time, so we model the + * fixture annotation here as the agent would observe it through the + * runtime StyleCapturer. + */ +import { describe, it, expect, beforeAll, afterAll } from 'vitest'; +import { + createTestServer, + cleanupTestServer, + expectStatus, + type TestServer, +} from '../../__test-utils__/setup.js'; + +const fixtureA001Pre = { + componentStyles: { + computed: { + padding: '16px', + display: 'flex', + 'background-color': 'rgb(255, 255, 255)', + }, + }, + boundingRect: { + x: 100, + y: 100, + width: 320, + height: 80, + top: 100, + right: 420, + bottom: 180, + left: 100, + }, +}; + +const fixtureA001PostMatch = { + componentStyles: { + computed: { + padding: '32px', + display: 'flex', + 'background-color': 'rgb(255, 255, 255)', + }, + }, + boundingRect: { + x: 100, + y: 100, + width: 352, + height: 112, + top: 100, + right: 452, + bottom: 212, + left: 100, + }, +}; + +const fixtureA001PostNoChange = { + componentStyles: fixtureA001Pre.componentStyles, + boundingRect: fixtureA001Pre.boundingRect, +}; + +const annotationInputWithPreCapture = { + mode: 'element-click', + interaction: { + type: 'element-annotation', + selectedElement: { + tagName: 'div', + selector: 'body > div > [data-testid="A001"]', + attributes: { 'data-testid': 'A001' }, + innerText: 'A001 card', + }, + boundingRect: fixtureA001Pre.boundingRect, + }, + context: { + pageUrl: 'http://localhost:4801/#A001/before', + pageTitle: 'tailwind-app', + viewport: { width: 800, height: 600 }, + userAgent: 'TestAgent/1.0', + userMessage: + 'Card padding is too cramped. Bump it to 32px (Tailwind p-8) to match the spacious card style used elsewhere on the page.', + runtimeContext: { + componentStyles: fixtureA001Pre.componentStyles, + }, + }, +}; + +describe('POST /api/v1/annotations/:id/verify', () => { + let server: TestServer; + + beforeAll(async () => { + server = await createTestServer(); + }); + + afterAll(() => { + cleanupTestServer(server); + }); + + it('completes the annotation -> respond -> verify chain on an RFC 0001 styling fixture', async () => { + const createResp = await server.app.inject({ + method: 'POST', + url: '/api/v1/annotations', + payload: annotationInputWithPreCapture, + }); + expectStatus(createResp, 201); + const id = createResp.json().metadata.id; + + await server.app.inject({ + method: 'PUT', + url: `/api/v1/annotations/${id}/status`, + payload: { status: 'processing' }, + }); + const respondResp = await server.app.inject({ + method: 'PUT', + url: `/api/v1/annotations/${id}/response`, + payload: { message: 'Replaced p-4 with p-8 on the card container.' }, + }); + expectStatus(respondResp, 200); + + const verifyResp = await server.app.inject({ + method: 'POST', + url: `/api/v1/annotations/${id}/verify`, + payload: { + postEdit: { + componentStyles: fixtureA001PostMatch.componentStyles, + boundingRect: fixtureA001PostMatch.boundingRect, + screenshotRef: `blob://post-edit/${id}/a001`, + }, + }, + }); + + expectStatus(verifyResp, 200); + const body = verifyResp.json(); + + expect(body.success).toBe(true); + expect(body.annotationId).toBe(id); + // No screenshot bytes are supplied (only a ref) so the pixel-diff axis + // is inactive — verdict is "partial" until the overlay wires the + // ScreenshotCapturer through the blob endpoint (deferred follow-up). + expect(body.result.verdict).toBe('partial'); + expect(body.result.componentStylesDelta).toEqual({ + padding: ['16px', '32px'], + }); + expect(body.result.boundingRectDelta).toEqual({ + width: [320, 352], + height: [80, 112], + right: [420, 452], + bottom: [180, 212], + }); + expect(body.result.screenshotRef).toBe(`blob://post-edit/${id}/a001`); + }); + + it('reports no_change when the post-edit capture is indistinguishable from the baseline', async () => { + const createResp = await server.app.inject({ + method: 'POST', + url: '/api/v1/annotations', + payload: annotationInputWithPreCapture, + }); + const id = createResp.json().metadata.id; + + await server.app.inject({ + method: 'PUT', + url: `/api/v1/annotations/${id}/status`, + payload: { status: 'processing' }, + }); + + const verifyResp = await server.app.inject({ + method: 'POST', + url: `/api/v1/annotations/${id}/verify`, + payload: { postEdit: fixtureA001PostNoChange }, + }); + + expectStatus(verifyResp, 200); + expect(verifyResp.json().result.verdict).toBe('no_change'); + }); + + it('appends to verifyHistory so multiple verify calls accumulate', async () => { + const createResp = await server.app.inject({ + method: 'POST', + url: '/api/v1/annotations', + payload: annotationInputWithPreCapture, + }); + const id = createResp.json().metadata.id; + + await server.app.inject({ + method: 'POST', + url: `/api/v1/annotations/${id}/verify`, + payload: { postEdit: fixtureA001PostNoChange }, + }); + await server.app.inject({ + method: 'POST', + url: `/api/v1/annotations/${id}/verify`, + payload: { postEdit: fixtureA001PostMatch }, + }); + + const getResp = await server.app.inject({ + method: 'GET', + url: `/api/v1/annotations/${id}`, + }); + const annotation = getResp.json(); + expect(annotation.verifyHistory).toHaveLength(2); + expect(annotation.verifyHistory[0].verdict).toBe('no_change'); + expect(annotation.verifyHistory[1].verdict).toBe('partial'); + }); + + it('does NOT gate the lifecycle — updateStatus accepts PROCESSED with or without a verify call', async () => { + const createResp = await server.app.inject({ + method: 'POST', + url: '/api/v1/annotations', + payload: annotationInputWithPreCapture, + }); + const id = createResp.json().metadata.id; + + await server.app.inject({ + method: 'PUT', + url: `/api/v1/annotations/${id}/status`, + payload: { status: 'processing' }, + }); + await server.app.inject({ + method: 'PUT', + url: `/api/v1/annotations/${id}/response`, + payload: { message: 'Skipped verify on purpose.' }, + }); + + const updateResp = await server.app.inject({ + method: 'PUT', + url: `/api/v1/annotations/${id}/status`, + payload: { status: 'processed' }, + }); + + expectStatus(updateResp, 200); + }); + + it('NEVER inlines screenshot bytes — the stored VerifyResult is small even with a long screenshotRef', async () => { + const createResp = await server.app.inject({ + method: 'POST', + url: '/api/v1/annotations', + payload: annotationInputWithPreCapture, + }); + const id = createResp.json().metadata.id; + + const longRef = 'blob://post-edit/' + 'x'.repeat(96); + await server.app.inject({ + method: 'POST', + url: `/api/v1/annotations/${id}/verify`, + payload: { + postEdit: { ...fixtureA001PostNoChange, screenshotRef: longRef }, + }, + }); + + const getResp = await server.app.inject({ + method: 'GET', + url: `/api/v1/annotations/${id}`, + }); + const annotation = getResp.json(); + const serialized = JSON.stringify(annotation); + expect(serialized).toContain(longRef); + expect(serialized).not.toMatch(/base64/i); + expect(serialized).not.toMatch(/data:image/i); + expect(serialized.length).toBeLessThan(8 * 1024); + }); + + it('returns 404 for a nonexistent annotation id', async () => { + const response = await server.app.inject({ + method: 'POST', + url: '/api/v1/annotations/ann_zzzzzzzz_0/verify', + payload: { postEdit: fixtureA001PostNoChange }, + }); + + expectStatus(response, 404); + expect(response.json().code).toBe('DS_ANNOTATION_NOTFOUND'); + }); +}); diff --git a/packages/domscribe-relay/src/server/routes/v1/annotation-verify.route.ts b/packages/domscribe-relay/src/server/routes/v1/annotation-verify.route.ts new file mode 100644 index 0000000..e284ab8 --- /dev/null +++ b/packages/domscribe-relay/src/server/routes/v1/annotation-verify.route.ts @@ -0,0 +1,119 @@ +/** + * Route: POST /api/v1/annotations/:id/verify + * + * Backs the `domscribe.verify.afterEdit` MCP tool (RFC 0002). Caller + * supplies a post-edit capture (componentStyles + boundingRect + an opaque + * screenshotRef); the relay grades it against the annotation's pre-edit + * baseline via `@domscribe/verify` and stores the result on the + * annotation's optional `verifyHistory`. + * + * Soft-recommended — does NOT affect the annotation lifecycle. + * + * @module @domscribe/relay/server/routes/v1/annotation-verify.route + */ +import { + API_PATHS, + DomscribeError, + DomscribeErrorCode, + HTTP_STATUS, +} from '@domscribe/core'; +import { + FastifyInstance, + FastifyReply, + FastifyRequest, + HTTPMethods, +} from 'fastify'; +import { AnnotationService } from '../../services/index.js'; +import { RelayErrorResponse, RelayErrorResponseSchema } from '../../types.js'; +import type { ZodTypeProvider } from 'fastify-type-provider-zod'; +import path from 'path'; +import { ApiVersion, RelayRoute } from '../route.interface.js'; +import { + AnnotationVerifyRequestBody, + AnnotationVerifyRequestBodySchema, + AnnotationVerifyRequestParams, + AnnotationVerifyRequestParamsSchema, + AnnotationVerifyResponse, + AnnotationVerifyResponseSchema, +} from '../../../schema.js'; + +export class AnnotationVerifyRoute implements RelayRoute { + apiPath = API_PATHS.ANNOTATION_VERIFY; + method: HTTPMethods = 'POST'; + version: ApiVersion = 'v1'; + + constructor(private readonly annotationService: AnnotationService) {} + + static register({ + app, + annotationService, + }: { + app: FastifyInstance; + annotationService: AnnotationService; + }): void { + const route = new AnnotationVerifyRoute(annotationService); + const { apiPath, version, method, handler } = route; + const url = path.posix.join( + API_PATHS.BASE.replace(':version', version), + apiPath, + ); + + app.withTypeProvider().route<{ + Params: AnnotationVerifyRequestParams; + Body: AnnotationVerifyRequestBody; + Reply: AnnotationVerifyResponse | RelayErrorResponse; + }>({ + url, + method, + handler: handler.bind(route), + schema: { + params: AnnotationVerifyRequestParamsSchema, + body: AnnotationVerifyRequestBodySchema, + response: { + 200: AnnotationVerifyResponseSchema, + 400: RelayErrorResponseSchema, + 404: RelayErrorResponseSchema, + 500: RelayErrorResponseSchema, + }, + }, + }); + } + + async handler( + request: FastifyRequest<{ + Params: AnnotationVerifyRequestParams; + Body: AnnotationVerifyRequestBody; + }>, + reply: FastifyReply<{ + Reply: AnnotationVerifyResponse | RelayErrorResponse; + }>, + ) { + try { + const { id } = request.params; + const { postEdit } = request.body; + + const result = await this.annotationService.verifyAfterEdit(id, postEdit); + + return { success: true, result, annotationId: id }; + } catch (error: unknown) { + if (error instanceof Error && /not found/i.test(error.message)) { + return reply.status(HTTP_STATUS.NOT_FOUND).send({ + error: error.message, + code: DomscribeErrorCode.DS_ANNOTATION_NOTFOUND, + }); + } + if (error instanceof DomscribeError) { + return reply.status(HTTP_STATUS.INTERNAL_SERVER_ERROR).send({ + ...error.toProblemDetails(), + error: error.message, + }); + } + const errorMessage = + error instanceof Error ? error.message : 'Unknown error'; + return reply.status(HTTP_STATUS.INTERNAL_SERVER_ERROR).send({ + error: errorMessage, + code: DomscribeErrorCode.DS_INTERNAL_ERROR, + }); + } + } +} diff --git a/packages/domscribe-relay/src/server/services/annotation-service.ts b/packages/domscribe-relay/src/server/services/annotation-service.ts index 93415e3..135e8be 100644 --- a/packages/domscribe-relay/src/server/services/annotation-service.ts +++ b/packages/domscribe-relay/src/server/services/annotation-service.ts @@ -12,9 +12,12 @@ import type { AnnotationInteraction, AnnotationStatus, AnnotationSummary, + BoundingRect, + ComponentStyles, InteractionMode, ManifestEntry, ManifestEntryId, + VerifyResult, } from '@domscribe/core'; import { ANNOTATION_SCHEMA_VERSION, @@ -22,6 +25,7 @@ import { generateAnnotationId, WS_EVENTS, } from '@domscribe/core'; +import { compare } from '@domscribe/verify'; import type { AnnotationStorageProvider } from './storage/annotation-storage.js'; /** @@ -386,6 +390,62 @@ export class AnnotationService { return annotation; } + /** + * Run verify_after_edit against a stored annotation. + * + * Reads the pre-edit baseline from the annotation's + * `context.runtimeContext.componentStyles` and `interaction.boundingRect`, + * compares it to the caller-supplied post-edit capture via + * `@domscribe/verify`, and appends the resulting `VerifyResult` to the + * annotation's `verifyHistory`. + * + * Soft-recommended: there is NO lifecycle gate — `updateStatus(PROCESSED)` + * works whether or not `verifyAfterEdit` has been called. RFC 0002 §Decision + * routes the escalation path through a falsifier-trip review, not the + * state machine. + * + * Screenshots are never inlined into the annotation — only the opaque + * `screenshotRef` is stored. The pixel-diff axis is therefore inactive + * here; it activates when the overlay later wires the runtime + * ScreenshotCapturer through the blob endpoint (deferred to a follow-up + * PR per RFC 0002 §B1). + */ + async verifyAfterEdit( + id: string, + postEdit: { + componentStyles?: ComponentStyles; + boundingRect?: BoundingRect; + screenshotRef?: string; + }, + ): Promise { + const annotation = await this.get(id); + if (!annotation) { + throw new Error(`Annotation not found: ${id}`); + } + + const beforeStyles = annotation.context.runtimeContext?.componentStyles; + const beforeRect = annotation.interaction.boundingRect; + + const result = compare({ + annotationId: annotation.metadata.id as AnnotationId, + beforeStyles, + afterStyles: postEdit.componentStyles, + beforeRect, + afterRect: postEdit.boundingRect, + screenshotRef: postEdit.screenshotRef, + }); + + annotation.verifyHistory = [...(annotation.verifyHistory ?? []), result]; + + await this.storage.write(annotation); + this.emit({ + type: WS_EVENTS.ANNOTATION_UPDATED, + data: { id, status: annotation.metadata.status }, + }); + + return result; + } + /** * Patch an annotation with partial context updates. */ From 71c9f03cd460a0cc82155bc908a421d6c1da600c Mon Sep 17 00:00:00 2001 From: Kaushik Gnanaskandan Date: Mon, 8 Jun 2026 05:32:00 -0700 Subject: [PATCH 5/7] =?UTF-8?q?feat(relay,mcp):=20verify=5Fafter=5Fedit=20?= =?UTF-8?q?MCP=20tool=20+=20soft-recommend=20prompts=20(RFC=200002=20?= =?UTF-8?q?=C2=A7B2,=20=C2=A7B3)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Registers domscribe.verify.afterEdit as the 13th active MCP tool. (NOTE: PE memo's '13 → 14' assumed e8d452d underscore grammar was merged; on this base it is 12 → 13. Will become 13 → 14 + alias layer when e8d452d lands.) Tool input: { annotationId, postEditComponentStyles?, postEditBoundingRect?, screenshotRef? }. Output: structured VerifyResult with a nextStep hint — match/partial points at updateStatus, no_change/regression points at retry with the deltas inlined into the hint string. SOFT-RECOMMENDED, no lifecycle gate: - process-next.prompt now lists verify_after_edit as step 5 with an explicit 'NOT a lifecycle gate' callout. - annotation_respond's description + nextStep recommend verify before updateStatus. - annotation_update_status accepts PROCESSED with or without verify (asserted via integration test in the previous commit). Cardinal-rule unit test: even with a long screenshotRef, the serialized tool output stays under 2 KB and never matches /base64/i or /data:image/i. --- .../mcp/__test-utils__/mock-relay-client.ts | 1 + .../src/mcp/mcp-adapter.spec.ts | 6 +- .../domscribe-relay/src/mcp/mcp-adapter.ts | 2 + .../src/mcp/prompts/process-next.prompt.ts | 3 +- .../src/mcp/prompts/prompts.spec.ts | 5 + .../mcp/tools/annotation-respond.tool.spec.ts | 9 +- .../src/mcp/tools/annotation-respond.tool.ts | 5 +- .../src/mcp/tools/tool.defs.spec.ts | 5 + .../src/mcp/tools/tool.defs.ts | 2 + .../mcp/tools/verify-after-edit.tool.spec.ts | 159 ++++++++++++++++++ .../src/mcp/tools/verify-after-edit.tool.ts | 114 +++++++++++++ 11 files changed, 304 insertions(+), 7 deletions(-) create mode 100644 packages/domscribe-relay/src/mcp/tools/verify-after-edit.tool.spec.ts create mode 100644 packages/domscribe-relay/src/mcp/tools/verify-after-edit.tool.ts diff --git a/packages/domscribe-relay/src/mcp/__test-utils__/mock-relay-client.ts b/packages/domscribe-relay/src/mcp/__test-utils__/mock-relay-client.ts index 1a9a719..7ac9103 100644 --- a/packages/domscribe-relay/src/mcp/__test-utils__/mock-relay-client.ts +++ b/packages/domscribe-relay/src/mcp/__test-utils__/mock-relay-client.ts @@ -20,6 +20,7 @@ export function createMockRelayClient( processAnnotation: vi.fn(), updateAnnotationStatus: vi.fn(), updateAnnotationResponse: vi.fn(), + verifyAnnotation: vi.fn(), createAnnotation: vi.fn(), deleteAnnotation: vi.fn(), patchAnnotation: vi.fn(), diff --git a/packages/domscribe-relay/src/mcp/mcp-adapter.spec.ts b/packages/domscribe-relay/src/mcp/mcp-adapter.spec.ts index 8eec9f0..22cc518 100644 --- a/packages/domscribe-relay/src/mcp/mcp-adapter.spec.ts +++ b/packages/domscribe-relay/src/mcp/mcp-adapter.spec.ts @@ -60,7 +60,8 @@ describe('McpAdapter', () => { // Assert const server = getServer(adapter); - expect(server.registeredTools.size).toBe(12); + // RFC 0002 added the verify_after_edit tool — the active count is 13. + expect(server.registeredTools.size).toBe(13); expect(server.registeredTools.has('domscribe.resolve')).toBe(true); expect(server.registeredTools.has('domscribe.resolve.batch')).toBe(true); expect(server.registeredTools.has('domscribe.manifest.stats')).toBe(true); @@ -83,6 +84,9 @@ describe('McpAdapter', () => { ); expect(server.registeredTools.has('domscribe.status')).toBe(true); expect(server.registeredTools.has('domscribe.query.bySource')).toBe(true); + expect(server.registeredTools.has('domscribe.verify.afterEdit')).toBe( + true, + ); }); it('should register all 4 prompts', () => { diff --git a/packages/domscribe-relay/src/mcp/mcp-adapter.ts b/packages/domscribe-relay/src/mcp/mcp-adapter.ts index 1f751d2..f8b7872 100644 --- a/packages/domscribe-relay/src/mcp/mcp-adapter.ts +++ b/packages/domscribe-relay/src/mcp/mcp-adapter.ts @@ -25,6 +25,7 @@ import { AnnotationsRespondTool } from './tools/annotation-respond.tool.js'; import { AnnotationsSearchTool } from './tools/annotation-search.tool.js'; import { StatusTool } from './tools/status.tool.js'; import { QueryBySourceTool } from './tools/query-by-source.tool.js'; +import { VerifyAfterEditTool } from './tools/verify-after-edit.tool.js'; // Prompt classes import { ProcessNextPrompt } from './prompts/process-next.prompt.js'; @@ -113,6 +114,7 @@ export class McpAdapter { new AnnotationsSearchTool(relayHttpClient), new StatusTool(relayHttpClient), new QueryBySourceTool(relayHttpClient), + new VerifyAfterEditTool(relayHttpClient), ]; for (const tool of tools) { diff --git a/packages/domscribe-relay/src/mcp/prompts/process-next.prompt.ts b/packages/domscribe-relay/src/mcp/prompts/process-next.prompt.ts index 41c83cc..a7229c4 100644 --- a/packages/domscribe-relay/src/mcp/prompts/process-next.prompt.ts +++ b/packages/domscribe-relay/src/mcp/prompts/process-next.prompt.ts @@ -30,7 +30,8 @@ If an annotation is found: 2. Navigate to the source file and understand the context 3. Implement the requested change 4. Use domscribe.annotation.respond to store your response -5. Use domscribe.annotation.updateStatus to mark it as 'processed' +5. RECOMMENDED: call domscribe.verify.afterEdit with your post-edit ComponentStyles / boundingRect (and a screenshotRef when the overlay supplied one). The tool grades your edit against the pre-edit baseline and returns a verdict (match | partial | no_change | regression) plus per-axis deltas. If the verdict is no_change or regression, reconcile the deltas and retry your edit before moving on. +6. Use domscribe.annotation.updateStatus to mark the annotation 'processed'. (NOTE: updateStatus does NOT require verify — it is a soft-recommended diagnostic, not a lifecycle gate.) If no annotation is found, inform the user that the queue is empty.`, }, diff --git a/packages/domscribe-relay/src/mcp/prompts/prompts.spec.ts b/packages/domscribe-relay/src/mcp/prompts/prompts.spec.ts index fc5acab..c4b676d 100644 --- a/packages/domscribe-relay/src/mcp/prompts/prompts.spec.ts +++ b/packages/domscribe-relay/src/mcp/prompts/prompts.spec.ts @@ -25,6 +25,11 @@ describe('ProcessNextPrompt', () => { expect(messages[0].content.text).toContain( 'domscribe.annotation.updateStatus', ); + // RFC 0002: prompt should recommend verify_after_edit between respond + // and updateStatus, but NOT gate the lifecycle on it. + expect(messages[0].content.text).toContain('domscribe.verify.afterEdit'); + expect(messages[0].content.text).toMatch(/RECOMMENDED/i); + expect(messages[0].content.text).toMatch(/not a lifecycle gate/i); }); }); diff --git a/packages/domscribe-relay/src/mcp/tools/annotation-respond.tool.spec.ts b/packages/domscribe-relay/src/mcp/tools/annotation-respond.tool.spec.ts index 98f373d..11e322e 100644 --- a/packages/domscribe-relay/src/mcp/tools/annotation-respond.tool.spec.ts +++ b/packages/domscribe-relay/src/mcp/tools/annotation-respond.tool.spec.ts @@ -28,12 +28,15 @@ describe('AnnotationsRespondTool', () => { 'ann_123', 'Changed button color to blue', ); - expect(result.structuredContent).toEqual({ + expect(result.structuredContent).toMatchObject({ success: true, annotationId: 'ann_123', - nextStep: - 'Call domscribe.annotation.updateStatus with annotationId "ann_123" and status "processed" to complete the lifecycle.', }); + const structured = result.structuredContent as { nextStep: string }; + expect(structured.nextStep).toContain('domscribe.verify.afterEdit'); + expect(structured.nextStep).toContain( + 'domscribe.annotation.updateStatus', + ); }); it('should default message to empty string when not provided', async () => { diff --git a/packages/domscribe-relay/src/mcp/tools/annotation-respond.tool.ts b/packages/domscribe-relay/src/mcp/tools/annotation-respond.tool.ts index ecbe273..8dd00a5 100644 --- a/packages/domscribe-relay/src/mcp/tools/annotation-respond.tool.ts +++ b/packages/domscribe-relay/src/mcp/tools/annotation-respond.tool.ts @@ -43,7 +43,8 @@ export class AnnotationsRespondTool implements McpToolDefinition< description = "Store the agent's response to an annotation including explanation message and code patches. " + 'Use after implementing changes to record what was done so users can review in the overlay. ' + - 'IMPORTANT: After calling this, you MUST call domscribe.annotation.updateStatus with status "processed" (or "failed") to complete the lifecycle.'; + 'RECOMMENDED next step: call domscribe.verify.afterEdit to grade your edit against the pre-edit baseline (verdict + per-axis deltas you can reconcile on retry). ' + + 'IMPORTANT: After verify (or directly, if you skip verify), call domscribe.annotation.updateStatus with status "processed" (or "failed") to complete the lifecycle.'; inputSchema = AnnotationsRespondToolInputSchema; outputSchema = AnnotationsRespondToolOutputSchema; @@ -60,7 +61,7 @@ export class AnnotationsRespondTool implements McpToolDefinition< success: response.success, annotationId: response.annotation.metadata.id, nextStep: response.success - ? `Call domscribe.annotation.updateStatus with annotationId "${response.annotation.metadata.id}" and status "processed" to complete the lifecycle.` + ? `RECOMMENDED: call domscribe.verify.afterEdit with annotationId "${response.annotation.metadata.id}" and your post-edit ComponentStyles / boundingRect to grade the edit. Then call domscribe.annotation.updateStatus with status "processed" to complete the lifecycle.` : undefined, }; diff --git a/packages/domscribe-relay/src/mcp/tools/tool.defs.spec.ts b/packages/domscribe-relay/src/mcp/tools/tool.defs.spec.ts index ec61e0d..3ea16fb 100644 --- a/packages/domscribe-relay/src/mcp/tools/tool.defs.spec.ts +++ b/packages/domscribe-relay/src/mcp/tools/tool.defs.spec.ts @@ -18,6 +18,11 @@ describe('tool.defs', () => { expect(MCP_TOOLS.ANNOTATION_RESPOND).toBe('domscribe.annotation.respond'); expect(MCP_TOOLS.ANNOTATION_SEARCH).toBe('domscribe.annotation.search'); expect(MCP_TOOLS.STATUS).toBe('domscribe.status'); + expect(MCP_TOOLS.VERIFY_AFTER_EDIT).toBe('domscribe.verify.afterEdit'); + }); + + it('declares 13 active tools (RFC 0002 added verify_after_edit)', () => { + expect(Object.keys(MCP_TOOLS)).toHaveLength(13); }); }); diff --git a/packages/domscribe-relay/src/mcp/tools/tool.defs.ts b/packages/domscribe-relay/src/mcp/tools/tool.defs.ts index c1524b8..d931d49 100644 --- a/packages/domscribe-relay/src/mcp/tools/tool.defs.ts +++ b/packages/domscribe-relay/src/mcp/tools/tool.defs.ts @@ -27,6 +27,8 @@ export const MCP_TOOLS = { QUERY_BY_SOURCE: 'domscribe.query.bySource', // System tools STATUS: 'domscribe.status', + // Verify tools (RFC 0002) + VERIFY_AFTER_EDIT: 'domscribe.verify.afterEdit', } as const; export type McpToolName = (typeof MCP_TOOLS)[keyof typeof MCP_TOOLS]; diff --git a/packages/domscribe-relay/src/mcp/tools/verify-after-edit.tool.spec.ts b/packages/domscribe-relay/src/mcp/tools/verify-after-edit.tool.spec.ts new file mode 100644 index 0000000..e20aa37 --- /dev/null +++ b/packages/domscribe-relay/src/mcp/tools/verify-after-edit.tool.spec.ts @@ -0,0 +1,159 @@ +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; +import { VerifyAfterEditTool } from './verify-after-edit.tool.js'; +import { createMockRelayClient } from '../__test-utils__/mock-relay-client.js'; +import { MCP_TOOLS } from './tool.defs.js'; + +const annotationId = 'ann_A7bCd9Ef_1700000000000'; + +describe('VerifyAfterEditTool', () => { + it('declares the verify_after_edit canonical name', () => { + const tool = new VerifyAfterEditTool(createMockRelayClient()); + expect(tool.name).toBe(MCP_TOOLS.VERIFY_AFTER_EDIT); + expect(tool.name).toBe('domscribe.verify.afterEdit'); + }); + + it('forwards componentStyles, boundingRect, and screenshotRef to the relay', async () => { + const verifyAnnotation = vi.fn().mockResolvedValue({ + success: true, + annotationId, + result: { + annotationId, + verdict: 'match', + pixelDiffRatio: 0, + pixelDiffPixels: 0, + componentStylesDelta: {}, + boundingRectDelta: {}, + screenshotRef: 'blob://post-edit/abc', + capturedAt: '2025-01-01T00:00:00.000Z', + }, + }); + const mockClient = createMockRelayClient({ verifyAnnotation }); + const tool = new VerifyAfterEditTool(mockClient); + + await tool.toolCallback({ + annotationId, + postEditComponentStyles: { computed: { color: 'rgb(0, 0, 0)' } }, + postEditBoundingRect: { + x: 0, + y: 0, + width: 10, + height: 10, + top: 0, + right: 10, + bottom: 10, + left: 0, + }, + screenshotRef: 'blob://post-edit/abc', + }); + + expect(verifyAnnotation).toHaveBeenCalledWith(annotationId, { + componentStyles: { computed: { color: 'rgb(0, 0, 0)' } }, + boundingRect: { + x: 0, + y: 0, + width: 10, + height: 10, + top: 0, + right: 10, + bottom: 10, + left: 0, + }, + screenshotRef: 'blob://post-edit/abc', + }); + }); + + it('returns the relay verdict as structured content', async () => { + const verifyAnnotation = vi.fn().mockResolvedValue({ + success: true, + annotationId, + result: { + annotationId, + verdict: 'partial', + pixelDiffRatio: 0.005, + pixelDiffPixels: 250, + componentStylesDelta: { color: ['red', 'blue'] }, + boundingRectDelta: {}, + capturedAt: '2025-01-01T00:00:00.000Z', + }, + }); + const tool = new VerifyAfterEditTool( + createMockRelayClient({ verifyAnnotation }), + ); + + const result: CallToolResult = await tool.toolCallback({ annotationId }); + + expect(result.structuredContent).toMatchObject({ + success: true, + result: { verdict: 'partial' }, + }); + }); + + it('hints at retry when the verdict is regression or no_change', async () => { + const verifyAnnotation = vi.fn().mockResolvedValue({ + success: true, + annotationId, + result: { + annotationId, + verdict: 'no_change', + pixelDiffRatio: 0, + pixelDiffPixels: 0, + componentStylesDelta: {}, + boundingRectDelta: {}, + capturedAt: '2025-01-01T00:00:00.000Z', + reason: 'edit did not land', + }, + }); + const tool = new VerifyAfterEditTool( + createMockRelayClient({ verifyAnnotation }), + ); + + const result: CallToolResult = await tool.toolCallback({ annotationId }); + const structured = result.structuredContent as { nextStep: string }; + + expect(structured.nextStep).toMatch(/retry/i); + expect(structured.nextStep).toContain('edit did not land'); + }); + + it('returns an MCP error result when the relay call throws', async () => { + const verifyAnnotation = vi.fn().mockRejectedValue(new Error('relay down')); + const tool = new VerifyAfterEditTool( + createMockRelayClient({ verifyAnnotation }), + ); + + const result: CallToolResult = await tool.toolCallback({ annotationId }); + + expect(result.isError).toBe(true); + }); + + it('NEVER inlines screenshot bytes — the serialized tool output stays small even with a long screenshotRef', async () => { + const longRef = 'blob://post-edit/' + 'x'.repeat(64); + const verifyAnnotation = vi.fn().mockResolvedValue({ + success: true, + annotationId, + result: { + annotationId, + verdict: 'match', + pixelDiffRatio: 0, + pixelDiffPixels: 0, + componentStylesDelta: {}, + boundingRectDelta: {}, + screenshotRef: longRef, + capturedAt: '2025-01-01T00:00:00.000Z', + }, + }); + const tool = new VerifyAfterEditTool( + createMockRelayClient({ verifyAnnotation }), + ); + + const result: CallToolResult = await tool.toolCallback({ + annotationId, + screenshotRef: longRef, + }); + const serialized = JSON.stringify(result.structuredContent); + + expect(serialized).not.toMatch(/base64/i); + expect(serialized).not.toMatch(/data:image/i); + expect(serialized).toContain(longRef); + expect(serialized.length).toBeLessThan(2048); + }); +}); diff --git a/packages/domscribe-relay/src/mcp/tools/verify-after-edit.tool.ts b/packages/domscribe-relay/src/mcp/tools/verify-after-edit.tool.ts new file mode 100644 index 0000000..73f3916 --- /dev/null +++ b/packages/domscribe-relay/src/mcp/tools/verify-after-edit.tool.ts @@ -0,0 +1,114 @@ +/** + * MCP tool: `domscribe.verify.afterEdit` + * + * Posts a post-edit capture (componentStyles + boundingRect + opaque + * screenshotRef) to the relay's verify endpoint, which grades it against + * the annotation's pre-edit baseline and returns a structured `VerifyResult`. + * + * Soft-recommended in `process-next.prompt` — NO lifecycle gate. RFC 0002 + * §Decision routes the escalation path through a falsifier-trip review, + * not the state machine. + * + * @module @domscribe/relay/mcp/tools/verify-after-edit + */ + +import { z } from 'zod'; +import { + BoundingRectSchema, + ComponentStylesSchema, + VerifyResultSchema, +} from '@domscribe/core'; +import { RelayHttpClient } from '../../client/relay-http-client.js'; +import { + McpToolDefinition, + McpToolOutputSchema, + MCP_TOOLS, + mcpErrorResult, +} from './tool.defs.js'; + +const VerifyAfterEditToolInputSchema = z.object({ + annotationId: z.string().describe('The annotation ID to verify'), + postEditComponentStyles: ComponentStylesSchema.optional().describe( + "Post-edit ComponentStyles snapshot from the runtime StyleCapturer. The relay diffs this against the annotation's pre-edit baseline.", + ), + postEditBoundingRect: BoundingRectSchema.optional().describe( + 'Post-edit boundingRect from the picked element', + ), + screenshotRef: z + .string() + .optional() + .describe( + 'Opaque relay-blob reference for the post-edit element screenshot. The overlay produces this via the runtime ScreenshotCapturer. NEVER raw image bytes — bytes live behind the reference, not in the tool input.', + ), +}); + +type VerifyAfterEditToolInput = z.infer; + +const VerifyAfterEditToolOutputSchema = McpToolOutputSchema.extend({ + success: z.boolean().describe('Whether the verify result was recorded'), + result: VerifyResultSchema.optional().describe( + 'Structured VerifyResult with verdict and per-axis deltas', + ), + nextStep: z + .string() + .optional() + .describe('Workflow hint — what to do after this tool call'), +}); + +type VerifyAfterEditToolOutput = z.infer< + typeof VerifyAfterEditToolOutputSchema +>; + +export class VerifyAfterEditTool implements McpToolDefinition< + typeof VerifyAfterEditToolInputSchema, + typeof VerifyAfterEditToolOutputSchema +> { + name = MCP_TOOLS.VERIFY_AFTER_EDIT; + description = + "Grade a post-edit capture against the annotation's pre-edit baseline. " + + 'Call this AFTER domscribe.annotation.respond and BEFORE ' + + 'domscribe.annotation.updateStatus so the verdict + per-axis deltas ' + + 'can inform a retry if the edit did not land as intended. ' + + 'Returns a structured VerifyResult (verdict ∈ match | partial | ' + + 'no_change | regression; componentStylesDelta; boundingRectDelta; ' + + 'pixelDiffRatio; screenshotRef). Soft-recommended — no lifecycle gate.'; + inputSchema = VerifyAfterEditToolInputSchema; + outputSchema = VerifyAfterEditToolOutputSchema; + + constructor(private readonly relayHttpClient: RelayHttpClient) {} + + async toolCallback(input: VerifyAfterEditToolInput) { + try { + const response = await this.relayHttpClient.verifyAnnotation( + input.annotationId, + { + componentStyles: input.postEditComponentStyles, + boundingRect: input.postEditBoundingRect, + screenshotRef: input.screenshotRef, + }, + ); + + const output: VerifyAfterEditToolOutput = { + success: response.success, + result: response.result, + nextStep: + response.result.verdict === 'match' || + response.result.verdict === 'partial' + ? `Verify verdict: ${response.result.verdict}. Call domscribe.annotation.updateStatus with annotationId "${input.annotationId}" and status "processed" to complete the lifecycle.` + : `Verify verdict: ${response.result.verdict}${response.result.reason ? ` (${response.result.reason})` : ''}. Reconcile the deltas above and retry your edit before marking the annotation processed.`, + }; + + return { + structuredContent: output, + content: [ + { + type: 'text' as const, + text: JSON.stringify(output, null, 2), + }, + ], + }; + } catch (error: unknown) { + return mcpErrorResult(error); + } + } +} From 85b10bea239f9a0da15507178666c12db0ea7ebb Mon Sep 17 00:00:00 2001 From: Kaushik Gnanaskandan Date: Mon, 8 Jun 2026 05:32:08 -0700 Subject: [PATCH 6/7] =?UTF-8?q?chore:=20nx=20sync=20=E2=80=94=20workspace?= =?UTF-8?q?=20project=20references=20for=20@domscribe/verify?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Output of `nx sync` after introducing @domscribe/verify. Adds the new package to the root tsconfig references, wires @domscribe/relay's tsconfig.lib.json to reference verify, refreshes pnpm-lock for the new workspace dep. Other tsconfig files are reformatted by the syncer but not functionally changed. --- packages/domscribe-next/tsconfig.lib.json | 7 ++- packages/domscribe-nuxt/tsconfig.lib.json | 7 ++- packages/domscribe-relay/tsconfig.lib.json | 11 +++- .../domscribe-test-fixtures/tsconfig.json | 12 ++--- .../domscribe-transform/tsconfig.lib.json | 9 ++-- pnpm-lock.yaml | 35 +++++++----- tsconfig.json | 53 +++++++++++++++---- 7 files changed, 95 insertions(+), 39 deletions(-) diff --git a/packages/domscribe-next/tsconfig.lib.json b/packages/domscribe-next/tsconfig.lib.json index 4c8892c..73b527e 100644 --- a/packages/domscribe-next/tsconfig.lib.json +++ b/packages/domscribe-next/tsconfig.lib.json @@ -40,13 +40,16 @@ ], "references": [ { - "path": "../domscribe-transform/tsconfig.lib.json" + "path": "../domscribe-overlay/tsconfig.lib.json" + }, + { + "path": "../domscribe-react/tsconfig.lib.json" }, { "path": "../domscribe-runtime/tsconfig.lib.json" }, { - "path": "../domscribe-react/tsconfig.lib.json" + "path": "../domscribe-transform/tsconfig.lib.json" } ] } diff --git a/packages/domscribe-nuxt/tsconfig.lib.json b/packages/domscribe-nuxt/tsconfig.lib.json index 04aef2b..288cf30 100644 --- a/packages/domscribe-nuxt/tsconfig.lib.json +++ b/packages/domscribe-nuxt/tsconfig.lib.json @@ -40,13 +40,16 @@ ], "references": [ { - "path": "../domscribe-transform/tsconfig.lib.json" + "path": "../domscribe-overlay/tsconfig.lib.json" + }, + { + "path": "../domscribe-vue/tsconfig.lib.json" }, { "path": "../domscribe-runtime/tsconfig.lib.json" }, { - "path": "../domscribe-vue/tsconfig.lib.json" + "path": "../domscribe-transform/tsconfig.lib.json" }, { "path": "../domscribe-relay/tsconfig.lib.json" diff --git a/packages/domscribe-relay/tsconfig.lib.json b/packages/domscribe-relay/tsconfig.lib.json index a711971..8a4ff57 100644 --- a/packages/domscribe-relay/tsconfig.lib.json +++ b/packages/domscribe-relay/tsconfig.lib.json @@ -38,7 +38,14 @@ "**/.nx" ], "references": [ - { "path": "../domscribe-core/tsconfig.lib.json" }, - { "path": "../domscribe-manifest/tsconfig.lib.json" } + { + "path": "../domscribe-verify/tsconfig.lib.json" + }, + { + "path": "../domscribe-manifest/tsconfig.lib.json" + }, + { + "path": "../domscribe-core/tsconfig.lib.json" + } ] } diff --git a/packages/domscribe-test-fixtures/tsconfig.json b/packages/domscribe-test-fixtures/tsconfig.json index 4ba49dc..81613ef 100644 --- a/packages/domscribe-test-fixtures/tsconfig.json +++ b/packages/domscribe-test-fixtures/tsconfig.json @@ -3,11 +3,11 @@ "files": [], "include": [], "references": [ - // All project dependencies - { "path": "../domscribe-core" }, - { "path": "../domscribe-manifest" }, - { "path": "../domscribe-transform" }, - // This project's other tsconfig.*.json files - { "path": "./tsconfig.spec.json" } + { + "path": "../domscribe-verify" + }, + { + "path": "./tsconfig.spec.json" + } ] } diff --git a/packages/domscribe-transform/tsconfig.lib.json b/packages/domscribe-transform/tsconfig.lib.json index df5cf16..2d7c5ff 100644 --- a/packages/domscribe-transform/tsconfig.lib.json +++ b/packages/domscribe-transform/tsconfig.lib.json @@ -39,13 +39,16 @@ ], "references": [ { - "path": "../domscribe-manifest/tsconfig.lib.json" + "path": "../domscribe-relay/tsconfig.lib.json" }, { - "path": "../domscribe-core/tsconfig.lib.json" + "path": "../domscribe-overlay/tsconfig.lib.json" }, { - "path": "../domscribe-relay/tsconfig.lib.json" + "path": "../domscribe-manifest/tsconfig.lib.json" + }, + { + "path": "../domscribe-core/tsconfig.lib.json" } ] } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c4b9387..560d2bc 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -243,6 +243,9 @@ importers: '@domscribe/manifest': specifier: workspace:* version: link:../domscribe-manifest + '@domscribe/verify': + specifier: workspace:* + version: link:../domscribe-verify '@fastify/cors': specifier: ^10.0.0 version: 10.1.0 @@ -296,18 +299,15 @@ importers: packages/domscribe-test-fixtures: devDependencies: + '@domscribe/verify': + specifier: workspace:* + version: link:../domscribe-verify '@playwright/test': specifier: ^1.49.0 version: 1.58.2 - '@types/pixelmatch': - specifier: ^5.2.6 - version: 5.2.6 '@types/pngjs': specifier: ^6.0.5 version: 6.0.5 - pixelmatch: - specifier: ^7.1.0 - version: 7.2.0 playwright: specifier: ^1.49.0 version: 1.58.2 @@ -432,6 +432,22 @@ importers: specifier: ^5.102.0 version: 5.102.0(@swc/core@1.15.8(@swc/helpers@0.5.19)) + packages/domscribe-verify: + dependencies: + '@domscribe/core': + specifier: workspace:* + version: link:../domscribe-core + pixelmatch: + specifier: ^7.1.0 + version: 7.2.0 + pngjs: + specifier: ^7.0.0 + version: 7.0.0 + devDependencies: + '@types/pngjs': + specifier: ^6.0.5 + version: 6.0.5 + packages/domscribe-vue: dependencies: '@domscribe/core': @@ -2973,9 +2989,6 @@ packages: '@types/parse-json@4.0.2': resolution: {integrity: sha512-dISoDXWWQwUquiKsyZ4Ng+HX2KsPL7LyHKHQwgGFEA3IaKac4Obd+h2a/a6waisAoepJlBcx9paWqjA8/HVjCw==} - '@types/pixelmatch@5.2.6': - resolution: {integrity: sha512-wC83uexE5KGuUODn6zkm9gMzTwdY5L0chiK+VrKcDfEjzxh1uadlWTvOmAbCpnM9zx/Ww3f8uKlYQVnO/TrqVg==} - '@types/pngjs@6.0.5': resolution: {integrity: sha512-0k5eKfrA83JOZPppLtS2C7OUtyNAl2wKNxfyYl9Q5g9lPkgBl/9hNyAu6HuEH2J4XmIv2znEpkDd0SaZVxW6iQ==} @@ -10319,10 +10332,6 @@ snapshots: '@types/parse-json@4.0.2': {} - '@types/pixelmatch@5.2.6': - dependencies: - '@types/node': 25.3.3 - '@types/pngjs@6.0.5': dependencies: '@types/node': 25.3.3 diff --git a/tsconfig.json b/tsconfig.json index acd8100..31c6ab2 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -3,16 +3,47 @@ "compileOnSave": false, "files": [], "references": [ - { "path": "./packages/domscribe-core" }, - { "path": "./packages/domscribe-manifest" }, - { "path": "./packages/domscribe-relay" }, - { "path": "./packages/domscribe-runtime" }, - { "path": "./packages/domscribe-overlay" }, - { "path": "./packages/domscribe-transform" }, - { "path": "./packages/domscribe-react" }, - { "path": "./packages/domscribe-vue" }, - { "path": "./packages/domscribe-next" }, - { "path": "./packages/domscribe-nuxt" }, - { "path": "./packages/domscribe-test-fixtures" } + { + "path": "./packages/domscribe-core" + }, + { + "path": "./packages/domscribe-manifest" + }, + { + "path": "./packages/domscribe-relay" + }, + { + "path": "./packages/domscribe-runtime" + }, + { + "path": "./packages/domscribe-overlay" + }, + { + "path": "./packages/domscribe-transform" + }, + { + "path": "./packages/domscribe-react" + }, + { + "path": "./packages/domscribe-vue" + }, + { + "path": "./packages/domscribe-next" + }, + { + "path": "./packages/domscribe-nuxt" + }, + { + "path": "./packages/domscribe-test-fixtures" + }, + { + "path": "./packages/domscribe-verify" + }, + { + "path": "./packages/domscribe-cli" + }, + { + "path": "./packages/domscribe-mcp" + } ] } From faa10e87fe1c3a4926c68888ef59be62f141c4ee Mon Sep 17 00:00:00 2001 From: Kaushik Gnanaskandan Date: Mon, 8 Jun 2026 05:45:59 -0700 Subject: [PATCH 7/7] ci(test-fixtures): override typecheck target for styling fixture apps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Nx @nx/js/typescript plugin auto-infers `tsc --build --emitDeclarationOnly` as the typecheck target for any project with a tsconfig.json. The Tailwind and styled-components fixture apps have `noEmit: true` (they are Vite apps, not libraries), so the inferred command fails with TS5069. Override the inferred target with `tsc --noEmit` via the package.json `nx` field. Type errors are still surfaced; the apps just no longer try to emit declaration files they don't need. Pre-existing failure on the base branch (feat/sprint-2734) — surfaced by CI on this PR's merge gate. Reviewer-pushed fix to unblock the merge. Co-Authored-By: Claude Opus 4.7 --- .../styling/styled-app/package.json | 11 +++++++++++ .../styling/tailwind-app/package.json | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/packages/domscribe-test-fixtures/styling/styled-app/package.json b/packages/domscribe-test-fixtures/styling/styled-app/package.json index 06c035d..858f506 100644 --- a/packages/domscribe-test-fixtures/styling/styled-app/package.json +++ b/packages/domscribe-test-fixtures/styling/styled-app/package.json @@ -7,6 +7,17 @@ "build": "vite build", "preview": "vite preview --port 4802 --strictPort" }, + "nx": { + "targets": { + "typecheck": { + "executor": "nx:run-commands", + "options": { + "command": "tsc --noEmit", + "cwd": "packages/domscribe-test-fixtures/styling/styled-app" + } + } + } + }, "dependencies": { "react": "^18.3.1", "react-dom": "^18.3.1", diff --git a/packages/domscribe-test-fixtures/styling/tailwind-app/package.json b/packages/domscribe-test-fixtures/styling/tailwind-app/package.json index 900ba9b..84c2237 100644 --- a/packages/domscribe-test-fixtures/styling/tailwind-app/package.json +++ b/packages/domscribe-test-fixtures/styling/tailwind-app/package.json @@ -7,6 +7,17 @@ "build": "vite build", "preview": "vite preview --port 4801 --strictPort" }, + "nx": { + "targets": { + "typecheck": { + "executor": "nx:run-commands", + "options": { + "command": "tsc --noEmit", + "cwd": "packages/domscribe-test-fixtures/styling/tailwind-app" + } + } + } + }, "dependencies": { "react": "^18.3.1", "react-dom": "^18.3.1"