From 93a6b3c3fd0be25fdcd9b2f35a7090959562cc58 Mon Sep 17 00:00:00 2001 From: aorlov Date: Sat, 30 May 2026 03:04:50 +0200 Subject: [PATCH] fix(llm-tools): deletion guard on tracked rewrite + actionable oneOf validation errors (SD-3287) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the two highest-volume LLM-tooling error classes from production traffic. **executor.ts — empty text nodes on tracked rewrite (1001/wk).** The word-diff prefix/suffix trim optimization (#2817/#2832) could reduce a non-empty replacement to an empty delta (e.g. "best endeavours to:" → "endeavours to:" leaves trimmedNew === ""), then the single-change branch called schema.text(""), which ProseMirror rejects with `RangeError: Empty text nodes are not allowed`. Add an `else if (trimmedNew.length === 0)` branch that uses `tr.delete(trimmedFrom, trimmedTo)` instead — same semantics as the existing tracked-review-state guard. Regression tests in tracked-rewrite.integration.test.ts cover both layers (direct executor and end-to-end doc.replace in tracked mode). The remap-length empty-replacement test was asserting the buggy `tr.replaceWith(5, 12, )` call — the mocked schema masked the crash — corrected to assert `tr.delete(5, 12)` and `tr.replaceWith` not called. **operation-args.ts — opaque oneOf validation errors (2000+/wk cluster).** `validateValueAgainstTypeSpec`'s oneOf branch returned the unactionable `X must match one of the allowed schema variants.` for every object-union failure. LLM agents looped on the same malformed shape because the error named no specific issue. Make the branch discriminator-aware: * When object variants share a const discriminator (kind/op/type) and the value carries it matching exactly one variant, re-validate against that variant and surface its specific failure (e.g. `at.target is required.`, `target.nodeType is required.`). * When the value carries the discriminator but matches no variant, list the allowed tag values and echo what was sent. * When no discriminator is shared (or the value isn't an object), enumerate the accepted variant shapes via `describeVariant` and report the received value via `describeReceived`. Behavior is message-only: identical pass/fail across every input. Same fix benefits every oneOf-validated parameter (target/at/within/steps[]) across insert/create/lists/styles/format/query/mutations at once. Defensive details: * `return;` after the inner re-validate in the discriminator-match path (unreachable in practice — the matched variant already failed in the outer loop — but pins the invariant against future fast-path refactors). * `details.selectedError` set consistently across all three error paths (matched-variant, unmatched-tag, generic fallback). * `truncateForError` caps serialized values at 64 chars so an accidentally- large payload can't blow up logs or LLM context. Matches the same truncation pattern used by REPAIR_BLOCKED. New tests in validate-type-spec.test.ts cover real contract schemas (doc.create.paragraph:at and doc.insert:target) plus a non-discriminated repeated-error case that pins the `selectRepeatedActionableOneOfError` fallback (which the new discriminator path would otherwise mask). The existing "oneOf with mixed schemas" generic-fallback assertion was updated to the enumerated message. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../__tests__/lib/validate-type-spec.test.ts | 117 +++++++++++++++++- apps/cli/src/lib/operation-args.ts | 113 ++++++++++++++++- .../plan-engine/executor.ts | 7 ++ .../plan-engine/remap-length.test.ts | 6 +- .../tracked-rewrite.integration.test.ts | 48 +++++++ 5 files changed, 287 insertions(+), 4 deletions(-) diff --git a/apps/cli/src/__tests__/lib/validate-type-spec.test.ts b/apps/cli/src/__tests__/lib/validate-type-spec.test.ts index 621b16cb7c..d14872445f 100644 --- a/apps/cli/src/__tests__/lib/validate-type-spec.test.ts +++ b/apps/cli/src/__tests__/lib/validate-type-spec.test.ts @@ -61,13 +61,88 @@ describe('validateValueAgainstTypeSpec – oneOf with mixed schemas', () => { oneOf: [{ const: 'block' }, { type: 'object', properties: { kind: { const: 'inline' } }, required: ['kind'] }], }; - test('falls back to generic message when variants are not all const', () => { + test('enumerates the accepted variant shapes when nothing matches', () => { try { validateValueAgainstTypeSpec('nope', mixedSchema, 'target'); throw new Error('Expected CliError to be thrown'); } catch (error) { const cliError = error as CliError; - expect(cliError.message).toBe('target must match one of the allowed schema variants.'); + expect(cliError.message).toBe( + 'target must match one of: "block" | { kind: "inline" }. Received a string ("nope").', + ); + } + }); +}); + +// --------------------------------------------------------------------------- +// Tagged-union (target/at) actionable errors — the dominant LLM failure class. +// These exercise the REAL contract schemas so the assertions track what an +// agent actually sees. See customer error pivot: "insert:target …" and +// "create paragraph:at must match one of the allowed schema variants." +// --------------------------------------------------------------------------- + +describe('validateValueAgainstTypeSpec – tagged-union actionable errors (real schemas)', () => { + const atSchema = CLI_OPERATION_METADATA['doc.create.paragraph'].params.find((p) => p.name === 'at')?.schema; + const insertTargetSchema = CLI_OPERATION_METADATA['doc.insert'].params.find((p) => p.name === 'target')?.schema; + + if (!atSchema || !insertTargetSchema) { + throw new Error('metadata missing create.paragraph:at or insert:target schema'); + } + + test('flattened "at" (nodeId hoisted out of target) names the missing field', () => { + try { + validateValueAgainstTypeSpec({ kind: 'after', nodeId: 'ABC123' }, atSchema, 'at'); + throw new Error('Expected CliError to be thrown'); + } catch (error) { + // Was the misleading "at.nodeId is not allowed by schema." + expect((error as CliError).message).toBe('at.target is required.'); + } + }); + + test('unknown "at" kind enumerates the valid kinds and echoes what was sent', () => { + try { + validateValueAgainstTypeSpec({ kind: 'sidebar' }, atSchema, 'at'); + throw new Error('Expected CliError to be thrown'); + } catch (error) { + // Was the misleading "at.target is required." + expect((error as CliError).message).toBe( + 'at.kind must be one of: "documentStart", "documentEnd", "before", "after" (received "sidebar").', + ); + } + }); + + test('bare string "at" enumerates the accepted shapes instead of the opaque union error', () => { + try { + validateValueAgainstTypeSpec('ABC123', atSchema, 'at'); + throw new Error('Expected CliError to be thrown'); + } catch (error) { + const message = (error as CliError).message; + expect(message).not.toContain('allowed schema variants'); + expect(message).toContain('at must match one of:'); + expect(message).toContain('kind: "before"'); + expect(message).toContain('Received a string ("ABC123")'); + } + }); + + test('insert target missing nodeType names the missing field', () => { + try { + validateValueAgainstTypeSpec({ kind: 'block', nodeId: 'ABC123' }, insertTargetSchema, 'target'); + throw new Error('Expected CliError to be thrown'); + } catch (error) { + const message = (error as CliError).message; + expect(message).not.toContain('allowed schema variants'); + expect(message).toContain('nodeType'); + } + }); + + test('bare string insert target enumerates the accepted shapes', () => { + try { + validateValueAgainstTypeSpec('ABC123', insertTargetSchema, 'target'); + throw new Error('Expected CliError to be thrown'); + } catch (error) { + const message = (error as CliError).message; + expect(message).not.toContain('allowed schema variants'); + expect(message).toContain('target must match one of:'); } }); }); @@ -108,6 +183,44 @@ describe('validateValueAgainstTypeSpec – repeated actionable oneOf errors', () }); }); +// Without a shared const discriminator across object variants, the +// discriminator path cannot intercept — a repeated error must therefore surface +// through `selectRepeatedActionableOneOfError`. Pins that fallback so a future +// change to the discriminator gate can't silently make it unreachable. +describe('validateValueAgainstTypeSpec – repeated actionable oneOf errors (no discriminator)', () => { + const repeatedRequiredSchema: CliTypeSpec = { + oneOf: [ + { + type: 'object', + properties: { + id: { type: 'string' }, + name: { type: 'string' }, + }, + required: ['id'], + }, + { + type: 'object', + properties: { + id: { type: 'string' }, + label: { type: 'string' }, + }, + required: ['id'], + }, + ], + }; + + test('surfaces the repeated required-field error when no discriminator exists', () => { + try { + validateValueAgainstTypeSpec({ name: 'x' }, repeatedRequiredSchema, 'target'); + throw new Error('Expected CliError to be thrown'); + } catch (error) { + const cliError = error as CliError; + expect(cliError.message).toBe('target.id is required.'); + expect((cliError.details as { selectedError?: string }).selectedError).toBe('target.id is required.'); + } + }); +}); + describe('validateValueAgainstTypeSpec – enum branch', () => { const enumSchema: CliTypeSpec = { type: 'string', diff --git a/apps/cli/src/lib/operation-args.ts b/apps/cli/src/lib/operation-args.ts index 838b30a8b6..e672007bb3 100644 --- a/apps/cli/src/lib/operation-args.ts +++ b/apps/cli/src/lib/operation-args.ts @@ -146,6 +146,85 @@ function selectRepeatedActionableOneOfError(path: string, errors: string[]): str return bestMessage; } +/** + * Render a variant's shape compactly so a oneOf failure can name the accepted + * forms (e.g. `{ kind: "before", target }`) instead of an opaque "must match + * one of the allowed schema variants". Object properties show their `const` + * discriminator when present, and a trailing `?` when optional. + */ +function describeVariant(variant: CliTypeSpec): string { + if ('const' in variant) return JSON.stringify(variant.const); + if ('oneOf' in variant) return (variant.oneOf as CliTypeSpec[]).map(describeVariant).join(' | '); + if (variant.enum) return variant.enum.map((entry) => JSON.stringify(entry)).join(' | '); + if (variant.type === 'object') { + const properties = variant.properties ?? {}; + const required = new Set(variant.required ?? []); + const keys = Object.keys(properties); + if (keys.length === 0) return 'object'; + const parts = keys.map((key) => { + const prop = properties[key]; + if (prop && 'const' in prop) return `${key}: ${JSON.stringify(prop.const)}`; + return required.has(key) ? key : `${key}?`; + }); + return `{ ${parts.join(', ')} }`; + } + if (variant.type === 'array') return 'array'; + if (variant.type === 'json') return 'object'; + return variant.type ?? 'value'; +} + +/** + * The property key that carries a `const` in every object variant — the + * discriminator of a tagged union (e.g. `kind` for target/at, `op` for + * mutation steps). Returns null when there is no such shared key. Non-object + * variants (e.g. a bare string ref) are ignored so mixed unions still resolve. + */ +function getOneOfDiscriminator(variants: readonly CliTypeSpec[]): string | null { + const objectVariants = variants.filter( + (variant): variant is Extract => + !('const' in variant) && !('oneOf' in variant) && variant.type === 'object' && isRecord(variant.properties), + ); + if (objectVariants.length < 2) return null; + for (const key of Object.keys(objectVariants[0].properties)) { + const sharedByAll = objectVariants.every((variant) => { + const prop = variant.properties[key]; + return prop != null && 'const' in prop; + }); + if (sharedByAll) return key; + } + return null; +} + +/** The const value a variant pins for `key`, if any (its discriminator tag). */ +function variantConstFor(variant: CliTypeSpec, key: string): unknown { + if ('const' in variant || 'oneOf' in variant || variant.type !== 'object') return undefined; + const prop = variant.properties?.[key]; + return prop && 'const' in prop ? prop.const : undefined; +} + +/** + * Truncate a serialized value to keep oneOf error messages bounded — a caller + * accidentally passing a multi-MB string as `target`/`at` shouldn't inflate + * logs or the LLM context window. Matches the truncation pattern used by the + * REPAIR_BLOCKED preview. + */ +function truncateForError(serialized: string, max = 64): string { + return serialized.length > max ? `${serialized.slice(0, max)}…` : serialized; +} + +/** Human description of a received value, for oneOf error messages. */ +function describeReceived(value: unknown): string { + if (value === null) return 'null'; + if (Array.isArray(value)) return 'an array'; + const valueType = typeof value; + if (valueType === 'object') { + const keys = Object.keys(value as Record); + return keys.length > 0 ? `an object with keys [${keys.join(', ')}]` : 'an empty object'; + } + if (valueType === 'string') return `a string (${truncateForError(JSON.stringify(value))})`; + return `a ${valueType} (${truncateForError(JSON.stringify(value))})`; +} + export function validateValueAgainstTypeSpec(value: unknown, schema: CliTypeSpec, path: string): void { if ('const' in schema) { if (value !== schema.const) { @@ -166,12 +245,44 @@ export function validateValueAgainstTypeSpec(value: unknown, schema: CliTypeSpec } } + // Tagged-union path (target/at keyed by `kind`, mutation steps keyed by + // `op`): when the value carries the discriminator, surface the matching + // variant's specific failure ("at.target is required.") or the set of + // valid tags. This lets an LLM self-correct instead of retrying the same + // malformed shape against an opaque union error. + const discriminator = getOneOfDiscriminator(variants); + if (discriminator && isRecord(value) && value[discriminator] !== undefined) { + const received = value[discriminator]; + const matched = variants.find((variant) => variantConstFor(variant, discriminator) === received); + if (matched) { + try { + validateValueAgainstTypeSpec(value, matched, path); + // Unreachable in practice: `matched` already failed in the outer + // variant loop above (deterministic same-input revalidation must + // throw again). Explicit return so a future fast-path refactor on + // this function can't silently let control fall through to the + // unmatched-tag throw below with a falsely-matched value. + return; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + throw new CliError('VALIDATION_ERROR', message, { errors, selectedError: message }); + } + } + const allowedTags = variants + .map((variant) => variantConstFor(variant, discriminator)) + .filter((tag) => tag !== undefined) + .map((tag) => JSON.stringify(tag)); + const unmatchedTagMessage = `${path}.${discriminator} must be one of: ${allowedTags.join(', ')} (received ${truncateForError(JSON.stringify(received))}).`; + throw new CliError('VALIDATION_ERROR', unmatchedTagMessage, { errors, selectedError: unmatchedTagMessage }); + } + const allowedValues = extractConstValues(variants); const selectedError = selectRepeatedActionableOneOfError(path, errors); const message = allowedValues.length > 0 ? `${path} must be one of: ${allowedValues.join(', ')}.` - : (selectedError ?? `${path} must match one of the allowed schema variants.`); + : (selectedError ?? + `${path} must match one of: ${variants.map(describeVariant).join(' | ')}. Received ${describeReceived(value)}.`); throw new CliError('VALIDATION_ERROR', message, { errors, selectedError }); } diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/executor.ts b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/executor.ts index 5a2330695a..06032c78a4 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/executor.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/executor.ts @@ -998,6 +998,13 @@ export function executeTextRewrite( tr.replaceWith(remap(change.docFrom), remap(change.docTo), content); } } + } else if (trimmedNew.length === 0) { + // Pure deletion after trimming: a non-empty replacement whose new text is + // fully contained in the old text's common prefix + suffix collapses to an + // empty delta (e.g. "best endeavours to:" → "endeavours to:" leaves + // trimmedNew === ""). Delete the removed range rather than building + // schema.text('') — ProseMirror rejects empty text nodes. + tr.delete(trimmedFrom, trimmedTo); } else { // 0 or 1 word change: replace just the trimmed range. const content = buildTextWithTabs(editor.state.schema, trimmedNew, asProseMirrorMarks(marks)); diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/remap-length.test.ts b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/remap-length.test.ts index 47e760204a..45b1624132 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/remap-length.test.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/remap-length.test.ts @@ -227,7 +227,11 @@ describe('remap correctness: mapping.map called with compiled positions', () => expect(tr.mapping.map).toHaveBeenCalledWith(3); expect(tr.mapping.map).toHaveBeenCalledWith(10); - expect(tr.replaceWith).toHaveBeenCalledWith(5, 12, expect.anything()); + // An empty replacement collapses to a pure deletion: it must use tr.delete, + // not tr.replaceWith with an empty text node (ProseMirror rejects empty + // text nodes — the mocked schema previously hid that crash). + expect(tr.delete).toHaveBeenCalledWith(5, 12); + expect(tr.replaceWith).not.toHaveBeenCalled(); }); it('equal length replacement: maps positions through mapping', () => { diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/tracked-rewrite.integration.test.ts b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/tracked-rewrite.integration.test.ts index 6f1aadbb5f..6c4f7025dd 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/tracked-rewrite.integration.test.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/tracked-rewrite.integration.test.ts @@ -343,6 +343,54 @@ describe('doc.replace multi-paragraph integration', () => { expect(accepted).not.toContain('Smith address'); }); + // Customer-reported crash ("Empty text nodes are not allowed"): a non-empty + // replacement whose new text is fully contained in the old text's prefix + + // suffix trims to an EMPTY delta. The single-change branch must delete the + // removed text rather than build schema.text('') (which ProseMirror rejects). + it('rewrites a replacement that trims to an empty delta as a deletion (executor)', () => { + editor = makeEditor(['the Company refers to: the following terms']); + const { step, target } = compileSingleRewrite(editor, 'refers to:', 'to:'); + const tr = editor.state.tr; + + const outcome = executeTextRewrite(editor, tr, target as any, step as any, tr.mapping as any); + + expect(outcome).toEqual({ changed: true }); + expect(tr.doc.textContent).toBe('the Company to: the following terms'); + }); + + it('handles a tracked replace that trims to an empty delta (deletion only)', () => { + editor = makeEditor(['We will use our best endeavours to: deliver']); + const receipt = editor.doc.replace( + { + ref: getFirstMatchRef(editor, 'best endeavours to:'), + text: 'endeavours to:', + }, + { changeMode: 'tracked' }, + ); + + expect(receipt.success).toBe(true); + + // Accepted view: drop trackDelete marks, keep everything else. + const acceptedParts: string[] = []; + editor.state.doc.descendants((node: any) => { + if (!node.isText || !node.text) return; + const isDeleted = node.marks.some((mark: any) => mark.type.name === TrackDeleteMarkName); + if (!isDeleted) acceptedParts.push(node.text); + }); + + expect(acceptedParts.join('')).toBe('We will use our endeavours to: deliver'); + + // The trimmed-away prefix "best " must be represented as a tracked deletion. + const deletedTexts: string[] = []; + editor.state.doc.descendants((node: any) => { + if (!node.isText || !node.text) return; + if (node.marks.some((mark: any) => mark.type.name === TrackDeleteMarkName)) { + deletedTexts.push(node.text); + } + }); + expect(deletedTexts.join('')).toContain('best'); + }); + it('SD-3044: tracked rewrite of long block preserves spacing across multiple equal anchors', () => { editor = makeEditor([ '[insert] Pty Limited a company incorporated in Australia having its registered office at [insert] (ACN [insert])("Company")',