Skip to content

Commit 1469062

Browse files
k4cper-g and claude committed
Add vocabulary short codes, simplify detail levels to compact/full
Add ROLE_CODES, STATE_CODES, ACTION_CODES maps for compact output. Remove "minimal" detail level — consolidate to "compact" (default) and "full". Only emit bounds for interactable nodes. Update docs, MCP server defaults, and tests to match. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 8cda924 commit 1469062

File tree

7 files changed

+164
-96
lines changed

7 files changed

+164
-96
lines changed

README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,13 @@ Output:
5151
# app: Spotify
5252
# 63 nodes (280 before pruning)
5353
54-
[e0] window "Spotify" @120,40 1680x1020
55-
[e1] document "Spotify" @120,40 1680x1020
56-
[e2] button "Back" @132,52 32x32 [click]
57-
[e3] button "Forward" @170,52 32x32 {disabled} [click]
58-
[e7] navigation "Main" @120,88 240x972
59-
[e8] link "Home" @132,100 216x40 {selected} [click]
60-
[e9] link "Search" @132,148 216x40 [click]
54+
[e0] win "Spotify" 120,40 1680x1020
55+
[e1] doc "Spotify" 120,40 1680x1020
56+
[e2] btn "Back" 132,52 32x32 [clk]
57+
[e3] btn "Forward" 170,52 32x32 {dis} [clk]
58+
[e7] nav "Main" 120,88 240x972
59+
[e8] lnk "Home" 132,100 216x40 {sel} [clk]
60+
[e9] lnk "Search" 132,148 216x40 [clk]
6161
```
6262

6363
### Session API

docs/api-reference.md

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ const result = await session.snapshot({
2525
app: undefined, // filter by window title (scope="full" only)
2626
maxDepth: 999, // maximum tree depth
2727
compact: true, // true → compact text, false → CUP envelope object
28-
detail: "standard", // "standard" | "minimal" | "full"
28+
detail: "compact", // "compact" | "full"
2929
});
3030
```
3131
@@ -44,8 +44,7 @@ const result = await session.snapshot({
4444
4545
| Level | Behavior |
4646
|-------|----------|
47-
| `standard` | Prunes unnamed generics, empty text, decorative images (~75% smaller) |
48-
| `minimal` | Keep only interactive nodes and their ancestors |
47+
| `compact` | Prunes unnamed generics, empty text, decorative images (~75% smaller) |
4948
| `full` | No pruning — every node included |
5049
5150
---
@@ -266,14 +265,14 @@ The text format returned by `session.snapshot({ compact: true })`. Optimized for
266265
# app: Discord
267266
# 87 nodes (353 before pruning)
268267

269-
[e0] window "Discord" @509,62 1992x1274
270-
[e1] document "General" @509,62 1992x1274 {readonly}
271-
[e2] button "Back" @518,66 26x24 [click]
272-
[e7] tree "Servers" @509,94 72x1242
273-
[e8] treeitem "Lechownia" @513,190 64x48 {selected} [click,select]
268+
[e0] win "Discord" 509,62 1992x1274
269+
[e1] doc "General" 509,62 1992x1274 {ro}
270+
[e2] btn "Back" 518,66 26x24 [clk]
271+
[e7] tre "Servers" 509,94 72x1242
272+
[e8] ti "Lechownia" 513,190 64x48 {sel} [clk,sel]
274273
```
275274
276-
Line format: `[id] role "name" @x,y wxh {states} [actions] val="value" (attrs)`
275+
Line format: `[id] role "name" x,y wxh {states} [actions] val="value" (attrs)`
277276
278277
Full spec: [compact.md](https://github.com/computeruseprotocol/computer-use-protocol/blob/main/schema/compact.md)
279278

src/format.ts

Lines changed: 120 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -283,48 +283,18 @@ function pruneNode(
283283
return [pruned];
284284
}
285285

286-
function pruneMinimalNode(node: CupNode): CupNode | null {
287-
const children = node.children ?? [];
288-
const keptChildren: CupNode[] = [];
289-
290-
for (const child of children) {
291-
const pruned = pruneMinimalNode(child);
292-
if (pruned) keptChildren.push(pruned);
293-
}
294-
295-
if (hasMeaningfulActions(node) || keptChildren.length > 0) {
296-
const pruned: CupNode = { ...node };
297-
delete pruned.children;
298-
if (keptChildren.length > 0) {
299-
pruned.children = keptChildren;
300-
}
301-
return pruned;
302-
}
303-
304-
return null;
305-
}
306-
307286
export function pruneTree(
308287
tree: CupNode[],
309288
options?: { detail?: Detail; screen?: { w: number; h: number } | null },
310289
): CupNode[] {
311-
const detail = options?.detail ?? "standard";
290+
const detail = options?.detail ?? "compact";
312291
const screen = options?.screen;
313292

314293
if (detail === "full") {
315294
return structuredClone(tree);
316295
}
317296

318-
if (detail === "minimal") {
319-
const result: CupNode[] = [];
320-
for (const root of tree) {
321-
const pruned = pruneMinimalNode(root);
322-
if (pruned) result.push(pruned);
323-
}
324-
return result;
325-
}
326-
327-
// "standard"
297+
// "compact"
328298
let screenViewport: Rect | null = null;
329299
if (screen) {
330300
screenViewport = { x: 0, y: 0, w: screen.w, h: screen.h };
@@ -342,8 +312,113 @@ export function pruneTree(
342312

343313
const VALUE_ROLES = new Set(["textbox", "searchbox", "combobox", "spinbutton", "slider"]);
344314

315+
// ---------------------------------------------------------------------------
316+
// Vocabulary short codes — compact aliases for roles, states, and actions.
317+
// These reduce per-node token cost by ~50% on role/state/action strings.
318+
// ---------------------------------------------------------------------------
319+
320+
export const ROLE_CODES: Record<string, string> = {
321+
alert: "alrt",
322+
alertdialog: "adlg",
323+
application: "app",
324+
banner: "bnr",
325+
button: "btn",
326+
cell: "cel",
327+
checkbox: "chk",
328+
columnheader: "colh",
329+
combobox: "cmb",
330+
complementary: "cmp",
331+
contentinfo: "ci",
332+
dialog: "dlg",
333+
document: "doc",
334+
form: "frm",
335+
generic: "gen",
336+
grid: "grd",
337+
group: "grp",
338+
heading: "hdg",
339+
img: "img",
340+
link: "lnk",
341+
list: "lst",
342+
listitem: "li",
343+
log: "log",
344+
main: "main",
345+
marquee: "mrq",
346+
menu: "mnu",
347+
menubar: "mnub",
348+
menuitem: "mi",
349+
menuitemcheckbox: "mic",
350+
menuitemradio: "mir",
351+
navigation: "nav",
352+
none: "none",
353+
option: "opt",
354+
progressbar: "pbar",
355+
radio: "rad",
356+
region: "rgn",
357+
row: "row",
358+
rowheader: "rowh",
359+
scrollbar: "sb",
360+
search: "srch",
361+
searchbox: "sbx",
362+
separator: "sep",
363+
slider: "sld",
364+
spinbutton: "spn",
365+
status: "sts",
366+
switch: "sw",
367+
tab: "tab",
368+
table: "tbl",
369+
tablist: "tabs",
370+
tabpanel: "tpnl",
371+
text: "txt",
372+
textbox: "tbx",
373+
timer: "tmr",
374+
titlebar: "ttlb",
375+
toolbar: "tlbr",
376+
tooltip: "ttp",
377+
tree: "tre",
378+
treeitem: "ti",
379+
window: "win",
380+
};
381+
382+
export const STATE_CODES: Record<string, string> = {
383+
busy: "bsy",
384+
checked: "chk",
385+
collapsed: "col",
386+
disabled: "dis",
387+
editable: "edt",
388+
expanded: "exp",
389+
focused: "foc",
390+
hidden: "hid",
391+
mixed: "mix",
392+
modal: "mod",
393+
multiselectable: "msel",
394+
offscreen: "off",
395+
pressed: "prs",
396+
readonly: "ro",
397+
required: "req",
398+
selected: "sel",
399+
};
400+
401+
export const ACTION_CODES: Record<string, string> = {
402+
click: "clk",
403+
collapse: "col",
404+
decrement: "dec",
405+
dismiss: "dsm",
406+
doubleclick: "dbl",
407+
expand: "exp",
408+
focus: "foc",
409+
increment: "inc",
410+
longpress: "lp",
411+
rightclick: "rclk",
412+
scroll: "scr",
413+
select: "sel",
414+
setvalue: "sv",
415+
toggle: "tog",
416+
type: "typ",
417+
};
418+
345419
export function formatLine(node: CupNode): string {
346-
const parts = [`[${node.id}]`, node.role];
420+
const role = node.role;
421+
const parts = [`[${node.id}]`, ROLE_CODES[role] ?? role];
347422

348423
const name = node.name || "";
349424
if (name) {
@@ -352,25 +427,29 @@ export function formatLine(node: CupNode): string {
352427
parts.push(`"${truncated}"`);
353428
}
354429

430+
// Actions (drop "focus" — it's noise)
431+
const actions = (node.actions ?? []).filter((a) => a !== "focus");
432+
433+
// Only include bounds for interactable nodes (nodes with meaningful actions).
434+
// Non-interactable nodes are context-only — agents reference them by ID, not
435+
// by coordinates, so spatial info adds tokens without value.
355436
const bounds = node.bounds;
356-
if (bounds) {
357-
parts.push(`@${bounds.x},${bounds.y} ${bounds.w}x${bounds.h}`);
437+
if (bounds && actions.length > 0) {
438+
parts.push(`${bounds.x},${bounds.y} ${bounds.w}x${bounds.h}`);
358439
}
359440

360441
const states = node.states ?? [];
361442
if (states.length > 0) {
362-
parts.push("{" + states.join(",") + "}");
443+
parts.push("{" + states.map((s) => STATE_CODES[s] ?? s).join(",") + "}");
363444
}
364445

365-
// Actions (drop "focus" — it's noise)
366-
const actions = (node.actions ?? []).filter((a) => a !== "focus");
367446
if (actions.length > 0) {
368-
parts.push("[" + actions.join(",") + "]");
447+
parts.push("[" + actions.map((a) => ACTION_CODES[a] ?? a).join(",") + "]");
369448
}
370449

371450
// Value for input-type elements
372451
const value = node.value || "";
373-
if (value && VALUE_ROLES.has(node.role)) {
452+
if (value && VALUE_ROLES.has(role)) {
374453
let truncatedVal = value.length > 120 ? value.slice(0, 120) + "..." : value;
375454
truncatedVal = truncatedVal.replace(/"/g, '\\"').replace(/\n/g, " ");
376455
parts.push(`val="${truncatedVal}"`);
@@ -436,7 +515,7 @@ export function serializeCompact(
436515
maxChars?: number;
437516
},
438517
): string {
439-
const detail = options?.detail ?? "standard";
518+
const detail = options?.detail ?? "compact";
440519
const maxChars = options?.maxChars ?? MAX_OUTPUT_CHARS;
441520
const windowList = options?.windowList ?? null;
442521

src/index.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ export class Session {
7777
const scope = options?.scope ?? "foreground";
7878
const maxDepth = options?.maxDepth ?? 999;
7979
const compact = options?.compact ?? true;
80-
const detail = options?.detail ?? "standard";
80+
const detail = options?.detail ?? "compact";
8181

8282
const [sw, sh, scale] = await this.adapter.getScreenInfo();
8383

@@ -442,7 +442,7 @@ export async function overview(): Promise<string> {
442442
// ---------------------------------------------------------------------------
443443

444444
export { getAdapter, detectPlatform } from "./router.js";
445-
export { buildEnvelope, serializeCompact, serializeOverview, pruneTree, formatLine } from "./format.js";
445+
export { buildEnvelope, serializeCompact, serializeOverview, pruneTree, formatLine, ROLE_CODES, STATE_CODES, ACTION_CODES } from "./format.js";
446446
export { searchTree, resolveRoles, tokenize } from "./search.js";
447447
export { ActionExecutor, VALID_ACTIONS } from "./actions/executor.js";
448448
export { parseCombo } from "./actions/keys.js";

src/mcp/server.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ After executing any action, you MUST call this again for fresh IDs.`,
8484
scope: "foreground",
8585
maxDepth: 999,
8686
compact: true,
87-
detail: "standard",
87+
detail: "compact",
8888
});
8989
return { content: [{ type: "text", text: result as string }] };
9090
},
@@ -109,7 +109,7 @@ Element IDs are ephemeral — only valid for THIS snapshot.`,
109109
app,
110110
maxDepth: 999,
111111
compact: true,
112-
detail: "standard",
112+
detail: "compact",
113113
});
114114
return { content: [{ type: "text", text: result as string }] };
115115
},
@@ -130,7 +130,7 @@ Element IDs are ephemeral — only valid for THIS snapshot.`,
130130
scope: "desktop",
131131
maxDepth: 999,
132132
compact: true,
133-
detail: "standard",
133+
detail: "compact",
134134
});
135135
return { content: [{ type: "text", text: result as string }] };
136136
},

src/types.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ export interface CupEnvelope {
300300

301301
export type Scope = "overview" | "foreground" | "desktop" | "full";
302302

303-
export type Detail = "standard" | "minimal" | "full";
303+
export type Detail = "compact" | "full";
304304

305305
// ---------------------------------------------------------------------------
306306
// Window metadata (internal, from adapters)

0 commit comments

Comments
 (0)