From f0ff080ffe4892a1375b7578b03e285d11937acf Mon Sep 17 00:00:00 2001 From: jerelvelarde Date: Thu, 19 Mar 2026 07:08:28 -0700 Subject: [PATCH 01/28] feat: integrate excalidraw-mcp as first-class diagramming feature Wire up MCPAppsMiddleware on the LangGraph agent so Excalidraw widget HTML is properly sent to the frontend (fixes blank canvas). Add progressive iframe streaming via postMessage to preserve JS state during updates, and a 75% focused modal for expanding widgets. Include an Excalidraw diagram skill for the agent with camera reveal patterns, color grammar, and element reference snippets. Closes #11 --- .../agent/skills/excalidraw-diagram-skill.txt | 211 ++++++++++++++++++ apps/app/src/app/api/copilotkit/route.ts | 21 +- .../generative-ui/widget-renderer.tsx | 192 +++++++++++----- 3 files changed, 358 insertions(+), 66 deletions(-) create mode 100644 apps/agent/skills/excalidraw-diagram-skill.txt diff --git a/apps/agent/skills/excalidraw-diagram-skill.txt b/apps/agent/skills/excalidraw-diagram-skill.txt new file mode 100644 index 0000000..eb52786 --- /dev/null +++ b/apps/agent/skills/excalidraw-diagram-skill.txt @@ -0,0 +1,211 @@ +# Excalidraw Diagram Skill + +Create beautiful, professional, animated Excalidraw diagrams with progressive camera reveals, color-coded zones, and polished visual design. Use this skill whenever a user asks to diagram, visualize, map, chart, illustrate, or draw anything — including architecture diagrams, flowcharts, sequence diagrams, concept explainers, system maps, process flows, and technical overviews. Also trigger for requests like "show me how X works", "draw a diagram of", "create a visual for", "make an Excalidraw of", or any time a visual explanation would be clearer than text alone. + +--- + +## Step 1 — Always call read_me first + +Before emitting ANY elements, call `Excalidraw:read_me`. Do not skip this step, even for simple diagrams. 
It provides the color palette, camera sizes, font rules, and element syntax required to produce clean output. + +``` +Excalidraw:read_me() +``` + +Then proceed directly to `Excalidraw:create_view` with your elements array — no narration about the read_me call. + +--- + +## Step 2 — Plan the diagram before writing elements + +Before writing elements, mentally sketch: + +1. **What are the layers / zones?** (e.g. Frontend / Backend / Database, or Input / Process / Output) +2. **What color grammar makes sense?** Assign one color per layer and keep it consistent throughout +3. **How many camera positions do I need?** Plan at least 3 camera stops (3–6 is typical) for a reveal effect +4. **What's the reading order?** Left-to-right or top-to-bottom; pick one and stick to it + +--- + +## Step 3 — Core design rules (MUST follow) + +### Camera rules +- **Always start with `cameraUpdate` as the first element** +- Camera sizes MUST be exact 4:3 ratios: `400x300` (S), `600x450` (M), `800x600` (L), `1200x900` (XL), `1600x1200` (XXL) +- Use **multiple cameraUpdates** throughout the array — pan to each section as you draw it +- Leave padding: if content is 500px wide, use an `800x600` camera +- Final element should be a wide cameraUpdate showing the full diagram + +### Color grammar (use consistently) + +| Zone / Role | Fill | Stroke | +|---------------------|---------------|-----------| +| UI / Frontend | `#dbe4ff` | `#4a9eed` | +| Logic / Agent | `#e5dbff` | `#8b5cf6` | +| Data / Storage | `#d3f9d8` | `#22c55e` | +| External / API | `#ffd8a8` | `#f59e0b` | +| Error / Alert | `#ffc9c9` | `#ef4444` | +| Notes / Decisions | `#fff3bf` | `#f59e0b` | + +Zone background rectangles: use `opacity: 40`, `fillStyle: "solid"` + +Node shapes: use pastel fills (`#a5d8ff`, `#b2f2bb`, `#d0bfff`, `#ffd8a8`, `#c3fae8`, `#eebefa`) + +### Typography rules +- Title: `fontSize: 26–28`, `strokeColor: "#1e1e1e"` +- Subtitle / annotation: `fontSize: 16`, `strokeColor: "#757575"` +- Shape labels: `fontSize: 16–18` via `label` property on the shape
+- NEVER use fontSize below 14 +- NEVER use light gray on white backgrounds (minimum text color: `#757575`) + +### Shape rules +- Use `label: { "text": "...", "fontSize": 16 }` directly on shapes — no separate text elements +- Minimum shape size: `120x60` for labeled boxes +- Add `roundness: { type: 3 }` for rounded corners (preferred for nodes) +- Leave 20–30px gaps between elements + +### Drawing order (z-order, critical) +Emit in this sequence per section: +1. Zone background rectangle (drawn first = sits behind) +2. Zone label text +3. Node shapes (with labels) +4. Arrows between nodes +5. Then next section + +NEVER dump all rectangles, then all text, then all arrows. + +### Arrow rules +- Always include `endArrowhead: "arrow"` for directional flow +- Use `strokeStyle: "dashed"` for responses, return values, optional paths +- Keep arrow labels short (under 20 chars) or omit — long labels overflow +- Use `startBinding` / `endBinding` with `fixedPoint` to attach to shapes + +--- + +## Step 4 — Diagram type patterns + +### Architecture / System Diagram +Zones as swim lanes (left-to-right or top-to-bottom). Each zone = one architectural layer. Arrows show data/request flow between layers. End with a full-width cameraUpdate. + +**Camera pattern:** Title zoom (M) → pan right zone by zone (S/M) → final overview (XL) + +### Sequence / Flow Diagram +Actors as header boxes with dashed vertical lifelines. Horizontal arrows show messages. Pan camera downward as messages progress. + +**Camera pattern:** Title (M) → pan right per actor drawing header + lifeline → zoom out (L) → pan down per message group → final overview (XL) + +### Concept Explainer +Start zoomed on the title, then reveal parts of the concept one at a time. Use annotations (`#fff3bf` boxes) as callouts. Simple left-to-right flow. + +**Camera pattern:** Title zoom (S) → zoom out (M) → pan section by section → final (L) + +### Process / Flowchart +Diamonds for decisions, rectangles for steps. 
Top-to-bottom flow. Color-code by stage (e.g. initiation=blue, processing=purple, output=green). + +**Camera pattern:** Top zoom → pan down per stage group → final overview + +--- + +## Step 5 — The camera reveal technique (what makes diagrams feel alive) + +The secret to great Excalidraw diagrams is **drawing section by section with camera moves**. The snippet below is an illustrative sketch, not valid JSON — drop the `//` comments and replace the `...` placeholders with real properties when emitting elements: + +```json +// 1. Start with title, zoomed in +{"type":"cameraUpdate","width":600,"height":450,"x":100,"y":0}, +{"type":"text","id":"t1","x":200,"y":20,"text":"My Diagram","fontSize":28}, + +// 2. Pan to first zone and draw it +{"type":"cameraUpdate","width":400,"height":300,"x":20,"y":60}, +{"type":"rectangle","id":"zone1", ...zone background...}, +{"type":"rectangle","id":"node1", ...node with label...}, + +// 3. Pan to second zone +{"type":"cameraUpdate","width":400,"height":300,"x":280,"y":60}, +{"type":"rectangle","id":"zone2", ...}, +{"type":"rectangle","id":"node2", ...}, + +// 4. Draw connecting arrows (camera stays or pans to show both ends) +{"type":"cameraUpdate","width":800,"height":600,"x":0,"y":40}, +{"type":"arrow","id":"a1", ...arrow from node1 to node2...}, + +// 5. Final wide overview +{"type":"cameraUpdate","width":1200,"height":900,"x":-20,"y":-10} +``` + +This creates the "drawing itself" animation effect users love.
+ +--- + +## Step 6 — Common mistakes to avoid + +- **No cameraUpdate first** → diagram appears un-framed, elements clip +- **Wrong aspect ratio** → `700x500` causes distortion; use `800x600` +- **All elements at once, no panning** → loses the reveal animation +- **Overlapping elements** → check that y-coordinates leave 60–80px between rows +- **Long arrow labels** → overflow the arrow; keep under 20 chars or use a note box instead +- **Emoji in text** → emoji do not render in Excalidraw's font; use plain text +- **Light text on white** → `#b0b0b0` on white is invisible; minimum `#757575` +- **Zone label covered by nodes** → put zone label text at top-left of zone (zone `y` + 8px), nodes start 40px below +- **Title not centered** → estimate `text.length x fontSize x 0.5` for width, then set `x = diagramCenterX - estimatedWidth/2` + +--- + +## Step 7 — Quality checklist before emitting + +- [ ] `Excalidraw:read_me` called +- [ ] First element is `cameraUpdate` +- [ ] All camera sizes are valid 4:3 ratios +- [ ] Minimum 3 camera positions used (more = better animation) +- [ ] Color grammar is consistent across zones +- [ ] All shape labels use `label` property, not separate text elements +- [ ] No font sizes below 14 +- [ ] Zone backgrounds are drawn BEFORE the nodes inside them +- [ ] Arrows drawn AFTER both source and target shapes +- [ ] Final element is a wide cameraUpdate revealing the full diagram +- [ ] No emoji in any text strings + +--- + +## Reference: Element snippets + +**Zone background:** +```json +{"type":"rectangle","id":"zone_bg","x":20,"y":80,"width":220,"height":380,"backgroundColor":"#dbe4ff","fillStyle":"solid","roundness":{"type":3},"strokeColor":"#4a9eed","strokeWidth":1,"opacity":40} +``` + +**Zone label:** +```json +{"type":"text","id":"zone_lbl","x":40,"y":88,"text":"FRONTEND","fontSize":14,"strokeColor":"#2563eb"} +``` + +**Node:** +```json
+{"type":"rectangle","id":"n1","x":60,"y":130,"width":150,"height":55,"backgroundColor":"#a5d8ff","fillStyle":"solid","roundness":{"type":3},"strokeColor":"#4a9eed","strokeWidth":2,"label":{"text":"API Gateway","fontSize":16}} +``` + +**Arrow (solid, directed):** +```json +{"type":"arrow","id":"a1","x":210,"y":157,"width":100,"height":0,"points":[[0,0],[100,0]],"strokeColor":"#1e1e1e","strokeWidth":2,"endArrowhead":"arrow","startBinding":{"elementId":"n1","fixedPoint":[1,0.5]},"endBinding":{"elementId":"n2","fixedPoint":[0,0.5]}} +``` + +**Arrow (dashed, response):** +```json +{"type":"arrow","id":"a2","x":310,"y":157,"width":-100,"height":0,"points":[[0,0],[-100,0]],"strokeColor":"#757575","strokeWidth":2,"strokeStyle":"dashed","endArrowhead":"arrow"} +``` + +**Annotation note:** +```json +{"type":"rectangle","id":"note1","x":80,"y":200,"width":200,"height":36,"backgroundColor":"#fff3bf","fillStyle":"solid","roundness":{"type":3},"strokeColor":"#f59e0b","strokeWidth":1,"opacity":80,"label":{"text":"Caches for 5 min","fontSize":14}} +``` + +**Title text:** +```json +{"type":"text","id":"title","x":150,"y":15,"text":"System Architecture","fontSize":28,"strokeColor":"#1e1e1e"} +``` + +**Stick figure (user icon):** +```json +{"type":"ellipse","id":"fig_head","x":58,"y":110,"width":20,"height":20,"backgroundColor":"#a5d8ff","fillStyle":"solid","strokeColor":"#4a9eed","strokeWidth":2}, +{"type":"rectangle","id":"fig_body","x":57,"y":132,"width":22,"height":26,"backgroundColor":"#a5d8ff","fillStyle":"solid","roundness":{"type":3},"strokeColor":"#4a9eed","strokeWidth":2} +``` diff --git a/apps/app/src/app/api/copilotkit/route.ts b/apps/app/src/app/api/copilotkit/route.ts index 9c73d9b..832d831 100644 --- a/apps/app/src/app/api/copilotkit/route.ts +++ b/apps/app/src/app/api/copilotkit/route.ts @@ -4,6 +4,7 @@ import { copilotRuntimeNextJSAppRouterEndpoint, } from "@copilotkit/runtime"; import { LangGraphAgent } from "@copilotkit/runtime/langgraph"; +import { 
MCPAppsMiddleware } from "@ag-ui/mcp-apps-middleware"; import { NextRequest } from "next/server"; // 1. Define the agent connection to LangGraph @@ -13,21 +14,25 @@ const defaultAgent = new LangGraphAgent({ langsmithApiKey: process.env.LANGSMITH_API_KEY || "", }); +// 2. Wire up MCP apps middleware so widget HTML is sent to the frontend +defaultAgent.use( + new MCPAppsMiddleware({ + mcpServers: [{ + type: "http", + url: process.env.MCP_SERVER_URL || "https://mcp.excalidraw.com", + serverId: "example_mcp_app", + }], + }) +); + // 3. Define the route and CopilotRuntime for the agent export const POST = async (req: NextRequest) => { const { handleRequest } = copilotRuntimeNextJSAppRouterEndpoint({ endpoint: "/api/copilotkit", serviceAdapter: new ExperimentalEmptyAdapter(), runtime: new CopilotRuntime({ - agents: { default: defaultAgent, }, + agents: { default: defaultAgent }, a2ui: { injectA2UITool: true }, - mcpApps: { - servers: [{ - type: "http", - url: process.env.MCP_SERVER_URL || "https://mcp.excalidraw.com", - serverId: "example_mcp_app", - }], - }, }), }); diff --git a/apps/app/src/components/generative-ui/widget-renderer.tsx b/apps/app/src/components/generative-ui/widget-renderer.tsx index f978385..0cc367f 100644 --- a/apps/app/src/components/generative-ui/widget-renderer.tsx +++ b/apps/app/src/components/generative-ui/widget-renderer.tsx @@ -1,6 +1,6 @@ "use client"; -import { useEffect, useRef, useState, useCallback } from "react"; +import { useEffect, useLayoutEffect, useRef, useState, useCallback } from "react"; import { z } from "zod"; // ─── Zod Schema (CopilotKit parameter contract) ───────────────────── @@ -358,6 +358,17 @@ window.addEventListener('load', reportHeight); // Periodic reports during initial load var _resizeInterval = setInterval(reportHeight, 200); setTimeout(function() { clearInterval(_resizeInterval); }, 15000); + +// Patch: receive incremental HTML updates without full reload +window.addEventListener('message', function(e) { + if 
(e.data && e.data.type === 'update-content' && typeof e.data.html === 'string') { + var content = document.getElementById('content'); + if (content) { + content.innerHTML = e.data.html; + reportHeight(); + } + } +}); `; // ─── Document Assembly ─────────────────────────────────────────────── @@ -425,8 +436,10 @@ export function WidgetRenderer({ title, description, html }: WidgetRendererProps const iframeRef = useRef(null); const [height, setHeight] = useState(0); const [loaded, setLoaded] = useState(false); + const [isFullscreen, setIsFullscreen] = useState(false); // Track what html has been committed to the iframe to avoid redundant reloads const committedHtmlRef = useRef(""); + const isFirstRenderRef = useRef(true); const handleMessage = useCallback((e: MessageEvent) => { // Only handle messages from our own iframe @@ -445,19 +458,37 @@ export function WidgetRenderer({ title, description, html }: WidgetRendererProps return () => window.removeEventListener("message", handleMessage); }, [handleMessage]); - // Write to iframe imperatively — bypasses React reconciliation so the - // iframe only reloads when the html *content* truly changes, preserving - // internal JS state (Three.js scenes, step counters, etc.) across - // CopilotKit re-renders. - useEffect(() => { + // Write to iframe imperatively — first render sets srcdoc (executes scripts), + // subsequent streaming updates patch #content.innerHTML via postMessage + // to preserve JS state (Three.js scenes, step counters, etc.). 
+ useLayoutEffect(() => { if (!html || !iframeRef.current) return; if (html === committedHtmlRef.current) return; committedHtmlRef.current = html; - iframeRef.current.srcdoc = assembleDocument(html); - setLoaded(false); - setHeight(0); + + if (isFirstRenderRef.current) { + isFirstRenderRef.current = false; + iframeRef.current.srcdoc = assembleDocument(html); + setLoaded(false); + setHeight(0); + } else { + iframeRef.current.contentWindow?.postMessage( + { type: "update-content", html }, + "*" + ); + } }, [html]); + // Escape key exits fullscreen + useEffect(() => { + if (!isFullscreen) return; + const handleKeyDown = (e: KeyboardEvent) => { + if (e.key === "Escape") setIsFullscreen(false); + }; + window.addEventListener("keydown", handleKeyDown); + return () => window.removeEventListener("keydown", handleKeyDown); + }, [isFullscreen]); + // Fallback: if iframe has html but hasn't reported ready after 4s, force-show useEffect(() => { if (!html || (loaded && height > 0)) return; @@ -474,65 +505,110 @@ export function WidgetRenderer({ title, description, html }: WidgetRendererProps const loadingPhrase = useLoadingPhrase(showLoading); return ( -
- {/* Loading indicator: visible until iframe is fully ready */} - {showLoading && ( + <> + {/* Fullscreen backdrop */} + {isFullscreen && (
- {/* Animated gradient border top */} + onClick={() => setIsFullscreen(false)} + /> + )} +
+ {/* Toolbar */} + {html && ( +
+ +
+ )} + {/* Loading indicator: visible until iframe is fully ready */} + {showLoading && !isFullscreen && (
-
- {/* Spinning icon */} + >
- - {loadingPhrase}... - +
+
+ + {loadingPhrase}... + +
-
- )} - {/* Iframe: always mounted so ref is stable; srcdoc set imperatively. - No srcDoc React prop — prevents React from reloading the iframe - on parent re-renders. */} -