Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions skills.json
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,9 @@
"id": "camera-claw",
"name": "Camera Claw",
"description": "Security camera for your AI agent — sandbox, record, and monitor OpenClaw activity.",
"version": "1.1.0",
"version": "2026.3.12",
"category": "integrations",
"url": "https://github.com/SharpAI/CameraClaw",
"repo_url": "https://github.com/SharpAI/CameraClaw",
"code_structure": [
{ "path": "SKILL.md", "desc": "Aegis skill manifest (11 params)" },
Expand All @@ -114,7 +115,7 @@
{ "path": "scripts/health-check.js", "desc": "Container health checker" },
{ "path": "docs/aegis_openclaw_note.md", "desc": "Aegis integration requirements" }
],
"tags": ["security", "sandbox", "monitoring", "openclaw"],
"tags": ["security", "sandbox", "monitoring", "openclaw", "ai-agent"],
"platforms": [
"linux-x64",
"linux-arm64",
Expand Down
114 changes: 95 additions & 19 deletions skills/analysis/home-security-benchmark/scripts/run-benchmark.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,15 @@ async function llmCall(messages, opts = {}) {
}

const model = opts.model || (opts.vlm ? VLM_MODEL : LLM_MODEL) || undefined;
// For JSON-expected tests, disable thinking (Qwen3 /no_think directive)
// This prevents the model from wasting tokens on reasoning before outputting JSON
if (opts.expectJSON) {
const lastUserIdx = messages.findLastIndex(m => m.role === 'user');
if (lastUserIdx >= 0) {
messages = [...messages];
messages[lastUserIdx] = { ...messages[lastUserIdx], content: messages[lastUserIdx].content + ' /no_think' };
}
}

// Build request params
const params = {
Expand All @@ -173,6 +182,7 @@ async function llmCall(messages, opts = {}) {
...(model && { model }),
...(opts.temperature !== undefined && { temperature: opts.temperature }),
...(opts.maxTokens && { max_completion_tokens: opts.maxTokens }),
...(opts.expectJSON && { response_format: { type: 'json_object' } }),
...(opts.tools && { tools: opts.tools }),
};

Expand All @@ -181,6 +191,34 @@ async function llmCall(messages, opts = {}) {
const idleMs = opts.timeout || IDLE_TIMEOUT_MS;
let idleTimer = setTimeout(() => controller.abort(), idleMs);
const resetIdle = () => { clearTimeout(idleTimer); idleTimer = setTimeout(() => controller.abort(), idleMs); };
// Log prompt being sent
log(`\n 📤 Prompt (${messages.length} messages, params: ${JSON.stringify({maxTokens: opts.maxTokens, expectJSON: !!opts.expectJSON, response_format: params.response_format})}):`);
for (const m of messages) {
if (typeof m.content === 'string') {
log(` [${m.role}] ${m.content}`);
} else if (Array.isArray(m.content)) {
// Multi-part content (VLM with images)
for (const part of m.content) {
if (part.type === 'text') {
log(` [${m.role}] ${part.text}`);
} else if (part.type === 'image_url') {
const url = part.image_url?.url || '';
const b64Match = url.match(/^data:([^;]+);base64,(.+)/);
if (b64Match) {
const mimeType = b64Match[1];
const b64Data = b64Match[2];
const sizeKB = Math.round(b64Data.length * 3 / 4 / 1024);
log(` [${m.role}] 🖼️ [Image: ${mimeType}, ~${sizeKB}KB]`);
log(`[IMG:${url}]`);
} else {
log(` [${m.role}] 🖼️ [Image URL: ${url.slice(0, 80)}…]`);
}
}
}
} else {
log(` [${m.role}] ${JSON.stringify(m.content).slice(0, 200)}`);
}
}

try {
const stream = await client.chat.completions.create(params, {
Expand All @@ -193,6 +231,7 @@ async function llmCall(messages, opts = {}) {
let model = '';
let usage = {};
let tokenCount = 0;
let tokenBuffer = '';

for await (const chunk of stream) {
resetIdle();
Expand All @@ -204,8 +243,43 @@ async function llmCall(messages, opts = {}) {
if (delta?.reasoning_content) reasoningContent += delta.reasoning_content;
if (delta?.content || delta?.reasoning_content) {
tokenCount++;
// Buffer and log tokens — tag with field source
const isContent = !!delta?.content;
const tok = delta?.content || delta?.reasoning_content || '';
// Tag first token of each field type
if (tokenCount === 1) tokenBuffer += isContent ? '[C] ' : '[R] ';
tokenBuffer += tok;
if (tokenCount % 20 === 0) {
log(tokenBuffer);
tokenBuffer = '';
}
if (tokenCount % 100 === 0) {
log(` … ${tokenCount} tokens received`);
log(` … ${tokenCount} tokens (content: ${content.length}c, reasoning: ${reasoningContent.length}c)`);
}

// Smart early abort for JSON-expected tests:
// If the model is producing reasoning_content (thinking) for a JSON test,
// abort after 100 reasoning tokens — it should output JSON directly.
if (opts.expectJSON && !isContent && tokenCount > 100) {
log(` ⚠ Aborting: ${tokenCount} reasoning tokens for JSON test — model is thinking instead of outputting JSON`);
controller.abort();
break;
}
// If content is arriving, check it starts with JSON
if (opts.expectJSON && isContent && content.length >= 50) {
const stripped = content.replace(/<think>[\s\S]*?<\/think>\s*/gi, '').trimStart();
if (stripped.length >= 50 && !/^\s*[{\[]/.test(stripped)) {
log(` ⚠ Aborting: expected JSON but got: "${stripped.slice(0, 80)}…"`);
controller.abort();
break;
}
}
// Hard cap: abort if token count far exceeds maxTokens (server may
// not count thinking tokens toward the limit)
if (opts.maxTokens && tokenCount > opts.maxTokens * 3) {
log(` ⚠ Aborting: ${tokenCount} tokens exceeds ${opts.maxTokens}×3 safety limit`);
controller.abort();
break;
}
}

Expand All @@ -224,6 +298,9 @@ async function llmCall(messages, opts = {}) {
if (chunk.usage) usage = chunk.usage;
}

// Flush remaining token buffer
if (tokenBuffer) log(tokenBuffer);

// If the model only produced reasoning_content (thinking) with no content,
// use the reasoning output as the response content for evaluation purposes.
if (!content && reasoningContent) {
Expand Down Expand Up @@ -337,12 +414,11 @@ ${userMessage}
4. Keep system messages (they contain tool results)

## Response Format
Return ONLY this JSON (no other text):
{"keep": [0, 5, 8], "summary": "brief 1-line summary of dropped exchanges"}
Respond with ONLY a valid JSON object, no other text:
{"keep": [<actual index numbers from the list above>], "summary": "<brief 1-line summary of what was dropped>"}

- "keep": array of message indices to KEEP (from the index list above)
- "summary": what the dropped messages were about (so context is not lost entirely)
- If nothing should be dropped, set keep to ALL indices and summary to ""`;
Example: if keeping messages at indices 0, 18, 22 → {"keep": [0, 18, 22], "summary": "Removed 4 duplicate 'what happened today' questions"}
If nothing should be dropped, keep ALL indices and set summary to "".`;
}

suite('📋 Context Preprocessing', async () => {
Expand All @@ -356,7 +432,7 @@ suite('📋 Context Preprocessing', async () => {
{ idx: 18, ts: '12:56 PM', text: 'What has happened today' },
{ idx: 22, ts: '1:08 PM', text: 'What has happened today' },
];
const r = await llmCall([{ role: 'user', content: buildPreprocessPrompt(idx, 'What has happened today?') }]);
const r = await llmCall([{ role: 'user', content: buildPreprocessPrompt(idx, 'What has happened today?') }], { maxTokens: 300, expectJSON: true });
const p = parseJSON(r.content);
assert(Array.isArray(p.keep), 'keep must be array');
assert(p.keep.length <= 3, `Expected ≤3, got ${p.keep.length}`);
Expand All @@ -373,7 +449,7 @@ suite('📋 Context Preprocessing', async () => {
{ idx: 18, ts: '12:00 PM', text: 'What is the system status?' },
{ idx: 22, ts: '1:00 PM', text: 'What has happened today' },
];
const r = await llmCall([{ role: 'user', content: buildPreprocessPrompt(idx, 'Any alerts triggered?') }]);
const r = await llmCall([{ role: 'user', content: buildPreprocessPrompt(idx, 'Any alerts triggered?') }], { maxTokens: 300, expectJSON: true });
const p = parseJSON(r.content);
assert(Array.isArray(p.keep), 'keep must be array');
assert(p.keep.includes(3) || p.keep.includes(10) || p.keep.includes(18), 'Should keep unique topics');
Expand All @@ -387,7 +463,7 @@ suite('📋 Context Preprocessing', async () => {
{ idx: 6, ts: '10:00 AM', text: 'What is the system status?' },
{ idx: 10, ts: '11:00 AM', text: 'Analyze the clip from 9:40 AM' },
];
const r = await llmCall([{ role: 'user', content: buildPreprocessPrompt(idx, 'Any new motion events?') }]);
const r = await llmCall([{ role: 'user', content: buildPreprocessPrompt(idx, 'Any new motion events?') }], { maxTokens: 300, expectJSON: true });
const p = parseJSON(r.content);
assert(Array.isArray(p.keep) && p.keep.length === 4, `Expected 4, got ${p.keep?.length}`);
return `kept all 4 ✓`;
Expand All @@ -398,7 +474,7 @@ suite('📋 Context Preprocessing', async () => {
{ idx: 0, ts: '9:00 AM', text: 'Hello' },
{ idx: 2, ts: '9:05 AM', text: 'Show cameras' },
];
const r = await llmCall([{ role: 'user', content: buildPreprocessPrompt(idx, 'Thanks') }]);
const r = await llmCall([{ role: 'user', content: buildPreprocessPrompt(idx, 'Thanks') }], { maxTokens: 300, expectJSON: true });
const p = parseJSON(r.content);
assert(Array.isArray(p.keep), 'keep must be array');
return `kept ${p.keep.length}/2`;
Expand Down Expand Up @@ -427,7 +503,7 @@ suite('📋 Context Preprocessing', async () => {
{ idx: 36, ts: '12:30 PM', text: 'What happened today?' },
{ idx: 38, ts: '12:45 PM', text: 'Were there any packages delivered?' },
];
const r = await llmCall([{ role: 'user', content: buildPreprocessPrompt(idx, 'What happened today?') }]);
const r = await llmCall([{ role: 'user', content: buildPreprocessPrompt(idx, 'What happened today?') }], { maxTokens: 300, expectJSON: true });
const p = parseJSON(r.content);
assert(Array.isArray(p.keep), 'keep must be array');
// 10 duplicates of "What happened today?" → should keep ≤12 of 20
Expand All @@ -444,7 +520,7 @@ suite('📋 Context Preprocessing', async () => {
{ idx: 3, ts: '9:05 AM', text: '[System] Alert triggered: person at front door' },
{ idx: 4, ts: '9:10 AM', text: 'What happened today?' },
];
const r = await llmCall([{ role: 'user', content: buildPreprocessPrompt(idx, 'Show me alerts') }]);
const r = await llmCall([{ role: 'user', content: buildPreprocessPrompt(idx, 'Show me alerts') }], { maxTokens: 300, expectJSON: true });
const p = parseJSON(r.content);
assert(Array.isArray(p.keep), 'keep must be array');
// System messages (idx 1, 3) must be kept
Expand Down Expand Up @@ -538,7 +614,7 @@ suite('🧠 Knowledge Distillation', async () => {
const r = await llmCall([
{ role: 'system', content: DISTILL_PROMPT },
{ role: 'user', content: `## Topic: Camera Setup\n## Existing KIs: (none)\n## Conversation\nUser: I have three cameras. Front door is a Blink Mini, living room is Blink Indoor, side parking is Blink Outdoor.\nAegis: Got it! Want to set up alerts?\nUser: Yes, person detection on front door after 10pm. My name is Sam.\nAegis: Alert set. Nice to meet you, Sam!` },
]);
], { maxTokens: 500, expectJSON: true });
const p = parseJSON(r.content);
assert(p && typeof p === 'object', 'Must return object');
const facts = (p.items || []).reduce((n, i) => n + (i.facts?.length || 0), 0) + (p.new_items || []).reduce((n, i) => n + (i.facts?.length || 0), 0);
Expand All @@ -550,7 +626,7 @@ suite('🧠 Knowledge Distillation', async () => {
const r = await llmCall([
{ role: 'system', content: DISTILL_PROMPT },
{ role: 'user', content: `## Topic: Greeting\n## Existing KIs: (none)\n## Conversation\nUser: Hi\nAegis: Hello! How can I help?\nUser: Thanks, bye\nAegis: Goodbye!` },
]);
], { maxTokens: 500, expectJSON: true });
const p = parseJSON(r.content);
const facts = (p.items || []).reduce((n, i) => n + (i.facts?.length || 0), 0) + (p.new_items || []).reduce((n, i) => n + (i.facts?.length || 0), 0);
assert(facts === 0, `Expected 0 facts, got ${facts}`);
Expand All @@ -561,7 +637,7 @@ suite('🧠 Knowledge Distillation', async () => {
const r = await llmCall([
{ role: 'system', content: DISTILL_PROMPT },
{ role: 'user', content: `## Topic: Alert Configuration\n## Existing KIs: alert_preferences\n## Conversation\nUser: No notifications from side parking 8am-5pm. Too many false alarms from passing cars.\nAegis: Quiet hours set for side parking 8 AM-5 PM.\nUser: Front door alerts go to Telegram. Discord for everything else.\nAegis: Done — front door to Telegram, rest to Discord.` },
]);
], { maxTokens: 500, expectJSON: true });
const p = parseJSON(r.content);
const facts = (p.items || []).reduce((n, i) => n + (i.facts?.length || 0), 0) + (p.new_items || []).reduce((n, i) => n + (i.facts?.length || 0), 0);
assert(facts >= 2, `Expected ≥2 facts, got ${facts}`);
Expand All @@ -572,7 +648,7 @@ suite('🧠 Knowledge Distillation', async () => {
const r = await llmCall([
{ role: 'system', content: DISTILL_PROMPT },
{ role: 'user', content: `## Topic: Camera Update\n## Existing KIs: home_profile (facts: ["3 cameras: Blink Mini front, Blink Indoor living, Blink Outdoor side", "Owner: Sam"])\n## Conversation\nUser: I just installed a fourth camera in the backyard. It's a Reolink Argus 3 Pro.\nAegis: Nice upgrade! I've noted your new backyard Reolink camera. That brings your total to 4 cameras.\nUser: Also, I got a dog named Max, golden retriever.\nAegis: Welcome, Max! I'll note that for the pet detections.` },
]);
], { maxTokens: 500, expectJSON: true });
const p = parseJSON(r.content);
const allFacts = [...(p.items || []).flatMap(i => i.facts || []), ...(p.new_items || []).flatMap(i => i.facts || [])];
assert(allFacts.length >= 2, `Expected ≥2 facts, got ${allFacts.length}`);
Expand All @@ -587,7 +663,7 @@ suite('🧠 Knowledge Distillation', async () => {
const r = await llmCall([
{ role: 'system', content: DISTILL_PROMPT },
{ role: 'user', content: `## Topic: Camera Change\n## Existing KIs: home_profile (facts: ["3 cameras: Blink Mini front, Blink Indoor living, Blink Outdoor side"])\n## Conversation\nUser: I replaced the living room camera. The Blink Indoor died. I put a Ring Indoor there now.\nAegis: Got it — living room camera is now a Ring Indoor. Updated.\nUser: Actually I also moved the side parking camera to the garage instead.\nAegis: Camera moved from side parking to garage, noted.` },
]);
], { maxTokens: 500, expectJSON: true });
const p = parseJSON(r.content);
const allFacts = [...(p.items || []).flatMap(i => i.facts || []), ...(p.new_items || []).flatMap(i => i.facts || [])];
assert(allFacts.length >= 1, `Expected ≥1 fact, got ${allFacts.length}`);
Expand Down Expand Up @@ -634,7 +710,7 @@ suite('🔔 Event Deduplication', async () => {
const r = await llmCall([
{ role: 'system', content: 'You are a security event classifier. Respond only with valid JSON.' },
{ role: 'user', content: buildDedupPrompt(s.current, s.recent, s.age_sec) },
], { maxTokens: 150, temperature: 0.1 });
], { maxTokens: 150, temperature: 0.1, expectJSON: true });
const p = parseJSON(r.content);
if (s.expected_duplicate !== undefined) {
assert(p.duplicate === s.expected_duplicate, `Expected duplicate=${s.expected_duplicate}, got ${p.duplicate}`);
Expand Down Expand Up @@ -847,7 +923,7 @@ suite('🛡️ Security Classification', async () => {
const r = await llmCall([
{ role: 'system', content: SECURITY_CLASSIFY_PROMPT },
{ role: 'user', content: `Event description: ${s.description}` },
], { maxTokens: 200, temperature: 0.1 });
], { maxTokens: 200, temperature: 0.1, expectJSON: true });
const p = parseJSON(r.content);
assert(expectedClassifications.includes(p.classification),
`Expected "${expectedLabel}", got "${p.classification}"`);
Expand Down