Skip to content

Commit 2818b74

Browse files
committed
migrate enrichment logic to general abstraction
1 parent 2d49de7 commit 2818b74

File tree

15 files changed

+222
-238
lines changed

15 files changed

+222
-238
lines changed

apps/sim/executor/handlers/agent/agent-handler.ts

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -711,9 +711,6 @@ export class AgentBlockHandler implements BlockHandler {
711711
getAllBlocks,
712712
getToolAsync: (toolId: string) => getToolAsync(toolId, ctx.workflowId),
713713
getTool,
714-
workspaceId: ctx.workspaceId,
715-
workflowId: ctx.workflowId,
716-
executeTool,
717714
})
718715

719716
if (transformedTool) {

apps/sim/lib/table/llm/enrichment.ts

Lines changed: 0 additions & 170 deletions
Original file line numberDiff line numberDiff line change
@@ -5,155 +5,8 @@
55
* with table-specific information so LLMs can construct proper queries.
66
*/
77

8-
import { createLogger } from '@sim/logger'
98
import type { TableSummary } from '../types'
109

11-
const logger = createLogger('TableLLMEnrichment')
12-
13-
/**
14-
* Cache for in-flight and recently fetched table schemas.
15-
* Key: tableId, Value: { promise, timestamp }
16-
* This deduplicates concurrent requests for the same table schema.
17-
*/
18-
const schemaCache = new Map<
19-
string,
20-
{
21-
promise: Promise<TableSummary | null>
22-
timestamp: number
23-
}
24-
>()
25-
26-
/** Schema cache TTL in milliseconds (5 seconds) */
27-
const SCHEMA_CACHE_TTL_MS = 5000
28-
29-
/**
30-
* Clears expired entries from the schema cache.
31-
*/
32-
function cleanupSchemaCache(): void {
33-
const now = Date.now()
34-
for (const [key, entry] of schemaCache.entries()) {
35-
if (now - entry.timestamp > SCHEMA_CACHE_TTL_MS) {
36-
schemaCache.delete(key)
37-
}
38-
}
39-
}
40-
41-
/**
42-
* Fetches table schema with caching and request deduplication.
43-
* If a request for the same table is already in flight, returns the same promise.
44-
*/
45-
async function fetchTableSchemaWithCache(
46-
tableId: string,
47-
context: TableEnrichmentContext
48-
): Promise<TableSummary | null> {
49-
// Clean up old entries periodically
50-
if (schemaCache.size > 50) {
51-
cleanupSchemaCache()
52-
}
53-
54-
const cacheKey = `${context.workspaceId}:${tableId}`
55-
const cached = schemaCache.get(cacheKey)
56-
57-
// If we have a cached entry that's still valid, return it
58-
if (cached && Date.now() - cached.timestamp < SCHEMA_CACHE_TTL_MS) {
59-
return cached.promise
60-
}
61-
62-
// Create a new fetch promise
63-
const fetchPromise = (async (): Promise<TableSummary | null> => {
64-
const schemaResult = await context.executeTool('table_get_schema', {
65-
tableId,
66-
_context: {
67-
workspaceId: context.workspaceId,
68-
workflowId: context.workflowId,
69-
},
70-
})
71-
72-
if (!schemaResult.success || !schemaResult.output) {
73-
logger.warn(`Failed to fetch table schema: ${schemaResult.error}`)
74-
return null
75-
}
76-
77-
return {
78-
name: schemaResult.output.name,
79-
columns: schemaResult.output.columns || [],
80-
}
81-
})()
82-
83-
// Cache the promise immediately to deduplicate concurrent requests
84-
schemaCache.set(cacheKey, {
85-
promise: fetchPromise,
86-
timestamp: Date.now(),
87-
})
88-
89-
return fetchPromise
90-
}
91-
92-
export interface TableEnrichmentContext {
93-
workspaceId: string
94-
workflowId: string
95-
executeTool: (toolId: string, params: Record<string, any>) => Promise<any>
96-
}
97-
98-
export interface TableEnrichmentResult {
99-
description: string
100-
parameters: {
101-
properties: Record<string, any>
102-
required: string[]
103-
}
104-
}
105-
106-
/**
107-
* Enriches a table tool for LLM consumption by fetching its schema
108-
* and injecting column information into the description and parameters.
109-
*
110-
* @param toolId - The table tool ID (e.g., 'table_query_rows')
111-
* @param originalDescription - The tool's original description
112-
* @param llmSchema - The original LLM schema
113-
* @param userProvidedParams - Parameters provided by the user (must include tableId)
114-
* @param context - Execution context with workspaceId, workflowId, and executeTool
115-
* @returns Enriched description and parameters, or null if enrichment not applicable
116-
*/
117-
export async function enrichTableToolForLLM(
118-
toolId: string,
119-
originalDescription: string,
120-
llmSchema: { properties?: Record<string, any>; required?: string[] },
121-
userProvidedParams: Record<string, any>,
122-
context: TableEnrichmentContext
123-
): Promise<TableEnrichmentResult | null> {
124-
const { tableId } = userProvidedParams
125-
126-
// Need a tableId to fetch schema
127-
if (!tableId) {
128-
return null
129-
}
130-
131-
try {
132-
// Use cached schema fetch to deduplicate concurrent requests for the same table
133-
const tableSchema = await fetchTableSchemaWithCache(tableId, context)
134-
135-
if (!tableSchema) {
136-
return null
137-
}
138-
139-
// Apply enrichment using the existing utility functions
140-
const enrichedDescription = enrichTableToolDescription(originalDescription, tableSchema, toolId)
141-
const enrichedParams = enrichTableToolParameters(llmSchema, tableSchema, toolId)
142-
143-
return {
144-
description: enrichedDescription,
145-
parameters: {
146-
properties: enrichedParams.properties,
147-
required:
148-
enrichedParams.required.length > 0 ? enrichedParams.required : llmSchema.required || [],
149-
},
150-
}
151-
} catch (error) {
152-
logger.warn('Error fetching table schema:', error)
153-
return null
154-
}
155-
}
156-
15710
/**
15811
* Operations that use filters and need filter-specific enrichment.
15912
*/
@@ -175,11 +28,6 @@ export const DATA_OPERATIONS = new Set([
17528

17629
/**
17730
* Enriches a table tool description with table information based on the operation type.
178-
*
179-
* @param originalDescription - The original tool description
180-
* @param table - The table summary with name and columns
181-
* @param toolId - The tool identifier to determine operation type
182-
* @returns Enriched description with table-specific instructions
18331
*/
18432
export function enrichTableToolDescription(
18533
originalDescription: string,
@@ -192,7 +40,6 @@ export function enrichTableToolDescription(
19240

19341
const columnList = table.columns.map((col) => ` - ${col.name} (${col.type})`).join('\n')
19442

195-
// Filter-based operations: emphasize filter usage
19643
if (FILTER_OPERATIONS.has(toolId)) {
19744
const stringCols = table.columns.filter((c) => c.type === 'string')
19845
const numberCols = table.columns.filter((c) => c.type === 'number')
@@ -208,14 +55,12 @@ Example filter: {"${stringCols[0].name}": {"$eq": "value"}, "${numberCols[0].nam
20855
Example filter: {"${stringCols[0].name}": {"$eq": "value"}}`
20956
}
21057

211-
// Add sort example for query operations with numeric columns
21258
let sortExample = ''
21359
if (toolId === 'table_query_rows' && numberCols.length > 0) {
21460
sortExample = `
21561
Example sort: {"${numberCols[0].name}": "desc"} for highest first, {"${numberCols[0].name}": "asc"} for lowest first`
21662
}
21763

218-
// Query-specific instructions with sort/limit guidance
21964
const queryInstructions =
22065
toolId === 'table_query_rows'
22166
? `
@@ -242,7 +87,6 @@ ${columnList}
24287
${filterExample}${sortExample}`
24388
}
24489

245-
// Data operations: show columns for data construction
24690
if (DATA_OPERATIONS.has(toolId)) {
24791
const exampleCols = table.columns.slice(0, 3)
24892
const dataExample = exampleCols.reduce(
@@ -253,7 +97,6 @@ ${filterExample}${sortExample}`
25397
{} as Record<string, unknown>
25498
)
25599

256-
// Update operations support partial updates
257100
if (toolId === 'table_update_row') {
258101
return `${originalDescription}
259102
@@ -271,7 +114,6 @@ ${columnList}
271114
Pass the "data" parameter with an object like: ${JSON.stringify(dataExample)}`
272115
}
273116

274-
// Default: just show columns
275117
return `${originalDescription}
276118
277119
Table "${table.name}" columns:
@@ -280,11 +122,6 @@ ${columnList}`
280122

281123
/**
282124
* Enriches LLM tool parameters with table-specific information.
283-
*
284-
* @param llmSchema - The original LLM schema with properties and required fields
285-
* @param table - The table summary with name and columns
286-
* @param toolId - The tool identifier to determine operation type
287-
* @returns Enriched schema with updated property descriptions and required fields
288125
*/
289126
export function enrichTableToolParameters(
290127
llmSchema: { properties?: Record<string, any>; required?: string[] },
@@ -302,36 +139,31 @@ export function enrichTableToolParameters(
302139
const enrichedProperties = { ...llmSchema.properties }
303140
const enrichedRequired = llmSchema.required ? [...llmSchema.required] : []
304141

305-
// Enrich filter parameter for filter-based operations
306142
if (enrichedProperties.filter && FILTER_OPERATIONS.has(toolId)) {
307143
enrichedProperties.filter = {
308144
...enrichedProperties.filter,
309145
description: `REQUIRED - query will fail without a filter. Construct filter from user's question using columns: ${columnNames}. Syntax: {"column": {"$eq": "value"}}`,
310146
}
311147
}
312148

313-
// Mark filter as required in schema for query operations
314149
if (FILTER_OPERATIONS.has(toolId) && !enrichedRequired.includes('filter')) {
315150
enrichedRequired.push('filter')
316151
}
317152

318-
// Enrich sort parameter for query operations
319153
if (enrichedProperties.sort && toolId === 'table_query_rows') {
320154
enrichedProperties.sort = {
321155
...enrichedProperties.sort,
322156
description: `Sort order as {field: "asc"|"desc"}. REQUIRED for ranking queries (highest, lowest, Nth). Example: {"salary": "desc"} for highest salary first.`,
323157
}
324158
}
325159

326-
// Enrich limit parameter for query operations
327160
if (enrichedProperties.limit && toolId === 'table_query_rows') {
328161
enrichedProperties.limit = {
329162
...enrichedProperties.limit,
330163
description: `Maximum rows to return (min: 1, max: 1000, default: 100). For ranking queries: use limit=1 for highest/lowest, limit=2 for second highest, etc.`,
331164
}
332165
}
333166

334-
// Enrich data parameter for insert/update operations
335167
if (enrichedProperties.data && DATA_OPERATIONS.has(toolId)) {
336168
const exampleCols = table.columns.slice(0, 2)
337169
const exampleData = exampleCols.reduce(
@@ -342,7 +174,6 @@ export function enrichTableToolParameters(
342174
{} as Record<string, unknown>
343175
)
344176

345-
// Update operations support partial updates - only include fields to change
346177
if (toolId === 'table_update_row') {
347178
enrichedProperties.data = {
348179
...enrichedProperties.data,
@@ -356,7 +187,6 @@ export function enrichTableToolParameters(
356187
}
357188
}
358189

359-
// Enrich rows parameter for batch insert
360190
if (enrichedProperties.rows && toolId === 'table_batch_insert_rows') {
361191
enrichedProperties.rows = {
362192
...enrichedProperties.rows,

apps/sim/providers/utils.ts

Lines changed: 10 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ import type { ChatCompletionChunk } from 'openai/resources/chat/completions'
33
import type { CompletionUsage } from 'openai/resources/completions'
44
import { env } from '@/lib/core/config/env'
55
import { isHosted } from '@/lib/core/config/feature-flags'
6-
import { enrichTableToolForLLM } from '@/lib/table/llm'
76
import { isCustomTool } from '@/executor/constants'
87
import {
98
getComputerUseModels,
@@ -433,20 +432,9 @@ export async function transformBlockTool(
433432
getAllBlocks: () => any[]
434433
getTool: (toolId: string) => any
435434
getToolAsync?: (toolId: string) => Promise<any>
436-
workspaceId?: string
437-
workflowId?: string
438-
executeTool?: (toolId: string, params: Record<string, any>) => Promise<any>
439435
}
440436
): Promise<ProviderToolConfig | null> {
441-
const {
442-
selectedOperation,
443-
getAllBlocks,
444-
getTool,
445-
getToolAsync,
446-
workspaceId,
447-
workflowId,
448-
executeTool,
449-
} = options
437+
const { selectedOperation, getAllBlocks, getTool, getToolAsync } = options
450438

451439
const blockDef = getAllBlocks().find((b: any) => b.type === block.type)
452440
if (!blockDef) {
@@ -500,11 +488,14 @@ export async function transformBlockTool(
500488

501489
const userProvidedParams = block.params || {}
502490

503-
const llmSchema = await createLLMToolSchema(toolConfig, userProvidedParams)
491+
const { schema: llmSchema, enrichedDescription } = await createLLMToolSchema(
492+
toolConfig,
493+
userProvidedParams
494+
)
504495

505496
let uniqueToolId = toolConfig.id
506497
let toolName = toolConfig.name
507-
let toolDescription = toolConfig.description
498+
let toolDescription = enrichedDescription || toolConfig.description
508499

509500
if (toolId === 'workflow_executor' && userProvidedParams.workflowId) {
510501
uniqueToolId = `${toolConfig.id}_${userProvidedParams.workflowId}`
@@ -521,36 +512,16 @@ export async function transformBlockTool(
521512
}
522513
} else if (toolId.startsWith('knowledge_') && userProvidedParams.knowledgeBaseId) {
523514
uniqueToolId = `${toolConfig.id}_${userProvidedParams.knowledgeBaseId}`
524-
}
525-
526-
// Apply table tool enrichment if applicable
527-
let finalDescription = toolDescription
528-
let finalSchema = llmSchema
529-
530-
if (toolId.startsWith('table_') && workspaceId && workflowId && executeTool) {
531-
const result = await enrichTableToolForLLM(
532-
toolId,
533-
toolDescription,
534-
llmSchema,
535-
userProvidedParams,
536-
{
537-
workspaceId,
538-
workflowId,
539-
executeTool,
540-
}
541-
)
542-
if (result) {
543-
finalDescription = result.description
544-
finalSchema = { ...llmSchema, ...result.parameters }
545-
}
515+
} else if (toolId.startsWith('table_') && userProvidedParams.tableId) {
516+
uniqueToolId = `${toolConfig.id}_${userProvidedParams.tableId}`
546517
}
547518

548519
return {
549520
id: uniqueToolId,
550521
name: toolName,
551-
description: finalDescription,
522+
description: toolDescription,
552523
params: userProvidedParams,
553-
parameters: finalSchema,
524+
parameters: llmSchema,
554525
}
555526
}
556527

0 commit comments

Comments
 (0)