Skip to content

Commit dab6383

Browse files
author
priyanshu.solanki
committed
feat: add full support for number/date/boolean tag filtering in KB search
- Copy all tag types (number, date, boolean) from document to embedding records
- Update processDocumentTags to handle all field types with proper type conversion
- Add number/date/boolean columns to document queries in checkDocumentWriteAccess
- Update chunk creation to inherit all tag types from parent document
- Add getSearchResultFields helper for consistent query result selection
- Support structured filters with operators (eq, gt, lt, between, etc.)
- Fix search queries to include all 28 tag fields in results
1 parent 21174d8 commit dab6383

File tree

16 files changed

+725
-207
lines changed

16 files changed

+725
-207
lines changed

apps/sim/app/api/knowledge/[id]/documents/[documentId]/chunks/route.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,13 +156,38 @@ export async function POST(
156156
const validatedData = CreateChunkSchema.parse(searchParams)
157157

158158
const docTags = {
159+
// Text tags
159160
tag1: doc.tag1 ?? null,
160161
tag2: doc.tag2 ?? null,
161162
tag3: doc.tag3 ?? null,
162163
tag4: doc.tag4 ?? null,
163164
tag5: doc.tag5 ?? null,
164165
tag6: doc.tag6 ?? null,
165166
tag7: doc.tag7 ?? null,
167+
// Number tags
168+
number1: doc.number1 ?? null,
169+
number2: doc.number2 ?? null,
170+
number3: doc.number3 ?? null,
171+
number4: doc.number4 ?? null,
172+
number5: doc.number5 ?? null,
173+
number6: doc.number6 ?? null,
174+
number7: doc.number7 ?? null,
175+
// Date tags
176+
date1: doc.date1 ?? null,
177+
date2: doc.date2 ?? null,
178+
date3: doc.date3 ?? null,
179+
date4: doc.date4 ?? null,
180+
date5: doc.date5 ?? null,
181+
date6: doc.date6 ?? null,
182+
date7: doc.date7 ?? null,
183+
// Boolean tags
184+
boolean1: doc.boolean1 ?? null,
185+
boolean2: doc.boolean2 ?? null,
186+
boolean3: doc.boolean3 ?? null,
187+
boolean4: doc.boolean4 ?? null,
188+
boolean5: doc.boolean5 ?? null,
189+
boolean6: doc.boolean6 ?? null,
190+
boolean7: doc.boolean7 ?? null,
166191
}
167192

168193
const newChunk = await createChunk(

apps/sim/app/api/knowledge/search/route.ts

Lines changed: 82 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,16 @@ import { calculateCost } from '@/providers/utils'
2020

2121
const logger = createLogger('VectorSearchAPI')
2222

23+
/** Structured tag filter with operator support */
24+
const StructuredTagFilterSchema = z.object({
25+
tagName: z.string(),
26+
tagSlot: z.string().optional(),
27+
fieldType: z.enum(['text', 'number', 'date', 'boolean']).default('text'),
28+
operator: z.string().default('eq'),
29+
value: z.union([z.string(), z.number(), z.boolean()]),
30+
valueTo: z.union([z.string(), z.number()]).optional(),
31+
})
32+
2333
const VectorSearchSchema = z
2434
.object({
2535
knowledgeBaseIds: z.union([
@@ -43,14 +53,20 @@ const VectorSearchSchema = z
4353
.record(z.string())
4454
.optional()
4555
.nullable()
46-
.transform((val) => val || undefined), // Allow dynamic filter keys (display names)
56+
.transform((val) => val || undefined), // Legacy format: simple key-value pairs
57+
tagFilters: z
58+
.array(StructuredTagFilterSchema)
59+
.optional()
60+
.nullable()
61+
.transform((val) => val || undefined), // New format: structured filters with operators
4762
})
4863
.refine(
4964
(data) => {
5065
// Ensure at least query or filters are provided
5166
const hasQuery = data.query && data.query.trim().length > 0
52-
const hasFilters = data.filters && Object.keys(data.filters).length > 0
53-
return hasQuery || hasFilters
67+
const hasLegacyFilters = data.filters && Object.keys(data.filters).length > 0
68+
const hasTagFilters = data.tagFilters && data.tagFilters.length > 0
69+
return hasQuery || hasLegacyFilters || hasTagFilters
5470
},
5571
{
5672
message: 'Please provide either a search query or tag filters to search your knowledge base',
@@ -89,6 +105,54 @@ export async function POST(request: NextRequest) {
89105

90106
// Map display names to tag slots for filtering
91107
let mappedFilters: Record<string, string> = {}
108+
let structuredFilters: Array<{
109+
tagSlot: string
110+
fieldType: string
111+
operator: string
112+
value: string | number | boolean
113+
valueTo?: string | number
114+
}> = []
115+
116+
// Handle new structured tagFilters format
117+
if (validatedData.tagFilters && accessibleKbIds.length > 0) {
118+
try {
119+
const kbId = accessibleKbIds[0]
120+
const tagDefs = await getDocumentTagDefinitions(kbId)
121+
122+
// Create mapping from display name to tag slot and fieldType
123+
const displayNameToTagDef: Record<string, { tagSlot: string; fieldType: string }> = {}
124+
tagDefs.forEach((def) => {
125+
displayNameToTagDef[def.displayName] = {
126+
tagSlot: def.tagSlot,
127+
fieldType: def.fieldType,
128+
}
129+
})
130+
131+
structuredFilters = validatedData.tagFilters.map((filter) => {
132+
const tagDef = displayNameToTagDef[filter.tagName]
133+
const tagSlot = filter.tagSlot || tagDef?.tagSlot || filter.tagName
134+
const fieldType = filter.fieldType || tagDef?.fieldType || 'text'
135+
136+
logger.debug(
137+
`[${requestId}] Structured filter: ${filter.tagName} -> ${tagSlot} (${fieldType}) ${filter.operator} ${filter.value}`
138+
)
139+
140+
return {
141+
tagSlot,
142+
fieldType,
143+
operator: filter.operator,
144+
value: filter.value,
145+
valueTo: filter.valueTo,
146+
}
147+
})
148+
149+
logger.debug(`[${requestId}] Processed ${structuredFilters.length} structured filters`)
150+
} catch (error) {
151+
logger.error(`[${requestId}] Structured filter processing error:`, error)
152+
}
153+
}
154+
155+
// Handle legacy filters format (for backwards compatibility)
92156
if (validatedData.filters && accessibleKbIds.length > 0) {
93157
try {
94158
// Fetch tag definitions for the first accessible KB (since we're using single KB now)
@@ -155,26 +219,36 @@ export async function POST(request: NextRequest) {
155219

156220
let results: SearchResult[]
157221

158-
const hasFilters = mappedFilters && Object.keys(mappedFilters).length > 0
222+
const hasLegacyFilters = mappedFilters && Object.keys(mappedFilters).length > 0
223+
const hasStructuredFilters = structuredFilters && structuredFilters.length > 0
224+
const hasFilters = hasLegacyFilters || hasStructuredFilters
159225

160226
if (!hasQuery && hasFilters) {
161227
// Tag-only search without vector similarity
162-
logger.debug(`[${requestId}] Executing tag-only search with filters:`, mappedFilters)
228+
logger.debug(
229+
`[${requestId}] Executing tag-only search with filters:`,
230+
hasStructuredFilters ? structuredFilters : mappedFilters
231+
)
163232
results = await handleTagOnlySearch({
164233
knowledgeBaseIds: accessibleKbIds,
165234
topK: validatedData.topK,
166-
filters: mappedFilters,
235+
filters: hasLegacyFilters ? mappedFilters : undefined,
236+
structuredFilters: hasStructuredFilters ? structuredFilters : undefined,
167237
})
168238
} else if (hasQuery && hasFilters) {
169239
// Tag + Vector search
170-
logger.debug(`[${requestId}] Executing tag + vector search with filters:`, mappedFilters)
240+
logger.debug(
241+
`[${requestId}] Executing tag + vector search with filters:`,
242+
hasStructuredFilters ? structuredFilters : mappedFilters
243+
)
171244
const strategy = getQueryStrategy(accessibleKbIds.length, validatedData.topK)
172245
const queryVector = JSON.stringify(await queryEmbeddingPromise)
173246

174247
results = await handleTagAndVectorSearch({
175248
knowledgeBaseIds: accessibleKbIds,
176249
topK: validatedData.topK,
177-
filters: mappedFilters,
250+
filters: hasLegacyFilters ? mappedFilters : undefined,
251+
structuredFilters: hasStructuredFilters ? structuredFilters : undefined,
178252
queryVector,
179253
distanceThreshold: strategy.distanceThreshold,
180254
})

0 commit comments

Comments
 (0)