Skip to content

Commit c2ee276

Browse files
committed
test(memory): enhance vector store tests with metadata and multi-key filters
1 parent 3219ab0 commit c2ee276

File tree

1 file changed

+138
-0
lines changed

1 file changed

+138
-0
lines changed

tests/test_memory_vector_store.py

Lines changed: 138 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -153,6 +153,12 @@ def create_sample_nodes(workspace_id: str, prefix: str = "") -> List[VectorNode]
153153
metadata={
154154
"node_type": "tech",
155155
"category": "AI",
156+
"source": "research",
157+
"priority": "high",
158+
"year": 2023,
159+
"department": "engineering",
160+
"language": "english",
161+
"status": "published",
156162
},
157163
),
158164
VectorNode(
@@ -162,6 +168,12 @@ def create_sample_nodes(workspace_id: str, prefix: str = "") -> List[VectorNode]
162168
metadata={
163169
"node_type": "tech",
164170
"category": "ML",
171+
"source": "research",
172+
"priority": "high",
173+
"year": 2022,
174+
"department": "engineering",
175+
"language": "english",
176+
"status": "published",
165177
},
166178
),
167179
VectorNode(
@@ -171,6 +183,12 @@ def create_sample_nodes(workspace_id: str, prefix: str = "") -> List[VectorNode]
171183
metadata={
172184
"node_type": "tech_new",
173185
"category": "ML",
186+
"source": "blog",
187+
"priority": "medium",
188+
"year": 2024,
189+
"department": "marketing",
190+
"language": "chinese",
191+
"status": "draft",
174192
},
175193
),
176194
VectorNode(
@@ -180,6 +198,12 @@ def create_sample_nodes(workspace_id: str, prefix: str = "") -> List[VectorNode]
180198
metadata={
181199
"node_type": "food",
182200
"category": "preference",
201+
"source": "personal",
202+
"priority": "low",
203+
"year": 2023,
204+
"department": "lifestyle",
205+
"language": "english",
206+
"status": "published",
183207
},
184208
),
185209
VectorNode(
@@ -189,6 +213,12 @@ def create_sample_nodes(workspace_id: str, prefix: str = "") -> List[VectorNode]
189213
metadata={
190214
"node_type": "tech",
191215
"category": "DL",
216+
"source": "research",
217+
"priority": "high",
218+
"year": 2024,
219+
"department": "engineering",
220+
"language": "chinese",
221+
"status": "review",
192222
},
193223
),
194224
]
@@ -286,6 +316,7 @@ def test_search(self, workspace_id: str):
286316
def test_search_with_filter(self, workspace_id: str):
287317
"""Test vector search with filter."""
288318
logger.info("=" * 20 + " FILTER SEARCH TEST " + "=" * 20)
319+
# Test 1: Filter by node_type only
289320
filter_dict = {"metadata.node_type": ["tech", "tech_new"]}
290321
results = self.client.search(
291322
"What is artificial intelligence?",
@@ -301,6 +332,57 @@ def test_search_with_filter(self, workspace_id: str):
301332
"tech_new",
302333
], "All results should have node_type in [tech, tech_new]"
303334

335+
# Test 2: Filter by both node_type and source (multiple metadata keys)
336+
logger.info("=" * 20 + " MULTI-KEY FILTER SEARCH TEST " + "=" * 20)
337+
filter_dict_multi = {
338+
"metadata.node_type": ["tech", "tech_new"],
339+
"metadata.source": "research",
340+
}
341+
results_multi = self.client.search(
342+
"What is artificial intelligence?",
343+
workspace_id=workspace_id,
344+
top_k=5,
345+
filter_dict=filter_dict_multi,
346+
)
347+
logger.info(
348+
f"Multi-key filtered search returned {len(results_multi)} results "
349+
f"(node_type in [tech, tech_new] AND source=research)"
350+
)
351+
for i, r in enumerate(results_multi, 1):
352+
logger.info(f"Multi-key Filtered Result {i}: {r.model_dump(exclude={'vector'})}")
353+
assert r.metadata.get("node_type") in [
354+
"tech",
355+
"tech_new",
356+
], "All results should have node_type in [tech, tech_new]"
357+
assert r.metadata.get("source") == "research", "All results should have source=research"
358+
359+
# Test 3: Filter by both node_type and language with list values for both
360+
logger.info("=" * 20 + " MULTI-KEY LIST FILTER SEARCH TEST " + "=" * 20)
361+
filter_dict_multi_list = {
362+
"metadata.node_type": ["tech", "tech_new"],
363+
"metadata.language": ["english", "chinese"],
364+
}
365+
results_multi_list = self.client.search(
366+
"What is artificial intelligence?",
367+
workspace_id=workspace_id,
368+
top_k=5,
369+
filter_dict=filter_dict_multi_list,
370+
)
371+
logger.info(
372+
f"Multi-key list filtered search returned {len(results_multi_list)} results "
373+
f"(node_type in [tech, tech_new] AND language in [english, chinese])"
374+
)
375+
for i, r in enumerate(results_multi_list, 1):
376+
logger.info(f"Multi-key List Filtered Result {i}: {r.model_dump(exclude={'vector'})}")
377+
assert r.metadata.get("node_type") in [
378+
"tech",
379+
"tech_new",
380+
], "All results should have node_type in [tech, tech_new]"
381+
assert r.metadata.get("language") in [
382+
"english",
383+
"chinese",
384+
], "All results should have language in [english, chinese]"
385+
304386
def test_search_with_id(self, workspace_id: str):
305387
"""Test vector search by unique_id with empty query."""
306388
logger.info("=" * 20 + " SEARCH BY ID TEST " + "=" * 20)
@@ -534,6 +616,7 @@ async def test_search(self, workspace_id: str):
534616
async def test_search_with_filter(self, workspace_id: str):
535617
"""Test async vector search with filter."""
536618
logger.info("ASYNC - " + "=" * 20 + " FILTER SEARCH TEST " + "=" * 20)
619+
# Test 1: Filter by node_type only
537620
filter_dict = {"metadata.node_type": ["tech", "tech_new"]}
538621
results = await self.client.async_search(
539622
"What is artificial intelligence?",
@@ -549,6 +632,57 @@ async def test_search_with_filter(self, workspace_id: str):
549632
"tech_new",
550633
], "All results should have node_type in [tech, tech_new]"
551634

635+
# Test 2: Filter by both node_type and source (multiple metadata keys)
636+
logger.info("ASYNC - " + "=" * 20 + " MULTI-KEY FILTER SEARCH TEST " + "=" * 20)
637+
filter_dict_multi = {
638+
"metadata.node_type": ["tech", "tech_new"],
639+
"metadata.source": "research",
640+
}
641+
results_multi = await self.client.async_search(
642+
"What is artificial intelligence?",
643+
workspace_id=workspace_id,
644+
top_k=5,
645+
filter_dict=filter_dict_multi,
646+
)
647+
logger.info(
648+
f"Multi-key filtered search returned {len(results_multi)} results "
649+
f"(node_type in [tech, tech_new] AND source=research)"
650+
)
651+
for i, r in enumerate(results_multi, 1):
652+
logger.info(f"Multi-key Filtered Result {i}: {r.model_dump(exclude={'vector'})}")
653+
assert r.metadata.get("node_type") in [
654+
"tech",
655+
"tech_new",
656+
], "All results should have node_type in [tech, tech_new]"
657+
assert r.metadata.get("source") == "research", "All results should have source=research"
658+
659+
# Test 3: Filter by both node_type and language with list values for both
660+
logger.info("ASYNC - " + "=" * 20 + " MULTI-KEY LIST FILTER SEARCH TEST " + "=" * 20)
661+
filter_dict_multi_list = {
662+
"metadata.node_type": ["tech", "tech_new"],
663+
"metadata.language": ["english", "chinese"],
664+
}
665+
results_multi_list = await self.client.async_search(
666+
"What is artificial intelligence?",
667+
workspace_id=workspace_id,
668+
top_k=5,
669+
filter_dict=filter_dict_multi_list,
670+
)
671+
logger.info(
672+
f"Multi-key list filtered search returned {len(results_multi_list)} results "
673+
f"(node_type in [tech, tech_new] AND language in [english, chinese])"
674+
)
675+
for i, r in enumerate(results_multi_list, 1):
676+
logger.info(f"Multi-key List Filtered Result {i}: {r.model_dump(exclude={'vector'})}")
677+
assert r.metadata.get("node_type") in [
678+
"tech",
679+
"tech_new",
680+
], "All results should have node_type in [tech, tech_new]"
681+
assert r.metadata.get("language") in [
682+
"english",
683+
"chinese",
684+
], "All results should have language in [english, chinese]"
685+
552686
async def test_search_with_id(self, workspace_id: str):
553687
"""Test async vector search by unique_id with empty query."""
554688
logger.info("ASYNC - " + "=" * 20 + " SEARCH BY ID TEST " + "=" * 20)
@@ -823,12 +957,16 @@ def print_usage():
823957
if args[0] == "--all":
824958
# Test all vector stores
825959
run_tests(store_types=valid_store_types)
960+
# Clean up test files after testing
961+
delete_test_files()
826962
elif args[0] == "delete":
827963
# Delete all test-generated files
828964
delete_test_files()
829965
elif args[0] in valid_store_types:
830966
# Test specific vector store
831967
run_tests(store_types=[args[0]])
968+
# Clean up test files after testing
969+
delete_test_files()
832970
else:
833971
print(f"Unknown argument: {args[0]}")
834972
print_usage()

0 commit comments

Comments
 (0)