@@ -153,6 +153,12 @@ def create_sample_nodes(workspace_id: str, prefix: str = "") -> List[VectorNode]
153153 metadata = {
154154 "node_type" : "tech" ,
155155 "category" : "AI" ,
156+ "source" : "research" ,
157+ "priority" : "high" ,
158+ "year" : 2023 ,
159+ "department" : "engineering" ,
160+ "language" : "english" ,
161+ "status" : "published" ,
156162 },
157163 ),
158164 VectorNode (
@@ -162,6 +168,12 @@ def create_sample_nodes(workspace_id: str, prefix: str = "") -> List[VectorNode]
162168 metadata = {
163169 "node_type" : "tech" ,
164170 "category" : "ML" ,
171+ "source" : "research" ,
172+ "priority" : "high" ,
173+ "year" : 2022 ,
174+ "department" : "engineering" ,
175+ "language" : "english" ,
176+ "status" : "published" ,
165177 },
166178 ),
167179 VectorNode (
@@ -171,6 +183,12 @@ def create_sample_nodes(workspace_id: str, prefix: str = "") -> List[VectorNode]
171183 metadata = {
172184 "node_type" : "tech_new" ,
173185 "category" : "ML" ,
186+ "source" : "blog" ,
187+ "priority" : "medium" ,
188+ "year" : 2024 ,
189+ "department" : "marketing" ,
190+ "language" : "chinese" ,
191+ "status" : "draft" ,
174192 },
175193 ),
176194 VectorNode (
@@ -180,6 +198,12 @@ def create_sample_nodes(workspace_id: str, prefix: str = "") -> List[VectorNode]
180198 metadata = {
181199 "node_type" : "food" ,
182200 "category" : "preference" ,
201+ "source" : "personal" ,
202+ "priority" : "low" ,
203+ "year" : 2023 ,
204+ "department" : "lifestyle" ,
205+ "language" : "english" ,
206+ "status" : "published" ,
183207 },
184208 ),
185209 VectorNode (
@@ -189,6 +213,12 @@ def create_sample_nodes(workspace_id: str, prefix: str = "") -> List[VectorNode]
189213 metadata = {
190214 "node_type" : "tech" ,
191215 "category" : "DL" ,
216+ "source" : "research" ,
217+ "priority" : "high" ,
218+ "year" : 2024 ,
219+ "department" : "engineering" ,
220+ "language" : "chinese" ,
221+ "status" : "review" ,
192222 },
193223 ),
194224 ]
@@ -286,6 +316,7 @@ def test_search(self, workspace_id: str):
286316 def test_search_with_filter (self , workspace_id : str ):
287317 """Test vector search with filter."""
288318 logger .info ("=" * 20 + " FILTER SEARCH TEST " + "=" * 20 )
319+ # Test 1: Filter by node_type only
289320 filter_dict = {"metadata.node_type" : ["tech" , "tech_new" ]}
290321 results = self .client .search (
291322 "What is artificial intelligence?" ,
@@ -301,6 +332,57 @@ def test_search_with_filter(self, workspace_id: str):
301332 "tech_new" ,
302333 ], "All results should have node_type in [tech, tech_new]"
303334
335+ # Test 2: Filter by both node_type and source (multiple metadata keys)
336+ logger .info ("=" * 20 + " MULTI-KEY FILTER SEARCH TEST " + "=" * 20 )
337+ filter_dict_multi = {
338+ "metadata.node_type" : ["tech" , "tech_new" ],
339+ "metadata.source" : "research" ,
340+ }
341+ results_multi = self .client .search (
342+ "What is artificial intelligence?" ,
343+ workspace_id = workspace_id ,
344+ top_k = 5 ,
345+ filter_dict = filter_dict_multi ,
346+ )
347+ logger .info (
348+ f"Multi-key filtered search returned { len (results_multi )} results "
349+ f"(node_type in [tech, tech_new] AND source=research)"
350+ )
351+ for i , r in enumerate (results_multi , 1 ):
352+ logger .info (f"Multi-key Filtered Result { i } : { r .model_dump (exclude = {'vector' })} " )
353+ assert r .metadata .get ("node_type" ) in [
354+ "tech" ,
355+ "tech_new" ,
356+ ], "All results should have node_type in [tech, tech_new]"
357+ assert r .metadata .get ("source" ) == "research" , "All results should have source=research"
358+
359+ # Test 3: Filter by both node_type and language with list values for both
360+ logger .info ("=" * 20 + " MULTI-KEY LIST FILTER SEARCH TEST " + "=" * 20 )
361+ filter_dict_multi_list = {
362+ "metadata.node_type" : ["tech" , "tech_new" ],
363+ "metadata.language" : ["english" , "chinese" ],
364+ }
365+ results_multi_list = self .client .search (
366+ "What is artificial intelligence?" ,
367+ workspace_id = workspace_id ,
368+ top_k = 5 ,
369+ filter_dict = filter_dict_multi_list ,
370+ )
371+ logger .info (
372+ f"Multi-key list filtered search returned { len (results_multi_list )} results "
373+ f"(node_type in [tech, tech_new] AND language in [english, chinese])"
374+ )
375+ for i , r in enumerate (results_multi_list , 1 ):
376+ logger .info (f"Multi-key List Filtered Result { i } : { r .model_dump (exclude = {'vector' })} " )
377+ assert r .metadata .get ("node_type" ) in [
378+ "tech" ,
379+ "tech_new" ,
380+ ], "All results should have node_type in [tech, tech_new]"
381+ assert r .metadata .get ("language" ) in [
382+ "english" ,
383+ "chinese" ,
384+ ], "All results should have language in [english, chinese]"
385+
304386 def test_search_with_id (self , workspace_id : str ):
305387 """Test vector search by unique_id with empty query."""
306388 logger .info ("=" * 20 + " SEARCH BY ID TEST " + "=" * 20 )
@@ -534,6 +616,7 @@ async def test_search(self, workspace_id: str):
534616 async def test_search_with_filter (self , workspace_id : str ):
535617 """Test async vector search with filter."""
536618 logger .info ("ASYNC - " + "=" * 20 + " FILTER SEARCH TEST " + "=" * 20 )
619+ # Test 1: Filter by node_type only
537620 filter_dict = {"metadata.node_type" : ["tech" , "tech_new" ]}
538621 results = await self .client .async_search (
539622 "What is artificial intelligence?" ,
@@ -549,6 +632,57 @@ async def test_search_with_filter(self, workspace_id: str):
549632 "tech_new" ,
550633 ], "All results should have node_type in [tech, tech_new]"
551634
635+ # Test 2: Filter by both node_type and source (multiple metadata keys)
636+ logger .info ("ASYNC - " + "=" * 20 + " MULTI-KEY FILTER SEARCH TEST " + "=" * 20 )
637+ filter_dict_multi = {
638+ "metadata.node_type" : ["tech" , "tech_new" ],
639+ "metadata.source" : "research" ,
640+ }
641+ results_multi = await self .client .async_search (
642+ "What is artificial intelligence?" ,
643+ workspace_id = workspace_id ,
644+ top_k = 5 ,
645+ filter_dict = filter_dict_multi ,
646+ )
647+ logger .info (
648+ f"Multi-key filtered search returned { len (results_multi )} results "
649+ f"(node_type in [tech, tech_new] AND source=research)"
650+ )
651+ for i , r in enumerate (results_multi , 1 ):
652+ logger .info (f"Multi-key Filtered Result { i } : { r .model_dump (exclude = {'vector' })} " )
653+ assert r .metadata .get ("node_type" ) in [
654+ "tech" ,
655+ "tech_new" ,
656+ ], "All results should have node_type in [tech, tech_new]"
657+ assert r .metadata .get ("source" ) == "research" , "All results should have source=research"
658+
659+ # Test 3: Filter by both node_type and language with list values for both
660+ logger .info ("ASYNC - " + "=" * 20 + " MULTI-KEY LIST FILTER SEARCH TEST " + "=" * 20 )
661+ filter_dict_multi_list = {
662+ "metadata.node_type" : ["tech" , "tech_new" ],
663+ "metadata.language" : ["english" , "chinese" ],
664+ }
665+ results_multi_list = await self .client .async_search (
666+ "What is artificial intelligence?" ,
667+ workspace_id = workspace_id ,
668+ top_k = 5 ,
669+ filter_dict = filter_dict_multi_list ,
670+ )
671+ logger .info (
672+ f"Multi-key list filtered search returned { len (results_multi_list )} results "
673+ f"(node_type in [tech, tech_new] AND language in [english, chinese])"
674+ )
675+ for i , r in enumerate (results_multi_list , 1 ):
676+ logger .info (f"Multi-key List Filtered Result { i } : { r .model_dump (exclude = {'vector' })} " )
677+ assert r .metadata .get ("node_type" ) in [
678+ "tech" ,
679+ "tech_new" ,
680+ ], "All results should have node_type in [tech, tech_new]"
681+ assert r .metadata .get ("language" ) in [
682+ "english" ,
683+ "chinese" ,
684+ ], "All results should have language in [english, chinese]"
685+
552686 async def test_search_with_id (self , workspace_id : str ):
553687 """Test async vector search by unique_id with empty query."""
554688 logger .info ("ASYNC - " + "=" * 20 + " SEARCH BY ID TEST " + "=" * 20 )
@@ -823,12 +957,16 @@ def print_usage():
823957 if args [0 ] == "--all" :
824958 # Test all vector stores
825959 run_tests (store_types = valid_store_types )
960+ # Clean up test files after testing
961+ delete_test_files ()
826962 elif args [0 ] == "delete" :
827963 # Delete all test-generated files
828964 delete_test_files ()
829965 elif args [0 ] in valid_store_types :
830966 # Test specific vector store
831967 run_tests (store_types = [args [0 ]])
968+ # Clean up test files after testing
969+ delete_test_files ()
832970 else :
833971 print (f"Unknown argument: { args [0 ]} " )
834972 print_usage ()
0 commit comments