VectorInstitute · fcogidi · Jan 13, 2026 · Jan 9, 2026 · Jan 9, 2026 · Jan 9, 2026
diff --git a/.github/workflows/code_checks.yml b/.github/workflows/code_checks.yml
@@ -56,4 +56,4 @@ jobs:
         with:
           virtual-environment: .venv/
           ignore-vulns: |
-            GHSA-2qfp-q593-8484
+            GHSA-xm59-rqc7-hhvf
diff --git a/pyproject.toml b/pyproject.toml
@@ -9,19 +9,18 @@ requires-python = ">=3.12"
 dependencies = [
     "aiohttp>=3.12.14",
     "beautifulsoup4>=4.13.4",
-    "datasets>=3.6.0",
-    "e2b-code-interpreter>=1.5.2",
-    "gradio>=5.37.0",
-    "langfuse>=3.1.3",
+    "datasets>=4.4.0",
+    "e2b-code-interpreter>=2.3.0",
+    "gradio>=6.1.0",
+    "langfuse>=3.9.0",
     "lxml>=6.0.0",
     "nest-asyncio>=1.6.0",
     "numpy<2.3.0",
-    "openai>=1.93.1",
-    "openai-agents>=0.1.0",
+    "openai>=2.6.0",
+    "openai-agents>=0.4.0",
     "plotly>=6.2.0",
     "pydantic>=2.11.7",
     "pydantic-ai-slim[logfire]>=0.3.7",
-    "pytest-asyncio>=0.25.2",
     "scikit-learn>=1.7.0",
     "weaviate-client>=4.15.4",
 ]
@@ -46,8 +45,8 @@ dev = [
     "pip-audit>=2.7.3",
     "pre-commit>=4.1.0",
     "pytest>=8.3.4",
-    "pytest-asyncio>=0.25.2",
-    "pytest-cov>=6.0.0",
+    "pytest-asyncio>=1.2.0",
+    "pytest-cov>=7.0.0",
     "pytest-mock>=3.14.0",
     "ruff>=0.12.2",
     "transformers>=4.54.1",

diff --git a/src/1_basics/0_search_demo/app.py b/src/1_basics/0_search_demo/app.py
@@ -1,20 +1,16 @@
 """Knowledge Base Search Demo using Gradio."""
 
+import asyncio
+
 import gradio as gr
 from dotenv import load_dotenv
-from openai import AsyncOpenAI
 
-from src.utils import (
-    AsyncWeaviateKnowledgeBase,
-    Configs,
-    get_weaviate_async_client,
-    pretty_print,
-)
+from src.utils import AsyncClientManager, pretty_print
 
 
 DESCRIPTION = """\
 In the example below, your goal is to find out where \
-Apple's SVP Software Engineering got his degree in engineering- \
+Apple's SVP Software Engineering got his degree in engineering - \
 without knowing the full name of that person ahead of time. \
 \
 Did you see why traditional RAG systems like this one \
@@ -24,51 +20,46 @@
 \
 The output format you see is also what the Agent LLM \
 would receive when interacting with the knowledge base search \
-tool in subsequent sections of this bootcamp- both when using \
+tool in subsequent sections of this bootcamp - both when using \
 the Wikipedia database we provided and when using your own \
 public dataset.
 """
 
 
-load_dotenv(verbose=True)
-
-configs = Configs()
-async_weaviate_client = get_weaviate_async_client(
-    http_host=configs.weaviate_http_host,
-    http_port=configs.weaviate_http_port,
-    http_secure=configs.weaviate_http_secure,
-    grpc_host=configs.weaviate_grpc_host,
-    grpc_port=configs.weaviate_grpc_port,
-    grpc_secure=configs.weaviate_grpc_secure,
-    api_key=configs.weaviate_api_key,
-)
-async_openai_client = AsyncOpenAI()
-async_knowledgebase = AsyncWeaviateKnowledgeBase(
-    async_weaviate_client,
-    collection_name=configs.weaviate_collection_name,
-)
-
-
 async def search_and_pretty_format(keyword: str) -> str:
     """Search knowledgebase and pretty-format output."""
-    output = await async_knowledgebase.search_knowledgebase(keyword)
+    output = await client_manager.knowledgebase.search_knowledgebase(keyword)
     return pretty_print(output)
 
 
-json_codeblock = gr.Code(language="json", wrap_lines=True)
+if __name__ == "__main__":
+    load_dotenv(verbose=True)
+
+    # Initialize client manager
+    # This class initializes the OpenAI and Weaviate async clients, as well as the
+    # Weaviate knowledge base tool. The initialization is done once when the clients
+    # are first accessed, and the clients are reused for subsequent calls.
+    client_manager = AsyncClientManager()
 
-demo = gr.Interface(
-    fn=search_and_pretty_format,
-    inputs=["text"],
-    outputs=[json_codeblock],
-    title="1.0: Knowledge Base Search Demo",
-    description=DESCRIPTION,
-    examples=[
-        "Apple SVP Software Engineering",
-        "Craig Federighi",
-        "Apple SVP Software Engineering academic background",
-        "Craig Federighi academic background",
-    ],
-)
+    # Gradio UI
+    # The UI consists of a text input for the search keyword
+    # and a code block to display the JSON-formatted search results.
+    demo = gr.Interface(
+        fn=search_and_pretty_format,
+        inputs=["text"],
+        outputs=[gr.Code(language="json", wrap_lines=True)],
+        title="1.0: Knowledge Base Search Demo",
+        description=DESCRIPTION,
+        examples=[
+            "Apple SVP Software Engineering academic background",
+            "Apple SVP Software Engineering",
+            "Craig Federighi",
+            "Craig Federighi academic background",
+        ],
+    )
 
-demo.launch(share=True)
+    try:
+        demo.launch(share=True)
+    finally:
+        # Ensure clients are closed on exit
+        asyncio.run(client_manager.close())
diff --git a/src/1_basics/1_react_rag/app.py b/src/1_basics/1_react_rag/app.py
@@ -4,26 +4,22 @@
 """
 
 import asyncio
-import contextlib
 import json
-import signal
-import sys
+from typing import TYPE_CHECKING, Any, AsyncGenerator
 
 import gradio as gr
 from dotenv import load_dotenv
 from gradio.components.chatbot import ChatMessage
-from openai import AsyncOpenAI
-from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionToolParam
 
 from src.prompts import REACT_INSTRUCTIONS
-from src.utils import (
-    AsyncWeaviateKnowledgeBase,
-    Configs,
-    get_weaviate_async_client,
-)
+from src.utils.client_manager import AsyncClientManager
 
 
-load_dotenv(verbose=True)
+if TYPE_CHECKING:
+    from openai.types.chat import (
+        ChatCompletionSystemMessageParam,
+        ChatCompletionToolParam,
+    )
 
 MAX_TURNS = 5
 
@@ -55,29 +51,24 @@
 }
 
 
-async def _cleanup_clients() -> None:
-    """Close async clients."""
-    await async_weaviate_client.close()
-    await async_openai_client.close()
-
-
-def _handle_sigint(signum: int, frame: object) -> None:
-    """Handle SIGINT signal to gracefully shutdown."""
-    with contextlib.suppress(Exception):
-        asyncio.get_event_loop().run_until_complete(_cleanup_clients())
-    sys.exit(0)
-
-
-async def react_rag(query: str, history: list[ChatMessage]):
+async def react_rag(
+    query: str, history: list[ChatMessage]
+) -> AsyncGenerator[list[ChatMessage], Any]:
     """Handle ReAct RAG chat for knowledgebase-augmented agents."""
+    # Flag to track if the agent has provided a final response
+    # If the agent exhausts all reasoning steps without a final answer,
+    # we make one last call to get a final response based on the information available.
+    agent_responded = False
+
+    # Construct chat completion messages to pass to LLM
     oai_messages = [system_message, {"role": "user", "content": query}]
 
     for _ in range(MAX_TURNS):
-        completion = await async_openai_client.chat.completions.create(
-            model=configs.default_planner_model,
+        # Call OpenAI chat completions with tools enabled
+        completion = await client_manager.openai_client.chat.completions.create(
+            model=client_manager.configs.default_worker_model,
             messages=oai_messages,
-            tools=tools,
-            reasoning_effort=None,
+            tools=tools,  # This makes the tool defined above available to the LLM
         )
 
         # Print assistant output
@@ -87,20 +78,25 @@ async def react_rag(query: str, history: list[ChatMessage]):
         # Execute tool calls and send results back to LLM if requested.
         # Otherwise, stop, as the conversation would have been finished.
         tool_calls = message.tool_calls
+
+        if tool_calls is None:  # No tool calls, assume final response
+            history.append(ChatMessage(content=message.content or "", role="assistant"))
+            agent_responded = True
+            yield history
+            break
+
         history.append(
             ChatMessage(
-                content=message.content or "",
                 role="assistant",
+                content=message.content or "",
+                metadata={"title": "🧠 Thought"},
             )
         )
-
-        if tool_calls is None:
-            yield history
-            break
+        yield history
 
         for tool_call in tool_calls:
             arguments = json.loads(tool_call.function.arguments)
-            results = await async_knowledgebase.search_knowledgebase(
+            results = await client_manager.knowledgebase.search_knowledgebase(
                 arguments["keyword"]
             )
             results_serialized = json.dumps(
@@ -117,46 +113,65 @@ async def react_rag(query: str, history: list[ChatMessage]):
             history.append(
                 ChatMessage(
                     role="assistant",
-                    content=results_serialized,
+                    content=f"```\n{results_serialized}\n```",
                     metadata={
-                        "title": f"Used tool {tool_call.function.name}",
+                        "title": f"🛠️ Used tool `{tool_call.function.name}`",
                         "log": f"Arguments: {arguments}",
+                        "status": "done",  # This makes it collapsed by default
                     },
                 )
             )
             yield history
 
+    if not agent_responded:
+        # Make one final LLM call to get a response given the history
+        oai_messages.append(
+            {
+                "role": "system",
+                "content": (
+                    "You have reached the maximum number of allowed reasoning "
+                    "steps. Provide a final answer based on the information available."
+                ),
+            }
+        )
+        completion = await client_manager.openai_client.chat.completions.create(
+            model=client_manager.configs.default_planner_model,
+            messages=oai_messages,
+        )
+        message = completion.choices[0].message
+        history.append(ChatMessage(content=message.content or "", role="assistant"))
+        oai_messages.pop()  # Drop the temporary wrap-up system message again
+        oai_messages.append(message)  # Record the final answer in oai_messages
+        yield history
 
-demo = gr.ChatInterface(
-    react_rag,
-    title="1.1 ReAct Agent for Retrieval-Augmented Generation",
-    type="messages",
-    examples=[
-        "At which university did the SVP Software Engineering"
-        " at Apple (as of June 2025) earn their engineering degree?",
-    ],
-)
 
 if __name__ == "__main__":
-    configs = Configs()
-    async_weaviate_client = get_weaviate_async_client(
-        http_host=configs.weaviate_http_host,
-        http_port=configs.weaviate_http_port,
-        http_secure=configs.weaviate_http_secure,
-        grpc_host=configs.weaviate_grpc_host,
-        grpc_port=configs.weaviate_grpc_port,
-        grpc_secure=configs.weaviate_grpc_secure,
-        api_key=configs.weaviate_api_key,
+    load_dotenv(verbose=True)
+
+    # Initialize client manager
+    # This class initializes the OpenAI and Weaviate async clients, as well as the
+    # Weaviate knowledge base tool. The initialization is done once when the clients
+    # are first accessed, and the clients are reused for subsequent calls.
+    client_manager = AsyncClientManager()
+
+    demo = gr.ChatInterface(
+        react_rag,
+        chatbot=gr.Chatbot(height=600),
+        textbox=gr.Textbox(lines=1, placeholder="Enter your prompt"),
+        examples=[
+            [
+                "At which university did the SVP Software Engineering"
+                " at Apple (as of June 2025) earn their engineering degree?"
+            ],
+            [
+                "Où le vice-président senior actuel d'Apple en charge de l'ingénierie "
+                "logicielle a-t-il obtenu son diplôme d'ingénieur?"
+            ],
+        ],
+        title="1.1: ReAct Agent for Retrieval-Augmented Generation",
     )
-    async_openai_client = AsyncOpenAI()
-    async_knowledgebase = AsyncWeaviateKnowledgeBase(
-        async_weaviate_client,
-        collection_name=configs.weaviate_collection_name,
-    )
-
-    signal.signal(signal.SIGINT, _handle_sigint)
 
     try:
         demo.launch(share=True)
     finally:
-        asyncio.run(_cleanup_clients())
+        asyncio.run(client_manager.close())