Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from ..datafabric_query_tool import DataFabricQueryTool
from . import datafabric_prompt_builder
from .models import DataFabricExecuteSqlInput
from .ontology_fetch_tool import create_ontology_fetch_tool

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -88,18 +89,32 @@ def __init__(
max_iterations: int = 25,
resource_description: str = "",
base_system_prompt: str = "",
ontology_name: str | None = None,
folder_key: str | None = None,
) -> None:
self._max_iterations = max_iterations
self._execute_sql_tool = self._create_execute_sql_tool(
entities_service, entities
)
# Inner toolset: always execute_sql; optionally an LLM-decided
# fetch_ontology tool when an ontology name is configured.
inner_tools: list[BaseTool] = [self._execute_sql_tool]
if ontology_name:
inner_tools.append(
create_ontology_fetch_tool(
entities_service, ontology_name, folder_key
)
)
self._tools_by_name: dict[str, BaseTool] = {
tool.name: tool for tool in inner_tools
}
self._system_message = SystemMessage(
content=datafabric_prompt_builder.build(
entities, resource_description, base_system_prompt
)
)
self._inner_llm = llm.model_copy(update={"disable_streaming": True}).bind_tools(
[self._execute_sql_tool]
inner_tools
)

# Build and compile the graph
Expand Down Expand Up @@ -139,27 +154,50 @@ async def tool_node(self, state: DataFabricSubgraphState) -> dict[str, Any]:
}

async def _execute_tool_call(self, tool_call: ToolCall) -> tuple[ToolMessage, bool]:
    """Execute a single tool call and report whether it is a terminal success.

    Dispatches by tool name so the sub-graph can host more than one tool
    (e.g. ``execute_sql`` and ``fetch_ontology``). Only a successful
    ``execute_sql`` that returned rows is terminal; every other tool
    (including ontology fetch) reports ``False`` so the router loops back to
    the inner LLM, letting it use the result to write or refine SQL.

    Args:
        tool_call: The tool call emitted by the inner LLM. Its ``name``,
            ``args`` and ``id`` entries are read.

    Returns:
        A ``(message, succeeded)`` tuple. ``message`` is the ``ToolMessage``
        to append to the conversation; ``succeeded`` is ``True`` only for an
        ``execute_sql`` result dict with no ``error`` and a positive
        ``total_count``.
    """
    name = tool_call.get("name", "")
    args = tool_call.get("args", {})
    tool = self._tools_by_name.get(name)
    if tool is None:
        # The LLM asked for a tool this sub-graph does not host: answer with
        # an error message instead of raising, so the loop can continue.
        return (
            ToolMessage(
                content=f"Unknown tool: {name}",
                tool_call_id=tool_call["id"],
                name=name,
                status="error",
            ),
            False,
        )
    try:
        result = await tool.ainvoke(args)
    except ValueError as e:
        if name == self._execute_sql_tool.name:
            # Keep the SQL error in the same dict shape as a normal
            # execute_sql result so downstream handling stays uniform.
            result = {
                "records": [],
                "total_count": 0,
                "error": str(e),
                "sql_query": args.get("sql_query", ""),
            }
        else:
            result = f"Tool '{name}' failed: {e}"
    # Terminal only when execute_sql actually returned rows without error.
    succeeded = (
        name == self._execute_sql_tool.name
        and isinstance(result, dict)
        and not result.get("error")
        and result.get("total_count", 0) > 0
    )
    return (
        ToolMessage(
            content=str(result),
            tool_call_id=tool_call["id"],
            name=name,
        ),
        succeeded,
    )
Expand Down Expand Up @@ -226,6 +264,8 @@ def create(
max_iterations: int = 25,
resource_description: str = "",
base_system_prompt: str = "",
ontology_name: str | None = None,
folder_key: str | None = None,
) -> CompiledStateGraph[Any]:
"""Create and return a compiled Data Fabric sub-graph."""
graph = DataFabricGraph(
Expand All @@ -235,5 +275,7 @@ def create(
max_iterations,
resource_description,
base_system_prompt,
ontology_name,
folder_key,
)
return graph.compiled_graph
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import asyncio
import logging
import os
from typing import Any

from langchain_core.language_models import BaseChatModel
Expand All @@ -28,6 +29,8 @@
logger = logging.getLogger(__name__)

BASE_SYSTEM_PROMPT = "base_system_prompt"
ONTOLOGY_NAME = "ontology_name"
FOLDER_KEY = "folder_key"


class DataFabricTextQueryHandler:
Expand All @@ -44,11 +47,15 @@ def __init__(
llm: BaseChatModel,
resource_description: str = "",
base_system_prompt: str = "",
ontology_name: str | None = None,
folder_key: str | None = None,
) -> None:
self._entity_set = entity_set
self._llm = llm
self._resource_description = resource_description
self._base_system_prompt = base_system_prompt
self._ontology_name = ontology_name
self._folder_key = folder_key
self._compiled: CompiledStateGraph[Any] | None = None
self._init_lock = asyncio.Lock()

Expand Down Expand Up @@ -82,6 +89,8 @@ async def _ensure_datafabric_graph(self) -> CompiledStateGraph[Any]:
entities_service=resolution.entities_service,
resource_description=self._resource_description,
base_system_prompt=self._base_system_prompt,
ontology_name=self._ontology_name,
folder_key=self._folder_key,
)
return self._compiled

Expand Down Expand Up @@ -159,11 +168,18 @@ def create_datafabric_query_tool(
DataFabricEntityItem.model_validate(item.model_dump(by_alias=True))
for item in (resource.entity_set or [])
]
# Ontology name is pinned from configuration (not chosen by the LLM).
# Falls back to env vars for local/demo runs that have no Agent Builder UI.
# When unset, no fetch_ontology tool is added (fully backward compatible).
ontology_name = config.get(ONTOLOGY_NAME) or os.getenv("UIPATH_ONTOLOGY_NAME")
folder_key = config.get(FOLDER_KEY) or os.getenv("UIPATH_FOLDER_KEY")
Comment on lines +171 to +175
handler = DataFabricTextQueryHandler(
entity_set=entity_set,
llm=llm,
resource_description=resource.description or "",
base_system_prompt=config.get(BASE_SYSTEM_PROMPT, ""),
ontology_name=ontology_name,
folder_key=folder_key,
)
entity_lines = []
for e in entity_set:
Expand Down
9 changes: 9 additions & 0 deletions src/uipath_langchain/agent/tools/datafabric_tool/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,12 @@ class DataFabricExecuteSqlInput(BaseModel):
"Use exact table and column names from the entity schemas."
),
)


class OntologyFetchInput(BaseModel):
    """Argument schema for the ontology fetch tool; deliberately has no fields.

    Which ontology to fetch is fixed by configuration rather than passed in
    by the LLM, so the model has no way to point the fetch at an arbitrary
    resource. Calling the tool just triggers a fetch of the configured
    ontology.
    """
118 changes: 118 additions & 0 deletions src/uipath_langchain/agent/tools/datafabric_tool/ontology_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""Client for fetching ontology files from UiPath Data Fabric (QueryEngine).

The QueryEngine ontology REST API is hosted under the same ``datafabric_``
service as Data Fabric entities, so we reuse the SDK's authenticated
``EntitiesService`` — its ``request_async`` already injects auth, tenant/account
scoping, and retries — instead of building a second auth path. The only
caller-influenced value is ``ontology_name``, which is validated against the
QueryEngine name contract before it becomes part of the request URL.

The ``owl`` file's content may be serialized as Turtle (.ttl) or as OWL
Functional Notation (.ofn) — both are valid OWL 2 QL serializations and both
are plain text. To stay agnostic to the stored serialization we request the
JSON wrapper (``Accept: application/json``), which always returns ``content``
plus its ``mediaType`` regardless of notation. Requesting a specific text type
(e.g. ``text/turtle``) would fail with HTTP 406 (Not Acceptable) when the
stored file uses the other notation.

Naming follows the REST API: the resource is identified by ``ontologyName``
(``OntologyController`` route ``/{ontologyName}/files/{fileType}``).
"""

import logging
import re
from typing import Any

logger = logging.getLogger(__name__)

# QueryEngine ontology name contract (OntologyCreateRequestValidator):
# lowercase, must start with a letter, max 64 chars.
_ONTOLOGY_NAME_RE = re.compile(r"^[a-z][a-z0-9-]{0,63}$")

# Defensive cap so a malformed or oversized file can never blow up the prompt
# or token budget. Real OWL 2 QL files are a few KB; QueryEngine caps at 10 MB.
_MAX_OWL_BYTES = 1_000_000

_FOLDER_KEY_HEADER = "X-UiPath-FolderKey"


def _validate_ontology_name(ontology_name: str) -> str:
    """Validate the ontology name against the QueryEngine name contract.

    The name becomes a path segment in the request URL, so only the documented
    charset is permitted. This blocks path-segment injection and traversal via
    crafted name values.

    Args:
        ontology_name: The ontology name to validate.

    Returns:
        The validated name (unchanged).

    Raises:
        ValueError: If the name does not match ``^[a-z][a-z0-9-]{0,63}$``.
    """
    # fullmatch, not match: with match() the ``$`` anchor also succeeds just
    # before a trailing newline, so a name like "abc\n" would pass validation
    # and carry the newline into the request URL.
    if not isinstance(ontology_name, str) or not _ONTOLOGY_NAME_RE.fullmatch(
        ontology_name
    ):
        raise ValueError(
            f"Invalid ontology name {ontology_name!r}. "
            "Must match ^[a-z][a-z0-9-]{0,63}$."
        )
    return ontology_name


async def fetch_ontology_owl(
    entities_service: Any,
    ontology_name: str,
    folder_key: str | None = None,
) -> tuple[str, str]:
    """Fetch the OWL file for an ontology from Data Fabric.

    Args:
        entities_service: An authenticated SDK ``EntitiesService``. Reused for
            its ``request_async`` (auth headers, base-URL scoping, retries).
        ontology_name: Ontology name. Validated against the QE name contract.
        folder_key: Optional UiPath folder key for folder-scoped resolution.

    Returns:
        A ``(content, media_type)`` tuple. ``content`` is the OWL text in
        whatever serialization is stored — Turtle or OWL Functional Notation;
        ``media_type`` is the stored media type (e.g. ``text/turtle``), usable
        to label the notation. Both are ``""`` when the response omits them.

    Raises:
        ValueError: If the name is invalid, the response body is not a JSON
            object, ``content`` is not a string, or the content exceeds the
            size cap.
        Transport/HTTP errors propagate from the SDK as raised exceptions
        (the caller decides how to degrade).
    """
    safe_name = _validate_ontology_name(ontology_name)
    # Same datafabric_ service the entities calls target; matches the
    # QueryEngine ontology route GET /ontologies/{ontologyName}/files/{fileType}.
    endpoint = f"datafabric_/api/ontologies/{safe_name}/files/owl"

    # JSON wrapper: notation-agnostic (works for Turtle or OFN) and returns the
    # stored mediaType. A text/* Accept would 406 on a serialization mismatch.
    headers = {"Accept": "application/json"}
    if folder_key:
        headers[_FOLDER_KEY_HEADER] = folder_key

    response = await entities_service.request_async(
        "GET", endpoint, scoped="tenant", headers=headers
    )

    data = response.json()
    # Reject unexpected payload shapes with ValueError (the documented failure
    # type) instead of letting ``.get`` / ``.encode`` raise AttributeError.
    if not isinstance(data, dict):
        raise ValueError(
            f"Unexpected ontology response for {safe_name!r}: "
            f"expected a JSON object, got {type(data).__name__}."
        )
    content = data.get("content") or ""
    media_type = data.get("mediaType") or ""
    if not isinstance(content, str):
        raise ValueError(
            f"Unexpected ontology content for {safe_name!r}: "
            f"expected a string, got {type(content).__name__}."
        )

    # The cap is in bytes, so measure the UTF-8 encoding rather than len(str).
    if len(content.encode("utf-8")) > _MAX_OWL_BYTES:
        raise ValueError(
            f"Ontology OWL for {safe_name!r} exceeds the "
            f"{_MAX_OWL_BYTES} byte limit."
        )
    logger.debug(
        "Fetched ontology OWL for %r (%d chars, mediaType=%s)",
        safe_name,
        len(content),
        media_type,
    )
    return content, media_type
Loading
Loading