2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "uipath"
version = "2.6.26"
version = "2.6.27"
description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools."
readme = { file = "README.md", content-type = "text/markdown" }
requires-python = ">=3.11"
545 changes: 168 additions & 377 deletions src/uipath/_cli/_evals/_runtime.py

Large diffs are not rendered by default.

6 changes: 2 additions & 4 deletions src/uipath/_cli/_evals/_span_utils.py
@@ -8,7 +8,7 @@

# Type hint for runtime protocol (avoids circular imports)
try:
from uipath.runtime import UiPathRuntimeProtocol
from uipath.runtime import UiPathRuntimeProtocol, UiPathRuntimeSchema
except ImportError:
UiPathRuntimeProtocol = Any # type: ignore

@@ -192,8 +192,7 @@ async def configure_eval_set_run_span(
span: Span,
evaluator_averages: Dict[str, float],
execution_id: str,
runtime: Any,
get_schema_func: Any,
schema: UiPathRuntimeSchema,
success: bool = True,
) -> None:
"""Configure Evaluation Set Run span with output and metadata.
@@ -216,7 +215,6 @@

# Get runtime schemas
try:
schema = await get_schema_func(runtime)
input_schema = schema.input
output_schema = schema.output
except Exception:
60 changes: 60 additions & 0 deletions src/uipath/_cli/_utils/_eval_set.py
@@ -5,6 +5,7 @@
import click
from pydantic import ValidationError

from uipath._cli._evals._evaluator_factory import EvaluatorFactory
from uipath._cli._evals._models._evaluation_set import (
EvaluationItem,
EvaluationSet,
@@ -13,6 +14,7 @@
)
from uipath._cli._evals.mocks.types import InputMockingStrategy, LLMMockingStrategy
from uipath._cli._utils._console import ConsoleLogger
from uipath.eval.evaluators import BaseEvaluator

console = ConsoleLogger()

@@ -175,3 +177,61 @@ def migrate_evaluation_item(
if eval_ids:
eval_set.extract_selected_evals(eval_ids)
return eval_set, resolved_path

@staticmethod
async def load_evaluators(
Collaborator Author: This is moved from eval runtime as it needs to be adjacent to load eval set.

eval_set_path: str,
evaluation_set: EvaluationSet,
agent_model: str | None = None,
) -> list[BaseEvaluator[Any, Any, Any]]:
"""Load evaluators referenced by the evaluation set."""
evaluators = []
if evaluation_set is None:
raise ValueError("eval_set cannot be None")
evaluators_dir = Path(eval_set_path).parent.parent / "evaluators"

# If evaluatorConfigs is specified, use that (new field with weights)
# Otherwise, fall back to evaluatorRefs (old field without weights)
if (
hasattr(evaluation_set, "evaluator_configs")
and evaluation_set.evaluator_configs
):
# Use new evaluatorConfigs field - supports weights
evaluator_ref_ids = {ref.ref for ref in evaluation_set.evaluator_configs}
else:
# Fall back to old evaluatorRefs field - plain strings
evaluator_ref_ids = set(evaluation_set.evaluator_refs)

found_evaluator_ids = set()

for file in evaluators_dir.glob("*.json"):
try:
with open(file, "r", encoding="utf-8") as f:
data = json.load(f)
except json.JSONDecodeError as e:
raise ValueError(
f"Invalid JSON in evaluator file '{file}': {str(e)}. "
f"Please check the file for syntax errors."
) from e

try:
evaluator_id = data.get("id")
if evaluator_id in evaluator_ref_ids:
evaluator = EvaluatorFactory.create_evaluator(
data, evaluators_dir, agent_model=agent_model
)
evaluators.append(evaluator)
found_evaluator_ids.add(evaluator_id)
except Exception as e:
raise ValueError(
f"Failed to create evaluator from file '{file}': {str(e)}. "
f"Please verify the evaluator configuration."
) from e

missing_evaluators = evaluator_ref_ids - found_evaluator_ids
if missing_evaluators:
raise ValueError(
f"Could not find the following evaluators: {missing_evaluators}"
)

return evaluators
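
To make the new helper's contract concrete, here is a minimal usage sketch. The directory layout, file names, and the empty eval-ids filter are assumptions for illustration only, as is the import path for EvalHelpers; the helper itself resolves the evaluators directory as Path(eval_set_path).parent.parent / "evaluators".

```python
# Assumed layout (illustrative only):
#   evals/
#     eval-sets/my-eval-set.json   <- references evaluators via evaluatorConfigs or evaluatorRefs
#     evaluators/exact-match.json  <- {"id": "exact-match", ...}
import asyncio

from uipath._cli._utils._eval_set import EvalHelpers  # assumed import path


async def main() -> None:
    evaluation_set, resolved_path = EvalHelpers.load_eval_set(
        "evals/eval-sets/my-eval-set.json", []
    )
    evaluators = await EvalHelpers.load_evaluators(
        resolved_path,
        evaluation_set,
        agent_model=None,  # or the model resolved from the runtime schema, as in cli_eval.py
    )
    print([type(e).__name__ for e in evaluators])


asyncio.run(main())
```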
150 changes: 145 additions & 5 deletions src/uipath/_cli/cli_eval.py
@@ -2,16 +2,24 @@
import asyncio
import logging
import os
import uuid
from typing import Any

import click
from uipath.core.tracing import UiPathTraceManager
from uipath.runtime import UiPathRuntimeContext, UiPathRuntimeFactoryRegistry
from uipath.runtime import (
UiPathRuntimeContext,
UiPathRuntimeFactoryRegistry,
UiPathRuntimeProtocol,
UiPathRuntimeSchema,
)

from uipath._cli._evals._console_progress_reporter import ConsoleProgressReporter
from uipath._cli._evals._evaluate import evaluate
from uipath._cli._evals._models._evaluation_set import EvaluationSet
from uipath._cli._evals._progress_reporter import StudioWebProgressReporter
from uipath._cli._evals._runtime import (
LLMAgentRuntimeProtocol,
UiPathEvalContext,
)
from uipath._cli._evals._telemetry import EvalTelemetrySubscriber
@@ -62,6 +70,109 @@ def setup_reporting_prereq(no_report: bool) -> bool:
return True


def _find_agent_model_in_runtime(runtime: UiPathRuntimeProtocol) -> str | None:
Collaborator Author: Moved from eval runtime.

"""Recursively search for get_agent_model in runtime and its delegates.

Runtimes may be wrapped (e.g., ResumableRuntime wraps TelemetryWrapper
which wraps the base runtime). This method traverses the wrapper chain
to find a runtime that implements LLMAgentRuntimeProtocol.

Args:
runtime: The runtime to check (may be a wrapper)

Returns:
The model name if found, None otherwise.
"""
# Check if this runtime implements the protocol
if isinstance(runtime, LLMAgentRuntimeProtocol):
Member: We don't need this fallback anymore; the low-code runtime has the model in get_schema() (we can remove the LLMAgentRuntimeProtocol as well).

Collaborator Author: Can I remove it in a separate small PR? I want this one to focus on just the decoupling.

return runtime.get_agent_model()

# Check for delegate property (used by UiPathResumableRuntime, TelemetryRuntimeWrapper)
delegate = getattr(runtime, "delegate", None) or getattr(runtime, "_delegate", None)
if delegate is not None:
return _find_agent_model_in_runtime(delegate)

return None


async def _get_agent_model(
runtime: UiPathRuntimeProtocol, schema: UiPathRuntimeSchema
) -> str | None:
"""Get agent model from the runtime schema metadata.

The model is read from schema.metadata["settings"]["model"] which is
populated by the low-code agents runtime from agent.json.

Returns:
The model name from agent settings, or None if not found.
"""
try:
if schema.metadata and "settings" in schema.metadata:
settings = schema.metadata["settings"]
model = settings.get("model")
if model:
logger.debug(f"Got agent model from schema.metadata: {model}")
return model

# Fallback to protocol-based approach for backwards compatibility
model = _find_agent_model_in_runtime(runtime)
if model:
logger.debug(f"Got agent model from runtime protocol: {model}")
return model
except Exception:
return None
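
As a concrete illustration of what _get_agent_model reads, the rough shape of schema.metadata is sketched below; only the settings/model nesting comes from the code above, the values are assumed.

```python
# Assumed example of the metadata shape read by _get_agent_model:
schema_metadata = {
    "settings": {
        "model": "gpt-4o-2024-11-20",  # returned via settings.get("model")
        "temperature": 0,              # other settings keys are ignored here
    },
}
```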


def _resolve_model_settings_override(
model_settings_id: str, evaluation_set: EvaluationSet
) -> dict[str, Any] | None:
"""Resolve model settings override from evaluation set.

Returns:
Model settings dict to use for override, or None if using defaults.
Settings are passed to factory via settings kwarg.
"""
# Skip if no model settings ID specified or using default
if not model_settings_id or model_settings_id == "default":
return None

# Load evaluation set to get model settings
if not evaluation_set.model_settings:
logger.warning("No model settings available in evaluation set")
return None

# Find the specified model settings
target_model_settings = next(
(ms for ms in evaluation_set.model_settings if ms.id == model_settings_id),
None,
)

if not target_model_settings:
logger.warning(
f"Model settings ID '{model_settings_id}' not found in evaluation set"
)
return None

logger.info(
f"Applying model settings override: model={target_model_settings.model_name}, temperature={target_model_settings.temperature}"
)

# Return settings dict with correct keys for factory
override: dict[str, str | float] = {}
if (
target_model_settings.model_name
and target_model_settings.model_name != "same-as-agent"
):
override["model"] = target_model_settings.model_name
if (
target_model_settings.temperature is not None
and target_model_settings.temperature != "same-as-agent"
):
override["temperature"] = float(target_model_settings.temperature)

return override if override else None
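
For illustration, given a hypothetical model-settings entry selected through the model settings id option (all values assumed), the override built above would come out as follows; "same-as-agent" values and missing fields are skipped.

```python
# Hypothetical model-settings entry from the evaluation set (values assumed):
#   id = "low-temp"
#   model_name = "gpt-4o-mini"   (not "same-as-agent", so it is applied under the "model" key)
#   temperature = 0.1            ("same-as-agent" or None would be skipped)
settings_override = {"model": "gpt-4o-mini", "temperature": 0.1}
```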


@click.command()
@click.argument("entrypoint", required=False)
@click.argument("eval_set", required=False)
@@ -188,7 +299,6 @@ def eval(
eval_context = UiPathEvalContext()

eval_context.entrypoint = entrypoint or auto_discover_entrypoint()
eval_context.no_report = no_report
eval_context.workers = workers
eval_context.eval_set_run_id = eval_set_run_id
eval_context.enable_mocker_cache = enable_mocker_cache
@@ -197,10 +307,7 @@
eval_set_path = eval_set or EvalHelpers.auto_discover_eval_set()
_, resolved_eval_set_path = EvalHelpers.load_eval_set(eval_set_path, eval_ids)

eval_context.eval_set = resolved_eval_set_path
eval_context.eval_ids = eval_ids
eval_context.report_coverage = report_coverage
eval_context.model_settings_id = model_settings_id
eval_context.input_overrides = input_overrides
eval_context.resume = resume

@@ -269,6 +376,39 @@ async def execute_eval():

project_id = UiPathConfig.project_id

eval_context.execution_id = (
eval_context.job_id
or eval_context.eval_set_run_id
or str(uuid.uuid4())
)

# Load eval set (path is already resolved in cli_eval.py)
eval_context.evaluation_set, _ = EvalHelpers.load_eval_set(
resolved_eval_set_path, eval_ids
)

# Resolve model settings override from eval set
settings_override = _resolve_model_settings_override(
model_settings_id, eval_context.evaluation_set
)

runtime = await runtime_factory.new_runtime(
entrypoint=eval_context.entrypoint or "",
runtime_id=eval_context.execution_id,
settings=settings_override,
)

eval_context.runtime_schema = await runtime.get_schema()

eval_context.evaluators = await EvalHelpers.load_evaluators(
resolved_eval_set_path,
eval_context.evaluation_set,
await _get_agent_model(runtime, eval_context.runtime_schema),
)

# Runtime is not required anymore.
await runtime.dispose()

try:
if project_id:
studio_client = StudioClient(project_id)
Expand Down