27 changes: 27 additions & 0 deletions src/uipath/eval/_helpers/helpers.py
@@ -10,6 +10,33 @@
from ..models import ErrorEvaluationResult, EvaluationResult


def is_empty_value(value: Any) -> bool:
"""Check if a value is empty or contains only empty values.

Handles multiple cases:
- None or empty string
- String with only whitespace
- Dict where all values are empty strings or whitespace
- Empty list or dict
"""
if value is None:
return True

if isinstance(value, str):
return not value.strip()

if isinstance(value, dict):
if not value: # Empty dict
return True
# Check if all values are empty strings
return all(isinstance(v, str) and not v.strip() for v in value.values())

if isinstance(value, list):
return len(value) == 0

return False


def auto_discover_entrypoint() -> str:
"""Auto-discover entrypoint from config file.

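For reference, a minimal sketch of how the new `is_empty_value` helper behaves on typical inputs (illustrative values, not taken from the test suite):

```python
from uipath.eval._helpers.helpers import is_empty_value

# None, blank strings, and whitespace-only strings count as empty.
assert is_empty_value(None)
assert is_empty_value("")
assert is_empty_value("   ")

# Dicts are empty when they have no keys or when every value is a blank string.
assert is_empty_value({})
assert is_empty_value({"summary": "", "notes": "  "})
assert not is_empty_value({"summary": "looks good"})

# Lists are empty only when they have zero elements; other types are never empty.
assert is_empty_value([])
assert not is_empty_value([""])  # non-empty list, even though its only element is blank
assert not is_empty_value(0)     # falls through to the final `return False`
```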
25 changes: 24 additions & 1 deletion src/uipath/eval/evaluators/legacy_llm_as_judge_evaluator.py
@@ -1,5 +1,6 @@
"""LLM-as-a-judge evaluator for subjective quality assessment of agent outputs."""

import logging
from typing import Any, Optional

from pydantic import field_validator
@@ -9,14 +10,23 @@
from ..._utils.constants import COMMUNITY_agents_SUFFIX
from ...platform.chat import UiPathLlmChatService
from ...platform.chat.llm_gateway import RequiredToolChoice
from ..models.models import AgentExecution, EvaluationResult, LLMResponse
from .._helpers.helpers import is_empty_value
from ..models.models import (
AgentExecution,
EvaluationResult,
LLMResponse,
UiPathEvaluationError,
UiPathEvaluationErrorCategory,
)
from .legacy_base_evaluator import (
LegacyBaseEvaluator,
LegacyEvaluationCriteria,
LegacyEvaluatorConfig,
)
from .legacy_llm_helpers import create_evaluation_tool, extract_tool_call_response

logger = logging.getLogger(__name__)


class LegacyLlmAsAJudgeEvaluatorConfig(LegacyEvaluatorConfig):
"""Configuration for legacy LLM-as-a-judge evaluators."""
@@ -124,6 +134,19 @@ def _create_evaluation_prompt(
self, expected_output: Any, actual_output: Any
) -> str:
"""Create the evaluation prompt for the LLM."""
# Validate that expected output is not empty
if is_empty_value(expected_output):
logger.error(
"❌ EMPTY_EXPECTED_OUTPUT: Expected output is empty or contains only empty values. "
f"Received: {repr(expected_output)}"
)
raise UiPathEvaluationError(
code="EMPTY_EXPECTED_OUTPUT",
title="Expected output cannot be empty",
detail="The evaluation criteria must contain a non-empty expected output.",
category=UiPathEvaluationErrorCategory.USER,
)

formatted_prompt = self.prompt.replace(
self.actual_output_placeholder,
str(actual_output),
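The practical effect in the legacy LLM-as-a-judge evaluator is that empty criteria now fail fast before any LLM call is made. A rough sketch of how a caller would observe this (here `evaluator` stands in for an already-configured `LegacyLlmAsAJudgeEvaluator` instance, and calling the private `_create_evaluation_prompt` directly is for illustration only):

```python
from uipath.eval.models.models import UiPathEvaluationError

try:
    # Whitespace-only expected output is rejected by the new is_empty_value guard.
    evaluator._create_evaluation_prompt(
        expected_output={"answer": "   "},
        actual_output="42",
    )
except UiPathEvaluationError as err:
    # Assuming the error keeps the fields it was constructed with,
    # it carries code "EMPTY_EXPECTED_OUTPUT" and the USER category.
    print(err)
```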
19 changes: 19 additions & 0 deletions src/uipath/eval/evaluators/legacy_trajectory_evaluator.py
@@ -1,5 +1,6 @@
"""Trajectory evaluator for analyzing execution paths and decision sequences."""

import logging
from typing import Any, Optional

from opentelemetry.sdk.trace import ReadableSpan
@@ -10,11 +11,14 @@
from ..._utils.constants import COMMUNITY_agents_SUFFIX
from ...platform.chat import UiPathLlmChatService
from ...platform.chat.llm_gateway import RequiredToolChoice
from .._helpers.helpers import is_empty_value
from ..models.models import (
AgentExecution,
LLMResponse,
NumericEvaluationResult,
TrajectoryEvaluationTrace,
UiPathEvaluationError,
UiPathEvaluationErrorCategory,
)
from .legacy_base_evaluator import (
LegacyBaseEvaluator,
@@ -23,6 +27,8 @@
)
from .legacy_llm_helpers import create_evaluation_tool, extract_tool_call_response

logger = logging.getLogger(__name__)


class LegacyTrajectoryEvaluatorConfig(LegacyEvaluatorConfig):
"""Configuration for legacy trajectory evaluators."""
@@ -103,6 +109,19 @@ def _create_evaluation_prompt(
agent_run_history: Any,
) -> str:
"""Create the evaluation prompt for the LLM."""
# Validate that expected agent behavior is not empty
if is_empty_value(expected_agent_behavior):
logger.error(
"❌ EMPTY_EXPECTED_AGENT_BEHAVIOR: Expected agent behavior is empty or contains only empty values. "
f"Received: {repr(expected_agent_behavior)}"
)
raise UiPathEvaluationError(
code="EMPTY_EXPECTED_AGENT_BEHAVIOR",
title="Expected agent behavior cannot be empty",
detail="The evaluation criteria must contain a non-empty expected agent behavior.",
category=UiPathEvaluationErrorCategory.USER,
)

formatted_prompt = self.prompt.replace(
self.expected_agent_behavior_placeholder,
str(expected_agent_behavior),
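The trajectory evaluator gets the symmetric guard for `expected_agent_behavior`. A hedged sketch of how the empty cases could be pinned down in a test (the `trajectory_evaluator` fixture and the keyword argument names are assumptions inferred from the validation code above):

```python
import pytest

from uipath.eval.models.models import UiPathEvaluationError


@pytest.mark.parametrize("empty_behavior", [None, "", "   ", {}, {"behavior": " "}, []])
def test_empty_expected_behavior_is_rejected(trajectory_evaluator, empty_behavior):
    # Each empty variant should be rejected before the prompt is formatted.
    with pytest.raises(UiPathEvaluationError):
        trajectory_evaluator._create_evaluation_prompt(
            expected_agent_behavior=empty_behavior,
            agent_run_history=[],
        )
```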
41 changes: 0 additions & 41 deletions src/uipath/eval/evaluators/llm_as_judge_evaluator.py
@@ -136,34 +136,6 @@ def _get_expected_output(self, evaluation_criteria: T) -> Any:
"""Get the expected output from the evaluation criteria. Must be implemented by concrete evaluator classes."""
pass

def _is_empty_expected_output(self, expected_output: Any) -> bool:
"""Check if the expected output is empty or contains only empty values.

Handles multiple cases:
- None or empty string
- String with only whitespace
- Dict where all values are empty strings or whitespace
- Empty list or dict
"""
if expected_output is None:
return True

if isinstance(expected_output, str):
return not expected_output.strip()

if isinstance(expected_output, dict):
if not expected_output: # Empty dict
return True
# Check if all values are empty strings
return all(
isinstance(v, str) and not v.strip() for v in expected_output.values()
)

if isinstance(expected_output, list):
return len(expected_output) == 0

return False

async def evaluate(
self,
agent_execution: AgentExecution,
@@ -193,19 +165,6 @@ def _create_evaluation_prompt(
"""Create the evaluation prompt for the LLM."""
expected_output = self._get_expected_output(evaluation_criteria)

# Validate that expected output is not empty
if self._is_empty_expected_output(expected_output):
logger.error(
"❌ EMPTY_EXPECTED_OUTPUT: Expected output is empty or contains only empty values. "
f"Received: {repr(expected_output)}"
)
raise UiPathEvaluationError(
code="EMPTY_EXPECTED_OUTPUT",
title="Expected output cannot be empty",
detail="The evaluation criteria must contain a non-empty expected output or expected agent behavior.",
category=UiPathEvaluationErrorCategory.USER,
)

formatted_prompt = self.evaluator_config.prompt.replace(
self.actual_output_placeholder,
str(self._get_actual_output(agent_execution)),