From 8343275df7debe23152727f2aabe2b3874185dee Mon Sep 17 00:00:00 2001
From: mathurk
Date: Mon, 2 Feb 2026 20:06:45 -0500
Subject: [PATCH] fix: is_empty_value

---
 src/uipath/eval/_helpers/helpers.py           | 27 ++++++++++++
 .../legacy_llm_as_judge_evaluator.py          | 25 ++++++++++-
 .../evaluators/legacy_trajectory_evaluator.py | 19 +++++++++
 .../eval/evaluators/llm_as_judge_evaluator.py | 41 -------------------
 4 files changed, 70 insertions(+), 42 deletions(-)

diff --git a/src/uipath/eval/_helpers/helpers.py b/src/uipath/eval/_helpers/helpers.py
index 9318ca45b..f00f5f5e5 100644
--- a/src/uipath/eval/_helpers/helpers.py
+++ b/src/uipath/eval/_helpers/helpers.py
@@ -10,6 +10,33 @@
 from ..models import ErrorEvaluationResult, EvaluationResult
 
 
+def is_empty_value(value: Any) -> bool:
+    """Check if a value is empty or contains only empty values.
+
+    Handles multiple cases:
+    - None or empty string
+    - String with only whitespace
+    - Dict where all values are empty strings or whitespace
+    - Empty list or dict
+    """
+    if value is None:
+        return True
+
+    if isinstance(value, str):
+        return not value.strip()
+
+    if isinstance(value, dict):
+        if not value:  # Empty dict
+            return True
+        # Check if all values are empty strings
+        return all(isinstance(v, str) and not v.strip() for v in value.values())
+
+    if isinstance(value, list):
+        return len(value) == 0
+
+    return False
+
+
 def auto_discover_entrypoint() -> str:
     """Auto-discover entrypoint from config file.
 
diff --git a/src/uipath/eval/evaluators/legacy_llm_as_judge_evaluator.py b/src/uipath/eval/evaluators/legacy_llm_as_judge_evaluator.py
index 8ec2a1146..4b7bd7f0b 100644
--- a/src/uipath/eval/evaluators/legacy_llm_as_judge_evaluator.py
+++ b/src/uipath/eval/evaluators/legacy_llm_as_judge_evaluator.py
@@ -1,5 +1,6 @@
 """LLM-as-a-judge evaluator for subjective quality assessment of agent outputs."""
 
+import logging
 from typing import Any, Optional
 
 from pydantic import field_validator
@@ -9,7 +10,14 @@
 from ..._utils.constants import COMMUNITY_agents_SUFFIX
 from ...platform.chat import UiPathLlmChatService
 from ...platform.chat.llm_gateway import RequiredToolChoice
-from ..models.models import AgentExecution, EvaluationResult, LLMResponse
+from .._helpers.helpers import is_empty_value
+from ..models.models import (
+    AgentExecution,
+    EvaluationResult,
+    LLMResponse,
+    UiPathEvaluationError,
+    UiPathEvaluationErrorCategory,
+)
 from .legacy_base_evaluator import (
     LegacyBaseEvaluator,
     LegacyEvaluationCriteria,
@@ -17,6 +25,8 @@
 )
 from .legacy_llm_helpers import create_evaluation_tool, extract_tool_call_response
 
+logger = logging.getLogger(__name__)
+
 
 class LegacyLlmAsAJudgeEvaluatorConfig(LegacyEvaluatorConfig):
     """Configuration for legacy LLM-as-a-judge evaluators."""
@@ -124,6 +134,19 @@ def _create_evaluation_prompt(
         self, expected_output: Any, actual_output: Any
     ) -> str:
         """Create the evaluation prompt for the LLM."""
+        # Validate that expected output is not empty
+        if is_empty_value(expected_output):
+            logger.error(
+                "❌ EMPTY_EXPECTED_OUTPUT: Expected output is empty or contains only empty values. "
" + f"Received: {repr(expected_output)}" + ) + raise UiPathEvaluationError( + code="EMPTY_EXPECTED_OUTPUT", + title="Expected output cannot be empty", + detail="The evaluation criteria must contain a non-empty expected output.", + category=UiPathEvaluationErrorCategory.USER, + ) + formatted_prompt = self.prompt.replace( self.actual_output_placeholder, str(actual_output), diff --git a/src/uipath/eval/evaluators/legacy_trajectory_evaluator.py b/src/uipath/eval/evaluators/legacy_trajectory_evaluator.py index 27e8e73be..b81638034 100644 --- a/src/uipath/eval/evaluators/legacy_trajectory_evaluator.py +++ b/src/uipath/eval/evaluators/legacy_trajectory_evaluator.py @@ -1,5 +1,6 @@ """Trajectory evaluator for analyzing execution paths and decision sequences.""" +import logging from typing import Any, Optional from opentelemetry.sdk.trace import ReadableSpan @@ -10,11 +11,14 @@ from ..._utils.constants import COMMUNITY_agents_SUFFIX from ...platform.chat import UiPathLlmChatService from ...platform.chat.llm_gateway import RequiredToolChoice +from .._helpers.helpers import is_empty_value from ..models.models import ( AgentExecution, LLMResponse, NumericEvaluationResult, TrajectoryEvaluationTrace, + UiPathEvaluationError, + UiPathEvaluationErrorCategory, ) from .legacy_base_evaluator import ( LegacyBaseEvaluator, @@ -23,6 +27,8 @@ ) from .legacy_llm_helpers import create_evaluation_tool, extract_tool_call_response +logger = logging.getLogger(__name__) + class LegacyTrajectoryEvaluatorConfig(LegacyEvaluatorConfig): """Configuration for legacy trajectory evaluators.""" @@ -103,6 +109,19 @@ def _create_evaluation_prompt( agent_run_history: Any, ) -> str: """Create the evaluation prompt for the LLM.""" + # Validate that expected agent behavior is not empty + if is_empty_value(expected_agent_behavior): + logger.error( + "❌ EMPTY_EXPECTED_AGENT_BEHAVIOR: Expected agent behavior is empty or contains only empty values. " + f"Received: {repr(expected_agent_behavior)}" + ) + raise UiPathEvaluationError( + code="EMPTY_EXPECTED_AGENT_BEHAVIOR", + title="Expected agent behavior cannot be empty", + detail="The evaluation criteria must contain a non-empty expected agent behavior.", + category=UiPathEvaluationErrorCategory.USER, + ) + formatted_prompt = self.prompt.replace( self.expected_agent_behavior_placeholder, str(expected_agent_behavior), diff --git a/src/uipath/eval/evaluators/llm_as_judge_evaluator.py b/src/uipath/eval/evaluators/llm_as_judge_evaluator.py index b734f3de1..e4c9bffd8 100644 --- a/src/uipath/eval/evaluators/llm_as_judge_evaluator.py +++ b/src/uipath/eval/evaluators/llm_as_judge_evaluator.py @@ -136,34 +136,6 @@ def _get_expected_output(self, evaluation_criteria: T) -> Any: """Get the expected output from the evaluation criteria. Must be implemented by concrete evaluator classes.""" pass - def _is_empty_expected_output(self, expected_output: Any) -> bool: - """Check if the expected output is empty or contains only empty values. 
-
-        Handles multiple cases:
-        - None or empty string
-        - String with only whitespace
-        - Dict where all values are empty strings or whitespace
-        - Empty list or dict
-        """
-        if expected_output is None:
-            return True
-
-        if isinstance(expected_output, str):
-            return not expected_output.strip()
-
-        if isinstance(expected_output, dict):
-            if not expected_output:  # Empty dict
-                return True
-            # Check if all values are empty strings
-            return all(
-                isinstance(v, str) and not v.strip() for v in expected_output.values()
-            )
-
-        if isinstance(expected_output, list):
-            return len(expected_output) == 0
-
-        return False
-
     async def evaluate(
         self,
         agent_execution: AgentExecution,
@@ -193,19 +165,6 @@ def _create_evaluation_prompt(
         """Create the evaluation prompt for the LLM."""
         expected_output = self._get_expected_output(evaluation_criteria)
 
-        # Validate that expected output is not empty
-        if self._is_empty_expected_output(expected_output):
-            logger.error(
-                "❌ EMPTY_EXPECTED_OUTPUT: Expected output is empty or contains only empty values. "
-                f"Received: {repr(expected_output)}"
-            )
-            raise UiPathEvaluationError(
-                code="EMPTY_EXPECTED_OUTPUT",
-                title="Expected output cannot be empty",
-                detail="The evaluation criteria must contain a non-empty expected output or expected agent behavior.",
-                category=UiPathEvaluationErrorCategory.USER,
-            )
-
         formatted_prompt = self.evaluator_config.prompt.replace(
            self.actual_output_placeholder,
            str(self._get_actual_output(agent_execution)),
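
For reference, a quick illustration of how the new shared is_empty_value helper classifies inputs. This sketch is not part of the patch; it assumes the src/ layout above makes the module importable as uipath.eval._helpers.helpers, and the dict keys are arbitrary example data.

    from uipath.eval._helpers.helpers import is_empty_value

    # Values the evaluators now reject before building the prompt
    assert is_empty_value(None)
    assert is_empty_value("   ")
    assert is_empty_value({})
    assert is_empty_value([])
    assert is_empty_value({"output": "", "notes": "  "})

    # Values that pass through to prompt construction
    assert not is_empty_value("42")
    assert not is_empty_value({"output": "42"})
    assert not is_empty_value([1, 2, 3])
    assert not is_empty_value(0)  # non-string, non-container values are never treated as empty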