27 changes: 27 additions & 0 deletions src/uipath/eval/_helpers/helpers.py
@@ -10,6 +10,33 @@
from ..models import ErrorEvaluationResult, EvaluationResult


def is_empty_value(value: Any) -> bool:
"""Check if a value is empty or contains only empty values.

Handles multiple cases:
- None or empty string
- String with only whitespace
- Dict where all values are empty strings or whitespace
- Empty list or dict
"""
if value is None:
return True

if isinstance(value, str):
return not value.strip()

if isinstance(value, dict):
if not value: # Empty dict
return True
# Check if all values are empty strings
return all(isinstance(v, str) and not v.strip() for v in value.values())

if isinstance(value, list):
return len(value) == 0

return False


def auto_discover_entrypoint() -> str:
"""Auto-discover entrypoint from config file.

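For reference, a minimal sketch of how the new `is_empty_value` helper behaves on typical inputs (illustrative values, not taken from the test suite):

```python
from uipath.eval._helpers.helpers import is_empty_value

# None, blank strings, and whitespace-only strings count as empty.
assert is_empty_value(None)
assert is_empty_value("")
assert is_empty_value("   ")

# Dicts are empty when they have no keys or when every value is a blank string.
assert is_empty_value({})
assert is_empty_value({"summary": "", "notes": "  "})
assert not is_empty_value({"summary": "looks good"})

# Lists are empty only when they have zero elements; other types are never empty.
assert is_empty_value([])
assert not is_empty_value([""])  # non-empty list, even though its only element is blank
assert not is_empty_value(0)     # falls through to the final `return False`
```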
25 changes: 24 additions & 1 deletion src/uipath/eval/evaluators/legacy_llm_as_judge_evaluator.py
@@ -1,5 +1,6 @@
"""LLM-as-a-judge evaluator for subjective quality assessment of agent outputs."""

import logging
from typing import Any, Optional

from pydantic import field_validator
@@ -9,14 +10,23 @@
from ..._utils.constants import COMMUNITY_agents_SUFFIX
from ...platform.chat import UiPathLlmChatService
from ...platform.chat.llm_gateway import RequiredToolChoice
from ..models.models import AgentExecution, EvaluationResult, LLMResponse
from .._helpers.helpers import is_empty_value
from ..models.models import (
AgentExecution,
EvaluationResult,
LLMResponse,
UiPathEvaluationError,
UiPathEvaluationErrorCategory,
)
from .legacy_base_evaluator import (
LegacyBaseEvaluator,
LegacyEvaluationCriteria,
LegacyEvaluatorConfig,
)
from .legacy_llm_helpers import create_evaluation_tool, extract_tool_call_response

logger = logging.getLogger(__name__)


class LegacyLlmAsAJudgeEvaluatorConfig(LegacyEvaluatorConfig):
"""Configuration for legacy LLM-as-a-judge evaluators."""
@@ -124,6 +134,19 @@ def _create_evaluation_prompt(
self, expected_output: Any, actual_output: Any
) -> str:
"""Create the evaluation prompt for the LLM."""
# Validate that expected output is not empty
if is_empty_value(expected_output):
logger.error(
"❌ EMPTY_EXPECTED_OUTPUT: Expected output is empty or contains only empty values. "
f"Received: {repr(expected_output)}"
)
raise UiPathEvaluationError(
code="EMPTY_EXPECTED_OUTPUT",
title="Expected output cannot be empty",
detail="The evaluation criteria must contain a non-empty expected output.",
category=UiPathEvaluationErrorCategory.USER,
)

formatted_prompt = self.prompt.replace(
self.actual_output_placeholder,
str(actual_output),
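The practical effect in the legacy LLM-as-a-judge evaluator is that empty criteria now fail fast before any LLM call is made. A rough sketch of how a caller would observe this (here `evaluator` stands in for an already-configured `LegacyLlmAsAJudgeEvaluator` instance, and calling the private `_create_evaluation_prompt` directly is for illustration only):

```python
from uipath.eval.models.models import UiPathEvaluationError

try:
    # Whitespace-only expected output is rejected by the new is_empty_value guard.
    evaluator._create_evaluation_prompt(
        expected_output={"answer": "   "},
        actual_output="42",
    )
except UiPathEvaluationError as err:
    # Assuming the error keeps the fields it was constructed with,
    # it carries code "EMPTY_EXPECTED_OUTPUT" and the USER category.
    print(err)
```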
19 changes: 19 additions & 0 deletions src/uipath/eval/evaluators/legacy_trajectory_evaluator.py
@@ -1,5 +1,6 @@
"""Trajectory evaluator for analyzing execution paths and decision sequences."""

import logging
from typing import Any, Optional

from opentelemetry.sdk.trace import ReadableSpan
@@ -10,11 +11,14 @@
from ..._utils.constants import COMMUNITY_agents_SUFFIX
from ...platform.chat import UiPathLlmChatService
from ...platform.chat.llm_gateway import RequiredToolChoice
from .._helpers.helpers import is_empty_value
from ..models.models import (
AgentExecution,
LLMResponse,
NumericEvaluationResult,
TrajectoryEvaluationTrace,
UiPathEvaluationError,
UiPathEvaluationErrorCategory,
)
from .legacy_base_evaluator import (
LegacyBaseEvaluator,
@@ -23,6 +27,8 @@
)
from .legacy_llm_helpers import create_evaluation_tool, extract_tool_call_response

logger = logging.getLogger(__name__)


class LegacyTrajectoryEvaluatorConfig(LegacyEvaluatorConfig):
"""Configuration for legacy trajectory evaluators."""
@@ -103,6 +109,19 @@ def _create_evaluation_prompt(
agent_run_history: Any,
) -> str:
"""Create the evaluation prompt for the LLM."""
# Validate that expected agent behavior is not empty
if is_empty_value(expected_agent_behavior):
logger.error(
"❌ EMPTY_EXPECTED_AGENT_BEHAVIOR: Expected agent behavior is empty or contains only empty values. "
f"Received: {repr(expected_agent_behavior)}"
)
raise UiPathEvaluationError(
code="EMPTY_EXPECTED_AGENT_BEHAVIOR",
title="Expected agent behavior cannot be empty",
detail="The evaluation criteria must contain a non-empty expected agent behavior.",
category=UiPathEvaluationErrorCategory.USER,
)

formatted_prompt = self.prompt.replace(
self.expected_agent_behavior_placeholder,
str(expected_agent_behavior),
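The trajectory evaluator gets the symmetric guard for `expected_agent_behavior`. A hedged sketch of how the empty cases could be pinned down in a test (the `trajectory_evaluator` fixture and the keyword argument names are assumptions inferred from the validation code above):

```python
import pytest

from uipath.eval.models.models import UiPathEvaluationError


@pytest.mark.parametrize("empty_behavior", [None, "", "   ", {}, {"behavior": " "}, []])
def test_empty_expected_behavior_is_rejected(trajectory_evaluator, empty_behavior):
    # Each empty variant should be rejected before the prompt is formatted.
    with pytest.raises(UiPathEvaluationError):
        trajectory_evaluator._create_evaluation_prompt(
            expected_agent_behavior=empty_behavior,
            agent_run_history=[],
        )
```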
41 changes: 0 additions & 41 deletions src/uipath/eval/evaluators/llm_as_judge_evaluator.py
@@ -136,34 +136,6 @@ def _get_expected_output(self, evaluation_criteria: T) -> Any:
"""Get the expected output from the evaluation criteria. Must be implemented by concrete evaluator classes."""
pass

def _is_empty_expected_output(self, expected_output: Any) -> bool:
"""Check if the expected output is empty or contains only empty values.

Handles multiple cases:
- None or empty string
- String with only whitespace
- Dict where all values are empty strings or whitespace
- Empty list or dict
"""
if expected_output is None:
return True

if isinstance(expected_output, str):
return not expected_output.strip()

if isinstance(expected_output, dict):
if not expected_output: # Empty dict
return True
# Check if all values are empty strings
return all(
isinstance(v, str) and not v.strip() for v in expected_output.values()
)

if isinstance(expected_output, list):
return len(expected_output) == 0

return False

async def evaluate(
self,
agent_execution: AgentExecution,
@@ -193,19 +165,6 @@ def _create_evaluation_prompt(
"""Create the evaluation prompt for the LLM."""
expected_output = self._get_expected_output(evaluation_criteria)

# Validate that expected output is not empty
if self._is_empty_expected_output(expected_output):
logger.error(
"❌ EMPTY_EXPECTED_OUTPUT: Expected output is empty or contains only empty values. "
f"Received: {repr(expected_output)}"
)
raise UiPathEvaluationError(
code="EMPTY_EXPECTED_OUTPUT",
title="Expected output cannot be empty",
detail="The evaluation criteria must contain a non-empty expected output or expected agent behavior.",
category=UiPathEvaluationErrorCategory.USER,
)

formatted_prompt = self.evaluator_config.prompt.replace(
self.actual_output_placeholder,
str(self._get_actual_output(agent_execution)),