Skip to content

Commit 5eb0c30

Browse files
authored
Remove evaluator defs (#1255)
1 parent e92c3f6 · commit 5eb0c30

33 files changed

Lines changed: 800 additions & 867 deletions

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "uipath"
3-
version = "2.7.0"
3+
version = "2.7.1"
44
description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools."
55
readme = { file = "README.md", content-type = "text/markdown" }
66
requires-python = ">=3.11"

samples/calculator/evaluations/evaluators/custom/correct_operator.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
22

3-
from uipath.eval.evaluators import BaseEvaluator, BaseEvaluationCriteria, BaseEvaluatorConfig
3+
from uipath.eval.evaluators import BaseEvaluationCriteria, BaseEvaluatorConfig
4+
from uipath.eval.evaluators.base_evaluator import BaseEvaluator
45
from uipath.eval.models import AgentExecution, EvaluationResult, NumericEvaluationResult
56
from opentelemetry.sdk.trace import ReadableSpan
67

src/uipath/_cli/_evals/_console_progress_reporter.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
EvalSetRunUpdatedEvent,
1616
EvaluationEvents,
1717
)
18-
from uipath.eval.evaluators import BaseEvaluator
18+
from uipath.eval.evaluators.base_evaluator import GenericBaseEvaluator
1919
from uipath.eval.models import ScoreType
2020

2121
logger = logging.getLogger(__name__)
@@ -26,7 +26,7 @@ class ConsoleProgressReporter:
2626

2727
def __init__(self):
2828
self.console = Console()
29-
self.evaluators: dict[str, BaseEvaluator[Any, Any, Any]] = {}
29+
self.evaluators: dict[str, GenericBaseEvaluator[Any, Any, Any]] = {}
3030
self.display_started = False
3131
self.eval_results_by_name: dict[str, list[Any]] = {}
3232

src/uipath/_cli/_evals/_evaluator_factory.py

Lines changed: 31 additions & 184 deletions
Original file line numberDiff line numberDiff line change
@@ -9,85 +9,20 @@
99
from uipath._cli._evals._helpers import ( # type: ignore # Remove after gnarly fix
1010
try_extract_file_and_class_name,
1111
)
12-
from uipath._cli._evals._models._evaluator import (
13-
EvaluatorConfig,
14-
LegacyEqualsEvaluatorParams,
15-
LegacyEvaluator,
16-
LegacyJsonSimilarityEvaluatorParams,
17-
LegacyLLMEvaluatorParams,
18-
LegacyTrajectoryEvaluatorParams,
19-
)
20-
from uipath._cli._evals._models._evaluator_base_params import EvaluatorBaseParams
12+
from uipath._cli._evals._models._evaluator import CodedEvaluator, LegacyEvaluator
2113
from uipath._utils.constants import EVALS_FOLDER
2214
from uipath.eval.evaluators import (
2315
BaseEvaluator,
24-
LegacyBaseEvaluator,
16+
BaseLegacyEvaluator,
2517
LegacyContextPrecisionEvaluator,
26-
LegacyExactMatchEvaluator,
2718
LegacyFaithfulnessEvaluator,
28-
LegacyJsonSimilarityEvaluator,
2919
LegacyLlmAsAJudgeEvaluator,
3020
LegacyTrajectoryEvaluator,
3121
)
32-
from uipath.eval.evaluators.base_evaluator import BaseEvaluatorConfig
33-
from uipath.eval.evaluators.contains_evaluator import (
34-
ContainsEvaluator,
35-
ContainsEvaluatorConfig,
36-
)
37-
from uipath.eval.evaluators.exact_match_evaluator import (
38-
ExactMatchEvaluator,
39-
ExactMatchEvaluatorConfig,
40-
)
41-
from uipath.eval.evaluators.json_similarity_evaluator import (
42-
JsonSimilarityEvaluator,
43-
JsonSimilarityEvaluatorConfig,
44-
)
45-
from uipath.eval.evaluators.llm_judge_output_evaluator import (
46-
LLMJudgeOutputEvaluator,
47-
LLMJudgeOutputEvaluatorConfig,
48-
LLMJudgeStrictJSONSimilarityOutputEvaluator,
49-
LLMJudgeStrictJSONSimilarityOutputEvaluatorConfig,
50-
)
51-
from uipath.eval.evaluators.llm_judge_trajectory_evaluator import (
52-
LLMJudgeTrajectoryEvaluator,
53-
LLMJudgeTrajectoryEvaluatorConfig,
54-
LLMJudgeTrajectorySimulationEvaluator,
55-
LLMJudgeTrajectorySimulationEvaluatorConfig,
56-
)
57-
from uipath.eval.evaluators.tool_call_args_evaluator import (
58-
ToolCallArgsEvaluator,
59-
ToolCallArgsEvaluatorConfig,
60-
)
61-
from uipath.eval.evaluators.tool_call_count_evaluator import (
62-
ToolCallCountEvaluator,
63-
ToolCallCountEvaluatorConfig,
64-
)
65-
from uipath.eval.evaluators.tool_call_order_evaluator import (
66-
ToolCallOrderEvaluator,
67-
ToolCallOrderEvaluatorConfig,
68-
)
69-
from uipath.eval.evaluators.tool_call_output_evaluator import (
70-
ToolCallOutputEvaluator,
71-
ToolCallOutputEvaluatorConfig,
72-
)
73-
from uipath.eval.models import LegacyEvaluatorType
22+
from uipath.eval.evaluators.base_evaluator import GenericBaseEvaluator
7423

7524
logger = logging.getLogger(__name__)
7625

77-
EVALUATOR_SCHEMA_TO_EVALUATOR_CLASS = {
78-
ContainsEvaluatorConfig: ContainsEvaluator,
79-
ExactMatchEvaluatorConfig: ExactMatchEvaluator,
80-
JsonSimilarityEvaluatorConfig: JsonSimilarityEvaluator,
81-
LLMJudgeOutputEvaluatorConfig: LLMJudgeOutputEvaluator,
82-
LLMJudgeStrictJSONSimilarityOutputEvaluatorConfig: LLMJudgeStrictJSONSimilarityOutputEvaluator,
83-
LLMJudgeTrajectoryEvaluatorConfig: LLMJudgeTrajectoryEvaluator,
84-
LLMJudgeTrajectorySimulationEvaluatorConfig: LLMJudgeTrajectorySimulationEvaluator,
85-
ToolCallArgsEvaluatorConfig: ToolCallArgsEvaluator,
86-
ToolCallCountEvaluatorConfig: ToolCallCountEvaluator,
87-
ToolCallOrderEvaluatorConfig: ToolCallOrderEvaluator,
88-
ToolCallOutputEvaluatorConfig: ToolCallOutputEvaluator,
89-
}
90-
9126

9227
class EvaluatorFactory:
9328
"""Factory class for creating evaluator instances based on configuration."""
@@ -130,7 +65,7 @@ def create_evaluator(
13065
data: dict[str, Any],
13166
evaluators_dir: Path | None = None,
13267
agent_model: str | None = None,
133-
) -> BaseEvaluator[Any, Any, Any]:
68+
) -> GenericBaseEvaluator[Any, Any, Any]:
13469
if data.get("version", None) == "1.0":
13570
return cls._create_evaluator_internal(data, evaluators_dir)
13671
else:
@@ -147,31 +82,20 @@ def _create_evaluator_internal(
14782
evaluator_schema
14883
)
14984
if success:
150-
return EvaluatorFactory._create_coded_evaluator_internal(
85+
return EvaluatorFactory._create_custom_coded_evaluator_internal(
15186
data, file_path, class_name, evaluators_dir
15287
)
153-
154-
config: BaseEvaluatorConfig[Any] = TypeAdapter(EvaluatorConfig).validate_python(
155-
data
156-
)
157-
evaluator_class = EVALUATOR_SCHEMA_TO_EVALUATOR_CLASS.get(type(config))
158-
if not evaluator_class:
159-
raise ValueError(f"Unknown evaluator configuration: {config}")
160-
return TypeAdapter(evaluator_class).validate_python(
161-
{
162-
"id": data.get("id"),
163-
"config": EvaluatorFactory._prepare_evaluator_config(data),
164-
}
165-
)
88+
else:
89+
return TypeAdapter(CodedEvaluator).validate_python(data)
16690

16791
@staticmethod
168-
def _create_coded_evaluator_internal(
92+
def _create_custom_coded_evaluator_internal(
16993
data: dict[str, Any],
17094
file_path_str: str,
17195
class_name: str,
17296
evaluators_dir: Path | None = None,
17397
) -> BaseEvaluator[Any, Any, Any]:
174-
"""Create a coded evaluator by dynamically loading from a Python file.
98+
"""Create a custom coded evaluator by dynamically loading from a Python file.
17599
176100
Args:
177101
data: Dictionary containing evaluator configuration with evaluatorTypeId
@@ -242,18 +166,13 @@ def _create_coded_evaluator_internal(
242166
evaluator_id = data.get("id")
243167
if not evaluator_id or not isinstance(evaluator_id, str):
244168
raise ValueError("Evaluator 'id' must be a non-empty string")
245-
return TypeAdapter(evaluator_class).validate_python(
246-
{
247-
"id": evaluator_id,
248-
"config": EvaluatorFactory._prepare_evaluator_config(data),
249-
}
250-
)
169+
return TypeAdapter(evaluator_class).validate_python(data)
251170

252171
@staticmethod
253172
def _create_legacy_evaluator_internal(
254173
data: dict[str, Any],
255174
agent_model: str | None = None,
256-
) -> LegacyBaseEvaluator[Any]:
175+
) -> BaseLegacyEvaluator[Any]:
257176
"""Create an evaluator instance from configuration data.
258177
259178
Args:
@@ -267,97 +186,25 @@ def _create_legacy_evaluator_internal(
267186
Raises:
268187
ValueError: If category is unknown or required fields are missing
269188
"""
270-
params: EvaluatorBaseParams = TypeAdapter(LegacyEvaluator).validate_python(data)
271-
272-
match params:
273-
case LegacyEqualsEvaluatorParams():
274-
return EvaluatorFactory._create_legacy_exact_match_evaluator(params)
275-
case LegacyJsonSimilarityEvaluatorParams():
276-
return EvaluatorFactory._create_legacy_json_similarity_evaluator(params)
277-
case LegacyLLMEvaluatorParams():
278-
return EvaluatorFactory._create_legacy_llm_as_judge_evaluator(
279-
params, agent_model
280-
)
281-
case LegacyTrajectoryEvaluatorParams():
282-
return EvaluatorFactory._create_legacy_trajectory_evaluator(
283-
params, agent_model
284-
)
285-
case _:
286-
raise ValueError(f"Unknown evaluator category: {params}")
287-
288-
@staticmethod
289-
def _create_legacy_exact_match_evaluator(
290-
params: LegacyEqualsEvaluatorParams,
291-
) -> LegacyExactMatchEvaluator:
292-
"""Create a deterministic evaluator."""
293-
return LegacyExactMatchEvaluator(**params.model_dump(), config={})
294-
295-
@staticmethod
296-
def _create_legacy_json_similarity_evaluator(
297-
params: LegacyJsonSimilarityEvaluatorParams,
298-
) -> LegacyJsonSimilarityEvaluator:
299-
"""Create a deterministic evaluator."""
300-
return LegacyJsonSimilarityEvaluator(**params.model_dump(), config={})
301-
302-
@staticmethod
303-
def _create_legacy_llm_as_judge_evaluator(
304-
params: LegacyLLMEvaluatorParams,
305-
agent_model: str | None = None,
306-
) -> LegacyBaseEvaluator[Any]:
307-
"""Create an LLM-as-a-judge evaluator or context precision evaluator based on type."""
308-
if not params.model:
309-
raise ValueError("LLM evaluator must include 'model' field")
310-
311-
# Resolve 'same-as-agent' to actual agent model
312-
if params.model == "same-as-agent":
313-
if not agent_model:
314-
raise ValueError(
315-
"'same-as-agent' model option requires agent settings. "
316-
"Ensure agent.json contains valid model settings."
317-
)
318-
logger.info(
319-
f"Resolving 'same-as-agent' to agent model: {agent_model} "
320-
f"for evaluator '{params.name}'"
321-
)
322-
params = params.model_copy(update={"model": agent_model})
323-
324-
# Check evaluator type to determine which evaluator to create
325-
if params.evaluator_type == LegacyEvaluatorType.ContextPrecision:
326-
return LegacyContextPrecisionEvaluator(**params.model_dump(), config={})
327-
elif params.evaluator_type == LegacyEvaluatorType.Faithfulness:
328-
return LegacyFaithfulnessEvaluator(**params.model_dump(), config={})
329-
else:
330-
if not params.prompt:
331-
raise ValueError("LLM evaluator must include 'prompt' field")
332-
333-
return LegacyLlmAsAJudgeEvaluator(**params.model_dump(), config={})
334-
335-
@staticmethod
336-
def _create_legacy_trajectory_evaluator(
337-
params: LegacyTrajectoryEvaluatorParams,
338-
agent_model: str | None = None,
339-
) -> LegacyTrajectoryEvaluator:
340-
"""Create a trajectory evaluator."""
341-
if not params.prompt:
342-
raise ValueError("Trajectory evaluator must include 'prompt' field")
343-
344-
if not params.model:
345-
raise ValueError("Trajectory evaluator must include 'model' field")
346-
347-
# Resolve 'same-as-agent' to actual agent model
348-
if params.model == "same-as-agent":
349-
if not agent_model:
350-
raise ValueError(
351-
"'same-as-agent' model option requires agent settings. "
352-
"Ensure agent.json contains valid model settings."
189+
evaluator: LegacyEvaluator = TypeAdapter(LegacyEvaluator).validate_python(data)
190+
191+
if isinstance(
192+
evaluator,
193+
LegacyTrajectoryEvaluator
194+
| LegacyLlmAsAJudgeEvaluator
195+
| LegacyContextPrecisionEvaluator
196+
| LegacyFaithfulnessEvaluator,
197+
):
198+
if evaluator.model == "same-as-agent":
199+
if not agent_model:
200+
raise ValueError(
201+
"'same-as-agent' model option requires agent settings. "
202+
"Ensure agent.json contains valid model settings."
203+
)
204+
logger.info(
205+
f"Resolving 'same-as-agent' to agent model: {agent_model} "
206+
f"for evaluator '{evaluator.name}'"
353207
)
354-
logger.info(
355-
f"Resolving 'same-as-agent' to agent model: {agent_model} "
356-
f"for evaluator '{params.name}'"
357-
)
358-
params = params.model_copy(update={"model": agent_model})
208+
evaluator.model = agent_model
359209

360-
logger.info(
361-
f"Creating trajectory evaluator '{params.name}' with model: {params.model}"
362-
)
363-
return LegacyTrajectoryEvaluator(**params.model_dump(), config={})
210+
return evaluator

0 commit comments

Comments
 (0)