Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions src/uipath/_cli/_evals/_evaluator_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
try_extract_file_and_class_name,
)
from uipath._cli._evals._models._evaluator import (
EqualsEvaluatorParams,
EvaluatorConfig,
JsonSimilarityEvaluatorParams,
LegacyEqualsEvaluatorParams,
LegacyEvaluator,
LLMEvaluatorParams,
TrajectoryEvaluatorParams,
LegacyJsonSimilarityEvaluatorParams,
LegacyLLMEvaluatorParams,
LegacyTrajectoryEvaluatorParams,
)
from uipath._cli._evals._models._evaluator_base_params import EvaluatorBaseParams
from uipath._utils.constants import EVALS_FOLDER
Expand Down Expand Up @@ -401,15 +401,15 @@ def _create_legacy_evaluator_internal(
params: EvaluatorBaseParams = TypeAdapter(LegacyEvaluator).validate_python(data)

match params:
case EqualsEvaluatorParams():
case LegacyEqualsEvaluatorParams():
return EvaluatorFactory._create_legacy_exact_match_evaluator(params)
case JsonSimilarityEvaluatorParams():
case LegacyJsonSimilarityEvaluatorParams():
return EvaluatorFactory._create_legacy_json_similarity_evaluator(params)
case LLMEvaluatorParams():
case LegacyLLMEvaluatorParams():
return EvaluatorFactory._create_legacy_llm_as_judge_evaluator(
params, agent_model
)
case TrajectoryEvaluatorParams():
case LegacyTrajectoryEvaluatorParams():
return EvaluatorFactory._create_legacy_trajectory_evaluator(
params, agent_model
)
Expand All @@ -418,21 +418,21 @@ def _create_legacy_evaluator_internal(

@staticmethod
def _create_legacy_exact_match_evaluator(
params: EqualsEvaluatorParams,
params: LegacyEqualsEvaluatorParams,
) -> LegacyExactMatchEvaluator:
"""Create a deterministic evaluator."""
return LegacyExactMatchEvaluator(**params.model_dump(), config={})

@staticmethod
def _create_legacy_json_similarity_evaluator(
params: JsonSimilarityEvaluatorParams,
params: LegacyJsonSimilarityEvaluatorParams,
) -> LegacyJsonSimilarityEvaluator:
"""Create a deterministic evaluator."""
return LegacyJsonSimilarityEvaluator(**params.model_dump(), config={})

@staticmethod
def _create_legacy_llm_as_judge_evaluator(
params: LLMEvaluatorParams,
params: LegacyLLMEvaluatorParams,
agent_model: str | None = None,
) -> LegacyBaseEvaluator[Any]:
"""Create an LLM-as-a-judge evaluator or context precision evaluator based on type."""
Expand Down Expand Up @@ -465,7 +465,7 @@ def _create_legacy_llm_as_judge_evaluator(

@staticmethod
def _create_legacy_trajectory_evaluator(
params: TrajectoryEvaluatorParams,
params: LegacyTrajectoryEvaluatorParams,
agent_model: str | None = None,
) -> LegacyTrajectoryEvaluator:
"""Create a trajectory evaluator."""
Expand Down
101 changes: 26 additions & 75 deletions src/uipath/_cli/_evals/_models/_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
)


class EvaluatorBaseParams(BaseModel):
class LegacyEvaluatorBaseParams(BaseModel):
"""Parameters for initializing the base evaluator."""

id: str
Expand All @@ -48,7 +48,7 @@ class EvaluatorBaseParams(BaseModel):
file_name: str = Field(..., alias="fileName")


class LLMEvaluatorParams(EvaluatorBaseParams):
class LegacyLLMEvaluatorParams(LegacyEvaluatorBaseParams):
category: Literal[LegacyEvaluatorCategory.LlmAsAJudge] = Field(
..., alias="category"
)
Expand All @@ -60,7 +60,7 @@ class LLMEvaluatorParams(EvaluatorBaseParams):
)


class TrajectoryEvaluatorParams(EvaluatorBaseParams):
class LegacyTrajectoryEvaluatorParams(LegacyEvaluatorBaseParams):
category: Literal[LegacyEvaluatorCategory.Trajectory] = Field(..., alias="category")
prompt: str = Field(..., alias="prompt")
model: str = Field(..., alias="model")
Expand All @@ -70,71 +70,71 @@ class TrajectoryEvaluatorParams(EvaluatorBaseParams):
)


class EqualsEvaluatorParams(EvaluatorBaseParams):
class LegacyEqualsEvaluatorParams(LegacyEvaluatorBaseParams):
model_config = ConfigDict(
validate_by_name=True, validate_by_alias=True, extra="allow"
)


class JsonSimilarityEvaluatorParams(EvaluatorBaseParams):
class LegacyJsonSimilarityEvaluatorParams(LegacyEvaluatorBaseParams):
model_config = ConfigDict(
validate_by_name=True, validate_by_alias=True, extra="allow"
)


class UnknownEvaluatorParams(EvaluatorBaseParams):
class LegacyUnknownEvaluatorParams(LegacyEvaluatorBaseParams):
model_config = ConfigDict(
validate_by_name=True, validate_by_alias=True, extra="allow"
)


def evaluator_discriminator(data: Any) -> str:
def legacy_evaluator_discriminator(data: Any) -> str:
if isinstance(data, dict):
category = data.get("category")
evaluator_type = data.get("type")
match category:
case LegacyEvaluatorCategory.LlmAsAJudge:
return "LLMEvaluatorParams"
return "LegacyLLMEvaluatorParams"
case LegacyEvaluatorCategory.Trajectory:
return "TrajectoryEvaluatorParams"
return "LegacyTrajectoryEvaluatorParams"
case LegacyEvaluatorCategory.Deterministic:
match evaluator_type:
case LegacyEvaluatorType.Equals:
return "EqualsEvaluatorParams"
return "LegacyEqualsEvaluatorParams"
case LegacyEvaluatorType.JsonSimilarity:
return "JsonSimilarityEvaluatorParams"
return "LegacyJsonSimilarityEvaluatorParams"
case _:
return "UnknownEvaluatorParams"
return "LegacyUnknownEvaluatorParams"
case _:
return "UnknownEvaluatorParams"
return "LegacyUnknownEvaluatorParams"
else:
return "UnknownEvaluatorParams"
return "LegacyUnknownEvaluatorParams"


Evaluator = Annotated[
LegacyEvaluator = Annotated[
Union[
Annotated[
LLMEvaluatorParams,
Tag("LLMEvaluatorParams"),
LegacyLLMEvaluatorParams,
Tag("LegacyLLMEvaluatorParams"),
],
Annotated[
TrajectoryEvaluatorParams,
Tag("TrajectoryEvaluatorParams"),
LegacyTrajectoryEvaluatorParams,
Tag("LegacyTrajectoryEvaluatorParams"),
],
Annotated[
EqualsEvaluatorParams,
Tag("EqualsEvaluatorParams"),
LegacyEqualsEvaluatorParams,
Tag("LegacyEqualsEvaluatorParams"),
],
Annotated[
JsonSimilarityEvaluatorParams,
Tag("JsonSimilarityEvaluatorParams"),
LegacyJsonSimilarityEvaluatorParams,
Tag("LegacyJsonSimilarityEvaluatorParams"),
],
Annotated[
UnknownEvaluatorParams,
Tag("UnknownEvaluatorParams"),
LegacyUnknownEvaluatorParams,
Tag("LegacyUnknownEvaluatorParams"),
],
],
Field(discriminator=Discriminator(evaluator_discriminator)),
Field(discriminator=Discriminator(legacy_evaluator_discriminator)),
]


Expand All @@ -144,29 +144,6 @@ class UnknownEvaluatorConfig(BaseEvaluatorConfig[Any]):
)


def legacy_evaluator_discriminator(data: Any) -> str:
if isinstance(data, dict):
category = data.get("category")
evaluator_type = data.get("type")
match category:
case LegacyEvaluatorCategory.LlmAsAJudge:
return "LLMEvaluatorParams"
case LegacyEvaluatorCategory.Trajectory:
return "TrajectoryEvaluatorParams"
case LegacyEvaluatorCategory.Deterministic:
match evaluator_type:
case LegacyEvaluatorType.Equals:
return "EqualsEvaluatorParams"
case LegacyEvaluatorType.JsonSimilarity:
return "JsonSimilarityEvaluatorParams"
case _:
return "UnknownEvaluatorParams"
case _:
return "UnknownEvaluatorParams"
else:
return "UnknownEvaluatorParams"


def evaluator_config_discriminator(data: Any) -> str:
if isinstance(data, dict):
evaluator_type_id = data.get("evaluatorTypeId")
Expand Down Expand Up @@ -199,32 +176,6 @@ def evaluator_config_discriminator(data: Any) -> str:
return "UnknownEvaluatorConfig"


LegacyEvaluator = Annotated[
Union[
Annotated[
LLMEvaluatorParams,
Tag("LLMEvaluatorParams"),
],
Annotated[
TrajectoryEvaluatorParams,
Tag("TrajectoryEvaluatorParams"),
],
Annotated[
EqualsEvaluatorParams,
Tag("EqualsEvaluatorParams"),
],
Annotated[
JsonSimilarityEvaluatorParams,
Tag("JsonSimilarityEvaluatorParams"),
],
Annotated[
UnknownEvaluatorParams,
Tag("UnknownEvaluatorParams"),
],
],
Field(discriminator=Discriminator(legacy_evaluator_discriminator)),
]

EvaluatorConfig = Annotated[
Union[
Annotated[
Expand Down
3 changes: 1 addition & 2 deletions src/uipath/_cli/_evals/_progress_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
EvaluationItem,
EvaluationStatus,
)
from uipath._cli._evals._models._evaluator import Evaluator
from uipath._cli._evals._models._sw_reporting import (
StudioWebAgentSnapshot,
StudioWebProgressItem,
Expand Down Expand Up @@ -453,7 +452,7 @@ async def create_eval_run(
async def update_eval_run(
self,
sw_progress_item: StudioWebProgressItem,
evaluators: dict[str, Evaluator],
evaluators: dict[str, BaseEvaluator[Any, Any, Any]],
is_coded: bool = False,
spans: list[Any] | None = None,
):
Expand Down
4 changes: 2 additions & 2 deletions src/uipath/agent/models/evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pydantic import Field

from uipath._cli._evals._models._evaluation_set import EvaluationSet
from uipath._cli._evals._models._evaluator import Evaluator
from uipath._cli._evals._models._evaluator import LegacyEvaluator
from uipath.agent.models.agent import (
AgentDefinition,
)
Expand All @@ -22,6 +22,6 @@ class AgentEvalsDefinition(AgentDefinition):
alias="evaluationSets",
description="List of agent evaluation sets",
)
evaluators: Optional[List[Evaluator]] = Field(
evaluators: Optional[List[LegacyEvaluator]] = Field(
None, description="List of agent evaluators"
)