|
74 | 74 |
|
75 | 75 | logger = logging.getLogger(__name__) |
76 | 76 |
|
| 77 | +EVALUATOR_SCHEMA_TO_EVALUATOR_CLASS = { |
| 78 | + ContainsEvaluatorConfig: ContainsEvaluator, |
| 79 | + ExactMatchEvaluatorConfig: ExactMatchEvaluator, |
| 80 | + JsonSimilarityEvaluatorConfig: JsonSimilarityEvaluator, |
| 81 | + LLMJudgeOutputEvaluatorConfig: LLMJudgeOutputEvaluator, |
| 82 | + LLMJudgeStrictJSONSimilarityOutputEvaluatorConfig: LLMJudgeStrictJSONSimilarityOutputEvaluator, |
| 83 | + LLMJudgeTrajectoryEvaluatorConfig: LLMJudgeTrajectoryEvaluator, |
| 84 | + LLMJudgeTrajectorySimulationEvaluatorConfig: LLMJudgeTrajectorySimulationEvaluator, |
| 85 | + ToolCallArgsEvaluatorConfig: ToolCallArgsEvaluator, |
| 86 | + ToolCallCountEvaluatorConfig: ToolCallCountEvaluator, |
| 87 | + ToolCallOrderEvaluatorConfig: ToolCallOrderEvaluator, |
| 88 | + ToolCallOutputEvaluatorConfig: ToolCallOutputEvaluator, |
| 89 | +} |
| 90 | + |
77 | 91 |
|
78 | 92 | class EvaluatorFactory: |
79 | 93 | """Factory class for creating evaluator instances based on configuration.""" |
@@ -137,50 +151,15 @@ def _create_evaluator_internal( |
137 | 151 | data, file_path, class_name, evaluators_dir |
138 | 152 | ) |
139 | 153 |
|
140 | | - # use built-in evaluators |
141 | 154 | config: BaseEvaluatorConfig[Any] = TypeAdapter(EvaluatorConfig).validate_python( |
142 | 155 | data |
143 | 156 | ) |
144 | | - match config: |
145 | | - case ContainsEvaluatorConfig(): |
146 | | - return EvaluatorFactory._create_contains_evaluator(data) |
147 | | - case ExactMatchEvaluatorConfig(): |
148 | | - return EvaluatorFactory._create_exact_match_evaluator(data) |
149 | | - case JsonSimilarityEvaluatorConfig(): |
150 | | - return EvaluatorFactory._create_json_similarity_evaluator(data) |
151 | | - case LLMJudgeOutputEvaluatorConfig(): |
152 | | - return EvaluatorFactory._create_llm_judge_output_evaluator(data) |
153 | | - case LLMJudgeStrictJSONSimilarityOutputEvaluatorConfig(): |
154 | | - return EvaluatorFactory._create_llm_judge_strict_json_similarity_output_evaluator( |
155 | | - data |
156 | | - ) |
157 | | - case LLMJudgeTrajectoryEvaluatorConfig(): |
158 | | - return EvaluatorFactory._create_trajectory_evaluator(data) |
159 | | - case ToolCallArgsEvaluatorConfig(): |
160 | | - return EvaluatorFactory._create_tool_call_args_evaluator(data) |
161 | | - case ToolCallCountEvaluatorConfig(): |
162 | | - return EvaluatorFactory._create_tool_call_count_evaluator(data) |
163 | | - case ToolCallOrderEvaluatorConfig(): |
164 | | - return EvaluatorFactory._create_tool_call_order_evaluator(data) |
165 | | - case ToolCallOutputEvaluatorConfig(): |
166 | | - return EvaluatorFactory._create_tool_call_output_evaluator(data) |
167 | | - case LLMJudgeTrajectorySimulationEvaluatorConfig(): |
168 | | - return ( |
169 | | - EvaluatorFactory._create_llm_judge_simulation_trajectory_evaluator( |
170 | | - data |
171 | | - ) |
172 | | - ) |
173 | | - case _: |
174 | | - raise ValueError(f"Unknown evaluator configuration: {config}") |
175 | | - |
176 | | - @staticmethod |
177 | | - def _create_contains_evaluator(data: dict[str, Any]) -> ContainsEvaluator: |
178 | | - evaluator_id = data.get("id") |
179 | | - if not evaluator_id or not isinstance(evaluator_id, str): |
180 | | - raise ValueError("Evaluator 'id' must be a non-empty string") |
181 | | - return TypeAdapter(ContainsEvaluator).validate_python( |
| 157 | + evaluator_class = EVALUATOR_SCHEMA_TO_EVALUATOR_CLASS.get(type(config)) |
| 158 | + if not evaluator_class: |
| 159 | + raise ValueError(f"Unknown evaluator configuration: {config}") |
| 160 | + return TypeAdapter(evaluator_class).validate_python( |
182 | 161 | { |
183 | | - "id": evaluator_id, |
| 162 | + "id": data.get("id"), |
184 | 163 | "config": EvaluatorFactory._prepare_evaluator_config(data), |
185 | 164 | } |
186 | 165 | ) |
@@ -270,116 +249,6 @@ def _create_coded_evaluator_internal( |
270 | 249 | } |
271 | 250 | ) |
272 | 251 |
|
273 | | - @staticmethod |
274 | | - def _create_exact_match_evaluator( |
275 | | - data: dict[str, Any], |
276 | | - ) -> ExactMatchEvaluator: |
277 | | - return TypeAdapter(ExactMatchEvaluator).validate_python( |
278 | | - { |
279 | | - "id": data.get("id"), |
280 | | - "config": EvaluatorFactory._prepare_evaluator_config(data), |
281 | | - } |
282 | | - ) |
283 | | - |
284 | | - @staticmethod |
285 | | - def _create_json_similarity_evaluator( |
286 | | - data: dict[str, Any], |
287 | | - ) -> JsonSimilarityEvaluator: |
288 | | - return TypeAdapter(JsonSimilarityEvaluator).validate_python( |
289 | | - { |
290 | | - "id": data.get("id"), |
291 | | - "config": EvaluatorFactory._prepare_evaluator_config(data), |
292 | | - } |
293 | | - ) |
294 | | - |
295 | | - @staticmethod |
296 | | - def _create_llm_judge_output_evaluator( |
297 | | - data: dict[str, Any], |
298 | | - ) -> LLMJudgeOutputEvaluator: |
299 | | - return TypeAdapter(LLMJudgeOutputEvaluator).validate_python( |
300 | | - { |
301 | | - "id": data.get("id"), |
302 | | - "config": EvaluatorFactory._prepare_evaluator_config(data), |
303 | | - } |
304 | | - ) |
305 | | - |
306 | | - @staticmethod |
307 | | - def _create_llm_judge_strict_json_similarity_output_evaluator( |
308 | | - data: dict[str, Any], |
309 | | - ) -> LLMJudgeStrictJSONSimilarityOutputEvaluator: |
310 | | - return TypeAdapter(LLMJudgeStrictJSONSimilarityOutputEvaluator).validate_python( |
311 | | - { |
312 | | - "id": data.get("id"), |
313 | | - "config": EvaluatorFactory._prepare_evaluator_config(data), |
314 | | - } |
315 | | - ) |
316 | | - |
317 | | - @staticmethod |
318 | | - def _create_trajectory_evaluator( |
319 | | - data: dict[str, Any], |
320 | | - ) -> LLMJudgeTrajectoryEvaluator: |
321 | | - return TypeAdapter(LLMJudgeTrajectoryEvaluator).validate_python( |
322 | | - { |
323 | | - "id": data.get("id"), |
324 | | - "config": EvaluatorFactory._prepare_evaluator_config(data), |
325 | | - } |
326 | | - ) |
327 | | - |
328 | | - @staticmethod |
329 | | - def _create_tool_call_args_evaluator( |
330 | | - data: dict[str, Any], |
331 | | - ) -> ToolCallArgsEvaluator: |
332 | | - return TypeAdapter(ToolCallArgsEvaluator).validate_python( |
333 | | - { |
334 | | - "id": data.get("id"), |
335 | | - "config": EvaluatorFactory._prepare_evaluator_config(data), |
336 | | - } |
337 | | - ) |
338 | | - |
339 | | - @staticmethod |
340 | | - def _create_tool_call_count_evaluator( |
341 | | - data: dict[str, Any], |
342 | | - ) -> ToolCallCountEvaluator: |
343 | | - return TypeAdapter(ToolCallCountEvaluator).validate_python( |
344 | | - { |
345 | | - "id": data.get("id"), |
346 | | - "config": EvaluatorFactory._prepare_evaluator_config(data), |
347 | | - } |
348 | | - ) |
349 | | - |
350 | | - @staticmethod |
351 | | - def _create_tool_call_order_evaluator( |
352 | | - data: dict[str, Any], |
353 | | - ) -> ToolCallOrderEvaluator: |
354 | | - return TypeAdapter(ToolCallOrderEvaluator).validate_python( |
355 | | - { |
356 | | - "id": data.get("id"), |
357 | | - "config": EvaluatorFactory._prepare_evaluator_config(data), |
358 | | - } |
359 | | - ) |
360 | | - |
361 | | - @staticmethod |
362 | | - def _create_tool_call_output_evaluator( |
363 | | - data: dict[str, Any], |
364 | | - ) -> ToolCallOutputEvaluator: |
365 | | - return TypeAdapter(ToolCallOutputEvaluator).validate_python( |
366 | | - { |
367 | | - "id": data.get("id"), |
368 | | - "config": EvaluatorFactory._prepare_evaluator_config(data), |
369 | | - } |
370 | | - ) |
371 | | - |
372 | | - @staticmethod |
373 | | - def _create_llm_judge_simulation_trajectory_evaluator( |
374 | | - data: dict[str, Any], |
375 | | - ) -> LLMJudgeTrajectorySimulationEvaluator: |
376 | | - return TypeAdapter(LLMJudgeTrajectorySimulationEvaluator).validate_python( |
377 | | - { |
378 | | - "id": data.get("id"), |
379 | | - "config": EvaluatorFactory._prepare_evaluator_config(data), |
380 | | - } |
381 | | - ) |
382 | | - |
383 | 252 | @staticmethod |
384 | 253 | def _create_legacy_evaluator_internal( |
385 | 254 | data: dict[str, Any], |
|
0 commit comments