Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions sdk/ai/azure-ai-projects/azure/ai/projects/_patch.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------
"""Type stub for _patch.py.

Overrides get_openai_client() return type so that evals.create() accepts
Azure-specific grader types in addition to the standard OpenAI graders.
"""

from typing import Any, Iterable, Union, Optional
from httpx import Timeout
from openai import NotGiven, Omit, OpenAI as OpenAIClient
from openai._types import Body, Query, Headers
from openai.resources.evals.evals import Evals
from openai.resources.evals.runs.runs import Runs
from openai.types.evals.run_create_params import DataSource
from openai.types.evals.run_create_response import RunCreateResponse
from openai.types.eval_create_params import DataSourceConfig, TestingCriterion
from openai.types.eval_create_response import EvalCreateResponse
from openai.types.shared_params.metadata import Metadata
from ._client import AIProjectClient as AIProjectClientGenerated
from .models import EvalGraderAzureAIEvaluator, TargetCompletionEvalRunDataSource

class _AzureEvalRuns(Runs):
    """Typed override of the OpenAI ``Runs`` resource.

    Widens the ``data_source`` parameter of :meth:`create` so it also accepts
    the Azure-specific :class:`TargetCompletionEvalRunDataSource` in addition
    to the standard OpenAI ``DataSource`` types.
    """

    def create(
        self,
        eval_id: str,
        *,
        data_source: Union[DataSource, TargetCompletionEvalRunDataSource], # <=== Azure extension here
        metadata: Optional[Metadata] | Omit = ...,
        name: str | Omit = ...,
        extra_headers: Headers | None = ...,
        extra_query: Query | None = ...,
        extra_body: Body | None = ...,
        timeout: float | Timeout | None | NotGiven = ...,
    ) -> RunCreateResponse: ...

class _AzureEvals(Evals):
    """Typed override of the OpenAI ``Evals`` resource.

    Widens ``testing_criteria`` of :meth:`create` so it also accepts the
    Azure-specific :class:`EvalGraderAzureAIEvaluator`, and types the ``runs``
    accessor as the Azure-aware :class:`_AzureEvalRuns`.
    """

    def create(
        self,
        *,
        data_source_config: DataSourceConfig,
        testing_criteria: Iterable[
            Union[
                TestingCriterion,
                EvalGraderAzureAIEvaluator, # <=== Azure extension here
            ]
        ],
        # `Optional[Metadata]` already includes None, so the extra `| None`
        # present before was redundant; dropped for parity with _AzureEvalRuns.
        metadata: Optional[Metadata] | Omit = ...,
        name: str | Omit = ...,
        # Stub convention (PEP 484): defaults are spelled `...`, never a
        # concrete value; this also matches _AzureEvalRuns.create above.
        extra_headers: Headers | None = ...,
        extra_query: Query | None = ...,
        extra_body: Body | None = ...,
        timeout: float | Timeout | NotGiven | None = ...,
    ) -> EvalCreateResponse: ...
    @property
    def runs(self) -> _AzureEvalRuns: ...

class OpenAI(OpenAIClient):
    """OpenAI client whose ``evals`` property is typed to accept the Azure grader extensions."""

    @property
    def evals(self) -> _AzureEvals: ...

class AIProjectClient(AIProjectClientGenerated):
    """Project client whose ``get_openai_client`` is typed to return the Azure-aware ``OpenAI`` client above."""

    def get_openai_client(self, **kwargs: Any) -> OpenAI: ...
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"""

from typing import Final, FrozenSet, List, Dict, Mapping, Optional, Any, Tuple
from ._patch_typeddicts import AzureAIAgentTarget, EvalGraderAzureAIEvaluator, TargetCompletionEvalRunDataSource
from azure.core.polling import LROPoller, AsyncLROPoller, PollingMethod, AsyncPollingMethod
from azure.core.polling.base_polling import (
LROBasePolling,
Expand Down Expand Up @@ -346,9 +347,12 @@ def from_continuation_token(


__all__: List[str] = [
"AsyncUpdateMemoriesLROPoller",
"AzureAIAgentTarget",
"CustomCredential",
"EvalGraderAzureAIEvaluator",
"TargetCompletionEvalRunDataSource",
"UpdateMemoriesLROPoller",
"AsyncUpdateMemoriesLROPoller",
] # Add all objects you want publicly available to users at this package level


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# pylint: disable=line-too-long,useless-suppression
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------

from typing import Dict, Any, List, Union
from typing_extensions import Literal, Required, TypedDict
from openai.types.evals.create_eval_completions_run_data_source_param import (
InputMessagesItemReference,
SourceFileContent,
SourceFileID,
)
from ._models import ToolDescription


class AzureAIAgentTarget(TypedDict, total=False):
    """Represents a target specifying an Azure AI agent.

    :ivar type: The type of target, always ``azure_ai_agent``. Required. Default value is
     "azure_ai_agent".
    :vartype type: str
    :ivar name: The unique identifier of the Azure AI agent. Required.
    :vartype name: str
    :ivar version: The version of the Azure AI agent. Defaults to the latest version if not
     specified.
    :vartype version: str
    :ivar tool_descriptions: Descriptions of the tools available to the agent.
    :vartype tool_descriptions: list[~azure.ai.projects.models.ToolDescription]
    """

    type: Required[Literal["azure_ai_agent"]]
    """The type of target, always ``azure_ai_agent``. Required. Default value is \"azure_ai_agent\"."""
    name: Required[str]
    """The unique identifier of the Azure AI agent. Required."""
    version: str
    """The version of the Azure AI agent. Defaults to the latest version if not specified."""
    tool_descriptions: List[ToolDescription]
    """Descriptions of the tools available to the agent."""


class TargetCompletionEvalRunDataSource(TypedDict, total=False):
    """Represents a data source for target-based completion evaluation configuration.

    :ivar type: The type of data source, always ``azure_ai_target_completions``. Required. Default
     value is "azure_ai_target_completions".
    :vartype type: str
    :ivar input_messages: Input messages configuration. Required.
    :vartype input_messages:
     ~azure.ai.projects.models.CreateEvalCompletionsRunDataSourceInputMessagesItemReference
    :ivar source: The source configuration for inline or file data. Required. Is either a
     SourceFileContent type or a SourceFileID type.
    :vartype source: ~azure.ai.projects.models.SourceFileContent or
     ~azure.ai.projects.models.SourceFileID
    :ivar target: The target configuration for the evaluation. Required.
    :vartype target: ~azure.ai.projects.models.AzureAIAgentTarget
    """

    type: Required[Literal["azure_ai_target_completions"]]
    """The type of data source, always ``azure_ai_target_completions``. Required. Default value is
    \"azure_ai_target_completions\"."""
    source: Required[Union[SourceFileContent, SourceFileID]]
    """The source configuration for inline or file data. Required. Is either a
    SourceFileContent type or a SourceFileID type."""
    target: Required[AzureAIAgentTarget]
    """The target configuration for the evaluation. Required."""
    input_messages: Required[InputMessagesItemReference]
    """Input messages configuration. Required."""


class EvalGraderAzureAIEvaluator(TypedDict, total=False):
    """AzureAIEvaluatorGrader.

    :ivar type: The object type, which is always ``azure_ai_evaluator``. Required. Default value is
     "azure_ai_evaluator".
    :vartype type: str
    :ivar name: The name of the grader. Required.
    :vartype name: str
    :ivar evaluator_name: The name of the evaluator. Required.
    :vartype evaluator_name: str
    :ivar evaluator_version: The version of the evaluator. Latest version if not specified.
    :vartype evaluator_version: str
    :ivar initialization_parameters: The initialization parameters for the evaluator (e.g. a
     ``deployment_name`` for AI-assisted evaluators).
    :vartype initialization_parameters: dict[str, any]
    :ivar data_mapping: Mapping from evaluator input names to data/template expressions
     (e.g. ``{"query": "{{item.query}}", "response": "{{sample.output_text}}"}``).
    :vartype data_mapping: dict[str, str]
    """

    type: Required[Literal["azure_ai_evaluator"]]
    """The object type, which is always ``azure_ai_evaluator``. Required. Default value is
    \"azure_ai_evaluator\"."""
    name: Required[str]
    """The name of the grader. Required."""
    evaluator_name: Required[str]
    """The name of the evaluator. Required."""
    evaluator_version: str
    """The version of the evaluator. Latest version if not specified."""
    initialization_parameters: Dict[str, Any]
    """The initialization parameters for the evaluator (e.g. a ``deployment_name`` for
    AI-assisted evaluators)."""
    data_mapping: Dict[str, str]
    """Mapping from evaluator input names to data/template expressions
    (e.g. ``{"query": "{{item.query}}"}``)."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------
"""
DESCRIPTION:
This sample demonstrates how to create and run an evaluation for an Azure AI agent
using the synchronous AIProjectClient.

The OpenAI compatible Evals calls in this sample are made using
the OpenAI client from the `openai` package. See https://platform.openai.com/docs/api-reference
for more information.

USAGE:
python sample_agent_evaluation.py

Before running the sample:

pip install "azure-ai-projects>=2.0.0" python-dotenv

Set these environment variables with your own values:
1) FOUNDRY_PROJECT_ENDPOINT - The Azure AI Project endpoint, as found in the Overview
page of your Microsoft Foundry portal.
2) FOUNDRY_AGENT_NAME - The name of the AI agent to use for evaluation.
3) FOUNDRY_MODEL_NAME - The deployment name of the AI model, as found under the "Name" column in
the "Models + endpoints" tab in your Microsoft Foundry project.
"""

import os
import time
from typing import Union
from pprint import pprint
from dotenv import load_dotenv
from openai.types.evals.create_eval_completions_run_data_source_param import SourceFileContent, SourceFileContentContent
from openai.types.eval_create_params import DataSourceConfigCustom
from openai.types.evals.run_create_response import RunCreateResponse
from openai.types.evals.run_retrieve_response import RunRetrieveResponse
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import (
AzureAIAgentTarget,
EvalGraderAzureAIEvaluator,
PromptAgentDefinition,
TargetCompletionEvalRunDataSource,
)

# Load environment variables from a local .env file, if one exists.
load_dotenv()
endpoint = os.environ["FOUNDRY_PROJECT_ENDPOINT"]  # raises KeyError if unset
model_deployment_name = os.environ.get("FOUNDRY_MODEL_NAME", "")  # Sample : gpt-4o-mini
agent_name = os.environ["FOUNDRY_AGENT_NAME"]  # raises KeyError if unset

# [START agent_evaluation_basic]
with (
    DefaultAzureCredential() as credential,
    AIProjectClient(endpoint=endpoint, credential=credential) as project_client,
    project_client.get_openai_client() as openai_client,
):
    # Create a new version of the agent that will be the evaluation target.
    agent = project_client.agents.create_version(
        agent_name=agent_name,
        definition=PromptAgentDefinition(
            model=model_deployment_name,
            instructions="You are a helpful assistant that answers general questions",
        ),
    )
    print(f"Agent created (id: {agent.id}, name: {agent.name}, version: {agent.version})")

    # Each data-source row is an object with a single required "query" string.
    data_source_config = DataSourceConfigCustom(
        type="custom",
        item_schema={"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]},
        include_sample_schema=True,
    )
    # Notes: for data_mapping:
    # sample.output_text is the string output of the agent
    # sample.output_items is the structured JSON output of the agent, including tool calls information
    testing_criteria = [
        # Built-in safety evaluator; takes no initialization parameters here.
        EvalGraderAzureAIEvaluator(
            type="azure_ai_evaluator",
            name="violence_detection",
            evaluator_name="builtin.violence",
            data_mapping={"query": "{{item.query}}", "response": "{{sample.output_text}}"},
        ),
        # AI-assisted quality evaluator; graded using the given model deployment.
        EvalGraderAzureAIEvaluator(
            type="azure_ai_evaluator",
            name="fluency",
            evaluator_name="builtin.fluency",
            initialization_parameters={"deployment_name": f"{model_deployment_name}"},
            data_mapping={"query": "{{item.query}}", "response": "{{sample.output_text}}"},
        ),
        # Maps the structured output (sample.output_items) so tool-call
        # information is available to the evaluator.
        EvalGraderAzureAIEvaluator(
            type="azure_ai_evaluator",
            name="task_adherence",
            evaluator_name="builtin.task_adherence",
            initialization_parameters={"deployment_name": f"{model_deployment_name}"},
            data_mapping={"query": "{{item.query}}", "response": "{{sample.output_items}}"},
        ),
    ]
    # Register the evaluation definition (data schema + graders) with the service.
    eval_object = openai_client.evals.create(
        name="Agent Evaluation",
        data_source_config=data_source_config,
        testing_criteria=testing_criteria,  # type: ignore
    )
    print(f"Evaluation created (id: {eval_object.id}, name: {eval_object.name})")

    # Inline data source: two sample queries, templated into a user message and
    # targeted at the agent version created above.
    data_source = TargetCompletionEvalRunDataSource(
        type="azure_ai_target_completions",
        source=SourceFileContent(
            type="file_content",
            content=[
                SourceFileContentContent(item={"query": "What is the capital of France?"}),
                SourceFileContentContent(item={"query": "How do I reverse a string in Python?"}),
            ],
        ),
        input_messages={
            "type": "template",  # TODO: This is not an option based on our TypeSpec..
            "template": [
                {"type": "message", "role": "user", "content": {"type": "input_text", "text": "{{item.query}}"}}
            ],
        },
        target=AzureAIAgentTarget(
            type="azure_ai_agent",
            name=agent.name,
            version=agent.version,  # Version is optional. Defaults to latest version if not specified
        ),
    )

    # Start the evaluation run against the target agent.
    agent_eval_run: Union[RunCreateResponse, RunRetrieveResponse] = openai_client.evals.runs.create(
        eval_id=eval_object.id, name=f"Evaluation Run for Agent {agent.name}", data_source=data_source  # type: ignore
    )
    print(f"Evaluation run created (id: {agent_eval_run.id})")
    # [END agent_evaluation_basic]

    # Poll until the run reaches a terminal state.
    # NOTE(review): no timeout or max-attempt bound — this loops forever if the
    # run ends in a status other than "completed"/"failed" (e.g. canceled);
    # confirm the service's full set of terminal statuses.
    while agent_eval_run.status not in ["completed", "failed"]:
        agent_eval_run = openai_client.evals.runs.retrieve(run_id=agent_eval_run.id, eval_id=eval_object.id)
        print(f"Waiting for eval run to complete... current status: {agent_eval_run.status}")
        time.sleep(5)

    if agent_eval_run.status == "completed":
        print("\n✓ Evaluation run completed successfully!")
        print(f"Result Counts: {agent_eval_run.result_counts}")

        # Fetch per-row results: the agent's output plus each grader's verdict.
        output_items = list(
            openai_client.evals.runs.output_items.list(run_id=agent_eval_run.id, eval_id=eval_object.id)
        )
        print(f"\nOUTPUT ITEMS (Total: {len(output_items)})")
        print(f"{'-'*60}")
        pprint(output_items)
        print(f"{'-'*60}")
    else:
        print("\n✗ Evaluation run failed.")

    # Clean up the service-side resources created by this sample.
    openai_client.evals.delete(eval_id=eval_object.id)
    print("Evaluation deleted")

    project_client.agents.delete(agent_name=agent.name)
    print("Agent deleted")
Loading
Loading