Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions sdk/ai/azure-ai-projects/azure/ai/projects/_patch.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------
"""Type stub for _patch.py.

Overrides get_openai_client() return type so that evals.create() accepts
Azure-specific grader types in addition to the standard OpenAI graders.
"""

from typing import Any, Iterable, Union, Optional
from httpx import Timeout
from openai import NotGiven, Omit, OpenAI as OpenAIClient
from openai._types import Body, Query, Headers
from openai.resources.evals.evals import Evals
from openai.resources.evals.runs.runs import Runs
from openai.types.evals.run_create_params import DataSource
from openai.types.evals.run_create_response import RunCreateResponse
from openai.types.eval_create_params import DataSourceConfig, TestingCriterion
from openai.types.eval_create_response import EvalCreateResponse
from openai.types.shared_params.metadata import Metadata
from ._client import AIProjectClient as AIProjectClientGenerated
from .models import EvalGraderAzureAIEvaluator, TargetCompletionEvalRunDataSource

class _AzureEvalRuns(Runs):
    """Typed override of the OpenAI ``Runs`` resource.

    Widens the ``data_source`` parameter of :meth:`create` so it also accepts
    the Azure-specific :class:`TargetCompletionEvalRunDataSource` in addition
    to the standard OpenAI ``DataSource`` types.
    """

    def create(
        self,
        eval_id: str,
        *,
        data_source: Union[DataSource, TargetCompletionEvalRunDataSource], # <=== Azure extension here
        metadata: Optional[Metadata] | Omit = ...,
        name: str | Omit = ...,
        extra_headers: Headers | None = ...,
        extra_query: Query | None = ...,
        extra_body: Body | None = ...,
        timeout: float | Timeout | None | NotGiven = ...,
    ) -> RunCreateResponse: ...

class _AzureEvals(Evals):
    """Typed override of the OpenAI ``Evals`` resource.

    Widens ``testing_criteria`` of :meth:`create` so it also accepts the
    Azure-specific :class:`EvalGraderAzureAIEvaluator`, and types the ``runs``
    accessor as the Azure-aware :class:`_AzureEvalRuns`.
    """

    def create(
        self,
        *,
        data_source_config: DataSourceConfig,
        testing_criteria: Iterable[
            Union[
                TestingCriterion,
                EvalGraderAzureAIEvaluator, # <=== Azure extension here
            ]
        ],
        # `Optional[Metadata]` already includes None, so the extra `| None`
        # present before was redundant; dropped for parity with _AzureEvalRuns.
        metadata: Optional[Metadata] | Omit = ...,
        name: str | Omit = ...,
        # Stub convention (PEP 484): defaults are spelled `...`, never a
        # concrete value; this also matches _AzureEvalRuns.create above.
        extra_headers: Headers | None = ...,
        extra_query: Query | None = ...,
        extra_body: Body | None = ...,
        timeout: float | Timeout | NotGiven | None = ...,
    ) -> EvalCreateResponse: ...
    @property
    def runs(self) -> _AzureEvalRuns: ...

class OpenAI(OpenAIClient):
    """OpenAI client whose ``evals`` property is typed to accept the Azure grader extensions."""

    @property
    def evals(self) -> _AzureEvals: ...

class AIProjectClient(AIProjectClientGenerated):
    """Project client whose ``get_openai_client`` is typed to return the Azure-aware ``OpenAI`` client above."""

    def get_openai_client(self, **kwargs: Any) -> OpenAI: ...
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"""

from typing import Final, FrozenSet, List, Dict, Mapping, Optional, Any, Tuple
from ._patch_typeddicts import AzureAIAgentTarget, EvalGraderAzureAIEvaluator, TargetCompletionEvalRunDataSource
from azure.core.polling import LROPoller, AsyncLROPoller, PollingMethod, AsyncPollingMethod
from azure.core.polling.base_polling import (
LROBasePolling,
Expand Down Expand Up @@ -346,9 +347,12 @@ def from_continuation_token(


__all__: List[str] = [
"AsyncUpdateMemoriesLROPoller",
"AzureAIAgentTarget",
"CustomCredential",
"EvalGraderAzureAIEvaluator",
"TargetCompletionEvalRunDataSource",
"UpdateMemoriesLROPoller",
"AsyncUpdateMemoriesLROPoller",
] # Add all objects you want publicly available to users at this package level


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# pylint: disable=line-too-long,useless-suppression
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------

from typing import Dict, Any, List, Union
from typing_extensions import Literal, Required, TypedDict
from openai.types.evals.create_eval_completions_run_data_source_param import (
InputMessagesItemReference,
SourceFileContent,
SourceFileID,
)
from ._models import ToolDescription


class AzureAIAgentTarget(TypedDict, total=False):
    """Represents a target specifying an Azure AI agent.

    :ivar type: The type of target, always ``azure_ai_agent``. Required. Default value is
     "azure_ai_agent".
    :vartype type: str
    :ivar name: The unique identifier of the Azure AI agent. Required.
    :vartype name: str
    :ivar version: The version of the Azure AI agent. Defaults to the latest version if not
     specified.
    :vartype version: str
    :ivar tool_descriptions: Descriptions of the tools available to the agent.
    :vartype tool_descriptions: list[~azure.ai.projects.models.ToolDescription]
    """

    type: Required[Literal["azure_ai_agent"]]
    """The type of target, always ``azure_ai_agent``. Required. Default value is \"azure_ai_agent\"."""
    name: Required[str]
    """The unique identifier of the Azure AI agent. Required."""
    version: str
    """The version of the Azure AI agent. Defaults to the latest version if not specified."""
    tool_descriptions: List[ToolDescription]
    """Descriptions of the tools available to the agent."""


class TargetCompletionEvalRunDataSource(TypedDict, total=False):
    """Represents a data source for target-based completion evaluation configuration.

    :ivar type: The type of data source, always ``azure_ai_target_completions``. Required. Default
     value is "azure_ai_target_completions".
    :vartype type: str
    :ivar input_messages: Input messages configuration. Required.
    :vartype input_messages:
     ~azure.ai.projects.models.CreateEvalCompletionsRunDataSourceInputMessagesItemReference
    :ivar source: The source configuration for inline or file data. Required. Is either a
     SourceFileContent type or a SourceFileID type.
    :vartype source: ~azure.ai.projects.models.SourceFileContent or
     ~azure.ai.projects.models.SourceFileID
    :ivar target: The target configuration for the evaluation. Required.
    :vartype target: ~azure.ai.projects.models.AzureAIAgentTarget
    """

    type: Required[Literal["azure_ai_target_completions"]]
    """The type of data source, always ``azure_ai_target_completions``. Required. Default value is
    \"azure_ai_target_completions\"."""
    source: Required[Union[SourceFileContent, SourceFileID]]
    """The source configuration for inline or file data. Required. Is either a
    SourceFileContent type or a SourceFileID type."""
    target: Required[AzureAIAgentTarget]
    """The target configuration for the evaluation. Required."""
    input_messages: Required[InputMessagesItemReference]
    """Input messages configuration. Required."""


class EvalGraderAzureAIEvaluator(TypedDict, total=False):
    """AzureAIEvaluatorGrader.

    :ivar type: The object type, which is always ``azure_ai_evaluator``. Required. Default value is
     "azure_ai_evaluator".
    :vartype type: str
    :ivar name: The name of the grader. Required.
    :vartype name: str
    :ivar evaluator_name: The name of the evaluator. Required.
    :vartype evaluator_name: str
    :ivar evaluator_version: The version of the evaluator. Latest version if not specified.
    :vartype evaluator_version: str
    :ivar initialization_parameters: The initialization parameters for the evaluator (e.g. a
     ``deployment_name`` for AI-assisted evaluators).
    :vartype initialization_parameters: dict[str, any]
    :ivar data_mapping: Mapping from evaluator input names to data/template expressions
     (e.g. ``{"query": "{{item.query}}", "response": "{{sample.output_text}}"}``).
    :vartype data_mapping: dict[str, str]
    """

    type: Required[Literal["azure_ai_evaluator"]]
    """The object type, which is always ``azure_ai_evaluator``. Required. Default value is
    \"azure_ai_evaluator\"."""
    name: Required[str]
    """The name of the grader. Required."""
    evaluator_name: Required[str]
    """The name of the evaluator. Required."""
    evaluator_version: str
    """The version of the evaluator. Latest version if not specified."""
    initialization_parameters: Dict[str, Any]
    """The initialization parameters for the evaluator (e.g. a ``deployment_name`` for
    AI-assisted evaluators)."""
    data_mapping: Dict[str, str]
    """Mapping from evaluator input names to data/template expressions
    (e.g. ``{"query": "{{item.query}}"}``)."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------
"""
DESCRIPTION:
This sample demonstrates how to create and run an evaluation for an Azure AI agent
using the synchronous AIProjectClient.

The OpenAI compatible Evals calls in this sample are made using
the OpenAI client from the `openai` package. See https://platform.openai.com/docs/api-reference
for more information.

USAGE:
python sample_agent_evaluation.py

Before running the sample:

pip install "azure-ai-projects>=2.0.0" python-dotenv

Set these environment variables with your own values:
1) FOUNDRY_PROJECT_ENDPOINT - The Azure AI Project endpoint, as found in the Overview
page of your Microsoft Foundry portal.
2) FOUNDRY_AGENT_NAME - The name of the AI agent to use for evaluation.
3) FOUNDRY_MODEL_NAME - The deployment name of the AI model, as found under the "Name" column in
the "Models + endpoints" tab in your Microsoft Foundry project.
"""

import os
import time
from typing import Union
from pprint import pprint
from dotenv import load_dotenv
from openai.types.evals.create_eval_completions_run_data_source_param import SourceFileContent, SourceFileContentContent
from openai.types.eval_create_params import DataSourceConfigCustom
from openai.types.evals.run_create_response import RunCreateResponse
from openai.types.evals.run_retrieve_response import RunRetrieveResponse
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import (
AzureAIAgentTarget,
EvalGraderAzureAIEvaluator,
PromptAgentDefinition,
TargetCompletionEvalRunDataSource,
)

# Load environment variables from a local .env file, if one exists.
load_dotenv()
endpoint = os.environ["FOUNDRY_PROJECT_ENDPOINT"]  # raises KeyError if unset
model_deployment_name = os.environ.get("FOUNDRY_MODEL_NAME", "")  # Sample : gpt-4o-mini
agent_name = os.environ["FOUNDRY_AGENT_NAME"]  # raises KeyError if unset

# [START agent_evaluation_basic]
with (
    DefaultAzureCredential() as credential,
    AIProjectClient(endpoint=endpoint, credential=credential) as project_client,
    project_client.get_openai_client() as openai_client,
):
    # Create a new version of the agent that will be the evaluation target.
    agent = project_client.agents.create_version(
        agent_name=agent_name,
        definition=PromptAgentDefinition(
            model=model_deployment_name,
            instructions="You are a helpful assistant that answers general questions",
        ),
    )
    print(f"Agent created (id: {agent.id}, name: {agent.name}, version: {agent.version})")

    # Each data-source row is an object with a single required "query" string.
    data_source_config = DataSourceConfigCustom(
        type="custom",
        item_schema={"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]},
        include_sample_schema=True,
    )
    # Notes: for data_mapping:
    # sample.output_text is the string output of the agent
    # sample.output_items is the structured JSON output of the agent, including tool calls information
    testing_criteria = [
        # Built-in safety evaluator; takes no initialization parameters here.
        EvalGraderAzureAIEvaluator(
            type="azure_ai_evaluator",
            name="violence_detection",
            evaluator_name="builtin.violence",
            data_mapping={"query": "{{item.query}}", "response": "{{sample.output_text}}"},
        ),
        # AI-assisted quality evaluator; graded using the given model deployment.
        EvalGraderAzureAIEvaluator(
            type="azure_ai_evaluator",
            name="fluency",
            evaluator_name="builtin.fluency",
            initialization_parameters={"deployment_name": f"{model_deployment_name}"},
            data_mapping={"query": "{{item.query}}", "response": "{{sample.output_text}}"},
        ),
        # Maps the structured output (sample.output_items) so tool-call
        # information is available to the evaluator.
        EvalGraderAzureAIEvaluator(
            type="azure_ai_evaluator",
            name="task_adherence",
            evaluator_name="builtin.task_adherence",
            initialization_parameters={"deployment_name": f"{model_deployment_name}"},
            data_mapping={"query": "{{item.query}}", "response": "{{sample.output_items}}"},
        ),
    ]
    # Register the evaluation definition (data schema + graders) with the service.
    eval_object = openai_client.evals.create(
        name="Agent Evaluation",
        data_source_config=data_source_config,
        testing_criteria=testing_criteria,  # type: ignore
    )
    print(f"Evaluation created (id: {eval_object.id}, name: {eval_object.name})")

    # Inline data source: two sample queries, templated into a user message and
    # targeted at the agent version created above.
    data_source = TargetCompletionEvalRunDataSource(
        type="azure_ai_target_completions",
        source=SourceFileContent(
            type="file_content",
            content=[
                SourceFileContentContent(item={"query": "What is the capital of France?"}),
                SourceFileContentContent(item={"query": "How do I reverse a string in Python?"}),
            ],
        ),
        input_messages={
            "type": "template",  # TODO: This is not an option based on our TypeSpec..
            "template": [
                {"type": "message", "role": "user", "content": {"type": "input_text", "text": "{{item.query}}"}}
            ],
        },
        target=AzureAIAgentTarget(
            type="azure_ai_agent",
            name=agent.name,
            version=agent.version,  # Version is optional. Defaults to latest version if not specified
        ),
    )

    # Start the evaluation run against the target agent.
    agent_eval_run: Union[RunCreateResponse, RunRetrieveResponse] = openai_client.evals.runs.create(
        eval_id=eval_object.id, name=f"Evaluation Run for Agent {agent.name}", data_source=data_source  # type: ignore
    )
    print(f"Evaluation run created (id: {agent_eval_run.id})")
    # [END agent_evaluation_basic]

    # Poll until the run reaches a terminal state.
    # NOTE(review): no timeout or max-attempt bound — this loops forever if the
    # run ends in a status other than "completed"/"failed" (e.g. canceled);
    # confirm the service's full set of terminal statuses.
    while agent_eval_run.status not in ["completed", "failed"]:
        agent_eval_run = openai_client.evals.runs.retrieve(run_id=agent_eval_run.id, eval_id=eval_object.id)
        print(f"Waiting for eval run to complete... current status: {agent_eval_run.status}")
        time.sleep(5)

    if agent_eval_run.status == "completed":
        print("\n✓ Evaluation run completed successfully!")
        print(f"Result Counts: {agent_eval_run.result_counts}")

        # Fetch per-row results: the agent's output plus each grader's verdict.
        output_items = list(
            openai_client.evals.runs.output_items.list(run_id=agent_eval_run.id, eval_id=eval_object.id)
        )
        print(f"\nOUTPUT ITEMS (Total: {len(output_items)})")
        print(f"{'-'*60}")
        pprint(output_items)
        print(f"{'-'*60}")
    else:
        print("\n✗ Evaluation run failed.")

    # Clean up the service-side resources created by this sample.
    openai_client.evals.delete(eval_id=eval_object.id)
    print("Evaluation deleted")

    project_client.agents.delete(agent_name=agent.name)
    print("Agent deleted")
Loading
Loading