From 1f6beb97beca7a211465463e7bce78c1f2559716 Mon Sep 17 00:00:00 2001 From: Salma Elshafey Date: Wed, 19 Nov 2025 15:25:39 +0200 Subject: [PATCH 1/9] Create workflow evaluation with Foundry demo --- .../demos/workflow_evaluation/README.md | 35 + .../demos/workflow_evaluation/_tools.py | 622 ++++++++++++++++++ .../workflow_evaluation/create_workflow.py | 494 ++++++++++++++ .../workflow_evaluation/run_evaluation.py | 219 ++++++ 4 files changed, 1370 insertions(+) create mode 100644 python/samples/demos/workflow_evaluation/README.md create mode 100644 python/samples/demos/workflow_evaluation/_tools.py create mode 100644 python/samples/demos/workflow_evaluation/create_workflow.py create mode 100644 python/samples/demos/workflow_evaluation/run_evaluation.py diff --git a/python/samples/demos/workflow_evaluation/README.md b/python/samples/demos/workflow_evaluation/README.md new file mode 100644 index 0000000000..ee9aa7ffd6 --- /dev/null +++ b/python/samples/demos/workflow_evaluation/README.md @@ -0,0 +1,35 @@ +# Multi-Agent Travel Planning Workflow Evaluation + +This sample demonstrates evaluating a multi-agent workflow using Azure AI's built-in evaluators. The workflow processes travel planning requests through seven specialized agents in a fan-out/fan-in pattern: travel request handler, hotel/flight/activity search agents, booking aggregator, booking confirmation, and payment processing. 
+ +## Evaluation Metrics + +The evaluation uses four Azure AI built-in evaluators: + +- **Relevance** - How well responses address the user query +- **Groundedness** - Whether responses are grounded in available context +- **Tool Call Accuracy** - Correct tool selection and parameter usage +- **Tool Output Utilization** - Effective use of tool outputs in responses + +## Setup + +Create a `.env` file with required configuration: + +```env +AZURE_AI_PROJECT_ENDPOINT= +AZURE_AI_MODEL_DEPLOYMENT_NAME= +``` + +## Running the Evaluation + +Execute the complete workflow and evaluation: + +```bash +python run_evaluation.py +``` + +The script will: +1. Execute the multi-agent travel planning workflow +2. Display response summary for each agent +3. Create and run evaluation on hotel, flight, and activity search agents +4. Monitor progress and display the evaluation report URL diff --git a/python/samples/demos/workflow_evaluation/_tools.py b/python/samples/demos/workflow_evaluation/_tools.py new file mode 100644 index 0000000000..420d1ece9e --- /dev/null +++ b/python/samples/demos/workflow_evaluation/_tools.py @@ -0,0 +1,622 @@ +import json +from datetime import datetime + +# --- Travel Planning Tools --- + +def search_hotels(location: str, check_in: str, check_out: str, guests: int = 2) -> str: + """ + Search for available hotels based on location and dates. 
+ """ + # Specific mock data for Paris December 15-18, 2025 + if "paris" in location.lower(): + mock_hotels = [ + { + "name": "Hotel Eiffel Trocadéro", + "rating": 4.6, + "price_per_night": "$185", + "total_price": "$555 for 3 nights", + "distance_to_eiffel_tower": "0.3 miles", + "amenities": ["WiFi", "Breakfast", "Eiffel Tower View", "Concierge"], + "availability": "Available", + "address": "35 Rue Benjamin Franklin, 16th arr., Paris" + }, + { + "name": "Mercure Paris Centre Tour Eiffel", + "rating": 4.4, + "price_per_night": "$220", + "total_price": "$660 for 3 nights", + "distance_to_eiffel_tower": "0.5 miles", + "amenities": ["WiFi", "Restaurant", "Bar", "Gym", "Air Conditioning"], + "availability": "Available", + "address": "20 Rue Jean Rey, 15th arr., Paris" + }, + { + "name": "Pullman Paris Tour Eiffel", + "rating": 4.7, + "price_per_night": "$280", + "total_price": "$840 for 3 nights", + "distance_to_eiffel_tower": "0.2 miles", + "amenities": ["WiFi", "Spa", "Gym", "Restaurant", "Rooftop Bar", "Concierge"], + "availability": "Limited", + "address": "18 Avenue de Suffren, 15th arr., Paris" + } + ] + else: + mock_hotels = [ + { + "name": "Grand Plaza Hotel", + "rating": 4.5, + "price_per_night": "$150", + "amenities": ["WiFi", "Pool", "Gym", "Restaurant"], + "availability": "Available" + } + ] + + return json.dumps({ + "location": location, + "check_in": check_in, + "check_out": check_out, + "guests": guests, + "hotels_found": len(mock_hotels), + "hotels": mock_hotels, + "note": "Hotel search results matching your query" + }) + +def get_hotel_details(hotel_name: str) -> str: + """ + Get detailed information about a specific hotel. + """ + hotel_details = { + "Hotel Eiffel Trocadéro": { + "description": "Charming boutique hotel with stunning Eiffel Tower views from select rooms. 
Perfect for couples and families.", + "check_in_time": "3:00 PM", + "check_out_time": "11:00 AM", + "cancellation_policy": "Free cancellation up to 24 hours before check-in", + "reviews": { + "total": 1247, + "recent_comments": [ + "Amazing location! Walked to Eiffel Tower in 5 minutes.", + "Staff was incredibly helpful with restaurant recommendations.", + "Rooms are cozy and clean with great views." + ] + }, + "nearby_attractions": ["Eiffel Tower (0.3 mi)", "Trocadéro Gardens (0.2 mi)", "Seine River (0.4 mi)"] + }, + "Mercure Paris Centre Tour Eiffel": { + "description": "Modern hotel with contemporary rooms and excellent dining options. Close to metro stations.", + "check_in_time": "2:00 PM", + "check_out_time": "12:00 PM", + "cancellation_policy": "Free cancellation up to 48 hours before check-in", + "reviews": { + "total": 2156, + "recent_comments": [ + "Great value for money, clean and comfortable.", + "Restaurant had excellent French cuisine.", + "Easy access to public transportation." + ] + }, + "nearby_attractions": ["Eiffel Tower (0.5 mi)", "Champ de Mars (0.4 mi)", "Les Invalides (0.8 mi)"] + }, + "Pullman Paris Tour Eiffel": { + "description": "Luxury hotel offering panoramic views, upscale amenities, and exceptional service. Ideal for a premium experience.", + "check_in_time": "3:00 PM", + "check_out_time": "12:00 PM", + "cancellation_policy": "Free cancellation up to 72 hours before check-in", + "reviews": { + "total": 3421, + "recent_comments": [ + "Rooftop bar has the best Eiffel Tower views in Paris!", + "Luxurious rooms with every amenity you could want.", + "Worth the price for the location and service." 
def _flight_leg(flight_number, airline, departure, arrival, duration, aircraft, price):
    """Build one economy-class flight leg for the mock search results."""
    return {
        "flight_number": flight_number,
        "airline": airline,
        "departure": departure,
        "arrival": arrival,
        "duration": duration,
        "aircraft": aircraft,
        "class": "Economy",
        "price": price,
    }


def _nonstop_round_trip(outbound, inbound):
    """Combine two legs into a nonstop round-trip offer priced on the outbound leg."""
    return {
        "outbound": outbound,
        "return": inbound,
        "total_price": outbound["price"],
        "stops": "Nonstop",
        "baggage": "1 checked bag included",
    }


def search_flights(origin: str, destination: str, departure_date: str, return_date: str = None, passengers: int = 1) -> str:
    """
    Search for available flights between two locations.

    This is a mock tool: it returns canned data rather than querying a real
    flight service.

    Args:
        origin: Departure city or airport code (matched case-insensitively).
        destination: Arrival city or airport code (matched case-insensitively).
        departure_date: Outbound travel date, echoed back and used in the
            generic fallback itinerary.
        return_date: Optional return date, echoed back in the result.
        passengers: Number of travellers, echoed back in the result.

    Returns:
        A JSON string with the search parameters, the number of flights found,
        the flight list, and a note describing the searched route.
    """
    origin_key = origin.lower()
    destination_key = destination.lower()
    from_new_york = "jfk" in origin_key or "new york" in origin_key
    to_paris = "paris" in destination_key or "cdg" in destination_key

    if from_new_york and to_paris:
        # Specific mock data for JFK to Paris December 15-18, 2025
        mock_flights = [
            _nonstop_round_trip(
                _flight_leg("AF007", "Air France", "December 15, 2025 at 6:30 PM",
                            "December 16, 2025 at 8:15 AM", "7h 45m", "Boeing 777-300ER", "$520"),
                _flight_leg("AF008", "Air France", "December 18, 2025 at 11:00 AM",
                            "December 18, 2025 at 2:15 PM", "8h 15m", "Airbus A350-900", "Included"),
            ),
            _nonstop_round_trip(
                _flight_leg("DL264", "Delta", "December 15, 2025 at 10:15 PM",
                            "December 16, 2025 at 12:05 PM", "7h 50m", "Airbus A330-900neo", "$485"),
                _flight_leg("DL265", "Delta", "December 18, 2025 at 1:45 PM",
                            "December 18, 2025 at 5:00 PM", "8h 15m", "Airbus A330-900neo", "Included"),
            ),
            _nonstop_round_trip(
                _flight_leg("UA57", "United Airlines", "December 15, 2025 at 5:00 PM",
                            "December 16, 2025 at 6:50 AM", "7h 50m", "Boeing 767-400ER", "$560"),
                _flight_leg("UA58", "United Airlines", "December 18, 2025 at 9:30 AM",
                            "December 18, 2025 at 12:45 PM", "8h 15m", "Boeing 787-10", "Included"),
            ),
        ]
        # Keep the original wording for the fixture route used by the demo.
        note = "Flight search results for JFK to Paris CDG"
    else:
        if from_new_york:
            mock_flights = [
                {"flight_number": "XX123", "airline": "Generic Air", "price": "$400", "note": "Generic route"}
            ]
        else:
            mock_flights = [
                {
                    "outbound": {
                        "flight_number": "AA123",
                        "airline": "Generic Airlines",
                        "departure": f"{departure_date} at 9:00 AM",
                        "arrival": f"{departure_date} at 2:30 PM",
                        "duration": "5h 30m",
                        "class": "Economy",
                        "price": "$350",
                    },
                    "total_price": "$350",
                    "stops": "Nonstop",
                }
            ]
        # Describe the route that was actually searched instead of always
        # claiming JFK -> Paris, which would mislead groundedness evaluation.
        note = f"Flight search results for {origin} to {destination}"

    return json.dumps({
        "origin": origin,
        "destination": destination,
        "departure_date": departure_date,
        "return_date": return_date,
        "passengers": passengers,
        "flights_found": len(mock_flights),
        "flights": mock_flights,
        "note": note,
    })
def search_activities(location: str, date: str = None, category: str = None) -> str:
    """
    Search for available activities and attractions at a destination.

    This is a mock tool: it returns canned data rather than querying a real
    activity service.

    Args:
        location: Destination name; any value containing "paris"
            (case-insensitive) returns the Paris catalogue.
        date: Optional activity date, echoed back in the result.
        category: Optional category filter for the Paris catalogue
            ("Sightseeing", "Culture" or "Culinary"). Matching ignores case
            and surrounding whitespace. Ignored for other locations.

    Returns:
        A JSON string with the search parameters, the number of activities
        found, the activity list, and a note describing the result set.
    """
    if "paris" in location.lower():
        # Specific mock data for Paris activities
        fields = (
            "name", "category", "duration", "price", "rating",
            "description", "availability", "best_time", "booking_required",
        )
        rows = [
            ("Eiffel Tower Summit Access", "Sightseeing", "2-3 hours", "$35", 4.8,
             "Skip-the-line access to all three levels including the summit. Best views of Paris!",
             "Daily 9:30 AM - 11:00 PM", "Early morning or sunset", True),
            ("Louvre Museum Guided Tour", "Sightseeing", "3 hours", "$55", 4.7,
             "Expert-guided tour covering masterpieces including Mona Lisa and Venus de Milo.",
             "Daily except Tuesdays, 9:00 AM entry", "Morning entry recommended", True),
            ("Seine River Cruise", "Sightseeing", "1 hour", "$18", 4.6,
             "Scenic cruise past Notre-Dame, Eiffel Tower, and historic bridges.",
             "Every 30 minutes, 10:00 AM - 10:00 PM", "Evening for illuminated monuments", False),
            ("Musée d'Orsay Visit", "Culture", "2-3 hours", "$16", 4.7,
             "Impressionist masterpieces in a stunning Beaux-Arts railway station.",
             "Tuesday-Sunday 9:30 AM - 6:00 PM", "Weekday mornings", True),
            ("Versailles Palace Day Trip", "Culture", "5-6 hours", "$75", 4.9,
             "Explore the opulent palace and stunning gardens of Louis XIV (includes transport).",
             "Daily except Mondays, 8:00 AM departure", "Full day trip", True),
            ("Montmartre Walking Tour", "Culture", "2.5 hours", "$25", 4.6,
             "Discover the artistic heart of Paris, including Sacré-Cœur and artists' square.",
             "Daily at 10:00 AM and 2:00 PM", "Morning or late afternoon", False),
            ("French Cooking Class", "Culinary", "3 hours", "$120", 4.9,
             "Learn to make classic French dishes like coq au vin and crème brûlée, then enjoy your creations.",
             "Tuesday-Saturday, 10:00 AM and 6:00 PM sessions", "Morning or evening sessions", True),
            ("Wine & Cheese Tasting", "Culinary", "1.5 hours", "$65", 4.7,
             "Sample French wines and artisanal cheeses with expert sommelier guidance.",
             "Daily at 5:00 PM and 7:30 PM", "Evening sessions", True),
            ("Food Market Tour", "Culinary", "2 hours", "$45", 4.6,
             "Explore authentic Parisian markets and taste local specialties like cheeses, pastries, and charcuterie.",
             "Tuesday, Thursday, Saturday mornings", "Morning (markets are freshest)", False),
        ]
        all_activities = [dict(zip(fields, row)) for row in rows]

        if category:
            # Callers (typically an LLM) may pass "culinary" or " Culture ";
            # an exact match would silently return zero results for those.
            wanted = category.strip().lower()
            activities = [act for act in all_activities if act["category"].lower() == wanted]
        else:
            activities = all_activities
        note = "Activity search results for Paris with sightseeing, culture, and culinary options"
    else:
        activities = [
            {
                "name": "City Walking Tour",
                "category": "Sightseeing",
                "duration": "3 hours",
                "price": "$45",
                "rating": 4.7,
                "description": "Explore the historic downtown area with an expert guide",
                "availability": "Daily at 10:00 AM and 2:00 PM",
            }
        ]
        # Do not claim Paris results for a non-Paris search.
        note = f"Activity search results for {location}"

    return json.dumps({
        "location": location,
        "date": date,
        "category": category,
        "activities_found": len(activities),
        "activities": activities,
        "note": note,
    })
+ """ + # Paris-specific activity details + activity_details_map = { + "Eiffel Tower Summit Access": { + "name": "Eiffel Tower Summit Access", + "description": "Skip-the-line access to all three levels of the Eiffel Tower, including the summit. Enjoy panoramic views of Paris from 276 meters high.", + "duration": "2-3 hours (self-guided)", + "price": "$35 per person", + "included": ["Skip-the-line ticket", "Access to all 3 levels", "Summit access", "Audio guide app"], + "meeting_point": "Eiffel Tower South Pillar entrance, look for priority access line", + "what_to_bring": ["Photo ID", "Comfortable shoes", "Camera", "Light jacket (summit can be windy)"], + "cancellation_policy": "Free cancellation up to 24 hours in advance", + "languages": ["English", "French", "Spanish", "German", "Italian"], + "max_group_size": "No limit", + "rating": 4.8, + "reviews_count": 15234 + }, + "Louvre Museum Guided Tour": { + "name": "Louvre Museum Guided Tour", + "description": "Expert-guided tour of the world's largest art museum, focusing on must-see masterpieces including Mona Lisa, Venus de Milo, and Winged Victory.", + "duration": "3 hours", + "price": "$55 per person", + "included": ["Skip-the-line entry", "Expert art historian guide", "Headsets for groups over 6", "Museum highlights map"], + "meeting_point": "Glass Pyramid main entrance, look for guide with 'Louvre Tours' sign", + "what_to_bring": ["Photo ID", "Comfortable shoes", "Camera (no flash)", "Water bottle"], + "cancellation_policy": "Free cancellation up to 48 hours in advance", + "languages": ["English", "French", "Spanish"], + "max_group_size": 20, + "rating": 4.7, + "reviews_count": 8921 + }, + "French Cooking Class": { + "name": "French Cooking Class", + "description": "Hands-on cooking experience where you'll learn to prepare classic French dishes like coq au vin, ratatouille, and crème brûlée under expert chef guidance.", + "duration": "3 hours", + "price": "$120 per person", + "included": ["All ingredients", 
"Chef instruction", "Apron and recipe booklet", "Wine pairing", "Lunch/dinner of your creations"], + "meeting_point": "Le Chef Cooking Studio, 15 Rue du Bac, 7th arrondissement", + "what_to_bring": ["Appetite", "Camera for food photos"], + "cancellation_policy": "Free cancellation up to 72 hours in advance", + "languages": ["English", "French"], + "max_group_size": 12, + "rating": 4.9, + "reviews_count": 2341 + } + } + + details = activity_details_map.get(activity_name, { + "name": activity_name, + "description": "An immersive experience that showcases the best of local culture and attractions.", + "duration": "3 hours", + "price": "$45 per person", + "included": ["Professional guide", "Entry fees"], + "meeting_point": "Central meeting location", + "what_to_bring": ["Comfortable shoes", "Camera"], + "cancellation_policy": "Free cancellation up to 24 hours in advance", + "languages": ["English"], + "max_group_size": 15, + "rating": 4.5, + "reviews_count": 100 + }) + + return json.dumps({ + "activity_details": details + }) + +def confirm_booking(booking_type: str, booking_id: str, customer_info: dict) -> str: + """ + Confirm a booking reservation. + """ + confirmation_number = f"CONF-{booking_type.upper()}-{booking_id}" + + confirmation_data = { + "confirmation_number": confirmation_number, + "booking_type": booking_type, + "status": "Confirmed", + "customer_name": customer_info.get("name", "Guest"), + "email": customer_info.get("email", "guest@example.com"), + "confirmation_sent": True, + "next_steps": [ + "Check your email for booking details", + "Arrive 30 minutes before scheduled time", + "Bring confirmation number and valid ID" + ] + } + + return json.dumps({ + "confirmation": confirmation_data + }) + +def check_hotel_availability(hotel_name: str, check_in: str, check_out: str, rooms: int = 1) -> str: + """ + Check availability for hotel rooms. 
+ """ + availability_status = "Available" + + availability_data = { + "service_type": "hotel", + "hotel_name": hotel_name, + "check_in": check_in, + "check_out": check_out, + "rooms_requested": rooms, + "status": availability_status, + "available_rooms": 8, + "price_per_night": "$185", + "last_checked": datetime.now().isoformat() + } + + return json.dumps({ + "availability": availability_data + }) + +def check_flight_availability(flight_number: str, date: str, passengers: int = 1) -> str: + """ + Check availability for flight seats. + """ + availability_status = "Available" + + availability_data = { + "service_type": "flight", + "flight_number": flight_number, + "date": date, + "passengers_requested": passengers, + "status": availability_status, + "available_seats": 45, + "price_per_passenger": "$520", + "last_checked": datetime.now().isoformat() + } + + return json.dumps({ + "availability": availability_data + }) + +def check_activity_availability(activity_name: str, date: str, participants: int = 1) -> str: + """ + Check availability for activity bookings. + """ + availability_status = "Available" + + availability_data = { + "service_type": "activity", + "activity_name": activity_name, + "date": date, + "participants_requested": participants, + "status": availability_status, + "available_spots": 15, + "price_per_person": "$45", + "last_checked": datetime.now().isoformat() + } + + return json.dumps({ + "availability": availability_data + }) + +def process_payment(amount: float, currency: str, payment_method: dict, booking_reference: str) -> str: + """ + Process payment for a booking. 
def validate_payment_method(payment_method: dict) -> str:
    """
    Validate payment method details.

    This is a mock tool: only the presence of the required credit-card fields
    is checked; other method types are accepted as-is.

    Args:
        payment_method: Mapping describing the payment method. Recognised keys
            are "type" (defaults to "credit_card"), "number", "expiry" and
            "cvv". A missing mapping (None) is treated like an empty one, so
            it is reported as invalid instead of raising AttributeError.

    Returns:
        A JSON string with a "validation_result" object holding the validity
        flag, the method type, the validation messages, the supported
        currencies and the processing fee.
    """
    # Tool arguments come from a model and may be omitted entirely.
    details = payment_method or {}
    method_type = details.get("type", "credit_card")

    problems = []
    if method_type == "credit_card":
        required_fields = (
            ("number", "Card number is required"),
            ("expiry", "Expiry date is required"),
            ("cvv", "CVV is required"),
        )
        problems = [message for field, message in required_fields if not details.get(field)]

    is_valid = not problems
    validation_result = {
        "is_valid": is_valid,
        "payment_method_type": method_type,
        "validation_messages": ["Payment method is valid"] if is_valid else problems,
        "supported_currencies": ["USD", "EUR", "GBP", "JPY"],
        "processing_fee": "2.5%",
    }

    return json.dumps({
        "validation_result": validation_result
    })
+ +""" +Multi-Agent Travel Planning Workflow Evaluation with Multiple Response Tracking + +This sample demonstrates a multi-agent travel planning workflow using the V2 client that: +1. Processes travel queries through 7 specialized agents +2. Tracks MULTIPLE response and conversation IDs per agent for evaluation +3. Uses the new Prompt Agents API (V2) +4. Captures complete interaction sequences including multiple invocations +5. Aggregates findings through a travel planning coordinator + +WORKFLOW STRUCTURE (7 agents): +- Travel Agent Executor → Hotel Search, Flight Search, Activity Search (fan-out) +- Hotel Search Executor → Booking Information Aggregation Executor +- Flight Search Executor → Booking Information Aggregation Executor +- Booking Information Aggregation Executor → Booking Confirmation Executor +- Booking Confirmation Executor → Booking Payment Executor +- Booking Information Aggregation, Booking Payment, Activity Search → Travel Planning Coordinator (ResearchLead) for final aggregation (fan-in) + +Agents: +1. Travel Agent - Main coordinator (no tools to avoid thread conflicts) +2. Hotel Search - Searches hotels with tools +3. Flight Search - Searches flights with tools +4. Activity Search - Searches activities with tools +5. Booking Information Aggregation - Aggregates hotel & flight booking info +6. Booking Confirmation - Confirms bookings with tools +7. 
Booking Payment - Processes payments with tools +""" + +import asyncio +import json +import os +import sys +from collections import defaultdict +from pathlib import Path +from typing import Dict, List, Optional + +from dotenv import load_dotenv + +# Add the local packages to the path +packages_path = Path(__file__).parent.parent.parent.parent.parent.parent / "packages" +sys.path.insert(0, str(packages_path / "core")) +sys.path.insert(0, str(packages_path / "azure-ai")) + +from agent_framework import ( + AgentExecutorResponse, + AgentRunUpdateEvent, + AgentRunResponseUpdate, + ChatAgent, + ChatMessage, + Executor, + executor, + handler, + Role, + WorkflowContext, + WorkflowBuilder, + WorkflowOutputEvent, +) + +# Import V2 client directly from source file to avoid installed package conflicts +from agent_framework_azure_ai._client import AzureAIClient +from azure.identity.aio import AzureDeveloperCliCredential +from azure.ai.projects.aio import AIProjectClient + +from _tools import ( + # Travel planning tools + search_hotels, + get_hotel_details, + search_flights, + get_flight_details, + search_activities, + confirm_booking, + check_hotel_availability, + check_flight_availability, + process_payment, + validate_payment_method, +) + +load_dotenv() + + +@executor(id="start_executor") +async def start_executor(input: str, ctx: WorkflowContext[List[ChatMessage]]) -> None: + """Initiates the workflow by sending the user query to all specialized agents.""" + await ctx.send_message([ChatMessage(role="user", text=input)]) + + +class ResearchLead(Executor): + """Aggregates and summarizes travel planning findings from all specialized agents.""" + + def __init__(self, chat_client: AzureAIClient, id: str = "travel-planning-coordinator"): + # store=True to preserve conversation history for evaluation + self.agent = chat_client.create_agent( + id="travel-planning-coordinator", + instructions=( + "You are the Travel Planning Coordinator. 
Your role is to synthesize information from multiple " + "specialized travel agents into a cohesive, actionable travel plan. You receive inputs from: " + "hotel search specialists, flight search specialists, activity planners, booking confirmation agents, " + "payment processors, and booking information aggregators. Provide a clear, comprehensive travel plan " + "that addresses the user's original query with all necessary details including accommodations, " + "transportation, activities, and booking status." + ), + name="travel-planning-coordinator", + store=True + ) + super().__init__(id=id) + + @handler + async def fan_in_handle(self, responses: List[AgentExecutorResponse], ctx: WorkflowContext[WorkflowOutputEvent]) -> None: + user_query = responses[0].full_conversation[0].text + + # Extract findings from all agent responses + agent_findings = self._extract_agent_findings(responses) + summary_text = "\n".join(agent_findings) if agent_findings else "No specific findings were provided by the agents." + + # Generate comprehensive travel plan summary + messages = [ + ChatMessage(role=Role.SYSTEM, text="You are a travel planning coordinator. 
async def run_workflow_with_response_tracking(query: str, chat_client: Optional[AzureAIClient] = None) -> Dict:
    """Run multi-agent workflow and track conversation IDs, response IDs, and interaction sequence.

    When no client is supplied, a credential, a project client and an
    AzureAIClient are created for the duration of the call and closed again
    afterwards, even if client construction or the workflow itself fails.

    Args:
        query: The user query to process through the multi-agent workflow
        chat_client: Optional AzureAIClient instance. When given, it is used
            as-is and is not closed by this function.

    Returns:
        Dictionary containing interaction sequence, conversation/response IDs, and conversation analysis

    Raises:
        KeyError: If AZURE_AI_PROJECT_ENDPOINT is not set and no client was supplied.
    """
    if chat_client is not None:
        return await _run_workflow_with_client(query, chat_client)

    # Use AzureDeveloperCliCredential to avoid Azure CLI timeout issues
    credential = AzureDeveloperCliCredential()
    project_client = None
    try:
        # Built inside the try block so the credential is still closed when the
        # endpoint variable is missing or the project client cannot be created.
        # Create AIProjectClient with the correct API version for V2 prompt agents
        project_client = AIProjectClient(
            endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
            credential=credential,
            api_version="2025-11-15-preview",
        )

        async with AzureAIClient(
            project_client=project_client,
            async_credential=credential
        ) as client:
            return await _run_workflow_with_client(query, client)
    finally:
        # Nested so a failure while closing one resource cannot skip the other.
        try:
            if project_client is not None:
                await project_client.close()
        finally:
            await credential.close()
chat_client.project_client.agents.delete(agent_name=agent_to_delete.name) + # print(f"Deleted agent: {agent_name}") + # except Exception as e: + # print(f"Failed to delete agent {agent_name}: {e}") + + return { + "conversation_ids": dict(conversation_ids), + "response_ids": dict(response_ids), + "output": workflow_output, + "query": query + } + + +async def _create_workflow(project_client, credential): + """Create the multi-agent travel planning workflow with specialized agents. + + IMPORTANT: Each agent needs its own client instance because the V2 client stores + agent_name and agent_version as instance variables, causing all agents to share + the same agent identity if they share a client. + """ + + # Create separate client for Final Coordinator + final_coordinator_client = AzureAIClient( + project_client=project_client, + async_credential=credential, + agent_name="final-coordinator" + ) + final_coordinator = ResearchLead(chat_client=final_coordinator_client, id="final-coordinator") + + # Update final_coordinator agent instructions + final_coordinator.agent.instructions = ( + "You are the final coordinator. You will receive responses from multiple agents: " + "booking-info-aggregation-agent (hotel/flight options), booking-payment-agent (payment confirmation), " + "and activity-search-agent (activities). " + "Review each agent's response, then create a comprehensive travel itinerary organized by: " + "1. Flights 2. Hotels 3. Activities 4. Booking confirmations 5. Payment details. " + "Clearly indicate which information came from which agent. Do not use tools." 
+ ) + + # Agent 1: Travel Request Handler (initial coordinator) + # Create separate client with unique agent_name + travel_request_handler_client = AzureAIClient( + project_client=project_client, + async_credential=credential, + agent_name="travel-request-handler" + ) + travel_request_handler = travel_request_handler_client.create_agent( + id="travel-request-handler", + instructions=( + "You receive user travel queries and relay them to specialized agents. Extract key information: destination, dates, budget, and preferences. Pass this information forward clearly to the next agents." + ), + name="travel-request-handler", + store=True + ) + + # Agent 2: Hotel Search Executor + hotel_search_client = AzureAIClient( + project_client=project_client, + async_credential=credential, + agent_name="hotel-search-agent" + ) + hotel_search_agent = hotel_search_client.create_agent( + id="hotel-search-agent", + instructions=( + "You are a hotel search specialist. Your task is ONLY to search for and provide hotel information. Use search_hotels to find options, get_hotel_details for specifics, and check_availability to verify rooms. Output format: List hotel names, prices per night, total cost for the stay, locations, ratings, amenities, and addresses. IMPORTANT: Only provide hotel information without additional commentary." + ), + name="hotel-search-agent", + tools=[search_hotels, get_hotel_details, check_hotel_availability], + store=True + ) + + # Agent 3: Flight Search Executor + flight_search_client = AzureAIClient( + project_client=project_client, + async_credential=credential, + agent_name="flight-search-agent" + ) + flight_search_agent = flight_search_client.create_agent( + id="flight-search-agent", + instructions=( + "You are a flight search specialist. Your task is ONLY to search for and provide flight information. Use search_flights to find options, get_flight_details for specifics, and check_availability for seats. 
Output format: List flight numbers, airlines, departure/arrival times, prices, durations, and cabin class. IMPORTANT: Only provide flight information without additional commentary." + ), + name="flight-search-agent", + tools=[search_flights, get_flight_details, check_flight_availability], + store=True + ) + + # Agent 4: Activity Search Executor + activity_search_client = AzureAIClient( + project_client=project_client, + async_credential=credential, + agent_name="activity-search-agent" + ) + activity_search_agent = activity_search_client.create_agent( + id="activity-search-agent", + instructions=( + "You are an activities specialist. Your task is ONLY to search for and provide activity information. Use search_activities to find options for activities. Output format: List activity names, descriptions, prices, durations, ratings, and categories. IMPORTANT: Only provide activity information without additional commentary." + ), + name="activity-search-agent", + tools=[search_activities], + store=True + ) + + # Agent 5: Booking Confirmation Executor + booking_confirmation_client = AzureAIClient( + project_client=project_client, + async_credential=credential, + agent_name="booking-confirmation-agent" + ) + booking_confirmation_agent = booking_confirmation_client.create_agent( + id="booking-confirmation-agent", + instructions=( + "You confirm bookings. Use check_hotel_availability and check_flight_availability to verify slots, then confirm_booking to finalize. Provide ONLY: confirmation numbers, booking references, and confirmation status." 
+ ), + name="booking-confirmation-agent", + tools=[confirm_booking, check_hotel_availability, check_flight_availability], + store=True + ) + + # Agent 6: Booking Payment Executor + booking_payment_client = AzureAIClient( + project_client=project_client, + async_credential=credential, + agent_name="booking-payment-agent" + ) + booking_payment_agent = booking_payment_client.create_agent( + id="booking-payment-agent", + instructions=( + "You process payments. Use validate_payment_method to verify payment, then process_payment to complete transactions. Provide ONLY: payment confirmation status, transaction IDs, and payment amounts." + ), + name="booking-payment-agent", + tools=[process_payment, validate_payment_method], + store=True + ) + + # Agent 7: Booking Information Aggregation Executor + booking_info_client = AzureAIClient( + project_client=project_client, + async_credential=credential, + agent_name="booking-info-aggregation-agent" + ) + booking_info_aggregation_agent = booking_info_client.create_agent( + id="booking-info-aggregation-agent", + instructions=( + "You aggregate hotel and flight search results. Receive options from search agents and organize them. Provide: top 2-3 hotel options with prices and top 2-3 flight options with prices in a structured format." + ), + name="booking-info-aggregation-agent", + store=True + ) + + # Build workflow with logical booking flow: + # 1. start_executor → travel_request_handler + # 2. travel_request_handler → hotel_search, flight_search, activity_search (fan-out) + # 3. hotel_search → booking_info_aggregation + # 4. flight_search → booking_info_aggregation + # 5. booking_info_aggregation → booking_confirmation + # 6. booking_confirmation → booking_payment + # 7. 
booking_info_aggregation, booking_payment, activity_search → final_coordinator (final aggregation, fan-in) + # + # Max iterations set to 10 (though shouldn't be needed without cycles) + # store=True preserves conversation history on each agent's thread for evaluation + + workflow = (WorkflowBuilder(name='Travel Planning Workflow') + .set_start_executor(start_executor) + .add_edge(start_executor, travel_request_handler) + .add_fan_out_edges(travel_request_handler, [hotel_search_agent, flight_search_agent, activity_search_agent]) + .add_edge(hotel_search_agent, booking_info_aggregation_agent) + .add_edge(flight_search_agent, booking_info_aggregation_agent) + .add_edge(booking_info_aggregation_agent, booking_confirmation_agent) + .add_edge(booking_confirmation_agent, booking_payment_agent) + .add_fan_in_edges([booking_info_aggregation_agent, booking_payment_agent, activity_search_agent], + final_coordinator) + .build()) + + # Return workflow and agent map for thread ID extraction + agent_map = { + "travel_request_handler": travel_request_handler, + "hotel-search-agent": hotel_search_agent, + "flight-search-agent": flight_search_agent, + "activity-search-agent": activity_search_agent, + "booking-confirmation-agent": booking_confirmation_agent, + "booking-payment-agent": booking_payment_agent, + "booking-info-aggregation-agent": booking_info_aggregation_agent, + "final-coordinator": final_coordinator.agent, + } + + return workflow, agent_map + + +async def _process_workflow_events(events, conversation_ids, response_ids): + """Process workflow events and track interactions.""" + workflow_output = None + + async for event in events: + if isinstance(event, WorkflowOutputEvent): + workflow_output = event.data + # Handle Unicode characters that may not be displayable in Windows console + try: + print(f"\nWorkflow Output: {event.data}\n") + except UnicodeEncodeError: + output_str = str(event.data).encode('ascii', 'replace').decode('ascii') + print(f"\nWorkflow Output: 
{output_str}\n") + + elif isinstance(event, AgentRunUpdateEvent): + _track_agent_ids(event, event.executor_id, response_ids, conversation_ids) + + return workflow_output + + +def _track_agent_ids(event, agent, response_ids, conversation_ids): + """Track agent response and conversation IDs - supporting multiple responses per agent.""" + if isinstance(event.data, AgentRunResponseUpdate): + # Check for conversation_id and response_id from raw_representation + # V2 API stores conversation_id directly on raw_representation (ChatResponseUpdate) + if hasattr(event.data, 'raw_representation') and event.data.raw_representation: + raw = event.data.raw_representation + + # Try conversation_id directly on raw (this is the V2 pattern) + if hasattr(raw, 'conversation_id') and raw.conversation_id: + # Only add if not already in the list + if raw.conversation_id not in conversation_ids[agent]: + conversation_ids[agent].append(raw.conversation_id) + + # Extract response_id from the OpenAI event (available from first event) + if hasattr(raw, 'raw_representation') and raw.raw_representation: + openai_event = raw.raw_representation + + # Check if event has response object with id + if hasattr(openai_event, 'response') and hasattr(openai_event.response, 'id'): + # Only add if not already in the list + if openai_event.response.id not in response_ids[agent]: + response_ids[agent].append(openai_event.response.id) + + +async def create_and_run_workflow(): + """Run the workflow evaluation and display results. + + Returns: + Dictionary containing agents data with conversation IDs, response IDs, and query information + """ + example_queries = [ + "Plan a 3-day trip to Paris from December 15-18, 2025. Budget is $2000. 
Need hotel near Eiffel Tower, round-trip flights from New York JFK, and recommend 2-3 activities per day.", + "Find a budget hotel in Tokyo for January 5-10, 2026 under $150/night near Shibuya station, book activities including a sushi making class", + "Search for round-trip flights from Los Angeles to London departing March 20, 2026, returning March 27, 2026. Economy class, 2 passengers. Recommend tourist attractions and museums.", + ] + + query = example_queries[0] + print(f"Query: {query}\n") + + result = await run_workflow_with_response_tracking(query) + + # Create output data structure + output_data = { + "agents": {}, + "query": result["query"], + "output": result.get("output", "") + } + + # Create agent-specific mappings - now with lists of IDs + all_agents = set(result["conversation_ids"].keys()) | set(result["response_ids"].keys()) + for agent_name in all_agents: + output_data["agents"][agent_name] = { + "conversation_ids": result["conversation_ids"].get(agent_name, []), + "response_ids": result["response_ids"].get(agent_name, []), + "response_count": len(result["response_ids"].get(agent_name, [])) + } + + print(f"\nTotal agents tracked: {len(output_data['agents'])}") + + # Print summary of multiple responses + print("\n=== Multi-Response Summary ===") + for agent_name, agent_data in output_data["agents"].items(): + response_count = agent_data["response_count"] + print(f"{agent_name}: {response_count} response(s)") + + return output_data + + + +def main(): + """Main function to run the workflow evaluation example.""" + asyncio.run(create_and_run_workflow()) + + +if __name__ == "__main__": + main() diff --git a/python/samples/demos/workflow_evaluation/run_evaluation.py b/python/samples/demos/workflow_evaluation/run_evaluation.py new file mode 100644 index 0000000000..b2adfd8b83 --- /dev/null +++ b/python/samples/demos/workflow_evaluation/run_evaluation.py @@ -0,0 +1,219 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +""" +Script to run multi-agent travel planning workflow and evaluate agent responses. + +This script: +1. Executes the multi-agent workflow +2. Displays response data summary +3. Creates and runs evaluation with multiple evaluators +4. Monitors evaluation progress and displays results +""" + +import asyncio +import os +import time + +from azure.ai.projects import AIProjectClient +from azure.identity import DefaultAzureCredential +from dotenv import load_dotenv + +from create_workflow import create_and_run_workflow + + +def print_section(title: str): + """Print a formatted section header.""" + print(f"\n{'='*80}") + print(f"{title}") + print(f"{'='*80}") + + +async def run_workflow(): + """Execute the multi-agent travel planning workflow. + + Returns: + Dictionary containing workflow data with agent response IDs + """ + print_section("Step 1: Running Workflow") + print("Executing multi-agent travel planning workflow...") + print("This may take a few minutes...") + + workflow_data = await create_and_run_workflow() + + print("Workflow execution completed") + return workflow_data + + +def display_response_summary(workflow_data: dict): + """Display summary of response data.""" + print_section("Step 2: Response Data Summary") + + print(f"Query: {workflow_data['query']}") + print(f"\nAgents tracked: {len(workflow_data['agents'])}") + + for agent_name, agent_data in workflow_data['agents'].items(): + response_count = agent_data['response_count'] + print(f" {agent_name}: {response_count} response(s)") + + +def fetch_agent_responses(openai_client, workflow_data: dict, agent_names: list): + """Fetch and display final responses from specified agents.""" + print_section("Step 3: Fetching Agent Responses") + + for agent_name in agent_names: + if agent_name not in workflow_data['agents']: + continue + + agent_data = workflow_data['agents'][agent_name] + if not agent_data['response_ids']: + continue + + final_response_id = agent_data['response_ids'][-1] + 
print(f"\n{agent_name}") + print(f" Response ID: {final_response_id}") + + try: + response = openai_client.responses.retrieve(response_id=final_response_id) + content = response.output[-1].content[-1].text + truncated = content[:300] + "..." if len(content) > 300 else content + print(f" Content preview: {truncated}") + except Exception as e: + print(f" Error: {e}") + + +def create_evaluation(openai_client, model_deployment: str): + """Create evaluation with multiple evaluators.""" + print_section("Step 4: Creating Evaluation") + + data_source_config = {"type": "azure_ai_source", "scenario": "responses"} + + testing_criteria = [ + { + "type": "azure_ai_evaluator", + "name": "relevance", + "evaluator_name": "builtin.relevance", + "initialization_parameters": {"deployment_name": model_deployment} + }, + { + "type": "azure_ai_evaluator", + "name": "groundedness", + "evaluator_name": "builtin.groundedness", + "initialization_parameters": {"deployment_name": model_deployment} + }, + { + "type": "azure_ai_evaluator", + "name": "tool_call_accuracy", + "evaluator_name": "builtin.tool_call_accuracy", + "initialization_parameters": {"deployment_name": model_deployment} + }, + { + "type": "azure_ai_evaluator", + "name": "tool_output_utilization", + "evaluator_name": "builtin.tool_output_utilization", + "initialization_parameters": {"deployment_name": model_deployment} + }, + ] + + eval_object = openai_client.evals.create( + name="Travel Workflow Multi-Evaluator Assessment", + data_source_config=data_source_config, + testing_criteria=testing_criteria, + ) + + print(f"Evaluation created: {eval_object.id}") + print(f"Evaluators: {len(testing_criteria)}") + + return eval_object + + +def run_evaluation(openai_client, eval_object, workflow_data: dict, agent_names: list): + """Run evaluation on selected agent responses.""" + print_section("Step 5: Running Evaluation") + + selected_response_ids = [] + for agent_name in agent_names: + if agent_name in workflow_data['agents']: + 
agent_data = workflow_data['agents'][agent_name] + if agent_data['response_ids']: + selected_response_ids.append(agent_data['response_ids'][-1]) + + print(f"Selected {len(selected_response_ids)} responses for evaluation") + + data_source = { + "type": "azure_ai_responses", + "item_generation_params": { + "type": "response_retrieval", + "data_mapping": {"response_id": "{{item.resp_id}}"}, + "source": { + "type": "file_content", + "content": [{"item": {"resp_id": resp_id}} for resp_id in selected_response_ids] + }, + }, + } + + eval_run = openai_client.evals.runs.create( + eval_id=eval_object.id, + name="Multi-Agent Response Evaluation", + data_source=data_source + ) + + print(f"Evaluation run created: {eval_run.id}") + + return eval_run + + +def monitor_evaluation(openai_client, eval_object, eval_run): + """Monitor evaluation progress and display results.""" + print_section("Step 6: Monitoring Evaluation") + + print("Waiting for evaluation to complete...") + + while eval_run.status not in ["completed", "failed"]: + eval_run = openai_client.evals.runs.retrieve( + run_id=eval_run.id, + eval_id=eval_object.id + ) + print(f"Status: {eval_run.status}") + time.sleep(5) + + if eval_run.status == "completed": + print("\nEvaluation completed successfully") + print(f"Result counts: {eval_run.result_counts}") + print(f"\nReport URL: {eval_run.report_url}") + else: + print("\nEvaluation failed") + + +async def main(): + """Main execution flow.""" + load_dotenv() + + print("Travel Planning Workflow Evaluation") + + workflow_data = await run_workflow() + + display_response_summary(workflow_data) + + project_client = AIProjectClient( + endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"], + credential=DefaultAzureCredential(), + api_version="2025-11-15-preview" + ) + openai_client = project_client.get_openai_client() + + agents_to_evaluate = ["hotel-search-agent", "flight-search-agent", "activity-search-agent"] + + fetch_agent_responses(openai_client, workflow_data, 
agents_to_evaluate) + + model_deployment = os.environ.get("AZURE_AI_MODEL_DEPLOYMENT_NAME", "gpt-4o-mini") + eval_object = create_evaluation(openai_client, model_deployment) + + eval_run = run_evaluation(openai_client, eval_object, workflow_data, agents_to_evaluate) + + monitor_evaluation(openai_client, eval_object, eval_run) + + print_section("Complete") + + +if __name__ == "__main__": + asyncio.run(main()) From 85555d989fa76d5b14f57d0a33f159e5e36f7319 Mon Sep 17 00:00:00 2001 From: Salma Elshafey Date: Wed, 19 Nov 2025 19:29:58 +0200 Subject: [PATCH 2/9] Upgrade syntax --- .../demos/workflow_evaluation/create_workflow.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/python/samples/demos/workflow_evaluation/create_workflow.py b/python/samples/demos/workflow_evaluation/create_workflow.py index bf2d8337fe..9b33656baa 100644 --- a/python/samples/demos/workflow_evaluation/create_workflow.py +++ b/python/samples/demos/workflow_evaluation/create_workflow.py @@ -81,7 +81,7 @@ @executor(id="start_executor") -async def start_executor(input: str, ctx: WorkflowContext[List[ChatMessage]]) -> None: +async def start_executor(input: str, ctx: WorkflowContext[list[ChatMessage]]) -> None: """Initiates the workflow by sending the user query to all specialized agents.""" await ctx.send_message([ChatMessage(role="user", text=input)]) @@ -107,7 +107,7 @@ def __init__(self, chat_client: AzureAIClient, id: str = "travel-planning-coordi super().__init__(id=id) @handler - async def fan_in_handle(self, responses: List[AgentExecutorResponse], ctx: WorkflowContext[WorkflowOutputEvent]) -> None: + async def fan_in_handle(self, responses: list[AgentExecutorResponse], ctx: WorkflowContext[WorkflowOutputEvent]) -> None: user_query = responses[0].full_conversation[0].text # Extract findings from all agent responses @@ -129,7 +129,7 @@ async def fan_in_handle(self, responses: List[AgentExecutorResponse], ctx: Workf await ctx.yield_output(output_text) - def 
_extract_agent_findings(self, responses: List[AgentExecutorResponse]) -> List[str]: + def _extract_agent_findings(self, responses: list[AgentExecutorResponse]) -> list[str]: """Extract findings from agent responses.""" agent_findings = [] @@ -147,7 +147,7 @@ def _extract_agent_findings(self, responses: List[AgentExecutorResponse]) -> Lis return agent_findings -async def run_workflow_with_response_tracking(query: str, chat_client: Optional[AzureAIClient] = None) -> Dict: +async def run_workflow_with_response_tracking(query: str, chat_client: AzureAIClient | None = None) -> dict: """Run multi-agent workflow and track conversation IDs, response IDs, and interaction sequence. Args: @@ -181,7 +181,7 @@ async def run_workflow_with_response_tracking(query: str, chat_client: Optional[ return await _run_workflow_with_client(query, chat_client) -async def _run_workflow_with_client(query: str, chat_client: AzureAIClient) -> Dict: +async def _run_workflow_with_client(query: str, chat_client: AzureAIClient) -> dict: """Execute workflow with given client and track all interactions.""" # Initialize tracking variables - use lists to track multiple responses per agent From d33ce3cee8dd9ac7133a26c1949de47d3dd31186 Mon Sep 17 00:00:00 2001 From: Salma Elshafey Date: Wed, 19 Nov 2025 19:42:26 +0200 Subject: [PATCH 3/9] Add copyright line --- python/samples/demos/workflow_evaluation/_tools.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/samples/demos/workflow_evaluation/_tools.py b/python/samples/demos/workflow_evaluation/_tools.py index 420d1ece9e..0adb665030 100644 --- a/python/samples/demos/workflow_evaluation/_tools.py +++ b/python/samples/demos/workflow_evaluation/_tools.py @@ -1,3 +1,5 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ import json from datetime import datetime From 1f07e5eb7a51e67796e78ac5e294687bce1f9381 Mon Sep 17 00:00:00 2001 From: Salma Elshafey Date: Wed, 19 Nov 2025 19:57:57 +0200 Subject: [PATCH 4/9] import fix --- .../demos/workflow_evaluation/create_workflow.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/python/samples/demos/workflow_evaluation/create_workflow.py b/python/samples/demos/workflow_evaluation/create_workflow.py index 9b33656baa..caf07484a8 100644 --- a/python/samples/demos/workflow_evaluation/create_workflow.py +++ b/python/samples/demos/workflow_evaluation/create_workflow.py @@ -29,25 +29,15 @@ """ import asyncio -import json import os -import sys from collections import defaultdict -from pathlib import Path -from typing import Dict, List, Optional from dotenv import load_dotenv -# Add the local packages to the path -packages_path = Path(__file__).parent.parent.parent.parent.parent.parent / "packages" -sys.path.insert(0, str(packages_path / "core")) -sys.path.insert(0, str(packages_path / "azure-ai")) - from agent_framework import ( AgentExecutorResponse, AgentRunUpdateEvent, AgentRunResponseUpdate, - ChatAgent, ChatMessage, Executor, executor, @@ -58,9 +48,8 @@ WorkflowOutputEvent, ) -# Import V2 client directly from source file to avoid installed package conflicts from agent_framework_azure_ai._client import AzureAIClient -from azure.identity.aio import AzureDeveloperCliCredential +from azure.identity.aio import DefaultAzureCredential from azure.ai.projects.aio import AIProjectClient from _tools import ( @@ -158,8 +147,7 @@ async def run_workflow_with_response_tracking(query: str, chat_client: AzureAICl Dictionary containing interaction sequence, conversation/response IDs, and conversation analysis """ if chat_client is None: - # Use AzureDeveloperCliCredential to avoid Azure CLI timeout issues - credential = AzureDeveloperCliCredential() + credential = DefaultAzureCredential() # Create AIProjectClient with the 
correct API version for V2 prompt agents project_client = AIProjectClient( From e8ba4680ac62b302c58adcc30ae3e74e318c5549 Mon Sep 17 00:00:00 2001 From: Salma Elshafey Date: Wed, 19 Nov 2025 20:03:57 +0200 Subject: [PATCH 5/9] import fix --- python/samples/demos/workflow_evaluation/create_workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/samples/demos/workflow_evaluation/create_workflow.py b/python/samples/demos/workflow_evaluation/create_workflow.py index caf07484a8..611d08ba6f 100644 --- a/python/samples/demos/workflow_evaluation/create_workflow.py +++ b/python/samples/demos/workflow_evaluation/create_workflow.py @@ -48,7 +48,7 @@ WorkflowOutputEvent, ) -from agent_framework_azure_ai._client import AzureAIClient +from agent_framework.azure import AzureAIClient from azure.identity.aio import DefaultAzureCredential from azure.ai.projects.aio import AIProjectClient From f0e8f87645f4967ece56bd3f169b853ce5a27a9d Mon Sep 17 00:00:00 2001 From: Salma Elshafey Date: Thu, 20 Nov 2025 13:00:09 +0200 Subject: [PATCH 6/9] address pr comments --- .../demos/workflow_evaluation/.env.example | 2 + .../demos/workflow_evaluation/README.md | 7 +- .../demos/workflow_evaluation/_tools.py | 116 ++++++++++++++---- .../workflow_evaluation/create_workflow.py | 84 +++++-------- 4 files changed, 123 insertions(+), 86 deletions(-) create mode 100644 python/samples/demos/workflow_evaluation/.env.example diff --git a/python/samples/demos/workflow_evaluation/.env.example b/python/samples/demos/workflow_evaluation/.env.example new file mode 100644 index 0000000000..3a13025d22 --- /dev/null +++ b/python/samples/demos/workflow_evaluation/.env.example @@ -0,0 +1,2 @@ +AZURE_AI_PROJECT_ENDPOINT="" +AZURE_AI_MODEL_DEPLOYMENT_NAME="" \ No newline at end of file diff --git a/python/samples/demos/workflow_evaluation/README.md b/python/samples/demos/workflow_evaluation/README.md index ee9aa7ffd6..d687e4ce14 100644 --- a/python/samples/demos/workflow_evaluation/README.md 
+++ b/python/samples/demos/workflow_evaluation/README.md @@ -13,12 +13,7 @@ The evaluation uses four Azure AI built-in evaluators: ## Setup -Create a `.env` file with required configuration: - -```env -AZURE_AI_PROJECT_ENDPOINT= -AZURE_AI_MODEL_DEPLOYMENT_NAME= -``` +Create a `.env` file with configuration as in the `.env.example` file in this folder. ## Running the Evaluation diff --git a/python/samples/demos/workflow_evaluation/_tools.py b/python/samples/demos/workflow_evaluation/_tools.py index 0adb665030..e8b70a4472 100644 --- a/python/samples/demos/workflow_evaluation/_tools.py +++ b/python/samples/demos/workflow_evaluation/_tools.py @@ -4,10 +4,17 @@ from datetime import datetime # --- Travel Planning Tools --- +# Note: These are mock tools for demonstration purposes. They return simulated data +# and do not make real API calls or bookings. + +# Mock hotel search tool def search_hotels(location: str, check_in: str, check_out: str, guests: int = 2) -> str: - """ - Search for available hotels based on location and dates. + """Search for available hotels based on location and dates. + + Returns: + JSON string containing search results with hotel details including name, rating, + price, distance to landmarks, amenities, and availability. """ # Specific mock data for Paris December 15-18, 2025 if "paris" in location.lower(): @@ -64,9 +71,14 @@ def search_hotels(location: str, check_in: str, check_out: str, guests: int = 2) "note": "Hotel search results matching your query" }) + +# Mock hotel details tool def get_hotel_details(hotel_name: str) -> str: - """ - Get detailed information about a specific hotel. + """Get detailed information about a specific hotel. + + Returns: + JSON string containing detailed hotel information including description, + check-in/out times, cancellation policy, reviews, and nearby attractions. 
""" hotel_details = { "Hotel Eiffel Trocadéro": { @@ -131,9 +143,14 @@ def get_hotel_details(hotel_name: str) -> str: "details": details }) + +# Mock flight search tool def search_flights(origin: str, destination: str, departure_date: str, return_date: str = None, passengers: int = 1) -> str: - """ - Search for available flights between two locations. + """Search for available flights between two locations. + + Returns: + JSON string containing flight search results with details including flight numbers, + airlines, departure/arrival times, prices, durations, and baggage allowances. """ # Specific mock data for JFK to Paris December 15-18, 2025 if "jfk" in origin.lower() or "new york" in origin.lower(): @@ -245,9 +262,14 @@ def search_flights(origin: str, destination: str, departure_date: str, return_da "note": "Flight search results for JFK to Paris CDG" }) + +# Mock flight details tool def get_flight_details(flight_number: str) -> str: - """ - Get detailed information about a specific flight. + """Get detailed information about a specific flight. + + Returns: + JSON string containing detailed flight information including airline, aircraft type, + departure/arrival airports and times, gates, terminals, duration, and amenities. """ mock_details = { "flight_number": flight_number, @@ -277,9 +299,14 @@ def get_flight_details(flight_number: str) -> str: "flight_details": mock_details }) + +# Mock activity search tool def search_activities(location: str, date: str = None, category: str = None) -> str: - """ - Search for available activities and attractions at a destination. + """Search for available activities and attractions at a destination. + + Returns: + JSON string containing activity search results with details including name, category, + duration, price, rating, description, availability, and booking requirements. 
""" # Specific mock data for Paris activities if "paris" in location.lower(): @@ -411,9 +438,14 @@ def search_activities(location: str, date: str = None, category: str = None) -> "note": "Activity search results for Paris with sightseeing, culture, and culinary options" }) + +# Mock activity details tool def get_activity_details(activity_name: str) -> str: - """ - Get detailed information about a specific activity. + """Get detailed information about a specific activity. + + Returns: + JSON string containing detailed activity information including description, duration, + price, included items, meeting point, what to bring, cancellation policy, and reviews. """ # Paris-specific activity details activity_details_map = { @@ -480,9 +512,14 @@ def get_activity_details(activity_name: str) -> str: "activity_details": details }) + +# Mock booking confirmation tool def confirm_booking(booking_type: str, booking_id: str, customer_info: dict) -> str: - """ - Confirm a booking reservation. + """Confirm a booking reservation. + + Returns: + JSON string containing confirmation details including confirmation number, + booking status, customer information, and next steps. """ confirmation_number = f"CONF-{booking_type.upper()}-{booking_id}" @@ -504,9 +541,16 @@ def confirm_booking(booking_type: str, booking_id: str, customer_info: dict) -> "confirmation": confirmation_data }) + +# Mock hotel availability check tool def check_hotel_availability(hotel_name: str, check_in: str, check_out: str, rooms: int = 1) -> str: - """ - Check availability for hotel rooms. + """Check availability for hotel rooms. + + Sample Date format: "December 15, 2025" + + Returns: + JSON string containing availability status, available rooms count, price per night, + and last checked timestamp. 
""" availability_status = "Available" @@ -526,9 +570,16 @@ def check_hotel_availability(hotel_name: str, check_in: str, check_out: str, roo "availability": availability_data }) + +# Mock flight availability check tool def check_flight_availability(flight_number: str, date: str, passengers: int = 1) -> str: - """ - Check availability for flight seats. + """Check availability for flight seats. + + Sample Date format: "December 15, 2025" + + Returns: + JSON string containing availability status, available seats count, price per passenger, + and last checked timestamp. """ availability_status = "Available" @@ -547,9 +598,16 @@ def check_flight_availability(flight_number: str, date: str, passengers: int = 1 "availability": availability_data }) + +# Mock activity availability check tool def check_activity_availability(activity_name: str, date: str, participants: int = 1) -> str: - """ - Check availability for activity bookings. + """Check availability for activity bookings. + + Sample Date format: "December 16, 2025" + + Returns: + JSON string containing availability status, available spots count, price per person, + and last checked timestamp. """ availability_status = "Available" @@ -568,9 +626,14 @@ def check_activity_availability(activity_name: str, date: str, participants: int "availability": availability_data }) + +# Mock payment processing tool def process_payment(amount: float, currency: str, payment_method: dict, booking_reference: str) -> str: - """ - Process payment for a booking. + """Process payment for a booking. + + Returns: + JSON string containing payment result with transaction ID, status, amount, currency, + payment method details, and receipt URL. 
""" transaction_id = f"TXN-{datetime.now().strftime('%Y%m%d%H%M%S')}" @@ -590,9 +653,14 @@ def process_payment(amount: float, currency: str, payment_method: dict, booking_ "payment_result": payment_result }) + +# Mock payment validation tool def validate_payment_method(payment_method: dict) -> str: - """ - Validate payment method details. + """Validate payment method details. + + Returns: + JSON string containing validation result with is_valid flag, payment method type, + validation messages, supported currencies, and processing fee information. """ method_type = payment_method.get("type", "credit_card") diff --git a/python/samples/demos/workflow_evaluation/create_workflow.py b/python/samples/demos/workflow_evaluation/create_workflow.py index 611d08ba6f..c33396fc9d 100644 --- a/python/samples/demos/workflow_evaluation/create_workflow.py +++ b/python/samples/demos/workflow_evaluation/create_workflow.py @@ -3,7 +3,7 @@ """ Multi-Agent Travel Planning Workflow Evaluation with Multiple Response Tracking -This sample demonstrates a multi-agent travel planning workflow using the V2 client that: +This sample demonstrates a multi-agent travel planning workflow using the Azure AI Client that: 1. Processes travel queries through 7 specialized agents 2. Tracks MULTIPLE response and conversation IDs per agent for evaluation 3. Uses the new Prompt Agents API (V2) @@ -47,6 +47,7 @@ WorkflowBuilder, WorkflowOutputEvent, ) +from typing_extensions import Never from agent_framework.azure import AzureAIClient from azure.identity.aio import DefaultAzureCredential @@ -83,12 +84,12 @@ def __init__(self, chat_client: AzureAIClient, id: str = "travel-planning-coordi self.agent = chat_client.create_agent( id="travel-planning-coordinator", instructions=( - "You are the Travel Planning Coordinator. Your role is to synthesize information from multiple " - "specialized travel agents into a cohesive, actionable travel plan. 
You receive inputs from: " - "hotel search specialists, flight search specialists, activity planners, booking confirmation agents, " - "payment processors, and booking information aggregators. Provide a clear, comprehensive travel plan " - "that addresses the user's original query with all necessary details including accommodations, " - "transportation, activities, and booking status." + "You are the final coordinator. You will receive responses from multiple agents: " + "booking-info-aggregation-agent (hotel/flight options), booking-payment-agent (payment confirmation), " + "and activity-search-agent (activities). " + "Review each agent's response, then create a comprehensive travel itinerary organized by: " + "1. Flights 2. Hotels 3. Activities 4. Booking confirmations 5. Payment details. " + "Clearly indicate which information came from which agent. Do not use tools." ), name="travel-planning-coordinator", store=True @@ -96,7 +97,7 @@ def __init__(self, chat_client: AzureAIClient, id: str = "travel-planning-coordi super().__init__(id=id) @handler - async def fan_in_handle(self, responses: list[AgentExecutorResponse], ctx: WorkflowContext[WorkflowOutputEvent]) -> None: + async def fan_in_handle(self, responses: list[AgentExecutorResponse], ctx: WorkflowContext[Never, str]) -> None: user_query = responses[0].full_conversation[0].text # Extract findings from all agent responses @@ -147,24 +148,25 @@ async def run_workflow_with_response_tracking(query: str, chat_client: AzureAICl Dictionary containing interaction sequence, conversation/response IDs, and conversation analysis """ if chat_client is None: - credential = DefaultAzureCredential() - - # Create AIProjectClient with the correct API version for V2 prompt agents - project_client = AIProjectClient( - endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"], - credential=credential, - api_version="2025-11-15-preview", - ) - try: - async with AzureAIClient( - project_client=project_client, - async_credential=credential 
- ) as client: + credential = DefaultAzureCredential() + + # Create AIProjectClient with the correct API version for V2 prompt agents + project_client = AIProjectClient( + endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"], + credential=credential, + api_version="2025-11-15-preview", + ) + + async with ( + credential, + project_client, + AzureAIClient(project_client=project_client, async_credential=credential) as client + ): return await _run_workflow_with_client(query, client) - finally: - await credential.close() - await project_client.close() + except Exception as e: + print(f"Error during workflow execution: {e}") + raise else: return await _run_workflow_with_client(query, chat_client) @@ -188,17 +190,6 @@ async def _run_workflow_with_client(query: str, chat_client: AzureAIClient) -> d events = workflow.run_stream(query) workflow_output = await _process_workflow_events(events, conversation_ids, response_ids) - # # Delete all agents after workflow completion - # print("\n=== Cleaning up agents ===") - # for agent_name, agent in agent_map.items(): - # try: - # # Get the actual agent object - # agent_to_delete = agent.agent if hasattr(agent, 'agent') else agent - # chat_client.project_client.agents.delete(agent_name=agent_to_delete.name) - # print(f"Deleted agent: {agent_name}") - # except Exception as e: - # print(f"Failed to delete agent {agent_name}: {e}") - return { "conversation_ids": dict(conversation_ids), "response_ids": dict(response_ids), @@ -223,16 +214,6 @@ async def _create_workflow(project_client, credential): ) final_coordinator = ResearchLead(chat_client=final_coordinator_client, id="final-coordinator") - # Update final_coordinator agent instructions - final_coordinator.agent.instructions = ( - "You are the final coordinator. You will receive responses from multiple agents: " - "booking-info-aggregation-agent (hotel/flight options), booking-payment-agent (payment confirmation), " - "and activity-search-agent (activities). 
" - "Review each agent's response, then create a comprehensive travel itinerary organized by: " - "1. Flights 2. Hotels 3. Activities 4. Booking confirmations 5. Payment details. " - "Clearly indicate which information came from which agent. Do not use tools." - ) - # Agent 1: Travel Request Handler (initial coordinator) # Create separate client with unique agent_name travel_request_handler_client = AzureAIClient( @@ -352,9 +333,6 @@ async def _create_workflow(project_client, credential): # 5. booking_info_aggregation → booking_confirmation # 6. booking_confirmation → booking_payment # 7. booking_info_aggregation, booking_payment, activity_search → final_coordinator (final aggregation, fan-in) - # - # Max iterations set to 10 (though shouldn't be needed without cycles) - # store=True preserves conversation history on each agent's thread for evaluation workflow = (WorkflowBuilder(name='Travel Planning Workflow') .set_start_executor(start_executor) @@ -411,7 +389,7 @@ def _track_agent_ids(event, agent, response_ids, conversation_ids): if hasattr(event.data, 'raw_representation') and event.data.raw_representation: raw = event.data.raw_representation - # Try conversation_id directly on raw (this is the V2 pattern) + # Try conversation_id directly on raw representation if hasattr(raw, 'conversation_id') and raw.conversation_id: # Only add if not already in the list if raw.conversation_id not in conversation_ids[agent]: @@ -472,11 +450,5 @@ async def create_and_run_workflow(): return output_data - -def main(): - """Main function to run the workflow evaluation example.""" - asyncio.run(create_and_run_workflow()) - - if __name__ == "__main__": - main() + asyncio.run(create_and_run_workflow()) From 4a0fae0f1320e008d862a002e777d6db21584213 Mon Sep 17 00:00:00 2001 From: Salma Elshafey Date: Thu, 20 Nov 2025 13:23:46 +0200 Subject: [PATCH 7/9] Python: Workflow eval sample - print evaluator names --- python/samples/demos/workflow_evaluation/run_evaluation.py | 3 ++- 1 file 
changed, 2 insertions(+), 1 deletion(-) diff --git a/python/samples/demos/workflow_evaluation/run_evaluation.py b/python/samples/demos/workflow_evaluation/run_evaluation.py index b2adfd8b83..610f7ade00 100644 --- a/python/samples/demos/workflow_evaluation/run_evaluation.py +++ b/python/samples/demos/workflow_evaluation/run_evaluation.py @@ -120,8 +120,9 @@ def create_evaluation(openai_client, model_deployment: str): testing_criteria=testing_criteria, ) + evaluator_names = [criterion["name"] for criterion in testing_criteria] print(f"Evaluation created: {eval_object.id}") - print(f"Evaluators: {len(testing_criteria)}") + print(f"Evaluators ({len(evaluator_names)}): {', '.join(evaluator_names)}") return eval_object From 1835806d747a3e3cd15386e3f3e0bd96e55bca8d Mon Sep 17 00:00:00 2001 From: Salma Elshafey Date: Thu, 20 Nov 2025 21:10:38 +0200 Subject: [PATCH 8/9] Python: Workflow eval - address PR comments --- .../demos/workflow_evaluation/_tools.py | 86 ++++++++++++++++--- .../workflow_evaluation/create_workflow.py | 4 +- 2 files changed, 75 insertions(+), 15 deletions(-) diff --git a/python/samples/demos/workflow_evaluation/_tools.py b/python/samples/demos/workflow_evaluation/_tools.py index e8b70a4472..eca03544b2 100644 --- a/python/samples/demos/workflow_evaluation/_tools.py +++ b/python/samples/demos/workflow_evaluation/_tools.py @@ -2,6 +2,10 @@ import json from datetime import datetime +from typing import Annotated + +from agent_framework import ai_function +from pydantic import Field # --- Travel Planning Tools --- # Note: These are mock tools for demonstration purposes. 
They return simulated data @@ -9,7 +13,13 @@ # Mock hotel search tool -def search_hotels(location: str, check_in: str, check_out: str, guests: int = 2) -> str: +@ai_function(name="search_hotels", description="Search for available hotels based on location and dates.") +def search_hotels( + location: Annotated[str, Field(description="City or region to search for hotels.")], + check_in: Annotated[str, Field(description="Check-in date (e.g., 'December 15, 2025').")], + check_out: Annotated[str, Field(description="Check-out date (e.g., 'December 18, 2025').")], + guests: Annotated[int, Field(description="Number of guests.")] = 2, +) -> str: """Search for available hotels based on location and dates. Returns: @@ -73,7 +83,10 @@ def search_hotels(location: str, check_in: str, check_out: str, guests: int = 2) # Mock hotel details tool -def get_hotel_details(hotel_name: str) -> str: +@ai_function(name="get_hotel_details", description="Get detailed information about a specific hotel.") +def get_hotel_details( + hotel_name: Annotated[str, Field(description="Name of the hotel to get details for.")], +) -> str: """Get detailed information about a specific hotel. 
Returns: @@ -145,7 +158,14 @@ def get_hotel_details(hotel_name: str) -> str: # Mock flight search tool -def search_flights(origin: str, destination: str, departure_date: str, return_date: str = None, passengers: int = 1) -> str: +@ai_function(name="search_flights", description="Search for available flights between two locations.") +def search_flights( + origin: Annotated[str, Field(description="Departure airport or city (e.g., 'JFK' or 'New York').")], + destination: Annotated[str, Field(description="Arrival airport or city (e.g., 'CDG' or 'Paris').")], + departure_date: Annotated[str, Field(description="Departure date (e.g., 'December 15, 2025').")], + return_date: Annotated[str | None, Field(description="Return date (e.g., 'December 18, 2025').")] = None, + passengers: Annotated[int, Field(description="Number of passengers.")] = 1, +) -> str: """Search for available flights between two locations. Returns: @@ -264,7 +284,10 @@ def search_flights(origin: str, destination: str, departure_date: str, return_da # Mock flight details tool -def get_flight_details(flight_number: str) -> str: +@ai_function(name="get_flight_details", description="Get detailed information about a specific flight.") +def get_flight_details( + flight_number: Annotated[str, Field(description="Flight number (e.g., 'AF007' or 'DL264').")], +) -> str: """Get detailed information about a specific flight. 
Returns: @@ -301,7 +324,12 @@ def get_flight_details(flight_number: str) -> str: # Mock activity search tool -def search_activities(location: str, date: str = None, category: str = None) -> str: +@ai_function(name="search_activities", description="Search for available activities and attractions at a destination.") +def search_activities( + location: Annotated[str, Field(description="City or region to search for activities.")], + date: Annotated[str | None, Field(description="Date for the activity (e.g., 'December 16, 2025').")] = None, + category: Annotated[str | None, Field(description="Activity category (e.g., 'Sightseeing', 'Culture', 'Culinary').")] = None, +) -> str: """Search for available activities and attractions at a destination. Returns: @@ -440,7 +468,10 @@ def search_activities(location: str, date: str = None, category: str = None) -> # Mock activity details tool -def get_activity_details(activity_name: str) -> str: +@ai_function(name="get_activity_details", description="Get detailed information about a specific activity.") +def get_activity_details( + activity_name: Annotated[str, Field(description="Name of the activity to get details for.")], +) -> str: """Get detailed information about a specific activity. Returns: @@ -514,7 +545,12 @@ def get_activity_details(activity_name: str) -> str: # Mock booking confirmation tool -def confirm_booking(booking_type: str, booking_id: str, customer_info: dict) -> str: +@ai_function(name="confirm_booking", description="Confirm a booking reservation.") +def confirm_booking( + booking_type: Annotated[str, Field(description="Type of booking (e.g., 'hotel', 'flight', 'activity').")], + booking_id: Annotated[str, Field(description="Unique booking identifier.")], + customer_info: Annotated[dict, Field(description="Customer information including name and email.")], +) -> str: """Confirm a booking reservation. 
Returns: @@ -543,7 +579,13 @@ def confirm_booking(booking_type: str, booking_id: str, customer_info: dict) -> # Mock hotel availability check tool -def check_hotel_availability(hotel_name: str, check_in: str, check_out: str, rooms: int = 1) -> str: +@ai_function(name="check_hotel_availability", description="Check availability for hotel rooms.") +def check_hotel_availability( + hotel_name: Annotated[str, Field(description="Name of the hotel to check availability for.")], + check_in: Annotated[str, Field(description="Check-in date (e.g., 'December 15, 2025').")], + check_out: Annotated[str, Field(description="Check-out date (e.g., 'December 18, 2025').")], + rooms: Annotated[int, Field(description="Number of rooms needed.")] = 1, +) -> str: """Check availability for hotel rooms. Sample Date format: "December 15, 2025" @@ -572,7 +614,12 @@ def check_hotel_availability(hotel_name: str, check_in: str, check_out: str, roo # Mock flight availability check tool -def check_flight_availability(flight_number: str, date: str, passengers: int = 1) -> str: +@ai_function(name="check_flight_availability", description="Check availability for flight seats.") +def check_flight_availability( + flight_number: Annotated[str, Field(description="Flight number to check availability for.")], + date: Annotated[str, Field(description="Flight date (e.g., 'December 15, 2025').")], + passengers: Annotated[int, Field(description="Number of passengers.")] = 1, +) -> str: """Check availability for flight seats. 
Sample Date format: "December 15, 2025" @@ -600,7 +647,12 @@ def check_flight_availability(flight_number: str, date: str, passengers: int = 1 # Mock activity availability check tool -def check_activity_availability(activity_name: str, date: str, participants: int = 1) -> str: +@ai_function(name="check_activity_availability", description="Check availability for activity bookings.") +def check_activity_availability( + activity_name: Annotated[str, Field(description="Name of the activity to check availability for.")], + date: Annotated[str, Field(description="Activity date (e.g., 'December 16, 2025').")], + participants: Annotated[int, Field(description="Number of participants.")] = 1, +) -> str: """Check availability for activity bookings. Sample Date format: "December 16, 2025" @@ -628,7 +680,13 @@ def check_activity_availability(activity_name: str, date: str, participants: int # Mock payment processing tool -def process_payment(amount: float, currency: str, payment_method: dict, booking_reference: str) -> str: +@ai_function(name="process_payment", description="Process payment for a booking.") +def process_payment( + amount: Annotated[float, Field(description="Payment amount.")], + currency: Annotated[str, Field(description="Currency code (e.g., 'USD', 'EUR').")], + payment_method: Annotated[dict, Field(description="Payment method details (type, card info).")], + booking_reference: Annotated[str, Field(description="Booking reference number for the payment.")], +) -> str: """Process payment for a booking. 
Returns: @@ -654,8 +712,12 @@ def process_payment(amount: float, currency: str, payment_method: dict, booking_ }) + # Mock payment validation tool -def validate_payment_method(payment_method: dict) -> str: +@ai_function(name="validate_payment_method", description="Validate a payment method before processing.") +def validate_payment_method( + payment_method: Annotated[dict, Field(description="Payment method to validate (type, number, expiry, cvv).")], +) -> str: """Validate payment method details. Returns: diff --git a/python/samples/demos/workflow_evaluation/create_workflow.py b/python/samples/demos/workflow_evaluation/create_workflow.py index c33396fc9d..ca8b62e2ef 100644 --- a/python/samples/demos/workflow_evaluation/create_workflow.py +++ b/python/samples/demos/workflow_evaluation/create_workflow.py @@ -149,8 +149,6 @@ async def run_workflow_with_response_tracking(query: str, chat_client: AzureAICl """ if chat_client is None: try: - credential = DefaultAzureCredential() - # Create AIProjectClient with the correct API version for V2 prompt agents project_client = AIProjectClient( endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"], @@ -159,7 +157,7 @@ async def run_workflow_with_response_tracking(query: str, chat_client: AzureAICl ) async with ( - credential, + DefaultAzureCredential() as credential, project_client, AzureAIClient(project_client=project_client, async_credential=credential) as client ): From 4557d7a260b8f323fb7a9e943353b02be4d6a407 Mon Sep 17 00:00:00 2001 From: Salma Elshafey Date: Thu, 20 Nov 2025 21:18:01 +0200 Subject: [PATCH 9/9] Update samples readme --- python/samples/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/python/samples/README.md b/python/samples/README.md index 65f77645f2..15905e186f 100644 --- a/python/samples/README.md +++ b/python/samples/README.md @@ -186,6 +186,7 @@ This directory contains samples demonstrating the capabilities of Microsoft Agen |------|-------------| | 
[`getting_started/evaluation/azure_ai_foundry/red_teaming/red_team_agent_sample.py`](./getting_started/evaluation/azure_ai_foundry/red_teaming/red_team_agent_sample.py) | Red team agent evaluation sample for Azure AI Foundry | | [`getting_started/evaluation/azure_ai_foundry/self_reflection/self_reflection.py`](./getting_started/evaluation/azure_ai_foundry/self_reflection/self_reflection.py) | LLM self-reflection with AI Foundry graders example | +| [`demos/workflow_evaluation/run_evaluation.py`](./demos/workflow_evaluation/run_evaluation.py) | Multi-agent workflow evaluation demo with travel planning agents evaluated using Azure AI Foundry evaluators | ## MCP (Model Context Protocol)