diff --git a/python/samples/README.md b/python/samples/README.md index dbfb00e5a7..bb7d01527a 100644 --- a/python/samples/README.md +++ b/python/samples/README.md @@ -187,6 +187,7 @@ This directory contains samples demonstrating the capabilities of Microsoft Agen |------|-------------| | [`getting_started/evaluation/red_teaming/red_team_agent_sample.py`](./getting_started/evaluation/red_teaming/red_team_agent_sample.py) | Red team agent evaluation sample for Azure AI Foundry | | [`getting_started/evaluation/self_reflection/self_reflection.py`](./getting_started/evaluation/self_reflection/self_reflection.py) | LLM self-reflection with AI Foundry graders example | +| [`demos/workflow_evaluation/run_evaluation.py`](./demos/workflow_evaluation/run_evaluation.py) | Multi-agent workflow evaluation demo with travel planning agents evaluated using Azure AI Foundry evaluators | ## MCP (Model Context Protocol) diff --git a/python/samples/demos/workflow_evaluation/.env.example b/python/samples/demos/workflow_evaluation/.env.example new file mode 100644 index 0000000000..3a13025d22 --- /dev/null +++ b/python/samples/demos/workflow_evaluation/.env.example @@ -0,0 +1,2 @@ +AZURE_AI_PROJECT_ENDPOINT="" +AZURE_AI_MODEL_DEPLOYMENT_NAME="" \ No newline at end of file diff --git a/python/samples/demos/workflow_evaluation/README.md b/python/samples/demos/workflow_evaluation/README.md new file mode 100644 index 0000000000..d687e4ce14 --- /dev/null +++ b/python/samples/demos/workflow_evaluation/README.md @@ -0,0 +1,30 @@ +# Multi-Agent Travel Planning Workflow Evaluation + +This sample demonstrates evaluating a multi-agent workflow using Azure AI's built-in evaluators. The workflow processes travel planning requests through seven specialized agents in a fan-out/fan-in pattern: travel request handler, hotel/flight/activity search agents, booking aggregator, booking confirmation, and payment processing. 
+ +## Evaluation Metrics + +The evaluation uses four Azure AI built-in evaluators: + +- **Relevance** - How well responses address the user query +- **Groundedness** - Whether responses are grounded in available context +- **Tool Call Accuracy** - Correct tool selection and parameter usage +- **Tool Output Utilization** - Effective use of tool outputs in responses + +## Setup + +Create a `.env` file with configuration as in the `.env.example` file in this folder. + +## Running the Evaluation + +Execute the complete workflow and evaluation: + +```bash +python run_evaluation.py +``` + +The script will: +1. Execute the multi-agent travel planning workflow +2. Display response summary for each agent +3. Create and run evaluation on hotel, flight, and activity search agents +4. Monitor progress and display the evaluation report URL diff --git a/python/samples/demos/workflow_evaluation/_tools.py b/python/samples/demos/workflow_evaluation/_tools.py new file mode 100644 index 0000000000..eca03544b2 --- /dev/null +++ b/python/samples/demos/workflow_evaluation/_tools.py @@ -0,0 +1,754 @@ +# Copyright (c) Microsoft. All rights reserved. + +import json +from datetime import datetime +from typing import Annotated + +from agent_framework import ai_function +from pydantic import Field + +# --- Travel Planning Tools --- +# Note: These are mock tools for demonstration purposes. They return simulated data +# and do not make real API calls or bookings. 
# Mock hotel search tool
@ai_function(name="search_hotels", description="Search for available hotels based on location and dates.")
def search_hotels(
    location: Annotated[str, Field(description="City or region to search for hotels.")],
    check_in: Annotated[str, Field(description="Check-in date (e.g., 'December 15, 2025').")],
    check_out: Annotated[str, Field(description="Check-out date (e.g., 'December 18, 2025').")],
    guests: Annotated[int, Field(description="Number of guests.")] = 2,
) -> str:
    """Search for available hotels based on location and dates.

    This is a mock: any location containing "paris" (case-insensitive) gets a
    fixed list of three hotels near the Eiffel Tower; every other location gets
    a single generic hotel. The dates and guest count are echoed back unchanged
    and do not influence the results.

    Returns:
        JSON string containing the echoed search parameters, the number of
        hotels found, the hotel records, and a short note.
    """
    # Default to the generic result; swap in the Paris data set when it applies.
    hotels = [
        dict(
            name="Grand Plaza Hotel",
            rating=4.5,
            price_per_night="$150",
            amenities=["WiFi", "Pool", "Gym", "Restaurant"],
            availability="Available",
        )
    ]

    if "paris" in location.lower():
        # Specific mock data for Paris December 15-18, 2025
        hotels = [
            dict(
                name="Hotel Eiffel Trocadéro",
                rating=4.6,
                price_per_night="$185",
                total_price="$555 for 3 nights",
                distance_to_eiffel_tower="0.3 miles",
                amenities=["WiFi", "Breakfast", "Eiffel Tower View", "Concierge"],
                availability="Available",
                address="35 Rue Benjamin Franklin, 16th arr., Paris",
            ),
            dict(
                name="Mercure Paris Centre Tour Eiffel",
                rating=4.4,
                price_per_night="$220",
                total_price="$660 for 3 nights",
                distance_to_eiffel_tower="0.5 miles",
                amenities=["WiFi", "Restaurant", "Bar", "Gym", "Air Conditioning"],
                availability="Available",
                address="20 Rue Jean Rey, 15th arr., Paris",
            ),
            dict(
                name="Pullman Paris Tour Eiffel",
                rating=4.7,
                price_per_night="$280",
                total_price="$840 for 3 nights",
                distance_to_eiffel_tower="0.2 miles",
                amenities=["WiFi", "Spa", "Gym", "Restaurant", "Rooftop Bar", "Concierge"],
                availability="Limited",
                address="18 Avenue de Suffren, 15th arr., Paris",
            ),
        ]

    search_result = {
        "location": location,
        "check_in": check_in,
        "check_out": check_out,
        "guests": guests,
        "hotels_found": len(hotels),
        "hotels": hotels,
        "note": "Hotel search results matching your query",
    }
    return json.dumps(search_result)
# Mock flight search tool
@ai_function(name="search_flights", description="Search for available flights between two locations.")
def search_flights(
    origin: Annotated[str, Field(description="Departure airport or city (e.g., 'JFK' or 'New York').")],
    destination: Annotated[str, Field(description="Arrival airport or city (e.g., 'CDG' or 'Paris').")],
    departure_date: Annotated[str, Field(description="Departure date (e.g., 'December 15, 2025').")],
    return_date: Annotated[str | None, Field(description="Return date (e.g., 'December 18, 2025').")] = None,
    passengers: Annotated[int, Field(description="Number of passengers.")] = 1,
) -> str:
    """Search for available flights between two locations.

    This is a mock with three canned outcomes, chosen by case-insensitive
    substring matching on the arguments:

    * origin contains "jfk"/"new york" and destination contains "paris"/"cdg":
      three fixed round-trip itineraries for December 15-18, 2025.
    * origin matches but destination does not: one flat placeholder record.
    * any other origin: one generic outbound flight dated ``departure_date``.

    ``return_date`` and ``passengers`` are echoed back but do not affect results.

    Returns:
        JSON string containing the echoed search parameters, the number of
        flights found, the flight records, and a note naming the route the
        results are for.
    """
    origin_key = origin.lower()
    destination_key = destination.lower()
    from_new_york = "jfk" in origin_key or "new york" in origin_key
    to_paris = "paris" in destination_key or "cdg" in destination_key

    # The note must describe the route actually searched; previously it always
    # claimed "JFK to Paris CDG", even for the generic fallback data.
    note = f"Flight search results for {origin} to {destination}"

    if from_new_york and to_paris:
        # Specific mock data for JFK to Paris December 15-18, 2025
        note = "Flight search results for JFK to Paris CDG"
        mock_flights = [
            {
                "outbound": {
                    "flight_number": "AF007",
                    "airline": "Air France",
                    "departure": "December 15, 2025 at 6:30 PM",
                    "arrival": "December 16, 2025 at 8:15 AM",
                    "duration": "7h 45m",
                    "aircraft": "Boeing 777-300ER",
                    "class": "Economy",
                    "price": "$520"
                },
                "return": {
                    "flight_number": "AF008",
                    "airline": "Air France",
                    "departure": "December 18, 2025 at 11:00 AM",
                    "arrival": "December 18, 2025 at 2:15 PM",
                    "duration": "8h 15m",
                    "aircraft": "Airbus A350-900",
                    "class": "Economy",
                    "price": "Included"
                },
                "total_price": "$520",
                "stops": "Nonstop",
                "baggage": "1 checked bag included"
            },
            {
                "outbound": {
                    "flight_number": "DL264",
                    "airline": "Delta",
                    "departure": "December 15, 2025 at 10:15 PM",
                    "arrival": "December 16, 2025 at 12:05 PM",
                    "duration": "7h 50m",
                    "aircraft": "Airbus A330-900neo",
                    "class": "Economy",
                    "price": "$485"
                },
                "return": {
                    "flight_number": "DL265",
                    "airline": "Delta",
                    "departure": "December 18, 2025 at 1:45 PM",
                    "arrival": "December 18, 2025 at 5:00 PM",
                    "duration": "8h 15m",
                    "aircraft": "Airbus A330-900neo",
                    "class": "Economy",
                    "price": "Included"
                },
                "total_price": "$485",
                "stops": "Nonstop",
                "baggage": "1 checked bag included"
            },
            {
                "outbound": {
                    "flight_number": "UA57",
                    "airline": "United Airlines",
                    "departure": "December 15, 2025 at 5:00 PM",
                    "arrival": "December 16, 2025 at 6:50 AM",
                    "duration": "7h 50m",
                    "aircraft": "Boeing 767-400ER",
                    "class": "Economy",
                    "price": "$560"
                },
                "return": {
                    "flight_number": "UA58",
                    "airline": "United Airlines",
                    "departure": "December 18, 2025 at 9:30 AM",
                    "arrival": "December 18, 2025 at 12:45 PM",
                    "duration": "8h 15m",
                    "aircraft": "Boeing 787-10",
                    "class": "Economy",
                    "price": "Included"
                },
                "total_price": "$560",
                "stops": "Nonstop",
                "baggage": "1 checked bag included"
            }
        ]
    elif from_new_york:
        # Known origin, unknown destination: flat placeholder record.
        mock_flights = [{"flight_number": "XX123", "airline": "Generic Air", "price": "$400", "note": "Generic route"}]
    else:
        # Unknown origin: one generic same-day outbound flight.
        mock_flights = [
            {
                "outbound": {
                    "flight_number": "AA123",
                    "airline": "Generic Airlines",
                    "departure": f"{departure_date} at 9:00 AM",
                    "arrival": f"{departure_date} at 2:30 PM",
                    "duration": "5h 30m",
                    "class": "Economy",
                    "price": "$350"
                },
                "total_price": "$350",
                "stops": "Nonstop"
            }
        ]

    return json.dumps({
        "origin": origin,
        "destination": destination,
        "departure_date": departure_date,
        "return_date": return_date,
        "passengers": passengers,
        "flights_found": len(mock_flights),
        "flights": mock_flights,
        "note": note
    })
# Mock activity search tool
@ai_function(name="search_activities", description="Search for available activities and attractions at a destination.")
def search_activities(
    location: Annotated[str, Field(description="City or region to search for activities.")],
    date: Annotated[str | None, Field(description="Date for the activity (e.g., 'December 16, 2025').")] = None,
    category: Annotated[str | None, Field(description="Activity category (e.g., 'Sightseeing', 'Culture', 'Culinary').")] = None,
) -> str:
    """Search for available activities and attractions at a destination.

    This is a mock: any location containing "paris" (case-insensitive) gets a
    fixed catalogue of nine activities, optionally narrowed to one category;
    every other location gets a single generic walking tour and ``category``
    is ignored. ``date`` is echoed back but does not affect results.

    The category comparison ignores case and surrounding whitespace, so
    "sightseeing" and "Sightseeing" select the same activities.

    Returns:
        JSON string containing the echoed search parameters, the number of
        activities found, the activity records, and a note describing what the
        results cover.
    """
    if "paris" in location.lower():
        # Specific mock data for Paris activities
        all_activities = [
            {
                "name": "Eiffel Tower Summit Access",
                "category": "Sightseeing",
                "duration": "2-3 hours",
                "price": "$35",
                "rating": 4.8,
                "description": "Skip-the-line access to all three levels including the summit. Best views of Paris!",
                "availability": "Daily 9:30 AM - 11:00 PM",
                "best_time": "Early morning or sunset",
                "booking_required": True
            },
            {
                "name": "Louvre Museum Guided Tour",
                "category": "Sightseeing",
                "duration": "3 hours",
                "price": "$55",
                "rating": 4.7,
                "description": "Expert-guided tour covering masterpieces including Mona Lisa and Venus de Milo.",
                "availability": "Daily except Tuesdays, 9:00 AM entry",
                "best_time": "Morning entry recommended",
                "booking_required": True
            },
            {
                "name": "Seine River Cruise",
                "category": "Sightseeing",
                "duration": "1 hour",
                "price": "$18",
                "rating": 4.6,
                "description": "Scenic cruise past Notre-Dame, Eiffel Tower, and historic bridges.",
                "availability": "Every 30 minutes, 10:00 AM - 10:00 PM",
                "best_time": "Evening for illuminated monuments",
                "booking_required": False
            },
            {
                "name": "Musée d'Orsay Visit",
                "category": "Culture",
                "duration": "2-3 hours",
                "price": "$16",
                "rating": 4.7,
                "description": "Impressionist masterpieces in a stunning Beaux-Arts railway station.",
                "availability": "Tuesday-Sunday 9:30 AM - 6:00 PM",
                "best_time": "Weekday mornings",
                "booking_required": True
            },
            {
                "name": "Versailles Palace Day Trip",
                "category": "Culture",
                "duration": "5-6 hours",
                "price": "$75",
                "rating": 4.9,
                "description": "Explore the opulent palace and stunning gardens of Louis XIV (includes transport).",
                "availability": "Daily except Mondays, 8:00 AM departure",
                "best_time": "Full day trip",
                "booking_required": True
            },
            {
                "name": "Montmartre Walking Tour",
                "category": "Culture",
                "duration": "2.5 hours",
                "price": "$25",
                "rating": 4.6,
                "description": "Discover the artistic heart of Paris, including Sacré-Cœur and artists' square.",
                "availability": "Daily at 10:00 AM and 2:00 PM",
                "best_time": "Morning or late afternoon",
                "booking_required": False
            },
            {
                "name": "French Cooking Class",
                "category": "Culinary",
                "duration": "3 hours",
                "price": "$120",
                "rating": 4.9,
                "description": "Learn to make classic French dishes like coq au vin and crème brûlée, then enjoy your creations.",
                "availability": "Tuesday-Saturday, 10:00 AM and 6:00 PM sessions",
                "best_time": "Morning or evening sessions",
                "booking_required": True
            },
            {
                "name": "Wine & Cheese Tasting",
                "category": "Culinary",
                "duration": "1.5 hours",
                "price": "$65",
                "rating": 4.7,
                "description": "Sample French wines and artisanal cheeses with expert sommelier guidance.",
                "availability": "Daily at 5:00 PM and 7:30 PM",
                "best_time": "Evening sessions",
                "booking_required": True
            },
            {
                "name": "Food Market Tour",
                "category": "Culinary",
                "duration": "2 hours",
                "price": "$45",
                "rating": 4.6,
                "description": "Explore authentic Parisian markets and taste local specialties like cheeses, pastries, and charcuterie.",
                "availability": "Tuesday, Thursday, Saturday mornings",
                "best_time": "Morning (markets are freshest)",
                "booking_required": False
            }
        ]

        if category:
            # The caller is an LLM, so tolerate casing/whitespace differences
            # instead of silently returning zero activities for "sightseeing".
            wanted = category.strip().casefold()
            activities = [act for act in all_activities if act["category"].casefold() == wanted]
        else:
            activities = all_activities
        note = "Activity search results for Paris with sightseeing, culture, and culinary options"
    else:
        activities = [
            {
                "name": "City Walking Tour",
                "category": "Sightseeing",
                "duration": "3 hours",
                "price": "$45",
                "rating": 4.7,
                "description": "Explore the historic downtown area with an expert guide",
                "availability": "Daily at 10:00 AM and 2:00 PM"
            }
        ]
        # Do not claim these are Paris results when another location was searched.
        note = f"Activity search results for {location}"

    return json.dumps({
        "location": location,
        "date": date,
        "category": category,
        "activities_found": len(activities),
        "activities": activities,
        "note": note
    })
information about a specific activity. + + Returns: + JSON string containing detailed activity information including description, duration, + price, included items, meeting point, what to bring, cancellation policy, and reviews. + """ + # Paris-specific activity details + activity_details_map = { + "Eiffel Tower Summit Access": { + "name": "Eiffel Tower Summit Access", + "description": "Skip-the-line access to all three levels of the Eiffel Tower, including the summit. Enjoy panoramic views of Paris from 276 meters high.", + "duration": "2-3 hours (self-guided)", + "price": "$35 per person", + "included": ["Skip-the-line ticket", "Access to all 3 levels", "Summit access", "Audio guide app"], + "meeting_point": "Eiffel Tower South Pillar entrance, look for priority access line", + "what_to_bring": ["Photo ID", "Comfortable shoes", "Camera", "Light jacket (summit can be windy)"], + "cancellation_policy": "Free cancellation up to 24 hours in advance", + "languages": ["English", "French", "Spanish", "German", "Italian"], + "max_group_size": "No limit", + "rating": 4.8, + "reviews_count": 15234 + }, + "Louvre Museum Guided Tour": { + "name": "Louvre Museum Guided Tour", + "description": "Expert-guided tour of the world's largest art museum, focusing on must-see masterpieces including Mona Lisa, Venus de Milo, and Winged Victory.", + "duration": "3 hours", + "price": "$55 per person", + "included": ["Skip-the-line entry", "Expert art historian guide", "Headsets for groups over 6", "Museum highlights map"], + "meeting_point": "Glass Pyramid main entrance, look for guide with 'Louvre Tours' sign", + "what_to_bring": ["Photo ID", "Comfortable shoes", "Camera (no flash)", "Water bottle"], + "cancellation_policy": "Free cancellation up to 48 hours in advance", + "languages": ["English", "French", "Spanish"], + "max_group_size": 20, + "rating": 4.7, + "reviews_count": 8921 + }, + "French Cooking Class": { + "name": "French Cooking Class", + "description": "Hands-on cooking 
# Mock booking confirmation tool
@ai_function(name="confirm_booking", description="Confirm a booking reservation.")
def confirm_booking(
    booking_type: Annotated[str, Field(description="Type of booking (e.g., 'hotel', 'flight', 'activity').")],
    booking_id: Annotated[str, Field(description="Unique booking identifier.")],
    customer_info: Annotated[dict, Field(description="Customer information including name and email.")],
) -> str:
    """Confirm a booking reservation.

    This is a mock: every booking is reported as confirmed. The confirmation
    number is derived from the upper-cased booking type and the booking id;
    missing customer name/email fall back to placeholder values.

    Returns:
        JSON string with a single "confirmation" object holding the
        confirmation number, booking type, status, customer name and email,
        a confirmation-sent flag, and a list of next steps.
    """
    guest_name = customer_info.get("name", "Guest")
    guest_email = customer_info.get("email", "guest@example.com")

    receipt = {
        "confirmation": {
            "confirmation_number": f"CONF-{booking_type.upper()}-{booking_id}",
            "booking_type": booking_type,
            "status": "Confirmed",
            "customer_name": guest_name,
            "email": guest_email,
            "confirmation_sent": True,
            "next_steps": [
                "Check your email for booking details",
                "Arrive 30 minutes before scheduled time",
                "Bring confirmation number and valid ID",
            ],
        }
    }
    return json.dumps(receipt)
# Mock flight availability check tool
@ai_function(name="check_flight_availability", description="Check availability for flight seats.")
def check_flight_availability(
    flight_number: Annotated[str, Field(description="Flight number to check availability for.")],
    date: Annotated[str, Field(description="Flight date (e.g., 'December 15, 2025').")],
    passengers: Annotated[int, Field(description="Number of passengers.")] = 1,
) -> str:
    """Check availability for flight seats.

    Sample Date format: "December 15, 2025"

    This is a mock: every flight is reported as available with a fixed seat
    count and price; the arguments are only echoed back.

    Returns:
        JSON string with a single "availability" object holding the echoed
        request, the status, the available seat count, the price per
        passenger, and the time the check was made.
    """
    seat_report = {
        "service_type": "flight",
        "flight_number": flight_number,
        "date": date,
        "passengers_requested": passengers,
        "status": "Available",
        "available_seats": 45,
        "price_per_passenger": "$520",
        # Local wall-clock time of this call, ISO 8601 formatted.
        "last_checked": datetime.now().isoformat(),
    }
    return json.dumps({"availability": seat_report})
# Mock payment processing tool
@ai_function(name="process_payment", description="Process payment for a booking.")
def process_payment(
    amount: Annotated[float, Field(description="Payment amount.")],
    currency: Annotated[str, Field(description="Currency code (e.g., 'USD', 'EUR').")],
    payment_method: Annotated[dict, Field(description="Payment method details (type, card info).")],
    booking_reference: Annotated[str, Field(description="Booking reference number for the payment.")],
) -> str:
    """Process payment for a booking.

    This is a mock: no charge is made and every payment is reported as
    successful. The transaction id is derived from the current local time at
    one-second resolution.

    Returns:
        JSON string with a single "payment_result" object holding the
        transaction ID, amount, currency, status, payment method type, last
        four digits, booking reference, timestamp, and receipt URL.
    """
    # Read the clock once so the transaction id and the timestamp always refer
    # to the same instant (two separate reads could straddle a second boundary).
    processed_at = datetime.now()
    transaction_id = f"TXN-{processed_at.strftime('%Y%m%d%H%M%S')}"

    payment_result = {
        "transaction_id": transaction_id,
        "amount": amount,
        "currency": currency,
        "status": "Success",
        "payment_method": payment_method.get("type", "Credit Card"),
        "last_4_digits": payment_method.get("last_4", "****"),
        "booking_reference": booking_reference,
        "timestamp": processed_at.isoformat(),
        "receipt_url": f"https://payments.travelagency.com/receipt/{transaction_id}"
    }

    return json.dumps({
        "payment_result": payment_result
    })
+ """ + method_type = payment_method.get("type", "credit_card") + + # Validation logic + is_valid = True + validation_messages = [] + + if method_type == "credit_card": + if not payment_method.get("number"): + is_valid = False + validation_messages.append("Card number is required") + if not payment_method.get("expiry"): + is_valid = False + validation_messages.append("Expiry date is required") + if not payment_method.get("cvv"): + is_valid = False + validation_messages.append("CVV is required") + + validation_result = { + "is_valid": is_valid, + "payment_method_type": method_type, + "validation_messages": validation_messages if not is_valid else ["Payment method is valid"], + "supported_currencies": ["USD", "EUR", "GBP", "JPY"], + "processing_fee": "2.5%" + } + + return json.dumps({ + "validation_result": validation_result + }) diff --git a/python/samples/demos/workflow_evaluation/create_workflow.py b/python/samples/demos/workflow_evaluation/create_workflow.py new file mode 100644 index 0000000000..ca8b62e2ef --- /dev/null +++ b/python/samples/demos/workflow_evaluation/create_workflow.py @@ -0,0 +1,452 @@ +# Copyright (c) Microsoft. All rights reserved. + +""" +Multi-Agent Travel Planning Workflow Evaluation with Multiple Response Tracking + +This sample demonstrates a multi-agent travel planning workflow using the Azure AI Client that: +1. Processes travel queries through 7 specialized agents +2. Tracks MULTIPLE response and conversation IDs per agent for evaluation +3. Uses the new Prompt Agents API (V2) +4. Captures complete interaction sequences including multiple invocations +5. 
Aggregates findings through a travel planning coordinator + +WORKFLOW STRUCTURE (7 agents): +- Travel Agent Executor → Hotel Search, Flight Search, Activity Search (fan-out) +- Hotel Search Executor → Booking Information Aggregation Executor +- Flight Search Executor → Booking Information Aggregation Executor +- Booking Information Aggregation Executor → Booking Confirmation Executor +- Booking Confirmation Executor → Booking Payment Executor +- Booking Information Aggregation, Booking Payment, Activity Search → Travel Planning Coordinator (ResearchLead) for final aggregation (fan-in) + +Agents: +1. Travel Agent - Main coordinator (no tools to avoid thread conflicts) +2. Hotel Search - Searches hotels with tools +3. Flight Search - Searches flights with tools +4. Activity Search - Searches activities with tools +5. Booking Information Aggregation - Aggregates hotel & flight booking info +6. Booking Confirmation - Confirms bookings with tools +7. Booking Payment - Processes payments with tools +""" + +import asyncio +import os +from collections import defaultdict + +from dotenv import load_dotenv + +from agent_framework import ( + AgentExecutorResponse, + AgentRunUpdateEvent, + AgentRunResponseUpdate, + ChatMessage, + Executor, + executor, + handler, + Role, + WorkflowContext, + WorkflowBuilder, + WorkflowOutputEvent, +) +from typing_extensions import Never + +from agent_framework.azure import AzureAIClient +from azure.identity.aio import DefaultAzureCredential +from azure.ai.projects.aio import AIProjectClient + +from _tools import ( + # Travel planning tools + search_hotels, + get_hotel_details, + search_flights, + get_flight_details, + search_activities, + confirm_booking, + check_hotel_availability, + check_flight_availability, + process_payment, + validate_payment_method, +) + +load_dotenv() + + +@executor(id="start_executor") +async def start_executor(input: str, ctx: WorkflowContext[list[ChatMessage]]) -> None: + """Initiates the workflow by sending the user 
class ResearchLead(Executor):
    """Aggregates and summarizes travel planning findings from all specialized agents.

    Acts as the fan-in node of the workflow: it receives the responses of the
    upstream agent executors, asks its own coordinator agent to merge them into
    one travel plan, and yields that plan as the workflow output.
    """

    def __init__(self, chat_client: AzureAIClient, id: str = "travel-planning-coordinator"):
        """Create the coordinator agent and register this executor.

        Args:
            chat_client: Client used to create the coordinator agent.
            id: Executor id passed to the base ``Executor``. The agent itself is
                always created with the fixed id/name "travel-planning-coordinator".
        """
        # store=True to preserve conversation history for evaluation
        self.agent = chat_client.create_agent(
            id="travel-planning-coordinator",
            instructions=(
                "You are the final coordinator. You will receive responses from multiple agents: "
                "booking-info-aggregation-agent (hotel/flight options), booking-payment-agent (payment confirmation), "
                "and activity-search-agent (activities). "
                "Review each agent's response, then create a comprehensive travel itinerary organized by: "
                "1. Flights 2. Hotels 3. Activities 4. Booking confirmations 5. Payment details. "
                "Clearly indicate which information came from which agent. Do not use tools."
            ),
            name="travel-planning-coordinator",
            store=True
        )
        super().__init__(id=id)

    @handler
    async def fan_in_handle(self, responses: list[AgentExecutorResponse], ctx: WorkflowContext[Never, str]) -> None:
        """Merge the upstream agent responses into one travel plan and yield it.

        The original user query is taken from the first message of the first
        response's conversation. If the coordinator agent fails or returns no
        text, the raw concatenated findings are yielded instead, so the
        workflow always produces an output.
        """
        # Guard against an empty fan-in or an empty conversation rather than
        # raising IndexError before any output can be produced.
        first_conversation = responses[0].full_conversation if responses else None
        user_query = first_conversation[0].text if first_conversation else ""

        # Extract findings from all agent responses
        agent_findings = self._extract_agent_findings(responses)
        summary_text = "\n".join(agent_findings) if agent_findings else "No specific findings were provided by the agents."

        # Used whenever the coordinator cannot produce text of its own.
        fallback_text = f"Based on the available findings, here's your travel plan for '{user_query}': {summary_text}"

        # Generate comprehensive travel plan summary
        messages = [
            ChatMessage(role=Role.SYSTEM, text="You are a travel planning coordinator. Summarize findings from multiple specialized travel agents and provide a clear, comprehensive travel plan based on the user's query."),
            ChatMessage(role=Role.USER, text=f"Original query: {user_query}\n\nFindings from specialized travel agents:\n{summary_text}\n\nPlease provide a comprehensive travel plan based on these findings.")
        ]

        try:
            final_response = await self.agent.run(messages)
            last_text = final_response.messages[-1].text if final_response.messages else None
            output_text = last_text or fallback_text
        except Exception as e:
            # Deliberate best-effort: keep the workflow alive, but say why the
            # fallback was used instead of hiding the failure.
            print(f"Travel planning coordinator failed, returning raw findings instead: {e}")
            output_text = fallback_text

        await ctx.yield_output(output_text)

    def _extract_agent_findings(self, responses: list[AgentExecutorResponse]) -> list[str]:
        """Extract findings from agent responses.

        Returns one "[executor_id]: text" entry per response that contains at
        least one non-blank assistant message; the assistant texts of a
        response are stripped and joined with single spaces.
        """
        agent_findings = []

        for response in responses:
            run_response = response.agent_run_response
            if not (run_response and run_response.messages):
                continue

            findings = [
                msg.text.strip()
                for msg in run_response.messages
                if msg.role == Role.ASSISTANT and msg.text and msg.text.strip()
            ]
            if findings:
                agent_findings.append(f"[{response.executor_id}]: {' '.join(findings)}")

        return agent_findings


async def run_workflow_with_response_tracking(query: str, chat_client: AzureAIClient | None = None) -> dict:
    """Run multi-agent workflow and track conversation IDs and response IDs.

    Args:
        query: The user query to process through the multi-agent workflow
        chat_client: Optional AzureAIClient instance. When omitted, a credential,
            project client and chat client are created from the
            AZURE_AI_PROJECT_ENDPOINT environment variable and closed again
            once the workflow has finished.

    Returns:
        Dictionary with the keys "conversation_ids", "response_ids", "output"
        and "query".

    Raises:
        Exception: Any error raised while creating the clients or running the
            workflow is printed and re-raised.
    """
    if chat_client is not None:
        return await _run_workflow_with_client(query, chat_client)

    try:
        # The credential must be entered first: the project client is built
        # from it. Building the client before this `async with` referenced
        # `credential` before assignment and failed on every default-path call.
        async with DefaultAzureCredential() as credential:
            # Create AIProjectClient with the correct API version for V2 prompt agents
            project_client = AIProjectClient(
                endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
                credential=credential,
                api_version="2025-11-15-preview",
            )

            async with (
                project_client,
                AzureAIClient(project_client=project_client, async_credential=credential) as client
            ):
                return await _run_workflow_with_client(query, client)
    except Exception as e:
        print(f"Error during workflow execution: {e}")
        raise


async def _run_workflow_with_client(query: str, chat_client: AzureAIClient) -> dict:
    """Execute workflow with given client and track all interactions.

    Builds the workflow, streams its events, and collects the conversation and
    response ids gathered while processing them.

    Returns:
        Dictionary with the keys "conversation_ids", "response_ids", "output"
        and "query".
    """
    # Initialize tracking variables - use lists to track multiple responses per agent
    conversation_ids = defaultdict(list)
    response_ids = defaultdict(list)

    # Create workflow components and keep agent references
    # Pass project_client and credential to create separate client instances per agent
    # NOTE(review): assumes AzureAIClient exposes `project_client` and `credential`
    # attributes - confirm against the client implementation.
    workflow, _agent_map = await _create_workflow(
        chat_client.project_client,
        chat_client.credential
    )

    # Process workflow events
    events = workflow.run_stream(query)
    workflow_output = await _process_workflow_events(events, conversation_ids, response_ids)

    return {
        "conversation_ids": dict(conversation_ids),
        "response_ids": dict(response_ids),
        "output": workflow_output,
        "query": query
    }
+ + IMPORTANT: Each agent needs its own client instance because the V2 client stores + agent_name and agent_version as instance variables, causing all agents to share + the same agent identity if they share a client. + """ + + # Create separate client for Final Coordinator + final_coordinator_client = AzureAIClient( + project_client=project_client, + async_credential=credential, + agent_name="final-coordinator" + ) + final_coordinator = ResearchLead(chat_client=final_coordinator_client, id="final-coordinator") + + # Agent 1: Travel Request Handler (initial coordinator) + # Create separate client with unique agent_name + travel_request_handler_client = AzureAIClient( + project_client=project_client, + async_credential=credential, + agent_name="travel-request-handler" + ) + travel_request_handler = travel_request_handler_client.create_agent( + id="travel-request-handler", + instructions=( + "You receive user travel queries and relay them to specialized agents. Extract key information: destination, dates, budget, and preferences. Pass this information forward clearly to the next agents." + ), + name="travel-request-handler", + store=True + ) + + # Agent 2: Hotel Search Executor + hotel_search_client = AzureAIClient( + project_client=project_client, + async_credential=credential, + agent_name="hotel-search-agent" + ) + hotel_search_agent = hotel_search_client.create_agent( + id="hotel-search-agent", + instructions=( + "You are a hotel search specialist. Your task is ONLY to search for and provide hotel information. Use search_hotels to find options, get_hotel_details for specifics, and check_availability to verify rooms. Output format: List hotel names, prices per night, total cost for the stay, locations, ratings, amenities, and addresses. IMPORTANT: Only provide hotel information without additional commentary." 
+ ), + name="hotel-search-agent", + tools=[search_hotels, get_hotel_details, check_hotel_availability], + store=True + ) + + # Agent 3: Flight Search Executor + flight_search_client = AzureAIClient( + project_client=project_client, + async_credential=credential, + agent_name="flight-search-agent" + ) + flight_search_agent = flight_search_client.create_agent( + id="flight-search-agent", + instructions=( + "You are a flight search specialist. Your task is ONLY to search for and provide flight information. Use search_flights to find options, get_flight_details for specifics, and check_availability for seats. Output format: List flight numbers, airlines, departure/arrival times, prices, durations, and cabin class. IMPORTANT: Only provide flight information without additional commentary." + ), + name="flight-search-agent", + tools=[search_flights, get_flight_details, check_flight_availability], + store=True + ) + + # Agent 4: Activity Search Executor + activity_search_client = AzureAIClient( + project_client=project_client, + async_credential=credential, + agent_name="activity-search-agent" + ) + activity_search_agent = activity_search_client.create_agent( + id="activity-search-agent", + instructions=( + "You are an activities specialist. Your task is ONLY to search for and provide activity information. Use search_activities to find options for activities. Output format: List activity names, descriptions, prices, durations, ratings, and categories. IMPORTANT: Only provide activity information without additional commentary." + ), + name="activity-search-agent", + tools=[search_activities], + store=True + ) + + # Agent 5: Booking Confirmation Executor + booking_confirmation_client = AzureAIClient( + project_client=project_client, + async_credential=credential, + agent_name="booking-confirmation-agent" + ) + booking_confirmation_agent = booking_confirmation_client.create_agent( + id="booking-confirmation-agent", + instructions=( + "You confirm bookings. 
Use check_hotel_availability and check_flight_availability to verify slots, then confirm_booking to finalize. Provide ONLY: confirmation numbers, booking references, and confirmation status." + ), + name="booking-confirmation-agent", + tools=[confirm_booking, check_hotel_availability, check_flight_availability], + store=True + ) + + # Agent 6: Booking Payment Executor + booking_payment_client = AzureAIClient( + project_client=project_client, + async_credential=credential, + agent_name="booking-payment-agent" + ) + booking_payment_agent = booking_payment_client.create_agent( + id="booking-payment-agent", + instructions=( + "You process payments. Use validate_payment_method to verify payment, then process_payment to complete transactions. Provide ONLY: payment confirmation status, transaction IDs, and payment amounts." + ), + name="booking-payment-agent", + tools=[process_payment, validate_payment_method], + store=True + ) + + # Agent 7: Booking Information Aggregation Executor + booking_info_client = AzureAIClient( + project_client=project_client, + async_credential=credential, + agent_name="booking-info-aggregation-agent" + ) + booking_info_aggregation_agent = booking_info_client.create_agent( + id="booking-info-aggregation-agent", + instructions=( + "You aggregate hotel and flight search results. Receive options from search agents and organize them. Provide: top 2-3 hotel options with prices and top 2-3 flight options with prices in a structured format." + ), + name="booking-info-aggregation-agent", + store=True + ) + + # Build workflow with logical booking flow: + # 1. start_executor → travel_request_handler + # 2. travel_request_handler → hotel_search, flight_search, activity_search (fan-out) + # 3. hotel_search → booking_info_aggregation + # 4. flight_search → booking_info_aggregation + # 5. booking_info_aggregation → booking_confirmation + # 6. booking_confirmation → booking_payment + # 7. 
booking_info_aggregation, booking_payment, activity_search → final_coordinator (final aggregation, fan-in) + + workflow = (WorkflowBuilder(name='Travel Planning Workflow') + .set_start_executor(start_executor) + .add_edge(start_executor, travel_request_handler) + .add_fan_out_edges(travel_request_handler, [hotel_search_agent, flight_search_agent, activity_search_agent]) + .add_edge(hotel_search_agent, booking_info_aggregation_agent) + .add_edge(flight_search_agent, booking_info_aggregation_agent) + .add_edge(booking_info_aggregation_agent, booking_confirmation_agent) + .add_edge(booking_confirmation_agent, booking_payment_agent) + .add_fan_in_edges([booking_info_aggregation_agent, booking_payment_agent, activity_search_agent], + final_coordinator) + .build()) + + # Return workflow and agent map for thread ID extraction + agent_map = { + "travel_request_handler": travel_request_handler, + "hotel-search-agent": hotel_search_agent, + "flight-search-agent": flight_search_agent, + "activity-search-agent": activity_search_agent, + "booking-confirmation-agent": booking_confirmation_agent, + "booking-payment-agent": booking_payment_agent, + "booking-info-aggregation-agent": booking_info_aggregation_agent, + "final-coordinator": final_coordinator.agent, + } + + return workflow, agent_map + + +async def _process_workflow_events(events, conversation_ids, response_ids): + """Process workflow events and track interactions.""" + workflow_output = None + + async for event in events: + if isinstance(event, WorkflowOutputEvent): + workflow_output = event.data + # Handle Unicode characters that may not be displayable in Windows console + try: + print(f"\nWorkflow Output: {event.data}\n") + except UnicodeEncodeError: + output_str = str(event.data).encode('ascii', 'replace').decode('ascii') + print(f"\nWorkflow Output: {output_str}\n") + + elif isinstance(event, AgentRunUpdateEvent): + _track_agent_ids(event, event.executor_id, response_ids, conversation_ids) + + return 
workflow_output + + +def _track_agent_ids(event, agent, response_ids, conversation_ids): + """Track agent response and conversation IDs - supporting multiple responses per agent.""" + if isinstance(event.data, AgentRunResponseUpdate): + # Check for conversation_id and response_id from raw_representation + # V2 API stores conversation_id directly on raw_representation (ChatResponseUpdate) + if hasattr(event.data, 'raw_representation') and event.data.raw_representation: + raw = event.data.raw_representation + + # Try conversation_id directly on raw representation + if hasattr(raw, 'conversation_id') and raw.conversation_id: + # Only add if not already in the list + if raw.conversation_id not in conversation_ids[agent]: + conversation_ids[agent].append(raw.conversation_id) + + # Extract response_id from the OpenAI event (available from first event) + if hasattr(raw, 'raw_representation') and raw.raw_representation: + openai_event = raw.raw_representation + + # Check if event has response object with id + if hasattr(openai_event, 'response') and hasattr(openai_event.response, 'id'): + # Only add if not already in the list + if openai_event.response.id not in response_ids[agent]: + response_ids[agent].append(openai_event.response.id) + + +async def create_and_run_workflow(): + """Run the workflow evaluation and display results. + + Returns: + Dictionary containing agents data with conversation IDs, response IDs, and query information + """ + example_queries = [ + "Plan a 3-day trip to Paris from December 15-18, 2025. Budget is $2000. Need hotel near Eiffel Tower, round-trip flights from New York JFK, and recommend 2-3 activities per day.", + "Find a budget hotel in Tokyo for January 5-10, 2026 under $150/night near Shibuya station, book activities including a sushi making class", + "Search for round-trip flights from Los Angeles to London departing March 20, 2026, returning March 27, 2026. Economy class, 2 passengers. 
Recommend tourist attractions and museums.", + ] + + query = example_queries[0] + print(f"Query: {query}\n") + + result = await run_workflow_with_response_tracking(query) + + # Create output data structure + output_data = { + "agents": {}, + "query": result["query"], + "output": result.get("output", "") + } + + # Create agent-specific mappings - now with lists of IDs + all_agents = set(result["conversation_ids"].keys()) | set(result["response_ids"].keys()) + for agent_name in all_agents: + output_data["agents"][agent_name] = { + "conversation_ids": result["conversation_ids"].get(agent_name, []), + "response_ids": result["response_ids"].get(agent_name, []), + "response_count": len(result["response_ids"].get(agent_name, [])) + } + + print(f"\nTotal agents tracked: {len(output_data['agents'])}") + + # Print summary of multiple responses + print("\n=== Multi-Response Summary ===") + for agent_name, agent_data in output_data["agents"].items(): + response_count = agent_data["response_count"] + print(f"{agent_name}: {response_count} response(s)") + + return output_data + + +if __name__ == "__main__": + asyncio.run(create_and_run_workflow()) diff --git a/python/samples/demos/workflow_evaluation/run_evaluation.py b/python/samples/demos/workflow_evaluation/run_evaluation.py new file mode 100644 index 0000000000..610f7ade00 --- /dev/null +++ b/python/samples/demos/workflow_evaluation/run_evaluation.py @@ -0,0 +1,220 @@ +# Copyright (c) Microsoft. All rights reserved. + +""" +Script to run multi-agent travel planning workflow and evaluate agent responses. + +This script: +1. Executes the multi-agent workflow +2. Displays response data summary +3. Creates and runs evaluation with multiple evaluators +4. 
Monitors evaluation progress and displays results +""" + +import asyncio +import os +import time + +from azure.ai.projects import AIProjectClient +from azure.identity import DefaultAzureCredential +from dotenv import load_dotenv + +from create_workflow import create_and_run_workflow + + +def print_section(title: str): + """Print a formatted section header.""" + print(f"\n{'='*80}") + print(f"{title}") + print(f"{'='*80}") + + +async def run_workflow(): + """Execute the multi-agent travel planning workflow. + + Returns: + Dictionary containing workflow data with agent response IDs + """ + print_section("Step 1: Running Workflow") + print("Executing multi-agent travel planning workflow...") + print("This may take a few minutes...") + + workflow_data = await create_and_run_workflow() + + print("Workflow execution completed") + return workflow_data + + +def display_response_summary(workflow_data: dict): + """Display summary of response data.""" + print_section("Step 2: Response Data Summary") + + print(f"Query: {workflow_data['query']}") + print(f"\nAgents tracked: {len(workflow_data['agents'])}") + + for agent_name, agent_data in workflow_data['agents'].items(): + response_count = agent_data['response_count'] + print(f" {agent_name}: {response_count} response(s)") + + +def fetch_agent_responses(openai_client, workflow_data: dict, agent_names: list): + """Fetch and display final responses from specified agents.""" + print_section("Step 3: Fetching Agent Responses") + + for agent_name in agent_names: + if agent_name not in workflow_data['agents']: + continue + + agent_data = workflow_data['agents'][agent_name] + if not agent_data['response_ids']: + continue + + final_response_id = agent_data['response_ids'][-1] + print(f"\n{agent_name}") + print(f" Response ID: {final_response_id}") + + try: + response = openai_client.responses.retrieve(response_id=final_response_id) + content = response.output[-1].content[-1].text + truncated = content[:300] + "..." 
if len(content) > 300 else content + print(f" Content preview: {truncated}") + except Exception as e: + print(f" Error: {e}") + + +def create_evaluation(openai_client, model_deployment: str): + """Create evaluation with multiple evaluators.""" + print_section("Step 4: Creating Evaluation") + + data_source_config = {"type": "azure_ai_source", "scenario": "responses"} + + testing_criteria = [ + { + "type": "azure_ai_evaluator", + "name": "relevance", + "evaluator_name": "builtin.relevance", + "initialization_parameters": {"deployment_name": model_deployment} + }, + { + "type": "azure_ai_evaluator", + "name": "groundedness", + "evaluator_name": "builtin.groundedness", + "initialization_parameters": {"deployment_name": model_deployment} + }, + { + "type": "azure_ai_evaluator", + "name": "tool_call_accuracy", + "evaluator_name": "builtin.tool_call_accuracy", + "initialization_parameters": {"deployment_name": model_deployment} + }, + { + "type": "azure_ai_evaluator", + "name": "tool_output_utilization", + "evaluator_name": "builtin.tool_output_utilization", + "initialization_parameters": {"deployment_name": model_deployment} + }, + ] + + eval_object = openai_client.evals.create( + name="Travel Workflow Multi-Evaluator Assessment", + data_source_config=data_source_config, + testing_criteria=testing_criteria, + ) + + evaluator_names = [criterion["name"] for criterion in testing_criteria] + print(f"Evaluation created: {eval_object.id}") + print(f"Evaluators ({len(evaluator_names)}): {', '.join(evaluator_names)}") + + return eval_object + + +def run_evaluation(openai_client, eval_object, workflow_data: dict, agent_names: list): + """Run evaluation on selected agent responses.""" + print_section("Step 5: Running Evaluation") + + selected_response_ids = [] + for agent_name in agent_names: + if agent_name in workflow_data['agents']: + agent_data = workflow_data['agents'][agent_name] + if agent_data['response_ids']: + selected_response_ids.append(agent_data['response_ids'][-1]) 
+ + print(f"Selected {len(selected_response_ids)} responses for evaluation") + + data_source = { + "type": "azure_ai_responses", + "item_generation_params": { + "type": "response_retrieval", + "data_mapping": {"response_id": "{{item.resp_id}}"}, + "source": { + "type": "file_content", + "content": [{"item": {"resp_id": resp_id}} for resp_id in selected_response_ids] + }, + }, + } + + eval_run = openai_client.evals.runs.create( + eval_id=eval_object.id, + name="Multi-Agent Response Evaluation", + data_source=data_source + ) + + print(f"Evaluation run created: {eval_run.id}") + + return eval_run + + +def monitor_evaluation(openai_client, eval_object, eval_run): + """Monitor evaluation progress and display results.""" + print_section("Step 6: Monitoring Evaluation") + + print("Waiting for evaluation to complete...") + + while eval_run.status not in ["completed", "failed"]: + eval_run = openai_client.evals.runs.retrieve( + run_id=eval_run.id, + eval_id=eval_object.id + ) + print(f"Status: {eval_run.status}") + time.sleep(5) + + if eval_run.status == "completed": + print("\nEvaluation completed successfully") + print(f"Result counts: {eval_run.result_counts}") + print(f"\nReport URL: {eval_run.report_url}") + else: + print("\nEvaluation failed") + + +async def main(): + """Main execution flow.""" + load_dotenv() + + print("Travel Planning Workflow Evaluation") + + workflow_data = await run_workflow() + + display_response_summary(workflow_data) + + project_client = AIProjectClient( + endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"], + credential=DefaultAzureCredential(), + api_version="2025-11-15-preview" + ) + openai_client = project_client.get_openai_client() + + agents_to_evaluate = ["hotel-search-agent", "flight-search-agent", "activity-search-agent"] + + fetch_agent_responses(openai_client, workflow_data, agents_to_evaluate) + + model_deployment = os.environ.get("AZURE_AI_MODEL_DEPLOYMENT_NAME", "gpt-4o-mini") + eval_object = create_evaluation(openai_client, 
model_deployment) + + eval_run = run_evaluation(openai_client, eval_object, workflow_data, agents_to_evaluate) + + monitor_evaluation(openai_client, eval_object, eval_run) + + print_section("Complete") + + +if __name__ == "__main__": + asyncio.run(main())