From 1f6beb97beca7a211465463e7bce78c1f2559716 Mon Sep 17 00:00:00 2001
From: Salma Elshafey <selshafey@microsoft.com>
Date: Wed, 19 Nov 2025 15:25:39 +0200
Subject: [PATCH 1/9] Create workflow evaluation with Foundry demo

---
 .../demos/workflow_evaluation/README.md       |  35 +
 .../demos/workflow_evaluation/_tools.py       | 622 ++++++++++++++++++
 .../workflow_evaluation/create_workflow.py    | 494 ++++++++++++++
 .../workflow_evaluation/run_evaluation.py     | 219 ++++++
 4 files changed, 1370 insertions(+)
 create mode 100644 python/samples/demos/workflow_evaluation/README.md
 create mode 100644 python/samples/demos/workflow_evaluation/_tools.py
 create mode 100644 python/samples/demos/workflow_evaluation/create_workflow.py
 create mode 100644 python/samples/demos/workflow_evaluation/run_evaluation.py

diff --git a/python/samples/demos/workflow_evaluation/README.md b/python/samples/demos/workflow_evaluation/README.md
new file mode 100644
index 0000000000..ee9aa7ffd6
--- /dev/null
+++ b/python/samples/demos/workflow_evaluation/README.md
@@ -0,0 +1,35 @@
+# Multi-Agent Travel Planning Workflow Evaluation
+
+This sample demonstrates evaluating a multi-agent workflow using Azure AI's built-in evaluators. The workflow processes travel planning requests through seven specialized agents in a fan-out/fan-in pattern: travel request handler, hotel/flight/activity search agents, booking aggregator, booking confirmation, and payment processing.
+
+## Evaluation Metrics
+
+The evaluation uses four Azure AI built-in evaluators:
+
+- **Relevance** - How well responses address the user query
+- **Groundedness** - Whether responses are grounded in available context
+- **Tool Call Accuracy** - Correct tool selection and parameter usage
+- **Tool Output Utilization** - Effective use of tool outputs in responses
+
+## Setup
+
+Create a `.env` file with the required configuration:
+
+```env
+AZURE_AI_PROJECT_ENDPOINT=<your-project-endpoint>
+AZURE_AI_MODEL_DEPLOYMENT_NAME=<your-model-deployment>
+```
+
+## Running the Evaluation
+
+Execute the complete workflow and evaluation:
+
+```bash
+python run_evaluation.py
+```
+
+The script will:
+1. Execute the multi-agent travel planning workflow
+2. Display a response summary for each agent
+3. Create and run an evaluation on the hotel, flight, and activity search agents
+4. Monitor progress and display the evaluation report URL
diff --git a/python/samples/demos/workflow_evaluation/_tools.py b/python/samples/demos/workflow_evaluation/_tools.py
new file mode 100644
index 0000000000..420d1ece9e
--- /dev/null
+++ b/python/samples/demos/workflow_evaluation/_tools.py
@@ -0,0 +1,622 @@
+import json
+from datetime import datetime
+
+# --- Travel Planning Tools ---
+
+def search_hotels(location: str, check_in: str, check_out: str, guests: int = 2) -> str:
+    """
+    Search mock hotel listings and return them as a JSON string: three Paris hotels, or one generic hotel for any other location.
+    """
+    # Paris results are hard-coded for a 3-night stay (December 15-18, 2025); dates and guest count are only echoed back
+    if "paris" in location.lower():
+        mock_hotels = [
+            {
+                "name": "Hotel Eiffel Trocadéro",
+                "rating": 4.6,
+                "price_per_night": "$185",
+                "total_price": "$555 for 3 nights",
+                "distance_to_eiffel_tower": "0.3 miles",
+                "amenities": ["WiFi", "Breakfast", "Eiffel Tower View", "Concierge"],
+                "availability": "Available",
+                "address": "35 Rue Benjamin Franklin, 16th arr., Paris"
+            },
+            {
+                "name": "Mercure Paris Centre Tour Eiffel",
+                "rating": 4.4,
+                "price_per_night": "$220",
+                "total_price": "$660 for 3 nights",
+                "distance_to_eiffel_tower": "0.5 miles",
+                "amenities": ["WiFi", "Restaurant", "Bar", "Gym", "Air Conditioning"],
+                "availability": "Available",
+                "address": "20 Rue Jean Rey, 15th arr., Paris"
+            },
+            {
+                "name": "Pullman Paris Tour Eiffel",
+                "rating": 4.7,
+                "price_per_night": "$280",
+                "total_price": "$840 for 3 nights",
+                "distance_to_eiffel_tower": "0.2 miles",
+                "amenities": ["WiFi", "Spa", "Gym", "Restaurant", "Rooftop Bar", "Concierge"],
+                "availability": "Limited",
+                "address": "18 Avenue de Suffren, 15th arr., Paris"
+            }
+        ]
+    else:
+        mock_hotels = [
+            {
+                "name": "Grand Plaza Hotel",
+                "rating": 4.5,
+                "price_per_night": "$150",
+                "amenities": ["WiFi", "Pool", "Gym", "Restaurant"],
+                "availability": "Available"
+            }
+        ]
+    
+    return json.dumps({
+        "location": location,
+        "check_in": check_in,
+        "check_out": check_out,
+        "guests": guests,
+        "hotels_found": len(mock_hotels),
+        "hotels": mock_hotels,
+        "note": "Hotel search results matching your query"
+    })
+
+def get_hotel_details(hotel_name: str) -> str:
+    """
+    Return a JSON string with canned details for a known Paris hotel (exact name match), or generic placeholder details otherwise.
+    """
+    hotel_details = {
+        "Hotel Eiffel Trocadéro": {
+            "description": "Charming boutique hotel with stunning Eiffel Tower views from select rooms. Perfect for couples and families.",
+            "check_in_time": "3:00 PM",
+            "check_out_time": "11:00 AM",
+            "cancellation_policy": "Free cancellation up to 24 hours before check-in",
+            "reviews": {
+                "total": 1247,
+                "recent_comments": [
+                    "Amazing location! Walked to Eiffel Tower in 5 minutes.",
+                    "Staff was incredibly helpful with restaurant recommendations.",
+                    "Rooms are cozy and clean with great views."
+                ]
+            },
+            "nearby_attractions": ["Eiffel Tower (0.3 mi)", "Trocadéro Gardens (0.2 mi)", "Seine River (0.4 mi)"]
+        },
+        "Mercure Paris Centre Tour Eiffel": {
+            "description": "Modern hotel with contemporary rooms and excellent dining options. Close to metro stations.",
+            "check_in_time": "2:00 PM",
+            "check_out_time": "12:00 PM",
+            "cancellation_policy": "Free cancellation up to 48 hours before check-in",
+            "reviews": {
+                "total": 2156,
+                "recent_comments": [
+                    "Great value for money, clean and comfortable.",
+                    "Restaurant had excellent French cuisine.",
+                    "Easy access to public transportation."
+                ]
+            },
+            "nearby_attractions": ["Eiffel Tower (0.5 mi)", "Champ de Mars (0.4 mi)", "Les Invalides (0.8 mi)"]
+        },
+        "Pullman Paris Tour Eiffel": {
+            "description": "Luxury hotel offering panoramic views, upscale amenities, and exceptional service. Ideal for a premium experience.",
+            "check_in_time": "3:00 PM",
+            "check_out_time": "12:00 PM",
+            "cancellation_policy": "Free cancellation up to 72 hours before check-in",
+            "reviews": {
+                "total": 3421,
+                "recent_comments": [
+                    "Rooftop bar has the best Eiffel Tower views in Paris!",
+                    "Luxurious rooms with every amenity you could want.",
+                    "Worth the price for the location and service."
+                ]
+            },
+            "nearby_attractions": ["Eiffel Tower (0.2 mi)", "Seine River Cruise Dock (0.3 mi)", "Trocadéro (0.5 mi)"]
+        }
+    }
+    
+    details = hotel_details.get(hotel_name, {
+        "name": hotel_name,
+        "description": "Comfortable hotel with modern amenities",
+        "check_in_time": "3:00 PM",
+        "check_out_time": "11:00 AM",
+        "cancellation_policy": "Standard cancellation policy applies",
+        "reviews": {"total": 0, "recent_comments": []},
+        "nearby_attractions": []
+    })
+    
+    return json.dumps({
+        "hotel_name": hotel_name,
+        "details": details
+    })
+
+def search_flights(origin: str, destination: str, departure_date: str, return_date: str = None, passengers: int = 1) -> str:
+    """
+    Search mock flights between two locations and return them as a JSON string; only JFK/New York to Paris/CDG has detailed round trips.
+    """
+    # Detailed mock data exists only for JFK/New York to Paris/CDG (December 15-18, 2025); other routes get placeholders
+    if "jfk" in origin.lower() or "new york" in origin.lower():
+        if "paris" in destination.lower() or "cdg" in destination.lower():
+            mock_flights = [
+                {
+                    "outbound": {
+                        "flight_number": "AF007",
+                        "airline": "Air France",
+                        "departure": "December 15, 2025 at 6:30 PM",
+                        "arrival": "December 16, 2025 at 8:15 AM",
+                        "duration": "7h 45m",
+                        "aircraft": "Boeing 777-300ER",
+                        "class": "Economy",
+                        "price": "$520"
+                    },
+                    "return": {
+                        "flight_number": "AF008",
+                        "airline": "Air France",
+                        "departure": "December 18, 2025 at 11:00 AM",
+                        "arrival": "December 18, 2025 at 2:15 PM",
+                        "duration": "8h 15m",
+                        "aircraft": "Airbus A350-900",
+                        "class": "Economy",
+                        "price": "Included"
+                    },
+                    "total_price": "$520",
+                    "stops": "Nonstop",
+                    "baggage": "1 checked bag included"
+                },
+                {
+                    "outbound": {
+                        "flight_number": "DL264",
+                        "airline": "Delta",
+                        "departure": "December 15, 2025 at 10:15 PM",
+                        "arrival": "December 16, 2025 at 12:05 PM",
+                        "duration": "7h 50m",
+                        "aircraft": "Airbus A330-900neo",
+                        "class": "Economy",
+                        "price": "$485"
+                    },
+                    "return": {
+                        "flight_number": "DL265",
+                        "airline": "Delta",
+                        "departure": "December 18, 2025 at 1:45 PM",
+                        "arrival": "December 18, 2025 at 5:00 PM",
+                        "duration": "8h 15m",
+                        "aircraft": "Airbus A330-900neo",
+                        "class": "Economy",
+                        "price": "Included"
+                    },
+                    "total_price": "$485",
+                    "stops": "Nonstop",
+                    "baggage": "1 checked bag included"
+                },
+                {
+                    "outbound": {
+                        "flight_number": "UA57",
+                        "airline": "United Airlines",
+                        "departure": "December 15, 2025 at 5:00 PM",
+                        "arrival": "December 16, 2025 at 6:50 AM",
+                        "duration": "7h 50m",
+                        "aircraft": "Boeing 767-400ER",
+                        "class": "Economy",
+                        "price": "$560"
+                    },
+                    "return": {
+                        "flight_number": "UA58",
+                        "airline": "United Airlines",
+                        "departure": "December 18, 2025 at 9:30 AM",
+                        "arrival": "December 18, 2025 at 12:45 PM",
+                        "duration": "8h 15m",
+                        "aircraft": "Boeing 787-10",
+                        "class": "Economy",
+                        "price": "Included"
+                    },
+                    "total_price": "$560",
+                    "stops": "Nonstop",
+                    "baggage": "1 checked bag included"
+                }
+            ]
+        else:
+            mock_flights = [{"flight_number": "XX123", "airline": "Generic Air", "price": "$400", "note": "Generic route"}]
+    else:
+        mock_flights = [
+            {
+                "outbound": {
+                    "flight_number": "AA123",
+                    "airline": "Generic Airlines",
+                    "departure": f"{departure_date} at 9:00 AM",
+                    "arrival": f"{departure_date} at 2:30 PM",
+                    "duration": "5h 30m",
+                    "class": "Economy",
+                    "price": "$350"
+                },
+                "total_price": "$350",
+                "stops": "Nonstop"
+            }
+        ]
+    
+    return json.dumps({
+        "origin": origin,
+        "destination": destination,
+        "departure_date": departure_date,
+        "return_date": return_date,
+        "passengers": passengers,
+        "flights_found": len(mock_flights),
+        "flights": mock_flights,
+        "note": f"Flight search results for {origin} to {destination}"  # echo the requested route so the note never contradicts the results
+    })
+
+def get_flight_details(flight_number: str) -> str:
+    """
+    Return a JSON string of fixed mock flight details; only the flight_number field varies with the input.
+    """
+    mock_details = {
+        "flight_number": flight_number,
+        "airline": "Sky Airways",
+        "aircraft": "Boeing 737-800",
+        "departure": {
+            "airport": "JFK International Airport",
+            "terminal": "Terminal 4",
+            "gate": "B23",
+            "time": "08:00 AM"
+        },
+        "arrival": {
+            "airport": "Charles de Gaulle Airport",
+            "terminal": "Terminal 2E",
+            "gate": "K15",
+            "time": "11:30 AM local time"
+        },
+        "duration": "3h 30m",
+        "baggage_allowance": {
+            "carry_on": "1 bag (10kg)",
+            "checked": "1 bag (23kg)"
+        },
+        "amenities": ["WiFi", "In-flight entertainment", "Meals included"]
+    }
+    
+    return json.dumps({
+        "flight_details": mock_details
+    })
+
+def search_activities(location: str, date: str = None, category: str = None) -> str:
+    """
+    Search mock activities for a location, optionally filtered by category (case-insensitive), and return them as a JSON string.
+    """
+    # Paris has a nine-item catalogue; any other location gets one generic activity and the category filter is not applied
+    if "paris" in location.lower():
+        all_activities = [
+            {
+                "name": "Eiffel Tower Summit Access",
+                "category": "Sightseeing",
+                "duration": "2-3 hours",
+                "price": "$35",
+                "rating": 4.8,
+                "description": "Skip-the-line access to all three levels including the summit. Best views of Paris!",
+                "availability": "Daily 9:30 AM - 11:00 PM",
+                "best_time": "Early morning or sunset",
+                "booking_required": True
+            },
+            {
+                "name": "Louvre Museum Guided Tour",
+                "category": "Sightseeing",
+                "duration": "3 hours",
+                "price": "$55",
+                "rating": 4.7,
+                "description": "Expert-guided tour covering masterpieces including Mona Lisa and Venus de Milo.",
+                "availability": "Daily except Tuesdays, 9:00 AM entry",
+                "best_time": "Morning entry recommended",
+                "booking_required": True
+            },
+            {
+                "name": "Seine River Cruise",
+                "category": "Sightseeing",
+                "duration": "1 hour",
+                "price": "$18",
+                "rating": 4.6,
+                "description": "Scenic cruise past Notre-Dame, Eiffel Tower, and historic bridges.",
+                "availability": "Every 30 minutes, 10:00 AM - 10:00 PM",
+                "best_time": "Evening for illuminated monuments",
+                "booking_required": False
+            },
+            {
+                "name": "Musée d'Orsay Visit",
+                "category": "Culture",
+                "duration": "2-3 hours",
+                "price": "$16",
+                "rating": 4.7,
+                "description": "Impressionist masterpieces in a stunning Beaux-Arts railway station.",
+                "availability": "Tuesday-Sunday 9:30 AM - 6:00 PM",
+                "best_time": "Weekday mornings",
+                "booking_required": True
+            },
+            {
+                "name": "Versailles Palace Day Trip",
+                "category": "Culture",
+                "duration": "5-6 hours",
+                "price": "$75",
+                "rating": 4.9,
+                "description": "Explore the opulent palace and stunning gardens of Louis XIV (includes transport).",
+                "availability": "Daily except Mondays, 8:00 AM departure",
+                "best_time": "Full day trip",
+                "booking_required": True
+            },
+            {
+                "name": "Montmartre Walking Tour",
+                "category": "Culture",
+                "duration": "2.5 hours",
+                "price": "$25",
+                "rating": 4.6,
+                "description": "Discover the artistic heart of Paris, including Sacré-Cœur and artists' square.",
+                "availability": "Daily at 10:00 AM and 2:00 PM",
+                "best_time": "Morning or late afternoon",
+                "booking_required": False
+            },
+            {
+                "name": "French Cooking Class",
+                "category": "Culinary",
+                "duration": "3 hours",
+                "price": "$120",
+                "rating": 4.9,
+                "description": "Learn to make classic French dishes like coq au vin and crème brûlée, then enjoy your creations.",
+                "availability": "Tuesday-Saturday, 10:00 AM and 6:00 PM sessions",
+                "best_time": "Morning or evening sessions",
+                "booking_required": True
+            },
+            {
+                "name": "Wine & Cheese Tasting",
+                "category": "Culinary",
+                "duration": "1.5 hours",
+                "price": "$65",
+                "rating": 4.7,
+                "description": "Sample French wines and artisanal cheeses with expert sommelier guidance.",
+                "availability": "Daily at 5:00 PM and 7:30 PM",
+                "best_time": "Evening sessions",
+                "booking_required": True
+            },
+            {
+                "name": "Food Market Tour",
+                "category": "Culinary",
+                "duration": "2 hours",
+                "price": "$45",
+                "rating": 4.6,
+                "description": "Explore authentic Parisian markets and taste local specialties like cheeses, pastries, and charcuterie.",
+                "availability": "Tuesday, Thursday, Saturday mornings",
+                "best_time": "Morning (markets are freshest)",
+                "booking_required": False
+            }
+        ]
+        
+        if category:
+            activities = [act for act in all_activities if act["category"].lower() == category.lower()]
+        else:
+            activities = all_activities
+    else:
+        activities = [
+            {
+                "name": "City Walking Tour",
+                "category": "Sightseeing",
+                "duration": "3 hours",
+                "price": "$45",
+                "rating": 4.7,
+                "description": "Explore the historic downtown area with an expert guide",
+                "availability": "Daily at 10:00 AM and 2:00 PM"
+            }
+        ]
+    
+    return json.dumps({
+        "location": location,
+        "date": date,
+        "category": category,
+        "activities_found": len(activities),
+        "activities": activities,
+        "note": f"Activity search results for {location}"  # echo the requested location so the note never contradicts the results
+    })
+
+def get_activity_details(activity_name: str) -> str:
+    """
+    Return a JSON string with canned details for a known Paris activity, or generic placeholder details for any other name.
+    """
+    # Canned details for three Paris activities, keyed by exact activity name
+    activity_details_map = {
+        "Eiffel Tower Summit Access": {
+            "name": "Eiffel Tower Summit Access",
+            "description": "Skip-the-line access to all three levels of the Eiffel Tower, including the summit. Enjoy panoramic views of Paris from 276 meters high.",
+            "duration": "2-3 hours (self-guided)",
+            "price": "$35 per person",
+            "included": ["Skip-the-line ticket", "Access to all 3 levels", "Summit access", "Audio guide app"],
+            "meeting_point": "Eiffel Tower South Pillar entrance, look for priority access line",
+            "what_to_bring": ["Photo ID", "Comfortable shoes", "Camera", "Light jacket (summit can be windy)"],
+            "cancellation_policy": "Free cancellation up to 24 hours in advance",
+            "languages": ["English", "French", "Spanish", "German", "Italian"],
+            "max_group_size": "No limit",
+            "rating": 4.8,
+            "reviews_count": 15234
+        },
+        "Louvre Museum Guided Tour": {
+            "name": "Louvre Museum Guided Tour",
+            "description": "Expert-guided tour of the world's largest art museum, focusing on must-see masterpieces including Mona Lisa, Venus de Milo, and Winged Victory.",
+            "duration": "3 hours",
+            "price": "$55 per person",
+            "included": ["Skip-the-line entry", "Expert art historian guide", "Headsets for groups over 6", "Museum highlights map"],
+            "meeting_point": "Glass Pyramid main entrance, look for guide with 'Louvre Tours' sign",
+            "what_to_bring": ["Photo ID", "Comfortable shoes", "Camera (no flash)", "Water bottle"],
+            "cancellation_policy": "Free cancellation up to 48 hours in advance",
+            "languages": ["English", "French", "Spanish"],
+            "max_group_size": 20,
+            "rating": 4.7,
+            "reviews_count": 8921
+        },
+        "French Cooking Class": {
+            "name": "French Cooking Class",
+            "description": "Hands-on cooking experience where you'll learn to prepare classic French dishes like coq au vin, ratatouille, and crème brûlée under expert chef guidance.",
+            "duration": "3 hours",
+            "price": "$120 per person",
+            "included": ["All ingredients", "Chef instruction", "Apron and recipe booklet", "Wine pairing", "Lunch/dinner of your creations"],
+            "meeting_point": "Le Chef Cooking Studio, 15 Rue du Bac, 7th arrondissement",
+            "what_to_bring": ["Appetite", "Camera for food photos"],
+            "cancellation_policy": "Free cancellation up to 72 hours in advance",
+            "languages": ["English", "French"],
+            "max_group_size": 12,
+            "rating": 4.9,
+            "reviews_count": 2341
+        }
+    }
+    
+    details = activity_details_map.get(activity_name, {
+        "name": activity_name,
+        "description": "An immersive experience that showcases the best of local culture and attractions.",
+        "duration": "3 hours",
+        "price": "$45 per person",
+        "included": ["Professional guide", "Entry fees"],
+        "meeting_point": "Central meeting location",
+        "what_to_bring": ["Comfortable shoes", "Camera"],
+        "cancellation_policy": "Free cancellation up to 24 hours in advance",
+        "languages": ["English"],
+        "max_group_size": 15,
+        "rating": 4.5,
+        "reviews_count": 100
+    })
+    
+    return json.dumps({
+        "activity_details": details
+    })
+
+def confirm_booking(booking_type: str, booking_id: str, customer_info: dict) -> str:
+    """
+    Build a mock booking confirmation (status is always Confirmed) from the booking type, ID, and customer name/email; return it as a JSON string.
+    """
+    confirmation_number = f"CONF-{booking_type.upper()}-{booking_id}"
+    
+    confirmation_data = {
+        "confirmation_number": confirmation_number,
+        "booking_type": booking_type,
+        "status": "Confirmed",
+        "customer_name": customer_info.get("name", "Guest"),
+        "email": customer_info.get("email", "guest@example.com"),
+        "confirmation_sent": True,
+        "next_steps": [
+            "Check your email for booking details",
+            "Arrive 30 minutes before scheduled time",
+            "Bring confirmation number and valid ID"
+        ]
+    }
+    
+    return json.dumps({
+        "confirmation": confirmation_data
+    })
+
+def check_hotel_availability(hotel_name: str, check_in: str, check_out: str, rooms: int = 1) -> str:
+    """
+    Return a JSON string reporting mock hotel availability; the status, room count, and nightly price are fixed and the inputs are echoed back.
+    """
+    availability_status = "Available"
+    
+    availability_data = {
+        "service_type": "hotel",
+        "hotel_name": hotel_name,
+        "check_in": check_in,
+        "check_out": check_out,
+        "rooms_requested": rooms,
+        "status": availability_status,
+        "available_rooms": 8,
+        "price_per_night": "$185",
+        "last_checked": datetime.now().isoformat()
+    }
+    
+    return json.dumps({
+        "availability": availability_data
+    })
+
+def check_flight_availability(flight_number: str, date: str, passengers: int = 1) -> str:
+    """
+    Return a JSON string reporting mock flight availability; the status, seat count, and fare are fixed and the inputs are echoed back.
+    """
+    availability_status = "Available"
+    
+    availability_data = {
+        "service_type": "flight",
+        "flight_number": flight_number,
+        "date": date,
+        "passengers_requested": passengers,
+        "status": availability_status,
+        "available_seats": 45,
+        "price_per_passenger": "$520",
+        "last_checked": datetime.now().isoformat()
+    }
+    
+    return json.dumps({
+        "availability": availability_data
+    })
+
+def check_activity_availability(activity_name: str, date: str, participants: int = 1) -> str:
+    """
+    Return a JSON string reporting mock activity availability; the status, spot count, and price are fixed and the inputs are echoed back.
+    """
+    availability_status = "Available"
+    
+    availability_data = {
+        "service_type": "activity",
+        "activity_name": activity_name,
+        "date": date,
+        "participants_requested": participants,
+        "status": availability_status,
+        "available_spots": 15,
+        "price_per_person": "$45",
+        "last_checked": datetime.now().isoformat()
+    }
+    
+    return json.dumps({
+        "availability": availability_data
+    })
+
+def process_payment(amount: float, currency: str, payment_method: dict, booking_reference: str) -> str:
+    """
+    Simulate a payment (status is always Success) and return a JSON string with a transaction ID derived from the current local time.
+    """
+    transaction_id = f"TXN-{datetime.now().strftime('%Y%m%d%H%M%S')}"
+    
+    payment_result = {
+        "transaction_id": transaction_id,
+        "amount": amount,
+        "currency": currency,
+        "status": "Success",
+        "payment_method": payment_method.get("type", "Credit Card"),
+        "last_4_digits": payment_method.get("last_4", "****"),
+        "booking_reference": booking_reference,
+        "timestamp": datetime.now().isoformat(),
+        "receipt_url": f"https://payments.travelagency.com/receipt/{transaction_id}"
+    }
+    
+    return json.dumps({
+        "payment_result": payment_result
+    })
+
+def validate_payment_method(payment_method: dict) -> str:
+    """
+    Validate a payment method dict and return the result as a JSON string; only the credit_card type has required fields (number, expiry, cvv).
+    """
+    method_type = payment_method.get("type", "credit_card")
+    
+    # Collect one message per missing required field; method types other than credit_card pass unchecked
+    is_valid = True
+    validation_messages = []
+    
+    if method_type == "credit_card":
+        if not payment_method.get("number"):
+            is_valid = False
+            validation_messages.append("Card number is required")
+        if not payment_method.get("expiry"):
+            is_valid = False
+            validation_messages.append("Expiry date is required")
+        if not payment_method.get("cvv"):
+            is_valid = False
+            validation_messages.append("CVV is required")
+    
+    validation_result = {
+        "is_valid": is_valid,
+        "payment_method_type": method_type,
+        "validation_messages": validation_messages if not is_valid else ["Payment method is valid"],
+        "supported_currencies": ["USD", "EUR", "GBP", "JPY"],
+        "processing_fee": "2.5%"
+    }
+    
+    return json.dumps({
+        "validation_result": validation_result
+    })
diff --git a/python/samples/demos/workflow_evaluation/create_workflow.py b/python/samples/demos/workflow_evaluation/create_workflow.py
new file mode 100644
index 0000000000..bf2d8337fe
--- /dev/null
+++ b/python/samples/demos/workflow_evaluation/create_workflow.py
@@ -0,0 +1,494 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+"""
+Multi-Agent Travel Planning Workflow Evaluation with Multiple Response Tracking
+
+This sample demonstrates a multi-agent travel planning workflow using the V2 client that:
+1. Processes travel queries through 7 specialized agents
+2. Tracks MULTIPLE response and conversation IDs per agent for evaluation
+3. Uses the new Prompt Agents API (V2)
+4. Captures complete interaction sequences including multiple invocations
+5. Aggregates findings through a travel planning coordinator
+
+WORKFLOW STRUCTURE (7 agents):
+- Travel Agent Executor → Hotel Search, Flight Search, Activity Search (fan-out)
+- Hotel Search Executor → Booking Information Aggregation Executor
+- Flight Search Executor → Booking Information Aggregation Executor
+- Booking Information Aggregation Executor → Booking Confirmation Executor
+- Booking Confirmation Executor → Booking Payment Executor
+- Booking Information Aggregation, Booking Payment, Activity Search → Travel Planning Coordinator (ResearchLead) for final aggregation (fan-in)
+
+Agents:
+1. Travel Agent - Main coordinator (no tools to avoid thread conflicts)
+2. Hotel Search - Searches hotels with tools
+3. Flight Search - Searches flights with tools  
+4. Activity Search - Searches activities with tools
+5. Booking Information Aggregation - Aggregates hotel & flight booking info
+6. Booking Confirmation - Confirms bookings with tools
+7. Booking Payment - Processes payments with tools
+"""
+
+import asyncio
+import json
+import os
+import sys
+from collections import defaultdict
+from pathlib import Path
+from typing import Dict, List, Optional
+
+from dotenv import load_dotenv
+
+# Add the local packages to the path
+packages_path = Path(__file__).parent.parent.parent.parent.parent.parent / "packages"
+sys.path.insert(0, str(packages_path / "core"))
+sys.path.insert(0, str(packages_path / "azure-ai"))
+
+from agent_framework import (
+    AgentExecutorResponse,
+    AgentRunUpdateEvent,
+    AgentRunResponseUpdate,
+    ChatAgent,
+    ChatMessage,
+    Executor,
+    executor,
+    handler,
+    Role,
+    WorkflowContext,
+    WorkflowBuilder,
+    WorkflowOutputEvent,
+)
+
+# Import V2 client directly from source file to avoid installed package conflicts
+from agent_framework_azure_ai._client import AzureAIClient
+from azure.identity.aio import AzureDeveloperCliCredential
+from azure.ai.projects.aio import AIProjectClient
+
+from _tools import (
+    # Travel planning tools
+    search_hotels,
+    get_hotel_details,
+    search_flights,
+    get_flight_details,
+    search_activities,
+    confirm_booking,
+    check_hotel_availability,
+    check_flight_availability,
+    process_payment,
+    validate_payment_method,
+)
+
+load_dotenv()
+
+
+@executor(id="start_executor")
+async def start_executor(input: str, ctx: WorkflowContext[List[ChatMessage]]) -> None:
+    """Initiates the workflow by sending the user query to all specialized agents."""
+    await ctx.send_message([ChatMessage(role="user", text=input)])
+
+
+class ResearchLead(Executor):
+    """Aggregates and summarizes travel planning findings from all specialized agents."""
+    
+    def __init__(self, chat_client: AzureAIClient, id: str = "travel-planning-coordinator"):
+        # store=True to preserve conversation history for evaluation
+        self.agent = chat_client.create_agent(
+            id="travel-planning-coordinator",
+            instructions=(
+                "You are the Travel Planning Coordinator. Your role is to synthesize information from multiple "
+                "specialized travel agents into a cohesive, actionable travel plan. You receive inputs from: "
+                "hotel search specialists, flight search specialists, activity planners, booking confirmation agents, "
+                "payment processors, and booking information aggregators. Provide a clear, comprehensive travel plan "
+                "that addresses the user's original query with all necessary details including accommodations, "
+                "transportation, activities, and booking status."
+            ),
+            name="travel-planning-coordinator",
+            store=True
+        )
+        super().__init__(id=id)
+
+    @handler
+    async def fan_in_handle(self, responses: List[AgentExecutorResponse], ctx: WorkflowContext[WorkflowOutputEvent]) -> None:
+        user_query = responses[0].full_conversation[0].text
+        
+        # Extract findings from all agent responses
+        agent_findings = self._extract_agent_findings(responses)
+        summary_text = "\n".join(agent_findings) if agent_findings else "No specific findings were provided by the agents."
+        
+        # Generate comprehensive travel plan summary
+        messages = [
+            ChatMessage(role=Role.SYSTEM, text="You are a travel planning coordinator. Summarize findings from multiple specialized travel agents and provide a clear, comprehensive travel plan based on the user's query."),
+            ChatMessage(role=Role.USER, text=f"Original query: {user_query}\n\nFindings from specialized travel agents:\n{summary_text}\n\nPlease provide a comprehensive travel plan based on these findings.")
+        ]
+        
+        try:
+            final_response = await self.agent.run(messages)
+            output_text = (final_response.messages[-1].text if final_response.messages and final_response.messages[-1].text 
+                          else f"Based on the available findings, here's your travel plan for '{user_query}': {summary_text}")
+        except Exception:
+            output_text = f"Based on the available findings, here's your travel plan for '{user_query}': {summary_text}"
+        
+        await ctx.yield_output(output_text)
+    
+    def _extract_agent_findings(self, responses: List[AgentExecutorResponse]) -> List[str]:
+        """Extract findings from agent responses."""
+        agent_findings = []
+        
+        for response in responses:
+            findings = []
+            if response.agent_run_response and response.agent_run_response.messages:
+                for msg in response.agent_run_response.messages:
+                    if msg.role == Role.ASSISTANT and msg.text and msg.text.strip():
+                        findings.append(msg.text.strip())
+            
+            if findings:
+                combined_findings = " ".join(findings)
+                agent_findings.append(f"[{response.executor_id}]: {combined_findings}")
+        
+        return agent_findings
+
+
+async def run_workflow_with_response_tracking(query: str, chat_client: Optional[AzureAIClient] = None) -> Dict:
+    """Run multi-agent workflow and track conversation IDs, response IDs, and interaction sequence.
+    
+    Args:
+        query: The user query to process through the multi-agent workflow
+        chat_client: Optional AzureAIClient instance
+        
+    Returns:
+        Dictionary containing interaction sequence, conversation/response IDs, and conversation analysis
+    """
+    if chat_client is None:
+        # Use AzureDeveloperCliCredential to avoid Azure CLI timeout issues
+        credential = AzureDeveloperCliCredential()
+        
+        # Create AIProjectClient with the correct API version for V2 prompt agents
+        project_client = AIProjectClient(
+            endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
+            credential=credential,
+            api_version="2025-11-15-preview",
+        )
+        
+        try:
+            async with AzureAIClient(
+                project_client=project_client,
+                async_credential=credential
+            ) as client:
+                return await _run_workflow_with_client(query, client)
+        finally:
+            await credential.close()
+            await project_client.close()
+    else:
+        return await _run_workflow_with_client(query, chat_client)
+
+
+async def _run_workflow_with_client(query: str, chat_client: AzureAIClient) -> Dict:
+    """Execute workflow with given client and track all interactions."""
+    
+    # Initialize tracking variables - use lists to track multiple responses per agent
+    conversation_ids = defaultdict(list)
+    response_ids = defaultdict(list)
+    workflow_output = None
+    
+    # Create workflow components and keep agent references
+    # Pass project_client and credential to create separate client instances per agent
+    workflow, agent_map = await _create_workflow(
+        chat_client.project_client, 
+        chat_client.credential
+    )
+    
+    # Process workflow events
+    events = workflow.run_stream(query)
+    workflow_output = await _process_workflow_events(events, conversation_ids, response_ids)
+    
+    # # Delete all agents after workflow completion
+    # print("\n=== Cleaning up agents ===")
+    # for agent_name, agent in agent_map.items():
+    #     try:
+    #         # Get the actual agent object
+    #         agent_to_delete = agent.agent if hasattr(agent, 'agent') else agent
+    #         chat_client.project_client.agents.delete(agent_name=agent_to_delete.name)
+    #         print(f"Deleted agent: {agent_name}")
+    #     except Exception as e:
+    #         print(f"Failed to delete agent {agent_name}: {e}")
+    
+    return {
+        "conversation_ids": dict(conversation_ids),
+        "response_ids": dict(response_ids),
+        "output": workflow_output,
+        "query": query
+    }
+
+
+async def _create_workflow(project_client, credential):
+    """Create the multi-agent travel planning workflow with specialized agents.
+    
+    IMPORTANT: Each agent needs its own client instance because the V2 client stores
+    agent_name and agent_version as instance variables, causing all agents to share
+    the same agent identity if they share a client.
+    """
+    
+    # Create separate client for Final Coordinator
+    final_coordinator_client = AzureAIClient(
+        project_client=project_client,
+        async_credential=credential,
+        agent_name="final-coordinator"
+    )
+    final_coordinator = ResearchLead(chat_client=final_coordinator_client, id="final-coordinator")
+    
+    # Update final_coordinator agent instructions
+    final_coordinator.agent.instructions = (
+        "You are the final coordinator. You will receive responses from multiple agents: "
+        "booking-info-aggregation-agent (hotel/flight options), booking-payment-agent (payment confirmation), "
+        "and activity-search-agent (activities). "
+        "Review each agent's response, then create a comprehensive travel itinerary organized by: "
+        "1. Flights 2. Hotels 3. Activities 4. Booking confirmations 5. Payment details. "
+        "Clearly indicate which information came from which agent. Do not use tools."
+    )
+    
+    # Agent 1: Travel Request Handler (initial coordinator)
+    # Create separate client with unique agent_name
+    travel_request_handler_client = AzureAIClient(
+        project_client=project_client,
+        async_credential=credential,
+        agent_name="travel-request-handler"
+    )
+    travel_request_handler = travel_request_handler_client.create_agent(
+        id="travel-request-handler",
+        instructions=(
+            "You receive user travel queries and relay them to specialized agents. Extract key information: destination, dates, budget, and preferences. Pass this information forward clearly to the next agents."
+        ),
+        name="travel-request-handler",
+        store=True
+    )
+    
+    # Agent 2: Hotel Search Executor
+    hotel_search_client = AzureAIClient(
+        project_client=project_client,
+        async_credential=credential,
+        agent_name="hotel-search-agent"
+    )
+    hotel_search_agent = hotel_search_client.create_agent(
+        id="hotel-search-agent",
+        instructions=(
+            "You are a hotel search specialist. Your task is ONLY to search for and provide hotel information. Use search_hotels to find options, get_hotel_details for specifics, and check_hotel_availability to verify rooms. Output format: List hotel names, prices per night, total cost for the stay, locations, ratings, amenities, and addresses. IMPORTANT: Only provide hotel information without additional commentary."
+        ),
+        name="hotel-search-agent",
+        tools=[search_hotels, get_hotel_details, check_hotel_availability],
+        store=True
+    )
+    
+    # Agent 3: Flight Search Executor
+    flight_search_client = AzureAIClient(
+        project_client=project_client,
+        async_credential=credential,
+        agent_name="flight-search-agent"
+    )
+    flight_search_agent = flight_search_client.create_agent(
+        id="flight-search-agent",
+        instructions=(
+            "You are a flight search specialist. Your task is ONLY to search for and provide flight information. Use search_flights to find options, get_flight_details for specifics, and check_flight_availability for seats. Output format: List flight numbers, airlines, departure/arrival times, prices, durations, and cabin class. IMPORTANT: Only provide flight information without additional commentary."
+        ),
+        name="flight-search-agent",
+        tools=[search_flights, get_flight_details, check_flight_availability],
+        store=True
+    )
+    
+    # Agent 4: Activity Search Executor
+    activity_search_client = AzureAIClient(
+        project_client=project_client,
+        async_credential=credential,
+        agent_name="activity-search-agent"
+    )
+    activity_search_agent = activity_search_client.create_agent(
+        id="activity-search-agent",
+        instructions=(
+            "You are an activities specialist. Your task is ONLY to search for and provide activity information. Use search_activities to find options for activities. Output format: List activity names, descriptions, prices, durations, ratings, and categories. IMPORTANT: Only provide activity information without additional commentary."
+        ),
+        name="activity-search-agent",
+        tools=[search_activities],
+        store=True
+    )
+    
+    # Agent 5: Booking Confirmation Executor
+    booking_confirmation_client = AzureAIClient(
+        project_client=project_client,
+        async_credential=credential,
+        agent_name="booking-confirmation-agent"
+    )
+    booking_confirmation_agent = booking_confirmation_client.create_agent(
+        id="booking-confirmation-agent",
+        instructions=(
+            "You confirm bookings. Use check_hotel_availability and check_flight_availability to verify slots, then confirm_booking to finalize. Provide ONLY: confirmation numbers, booking references, and confirmation status."
+        ),
+        name="booking-confirmation-agent",
+        tools=[confirm_booking, check_hotel_availability, check_flight_availability],
+        store=True
+    )
+    
+    # Agent 6: Booking Payment Executor
+    booking_payment_client = AzureAIClient(
+        project_client=project_client,
+        async_credential=credential,
+        agent_name="booking-payment-agent"
+    )
+    booking_payment_agent = booking_payment_client.create_agent(
+        id="booking-payment-agent",
+        instructions=(
+            "You process payments. Use validate_payment_method to verify payment, then process_payment to complete transactions. Provide ONLY: payment confirmation status, transaction IDs, and payment amounts."
+        ),
+        name="booking-payment-agent",
+        tools=[process_payment, validate_payment_method],
+        store=True
+    )
+    
+    # Agent 7: Booking Information Aggregation Executor
+    booking_info_client = AzureAIClient(
+        project_client=project_client,
+        async_credential=credential,
+        agent_name="booking-info-aggregation-agent"
+    )
+    booking_info_aggregation_agent = booking_info_client.create_agent(
+        id="booking-info-aggregation-agent",
+        instructions=(
+            "You aggregate hotel and flight search results. Receive options from search agents and organize them. Provide: top 2-3 hotel options with prices and top 2-3 flight options with prices in a structured format."
+        ),
+        name="booking-info-aggregation-agent",
+        store=True
+    )
+    
+    # Build workflow with logical booking flow:
+    # 1. start_executor → travel_request_handler
+    # 2. travel_request_handler → hotel_search, flight_search, activity_search (fan-out)
+    # 3. hotel_search → booking_info_aggregation
+    # 4. flight_search → booking_info_aggregation
+    # 5. booking_info_aggregation → booking_confirmation
+    # 6. booking_confirmation → booking_payment
+    # 7. booking_info_aggregation, booking_payment, activity_search → final_coordinator (final aggregation, fan-in)
+    # 
+    # No iteration limit is configured here; the graph above contains no cycles
+    # store=True preserves conversation history on each agent's thread for evaluation
+    
+    workflow = (WorkflowBuilder(name='Travel Planning Workflow')
+            .set_start_executor(start_executor)
+            .add_edge(start_executor, travel_request_handler)
+            .add_fan_out_edges(travel_request_handler, [hotel_search_agent, flight_search_agent, activity_search_agent])
+            .add_edge(hotel_search_agent, booking_info_aggregation_agent)
+            .add_edge(flight_search_agent, booking_info_aggregation_agent)
+            .add_edge(booking_info_aggregation_agent, booking_confirmation_agent)
+            .add_edge(booking_confirmation_agent, booking_payment_agent)
+            .add_fan_in_edges([booking_info_aggregation_agent, booking_payment_agent, activity_search_agent], 
+                             final_coordinator)
+            .build())
+    
+    # Return workflow and agent map (keyed by each agent's hyphenated name) for thread ID extraction
+    agent_map = {
+        "travel-request-handler": travel_request_handler,
+        "hotel-search-agent": hotel_search_agent,
+        "flight-search-agent": flight_search_agent,
+        "activity-search-agent": activity_search_agent,
+        "booking-confirmation-agent": booking_confirmation_agent,
+        "booking-payment-agent": booking_payment_agent,
+        "booking-info-aggregation-agent": booking_info_aggregation_agent,
+        "final-coordinator": final_coordinator.agent,
+    }
+    
+    return workflow, agent_map
+
+
+async def _process_workflow_events(events, conversation_ids, response_ids):
+    """Process workflow events and track interactions."""
+    workflow_output = None
+    
+    async for event in events:
+        if isinstance(event, WorkflowOutputEvent):
+            workflow_output = event.data
+            # Handle Unicode characters that may not be displayable in Windows console
+            try:
+                print(f"\nWorkflow Output: {event.data}\n")
+            except UnicodeEncodeError:
+                output_str = str(event.data).encode('ascii', 'replace').decode('ascii')
+                print(f"\nWorkflow Output: {output_str}\n")
+            
+        elif isinstance(event, AgentRunUpdateEvent):
+            _track_agent_ids(event, event.executor_id, response_ids, conversation_ids)
+    
+    return workflow_output
+
+
+def _track_agent_ids(event, agent, response_ids, conversation_ids):
+    """Track agent response and conversation IDs - supporting multiple responses per agent."""
+    if isinstance(event.data, AgentRunResponseUpdate):
+        # Check for conversation_id and response_id from raw_representation
+        # V2 API stores conversation_id directly on raw_representation (ChatResponseUpdate)
+        if hasattr(event.data, 'raw_representation') and event.data.raw_representation:
+            raw = event.data.raw_representation
+            
+            # Try conversation_id directly on raw (this is the V2 pattern)
+            if hasattr(raw, 'conversation_id') and raw.conversation_id:
+                # Only add if not already in the list
+                if raw.conversation_id not in conversation_ids[agent]:
+                    conversation_ids[agent].append(raw.conversation_id)
+            
+            # Extract response_id from the OpenAI event (available from first event)
+            if hasattr(raw, 'raw_representation') and raw.raw_representation:
+                openai_event = raw.raw_representation
+                
+                # Check if event has response object with id
+                if hasattr(openai_event, 'response') and hasattr(openai_event.response, 'id'):
+                    # Only add if not already in the list
+                    if openai_event.response.id not in response_ids[agent]:
+                        response_ids[agent].append(openai_event.response.id)
+
+
+async def create_and_run_workflow():
+    """Run the workflow evaluation and display results.
+    
+    Returns:
+        Dictionary containing agents data with conversation IDs, response IDs, and query information
+    """
+    example_queries = [
+        "Plan a 3-day trip to Paris from December 15-18, 2025. Budget is $2000. Need hotel near Eiffel Tower, round-trip flights from New York JFK, and recommend 2-3 activities per day.",
+        "Find a budget hotel in Tokyo for January 5-10, 2026 under $150/night near Shibuya station, book activities including a sushi making class",
+        "Search for round-trip flights from Los Angeles to London departing March 20, 2026, returning March 27, 2026. Economy class, 2 passengers. Recommend tourist attractions and museums.",
+    ]
+    
+    query = example_queries[0]
+    print(f"Query: {query}\n")
+    
+    result = await run_workflow_with_response_tracking(query)
+    
+    # Create output data structure
+    output_data = {
+        "agents": {},
+        "query": result["query"],
+        "output": result.get("output", "")
+    }
+    
+    # Create agent-specific mappings - now with lists of IDs
+    all_agents = set(result["conversation_ids"].keys()) | set(result["response_ids"].keys())
+    for agent_name in all_agents:
+        output_data["agents"][agent_name] = {
+            "conversation_ids": result["conversation_ids"].get(agent_name, []),
+            "response_ids": result["response_ids"].get(agent_name, []),
+            "response_count": len(result["response_ids"].get(agent_name, []))
+        }
+    
+    print(f"\nTotal agents tracked: {len(output_data['agents'])}")
+    
+    # Print summary of multiple responses
+    print("\n=== Multi-Response Summary ===")
+    for agent_name, agent_data in output_data["agents"].items():
+        response_count = agent_data["response_count"]
+        print(f"{agent_name}: {response_count} response(s)")
+    
+    return output_data
+
+
+
+def main():
+    """Main function to run the workflow evaluation example."""
+    asyncio.run(create_and_run_workflow())
+
+
+if __name__ == "__main__":
+    main()
diff --git a/python/samples/demos/workflow_evaluation/run_evaluation.py b/python/samples/demos/workflow_evaluation/run_evaluation.py
new file mode 100644
index 0000000000..b2adfd8b83
--- /dev/null
+++ b/python/samples/demos/workflow_evaluation/run_evaluation.py
@@ -0,0 +1,219 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+"""
+Script to run multi-agent travel planning workflow and evaluate agent responses.
+
+This script:
+1. Executes the multi-agent workflow
+2. Displays response data summary
+3. Creates and runs evaluation with multiple evaluators
+4. Monitors evaluation progress and displays results
+"""
+
+import asyncio
+import os
+import time
+
+from azure.ai.projects import AIProjectClient
+from azure.identity import DefaultAzureCredential
+from dotenv import load_dotenv
+
+from create_workflow import create_and_run_workflow
+
+
+def print_section(title: str):
+    """Print a formatted section header."""
+    print(f"\n{'='*80}")
+    print(f"{title}")
+    print(f"{'='*80}")
+
+
+async def run_workflow():
+    """Execute the multi-agent travel planning workflow.
+    
+    Returns:
+        Dictionary containing workflow data with agent response IDs
+    """
+    print_section("Step 1: Running Workflow")
+    print("Executing multi-agent travel planning workflow...")
+    print("This may take a few minutes...")
+    
+    workflow_data = await create_and_run_workflow()
+    
+    print("Workflow execution completed")
+    return workflow_data
+
+
+def display_response_summary(workflow_data: dict):
+    """Display summary of response data."""
+    print_section("Step 2: Response Data Summary")
+    
+    print(f"Query: {workflow_data['query']}")
+    print(f"\nAgents tracked: {len(workflow_data['agents'])}")
+    
+    for agent_name, agent_data in workflow_data['agents'].items():
+        response_count = agent_data['response_count']
+        print(f"  {agent_name}: {response_count} response(s)")
+
+
+def fetch_agent_responses(openai_client, workflow_data: dict, agent_names: list):
+    """Fetch and display final responses from specified agents."""
+    print_section("Step 3: Fetching Agent Responses")
+    
+    for agent_name in agent_names:
+        if agent_name not in workflow_data['agents']:
+            continue
+            
+        agent_data = workflow_data['agents'][agent_name]
+        if not agent_data['response_ids']:
+            continue
+        
+        final_response_id = agent_data['response_ids'][-1]
+        print(f"\n{agent_name}")
+        print(f"  Response ID: {final_response_id}")
+        
+        try:
+            response = openai_client.responses.retrieve(response_id=final_response_id)
+            content = response.output[-1].content[-1].text
+            truncated = content[:300] + "..." if len(content) > 300 else content
+            print(f"  Content preview: {truncated}")
+        except Exception as e:
+            print(f"  Error: {e}")
+
+
+def create_evaluation(openai_client, model_deployment: str):
+    """Create evaluation with multiple evaluators."""
+    print_section("Step 4: Creating Evaluation")
+    
+    data_source_config = {"type": "azure_ai_source", "scenario": "responses"}
+    
+    testing_criteria = [
+        {
+            "type": "azure_ai_evaluator",
+            "name": "relevance",
+            "evaluator_name": "builtin.relevance",
+            "initialization_parameters": {"deployment_name": model_deployment}
+        },
+        {
+            "type": "azure_ai_evaluator",
+            "name": "groundedness",
+            "evaluator_name": "builtin.groundedness",
+            "initialization_parameters": {"deployment_name": model_deployment}
+        },
+        {
+            "type": "azure_ai_evaluator",
+            "name": "tool_call_accuracy",
+            "evaluator_name": "builtin.tool_call_accuracy",
+            "initialization_parameters": {"deployment_name": model_deployment}
+        },
+        {
+            "type": "azure_ai_evaluator",
+            "name": "tool_output_utilization",
+            "evaluator_name": "builtin.tool_output_utilization",
+            "initialization_parameters": {"deployment_name": model_deployment}
+        },
+    ]
+    
+    eval_object = openai_client.evals.create(
+        name="Travel Workflow Multi-Evaluator Assessment",
+        data_source_config=data_source_config,
+        testing_criteria=testing_criteria,
+    )
+    
+    print(f"Evaluation created: {eval_object.id}")
+    print(f"Evaluators: {len(testing_criteria)}")
+    
+    return eval_object
+
+
+def run_evaluation(openai_client, eval_object, workflow_data: dict, agent_names: list):
+    """Run evaluation on selected agent responses."""
+    print_section("Step 5: Running Evaluation")
+    
+    selected_response_ids = []
+    for agent_name in agent_names:
+        if agent_name in workflow_data['agents']:
+            agent_data = workflow_data['agents'][agent_name]
+            if agent_data['response_ids']:
+                selected_response_ids.append(agent_data['response_ids'][-1])
+    
+    print(f"Selected {len(selected_response_ids)} responses for evaluation")
+    
+    data_source = {
+        "type": "azure_ai_responses",
+        "item_generation_params": {
+            "type": "response_retrieval",
+            "data_mapping": {"response_id": "{{item.resp_id}}"},
+            "source": {
+                "type": "file_content",
+                "content": [{"item": {"resp_id": resp_id}} for resp_id in selected_response_ids]
+            },
+        },
+    }
+    
+    eval_run = openai_client.evals.runs.create(
+        eval_id=eval_object.id,
+        name="Multi-Agent Response Evaluation",
+        data_source=data_source
+    )
+    
+    print(f"Evaluation run created: {eval_run.id}")
+    
+    return eval_run
+
+
+def monitor_evaluation(openai_client, eval_object, eval_run):
+    """Poll the evaluation run until it reaches a terminal status and print the outcome."""
+    print_section("Step 6: Monitoring Evaluation")
+    
+    print("Waiting for evaluation to complete...")
+    
+    # "canceled" is terminal too; without it a canceled run would be polled forever.
+    terminal_statuses = ("completed", "failed", "canceled")
+    while eval_run.status not in terminal_statuses:
+        # Sleep before polling so the loop exits as soon as a terminal status is seen.
+        time.sleep(5)
+        eval_run = openai_client.evals.runs.retrieve(run_id=eval_run.id, eval_id=eval_object.id)
+        print(f"Status: {eval_run.status}")
+    
+    if eval_run.status == "completed":
+        print("\nEvaluation completed successfully")
+        print(f"Result counts: {eval_run.result_counts}")
+        print(f"\nReport URL: {eval_run.report_url}")
+    else:
+        print(f"\nEvaluation {eval_run.status}")
+
+
+async def main():
+    """Run the workflow, then create, run and monitor an evaluation of its agent responses."""
+    load_dotenv()
+    
+    print("Travel Planning Workflow Evaluation")
+    
+    workflow_data = await run_workflow()
+    
+    display_response_summary(workflow_data)
+    
+    # Context managers close the credential and project client even if a step below raises.
+    with DefaultAzureCredential() as credential, AIProjectClient(
+        endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
+        credential=credential,
+        api_version="2025-11-15-preview"
+    ) as project_client:
+        openai_client = project_client.get_openai_client()
+        
+        agents_to_evaluate = ["hotel-search-agent", "flight-search-agent", "activity-search-agent"]
+        
+        fetch_agent_responses(openai_client, workflow_data, agents_to_evaluate)
+        
+        model_deployment = os.environ.get("AZURE_AI_MODEL_DEPLOYMENT_NAME", "gpt-4o-mini")
+        eval_object = create_evaluation(openai_client, model_deployment)
+        
+        eval_run = run_evaluation(openai_client, eval_object, workflow_data, agents_to_evaluate)
+        monitor_evaluation(openai_client, eval_object, eval_run)
+    
+    print_section("Complete")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())

From 85555d989fa76d5b14f57d0a33f159e5e36f7319 Mon Sep 17 00:00:00 2001
From: Salma Elshafey <selshafey@microsoft.com>
Date: Wed, 19 Nov 2025 19:29:58 +0200
Subject: [PATCH 2/9] Upgrade syntax

---
 .../demos/workflow_evaluation/create_workflow.py       | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/samples/demos/workflow_evaluation/create_workflow.py b/python/samples/demos/workflow_evaluation/create_workflow.py
index bf2d8337fe..9b33656baa 100644
--- a/python/samples/demos/workflow_evaluation/create_workflow.py
+++ b/python/samples/demos/workflow_evaluation/create_workflow.py
@@ -81,7 +81,7 @@
 
 
 @executor(id="start_executor")
-async def start_executor(input: str, ctx: WorkflowContext[List[ChatMessage]]) -> None:
+async def start_executor(input: str, ctx: WorkflowContext[list[ChatMessage]]) -> None:
     """Initiates the workflow by sending the user query to all specialized agents."""
     await ctx.send_message([ChatMessage(role="user", text=input)])
 
@@ -107,7 +107,7 @@ def __init__(self, chat_client: AzureAIClient, id: str = "travel-planning-coordi
         super().__init__(id=id)
 
     @handler
-    async def fan_in_handle(self, responses: List[AgentExecutorResponse], ctx: WorkflowContext[WorkflowOutputEvent]) -> None:
+    async def fan_in_handle(self, responses: list[AgentExecutorResponse], ctx: WorkflowContext[WorkflowOutputEvent]) -> None:
         user_query = responses[0].full_conversation[0].text
         
         # Extract findings from all agent responses
@@ -129,7 +129,7 @@ async def fan_in_handle(self, responses: List[AgentExecutorResponse], ctx: Workf
         
         await ctx.yield_output(output_text)
     
-    def _extract_agent_findings(self, responses: List[AgentExecutorResponse]) -> List[str]:
+    def _extract_agent_findings(self, responses: list[AgentExecutorResponse]) -> list[str]:
         """Extract findings from agent responses."""
         agent_findings = []
         
@@ -147,7 +147,7 @@ def _extract_agent_findings(self, responses: List[AgentExecutorResponse]) -> Lis
         return agent_findings
 
 
-async def run_workflow_with_response_tracking(query: str, chat_client: Optional[AzureAIClient] = None) -> Dict:
+async def run_workflow_with_response_tracking(query: str, chat_client: AzureAIClient | None = None) -> dict:
     """Run multi-agent workflow and track conversation IDs, response IDs, and interaction sequence.
     
     Args:
@@ -181,7 +181,7 @@ async def run_workflow_with_response_tracking(query: str, chat_client: Optional[
         return await _run_workflow_with_client(query, chat_client)
 
 
-async def _run_workflow_with_client(query: str, chat_client: AzureAIClient) -> Dict:
+async def _run_workflow_with_client(query: str, chat_client: AzureAIClient) -> dict:
     """Execute workflow with given client and track all interactions."""
     
     # Initialize tracking variables - use lists to track multiple responses per agent

From d33ce3cee8dd9ac7133a26c1949de47d3dd31186 Mon Sep 17 00:00:00 2001
From: Salma Elshafey <selshafey@microsoft.com>
Date: Wed, 19 Nov 2025 19:42:26 +0200
Subject: [PATCH 3/9] Add copyright line

---
 python/samples/demos/workflow_evaluation/_tools.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/samples/demos/workflow_evaluation/_tools.py b/python/samples/demos/workflow_evaluation/_tools.py
index 420d1ece9e..0adb665030 100644
--- a/python/samples/demos/workflow_evaluation/_tools.py
+++ b/python/samples/demos/workflow_evaluation/_tools.py
@@ -1,3 +1,5 @@
+# Copyright (c) Microsoft. All rights reserved.
+
 import json
 from datetime import datetime
 

From 1f07e5eb7a51e67796e78ac5e294687bce1f9381 Mon Sep 17 00:00:00 2001
From: Salma Elshafey <selshafey@microsoft.com>
Date: Wed, 19 Nov 2025 19:57:57 +0200
Subject: [PATCH 4/9] Remove sys.path hack and unused imports; use DefaultAzureCredential

---
 .../demos/workflow_evaluation/create_workflow.py | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/python/samples/demos/workflow_evaluation/create_workflow.py b/python/samples/demos/workflow_evaluation/create_workflow.py
index 9b33656baa..caf07484a8 100644
--- a/python/samples/demos/workflow_evaluation/create_workflow.py
+++ b/python/samples/demos/workflow_evaluation/create_workflow.py
@@ -29,25 +29,15 @@
 """
 
 import asyncio
-import json
 import os
-import sys
 from collections import defaultdict
-from pathlib import Path
-from typing import Dict, List, Optional
 
 from dotenv import load_dotenv
 
-# Add the local packages to the path
-packages_path = Path(__file__).parent.parent.parent.parent.parent.parent / "packages"
-sys.path.insert(0, str(packages_path / "core"))
-sys.path.insert(0, str(packages_path / "azure-ai"))
-
 from agent_framework import (
     AgentExecutorResponse,
     AgentRunUpdateEvent,
     AgentRunResponseUpdate,
-    ChatAgent,
     ChatMessage,
     Executor,
     executor,
@@ -58,9 +48,8 @@
     WorkflowOutputEvent,
 )
 
-# Import V2 client directly from source file to avoid installed package conflicts
 from agent_framework_azure_ai._client import AzureAIClient
-from azure.identity.aio import AzureDeveloperCliCredential
+from azure.identity.aio import DefaultAzureCredential
 from azure.ai.projects.aio import AIProjectClient
 
 from _tools import (
@@ -158,8 +147,7 @@ async def run_workflow_with_response_tracking(query: str, chat_client: AzureAICl
         Dictionary containing interaction sequence, conversation/response IDs, and conversation analysis
     """
     if chat_client is None:
-        # Use AzureDeveloperCliCredential to avoid Azure CLI timeout issues
-        credential = AzureDeveloperCliCredential()
+        credential = DefaultAzureCredential()
         
         # Create AIProjectClient with the correct API version for V2 prompt agents
         project_client = AIProjectClient(

From e8ba4680ac62b302c58adcc30ae3e74e318c5549 Mon Sep 17 00:00:00 2001
From: Salma Elshafey <selshafey@microsoft.com>
Date: Wed, 19 Nov 2025 20:03:57 +0200
Subject: [PATCH 5/9] Import AzureAIClient from agent_framework.azure instead of private _client module

---
 python/samples/demos/workflow_evaluation/create_workflow.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/samples/demos/workflow_evaluation/create_workflow.py b/python/samples/demos/workflow_evaluation/create_workflow.py
index caf07484a8..611d08ba6f 100644
--- a/python/samples/demos/workflow_evaluation/create_workflow.py
+++ b/python/samples/demos/workflow_evaluation/create_workflow.py
@@ -48,7 +48,7 @@
     WorkflowOutputEvent,
 )
 
-from agent_framework_azure_ai._client import AzureAIClient
+from agent_framework.azure import AzureAIClient
 from azure.identity.aio import DefaultAzureCredential
 from azure.ai.projects.aio import AIProjectClient
 

From f0e8f87645f4967ece56bd3f169b853ce5a27a9d Mon Sep 17 00:00:00 2001
From: Salma Elshafey <selshafey@microsoft.com>
Date: Thu, 20 Nov 2025 13:00:09 +0200
Subject: [PATCH 6/9] Address PR comments

---
 .../demos/workflow_evaluation/.env.example    |   2 +
 .../demos/workflow_evaluation/README.md       |   7 +-
 .../demos/workflow_evaluation/_tools.py       | 116 ++++++++++++++----
 .../workflow_evaluation/create_workflow.py    |  84 +++++--------
 4 files changed, 123 insertions(+), 86 deletions(-)
 create mode 100644 python/samples/demos/workflow_evaluation/.env.example

diff --git a/python/samples/demos/workflow_evaluation/.env.example b/python/samples/demos/workflow_evaluation/.env.example
new file mode 100644
index 0000000000..3a13025d22
--- /dev/null
+++ b/python/samples/demos/workflow_evaluation/.env.example
@@ -0,0 +1,2 @@
+AZURE_AI_PROJECT_ENDPOINT="<your-project-endpoint>"
+AZURE_AI_MODEL_DEPLOYMENT_NAME="<your-model-deployment>"
\ No newline at end of file
diff --git a/python/samples/demos/workflow_evaluation/README.md b/python/samples/demos/workflow_evaluation/README.md
index ee9aa7ffd6..d687e4ce14 100644
--- a/python/samples/demos/workflow_evaluation/README.md
+++ b/python/samples/demos/workflow_evaluation/README.md
@@ -13,12 +13,7 @@ The evaluation uses four Azure AI built-in evaluators:
 
 ## Setup
 
-Create a `.env` file with required configuration:
-
-```env
-AZURE_AI_PROJECT_ENDPOINT=<your-project-endpoint>
-AZURE_AI_MODEL_DEPLOYMENT_NAME=<your-model-deployment>
-```
+Create a `.env` file with configuration as in the `.env.example` file in this folder.
 
 ## Running the Evaluation
 
diff --git a/python/samples/demos/workflow_evaluation/_tools.py b/python/samples/demos/workflow_evaluation/_tools.py
index 0adb665030..e8b70a4472 100644
--- a/python/samples/demos/workflow_evaluation/_tools.py
+++ b/python/samples/demos/workflow_evaluation/_tools.py
@@ -4,10 +4,17 @@
 from datetime import datetime
 
 # --- Travel Planning Tools ---
+# Note: These are mock tools for demonstration purposes. They return simulated data
+# and do not make real API calls or bookings.
 
+
+# Mock hotel search tool
 def search_hotels(location: str, check_in: str, check_out: str, guests: int = 2) -> str:
-    """
-    Search for available hotels based on location and dates.
+    """Search for available hotels based on location and dates.
+    
+    Returns:
+        JSON string containing search results with hotel details including name, rating,
+        price, distance to landmarks, amenities, and availability.
     """
     # Specific mock data for Paris December 15-18, 2025
     if "paris" in location.lower():
@@ -64,9 +71,14 @@ def search_hotels(location: str, check_in: str, check_out: str, guests: int = 2)
         "note": "Hotel search results matching your query"
     })
 
+
+# Mock hotel details tool
 def get_hotel_details(hotel_name: str) -> str:
-    """
-    Get detailed information about a specific hotel.
+    """Get detailed information about a specific hotel.
+    
+    Returns:
+        JSON string containing detailed hotel information including description,
+        check-in/out times, cancellation policy, reviews, and nearby attractions.
     """
     hotel_details = {
         "Hotel Eiffel Trocadéro": {
@@ -131,9 +143,14 @@ def get_hotel_details(hotel_name: str) -> str:
         "details": details
     })
 
+
+# Mock flight search tool
 def search_flights(origin: str, destination: str, departure_date: str, return_date: str = None, passengers: int = 1) -> str:
-    """
-    Search for available flights between two locations.
+    """Search for available flights between two locations.
+    
+    Returns:
+        JSON string containing flight search results with details including flight numbers,
+        airlines, departure/arrival times, prices, durations, and baggage allowances.
     """
     # Specific mock data for JFK to Paris December 15-18, 2025
     if "jfk" in origin.lower() or "new york" in origin.lower():
@@ -245,9 +262,14 @@ def search_flights(origin: str, destination: str, departure_date: str, return_da
         "note": "Flight search results for JFK to Paris CDG"
     })
 
+
+# Mock flight details tool
 def get_flight_details(flight_number: str) -> str:
-    """
-    Get detailed information about a specific flight.
+    """Get detailed information about a specific flight.
+    
+    Returns:
+        JSON string containing detailed flight information including airline, aircraft type,
+        departure/arrival airports and times, gates, terminals, duration, and amenities.
     """
     mock_details = {
         "flight_number": flight_number,
@@ -277,9 +299,14 @@ def get_flight_details(flight_number: str) -> str:
         "flight_details": mock_details
     })
 
+
+# Mock activity search tool
 def search_activities(location: str, date: str = None, category: str = None) -> str:
-    """
-    Search for available activities and attractions at a destination.
+    """Search for available activities and attractions at a destination.
+    
+    Returns:
+        JSON string containing activity search results with details including name, category,
+        duration, price, rating, description, availability, and booking requirements.
     """
     # Specific mock data for Paris activities
     if "paris" in location.lower():
@@ -411,9 +438,14 @@ def search_activities(location: str, date: str = None, category: str = None) ->
         "note": "Activity search results for Paris with sightseeing, culture, and culinary options"
     })
 
+
+# Mock activity details tool
 def get_activity_details(activity_name: str) -> str:
-    """
-    Get detailed information about a specific activity.
+    """Get detailed information about a specific activity.
+    
+    Returns:
+        JSON string containing detailed activity information including description, duration,
+        price, included items, meeting point, what to bring, cancellation policy, and reviews.
     """
     # Paris-specific activity details
     activity_details_map = {
@@ -480,9 +512,14 @@ def get_activity_details(activity_name: str) -> str:
         "activity_details": details
     })
 
+
+# Mock booking confirmation tool
 def confirm_booking(booking_type: str, booking_id: str, customer_info: dict) -> str:
-    """
-    Confirm a booking reservation.
+    """Confirm a booking reservation.
+    
+    Returns:
+        JSON string containing confirmation details including confirmation number,
+        booking status, customer information, and next steps.
     """
     confirmation_number = f"CONF-{booking_type.upper()}-{booking_id}"
     
@@ -504,9 +541,16 @@ def confirm_booking(booking_type: str, booking_id: str, customer_info: dict) ->
         "confirmation": confirmation_data
     })
 
+
+# Mock hotel availability check tool
 def check_hotel_availability(hotel_name: str, check_in: str, check_out: str, rooms: int = 1) -> str:
-    """
-    Check availability for hotel rooms.
+    """Check availability for hotel rooms.
+    
+    Sample Date format: "December 15, 2025"
+    
+    Returns:
+        JSON string containing availability status, available rooms count, price per night,
+        and last checked timestamp.
     """
     availability_status = "Available"
     
@@ -526,9 +570,16 @@ def check_hotel_availability(hotel_name: str, check_in: str, check_out: str, roo
         "availability": availability_data
     })
 
+
+# Mock flight availability check tool
 def check_flight_availability(flight_number: str, date: str, passengers: int = 1) -> str:
-    """
-    Check availability for flight seats.
+    """Check availability for flight seats.
+    
+    Sample Date format: "December 15, 2025"
+    
+    Returns:
+        JSON string containing availability status, available seats count, price per passenger,
+        and last checked timestamp.
     """
     availability_status = "Available"
     
@@ -547,9 +598,16 @@ def check_flight_availability(flight_number: str, date: str, passengers: int = 1
         "availability": availability_data
     })
 
+
+# Mock activity availability check tool
 def check_activity_availability(activity_name: str, date: str, participants: int = 1) -> str:
-    """
-    Check availability for activity bookings.
+    """Check availability for activity bookings.
+    
+    Sample Date format: "December 16, 2025"
+    
+    Returns:
+        JSON string containing availability status, available spots count, price per person,
+        and last checked timestamp.
     """
     availability_status = "Available"
     
@@ -568,9 +626,14 @@ def check_activity_availability(activity_name: str, date: str, participants: int
         "availability": availability_data
     })
 
+
+# Mock payment processing tool
 def process_payment(amount: float, currency: str, payment_method: dict, booking_reference: str) -> str:
-    """
-    Process payment for a booking.
+    """Process payment for a booking.
+    
+    Returns:
+        JSON string containing payment result with transaction ID, status, amount, currency,
+        payment method details, and receipt URL.
     """
     transaction_id = f"TXN-{datetime.now().strftime('%Y%m%d%H%M%S')}"
     
@@ -590,9 +653,14 @@ def process_payment(amount: float, currency: str, payment_method: dict, booking_
         "payment_result": payment_result
     })
 
+
+# Mock payment validation tool
 def validate_payment_method(payment_method: dict) -> str:
-    """
-    Validate payment method details.
+    """Validate payment method details.
+    
+    Returns:
+        JSON string containing validation result with is_valid flag, payment method type,
+        validation messages, supported currencies, and processing fee information.
     """
     method_type = payment_method.get("type", "credit_card")
     
diff --git a/python/samples/demos/workflow_evaluation/create_workflow.py b/python/samples/demos/workflow_evaluation/create_workflow.py
index 611d08ba6f..c33396fc9d 100644
--- a/python/samples/demos/workflow_evaluation/create_workflow.py
+++ b/python/samples/demos/workflow_evaluation/create_workflow.py
@@ -3,7 +3,7 @@
 """
 Multi-Agent Travel Planning Workflow Evaluation with Multiple Response Tracking
 
-This sample demonstrates a multi-agent travel planning workflow using the V2 client that:
+This sample demonstrates a multi-agent travel planning workflow using the Azure AI Client that:
 1. Processes travel queries through 7 specialized agents
 2. Tracks MULTIPLE response and conversation IDs per agent for evaluation
 3. Uses the new Prompt Agents API (V2)
@@ -47,6 +47,7 @@
     WorkflowBuilder,
     WorkflowOutputEvent,
 )
+from typing_extensions import Never
 
 from agent_framework.azure import AzureAIClient
 from azure.identity.aio import DefaultAzureCredential
@@ -83,12 +84,12 @@ def __init__(self, chat_client: AzureAIClient, id: str = "travel-planning-coordi
         self.agent = chat_client.create_agent(
             id="travel-planning-coordinator",
             instructions=(
-                "You are the Travel Planning Coordinator. Your role is to synthesize information from multiple "
-                "specialized travel agents into a cohesive, actionable travel plan. You receive inputs from: "
-                "hotel search specialists, flight search specialists, activity planners, booking confirmation agents, "
-                "payment processors, and booking information aggregators. Provide a clear, comprehensive travel plan "
-                "that addresses the user's original query with all necessary details including accommodations, "
-                "transportation, activities, and booking status."
+                "You are the final coordinator. You will receive responses from multiple agents: "
+                "booking-info-aggregation-agent (hotel/flight options), booking-payment-agent (payment confirmation), "
+                "and activity-search-agent (activities). "
+                "Review each agent's response, then create a comprehensive travel itinerary organized by: "
+                "1. Flights 2. Hotels 3. Activities 4. Booking confirmations 5. Payment details. "
+                "Clearly indicate which information came from which agent. Do not use tools."
             ),
             name="travel-planning-coordinator",
             store=True
@@ -96,7 +97,7 @@ def __init__(self, chat_client: AzureAIClient, id: str = "travel-planning-coordi
         super().__init__(id=id)
 
     @handler
-    async def fan_in_handle(self, responses: list[AgentExecutorResponse], ctx: WorkflowContext[WorkflowOutputEvent]) -> None:
+    async def fan_in_handle(self, responses: list[AgentExecutorResponse], ctx: WorkflowContext[Never, str]) -> None:
         user_query = responses[0].full_conversation[0].text
         
         # Extract findings from all agent responses
@@ -147,24 +148,25 @@ async def run_workflow_with_response_tracking(query: str, chat_client: AzureAICl
         Dictionary containing interaction sequence, conversation/response IDs, and conversation analysis
     """
     if chat_client is None:
-        credential = DefaultAzureCredential()
-        
-        # Create AIProjectClient with the correct API version for V2 prompt agents
-        project_client = AIProjectClient(
-            endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
-            credential=credential,
-            api_version="2025-11-15-preview",
-        )
-        
         try:
-            async with AzureAIClient(
-                project_client=project_client,
-                async_credential=credential
-            ) as client:
+            credential = DefaultAzureCredential()
+            
+            # Create AIProjectClient with the correct API version for V2 prompt agents
+            project_client = AIProjectClient(
+                endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
+                credential=credential,
+                api_version="2025-11-15-preview",
+            )
+            
+            async with (
+                credential,
+                project_client,
+                AzureAIClient(project_client=project_client, async_credential=credential) as client
+            ):
                 return await _run_workflow_with_client(query, client)
-        finally:
-            await credential.close()
-            await project_client.close()
+        except Exception as e:
+            print(f"Error during workflow execution: {e}")
+            raise
     else:
         return await _run_workflow_with_client(query, chat_client)
 
@@ -188,17 +190,6 @@ async def _run_workflow_with_client(query: str, chat_client: AzureAIClient) -> d
     events = workflow.run_stream(query)
     workflow_output = await _process_workflow_events(events, conversation_ids, response_ids)
     
-    # # Delete all agents after workflow completion
-    # print("\n=== Cleaning up agents ===")
-    # for agent_name, agent in agent_map.items():
-    #     try:
-    #         # Get the actual agent object
-    #         agent_to_delete = agent.agent if hasattr(agent, 'agent') else agent
-    #         chat_client.project_client.agents.delete(agent_name=agent_to_delete.name)
-    #         print(f"Deleted agent: {agent_name}")
-    #     except Exception as e:
-    #         print(f"Failed to delete agent {agent_name}: {e}")
-    
     return {
         "conversation_ids": dict(conversation_ids),
         "response_ids": dict(response_ids),
@@ -223,16 +214,6 @@ async def _create_workflow(project_client, credential):
     )
     final_coordinator = ResearchLead(chat_client=final_coordinator_client, id="final-coordinator")
     
-    # Update final_coordinator agent instructions
-    final_coordinator.agent.instructions = (
-        "You are the final coordinator. You will receive responses from multiple agents: "
-        "booking-info-aggregation-agent (hotel/flight options), booking-payment-agent (payment confirmation), "
-        "and activity-search-agent (activities). "
-        "Review each agent's response, then create a comprehensive travel itinerary organized by: "
-        "1. Flights 2. Hotels 3. Activities 4. Booking confirmations 5. Payment details. "
-        "Clearly indicate which information came from which agent. Do not use tools."
-    )
-    
     # Agent 1: Travel Request Handler (initial coordinator)
     # Create separate client with unique agent_name
     travel_request_handler_client = AzureAIClient(
@@ -352,9 +333,6 @@ async def _create_workflow(project_client, credential):
     # 5. booking_info_aggregation → booking_confirmation
     # 6. booking_confirmation → booking_payment
     # 7. booking_info_aggregation, booking_payment, activity_search → final_coordinator (final aggregation, fan-in)
-    # 
-    # Max iterations set to 10 (though shouldn't be needed without cycles)
-    # store=True preserves conversation history on each agent's thread for evaluation
     
     workflow = (WorkflowBuilder(name='Travel Planning Workflow')
             .set_start_executor(start_executor)
@@ -411,7 +389,7 @@ def _track_agent_ids(event, agent, response_ids, conversation_ids):
         if hasattr(event.data, 'raw_representation') and event.data.raw_representation:
             raw = event.data.raw_representation
             
-            # Try conversation_id directly on raw (this is the V2 pattern)
+            # Try conversation_id directly on raw representation
             if hasattr(raw, 'conversation_id') and raw.conversation_id:
                 # Only add if not already in the list
                 if raw.conversation_id not in conversation_ids[agent]:
@@ -472,11 +450,5 @@ async def create_and_run_workflow():
     return output_data
 
 
-
-def main():
-    """Main function to run the workflow evaluation example."""
-    asyncio.run(create_and_run_workflow())
-
-
 if __name__ == "__main__":
-    main()
+    asyncio.run(create_and_run_workflow())

From 4a0fae0f1320e008d862a002e777d6db21584213 Mon Sep 17 00:00:00 2001
From: Salma Elshafey <selshafey@microsoft.com>
Date: Thu, 20 Nov 2025 13:23:46 +0200
Subject: [PATCH 7/9] Python: Workflow eval sample - print evaluator names

---
 python/samples/demos/workflow_evaluation/run_evaluation.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/samples/demos/workflow_evaluation/run_evaluation.py b/python/samples/demos/workflow_evaluation/run_evaluation.py
index b2adfd8b83..610f7ade00 100644
--- a/python/samples/demos/workflow_evaluation/run_evaluation.py
+++ b/python/samples/demos/workflow_evaluation/run_evaluation.py
@@ -120,8 +120,9 @@ def create_evaluation(openai_client, model_deployment: str):
         testing_criteria=testing_criteria,
     )
     
+    evaluator_names = [criterion["name"] for criterion in testing_criteria]
     print(f"Evaluation created: {eval_object.id}")
-    print(f"Evaluators: {len(testing_criteria)}")
+    print(f"Evaluators ({len(evaluator_names)}): {', '.join(evaluator_names)}")
     
     return eval_object
 

From 1835806d747a3e3cd15386e3f3e0bd96e55bca8d Mon Sep 17 00:00:00 2001
From: Salma Elshafey <selshafey@microsoft.com>
Date: Thu, 20 Nov 2025 21:10:38 +0200
Subject: [PATCH 8/9] Python: Workflow eval - address PR comments

---
 .../demos/workflow_evaluation/_tools.py       | 86 ++++++++++++++++---
 .../workflow_evaluation/create_workflow.py    |  4 +-
 2 files changed, 75 insertions(+), 15 deletions(-)

diff --git a/python/samples/demos/workflow_evaluation/_tools.py b/python/samples/demos/workflow_evaluation/_tools.py
index e8b70a4472..eca03544b2 100644
--- a/python/samples/demos/workflow_evaluation/_tools.py
+++ b/python/samples/demos/workflow_evaluation/_tools.py
@@ -2,6 +2,10 @@
 
 import json
 from datetime import datetime
+from typing import Annotated
+
+from agent_framework import ai_function
+from pydantic import Field
 
 # --- Travel Planning Tools ---
 # Note: These are mock tools for demonstration purposes. They return simulated data
@@ -9,7 +13,13 @@
 
 
 # Mock hotel search tool
-def search_hotels(location: str, check_in: str, check_out: str, guests: int = 2) -> str:
+@ai_function(name="search_hotels", description="Search for available hotels based on location and dates.")
+def search_hotels(
+    location: Annotated[str, Field(description="City or region to search for hotels.")],
+    check_in: Annotated[str, Field(description="Check-in date (e.g., 'December 15, 2025').")],
+    check_out: Annotated[str, Field(description="Check-out date (e.g., 'December 18, 2025').")],
+    guests: Annotated[int, Field(description="Number of guests.")] = 2,
+) -> str:
     """Search for available hotels based on location and dates.
     
     Returns:
@@ -73,7 +83,10 @@ def search_hotels(location: str, check_in: str, check_out: str, guests: int = 2)
 
 
 # Mock hotel details tool
-def get_hotel_details(hotel_name: str) -> str:
+@ai_function(name="get_hotel_details", description="Get detailed information about a specific hotel.")
+def get_hotel_details(
+    hotel_name: Annotated[str, Field(description="Name of the hotel to get details for.")],
+) -> str:
     """Get detailed information about a specific hotel.
     
     Returns:
@@ -145,7 +158,14 @@ def get_hotel_details(hotel_name: str) -> str:
 
 
 # Mock flight search tool
-def search_flights(origin: str, destination: str, departure_date: str, return_date: str = None, passengers: int = 1) -> str:
+@ai_function(name="search_flights", description="Search for available flights between two locations.")
+def search_flights(
+    origin: Annotated[str, Field(description="Departure airport or city (e.g., 'JFK' or 'New York').")],
+    destination: Annotated[str, Field(description="Arrival airport or city (e.g., 'CDG' or 'Paris').")],
+    departure_date: Annotated[str, Field(description="Departure date (e.g., 'December 15, 2025').")],
+    return_date: Annotated[str | None, Field(description="Return date (e.g., 'December 18, 2025').")] = None,
+    passengers: Annotated[int, Field(description="Number of passengers.")] = 1,
+) -> str:
     """Search for available flights between two locations.
     
     Returns:
@@ -264,7 +284,10 @@ def search_flights(origin: str, destination: str, departure_date: str, return_da
 
 
 # Mock flight details tool
-def get_flight_details(flight_number: str) -> str:
+@ai_function(name="get_flight_details", description="Get detailed information about a specific flight.")
+def get_flight_details(
+    flight_number: Annotated[str, Field(description="Flight number (e.g., 'AF007' or 'DL264').")],
+) -> str:
     """Get detailed information about a specific flight.
     
     Returns:
@@ -301,7 +324,12 @@ def get_flight_details(flight_number: str) -> str:
 
 
 # Mock activity search tool
-def search_activities(location: str, date: str = None, category: str = None) -> str:
+@ai_function(name="search_activities", description="Search for available activities and attractions at a destination.")
+def search_activities(
+    location: Annotated[str, Field(description="City or region to search for activities.")],
+    date: Annotated[str | None, Field(description="Date for the activity (e.g., 'December 16, 2025').")] = None,
+    category: Annotated[str | None, Field(description="Activity category (e.g., 'Sightseeing', 'Culture', 'Culinary').")] = None,
+) -> str:
     """Search for available activities and attractions at a destination.
     
     Returns:
@@ -440,7 +468,10 @@ def search_activities(location: str, date: str = None, category: str = None) ->
 
 
 # Mock activity details tool
-def get_activity_details(activity_name: str) -> str:
+@ai_function(name="get_activity_details", description="Get detailed information about a specific activity.")
+def get_activity_details(
+    activity_name: Annotated[str, Field(description="Name of the activity to get details for.")],
+) -> str:
     """Get detailed information about a specific activity.
     
     Returns:
@@ -514,7 +545,12 @@ def get_activity_details(activity_name: str) -> str:
 
 
 # Mock booking confirmation tool
-def confirm_booking(booking_type: str, booking_id: str, customer_info: dict) -> str:
+@ai_function(name="confirm_booking", description="Confirm a booking reservation.")
+def confirm_booking(
+    booking_type: Annotated[str, Field(description="Type of booking (e.g., 'hotel', 'flight', 'activity').")],
+    booking_id: Annotated[str, Field(description="Unique booking identifier.")],
+    customer_info: Annotated[dict, Field(description="Customer information including name and email.")],
+) -> str:
     """Confirm a booking reservation.
     
     Returns:
@@ -543,7 +579,13 @@ def confirm_booking(booking_type: str, booking_id: str, customer_info: dict) ->
 
 
 # Mock hotel availability check tool
-def check_hotel_availability(hotel_name: str, check_in: str, check_out: str, rooms: int = 1) -> str:
+@ai_function(name="check_hotel_availability", description="Check availability for hotel rooms.")
+def check_hotel_availability(
+    hotel_name: Annotated[str, Field(description="Name of the hotel to check availability for.")],
+    check_in: Annotated[str, Field(description="Check-in date (e.g., 'December 15, 2025').")],
+    check_out: Annotated[str, Field(description="Check-out date (e.g., 'December 18, 2025').")],
+    rooms: Annotated[int, Field(description="Number of rooms needed.")] = 1,
+) -> str:
     """Check availability for hotel rooms.
     
     Sample Date format: "December 15, 2025"
@@ -572,7 +614,12 @@ def check_hotel_availability(hotel_name: str, check_in: str, check_out: str, roo
 
 
 # Mock flight availability check tool
-def check_flight_availability(flight_number: str, date: str, passengers: int = 1) -> str:
+@ai_function(name="check_flight_availability", description="Check availability for flight seats.")
+def check_flight_availability(
+    flight_number: Annotated[str, Field(description="Flight number to check availability for.")],
+    date: Annotated[str, Field(description="Flight date (e.g., 'December 15, 2025').")],
+    passengers: Annotated[int, Field(description="Number of passengers.")] = 1,
+) -> str:
     """Check availability for flight seats.
     
     Sample Date format: "December 15, 2025"
@@ -600,7 +647,12 @@ def check_flight_availability(flight_number: str, date: str, passengers: int = 1
 
 
 # Mock activity availability check tool
-def check_activity_availability(activity_name: str, date: str, participants: int = 1) -> str:
+@ai_function(name="check_activity_availability", description="Check availability for activity bookings.")
+def check_activity_availability(
+    activity_name: Annotated[str, Field(description="Name of the activity to check availability for.")],
+    date: Annotated[str, Field(description="Activity date (e.g., 'December 16, 2025').")],
+    participants: Annotated[int, Field(description="Number of participants.")] = 1,
+) -> str:
     """Check availability for activity bookings.
     
     Sample Date format: "December 16, 2025"
@@ -628,7 +680,13 @@ def check_activity_availability(activity_name: str, date: str, participants: int
 
 
 # Mock payment processing tool
-def process_payment(amount: float, currency: str, payment_method: dict, booking_reference: str) -> str:
+@ai_function(name="process_payment", description="Process payment for a booking.")
+def process_payment(
+    amount: Annotated[float, Field(description="Payment amount.")],
+    currency: Annotated[str, Field(description="Currency code (e.g., 'USD', 'EUR').")],
+    payment_method: Annotated[dict, Field(description="Payment method details (type, card info).")],
+    booking_reference: Annotated[str, Field(description="Booking reference number for the payment.")],
+) -> str:
     """Process payment for a booking.
     
     Returns:
@@ -654,8 +712,12 @@ def process_payment(amount: float, currency: str, payment_method: dict, booking_
     })
 
 
+
 # Mock payment validation tool
-def validate_payment_method(payment_method: dict) -> str:
+@ai_function(name="validate_payment_method", description="Validate a payment method before processing.")
+def validate_payment_method(
+    payment_method: Annotated[dict, Field(description="Payment method to validate (type, number, expiry, cvv).")],
+) -> str:
     """Validate payment method details.
     
     Returns:
diff --git a/python/samples/demos/workflow_evaluation/create_workflow.py b/python/samples/demos/workflow_evaluation/create_workflow.py
index c33396fc9d..ca8b62e2ef 100644
--- a/python/samples/demos/workflow_evaluation/create_workflow.py
+++ b/python/samples/demos/workflow_evaluation/create_workflow.py
@@ -149,8 +149,6 @@ async def run_workflow_with_response_tracking(query: str, chat_client: AzureAICl
     """
     if chat_client is None:
         try:
-            credential = DefaultAzureCredential()
-            
             # Create AIProjectClient with the correct API version for V2 prompt agents
             project_client = AIProjectClient(
                 endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
@@ -159,7 +157,7 @@ async def run_workflow_with_response_tracking(query: str, chat_client: AzureAICl
             )
             
             async with (
-                credential,
+                DefaultAzureCredential() as credential,
                 project_client,
                 AzureAIClient(project_client=project_client, async_credential=credential) as client
             ):

From 4557d7a260b8f323fb7a9e943353b02be4d6a407 Mon Sep 17 00:00:00 2001
From: Salma Elshafey <selshafey@microsoft.com>
Date: Thu, 20 Nov 2025 21:18:01 +0200
Subject: [PATCH 9/9] Update samples readme

---
 python/samples/README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/samples/README.md b/python/samples/README.md
index 65f77645f2..15905e186f 100644
--- a/python/samples/README.md
+++ b/python/samples/README.md
@@ -186,6 +186,7 @@ This directory contains samples demonstrating the capabilities of Microsoft Agen
 |------|-------------|
 | [`getting_started/evaluation/azure_ai_foundry/red_teaming/red_team_agent_sample.py`](./getting_started/evaluation/azure_ai_foundry/red_teaming/red_team_agent_sample.py) | Red team agent evaluation sample for Azure AI Foundry |
 | [`getting_started/evaluation/azure_ai_foundry/self_reflection/self_reflection.py`](./getting_started/evaluation/azure_ai_foundry/self_reflection/self_reflection.py) | LLM self-reflection with AI Foundry graders example |
+| [`demos/workflow_evaluation/run_evaluation.py`](./demos/workflow_evaluation/run_evaluation.py) | Multi-agent workflow evaluation demo with travel planning agents evaluated using Azure AI Foundry evaluators |
 
 ## MCP (Model Context Protocol)