From 98d7cc896f242ec4ca591aeb32695d209a8d9605 Mon Sep 17 00:00:00 2001 From: Paul Lizer Date: Mon, 9 Feb 2026 12:38:55 -0500 Subject: [PATCH 1/3] fixed search in chat --- application/single_app/config.py | 2 +- application/single_app/route_backend_chats.py | 448 ++++++++++-------- docs/explanation/release_notes.md | 23 +- 3 files changed, 260 insertions(+), 213 deletions(-) diff --git a/application/single_app/config.py b/application/single_app/config.py index 2303a89e..cd3cd9c2 100644 --- a/application/single_app/config.py +++ b/application/single_app/config.py @@ -88,7 +88,7 @@ EXECUTOR_TYPE = 'thread' EXECUTOR_MAX_WORKERS = 30 SESSION_TYPE = 'filesystem' -VERSION = "0.237.008" +VERSION = "0.237.009" SECRET_KEY = os.getenv('SECRET_KEY', 'dev-secret-key-change-in-production') diff --git a/application/single_app/route_backend_chats.py b/application/single_app/route_backend_chats.py index ad514e6f..5f1bf514 100644 --- a/application/single_app/route_backend_chats.py +++ b/application/single_app/route_backend_chats.py @@ -1,4 +1,5 @@ # route_backend_chats.py + from semantic_kernel import Kernel from semantic_kernel.agents.runtime import InProcessRuntime from semantic_kernel.contents.chat_history import ChatHistory @@ -63,7 +64,7 @@ def chat_api(): image_gen_enabled = data.get('image_generation') document_scope = data.get('doc_scope') reload_messages_required = False - + def parse_json_string(candidate: str) -> Any: """Parse JSON content when strings look like serialized structures.""" trimmed = candidate.strip() @@ -77,39 +78,39 @@ def parse_json_string(candidate: str) -> Any: level=logging.DEBUG ) return None - + def dict_requires_reload(payload: Dict[str, Any]) -> bool: """Inspect dictionary payloads for any signal that messages were persisted.""" if payload.get('reload_messages') or payload.get('requires_message_reload'): return True - + metadata = payload.get('metadata') if isinstance(metadata, dict) and metadata.get('requires_message_reload'): return True - + image_url = payload.get('image_url') if isinstance(image_url, dict) and image_url.get('url'): return True if isinstance(image_url, str) and image_url.strip(): return True - + result_type = payload.get('type') if isinstance(result_type, str) and result_type.lower() == 'image_url': return True - + mime = payload.get('mime') if isinstance(mime, str) and mime.startswith('image/'): return True - + for value in payload.values(): if result_requires_message_reload(value): return True return False - + def list_requires_reload(items: List[Any]) -> bool: """Evaluate list items for reload requirements.""" return any(result_requires_message_reload(item) for item in items) - + def result_requires_message_reload(result: Any) -> bool: """Heuristically detect plugin outputs that inject new Cosmos messages (e.g., chart images).""" if result is None: @@ -171,9 +172,9 @@ def result_requires_message_reload(result: Any) -> bool: enable_summarize_content_history_beyond_conversation_history_limit = settings.get('enable_summarize_content_history_beyond_conversation_history_limit', True) # Use a dedicated setting if possible enable_summarize_content_history_for_search = settings.get('enable_summarize_content_history_for_search', False) # Use a dedicated setting if possible number_of_historical_messages_to_summarize = settings.get('number_of_historical_messages_to_summarize', 10) # Number of messages to summarize for search context - + max_file_content_length = 50000 # 50KB - + # Convert toggles from string -> bool if needed if isinstance(hybrid_search_enabled, str): hybrid_search_enabled = hybrid_search_enabled.lower() == 'true' @@ -181,25 +182,25 @@ def result_requires_message_reload(result: Any) -> bool: web_search_enabled = web_search_enabled.lower() == 'true' if isinstance(image_gen_enabled, str): image_gen_enabled = image_gen_enabled.lower() == 'true' - + # GPT & Image generation APIM or direct gpt_model = "" gpt_client = None enable_gpt_apim = settings.get('enable_gpt_apim', False) enable_image_gen_apim = settings.get('enable_image_gen_apim', False) - + try: if enable_gpt_apim: # read raw comma-delimited deployments raw = settings.get('azure_apim_gpt_deployment', '') if not raw: raise ValueError("APIM GPT deployment name not configured.") - + # split, strip, and filter out empty entries apim_models = [m.strip() for m in raw.split(',') if m.strip()] if not apim_models: raise ValueError("No valid APIM GPT deployment names found.") - + # if frontend specified one, use it (must be in the configured list) if frontend_gpt_model: if frontend_gpt_model not in apim_models: @@ -207,18 +208,18 @@ def result_requires_message_reload(result: Any) -> bool: f"Requested model '{frontend_gpt_model}' is not configured for APIM." ) gpt_model = frontend_gpt_model - + # otherwise if there's exactly one deployment, default to it elif len(apim_models) == 1: gpt_model = apim_models[0] - + # otherwise you must pass model_deployment in the request else: raise ValueError( "Multiple APIM GPT deployments configured; please include " "'model_deployment' in your request." ) - + # initialize the APIM client gpt_client = AzureOpenAI( api_version=settings.get('azure_apim_gpt_api_version'), @@ -230,14 +231,14 @@ def result_requires_message_reload(result: Any) -> bool: endpoint = settings.get('azure_openai_gpt_endpoint') api_version = settings.get('azure_openai_gpt_api_version') gpt_model_obj = settings.get('gpt_model', {}) - + if gpt_model_obj and gpt_model_obj.get('selected'): selected_gpt_model = gpt_model_obj['selected'][0] gpt_model = selected_gpt_model['deploymentName'] else: # Fallback or raise error if no model selected/configured raise ValueError("No GPT model selected or configured.") - + if frontend_gpt_model: gpt_model = frontend_gpt_model elif gpt_model_obj and gpt_model_obj.get('selected'): @@ -245,7 +246,7 @@ def result_requires_message_reload(result: Any) -> bool: gpt_model = selected_gpt_model['deploymentName'] else: raise ValueError("No GPT model selected or configured.") - + if auth_type == 'managed_identity': token_provider = get_bearer_token_provider(DefaultAzureCredential(), cognitive_services_scope) gpt_client = AzureOpenAI( @@ -261,10 +262,10 @@ def result_requires_message_reload(result: Any) -> bool: azure_endpoint=endpoint, api_key=api_key ) - + if not gpt_client or not gpt_model: raise ValueError("GPT Client or Model could not be initialized.") - + except Exception as e: debug_print(f"Error initializing GPT client/model: {e}") # Handle error appropriately - maybe return 500 or default behavior @@ -330,7 +331,7 @@ def result_requires_message_reload(result: Any) -> bool: except Exception as e: debug_print(f"Error reading conversation {conversation_id}: {e}") return jsonify({'error': f'Error reading conversation: {str(e)}'}), 500 - + # Determine the actual chat context based on existing conversation or document usage # For existing conversations, use the chat_type from conversation metadata # For new conversations, it will be determined during metadata collection @@ -463,6 +464,10 @@ def result_requires_message_reload(result: Any) -> bool: if not allowed: return jsonify({'error': reason}), 403 + # Ensure workspace_search exists before accessing + if 'workspace_search' not in user_metadata: + user_metadata['workspace_search'] = {'search_enabled': False} + if group_doc.get('name'): group_name = group_doc.get('name') user_metadata['workspace_search']['group_name'] = group_name @@ -472,10 +477,16 @@ def result_requires_message_reload(result: Any) -> bool: user_metadata['workspace_search']['group_name'] = None else: debug_print(f"Workspace search - no group found for id: {active_group_id}") + # Ensure workspace_search exists before accessing + if 'workspace_search' not in user_metadata: + user_metadata['workspace_search'] = {'search_enabled': False} user_metadata['workspace_search']['group_name'] = None except Exception as e: debug_print(f"Error retrieving group details: {e}") + # Ensure workspace_search exists before accessing + if 'workspace_search' not in user_metadata: + user_metadata['workspace_search'] = {'search_enabled': False} user_metadata['workspace_search']['group_name'] = None import traceback traceback.print_exc() @@ -492,11 +503,16 @@ def result_requires_message_reload(result: Any) -> bool: except Exception as e: debug_print(f"Error checking public workspace status: {e}") + # Ensure workspace_search exists before accessing + if 'workspace_search' not in user_metadata: + user_metadata['workspace_search'] = {'search_enabled': False} user_metadata['workspace_search']['active_public_workspace_id'] = active_public_workspace_id else: - user_metadata['workspace_search'] = { - 'search_enabled': False - } + # Only set if not already set above + if 'workspace_search' not in user_metadata: + user_metadata['workspace_search'] = { + 'search_enabled': False + } # Agent selection (if available) if hasattr(g, 'kernel_agents') and g.kernel_agents: @@ -586,7 +602,7 @@ def result_requires_message_reload(result: Any) -> bool: previous_thread_id = last_msgs[0].get('thread_id') except Exception as e: debug_print(f"Error fetching last message for threading: {e}") - + # Generate thread_id for the user message # We track the 'tip' of the thread in latest_thread_id import uuid @@ -640,7 +656,7 @@ def result_requires_message_reload(result: Any) -> bool: if conversation_item.get('title', 'New Conversation') == 'New Conversation' and user_message: new_title = (user_message[:30] + '...') if len(user_message) > 30 else user_message conversation_item['title'] = new_title - + conversation_item['last_updated'] = datetime.utcnow().isoformat() cosmos_conversations_container.upsert_item(conversation_item) # Update timestamp and potentially title # region 3 - Content Safety @@ -652,13 +668,13 @@ def result_requires_message_reload(result: Any) -> bool: block_reasons = [] triggered_categories = [] blocklist_matches = [] - + if settings.get('enable_content_safety') and "content_safety_client" in CLIENTS: try: content_safety_client = CLIENTS["content_safety_client"] request_obj = AnalyzeTextOptions(text=user_message) cs_response = content_safety_client.analyze_text(request_obj) - + max_severity = 0 for cat_result in cs_response.categories_analysis: triggered_categories.append({ @@ -667,7 +683,7 @@ def result_requires_message_reload(result: Any) -> bool: }) if cat_result.severity > max_severity: max_severity = cat_result.severity - + if cs_response.blocklists_match: for match in cs_response.blocklists_match: blocklist_matches.append({ @@ -675,7 +691,7 @@ def result_requires_message_reload(result: Any) -> bool: "blocklistItemId": match.blocklist_item_id, "blocklistItemText": match.blocklist_item_text }) - + # Example: If severity >=4 or blocklist, we call it "blocked" if max_severity >= 4: blocked = True @@ -698,7 +714,7 @@ def result_requires_message_reload(result: Any) -> bool: 'metadata': {} } cosmos_safety_container.upsert_item(safety_item) - + # Instead of 403, we'll add a "safety" message blocked_msg_content = ( "Your message was blocked by Content Safety.\n\n" @@ -715,10 +731,10 @@ def result_requires_message_reload(result: Any) -> bool: "\n".join([f" - {m['blocklistItemText']} (in {m['blocklistName']})" for m in blocklist_matches]) ) - + # Insert a special "role": "safety" or "blocked" safety_message_id = f"{conversation_id}_safety_{int(time.time())}_{random.randint(1000,9999)}" - + safety_doc = { 'id': safety_message_id, 'conversation_id': conversation_id, @@ -729,11 +745,11 @@ def result_requires_message_reload(result: Any) -> bool: 'metadata': {}, # No metadata needed for safety messages } cosmos_messages_container.upsert_item(safety_doc) - + # Update conversation's last_updated conversation_item['last_updated'] = datetime.utcnow().isoformat() cosmos_conversations_container.upsert_item(conversation_item) - + # Return a normal 200 with a special field: blocked=True return jsonify({ 'reply': blocked_msg_content.strip(), @@ -744,7 +760,7 @@ def result_requires_message_reload(result: Any) -> bool: 'conversation_title': conversation_item['title'], 'message_id': safety_message_id }), 200 - + except HttpResponseError as e: debug_print(f"[Content Safety Error] {e}") except Exception as ex: @@ -763,13 +779,15 @@ def result_requires_message_reload(result: Any) -> bool: limit_n_search = number_of_historical_messages_to_summarize * 2 query_search = f"SELECT TOP {limit_n_search} * FROM c WHERE c.conversation_id = @conv_id ORDER BY c.timestamp DESC" params_search = [{"name": "@conv_id", "value": conversation_id}] - - - try: - last_messages_desc = list(cosmos_messages_container.query_items( - query=query_search, parameters=params_search, partition_key=conversation_id, enable_cross_partition_query=True - )) - last_messages_asc = list(reversed(last_messages_desc)) + + +​ +​ try: +​ last_messages_desc = list(cosmos_messages_container.query_items( +​ query=query_search, parameters=params_search, partition_key=conversation_id, enable_cross_partition_query=True +​ )) +​ last_messages_asc = list(reversed(last_messages_desc)) +​ if last_messages_asc and len(last_messages_asc) >= conversation_history_limit: summary_prompt_search = "Please summarize the key topics or questions from this recent conversation history in 50 words or less:\n\n" @@ -792,7 +810,7 @@ def result_requires_message_reload(result: Any) -> bool: debug_print("[THREAD] No active thread messages available for search summary") else: summary_prompt_search += "\n".join(message_texts_search) - + try: # Use the already initialized gpt_client and gpt_model summary_response_search = gpt_client.chat.completions.create( @@ -866,12 +884,12 @@ def result_requires_message_reload(result: Any) -> bool: return jsonify({ 'error': 'There was an issue with the embedding process. Please check with an admin on embedding configuration.' }), 500 - + if search_results: retrieved_texts = [] combined_documents = [] classifications_found = set(conversation_item.get('classification', [])) # Load existing - + for doc in search_results: # ... (your existing doc processing logic) ... chunk_text = doc.get('chunk_text', '') @@ -884,7 +902,7 @@ def result_requires_message_reload(result: Any) -> bool: chunk_id = doc.get('chunk_id', str(uuid.uuid4())) # Ensure ID exists score = doc.get('score', 0.0) # Add default score group_id = doc.get('group_id', None) # Add default group ID - + citation = f"(Source: {file_name}, Page: {page_number}) [#{citation_id}]" retrieved_texts.append(f"{chunk_text}\n{citation}") combined_documents.append({ @@ -901,16 +919,16 @@ def result_requires_message_reload(result: Any) -> bool: }) if classification: classifications_found.add(classification) - + retrieved_content = "\n\n".join(retrieved_texts) # Construct system prompt for search results system_prompt_search = f"""You are an AI assistant. Use the following retrieved document excerpts to answer the user's question. Cite sources using the format (Source: filename, Page: page number). - + Retrieved Excerpts: {retrieved_content} - + Based *only* on the information provided above, answer the user's query. If the answer isn't in the excerpts, say so. - + Example User: What is the policy on double dipping? Assistant: The policy prohibits entities from using federal funds received through one program to apply for additional funds through another program, commonly known as 'double dipping' (Source: PolicyDocument.pdf, Page: 12) @@ -921,7 +939,7 @@ def result_requires_message_reload(result: Any) -> bool: 'content': system_prompt_search, 'documents': combined_documents # Keep track of docs used }) - + # Loop through each source document/chunk used for this message for source_doc in combined_documents: # 4. Create a citation dictionary, selecting the desired fields @@ -941,85 +959,92 @@ def result_requires_message_reload(result: Any) -> bool: } # Using .get() provides None if a key is missing, preventing KeyErrors hybrid_citations_list.append(citation_data) - + # Reorder hybrid citations list in descending order based on page_number hybrid_citations_list.sort(key=lambda x: x.get('page_number', 0), reverse=True) - + # --- NEW: Extract metadata (keywords/abstract) for additional citations --- # Only if extract_metadata is enabled if settings.get('enable_extract_meta_data', False): from functions_documents import get_document_metadata_for_citations - + # Track which documents we've already processed to avoid duplicates processed_doc_ids = set() - + for doc in search_results: # Get document ID (from the chunk's document reference) # AI Search chunks contain references to their parent document doc_id = doc.get('id', '').split('_')[0] if doc.get('id') else None - + # Skip if we've already processed this document if not doc_id or doc_id in processed_doc_ids: continue - + processed_doc_ids.add(doc_id) # Determine workspace type from the search result fields doc_user_id = doc.get('user_id') doc_group_id = doc.get('group_id') doc_public_workspace_id = doc.get('public_workspace_id') - - # Query Cosmos for this document's metadata - metadata = get_document_metadata_for_citations( - document_id=doc_id, - user_id=doc_user_id if doc_user_id else None, - group_id=doc_group_id if doc_group_id else None, - public_workspace_id=doc_public_workspace_id if doc_public_workspace_id else None - ) - - - # If we have metadata with content, create additional citations - if metadata: - file_name = metadata.get('file_name', 'Unknown') - keywords = metadata.get('keywords', []) - abstract = metadata.get('abstract', '') - - - # Create citation for keywords if they exist - if keywords and len(keywords) > 0: - keywords_text = ', '.join(keywords) if isinstance(keywords, list) else str(keywords) - keywords_citation_id = f"{doc_id}_keywords" - - keywords_citation = { - "file_name": file_name, - "citation_id": keywords_citation_id, - "page_number": "Metadata", # Special page identifier - "chunk_id": keywords_citation_id, - "chunk_sequence": 9999, # High number to sort to end - "score": 0.0, # No relevance score for metadata - "group_id": doc_group_id, - "version": doc.get('version', 'N/A'), - "classification": doc.get('document_classification'), - "metadata_type": "keywords", # Flag this as metadata citation - "metadata_content": keywords_text - } - hybrid_citations_list.append(keywords_citation) - combined_documents.append(keywords_citation) # Add to combined_documents too +​ +​ # Query Cosmos for this document's metadata +​ metadata = get_document_metadata_for_citations( +​ document_id=doc_id, +​ user_id=doc_user_id if doc_user_id else None, +​ group_id=doc_group_id if doc_group_id else None, +​ public_workspace_id=doc_public_workspace_id if doc_public_workspace_id else None +​ ) + + +​ +​ # If we have metadata with content, create additional citations +​ if metadata: +​ file_name = metadata.get('file_name', 'Unknown') +​ keywords = metadata.get('keywords', []) +​ abstract = metadata.get('abstract', '') + + +​ +​ # Create citation for keywords if they exist +​ if keywords and len(keywords) > 0: +​ keywords_text = ', '.join(keywords) if isinstance(keywords, list) else str(keywords) +​ keywords_citation_id = f"{doc_id}_keywords" + + +​ +​ keywords_citation = { +​ "file_name": file_name, +​ "citation_id": keywords_citation_id, +​ "page_number": "Metadata", # Special page identifier +​ "chunk_id": keywords_citation_id, +​ "chunk_sequence": 9999, # High number to sort to end +​ "score": 0.0, # No relevance score for metadata +​ "group_id": doc_group_id, +​ "version": doc.get('version', 'N/A'), +​ "classification": doc.get('document_classification'), +​ "metadata_type": "keywords", # Flag this as metadata citation +​ "metadata_content": keywords_text +​ } +​ hybrid_citations_list.append(keywords_citation) +​ combined_documents.append(keywords_citation) # Add to combined_documents too +​ # Add keywords to retrieved content for the model keywords_context = f"Document Keywords ({file_name}): {keywords_text}" retrieved_texts.append(keywords_context) - + # Create citation for abstract if it exists if abstract and len(abstract.strip()) > 0: abstract_citation_id = f"{doc_id}_abstract" - - # Add keywords to retrieved content for the model - keywords_context = f"Document Keywords ({file_name}): {keywords_text}" - retrieved_texts.append(keywords_context) - + +​ +​ # Add keywords to retrieved content for the model +​ keywords_context = f"Document Keywords ({file_name}): {keywords_text}" +​ retrieved_texts.append(keywords_context) +​ + # Create citation for abstract if it exists if abstract and len(abstract.strip()) > 0: abstract_citation_id = f"{doc_id}_abstract" @@ -1039,16 +1064,18 @@ def result_requires_message_reload(result: Any) -> bool: } hybrid_citations_list.append(abstract_citation) combined_documents.append(abstract_citation) # Add to combined_documents too - + # Add abstract to retrieved content for the model abstract_context = f"Document Abstract ({file_name}): {abstract}" retrieved_texts.append(abstract_context) - - # Add abstract to retrieved content for the model - abstract_context = f"Document Abstract ({file_name}): {abstract}" - retrieved_texts.append(abstract_context) - + +​ +​ # Add abstract to retrieved content for the model +​ abstract_context = f"Document Abstract ({file_name}): {abstract}" +​ retrieved_texts.append(abstract_context) +​ + # Create citation for vision analysis if it exists vision_analysis = metadata.get('vision_analysis') if vision_analysis: @@ -1087,20 +1114,22 @@ def result_requires_message_reload(result: Any) -> bool: vision_context = f"AI Vision Analysis ({file_name}): {vision_content}" retrieved_texts.append(vision_context) - - # Update the system prompt with the enhanced content including metadata - if retrieved_texts: - retrieved_content = "\n\n".join(retrieved_texts) - system_prompt_search = f"""You are an AI assistant. Use the following retrieved document excerpts to answer the user's question. Cite sources using the format (Source: filename, Page: page number). - Retrieved Excerpts: - {retrieved_content} - Based *only* on the information provided above, answer the user's query. If the answer isn't in the excerpts, say so. + +​ +​ # Update the system prompt with the enhanced content including metadata +​ if retrieved_texts: +​ retrieved_content = "\n\n".join(retrieved_texts) +​ system_prompt_search = f"""You are an AI assistant. Use the following retrieved document excerpts to answer the user's question. Cite sources using the format (Source: filename, Page: page number). +​ Retrieved Excerpts: +​ {retrieved_content} +​ Based *only* on the information provided above, answer the user's query. If the answer isn't in the excerpts, say so. +​ Retrieved Excerpts: {retrieved_content} - + Based *only* on the information provided above, answer the user's query. If the answer isn't in the excerpts, say so. - + Example User: What is the policy on double dipping? Assistant: The policy prohibits entities from using federal funds received through one program to apply for additional funds through another program, commonly known as 'double dipping' (Source: PolicyDocument.pdf, Page: 12) @@ -1110,12 +1139,12 @@ def result_requires_message_reload(result: Any) -> bool: system_messages_for_augmentation[-1]['content'] = system_prompt_search system_messages_for_augmentation[-1]['documents'] = combined_documents # --- END NEW METADATA CITATIONS --- - + # Update conversation classifications if new ones were found if list(classifications_found) != conversation_item.get('classification', []): conversation_item['classification'] = list(classifications_found) # No need to upsert item here, will be updated later - + # Update message-level chat_type based on actual document usage for this message # This must happen after document search is completed so search_results is populated message_chat_type = None @@ -1179,7 +1208,7 @@ def result_requires_message_reload(result: Any) -> bool: # Update the user message in Cosmos DB with the final chat_type information cosmos_messages_container.upsert_item(user_message_doc) debug_print(f"User message re-saved to Cosmos DB with updated chat_context") - + # Image Generation if image_gen_enabled: if enable_image_gen_apim: @@ -1198,7 +1227,7 @@ def result_requires_message_reload(result: Any) -> bool: azure_ad_token_provider=token_provider ) image_gen_model_obj = settings.get('image_gen_model', {}) - + if image_gen_model_obj and image_gen_model_obj.get('selected'): selected_image_gen_model = image_gen_model_obj['selected'][0] image_gen_model = selected_image_gen_model['deploymentName'] @@ -1212,7 +1241,7 @@ def result_requires_message_reload(result: Any) -> bool: if image_gen_obj and image_gen_obj.get('selected'): selected_image_gen_model = image_gen_obj['selected'][0] image_gen_model = selected_image_gen_model['deploymentName'] - + try: debug_print(f"Generating image with model: {image_gen_model}") debug_print(f"Using prompt: {user_message}") @@ -1263,7 +1292,7 @@ def result_requires_message_reload(result: Any) -> bool: # Validate we have a valid image source if not generated_image_url or generated_image_url == 'null': raise ValueError("Generated image URL is null or empty") - + image_message_id = f"{conversation_id}_image_{int(time.time())}_{random.randint(1000,9999)}" # Check if image data is too large for a single Cosmos document (2MB limit) @@ -1299,10 +1328,12 @@ def result_requires_message_reload(result: Any) -> bool: debug_print(f"✅ Chunking verification passed - can reassemble to original size") else: debug_print(f"❌ Chunking verification failed - {len(reassembled_test)} vs {len(generated_image_url)}") - - - # Create main image document with metadata - + + +​ +​ # Create main image document with metadata +​ + # Get user_info and thread_id from the user message for ownership tracking and threading user_info_for_chunked_image = None user_thread_id = None @@ -1421,10 +1452,10 @@ def result_requires_message_reload(result: Any) -> bool: cosmos_messages_container.upsert_item(image_doc) response_image_url = generated_image_url # Image message shares the same thread as user message - + conversation_item['last_updated'] = datetime.utcnow().isoformat() cosmos_conversations_container.upsert_item(conversation_item) - + return jsonify({ 'reply': "Image loading...", 'image_url': response_image_url, @@ -1456,7 +1487,7 @@ def result_requires_message_reload(result: Any) -> bool: return jsonify({ 'error': user_friendly_message }), status_code - + if web_search_enabled: perform_web_search( settings=settings, @@ -1489,20 +1520,20 @@ def result_requires_message_reload(result: Any) -> bool: all_messages = list(cosmos_messages_container.query_items( query=all_messages_query, parameters=params_all, partition_key=conversation_id, enable_cross_partition_query=True )) - + # Sort messages using threading logic all_messages = sort_messages_by_thread(all_messages) - + total_messages = len(all_messages) - + # Determine which messages are "recent" and which are "older" # `conversation_history_limit` includes the *current* user message num_recent_messages = min(total_messages, conversation_history_limit) num_older_messages = total_messages - num_recent_messages - + recent_messages = all_messages[-num_recent_messages:] # Last N messages older_messages_to_summarize = all_messages[:num_older_messages] # Messages before the recent ones - + # Summarize older messages if needed and present if enable_summarize_content_history_beyond_conversation_history_limit and older_messages_to_summarize: debug_print(f"Summarizing {len(older_messages_to_summarize)} older messages for conversation {conversation_id}") @@ -1530,7 +1561,7 @@ def result_requires_message_reload(result: Any) -> bool: if role in ['system', 'safety', 'blocked', 'image', 'file']: continue content = msg.get('content', '') message_texts_older.append(f"{role.upper()}: {content}") - + if message_texts_older: # Only summarize if there's content to summarize summary_prompt_older += "\n".join(message_texts_older) try: @@ -1557,14 +1588,14 @@ def result_requires_message_reload(result: Any) -> bool: "role": "system", "content": f"\n{summary_of_older}\n" }) - + # Add augmentation system messages (search, agents) next # **Important**: Decide if you want these saved. If so, you need to upsert them now. # For simplicity here, we're just adding them to the API call context. for aug_msg in system_messages_for_augmentation: # 1. Extract the source documents list for this specific system message # Use .get with a default empty list [] for safety in case 'documents' is missing - + # 5. Create the final system_doc dictionary for Cosmos DB upsert system_message_id = f"{conversation_id}_system_aug_{int(time.time())}_{random.randint(1000,9999)}" @@ -1605,7 +1636,7 @@ def result_requires_message_reload(result: Any) -> bool: cosmos_messages_container.upsert_item(system_doc) conversation_history_for_api.append(aug_msg) # Add to API context # System message shares the same thread as user message, no thread update needed - + # --- NEW: Save plugin output as agent citation --- agent_citations_list.append({ "tool_name": str(selected_agent.name) if selected_agent else "All Citations", @@ -1619,7 +1650,7 @@ def result_requires_message_reload(result: Any) -> bool: allowed_roles_in_history = ['user', 'assistant'] # Add 'system' if you PERSIST general system messages not related to augmentation max_file_content_length_in_history = 50000 # Increased limit for all file content in history max_tabular_content_length_in_history = 50000 # Same limit for tabular data consistency - + for message in recent_messages: role = message.get('role') content = message.get('content') @@ -1647,7 +1678,7 @@ def result_requires_message_reload(result: Any) -> bool: # Remove masked portions from content content = remove_masked_content(content, masked_ranges) debug_print(f"[MASK] Applied {len(masked_ranges)} masked ranges to message {message.get('id')}") - + if role in allowed_roles_in_history: conversation_history_for_api.append({"role": role, "content": content}) elif role == 'file': # Handle file content inclusion (simplified) @@ -1733,9 +1764,9 @@ def result_requires_message_reload(result: Any) -> bool: 'role': 'system', 'content': f"[Assistant generated an image based on the prompt: '{prompt}']" }) - + # Ignored roles: 'safety', 'blocked', 'system' (if they are only for augmentation/summary) - + # Ensure the very last message is the current user's message (it should be if fetched correctly) if not conversation_history_for_api or conversation_history_for_api[-1]['role'] != 'user': debug_print("Warning: Last message in history is not the user's current message. Appending.") @@ -1749,11 +1780,11 @@ def result_requires_message_reload(result: Any) -> bool: break if not user_msg_found: # Still not found? Append the original input as fallback conversation_history_for_api.append({"role": "user", "content": user_message}) - + except Exception as e: debug_print(f"Error preparing conversation history: {e}") return jsonify({'error': f'Error preparing conversation history: {str(e)}'}), 500 - + # region 6 - Final GPT Call # --------------------------------------------------------------------- # 6) Final GPT Call @@ -1778,7 +1809,7 @@ def result_requires_message_reload(result: Any) -> bool: "role": "system", "content": default_system_prompt }) - + # --- DRY Fallback Chain Helper --- def try_fallback_chain(steps): """ @@ -1803,7 +1834,7 @@ def try_fallback_chain(steps): continue # If all fail, return default error return ("Sorry, I encountered an error.", gpt_model, None, None) - + # --- Inject facts as a system message at the top of conversation_history_for_api --- def get_facts_for_context(scope_id, scope_type, conversation_id: str = None, agent_id: str = None): settings = get_settings() @@ -1835,7 +1866,7 @@ def get_facts_for_context(scope_id, scope_type, conversation_id: str = None, age fact_lines.append(f"- scope_id: {scope_id}") fact_lines.append(f"- conversation_id: {conversation_id}") return "\n".join(fact_lines) - + async def run_sk_call(callable_obj, *args, **kwargs): log_event( f"Running Semantic Kernel callable: {callable_obj.__name__}", @@ -1901,7 +1932,7 @@ async def run_sk_call(callable_obj, *args, **kwargs): extra={"runtime": runtime} ) await runtime.stop_when_idle() - + ai_message = "Sorry, I encountered an error." # Default error message final_model_used = gpt_model # Track model used for the response kernel_fallback_notice = None @@ -2020,9 +2051,9 @@ async def run_sk_call(callable_obj, *args, **kwargs): "selected_agent_id": agent_id or None, "kernel": bool(kernel is not None), } - + # Use the orchestrator agent as the default agent - + # Add additional metadata here to scope the facts to be returned # Allows for additional per agent and per conversation scoping. @@ -2039,7 +2070,7 @@ async def run_sk_call(callable_obj, *args, **kwargs): "role": "system", "content": f"""\n\n\n\n\n""" }) - + agent_message_history = [ ChatMessageContent( role=msg["role"], @@ -2048,7 +2079,7 @@ async def run_sk_call(callable_obj, *args, **kwargs): ) for msg in conversation_history_for_api ] - + # --- Fallback Chain Steps --- if enable_multi_agent_orchestration and all_agents and "orchestrator" in all_agents and not per_user_semantic_kernel: def invoke_orchestrator(): @@ -2077,7 +2108,7 @@ def orchestrator_error(e): 'on_success': orchestrator_success, 'on_error': orchestrator_error }) - + if selected_agent: def invoke_selected_agent(): return asyncio.run(run_sk_call( @@ -2146,7 +2177,7 @@ def make_json_serializable(obj): "total_duration_ms": sum(inv.duration_ms for inv in plugin_invocations if inv.duration_ms) } ) - + # debug_print(f"[Enhanced Agent Citations] Agent used: {agent_used}") # debug_print(f"[Enhanced Agent Citations] Extracted {len(detailed_citations)} detailed plugin invocations") # for citation in detailed_citations: @@ -2154,10 +2185,10 @@ def make_json_serializable(obj): # debug_print(f" Parameters: {citation['function_arguments']}") # debug_print(f" Result: {citation['function_result']}") # debug_print(f" Duration: {citation['duration_ms']}ms, Success: {citation['success']}") - + # Store detailed citations globally to be accessed by the calling function agent_citations_list.extend(detailed_citations) - + if not reload_messages_required: for citation in detailed_citations: if result_requires_message_reload(citation.get('function_result')): @@ -2180,11 +2211,11 @@ def agent_error(e): level=logging.ERROR, exceptionTraceback=True ) - + selected_agent_type = getattr(selected_agent, 'agent_type', 'local') or 'local' if isinstance(selected_agent_type, str): selected_agent_type = selected_agent_type.lower() - + if selected_agent_type == 'aifoundry': def invoke_foundry_agent(): foundry_metadata = { @@ -2202,7 +2233,7 @@ def invoke_foundry_agent(): agent_message_history, metadata={k: v for k, v in foundry_metadata.items() if v is not None} ) - + def foundry_agent_success(result): msg = str(result) notice = None @@ -2212,7 +2243,7 @@ def foundry_agent_success(result): or getattr(selected_agent, 'deployment_name', None) or agent_used ) - + foundry_citations = getattr(selected_agent, 'last_run_citations', []) or [] if foundry_citations: for citation in foundry_citations: @@ -2229,14 +2260,14 @@ def foundry_agent_success(result): 'timestamp': datetime.utcnow().isoformat(), 'success': True }) - + if enable_multi_agent_orchestration and not per_user_semantic_kernel: notice = ( "[SK Fallback]: The AI assistant is running in single agent fallback mode. " "Some advanced features may not be available. " "Please contact your administrator to configure Semantic Kernel for richer responses." ) - + log_event( f"[Foundry Agent] Invocation complete for {agent_used}", extra={ @@ -2247,9 +2278,9 @@ def foundry_agent_success(result): 'citation_count': len(foundry_citations), } ) - + return (msg, actual_model_deployment, 'agent', notice) - + def foundry_agent_error(e): log_event( f"Error during Azure AI Foundry agent invocation: {str(e)}", @@ -2261,7 +2292,7 @@ def foundry_agent_error(e): level=logging.ERROR, exceptionTraceback=True ) - + fallback_steps.append({ 'name': 'foundry_agent', 'func': invoke_foundry_agent, @@ -2275,7 +2306,7 @@ def foundry_agent_error(e): 'on_success': agent_success, 'on_error': agent_error }) - + if kernel: def invoke_kernel(): chat_history = "\n".join([ @@ -2328,7 +2359,7 @@ def kernel_error(e): 'on_success': kernel_success, 'on_error': kernel_error }) - + def invoke_gpt_fallback(): if not conversation_history_for_api: raise Exception('Cannot generate response: No conversation history available.') @@ -2410,7 +2441,7 @@ def gpt_error(e): 'on_success': gpt_success, 'on_error': gpt_error }) - + fallback_result = try_fallback_chain(fallback_steps) # Unpack result - handle both 4-tuple (SK) and 5-tuple (GPT with tokens) if len(fallback_result) == 5: @@ -2457,7 +2488,7 @@ def gpt_error(e): level=logging.ERROR, exceptionTraceback=True ) - + # region 7 - Save GPT Response # --------------------------------------------------------------------- # 7) Save GPT response (or error message) @@ -2567,7 +2598,7 @@ def gpt_error(e): except Exception as log_error: debug_print(f"⚠️ Warning: Failed to log chat token usage: {log_error}") # Don't fail the chat flow if logging fails - + # Update the user message metadata with the actual model used # This ensures the UI shows the correct model in the metadata panel try: @@ -2583,7 +2614,7 @@ def gpt_error(e): except Exception as e: debug_print(f"Warning: Could not update user message metadata: {e}") - + # Update conversation's last_updated timestamp one last time conversation_item['last_updated'] = datetime.utcnow().isoformat() @@ -2617,7 +2648,7 @@ def gpt_error(e): # Add any other final updates to conversation_item if needed (like classifications if not done earlier) cosmos_conversations_container.upsert_item(conversation_item) - + # --------------------------------------------------------------------- # 8) Return final success (even if AI generated an error message) # --------------------------------------------------------------------- @@ -2663,7 +2694,7 @@ def gpt_error(e): 'error': f'Internal server error: {str(e)}', 'details': error_traceback if app.debug else None }), 500 - + @app.route('/api/chat/stream', methods=['POST']) @swagger_route(security=get_auth_security()) @login_required @@ -3016,7 +3047,7 @@ def generate(): previous_thread_id = last_msgs[0].get('thread_id') except Exception as e: debug_print(f"Error fetching last message for threading: {e}") - + current_user_thread_id = str(uuid.uuid4()) latest_thread_id = current_user_thread_id @@ -3251,9 +3282,9 @@ def generate(): system_prompt_search = f"""You are an AI assistant. Use the following retrieved document excerpts to answer the user's question. Cite sources using the format (Source: filename, Page: page number). Retrieved Excerpts: {retrieved_content} - + Based *only* on the information provided above, answer the user's query. If the answer isn't in the excerpts, say so. - + Example User: What is the policy on double dipping? Assistant: The policy prohibits entities from using federal funds received through one program to apply for additional funds through another program, commonly known as 'double dipping' (Source: PolicyDocument.pdf, Page: 12) @@ -3284,7 +3315,7 @@ def generate(): agent_citations_list=agent_citations_list, web_search_citations_list=web_search_citations_list, ) - + # Update message chat type message_chat_type = None if hybrid_search_enabled and search_results and len(search_results) > 0: @@ -3825,7 +3856,7 @@ def make_json_serializable(obj): 'Connection': 'keep-alive' } ) - + @app.route('/api/message//mask', methods=['POST']) @swagger_route(security=get_auth_security()) @login_required @@ -3978,6 +4009,7 @@ def merge_masked_ranges(ranges): if not ranges: return [] + # Sort by start position sorted_ranges = sorted(ranges, key=lambda x: x['start']) merged = [sorted_ranges[0]] @@ -4010,6 +4042,7 @@ def remove_masked_content(content, masked_ranges): if not masked_ranges or not content: return content + # Sort ranges by start position (descending) to work backwards sorted_ranges = sorted(masked_ranges, key=lambda x: x['start'], reverse=True) @@ -4040,7 +4073,7 @@ def _extract_web_search_citations_from_content(content: str) -> List[Dict[str, s debug_print(f"[Citation Extraction] Extracting citations from:\n{content}\n") citations: List[Dict[str, str]] = [] - + markdown_pattern = re.compile(r"\[([^\]]+)\]\((https?://[^\s\)]+)(?:\s+\"([^\"]+)\")?\)") html_pattern = re.compile( r"]+href=\"(https?://[^\"]+)\"([^>]*)>(.*?)", @@ -4048,9 +4081,9 @@ def _extract_web_search_citations_from_content(content: str) -> List[Dict[str, s ) title_pattern = re.compile(r"title=\"([^\"]+)\"", re.IGNORECASE) url_pattern = re.compile(r"https?://[^\s\)\]\">]+") - + occupied_spans: List[range] = [] - + for match in markdown_pattern.finditer(content): text, url, title = match.groups() url = (url or "").strip().rstrip(".,)") @@ -4059,7 +4092,7 @@ def _extract_web_search_citations_from_content(content: str) -> List[Dict[str, s display_title = (title or text or url).strip() citations.append({"url": url, "title": display_title}) occupied_spans.append(range(match.start(), match.end())) - + for match in html_pattern.finditer(content): url, attrs, inner = match.groups() url = (url or "").strip().rstrip(".,)") @@ -4071,7 +4104,7 @@ def _extract_web_search_citations_from_content(content: str) -> List[Dict[str, s display_title = (title or inner_text or url).strip() citations.append({"url": url, "title": display_title}) occupied_spans.append(range(match.start(), match.end())) - + for match in url_pattern.finditer(content): if any(match.start() in span for span in occupied_spans): continue @@ -4080,7 +4113,7 @@ def _extract_web_search_citations_from_content(content: str) -> List[Dict[str, s continue citations.append({"url": url, "title": url}) debug_print(f"[Citation Extraction] Extracted {len(citations)} citations. - {citations}\n") - + return citations @@ -4096,7 +4129,7 @@ def _extract_token_usage_from_metadata(metadata: Dict[str, Any]) -> Dict[str, in if not usage: debug_print("[Web Search][Token Usage Extraction] No usage field found in metadata.") return {} - + if isinstance(usage, str): raw_usage = usage.strip() if not raw_usage: @@ -4112,20 +4145,20 @@ def _extract_token_usage_from_metadata(metadata: Dict[str, Any]) -> Dict[str, in "[Web Search][Token Usage Extraction] Failed to parse usage string." ) return {} - + if not isinstance(usage, Mapping): debug_print( "[Web Search][Token Usage Extraction] Usage is not a mapping. " f"type={type(usage)}" ) return {} - + def to_int(value: Any) -> Optional[int]: try: return int(float(value)) except (TypeError, ValueError): return None - + total_tokens = to_int(usage.get("total_tokens")) if total_tokens is None: debug_print( @@ -4133,14 +4166,14 @@ def to_int(value: Any) -> Optional[int]: f"usage={usage}" ) return {} - + prompt_tokens = to_int(usage.get("prompt_tokens")) or 0 completion_tokens = to_int(usage.get("completion_tokens")) or 0 debug_print( "[Web Search][Token Usage Extraction] Extracted token usage - " f"prompt: {prompt_tokens}, completion: {completion_tokens}, total: {total_tokens}" ) - + return { "total_tokens": int(total_tokens), "prompt_tokens": int(prompt_tokens), @@ -4175,13 +4208,14 @@ def perform_web_search( debug_print(f"[WebSearch] active_public_workspace_id: {active_public_workspace_id}") debug_print(f"[WebSearch] search_query: {search_query[:100] if search_query else None}...") + enable_web_search = settings.get("enable_web_search") debug_print(f"[WebSearch] enable_web_search setting: {enable_web_search}") if not enable_web_search: debug_print("[WebSearch] Web search is DISABLED in settings, returning early") return True # Not an error, just disabled - + debug_print("[WebSearch] Web search is ENABLED, proceeding...") web_search_agent = settings.get("web_search_agent") or {} @@ -4200,7 +4234,7 @@ def perform_web_search( safe_keys = ['agent_id', 'project_id', 'endpoint'] safe_info = {k: foundry_settings.get(k, '') for k in safe_keys} debug_print(f"[WebSearch] foundry_settings (safe keys): {safe_info}") - + agent_id = (foundry_settings.get("agent_id") or "").strip() debug_print(f"[WebSearch] Extracted agent_id: '{agent_id}'") @@ -4220,7 +4254,7 @@ def perform_web_search( "content": "Web search was requested but is not properly configured. Please inform the user that web search is currently unavailable and you cannot provide real-time information. Do not attempt to answer questions requiring current information from your training data.", }) return False # Configuration error - + debug_print(f"[WebSearch] Agent ID is configured: {agent_id}") query_text = None @@ -4230,7 +4264,7 @@ def perform_web_search( except NameError: query_text = None debug_print("[WebSearch] search_query not defined, query_text is None") - + query_text = (query_text or user_message or "").strip() debug_print(f"[WebSearch] Final query_text after fallback: '{query_text[:100] if query_text else ''}'") @@ -4245,13 +4279,13 @@ def perform_web_search( level=logging.WARNING, ) return True # Not an error, just empty query - + debug_print(f"[WebSearch] Building message history with query: {query_text[:100]}...") message_history = [ ChatMessageContent(role="user", content=query_text) ] debug_print(f"[WebSearch] Message history created with {len(message_history)} message(s)") - + try: foundry_metadata = { "conversation_id": conversation_id, @@ -4311,7 +4345,7 @@ def perform_web_search( "content": f"Web search failed with an unexpected error: {exc}. Please inform the user that the web search encountered an error and you cannot provide real-time information for this query. Do not attempt to answer questions requiring current information from your training data - instead, acknowledge the search failure and suggest the user try again.", }) return False # Search failed - + debug_print("[WebSearch] ========== FOUNDRY AGENT RESULT ==========") debug_print(f"[WebSearch] Result type: {type(result)}") debug_print(f"[WebSearch] Result has message: {bool(result.message)}") @@ -4340,7 +4374,7 @@ def perform_web_search( debug_print(f"[WebSearch] Foundry metadata: {metadata_payload}") else: debug_print("[WebSearch] Foundry metadata: ") - + if result.message: debug_print("[WebSearch] Adding result message to system_messages_for_augmentation") system_messages_for_augmentation.append({ @@ -4348,7 +4382,7 @@ def perform_web_search( "content": f"Web search results:\n{result.message}", }) debug_print(f"[WebSearch] Added system message to augmentation list. Total augmentation messages: {len(system_messages_for_augmentation)}") - + debug_print("[WebSearch] Extracting web citations from result message...") web_citations = _extract_web_search_citations_from_content(result.message) debug_print(f"[WebSearch] Extracted {len(web_citations)} web citations from message content") @@ -4359,7 +4393,7 @@ def perform_web_search( debug_print("[WebSearch] No web citations extracted from message content") else: debug_print("[WebSearch] No result.message to process for augmentation") - + citations = result.citations or [] debug_print(f"[WebSearch] Processing {len(citations)} citations from result.citations") if citations: @@ -4383,7 +4417,7 @@ def perform_web_search( debug_print(f"[WebSearch] Total agent_citations_list now has {len(agent_citations_list)} citations") else: debug_print("[WebSearch] No citations in result.citations to process") - + debug_print(f"[WebSearch] Starting token usage extraction from Foundry metadata. Metadata: {result.metadata}") token_usage = _extract_token_usage_from_metadata(result.metadata or {}) if token_usage.get("total_tokens"): @@ -4393,7 +4427,7 @@ def perform_web_search( workspace_type = 'public' elif active_group_id: workspace_type = 'group' - + log_token_usage( user_id=user_id, token_type='web_search', @@ -4422,7 +4456,7 @@ def perform_web_search( }, level=logging.WARNING, ) - + debug_print("[WebSearch] ========== FINAL SUMMARY ==========") debug_print(f"[WebSearch] system_messages_for_augmentation count: {len(system_messages_for_augmentation)}") debug_print(f"[WebSearch] agent_citations_list count: {len(agent_citations_list)}") diff --git a/docs/explanation/release_notes.md b/docs/explanation/release_notes.md index 5901ff04..ab6b2717 100644 --- a/docs/explanation/release_notes.md +++ b/docs/explanation/release_notes.md @@ -1,7 +1,8 @@ + # Feature Release -### **(v0.237.008)** +### **(v0.237.009)** #### New Features @@ -16,6 +17,14 @@ #### Bug Fixes +* **Workspace Search Deselection KeyError Fix** + * Fixed HTTP 500 error when deselecting the workspace search button after having a document selected. Users would get "Could not get a response. HTTP error! status: 500" in the chat interface. + * **Root Cause**: When workspace search was deselected (`hybrid_search_enabled = False`), the `user_metadata['workspace_search']` dictionary was never initialized. However, subsequent code for handling group scope or public workspace context attempted to access `user_metadata['workspace_search']['group_name']` or other properties, causing a KeyError. + * **Error**: `KeyError: 'workspace_search'` at lines 468, 479 in `route_backend_chats.py` when trying to set group_name or active_public_workspace_id. + * **Solution**: Added defensive checks before accessing `user_metadata['workspace_search']`. If the key doesn't exist, initialize it with `{'search_enabled': False}` before attempting to set additional properties like group_name or workspace IDs. + * **Workaround**: Clicking Home and then back to Chat worked because it triggered a page reload that reset the state properly. + * (Ref: `route_backend_chats.py`, workspace search, metadata initialization, KeyError handling) + * **OpenAPI Basic Authentication Fix** * Fixed "session not authenticated" errors when using Basic Authentication with OpenAPI actions, even when credentials were correct. * **Root Cause**: Mismatch between how the UI stored Basic Auth credentials (as `username:password` string in `auth.key`) and how the OpenAPI plugin factory expected them (as separate `username` and `password` properties in `additionalFields`). @@ -221,7 +230,7 @@ * **Frontend Integration**: UI can query allowed auth types to display only valid options. * **Files Modified**: `route_backend_plugins.py`. * (Ref: plugin authentication, auth type constraints, OpenAPI plugins, security) - + #### Bug Fixes * **Control Center Chart Date Labels Fix** @@ -780,9 +789,11 @@ * (Ref: `functions_authentication.py`, `functions_documents.py`, Video Indexer workflow logging) ### **(v0.229.014)** + #### Bug Fixes ##### Public Workspace Management Fixes + * **Public Workspace Management Permission Fix** * Fixed incorrect permission checking for public workspace management operations when "Require Membership to Create Public Workspaces" setting was enabled. * **Issue**: Users with legitimate access to manage workspaces (Owner/Admin/DocumentManager) were incorrectly shown "Forbidden" errors when accessing management functionality. @@ -801,7 +812,9 @@ * (Ref: `chat-documents.js`, scope label updates, dynamic workspace display) ======= + ##### User Interface and Content Rendering Fixes + * **Unicode Table Rendering Fix** * Fixed issue where AI-generated tables using Unicode box-drawing characters were not rendering as proper HTML tables in the chat interface. * **Problem**: AI agents (particularly ESAM Agent) generated Unicode tables that appeared as plain text instead of formatted tables. @@ -1133,7 +1146,7 @@ * (Ref: `artifacts/architecture.vsdx`) * **Health Check** * Provide admins ability to enable a healthcheck api. - * (Ref: `route_external_health.py`) + * (Ref: `route_external_health.py`) #### Bug Fixes @@ -1609,9 +1622,9 @@ We introduced a robust user feedback system, expanded content-safety features fo 5. **Inline File Previews in Chat** - Files attached to a conversation can be previewed directly from the chat, with text or data displayed in a pop-up. -7. **Optional Image Generation** +6. **Optional Image Generation** - Users can toggle an “Image” button to create images via Azure OpenAI (e.g., DALL·E) when configured in Admin Settings. -8. **App Roles & Enterprise Application** +7. **App Roles & Enterprise Application** - Provides a robust way to control user access at scale. - Admins can assign roles to new users or entire Azure AD groups. \ No newline at end of file From 3836ea890d112adbe8c6f0b4607ddcb39fbde410 Mon Sep 17 00:00:00 2001 From: Paul Lizer Date: Mon, 9 Feb 2026 13:05:22 -0500 Subject: [PATCH 2/3] fixed buttons in manage group --- application/single_app/static/js/group/manage_group.js | 9 --------- 1 file changed, 9 deletions(-) diff --git a/application/single_app/static/js/group/manage_group.js b/application/single_app/static/js/group/manage_group.js index 250a7b70..a6b00cc4 100644 --- a/application/single_app/static/js/group/manage_group.js +++ b/application/single_app/static/js/group/manage_group.js @@ -473,8 +473,6 @@ function renderMemberActions(member) { } else { return ` `; @@ -546,10 +541,6 @@ function loadPendingRequests() { data-request-id="${u.userId}">Approve - - `; From ac67cbf6a1b20a8fd050a016618c842cc8a7019b Mon Sep 17 00:00:00 2001 From: Paul Lizer Date: Mon, 9 Feb 2026 13:07:02 -0500 Subject: [PATCH 3/3] fixed issue with workspace search --- application/single_app/route_backend_chats.py | 471 +++++++++--------- 1 file changed, 222 insertions(+), 249 deletions(-) diff --git a/application/single_app/route_backend_chats.py b/application/single_app/route_backend_chats.py index 5f1bf514..10ea1abe 100644 --- a/application/single_app/route_backend_chats.py +++ b/application/single_app/route_backend_chats.py @@ -1,5 +1,4 @@ # route_backend_chats.py - from semantic_kernel import Kernel from semantic_kernel.agents.runtime import InProcessRuntime from semantic_kernel.contents.chat_history import ChatHistory @@ -64,7 +63,7 @@ def chat_api(): image_gen_enabled = data.get('image_generation') document_scope = data.get('doc_scope') reload_messages_required = False - + def parse_json_string(candidate: str) -> Any: """Parse JSON content when strings look like serialized structures.""" trimmed = candidate.strip() @@ -78,39 +77,39 @@ def parse_json_string(candidate: str) -> Any: level=logging.DEBUG ) return None - + def dict_requires_reload(payload: Dict[str, Any]) -> bool: """Inspect dictionary payloads for any signal that messages were persisted.""" if payload.get('reload_messages') or payload.get('requires_message_reload'): return True - + metadata = payload.get('metadata') if isinstance(metadata, dict) and metadata.get('requires_message_reload'): return True - + image_url = payload.get('image_url') if isinstance(image_url, dict) and image_url.get('url'): return True if isinstance(image_url, str) and image_url.strip(): return True - + result_type = payload.get('type') if isinstance(result_type, str) and result_type.lower() == 'image_url': return True - + mime = payload.get('mime') if isinstance(mime, str) and mime.startswith('image/'): return True - + for value in payload.values(): if result_requires_message_reload(value): return True return False - + def list_requires_reload(items: List[Any]) -> bool: """Evaluate list items for reload requirements.""" return any(result_requires_message_reload(item) for item in items) - + def result_requires_message_reload(result: Any) -> bool: """Heuristically detect plugin outputs that inject new Cosmos messages (e.g., chart images).""" if result is None: @@ -172,9 +171,9 @@ def result_requires_message_reload(result: Any) -> bool: enable_summarize_content_history_beyond_conversation_history_limit = settings.get('enable_summarize_content_history_beyond_conversation_history_limit', True) # Use a dedicated setting if possible enable_summarize_content_history_for_search = settings.get('enable_summarize_content_history_for_search', False) # Use a dedicated setting if possible number_of_historical_messages_to_summarize = settings.get('number_of_historical_messages_to_summarize', 10) # Number of messages to summarize for search context - + max_file_content_length = 50000 # 50KB - + # Convert toggles from string -> bool if needed if isinstance(hybrid_search_enabled, str): hybrid_search_enabled = hybrid_search_enabled.lower() == 'true' @@ -182,25 +181,25 @@ def result_requires_message_reload(result: Any) -> bool: web_search_enabled = web_search_enabled.lower() == 'true' if isinstance(image_gen_enabled, str): image_gen_enabled = image_gen_enabled.lower() == 'true' - + # GPT & Image generation APIM or direct gpt_model = "" gpt_client = None enable_gpt_apim = settings.get('enable_gpt_apim', False) enable_image_gen_apim = settings.get('enable_image_gen_apim', False) - + try: if enable_gpt_apim: # read raw comma-delimited deployments raw = settings.get('azure_apim_gpt_deployment', '') if not raw: raise ValueError("APIM GPT deployment name not configured.") - + # split, strip, and filter out empty entries apim_models = [m.strip() for m in raw.split(',') if m.strip()] if not apim_models: raise ValueError("No valid APIM GPT deployment names found.") - + # if frontend specified one, use it (must be in the configured list) if frontend_gpt_model: if frontend_gpt_model not in apim_models: @@ -208,18 +207,18 @@ def result_requires_message_reload(result: Any) -> bool: f"Requested model '{frontend_gpt_model}' is not configured for APIM." ) gpt_model = frontend_gpt_model - + # otherwise if there's exactly one deployment, default to it elif len(apim_models) == 1: gpt_model = apim_models[0] - + # otherwise you must pass model_deployment in the request else: raise ValueError( "Multiple APIM GPT deployments configured; please include " "'model_deployment' in your request." ) - + # initialize the APIM client gpt_client = AzureOpenAI( api_version=settings.get('azure_apim_gpt_api_version'), @@ -231,14 +230,14 @@ def result_requires_message_reload(result: Any) -> bool: endpoint = settings.get('azure_openai_gpt_endpoint') api_version = settings.get('azure_openai_gpt_api_version') gpt_model_obj = settings.get('gpt_model', {}) - + if gpt_model_obj and gpt_model_obj.get('selected'): selected_gpt_model = gpt_model_obj['selected'][0] gpt_model = selected_gpt_model['deploymentName'] else: # Fallback or raise error if no model selected/configured raise ValueError("No GPT model selected or configured.") - + if frontend_gpt_model: gpt_model = frontend_gpt_model elif gpt_model_obj and gpt_model_obj.get('selected'): @@ -246,7 +245,7 @@ def result_requires_message_reload(result: Any) -> bool: gpt_model = selected_gpt_model['deploymentName'] else: raise ValueError("No GPT model selected or configured.") - + if auth_type == 'managed_identity': token_provider = get_bearer_token_provider(DefaultAzureCredential(), cognitive_services_scope) gpt_client = AzureOpenAI( @@ -262,10 +261,10 @@ def result_requires_message_reload(result: Any) -> bool: azure_endpoint=endpoint, api_key=api_key ) - + if not gpt_client or not gpt_model: raise ValueError("GPT Client or Model could not be initialized.") - + except Exception as e: debug_print(f"Error initializing GPT client/model: {e}") # Handle error appropriately - maybe return 500 or default behavior @@ -331,7 +330,7 @@ def result_requires_message_reload(result: Any) -> bool: except Exception as e: debug_print(f"Error reading conversation {conversation_id}: {e}") return jsonify({'error': f'Error reading conversation: {str(e)}'}), 500 - + # Determine the actual chat context based on existing conversation or document usage # For existing conversations, use the chat_type from conversation metadata # For new conversations, it will be determined during metadata collection @@ -443,7 +442,7 @@ def result_requires_message_reload(result: Any) -> bool: doc_results = list(cosmos_container.query_items( query=doc_query, parameters=doc_params, enable_cross_partition_query=True )) - if doc_results: + if doc_results and 'workspace_search' in user_metadata: doc_info = doc_results[0] user_metadata['workspace_search']['document_name'] = doc_info.get('title') or doc_info.get('file_name') user_metadata['workspace_search']['document_filename'] = doc_info.get('file_name') @@ -464,30 +463,24 @@ def result_requires_message_reload(result: Any) -> bool: if not allowed: return jsonify({'error': reason}), 403 - # Ensure workspace_search exists before accessing - if 'workspace_search' not in user_metadata: - user_metadata['workspace_search'] = {'search_enabled': False} - if group_doc.get('name'): group_name = group_doc.get('name') - user_metadata['workspace_search']['group_name'] = group_name - debug_print(f"Workspace search - set group_name to: {group_name}") + if 'workspace_search' in user_metadata: + user_metadata['workspace_search']['group_name'] = group_name + debug_print(f"Workspace search - set group_name to: {group_name}") else: debug_print(f"Workspace search - no name for group: {active_group_id}") - user_metadata['workspace_search']['group_name'] = None + if 'workspace_search' in user_metadata: + user_metadata['workspace_search']['group_name'] = None else: debug_print(f"Workspace search - no group found for id: {active_group_id}") - # Ensure workspace_search exists before accessing - if 'workspace_search' not in user_metadata: - user_metadata['workspace_search'] = {'search_enabled': False} - user_metadata['workspace_search']['group_name'] = None + if 'workspace_search' in user_metadata: + user_metadata['workspace_search']['group_name'] = None except Exception as e: debug_print(f"Error retrieving group details: {e}") - # Ensure workspace_search exists before accessing - if 'workspace_search' not in user_metadata: - user_metadata['workspace_search'] = {'search_enabled': False} - user_metadata['workspace_search']['group_name'] = None + if 'workspace_search' in user_metadata: + user_metadata['workspace_search']['group_name'] = None import traceback traceback.print_exc() @@ -503,16 +496,14 @@ def result_requires_message_reload(result: Any) -> bool: except Exception as e: debug_print(f"Error checking public workspace status: {e}") - # Ensure workspace_search exists before accessing - if 'workspace_search' not in user_metadata: - user_metadata['workspace_search'] = {'search_enabled': False} - user_metadata['workspace_search']['active_public_workspace_id'] = active_public_workspace_id - else: - # Only set if not already set above - if 'workspace_search' not in user_metadata: - user_metadata['workspace_search'] = { - 'search_enabled': False - } + if 'workspace_search' in user_metadata: + user_metadata['workspace_search']['active_public_workspace_id'] = active_public_workspace_id + + # Ensure workspace_search key always exists for consistency + if 'workspace_search' not in user_metadata: + user_metadata['workspace_search'] = { + 'search_enabled': False + } # Agent selection (if available) if hasattr(g, 'kernel_agents') and g.kernel_agents: @@ -602,7 +593,7 @@ def result_requires_message_reload(result: Any) -> bool: previous_thread_id = last_msgs[0].get('thread_id') except Exception as e: debug_print(f"Error fetching last message for threading: {e}") - + # Generate thread_id for the user message # We track the 'tip' of the thread in latest_thread_id import uuid @@ -656,7 +647,7 @@ def result_requires_message_reload(result: Any) -> bool: if conversation_item.get('title', 'New Conversation') == 'New Conversation' and user_message: new_title = (user_message[:30] + '...') if len(user_message) > 30 else user_message conversation_item['title'] = new_title - + conversation_item['last_updated'] = datetime.utcnow().isoformat() cosmos_conversations_container.upsert_item(conversation_item) # Update timestamp and potentially title # region 3 - Content Safety @@ -668,13 +659,13 @@ def result_requires_message_reload(result: Any) -> bool: block_reasons = [] triggered_categories = [] blocklist_matches = [] - + if settings.get('enable_content_safety') and "content_safety_client" in CLIENTS: try: content_safety_client = CLIENTS["content_safety_client"] request_obj = AnalyzeTextOptions(text=user_message) cs_response = content_safety_client.analyze_text(request_obj) - + max_severity = 0 for cat_result in cs_response.categories_analysis: triggered_categories.append({ @@ -683,7 +674,7 @@ def result_requires_message_reload(result: Any) -> bool: }) if cat_result.severity > max_severity: max_severity = cat_result.severity - + if cs_response.blocklists_match: for match in cs_response.blocklists_match: blocklist_matches.append({ @@ -691,7 +682,7 @@ def result_requires_message_reload(result: Any) -> bool: "blocklistItemId": match.blocklist_item_id, "blocklistItemText": match.blocklist_item_text }) - + # Example: If severity >=4 or blocklist, we call it "blocked" if max_severity >= 4: blocked = True @@ -714,7 +705,7 @@ def result_requires_message_reload(result: Any) -> bool: 'metadata': {} } cosmos_safety_container.upsert_item(safety_item) - + # Instead of 403, we'll add a "safety" message blocked_msg_content = ( "Your message was blocked by Content Safety.\n\n" @@ -731,10 +722,10 @@ def result_requires_message_reload(result: Any) -> bool: "\n".join([f" - {m['blocklistItemText']} (in {m['blocklistName']})" for m in blocklist_matches]) ) - + # Insert a special "role": "safety" or "blocked" safety_message_id = f"{conversation_id}_safety_{int(time.time())}_{random.randint(1000,9999)}" - + safety_doc = { 'id': safety_message_id, 'conversation_id': conversation_id, @@ -745,11 +736,11 @@ def result_requires_message_reload(result: Any) -> bool: 'metadata': {}, # No metadata needed for safety messages } cosmos_messages_container.upsert_item(safety_doc) - + # Update conversation's last_updated conversation_item['last_updated'] = datetime.utcnow().isoformat() cosmos_conversations_container.upsert_item(conversation_item) - + # Return a normal 200 with a special field: blocked=True return jsonify({ 'reply': blocked_msg_content.strip(), @@ -760,7 +751,7 @@ def result_requires_message_reload(result: Any) -> bool: 'conversation_title': conversation_item['title'], 'message_id': safety_message_id }), 200 - + except HttpResponseError as e: debug_print(f"[Content Safety Error] {e}") except Exception as ex: @@ -779,15 +770,13 @@ def result_requires_message_reload(result: Any) -> bool: limit_n_search = number_of_historical_messages_to_summarize * 2 query_search = f"SELECT TOP {limit_n_search} * FROM c WHERE c.conversation_id = @conv_id ORDER BY c.timestamp DESC" params_search = [{"name": "@conv_id", "value": conversation_id}] - - -​ -​ try: -​ last_messages_desc = list(cosmos_messages_container.query_items( -​ query=query_search, parameters=params_search, partition_key=conversation_id, enable_cross_partition_query=True -​ )) -​ last_messages_asc = list(reversed(last_messages_desc)) -​ + + + try: + last_messages_desc = list(cosmos_messages_container.query_items( + query=query_search, parameters=params_search, partition_key=conversation_id, enable_cross_partition_query=True + )) + last_messages_asc = list(reversed(last_messages_desc)) if last_messages_asc and len(last_messages_asc) >= conversation_history_limit: summary_prompt_search = "Please summarize the key topics or questions from this recent conversation history in 50 words or less:\n\n" @@ -810,7 +799,7 @@ def result_requires_message_reload(result: Any) -> bool: debug_print("[THREAD] No active thread messages available for search summary") else: summary_prompt_search += "\n".join(message_texts_search) - + try: # Use the already initialized gpt_client and gpt_model summary_response_search = gpt_client.chat.completions.create( @@ -884,12 +873,12 @@ def result_requires_message_reload(result: Any) -> bool: return jsonify({ 'error': 'There was an issue with the embedding process. Please check with an admin on embedding configuration.' }), 500 - + if search_results: retrieved_texts = [] combined_documents = [] classifications_found = set(conversation_item.get('classification', [])) # Load existing - + for doc in search_results: # ... (your existing doc processing logic) ... chunk_text = doc.get('chunk_text', '') @@ -902,7 +891,7 @@ def result_requires_message_reload(result: Any) -> bool: chunk_id = doc.get('chunk_id', str(uuid.uuid4())) # Ensure ID exists score = doc.get('score', 0.0) # Add default score group_id = doc.get('group_id', None) # Add default group ID - + citation = f"(Source: {file_name}, Page: {page_number}) [#{citation_id}]" retrieved_texts.append(f"{chunk_text}\n{citation}") combined_documents.append({ @@ -919,16 +908,16 @@ def result_requires_message_reload(result: Any) -> bool: }) if classification: classifications_found.add(classification) - + retrieved_content = "\n\n".join(retrieved_texts) # Construct system prompt for search results system_prompt_search = f"""You are an AI assistant. Use the following retrieved document excerpts to answer the user's question. Cite sources using the format (Source: filename, Page: page number). - + Retrieved Excerpts: {retrieved_content} - + Based *only* on the information provided above, answer the user's query. If the answer isn't in the excerpts, say so. - + Example User: What is the policy on double dipping? Assistant: The policy prohibits entities from using federal funds received through one program to apply for additional funds through another program, commonly known as 'double dipping' (Source: PolicyDocument.pdf, Page: 12) @@ -939,7 +928,7 @@ def result_requires_message_reload(result: Any) -> bool: 'content': system_prompt_search, 'documents': combined_documents # Keep track of docs used }) - + # Loop through each source document/chunk used for this message for source_doc in combined_documents: # 4. Create a citation dictionary, selecting the desired fields @@ -959,92 +948,85 @@ def result_requires_message_reload(result: Any) -> bool: } # Using .get() provides None if a key is missing, preventing KeyErrors hybrid_citations_list.append(citation_data) - + # Reorder hybrid citations list in descending order based on page_number hybrid_citations_list.sort(key=lambda x: x.get('page_number', 0), reverse=True) - + # --- NEW: Extract metadata (keywords/abstract) for additional citations --- # Only if extract_metadata is enabled if settings.get('enable_extract_meta_data', False): from functions_documents import get_document_metadata_for_citations - + # Track which documents we've already processed to avoid duplicates processed_doc_ids = set() - + for doc in search_results: # Get document ID (from the chunk's document reference) # AI Search chunks contain references to their parent document doc_id = doc.get('id', '').split('_')[0] if doc.get('id') else None - + # Skip if we've already processed this document if not doc_id or doc_id in processed_doc_ids: continue - + processed_doc_ids.add(doc_id) # Determine workspace type from the search result fields doc_user_id = doc.get('user_id') doc_group_id = doc.get('group_id') doc_public_workspace_id = doc.get('public_workspace_id') + + # Query Cosmos for this document's metadata + metadata = get_document_metadata_for_citations( + document_id=doc_id, + user_id=doc_user_id if doc_user_id else None, + group_id=doc_group_id if doc_group_id else None, + public_workspace_id=doc_public_workspace_id if doc_public_workspace_id else None + ) -​ -​ # Query Cosmos for this document's metadata -​ metadata = get_document_metadata_for_citations( -​ document_id=doc_id, -​ user_id=doc_user_id if doc_user_id else None, -​ group_id=doc_group_id if doc_group_id else None, -​ public_workspace_id=doc_public_workspace_id if doc_public_workspace_id else None -​ ) - - -​ -​ # If we have metadata with content, create additional citations -​ if metadata: -​ file_name = metadata.get('file_name', 'Unknown') -​ keywords = metadata.get('keywords', []) -​ abstract = metadata.get('abstract', '') - - -​ -​ # Create citation for keywords if they exist -​ if keywords and len(keywords) > 0: -​ keywords_text = ', '.join(keywords) if isinstance(keywords, list) else str(keywords) -​ keywords_citation_id = f"{doc_id}_keywords" - - -​ -​ keywords_citation = { -​ "file_name": file_name, -​ "citation_id": keywords_citation_id, -​ "page_number": "Metadata", # Special page identifier -​ "chunk_id": keywords_citation_id, -​ "chunk_sequence": 9999, # High number to sort to end -​ "score": 0.0, # No relevance score for metadata -​ "group_id": doc_group_id, -​ "version": doc.get('version', 'N/A'), -​ "classification": doc.get('document_classification'), -​ "metadata_type": "keywords", # Flag this as metadata citation -​ "metadata_content": keywords_text -​ } -​ hybrid_citations_list.append(keywords_citation) -​ combined_documents.append(keywords_citation) # Add to combined_documents too -​ + + # If we have metadata with content, create additional citations + if metadata: + file_name = metadata.get('file_name', 'Unknown') + keywords = metadata.get('keywords', []) + abstract = metadata.get('abstract', '') + + + # Create citation for keywords if they exist + if keywords and len(keywords) > 0: + keywords_text = ', '.join(keywords) if isinstance(keywords, list) else str(keywords) + keywords_citation_id = f"{doc_id}_keywords" + + + keywords_citation = { + "file_name": file_name, + "citation_id": keywords_citation_id, + "page_number": "Metadata", # Special page identifier + "chunk_id": keywords_citation_id, + "chunk_sequence": 9999, # High number to sort to end + "score": 0.0, # No relevance score for metadata + "group_id": doc_group_id, + "version": doc.get('version', 'N/A'), + "classification": doc.get('document_classification'), + "metadata_type": "keywords", # Flag this as metadata citation + "metadata_content": keywords_text + } + hybrid_citations_list.append(keywords_citation) + combined_documents.append(keywords_citation) # Add to combined_documents too # Add keywords to retrieved content for the model keywords_context = f"Document Keywords ({file_name}): {keywords_text}" retrieved_texts.append(keywords_context) - + # Create citation for abstract if it exists if abstract and len(abstract.strip()) > 0: abstract_citation_id = f"{doc_id}_abstract" - -​ -​ # Add keywords to retrieved content for the model -​ keywords_context = f"Document Keywords ({file_name}): {keywords_text}" -​ retrieved_texts.append(keywords_context) -​ - + + # Add keywords to retrieved content for the model + keywords_context = f"Document Keywords ({file_name}): {keywords_text}" + retrieved_texts.append(keywords_context) + # Create citation for abstract if it exists if abstract and len(abstract.strip()) > 0: abstract_citation_id = f"{doc_id}_abstract" @@ -1064,18 +1046,16 @@ def result_requires_message_reload(result: Any) -> bool: } hybrid_citations_list.append(abstract_citation) combined_documents.append(abstract_citation) # Add to combined_documents too - + # Add abstract to retrieved content for the model abstract_context = f"Document Abstract ({file_name}): {abstract}" retrieved_texts.append(abstract_context) - -​ -​ # Add abstract to retrieved content for the model -​ abstract_context = f"Document Abstract ({file_name}): {abstract}" -​ retrieved_texts.append(abstract_context) -​ - + + # Add abstract to retrieved content for the model + abstract_context = f"Document Abstract ({file_name}): {abstract}" + retrieved_texts.append(abstract_context) + # Create citation for vision analysis if it exists vision_analysis = metadata.get('vision_analysis') if vision_analysis: @@ -1114,22 +1094,20 @@ def result_requires_message_reload(result: Any) -> bool: vision_context = f"AI Vision Analysis ({file_name}): {vision_content}" retrieved_texts.append(vision_context) - -​ -​ # Update the system prompt with the enhanced content including metadata -​ if retrieved_texts: -​ retrieved_content = "\n\n".join(retrieved_texts) -​ system_prompt_search = f"""You are an AI assistant. Use the following retrieved document excerpts to answer the user's question. Cite sources using the format (Source: filename, Page: page number). -​ Retrieved Excerpts: -​ {retrieved_content} -​ Based *only* on the information provided above, answer the user's query. If the answer isn't in the excerpts, say so. -​ + + # Update the system prompt with the enhanced content including metadata + if retrieved_texts: + retrieved_content = "\n\n".join(retrieved_texts) + system_prompt_search = f"""You are an AI assistant. Use the following retrieved document excerpts to answer the user's question. Cite sources using the format (Source: filename, Page: page number). + Retrieved Excerpts: + {retrieved_content} + Based *only* on the information provided above, answer the user's query. If the answer isn't in the excerpts, say so. Retrieved Excerpts: {retrieved_content} - + Based *only* on the information provided above, answer the user's query. If the answer isn't in the excerpts, say so. - + Example User: What is the policy on double dipping? Assistant: The policy prohibits entities from using federal funds received through one program to apply for additional funds through another program, commonly known as 'double dipping' (Source: PolicyDocument.pdf, Page: 12) @@ -1139,12 +1117,12 @@ def result_requires_message_reload(result: Any) -> bool: system_messages_for_augmentation[-1]['content'] = system_prompt_search system_messages_for_augmentation[-1]['documents'] = combined_documents # --- END NEW METADATA CITATIONS --- - + # Update conversation classifications if new ones were found if list(classifications_found) != conversation_item.get('classification', []): conversation_item['classification'] = list(classifications_found) # No need to upsert item here, will be updated later - + # Update message-level chat_type based on actual document usage for this message # This must happen after document search is completed so search_results is populated message_chat_type = None @@ -1208,7 +1186,7 @@ def result_requires_message_reload(result: Any) -> bool: # Update the user message in Cosmos DB with the final chat_type information cosmos_messages_container.upsert_item(user_message_doc) debug_print(f"User message re-saved to Cosmos DB with updated chat_context") - + # Image Generation if image_gen_enabled: if enable_image_gen_apim: @@ -1227,7 +1205,7 @@ def result_requires_message_reload(result: Any) -> bool: azure_ad_token_provider=token_provider ) image_gen_model_obj = settings.get('image_gen_model', {}) - + if image_gen_model_obj and image_gen_model_obj.get('selected'): selected_image_gen_model = image_gen_model_obj['selected'][0] image_gen_model = selected_image_gen_model['deploymentName'] @@ -1241,7 +1219,7 @@ def result_requires_message_reload(result: Any) -> bool: if image_gen_obj and image_gen_obj.get('selected'): selected_image_gen_model = image_gen_obj['selected'][0] image_gen_model = selected_image_gen_model['deploymentName'] - + try: debug_print(f"Generating image with model: {image_gen_model}") debug_print(f"Using prompt: {user_message}") @@ -1292,7 +1270,7 @@ def result_requires_message_reload(result: Any) -> bool: # Validate we have a valid image source if not generated_image_url or generated_image_url == 'null': raise ValueError("Generated image URL is null or empty") - + image_message_id = f"{conversation_id}_image_{int(time.time())}_{random.randint(1000,9999)}" # Check if image data is too large for a single Cosmos document (2MB limit) @@ -1328,12 +1306,10 @@ def result_requires_message_reload(result: Any) -> bool: debug_print(f"✅ Chunking verification passed - can reassemble to original size") else: debug_print(f"❌ Chunking verification failed - {len(reassembled_test)} vs {len(generated_image_url)}") - - -​ -​ # Create main image document with metadata -​ - + + + # Create main image document with metadata + # Get user_info and thread_id from the user message for ownership tracking and threading user_info_for_chunked_image = None user_thread_id = None @@ -1452,10 +1428,10 @@ def result_requires_message_reload(result: Any) -> bool: cosmos_messages_container.upsert_item(image_doc) response_image_url = generated_image_url # Image message shares the same thread as user message - + conversation_item['last_updated'] = datetime.utcnow().isoformat() cosmos_conversations_container.upsert_item(conversation_item) - + return jsonify({ 'reply': "Image loading...", 'image_url': response_image_url, @@ -1487,7 +1463,7 @@ def result_requires_message_reload(result: Any) -> bool: return jsonify({ 'error': user_friendly_message }), status_code - + if web_search_enabled: perform_web_search( settings=settings, @@ -1520,20 +1496,20 @@ def result_requires_message_reload(result: Any) -> bool: all_messages = list(cosmos_messages_container.query_items( query=all_messages_query, parameters=params_all, partition_key=conversation_id, enable_cross_partition_query=True )) - + # Sort messages using threading logic all_messages = sort_messages_by_thread(all_messages) - + total_messages = len(all_messages) - + # Determine which messages are "recent" and which are "older" # `conversation_history_limit` includes the *current* user message num_recent_messages = min(total_messages, conversation_history_limit) num_older_messages = total_messages - num_recent_messages - + recent_messages = all_messages[-num_recent_messages:] # Last N messages older_messages_to_summarize = all_messages[:num_older_messages] # Messages before the recent ones - + # Summarize older messages if needed and present if enable_summarize_content_history_beyond_conversation_history_limit and older_messages_to_summarize: debug_print(f"Summarizing {len(older_messages_to_summarize)} older messages for conversation {conversation_id}") @@ -1561,7 +1537,7 @@ def result_requires_message_reload(result: Any) -> bool: if role in ['system', 'safety', 'blocked', 'image', 'file']: continue content = msg.get('content', '') message_texts_older.append(f"{role.upper()}: {content}") - + if message_texts_older: # Only summarize if there's content to summarize summary_prompt_older += "\n".join(message_texts_older) try: @@ -1588,14 +1564,14 @@ def result_requires_message_reload(result: Any) -> bool: "role": "system", "content": f"\n{summary_of_older}\n" }) - + # Add augmentation system messages (search, agents) next # **Important**: Decide if you want these saved. If so, you need to upsert them now. # For simplicity here, we're just adding them to the API call context. for aug_msg in system_messages_for_augmentation: # 1. Extract the source documents list for this specific system message # Use .get with a default empty list [] for safety in case 'documents' is missing - + # 5. Create the final system_doc dictionary for Cosmos DB upsert system_message_id = f"{conversation_id}_system_aug_{int(time.time())}_{random.randint(1000,9999)}" @@ -1636,7 +1612,7 @@ def result_requires_message_reload(result: Any) -> bool: cosmos_messages_container.upsert_item(system_doc) conversation_history_for_api.append(aug_msg) # Add to API context # System message shares the same thread as user message, no thread update needed - + # --- NEW: Save plugin output as agent citation --- agent_citations_list.append({ "tool_name": str(selected_agent.name) if selected_agent else "All Citations", @@ -1650,7 +1626,7 @@ def result_requires_message_reload(result: Any) -> bool: allowed_roles_in_history = ['user', 'assistant'] # Add 'system' if you PERSIST general system messages not related to augmentation max_file_content_length_in_history = 50000 # Increased limit for all file content in history max_tabular_content_length_in_history = 50000 # Same limit for tabular data consistency - + for message in recent_messages: role = message.get('role') content = message.get('content') @@ -1678,7 +1654,7 @@ def result_requires_message_reload(result: Any) -> bool: # Remove masked portions from content content = remove_masked_content(content, masked_ranges) debug_print(f"[MASK] Applied {len(masked_ranges)} masked ranges to message {message.get('id')}") - + if role in allowed_roles_in_history: conversation_history_for_api.append({"role": role, "content": content}) elif role == 'file': # Handle file content inclusion (simplified) @@ -1764,9 +1740,9 @@ def result_requires_message_reload(result: Any) -> bool: 'role': 'system', 'content': f"[Assistant generated an image based on the prompt: '{prompt}']" }) - + # Ignored roles: 'safety', 'blocked', 'system' (if they are only for augmentation/summary) - + # Ensure the very last message is the current user's message (it should be if fetched correctly) if not conversation_history_for_api or conversation_history_for_api[-1]['role'] != 'user': debug_print("Warning: Last message in history is not the user's current message. Appending.") @@ -1780,11 +1756,11 @@ def result_requires_message_reload(result: Any) -> bool: break if not user_msg_found: # Still not found? Append the original input as fallback conversation_history_for_api.append({"role": "user", "content": user_message}) - + except Exception as e: debug_print(f"Error preparing conversation history: {e}") return jsonify({'error': f'Error preparing conversation history: {str(e)}'}), 500 - + # region 6 - Final GPT Call # --------------------------------------------------------------------- # 6) Final GPT Call @@ -1809,7 +1785,7 @@ def result_requires_message_reload(result: Any) -> bool: "role": "system", "content": default_system_prompt }) - + # --- DRY Fallback Chain Helper --- def try_fallback_chain(steps): """ @@ -1834,7 +1810,7 @@ def try_fallback_chain(steps): continue # If all fail, return default error return ("Sorry, I encountered an error.", gpt_model, None, None) - + # --- Inject facts as a system message at the top of conversation_history_for_api --- def get_facts_for_context(scope_id, scope_type, conversation_id: str = None, agent_id: str = None): settings = get_settings() @@ -1866,7 +1842,7 @@ def get_facts_for_context(scope_id, scope_type, conversation_id: str = None, age fact_lines.append(f"- scope_id: {scope_id}") fact_lines.append(f"- conversation_id: {conversation_id}") return "\n".join(fact_lines) - + async def run_sk_call(callable_obj, *args, **kwargs): log_event( f"Running Semantic Kernel callable: {callable_obj.__name__}", @@ -1932,7 +1908,7 @@ async def run_sk_call(callable_obj, *args, **kwargs): extra={"runtime": runtime} ) await runtime.stop_when_idle() - + ai_message = "Sorry, I encountered an error." # Default error message final_model_used = gpt_model # Track model used for the response kernel_fallback_notice = None @@ -2051,9 +2027,9 @@ async def run_sk_call(callable_obj, *args, **kwargs): "selected_agent_id": agent_id or None, "kernel": bool(kernel is not None), } - - # Use the orchestrator agent as the default agent + # Use the orchestrator agent as the default agent + # Add additional metadata here to scope the facts to be returned # Allows for additional per agent and per conversation scoping. @@ -2070,7 +2046,7 @@ async def run_sk_call(callable_obj, *args, **kwargs): "role": "system", "content": f"""\n\n\n\n\n""" }) - + agent_message_history = [ ChatMessageContent( role=msg["role"], @@ -2079,7 +2055,7 @@ async def run_sk_call(callable_obj, *args, **kwargs): ) for msg in conversation_history_for_api ] - + # --- Fallback Chain Steps --- if enable_multi_agent_orchestration and all_agents and "orchestrator" in all_agents and not per_user_semantic_kernel: def invoke_orchestrator(): @@ -2108,7 +2084,7 @@ def orchestrator_error(e): 'on_success': orchestrator_success, 'on_error': orchestrator_error }) - + if selected_agent: def invoke_selected_agent(): return asyncio.run(run_sk_call( @@ -2177,7 +2153,7 @@ def make_json_serializable(obj): "total_duration_ms": sum(inv.duration_ms for inv in plugin_invocations if inv.duration_ms) } ) - + # debug_print(f"[Enhanced Agent Citations] Agent used: {agent_used}") # debug_print(f"[Enhanced Agent Citations] Extracted {len(detailed_citations)} detailed plugin invocations") # for citation in detailed_citations: @@ -2185,10 +2161,10 @@ def make_json_serializable(obj): # debug_print(f" Parameters: {citation['function_arguments']}") # debug_print(f" Result: {citation['function_result']}") # debug_print(f" Duration: {citation['duration_ms']}ms, Success: {citation['success']}") - + # Store detailed citations globally to be accessed by the calling function agent_citations_list.extend(detailed_citations) - + if not reload_messages_required: for citation in detailed_citations: if result_requires_message_reload(citation.get('function_result')): @@ -2211,11 +2187,11 @@ def agent_error(e): level=logging.ERROR, exceptionTraceback=True ) - + selected_agent_type = getattr(selected_agent, 'agent_type', 'local') or 'local' if isinstance(selected_agent_type, str): selected_agent_type = selected_agent_type.lower() - + if selected_agent_type == 'aifoundry': def invoke_foundry_agent(): foundry_metadata = { @@ -2233,7 +2209,7 @@ def invoke_foundry_agent(): agent_message_history, metadata={k: v for k, v in foundry_metadata.items() if v is not None} ) - + def foundry_agent_success(result): msg = str(result) notice = None @@ -2243,7 +2219,7 @@ def foundry_agent_success(result): or getattr(selected_agent, 'deployment_name', None) or agent_used ) - + foundry_citations = getattr(selected_agent, 'last_run_citations', []) or [] if foundry_citations: for citation in foundry_citations: @@ -2260,14 +2236,14 @@ def foundry_agent_success(result): 'timestamp': datetime.utcnow().isoformat(), 'success': True }) - + if enable_multi_agent_orchestration and not per_user_semantic_kernel: notice = ( "[SK Fallback]: The AI assistant is running in single agent fallback mode. " "Some advanced features may not be available. " "Please contact your administrator to configure Semantic Kernel for richer responses." ) - + log_event( f"[Foundry Agent] Invocation complete for {agent_used}", extra={ @@ -2278,9 +2254,9 @@ def foundry_agent_success(result): 'citation_count': len(foundry_citations), } ) - + return (msg, actual_model_deployment, 'agent', notice) - + def foundry_agent_error(e): log_event( f"Error during Azure AI Foundry agent invocation: {str(e)}", @@ -2292,7 +2268,7 @@ def foundry_agent_error(e): level=logging.ERROR, exceptionTraceback=True ) - + fallback_steps.append({ 'name': 'foundry_agent', 'func': invoke_foundry_agent, @@ -2306,7 +2282,7 @@ def foundry_agent_error(e): 'on_success': agent_success, 'on_error': agent_error }) - + if kernel: def invoke_kernel(): chat_history = "\n".join([ @@ -2359,7 +2335,7 @@ def kernel_error(e): 'on_success': kernel_success, 'on_error': kernel_error }) - + def invoke_gpt_fallback(): if not conversation_history_for_api: raise Exception('Cannot generate response: No conversation history available.') @@ -2441,7 +2417,7 @@ def gpt_error(e): 'on_success': gpt_success, 'on_error': gpt_error }) - + fallback_result = try_fallback_chain(fallback_steps) # Unpack result - handle both 4-tuple (SK) and 5-tuple (GPT with tokens) if len(fallback_result) == 5: @@ -2488,7 +2464,7 @@ def gpt_error(e): level=logging.ERROR, exceptionTraceback=True ) - + # region 7 - Save GPT Response # --------------------------------------------------------------------- # 7) Save GPT response (or error message) @@ -2598,7 +2574,7 @@ def gpt_error(e): except Exception as log_error: debug_print(f"⚠️ Warning: Failed to log chat token usage: {log_error}") # Don't fail the chat flow if logging fails - + # Update the user message metadata with the actual model used # This ensures the UI shows the correct model in the metadata panel try: @@ -2614,7 +2590,7 @@ def gpt_error(e): except Exception as e: debug_print(f"Warning: Could not update user message metadata: {e}") - + # Update conversation's last_updated timestamp one last time conversation_item['last_updated'] = datetime.utcnow().isoformat() @@ -2648,7 +2624,7 @@ def gpt_error(e): # Add any other final updates to conversation_item if needed (like classifications if not done earlier) cosmos_conversations_container.upsert_item(conversation_item) - + # --------------------------------------------------------------------- # 8) Return final success (even if AI generated an error message) # --------------------------------------------------------------------- @@ -2694,7 +2670,7 @@ def gpt_error(e): 'error': f'Internal server error: {str(e)}', 'details': error_traceback if app.debug else None }), 500 - + @app.route('/api/chat/stream', methods=['POST']) @swagger_route(security=get_auth_security()) @login_required @@ -3047,7 +3023,7 @@ def generate(): previous_thread_id = last_msgs[0].get('thread_id') except Exception as e: debug_print(f"Error fetching last message for threading: {e}") - + current_user_thread_id = str(uuid.uuid4()) latest_thread_id = current_user_thread_id @@ -3282,9 +3258,9 @@ def generate(): system_prompt_search = f"""You are an AI assistant. Use the following retrieved document excerpts to answer the user's question. Cite sources using the format (Source: filename, Page: page number). Retrieved Excerpts: {retrieved_content} - + Based *only* on the information provided above, answer the user's query. If the answer isn't in the excerpts, say so. - + Example User: What is the policy on double dipping? Assistant: The policy prohibits entities from using federal funds received through one program to apply for additional funds through another program, commonly known as 'double dipping' (Source: PolicyDocument.pdf, Page: 12) @@ -3315,7 +3291,7 @@ def generate(): agent_citations_list=agent_citations_list, web_search_citations_list=web_search_citations_list, ) - + # Update message chat type message_chat_type = None if hybrid_search_enabled and search_results and len(search_results) > 0: @@ -3856,7 +3832,7 @@ def make_json_serializable(obj): 'Connection': 'keep-alive' } ) - + @app.route('/api/message//mask', methods=['POST']) @swagger_route(security=get_auth_security()) @login_required @@ -4009,7 +3985,6 @@ def merge_masked_ranges(ranges): if not ranges: return [] - # Sort by start position sorted_ranges = sorted(ranges, key=lambda x: x['start']) merged = [sorted_ranges[0]] @@ -4042,7 +4017,6 @@ def remove_masked_content(content, masked_ranges): if not masked_ranges or not content: return content - # Sort ranges by start position (descending) to work backwards sorted_ranges = sorted(masked_ranges, key=lambda x: x['start'], reverse=True) @@ -4073,7 +4047,7 @@ def _extract_web_search_citations_from_content(content: str) -> List[Dict[str, s debug_print(f"[Citation Extraction] Extracting citations from:\n{content}\n") citations: List[Dict[str, str]] = [] - + markdown_pattern = re.compile(r"\[([^\]]+)\]\((https?://[^\s\)]+)(?:\s+\"([^\"]+)\")?\)") html_pattern = re.compile( r"]+href=\"(https?://[^\"]+)\"([^>]*)>(.*?)", @@ -4081,9 +4055,9 @@ def _extract_web_search_citations_from_content(content: str) -> List[Dict[str, s ) title_pattern = re.compile(r"title=\"([^\"]+)\"", re.IGNORECASE) url_pattern = re.compile(r"https?://[^\s\)\]\">]+") - + occupied_spans: List[range] = [] - + for match in markdown_pattern.finditer(content): text, url, title = match.groups() url = (url or "").strip().rstrip(".,)") @@ -4092,7 +4066,7 @@ def _extract_web_search_citations_from_content(content: str) -> List[Dict[str, s display_title = (title or text or url).strip() citations.append({"url": url, "title": display_title}) occupied_spans.append(range(match.start(), match.end())) - + for match in html_pattern.finditer(content): url, attrs, inner = match.groups() url = (url or "").strip().rstrip(".,)") @@ -4104,7 +4078,7 @@ def _extract_web_search_citations_from_content(content: str) -> List[Dict[str, s display_title = (title or inner_text or url).strip() citations.append({"url": url, "title": display_title}) occupied_spans.append(range(match.start(), match.end())) - + for match in url_pattern.finditer(content): if any(match.start() in span for span in occupied_spans): continue @@ -4113,7 +4087,7 @@ def _extract_web_search_citations_from_content(content: str) -> List[Dict[str, s continue citations.append({"url": url, "title": url}) debug_print(f"[Citation Extraction] Extracted {len(citations)} citations. - {citations}\n") - + return citations @@ -4129,7 +4103,7 @@ def _extract_token_usage_from_metadata(metadata: Dict[str, Any]) -> Dict[str, in if not usage: debug_print("[Web Search][Token Usage Extraction] No usage field found in metadata.") return {} - + if isinstance(usage, str): raw_usage = usage.strip() if not raw_usage: @@ -4145,20 +4119,20 @@ def _extract_token_usage_from_metadata(metadata: Dict[str, Any]) -> Dict[str, in "[Web Search][Token Usage Extraction] Failed to parse usage string." ) return {} - + if not isinstance(usage, Mapping): debug_print( "[Web Search][Token Usage Extraction] Usage is not a mapping. " f"type={type(usage)}" ) return {} - + def to_int(value: Any) -> Optional[int]: try: return int(float(value)) except (TypeError, ValueError): return None - + total_tokens = to_int(usage.get("total_tokens")) if total_tokens is None: debug_print( @@ -4166,14 +4140,14 @@ def to_int(value: Any) -> Optional[int]: f"usage={usage}" ) return {} - + prompt_tokens = to_int(usage.get("prompt_tokens")) or 0 completion_tokens = to_int(usage.get("completion_tokens")) or 0 debug_print( "[Web Search][Token Usage Extraction] Extracted token usage - " f"prompt: {prompt_tokens}, completion: {completion_tokens}, total: {total_tokens}" ) - + return { "total_tokens": int(total_tokens), "prompt_tokens": int(prompt_tokens), @@ -4208,14 +4182,13 @@ def perform_web_search( debug_print(f"[WebSearch] active_public_workspace_id: {active_public_workspace_id}") debug_print(f"[WebSearch] search_query: {search_query[:100] if search_query else None}...") - enable_web_search = settings.get("enable_web_search") debug_print(f"[WebSearch] enable_web_search setting: {enable_web_search}") if not enable_web_search: debug_print("[WebSearch] Web search is DISABLED in settings, returning early") return True # Not an error, just disabled - + debug_print("[WebSearch] Web search is ENABLED, proceeding...") web_search_agent = settings.get("web_search_agent") or {} @@ -4234,7 +4207,7 @@ def perform_web_search( safe_keys = ['agent_id', 'project_id', 'endpoint'] safe_info = {k: foundry_settings.get(k, '') for k in safe_keys} debug_print(f"[WebSearch] foundry_settings (safe keys): {safe_info}") - + agent_id = (foundry_settings.get("agent_id") or "").strip() debug_print(f"[WebSearch] Extracted agent_id: '{agent_id}'") @@ -4254,7 +4227,7 @@ def perform_web_search( "content": "Web search was requested but is not properly configured. Please inform the user that web search is currently unavailable and you cannot provide real-time information. Do not attempt to answer questions requiring current information from your training data.", }) return False # Configuration error - + debug_print(f"[WebSearch] Agent ID is configured: {agent_id}") query_text = None @@ -4264,7 +4237,7 @@ def perform_web_search( except NameError: query_text = None debug_print("[WebSearch] search_query not defined, query_text is None") - + query_text = (query_text or user_message or "").strip() debug_print(f"[WebSearch] Final query_text after fallback: '{query_text[:100] if query_text else ''}'") @@ -4279,13 +4252,13 @@ def perform_web_search( level=logging.WARNING, ) return True # Not an error, just empty query - + debug_print(f"[WebSearch] Building message history with query: {query_text[:100]}...") message_history = [ ChatMessageContent(role="user", content=query_text) ] debug_print(f"[WebSearch] Message history created with {len(message_history)} message(s)") - + try: foundry_metadata = { "conversation_id": conversation_id, @@ -4345,7 +4318,7 @@ def perform_web_search( "content": f"Web search failed with an unexpected error: {exc}. Please inform the user that the web search encountered an error and you cannot provide real-time information for this query. Do not attempt to answer questions requiring current information from your training data - instead, acknowledge the search failure and suggest the user try again.", }) return False # Search failed - + debug_print("[WebSearch] ========== FOUNDRY AGENT RESULT ==========") debug_print(f"[WebSearch] Result type: {type(result)}") debug_print(f"[WebSearch] Result has message: {bool(result.message)}") @@ -4374,7 +4347,7 @@ def perform_web_search( debug_print(f"[WebSearch] Foundry metadata: {metadata_payload}") else: debug_print("[WebSearch] Foundry metadata: ") - + if result.message: debug_print("[WebSearch] Adding result message to system_messages_for_augmentation") system_messages_for_augmentation.append({ @@ -4382,7 +4355,7 @@ def perform_web_search( "content": f"Web search results:\n{result.message}", }) debug_print(f"[WebSearch] Added system message to augmentation list. Total augmentation messages: {len(system_messages_for_augmentation)}") - + debug_print("[WebSearch] Extracting web citations from result message...") web_citations = _extract_web_search_citations_from_content(result.message) debug_print(f"[WebSearch] Extracted {len(web_citations)} web citations from message content") @@ -4393,7 +4366,7 @@ def perform_web_search( debug_print("[WebSearch] No web citations extracted from message content") else: debug_print("[WebSearch] No result.message to process for augmentation") - + citations = result.citations or [] debug_print(f"[WebSearch] Processing {len(citations)} citations from result.citations") if citations: @@ -4417,7 +4390,7 @@ def perform_web_search( debug_print(f"[WebSearch] Total agent_citations_list now has {len(agent_citations_list)} citations") else: debug_print("[WebSearch] No citations in result.citations to process") - + debug_print(f"[WebSearch] Starting token usage extraction from Foundry metadata. Metadata: {result.metadata}") token_usage = _extract_token_usage_from_metadata(result.metadata or {}) if token_usage.get("total_tokens"): @@ -4427,7 +4400,7 @@ def perform_web_search( workspace_type = 'public' elif active_group_id: workspace_type = 'group' - + log_token_usage( user_id=user_id, token_type='web_search', @@ -4456,7 +4429,7 @@ def perform_web_search( }, level=logging.WARNING, ) - + debug_print("[WebSearch] ========== FINAL SUMMARY ==========") debug_print(f"[WebSearch] system_messages_for_augmentation count: {len(system_messages_for_augmentation)}") debug_print(f"[WebSearch] agent_citations_list count: {len(agent_citations_list)}")