From e58a48c85db87e5ed5e758623471f6e8faa91bef Mon Sep 17 00:00:00 2001
From: Eric Curtin
Date: Tue, 23 Dec 2025 11:38:32 +0000
Subject: [PATCH] Add conversation history support to chat mode

The interactive chat mode now maintains conversation context across
exchanges: the readline-based interactive mode tracks conversation
history and passes it to the model on each request.

A new ChatWithMessagesContext method supports conversation history
while preserving backward compatibility through the existing
ChatWithContext method. The history stores the processed user messages
(after file inclusions and image processing), so it reflects exactly
what the model received rather than the raw input.

Additionally, context cancellation is now handled properly: when a
request is cancelled, the partial assistant response received so far is
returned along with the error. The basic interactive mode fallback was
removed since readline initialization is now required.

Signed-off-by: Eric Curtin
---
 cmd/cli/commands/run.go          | 175 +++++++++++++++----------------
 cmd/cli/desktop/desktop.go       |  53 ++++++----
 cmd/cli/readline/term.go         |   6 --
 cmd/cli/readline/term_windows.go |   7 --
 4 files changed, 116 insertions(+), 125 deletions(-)

diff --git a/cmd/cli/commands/run.go b/cmd/cli/commands/run.go
index 91c05b043..22780f272 100644
--- a/cmd/cli/commands/run.go
+++ b/cmd/cli/commands/run.go
@@ -140,8 +140,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 		AltPlaceholder: `Use """ to end multi-line input`,
 	})
 	if err != nil {
-		// Fall back to basic input mode if readline initialization fails
-		return generateInteractiveBasic(cmd, desktopClient, model)
+		return err
 	}
 
 	// Disable history if the environment variable is set
@@ -154,6 +153,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 
 	var sb strings.Builder
 	var multiline bool
+	var conversationHistory []desktop.OpenAIChatMessage
 
 	// Add a helper function to handle file inclusion when @ is pressed
 	// We'll implement a basic version here that shows a message when @ is pressed
@@ -245,7 +245,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 			}
 		}()
 
-		err := chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput)
+		assistantResponse, processedUserMessage, err := chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput, conversationHistory)
 
 		// Clean up signal handler
 		signal.Stop(sigChan)
@@ -263,70 +263,21 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 			continue
 		}
 
+		// Add the processed user message and assistant response to conversation history.
+		// Using the processed message ensures the history reflects exactly what the model
+		// received (after file inclusions and image processing), not the raw user input.
+ conversationHistory = append(conversationHistory, processedUserMessage) + conversationHistory = append(conversationHistory, desktop.OpenAIChatMessage{ + Role: "assistant", + Content: assistantResponse, + }) + cmd.Println() sb.Reset() } } } -// generateInteractiveBasic provides a basic interactive mode (fallback) -func generateInteractiveBasic(cmd *cobra.Command, desktopClient *desktop.Client, model string) error { - scanner := bufio.NewScanner(os.Stdin) - for { - userInput, err := readMultilineInput(cmd, scanner) - if err != nil { - if err.Error() == "EOF" { - break - } - return fmt.Errorf("Error reading input: %w", err) - } - - if strings.ToLower(strings.TrimSpace(userInput)) == "/bye" { - break - } - - if strings.TrimSpace(userInput) == "" { - continue - } - - // Create a cancellable context for the chat request - // This allows us to cancel the request if the user presses Ctrl+C during response generation - chatCtx, cancelChat := context.WithCancel(cmd.Context()) - - // Set up signal handler to cancel the context on Ctrl+C - sigChan := make(chan os.Signal, 1) - signal.Notify(sigChan, syscall.SIGINT) - go func() { - select { - case <-sigChan: - cancelChat() - case <-chatCtx.Done(): - // Context cancelled, exit goroutine - // Context cancelled, exit goroutine - } - }() - - err = chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput) - - cancelChat() - signal.Stop(sigChan) - cancelChat() - - if err != nil { - // Check if the error is due to context cancellation (Ctrl+C during response) - if errors.Is(err, context.Canceled) { - fmt.Println("\nUse Ctrl + d or /bye to exit.") - } else { - cmd.PrintErrln(handleClientError(err, "Failed to generate a response")) - } - continue - } - - cmd.Println() - } - return nil -} - var ( markdownRenderer *glamour.TermRenderer lastWidth int @@ -507,47 +458,93 @@ func renderMarkdown(content string) (string, error) { return rendered, nil } +// buildUserMessage constructs an OpenAIChatMessage for the user with the processed prompt and images. +// This is used to ensure conversation history reflects exactly what the model received. +func buildUserMessage(prompt string, imageURLs []string) desktop.OpenAIChatMessage { + if len(imageURLs) > 0 { + // Multimodal message with images - build content array + contentParts := make([]desktop.ContentPart, 0, len(imageURLs)+1) + + // Add all images first + for _, imageURL := range imageURLs { + contentParts = append(contentParts, desktop.ContentPart{ + Type: "image_url", + ImageURL: &desktop.ImageURL{ + URL: imageURL, + }, + }) + } + + // Add text prompt if present + if prompt != "" { + contentParts = append(contentParts, desktop.ContentPart{ + Type: "text", + Text: prompt, + }) + } + + return desktop.OpenAIChatMessage{ + Role: "user", + Content: contentParts, + } + } + + // Simple text-only message + return desktop.OpenAIChatMessage{ + Role: "user", + Content: prompt, + } +} + // chatWithMarkdown performs chat and streams the response with selective markdown rendering. func chatWithMarkdown(cmd *cobra.Command, client *desktop.Client, model, prompt string) error { - return chatWithMarkdownContext(cmd.Context(), cmd, client, model, prompt) + _, _, err := chatWithMarkdownContext(cmd.Context(), cmd, client, model, prompt, nil) + return err } // chatWithMarkdownContext performs chat with context support and streams the response with selective markdown rendering. 
-func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *desktop.Client, model, prompt string) error {
+// It accepts an optional conversation history and returns both the assistant's response and the processed user message
+// (after file inclusions and image processing) for accurate history tracking.
+func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *desktop.Client, model, prompt string, conversationHistory []desktop.OpenAIChatMessage) (assistantResponse string, processedUserMessage desktop.OpenAIChatMessage, err error) {
 	colorMode, _ := cmd.Flags().GetString("color")
 	useMarkdown := shouldUseMarkdown(colorMode)
 	debug, _ := cmd.Flags().GetBool("debug")
 
 	// Process file inclusions first (files referenced with @ symbol)
-	prompt, err := processFileInclusions(prompt)
+	prompt, err = processFileInclusions(prompt)
 	if err != nil {
-		return fmt.Errorf("failed to process file inclusions: %w", err)
+		return "", desktop.OpenAIChatMessage{}, fmt.Errorf("failed to process file inclusions: %w", err)
 	}
 
 	var imageURLs []string
 	cleanedPrompt, imgs, err := processImagesInPrompt(prompt)
 	if err != nil {
-		return fmt.Errorf("failed to process images: %w", err)
+		return "", desktop.OpenAIChatMessage{}, fmt.Errorf("failed to process images: %w", err)
 	}
 	prompt = cleanedPrompt
 	imageURLs = imgs
 
+	// Build the processed user message to return for history tracking.
+	// This reflects exactly what the model receives.
+	processedUserMessage = buildUserMessage(prompt, imageURLs)
+
 	if !useMarkdown {
 		// Simple case: just stream as plain text
-		return client.ChatWithContext(ctx, model, prompt, imageURLs, func(content string) {
+		assistantResponse, err = client.ChatWithMessagesContext(ctx, model, conversationHistory, prompt, imageURLs, func(content string) {
 			cmd.Print(content)
 		}, false)
+		return assistantResponse, processedUserMessage, err
 	}
 
 	// For markdown: use streaming buffer to render code blocks as they complete
 	markdownBuffer := NewStreamingMarkdownBuffer()
 
-	err = client.ChatWithContext(ctx, model, prompt, imageURLs, func(content string) {
+	assistantResponse, err = client.ChatWithMessagesContext(ctx, model, conversationHistory, prompt, imageURLs, func(content string) {
 		// Use the streaming markdown buffer to intelligently render content
-		rendered, err := markdownBuffer.AddContent(content, true)
-		if err != nil {
+		rendered, renderErr := markdownBuffer.AddContent(content, true)
+		if renderErr != nil {
 			if debug {
-				cmd.PrintErrln(err)
+				cmd.PrintErrln(renderErr)
 			}
 			// Fallback to plain text on error
 			cmd.Print(content)
@@ -556,7 +553,7 @@ func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *de
 		}
 	}, true)
 	if err != nil {
-		return err
+		return assistantResponse, processedUserMessage, err
 	}
 
 	// Flush any remaining content from the markdown buffer
@@ -564,7 +561,7 @@
 		cmd.Print(remaining)
 	}
 
-	return nil
+	return assistantResponse, processedUserMessage, nil
 }
 
 func newRunCmd() *cobra.Command {
@@ -641,14 +638,10 @@
 		return nil
 	}
 
-	// Interactive mode for external OpenAI endpoint
-	if term.IsTerminal(int(os.Stdin.Fd())) {
-		termenv.SetDefaultOutput(
-			termenv.NewOutput(asPrinter(cmd), termenv.WithColorCache(true)),
-		)
-		return generateInteractiveWithReadline(cmd, openaiClient, model)
-	}
-	return generateInteractiveBasic(cmd, openaiClient, model)
+	termenv.SetDefaultOutput(
+		termenv.NewOutput(asPrinter(cmd), termenv.WithColorCache(true)),
+	)
+	return generateInteractiveWithReadline(cmd, openaiClient, model)
 }
 
 	if _, err := ensureStandaloneRunnerAvailable(cmd.Context(), asPrinter(cmd), debug); err != nil {
@@ -746,19 +739,15 @@
 		return nil
 	}
 
-	// Use enhanced readline-based interactive mode when terminal is available
-	if term.IsTerminal(int(os.Stdin.Fd())) {
-		// Initialize termenv with color caching before starting interactive session.
-		// This queries the terminal background color once and caches it, preventing
-		// OSC response sequences from appearing in stdin during the interactive loop.
-		termenv.SetDefaultOutput(
-			termenv.NewOutput(asPrinter(cmd), termenv.WithColorCache(true)),
-		)
-		return generateInteractiveWithReadline(cmd, desktopClient, model)
-	}
+	// Initialize termenv with color caching before starting interactive session.
+	// This queries the terminal background color once and caches it, preventing
+	// OSC response sequences from appearing in stdin during the interactive loop.
+	termenv.SetDefaultOutput(
+		termenv.NewOutput(asPrinter(cmd), termenv.WithColorCache(true)),
+	)
+
+	return generateInteractiveWithReadline(cmd, desktopClient, model)
 
-	// Fall back to basic mode if not a terminal
-	return generateInteractiveBasic(cmd, desktopClient, model)
 	},
 	ValidArgsFunction: completion.ModelNames(getDesktopClient, 1),
 }
diff --git a/cmd/cli/desktop/desktop.go b/cmd/cli/desktop/desktop.go
index 1ea3b2044..056cf3263 100644
--- a/cmd/cli/desktop/desktop.go
+++ b/cmd/cli/desktop/desktop.go
@@ -350,13 +350,14 @@ func (c *Client) Chat(model, prompt string, imageURLs []string, outputFunc func(
 	return c.ChatWithContext(context.Background(), model, prompt, imageURLs, outputFunc, shouldUseMarkdown)
 }
 
-// ChatWithContext performs a chat request with context support for cancellation and streams the response content with selective markdown rendering.
-func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
-	// Build the message content - either simple string or multimodal array
+// ChatWithMessagesContext performs a chat request with conversation history and returns the assistant's response.
+// This allows maintaining conversation context across multiple exchanges.
+func (c *Client) ChatWithMessagesContext(ctx context.Context, model string, conversationHistory []OpenAIChatMessage, prompt string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) (string, error) {
+	// Build the current user message content - either simple string or multimodal array
 	var messageContent interface{}
 	if len(imageURLs) > 0 {
 		// Multimodal message with images
-		contentParts := make([]ContentPart, 0, len(imageURLs))
+		contentParts := make([]ContentPart, 0, len(imageURLs)+1)
 
 		// Add all images first
 		for _, imageURL := range imageURLs {
@@ -382,20 +383,23 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 		messageContent = prompt
 	}
 
+	// Build messages array with conversation history plus current message
+	messages := make([]OpenAIChatMessage, 0, len(conversationHistory)+1)
+	messages = append(messages, conversationHistory...)
+ messages = append(messages, OpenAIChatMessage{ + Role: "user", + Content: messageContent, + }) + reqBody := OpenAIChatRequest{ - Model: model, - Messages: []OpenAIChatMessage{ - { - Role: "user", - Content: messageContent, - }, - }, - Stream: true, + Model: model, + Messages: messages, + Stream: true, } jsonData, err := json.Marshal(reqBody) if err != nil { - return fmt.Errorf("error marshaling request: %w", err) + return "", fmt.Errorf("error marshaling request: %w", err) } completionsPath := c.modelRunner.OpenAIPathPrefix() + "/chat/completions" @@ -407,13 +411,13 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag bytes.NewReader(jsonData), ) if err != nil { - return c.handleQueryError(err, completionsPath) + return "", c.handleQueryError(err, completionsPath) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(resp.Body) - return fmt.Errorf("error response: status=%d body=%s", resp.StatusCode, body) + return "", fmt.Errorf("error response: status=%d body=%s", resp.StatusCode, body) } type chatPrinterState int @@ -425,7 +429,11 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag printerState := chatPrinterNone reasoningFmt := color.New().Add(color.Italic) + if !shouldUseMarkdown { + reasoningFmt.DisableColor() + } + var assistantResponse strings.Builder var finalUsage *struct { CompletionTokens int `json:"completion_tokens"` PromptTokens int `json:"prompt_tokens"` @@ -437,7 +445,7 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag // Check if context was cancelled select { case <-ctx.Done(): - return ctx.Err() + return assistantResponse.String(), ctx.Err() default: } @@ -458,7 +466,7 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag var streamResp OpenAIChatResponse if err := json.Unmarshal([]byte(data), &streamResp); err != nil { - return fmt.Errorf("error parsing stream response: %w", err) + return assistantResponse.String(), fmt.Errorf("error parsing stream response: %w", err) } if streamResp.Usage != nil { @@ -493,12 +501,13 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag } printerState = chatPrinterContent outputFunc(chunk) + assistantResponse.WriteString(chunk) } } } if err := scanner.Err(); err != nil { - return fmt.Errorf("error reading response stream: %w", err) + return assistantResponse.String(), fmt.Errorf("error reading response stream: %w", err) } if finalUsage != nil { @@ -514,7 +523,13 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag outputFunc(usageFmt.Sprint(usageInfo)) } - return nil + return assistantResponse.String(), nil +} + +// ChatWithContext performs a chat request with context support for cancellation and streams the response content with selective markdown rendering. 
+func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
+	_, err := c.ChatWithMessagesContext(ctx, model, nil, prompt, imageURLs, outputFunc, shouldUseMarkdown)
+	return err
 }
 
 func (c *Client) Remove(modelArgs []string, force bool) (string, error) {
diff --git a/cmd/cli/readline/term.go b/cmd/cli/readline/term.go
index 5584cd257..69f17f1e9 100644
--- a/cmd/cli/readline/term.go
+++ b/cmd/cli/readline/term.go
@@ -29,9 +29,3 @@ func UnsetRawMode(fd uintptr, termios any) error {
 	t := termios.(*Termios)
 	return setTermios(fd, t)
 }
-
-// IsTerminal returns true if the given file descriptor is a terminal.
-func IsTerminal(fd uintptr) bool {
-	_, err := getTermios(fd)
-	return err == nil
-}
diff --git a/cmd/cli/readline/term_windows.go b/cmd/cli/readline/term_windows.go
index 3b35149b8..8a9e9a587 100644
--- a/cmd/cli/readline/term_windows.go
+++ b/cmd/cli/readline/term_windows.go
@@ -8,13 +8,6 @@ type State struct {
 	mode uint32
 }
 
-// IsTerminal checks if the given file descriptor is associated with a terminal
-func IsTerminal(fd uintptr) bool {
-	var st uint32
-	err := windows.GetConsoleMode(windows.Handle(fd), &st)
-	return err == nil
-}
-
 func SetRawMode(fd uintptr) (*State, error) {
 	var st uint32
 	if err := windows.GetConsoleMode(windows.Handle(fd), &st); err != nil {
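
For reviewers, here is a minimal sketch of how a caller can drive the new
conversation-history API across multiple turns. The helper is hypothetical
(not part of this patch) and assumes the cmd/cli/desktop package is imported
as desktop, along with context and fmt; error handling is reduced to a bare
return:

	// chatLoop is a hypothetical multi-turn driver for ChatWithMessagesContext.
	func chatLoop(ctx context.Context, client *desktop.Client, model string, prompts []string) error {
		var history []desktop.OpenAIChatMessage
		for _, prompt := range prompts {
			// Stream each chunk to stdout while the client accumulates the full reply.
			reply, err := client.ChatWithMessagesContext(ctx, model, history, prompt, nil,
				func(chunk string) { fmt.Print(chunk) }, false)
			if err != nil {
				return err // on cancellation, reply still holds the partial response
			}
			// Record both turns so the next request carries the full context.
			history = append(history,
				desktop.OpenAIChatMessage{Role: "user", Content: prompt},
				desktop.OpenAIChatMessage{Role: "assistant", Content: reply})
		}
		return nil
	}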