diff --git a/cmd/cli/commands/run.go b/cmd/cli/commands/run.go
index 91c05b043..22780f272 100644
--- a/cmd/cli/commands/run.go
+++ b/cmd/cli/commands/run.go
@@ -140,8 +140,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 		AltPlaceholder: `Use """ to end multi-line input`,
 	})
 	if err != nil {
-		// Fall back to basic input mode if readline initialization fails
-		return generateInteractiveBasic(cmd, desktopClient, model)
+		return err
 	}
 
 	// Disable history if the environment variable is set
@@ -154,6 +153,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 
 	var sb strings.Builder
 	var multiline
+	var conversationHistory []desktop.OpenAIChatMessage
 
 	// Add a helper function to handle file inclusion when @ is pressed
 	// We'll implement a basic version here that shows a message when @ is pressed
@@ -245,7 +245,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 			}
 		}()
 
-		err := chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput)
+		assistantResponse, processedUserMessage, err := chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput, conversationHistory)
 
 		// Clean up signal handler
 		signal.Stop(sigChan)
@@ -263,70 +263,21 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 			continue
 		}
 
+		// Add the processed user message and assistant response to conversation history.
+		// Using the processed message ensures the history reflects exactly what the model
+		// received (after file inclusions and image processing), not the raw user input.
+		conversationHistory = append(conversationHistory, processedUserMessage)
+		conversationHistory = append(conversationHistory, desktop.OpenAIChatMessage{
+			Role:    "assistant",
+			Content: assistantResponse,
+		})
+
 		cmd.Println()
 		sb.Reset()
 	}
 }
 
-// generateInteractiveBasic provides a basic interactive mode (fallback)
-func generateInteractiveBasic(cmd *cobra.Command, desktopClient *desktop.Client, model string) error {
-	scanner := bufio.NewScanner(os.Stdin)
-	for {
-		userInput, err := readMultilineInput(cmd, scanner)
-		if err != nil {
-			if err.Error() == "EOF" {
-				break
-			}
-			return fmt.Errorf("Error reading input: %w", err)
-		}
-
-		if strings.ToLower(strings.TrimSpace(userInput)) == "/bye" {
-			break
-		}
-
-		if strings.TrimSpace(userInput) == "" {
-			continue
-		}
-
-		// Create a cancellable context for the chat request
-		// This allows us to cancel the request if the user presses Ctrl+C during response generation
-		chatCtx, cancelChat := context.WithCancel(cmd.Context())
-
-		// Set up signal handler to cancel the context on Ctrl+C
-		sigChan := make(chan os.Signal, 1)
-		signal.Notify(sigChan, syscall.SIGINT)
-		go func() {
-			select {
-			case <-sigChan:
-				cancelChat()
-			case <-chatCtx.Done():
-				// Context cancelled, exit goroutine
-				// Context cancelled, exit goroutine
-			}
-		}()
-
-		err = chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput)
-
-		cancelChat()
-		signal.Stop(sigChan)
-		cancelChat()
-
-		if err != nil {
-			// Check if the error is due to context cancellation (Ctrl+C during response)
-			if errors.Is(err, context.Canceled) {
-				fmt.Println("\nUse Ctrl + d or /bye to exit.")
-			} else {
-				cmd.PrintErrln(handleClientError(err, "Failed to generate a response"))
-			}
-			continue
-		}
-
-		cmd.Println()
-	}
-	return nil
-}
-
 var (
 	markdownRenderer *glamour.TermRenderer
 	lastWidth        int
@@ -507,47 +458,93 @@ func renderMarkdown(content string) (string, error) {
 	return rendered, nil
 }
 
+// buildUserMessage constructs an OpenAIChatMessage for the user with the processed prompt and images.
+// This is used to ensure conversation history reflects exactly what the model received.
+func buildUserMessage(prompt string, imageURLs []string) desktop.OpenAIChatMessage {
+	if len(imageURLs) > 0 {
+		// Multimodal message with images - build content array
+		contentParts := make([]desktop.ContentPart, 0, len(imageURLs)+1)
+
+		// Add all images first
+		for _, imageURL := range imageURLs {
+			contentParts = append(contentParts, desktop.ContentPart{
+				Type: "image_url",
+				ImageURL: &desktop.ImageURL{
+					URL: imageURL,
+				},
+			})
+		}
+
+		// Add text prompt if present
+		if prompt != "" {
+			contentParts = append(contentParts, desktop.ContentPart{
+				Type: "text",
+				Text: prompt,
+			})
+		}
+
+		return desktop.OpenAIChatMessage{
+			Role:    "user",
+			Content: contentParts,
+		}
+	}
+
+	// Simple text-only message
+	return desktop.OpenAIChatMessage{
+		Role:    "user",
+		Content: prompt,
+	}
+}
+
 // chatWithMarkdown performs chat and streams the response with selective markdown rendering.
 func chatWithMarkdown(cmd *cobra.Command, client *desktop.Client, model, prompt string) error {
-	return chatWithMarkdownContext(cmd.Context(), cmd, client, model, prompt)
+	_, _, err := chatWithMarkdownContext(cmd.Context(), cmd, client, model, prompt, nil)
+	return err
 }
 
 // chatWithMarkdownContext performs chat with context support and streams the response with selective markdown rendering.
-func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *desktop.Client, model, prompt string) error {
+// It accepts an optional conversation history and returns both the assistant's response and the processed user message
+// (after file inclusions and image processing) for accurate history tracking.
+func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *desktop.Client, model, prompt string, conversationHistory []desktop.OpenAIChatMessage) (assistantResponse string, processedUserMessage desktop.OpenAIChatMessage, err error) {
 	colorMode, _ := cmd.Flags().GetString("color")
 	useMarkdown := shouldUseMarkdown(colorMode)
 	debug, _ := cmd.Flags().GetBool("debug")
 
 	// Process file inclusions first (files referenced with @ symbol)
-	prompt, err := processFileInclusions(prompt)
+	prompt, err = processFileInclusions(prompt)
 	if err != nil {
-		return fmt.Errorf("failed to process file inclusions: %w", err)
+		return "", desktop.OpenAIChatMessage{}, fmt.Errorf("failed to process file inclusions: %w", err)
 	}
 
 	var imageURLs []string
 	cleanedPrompt, imgs, err := processImagesInPrompt(prompt)
 	if err != nil {
-		return fmt.Errorf("failed to process images: %w", err)
+		return "", desktop.OpenAIChatMessage{}, fmt.Errorf("failed to process images: %w", err)
 	}
 	prompt = cleanedPrompt
 	imageURLs = imgs
 
+	// Build the processed user message to return for history tracking.
+	// This reflects exactly what the model receives.
+	processedUserMessage = buildUserMessage(prompt, imageURLs)
+
 	if !useMarkdown {
 		// Simple case: just stream as plain text
-		return client.ChatWithContext(ctx, model, prompt, imageURLs, func(content string) {
+		assistantResponse, err = client.ChatWithMessagesContext(ctx, model, conversationHistory, prompt, imageURLs, func(content string) {
 			cmd.Print(content)
 		}, false)
+		return assistantResponse, processedUserMessage, err
 	}
 
 	// For markdown: use streaming buffer to render code blocks as they complete
 	markdownBuffer := NewStreamingMarkdownBuffer()
 
-	err = client.ChatWithContext(ctx, model, prompt, imageURLs, func(content string) {
+	assistantResponse, err = client.ChatWithMessagesContext(ctx, model, conversationHistory, prompt, imageURLs, func(content string) {
 		// Use the streaming markdown buffer to intelligently render content
-		rendered, err := markdownBuffer.AddContent(content, true)
-		if err != nil {
+		rendered, renderErr := markdownBuffer.AddContent(content, true)
+		if renderErr != nil {
 			if debug {
-				cmd.PrintErrln(err)
+				cmd.PrintErrln(renderErr)
 			}
 			// Fallback to plain text on error
 			cmd.Print(content)
@@ -556,7 +553,7 @@ func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *de
 		}
 	}, true)
 	if err != nil {
-		return err
+		return assistantResponse, processedUserMessage, err
 	}
 
 	// Flush any remaining content from the markdown buffer
@@ -564,7 +561,7 @@ func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *de
 		cmd.Print(remaining)
 	}
 
-	return nil
+	return assistantResponse, processedUserMessage, nil
 }
 
 func newRunCmd() *cobra.Command {
@@ -641,14 +638,10 @@ func newRunCmd() *cobra.Command {
 				return nil
 			}
 
-			// Interactive mode for external OpenAI endpoint
-			if term.IsTerminal(int(os.Stdin.Fd())) {
-				termenv.SetDefaultOutput(
-					termenv.NewOutput(asPrinter(cmd), termenv.WithColorCache(true)),
-				)
-				return generateInteractiveWithReadline(cmd, openaiClient, model)
-			}
-			return generateInteractiveBasic(cmd, openaiClient, model)
+			termenv.SetDefaultOutput(
+				termenv.NewOutput(asPrinter(cmd), termenv.WithColorCache(true)),
+			)
+			return generateInteractiveWithReadline(cmd, openaiClient, model)
 		}
 
 		if _, err := ensureStandaloneRunnerAvailable(cmd.Context(), asPrinter(cmd), debug); err != nil {
@@ -746,19 +739,15 @@ func newRunCmd() *cobra.Command {
 				return nil
 			}
 
-			// Use enhanced readline-based interactive mode when terminal is available
-			if term.IsTerminal(int(os.Stdin.Fd())) {
-				// Initialize termenv with color caching before starting interactive session.
-				// This queries the terminal background color once and caches it, preventing
-				// OSC response sequences from appearing in stdin during the interactive loop.
-				termenv.SetDefaultOutput(
-					termenv.NewOutput(asPrinter(cmd), termenv.WithColorCache(true)),
-				)
-				return generateInteractiveWithReadline(cmd, desktopClient, model)
-			}
+			// Initialize termenv with color caching before starting interactive session.
+			// This queries the terminal background color once and caches it, preventing
+			// OSC response sequences from appearing in stdin during the interactive loop.
+			termenv.SetDefaultOutput(
+				termenv.NewOutput(asPrinter(cmd), termenv.WithColorCache(true)),
+			)
+
+			return generateInteractiveWithReadline(cmd, desktopClient, model)
-
-			// Fall back to basic mode if not a terminal
-			return generateInteractiveBasic(cmd, desktopClient, model)
 		},
 		ValidArgsFunction: completion.ModelNames(getDesktopClient, 1),
 	}
diff --git a/cmd/cli/desktop/desktop.go b/cmd/cli/desktop/desktop.go
index 1ea3b2044..056cf3263 100644
--- a/cmd/cli/desktop/desktop.go
+++ b/cmd/cli/desktop/desktop.go
@@ -350,13 +350,14 @@ func (c *Client) Chat(model, prompt string, imageURLs []string, outputFunc func(
 	return c.ChatWithContext(context.Background(), model, prompt, imageURLs, outputFunc, shouldUseMarkdown)
 }
 
-// ChatWithContext performs a chat request with context support for cancellation and streams the response content with selective markdown rendering.
-func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
-	// Build the message content - either simple string or multimodal array
+// ChatWithMessagesContext performs a chat request with conversation history and returns the assistant's response.
+// This allows maintaining conversation context across multiple exchanges.
+func (c *Client) ChatWithMessagesContext(ctx context.Context, model string, conversationHistory []OpenAIChatMessage, prompt string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) (string, error) {
+	// Build the current user message content - either simple string or multimodal array
 	var messageContent interface{}
 	if len(imageURLs) > 0 {
 		// Multimodal message with images
-		contentParts := make([]ContentPart, 0, len(imageURLs))
+		contentParts := make([]ContentPart, 0, len(imageURLs)+1)
 
 		// Add all images first
 		for _, imageURL := range imageURLs {
@@ -382,20 +383,23 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 		messageContent = prompt
 	}
 
+	// Build messages array with conversation history plus current message
+	messages := make([]OpenAIChatMessage, 0, len(conversationHistory)+1)
+	messages = append(messages, conversationHistory...)
+	messages = append(messages, OpenAIChatMessage{
+		Role:    "user",
+		Content: messageContent,
+	})
+
 	reqBody := OpenAIChatRequest{
-		Model: model,
-		Messages: []OpenAIChatMessage{
-			{
-				Role:    "user",
-				Content: messageContent,
-			},
-		},
-		Stream: true,
+		Model:    model,
+		Messages: messages,
+		Stream:   true,
 	}
 
 	jsonData, err := json.Marshal(reqBody)
 	if err != nil {
-		return fmt.Errorf("error marshaling request: %w", err)
+		return "", fmt.Errorf("error marshaling request: %w", err)
 	}
 
 	completionsPath := c.modelRunner.OpenAIPathPrefix() + "/chat/completions"
@@ -407,13 +411,13 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 		bytes.NewReader(jsonData),
 	)
 	if err != nil {
-		return c.handleQueryError(err, completionsPath)
+		return "", c.handleQueryError(err, completionsPath)
 	}
 	defer resp.Body.Close()
 
 	if resp.StatusCode != http.StatusOK {
 		body, _ := io.ReadAll(resp.Body)
-		return fmt.Errorf("error response: status=%d body=%s", resp.StatusCode, body)
+		return "", fmt.Errorf("error response: status=%d body=%s", resp.StatusCode, body)
 	}
 
 	type chatPrinterState int
@@ -425,7 +429,11 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 	printerState := chatPrinterNone
 
 	reasoningFmt := color.New().Add(color.Italic)
+	if !shouldUseMarkdown {
+		reasoningFmt.DisableColor()
+	}
 
+	var assistantResponse strings.Builder
 	var finalUsage *struct {
 		CompletionTokens int `json:"completion_tokens"`
 		PromptTokens     int `json:"prompt_tokens"`
@@ -437,7 +445,7 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 		// Check if context was cancelled
 		select {
 		case <-ctx.Done():
-			return ctx.Err()
+			return assistantResponse.String(), ctx.Err()
 		default:
 		}
 
@@ -458,7 +466,7 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 
 		var streamResp OpenAIChatResponse
 		if err := json.Unmarshal([]byte(data), &streamResp); err != nil {
-			return fmt.Errorf("error parsing stream response: %w", err)
+			return assistantResponse.String(), fmt.Errorf("error parsing stream response: %w", err)
 		}
 
 		if streamResp.Usage != nil {
@@ -493,12 +501,13 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 				}
 				printerState = chatPrinterContent
 				outputFunc(chunk)
+				assistantResponse.WriteString(chunk)
 			}
 		}
 	}
 
 	if err := scanner.Err(); err != nil {
-		return fmt.Errorf("error reading response stream: %w", err)
+		return assistantResponse.String(), fmt.Errorf("error reading response stream: %w", err)
 	}
 
 	if finalUsage != nil {
@@ -514,7 +523,13 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 		outputFunc(usageFmt.Sprint(usageInfo))
 	}
 
-	return nil
+	return assistantResponse.String(), nil
+}
+
+// ChatWithContext performs a chat request with context support for cancellation and streams the response content with selective markdown rendering.
+func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
+	_, err := c.ChatWithMessagesContext(ctx, model, nil, prompt, imageURLs, outputFunc, shouldUseMarkdown)
+	return err
 }
 
 func (c *Client) Remove(modelArgs []string, force bool) (string, error) {
diff --git a/cmd/cli/readline/term.go b/cmd/cli/readline/term.go
index 5584cd257..69f17f1e9 100644
--- a/cmd/cli/readline/term.go
+++ b/cmd/cli/readline/term.go
@@ -29,9 +29,3 @@ func UnsetRawMode(fd uintptr, termios any) error {
 	t := termios.(*Termios)
 	return setTermios(fd, t)
 }
-
-// IsTerminal returns true if the given file descriptor is a terminal.
-func IsTerminal(fd uintptr) bool {
-	_, err := getTermios(fd)
-	return err == nil
-}
diff --git a/cmd/cli/readline/term_windows.go b/cmd/cli/readline/term_windows.go
index 3b35149b8..8a9e9a587 100644
--- a/cmd/cli/readline/term_windows.go
+++ b/cmd/cli/readline/term_windows.go
@@ -8,13 +8,6 @@ type State struct {
 	mode uint32
 }
 
-// IsTerminal checks if the given file descriptor is associated with a terminal
-func IsTerminal(fd uintptr) bool {
-	var st uint32
-	err := windows.GetConsoleMode(windows.Handle(fd), &st)
-	return err == nil
-}
-
 func SetRawMode(fd uintptr) (*State, error) {
 	var st uint32
 	if err := windows.GetConsoleMode(windows.Handle(fd), &st); err != nil {
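Note for reviewers: a minimal sketch of how the pieces in this patch compose end to end. Each turn sends the accumulated history plus the current prompt through ChatWithMessagesContext, and on success appends the processed user message (via buildUserMessage) and the streamed assistant reply, so the next request carries the whole conversation. This is illustrative only and not part of the patch: it assumes a connected *desktop.Client in `client` and a model name in `model`, and it trims the cancellation and error handling the real readline loop performs.

	// Sketch: conversation-history accumulation across turns (assumes
	// client *desktop.Client and model string are already set up).
	var history []desktop.OpenAIChatMessage

	ask := func(prompt string) error {
		// Send all prior turns plus the new prompt; stream chunks to stdout.
		reply, err := client.ChatWithMessagesContext(
			context.Background(), model, history, prompt, nil,
			func(chunk string) { fmt.Print(chunk) }, false,
		)
		if err != nil {
			// As in the interactive loop, history is left unchanged on failure.
			return err
		}
		// Record exactly what the model received and produced.
		history = append(history, buildUserMessage(prompt, nil))
		history = append(history, desktop.OpenAIChatMessage{Role: "assistant", Content: reply})
		return nil
	}

	_ = ask("What is the capital of France?")
	_ = ask("And its population?") // resolved against the prior turn carried in history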