diff --git a/cmd/cli/commands/run.go b/cmd/cli/commands/run.go
index 91c05b043..22780f272 100644
--- a/cmd/cli/commands/run.go
+++ b/cmd/cli/commands/run.go
@@ -140,8 +140,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 		AltPlaceholder: `Use """ to end multi-line input`,
 	})
 	if err != nil {
-		// Fall back to basic input mode if readline initialization fails
-		return generateInteractiveBasic(cmd, desktopClient, model)
+		return err
 	}
 
 	// Disable history if the environment variable is set
@@ -154,6 +153,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 
 	var sb strings.Builder
 	var multiline
+	var conversationHistory []desktop.OpenAIChatMessage
 
 	// Add a helper function to handle file inclusion when @ is pressed
 	// We'll implement a basic version here that shows a message when @ is pressed
@@ -245,7 +245,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 			}
 		}()
 
-		err := chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput)
+		assistantResponse, processedUserMessage, err := chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput, conversationHistory)
 
 		// Clean up signal handler
 		signal.Stop(sigChan)
@@ -263,70 +263,21 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 			continue
 		}
 
+		// Add the processed user message and assistant response to conversation history.
+		// Using the processed message ensures the history reflects exactly what the model
+		// received (after file inclusions and image processing), not the raw user input.
+		conversationHistory = append(conversationHistory, processedUserMessage)
+		conversationHistory = append(conversationHistory, desktop.OpenAIChatMessage{
+			Role:    "assistant",
+			Content: assistantResponse,
+		})
+
 		cmd.Println()
 		sb.Reset()
 	}
 }
 
-// generateInteractiveBasic provides a basic interactive mode (fallback)
-func generateInteractiveBasic(cmd *cobra.Command, desktopClient *desktop.Client, model string) error {
-	scanner := bufio.NewScanner(os.Stdin)
-	for {
-		userInput, err := readMultilineInput(cmd, scanner)
-		if err != nil {
-			if err.Error() == "EOF" {
-				break
-			}
-			return fmt.Errorf("Error reading input: %w", err)
-		}
-
-		if strings.ToLower(strings.TrimSpace(userInput)) == "/bye" {
-			break
-		}
-
-		if strings.TrimSpace(userInput) == "" {
-			continue
-		}
-
-		// Create a cancellable context for the chat request
-		// This allows us to cancel the request if the user presses Ctrl+C during response generation
-		chatCtx, cancelChat := context.WithCancel(cmd.Context())
-
-		// Set up signal handler to cancel the context on Ctrl+C
-		sigChan := make(chan os.Signal, 1)
-		signal.Notify(sigChan, syscall.SIGINT)
-		go func() {
-			select {
-			case <-sigChan:
-				cancelChat()
-			case <-chatCtx.Done():
-				// Context cancelled, exit goroutine
-				// Context cancelled, exit goroutine
-			}
-		}()
-
-		err = chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput)
-
-		cancelChat()
-		signal.Stop(sigChan)
-		cancelChat()
-
-		if err != nil {
-			// Check if the error is due to context cancellation (Ctrl+C during response)
-			if errors.Is(err, context.Canceled) {
-				fmt.Println("\nUse Ctrl + d or /bye to exit.")
-			} else {
-				cmd.PrintErrln(handleClientError(err, "Failed to generate a response"))
-			}
-			continue
-		}
-
-		cmd.Println()
-	}
-	return nil
-}
-
 var (
 	markdownRenderer *glamour.TermRenderer
 	lastWidth        int
@@ -507,47 +458,93 @@ func renderMarkdown(content string) (string, error) {
 	return rendered, nil
 }
 
+// buildUserMessage constructs an OpenAIChatMessage for the user with the processed prompt and images.
+// This is used to ensure conversation history reflects exactly what the model received.
+func buildUserMessage(prompt string, imageURLs []string) desktop.OpenAIChatMessage {
+	if len(imageURLs) > 0 {
+		// Multimodal message with images - build content array
+		contentParts := make([]desktop.ContentPart, 0, len(imageURLs)+1)
+
+		// Add all images first
+		for _, imageURL := range imageURLs {
+			contentParts = append(contentParts, desktop.ContentPart{
+				Type: "image_url",
+				ImageURL: &desktop.ImageURL{
+					URL: imageURL,
+				},
+			})
+		}
+
+		// Add text prompt if present
+		if prompt != "" {
+			contentParts = append(contentParts, desktop.ContentPart{
+				Type: "text",
+				Text: prompt,
+			})
+		}
+
+		return desktop.OpenAIChatMessage{
+			Role:    "user",
+			Content: contentParts,
+		}
+	}
+
+	// Simple text-only message
+	return desktop.OpenAIChatMessage{
+		Role:    "user",
+		Content: prompt,
+	}
+}
+
 // chatWithMarkdown performs chat and streams the response with selective markdown rendering.
 func chatWithMarkdown(cmd *cobra.Command, client *desktop.Client, model, prompt string) error {
-	return chatWithMarkdownContext(cmd.Context(), cmd, client, model, prompt)
+	_, _, err := chatWithMarkdownContext(cmd.Context(), cmd, client, model, prompt, nil)
+	return err
 }
 
 // chatWithMarkdownContext performs chat with context support and streams the response with selective markdown rendering.
-func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *desktop.Client, model, prompt string) error {
+// It accepts an optional conversation history and returns both the assistant's response and the processed user message
+// (after file inclusions and image processing) for accurate history tracking.
+func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *desktop.Client, model, prompt string, conversationHistory []desktop.OpenAIChatMessage) (assistantResponse string, processedUserMessage desktop.OpenAIChatMessage, err error) {
 	colorMode, _ := cmd.Flags().GetString("color")
 	useMarkdown := shouldUseMarkdown(colorMode)
 	debug, _ := cmd.Flags().GetBool("debug")
 
 	// Process file inclusions first (files referenced with @ symbol)
-	prompt, err := processFileInclusions(prompt)
+	prompt, err = processFileInclusions(prompt)
 	if err != nil {
-		return fmt.Errorf("failed to process file inclusions: %w", err)
+		return "", desktop.OpenAIChatMessage{}, fmt.Errorf("failed to process file inclusions: %w", err)
 	}
 
 	var imageURLs []string
 	cleanedPrompt, imgs, err := processImagesInPrompt(prompt)
 	if err != nil {
-		return fmt.Errorf("failed to process images: %w", err)
+		return "", desktop.OpenAIChatMessage{}, fmt.Errorf("failed to process images: %w", err)
 	}
 	prompt = cleanedPrompt
 	imageURLs = imgs
 
+	// Build the processed user message to return for history tracking.
+	// This reflects exactly what the model receives.
+	processedUserMessage = buildUserMessage(prompt, imageURLs)
+
 	if !useMarkdown {
 		// Simple case: just stream as plain text
-		return client.ChatWithContext(ctx, model, prompt, imageURLs, func(content string) {
+		assistantResponse, err = client.ChatWithMessagesContext(ctx, model, conversationHistory, prompt, imageURLs, func(content string) {
 			cmd.Print(content)
 		}, false)
+		return assistantResponse, processedUserMessage, err
 	}
 
 	// For markdown: use streaming buffer to render code blocks as they complete
 	markdownBuffer := NewStreamingMarkdownBuffer()
 
-	err = client.ChatWithContext(ctx, model, prompt, imageURLs, func(content string) {
+	assistantResponse, err = client.ChatWithMessagesContext(ctx, model, conversationHistory, prompt, imageURLs, func(content string) {
 		// Use the streaming markdown buffer to intelligently render content
-		rendered, err := markdownBuffer.AddContent(content, true)
-		if err != nil {
+		rendered, renderErr := markdownBuffer.AddContent(content, true)
+		if renderErr != nil {
 			if debug {
-				cmd.PrintErrln(err)
+				cmd.PrintErrln(renderErr)
 			}
 			// Fallback to plain text on error
 			cmd.Print(content)
@@ -556,7 +553,7 @@ func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *de
 		}
 	}, true)
 	if err != nil {
-		return err
+		return assistantResponse, processedUserMessage, err
 	}
 
 	// Flush any remaining content from the markdown buffer
@@ -564,7 +561,7 @@ func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *de
 		cmd.Print(remaining)
 	}
 
-	return nil
+	return assistantResponse, processedUserMessage, nil
 }
 
 func newRunCmd() *cobra.Command {
@@ -641,14 +638,10 @@ func newRunCmd() *cobra.Command {
 				return nil
 			}
 
-			// Interactive mode for external OpenAI endpoint
-			if term.IsTerminal(int(os.Stdin.Fd())) {
-				termenv.SetDefaultOutput(
-					termenv.NewOutput(asPrinter(cmd), termenv.WithColorCache(true)),
-				)
-				return generateInteractiveWithReadline(cmd, openaiClient, model)
-			}
-			return generateInteractiveBasic(cmd, openaiClient, model)
+			termenv.SetDefaultOutput(
+				termenv.NewOutput(asPrinter(cmd), termenv.WithColorCache(true)),
+			)
+			return generateInteractiveWithReadline(cmd, openaiClient, model)
 		}
 
 		if _, err := ensureStandaloneRunnerAvailable(cmd.Context(), asPrinter(cmd), debug); err != nil {
@@ -746,19 +739,15 @@ func newRunCmd() *cobra.Command {
 				return nil
 			}
 
-			// Use enhanced readline-based interactive mode when terminal is available
-			if term.IsTerminal(int(os.Stdin.Fd())) {
-				// Initialize termenv with color caching before starting interactive session.
-				// This queries the terminal background color once and caches it, preventing
-				// OSC response sequences from appearing in stdin during the interactive loop.
-				termenv.SetDefaultOutput(
-					termenv.NewOutput(asPrinter(cmd), termenv.WithColorCache(true)),
-				)
-				return generateInteractiveWithReadline(cmd, desktopClient, model)
-			}
+			// Initialize termenv with color caching before starting interactive session.
+			// This queries the terminal background color once and caches it, preventing
+			// OSC response sequences from appearing in stdin during the interactive loop.
+			termenv.SetDefaultOutput(
+				termenv.NewOutput(asPrinter(cmd), termenv.WithColorCache(true)),
+			)
+
+			return generateInteractiveWithReadline(cmd, desktopClient, model)
-
-			// Fall back to basic mode if not a terminal
-			return generateInteractiveBasic(cmd, desktopClient, model)
 		},
 		ValidArgsFunction: completion.ModelNames(getDesktopClient, 1),
 	}
diff --git a/cmd/cli/desktop/desktop.go b/cmd/cli/desktop/desktop.go
index 1ea3b2044..056cf3263 100644
--- a/cmd/cli/desktop/desktop.go
+++ b/cmd/cli/desktop/desktop.go
@@ -350,13 +350,14 @@ func (c *Client) Chat(model, prompt string, imageURLs []string, outputFunc func(
 	return c.ChatWithContext(context.Background(), model, prompt, imageURLs, outputFunc, shouldUseMarkdown)
 }
 
-// ChatWithContext performs a chat request with context support for cancellation and streams the response content with selective markdown rendering.
-func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
-	// Build the message content - either simple string or multimodal array
+// ChatWithMessagesContext performs a chat request with conversation history and returns the assistant's response.
+// This allows maintaining conversation context across multiple exchanges.
+func (c *Client) ChatWithMessagesContext(ctx context.Context, model string, conversationHistory []OpenAIChatMessage, prompt string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) (string, error) {
+	// Build the current user message content - either simple string or multimodal array
 	var messageContent interface{}
 	if len(imageURLs) > 0 {
 		// Multimodal message with images
-		contentParts := make([]ContentPart, 0, len(imageURLs))
+		contentParts := make([]ContentPart, 0, len(imageURLs)+1)
 
 		// Add all images first
 		for _, imageURL := range imageURLs {
@@ -382,20 +383,23 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 		messageContent = prompt
 	}
 
+	// Build messages array with conversation history plus current message
+	messages := make([]OpenAIChatMessage, 0, len(conversationHistory)+1)
+	messages = append(messages, conversationHistory...)
+	messages = append(messages, OpenAIChatMessage{
+		Role:    "user",
+		Content: messageContent,
+	})
+
 	reqBody := OpenAIChatRequest{
-		Model: model,
-		Messages: []OpenAIChatMessage{
-			{
-				Role:    "user",
-				Content: messageContent,
-			},
-		},
-		Stream: true,
+		Model:    model,
+		Messages: messages,
+		Stream:   true,
 	}
 
 	jsonData, err := json.Marshal(reqBody)
 	if err != nil {
-		return fmt.Errorf("error marshaling request: %w", err)
+		return "", fmt.Errorf("error marshaling request: %w", err)
 	}
 
 	completionsPath := c.modelRunner.OpenAIPathPrefix() + "/chat/completions"
@@ -407,13 +411,13 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 		bytes.NewReader(jsonData),
 	)
 	if err != nil {
-		return c.handleQueryError(err, completionsPath)
+		return "", c.handleQueryError(err, completionsPath)
 	}
 	defer resp.Body.Close()
 
 	if resp.StatusCode != http.StatusOK {
 		body, _ := io.ReadAll(resp.Body)
-		return fmt.Errorf("error response: status=%d body=%s", resp.StatusCode, body)
+		return "", fmt.Errorf("error response: status=%d body=%s", resp.StatusCode, body)
 	}
 
 	type chatPrinterState int
@@ -425,7 +429,11 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 	printerState := chatPrinterNone
 
 	reasoningFmt := color.New().Add(color.Italic)
+	if !shouldUseMarkdown {
+		reasoningFmt.DisableColor()
+	}
 
+	var assistantResponse strings.Builder
 	var finalUsage *struct {
 		CompletionTokens int `json:"completion_tokens"`
 		PromptTokens     int `json:"prompt_tokens"`
@@ -437,7 +445,7 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 		// Check if context was cancelled
 		select {
 		case <-ctx.Done():
-			return ctx.Err()
+			return assistantResponse.String(), ctx.Err()
 		default:
 		}
 
@@ -458,7 +466,7 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 
 		var streamResp OpenAIChatResponse
 		if err := json.Unmarshal([]byte(data), &streamResp); err != nil {
-			return fmt.Errorf("error parsing stream response: %w", err)
+			return assistantResponse.String(), fmt.Errorf("error parsing stream response: %w", err)
 		}
 
 		if streamResp.Usage != nil {
@@ -493,12 +501,13 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 				}
 				printerState = chatPrinterContent
 				outputFunc(chunk)
+				assistantResponse.WriteString(chunk)
 			}
 		}
 	}
 
 	if err := scanner.Err(); err != nil {
-		return fmt.Errorf("error reading response stream: %w", err)
+		return assistantResponse.String(), fmt.Errorf("error reading response stream: %w", err)
 	}
 
 	if finalUsage != nil {
@@ -514,7 +523,13 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 		outputFunc(usageFmt.Sprint(usageInfo))
 	}
 
-	return nil
+	return assistantResponse.String(), nil
+}
+
+// ChatWithContext performs a chat request with context support for cancellation and streams the response content with selective markdown rendering.
+func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
+	_, err := c.ChatWithMessagesContext(ctx, model, nil, prompt, imageURLs, outputFunc, shouldUseMarkdown)
+	return err
 }
 
 func (c *Client) Remove(modelArgs []string, force bool) (string, error) {
diff --git a/cmd/cli/readline/term.go b/cmd/cli/readline/term.go
index 5584cd257..69f17f1e9 100644
--- a/cmd/cli/readline/term.go
+++ b/cmd/cli/readline/term.go
@@ -29,9 +29,3 @@ func UnsetRawMode(fd uintptr, termios any) error {
 	t := termios.(*Termios)
 	return setTermios(fd, t)
 }
-
-// IsTerminal returns true if the given file descriptor is a terminal.
-func IsTerminal(fd uintptr) bool {
-	_, err := getTermios(fd)
-	return err == nil
-}
diff --git a/cmd/cli/readline/term_windows.go b/cmd/cli/readline/term_windows.go
index 3b35149b8..8a9e9a587 100644
--- a/cmd/cli/readline/term_windows.go
+++ b/cmd/cli/readline/term_windows.go
@@ -8,13 +8,6 @@ type State struct {
 	mode uint32
 }
 
-// IsTerminal checks if the given file descriptor is associated with a terminal
-func IsTerminal(fd uintptr) bool {
-	var st uint32
-	err := windows.GetConsoleMode(windows.Handle(fd), &st)
-	return err == nil
-}
-
 func SetRawMode(fd uintptr) (*State, error) {
 	var st uint32
 	if err := windows.GetConsoleMode(windows.Handle(fd), &st); err != nil {
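Note for reviewers: a minimal sketch of how the pieces in this patch compose end to end. Each turn sends the accumulated history plus the current prompt through ChatWithMessagesContext, and on success appends the processed user message (via buildUserMessage) and the streamed assistant reply, so the next request carries the whole conversation. This is illustrative only and not part of the patch: it assumes a connected *desktop.Client in `client` and a model name in `model`, and it trims the cancellation and error handling the real readline loop performs.

	// Sketch: conversation-history accumulation across turns (assumes
	// client *desktop.Client and model string are already set up).
	var history []desktop.OpenAIChatMessage

	ask := func(prompt string) error {
		// Send all prior turns plus the new prompt; stream chunks to stdout.
		reply, err := client.ChatWithMessagesContext(
			context.Background(), model, history, prompt, nil,
			func(chunk string) { fmt.Print(chunk) }, false,
		)
		if err != nil {
			// As in the interactive loop, history is left unchanged on failure.
			return err
		}
		// Record exactly what the model received and produced.
		history = append(history, buildUserMessage(prompt, nil))
		history = append(history, desktop.OpenAIChatMessage{Role: "assistant", Content: reply})
		return nil
	}

	_ = ask("What is the capital of France?")
	_ = ask("And its population?") // resolved against the prior turn carried in history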