diff --git a/cmd/cli/commands/list.go b/cmd/cli/commands/list.go index 29ff291b..0c7f6675 100644 --- a/cmd/cli/commands/list.go +++ b/cmd/cli/commands/list.go @@ -21,6 +21,7 @@ import ( func newListCmd() *cobra.Command { var jsonFormat, openai, quiet bool + var openaiURL string c := &cobra.Command{ Use: "list [OPTIONS] [MODEL]", Aliases: []string{"ls"}, @@ -31,6 +32,46 @@ func newListCmd() *cobra.Command { return fmt.Errorf("--quiet flag cannot be used with --openai flag or OpenAI backend") } + // Handle --openaiurl flag for external OpenAI endpoints + if openaiURL != "" { + if quiet { + return fmt.Errorf("--quiet flag cannot be used with --openaiurl flag") + } + ctx, err := desktop.NewContextForOpenAI(openaiURL) + if err != nil { + return fmt.Errorf("invalid OpenAI URL: %w", err) + } + client := desktop.New(ctx) + models, err := client.ListOpenAI() + if err != nil { + return handleClientError(err, "Failed to list models from OpenAI endpoint") + } + var modelFilter string + if len(args) > 0 { + modelFilter = args[0] + } + if modelFilter != "" { + filtered := models.Data[:0] + for _, m := range models.Data { + if matchesModelFilter(m.ID, modelFilter) { + filtered = append(filtered, m) + } + } + models.Data = filtered + } + if jsonFormat { + output, err := formatter.ToStandardJSON(models) + if err != nil { + return err + } + fmt.Fprint(cmd.OutOrStdout(), output) + return nil + } + // Display in table format with MODEL NAME and CREATED columns + fmt.Fprint(cmd.OutOrStdout(), prettyPrintOpenAIModels(models)) + return nil + } + // If we're doing an automatic install, only show the installation // status if it won't corrupt machine-readable output.
var standaloneInstallPrinter standalone.StatusPrinter @@ -56,6 +97,7 @@ func newListCmd() *cobra.Command { c.Flags().BoolVar(&jsonFormat, "json", false, "List models in a JSON format") c.Flags().BoolVar(&openai, "openai", false, "List models in an OpenAI format") c.Flags().BoolVarP(&quiet, "quiet", "q", false, "Only show model IDs") + c.Flags().StringVar(&openaiURL, "openaiurl", "", "OpenAI-compatible API endpoint URL to list models from") return c } @@ -239,3 +281,24 @@ func appendRow(table *tablewriter.Table, tag string, model dmrm.Model) { model.Config.Size, }) } + +// prettyPrintOpenAIModels formats an OpenAI model list as a table with MODEL NAME and CREATED columns +func prettyPrintOpenAIModels(models dmrm.OpenAIModelList) string { + // Sort models by ID + sort.Slice(models.Data, func(i, j int) bool { + return strings.ToLower(models.Data[i].ID) < strings.ToLower(models.Data[j].ID) + }) + + var buf bytes.Buffer + table := newTable(&buf) + table.Header([]string{"MODEL NAME", "CREATED"}) + for _, model := range models.Data { + table.Append([]string{ + model.ID, + units.HumanDuration(time.Since(time.Unix(model.Created, 0))) + " ago", + }) + } + + table.Render() + return buf.String() +} diff --git a/cmd/cli/commands/run.go b/cmd/cli/commands/run.go index 377a18b2..91c05b04 100644 --- a/cmd/cli/commands/run.go +++ b/cmd/cli/commands/run.go @@ -571,6 +571,7 @@ func newRunCmd() *cobra.Command { var debug bool var colorMode string var detach bool + var openaiURL string const cmdArgs = "MODEL [PROMPT]" c := &cobra.Command{ @@ -585,10 +586,6 @@ func newRunCmd() *cobra.Command { } }, RunE: func(cmd *cobra.Command, args []string) error { - if _, err := ensureStandaloneRunnerAvailable(cmd.Context(), asPrinter(cmd), debug); err != nil { - return fmt.Errorf("unable to initialize standalone model runner: %w", err) - } - model := args[0] prompt := "" argsLen := len(args) @@ -621,6 +618,43 @@ func newRunCmd() *cobra.Command { } } + // Handle --openaiurl flag for external OpenAI
endpoints + if openaiURL != "" { + if detach { + return fmt.Errorf("--detach flag cannot be used with --openaiurl flag") + } + ctx, err := desktop.NewContextForOpenAI(openaiURL) + if err != nil { + return fmt.Errorf("invalid OpenAI URL: %w", err) + } + openaiClient := desktop.New(ctx) + + if prompt != "" { + // Single prompt mode + useMarkdown := shouldUseMarkdown(colorMode) + if err := openaiClient.ChatWithContext(cmd.Context(), model, prompt, nil, func(content string) { + cmd.Print(content) + }, useMarkdown); err != nil { + return handleClientError(err, "Failed to generate a response") + } + cmd.Println() + return nil + } + + // Interactive mode for external OpenAI endpoint + if term.IsTerminal(int(os.Stdin.Fd())) { + termenv.SetDefaultOutput( + termenv.NewOutput(asPrinter(cmd), termenv.WithColorCache(true)), + ) + return generateInteractiveWithReadline(cmd, openaiClient, model) + } + return generateInteractiveBasic(cmd, openaiClient, model) + } + + if _, err := ensureStandaloneRunnerAvailable(cmd.Context(), asPrinter(cmd), debug); err != nil { + return fmt.Errorf("unable to initialize standalone model runner: %w", err) + } + // Check if this is an NVIDIA NIM image if isNIMImage(model) { // NIM images are handled differently - they run as Docker containers @@ -733,6 +767,7 @@ func newRunCmd() *cobra.Command { c.Flags().BoolVar(&debug, "debug", false, "Enable debug logging") c.Flags().StringVar(&colorMode, "color", "no", "Use colored output (auto|yes|no)") c.Flags().BoolVarP(&detach, "detach", "d", false, "Load the model in the background without interaction") + c.Flags().StringVar(&openaiURL, "openaiurl", "", "OpenAI-compatible API endpoint URL to chat with") return c } diff --git a/cmd/cli/desktop/context.go b/cmd/cli/desktop/context.go index 14871409..c92121a8 100644 --- a/cmd/cli/desktop/context.go +++ b/cmd/cli/desktop/context.go @@ -98,6 +98,10 @@ type ModelRunnerContext struct { urlPrefix *url.URL // client is the model runner client. 
client DockerHttpClient + // openaiPathPrefix is the path prefix for OpenAI-compatible endpoints. + // For internal Docker Model Runner, this is "/engines/v1". + // For external OpenAI-compatible endpoints, this is empty (the URL already includes the version path). + openaiPathPrefix string } // NewContextForMock is a ModelRunnerContext constructor exposed only for the @@ -108,9 +112,10 @@ func NewContextForMock(client DockerHttpClient) *ModelRunnerContext { panic("error occurred while parsing known-good URL") } return &ModelRunnerContext{ - kind: types.ModelRunnerEngineKindDesktop, - urlPrefix: urlPrefix, - client: client, + kind: types.ModelRunnerEngineKindDesktop, + urlPrefix: urlPrefix, + client: client, + openaiPathPrefix: inference.InferencePrefix + "/v1", } } @@ -128,9 +133,26 @@ func NewContextForTest(endpoint string, client DockerHttpClient, kind types.Mode } return &ModelRunnerContext{ - kind: kind, - urlPrefix: urlPrefix, - client: client, + kind: kind, + urlPrefix: urlPrefix, + client: client, + openaiPathPrefix: inference.InferencePrefix + "/v1", + }, nil +} + +// NewContextForOpenAI creates a ModelRunnerContext for connecting to an external +// OpenAI-compatible API endpoint. This is used when the --openaiurl flag is specified. +func NewContextForOpenAI(endpoint string) (*ModelRunnerContext, error) { + urlPrefix, err := url.Parse(endpoint) + if err != nil { + return nil, fmt.Errorf("invalid OpenAI endpoint URL: %w", err) + } + + return &ModelRunnerContext{ + kind: types.ModelRunnerEngineKindMobyManual, + urlPrefix: urlPrefix, + client: http.DefaultClient, + openaiPathPrefix: "", // Empty prefix for external OpenAI-compatible endpoints }, nil } @@ -262,9 +284,10 @@ func DetectContext(ctx context.Context, cli *command.DockerCli, printer standalo // Success. 
return &ModelRunnerContext{ - kind: kind, - urlPrefix: urlPrefix, - client: client, + kind: kind, + urlPrefix: urlPrefix, + client: client, + openaiPathPrefix: inference.InferencePrefix + "/v1", }, nil } @@ -289,6 +312,13 @@ func (c *ModelRunnerContext) Client() DockerHttpClient { return c.client } +// OpenAIPathPrefix returns the path prefix for OpenAI-compatible endpoints. +// For internal Docker Model Runner, this returns the inference prefix. +// For external OpenAI-compatible endpoints, this returns an empty string. +func (c *ModelRunnerContext) OpenAIPathPrefix() string { + return c.openaiPathPrefix +} + func setUserAgent(client DockerHttpClient, userAgent string) { if httpClient, ok := client.(*http.Client); ok { transport := httpClient.Transport diff --git a/cmd/cli/desktop/desktop.go b/cmd/cli/desktop/desktop.go index 11facc42..1ea3b204 100644 --- a/cmd/cli/desktop/desktop.go +++ b/cmd/cli/desktop/desktop.go @@ -276,7 +276,7 @@ func (c *Client) List() ([]dmrm.Model, error) { } func (c *Client) ListOpenAI() (dmrm.OpenAIModelList, error) { - modelsRoute := inference.InferencePrefix + "/v1/models" + modelsRoute := c.modelRunner.OpenAIPathPrefix() + "/models" body, err := c.listRaw(modelsRoute, "") if err != nil { return dmrm.OpenAIModelList{}, err @@ -304,7 +304,7 @@ func (c *Client) Inspect(model string, remote bool) (dmrm.Model, error) { } func (c *Client) InspectOpenAI(model string) (dmrm.OpenAIModel, error) { - modelsRoute := inference.InferencePrefix + "/v1/models" + modelsRoute := c.modelRunner.OpenAIPathPrefix() + "/models" rawResponse, err := c.listRaw(fmt.Sprintf("%s/%s", modelsRoute, model), model) if err != nil { return dmrm.OpenAIModel{}, err @@ -398,7 +398,7 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag return fmt.Errorf("error marshaling request: %w", err) } - completionsPath := inference.InferencePrefix + "/v1/chat/completions" + completionsPath := c.modelRunner.OpenAIPathPrefix() + "/chat/completions" 
resp, err := c.doRequestWithAuthContext( ctx, diff --git a/cmd/cli/docs/reference/docker_model_list.yaml b/cmd/cli/docs/reference/docker_model_list.yaml index a48cfa09..242462f4 100644 --- a/cmd/cli/docs/reference/docker_model_list.yaml +++ b/cmd/cli/docs/reference/docker_model_list.yaml @@ -26,6 +26,15 @@ options: experimentalcli: false kubernetes: false swarm: false + - option: openaiurl + value_type: string + description: OpenAI-compatible API endpoint URL to list models from + deprecated: false + hidden: false + experimental: false + experimentalcli: false + kubernetes: false + swarm: false - option: quiet shorthand: q value_type: bool diff --git a/cmd/cli/docs/reference/docker_model_run.yaml b/cmd/cli/docs/reference/docker_model_run.yaml index 781c2bae..c35dce09 100644 --- a/cmd/cli/docs/reference/docker_model_run.yaml +++ b/cmd/cli/docs/reference/docker_model_run.yaml @@ -41,6 +41,15 @@ options: experimentalcli: false kubernetes: false swarm: false + - option: openaiurl + value_type: string + description: OpenAI-compatible API endpoint URL to chat with + deprecated: false + hidden: false + experimental: false + experimentalcli: false + kubernetes: false + swarm: false examples: |- ### One-time prompt diff --git a/cmd/cli/docs/reference/model_list.md b/cmd/cli/docs/reference/model_list.md index b6c051f2..24d260a5 100644 --- a/cmd/cli/docs/reference/model_list.md +++ b/cmd/cli/docs/reference/model_list.md @@ -9,11 +9,12 @@ List the models pulled to your local environment ### Options -| Name | Type | Default | Description | -|:----------------|:-------|:--------|:--------------------------------| -| `--json` | `bool` | | List models in a JSON format | -| `--openai` | `bool` | | List models in an OpenAI format | -| `-q`, `--quiet` | `bool` | | Only show model IDs | +| Name | Type | Default | Description | +|:----------------|:---------|:--------|:-------------------------------------------------------| +| `--json` | `bool` | | List models in a JSON format | +| 
`--openai` | `bool` | | List models in an OpenAI format | +| `--openaiurl` | `string` | | OpenAI-compatible API endpoint URL to list models from | +| `-q`, `--quiet` | `bool` | | Only show model IDs | diff --git a/cmd/cli/docs/reference/model_run.md b/cmd/cli/docs/reference/model_run.md index 0d271dc9..cdebb6f6 100644 --- a/cmd/cli/docs/reference/model_run.md +++ b/cmd/cli/docs/reference/model_run.md @@ -10,6 +10,7 @@ Run a model and interact with it using a submitted prompt or chat mode | `--color` | `string` | `no` | Use colored output (auto\|yes\|no) | | `--debug` | `bool` | | Enable debug logging | | `-d`, `--detach` | `bool` | | Load the model in the background without interaction | +| `--openaiurl` | `string` | | OpenAI-compatible API endpoint URL to chat with |