-
Notifications
You must be signed in to change notification settings - Fork 72
Add /api/generate endpoint for model loading and unloading #266
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| package commands | ||
|
|
||
| import ( | ||
| "fmt" | ||
|
|
||
| "github.com/docker/model-runner/cmd/cli/commands/completion" | ||
| "github.com/docker/model-runner/cmd/cli/desktop" | ||
| "github.com/docker/model-runner/pkg/inference/models" | ||
| "github.com/spf13/cobra" | ||
| ) | ||
|
|
||
| // newStopCmd builds the "docker model stop MODEL" command. When run, it | ||
| // normalizes the model name, asks the desktop client to unload that model | ||
| // (forwarding the optional --backend flag value), and prints how many runners | ||
| // were unloaded. | ||
| func newStopCmd() *cobra.Command { | ||
| var backend string | ||
|
|
||
| const cmdArgs = "MODEL" | ||
| c := &cobra.Command{ | ||
| Use: "stop " + cmdArgs, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need this and don't use
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've found myself using
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, an alias would be the way to go for this IMO.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree, lets alias, might as well create "docker model load" also |
||
| Short: "Stop a running model", | ||
| RunE: func(cmd *cobra.Command, args []string) error { | ||
| model := models.NormalizeModelName(args[0]) | ||
| unloadResp, err := desktopClient.Unload(desktop.UnloadRequest{Backend: backend, Models: []string{model}}) | ||
| if err != nil { | ||
| err = handleClientError(err, "Failed to stop model") | ||
| return handleNotRunningError(err) | ||
| } | ||
| unloaded := unloadResp.UnloadedRunners | ||
| if unloaded == 0 { | ||
| cmd.Println("No such model running.") | ||
| } else { | ||
| cmd.Printf("Stopped %d model(s).\n", unloaded) | ||
| } | ||
| return nil | ||
| }, | ||
| ValidArgsFunction: completion.NoComplete, | ||
| } | ||
| // Reject invocations without a MODEL argument before RunE indexes args[0]. | ||
| c.Args = func(cmd *cobra.Command, args []string) error { | ||
| if len(args) < 1 { | ||
| // Use real newline escapes ("\n"), not "\\n", so the usage hint is | ||
| // printed on separate lines instead of showing a literal backslash-n. | ||
| return fmt.Errorf( | ||
| "'docker model stop' requires MODEL.\n\n" + | ||
| "Usage: docker model stop " + cmdArgs + "\n\n" + | ||
| "See 'docker model stop --help' for more information.", | ||
| ) | ||
| } | ||
| return nil | ||
| } | ||
| c.Flags().StringVar(&backend, "backend", "", "Optional backend to target") | ||
| return c | ||
| } | ||
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -39,6 +39,14 @@ func handleClientError(err error, message string) error { | |||||||||||||||
| return errors.Join(err, errors.New(message)) | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| // handleNotRunningError is a placeholder for translating "model not running" | ||||||||||||||||
| // errors into a user-friendly message. It currently performs no inspection and | ||||||||||||||||
| // returns err unchanged. | ||||||||||||||||
| func handleNotRunningError(err error) error { | ||||||||||||||||
| // For now, just return the error as-is | ||||||||||||||||
| // This function can be expanded to handle specific "model not running" errors in the future | ||||||||||||||||
| return err | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
|
Comment on lines
+42
to
+49
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is done in the
Suggested change
|
||||||||||||||||
| // stripDefaultsFromModelName removes the default "ai/" prefix and ":latest" tag for display. | ||||||||||||||||
| // Examples: | ||||||||||||||||
| // - "ai/gemma3:latest" -> "gemma3" | ||||||||||||||||
|
|
||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| command: docker model stop | ||
| short: Stop a running model | ||
| long: Stop a running model | ||
| usage: docker model stop MODEL | ||
| pname: docker model | ||
| plink: docker_model.yaml | ||
| options: | ||
| - option: backend | ||
| value_type: string | ||
| description: Optional backend to target | ||
| deprecated: false | ||
| hidden: false | ||
| experimental: false | ||
| experimentalcli: false | ||
| kubernetes: false | ||
| swarm: false | ||
| deprecated: false | ||
| hidden: false | ||
| experimental: false | ||
| experimentalcli: false | ||
| kubernetes: false | ||
| swarm: false | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| # docker model stop | ||
|
|
||
| <!---MARKER_GEN_START--> | ||
| Stop a running model | ||
|
|
||
| ### Options | ||
|
|
||
| | Name | Type | Default | Description | | ||
| |:------------|:---------|:--------|:---------------------------| | ||
| | `--backend` | `string` | | Optional backend to target | | ||
|
|
||
|
|
||
| <!---MARKER_GEN_END--> | ||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The request/response will still be recorded.
Why don't we just load the model without running inference?