@@ -97,6 +97,7 @@ func (h *HTTPHandler) routeHandlers() map[string]http.HandlerFunc {
9797 m ["GET " + inference .InferencePrefix + "/v1/models" ] = h .handleModels
9898 m ["GET " + inference .InferencePrefix + "/v1/models/{name...}" ] = h .handleModels
9999
100+ m ["POST " + inference .InferencePrefix + "/install-backend" ] = h .InstallBackend
100101 m ["GET " + inference .InferencePrefix + "/status" ] = h .GetBackendStatus
101102 m ["GET " + inference .InferencePrefix + "/ps" ] = h .GetRunningBackends
102103 m ["GET " + inference .InferencePrefix + "/df" ] = h .GetDiskUsage
@@ -201,6 +202,8 @@ func (h *HTTPHandler) handleOpenAIInference(w http.ResponseWriter, r *http.Reque
201202 // don't allow any requests to be scheduled for a backend until it has
202203 // completed installation.
203204 if err := h .scheduler .installer .wait (r .Context (), backend .Name ()); err != nil {
205+ h .scheduler .log .Warnln ("ALOHA" , err )
206+
204207 if errors .Is (err , ErrBackendNotFound ) {
205208 http .Error (w , err .Error (), http .StatusNotFound )
206209 } else if errors .Is (err , errInstallerNotStarted ) {
@@ -211,6 +214,8 @@ func (h *HTTPHandler) handleOpenAIInference(w http.ResponseWriter, r *http.Reque
211214 // shutting down (since that will also cancel the request context).
212215 // Either way, provide a response, even if it's ignored.
213216 http .Error (w , "service unavailable" , http .StatusServiceUnavailable )
217+ } else if errors .Is (err , errBackendNotInstalled ) {
218+ http .Error (w , fmt .Sprintf ("backend %q is not installed; run: docker model install-runner --backend %s" , backend .Name (), backend .Name ()), http .StatusPreconditionFailed )
214219 } else if errors .Is (err , vllm .ErrorNotFound ) {
215220 http .Error (w , err .Error (), http .StatusPreconditionFailed )
216221 } else {
@@ -336,6 +341,38 @@ func (h *HTTPHandler) Unload(w http.ResponseWriter, r *http.Request) {
336341 }
337342}
338343
// installBackendRequest models the JSON payload accepted by the
// install-backend endpoint.
type installBackendRequest struct {
	// Backend is decoded from the "backend" key of the request body and
	// identifies the backend whose installation is being requested.
	Backend string `json:"backend"`
}
348+
349+ // InstallBackend handles POST <inference-prefix>/install-backend requests.
350+ // It triggers on-demand installation of a deferred backend.
351+ func (h * HTTPHandler ) InstallBackend (w http.ResponseWriter , r * http.Request ) {
352+ body , err := io .ReadAll (http .MaxBytesReader (w , r .Body , maximumOpenAIInferenceRequestSize ))
353+ if err != nil {
354+ http .Error (w , "failed to read request body" , http .StatusInternalServerError )
355+ return
356+ }
357+
358+ var req installBackendRequest
359+ if err := json .Unmarshal (body , & req ); err != nil || req .Backend == "" {
360+ http .Error (w , "invalid request: backend is required" , http .StatusBadRequest )
361+ return
362+ }
363+
364+ if err := h .scheduler .InstallBackend (r .Context (), req .Backend ); err != nil {
365+ if errors .Is (err , ErrBackendNotFound ) {
366+ http .Error (w , err .Error (), http .StatusNotFound )
367+ } else {
368+ http .Error (w , fmt .Sprintf ("backend installation failed: %v" , err ), http .StatusInternalServerError )
369+ }
370+ return
371+ }
372+
373+ w .WriteHeader (http .StatusOK )
374+ }
375+
339376// Configure handles POST <inference-prefix>/{backend}/_configure requests.
340377func (h * HTTPHandler ) Configure (w http.ResponseWriter , r * http.Request ) {
341378 // Determine the requested backend and ensure that it's valid.
0 commit comments