File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -112,6 +112,13 @@ func (s *Scheduler) routeHandlers() map[string]http.HandlerFunc {
112112 for _ , route := range openAIRoutes {
113113 m [route ] = s .handleOpenAIInference
114114 }
115+
116+ // Register /v1/models routes - these delegate to the model manager
117+ m ["GET " + inference .InferencePrefix + "/{backend}/v1/models" ] = s .handleModels
118+ m ["GET " + inference .InferencePrefix + "/{backend}/v1/models/{name...}" ] = s .handleModels
119+ m ["GET " + inference .InferencePrefix + "/v1/models" ] = s .handleModels
120+ m ["GET " + inference .InferencePrefix + "/v1/models/{name...}" ] = s .handleModels
121+
115122 m ["GET " + inference .InferencePrefix + "/status" ] = s .GetBackendStatus
116123 m ["GET " + inference .InferencePrefix + "/ps" ] = s .GetRunningBackends
117124 m ["GET " + inference .InferencePrefix + "/df" ] = s .GetDiskUsage
@@ -502,6 +509,12 @@ func parseBackendMode(mode string) inference.BackendMode {
502509 }
503510}
504511
512+ // handleModels serves GET <inference.InferencePrefix>[/{backend}]/v1/models[/{name...}]
513+ // by passing the response writer and request unchanged to s.modelManager.ServeHTTP.
514+ func (s * Scheduler ) handleModels (w http.ResponseWriter , r * http.Request ) {
515+ s .modelManager .ServeHTTP (w , r )
516+ }
517+
505518// ServeHTTP implements net/http.Handler.ServeHTTP.
506519func (s * Scheduler ) ServeHTTP (w http.ResponseWriter , r * http.Request ) {
507520 s .lock .RLock ()
You can’t perform that action at this time.
0 commit comments