Skip to content

Commit 8aa769e

Browse files
committed
Merge branch 'main' into add-multimodal-demo
2 parents 87c8274 + 247003b commit 8aa769e

1 file changed

Lines changed: 13 additions & 0 deletions

File tree

pkg/inference/scheduling/scheduler.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,13 @@ func (s *Scheduler) routeHandlers() map[string]http.HandlerFunc {
112112
for _, route := range openAIRoutes {
113113
m[route] = s.handleOpenAIInference
114114
}
115+
116+
// Register /v1/models routes - these delegate to the model manager
117+
m["GET "+inference.InferencePrefix+"/{backend}/v1/models"] = s.handleModels
118+
m["GET "+inference.InferencePrefix+"/{backend}/v1/models/{name...}"] = s.handleModels
119+
m["GET "+inference.InferencePrefix+"/v1/models"] = s.handleModels
120+
m["GET "+inference.InferencePrefix+"/v1/models/{name...}"] = s.handleModels
121+
115122
m["GET "+inference.InferencePrefix+"/status"] = s.GetBackendStatus
116123
m["GET "+inference.InferencePrefix+"/ps"] = s.GetRunningBackends
117124
m["GET "+inference.InferencePrefix+"/df"] = s.GetDiskUsage
@@ -502,6 +509,12 @@ func parseBackendMode(mode string) inference.BackendMode {
502509
}
503510
}
504511

512+
// handleModels serves the GET routes registered for it in routeHandlers:
// the /v1/models and /v1/models/{name...} patterns under
// inference.InferencePrefix, both with and without a {backend} path segment.
513+
// It does no processing of its own: the response writer and request are
// passed unchanged to the model manager's ServeHTTP.
514+
func (s *Scheduler) handleModels(w http.ResponseWriter, r *http.Request) {
515+
s.modelManager.ServeHTTP(w, r)
516+
}
517+
505518
// ServeHTTP implements net/http.Handler.ServeHTTP.
506519
func (s *Scheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
507520
s.lock.RLock()

0 commit comments

Comments
 (0)