/* Utility class: takes an element out of the layout entirely. */
.hidden {
    display: none;
}

/* Yellow notice shown under the model selector. Hidden by default; the
   script reveals it by adding the "show" class to this same element. */
#modelWarning {
    background-color: #fff3cd;
    color: #856404;
    border: 1px solid #ffeaa7;
    border-radius: 4px;
    padding: 10px;
    margin-top: 5px;
    font-size: 14px;
    width: 100%;
    box-sizing: border-box;
    display: none;
}

/* Compound selector (no space): "show" is toggled on #modelWarning itself,
   so a descendant selector would never match. */
#modelWarning.show {
    display: block;
}

/* Links inherit the notice colour so they stay readable on the yellow box. */
#modelWarning a {
    color: #856404;
    text-decoration: underline;
}

/* Blue informational notice; always visible. */
#modelInfo {
    background-color: #d1ecf1;
    color: #0c5460;
    border: 1px solid #bee5eb;
    border-radius: 4px;
    padding: 10px;
    margin-top: 5px;
    font-size: 14px;
    width: 100%;
    box-sizing: border-box;
}

#modelInfo a {
    color: #0c5460;
    text-decoration: underline;
}
70104 </ style >
71105</ head >
72106< body >
@@ -79,7 +113,15 @@ <h1>Camera Interaction App</h1>
79113< div class ="io-areas ">
<!-- Base URL of the API server; the page script reads this field's value
     when it builds request URLs. Exactly one element may carry this id. -->
<div>
    <label for="baseURL">Base API:</label><br>
    <input id="baseURL" name="baseURL" value="http://127.0.0.1:12434/engines/llama.cpp" style="width: 20em;">
</div>
<!-- Model picker. The placeholder option is replaced by the page script
     once the model list has been fetched. The ids must match the
     getElementById lookups exactly (no surrounding whitespace). -->
<div>
    <label for="modelSelect">Model:</label><br>
    <select id="modelSelect" name="Model" style="width: 40em; padding: 8px;">
        <option value="">Loading models...</option>
    </select>
    <!-- Filled in by script; only visible while it carries the "show" class. -->
    <div id="modelWarning"> </div>
    <!-- Filled in by script with instructions for pulling models. -->
    <div id="modelInfo"> </div>
</div>
84126 < div >
85127 < label for ="instructionText "> Instruction:</ label > < br >
@@ -107,6 +149,9 @@ <h1>Camera Interaction App</h1>
107149 const video = document . getElementById ( 'videoFeed' ) ;
108150 const canvas = document . getElementById ( 'canvas' ) ;
109151 const baseURL = document . getElementById ( 'baseURL' ) ;
152+ const modelSelect = document . getElementById ( 'modelSelect' ) ;
153+ const modelWarning = document . getElementById ( 'modelWarning' ) ;
154+ const modelInfo = document . getElementById ( 'modelInfo' ) ;
110155 const instructionText = document . getElementById ( 'instructionText' ) ;
111156 const responseText = document . getElementById ( 'responseText' ) ;
112157 const intervalSelect = document . getElementById ( 'intervalSelect' ) ;
@@ -119,6 +164,62 @@ <h1>Camera Interaction App</h1>
119164 let isProcessing = false ;
120165 let isWaitingForResponse = false ;
121166
const RECOMMENDED_MODEL = 'ai/smolvlm:500M-Q8_0';

/**
 * Fetch the model list from `<baseURL.value>/v1/models` and rebuild the
 * model selector from it.
 *
 * Side effects on the module-level DOM references:
 *  - `modelInfo` always receives the static "how to pull a model" hint.
 *  - `modelSelect` gets one option per returned model id, or a single
 *    placeholder option when the list is empty or the request failed.
 *  - `modelWarning` receives a message and the `show` class when no models
 *    exist, when the recommended model is missing, or when loading failed;
 *    the class is removed when the recommended model is available.
 *
 * Request, HTTP-status and response-shape failures are caught, logged to the
 * console and reported in `modelWarning` instead of being rethrown.
 *
 * @returns {Promise<void>}
 */
async function fetchModels() {
  // Text interpolated into innerHTML must be escaped, otherwise markup in an
  // error message or in the user-editable base URL is parsed as HTML.
  const escapeHtml = (text) =>
    String(text)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');

  const showWarning = (html) => {
    modelWarning.innerHTML = html;
    modelWarning.classList.add('show');
  };

  // Captured once so the error message names the URL that was actually requested.
  const modelsURL = `${baseURL.value}/v1/models`;

  // Base info message - always shown in modelInfo. The placeholder is written
  // with entities; a literal <model-name> would be parsed as an element and
  // vanish from the rendered text.
  modelInfo.innerHTML = `ℹ️ To pull a model, run: <code>docker model pull &lt;model-name&gt;</code><br>Find more models at: <a href="https://hub.docker.com/r/ai" target="_blank">https://hub.docker.com/r/ai</a>`;

  try {
    const response = await fetch(modelsURL);
    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    const data = await response.json();
    if (!data || !Array.isArray(data.data)) {
      // Give the user a readable reason instead of an opaque TypeError.
      throw new Error('Unexpected response format: missing "data" array');
    }
    const models = data.data.map((model) => model.id);

    // Clear and populate the model selector
    modelSelect.innerHTML = '';

    if (models.length === 0) {
      modelSelect.innerHTML = '<option value="">No models available</option>';
      showWarning(`⚠️ No models found. Please ensure the model runner is active and models are loaded.`);
      return;
    }

    // textContent keeps model ids from being interpreted as markup.
    models.forEach((modelId) => {
      const option = document.createElement('option');
      option.value = modelId;
      option.textContent = modelId;
      modelSelect.appendChild(option);
    });

    if (models.includes(RECOMMENDED_MODEL)) {
      modelSelect.value = RECOMMENDED_MODEL;
      // Hide warning - everything is good
      modelWarning.classList.remove('show');
    } else {
      // Fall back to the first model and suggest the recommended one.
      modelSelect.value = models[0];
      showWarning(`💡 We recommend to run this demo with <code>${RECOMMENDED_MODEL}</code>`);
    }
  } catch (error) {
    console.error('Error fetching models:', error);
    modelSelect.innerHTML = '<option value="">Error loading models</option>';
    showWarning(`⚠️ Error loading models: ${escapeHtml(error.message)}<br>Please check that the API is accessible at ${escapeHtml(modelsURL)}`);
  }
}
222+
122223 // Returns response text (string)
123224 async function sendChatCompletionRequest ( instruction , imageBase64URL ) {
124225 isWaitingForResponse = true ;
@@ -128,7 +229,7 @@ <h1>Camera Interaction App</h1>
128229 'Content-Type' : 'application/json'
129230 } ,
130231 body : JSON . stringify ( {
131- model : 'ai/smolvlm:500M-Q8_0' ,
232+ model : modelSelect . value ,
132233 max_tokens : 100 ,
133234 messages : [
134235 { role : 'user' , content : [
@@ -143,6 +244,15 @@ <h1>Camera Interaction App</h1>
143244 isWaitingForResponse = false ;
144245 if ( ! response . ok ) {
145246 const errorData = await response . text ( ) ;
247+ try {
248+ const errorJson = JSON . parse ( errorData ) ;
249+ if ( errorJson . error && errorJson . error . message &&
250+ errorJson . error . message . includes ( 'image input is not supported' ) ) {
251+ return `⚠️ This model doesn't support vision. Please select a vision-capable model like '${ RECOMMENDED_MODEL } '.` ;
252+ }
253+ } catch ( e ) {
254+ // If parsing fails, fall through to generic error
255+ }
146256 return `Server error: ${ response . status } - ${ errorData } ` ;
147257 }
148258 const data = await response . json ( ) ;
@@ -248,8 +358,11 @@ <h1>Camera Interaction App</h1>
248358 }
249359 } ) ;
250360
// Initialize camera and fetch models when the page loads
window.addEventListener('DOMContentLoaded', () => {
  initCamera();
  // fetchModels reports request failures in the page, so the returned
  // promise is intentionally not awaited here.
  fetchModels();

  // The model list belongs to the server named in the Base API field.
  // Reload it when that field is edited so the user can recover from a
  // wrong URL without reloading the page.
  baseURL.addEventListener('change', () => {
    fetchModels();
  });
});
253366
254367 // Optional: Stop stream when page is closed/navigated away to release camera
255368 window . addEventListener ( 'beforeunload' , ( ) => {
@@ -263,4 +376,4 @@ <h1>Camera Interaction App</h1>
263376
264377</ script >
265378</ body >
266- </ html >
379+ </ html >
0 commit comments