@@ -189,6 +189,10 @@ def evaluate(
189189
190190 # run the evaluation
191191 print (f"Command run in sandbox { e2b_endpoint } " )
192+ if not isinstance (pass_k , str ):
193+ pass_k = "," .join (map (str , pass_k ))
194+ if not isinstance (selective_evaluate , str ):
195+ selective_evaluate = "," .join (map (str , selective_evaluate ))
192196 sandbox .commands .run ("bigcodebench.evaluate --execution 'local' "
193197 f"--split { split } --subset { subset } --samples { samples } "
194198 f"--pass_k { pass_k } --save_pass_rate { save_pass_rate } --calibrated { calibrated } "
@@ -206,10 +210,7 @@ def evaluate(
206210
207211 pass_at_k = dict ()
208212
209- if isinstance (pass_k , str ):
210- passk = [int (k ) for k in pass_k .split ("," )]
211- else :
212- passk = pass_k
213+ passk = [int (k ) for k in pass_k .split ("," )]
213214
214215 if parallel < 1 :
215216 n_workers = max (1 , multiprocessing .cpu_count () // 2 )
@@ -224,7 +225,10 @@ def evaluate(
224225
225226 # Add selective evaluation logic
226227 if selective_evaluate :
227- selected_ids = set (selective_evaluate .split ("," ))
228+ if isinstance (selective_evaluate , str ):
229+ selected_ids = set (selective_evaluate .split ("," ))
230+ else :
231+ selected_ids = set (selective_evaluate )
228232 problems = {k : v for k , v in problems .items () if k in selected_ids }
229233 if not problems :
230234 raise ValueError (f"None of the provided task IDs { selected_ids } were found in the dataset" )
0 commit comments