1+ {
2+ "mercury" : {
3+ "Easy_pass@1" : 0.6522727272727272 ,
4+ "Easy_pass@3" : 0.7193181818181817 ,
5+ "Easy_pass@5" : 0.7272727272727273 ,
6+ "Easy_beyond@1" : 0.497185477591695 ,
7+ "Easy_beyond@3" : 0.4990392326594245 ,
8+ "Easy_beyond@5" : 0.48808251879601455 ,
9+ "Medium_pass@1" : 0.6271604938271604 ,
10+ "Medium_pass@3" : 0.7000000000000001 ,
11+ "Medium_pass@5" : 0.7160493827160493 ,
12+ "Medium_beyond@1" : 0.5068938378577104 ,
13+ "Medium_beyond@3" : 0.49677158926321824 ,
14+ "Medium_beyond@5" : 0.4976374449174768 ,
15+ "Hard_pass@1" : 0.28965517241379307 ,
16+ "Hard_pass@3" : 0.39425287356321836 ,
17+ "Hard_pass@5" : 0.4367816091954023 ,
18+ "Hard_beyond@1" : 0.1938273397041406 ,
19+ "Hard_beyond@3" : 0.20802385794546693 ,
20+ "Hard_beyond@5" : 0.2020810910984664 ,
21+ "Average_pass@1" : 0.5210937499999999 ,
22+ "Average_pass@3" : 0.602734375 ,
23+ "Average_pass@5" : 0.625 ,
24+ "Average_beyond@1" : 0.39716289628439005 ,
25+ "Average_beyond@3" : 0.39942197986564726 ,
26+ "Average_beyond@5" : 0.3939099594450451 ,
27+ "Easy" : {
28+ "failed@load" : 107 ,
29+ "failed@eval" : 14 ,
30+ "failed@cases" : 32 ,
31+ "failed@timeout" : 0 ,
32+ "failed@error" : 0 ,
33+ "passed" : 287
34+ },
35+ "Medium" : {
36+ "failed@load" : 101 ,
37+ "failed@eval" : 5 ,
38+ "failed@cases" : 45 ,
39+ "failed@timeout" : 0 ,
40+ "failed@error" : 0 ,
41+ "passed" : 254
42+ },
43+ "Hard" : {
44+ "failed@load" : 22 ,
45+ "failed@eval" : 77 ,
46+ "failed@cases" : 210 ,
47+ "failed@timeout" : 0 ,
48+ "failed@error" : 0 ,
49+ "passed" : 126
50+ }
51+ },
52+ "config" : {
53+ "prefix" : " " ,
54+ "do_sample" : true ,
55+ "temperature" : 0.2 ,
56+ "top_k" : 0 ,
57+ "top_p" : 0.95 ,
58+ "n_samples" : 5 ,
59+ "eos" : " <|endoftext|>" ,
60+ "seed" : 0 ,
61+ "model" : " bigcode/starcoder2-7b" ,
62+ "modeltype" : " causal" ,
63+ "peft_model" : null ,
64+ "revision" : null ,
65+ "use_auth_token" : false ,
66+ "trust_remote_code" : false ,
67+ "tasks" : " mercury" ,
68+ "instruction_tokens" : null ,
69+ "batch_size" : 5 ,
70+ "max_length_generation" : 2048 ,
71+ "precision" : " fp32" ,
72+ "load_in_8bit" : false ,
73+ "load_in_4bit" : true ,
74+ "left_padding" : false ,
75+ "limit" : null ,
76+ "limit_start" : 0 ,
77+ "save_every_k_tasks" : -1 ,
78+ "postprocess" : true ,
79+ "allow_code_execution" : true ,
80+ "generation_only" : false ,
81+ "load_generations_path" : null ,
82+ "load_data_path" : null ,
83+ "metric_output_path" : " starcoder2-7b-mercury-result.json" ,
84+ "save_generations" : true ,
85+ "load_generations_intermediate_paths" : null ,
86+ "save_generations_path" : " generations.json" ,
87+ "save_references" : false ,
88+ "save_references_path" : " references.json" ,
89+ "prompt" : " prompt" ,
90+ "max_memory_per_gpu" : null ,
91+ "check_references" : false
92+ }
93+ }
0 commit comments