1+ {
2+ "mercury" : {
3+ "Easy_pass@1" : 0.5886363636363636 ,
4+ "Easy_pass@3" : 0.6772727272727272 ,
5+ "Easy_pass@5" : 0.6931818181818182 ,
6+ "Easy_beyond@1" : 0.4266841736035482 ,
7+ "Easy_beyond@3" : 0.42610832793092135 ,
8+ "Easy_beyond@5" : 0.425783531255473 ,
9+ "Medium_pass@1" : 0.5358024691358024 ,
10+ "Medium_pass@3" : 0.6641975308641975 ,
11+ "Medium_pass@5" : 0.691358024691358 ,
12+ "Medium_beyond@1" : 0.40880182186815306 ,
13+ "Medium_beyond@3" : 0.3870873940702929 ,
14+ "Medium_beyond@5" : 0.38123465036206794 ,
15+ "Hard_pass@1" : 0.25287356321839083 ,
16+ "Hard_pass@3" : 0.3620689655172414 ,
17+ "Hard_pass@5" : 0.40229885057471265 ,
18+ "Hard_beyond@1" : 0.2000108284605912 ,
19+ "Hard_beyond@3" : 0.17081908449838798 ,
20+ "Hard_beyond@5" : 0.18665931819632417 ,
21+ "Average_pass@1" : 0.45781249999999996 ,
22+ "Average_pass@3" : 0.566015625 ,
23+ "Average_pass@5" : 0.59375 ,
24+ "Average_beyond@1" : 0.34399256611134416 ,
25+ "Average_beyond@3" : 0.32700340675380685 ,
26+ "Average_beyond@5" : 0.3304231176284739 ,
27+ "Easy" : {
28+ "failed@load" : 106 ,
29+ "failed@eval" : 16 ,
30+ "failed@cases" : 59 ,
31+ "failed@timeout" : 0 ,
32+ "failed@error" : 0 ,
33+ "passed" : 259
34+ },
35+ "Medium" : {
36+ "failed@load" : 104 ,
37+ "failed@eval" : 8 ,
38+ "failed@cases" : 76 ,
39+ "failed@timeout" : 0 ,
40+ "failed@error" : 0 ,
41+ "passed" : 217
42+ },
43+ "Hard" : {
44+ "failed@load" : 37 ,
45+ "failed@eval" : 63 ,
46+ "failed@cases" : 225 ,
47+ "failed@timeout" : 0 ,
48+ "failed@error" : 0 ,
49+ "passed" : 110
50+ }
51+ },
52+ "config" : {
53+ "prefix" : " " ,
54+ "do_sample" : true ,
55+ "temperature" : 0.2 ,
56+ "top_k" : 0 ,
57+ "top_p" : 0.95 ,
58+ "n_samples" : 5 ,
59+ "eos" : " <|endoftext|>" ,
60+ "seed" : 0 ,
61+ "model" : " /home/mingzhe/Projects/Mercury/checkpoints/deepseek-ai/deepseek-coder-1.3b-base-sft-final_checkpoint" ,
62+ "modeltype" : " causal" ,
63+ "peft_model" : null ,
64+ "revision" : null ,
65+ "use_auth_token" : false ,
66+ "trust_remote_code" : false ,
67+ "tasks" : " mercury" ,
68+ "instruction_tokens" : null ,
69+ "batch_size" : 5 ,
70+ "max_length_generation" : 2048 ,
71+ "precision" : " fp32" ,
72+ "load_in_8bit" : false ,
73+ "load_in_4bit" : true ,
74+ "left_padding" : false ,
75+ "limit" : null ,
76+ "limit_start" : 0 ,
77+ "save_every_k_tasks" : -1 ,
78+ "postprocess" : true ,
79+ "allow_code_execution" : true ,
80+ "generation_only" : false ,
81+ "load_generations_path" : null ,
82+ "load_data_path" : null ,
83+ "metric_output_path" : " deepseek-coder-1.3b-base-SFT-mercury-result.json" ,
84+ "save_generations" : true ,
85+ "load_generations_intermediate_paths" : null ,
86+ "save_generations_path" : " generations.json" ,
87+ "save_references" : false ,
88+ "save_references_path" : " references.json" ,
89+ "prompt" : " prompt" ,
90+ "max_memory_per_gpu" : null ,
91+ "check_references" : false
92+ }
93+ }
0 commit comments