Skip to content

Commit 8dd5e6c

Browse files
committed
update
1 parent f4e3912 commit 8dd5e6c

2 files changed

Lines changed: 94 additions & 0 deletions

File tree

bigcode_eval/evaluator.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ def evaluate(self, task_name, intermediate_generations=None):
104104
if self.allow_code_execution and task.requires_execution:
105105
os.environ["HF_ALLOW_CODE_EVAL"] = "1"
106106
print("Evaluating generations...")
107+
del self.model
107108
results = task.process_results(generations, references)
108109
return results
109110

starcoder2-7b-mercury-result.json

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
{
2+
"mercury": {
3+
"Easy_pass@1": 0.6522727272727272,
4+
"Easy_pass@3": 0.7193181818181817,
5+
"Easy_pass@5": 0.7272727272727273,
6+
"Easy_beyond@1": 0.497185477591695,
7+
"Easy_beyond@3": 0.4990392326594245,
8+
"Easy_beyond@5": 0.48808251879601455,
9+
"Medium_pass@1": 0.6271604938271604,
10+
"Medium_pass@3": 0.7000000000000001,
11+
"Medium_pass@5": 0.7160493827160493,
12+
"Medium_beyond@1": 0.5068938378577104,
13+
"Medium_beyond@3": 0.49677158926321824,
14+
"Medium_beyond@5": 0.4976374449174768,
15+
"Hard_pass@1": 0.28965517241379307,
16+
"Hard_pass@3": 0.39425287356321836,
17+
"Hard_pass@5": 0.4367816091954023,
18+
"Hard_beyond@1": 0.1938273397041406,
19+
"Hard_beyond@3": 0.20802385794546693,
20+
"Hard_beyond@5": 0.2020810910984664,
21+
"Average_pass@1": 0.5210937499999999,
22+
"Average_pass@3": 0.602734375,
23+
"Average_pass@5": 0.625,
24+
"Average_beyond@1": 0.39716289628439005,
25+
"Average_beyond@3": 0.39942197986564726,
26+
"Average_beyond@5": 0.3939099594450451,
27+
"Easy": {
28+
"failed@load": 107,
29+
"failed@eval": 14,
30+
"failed@cases": 32,
31+
"failed@timeout": 0,
32+
"failed@error": 0,
33+
"passed": 287
34+
},
35+
"Medium": {
36+
"failed@load": 101,
37+
"failed@eval": 5,
38+
"failed@cases": 45,
39+
"failed@timeout": 0,
40+
"failed@error": 0,
41+
"passed": 254
42+
},
43+
"Hard": {
44+
"failed@load": 22,
45+
"failed@eval": 77,
46+
"failed@cases": 210,
47+
"failed@timeout": 0,
48+
"failed@error": 0,
49+
"passed": 126
50+
}
51+
},
52+
"config": {
53+
"prefix": "",
54+
"do_sample": true,
55+
"temperature": 0.2,
56+
"top_k": 0,
57+
"top_p": 0.95,
58+
"n_samples": 5,
59+
"eos": "<|endoftext|>",
60+
"seed": 0,
61+
"model": "bigcode/starcoder2-7b",
62+
"modeltype": "causal",
63+
"peft_model": null,
64+
"revision": null,
65+
"use_auth_token": false,
66+
"trust_remote_code": false,
67+
"tasks": "mercury",
68+
"instruction_tokens": null,
69+
"batch_size": 5,
70+
"max_length_generation": 2048,
71+
"precision": "fp32",
72+
"load_in_8bit": false,
73+
"load_in_4bit": true,
74+
"left_padding": false,
75+
"limit": null,
76+
"limit_start": 0,
77+
"save_every_k_tasks": -1,
78+
"postprocess": true,
79+
"allow_code_execution": true,
80+
"generation_only": false,
81+
"load_generations_path": null,
82+
"load_data_path": null,
83+
"metric_output_path": "starcoder2-7b-mercury-result.json",
84+
"save_generations": true,
85+
"load_generations_intermediate_paths": null,
86+
"save_generations_path": "generations.json",
87+
"save_references": false,
88+
"save_references_path": "references.json",
89+
"prompt": "prompt",
90+
"max_memory_per_gpu": null,
91+
"check_references": false
92+
}
93+
}

0 commit comments

Comments
 (0)