Skip to content

Commit f4e3912

Browse files
committed
update
1 parent 6c2ebbf commit f4e3912

7 files changed

Lines changed: 482 additions & 26 deletions

bigcode_eval/generation.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,10 +121,14 @@ def parallel_generations(
121121
has_encoder=args.modeltype == "seq2seq",
122122
instruction_tokens=instruction_tokens,
123123
)
124+
print("TokenizedDataset Finished.")
124125

126+
print("DataLoader Loading...")
125127
# do not confuse args.batch_size, which is actually the num_return_sequences
126128
ds_loader = DataLoader(ds_tokenized, batch_size=1)
129+
print("DataLoader Loaded.")
127130

131+
print("Accelerator preparing...")
128132
is_loaded_in_8bit = getattr(model, "is_loaded_in_8bit", False)
129133
is_loaded_in_4bit = getattr(model, "is_loaded_in_4bit", False)
130134
if args.max_memory_per_gpu is not None:
@@ -138,7 +142,7 @@ def parallel_generations(
138142
# model.to() is not supported for 8bit and 4bit models
139143
model, ds_loader = accelerator.prepare(model, ds_loader)
140144

141-
print("complete_code...")
145+
print("Complete_code...")
142146
generations = complete_code(
143147
task,
144148
accelerator,

bigcode_eval/tasks/custom_metrics/beyond_eval.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -329,9 +329,9 @@ def compute_beyond_eval(generations_list, reference_list, timeout=10):
329329
}
330330

331331
errors = {
332-
"Easy": dict(failed_load=0, failed_eval=0, failed_cases=0, failed_timeout=0, failed_error=0, passed=0),
333-
"Medium": dict(failed_load=0, failed_eval=0, failed_cases=0, failed_timeout=0, failed_error=0, passed=0),
334-
"Hard": dict(failed_load=0, failed_eval=0, failed_cases=0, failed_timeout=0, failed_error=0, passed=0),
332+
"Easy": {"failed@load": 0,"failed@eval": 0,'failed@cases': 0,"failed@timeout": 0,"failed@error": 0,"passed":0},
333+
"Medium": {"failed@load": 0,"failed@eval": 0,"failed@cases": 0,"failed@timeout": 0,"failed@error": 0,"passed":0},
334+
"Hard": {"failed@load": 0,"failed@eval": 0,"failed@cases": 0,"failed@timeout": 0,"failed@error": 0,"passed":0},
335335
}
336336

337337
for generations, instance in tqdm(zip(generations_list, reference_list), total=len(generations_list), desc='compute_beyond_eval'):
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
{
2+
"mercury": {
3+
"Easy_pass@1": 0.5886363636363636,
4+
"Easy_pass@3": 0.6772727272727272,
5+
"Easy_pass@5": 0.6931818181818182,
6+
"Easy_beyond@1": 0.4266841736035482,
7+
"Easy_beyond@3": 0.42610832793092135,
8+
"Easy_beyond@5": 0.425783531255473,
9+
"Medium_pass@1": 0.5358024691358024,
10+
"Medium_pass@3": 0.6641975308641975,
11+
"Medium_pass@5": 0.691358024691358,
12+
"Medium_beyond@1": 0.40880182186815306,
13+
"Medium_beyond@3": 0.3870873940702929,
14+
"Medium_beyond@5": 0.38123465036206794,
15+
"Hard_pass@1": 0.25287356321839083,
16+
"Hard_pass@3": 0.3620689655172414,
17+
"Hard_pass@5": 0.40229885057471265,
18+
"Hard_beyond@1": 0.2000108284605912,
19+
"Hard_beyond@3": 0.17081908449838798,
20+
"Hard_beyond@5": 0.18665931819632417,
21+
"Average_pass@1": 0.45781249999999996,
22+
"Average_pass@3": 0.566015625,
23+
"Average_pass@5": 0.59375,
24+
"Average_beyond@1": 0.34399256611134416,
25+
"Average_beyond@3": 0.32700340675380685,
26+
"Average_beyond@5": 0.3304231176284739,
27+
"Easy": {
28+
"failed@load": 106,
29+
"failed@eval": 16,
30+
"failed@cases": 59,
31+
"failed@timeout": 0,
32+
"failed@error": 0,
33+
"passed": 259
34+
},
35+
"Medium": {
36+
"failed@load": 104,
37+
"failed@eval": 8,
38+
"failed@cases": 76,
39+
"failed@timeout": 0,
40+
"failed@error": 0,
41+
"passed": 217
42+
},
43+
"Hard": {
44+
"failed@load": 37,
45+
"failed@eval": 63,
46+
"failed@cases": 225,
47+
"failed@timeout": 0,
48+
"failed@error": 0,
49+
"passed": 110
50+
}
51+
},
52+
"config": {
53+
"prefix": "",
54+
"do_sample": true,
55+
"temperature": 0.2,
56+
"top_k": 0,
57+
"top_p": 0.95,
58+
"n_samples": 5,
59+
"eos": "<|endoftext|>",
60+
"seed": 0,
61+
"model": "/home/mingzhe/Projects/Mercury/checkpoints/deepseek-ai/deepseek-coder-1.3b-base-sft-final_checkpoint",
62+
"modeltype": "causal",
63+
"peft_model": null,
64+
"revision": null,
65+
"use_auth_token": false,
66+
"trust_remote_code": false,
67+
"tasks": "mercury",
68+
"instruction_tokens": null,
69+
"batch_size": 5,
70+
"max_length_generation": 2048,
71+
"precision": "fp32",
72+
"load_in_8bit": false,
73+
"load_in_4bit": true,
74+
"left_padding": false,
75+
"limit": null,
76+
"limit_start": 0,
77+
"save_every_k_tasks": -1,
78+
"postprocess": true,
79+
"allow_code_execution": true,
80+
"generation_only": false,
81+
"load_generations_path": null,
82+
"load_data_path": null,
83+
"metric_output_path": "deepseek-coder-1.3b-base-SFT-mercury-result.json",
84+
"save_generations": true,
85+
"load_generations_intermediate_paths": null,
86+
"save_generations_path": "generations.json",
87+
"save_references": false,
88+
"save_references_path": "references.json",
89+
"prompt": "prompt",
90+
"max_memory_per_gpu": null,
91+
"check_references": false
92+
}
93+
}
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
{
2+
"mercury": {
3+
"Easy_pass@1": 0.6090909090909091,
4+
"Easy_pass@3": 0.7227272727272728,
5+
"Easy_pass@5": 0.7613636363636364,
6+
"Easy_beyond@1": 0.39631002329427506,
7+
"Easy_beyond@3": 0.4231957886134728,
8+
"Easy_beyond@5": 0.42617706833178476,
9+
"Medium_pass@1": 0.5333333333333333,
10+
"Medium_pass@3": 0.7222222222222222,
11+
"Medium_pass@5": 0.7901234567901234,
12+
"Medium_beyond@1": 0.40183295548897685,
13+
"Medium_beyond@3": 0.39689597812757504,
14+
"Medium_beyond@5": 0.38881432300601,
15+
"Hard_pass@1": 0.23448275862068965,
16+
"Hard_pass@3": 0.3264367816091954,
17+
"Hard_pass@5": 0.3563218390804598,
18+
"Hard_beyond@1": 0.17817100930156568,
19+
"Hard_beyond@3": 0.18359816292882367,
20+
"Hard_beyond@5": 0.18548098359439458,
21+
"Average_pass@1": 0.4578125,
22+
"Average_pass@3": 0.587890625,
23+
"Average_pass@5": 0.6328125,
24+
"Average_beyond@1": 0.32392433302242013,
25+
"Average_beyond@3": 0.3334486085981518,
26+
"Average_beyond@5": 0.33255620214607884,
27+
"Easy": {
28+
"failed@load": 81,
29+
"failed@eval": 17,
30+
"failed@cases": 74,
31+
"failed@timeout": 0,
32+
"failed@error": 0,
33+
"passed": 268
34+
},
35+
"Medium": {
36+
"failed@load": 85,
37+
"failed@eval": 31,
38+
"failed@cases": 73,
39+
"failed@timeout": 0,
40+
"failed@error": 0,
41+
"passed": 216
42+
},
43+
"Hard": {
44+
"failed@load": 55,
45+
"failed@eval": 98,
46+
"failed@cases": 180,
47+
"failed@timeout": 0,
48+
"failed@error": 0,
49+
"passed": 102
50+
}
51+
},
52+
"config": {
53+
"prefix": "",
54+
"do_sample": true,
55+
"temperature": 0.2,
56+
"top_k": 0,
57+
"top_p": 0.95,
58+
"n_samples": 5,
59+
"eos": "<|endoftext|>",
60+
"seed": 0,
61+
"model": "deepseek-ai/deepseek-coder-1.3b-base",
62+
"modeltype": "causal",
63+
"peft_model": null,
64+
"revision": null,
65+
"use_auth_token": false,
66+
"trust_remote_code": false,
67+
"tasks": "mercury",
68+
"instruction_tokens": null,
69+
"batch_size": 12,
70+
"max_length_generation": 2048,
71+
"precision": "fp32",
72+
"load_in_8bit": false,
73+
"load_in_4bit": true,
74+
"left_padding": false,
75+
"limit": null,
76+
"limit_start": 0,
77+
"save_every_k_tasks": -1,
78+
"postprocess": true,
79+
"allow_code_execution": true,
80+
"generation_only": false,
81+
"load_generations_path": null,
82+
"load_data_path": null,
83+
"metric_output_path": "deepseek-coder-1.3b-base-mercury-result.json",
84+
"save_generations": true,
85+
"load_generations_intermediate_paths": null,
86+
"save_generations_path": "generations.json",
87+
"save_references": false,
88+
"save_references_path": "references.json",
89+
"prompt": "prompt",
90+
"max_memory_per_gpu": null,
91+
"check_references": false
92+
}
93+
}

0 commit comments

Comments
 (0)