Skip to content

Commit 6c2ebbf

Browse files
committed
update
1 parent 7117b47 commit 6c2ebbf

2 files changed

Lines changed: 16 additions & 5 deletions

File tree

bigcode_eval/tasks/custom_metrics/beyond_eval.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,12 @@ def compute_beyond_eval(generations_list, reference_list, timeout=10):
328328
"Average": dict(total_c=list(), correct_c=list(), beyond_c=list()),
329329
}
330330

331+
errors = {
332+
"Easy": dict(failed_load=0, failed_eval=0, failed_cases=0, failed_timeout=0, failed_error=0, passed=0),
333+
"Medium": dict(failed_load=0, failed_eval=0, failed_cases=0, failed_timeout=0, failed_error=0, passed=0),
334+
"Hard": dict(failed_load=0, failed_eval=0, failed_cases=0, failed_timeout=0, failed_error=0, passed=0),
335+
}
336+
331337
for generations, instance in tqdm(zip(generations_list, reference_list), total=len(generations_list), desc='compute_beyond_eval'):
332338
# Construct runtime distribution from sample solutions
333339
runtimes = list()
@@ -377,6 +383,9 @@ def compute_beyond_eval(generations_list, reference_list, timeout=10):
377383
p_c += 1
378384
else:
379385
runtime = float('inf')
386+
387+
# Error statistics: count the outcome in results[0]['result'] for this difficulty
388+
errors[difficulty][results[0]['result']] += 1
380389

381390
runtime = min(runtime, max_runtime)
382391
runtime = max(runtime, min_runtime)
@@ -390,10 +399,10 @@ def compute_beyond_eval(generations_list, reference_list, timeout=10):
390399
scores['Average']['correct_c'] += [p_c]
391400
scores['Average']['beyond_c'] += [b_l]
392401

393-
print(f'total: {t_c}')
394-
print(f'correct: {p_c}')
395-
print(f'beyond: {b_l}')
396-
print("-" * 60)
402+
# print(f'total: {t_c}')
403+
# print(f'correct: {p_c}')
404+
# print(f'beyond: {b_l}')
405+
# print("-" * 60)
397406

398407
results = dict()
399408
for difficulty in ['Easy', "Medium", "Hard", "Average"]:
@@ -406,6 +415,8 @@ def compute_beyond_eval(generations_list, reference_list, timeout=10):
406415

407416
results.update(pass_at_k)
408417
results.update(beyond_at_k)
418+
419+
results.update(errors)
409420

410421
return results
411422

mercury.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ accelerate launch --main_process_port 30002 main.py \
2222
--save_generations \
2323
--metric_output_path starcoder2-7b-mercury-result.json
2424

25-
accelerate launch --main_process_port 30005 main.py \
25+
accelerate launch --main_process_port 30002 main.py \
2626
--model /home/mingzhe/Projects/Mercury/checkpoints/bigcode/starcoder2-3b-sft-final_checkpoint \
2727
--load_in_4bit \
2828
--max_length_generation 2048 \

0 commit comments

Comments
 (0)