We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent ff8f629, commit c957071 (Copy full SHA for c957071)
1 file changed
bigcodebench/evaluate.py
@@ -110,6 +110,9 @@ def evaluate(flags):
110
assert flags.samples.endswith(".jsonl")
111
result_path = flags.samples.replace(".jsonl", "_eval_results.json")
112
113
+ problems = get_bigcodebench()
114
+ dataset_hash = get_bigcodebench_hash()
115
+
116
if not flags.no_gt:
117
expected_time = get_groundtruth(problems, dataset_hash, flags.check_gt_only)
118
else:
@@ -122,8 +125,6 @@ def evaluate(flags):
122
125
123
126
results = compatible_eval_result(results)
124
127
- problems = get_bigcodebench()
- dataset_hash = get_bigcodebench_hash()
128
129
if flags.check_gt_only:
130
return
0 commit comments