Skip to content

Commit 8cdcdfe

Browse files
committed
release bigcodebench data 0.1.3
1 parent 80c83b6 commit 8cdcdfe

2 files changed

Lines changed: 60 additions & 1 deletion

File tree

bigcodebench/data/bigcodebench.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
BIGCODEBENCH_OVERRIDE_PATH = os.environ.get("BIGCODEBENCH_OVERRIDE_PATH", None)
1616
BIGCODEBENCH_HF = "bigcode/bigcodebench"
17-
BIGCODEBENCH_VERSION = "v0.1.2"
17+
BIGCODEBENCH_VERSION = "v0.1.3"
1818

1919
def _ready_bigcodebench_path(subset="full", version="default") -> str:
2020
if BIGCODEBENCH_OVERRIDE_PATH:

tools/fix_v022.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
from datasets import load_dataset, Dataset, DatasetDict
2+
from huggingface_hub import HfApi
3+
4+
import json
5+
import copy
6+
7+
BIGCODEBENCH_HF = "bigcode/bigcodebench"  # Hub repo the main dataset dict is loaded from and pushed back to
8+
BIGCODEBENCH_HARD_HF = "bigcode/bigcodebench-hard"  # Hub repo the hard dataset dict is loaded from and pushed back to
9+
BIGCODEBENCH_VERSION = "v0.1.2"  # existing key read from both loaded dataset dicts
10+
BIGCODEBENCH_UPDATE = "bigcode/bcb_update"  # repo_id that receives the per-task old/new JSONL uploads
11+
BIGCODEBENCH_NEW_VERSION = "v0.1.3"  # key under which the patched datasets are stored before pushing
12+
13+
def map_ds(sample):
    """Patch the sample-file references of task ``BigCodeBench/1005``.

    For the matching task, every string field has the ``sample-2`` download
    URL, the ``sample_2`` name and the ``Sample 2`` label replaced by their
    ``sample-5`` / ``sample_5`` / ``Sample 5`` counterparts. Non-string
    fields are left as they are, and samples with any other ``task_id`` are
    returned unchanged.

    Args:
        sample: Mapping of field name to value for one dataset row; it must
            contain a ``"task_id"`` key.

    Returns:
        The same ``sample`` object (modified in place when it matches).
    """
    if sample["task_id"] not in ("BigCodeBench/1005",):
        return sample

    # Applied in this order to every string field, mirroring a chained
    # ``str.replace`` call.
    replacements = (
        (
            "https://getsamplefiles.com/download/zip/sample-2.zip",
            "https://getsamplefiles.com/download/zip/sample-5.zip",
        ),
        ("sample_2", "sample_5"),
        ("Sample 2", "Sample 5"),
    )

    # Iterate over a snapshot of the keys because values are reassigned
    # while looping.
    for key in list(sample.keys()):
        value = sample[key]
        if not isinstance(value, str):
            # Only strings support the textual substitution; calling
            # ``.replace`` on ints, lists or None would raise.
            continue
        for old, new in replacements:
            value = value.replace(old, new)
        sample[key] = value
    return sample
24+
25+
if __name__ == "__main__":
    # Release script: load the current version of both datasets, apply
    # map_ds, store the result under the new version key, push each dataset
    # dict back to its repo, then upload before/after snapshots of the
    # patched rows to the update repo.
    api = HfApi()
    full_splits = load_dataset(BIGCODEBENCH_HF)
    hard_splits = load_dataset(BIGCODEBENCH_HARD_HF)
    ds = full_splits[BIGCODEBENCH_VERSION]
    hard_ds = hard_splits[BIGCODEBENCH_VERSION]
    # Row positions passed to ``select`` below.
    # NOTE(review): presumably row 1005 of the full dataset is task
    # "BigCodeBench/1005" -- confirm that row order matches task ids.
    patched_rows = [1005]

    # Full dataset: patch, dump locally, register the new version, push.
    new_ds = ds.map(map_ds)
    new_ds.to_json("BigCodeBench.jsonl")
    full_splits[BIGCODEBENCH_NEW_VERSION] = new_ds
    full_splits.push_to_hub(BIGCODEBENCH_HF)

    # Hard dataset: same steps against its own repo.
    new_hard_ds = hard_ds.map(map_ds)
    new_hard_ds.to_json("BigCodeBench-Hard.jsonl")
    hard_splits[BIGCODEBENCH_NEW_VERSION] = new_hard_ds
    hard_splits.push_to_hub(BIGCODEBENCH_HARD_HF)

    for row in patched_rows:
        # Filename -> single-row dataset, old version first.
        snapshots = {
            "old.jsonl": ds.select([row]),
            "new.jsonl": new_ds.select([row]),
        }
        # Write both files before uploading either one.
        for filename, subset in snapshots.items():
            subset.to_json(filename)
        for filename in snapshots:
            # NOTE(review): repo_type="dataset" was left commented out in
            # the original call, so the library default is used -- confirm
            # that this matches the type of the target repo.
            api.upload_file(
                path_or_fileobj=filename,
                path_in_repo=f"{row}/{filename}",
                repo_id=BIGCODEBENCH_UPDATE,
            )

0 commit comments

Comments
 (0)