Skip to content

Commit 77c5479

Browse files
refactor: consolidate dataset scenarios by updating bq-search-and-insight and adding bq-insight-and-forecast
1 parent 1b1f1af commit 77c5479

1 file changed

Lines changed: 12 additions & 10 deletions

File tree

evals/dataset.json

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,30 +1,32 @@
11
{
22
"scenarios": [
33
{
4-
"id": "bq-search-catalog",
5-
"starting_prompt": "Search for tables related to sales in project ext-test-bigquery-analytics.",
6-
"conversation_plan": "Ask the agent to search for tables with 'sales' in the prompt.",
4+
"id": "bq-search-and-insight",
5+
"starting_prompt": "Find tables related to sales in project ext-test-bigquery-analytics.",
6+
"conversation_plan": "First, ask the agent to find tables related to sales. Once it lists the tables (which should include 'sales_data' in 'evalbench_ci'), ask it to identify the top product by sales in that table.",
77
"expected_trajectory": [
8-
"search_catalog"
8+
"search_catalog",
9+
"ask_data_insights"
910
],
1011
"env": {
1112
"GOOGLE_CLOUD_PROJECT": "ext-test-bigquery-analytics"
1213
},
1314
"kind": "tools",
14-
"max_turns": 3
15+
"max_turns": 4
1516
},
1617
{
17-
"id": "bq-ask-insights",
18-
"starting_prompt": "What are the top products by sales in the table 'sales_data' in dataset 'sales' and project 'ext-test-bigquery-analytics'?",
19-
"conversation_plan": "Ask the agent to get insights about the sales table.",
18+
"id": "bq-insight-and-forecast",
19+
"starting_prompt": "What are the top products by sales in the table 'sales_data' in dataset 'evalbench_ci' and project 'ext-test-bigquery-analytics'?",
20+
"conversation_plan": "First, ask the agent to find the top products by sales in the sales_data table. After it identifies the top products, ask it to forecast the sales for the top product for the next 5 steps.",
2021
"expected_trajectory": [
21-
"ask_data_insights"
22+
"ask_data_insights",
23+
"forecast"
2224
],
2325
"env": {
2426
"GOOGLE_CLOUD_PROJECT": "ext-test-bigquery-analytics"
2527
},
2628
"kind": "tools",
27-
"max_turns": 3
29+
"max_turns": 4
2830
}
2931
]
3032
}

0 commit comments

Comments
 (0)