chore: update dataset prompt phrasing and add skills evaluation configs to run_config

omkargaikwad23 · omkargaikwad23 · commit fa0f857bbcc7 · 2026-05-04T07:49:59.000Z
diff --git a/evals/dataset.json b/evals/dataset.json
@@ -17,7 +17,7 @@
     {
       "id": "cloud-sql-schema-tables-explore",
       "starting_prompt": "I want to understand the structure of my database.",
-      "conversation_plan": "First, ask the agent to list the databases in the instance. After the agent provides the databases, ask it to list the tables specifically for the database.",
+      "conversation_plan": "First, ask the agent to list the databases in the instance. After the agent provides the databases, ask it to list the tables specifically for that database.",
       "expected_trajectory": [
         "list_databases",
         "list_tables"
diff --git a/evals/run_config.yaml b/evals/run_config.yaml
@@ -25,12 +25,15 @@ scorers:
     model_config: /workspace/evals/gemini_2.5_pro_model.yaml
   behavioral_metrics:
     model_config: /workspace/evals/gemini_2.5_pro_model.yaml
+  skills_best_practices:
+    model_config: /workspace/evals/gemini_2.5_pro_model.yaml
 
   # Performance
   turn_count: {}
   end_to_end_latency: {}
   tool_call_latency: {}
   token_consumption: {}
+  skills_trajectory: {}
 
 reporting:
   bigquery:

Original file line number	Diff line number	Diff line change
`@@ -17,7 +17,7 @@`
`17`	`17`	`{`
`18`	`18`	`"id": "cloud-sql-schema-tables-explore",`
`19`	`19`	`"starting_prompt": "I want to understand the structure of my database.",`
`20`		`- "conversation_plan": "First, ask the agent to list the databases in the instance. After the agent provides the databases, ask it to list the tables specifically for the database.",`
	`20`	`+ "conversation_plan": "First, ask the agent to list the databases in the instance. After the agent provides the databases, ask it to list the tables specifically for that database.",`
`21`	`21`	`"expected_trajectory": [`
`22`	`22`	`"list_databases",`
`23`	`23`	`"list_tables"`