Skip to content

Commit ae11bdf

Browse files
feat: inject CLOUD_SQL_POSTGRES_PASSWORD into build step and update evaluation dataset scenarios
1 parent 8b9462c commit ae11bdf

3 files changed

Lines changed: 51 additions & 6 deletions

File tree

cloudbuild.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ steps:
4949
# --- STEP 3: Fully Integrated Evaluation to Persist Results ---
5050
- name: 'us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:latest'
5151
entrypoint: 'bash'
52-
secretEnv: ['DB_PASSWORD']
52+
# Exposes the Secret Manager secret to this step as the DB_PASSWORD environment variable
53+
secretEnv: ['DB_PASSWORD']
5354
args:
5455
- '-c'
5556
- |
@@ -58,6 +59,8 @@ steps:
5859
5960
export EVAL_GCP_PROJECT_ID=$PROJECT_ID
6061
export EVAL_GCP_PROJECT_REGION=us-central1
62+
# Re-exports DB_PASSWORD under the variable name expected by gemini_cli and the extension skills; `$$` escapes Cloud Build substitution so bash expands the variable at runtime
63+
export CLOUD_SQL_POSTGRES_PASSWORD=$$DB_PASSWORD
6164
6265
echo "Patching client to use insecure credentials..."
6366

evals/dataset.json

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,56 @@
11
{
22
"scenarios": [
33
{
4-
"id": "cloud-sql-debug-01",
5-
"starting_prompt": "I need to debug the database.",
6-
"conversation_plan": "Ask the agent to list instances in project ext-test-cloud-sql-postgres. Once listed, ask it to check the CPU usage of the first instance. Finally, ask if that usage is considered high.",
4+
"id": "cloud-sql-list-instances",
5+
"starting_prompt": "Show me all the Cloud SQL instances in this project.",
6+
"conversation_plan": "Ask the agent to list the Cloud SQL instances in the current project.",
77
"expected_trajectory": [
88
"list_instances"
99
],
1010
"kind": "tool",
11-
"max_turns": 15
11+
"max_turns": 5
12+
},
13+
{
14+
"id": "cloud-sql-data-explore",
15+
"starting_prompt": "What schemas and tables do we have in this database? Please list them.",
16+
"conversation_plan": "Ask the agent to list the schemas in the database. Then ask to list the tables.",
17+
"expected_trajectory": [
18+
"list_schemas",
19+
"list_tables"
20+
],
21+
"kind": "tool",
22+
"max_turns": 5
23+
},
24+
{
25+
"id": "cloud-sql-perf-troubleshoot",
26+
"starting_prompt": "The database is running slow. Are there any active queries running for more than 10 seconds or any locks?",
27+
"conversation_plan": "Ask the agent to check for active queries running longer than 10 seconds. Then ask to check for locks.",
28+
"expected_trajectory": [
29+
"list_active_queries",
30+
"list_locks"
31+
],
32+
"kind": "tool",
33+
"max_turns": 5
34+
},
35+
{
36+
"id": "cloud-sql-metrics-cpu",
37+
"starting_prompt": "Can you show me the CPU utilization for instance 'daily-ci-evals-db' in project 'ext-test-cloud-sql-postgres' for the last 5 minutes?",
38+
"conversation_plan": "Ask the agent to query the CPU utilization metric for the specified instance and project using PromQL.",
39+
"expected_trajectory": [
40+
"get_system_metrics"
41+
],
42+
"kind": "tool",
43+
"max_turns": 4
44+
},
45+
{
46+
"id": "cloud-sql-unused-indexes",
47+
"starting_prompt": "Are there any unused indexes in the database that we can clean up?",
48+
"conversation_plan": "Ask the agent to list unused indexes in the database.",
49+
"expected_trajectory": [
50+
"list_indexes"
51+
],
52+
"kind": "tool",
53+
"max_turns": 4
1254
}
1355
]
1456
}

evals/model_config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ env:
2121
setup:
2222
extensions:
2323
"/workspace":
24-
settings:
24+
settings:
2525
CLOUD_SQL_POSTGRES_PROJECT: "ext-test-cloud-sql-postgres"
2626
CLOUD_SQL_POSTGRES_INSTANCE: "daily-ci-evals-db"
2727
CLOUD_SQL_POSTGRES_REGION: "us-central1"

0 commit comments

Comments
 (0)