Skip to content

Commit b6305c2

Browse files
feat: add Dockerfile and Cloud Build pipeline for automated Evalbench testing of Cloud SQL PostgreSQL extension
1 parent e3f0d60 commit b6305c2

5 files changed

Lines changed: 145 additions & 0 deletions

File tree

Dockerfile

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# --- Stage 1: Build the binary from source ---
# Builder and runtime are pinned to the same Debian release (bookworm) so the
# CGO-enabled binary runs against the same glibc it was linked with.
FROM golang:1.25-bookworm AS builder

# Git ref (branch or tag) of genai-toolbox to build. Defaults to the latest
# main branch; pass --build-arg TOOLBOX_REF=<tag> for a reproducible build.
ARG TOOLBOX_REF=main

WORKDIR /build

# Shallow-clone the official genai-toolbox source at the requested ref
RUN git clone --depth 1 --branch "${TOOLBOX_REF}" https://github.com/googleapis/genai-toolbox.git .

# Compile the binary with CGO ENABLED to support all upstream database drivers (Oracle, etc.)
RUN CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -o toolbox .

# --- Stage 2: Final Lightweight Runtime Image ---
# Slim Debian base of the same release as the builder: matching glibc without
# shipping the Go toolchain in the runtime image.
FROM debian:bookworm-slim

# Install runtime CA certificates (libc6 is already part of the base image)
RUN apt-get update \
    && apt-get install -y --no-install-recommends ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Dedicated unprivileged account with a stable numeric UID/GID
RUN groupadd --system --gid 10001 toolbox \
    && useradd --system --uid 10001 --gid toolbox --home-dir /app --no-create-home toolbox

WORKDIR /app

# Copy the freshly compiled binary from the builder stage.
# `go build` already emits an executable file and COPY preserves its mode,
# so no separate chmod layer is needed.
COPY --from=builder --chown=toolbox:toolbox /build/toolbox /app/toolbox

# Copy the extension's skills and configuration into the container
COPY --chown=toolbox:toolbox skills/ ./skills/
COPY --chown=toolbox:toolbox gemini-extension.json .

# Add required tools.yaml placeholder to satisfy binary startup checks,
# and hand the working directory to the unprivileged user
RUN touch tools.yaml && chown toolbox:toolbox /app tools.yaml

# Drop root before starting the server
USER toolbox

# Port served by the HTTP API and UI (the port Cloud Run is configured to probe)
EXPOSE 8080

ENTRYPOINT ["/app/toolbox", "--prebuilt", "cloud-sql-postgres", "--address=0.0.0.0", "--port=8080", "--enable-api", "--ui"]
35+
36+

cloudbuild.yaml

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# Cloud Build pipeline: builds and pushes the toolbox image, deploys it to
# Cloud Run, then starts the Evalbench eval server container.
steps:

  # --- STEP 1: Build and Push Docker Image ---
  - name: 'gcr.io/cloud-builders/docker'
    args:
      - 'build'
      - '-t'
      - 'us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/cloud-sql-postgresql:latest'
      - '.'

  - name: 'gcr.io/cloud-builders/docker'
    args:
      - 'push'
      - 'us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/cloud-sql-postgresql:latest'

  # --- STEP 2: Deploy to Cloud Run ---
  # The database password is NOT stored in this file. It is injected at runtime
  # from the Secret Manager secret `cloud-sql-postgres-password`; create that
  # secret and grant the Cloud Run runtime service account
  # roles/secretmanager.secretAccessor before running this pipeline.
  # Any password that was previously committed to the repository must be rotated.
  - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk'
    entrypoint: gcloud
    args:
      - 'run'
      - 'deploy'
      - 'cloud-sql-postgresql-server'
      - '--image=us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/cloud-sql-postgresql:latest'
      - '--region=us-central1'
      # NOTE(review): this makes a service holding database credentials publicly
      # reachable; confirm this is required for the eval client.
      - '--allow-unauthenticated'
      - '--port=8080'
      - '--timeout=300'
      - '--set-env-vars=CLOUD_SQL_POSTGRES_PROJECT=omkar-playground,CLOUD_SQL_POSTGRES_INSTANCE=omkar-demo-postgres-1,CLOUD_SQL_POSTGRES_REGION=us-central1,CLOUD_SQL_POSTGRES_DATABASE=postgres,CLOUD_SQL_POSTGRES_USER=postgres,CLOUD_SQL_POSTGRES_IP_TYPE=PUBLIC'
      - '--set-secrets=CLOUD_SQL_POSTGRES_PASSWORD=cloud-sql-postgres-password:latest'

  # --- STEP 3: Run Eval Server in Background ---
  # Detached container on the shared `cloudbuild` network so later steps can
  # reach it by its container name.
  - name: 'gcr.io/cloud-builders/docker'
    args:
      - 'run'
      - '-d'
      - '--network=cloudbuild'
      - '--name=eval_server'
      - 'us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/eval_server:latest'

  # --- STEP 4: Run Evalbench Evaluation Client ---
  # - name: 'python:3.10'
  #   entrypoint: 'bash'
  #   args:
  #     - '-c'
  #     - |
  #       # Clone Evalbench
  #       git clone https://github.com/GoogleCloudPlatform/evalbench.git
  #       cd evalbench
  #
  #       # Install Dependencies
  #       pip install -r requirements.txt
  #
  #       # Setup Environment Variables
  #       export EVAL_GCP_PROJECT_ID=omkar-playground
  #       export EVAL_GCP_PROJECT_REGION=us-central1
  #       export EVAL_CONFIG=../evals/run_config.yaml
  #
  #       # Compile required protobuf modules and Run Evaluation Client against the eval_server container
  #       make proto
  #       ./run_client.sh --endpoint=eval_server:50051

options:
  env:
    - 'DOCKER_BUILDKIT=1'

evals/dataset.json

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"scenarios": [
3+
{
4+
"id": "cloud-sql-debug-01",
5+
"starting_prompt": "I need to debug the database.",
6+
"conversation_plan": "Ask the agent to list instances in project omkar-playground. Once listed, ask it to check the CPU usage of the first instance. Finally, ask if that usage is considered high.",
7+
"expected_trajectory": [
8+
"list_instances",
9+
"get_metrics"
10+
],
11+
"kind": "tool",
12+
"max_turns": 15
13+
}
14+
]
15+
}

evals/model_config.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Evalbench model configuration for the Gemini CLI generator.
gemini_cli_version: "@google/gemini-cli@0.26.0"
generator: gemini_cli

# Environment passed to the Gemini CLI process
env:
  GOOGLE_CLOUD_PROJECT: "omkar-playground"
  GOOGLE_CLOUD_LOCATION: "us-central1"
  GOOGLE_GENAI_USE_VERTEXAI: "true"
  GEMINI_API_MODEL: "gemini-2.5-pro"

setup:
  extensions:
    "https://github.com/gemini-cli-extensions/cloud-sql-postgresql":
      settings:
        CLOUD_SQL_POSTGRES_PROJECT: "omkar-playground"
        CLOUD_SQL_POSTGRES_INSTANCE: "omkar-demo-postgres-1"
        CLOUD_SQL_POSTGRES_REGION: "us-central1"
        CLOUD_SQL_POSTGRES_DATABASE: "postgres"
        CLOUD_SQL_POSTGRES_USER: "postgres"
        # Never commit the real password. Supply it through the environment
        # (e.g. from Secret Manager) when the evaluation runs, and rotate any
        # password that was previously committed.
        # NOTE(review): confirm that Evalbench expands ${...} placeholders in
        # settings; if it does not, render this file from a template at run time.
        CLOUD_SQL_POSTGRES_PASSWORD: "${CLOUD_SQL_POSTGRES_PASSWORD}"
        CLOUD_SQL_POSTGRES_IP_TYPE: "PUBLIC"

evals/run_config.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Evalbench run configuration.

# Scenario dataset and its on-disk format
dataset_config: /workspace/evals/dataset.json
dataset_format: gemini-cli-format

# Orchestrator and the model configuration it uses
orchestrator: geminicli
model_config: /workspace/evals/model_config.yaml

# Simulated-user model; this path points at a default config shipped in the
# evalbench repository
simulated_user_model_config: datasets/model_configs/gemini_2.5_pro_model.yaml

# Scorers applied to each run
scorers:
  trajectory_matcher: {}
  goal_completion:
    model_config: datasets/model_configs/gemini_2.5_pro_model.yaml

0 commit comments

Comments
 (0)