From 6f8c58c1ea99b746fe451e70d8aecdf8463bdfa9 Mon Sep 17 00:00:00 2001
From: Omkar Gaikwad
Date: Fri, 10 Apr 2026 08:43:38 +0000
Subject: [PATCH] feat: add Dockerfile and Cloud Build pipeline for automated
 Evalbench testing of Cloud SQL PostgreSQL extension

---
 Dockerfile              | 47 +++++++++++++++++++++++++++
 cloudbuild.yaml         | 71 +++++++++++++++++++++++++++++++++++++++++
 evals/dataset.json      | 15 +++++++++
 evals/model_config.yaml | 21 ++++++++++++
 evals/run_config.yaml   | 12 +++++++
 5 files changed, 166 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 cloudbuild.yaml
 create mode 100644 evals/dataset.json
 create mode 100644 evals/model_config.yaml
 create mode 100644 evals/run_config.yaml

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,47 @@
+# --- Stage 1: Build the binary from source (Latest Nightly) ---
+# The Debian release is pinned (bookworm) so the runtime stage can use the matching
+# slim image and still provide the glibc version the CGO binary was linked against.
+FROM golang:1.25-bookworm AS builder
+
+WORKDIR /build
+
+# Clone the official genai-toolbox source code (always latest main branch).
+# NOTE(review): tracking main makes image builds non-reproducible; pin a tag or
+# commit if eval results need to be comparable across runs.
+RUN git clone --depth 1 https://github.com/googleapis/genai-toolbox.git .
+
+# Compile the binary with CGO enabled to support all upstream database drivers (Oracle, etc.)
+RUN CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -o toolbox .
+
+# --- Stage 2: Final Lightweight Runtime Image ---
+# debian:bookworm-slim is the same Debian release as the builder stage, so the glibc
+# versions match without shipping the whole Go toolchain in the runtime image.
+FROM debian:bookworm-slim
+
+# Install the CA bundle for outbound TLS (libc6 is already part of the base image)
+# and remove the apt lists in the same layer so they do not bloat the image.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+# Copy the freshly compiled binary from the builder stage. `go build` already marks
+# it executable and COPY preserves the mode, so no extra chmod layer is needed.
+COPY --from=builder /build/toolbox /app/toolbox
+
+# Copy the extension's skills and configuration into the container
+COPY skills/ ./skills/
+COPY gemini-extension.json .
+
+# Add required tools.yaml placeholder to satisfy binary startup checks
+RUN touch tools.yaml
+
+# Drop root: run as the unprivileged "nobody" UID/GID. Port 8080 is unprivileged.
+USER 65534:65534
+
+# Documents the listening port; keep in sync with --port below and in cloudbuild.yaml.
+EXPOSE 8080
+
+# Serve the HTTP API and UI on all interfaces so Cloud Run health checks can reach them
+ENTRYPOINT ["/app/toolbox", "--prebuilt", "cloud-sql-postgres", "--address=0.0.0.0", "--port=8080", "--enable-api", "--ui"]
diff --git a/cloudbuild.yaml b/cloudbuild.yaml
new file mode 100644
--- /dev/null
+++ b/cloudbuild.yaml
@@ -0,0 +1,71 @@
+steps:
+
+  # --- STEP 1: Build and Push Docker Image ---
+  - name: 'gcr.io/cloud-builders/docker'
+    args:
+      - 'build'
+      - '-t'
+      - 'us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/cloud-sql-postgresql:latest'
+      - '.'
+
+  - name: 'gcr.io/cloud-builders/docker'
+    args:
+      - 'push'
+      - 'us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/cloud-sql-postgresql:latest'
+
+  # --- STEP 2: Deploy to Cloud Run ---
+  - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk'
+    entrypoint: gcloud
+    args:
+      - 'run'
+      - 'deploy'
+      - 'cloud-sql-postgresql-server'
+      - '--image=us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/cloud-sql-postgresql:latest'
+      - '--region=us-central1'
+      # NOTE(review): this makes the toolbox API/UI reachable without authentication
+      # while it holds database credentials; confirm that is intended for this eval.
+      - '--allow-unauthenticated'
+      - '--port=8080'
+      - '--timeout=300'
+      - '--set-env-vars=CLOUD_SQL_POSTGRES_PROJECT=omkar-playground,CLOUD_SQL_POSTGRES_INSTANCE=omkar-demo-postgres-1,CLOUD_SQL_POSTGRES_REGION=us-central1,CLOUD_SQL_POSTGRES_DATABASE=postgres,CLOUD_SQL_POSTGRES_USER=postgres,CLOUD_SQL_POSTGRES_IP_TYPE=PUBLIC'
+      # The database password is mounted from Secret Manager so it never lands in
+      # source control or build logs. The secret must already exist, the service's
+      # runtime service account needs roles/secretmanager.secretAccessor on it, and
+      # any password that was previously committed in plain text must be rotated.
+      - '--set-secrets=CLOUD_SQL_POSTGRES_PASSWORD=cloud-sql-postgres-password:latest'
+
+  # --- STEP 3: Run Eval Server in Background ---
+  - name: 'gcr.io/cloud-builders/docker'
+    args:
+      - 'run'
+      - '-d'
+      - '--network=cloudbuild'
+      - '--name=eval_server'
+      - 'us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/eval_server:latest'
+
+  # --- STEP 4: Run Evalbench Evaluation Client ---
+  # - name: 'python:3.10'
+  #   entrypoint: 'bash'
+  #   args:
+  #     - '-c'
+  #     - |
+  #       # Clone Evalbench
+  #       git clone https://github.com/GoogleCloudPlatform/evalbench.git
+  #       cd evalbench
+
+  #       # Install Dependencies
+  #       pip install -r requirements.txt
+
+  #       # Setup Environment Variables
+  #       export EVAL_GCP_PROJECT_ID=omkar-playground
+  #       export EVAL_GCP_PROJECT_REGION=us-central1
+  #       export EVAL_CONFIG=../evals/run_config.yaml
+
+  #       # Compile required protobuf modules and Run Evaluation Client against the eval_server container
+  #       make proto
+  #       ./run_client.sh --endpoint=eval_server:50051
+
+
+options:
+  env:
+    - 'DOCKER_BUILDKIT=1'
diff --git a/evals/dataset.json b/evals/dataset.json
new file mode 100644
--- /dev/null
+++ b/evals/dataset.json
@@ -0,0 +1,15 @@
+{
+  "scenarios": [
+    {
+      "id": "cloud-sql-debug-01",
+      "starting_prompt": "I need to debug the database.",
+      "conversation_plan": "Ask the agent to list instances in project omkar-playground. Once listed, ask it to check the CPU usage of the first instance. Finally, ask if that usage is considered high.",
+      "expected_trajectory": [
+        "list_instances",
+        "get_metrics"
+      ],
+      "kind": "tool",
+      "max_turns": 15
+    }
+  ]
+}
diff --git a/evals/model_config.yaml b/evals/model_config.yaml
new file mode 100644
--- /dev/null
+++ b/evals/model_config.yaml
@@ -0,0 +1,21 @@
+gemini_cli_version: "@google/gemini-cli@0.26.0"
+generator: gemini_cli
+env:
+  GOOGLE_CLOUD_PROJECT: "omkar-playground"
+  GOOGLE_CLOUD_LOCATION: "us-central1"
+  GOOGLE_GENAI_USE_VERTEXAI: "true"
+  GEMINI_API_MODEL: "gemini-2.5-pro"
+setup:
+  extensions:
+    "https://github.com/gemini-cli-extensions/cloud-sql-postgresql":
+      settings:
+        CLOUD_SQL_POSTGRES_PROJECT: "omkar-playground"
+        CLOUD_SQL_POSTGRES_INSTANCE: "omkar-demo-postgres-1"
+        CLOUD_SQL_POSTGRES_REGION: "us-central1"
+        CLOUD_SQL_POSTGRES_DATABASE: "postgres"
+        CLOUD_SQL_POSTGRES_USER: "postgres"
+        # Never commit the real password. NOTE(review): confirm evalbench expands
+        # environment references in this file; if it does not, render this value from
+        # Secret Manager in the pipeline (e.g. with envsubst) before starting the run.
+        CLOUD_SQL_POSTGRES_PASSWORD: "${CLOUD_SQL_POSTGRES_PASSWORD}"
+        CLOUD_SQL_POSTGRES_IP_TYPE: "PUBLIC"
diff --git a/evals/run_config.yaml b/evals/run_config.yaml
new file mode 100644
--- /dev/null
+++ b/evals/run_config.yaml
@@ -0,0 +1,12 @@
+dataset_config: /workspace/evals/dataset.json
+dataset_format: gemini-cli-format
+
+orchestrator: geminicli
+model_config: /workspace/evals/model_config.yaml
+# You can reference default simulated user models provided by the evalbench repo:
+simulated_user_model_config: datasets/model_configs/gemini_2.5_pro_model.yaml
+
+scorers:
+  trajectory_matcher: {}
+  goal_completion:
+    model_config: datasets/model_configs/gemini_2.5_pro_model.yaml