Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# --- Stage 1: Build the toolbox binary from source ---
# The builder and the runtime stage are pinned to the same Debian release
# (bookworm) so the CGO-linked binary finds a matching glibc at runtime.
FROM golang:1.25-bookworm AS builder

# Git branch or tag of genai-toolbox to build. Defaults to the tip of main;
# pass `--build-arg TOOLBOX_REF=<tag>` for a reproducible build.
ARG TOOLBOX_REF=main

WORKDIR /build

# Shallow-clone the requested ref of the upstream genai-toolbox repository.
RUN git clone --depth 1 --branch "${TOOLBOX_REF}" \
    https://github.com/googleapis/genai-toolbox.git .

# Compile with CGO enabled so that upstream database drivers which need it
# (Oracle, etc.) are included in the binary.
RUN CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -o toolbox .

# --- Stage 2: Slim runtime image ---
# debian:bookworm-slim ships the same glibc as the bookworm-based builder but
# leaves out the Go toolchain and compilers, which the binary does not need.
FROM debian:bookworm-slim

# CA certificates are required for outbound TLS. libc6 is already part of the
# base image, and recommended packages are skipped to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ca-certificates \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy the compiled binary from the builder stage. `go build` already marks
# it executable and COPY preserves the mode, so no chmod is needed.
COPY --from=builder /build/toolbox /app/toolbox

# Copy the extension's skills and configuration into the container.
COPY skills/ ./skills/
COPY gemini-extension.json .

# Empty tools.yaml placeholder to satisfy the binary's startup checks.
RUN touch tools.yaml

# Serve the HTTP API and UI on 0.0.0.0:8080 so Cloud Run health checks pass.
ENTRYPOINT ["/app/toolbox", "--prebuilt", "cloud-sql-postgres", "--address=0.0.0.0", "--port=8080", "--enable-api", "--ui"]


64 changes: 64 additions & 0 deletions cloudbuild.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Cloud Build pipeline: build and push the toolbox image, deploy it to
# Cloud Run, then start the eval server container on the build network.
steps:

  # --- STEP 1: Build and Push Docker Image ---
  - name: 'gcr.io/cloud-builders/docker'
    args:
      - 'build'
      - '-t'
      - 'us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/cloud-sql-postgresql:latest'
      - '.'

  - name: 'gcr.io/cloud-builders/docker'
    args:
      - 'push'
      - 'us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/cloud-sql-postgresql:latest'

  # --- STEP 2: Deploy to Cloud Run ---
  # The database password is NOT passed as a literal: it is mounted from
  # Secret Manager through --set-secrets. The secret
  # `cloud-sql-postgres-password` must exist, and the Cloud Run service
  # account needs the Secret Manager Secret Accessor role on it.
  # NOTE(review): the password previously committed in this file is exposed
  # in version-control history and should be rotated.
  # NOTE(review): --allow-unauthenticated makes this database-backed service
  # publicly reachable; confirm that is intended.
  - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk'
    entrypoint: gcloud
    args:
      - 'run'
      - 'deploy'
      - 'cloud-sql-postgresql-server'
      - '--image=us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/cloud-sql-postgresql:latest'
      - '--region=us-central1'
      - '--allow-unauthenticated'
      - '--port=8080'
      - '--timeout=300'
      - '--set-env-vars=CLOUD_SQL_POSTGRES_PROJECT=omkar-playground,CLOUD_SQL_POSTGRES_INSTANCE=omkar-demo-postgres-1,CLOUD_SQL_POSTGRES_REGION=us-central1,CLOUD_SQL_POSTGRES_DATABASE=postgres,CLOUD_SQL_POSTGRES_USER=postgres,CLOUD_SQL_POSTGRES_IP_TYPE=PUBLIC'
      - '--set-secrets=CLOUD_SQL_POSTGRES_PASSWORD=cloud-sql-postgres-password:latest'

  # --- STEP 3: Run Eval Server in Background ---
  # Detached (-d) on the `cloudbuild` network so later steps can reach the
  # container by its name, `eval_server`.
  - name: 'gcr.io/cloud-builders/docker'
    args:
      - 'run'
      - '-d'
      - '--network=cloudbuild'
      - '--name=eval_server'
      - 'us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/eval_server:latest'

  # --- STEP 4: Run Evalbench Evaluation Client (currently disabled) ---
  # - name: 'python:3.10'
  #   entrypoint: 'bash'
  #   args:
  #     - '-c'
  #     - |
  #       # Clone Evalbench
  #       git clone https://github.com/GoogleCloudPlatform/evalbench.git
  #       cd evalbench
  #
  #       # Install Dependencies
  #       pip install -r requirements.txt
  #
  #       # Setup Environment Variables
  #       export EVAL_GCP_PROJECT_ID=omkar-playground
  #       export EVAL_GCP_PROJECT_REGION=us-central1
  #       export EVAL_CONFIG=../evals/run_config.yaml
  #
  #       # Compile required protobuf modules and Run Evaluation Client against the eval_server container
  #       make proto
  #       ./run_client.sh --endpoint=eval_server:50051

options:
  env:
    - 'DOCKER_BUILDKIT=1'
15 changes: 15 additions & 0 deletions evals/dataset.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
  "scenarios": [
    {
      "id": "cloud-sql-debug-01",
      "starting_prompt": "I need to debug the database.",
      "conversation_plan": "Ask the agent to list instances in project omkar-playground. Once listed, ask it to check the CPU usage of the first instance. Finally, ask if that usage is considered high.",
      "expected_trajectory": ["list_instances", "get_metrics"],
      "kind": "tool",
      "max_turns": 15
    }
  ]
}
18 changes: 18 additions & 0 deletions evals/model_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Model configuration for the gemini_cli generator.
gemini_cli_version: "@google/gemini-cli@0.26.0"
generator: gemini_cli

# Environment passed to the generator; values are quoted so they stay strings.
env:
  GOOGLE_CLOUD_PROJECT: "omkar-playground"
  GOOGLE_CLOUD_LOCATION: "us-central1"
  GOOGLE_GENAI_USE_VERTEXAI: "true"
  GEMINI_API_MODEL: "gemini-2.5-pro"

setup:
  extensions:
    "https://github.com/gemini-cli-extensions/cloud-sql-postgresql":
      settings:
        CLOUD_SQL_POSTGRES_PROJECT: "omkar-playground"
        CLOUD_SQL_POSTGRES_INSTANCE: "omkar-demo-postgres-1"
        CLOUD_SQL_POSTGRES_REGION: "us-central1"
        CLOUD_SQL_POSTGRES_DATABASE: "postgres"
        CLOUD_SQL_POSTGRES_USER: "postgres"
        # The literal password was removed: credentials must not be committed
        # to version control. The previously committed value is exposed in
        # history and should be rotated.
        # NOTE(review): assumes the eval harness (or a render step run before
        # it) substitutes this environment reference - TODO confirm; if it
        # does not, inject the value from a secret store at build time.
        CLOUD_SQL_POSTGRES_PASSWORD: "${CLOUD_SQL_POSTGRES_PASSWORD}"
        CLOUD_SQL_POSTGRES_IP_TYPE: "PUBLIC"
12 changes: 12 additions & 0 deletions evals/run_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
# Evaluation run configuration: which dataset to use, which orchestrator and
# model configs drive the run, and which scorers grade the result.
dataset_config: '/workspace/evals/dataset.json'
dataset_format: 'gemini-cli-format'

orchestrator: 'geminicli'
model_config: '/workspace/evals/model_config.yaml'
# The simulated user can point at one of the default model configs that ship
# with the evalbench repo:
simulated_user_model_config: 'datasets/model_configs/gemini_2.5_pro_model.yaml'

scorers:
  # No options are set for the trajectory matcher.
  trajectory_matcher: {}
  goal_completion:
    model_config: 'datasets/model_configs/gemini_2.5_pro_model.yaml'
Loading