gemini-cli-extensions · omkargaikwad23 · Apr 22, 2026 · Apr 10, 2026 · Apr 13, 2026 · Apr 13, 2026
@@ -0,0 +1,64 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Cloud Build pipeline for the extension evals. A single step starts the
+# evalbench server in the background, waits for its port to accept
+# connections, and then runs the evalbench client against it.
+options:
+  logging: CLOUD_LOGGING_ONLY
+
+steps:
+
+  # --- Evaluation Step ---
+  # The image is referenced by a fixed tag, so every run uses the same image reference.
+  - name: 'us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:89aa9fefd4b247610a95ef0896ba55d468563f50'
+    entrypoint: 'bash'
+    # Decrypts the secret from Secret Manager into the DB_PASSWORD environment variable
+    # (the secret itself is declared under `availableSecrets` at the bottom of this file).
+    secretEnv: ['DB_PASSWORD'] 
+    args:
+      - '-c'
+      - |
+        # Abort the step as soon as any foreground command fails.
+        set -e
+        # Workaround for evalbench bug: settings are only applied if path basename matches extension ID
+        # (model_config.yaml refers to the extension through this symlinked path).
+        ln -s /workspace /workspace/cloud-sql-postgresql
+        cd /evalbench
+
+        # $PROJECT_ID is filled in by Cloud Build substitution before bash runs.
+        export EVAL_GCP_PROJECT_ID=$PROJECT_ID
+        export EVAL_GCP_PROJECT_REGION=us-central1
+        # Maps the decrypted DB_PASSWORD to the exact variable expected by gemini_cli and extension skills
+        # `$$` escapes the dollar sign so Cloud Build passes `$DB_PASSWORD` through
+        # to bash instead of treating it as a build substitution.
+        export CLOUD_SQL_POSTGRES_PASSWORD=$$DB_PASSWORD
+
+        # Substitute environment variables in model_config.yaml
+        # (rewrites /workspace/evals/model_config.yaml in place, so the exports above must come first).
+        python3 /workspace/evals/substitute_env.py
+
+        # Server-side settings; presumably read by eval_server.py (not visible here) -- TODO confirm.
+        export EVALBENCH_INSECURE=True
+        export EVALBENCH_HOST=0.0.0.0
+        # Now in /evalbench/evalbench: server.log below is written to this directory.
+        cd evalbench
+        export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
+        export PYTHONPATH=./evalproto:.
+        export CLOUD_RUN=True
+        export PORT=50051
+
+        echo "Starting Evaluation Server in background..."
+        # -u keeps Python output unbuffered; stdin is detached; stdout and stderr are
+        # mirrored to both the build log and server.log; `&` backgrounds the pipeline.
+        python3 -u ./eval_server.py --localhost </dev/null 2>&1 | tee server.log &
+
+        echo "Waiting for port 50051 to open..."
+        # wait_for_port.py exits non-zero if the port never accepts a connection.
+        python3 /workspace/evals/wait_for_port.py || { echo "Server failed to bind port."; exit 1; }
+
+        echo "Server is running. Launching Evaluation Client..."
+        # Back to /evalbench; the client uses a PYTHONPATH relative to this directory.
+        cd /evalbench
+        export PYTHONPATH=./evalbench:./evalbench/evalproto
+
+        # On client failure, dump the server log for debugging and fail the step.
+        python3 evalbench/client/eval_client.py --experiment=/workspace/evals/run_config.yaml --endpoint=local || { echo "Client failed! Server logs:"; cat /evalbench/evalbench/server.log; exit 1; }
+
+# Exposes the latest version of the Secret Manager secret to steps that list
+# DB_PASSWORD in `secretEnv`.
+availableSecrets:
+  secretManager:
+  - versionName: projects/$PROJECT_ID/secrets/daily-ci-evals-db-password/versions/latest
+    env: 'DB_PASSWORD'
@@ -0,0 +1,74 @@
+{
+  "scenarios": [
+    {
+      "id": "cloud-sql-debug-instance",
+      "starting_prompt": "Check on my databases in project ext-test-cloud-sql-postgres.",
+      "conversation_plan": "Ask the agent to list all Cloud SQL instances in the project. Once all instances are listed, if 'daily-ci-evals-db' exists, get its details and validate it is RUNNABLE.",
+      "expected_trajectory": [
+        "list_instances",
+        "get_instance"
+      ],
+      "env": {
+        "GOOGLE_CLOUD_PROJECT": "ext-test-cloud-sql-postgres"
+      },
+      "kind": "tools",
+      "max_turns": 4
+    },
+    {
+      "id": "cloud-sql-schema-tables-explore",
+      "starting_prompt": "I want to understand the structure of my database.",
+      "conversation_plan": "First, ask the agent to list the schemas in the database. After the agent provides the schemas, ask it to list the tables specifically for the 'public' schema.",
+      "expected_trajectory": [
+        "list_schemas",
+        "list_tables"
+      ],
+      "env": {
+        "GOOGLE_CLOUD_PROJECT": "ext-test-cloud-sql-postgres"
+      },
+      "kind": "tools",
+      "max_turns": 6
+    },
+    {
+      "id": "cloud-sql-performance-check",
+      "starting_prompt": "Our database performance seems degraded.",
+      "conversation_plan": "Start by asking the agent to check for any active queries that are running for a long time (e.g., more than 10 seconds). After the agent responds, follow up by asking if there are any database locks that might be causing issues.",
+      "expected_trajectory": [
+        "list_active_queries",
+        "list_locks"
+      ],
+      "env": {
+        "GOOGLE_CLOUD_PROJECT": "ext-test-cloud-sql-postgres"
+      },
+      "kind": "tools",
+      "max_turns": 6
+    },
+    {
+      "id": "cloud-sql-metrics-cpu-investigation",
+      "starting_prompt": "I'm worried about the database load for daily-ci-evals-db.",
+      "conversation_plan": "First, ask the agent to check the CPU utilization for the instance 'daily-ci-evals-db' for the last 5 minutes. After the agent provides the CPU data, ask it to check the overall database stats to see connection counts or transaction volume.",
+      "expected_trajectory": [
+        "get_system_metrics",
+        "list_database_stats"
+      ],
+      "env": {
+        "GOOGLE_CLOUD_PROJECT": "ext-test-cloud-sql-postgres"
+      },
+      "kind": "tools",
+      "max_turns": 6
+    },
+    {
+      "id": "cloud-sql-instance-not-found",
+      "starting_prompt": "Get details for the instance 'missing-db-123'.",
+      "conversation_plan": "The user asks for details of an instance named 'missing-db-123' that doesn't exist. The agent should try to get it, fail, and inform the user. The user will then ask to list instances to find the correct name.",
+      "expected_trajectory": [
+        "get_instance",
+        "list_instances"
+      ],
+      "env": {
+        "GOOGLE_CLOUD_PROJECT": "ext-test-cloud-sql-postgres"
+      },
+      "kind": "tools",
+      "max_turns": 4
+    }
+  ]
+}
@@ -0,0 +1,18 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+generator: gcp_vertex_gemini
+vertex_model: gemini-2.5-pro
+base_prompt: ""
+execs_per_minute: 5
@@ -0,0 +1,32 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+gemini_cli_version: "@google/gemini-cli@0.38.1"
+generator: gemini_cli
+env:
+  GOOGLE_CLOUD_PROJECT: "ext-test-cloud-sql-postgres"
+  GOOGLE_CLOUD_LOCATION: "global"
+  GOOGLE_GENAI_USE_VERTEXAI: "true"
+setup:
+  extensions:
+    # Points to the symlink created in cloudbuild.yaml to match the extension ID
+    "/workspace/cloud-sql-postgresql":
+      settings:
+        CLOUD_SQL_POSTGRES_PROJECT: "ext-test-cloud-sql-postgres"
+        CLOUD_SQL_POSTGRES_INSTANCE: "daily-ci-evals-db"
+        CLOUD_SQL_POSTGRES_REGION: "us-central1"
+        CLOUD_SQL_POSTGRES_DATABASE: "postgres"
+        CLOUD_SQL_POSTGRES_USER: "postgres"
+        CLOUD_SQL_POSTGRES_PASSWORD: '${CLOUD_SQL_POSTGRES_PASSWORD}'
+        CLOUD_SQL_POSTGRES_IP_TYPE: "PUBLIC"
@@ -0,0 +1,45 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+extension_id: cloud-sql-postgresql
+
+dataset_config: /workspace/evals/dataset.json
+dataset_format: gemini-cli-format
+
+orchestrator: geminicli
+model_config: /workspace/evals/model_config.yaml
+# Model config for the simulated user. This points at a local file; the default simulated user models provided by the evalbench repo can be referenced instead:
+simulated_user_model_config: /workspace/evals/gemini_2.5_pro_model.yaml
+
+scorers:
+  # Structural
+  trajectory_matcher: {}
+
+  # Qualitative (Judge-based)
+  goal_completion:
+    model_config: /workspace/evals/gemini_2.5_pro_model.yaml
+  behavioral_metrics:
+    model_config: /workspace/evals/gemini_2.5_pro_model.yaml
+  parameter_analysis:
+    model_config: /workspace/evals/gemini_2.5_pro_model.yaml
+
+  # Performance
+  turn_count: {}
+  end_to_end_latency: {}
+  tool_call_latency: {}
+  token_consumption: {}
+
+reporting:
+  bigquery:
+    gcp_project_id: cloud-db-nl2sql
@@ -0,0 +1,17 @@
+import os
+import re
+
+def main():
+    yaml_path = '/workspace/evals/model_config.yaml'
+    if os.path.exists(yaml_path):
+        with open(yaml_path, 'r') as f:
+            content = f.read()
+        content = re.sub(r'\${(\w+)}', lambda m: os.environ.get(m.group(1), m.group(0)), content)
+        with open(yaml_path, 'w') as f:
+            f.write(content)
+        print(f"Successfully substituted environment variables in {yaml_path}")
+    else:
+        print(f"File not found: {yaml_path}")
+
+if __name__ == '__main__':
+    main()
diff --git a/evals/wait_for_port.py b/evals/wait_for_port.py
@@ -0,0 +1,20 @@
+import socket
+import time
+import sys
+
+def main():
+    for i in range(20):
+        try:
+            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            s.settimeout(1)
+            s.connect(('127.0.0.1', 50051))
+            print('Port 50051 is open!')
+            sys.exit(0)
+        except Exception:
+            print('Port not open yet, retrying...')
+            time.sleep(1)
+    print('Port failed to open')
+    sys.exit(1)
+
+if __name__ == '__main__':
+    main()