@@ -25,40 +25,74 @@ steps:
2525 - ' --allow-unauthenticated'
2626 - ' --port=8080'
2727 - ' --timeout=300'
28- - ' --set-env-vars=CLOUD_SQL_POSTGRES_PROJECT=omkar-playground,CLOUD_SQL_POSTGRES_INSTANCE=omkar-demo-postgres-1,CLOUD_SQL_POSTGRES_REGION=us-central1,CLOUD_SQL_POSTGRES_DATABASE=postgres,CLOUD_SQL_POSTGRES_USER=postgres,CLOUD_SQL_POSTGRES_PASSWORD=7`[EP^`U"_frcD;q ,CLOUD_SQL_POSTGRES_IP_TYPE=PUBLIC'
28+ - ' --set-env-vars=CLOUD_SQL_POSTGRES_PROJECT=omkar-playground,CLOUD_SQL_POSTGRES_INSTANCE=omkar-demo-postgres-1,CLOUD_SQL_POSTGRES_REGION=us-central1,CLOUD_SQL_POSTGRES_DATABASE=postgres,CLOUD_SQL_POSTGRES_USER=postgres,CLOUD_SQL_POSTGRES_PASSWORD=[PASSWORD] ,CLOUD_SQL_POSTGRES_IP_TYPE=PUBLIC'
2929
30- # --- STEP 3: Run Eval Server in Background ---
31- - name : ' gcr.io/cloud-builders/docker'
30+ # --- STEP 3: Fully Integrated Evaluation to Persist Results ---
31+ - name : ' us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/eval_server:latest'
32+ entrypoint : ' bash'
3233 args :
33- - ' run'
34- - ' -d'
35- - ' --network=cloudbuild'
36- - ' --name=eval_server'
37- - ' us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/eval_server:latest'
34+ - ' -c'
35+ - |
36+ set -e  # abort the step on the first failing command
37+ cd /evalbench  # relative paths below are resolved from here
38+
39+ export EVAL_GCP_PROJECT_ID=omkar-playground
40+ export EVAL_GCP_PROJECT_REGION=us-central1
41+
42+ echo "Compiling protobuf files..."
43+ python3 -m grpc_tools.protoc --proto_path=evalbench/evalproto --python_out=evalbench/evalproto --grpc_python_out=evalbench/evalproto evalbench/evalproto/*.proto  # generated modules are written next to the .proto sources
44+
45+ echo "Patching client to use insecure credentials..."
46+ # sed -i 's/"localhost:50051"/"127.0.0.1:50051"/g' evalbench/client/eval_client.py
47+ sed -i 's/grpc.alts_channel_credentials()/None/g' evalbench/client/eval_client.py  # replace the ALTS credentials call with None
48+ sed -i 's/grpc.aio.secure_channel(address, channel_creds)/grpc.aio.insecure_channel(address)/g' evalbench/client/eval_client.py  # open a plaintext channel instead of a secure one
49+
50+ echo "Patching server to listen on all IPv4 interfaces (0.0.0.0)..."
51+ sed -i 's/"\[::\]:%s"/"0.0.0.0:%s"/g' /evalbench/evalbench/eval_server.py  # swap the [::] wildcard bind address for 0.0.0.0
52+ echo "Checking bind success in server (writing to stderr)..."
53+ sed -i 's|server.add_insecure_port("0.0.0.0:%s" % PORT)|bound_port = server.add_insecure_port("0.0.0.0:%s" % PORT)\n import sys\n sys.stderr.write(f"BOUND_PORT: {bound_port}\\n")\n if bound_port == 0: raise RuntimeError("Failed to bind to port!")|' /evalbench/evalbench/eval_server.py  # order matters: this pattern matches the text produced by the 0.0.0.0 rewrite above; the injected code logs the bound port and raises when it is 0
3854
39- # --- STEP 4: Run Evalbench Evaluation Client ---
40- # - name: 'python:3.10'
41- # entrypoint: 'bash'
42- # args:
43- # - '-c'
44- # - |
45- # # Clone Evalbench
46- # git clone https://github.com/GoogleCloudPlatform/evalbench.git
47- # cd evalbench
55+ echo "Patching eval_service.py to fix TypeError in get_reporters..."
56+ sed -i 's|reporters = get_reporters(config.get("reporting"), job_id, run_time)|reporters = get_reporters(config.get("reporting") or {}, job_id, run_time)|' /evalbench/evalbench/eval_service.py  # pass {} when config.get("reporting") is falsy
57+
58+ echo "Patching util/session.py to make ADK import lazy..."
59+ sed -i 's|from google.adk.sessions import VertexAiSessionService||' /evalbench/evalbench/util/session.py  # drop the module-level import
60+ sed -i 's| def __init__(self, config):| def __init__(self, config):\n from google.adk.sessions import VertexAiSessionService|' /evalbench/evalbench/util/session.py  # re-add the import inside __init__
61+ echo "Patching databases/util.py to make SecretManagerClient lazy..."
62+ sed -i 's|CLIENT = secretmanager_v1.SecretManagerServiceClient()|CLIENT = None\ndef get_client():\n global CLIENT\n if CLIENT is None:\n CLIENT = secretmanager_v1.SecretManagerServiceClient()\n return CLIENT|' /evalbench/evalbench/databases/util.py || echo "Failed to patch databases/util.py"  # NOTE(review): the fallback keeps the step alive despite set -e; presumably it only fires when sed itself errors, not when the pattern is absent - confirm
63+ sed -i 's|CLIENT.access_secret_version|get_client().access_secret_version|' /evalbench/evalbench/databases/util.py || echo "Failed to patch databases/util.py usage"  # route the call through the lazy accessor injected by the previous patch
64+ cd evalbench  # now in /evalbench/evalbench
65+ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
66+ export PYTHONPATH=./evalproto:.  # relative to /evalbench/evalbench
67+ export CLOUD_RUN=True
68+ export PORT=50051  # same port the readiness probe connects to later
69+
70+
71+
72+ echo "Starting Evaluation Server in background..."
73+ # Redirect stdin from /dev/null so the backgrounded server can never block waiting for input.
74+ python3 -u ./eval_server.py --localhost </dev/null &  # stdout and stderr are not redirected here
75+ SERVER_PID=$$!  # NOTE(review): the doubled dollar sign is presumably the Cloud Build escape for a single one, so this stores the background server PID - confirm; the variable is not read anywhere in the visible script
4876
49- # # Install Dependencies
50- # pip install -r requirements.txt
77+ echo "Waiting for port 50051 to open..."  # readiness probe: poll loopback until the server accepts TCP connections
78+ python3 -c "
79+ import socket
80+ import time
81+ for i in range(20):  # up to 20 attempts, one second apart
82+     try:
83+         s = socket.create_connection(('127.0.0.1', 50051), timeout=1)  # bounded connect so one attempt cannot hang
84+         s.close()  # probe only; release the socket immediately
85+         print('Port is open!')
86+         exit(0)
87+     except Exception as e:
88+         print(f'Port not open yet: {e}')
89+         time.sleep(1)
90+ print('Port failed to open')
91+ exit(1)
92+ " || { echo "Server failed to bind port. Check logs above."; exit 1; }
5193
52- # # Setup Environment Variables
53- # export EVAL_GCP_PROJECT_ID=omkar-playground
54- # export EVAL_GCP_PROJECT_REGION=us-central1
55- # export EVAL_CONFIG=../evals/run_config.yaml
94+ echo "Server is running. Launching Evaluation Client..."
95+ cd /evalbench  # return to the repository root; the PYTHONPATH entries below are relative to it
96+ export PYTHONPATH=./evalbench:./evalbench/evalproto  # replaces the PYTHONPATH exported earlier for the server
5697
57- # # Compile required protobuf modules and Run Evaluation Client against the eval_server container
58- # make proto
59- # ./run_client.sh --endpoint=eval_server:50051
60-
61-
62- options :
63- env :
64- - ' DOCKER_BUILDKIT=1'
98+ python3 evalbench/client/eval_client.py --experiment=/workspace/evals/run_config.yaml --endpoint=local || { echo "Client failed! Server logs:"; cat /evalbench/evalbench/server.log 2>/dev/null || echo "(no server.log found; the server output is in the build log above)"; exit 1; }  # run the client; on failure dump server.log if one exists, then fail the step
0 commit comments