# Hunk 1 (start-up banner): drops the `df -h /tmp` free-space line from the
# banner; the title, date and GPU lines are unchanged context.
# NOTE(review): the hunk header quotes `set -uo pipefail`, i.e. no `-e` is
# visible there — presumably a failing step does not abort the run; confirm
# against the full script.
@@ -4,7 +4,6 @@ set -uo pipefail
44echo " === End-to-End Cold Start Benchmark ==="
55echo " Date: $( date -u) "
66echo " GPU: $( nvidia-smi --query-gpu=name,memory.total --format=csv,noheader 2> /dev/null || echo ' none' ) "
7- df -h /tmp | tail -1 | awk ' {print "Disk: " $4 " free"}'
87echo " "
98
109SCRIPT_DIR=" $( cd " $( dirname " $0 " ) " && pwd) "
# Hunk 2 (setup + start of scenario 1): introduces BENCH_DIR under
# /gpu-cli-workspaces, deletes and recreates it, and exports HF_HOME to a
# sub-directory of it. It also uninstalls torchvision and purges the pip
# cache (both with stderr discarded and failures ignored), prints the free
# space on /gpu-cli-workspaces, drops the old removal of the /tmp venv, and
# writes bench_pip.py into BENCH_DIR instead of /tmp.
# NOTE(review): the uninstall/purge use the bare `pip` found on PATH, not the
# benchmark venv — that venv is only created further down in the script.
# NOTE(review): the "Clear HF cache" comment precedes an export that only
# redirects HF_HOME; the cache is empty because BENCH_DIR was just recreated.
# The PYEOF heredoc delimiter is unquoted, so the shell expands variables in
# the heredoc body at the moment the file is written.
@@ -15,15 +14,30 @@ export PYTHONPATH="$PROJECT_DIR/python:${PYTHONPATH:-}"
1514
1615MODEL_ID=" ${SNAP_MODEL:- Qwen/ Qwen2.5-7B} "
1716
17+ # All temp data on the volume (more space than /tmp)
18+ BENCH_DIR=" /gpu-cli-workspaces/.bench-e2e"
19+ rm -rf " $BENCH_DIR "
20+ mkdir -p " $BENCH_DIR "
21+
22+ # Remove system torchvision that conflicts with fresh torch installs
23+ pip uninstall -y torchvision 2> /dev/null || true
24+
25+ # Clear pip cache so scenario 1 is a true cold install
26+ pip cache purge 2> /dev/null || true
27+
28+ # Clear HF cache so model download is truly cold
29+ export HF_HOME=" $BENCH_DIR /hf-cache"
30+
31+ df -h /gpu-cli-workspaces | tail -1 | awk ' {print "Disk: " $4 " free on /gpu-cli-workspaces"}'
32+ echo " "
33+
1834# ============================================================
1935# Scenario 1: pip install + from_pretrained (traditional)
2036# ============================================================
2137echo " --- Scenario 1: pip install + from_pretrained ---"
22- # Clean slate
23- rm -rf /tmp/.pip-bench-venv
2438BENCH_START=$( date +%s%3N)
2539
26- cat > /tmp/ bench_pip.py << PYEOF
40+ cat > " $BENCH_DIR / bench_pip.py" << PYEOF
2741import time
2842t_script = time.monotonic()
2943
# Hunk 3 (Python; by its new-file line numbers this sits inside the
# bench_pip.py heredoc): the only change is the keyword passed to
# AutoModelForCausalLM.from_pretrained, `torch_dtype=` -> `dtype=`. The value
# (torch.bfloat16) and device_map="cpu" are untouched.
# NOTE(review): presumably tracks a keyword rename in transformers — confirm
# that the transformers version the benchmark installs accepts `dtype`.
@@ -33,7 +47,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
3347t_import = time.monotonic()
3448
3549tokenizer = AutoTokenizer.from_pretrained("$MODEL_ID ")
36- model = AutoModelForCausalLM.from_pretrained("$MODEL_ID ", torch_dtype =torch.bfloat16, device_map="cpu")
50+ model = AutoModelForCausalLM.from_pretrained("$MODEL_ID ", dtype =torch.bfloat16, device_map="cpu")
3751model.eval()
3852t_model = time.monotonic()
3953
# Hunk 4 (rest of scenario 1, setup of scenario 2): after the bench_pip.py
# heredoc closes, the venv is created under BENCH_DIR instead of /tmp and
# pip installs with --no-cache-dir. The benchmark output is now piped through
# `tail -30` rather than the old grep for RESULT/TIME lines, so unrelated
# output can appear in the report. The venv is removed afterwards, HF_HOME is
# deleted so scenario 2 starts without a model cache, and ZEROSTART_CACHE and
# bench_zs_cold.py move under BENCH_DIR.
# NOTE(review): the install is piped into `tail -3` and no exit status is
# checked in the visible lines — presumably a failed install still produces a
# "pip install: N ms" line; confirm.
@@ -47,27 +61,30 @@ print(f"RESULT: {result}")
4761print(f"TIME import={t_import-t_script:.2f}s model={t_model-t_import:.2f}s inference={t_inf-t_model:.2f}s total={t_inf-t_script:.2f}s")
4862PYEOF
4963
50- # Install into a fresh venv (simulates cold container)
51- python3 -m venv /tmp/. pip-bench- venv
52- /tmp/. pip-bench- venv/bin/pip install -q torch transformers accelerate 2>&1 | tail -3
64+ # Fresh venv, no pip cache — true cold install
65+ python3 -m venv " $BENCH_DIR / pip-venv"
66+ " $BENCH_DIR / pip-venv/bin/pip" install --no-cache-dir -q torch transformers accelerate 2>&1 | tail -3
5367PIP_DONE=$( date +%s%3N)
5468echo " pip install: $(( PIP_DONE - BENCH_START )) ms"
5569
56- /tmp/. pip-bench- venv/bin/python /tmp/ bench_pip.py 2>&1 | grep -E " ^(RESULT|TIME) "
70+ " $BENCH_DIR / pip-venv/bin/python" " $BENCH_DIR / bench_pip.py" 2>&1 | tail -30
5771BENCH_END=$( date +%s%3N)
5872echo " Total wall clock (install + load + inference): $(( BENCH_END - BENCH_START )) ms"
59- rm -rf /tmp/. pip-bench- venv
73+ rm -rf " $BENCH_DIR / pip-venv"
6074echo " "
6175
76+ # Clear HF cache so scenario 2 also downloads fresh
77+ rm -rf " $HF_HOME "
78+
6279# ============================================================
6380# Scenario 2: zerostart cold + from_pretrained
6481# ============================================================
6582echo " --- Scenario 2: zerostart cold + from_pretrained ---"
66- export ZEROSTART_CACHE=" /tmp/. zs-e2e-bench "
83+ export ZEROSTART_CACHE=" $BENCH_DIR / zs-cache "
6784export ZS_NO_SHARED_CACHE=1
6885rm -rf " $ZEROSTART_CACHE "
6986
70- cat > /tmp/ bench_zs_cold.py << PYEOF
87+ cat > " $BENCH_DIR / bench_zs_cold.py" << PYEOF
7188import time
7289t_script = time.monotonic()
7390
# Hunk 5 (Python; by its new-file line numbers this sits inside the
# bench_zs_cold.py heredoc): replaces the `torch_dtype=` keyword with
# `dtype=` in the from_pretrained call; nothing else changes.
# NOTE(review): presumably follows a transformers keyword rename — confirm
# the version resolved by `$ZS run -p transformers` accepts `dtype`.
@@ -77,7 +94,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
7794t_import = time.monotonic()
7895
7996tokenizer = AutoTokenizer.from_pretrained("$MODEL_ID ")
80- model = AutoModelForCausalLM.from_pretrained("$MODEL_ID ", torch_dtype =torch.bfloat16, device_map="cpu")
97+ model = AutoModelForCausalLM.from_pretrained("$MODEL_ID ", dtype =torch.bfloat16, device_map="cpu")
8198model.eval()
8299t_model = time.monotonic()
83100
# Hunk 6 (scenario 2 run): the timed `$ZS run` now reads bench_zs_cold.py
# from BENCH_DIR, and its combined stdout/stderr goes through `tail -30`
# instead of the old grep for RESULT/TIME/Resolved/Daemon/Environment/Cache.
# NOTE(review): `$ZS` is expanded unquoted and is defined outside the visible
# hunks — presumably the path to the zerostart launcher; confirm it never
# contains spaces.
@@ -92,7 +109,7 @@ print(f"TIME import={t_import-t_script:.2f}s model={t_model-t_import:.2f}s infer
92109PYEOF
93110
94111ZS_START=$( date +%s%3N)
95- $ZS run -p torch -p transformers -p accelerate /tmp/ bench_zs_cold.py 2>&1 | grep -E " ^(RESULT|TIME|Resolved|Daemon|Environment|Cache) "
112+ $ZS run -p torch -p transformers -p accelerate " $BENCH_DIR / bench_zs_cold.py" 2>&1 | tail -30
96113ZS_END=$( date +%s%3N)
97114echo " Total wall clock (zerostart cold + load + inference): $(( ZS_END - ZS_START )) ms"
98115echo " "
# Hunk 7 (scenario 3): re-runs the same bench_zs_cold.py with the same
# package list and times it. The comment is expanded to say that both the
# zerostart package cache and the HF model cache are warm from scenario 2.
# The output filter changes from grep to `tail -30`.
# NOTE(review): no cache is cleared in the lines shown here, but two new-file
# lines between this hunk and the previous one are not visible — confirm
# nothing there resets ZEROSTART_CACHE or HF_HOME.
@@ -101,10 +118,11 @@ echo ""
101118# Scenario 3: zerostart warm + from_pretrained
102119# ============================================================
103120echo " --- Scenario 3: zerostart warm + from_pretrained ---"
104- # Cache is now populated from Scenario 2
121+ # zerostart package cache is warm from Scenario 2
122+ # HF model cache is warm from Scenario 2
105123
106124ZS_WARM_START=$( date +%s%3N)
107- $ZS run -p torch -p transformers -p accelerate /tmp/ bench_zs_cold.py 2>&1 | grep -E " ^(RESULT|TIME|Cache) "
125+ $ZS run -p torch -p transformers -p accelerate " $BENCH_DIR / bench_zs_cold.py" 2>&1 | tail -30
108126ZS_WARM_END=$( date +%s%3N)
109127echo " Total wall clock (zerostart warm + load + inference): $(( ZS_WARM_END - ZS_WARM_START )) ms"
110128echo " "
# Hunk 8 (scenario 4 scripts): both generated Python files move from /tmp to
# BENCH_DIR and gain a logging.basicConfig(level=INFO) line. The snapshot
# script gets the `torch_dtype=` -> `dtype=` rename, and the snapshot
# directory becomes $BENCH_DIR/e2e-snapshot for both snapshot() and
# hydrate(). ZEROSTART_CACHE is exported again before the hydrate script,
# with the same $BENCH_DIR/zs-cache value scenario 2 already exported.
# The snapshot-creation run has no date calls around it in the visible lines,
# so only the script's own "Snapshot created in ..." message reports its
# duration.
# NOTE(review): two consecutive banners are both labelled "Scenario 4"
# (create snapshot, then hydrate) — presumably intentional sub-steps; confirm.
@@ -114,39 +132,46 @@ echo ""
114132# ============================================================
115133echo " --- Scenario 4: Create snapshot for hydrate ---"
116134
117- cat > /tmp/bench_create_snap.py << PYEOF
118- import time
135+ cat > " $BENCH_DIR /bench_create_snap.py" << PYEOF
136+ import time, logging
137+ logging.basicConfig(level=logging.INFO, format="%(name)-20s %(message)s")
119138t0 = time.monotonic()
120139import torch
121140from transformers import AutoModelForCausalLM, AutoTokenizer
122141from zerostart.snapshot import snapshot
123142
124143tokenizer = AutoTokenizer.from_pretrained("$MODEL_ID ")
125- model = AutoModelForCausalLM.from_pretrained("$MODEL_ID ", torch_dtype =torch.bfloat16, device_map="cpu")
144+ model = AutoModelForCausalLM.from_pretrained("$MODEL_ID ", dtype =torch.bfloat16, device_map="cpu")
126145model.eval()
127146
128147import shutil
129- shutil.rmtree("/tmp /e2e-snapshot", ignore_errors=True)
130- snapshot(state={"model": model, "tokenizer": tokenizer}, path="/tmp /e2e-snapshot")
148+ shutil.rmtree("$BENCH_DIR /e2e-snapshot", ignore_errors=True)
149+ snapshot(state={"model": model, "tokenizer": tokenizer}, path="$BENCH_DIR /e2e-snapshot")
131150t1 = time.monotonic()
132151print(f"Snapshot created in {t1-t0:.2f}s")
133152PYEOF
134153
135- $ZS run -p torch -p transformers -p accelerate -p cloudpickle /tmp/ bench_create_snap.py 2>&1 | grep -E " ^(Snapshot|Cache) "
154+ $ZS run -p torch -p transformers -p accelerate -p cloudpickle " $BENCH_DIR / bench_create_snap.py" 2>&1 | tail -30
136155
137156echo " "
138157echo " --- Scenario 4: zerostart warm + hydrate + inference ---"
139158
140- cat > /tmp/bench_hydrate.py << PYEOF
141- import time
159+ # Reuse the warm zerostart package cache from scenarios 2/3 —
160+ # the comparison is model loading (hydrate vs from_pretrained),
161+ # not package installation.
162+ export ZEROSTART_CACHE=" $BENCH_DIR /zs-cache"
163+
164+ cat > " $BENCH_DIR /bench_hydrate.py" << PYEOF
165+ import time, logging
166+ logging.basicConfig(level=logging.INFO, format="%(name)-20s %(message)s")
142167t_script = time.monotonic()
143168
144169import torch
145170from zerostart.snapshot import hydrate
146171
147172t_import = time.monotonic()
148173
149- restored = hydrate("/tmp /e2e-snapshot")
174+ restored = hydrate("$BENCH_DIR /e2e-snapshot")
150175model = restored["model"]
151176model.eval()
152177tokenizer = restored["tokenizer"]
# Hunk 9 (scenario 4 hydrate run): the timed `$ZS run` now reads
# bench_hydrate.py from BENCH_DIR, and its output goes through `tail -30`
# instead of the old grep for RESULT/TIME/Cache. The package list, including
# cloudpickle, is unchanged.
@@ -163,7 +188,7 @@ print(f"TIME import={t_import-t_script:.2f}s hydrate={t_hydrate-t_import:.2f}s i
163188PYEOF
164189
165190ZS_HYD_START=$( date +%s%3N)
166- $ZS run -p torch -p transformers -p accelerate -p cloudpickle /tmp/ bench_hydrate.py 2>&1 | grep -E " ^(RESULT|TIME|Cache) "
191+ $ZS run -p torch -p transformers -p accelerate -p cloudpickle " $BENCH_DIR / bench_hydrate.py" 2>&1 | tail -30
167192ZS_HYD_END=$( date +%s%3N)
168193echo " Total wall clock (zerostart warm + hydrate + inference): $(( ZS_HYD_END - ZS_HYD_START )) ms"
169194echo " "
# Hunk 10 (summary + cleanup): after the summary table the script now removes
# BENCH_DIR, which by the earlier hunks holds the generated Python files, the
# zerostart cache, the HF cache and the snapshot.
# NOTE(review): the removal is the last statement and no `trap` is visible in
# the shown hunks — presumably an interrupted run leaves BENCH_DIR behind
# until the next run's initial `rm -rf`; confirm.
@@ -180,3 +205,6 @@ echo " 2. zerostart cold + from_pretrained: $(( ZS_END - ZS_START ))ms"
180205echo " 3. zerostart warm + from_pretrained: $(( ZS_WARM_END - ZS_WARM_START )) ms"
181206echo " 4. zerostart warm + hydrate (snapshot): $(( ZS_HYD_END - ZS_HYD_START )) ms"
182207echo " ============================================================"
208+
209+ # Cleanup
210+ rm -rf " $BENCH_DIR "
0 commit comments