
Commit 84c418d

feat: try with 2 GPUs (agent & training separated) | use better model
1 parent ec5afc3

4 files changed

Lines changed: 636 additions & 8 deletions


autoresearch/Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ RUN uv pip install --system --no-cache-dir \
     torch==2.9.1 --index-url https://download.pytorch.org/whl/cu128
 
 RUN uv pip install --system --no-cache-dir \
-    vllm \
+    "vllm>=0.17.0" \
     kernels>=0.11.7 \
     rustbpe>=0.1.0 \
     tiktoken>=0.11.0 \
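
Note: an unpinned `vllm` would resolve to whatever version is current at build time, and a cached Docker layer can silently keep an older install. A startup assertion along these lines (hypothetical, not part of this commit) would fail fast in that case:

    # Hypothetical sanity check, not in the commit: confirm the installed
    # vLLM actually satisfies the new floor before the agent loop starts.
    from importlib.metadata import version

    v = version("vllm")
    assert tuple(int(x) for x in v.split(".")[:2]) >= (0, 17), (
        f"vLLM {v} installed; this image expects >= 0.17.0"
    )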

autoresearch/algo.py

Lines changed: 20 additions & 6 deletions
@@ -1,9 +1,9 @@
 """
 Autonomous autoresearch agent loop for ocean network.
 
-Runs inside a Docker container on a remote GPU node.
-Uses a local open-source LLM (Qwen3-32B-AWQ via vLLM) to iteratively
-improve train.py, measuring val_bpb as the optimization target.
+Runs inside a Docker container on a 2×H200 GPU node.
+GPU 0: dedicated to the agent LLM (Qwen3.5-27B via vLLM, unquantized bf16)
+GPU 1: dedicated to training (full 141GB VRAM)
 """
 
 import json
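
The new docstring assumes a node where both devices are visible to the agent process at startup. A small pre-flight check (illustrative, not in the commit) would verify that topology before assigning a GPU to each role:

    # Hypothetical pre-flight check: confirm two devices before splitting roles.
    import torch

    assert torch.cuda.device_count() >= 2, "need 2 GPUs: agent on 0, training on 1"
    for i in range(torch.cuda.device_count()):
        props = torch.cuda.get_device_properties(i)
        print(i, props.name, f"{props.total_memory / 2**30:.0f} GiB")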
@@ -14,15 +14,23 @@
 import time
 from datetime import datetime, timezone
 
+# ---------------------------------------------------------------------------
+# GPU isolation — must be set before any CUDA imports
+# ---------------------------------------------------------------------------
+
+AGENT_GPU = "0"
+TRAINING_GPU = "1"
+os.environ["CUDA_VISIBLE_DEVICES"] = AGENT_GPU  # vLLM only sees GPU 0
+
 # ---------------------------------------------------------------------------
 # Configuration
 # ---------------------------------------------------------------------------
 
-AGENT_MODEL = "Qwen/Qwen3-32B-AWQ"
-GPU_MEMORY_UTILIZATION = 0.25  # ~35GB for weights+KV cache, rest for training
+AGENT_MODEL = "Qwen/Qwen3.5-27B"
+GPU_MEMORY_UTILIZATION = 0.90  # dedicated GPU — use most of it
 MAX_ITERATIONS = 200
 TRAINING_TIMEOUT = 600  # 10 minutes
-MAX_MODEL_LEN = 40960  # total context window (input + output)
+MAX_MODEL_LEN = 65536  # larger context — dedicated GPU has plenty of room
 MAX_OUTPUT_TOKENS = 16384  # max tokens for LLM output (enough for full train.py)
 TEMPERATURE = 0.7
 STAGNATION_THRESHOLD = 5  # consecutive non-improvements before nudge
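
The ordering is the load-bearing detail here: `CUDA_VISIBLE_DEVICES` is read when the CUDA runtime initializes in a process, which happens via the first `torch`/vLLM import, so the assignment must come first. A standalone sketch of the resulting behavior:

    # Set before any CUDA-touching import in the process.
    import os

    os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # expose only the agent GPU

    import torch  # CUDA initializes from here on, seeing a single device

    assert torch.cuda.device_count() == 1
    print(torch.cuda.get_device_name(0))  # physical GPU 0, addressed as cuda:0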
@@ -138,13 +146,18 @@ def run_training(train_py_content):
     """
     write_file(TRAIN_PY_PATH, train_py_content)
 
+    # Run training on the dedicated training GPU
+    train_env = os.environ.copy()
+    train_env["CUDA_VISIBLE_DEVICES"] = TRAINING_GPU
+
     try:
         result = subprocess.run(
             [sys.executable, TRAIN_PY_PATH],
             capture_output=True,
             text=True,
             timeout=TRAINING_TIMEOUT,
             cwd="/app",
+            env=train_env,
         )
     except subprocess.TimeoutExpired as e:
         stderr_text = e.stderr if isinstance(e.stderr, str) else (e.stderr.decode() if e.stderr else "")
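
Because the override goes through `env=` rather than mutating `os.environ`, the agent process keeps GPU 0 while only the child sees GPU 1, and the masked GPU 1 appears to the child as cuda:0, so train.py needs no device-selection changes. A throwaway probe (an assumption-laden sketch, not in the commit) confirms the isolation:

    import os
    import subprocess
    import sys

    # Hypothetical probe: the child should report exactly one visible device.
    train_env = os.environ.copy()
    train_env["CUDA_VISIBLE_DEVICES"] = "1"
    probe = subprocess.run(
        [sys.executable, "-c", "import torch; print(torch.cuda.device_count())"],
        capture_output=True, text=True, env=train_env,
    )
    print(probe.stdout.strip())  # expected: 1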
@@ -419,6 +432,7 @@ def main():
         max_model_len=MAX_MODEL_LEN,
         dtype="auto",
         trust_remote_code=True,
+        enforce_eager=True,  # avoid DeltaNet compilation issues with Qwen3.5
         )
     sampling_params = SamplingParams(
         max_tokens=MAX_OUTPUT_TOKENS,
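
`enforce_eager=True` makes vLLM run the model in eager mode, skipping CUDA graph capture and compilation at the cost of some decode throughput, a reasonable trade for a newly supported architecture. Assembled with the constants above, the agent-side setup would look roughly like this (a sketch, not the commit's exact main()):

    from vllm import LLM, SamplingParams

    llm = LLM(
        model="Qwen/Qwen3.5-27B",      # AGENT_MODEL
        gpu_memory_utilization=0.90,   # GPU_MEMORY_UTILIZATION
        max_model_len=65536,           # MAX_MODEL_LEN
        dtype="auto",
        trust_remote_code=True,
        enforce_eager=True,            # eager mode, no CUDA graphs
    )
    params = SamplingParams(max_tokens=16384, temperature=0.7)
    outputs = llm.generate(["<prompt built from train.py and the run log>"], params)
    print(outputs[0].outputs[0].text)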
