|
1 | 1 | """ |
2 | 2 | Autonomous autoresearch agent loop for ocean network. |
3 | 3 |
|
4 | | -Runs inside a Docker container on a remote GPU node. |
5 | | -Uses a local open-source LLM (Qwen3-32B-AWQ via vLLM) to iteratively |
6 | | -improve train.py, measuring val_bpb as the optimization target. |
| 4 | +Runs inside a Docker container on a 2×H200 GPU node. |
| 5 | +GPU 0: dedicated to the agent LLM (Qwen3.5-27B via vLLM, unquantized bf16) |
| 6 | +GPU 1: dedicated to training (full 141GB VRAM) |
7 | 7 | """ |
8 | 8 |
|
9 | 9 | import json |
|
14 | 14 | import time |
15 | 15 | from datetime import datetime, timezone |
16 | 16 |
|
# ---------------------------------------------------------------------------
# GPU isolation — must be set before any CUDA imports
# ---------------------------------------------------------------------------

# Physical GPU indices. Overridable via environment for deploy-time
# flexibility; defaults preserve the original fixed layout
# (agent LLM on GPU 0, training subprocess on GPU 1).
AGENT_GPU = os.environ.get("AGENT_GPU", "0")
TRAINING_GPU = os.environ.get("TRAINING_GPU", "1")

# Restrict this process — and the in-process vLLM engine it hosts — to the
# agent GPU. CUDA_VISIBLE_DEVICES is read once, at CUDA initialization, so
# this assignment must run before torch/vLLM touch the driver.
os.environ["CUDA_VISIBLE_DEVICES"] = AGENT_GPU  # vLLM only sees GPU 0
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Agent LLM served locally via vLLM (unquantized bf16 on the dedicated GPU).
AGENT_MODEL = "Qwen/Qwen3.5-27B"
GPU_MEMORY_UTILIZATION = 0.90  # fraction of VRAM vLLM may claim — dedicated GPU, use most of it
MAX_ITERATIONS = 200  # upper bound on agent improvement iterations
TRAINING_TIMEOUT = 600  # seconds (10 minutes) before a training subprocess is killed
MAX_MODEL_LEN = 65536  # total context window, input + output — dedicated GPU has plenty of room
MAX_OUTPUT_TOKENS = 16384  # max tokens for LLM output (enough for full train.py)
TEMPERATURE = 0.7  # sampling temperature for agent generations
STAGNATION_THRESHOLD = 5  # consecutive non-improvements before nudge
@@ -138,13 +146,18 @@ def run_training(train_py_content): |
138 | 146 | """ |
139 | 147 | write_file(TRAIN_PY_PATH, train_py_content) |
140 | 148 |
|
| 149 | + # Run training on the dedicated training GPU |
| 150 | + train_env = os.environ.copy() |
| 151 | + train_env["CUDA_VISIBLE_DEVICES"] = TRAINING_GPU |
| 152 | + |
141 | 153 | try: |
142 | 154 | result = subprocess.run( |
143 | 155 | [sys.executable, TRAIN_PY_PATH], |
144 | 156 | capture_output=True, |
145 | 157 | text=True, |
146 | 158 | timeout=TRAINING_TIMEOUT, |
147 | 159 | cwd="/app", |
| 160 | + env=train_env, |
148 | 161 | ) |
149 | 162 | except subprocess.TimeoutExpired as e: |
150 | 163 | stderr_text = e.stderr if isinstance(e.stderr, str) else (e.stderr.decode() if e.stderr else "") |
@@ -419,6 +432,7 @@ def main(): |
419 | 432 | max_model_len=MAX_MODEL_LEN, |
420 | 433 | dtype="auto", |
421 | 434 | trust_remote_code=True, |
| 435 | + enforce_eager=True, # avoid DeltaNet compilation issues with Qwen3.5 |
422 | 436 | ) |
423 | 437 | sampling_params = SamplingParams( |
424 | 438 | max_tokens=MAX_OUTPUT_TOKENS, |
|
0 commit comments