Skip to content

Commit 0e9160f

Browse files
committed
fix(autoresearch): reduce model memory
1 parent 09aa5c8 commit 0e9160f

1 file changed

Lines changed: 2 additions & 1 deletion

File tree

autoresearch/algo.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
GPU_MEMORY_UTILIZATION = 0.25 # ~35GB for LLM weights+KV cache, rest for training
2323
MAX_ITERATIONS = 200
2424
TRAINING_TIMEOUT = 600 # 10 minutes
25-
MAX_MODEL_LEN = 40960
25+
MAX_MODEL_LEN = 32000
2626
MAX_OUTPUT_TOKENS = 16384 # max tokens for LLM output (enough for full train.py)
2727
TEMPERATURE = 0.7
2828
STAGNATION_THRESHOLD = 3 # consecutive non-improvements before nudge
@@ -419,6 +419,7 @@ def main():
419419
max_model_len=MAX_MODEL_LEN,
420420
dtype="auto",
421421
trust_remote_code=True,
422+
enforce_eager=True, # required: torch.compile incompatibility in this vLLM version
422423
)
423424
sampling_params = SamplingParams(
424425
max_tokens=MAX_OUTPUT_TOKENS,

0 commit comments

Comments (0)