File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 3131MAX_ITERATIONS = 200
3232TRAINING_TIMEOUT = 600 # 10 minutes
3333MAX_MODEL_LEN = 65536 # larger context — dedicated GPU has plenty of room
34- MAX_OUTPUT_TOKENS = 4096 # train.py is ~2K tokens; 4K is plenty and keeps generation fast
34+ MAX_OUTPUT_TOKENS = 10000 # train.py is ~8K tokens; need enough room for the full file
3535TEMPERATURE = 0.7
3636STAGNATION_THRESHOLD = 3 # consecutive non-improvements before nudge
3737MAX_HISTORY_IN_PROMPT = 20 # only show last N iterations in prompt
@@ -432,7 +432,7 @@ def main():
432432 max_model_len = MAX_MODEL_LEN ,
433433 dtype = "auto" ,
434434 trust_remote_code = True ,
435- enforce_eager = True , # avoid DeltaNet compilation issues with Qwen3.5
435+ enforce_eager = True , # required: CUDA graphs fail for Qwen3.5 DeltaNet on this vLLM version
436436 )
437437 sampling_params = SamplingParams (
438438 max_tokens = MAX_OUTPUT_TOKENS ,
You can’t perform that action at this time.
0 commit comments