Commit 37738c2

Update server.py

1 parent 3cd8c9c commit 37738c2
1 file changed: 17 additions & 5 deletions

optillm/server.py

@@ -397,9 +397,9 @@ def execute_single_approach(approach, system_prompt, initial_query, client, mode
     if approach == 'none':
         # Use the request_config that was already prepared and passed to this function
         kwargs = request_config.copy() if request_config else {}
-
+
         # Remove items that are handled separately by the framework
-        kwargs.pop('n', None)  # n is handled by execute_n_times
+        # Note: 'n' is NOT removed - the none_approach passes it to the client which handles multiple completions
         kwargs.pop('stream', None)  # stream is handled by proxy()
 
         # Reconstruct original messages from system_prompt and initial_query
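
The 'n' change above shifts responsibility for multiple completions: instead of execute_n_times looping, the 'none' approach now leaves 'n' in the request and lets the upstream client return n choices itself. As a rough illustration, a minimal sketch of that pass-through (not code from this commit; the helper name and signature are made up):

    # Sketch only: assumed shape of the 'none' pass-through after this change.
    def none_passthrough_sketch(client, model, messages, request_config=None):
        kwargs = request_config.copy() if request_config else {}
        kwargs.pop('stream', None)  # streaming is handled by proxy()
        # 'n' stays in kwargs, so the upstream client itself returns n choices
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            **kwargs,
        )
        return [choice.message.content for choice in response.choices]
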
@@ -547,17 +547,29 @@ def execute_n_times(n: int, approaches, operation: str, system_prompt: str, init
     return responses, total_tokens
 
 def generate_streaming_response(final_response, model):
-    # Yield the final response
+    # Generate a unique response ID
+    response_id = f"chatcmpl-{int(time.time()*1000)}"
+    created = int(time.time())
+
+    # Yield the final response with OpenAI-compatible format
     if isinstance(final_response, list):
         for index, response in enumerate(final_response):
+            # First chunk includes role
             yield "data: " + json.dumps({
-                "choices": [{"delta": {"content": response}, "index": index, "finish_reason": "stop"}],
+                "id": response_id,
+                "object": "chat.completion.chunk",
+                "created": created,
                 "model": model,
+                "choices": [{"delta": {"role": "assistant", "content": response}, "index": index, "finish_reason": "stop"}],
             }) + "\n\n"
     else:
+        # First chunk includes role
         yield "data: " + json.dumps({
-            "choices": [{"delta": {"content": final_response}, "index": 0, "finish_reason": "stop"}],
+            "id": response_id,
+            "object": "chat.completion.chunk",
+            "created": created,
             "model": model,
+            "choices": [{"delta": {"role": "assistant", "content": final_response}, "index": 0, "finish_reason": "stop"}],
         }) + "\n\n"
 
     # Yield the final message to indicate the stream has ended
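
With the added id, object, and created fields plus the assistant role in the delta, each streamed chunk follows the OpenAI chat.completion.chunk shape, so ordinary OpenAI-style SSE clients should be able to parse it. A minimal consumer sketch, assuming optillm is serving its OpenAI-compatible endpoint locally (the URL, the model name, and the trailing "data: [DONE]" marker are assumptions, not shown in this diff):

    # Sketch only: reading the SSE stream produced by generate_streaming_response.
    import json
    import requests  # assumes a plain HTTP client; the OpenAI SDK would also work

    resp = requests.post(
        "http://localhost:8000/v1/chat/completions",  # assumed local optillm endpoint
        json={
            "model": "gpt-4o-mini",  # placeholder model name
            "stream": True,
            "messages": [{"role": "user", "content": "hello"}],
        },
        stream=True,
    )
    for line in resp.iter_lines():
        if not line or not line.startswith(b"data: "):
            continue
        payload = line[len(b"data: "):]
        if payload.strip() == b"[DONE]":  # assumed end-of-stream marker
            break
        chunk = json.loads(payload)
        delta = chunk["choices"][0]["delta"]
        print(delta.get("content", ""), end="", flush=True)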
