File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -109,7 +109,7 @@ def main(opt):
109109
110110 for model_name in models :
111111 logging .info (f"Testing model: { model_name } " )
112- input_text = "This is a test input for the LLM ."
112+ input_text = "Tell me an adventure story ."
113113 try :
114114 result = llm_client .test (model_name , input_text )
115115 logging .info (f"Test result for model '{ model_name } ': { result } " )
Original file line number Diff line number Diff line change 33import time
44import logging
55import uuid
6- from .token_calc import calculate_token_length
6+ # from .token_calc import calculate_token_length
77
88logging .basicConfig (level = logging .INFO , format = '%(asctime)s - %(levelname)s - %(message)s' )
99
@@ -24,6 +24,7 @@ def send_request_stream(self, request):
2424 response = b""
2525 parsed_responses = []
2626 output_text = ""
27+ token_count = 0
2728
2829 start_time = time .time ()
2930 first_packet_time = None
@@ -42,13 +43,14 @@ def send_request_stream(self, request):
4243 if first_packet_time is None :
4344 first_packet_time = time .time ()
4445 output_text += parsed_response ["data" ]["delta" ]
46+ token_count += 3
4547
4648 if "data" in parsed_response and parsed_response ["data" ].get ("finish" , False ):
4749 end_time = time .time ()
4850 total_time = end_time - start_time
4951 first_packet_latency = first_packet_time - start_time if first_packet_time else None
5052
51- token_count = calculate_token_length (output_text )
53+ # token_count = calculate_token_length(output_text)
5254 token_speed = token_count / total_time if total_time > 0 else 0
5355
5456 logging .info ("Stream reception completed." )
You can’t perform that action at this time.
0 commit comments