Skip to content

Commit a566f15

Browse files
committed
add benchmark
1 parent 00e7528 commit a566f15

3 files changed

Lines changed: 95 additions & 5 deletions

File tree

cozeloop/internal/trace/exporter.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,7 @@
22
# SPDX-License-Identifier: MIT
33

44
import base64
5-
import json
65
import logging
7-
import threading
8-
from datetime import datetime
9-
106
from typing import Dict, List, Optional, Tuple, Callable, Any
117

128
from cozeloop.spec.tracespce import ModelInput, ModelMessagePart, ModelMessagePartType, ModelImageURL, ModelFileURL, ModelOutput

cozeloop/internal/trace/span_processor.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,14 @@
88
# available at https://github.com/open-telemetry/opentelemetry-python/blob/main/opentelemetry-sdk/src/opentelemetry/sdk/trace/export/__init__.py
99
#
1010
# This modified file is released under the same license.
11+
import threading
1112

1213
from cozeloop.internal.trace.exporter import *
1314
from cozeloop.internal.trace.queue_manager import BatchQueueManager, BatchQueueManagerOptions
1415
from cozeloop.internal.trace.span import Span
1516

1617
DEFAULT_MAX_QUEUE_LENGTH = 2048
17-
DEFAULT_MAX_EXPORT_BATCH_LENGTH = 100
18+
DEFAULT_MAX_EXPORT_BATCH_LENGTH = 512
1819
DEFAULT_MAX_EXPORT_BATCH_BYTE_SIZE = 4 * 1024 * 1024 # 4MB
1920
MAX_RETRY_EXPORT_BATCH_LENGTH = 50
2021
DEFAULT_SCHEDULE_DELAY = 1000 # millisecond
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
2+
# SPDX-License-Identifier: MIT
3+
import logging
4+
import time
5+
import threading
6+
7+
from cozeloop import set_log_level, new_client
8+
9+
10+
class LLMRunner:
    """Drives one simulated LLM call per invocation and reports it as a trace span."""

    def __init__(self, client):
        # Trace client used to create spans; must expose `start_span(name, kind)`.
        self.client = client

    def llm_call(self):
        """
        Simulate an LLM call and set relevant span tags.

        Raises: whatever the span tag setters raise; the span is always
        finished via the `finally` block.
        """
        input_data = 'test input'
        span = self.client.start_span("llmCall", "model")
        # Fix: the original wrapped this body in `except Exception as e: raise e`,
        # a no-op re-raise that only added noise; a plain try/finally preserves
        # the exact same propagation behavior while still closing the span.
        try:
            # Assuming llm is processing
            # output = ChatOpenAI().invoke(input=input_data)

            # mock resp: fixed latency and canned output/token counts
            time.sleep(1)
            output = "I'm a robot. I don't have a specific name. You can give me one."
            input_token = 232
            output_token = 1211

            # set tag key: `input`
            span.set_input(input_data)
            # set tag key: `output`
            span.set_output(output)
            # set tag key: `model_provider`, e.g., openai, etc.
            span.set_model_provider("openai")
            # set tag key: `start_time_first_resp`
            # Timestamp of the first packet return from LLM, unit: microseconds.
            # When `start_time_first_resp` is set, a tag named `latency_first_resp` calculated
            # based on the span's StartTime will be added, meaning the latency for the first packet.
            span.set_start_time_first_resp(int(time.time() * 1000000))
            # set tag key: `input_tokens`. The amount of input tokens.
            # when the `input_tokens` value is set, it will automatically sum with the `output_tokens` to calculate the `tokens` tag.
            span.set_input_tokens(input_token)
            # set tag key: `output_tokens`. The amount of output tokens.
            # when the `output_tokens` value is set, it will automatically sum with the `input_tokens` to calculate the `tokens` tag.
            span.set_output_tokens(output_token)
            # set tag key: `model_name`, e.g., gpt-4-1106-preview, etc.
            span.set_model_name("gpt-4-1106-preview")
        finally:
            # Always close the span, even if a tag setter raises.
            span.finish()
55+
56+
57+
# Enable verbose SDK logging so exporter/queue activity is visible while
# the benchmark runs.
set_log_level(logging.DEBUG)
# NOTE(review): new_client() takes no arguments here — presumably it reads
# its configuration/credentials from the environment; confirm before running.
client = new_client()
# Shared runner instance; `worker` below fires its llm_call on new threads.
llm_runner = LLMRunner(client)
60+
61+
def worker(interval, stop_event):
    """Fire one simulated LLM call every `interval` seconds until
    `stop_event` is set; each call runs on its own thread so pacing is
    not blocked by the call's latency."""
    while not stop_event.is_set():
        tick = time.time()

        threading.Thread(target=llm_runner.llm_call).start()

        # Sleep away whatever part of the interval remains after the
        # thread start-up overhead.
        remaining = interval - (time.time() - tick)
        if remaining > 0:
            time.sleep(remaining)
71+
72+
73+
def benchmark(qps, duration):
    """
    Drive `worker` at the requested rate for a fixed amount of time.

    qps: target calls per second
    duration: test run time in seconds
    """
    stop = threading.Event()

    # Single pacer thread; it spawns one call thread per tick.
    pacer = threading.Thread(target=worker, args=(1.0 / qps, stop))
    pacer.start()

    # Let the load run for the requested duration, then signal shutdown
    # and wait for the pacer to drain out.
    time.sleep(duration)
    stop.set()
    pacer.join()
90+
91+
92+
def test_trace_benchmark():
    # Sustain ~1000 simulated LLM calls per second for 20 seconds to
    # exercise the trace exporter's batching/queueing under load.
    benchmark(qps=1000, duration=20)

0 commit comments

Comments
 (0)