# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT
import logging
import time
import threading

from cozeloop import set_log_level, new_client

class LLMRunner:
    """Wraps a cozeloop client and reports simulated LLM calls as trace spans."""

    def __init__(self, client):
        # cozeloop client used to start trace spans.
        self.client = client

    def llm_call(self):
        """Simulate a single LLM call and record it as a `model` span.

        Starts a span named "llmCall", fills in the standard LLM tags
        (input/output, provider, model name, token counts, first-response
        timestamp) and always finishes the span, even if tagging raises.
        """
        input_data = 'test input'
        span = self.client.start_span("llmCall", "model")
        try:
            # A real integration would invoke the model here, e.g.:
            # output = ChatOpenAI().invoke(input=input_data)
            # Mock the response and its latency instead.
            time.sleep(1)
            output = "I'm a robot. I don't have a specific name. You can give me one."
            input_token = 232
            output_token = 1211

            # set tag key: `input`
            span.set_input(input_data)
            # set tag key: `output`
            span.set_output(output)
            # set tag key: `model_provider`, e.g. openai
            span.set_model_provider("openai")
            # set tag key: `start_time_first_resp` — timestamp of the first
            # packet returned from the LLM, unit: microseconds. When it is set,
            # a `latency_first_resp` tag (first-packet latency relative to the
            # span's start time) is added automatically.
            span.set_start_time_first_resp(int(time.time() * 1000000))
            # set tag key: `input_tokens`; automatically summed with
            # `output_tokens` to produce the `tokens` tag.
            span.set_input_tokens(input_token)
            # set tag key: `output_tokens`; automatically summed with
            # `input_tokens` to produce the `tokens` tag.
            span.set_output_tokens(output_token)
            # set tag key: `model_name`, e.g. gpt-4-1106-preview
            span.set_model_name("gpt-4-1106-preview")
        finally:
            # Always close the span so it is reported even when tagging fails.
            # (The original `except Exception as e: raise e` was a no-op
            # re-raise and has been removed; exceptions still propagate.)
            span.finish()


# Enable verbose SDK logging, then build the shared client and runner used
# by the benchmark threads below.
set_log_level(logging.DEBUG)
# NOTE(review): presumably new_client() picks up cozeloop credentials/config
# from the environment — confirm against the SDK docs.
client = new_client()
llm_runner = LLMRunner(client)

def worker(interval, stop_event):
    """Fire one `llm_runner.llm_call` every `interval` seconds until stopped.

    interval: target seconds between launches (1 / qps).
    stop_event: threading.Event checked before each launch.

    Each call runs in its own thread so a slow call does not delay the next
    tick; the spawned threads are intentionally not joined.
    """
    while not stop_event.is_set():
        # Use a monotonic clock for pacing: time.time() is wall-clock and can
        # jump backwards/forwards (NTP, DST), which would distort the sleep.
        tick_start = time.monotonic()

        threading.Thread(target=llm_runner.llm_call).start()

        elapsed = time.monotonic() - tick_start
        if elapsed < interval:
            time.sleep(interval - elapsed)

def benchmark(qps, duration):
    """Drive `worker` at the requested load for a fixed window.

    qps: target requests per second.
    duration: how long to run the benchmark, in seconds.
    """
    pacing_interval = 1.0 / qps
    shutdown = threading.Event()

    pacer = threading.Thread(target=worker, args=(pacing_interval, shutdown))
    pacer.start()

    # Let the load run for the requested window, then stop and wait for the
    # pacing thread to exit.
    time.sleep(duration)
    shutdown.set()
    pacer.join()

def test_trace_benchmark():
    """Benchmark entry point: target 1000 QPS for 20 seconds.

    NOTE(review): at qps=1000 the 1 ms pacing interval is below typical OS
    sleep granularity, so the achieved rate will likely be lower — confirm
    whether 1000 is the intended target.
    """
    benchmark(qps=1000, duration=20)