1616
1717import os
1818import shortuuid
19- from prometheus_client import Counter , Gauge
19+ from prometheus_client import Counter , Gauge , Histogram
20+
21+ from jetstream .engine .token_utils import DEFAULT_PREFILL_BUCKETS
2022
2123
2224class JetstreamMetricsCollector :
@@ -55,6 +57,39 @@ def __new__(cls):
5557 documentation = "Total time taken to start the Jetstream server" ,
5658 labelnames = ["id" ],
5759 )
# Distribution of input-token counts per request, labelled by "id".
# Bucket boundaries are the DEFAULT_PREFILL_BUCKETS imported from
# jetstream.engine.token_utils.
_request_input_length = Histogram(
    name="jetstream_request_input_length",
    documentation="Number of input tokens per request",
    buckets=DEFAULT_PREFILL_BUCKETS,
    labelnames=["id"],
)
# Distribution of output-token counts per request, labelled by "id".
# Bucket upper bounds follow a 1-2-5 progression per decade, from 1 up to
# 2,000,000 (20 explicit bounds in total).
_request_output_length = Histogram(
    name="jetstream_request_output_length",
    documentation="Number of output tokens per request",
    labelnames=["id"],
    buckets=[
        1, 2, 5,
        10, 20, 50,
        100, 200, 500,
        1_000, 2_000, 5_000,
        10_000, 20_000, 50_000,
        100_000, 200_000, 500_000,
        1_000_000, 2_000_000,
    ],
)
5893 _request_success_count = Counter (
5994 name = "jetstream_request_success_count" ,
6095 documentation = "Number of requests successfully completed" ,
@@ -76,5 +111,11 @@ def get_slots_used_percentage_metric(self, idx: int):
def get_server_startup_latency_metric(self):
    """Return the server-startup-latency metric labelled with this collector's id."""
    metric = self._server_startup_latency
    return metric.labels(id=self._id)
78113
def get_request_input_length(self):
    """Return the request-input-length histogram labelled with this collector's id."""
    histogram = self._request_input_length
    return histogram.labels(id=self._id)
116+
def get_request_output_length(self):
    """Return the request-output-length histogram labelled with this collector's id."""
    histogram = self._request_output_length
    return histogram.labels(id=self._id)
119+
def get_request_success_count_metric(self):
    """Return the request-success counter labelled with this collector's id."""
    counter = self._request_success_count
    return counter.labels(id=self._id)
0 commit comments