30
30
# begin-metrics-definitions
class Metrics:
    """Prometheus metric definitions for the vLLM engine.

    Subclasses can swap the backing metrics library by overriding the
    ``_gauge_cls`` / ``_counter_cls`` / ``_histogram_cls`` class attributes
    (see RayMetrics, which substitutes Ray-backed wrappers).
    """
    # Label name used on vllm:request_success_total to record why a
    # request finished.
    labelname_finish_reason = "finished_reason"
    # Metric constructors; prometheus_client by default, overridden by
    # subclasses that target a different metrics backend.
    _gauge_cls = prometheus_client.Gauge
    _counter_cls = prometheus_client.Counter
    _histogram_cls = prometheus_client.Histogram
34
36
35
37
def __init__ (self , labelnames : List [str ], max_model_len : int ):
36
38
# Unregister any existing vLLM collectors
37
39
self ._unregister_vllm_metrics ()
38
40
39
41
# Config Information
40
- self .info_cache_config = prometheus_client .Info (
41
- name = 'vllm:cache_config' ,
42
- documentation = 'information of cache_config' )
42
+ self ._create_info_cache_config ()
43
43
44
44
# System stats
45
45
# Scheduler State
46
- self .gauge_scheduler_running = self ._base_library . Gauge (
46
+ self .gauge_scheduler_running = self ._gauge_cls (
47
47
name = "vllm:num_requests_running" ,
48
48
documentation = "Number of requests currently running on GPU." ,
49
49
labelnames = labelnames )
50
- self .gauge_scheduler_waiting = self ._base_library . Gauge (
50
+ self .gauge_scheduler_waiting = self ._gauge_cls (
51
51
name = "vllm:num_requests_waiting" ,
52
52
documentation = "Number of requests waiting to be processed." ,
53
53
labelnames = labelnames )
54
- self .gauge_scheduler_swapped = self ._base_library . Gauge (
54
+ self .gauge_scheduler_swapped = self ._gauge_cls (
55
55
name = "vllm:num_requests_swapped" ,
56
56
documentation = "Number of requests swapped to CPU." ,
57
57
labelnames = labelnames )
58
58
# KV Cache Usage in %
59
- self .gauge_gpu_cache_usage = self ._base_library . Gauge (
59
+ self .gauge_gpu_cache_usage = self ._gauge_cls (
60
60
name = "vllm:gpu_cache_usage_perc" ,
61
61
documentation = "GPU KV-cache usage. 1 means 100 percent usage." ,
62
62
labelnames = labelnames )
63
- self .gauge_cpu_cache_usage = self ._base_library . Gauge (
63
+ self .gauge_cpu_cache_usage = self ._gauge_cls (
64
64
name = "vllm:cpu_cache_usage_perc" ,
65
65
documentation = "CPU KV-cache usage. 1 means 100 percent usage." ,
66
66
labelnames = labelnames )
67
67
68
68
# Iteration stats
69
- self .counter_num_preemption = self ._base_library . Counter (
69
+ self .counter_num_preemption = self ._counter_cls (
70
70
name = "vllm:num_preemptions_total" ,
71
71
documentation = "Cumulative number of preemption from the engine." ,
72
72
labelnames = labelnames )
73
- self .counter_prompt_tokens = self ._base_library . Counter (
73
+ self .counter_prompt_tokens = self ._counter_cls (
74
74
name = "vllm:prompt_tokens_total" ,
75
75
documentation = "Number of prefill tokens processed." ,
76
76
labelnames = labelnames )
77
- self .counter_generation_tokens = self ._base_library . Counter (
77
+ self .counter_generation_tokens = self ._counter_cls (
78
78
name = "vllm:generation_tokens_total" ,
79
79
documentation = "Number of generation tokens processed." ,
80
80
labelnames = labelnames )
81
- self .histogram_time_to_first_token = self ._base_library . Histogram (
81
+ self .histogram_time_to_first_token = self ._histogram_cls (
82
82
name = "vllm:time_to_first_token_seconds" ,
83
83
documentation = "Histogram of time to first token in seconds." ,
84
84
labelnames = labelnames ,
85
85
buckets = [
86
86
0.001 , 0.005 , 0.01 , 0.02 , 0.04 , 0.06 , 0.08 , 0.1 , 0.25 , 0.5 ,
87
87
0.75 , 1.0 , 2.5 , 5.0 , 7.5 , 10.0
88
88
])
89
- self .histogram_time_per_output_token = self ._base_library . Histogram (
89
+ self .histogram_time_per_output_token = self ._histogram_cls (
90
90
name = "vllm:time_per_output_token_seconds" ,
91
91
documentation = "Histogram of time per output token in seconds." ,
92
92
labelnames = labelnames ,
@@ -97,67 +97,145 @@ def __init__(self, labelnames: List[str], max_model_len: int):
97
97
98
98
# Request stats
99
99
# Latency
100
- self .histogram_e2e_time_request = self ._base_library . Histogram (
100
+ self .histogram_e2e_time_request = self ._histogram_cls (
101
101
name = "vllm:e2e_request_latency_seconds" ,
102
102
documentation = "Histogram of end to end request latency in seconds." ,
103
103
labelnames = labelnames ,
104
104
buckets = [1.0 , 2.5 , 5.0 , 10.0 , 15.0 , 20.0 , 30.0 , 40.0 , 50.0 , 60.0 ])
105
105
# Metadata
106
- self .histogram_num_prompt_tokens_request = self ._base_library . Histogram (
106
+ self .histogram_num_prompt_tokens_request = self ._histogram_cls (
107
107
name = "vllm:request_prompt_tokens" ,
108
108
documentation = "Number of prefill tokens processed." ,
109
109
labelnames = labelnames ,
110
110
buckets = build_1_2_5_buckets (max_model_len ),
111
111
)
112
112
self .histogram_num_generation_tokens_request = \
113
- self ._base_library . Histogram (
113
+ self ._histogram_cls (
114
114
name = "vllm:request_generation_tokens" ,
115
115
documentation = "Number of generation tokens processed." ,
116
116
labelnames = labelnames ,
117
117
buckets = build_1_2_5_buckets (max_model_len ),
118
118
)
119
- self .histogram_best_of_request = self ._base_library . Histogram (
119
+ self .histogram_best_of_request = self ._histogram_cls (
120
120
name = "vllm:request_params_best_of" ,
121
121
documentation = "Histogram of the best_of request parameter." ,
122
122
labelnames = labelnames ,
123
123
buckets = [1 , 2 , 5 , 10 , 20 ],
124
124
)
125
- self .histogram_n_request = self ._base_library . Histogram (
125
+ self .histogram_n_request = self ._histogram_cls (
126
126
name = "vllm:request_params_n" ,
127
127
documentation = "Histogram of the n request parameter." ,
128
128
labelnames = labelnames ,
129
129
buckets = [1 , 2 , 5 , 10 , 20 ],
130
130
)
131
- self .counter_request_success = self ._base_library . Counter (
131
+ self .counter_request_success = self ._counter_cls (
132
132
name = "vllm:request_success_total" ,
133
133
documentation = "Count of successfully processed requests." ,
134
134
labelnames = labelnames + [Metrics .labelname_finish_reason ])
135
135
136
136
# Deprecated in favor of vllm:prompt_tokens_total
137
- self .gauge_avg_prompt_throughput = self ._base_library . Gauge (
137
+ self .gauge_avg_prompt_throughput = self ._gauge_cls (
138
138
name = "vllm:avg_prompt_throughput_toks_per_s" ,
139
139
documentation = "Average prefill throughput in tokens/s." ,
140
140
labelnames = labelnames ,
141
141
)
142
142
# Deprecated in favor of vllm:generation_tokens_total
143
- self .gauge_avg_generation_throughput = self ._base_library . Gauge (
143
+ self .gauge_avg_generation_throughput = self ._gauge_cls (
144
144
name = "vllm:avg_generation_throughput_toks_per_s" ,
145
145
documentation = "Average generation throughput in tokens/s." ,
146
146
labelnames = labelnames ,
147
147
)
148
148
149
+ def _create_info_cache_config (self ) -> None :
150
+ # Config Information
151
+ self .info_cache_config = prometheus_client .Info (
152
+ name = 'vllm:cache_config' ,
153
+ documentation = 'information of cache_config' )
154
+
149
155
def _unregister_vllm_metrics (self ) -> None :
150
- for collector in list (self . _base_library .REGISTRY ._collector_to_names ):
156
+ for collector in list (prometheus_client .REGISTRY ._collector_to_names ):
151
157
if hasattr (collector , "_name" ) and "vllm" in collector ._name :
152
- self ._base_library .REGISTRY .unregister (collector )
158
+ prometheus_client .REGISTRY .unregister (collector )
159
+
160
+
161
+ # end-metrics-definitions
162
+
163
+
164
class _RayGaugeWrapper:
    """Adapter exposing the prometheus_client.Gauge API on top of
    ray.util.metrics.Gauge."""

    def __init__(self,
                 name: str,
                 documentation: str = "",
                 labelnames: Optional[List[str]] = None):
        # Ray expects tag keys as a tuple (or None when unlabeled).
        tag_keys = tuple(labelnames) if labelnames else None
        self._gauge = ray_metrics.Gauge(name=name,
                                        description=documentation,
                                        tag_keys=tag_keys)

    def labels(self, **labels):
        # Emulate prometheus_client's fluent ``labels(...)`` by stashing
        # the label values as Ray default tags and returning self.
        self._gauge.set_default_tags(labels)
        return self

    def set(self, value: Union[int, float]):
        return self._gauge.set(value)
183
+
184
+
185
class _RayCounterWrapper:
    """Adapter exposing the prometheus_client.Counter API on top of
    ray.util.metrics.Counter."""

    def __init__(self,
                 name: str,
                 documentation: str = "",
                 labelnames: Optional[List[str]] = None):
        # Ray expects tag keys as a tuple (or None when unlabeled).
        tag_keys = tuple(labelnames) if labelnames else None
        self._counter = ray_metrics.Counter(name=name,
                                            description=documentation,
                                            tag_keys=tag_keys)

    def labels(self, **labels):
        # Emulate prometheus_client's fluent ``labels(...)`` by stashing
        # the label values as Ray default tags and returning self.
        self._counter.set_default_tags(labels)
        return self

    def inc(self, value: Union[int, float] = 1.0):
        # prometheus_client accepts a zero increment but Ray's Counter
        # presumably does not — drop no-op increments for API parity.
        # TODO(review): confirm against ray.util.metrics docs.
        if value == 0:
            return
        return self._counter.inc(value)
206
+
207
+
208
class _RayHistogramWrapper:
    """Adapter exposing the prometheus_client.Histogram API on top of
    ray.util.metrics.Histogram."""

    def __init__(self,
                 name: str,
                 documentation: str = "",
                 labelnames: Optional[List[str]] = None,
                 buckets: Optional[List[float]] = None):
        # Ray expects tag keys as a tuple (or None when unlabeled) and
        # calls histogram buckets "boundaries".
        tag_keys = tuple(labelnames) if labelnames else None
        self._histogram = ray_metrics.Histogram(name=name,
                                                description=documentation,
                                                tag_keys=tag_keys,
                                                boundaries=buckets)

    def labels(self, **labels):
        # Emulate prometheus_client's fluent ``labels(...)`` by stashing
        # the label values as Ray default tags and returning self.
        self._histogram.set_default_tags(labels)
        return self

    def observe(self, value: Union[int, float]):
        return self._histogram.observe(value)
153
229
154
230
155
231
class RayMetrics(Metrics):
    """
    RayMetrics is used by RayPrometheusStatLogger to log to Ray metrics.
    Provides the same metrics as Metrics but uses Ray's util.metrics library.
    """
    # Swap in the Ray-backed wrappers that mimic the prometheus_client
    # Gauge/Counter/Histogram constructor and method signatures.
    _gauge_cls = _RayGaugeWrapper
    _counter_cls = _RayCounterWrapper
    _histogram_cls = _RayHistogramWrapper
161
239
162
240
def __init__ (self , labelnames : List [str ], max_model_len : int ):
163
241
if ray_metrics is None :
@@ -168,8 +246,9 @@ def _unregister_vllm_metrics(self) -> None:
168
246
# No-op on purpose
169
247
pass
170
248
171
-
172
- # end-metrics-definitions
249
    def _create_info_cache_config(self) -> None:
        """Override: skip creating the cache_config Info metric.

        The base class creates a prometheus_client.Info here; this
        subclass intentionally does nothing instead.
        """
        # No-op on purpose
        pass
173
252
174
253
175
254
def build_1_2_5_buckets (max_value : int ) -> List [int ]:
@@ -457,4 +536,7 @@ def log(self, stats: Stats):
457
536
458
537
class RayPrometheusStatLogger(PrometheusStatLogger):
    """RayPrometheusStatLogger uses Ray metrics instead."""
    # Use the Ray-backed Metrics subclass in place of the prometheus one.
    _metrics_cls = RayMetrics

    def info(self, type: str, obj: SupportsMetricsInfo) -> None:
        # Override: intentionally a no-op — RayMetrics has no Info metric,
        # so there is nothing to record here.
        return None
0 commit comments