1
1
import time
2
2
from abc import ABC , abstractmethod
3
- from typing import Dict , List
3
+ from typing import List
4
4
5
5
import numpy as np
6
6
import prometheus_client
7
7
8
+ from vllm .config import ModelConfig
8
9
from vllm .logger import init_logger
9
10
from vllm .v1 .metrics .stats import IterationStats , SchedulerStats
10
11
@@ -78,13 +79,13 @@ def log(self, scheduler_stats: SchedulerStats,
78
79
79
80
class PrometheusStatLogger (StatLoggerBase ):
80
81
81
- def __init__ (self , labels : Dict [ str , str ] ):
82
- self .labels = labels
82
+ def __init__ (self , model_config : ModelConfig ):
83
+ self ._unregister_vllm_metrics ()
83
84
84
- labelnames = self . labels . keys ()
85
- labelvalues = self . labels . values ()
85
+ labelnames = [ "model_name" ]
86
+ labelvalues = [ model_config . served_model_name ]
86
87
87
- self . _unregister_vllm_metrics ()
88
+ max_model_len = model_config . max_model_len
88
89
89
90
self .gauge_scheduler_running = prometheus_client .Gauge (
90
91
name = "vllm:num_requests_running" ,
@@ -106,6 +107,20 @@ def __init__(self, labels: Dict[str, str]):
106
107
documentation = "Number of generation tokens processed." ,
107
108
labelnames = labelnames ).labels (* labelvalues )
108
109
110
+ self .histogram_num_prompt_tokens_request = \
111
+ prometheus_client .Histogram (
112
+ name = "vllm:request_prompt_tokens" ,
113
+ documentation = "Number of prefill tokens processed." ,
114
+ buckets = build_1_2_5_buckets (max_model_len ),
115
+ labelnames = labelnames ).labels (* labelvalues )
116
+
117
+ self .histogram_num_generation_tokens_request = \
118
+ prometheus_client .Histogram (
119
+ name = "vllm:request_generation_tokens" ,
120
+ documentation = "Number of generation tokens processed." ,
121
+ buckets = build_1_2_5_buckets (max_model_len ),
122
+ labelnames = labelnames ).labels (* labelvalues )
123
+
109
124
def log (self , scheduler_stats : SchedulerStats ,
110
125
iteration_stats : IterationStats ):
111
126
"""Log to prometheus."""
@@ -116,9 +131,42 @@ def log(self, scheduler_stats: SchedulerStats,
116
131
self .counter_generation_tokens .inc (
117
132
iteration_stats .num_generation_tokens )
118
133
134
+ for finished_request in iteration_stats .finished_requests :
135
+ self .histogram_num_prompt_tokens_request .observe (
136
+ finished_request .num_prompt_tokens )
137
+ self .histogram_num_generation_tokens_request .observe (
138
+ finished_request .num_generation_tokens )
139
+
119
140
@staticmethod
120
141
def _unregister_vllm_metrics ():
121
142
# Unregister any existing vLLM collectors (for CI/CD
122
143
for collector in list (prometheus_client .REGISTRY ._collector_to_names ):
123
144
if hasattr (collector , "_name" ) and "vllm" in collector ._name :
124
145
prometheus_client .REGISTRY .unregister (collector )
146
+
147
+
148
def build_buckets(mantissa_lst: List[int], max_value: int) -> List[int]:
    """Build ascending histogram bucket bounds from mantissa values.

    Each mantissa is multiplied by successive powers of 10 (1, 10, 100, ...)
    and appended until the first product that exceeds ``max_value``, at which
    point the accumulated list is returned.

    Args:
        mantissa_lst: Ascending mantissa values, e.g. ``[1, 2, 5]``.
        max_value: Inclusive upper bound for generated bucket values.

    Returns:
        The list of bucket bounds, all ``<= max_value``.
    """
    buckets: List[int] = []
    power = 1  # running 10 ** exponent, avoids recomputing the power each pass
    while True:
        for mantissa in mantissa_lst:
            candidate = mantissa * power
            if candidate > max_value:
                return buckets
            buckets.append(candidate)
        power *= 10
164
+
165
+
166
def build_1_2_5_buckets(max_value: int) -> List[int]:
    """Build 1-2-5 series histogram buckets capped at ``max_value``.

    Example:
    >>> build_1_2_5_buckets(100)
    [1, 2, 5, 10, 20, 50, 100]
    """
    # The canonical 1-2-5 preferred-number series used for metric buckets.
    mantissas = [1, 2, 5]
    return build_buckets(mantissas, max_value)
0 commit comments