o
    :/i                  
   @   s  U d dl Z d dlZd dlmZmZ d dlmZ d dlmZ d dl	m
Z
mZmZ d dlmZ d dlmZ d dlmZmZ d dlmZmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZm Z  d dl!m"Z" d dl#m$Z$m%Z%m&Z&m'Z'm(Z( d dl)m*Z*m+Z+ ee,Z-eee.gdf Z/e0d Z1e1e/B Z2G dd deZ3de4e2 fddZ5G dd de3Z6G dd de3Z7G dd de7e6Z8G dd de6Z9G dd de6Z:ee
B eB Z;ee<d< d e;d!e4e. d"e=de>e.e;f fd#d$Z?d%e4e. d&e.de4e. fd'd(Z@d&e.de4e. fd)d*ZAG d+d, d,ZBdS )-    N)ABCabstractmethod)Callable)	TypeAlias)CounterGauge	Histogram)CUDAGraphLogging)SupportsMetricsInfo
VllmConfig)KVConnectorLoggingKVConnectorPrometheus)init_logger)STAT_LOGGER_PLUGINS_GROUPload_plugins_by_group)FinishReason)PerfMetricsLoggingPerfMetricsProm)unregister_vllm_metrics)CachingMetricsIterationStatsMultiModalCacheStatsPromptTokenStatsSchedulerStats)SpecDecodingLoggingSpecDecodingPromStatLoggerBaseAggregateStatLoggerBasec                
   @   s~   e Zd ZdZeddedefddZe		ddedB d	e	dB d
e
dB defddZedd Zdd ZdedefddZdS )r   a   Interface for logging metrics.

    API users may define custom loggers that implement this interface.
    However, note that the `SchedulerStats` and `IterationStats` classes
    are not considered stable interfaces and may change in future versions.
    r   vllm_configengine_indexc                 C      d S N )selfr   r   r"   r"   d/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/vllm/v1/metrics/loggers.py__init__0      zStatLoggerBase.__init__Nscheduler_statsiteration_statsmm_cache_stats
engine_idxc                 C   r    r!   r"   r#   r'   r(   r)   r*   r"   r"   r$   record3   s   zStatLoggerBase.recordc                 C   r    r!   r"   r#   r"   r"   r$   log_engine_initialized<   r&   z%StatLoggerBase.log_engine_initializedc                 C   r    r!   r"   r-   r"   r"   r$   log?   r&   zStatLoggerBase.logis_awakelevelc                 C   r    r!   r"   )r#   r0   r1   r"   r"   r$   record_sleep_stateB   r&   z!StatLoggerBase.record_sleep_stater   Nr   )__name__
__module____qualname____doc__r   r   intr%   r   r   r   r,   r.   r/   r2   r"   r"   r"   r$   r   (   s(    
returnc                  C   sR   g } t t D ]\}}t|trt|ts!td|d|d| | q| S )NzStat logger plugin z+ must be a subclass of StatLoggerBase (got z).)	r   r   items
isinstancetype
issubclassr   	TypeErrorappend)	factoriesnameplugin_classr"   r"   r$   !load_stat_logger_plugin_factoriesF   s   rD   c                   @   s*   e Zd ZdZededee fddZdS )r   zNAbstract base class for loggers that
    aggregate across multiple DP engines.r   engine_indexesc                 C   r    r!   r"   r#   r   rE   r"   r"   r$   r%   [   r&   z AggregateStatLoggerBase.__init__N)	r5   r6   r7   r8   r   r   listr9   r%   r"   r"   r"   r$   r   W   s    c                	   @   s   e Zd Zd"dedefddZdd Zdefd	d
Zde	fddZ
dededefddZedd Z		d#dedB de	dB dedB defddZdd Zdd Zdd Zd d! ZdS )$LoggingStatLoggerr   r   r   c                 C   s   || _ || _| t  t | _t | _t | _	t | _
t | _| jj}t|| _d | _| jjjr>t| jjj| jjj| _d| _d| _d| _d| _|  rUt|| _d S d S )N        F)r   r   _resettime	monotonicr   last_scheduler_statsr   prefix_caching_metrics connector_prefix_caching_metricsmm_caching_metricsr   spec_decoding_loggingkv_transfer_configr   kv_connector_loggingcudagraph_loggingobservability_configcudagraph_metricsr	   compilation_configcudagraph_modecudagraph_capture_sizeslast_prompt_throughputlast_generation_throughputengine_is_idle
aggregated_enable_perf_statsr   perf_metrics_logging)r#   r   r   rR   r"   r"   r$   r%   `   s.   

zLoggingStatLogger.__init__c                 C   s"   || _ d| _d| _d| _d| _d S r4   )last_log_timenum_prompt_tokensnum_generation_tokensnum_corrupted_reqsnum_preemptions)r#   nowr"   r"   r$   rJ   ~   s
   
zLoggingStatLogger._resetr:   c                 C   s
   | j jjS r!   )r   rU   enable_mfu_metricsr-   r"   r"   r$   r^      s   
z$LoggingStatLogger._enable_perf_statsr(   c                 C   sF   |  j |jj7  _ |  j|j7  _|  j|j7  _|  j|j7  _d S r!   )ra   prompt_token_statscomputedrb   rc   rd   num_preempted_reqs)r#   r(   r"   r"   r$   _track_iteration_stats   s   z(LoggingStatLogger._track_iteration_statstracked_statsre   c                 C   s"   || j  }|dkrdS t|| S )NrI   )r`   float)r#   rk   re   
delta_timer"   r"   r$   _get_throughput   s   
z!LoggingStatLogger._get_throughputc                 C   s   d | jS )NzEngine {:03d}: )formatr   r-   r"   r"   r$   
log_prefix   s   zLoggingStatLogger.log_prefixNr'   r)   r*   c                 C   s   |r|  | |dur[| j|j |jdur| j|j |jdur*| j|j |j }r5| j	| | j
durF|jdurF| j
|j | jsL|| _|j }r[|  r[| j| |re| j| dS dS )zLog Stats to standard output.N)rj   rN   observeprefix_cache_statsconnector_prefix_cache_statsrO   spec_decoding_statsrQ   kv_connector_statsrS   rT   cudagraph_statsr]   rM   
perf_statsr^   r_   rP   )r#   r'   r(   r)   r*   ru   rw   r"   r"   r$   r,      s,   





zLoggingStatLogger.recordc                 C   sV   t  }| | j|}| | j|}| | t||| j| jf | _	|| _|| _d S r!   )
rK   rL   rn   ra   rb   rJ   anyrZ   r[   r\   )r#   re   prompt_throughputgeneration_throughputr"   r"   r$   _update_stats   s   

zLoggingStatLogger._update_statsc                 C   r    r!   r"   r-   r"   r"   r$   aggregate_scheduler_stats      z+LoggingStatLogger.aggregate_scheduler_statsc                 C   sd  |    |   | jrtjntj}g d}| j| j| jj	| jj
g}| jdkr1|d || j |ddg || jjd | jjd g tjrU|d || j | jjsg|d || jjd  | jjsy|d	 || jjd  || jd
| g|R   | jj|d | jj|d | jd ur| jj|d |  r| jj|| jd d S d S )N)z$Avg prompt throughput: %.1f tokens/sz(Avg generation throughput: %.1f tokens/szRunning: %d reqszWaiting: %d reqsr   zPreemptions: %dzGPU KV cache usage: %.1f%%zPrefix cache hit rate: %.1f%%d   zCorrupted: %d reqsz&External prefix cache hit rate: %.1f%%zMM cache hit rate: %.1f%%z, )log_fn)r   rp   )r{   r|   r\   loggerdebuginforZ   r[   rM   num_running_reqsnum_waiting_reqsrd   r@   extendkv_cache_usagerN   hit_rateenvsVLLM_COMPUTE_NANS_IN_LOGITSrc   rO   emptyrP   rp   joinrQ   r/   rS   rT   r^   r_   )r#   r   	log_partslog_argsr"   r"   r$   r/      sV   







zLoggingStatLogger.logc                 C   s(   | j jjrtd| j| j jj d S d S )NzSEngine %03d: vllm cache_config_info with initialization after num_gpu_blocks is: %d)r   cache_confignum_gpu_blocksr   r   r   r-   r"   r"   r$   r.     s   
z(LoggingStatLogger.log_engine_initializedr3   r4   )r5   r6   r7   r   r9   r%   rJ   boolr^   r   rj   rl   rn   propertyrp   r   r   r,   r{   r|   r/   r.   r"   r"   r"   r$   rH   _   s.    	

#:rH   c                	   @   s   e Zd Zdedee fddZedd Zde	fdd	Z
	
	dded
B ded
B ded
B defddZdd Zdd Zdd Zd
S )AggregatedLoggingStatLoggerr   rE   c                 C   s2   || _ dd | j D | _tj| |dd d| _d S )Nc                 S   s   i | ]}|t  qS r"   )r   .0idxr"   r"   r$   
<dictcomp>   s    z8AggregatedLoggingStatLogger.__init__.<locals>.<dictcomp>)r   T)rE   last_scheduler_stats_dictrH   r%   r]   rF   r"   r"   r$   r%     s   
z$AggregatedLoggingStatLogger.__init__c                 C   s   d t| jS )Nz{} Engines Aggregated: )ro   lenrE   r-   r"   r"   r$   rp   &  s   z&AggregatedLoggingStatLogger.log_prefixr:   c                 C   s   dS )NFr"   r-   r"   r"   r$   r^   *  r}   z.AggregatedLoggingStatLogger._enable_perf_statsNr   r'   r(   r)   r*   c                 C   sH   || j vrtd| d S tj| ||||d |d ur"|| j|< d S d S NzUnexpected engine_idx: %dr)   r*   )rE   r   warningrH   r,   r   r+   r"   r"   r$   r,   .  s   
z"AggregatedLoggingStatLogger.recordc                 C   sh   t  | _| j D ]}| j j|j7  _| j j|j7  _| j j|j7  _q	| j jt| j  _d S r!   )r   rM   r   valuesr   r   r   r   )r#   rM   r"   r"   r$   r|   B  s   z5AggregatedLoggingStatLogger.aggregate_scheduler_statsc                 C   s   t |  d S r!   )rH   r/   r-   r"   r"   r$   r/   P  s   zAggregatedLoggingStatLogger.logc                 C   s,   | j jjrtdt| j| j jj d S d S )NzR%d Engines: vllm cache_config_info with initialization after num_gpu_blocks is: %d)r   r   r   r   r   r   rE   r-   r"   r"   r$   r.   S  s   
z2AggregatedLoggingStatLogger.log_engine_initializedr4   )r5   r6   r7   r   rG   r9   r%   r   rp   r   r^   r   r   r   r,   r|   r/   r.   r"   r"   r"   r$   r     s.    


r   c                	   @   sf   e Zd Zdedee deddfddZ		dd	edB d
e	dB de
dB defddZdd Zdd ZdS )PerEngineStatLoggerAdapterr   rE   per_engine_stat_logger_factoryr:   Nc                 C   s*   i | _ || _|D ]
}|||| j |< qd S r!   )per_engine_stat_loggersrE   )r#   r   rE   r   r   r"   r"   r$   r%   ^  s   z#PerEngineStatLoggerAdapter.__init__r   r'   r(   r)   r*   c                 C   s6   || j vrtd| d S | j | j||||d d S r   )r   r   r   r,   r+   r"   r"   r$   r,   k  s   


z!PerEngineStatLoggerAdapter.recordc                 C      | j  D ]}|  qd S r!   )r   r   r/   r#   per_engine_stat_loggerr"   r"   r$   r/   |     
zPerEngineStatLoggerAdapter.logc                 C   r   r!   )r   r   r.   r   r"   r"   r$   r.     r   z1PerEngineStatLoggerAdapter.log_engine_initializedr4   )r5   r6   r7   r   rG   r9   PerEngineStatLoggerFactoryr%   r   r   r   r,   r/   r.   r"   r"   r"   r$   r   ]  s.    

r   c                	   @   s   e Zd ZeZeZeZe	Z
eZeZ	ddedee dB fddZdedefdd	Z		
ddedB dedB dedB defddZddedefddZdd ZdS )PrometheusStatLoggerNr   rE   c           *         s@  |d u rdg}|| _ t  || _|jj| _|jj| _ddg}|jj|jj	}fdd|D }| 
|j||| _| |||| _| |||| _| jddd|d	}t||| _| jd
dd|d	}t||| _| jdd|dg ddi | _g d}|D ]fdd|D | j< q}|   | jddd|d	}	t|	|| _tjr| jdd|d}
t|
|| _| jdd|d}t||| _| jdd|d}t||| _| jdd|d}t||| _| jdd|d}t||| _| jdd |d}t||| _ | jd!d"|d}t||| _!| jd#d$|d}t||| _"| jd%d&|d}t||| _#| jd'd(|d)g d i | _$t%j&D ] fd*d|D | j$< q=| jd+d,|d}t||| _'| jd-d.|d}t||| _(| jd/d0|d}t||| _)i | _*| jd1d2|d3g dt+D ]fd4d|D | j*< q| j,d5d&t-||d6}t||| _.| j,d7d0t-||d6}t||| _/| j,d8d9g d:|d6}t||| _0| j,d;d<t-||d6}t||| _1| j,d=d>g d?|d6}t||| _2| j,d@dAt-||d6}t||| _3| j,dBdCg dD|d6}t||| _4| j,dEdFg dG|d6}t||| _5| j,dHdIg dG|d6}t||| _6g dJ}| j,dKdL||d6} t| || _7| j,dMdN||d6}!t|!|| _8| j,dOdP||d6}"t|"|| _9| j,dQdR||d6}#t|#|| _:| j,dSdT||d6}$t|$|| _;| j,dUdVt-||d6}%t|%|| _<| jrg dW}&| j,dXdY|&|d6}'t|'|| _=| j,dZd[|&|d6}(t|(|| _>| j,d\d]|&|d6})t|)|| _?n	i | _=i | _>i | _?d | _@|jAd urtB| j d^krtCDd_ d`| _Eda| _Fdb| _G|jAjH| _I| jdcddde| jE| jF| jGgd	| _@d S d S )fNr   
model_nameenginec                    s   i | ]	}| t |gqS r"   )strr   )r   r"   r$   r     s    z1PrometheusStatLogger.__init__.<locals>.<dictcomp>zvllm:num_requests_runningz.Number of requests in model execution batches.
mostrecentrB   documentationmultiprocess_mode
labelnameszvllm:num_requests_waitingz+Number of requests waiting to be processed.zvllm:engine_sleep_statezEngine sleep state; awake = 0 means engine is sleeping; awake = 1 means engine is awake; weights_offloaded = 1 means sleep level 1; discard_all = 1 means sleep level 2.sleep_state)rB   r   r   r   )awakeweights_offloadeddiscard_allc                    s   i | ]}| j |d qS ))r   r   r   )labelsr   )gauge_engine_sleep_stater   sr"   r$   r     s    zvllm:kv_cache_usage_percz*KV-cache usage. 1 means 100 percent usage.zvllm:corrupted_requestszMCorrupted requests, in terms of total number of requests with NaNs in logits.)rB   r   r   zvllm:prefix_cache_queriesz;Prefix cache queries, in terms of number of queried tokens.zvllm:prefix_cache_hitsz7Prefix cache hits, in terms of number of cached tokens.z"vllm:external_prefix_cache_querieszsExternal prefix cache queries from KV connector cross-instance cache sharing, in terms of number of queried tokens.zvllm:external_prefix_cache_hitszoExternal prefix cache hits from KV connector cross-instance cache sharing, in terms of number of cached tokens.zvllm:mm_cache_queriesz?Multi-modal cache queries, in terms of number of queried items.zvllm:mm_cache_hitsz;Multi-modal cache hits, in terms of number of cached items.zvllm:num_preemptionsz0Cumulative number of preemption from the engine.zvllm:prompt_tokensz#Number of prefill tokens processed.zvllm:prompt_tokens_by_sourcez"Number of prompt tokens by source.sourcec                    s    i | ]}|  t|qS r"   r   r   r   )counter_prompt_tokens_by_sourcer   r   r"   r$   r   _  s    
zvllm:prompt_tokens_cachedz2Number of cached prompt tokens (local + external).zvllm:prompt_tokens_recomputedz4Number of cached tokens recomputed for forward pass.zvllm:generation_tokensz&Number of generation tokens processed.zvllm:request_successz)Count of successfully processed requests.finished_reasonc              	      s$   i | ]}|  t|tqS r"   r   r   )counter_request_success_baser   reasonr"   r$   r     s    zvllm:request_prompt_tokens)rB   r   bucketsr   zvllm:request_generation_tokenszvllm:iteration_tokens_totalz.Histogram of number of tokens per engine_step.)             @         i   i   i   i   i    i @  z&vllm:request_max_num_generation_tokensz;Histogram of maximum number of requested generation tokens.zvllm:request_params_nz%Histogram of the n request parameter.)r         
      zvllm:request_params_max_tokensz.Histogram of the max_tokens request parameter.z vllm:time_to_first_token_secondsz,Histogram of time to first token in seconds.)MbP?{Gzt?{Gz?{Gz?g{Gz?gQ?g{Gz?皙?g      ?      ?      ?      ?      @      @      @      $@      4@      D@      T@g      d@g      @g      @z vllm:inter_token_latency_secondsz,Histogram of inter-token latency in seconds.)r   g?皙?g333333?r   g333333?皙?333333?g?r   r   r   r   r   r   r   r   r   r   z*vllm:request_time_per_output_token_secondsz7Histogram of time_per_output_token_seconds per request.)r   r   g?r   g      ?g       @r   r   r   g      .@r   g      >@r   g      I@g      N@g      ^@g      n@g      ~@g      @g      @g      @z vllm:e2e_request_latency_secondsz,Histogram of e2e request latency in seconds.zvllm:request_queue_time_secondsz5Histogram of time spent in WAITING phase for request.z#vllm:request_inference_time_secondsz5Histogram of time spent in RUNNING phase for request.z!vllm:request_prefill_time_secondsz5Histogram of time spent in PREFILL phase for request.z vllm:request_decode_time_secondsz4Histogram of time spent in DECODE phase for request.z'vllm:request_prefill_kv_computed_tokenszMHistogram of new KV tokens computed during prefill (excluding cached tokens).)r   gMb`?r   r   r   r   r   r   r   r   r   r   r   r      <   x   i,  iX  i  i  zvllm:kv_block_lifetime_secondsz|Histogram of KV cache block lifetime from allocation to eviction. Sampled metrics (controlled by --kv-cache-metrics-sample).z'vllm:kv_block_idle_before_evict_secondszqHistogram of idle time before KV cache block eviction. Sampled metrics (controlled by --kv-cache-metrics-sample).zvllm:kv_block_reuse_gap_secondszHistogram of time gaps between consecutive KV cache block accesses. Only the most recent accesses are recorded (ring buffer). Sampled metrics (controlled by --kv-cache-metrics-sample).r   zfvllm:lora_requests_info prometheus metrics may be incorrect/misleading with data parallel deployments.max_lorawaiting_lora_adaptersrunning_lora_adapterszvllm:lora_requests_infozRunning stats on lora requests.sum)JrE   r   r   rU   show_hidden_metricskv_cache_metricskv_cache_metrics_enabledmodel_configserved_model_namemax_model_len_spec_decoding_clsspeculative_configspec_decoding_prom_kv_connector_clskv_connector_prom_perf_metrics_clsperf_metrics_prom
_gauge_clsmake_per_enginegauge_scheduler_runninggauge_scheduler_waitingr   r2   gauge_kv_cache_usager   r   _counter_clscounter_corrupted_requestscounter_prefix_cache_queriescounter_prefix_cache_hits&counter_connector_prefix_cache_queries#counter_connector_prefix_cache_hitscounter_mm_cache_queriescounter_mm_cache_hitscounter_num_preempted_reqscounter_prompt_tokensr   r   ALL_SOURCEScounter_prompt_tokens_cached counter_prompt_tokens_recomputedcounter_generation_tokenscounter_request_successr   _histogram_clsbuild_1_2_5_buckets#histogram_num_prompt_tokens_request'histogram_num_generation_tokens_requesthistogram_iteration_tokens+histogram_max_num_generation_tokens_requesthistogram_n_requesthistogram_max_tokens_requesthistogram_time_to_first_tokenhistogram_inter_token_latency'histogram_request_time_per_output_tokenhistogram_e2e_time_requesthistogram_queue_time_request histogram_inference_time_requesthistogram_prefill_time_requesthistogram_decode_time_request%histogram_prefill_kv_computed_requesthistogram_kv_block_lifetime$histogram_kv_block_idle_before_evicthistogram_kv_block_reuse_gapgauge_lora_infolora_configr   r   r   labelname_max_loralabelname_waiting_lora_adapterslabelname_running_lora_adapters	max_lorasr   )*r#   r   rE   r   r   per_engine_labelvaluesr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r	  r
  r  r  request_latency_bucketsr  r  r  r  r  r  kv_cache_residency_bucketsr  r  r  r"   )r   r   r   r   r   r   r   r$   r%     s  



			
zPrometheusStatLogger.__init__r=   
config_objc                 C   s   |  }d|d< d\}}|dkrd}d}|d usJ d| | j||d| d	}| jD ]}|  }t||d< |jdi |d
 q-d S )N r   NNr   zvllm:cache_config_infoz(Information of the LLMEngine CacheConfigzUnknown metrics info type r   r   r   r"   )metrics_infor   keysrE   r   r   set)r#   r=   r  r"  rB   r   
info_gauger   r"   r"   r$   log_metrics_info  s$   
z%PrometheusStatLogger.log_metrics_infor   r'   r(   r)   r*   c                 C   s  |dur| j | |j | j| |j | j| |j | j| |j	j
 | j| |j	j |jdurL| j| |jj
 | j| |jj |jdurY| j|j| |jdurf| j|j| |jdurs| j|j| | jr|jr| j| }| j| }| j| }|jD ]}||j ||j |jD ]}	||	 qq| jdurd |j!" }
d |j#" }| j$|
| j%|| j&| j'i}| jj(di |)  |dur| j*| |j
 | j+| |j |du rdS t,j-r| j.| |j/ | j0| |j1 | j2| |j3 |j4}t5j6D ]}| j7| | |8| q| j9| |j: | j;| |j< | j=| |j> | j?| |j3|j>  |j@D ]}| jA| | qL|jBD ]}| jC| | q[|jDD ]}| jE| | qj|jFD ]}| jG| | qy|jHD ]s}| jI|jJ |   | jK| |jL | jM| |jN | jO| |jP | jQ| |jR | jS| |jT |j3tU|jVd }| jW| | | jX| |j3 | jY| |j> | jZ| |j[ |j\r| j]| |j\ qdS )zLog to prometheus.N,r   r"   )^r   r$  r   r   r   r   r   r   incrr   queriesr   hitsrs   r   r   rt   r   rq   ru   r   rw   r   r   kv_cache_eviction_eventsr  r  r  lifetime_secondsidle_secondsreuse_gaps_secondsr  r   r   r#  r   r  r  r  r   r   set_to_current_timer   r   r   r   r   rc   r   ri   r   ra   rg   r   r   r   get_by_sourcer   cached_tokensr   recomputed_tokensr   rb   r  max_num_generation_tokens_iterr  n_params_iterr  time_to_first_tokens_iterr
  inter_token_latencies_iterr  finished_requestsr  finish_reasonr  e2e_latencyr  queued_timer  prefill_timer  inference_timer  decode_timemaxnum_cached_tokensr  r  r  r  mean_time_per_output_tokenmax_tokens_paramr	  )r#   r'   r(   r)   r*   lifetime_hist	idle_hist
reuse_histeventgapr   r   lora_info_labelsptsr   max_gen_tokensn_paramttftitlfinished_requestprefill_kv_computedr"   r"   r$   r,     s
  






































zPrometheusStatLogger.recordsleepr1   c                 C   s~   d}d}d}|dkrd}|dkrd}n|dkrd}| j D ] }| jd | | | jd | | | jd | | qd S )Nr   r   r   r   r   r   )rE   r   r$  )r#   rO  r1   r   r   r   r*   r"   r"   r$   r2     s    
z'PrometheusStatLogger.record_sleep_statec                 C   s   |  d| jj d S )Nr   )r&  r   r   r-   r"   r"   r$   r.     s   z+PrometheusStatLogger.log_engine_initializedr!   r4   r   r   )r5   r6   r7   r   r   r   r   r   r  r   r   r   r   r   r   r   rG   r9   r%   r   r
   r&  r   r   r   r,   r2   r.   r"   r"   r"   r$   r     s@    

    e
 r   
PromMetricmetricengine_idxsr   c                    s    fdd|D S )Nc                    s   i | ]}|  t|qS r"   r   r   rR  r   r"   r$   r     s    z#make_per_engine.<locals>.<dictcomp>r"   )rR  rS  r   r"   rT  r$   r     s   r   mantissa_lst	max_valuec                 C   sD   d}g }	 | D ]}|d|  }||kr| | q|  S |d7 }q)z
    Builds a list of buckets with increasing powers of 10 multiplied by
    mantissa values until the value exceeds the specified maximum.

    r   Tr   r   )r@   )rU  rV  exponentr   mvaluer"   r"   r$   build_buckets  s   rZ  c                 C   s   t g d| S )zR
    Example:
    >>> build_1_2_5_buckets(100)
    [1, 2, 5, 10, 20, 50, 100]
    )r   r   r   )rZ  )rV  r"   r"   r$   r    s   r  c                   @   s   e Zd ZdZ					ddedee dB dee dB d	ed
edefddZ			dde
dB dedB dedB dedB fddZddedefddZdd Zdd ZdS ) StatLoggerManagera  
    StatLoggerManager:
        Logging happens at the level of the EngineCore (per scheduler).
         * DP: >1 EngineCore per AsyncLLM - loggers for each EngineCore.
         * With Local Logger, just make N copies for N EngineCores.
         * With Prometheus, we need a single logger with N "labels"

        This class abstracts away this implementation detail from
        the AsyncLLM, allowing the AsyncLLM to just call .record()
        and .log() to a simple interface.
    NTFr   r   rS  custom_stat_loggersenable_default_loggersaggregate_engine_loggingclient_countc                 C   s   |r|ndg| _ g | _g }|d ur|| |r3ttjr3|dkr(td n|r,tnt	}|
| d}	|D ])}
t|
trRt|
trR|
|| j d}t|trQd}	nt|| j |
d}| j
| q7|	so| j
t|| j  d S d S )Nr   r   zfAsyncLLM created with api_server_count more than 1; disabling stats logging to avoid incomplete stats.F)r   rE   T)r   rE   r   )rE   stat_loggersr   r   isEnabledForloggingINFOr   r   rH   r@   r<   r=   r>   r   r   r   )r#   r   rS  r\  r]  r^  r_  stat_logger_factoriesdefault_logger_factorycustom_prometheus_loggerstat_logger_factoryglobal_stat_loggerr"   r"   r$   r%     sL   	



zStatLoggerManager.__init__r'   r(   r)   r*   c                 C   s.   |d u rd}| j D ]}|j||||d q	d S )Nr   r   )r`  r,   )r#   r'   r(   r)   r*   stat_loggerr"   r"   r$   r,     s   
zStatLoggerManager.recordr   rO  r1   c                 C   s   | j D ]}||| qd S r!   )r`  r2   )r#   rO  r1   r   r"   r"   r$   r2   +  s   
z$StatLoggerManager.record_sleep_statec                 C      | j D ]}|  qd S r!   )r`  r/   )r#   r   r"   r"   r$   r/   /     

zStatLoggerManager.logc                 C   rj  r!   )r`  r.   )r#   
agg_loggerr"   r"   r$   r.   3  rk  z(StatLoggerManager.log_engine_initialized)NNTFr   r!  rP  )r5   r6   r7   r8   r   rG   r9   StatLoggerFactoryr   r%   r   r   r   r,   r2   r/   r.   r"   r"   r"   r$   r[    sD    


7
r[  )Crb  rK   abcr   r   collections.abcr   typingr   prometheus_clientr   r   r   	vllm.envsr   vllm.compilation.cuda_graphr	   vllm.configr
   r   4vllm.distributed.kv_transfer.kv_connector.v1.metricsr   r   vllm.loggerr   vllm.pluginsr   r   vllm.v1.enginer   vllm.v1.metrics.perfr   r   vllm.v1.metrics.prometheusr   vllm.v1.metrics.statsr   r   r   r   r   vllm.v1.spec_decode.metricsr   r   r5   r   r9   r   r=   AggregateStatLoggerFactoryrm  r   rG   rD   r   rH   r   r   r   rQ  __annotations__objectdictr   rZ  r  r[  r"   r"   r"   r$   <module>   s`   
 ;D(      7

	