o
    :/i(                     @   s   d dl mZ d dlmZmZ d dlmZmZmZm	Z	 d dl
mZ d dlmZ eeZed Zed Zed Zed	 Zed
 ZeG dd dZdS )    )field)ClassVarLiteral)FieldSkipValidationfield_validatormodel_validator)config)init_logger)autofloat16bfloat16fp8fp8_e4m3fp8_e5m2fp8_inc
fp8_ds_mla)r   float32r   )allalignnone)sha256sha256_cborxxhashxxhash_cbor)nativelmcachec                   @   s
  e Zd ZU dZdZee ed< dZe	e ed< 	 e
dddZeed< 	 ed	d
ddZeed< 	 dZeed< 	 dZeed< 	 dZedB ed< 	 dZedB ed< 	 dZeed< 	 dZeed< 	 dZeed< 	 dZedB ed< 	 dZedB ed< 	 edd
dZedB ed< 	 dZeed< 	 dZeed< 	 dZe ed< 	 e
dddZ!edB ed < 	 e
dddZ"edB ed!< 	 dZ#eed"< 	 dZ$edB ed#< 	 dZ%edB ed$< 	 d%Z&e'ed&< 	 d'e(fd(d)Z)d*d+ Z*e
dddZ+eed,< 	 e,d-d.d3d/d0Z-e.dd-d.e/ded'efd1d2Z0dS )4CacheConfigzConfiguration for the KV cache.   DEFAULT_BLOCK_SIZEN
block_sizeF)defaultinituser_specified_block_sizeg?r      )r!   gtlegpu_memory_utilizationr   cache_dtypeis_attention_freenum_gpu_blocks_overridesliding_windowTenable_prefix_cachingr   prefix_caching_hash_algocalculate_kv_scalescpu_kvcache_space_bytesmamba_page_size_padded)r!   r%   mamba_block_sizemamba_cache_dtypemamba_ssm_cache_dtyper   mamba_cache_modenum_gpu_blocksnum_cpu_blockskv_sharing_fast_prefillkv_cache_memory_byteskv_offloading_sizer   kv_offloading_backendreturnc                 C   s*   h d}ddl m}m} || |}||S )a  
        WARNING: Whenever a new field is added to this config,
        ensure that it is included in the factors list if
        it affects the computation graph.

        Provide a hash that uniquely identifies all the configs
        that affect the structure of the computation
        graph from input ids/embeddings to the final hidden states,
        excluding anything before input ids/embeddings and after
        the final hidden states.
        >   r6   r5   r)   _block_size_resolvedr,   r'   r0   r/   r7   r*   r-   r#   r   )get_hash_factorshash_factors)vllm.config.utilsr=   r>   )selfignored_factorsr=   r>   factors rC   ^/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/vllm/config/cache.pycompute_hash   s   
zCacheConfig.compute_hashc                 C   s   dd | j  D S )Nc                 S   s   i | ]	\}}|t |qS rC   )str).0keyvaluerC   rC   rD   
<dictcomp>   s    z,CacheConfig.metrics_info.<locals>.<dictcomp>)__dict__itemsr@   rC   rC   rD   metrics_info   s   zCacheConfig.metrics_infor<   after)modec                 C   sH   | j r| S t| dd | jd u rt| d| j | S t| dd | S )Nr<   Tr    r#   )r<   object__setattr__r    r   rM   rC   rC   rD   _apply_block_size_default   s   
z%CacheConfig._apply_block_size_defaultc                 C   s   | dr
td |S )Nr   zUsing fp8 data type to store kv cache. It reduces the GPU memory footprint and boosts the performance. Meanwhile, it may cause accuracy drop without a proper scaling factor.)
startswithloggerinfo)clsr(   rC   rC   rD   _validate_cache_dtype   s
   
z!CacheConfig._validate_cache_dtype)r;   r   )1__name__
__module____qualname____doc__r   r   int__annotations__r    r   r   r#   boolr   r'   floatr(   
CacheDTyper)   r*   r+   r,   r-   PrefixCachingHashAlgor.   r/   r0   r1   r2   
MambaDTyper3   r4   MambaCacheModer5   r6   r7   r8   r9   r:   KVOffloadingBackendrF   rE   rN   r<   r   rS   r   classmethodrX   rC   rC   rC   rD   r      sp   
 
	#
r   N)dataclassesr   typingr   r   pydanticr   r   r   r   r?   r	   vllm.loggerr
   rY   rU   ra   rc   rd   rb   re   r   rC   rC   rC   rD   <module>   s   
