o
    :/iz                     @   sv   d Z ddlZddlmZ ddlmZmZ ddlmZ ed Z	eG dd dZ
eG d	d
 d
ZeG dd dZdS )z*Configuration for model weight offloading.    N)Literal)Fieldmodel_validator)config)autouvaprefetchc                   @   s>   e Zd ZU dZedddZeed< 	 eedZ	ee
 ed< dS )UVAOffloadConfigzConfiguration for UVA (Unified Virtual Addressing) CPU offloading.

    Uses zero-copy access from CPU-pinned memory. Simple but requires
    fast CPU-GPU interconnect.
    r   defaultgecpu_offload_gbdefault_factorycpu_offload_paramsN)__name__
__module____qualname____doc__r   r   float__annotations__setr   str r   r   `/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/vllm/config/offload.pyr	      s   
 
r	   c                   @   sj   e Zd ZU dZedddZeed< 	 edddZeed< 	 edddZ	eed< 	 ee
dZe
e ed	< d
S )PrefetchOffloadConfigz~Configuration for prefetch-based CPU offloading.

    Groups layers and uses async H2D prefetch to hide transfer latency.
    r   r
   offload_group_size   offload_num_in_groupoffload_prefetch_stepr   offload_paramsN)r   r   r   r   r   r   intr   r   r   r   r    r   r   r   r   r   r   /   s   
 r   c                   @   sj   e Zd ZU dZdZeed< 	 eedZ	eed< 	 ee
dZe
ed< 	 edddd
dZd	efddZdS )OffloadConfigzEConfiguration for model weight offloading to reduce GPU memory usage.r   offload_backendr   r   r   after)modereturnc                 C   s   | j dks| jjdkr2| jj| jjkr"td| jj d| jj d| jjdk r2td| jj d| jjdk}| jjdk}| j d	krN|rNtj	d
dd | S | j dkr^|r^tj	ddd | S | j dkrn|rn|rntj	ddd | S )z+Validate offload configuration constraints.r   r   zoffload_num_in_group (z!) must be <= offload_group_size ()r   zoffload_prefetch_step (zK) must be >= 1 when prefetch offloading is enabled (offload_group_size > 0)r   z]Prefetch offload fields are set but offload_backend='uva'. Prefetch settings will be ignored.   )
stacklevelzXUVA offload fields are set but offload_backend='prefetch'. UVA settings will be ignored.r   zBoth UVA and prefetch offload fields are set with offload_backend='auto'. Prefetch backend will be selected. Set offload_backend explicitly to suppress this warning.)
r#   r   r   r   
ValueErrorr   r   r   warningswarn)self
uva_activeprefetch_activer   r   r   validate_offload_configa   sB   z%OffloadConfig.validate_offload_configc                 C   s*   ddl m}m} || t d}||}|S )a  
        Provide a hash that uniquely identifies all the offload configs.

        All fields are included because PrefetchOffloader patches module
        forwards and inserts custom ops (wait_prefetch, start_prefetch)
        into the computation graph. Changing any offload setting can
        alter which layers are hooked and how prefetch indices are
        computed, so the compilation cache must distinguish them.
        r   )get_hash_factorshash_factors)ignored_factors)vllm.config.utilsr1   r2   r   )r-   r1   r2   factorshash_strr   r   r   compute_hash   s   
zOffloadConfig.compute_hashN)r&   r"   )r   r   r   r   r#   OffloadBackendr   r   r	   r   r   r   r   r0   r   r7   r   r   r   r   r"   O   s   
 )r"   )r   r+   typingr   pydanticr   r   r4   r   r8   r	   r   r"   r   r   r   r   <module>   s   