o
    :/i1                     @   s   d dl mZ d dlmZ d dlmZmZmZmZm	Z	 d dl
mZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ erHd d
lmZ eeZed Zed ZeG dd dZdS )    )Callable)InitVar)TYPE_CHECKINGAnyClassVarLiteralcast)Fieldfield_validator)Self)config)init_logger)	safe_hash)resolve_obj_by_qualname)SchedulerInterface)generatepoolingdraft)fcfspriorityc                   @   s  e Zd ZU dZee ed< 	 ee ed< 	 dZe	e ed< dZ
e	e ed< dZeed	< 	 eed
dZeed< 	 eddZedB ed< 	 ee
d
dZeed< 	 ed
d
dZeed< 	 ed
d
dZeed< 	 dZeed< 	 dZeed< 	 dZeed< 	 eddZeed< 	 eddZeed< 	 dZeed< 	 dZeed< 	 eddZeee B dB ed< 	 dZedB ed < 	 eddZ edB ed!< 	 ed
d
dZ!eed"< 	 e"d#d$ Z#d%ed& fd'd(Z$d%efd)d*Z%e&dd!d+d,e'd-e(d.e)d%e(fd/d0Z*deded%dfd1d2Z+ded%e,fd3d4Z-dS )5SchedulerConfigzScheduler configuration.max_model_lenis_encoder_decoderi   DEFAULT_MAX_NUM_BATCHED_TOKENS   DEFAULT_MAX_NUM_SEQSr   runner_type   )defaultgemax_num_batched_tokensN)r   max_num_scheduled_tokensmax_num_seqsmax_num_partial_prefillsmax_long_partial_prefillsr   long_prefill_token_thresholdTenable_chunked_prefillFis_multimodal_model)initmax_num_encoder_input_tokensencoder_cache_sizer   policydisable_chunked_mm_inputscheduler_clsdisable_hybrid_kv_cache_managerasync_schedulingstream_intervalc                  K   s.   d| vrd| d< d| vrd| d< t di | S )z`
        Factory method to create `SchedulerConfig` with default values for `InitVar`s.
        r   i    r   FN )r   )kwargsr1   r1   b/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/vllm/config/scheduler.pydefault_factory   s
   zSchedulerConfig.default_factoryreturnr   c                 C   sd   | j d u r| jrddlm} |S ddlm} |S td| j  t| j t	s-t
td | j S t| j S )Nr   )AsyncScheduler)	SchedulerzpUsing custom scheduler class %s. This scheduler interface is not public and compatibility may not be maintained.r   )r-   r/   "vllm.v1.core.sched.async_schedulerr6   vllm.v1.core.sched.schedulerr7   loggerwarning_once
isinstancestrr   typer   )selfr6   r7   r1   r1   r3   get_scheduler_cls   s   

z!SchedulerConfig.get_scheduler_clsc                 C   s,   g }| | j tt| dd }|S )a  
        WARNING: Whenever a new field is added to this config,
        ensure that it is included in the factors list if
        it affects the computation graph.

        Provide a hash that uniquely identifies all the configs
        that affect the structure of the computation
        graph from input ids/embeddings to the final hidden states,
        excluding anything before input ids/embeddings and after
        the final hidden states.
        F)usedforsecurity)appendr    r   r=   encode	hexdigest)r?   factorshash_strr1   r1   r3   compute_hash   s   zSchedulerConfig.compute_hashwrap)modevaluehandlerc                 C   s   |du rdS ||S )zFSkip validation if the value is `None` when initialisation is delayed.Nr1   )clsrJ   rK   r1   r1   r3   _skip_none_validation   s   z%SchedulerConfig._skip_none_validationc                 C   s   |rd| _ d| _d| _td | j| _| j| _| jr"td| j | jdkr>| jdkr3t	|d | _td| j| j
| j | | d S )	NTFr   zYEncoder-decoder models do not support chunked prefill nor prefix caching; disabling both.z:Chunked prefill is enabled with max_num_batched_tokens=%d.r   g{Gz?zConcurrent partial prefills enabled with max_num_partial_prefills=%d, max_long_partial_prefills=%d, long_prefill_token_threshold=%d)r,   r&   r%   r:   infor    r)   r*   r#   intr$   verify_max_model_len)r?   r   r   r1   r1   r3   __post_init__   s0   

	zSchedulerConfig.__post_init__c                 C   s   | j |k r| jstd| j  d| d| j | jk r'td| j  d| j d| j | j| kr:td| j | j|  | jdkrW| jsFtd| j|krWtd	| j d
| d| j| jkrjtd| jd| jd| S )Nzmax_num_batched_tokens (z!) is smaller than max_model_len (z). This effectively limits the maximum sequence length to max_num_batched_tokens and makes vLLM reject longer sequences. Please increase max_num_batched_tokens or decrease max_model_len.z1) must be greater than or equal to max_num_seqs (z).zlmax_num_batched_tokens (%d) exceeds max_num_seqs * max_model_len (%d). This may lead to unexpected behavior.r   zDChunked prefill must be enabled to set max_num_partial_prefills > 1.zlong_prefill_token_threshold (z,) cannot be greater than the max_model_len (zself.max_long_partial_prefills=z= must be less than or equal to self.max_num_partial_prefills=.)	r    r&   
ValueErrorr"   r:   warningr#   r%   r$   )r?   r   r1   r1   r3   rP      sP   

	



z$SchedulerConfig.verify_max_model_len).__name__
__module____qualname____doc__r   rO   __annotations__boolr   r   r   r   
RunnerTyper	   r    r!   r"   r#   r$   r%   r&   r'   r)   r*   r+   SchedulerPolicyr,   r-   r=   r>   objectr.   r/   r0   staticmethodr4   r@   rG   r
   classmethodr   r   rM   rQ   r   rP   r1   r1   r1   r3   r      sf   
 	

#r   N)collections.abcr   dataclassesr   typingr   r   r   r   r   pydanticr	   r
   typing_extensionsr   vllm.config.utilsr   vllm.loggerr   vllm.utils.hashingr   vllm.utils.import_utilsr   vllm.v1.core.sched.interfacer   rU   r:   r[   r\   r   r1   r1   r1   r3   <module>   s    