o
    :/i{/                     @   s>  d dl Z d dlZd dlmZ d dlmZmZ d dlmZ d dl	m
Z
mZ d dlZd dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZ d dlmZ d dlmZ e
rfd dlmZ d dl m!Z! eG dd dZ"G dd dZ#G dd de j$Z%e%j&ej'e%j(ej)e%j*ej+e%j,ej)e%j-ej.e%j/ej'e%j0ej1iZ2dS )    N)deque)CallableMapping)	dataclass)TYPE_CHECKINGAny)MultiModalFeatureSpec)PoolingParams)SamplingParams)&length_from_prompt_token_ids_or_embeds)EngineCoreEventEngineCoreEventTypeEngineCoreRequestFinishReason)StructuredOutputRequest)ConstantList)LoRARequest)	BlockHashc                   @   s\   e Zd ZU dZee dB ed< ee dB ed< eed< eed< e	dB ed< e
dddZdS )StreamingUpdatezLightweight data for streaming session continuation.

    Contains only the fields needed to update an existing streaming session
    with new input data.
    Nmm_featuresprompt_token_ids
max_tokensarrival_timesampling_paramsrequestRequestreturnStreamingUpdate | Nonec                 C   s&   |j sd S | |j|j|j|j|jdS )N)r   r   r   r   r   )	resumabler   r   r   r   r   )clsr    r    \/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/vllm/v1/request.pyfrom_request-   s   zStreamingUpdate.from_request)r   r   r   r   )__name__
__module____qualname____doc__listr   __annotations__intfloatr
   classmethodr"   r    r    r    r!   r      s   
 r   c                !   @   s  e Zd Z											d>dedee dB dedB dedB ded	edB d
e	j
dB dee dB dddedB dedeeef dB ded ged f dB dededB ddf ddZededed ged f dB dd fddZdeee B ddfddZd?ddZedefd d!Zedefd"d#Zedefd$d%Zedefd&d'Zedefd(d)Zedefd*d+Zdefd,d-Zdefd.d/ZdedB fd0d1Zd2edefd3d4Z 	d@d5e!d6edB ddfd7d8Z"dee# dB fd9d:Z$d;d defd<d=Z%dS )Ar   r   NF
request_idr   r   pooling_paramsclient_indexr   prompt_embedsr   lora_requestzLoRARequest | None
cache_saltprioritytrace_headersblock_hasherr   r   reasoning_endedr   c                 C   s  || _ || _|| _|| _|| _|	| _t|| _| jd ur!|| j_	|d ur'|nt

 | _tj| _g | _d | _d | _|d urAd| _n)|d urf|jd usLJ |j| _| jd urYtj| _|jd ure|jd| _ntd|| _|| _i | _t||| _g | _| jd ur| j ndg| j | _d| _d| _ g | _!d| _"|
| _#|pg | _$t%| j| _&t%| j| _'|| _(d| _)d| _*d| _+d| _,d| _-g | _.|| _/| 0  | 1 | _2|| _3d | _4d S )N   kv_transfer_paramsz6sampling_params and pooling_params can't both be unsetr   F)5r,   r.   r2   r   r-   r0   r   from_sampling_paramsstructured_output_requestr5   timer   RequestStatusWAITINGstatuseventsstop_reasonr7   r   WAITING_FOR_FSM
extra_argsget
ValueErrorr   r/   _prompt_embeds_per_block_hashesr   num_prompt_tokens_output_token_idscopy_all_token_idsnum_output_placeholdersdiscard_latest_async_tokensspec_token_idsnum_computed_tokensr1   r   r   output_token_idsall_token_idsr3   num_cached_tokensis_prefill_chunknum_nans_in_logitsnum_preemptionsnum_external_computed_tokensblock_hashes_block_hasherupdate_block_hashesget_skip_reading_prefix_cacheskip_reading_prefix_cacher   streaming_queue)selfr,   r   r   r-   r.   r   r/   r   r0   r1   r2   r3   r4   r   r5   r    r    r!   __init__;   sx   








zRequest.__init__r   c                 C   sB   | |j |j|j|j|j|j|j|j|j|j	|j
|j||j|jdS )N)r,   r.   r   r/   r   r   r-   r   r0   r1   r2   r3   r4   r   r5   )r,   r.   r   r/   r   r   r-   r   r0   r1   r2   r3   r   r5   )r   r   r4   r    r    r!   from_engine_core_request   s"   z Request.from_engine_core_request	token_idsc                 C   sH   t |tr| j| | j| n| j| | j| |   d S N)
isinstancer)   rG   appendrI   extendrW   )r[   r^   r    r    r!   append_output_token_ids   s   
zRequest.append_output_token_idsc                 C   s$   | j dur| j|  |  dS dS )z=Compute block hashes for any new full blocks and append them.N)rV   rU   rb   r[   r    r    r!   rW      s   
zRequest.update_block_hashesc                 C   s
   | j d uS r_   )r:   rd   r    r    r!   use_structured_output      
zRequest.use_structured_outputc                 C   
   t | jS r_   )lenrI   rd   r    r    r!   
num_tokens   rf   zRequest.num_tokensc                 C   s   t | jt | j S r_   )rh   rI   rL   rd   r    r    r!   num_tokens_with_spec   s   zRequest.num_tokens_with_specc                 C   rg   r_   )rh   rG   rd   r    r    r!   num_output_tokens   rf   zRequest.num_output_tokensc                 C   rg   r_   )rh   r   rd   r    r    r!   num_encoder_inputs   rf   zRequest.num_encoder_inputsc                 C   s
   | j dkS )Nr   )rl   rd   r    r    r!   has_encoder_inputs   rf   zRequest.has_encoder_inputsc                 C   s@   | j d ur| j jd ur| j jS | jd ur| jjd ur| jjS dS )NF)r   rY   r-   rd   r    r    r!   rX      s   

z%Request.get_skip_reading_prefix_cachec                 C      t | jS r_   )r<   is_finishedr>   rd   r    r    r!   ro        zRequest.is_finishedc                 C   rn   r_   )r<   get_finished_reasonr>   rd   r    r    r!   rq     rp   zRequest.get_finished_reasoninput_idc                 C   s"   |t | jk s	J | j| j S r_   )rh   r   mm_positionget_num_embeds)r[   rr   r    r    r!   get_num_encoder_embeds  s   zRequest.get_num_encoder_embeds
event_type	timestampc                 C   s   | j t|| d S r_   )r?   ra   r   	new_event)r[   rv   rw   r    r    r!   record_event  s   zRequest.record_eventc                 C   s   | j sd S | j g }| _ |S r_   )r?   )r[   r?   r    r    r!   take_events  s   zRequest.take_eventsotherc                 C   sX   | j |j kr| j |j k S | j|jkr| j|jk S | j|jkr$| j|jk S t| t|k S )z|
        Compare two requests based on priority, arrival time, and request ID.
        Used in priority scheduling.
        )r2   r   r,   id)r[   r{   r    r    r!   __lt__  s   zRequest.__lt__)r   NNNNNr   NNFN)r   Nr_   )&r#   r$   r%   strr'   r)   r
   r	   r*   torchTensorr   r   r   boolr\   r+   r   r]   rc   rW   propertyre   ri   rj   rk   rl   rm   rX   ro   r   rq   ru   r   ry   r   rz   r}   r    r    r    r!   r   :   s    

	

x



r   c                   @   s   e Zd ZdZe Ze Ze Ze Z	e Z
e Ze Ze Ze Ze Ze Ze ZdefddZedd defddZedd dedB fd	d
ZdS )r<   zStatus of a request.r   c                 C   s   | j S r_   )namerd   r    r    r!   __str__9  s   zRequestStatus.__str__r>   c                 C   s
   | t jkS r_   )r<   	PREEMPTEDr>   r    r    r!   ro   <  rf   zRequestStatus.is_finishedNc                 C   s
   t | S r_   )_FINISHED_REASON_MAPrC   r   r    r    r!   rq   @  rf   z!RequestStatus.get_finished_reason)r#   r$   r%   r&   enumautor=   rA   WAITING_FOR_REMOTE_KVSWAITING_FOR_STREAMING_REQRUNNINGr   FINISHED_STOPPEDFINISHED_LENGTH_CAPPEDFINISHED_ABORTEDFINISHED_IGNOREDFINISHED_ERRORFINISHED_REPETITIONr~   r   staticmethodr   ro   r   rq   r    r    r    r!   r<   '  s&    r<   )3r   r;   collectionsr   collections.abcr   r   dataclassesr   typingr   r   r   vllm.multimodal.inputsr   vllm.pooling_paramsr	   vllm.sampling_paramsr
   
vllm.utilsr   vllm.v1.enginer   r   r   r   !vllm.v1.structured_output.requestr   vllm.v1.utilsr   vllm.lora.requestr   vllm.v1.core.kv_cache_utilsr   r   r   IntEnumr<   r   STOPr   LENGTHr   ABORTr   r   ERRORr   r   
REPETITIONr   r    r    r    r!   <module>   s<    n#