o
    :/i*                     @   s  U d dl mZmZ d dlmZ d dlmZmZ d dlm	Z	m
Z
mZmZ d dlZd dlZd dlmZ d dlmZ e	rLd dlmZ d d	lmZ d d
lmZ neZeZeZG dd de
ZG dd de
Zejeej B eejdB  B Zee d< eG dd dZ!edZ"dee"e"ge"f dee"dB  de"dB fddZ#eG dd dZ$eG dd dZ%eG dd dZ&G dd deZ'eG d d! d!Z(d"d#de&fd$d%Z)e&g i d&Z*dS )'    )ABCabstractmethod)Callable)	dataclassfield)TYPE_CHECKING
NamedTuple	TypeAliasTypeVarN)CUDAGraphStat)SchedulerOutput)KVConnectorKVEvents)KVConnectorWorkerMetadata)KVConnectorStatsc                   @   sR   e Zd ZU ejed< ejed< ejed< dZee dB ed< dedefdd	Z	dS )
LogprobsListslogprob_token_idslogprobssampled_token_ranksNcu_num_generated_tokensreq_idxnum_positionsc                 C   sH   | j d ur
| j | }|| }t| j|| | j|| | j|| d S N)r   r   r   r   r   )selfr   r   end_idx r   \/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/vllm/v1/outputs.pyslice_request(   s   

zLogprobsLists.slice_request)
__name__
__module____qualname__npndarray__annotations__r   listintr   r   r   r   r   r      s   
 


r   c                   @   s   e Zd ZU ejed< ejed< ejed< dZee dB ed< ddee dB fddZ	dd	d
Z
dejdd fddZedededd fddZdS )LogprobsTensorsr   r   selected_token_ranksNr   c                 C   s<   t | j  | j  | j  |d ur|S | jS r   )r   r   cpunumpyr   r&   r   )r   r   r   r   r   tolists>   s   zLogprobsTensors.tolistsreturnc                 C   sF   | j jjdkr	| S t| j jddd| jjddd| jjddd| jS )Nr'   T)non_blocking)r   devicetyper%   tor   r&   r   r   r   r   r   to_cpu_nonblockingH   s   z"LogprobsTensors.to_cpu_nonblockingmaskc                 C   s0   | j du s	J dt| j| | j| | j| S )z5Filter the logprobs tensors with the given bool mask.Nz1filter can't be used with cu_num_generated_tokens)r   r%   r   r   r&   )r   r1   r   r   r   filterR   s   zLogprobsTensors.filterr   num_tokens_per_positionc                 C   sF   t j| |ft jdd}t j|t jd}t j| t jdd}t|||dS )z$Create empty LogprobsTensors on CPU.r'   )dtyper,   )r4   )r   r   r&   )torchemptyint32
empty_likefloat32r%   )r   r3   r   r   r&   r   r   r   	empty_cpu]   s   zLogprobsTensors.empty_cpur   )r*   r%   )r   r   r   r5   Tensorr"   r   r#   r$   r)   r0   r2   staticmethodr:   r   r   r   r   r%   4   s    
 





r%   PoolerOutputc                   @   s$   e Zd ZU ejed< edB ed< dS )SamplerOutputsampled_token_idsNlogprobs_tensors)r   r   r   r5   r;   r"   r%   r   r   r   r   r>   v   s   
 
r>   Tfitemsr*   c                 C   sF   dd |D }t |dkrd S |d }|dd  D ]}| ||}q|S )Nc                 S   s   g | ]}|d ur|qS r   r   ).0itemr   r   r   
<listcomp>   s    z%_combine_non_none.<locals>.<listcomp>r      )len)rB   rC   non_nonecombinedrE   r   r   r   _combine_non_none   s   rK   c                   @   s   e Zd ZU dZee dB ed< dZee dB ed< dZe	dB ed< dZ
edB ed< dZedB ed< eedZee ed< d	Zeed
< dd ZedddZdS )KVConnectorOutputNfinished_sendingfinished_recvingkv_connector_statskv_cache_eventskv_connector_worker_metadefault_factoryinvalid_block_idsr   expected_finished_countc                 C   s0   | j  o| j o| j o| j o| j o| j S r   )rM   rN   rO   rP   rT   rQ   r/   r   r   r   is_empty   s   zKVConnectorOutput.is_emptyoutputsc                    s   t  dks
J dttjdd  D }ttjdd  D }tdd dd  D }td	d d
d  D }ttjdd  D }|d usIJ t fdd D sVJ  d j}| ||||||dS )Nr   zCannot merge empty outputsc                 S      g | ]}|j qS r   )rM   rD   outputr   r   r   rF          z+KVConnectorOutput.merge.<locals>.<listcomp>c                 S   rX   r   )rN   rY   r   r   r   rF      r[   c                 S   
   |  |S r   )	aggregatexyr   r   r   <lambda>      
 z)KVConnectorOutput.merge.<locals>.<lambda>c                 S   rX   r   )rO   rY   r   r   r   rF      r[   c                 S   r\   r   )merger^   r   r   r   ra      rb   c                 S   rX   r   )rP   rY   r   r   r   rF      r[   c                 S   rX   r   )rT   rY   r   r   r   rF      r[   c                 3   s     | ]}|j  d  j kV  qdS )r   N)rU   rY   rW   r   r   	<genexpr>   s
    
z*KVConnectorOutput.merge.<locals>.<genexpr>)rM   rN   rO   rP   rT   rU   )rH   rK   setunionallrU   )clsrW   rM   rN   rO   rP   rT   rU   r   rd   r   rc      s>   
zKVConnectorOutput.merge)rW   rL   )r   r   r   rM   rf   strr"   rN   rO   r   rP   r   rQ   r   r   rT   r$   rU   rV   classmethodrc   r   r   r   r   rL      s   
 
rL   c                   @   s6   e Zd ZU dZee dB ed< dZee dB ed< dS )ECConnectorOutputNrM   rN   )r   r   r   rM   rf   rj   r"   rN   r   r   r   r   rl      s   
 rl   c                   @   s   e Zd ZU ee ed< eeef ed< eedZ	eee  ed< dZ
edB ed< eedZeeedB f ed< dZeejdB  dB ed< dZedB ed	< dZedB ed
< dZeeef dB ed< dZedB ed< dS )ModelRunnerOutputreq_idsreq_id_to_indexrR   r?   Nr   prompt_logprobs_dictpooler_outputkv_connector_outputec_connector_outputnum_nans_in_logitscudagraph_stats)r   r   r   r#   rj   r"   dictr$   r   r?   r   r   rp   r%   rq   r5   r;   rr   rL   rs   rl   rt   ru   r   r   r   r   r   rm      s   
 rm   c                   @   s   e Zd ZedefddZdS )AsyncModelRunnerOutputr*   c                 C   s   dS )a  Get the ModelRunnerOutput for this async output.

        This is a blocking call that waits until the results are ready, which
        might involve copying device tensors to the host.
        This method should only be called once per AsyncModelRunnerOutput.
        Nr   r/   r   r   r   
get_output  s   z!AsyncModelRunnerOutput.get_outputN)r   r   r   r   rm   rx   r   r   r   r   rw     s    rw   c                   @   s*   e Zd ZU ee ed< eee  ed< dS )DraftTokenIdsrn   draft_token_idsN)r   r   r   r#   rj   r"   r$   r   r   r   r   ry     s   
 ry   scheduler_outputr   c                 C   sV   | j stS t| j  }dd t|D }dd |D }dd |D }t||||dS )zz
    Create a ModelRunnerOutput stub that contains the correct
    per-request bookkeeping but no generated data yet.
    c                 S   s   i | ]\}}||qS r   r   )rD   idxridr   r   r   
<dictcomp>$  s    z:make_empty_encoder_model_runner_output.<locals>.<dictcomp>c                 S   s   g | ]}d gqS )r   r   rD   _r   r   r   rF   '  r[   z:make_empty_encoder_model_runner_output.<locals>.<listcomp>c                 S   s   g | ]}d qS r   r   r   r   r   r   rF   *  s    )rn   ro   r?   rq   )num_scheduled_tokensEMPTY_MODEL_RUNNER_OUTPUTr#   keys	enumeraterm   )r{   rn   ro   r?   rq   r   r   r   &make_empty_encoder_model_runner_output  s   r   )rn   ro   )+abcr   r   collections.abcr   dataclassesr   r   typingr   r   r	   r
   r(   r    r5   vllm.compilation.cuda_graphr   vllm.v1.core.sched.outputr   vllm.distributed.kv_eventsr   1vllm.distributed.kv_transfer.kv_connector.v1.baser   4vllm.distributed.kv_transfer.kv_connector.v1.metricsr   objectr   r%   r;   r#   r=   r"   r>   rA   rK   rL   rl   rm   rw   ry   r   r   r   r   r   r   <module>   sJ   &?	.A(
