o
    :/iT1                     @   sh  d dl mZ d dl mZ d dlmZ d dlmZmZ d dl	Z
d dlZd dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ eeZeG dd dZeG dd dZG dd dZeddddg ddZededZG dd dee ZeG dd dZG dd dee Z eG dd dZ!G dd dee! Z"eG d d! d!Z#G d"d# d#ee# Z$dS )$    )MutableSequence)Sequence)	dataclass)AnyGenericN)TypeVar)init_logger)PromptLogprobsSampleLogprobs)LoRARequest)RequestStateStatsc                   @   s   e Zd ZU dZeed< eed< ee ed< edB ed< e	dB ed< dZ
ejdB ed< dZedB ed	< dZeeB dB ed
< dZedB ed< defddZdefddZdS )CompletionOutputa!  The output data of one completion output of a request.

    Args:
        index: The index of the output in the request.
        text: The generated output text.
        token_ids: The token IDs of the generated output text.
        cumulative_logprob: The cumulative log probability of the generated
            output text.
        logprobs: The log probabilities of the top probability words at each
            position if the logprobs are requested.
        finish_reason: The reason why the sequence is finished.
        stop_reason: The stop string or token id that caused the completion
            to stop, None if the completion finished for some other reason
            including encountering the EOS token.
        lora_request: The LoRA request that was used to generate the output.
    indextext	token_idsNcumulative_logproblogprobsrouted_expertsfinish_reasonstop_reasonlora_requestreturnc                 C   s
   | j d uS N)r   self r   Y/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/vllm/outputs.pyfinished2   s   
zCompletionOutput.finishedc                 C   sF   d| j  d| jd| j d| j d| j d| j d| j d| j d	S )
NzCompletionOutput(index=z, text=z, token_ids=z, routed_experts=z, cumulative_logprob=z, logprobs=z, finish_reason=z, stop_reason=))r   r   r   r   r   r   r   r   r   r   r   r   __repr__5   s    
zCompletionOutput.__repr__)__name__
__module____qualname____doc__int__annotations__strGenericSequencefloatr
   r   npndarrayr   r   r   r   boolr   r   r   r   r   r   r      s   
 r   c                   @   s<   e Zd ZU dZejed< defddZde	de
fddZd	S )
PoolingOutputznThe output data of one pooling output of a request.

    Args:
        data: The extracted hidden states.
    datar   c                 C      d| j  dS )NzPoolingOutput(data=r   )r-   r   r   r   r   r   L      zPoolingOutput.__repr__otherc                 C   s    t || jot| j|jk S r   )
isinstance	__class__r+   r-   all)r   r0   r   r   r   __eq__O   s   zPoolingOutput.__eq__N)r    r!   r"   r#   torchTensorr%   r&   r   objectr+   r4   r   r   r   r   r,   B   s
   
 
r,   c                   @   s   e Zd ZdZ					ddddededB dee dB dedB dee d	e	d
e
dB dedB dedB dee dB dedB deeef dB deddfddZdd de	ddfddZdefddZdS )RequestOutputak  The output data of a completion request to the LLM.

    Args:
        request_id: The unique ID of the request.
        prompt: The prompt string of the request.
                For encoder/decoder models, this is the
                decoder input prompt.
        prompt_token_ids: The token IDs of the prompt.
                          For encoder/decoder models, this is the
                          decoder input prompt token ids.
        prompt_logprobs: The log probabilities to return per prompt token.
        outputs: The output sequences of the request.
        finished: Whether the whole request is finished.
        metrics: Metrics associated with the request.
        lora_request: The LoRA request that was used to generate the output.
        encoder_prompt: The encoder prompt string of the request.
                        None if decoder-only.
        encoder_prompt_token_ids: The token IDs of the encoder prompt.
                                  None if decoder-only.
        num_cached_tokens: The number of tokens with prefix cache hit.
        kv_transfer_params: The params for remote K/V transfer.
    N)kv_transfer_params
request_idpromptprompt_token_idsprompt_logprobsoutputsr   metricsr   encoder_promptencoder_prompt_token_idsnum_cached_tokensr9   kwargsr   c                K   s`   |r
t dt| || _|| _|| _|| _|| _|| _|| _	|| _
|	| _|
| _|| _|| _d S )Nz+RequestOutput: Ignoring extra arguments: %s)loggerwarning_oncer&   r:   r;   r<   r=   r>   r   r?   r   r@   rA   rB   r9   )r   r:   r;   r<   r=   r>   r   r?   r   r@   rA   rB   r9   rC   r   r   r   __init__m   s    
zRequestOutput.__init__next_output	aggregatec                 C   s   |  j |j O  _ |j| _|jD ]Z}t| jD ]L\}}|j|jkrb|r[| j|j7  _t|jts6t	|j|_|j
|j |jrN|jdusGJ |j
|j |j|_|j|_|j|_n|| j|<  nq| j| qdS )z,Merge subsequent RequestOutput into this oneN)r   r9   r>   	enumerater   r   r1   r   r   listextendr   r   r   r   append)r   rG   rH   next_completioni
completionr   r   r   add   s.   


zRequestOutput.addc                 C   s^   d| j  d| jd| j d| jd| j d| j d| j d| j d	| j d
| j	 d| j
 dS )NzRequestOutput(request_id=z	, prompt=, prompt_token_ids=z, encoder_prompt=z, encoder_prompt_token_ids=z, prompt_logprobs=
, outputs=, finished=z
, metrics=z, lora_request=, num_cached_tokens=r   )r:   r;   r<   r@   rA   r=   r>   r   r?   r   rB   r   r   r   r   r      s,   
	
zRequestOutput.__repr__)NNNNN)r    r!   r"   r#   r&   rJ   r$   r	   r   r+   r   r   dictr   rF   rP   r   r   r   r   r   r8   U   sP    
	


$r8    T)r:   r;   r<   r=   r>   r   _O)defaultc                
   @   s:   e Zd ZdZdededee dedef
ddZ	d	d
 Z
dS )PoolingRequestOutputa  
    The output data of a pooling request to the LLM.

    Args:
        request_id (str): A unique identifier for the pooling request.
        outputs (PoolingOutput): The pooling results for the given input.
        prompt_token_ids (list[int]): A list of token IDs used in the prompt.
        num_cached_tokens: The number of tokens with prefix cache hit.
        finished (bool): A flag indicating whether the pooling is completed.
    r:   r>   r<   rB   r   c                 C   s"   || _ || _|| _|| _|| _d S r   )r:   r<   rB   r   r>   )r   r:   r>   r<   rB   r   r   r   r   rF      s
   
zPoolingRequestOutput.__init__c                 C   s8   t | j d| jd| jd| j d| j d| j dS )Nz(request_id=rR   rQ   rT   rS   r   )typer    r:   r>   r<   rB   r   r   r   r   r   r      s   zPoolingRequestOutput.__repr__N)r    r!   r"   r#   r&   rW   rJ   r$   r+   rF   r   r   r   r   r   rY      s    
rY   c                   @   P   e Zd ZU dZee ed< edefddZ	e
defddZdefd	d
ZdS )EmbeddingOutputzThe output data of one embedding output of a request.

    Args:
        embedding: The embedding vector, which is a list of floats.
            Its length depends on the hidden dimension of the model.
    	embeddingpooling_outputc                 C   $   | j }|jdkrtdt| S )N   z,pooled_data should be a 1-D embedding vector)r-   ndim
ValueErrorr\   tolistr^   pooled_datar   r   r   	from_base   s   
zEmbeddingOutput.from_baser   c                 C   
   t | jS r   )lenr]   r   r   r   r   hidden_size     
zEmbeddingOutput.hidden_sizec                 C   r.   )NzEmbeddingOutput(hidden_size=r   )ri   r   r   r   r   r     r/   zEmbeddingOutput.__repr__N)r    r!   r"   r#   rJ   r(   r%   staticmethodr,   rf   propertyr$   ri   r&   r   r   r   r   r   r\      s   
 r\   c                   @      e Zd ZedefddZdS )EmbeddingRequestOutputrequest_outputc                 C   "   t | jt| j| j| j| jdS N)r:   r>   r<   rB   r   )rn   r:   r\   rf   r>   r<   rB   r   ro   r   r   r   rf        
z EmbeddingRequestOutput.from_baseNr    r!   r"   rk   rY   rf   r   r   r   r   rn         rn   c                   @   r[   )ClassificationOutputzThe output data of one classification output of a request.

    Args:
        probs: The probability vector, which is a list of floats.
            Its length depends on the number of classes.
    probsr^   c                 C   r_   )Nr`   z.pooled_data should be a 1-D probability vector)r-   ra   rb   rv   rc   rd   r   r   r   rf   "  s   
zClassificationOutput.from_baser   c                 C   rg   r   )rh   rw   r   r   r   r   num_classes+  rj   z ClassificationOutput.num_classesc                 C   r.   )Nz!ClassificationOutput(num_classes=r   )rx   r   r   r   r   r   /  r/   zClassificationOutput.__repr__N)r    r!   r"   r#   rJ   r(   r%   rk   r,   rf   rl   r$   rx   r&   r   r   r   r   r   rv     s   
 rv   c                   @   rm   )ClassificationRequestOutputro   c                 C   rp   rq   )ry   r:   rv   rf   r>   r<   rB   r   rr   r   r   r   rf   4  rs   z%ClassificationRequestOutput.from_baseNrt   r   r   r   r   ry   3  ru   ry   c                   @   s:   e Zd ZU dZeed< edefddZde	fddZ
d	S )
ScoringOutputzThe output data of one scoring output of a request.

    Args:
        score: The similarity score, which is a scalar value.
    scorer^   c                 C   s(   | j  }|jdkrtdt| S )Nr   z$pooled_data should be a scalar score)r-   squeezera   rb   rz   itemrd   r   r   r   rf   I  s   

zScoringOutput.from_baser   c                 C   r.   )NzScoringOutput(score=r   )r{   r   r   r   r   r   T  r/   zScoringOutput.__repr__N)r    r!   r"   r#   r(   r%   rk   r,   rf   r&   r   r   r   r   r   rz   ?  s   
 
rz   c                   @   rm   )ScoringRequestOutputro   c                 C   rp   rq   )r~   r:   rz   rf   r>   r<   rB   r   rr   r   r   r   rf   Y  rs   zScoringRequestOutput.from_baseNrt   r   r   r   r   r~   X  ru   r~   )%collections.abcr   r   r'   dataclassesr   typingr   r   numpyr)   r5   typing_extensionsr   vllm.loggerr   vllm.logprobsr	   r
   vllm.lora.requestr   vllm.v1.metrics.statsr   r    rD   r   r,   r8   STREAM_FINISHEDrW   rY   r\   rn   rv   ry   rz   r~   r   r   r   r   <module>   sH   ,k	$