o
    :/iY                     @   s  d dl Zd dlZd dlmZ d dlmZ d dlmZ d dl	Z	d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZ eeZeG d	d
 d
Zdededeedf dedee f
ddZdedee fddZdededede fddZ!dededede"fddZ#dee dededeee  ee$e"ef  e$e"ef f fddZ%dee de dee
j& fdd Z'ed!d"de(e fd#d$Z)dS )%    N)	dataclass)	lru_cache)Any)JSONResponse)init_logger)PoolingRequestOutput)EMBED_DTYPES
EmbedDType
Endiannessbinary2tensortensor2binaryc                   @   sF   e Zd ZU eed< eed< eed< eed< eed< eedf ed< dS )	MetadataItemindexembed_dtype
endiannessstartend.shapeN)__name__
__module____qualname__int__annotations__r	   r
   tuple r   r   k/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/vllm/entrypoints/pooling/utils.pyr      s   
 r   r   r   r   .	n_requestreturnc                    s2   t   jt fddt|D S )Nc              
      s4   g | ]}t | |  |d    dqS )   r   r   r   r   r   r   )r   ).0ir   r   n_bytesr   sizer   r   
<listcomp>.   s    	
z(build_metadata_items.<locals>.<listcomp>)r   nbytesmathprodrange)r   r   r   r   r   r"   r   build_metadata_items%   s
   

	r*   outputc                 C   s   | j j S N)outputsdatatolist)r+   r   r   r   encode_pooling_output_float;   s   r0   c                 C   s   t | jj||S r,   )r   r-   r.   )r+   r   r   r   r   r   encode_pooling_output_binary?   s   r1   c                 C   s    t | jj||}t|dS )Nzutf-8)r   r-   r.   pybase64	b64encodedecode)r+   r   r   embedding_bytesr   r   r   encode_pooling_output_base64G   s   r6   pooling_outputsc              	   C   s   d}g }g }d}t | D ]7\}}t|jj||d}	t|	}
t||||||
 |jjjd}||	 || |j}|t|7 }||
7 }qt||d}|||fS )Nr   )tensorr   r   r   )prompt_tokenstotal_tokens)		enumerater   r-   r.   lendictr   appendprompt_token_ids)r7   r   r   num_prompt_tokensitemsbodyoffsetidxr+   binaryr$   itemr?   usager   r   r   encode_pooling_bytesP   s:   
	


rH   rA   rB   c                    s    fddt | dd dD S )Nc                    s,   g | ]}t  |j|j |j|j|jqS r   )r   r   r   r   r   r   )r    rF   rB   r   r   r%   {   s    z)decode_pooling_output.<locals>.<listcomp>c                 S   s   | j S r,   )r   )xr   r   r   <lambda>   s    z'decode_pooling_output.<locals>.<lambda>)key)sorted)rA   rB   r   rI   r   decode_pooling_outputz   s   
rN   r   )maxsizec                  C   s.   t jdd urddlm}  | S td tS )Norjsonr   ORJSONResponsezMTo make v1/embeddings API fast, please install orjson by `pip install orjson`)	importlibutil	find_specfastapi.responsesrR   loggerwarning_oncer   rQ   r   r   r   get_json_response_cls   s   rY   )*importlib.utilrS   r'   dataclassesr   	functoolsr   typingr   r2   torchrV   r   vllm.loggerr   vllm.outputsr   vllm.utils.serial_utilsr   r	   r
   r   r   r   rW   r   r   r   listr*   floatr0   bytesr1   strr6   r=   rH   TensorrN   typerY   r   r   r   r   <module>   sp   	



	$
 *