
    vj                     n    d dl mZmZmZ d dlmZ d dlmZ d dlm	Z	  e            Z
 G d de          ZdS )    )ListOptionalUnion)
get_logger)InferFramework)is_vllm_availablec                        e Zd Z	 	 	 	 ddededededee         f
 fd	Zd
ee	e         e	e	e                  f         de	e         fdZ
defdZ xZS )VllmautoN   model_id_or_dirdtypequantizationtensor_parallel_sizetrust_remote_codec                    t                                          |           t                      st          d          ddlm} t                              d          s|dv rd} || j        ||||          | _	        dS )	a  
        Args:
            dtype: The dtype to use, support `auto`, `float16`, `bfloat16`, `float32`
            quantization: The quantization bit, default None means do not do any quantization.
            tensor_parallel_size: The tensor parallel size.
        zLInstall vllm by `pip install vllm` before using vllm to accelerate inferencer   )LLM   )bfloat16r   float16)r   r   r   r   N)
super__init__r   ImportErrorvllmr   r
   check_gpu_compatibility	model_dirmodel)selfr   r   r   r   r   r   	__class__s          t/lsinfo/ai/hellotax_ai/data_center/backend/venv/lib/python3.11/site-packages/modelscope/pipelines/accelerate/vllm.pyr   zVllm.__init__   s     	))) "" 	^   	++A.. 	E7K5L 5LESN%/!57 7 7


    promptsreturnc                    |                     dd          }|                     dd          }|                     dd          }|                     dd          }|s|dk    rd|d<   |r|t          |d	                   z
  |d
<   |r||d
<   d	dlm}  |di |}t	          |d	         t
                    r&d | j                            ||          D             S d | j                            ||          D             S )zGenerate tokens.
        Args:
            prompts(`Union[List[str], List[List[int]]]`):
                The string batch or the token list batch to input to the model.
            kwargs: Sampling parameters.
        	do_sampleNnum_beamr   
max_lengthmax_new_tokensTuse_beam_searchr   
max_tokens)SamplingParamsc                 2    g | ]}|j         d          j        S r   outputstext.0outputs     r    
<listcomp>z!Vllm.__call__.<locals>.<listcomp>F   /       +1q!&  r!   )sampling_paramsc                 2    g | ]}|j         d          j        S r-   r.   r1   s     r    r4   z!Vllm.__call__.<locals>.<listcomp>K   r5   r!   )prompt_token_idsr6    )poplenr   r+   
isinstancestrr   generate)	r   r"   kwargsr%   r&   r'   r(   r+   r6   s	            r    __call__zVllm.__call__)   s]    JJ{D11	::j!,,ZZd33
$4d;;  	-X\\(,F$% 	@#-GAJ#?F<  	2#1F< ''''''(.22622gaj#&& 		 59Z5H5H_ 6I 6> 6>   
 59Z5H5H%,o 6I 6O 6O   r!   
model_typec                 :    t          fddD                       S )Nc                 <    g | ]}|                                 v S r9   )lower)r2   r   rA   s     r    r4   z-Vllm.model_type_supported.<locals>.<listcomp>Q   s7     
 
 
,1EZ%%'''
 
 
r!   )llamabaichuaninternlmmistralaquilabloomfalcongptmptoptqwenrI   )any)r   rA   s    `r    model_type_supportedzVllm.model_type_supportedP   s=     
 
 
 
6
 
 
   	r!   )r   Nr   N)__name__
__module____qualname__r=   intr   boolr   r   r   r@   rQ   __classcell__)r   s   @r    r
   r
   
   s         %%)-.597 7"%77  #7 (+	7
 %-TN7 7 7 7 7 7:%d3id3i&@ A %"3i% % % %Ns        r!   r
   N)typingr   r   r   
modelscoper   $modelscope.pipelines.accelerate.baser   modelscope.utils.import_utilsr   loggerr
   r9   r!   r    <module>r]      s    ( ( ( ( ( ( ( ( ( ( ! ! ! ! ! ! ? ? ? ? ? ? ; ; ; ; ; ;	V V V V V> V V V V Vr!   