o
    :/iC                     @   s  d dl mZmZ d dlmZmZmZmZ d dlm	Z	 d dl
mZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ erId d	lZd d
lmZ n	ede dZeZeeZedee dZddde e!ef d	B de e!ef d	B de"edf de e!ef fddZ#ddde e!ef d	B de e!ef d	B de"edf de e!ef fddZ$eddG dd dZ%eddG dd dZ&d	S )    )	dataclassfield)TYPE_CHECKINGAnyLiteralTypeVar)VLLMValidationError)EmbedsPrompt
TextPromptTokensPrompt)init_logger)merge_media_io_kwargs)TokenizerLike)
LazyLoaderN)ChatTemplateContentFormatOptiontorch_Sztorch.Tensor)Nautounset_valuesdefaults	overridesr   .returnc                  s2   | d u ri } |d u ri }|  fdd|  D B S )Nc                    s   i | ]\}}| vr||qS  r   ).0kvr   r   b/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/vllm/renderers/params.py
<dictcomp>(   s    z merge_kwargs.<locals>.<dictcomp>)items)r   r   r   r   r   r   merge_kwargs   s
   r    c               C   s   | d u ri } |d u ri }t | }| D ])\}}||v rq||v r9t|| t r9t|t r9t|| ||d||< q|||< q|S )Nr   )dictr   
isinstancerecursively_merge_kwargs)r   r   r   mergedr   r   r   r   r   r#   +   s    

r#   T)frozenc                	   @   s   e Zd ZU dZdZedB ed< 	 dZded< 	 ee	dZ
e	eef ed< 	 dZe	ee	eef f dB ed	< 	 dZe	eef dB ed
< 	 			dde	eef dB de	ee	eef f dB de	eef dB fddZde	eef fddZdS )
ChatParamsz4Configuration to control how to parse chat messages.Nchat_templater   r   chat_template_content_format)default_factorychat_template_kwargsmedia_io_kwargsmm_processor_kwargsdefault_chat_template_kwargsdefault_media_io_kwargsdefault_mm_processor_kwargsc                 C   s>   |s|s|s| S t | j| jt|| jt|| jt|| jdS )N)r'   r(   r*   r+   r,   )	r&   r'   r(   r    r*   r   r+   r#   r,   )selfr-   r.   r/   r   r   r   with_defaultsZ   s.   zChatParams.with_defaultsr   c                 C   s   t | jt| jddS )z9The arguments to pass to `tokenizer.apply_chat_template`.F)r'   return_dict)r    r*   r!   r'   r0   r   r   r   get_apply_chat_template_kwargsx   s   z)ChatParams.get_apply_chat_template_kwargs)NNN)__name__
__module____qualname____doc__r'   str__annotations__r(   r   r!   r*   r   r+   r,   r1   r4   r   r   r   r   r&   G   s.   
  
r&   c                   @   s  e Zd ZU dZedB ed< 	 dZeed< 	 dZedB ed< 	 dZedB ed< 	 dZ	e
d dB ed	< 	 d
Zeed< 	 dZeed< 	 d
Zeed< 	 dZeed< 	 dZeed< 	 dZeed< 	 ededB fddZd2ddZdefddZdeeef fddZdedB dedefddZdedB dedefd d!ZdedB dedefd"d#ZdedB d$edefd%d&ZdedB d'e de fd(d)Z!dedB d'e de fd*d+Z"dedB d'e de fd,d-Z#dedB d'e de fd.d/Z$dedB d$e%e&B de%e&B fd0d1Z'dS )3TokenizeParamsz3Configuration to control how prompts are tokenized.Nmax_total_tokensr   max_output_tokenspad_prompt_tokenstruncate_prompt_tokens)leftrighttruncation_sideFdo_lower_caseTadd_special_tokensneeds_detokenizationmax_total_tokens_parammax_output_tokens_paramtruncate_prompt_tokens_paramr   c                 C   s   | j du rdS | j | j S )z'Maximum allowed number of input tokens.N)r<   r=   r3   r   r   r   max_input_tokens   s   
zTokenizeParams.max_input_tokensc                 C   s   | j }| j}| j}| j}|d ur.|d ur.||kr.t| j d| d| j d|d| j|d|d urT|d urV||krXt| j d| d| j d| j d| d	
| j|dd S d S d S )
N=zcannot be greater than z=max_total_tokens=z%. Please request fewer output tokens.	parametervaluez cannot be greater than z - z = z+. Please request a smaller truncation size.)r<   r=   rI   r?   r   rG   rF   rH   )r0   r<   r=   rI   r?   r   r   r   __post_init__   sB   
zTokenizeParams.__post_init__tokenization_kwargsc              
   K   s  | d| j}| d| j}| d| j}| d| j}| d| j}| d| j}| dd  }rD|dkr9|}n|dv r@d }n||d< | d	d  }	r^|	d
v rS|}n|	dv rZd }n|	|d	< |rftd| | j	}
t
|
|
d u ss|d u rudn|
| ||| j|||dS )N
max_lengthr>   r?   rC   rD   rE   padding)F
do_not_pad
truncation)Tlongest_first)Fdo_not_truncatez_The following tokenization arguments are not supported by vLLM Renderer and will be ignored: %sr   )r<   r=   r>   r?   rB   rC   rD   rE   )poprI   r>   r?   rC   rD   rE   loggerwarningr<   r;   rB   )r0   rO   rP   r>   r?   rC   rD   rE   rQ   rS   r<   r   r   r   with_kwargs   sX   zTokenizeParams.with_kwargsc                 C   sf   | j }|dur|dk r| j}n|du r| jdur| jd }| jdkr)td| jdS t|du|| jdS )z,The arguments to pass to `tokenizer.encode`.Nr      r@   F)rS   rD   )rS   rP   rD   )r?   rI   rB   r!   rD   )r0   rP   r   r   r   get_encode_kwargs   s   

z TokenizeParams.get_encode_kwargs	tokenizertextc                 C   sx   | j }|du r	|S | jdu r:|dur:||j }t||kr:td| j d| j dt| d| d| ddt|d	|S )
z0Apply length checks to prompt text if necessary.N'This model's maximum context length is   tokens. However, you requested ( output tokens and your prompt contains z characters (more than z* characters, which is the upper bound for zf input tokens). Please reduce the length of the input prompt or the number of requested output tokens.
input_textrK   )rI   r?   max_chars_per_tokenlenr   r<   r=   )r0   r\   r]   rI   max_input_charsr   r   r   _text_len_check:  s,   
zTokenizeParams._text_len_checkc                 C   s   | j r| S |S )z,Apply lowercase to prompt text if necessary.)rC   lower)r0   r\   r]   r   r   r   _text_lowercaseU  s   zTokenizeParams._text_lowercasec                 C   s    | j | jfD ]}|||}q|S )z$Apply all validators to prompt text.)re   rg   )r0   r\   r]   	validatorr   r   r   _validate_textY  s
   zTokenizeParams._validate_textpromptc                 C   s   |  ||d |d< |S )z
        Ensure that the prompt meets the requirements set out by this config.
        If that is not possible, raise a `VLLMValidationError`.

        This method is run before tokenization occurs.
        rj   )ri   r0   r\   rj   r   r   r   apply_pre_tokenizationc  s   z%TokenizeParams.apply_pre_tokenizationtokensc                 C   sn   | j }|dur|dk r| j}|du s|t|kr|S |du r"tdt|ts+td||jg|t|   S )z,Apply padding to prompt tokens if necessary.Nr   z1Cannot pad tokens when `skip_tokenizer_init=True`z&Cannot pad tokens for embedding inputs)r>   rI   rc   
ValueErrorr"   listpad_token_id)r0   r\   rm   
pad_lengthr   r   r   _token_paddingr  s   
zTokenizeParams._token_paddingc                 C   s   | j }|dur|dk r| j}|du s|t|kr|S |dkr$|dd S | jp/|dur.|jnd}|dkr;|| d S |d| S )z/Apply truncation to prompt tokens if necessary.Nr   r@   )r?   rI   rc   rB   )r0   r\   rm   rP   sider   r   r   _token_truncation  s   z TokenizeParams._token_truncationc                 C   s|   | j }|du r	|S t||kr<t|}||d krdnd}|| j }td| j d| j d| | d| | d	d
|d|S )z2Apply length checks to prompt tokens if necessary.NrZ   z	at least  r^   r_   r`   z input tokens, for a total of z_ tokens. Please reduce the length of the input prompt or the number of requested output tokens.input_tokensrK   )rI   rc   r=   r   r<   )r0   r\   rm   rI   token_count	qualifiertotalr   r   r   _token_len_check  s2   
zTokenizeParams._token_len_checkc                 C   s$   | j | j| jfD ]}|||}q|S )z)Apply all validators to a token sequence.)rr   rt   rz   )r0   r\   rm   rh   r   r   r   _validate_tokens  s   zTokenizeParams._validate_tokensc                 C   s<   d|v r|  ||d |d< d|v r|  ||d |d< |S )z
        Ensure that the prompt meets the requirements set out by this config.
        If that is not possible, raise a `VLLMValidationError`.

        This method is run after tokenization occurs.
        prompt_token_idsprompt_embeds)r{   rk   r   r   r   apply_post_tokenization  s   z&TokenizeParams.apply_post_tokenization)r   N)(r5   r6   r7   r8   intr:   r=   r>   r?   rB   r   rC   boolrD   rE   rF   r9   rG   rH   propertyrI   rN   r   rY   r!   r[   r   re   rg   ri   r
   rl   r   rr   rt   rz   r{   r   r	   r~   r   r   r   r   r;      sd   
 
";

r;   )'dataclassesr   r   typingr   r   r   r   vllm.exceptionsr   vllm.inputsr	   r
   r   vllm.loggerr   vllm.multimodal.media.connectorr   vllm.tokenizersr   vllm.utils.import_utilsr   r   vllm.entrypoints.chat_utilsr   globalsobjectr5   rW   ro   r   r   r!   r9   tupler    r#   r&   r;   r   r   r   r   <module>   sN   





8