o
    :/iW                     @   s  d dl mZ d dlmZ d dlmZmZmZmZ d dl	m
Z d dl	mZ d dlmZmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlm
Z
 d dl m!Z! ddl"m#Z# erd dl$m%Z% zd dl&m'Z' W n e(y   d dl&m)Z' Y nw e!e*Z+d.ddZ,d.ddZ-			d/de.d de.e/e0ef  dB de1d e1d!e2e.d e.e/e0ef  dB f f
d"d#Z3d0d%d&Z4d'd(d)e0e5B d!e6fd*d+Z7G d,d- d-e#Z8dS )1    )Sequence)Path)TYPE_CHECKINGAnycastoverload)ChatCompletionRequest)ReasoningEffort)FunctionTool)ValidationMode)SpecialTokenPolicySpecialTokens)InstructTokenizerV13)SentencePieceTokenizer)
Tekkenizer)ValidationError)ChatCompletionMessageParam)init_logger   )TokenizerLike)BatchEncodingMistralCommonBackendMistralCommonTokenizerrequestMistralChatCompletionRequestc                 C   s|   t | jD ]6\}}|ddkr;|dd  }d ur2zt|}W n ty1 } ztd|d }~ww g }|| j| d< qd S )Nrole	assistant
tool_callsziValidating messages' `tool_calls` raised an error. Please ensure `tool_calls` are iterable of tool calls.)	enumeratemessagesgetlistr   
ValueError)r   imessagetool_calls_validatorvalidated_tool_callse r+   d/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/vllm/tokenizers/mistral.pymaybe_serialize_tool_calls/   s"   r-   c              	   C   s   t | jD ]l\}}|ddkrD|dg }|D ]#}t|d dkr;td|d |d dd  |d dd |d< q|| j| d< q|dd	v rqd
|v rq|d
 }t|dkrjtd||dd  |dd }|| j| d
< qdS )z6Truncates tool call IDs for Mistral's ID requirements.r   r   r    id	   z!Truncating tool call ID: %s to %siN>   tooltool_resultstool_call_idz!Truncating tool_call_id: %s to %s)r!   r"   r#   lenloggerwarning)r   r&   r'   r    	tool_callr2   r+   r+   r,   truncate_tool_call_idsV   s4   
r7   NFr"   r   toolscontinue_final_messageadd_generation_promptreturnc                 C   sn  |r|rt dttttf | d }|r|d dkrt d|r+|d dkr+t d| D ]}|dd }q-|rdd	 |D D ]}|d
d u rLi |d
< |dd u rWd|d< q?ttj	
 }ttj	
 }	|D ]J}
t|

 }|D ]?}||vr|
| td| d |
d dkrt|
d 
 }|D ]}||	vr|
d | td| d qqrt dqh| |fS )NzMCannot set both `add_generation_prompt` and `continue_final_message` to True.r   r   zCannot set `add_generation_prompt` to True when the last message is from the assistant. Consider using `continue_final_message` instead.z\Cannot set `continue_final_message` to True when the last message is not from the assistant.	reasoningc                 S   s    g | ]}|d  dkr|d qS )typefunctionr+   ).0r0   r+   r+   r,   
<listcomp>   s    zC_prepare_apply_chat_template_tools_and_messages.<locals>.<listcomp>
parametersdescription 'z\' is not supported by mistral-common for tools. It has been popped from the tool definition.r>   r?   zi' is not supported by mistral-common for function tools. It has been popped from the function definition.z,mistral-common only supports function tools.)r%   r   dictstrr   popr#   setr   model_fieldskeysr
   r$   r4   warning_once)r"   r8   r9   r:   last_messager'   _r?   tools_fieldsfunction_fieldsr0   	tool_keystool_keyfunction_keysfunction_keyr+   r+   r,   /_prepare_apply_chat_template_tools_and_messagest   s^   



	rU   r   c                 C   sZ   | j d us
| jd urtd| jr)| jttvr+td| j ddd tD  dd S d S )Nz6chat_template is not supported for Mistral tokenizers.zreasoning_effort=z; is not supported by Mistral models. Supported values are: c                 S   s   g | ]}|j qS r+   )value)r@   r*   r+   r+   r,   rA      s    z+validate_request_params.<locals>.<listcomp>.)chat_templatechat_template_kwargsr%   reasoning_effortr$   r	   )r   r+   r+   r,   validate_request_params   s   
r[   	tokenizerr   tc                 C   s   t | tsJ t| t |ts|dn|}| j}z|| j|  W S  tyE   |d}|| j	v r:| j	|  Y S t
d| | j Y S w )Nzutf-8z6Failed to convert token %s to id, replacing with <unk>)
isinstancer   r>   bytesencodenum_special_tokens_tekken_token2id_nospecialKeyErrordecode_special_tokens_reverse_vocabr4   r5   unk_id)r\   r]   t_bytesshiftt_strr+   r+   r,   _tekken_token_to_id   s   


rj   c                       s:  e Zd ZdZedddddeeB dededB dedB d	d f
d
dZdV fddZ	d	e
e fddZde
e d	e
e fddZd	efddZed	e
e fddZed	e
e fddZed	efddZed	efddZed	efdd Zed	efd!d"Zed	efd#d$Zed	efd%d&Zed	efd'd(Zed	efd)d*Zd+ed	efd,d-Zd	efd.d/Zd	efd0d1Z				dWd2ee
e B d3edB d4ed5ed6edB d	d7fd8d9Zed	e
e fd:d;Zd	eeef fd<d=Z d	eeef fd>d?Z!			dXd2ed5edB d6edB d4ed	e
e f
d@dAZ"	dYdBe
dC dDe
eee#f  dB d	e
e fdEdFZ$	dZdGe%e eB dHed	efdIdJZ&	dZdGe
e
e  e
e B dHed	efdKdLZ'e(dMed	efdNdOZ)e(dMe
e d	e
e fdPdOZ)dMee
e B d	ee
e B fdQdOZ)dMe
e d	efdRdSZ*	dZdGe%e dHed	e
e fdTdUZ+  Z,S )[MistralTokenizerTFN)trust_remote_coderevisiondownload_dirpath_or_repo_idrl   rm   rn   r;   c          	      O   sf   zddl m} W n ty   ddl m} Y nw |j|g|R tj||d u r(dn|d|}| |S )Nr   r   r   main)mode	cache_dirrm   )(transformers.tokenization_mistral_commonr   ImportErrorr   from_pretrainedr   test)	clsro   rl   rm   rn   argskwargsr   r\   r+   r+   r,   ru      s"   
	z MistralTokenizer.from_pretrainedr\   r   c                    sF  t    | _|j _ jj _ jj _ jjj}|t	j
kr$tdt jjj}t|dd  _t jt _t jt _ jsS jsStdt j  fddt jd ddD  _tt j dd	 d
 _ j  _ jd  _ t!dd  jD  _" #  _$t% j$ _& ' j$ _(t% j( _)d S )NzzMistral tokenizer must be in test mode. Make sure to set `mode='ValidationMode.test'` when creating the Mistral tokenizer.vr<   zUnsupported tokenizer: c                    s"   i | ]} j |gd dd |qS )Fskip_special_tokensr   )convert_ids_to_tokensr@   r&   selfr+   r,   
<dictcomp>  s    z-MistralTokenizer.__init__.<locals>.<dictcomp>r   c                 S   s   | d S )Nr   r+   )xr+   r+   r,   <lambda>   s    z+MistralTokenizer.__init__.<locals>.<lambda>)keyc                 s   s    | ]}t |V  qd S N)r3   )r@   tokr+   r+   r,   	<genexpr>%      z,MistralTokenizer.__init__.<locals>.<genexpr>)*super__init__transformers_tokenizerr\   mistralinstruct_tokenizerinstruct"_chat_completion_request_validator_moder   rv   r%   rG   versionrV   intsplitr^   r   	is_tekkenr   is_spm	TypeErrorr>   range
vocab_size_vocab_dictrF   sorteditemsvocab_vocab_max_token_idmax_max_chars_per_token_get_special_token_ids_special_token_idsrI   _special_token_ids_set_get_special_tokens_special_tokens_special_tokens_set)r   r\   rq   _mistral_version_str	__class__r   r,   r     s6   






zMistralTokenizer.__init__c                    s    fddt t jD S )Nc                    s   g | ]
} j |r|qS r+   )r\   
is_specialr~   r   r+   r,   rA   .  s    z;MistralTokenizer._get_special_token_ids.<locals>.<listcomp>)r   r3   r   r   r+   r   r,   r   -  s   z'MistralTokenizer._get_special_token_idsall_special_idsc                    s    fdd|D S )Nc                    s    g | ]} j j|gtjd qS ))special_token_policy)r\   rd   r   KEEPr~   r   r+   r,   rA   1  s    z8MistralTokenizer._get_special_tokens.<locals>.<listcomp>r+   )r   r   r+   r   r,   r   0  s   
z$MistralTokenizer._get_special_tokensc                 C   s   t | dS )NrD   )r3   r`   r   r+   r+   r,   num_special_tokens_to_add6  s   z*MistralTokenizer.num_special_tokens_to_addc                 C      | j S r   )r   r   r+   r+   r,   all_special_tokens;     z#MistralTokenizer.all_special_tokensc                 C   r   r   )r   r   r+   r+   r,   r   ?  r   z MistralTokenizer.all_special_idsc                 C      | j jS r   )r\   bos_idr   r+   r+   r,   bos_token_idC     zMistralTokenizer.bos_token_idc                 C   r   r   )r\   eos_idr   r+   r+   r,   eos_token_idG  r   zMistralTokenizer.eos_token_idc                 C   r   r   )r\   pad_idr   r+   r+   r,   pad_token_idK  r   zMistralTokenizer.pad_token_idc                 C   s   dS )NTr+   r   r+   r+   r,   is_fastO     zMistralTokenizer.is_fastc                 C   r   r   )r   r   r   r+   r+   r,   r   S  r   zMistralTokenizer.vocab_sizec                 C   r   r   )r   r   r+   r+   r,   max_token_idW  r   zMistralTokenizer.max_token_idc                 C   r   r   )r   r   r+   r+   r,   max_chars_per_token[  r   z$MistralTokenizer.max_chars_per_tokenc                 C   r   r   )r   truncation_sider   r+   r+   r,   r   _  r   z MistralTokenizer.truncation_sidetoken_idc                 C   s
   || j v S r   )r   )r   r   r+   r+   r,   _is_special_token_idc  s   
z%MistralTokenizer._is_special_token_idc                 C   s   t t| S r   )hashr.   r   r+   r+   r,   __hash__f     zMistralTokenizer.__hash__c                 C   r   r   )r   r   r+   r+   r,   __len__i     zMistralTokenizer.__len__text	text_pairadd_special_tokens
truncation
max_lengthr   c                 C   sh   |d urt d| j|||||d}|d r2|d d | jkr2|d d |d }r2|d |S )Nz<`text_pair` is not supported by `MistralTokenizer.__call__`.)r   r   r   r   r   	input_idsr<   attention_mask)r%   r   r   rH   r#   )r   r   r   r   r   r   encodedr   r+   r+   r,   __call__l  s    
zMistralTokenizer.__call__c                 C   r   r   )r   r   r+   r+   r,   r     r   zMistralTokenizer.vocabc                 C   r   r   )r   r   r+   r+   r,   	get_vocab  r   zMistralTokenizer.get_vocabc                 C   s   i S r   r+   r   r+   r+   r,   get_added_vocab  r   z MistralTokenizer.get_added_vocabc                 C   s2   | j j||dd}|dur|d ur|d | S |S )NF)boseos)r\   r`   )r   r   r   r   r   r   r+   r+   r,   r`     s   	zMistralTokenizer.encoder"   r   r8   c                 K   s   | dd}|dd}|dd}|dd}|dd}|d}	i }
| jd	kr1|d
|
d
< t||||\}}| jjd|||||||	d dd	|
S )Nr:   Fr9   tokenizeTpaddingr   r      rZ   )	conversationr8   r9   r   r   r   r   return_tensorsreturn_dictr+   )rH   r#   r   rU   r   apply_chat_template)r   r"   r8   ry   r:   r9   r   r   r   r   version_kwargsr+   r+   r,   r     s2   


z$MistralTokenizer.apply_chat_templateidsr|   c                 C   s    t |tr|g}| jj||dS Nr{   )r^   r   r   rd   r   r   r|   r+   r+   r,   rd     s
   
zMistralTokenizer.decodec                 C   s   | j j||dS r   )r   batch_decoder   r+   r+   r,   r     s   zMistralTokenizer.batch_decodetokensc                 C      d S r   r+   r   r   r+   r+   r,   convert_tokens_to_ids     z&MistralTokenizer.convert_tokens_to_idsc                 C   r   r   r+   r   r+   r+   r,   r     r   c                 C   s   | j |S r   )r   r   r   r+   r+   r,   r     r   c                    s  t jh jrAt jtsJ t j fdd|D }tdd |D r: fdd|D } j|t	j
}|S d|}|S t jtsNJ t jg }g }d}|D ] }|v rq|rk| j|t	j g }|| qV|| qV|r| j|t	j d|}|S )Nc                    s"   g | ]}|v s| j vr|qS r+   )r   r@   r]   r   to_decode_special_tokensr+   r,   rA     
    z=MistralTokenizer.convert_tokens_to_string.<locals>.<listcomp>c                 s   s    | ]}t |tV  qd S r   )r^   r_   r   r+   r+   r,   r     s    z<MistralTokenizer.convert_tokens_to_string.<locals>.<genexpr>c                    s   g | ]}t  j|qS r+   )rj   r\   r   r   r+   r,   rA         rD   )r   r    r   r^   r\   r   r>   anyrd   r   r   joinr   appendIGNORE)r   r   r   decodedregular_tokensdecoded_listtokenr+   r   r,   convert_tokens_to_string  sF   !

z)MistralTokenizer.convert_tokens_to_stringc                    s   |sfdd|D S j tjh tjtr/jjr$ jj jj	r/ jj	  fdd|D }fdd|D }t
dd |D rWjrWfdd|D }|S )Nc                       g | ]} j |qS r+   r\   id_to_piecer@   r   r   r+   r,   rA     r   z:MistralTokenizer.convert_ids_to_tokens.<locals>.<listcomp>c                    s"   g | ]}| v s |s|qS r+   )r   r~   non_skip_special_tokens_idsr   r+   r,   rA   (  r   c                    r   r+   r   r   r   r+   r,   rA   /  r   c                 s   s    | ]}d |v V  qdS )u   �Nr+   r   r+   r+   r,   r   1  r   z9MistralTokenizer.convert_ids_to_tokens.<locals>.<genexpr>c                    s8   g | ]}| j vr j|tjn j|gtjqS r+   )r   r\   id_to_byte_piecer   r   rd   r   r   r+   r,   rA   8  s    
)r\   get_special_tokenr   r    r^   r   r   BEGIN_THINKadd	END_THINKr   r   )r   r   r|   ids_keptr   r+   r   r,   r}     s$   
z&MistralTokenizer.convert_ids_to_tokens)r\   r   r;   N)NTFN)NNTr   )F)-__name__
__module____qualname__IS_MISTRAL_TOKENIZERclassmethodrG   r   boolru   r   r$   r   r   r   r   propertyr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rF   r   r   r`   r   r   r   rd   r   r   r   r   r}   __classcell__r+   r+   r   r,   rk      s    +



%


"5rk   )r   r   )NFF)r   r   )9collections.abcr   pathlibr   typingr   r   r   r   (mistral_common.protocol.instruct.requestr   r   r	   +mistral_common.protocol.instruct.tool_callsr
   r   *mistral_common.protocol.instruct.validatorr   %mistral_common.tokens.tokenizers.baser   r   )mistral_common.tokens.tokenizers.instructr   .mistral_common.tokens.tokenizers.sentencepiecer   'mistral_common.tokens.tokenizers.tekkenr   pydanticr   vllm.entrypoints.chat_utilsr   0vllm.entrypoints.openai.chat_completion.protocolvllm.loggerr   protocolr   transformersr   rs   r   rt   r   r   r4   r-   r7   r$   rF   rG   r  tuplerU   r[   r_   r   rj   rk   r+   r+   r+   r,   <module>   sV   

' 

N