o
    :/iu                     @   sn   d dl Z d dlZd dlmZmZ d dlmZ ddlmZm	Z	 ddl
mZ dedefd	d
ZG dd deZdS )    N)
CollectionSet)AutoTokenizer   )HfTokenizerget_cached_tokenizer)TokenizerLike	tokenizerreturnc                 C   s6   t  | }G dd d| j}| jj d|_||_|S )a  
    The logic of adding image pad tokens should only be applied in
    `QwenVLProcessor`, so they are patched out here.

    The definition of the wrapped tokenizer can be found here:
    https://huggingface.co/Qwen/Qwen-VL/blob/main/tokenization_qwen.py
    c                   @   sp   e Zd Z		ddedee eB dee eB deeeB  fddZ			
dde	ee	 B de
ded
B defddZd
S )z7get_qwen_vl_tokenizer.<locals>.TokenizerWithoutImagePadall textallowed_specialdisallowed_specialr
   c                    s,   t d|} fdd jj|||dD S )NNFCc                    s   g | ]} j | qS r   )decoder).0tselfr   d/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/vllm/tokenizers/qwen_vl.py
<listcomp>!   s    zTget_qwen_vl_tokenizer.<locals>.TokenizerWithoutImagePad.tokenize.<locals>.<listcomp>)r   r   )unicodedata	normalizer	   encode)r   r   r   r   kwargsr   r   r   tokenize   s   
z@get_qwen_vl_tokenizer.<locals>.TokenizerWithoutImagePad.tokenizeFN	token_idsskip_special_tokenserrorsc                 [   s&   t |tr|g}| jj||p| jdS )N)r   )
isinstanceintr	   decoder   )r   r   r   r   r   r   r   r   _decode*   s   
z?get_qwen_vl_tokenizer.<locals>.TokenizerWithoutImagePad._decode)r   r   )FN)__name__
__module____qualname__strr   r   listbytesr   r!   boolr#   r   r   r   r   TokenizerWithoutImagePad   s.    




r+   WithoutImagePad)copy	__class__r$   )r	   new_tokenizerr+   r   r   r   get_qwen_vl_tokenizer   s
   
"r0   c                   @   s   e Zd ZedefddZdS )QwenVLTokenizerr
   c                 O   s   t j|i |}tt|S )N)r   from_pretrainedr   r0   )clsargsr   r	   r   r   r   r2   @   s   zQwenVLTokenizer.from_pretrainedN)r$   r%   r&   classmethodr   r2   r   r   r   r   r1   ?   s    r1   )r-   r   collections.abcr   r   transformersr   hfr   r   protocolr   r0   r1   r   r   r   r   <module>   s   2