o
    :/i/                     @   s  d dl Z d dlmZ d dlmZ d dlmZ d dlmZ	 d dl
mZmZmZmZmZmZ d dlmZ d dlmZmZ d	d
lmZmZmZmZmZmZmZ d	dlmZ d	dl m!Z!m"Z"m#Z#m$Z$m%Z% errd dl&m'Z'm(Z(m)Z) d dl*m+Z+ ee,Z-ede.d dZ/ede#dZ0ede#ddZ1G dd dee1 Z2G dd dee0 Z3G dd dee0 Z4eddG dd dee0 Z5G dd  d Z6G d!d" d"Z7dS )#    N)defaultdict)Mapping)	dataclass)Lock)TYPE_CHECKINGGenericLiteralProtocolTypeVarcast)init_logger)TokenizerLikecached_tokenizer_from_config   )BaseMultiModalProcessorCacheBaseMultiModalReceiverCacheMultiModalProcessorOnlyCacheMultiModalProcessorSenderCacheMultiModalReceiverCacheShmObjectStoreReceiverCacheShmObjectStoreSenderCache)MultiModalInputs)BaseDummyInputsBuilderBaseMultiModalProcessorBaseProcessingInfoInputProcessingContextTimingContext)ModelConfigObservabilityConfig
VllmConfig)SupportsMultiModalNr    )bound_I_I_coT)r"   	covariantc                   @   s"   e Zd ZdZdedefddZdS )ProcessingInfoFactory
    Constructs a
    [`BaseMultiModalProcessor`][vllm.multimodal.processing.BaseMultiModalProcessor]
    instance from the context.
    ctxreturnc                 C      d S N )selfr(   r,   r,   e/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/vllm/multimodal/registry.py__call__1   s   zProcessingInfoFactory.__call__N)__name__
__module____qualname____doc__r   r$   r/   r,   r,   r,   r.   r&   *   s    r&   c                   @   s&   e Zd ZdZdedee fddZdS )DummyInputsBuilderFactoryz
    Constructs a
    [`BaseDummyInputsBuilder`][vllm.multimodal.processing.BaseDummyInputsBuilder]
    instance from the context.
    infor)   c                 C   r*   r+   r,   )r-   r5   r,   r,   r.   r/   >   s    z"DummyInputsBuilderFactory.__call__N)r0   r1   r2   r3   r#   r   r/   r,   r,   r,   r.   r4   7   s    r4   c                
   @   s<   e Zd ZdZdddedee dedB dee fdd	ZdS )
MultiModalProcessorFactoryr'   Ncacher5   dummy_inputsr8   r)   c                C   r*   r+   r,   )r-   r5   r9   r8   r,   r,   r.   r/   H   s   z#MultiModalProcessorFactory.__call__)	r0   r1   r2   r3   r#   r   r   r   r/   r,   r,   r,   r.   r6   A   s    r6   )frozenc                   @   sN   e Zd ZU ee ed< ee ed< ee ed< dddede	dB fdd	Z
dS )
_ProcessorFactoriesr5   	processorr9   Nr7   r(   r8   c                C   s$   |  |}| |}| j|||dS )Nr7   r5   r9   r<   )r-   r(   r8   r5   dummy_inputs_builderr,   r,   r.   build_processorW   s   

z#_ProcessorFactories.build_processor)r0   r1   r2   r&   r#   __annotations__r6   r4   r   r   r?   r,   r,   r,   r.   r;   Q   s   
 r;   c                   @   s`  e Zd ZdZdddefddZdee dee d	e	e fd
dZ
d+ddZ	d,dddedB defddZ	d,dddedB defddZddddddedB dedB dee fddZddddddeeef dedB dedB def
ddZddded fd d!ZdddedB fd"d#ZdddedB fd$d%ZdddedB fd&d'Zddd(e dedB fd)d*Z!dS )-MultiModalRegistryzL
    A registry that dispatches data processing according to the model.
    model_configr   r)   c                    sT   |j sdS |  | j|dd}t fdd|jD r( jr!dS td dS dS )z
        Checks if the model supports multimodal inputs.
        Returns True if the model is multimodal with any non-zero supported
        modalities, otherwise returns False, effectively running in
        text-only mode.
        FN)	tokenizerc                 3   s    | ]
}  |d kV  qdS )r   N)get_limit_per_prompt).0modality	mm_configr,   r.   	<genexpr>u   s
    
z@MultiModalRegistry.supports_multimodal_inputs.<locals>.<genexpr>TzcAll limits of multimodal modalities supported by the model are set to 0, running in text-only mode.)is_multimodal_modelget_multimodal_config_create_processing_infoallsupported_mm_limitsenable_mm_embedslogger	info_once)r-   rB   r5   r,   rG   r.   supports_multimodal_inputsg   s   z-MultiModalRegistry.supports_multimodal_inputsr<   r5   r9   c                   s    dt dt f fdd}|S )a/  
        Register a multi-modal processor to a model class. The processor
        is constructed lazily, hence a factory method should be passed.

        When the model receives multi-modal data, the provided function is
        invoked to transform the data into a dictionary of model inputs.
        	model_clsr)   c                    s,   d| j v rtd|  t d| _| S )N_processor_factoryzfModel class %s already has a multi-modal processor registered to %s. It is overwritten by the new one.r=   )__dict__rP   warningr;   rT   )rS   r9   r5   r<   r-   r,   r.   wrapper   s   
z6MultiModalRegistry.register_processor.<locals>.wrapper)r!   )r-   r<   r5   r9   rX   r,   rW   r.   register_processor   s   z%MultiModalRegistry.register_processorr    c                 C   s0   ddl m} ||\}}t|dsJ td|S )Nr   )get_model_architecturerT   r    ) vllm.model_executor.model_loaderrZ   hasattrr   )r-   rB   rZ   rS   _r,   r,   r.   _get_model_cls   s   
z!MultiModalRegistry._get_model_clsNrC   c                 C   s   |d u rt |}t||S r+   )r   r   )r-   rB   rC   r,   r,   r.   _create_processing_ctx   s   
z)MultiModalRegistry._create_processing_ctxc                 C   s&   |  |}|j}| ||}||S r+   )r^   rT   r_   r5   )r-   rB   rC   rS   	factoriesr(   r,   r,   r.   rL      s   

z*MultiModalRegistry._create_processing_info)rC   r8   r8   c                C   s@   |j st|j d| |}|j}| ||}|j||dS )zT
        Create a multi-modal processor for a specific model and tokenizer.
        z is not a multimodal modelr7   )rJ   
ValueErrormodelr^   rT   r_   r?   )r-   rB   rC   r8   rS   r`   r(   r,   r,   r.   create_processor   s   

z#MultiModalRegistry.create_processor)r8   r<   	mm_countsc                C   s|   |j }|du r| j||d}| }|jj|||jd}|j|tddd}|d }	t|	}
|
|k r<|		dg||
   |S )	z
        Create dummy data for profiling the memory usage of a model.

        The model is identified by `model_config`.
        Nr7   )seq_lenrd   
mm_optionsFenabled)
timing_ctxprompt_token_idsr   )
max_model_lenrc   rK   r9   get_dummy_processor_inputslimit_per_promptapplyr   lenextend)r-   rB   rd   r8   r<   re   rH   processor_inputs	mm_inputsrj   	total_lenr,   r,   r.   get_dummy_mm_inputs   s$   z&MultiModalRegistry.get_dummy_mm_inputsvllm_configr   )Nprocessor_onlylrushmc                 C   s`   |j }| |s
d S | }|jdkrd S |j}|jdko$|jdkp$|j}|s)dS | }|jS )Nr   r   rv   )	rB   rR   rK   mm_processor_cache_gbparallel_config_api_process_countdata_parallel_sizedata_parallel_external_lbmm_processor_cache_type)r-   ru   rB   rH   rz   is_ipc_supportedr,   r,   r.   _get_cache_type   s   



z"MultiModalRegistry._get_cache_typec                 C   sX   |  |}|du rdS |dkrt|jS |dkrt|jS |dkr%t|S td|)z4Return a `BaseMultiModalProcessorCache`, if enabled.Nrv   rw   rx   Unknown cache type: )r   r   rB   r   r   ra   r-   ru   
cache_typer,   r,   r.   processor_cache_from_config  s   


z.MultiModalRegistry.processor_cache_from_configc                 C   s    |  |}|du rdS t|jS )z4Return a `MultiModalProcessorOnlyCache`, if enabled.N)r   r   rB   r   r,   r,   r.    processor_only_cache_from_config'  s   

z3MultiModalRegistry.processor_only_cache_from_configc                 C   s6   |  |}|dv rdS |dkrt|jS td|)z>Return a `BaseMultiModalReceiverCache` for the engine process.)Nrv   rx   Nrw   r   )r   r   rB   ra   r   r,   r,   r.   !engine_receiver_cache_from_config2  s   

z4MultiModalRegistry.engine_receiver_cache_from_configshared_worker_lockc                 C   s6   |  |}|dv rdS |dkrt||S td|)z>Return a `BaseMultiModalReceiverCache` for the worker process.)Nrv   rw   Nrx   r   )r   r   ra   )r-   ru   r   r   r,   r,   r.   !worker_receiver_cache_from_config?  s   

z4MultiModalRegistry.worker_receiver_cache_from_config)rB   r   r)   r    r+   )"r0   r1   r2   r3   boolrR   r6   r#   r&   r4   rY   r^   r   r   r_   r   rL   r   r   rc   r   strintr   rt   r   r   r   r   r   r   r   LockTyper   r,   r,   r,   r.   rA   b   s    

"




$



rA   c                       sN   e Zd Zd fddZdedefdd	Zdeeeeef f fd
dZ	  Z
S )MultiModalTimingRegistryobservability_configObservabilityConfig | Noner)   Nc                    sD   t    |r|jrt | _tttf t| _	d| _
d S d| _
d S )NTF)super__init__enable_mm_processor_stats	threadingr   _lockr   r   r   _ctx_by_request_id_enabled)r-   r   	__class__r,   r.   r   O  s   




z!MultiModalTimingRegistry.__init__
request_idc                 C   sD   | j stddS | j | j| W  d    S 1 sw   Y  d S )NFrg   )r   r   r   r   )r-   r   r,   r,   r.   getY  s
   
$zMultiModalTimingRegistry.getc                 C   sV   | j si S | j dd | j D }| j  |W  d    S 1 s$w   Y  d S )Nc                 S   s   i | ]	\}}||  qS r,   )get_stats_dict)rE   req_idr(   r,   r,   r.   
<dictcomp>e  s    z1MultiModalTimingRegistry.stat.<locals>.<dictcomp>)r   r   r   itemsclear)r-   statsr,   r,   r.   stat`  s   
$zMultiModalTimingRegistry.stat)r   r   r)   N)r0   r1   r2   r   r   r   r   dictfloatr   __classcell__r,   r,   r   r.   r   N  s    
&r   )8r   collectionsr   collections.abcr   dataclassesr   multiprocessing.synchronizer   r   typingr   r   r   r	   r
   r   vllm.loggerr   vllm.tokenizersr   r   r8   r   r   r   r   r   r   r   inputsr   
processingr   r   r   r   r   vllm.configr   r   r   %vllm.model_executor.models.interfacesr    r0   rP   typer!   r#   r$   r&   r4   r6   r;   rA   r   r,   r,   r,   r.   <module>   s4    $	
 m