o
    di d                     @   s   d Z ddlZddlZddlmZ ddlmZmZmZm	Z	m
Z
mZmZ ddlZddlZddlmZmZ ddlmZ ddlmZ dd	lmZ d
dlmZmZmZmZmZ eeZ e!dZ"G dd dZ#G dd dZ$dS )zWDefines the base classes that are used to perform inference with ONNX Runtime sessions.    N)Path)AnyDictListOptionalSetTupleUnion)InferenceSession	IOBinding)
TypeHelper   )_get_model_external_data_paths)
get_logger   )get_device_for_providerget_dtype_from_sessionget_provider_for_deviceparse_devicevalidate_provider_availabilityc                   @   sz  e Zd ZdZd>dedee fddZede	fdd	Z
ede	fd
dZedee	 fddZede	fddZedee	ef fddZedee	ef fddZedejfddZejdd ZedejfddZedee fddZejdefddZdd Zdefd d!Zded"ee	eejejf f dee	ejf fd#d$Zded%eej dee	eejejf f fd&d'Zd(e	d)e e! dejfd*d+Z"d(e	d,ee	e!f dee! fd-d.Z#d/ee	 d,ee	e!f de!fd0d1Z$			d?d"ee	ejf d2ee%e	  d3eee	e	f  d4eee	e e! f  de ee	e e! f ee	ejf f f
d5d6Z&d7d8 Z'd9d: Z(d;ee	e)f fd<d=Z*dS )@ORTSessionMixina  
    Mixin class that provides common functionalities for an ONNX Runtime session.
    This class is used to manage the session, the execution provider, and the IO binding.
    It also provides methods to prepare the inputs and outputs for ONNX Runtime.
    Nsessionuse_io_bindingc                 C   s   || _ t|j| _|du r| jdkrtd d}nd}|| _t|| _	t
|| _t| j| j| _dd t| D | _dd t| D | _d	d | D | _d
d | D | _dd | D | _dd | D | _dS )a  
        Initializes the ORTSessionMixin class.
        Args:
            session (`onnxruntime.InferenceSession`):
                The ONNX Runtime session to use for inference.
            use_io_binding (`Optional[bool]`, defaults to `None`):
                Whether to use IO Binding or not. If `None`, it will be set to `True` for CUDAExecutionProvider and `False`
                for other providers.
        NCUDAExecutionProviderz`use_io_binding` was not set, but CUDAExecutionProvider supports IO Binding. Setting `use_io_binding=True` to leverage IO Binding and improve performance. You can disable it by setting `model.use_io_binding=False`.TFc                 S      i | ]\}}|j |qS  name).0idxinputr   r   f/lsinfo/ai/hellotax_ai/llm_service/venv_embed/lib/python3.10/site-packages/optimum/onnxruntime/base.py
<dictcomp>P       z=ORTSessionMixin.initialize_ort_attributes.<locals>.<dictcomp>c                 S   r   r   r   )r   r   outputr   r   r!   r"   Q   r#   c                 S      i | ]}|j |jqS r   r   shaper   r    r   r   r!   r"   R       c                 S   r%   r   r&   r   r$   r   r   r!   r"   S   r)   c                 S   r%   r   r   typer(   r   r   r!   r"   T   r)   c                 S   r%   r   r+   r*   r   r   r!   r"   U   r)   )r   r   _model_pathpathproviderloggerinfo_use_io_bindingr   _io_bindingr   _dtyper   provider_option_device	enumerate
get_inputsinput_namesget_outputsoutput_namesinput_shapesoutput_shapesinput_dtypesoutput_dtypes)selfr   r   r   r   r!   initialize_ort_attributes2   s&   


z)ORTSessionMixin.initialize_ort_attributesreturnc                 C   s   t d | jS )zW
        Returns the path of the onnx file from which the session was created.
        zThe `ORTSessionMixin.model_path` property is deprecated and will be removed in a future version. Please use `ORTSessionMixin.path` instead (`ORTSessionMixin.path` is a proper Path object).)r0   warningr.   r@   r   r   r!   
model_pathW   s   zORTSessionMixin.model_pathc                 C   s   t d | jjS )zW
        Returns the name of the onnx file from which the session was created.
        zThe `ORTSessionMixin.model_name` property is deprecated and will be removed in a future version. Please use `ORTSessionMixin.path.name` instead (`ORTSessionMixin.path` is a proper Path object).)r0   rC   r.   r   rD   r   r   r!   
model_nameb   s   zORTSessionMixin.model_namec                 C   
   | j  S )T
        Returns a list of Execution Providers registered with the session.
        )r   get_providersrD   r   r   r!   	providersm      
zORTSessionMixin.providersc                 C   s
   | j d S )R
        Returns the main Execution Provider registered with the session.
        r   )rJ   rD   r   r   r!   r/   t   rK   zORTSessionMixin.providerc                 C   rG   )U
        Returns a dictionary of Execution Providers configurations/options.
        )r   get_provider_optionsrD   r   r   r!   provider_options{   rK   z ORTSessionMixin.provider_optionsc                 C   s   | j | j S )S
        Returns the configuration/options of the main Execution Provider.
        )rO   r/   rD   r   r   r!   r5      s   zORTSessionMixin.provider_optionc                 C      | j S )
        Returns the `torch.device` associated with the ONNX Runtime session.
        This device is inferred from the provider and provider options.
        )r6   rD   r   r   r!   device   s   zORTSessionMixin.devicec                 O      t d)NzThe device attribute is read-only, please use the `.to(device)` method to change both the device and the execution provider accordingly.)AttributeErrorr@   argskwargsr   r   r!   rS      s   c                 C   rQ   )
        Returns the `torch.dtype` associated with the ONNX Runtime session.
        This dtype is inferred from the input/output dtypes of the session.
        If no floating point type is found, it defaults to `torch.float32`.
        )r4   rD   r   r   r!   dtype   s   zORTSessionMixin.dtypec                 C   rQ   )<
        Returns whether IO Binding is used or not.
        )r2   rD   r   r   r!   r      s   zORTSessionMixin.use_io_bindingvaluec                 C   s   t |ts	td|| _dS )z,
        Sets the IO Binding usage.
        z+`use_io_binding` should be a boolean value.N)
isinstancebool
ValueErrorr2   )r@   r\   r   r   r!   r      s   

c           
      O   s  d}d}|D ])}t |ttjfr|}qt |trt|}qt |tjr'|}qt |tjr/|}q| D ]\}}|dkr?|}q4|dkrE|}q4|durL| S |du rR| S t|\}}t|}	t	|	 || jkrg| S | j
j|	g|gd | jdu r| jdkrtd d| _|| _| S )a
  
        Moves the session to the specified device by updating the execution provider and its options.
        Args:
            device (`str`, `int`, `torch.device`):
                The device to move the session to. It can be a string (e.g., "cuda", "cpu"), an integer (e.g., 0 for GPU 0),
                or a `torch.device` object.
        Returns:
            `ORTSessionMixin`: The updated session.
        Raises:
            ValueError: If the device is not supported or if the provider is not available.
        NrS   rZ   )rO   r   z`use_io_binding` was set to `None` before the provider was changed to CUDAExecutionProvider. Setting `use_io_binding=True` to leverage IO Binding and improve performance. You can disable it by setting `model.use_io_binding=False`.T)r]   strtorchrS   intrZ   itemsr   r   r   r   set_providersr   r/   r0   r1   r6   )
r@   rW   rX   rZ   rS   argkeyr\   r5   r/   r   r   r!   to   sH   



zORTSessionMixin.to	use_torchc                 C   s"   |du r| j du rtddS dS )z
        Raises an error if IO Binding is requested although the tensor used are numpy arrays.

        Args:
            use_torch (`bool`):
                Whether the tensor used during inference are of type torch.Tensor or not.
        FTzIO Binding can not be used when passing numpy inputs. Please disable IO Binding with `model.use_io_binding=False`, or pass `torch.Tensor` inputs instead.N)r   r_   )r@   rh   r   r   r!   raise_on_numpy_input_io_binding   s
   z/ORTSessionMixin.raise_on_numpy_input_io_bindingmodel_inputsc                 C   s   i }| j  D ]=}||ddu rtd| d|r&|| jdd||< n|| ||< t| j| }|| j|krD|| 	|||< q|S )a  
        Prepares the inputs for ONNX Runtime by converting them to numpy arrays with the expected dtype.

        Args:
            use_torch (`bool`):
                Whether the inputs are torch.Tensor or not.
            inputs (`Dict[str, Union[torch.Tensor, np.ndarray]]`):
                The inputs to prepare for ONNX Runtime.

        Returns:
            `Dict[str, np.ndarray]`: The inputs prepared for ONNX Runtime.
        NzInput z' is required by model but not provided.T)force)
r9   keysgetr_   numpyr   ort_type_to_numpy_typer>   rZ   astype)r@   rh   rj   onnx_inputs
input_nameexpected_dtyper   r   r!   _prepare_onnx_inputs   s   z$ORTSessionMixin._prepare_onnx_inputsonnx_outputsc                 C   sF   i }| j  D ]\}}|| ||< |r t|| | j||< q|S )a  
        Prepares the outputs from ONNX Runtime by converting them to torch.Tensor if requested.

        Args:
            use_torch (`bool`):
                Whether the outputs should be torch.Tensor or not.
            onnx_outputs (`List[np.ndarray]`):
                The outputs from ONNX Runtime.

        Returns:
            `Dict[str, Union[torch.Tensor, np.ndarray]]`: The outputs prepared for the user.
        )r;   rc   ra   
from_numpyrg   rS   )r@   rh   ru   model_outputsoutput_namer   r   r   r!   _prepare_onnx_outputs   s   z%ORTSessionMixin._prepare_onnx_outputsrx   output_shapec                 C   s   t |dkr
tdtdd |D std| dtdd |D s,td| dt| j| }t |dkrHtjt	||| j
d	}|S tjd|| j
d	}|S )
a[  
        Prepares an output buffer for ONNX Runtime IO Binding.

        Args:
            output_name (`str`):
                The name of the output for which to prepare the buffer.
            output_shape (`Tuple[int]`):
                The shape of the output buffer.

        Returns:
            `torch.Tensor`: The output buffer.

        r   z"`output_shape` should not be emptyc                 s   s    | ]}t |tV  qd S N)r]   rb   r   dimr   r   r!   	<genexpr>J  s    z9ORTSessionMixin._prepare_output_buffer.<locals>.<genexpr>z4`output_shape` should only contain integers but got .c                 s   s    | ]}|d kV  qdS r   Nr   r|   r   r   r!   r~   L  s    z=`output_shape` should only contain positive integers but got )rZ   rS   )lenr_   allr   ort_type_to_torch_typer?   ra   emptynpprodrS   tensor)r@   rx   rz   output_dtypeoutput_bufferr   r   r!   _prepare_output_buffer:  s   z&ORTSessionMixin._prepare_output_bufferknown_axes_valuesc                 C   s>   t | j| }t|D ]\}}t|tr| ||||< q|S )a  
        Infers the shape of a given output by using the `known_axes_values` mapping.

        Args:
            output_name (`str`):
                The name of the output for which to infer the shape.
            known_axes_values (`Dict[str, int]`):
                A mapping of the axis names to their values.

        Returns:
            `List[int]`: The inferred shape of the output.
        )listr=   r7   r]   r`   _dynamic_axis_inference)r@   rx   r   rz   r   	axis_namer   r   r!   _output_shape_inferenceX  s   
z'ORTSessionMixin._output_shape_inferencer   c                 C   sV   ||v r|| S | d}t|D ]\}}||v r!t|| ||< qttd|S )au  
        Infers the value of a given dynamic axis by using the `known_axes_values` mapping.

        For instance, for the following inputs:
            axis_name = "sequence_length + past_sequence_length"
            known_axes_values = {"batch_size": 2, "sequence_length": 3, "past_sequence_length": 7}

        The inferred value will be:
            3 + 7 = 10
         )splitr7   r`   rb   evaljoin)r@   r   r   tokensr   tokenr   r   r!   r   n  s   
z'ORTSessionMixin._dynamic_axis_inferenceoutputs_to_not_bindknown_output_buffersknown_output_shapesc              
   C   s  i }| j  D ]k}|| j}||  s||  ||< || j}t| j| }	||	kr6|| 	|	||< || 
 }
|
dkrDt
 }
| j|| jj| jjpPdt| j| ||
 t| j| D ]\}}t|trq|| ||< qbqi }i }|pzi }|p~i }|pt }| j D ]H}||v rq||v r|| }n| ||}||v r|| }n| ||}|
 }
| j|| jj| jjpdt| j| ||
 |||< |||< q||fS )a=  
        Prepares IO binding for ONNX Runtime.

        Args:
            model_inputs (`Dict[str, torch.Tensor]`):
                The inputs to bind to the model.
            outputs_to_not_bind (`Optional[Set[str]]`, defaults to `None`):
                The names of the outputs that should not be bound.
            known_output_buffers (`Optional[Dict[str, str]]`, defaults to `None`):
                Sometimes we can reuse the same input buffer for the output. This is the case for the output sample
                in a diffusion pipeline. It is possible to explicitely pass the buffer via this argument.
            known_output_shapes (`Optional[Dict[str, Tuple[int]]]`, defaults to `None`):
                It can be hard to infer all the output shapes from the inputs only. For instance for the past key /
                values. It is possible to explicitely pass the shape via this argument.

        Returns:
            `TupleDict[str, Tuple[int]], Dict[str, torch.Tensor]`: A dictionary of the output shapes and a dictionary of
            the output buffers.
        r   )r9   rl   r'   is_contiguous
contiguousrZ   r   r   r>   rg   data_ptrNON_EMPTY_TENSORr3   
bind_inputrS   r,   indexro   r7   r<   r]   r`   setr;   r   r   bind_outputr?   )r@   rj   r   r   r   r   rr   input_shapetensor_dtypers   r   r   r   r=   output_buffersrx   rz   r   r   r   r!   _prepare_io_binding  sf   


	




	
z#ORTSessionMixin._prepare_io_bindingc                 O   rT   )NztThe `forward` method should be implemented in the derived class. Please refer to the documentation for more details.)NotImplementedErrorrV   r   r   r!   forward  s   zORTSessionMixin.forwardc                 O   s   | j |i |S r{   )r   rV   r   r   r!   __call__  s   zORTSessionMixin.__call__save_directoryc                    sr   t j dd t| jj}t |j }t|} fdd|D }t|| t	||D ]
\}}t|| q,dS )z
        Saves the ONNX Runtime session to the specified directory.

        Args:
            save_directory (`Union[str, Path]`):
                The directory where to save the ONNX Runtime session.
        T)exist_okc                    s   g | ]	}t  |j qS r   )r   r   )r   external_data_pathr   r   r!   
<listcomp>  s    z0ORTSessionMixin.save_session.<locals>.<listcomp>N)
osmakedirsr   r   r-   r   r   shutilcopyzip)r@   r   rE   model_save_pathexternal_data_pathsexternal_data_save_pathssrc_pathdst_pathr   r   r!   save_session  s   	
zORTSessionMixin.save_sessionr{   )NNN)+__name__
__module____qualname____doc__r
   r   r^   rA   propertyr`   rE   rF   r   rJ   r/   r   r   rO   r5   ra   rS   setterrZ   r   rg   ri   r	   Tensorr   ndarrayrt   ry   r   rb   r   r   r   r   r   r   r   r   r   r   r   r   r!   r   +   sz    %


	>
"
""
 
`r   c                   @   s   e Zd ZdZdee fddZedd Zedd Z	ed	d
 Z
edd Zedd Zedd Zedd ZejdefddZdd ZdS )ORTParentMixinaC  
    Wrapper class for multiple ORTSessionMixin instances. This class allows to combine multiple parts into
    a single wrapper. It is useful for pipelines/models that require multiple parts to work together, such
    as diffusion pipelines or encoder-decoder models, as it provides a unified interface for inference.
    partsc                 C   s8   t |dk r
tdtdd |D rtd|| _dS )z
        Initializes the ORTParentMixin class.
        Args:
            parts (`List[ORTSessionMixin]`):
                List of ORTSessionMixin instances to wrap.
        r   z<ORTParentMixin should be initialized with at least one part.c                 s   s    | ]	}t |t V  qd S r{   )r]   r   r   modelr   r   r!   r~     s    z;ORTParentMixin.initialize_ort_attributes.<locals>.<genexpr>zGAll parts passed to ORTParentMixin should be ORTSessionMixin instances.N)r   r_   anyr   )r@   r   r   r   r!   rA     s
   
z(ORTParentMixin.initialize_ort_attributesc                    .   t  fdd jD std  jd jS )rH   c                 3   "    | ]}|j  jd  j kV  qdS r   )rJ   r   r   rD   r   r!   r~   !       z+ORTParentMixin.providers.<locals>.<genexpr>zCalling `ORTParentMixin.providers` when the underlying parts have different values for `providers` is not recommended. The value of the first session will be returned. r   )r   r   r0   rC   rJ   rD   r   rD   r!   rJ     
   zORTParentMixin.providersc                    r   )rL   c                 3   r   r   )r/   r   r   rD   r   r!   r~   -  r   z*ORTParentMixin.provider.<locals>.<genexpr>zCalling `ORTParentMixin.provider` when the underlying parts have different values for `provider` is not recommended. The value of the first session will be returned. r   )r   r   r0   rC   r/   rD   r   rD   r!   r/   (  r   zORTParentMixin.providerc                    r   )rM   c                 3   r   r   )rO   r   r   rD   r   r!   r~   9  r   z2ORTParentMixin.provider_options.<locals>.<genexpr>zCalling `ORTParentMixin.provider_options` when the underlying parts have different values for `provider_options` is not recommended. The value of the first session will be returned. r   )r   r   r0   rC   rO   rD   r   rD   r!   rO   4  r   zORTParentMixin.provider_optionsc                    r   )rP   c                 3   r   r   )r5   r   r   rD   r   r!   r~   E  r   z1ORTParentMixin.provider_option.<locals>.<genexpr>zCalling `ORTParentMixin.provider_option` when the underlying parts have different values for `provider_option` is not recommended. The value of the first session will be returned. r   )r   r   r0   rC   r5   rD   r   rD   r!   r5   @  r   zORTParentMixin.provider_optionc                    r   )rR   c                 3   r   r   )rS   r   r   rD   r   r!   r~   R  r   z(ORTParentMixin.device.<locals>.<genexpr>zCalling `ORTParentMixin.device` when the underlying parts have different values for `device` is not recommended. The value of the first session will be returned. r   )r   r   r0   rC   rS   rD   r   rD   r!   rS   L  s
   zORTParentMixin.devicec                    r   )rY   c                 3   r   r   )rZ   r   r   rD   r   r!   r~   `  r   z'ORTParentMixin.dtype.<locals>.<genexpr>zCalling `ORTParentMixin.dtype` when the underlying parts have different values for `dtype` is not recommended. The value of the first session will be returned. r   )r   r   r0   rC   rZ   rD   r   rD   r!   rZ   Y  s
   zORTParentMixin.dtypec                    r   )r[   c                 3   r   r   )r   r   r   rD   r   r!   r~   l  r   z0ORTParentMixin.use_io_binding.<locals>.<genexpr>zCalling `ORTParentMixin.use_io_binding` when the underlying parts have different values for `use_io_binding` is not recommended. The value of the first session will be returned. r   )r   r   r0   rC   r   rD   r   rD   r!   r   g  r   zORTParentMixin.use_io_bindingr\   c                 C   s   | j D ]}||_qdS )z9
        Setter for the use_io_binding property.
        N)r   r   )r@   r\   r   r   r   r!   r   s  s   
c                 O   s    | j D ]
}|j|i | q| S )a  
        Moves all parts to the specified device by updating the execution provider and its options.
        Args:
            device (`str`, `int`, `torch.device`):
                The device to move the session to. It can be a string (e.g., "cuda", "cpu"), an integer (e.g., 0 for GPU 0),
                or a `torch.device` object.
        Returns:
            `ORTParentMixin`: The updated session.
        Raises:
            ValueError: If the device is not supported or if the provider is not available.
        )r   rg   )r@   rW   rX   r   r   r   r!   rg   {  s   
zORTParentMixin.toN)r   r   r   r   r   r   rA   r   rJ   r/   rO   r5   rS   rZ   r   r   r^   rg   r   r   r   r!   r     s(    






r   )%r   r   r   pathlibr   typingr   r   r   r   r   r   r	   rn   r   ra   onnxruntimer
   r   *onnxruntime.transformers.io_binding_helperr   
onnx.utilsr   utils.loggingr   utilsr   r   r   r   r   r   r0   r   r   r   r   r   r   r   r!   <module>   s&   $	
   ]