o
    diy=                     @   s  d Z ddlZddlZddlmZ ddlmZmZmZm	Z	m
Z
 ddlZddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ d
dlmZ d
dlmZmZmZ d
dlmZ ddlmZmZ ddl m!Z! ddl"m#Z# ddl$m%Z% ddlm&Z&m'Z' erddlm(Z( e) Z*G dd dZ+dS )z?Main class for performing graph optimization with ONNX Runtime.    N)Path)TYPE_CHECKINGDictListOptionalUnion)
load_model)GenerationConfig)
AutoConfig)BertOnnxModel)optimize_model   )check_model_uses_external_data)CONFIG_NAMENormalizedConfigManagerlogging)maybe_save_preprocessors   )OptimizationConfig	ORTConfig)ORTModelForCausalLM)ORTModel) ORTModelForConditionalGeneration)ONNX_WEIGHTS_NAMEORTConfigManager)PretrainedConfigc                       s,  e Zd ZdZddeej dddef fddZe			d d
e
eejef deee  dd fddZ				d!dede
eejf dee dee def
ddZede
eejf deeef fddZede
eejf de
eejf defddZede
eejf de
eejf deeef fddZ  ZS )"ORTOptimizerzc
    Handles the ONNX Runtime optimization process for models shared on huggingface.co/models.
    Fonnx_model_pathconfigr   from_ortmodelc                    sb   t    || _|| _| jj| _|| _zt| j| j| _W dS  t	y0   t
d| j dw )a  
        Args:
            onnx_model_path (`List[os.PathLike]`):
                The paths of the onnx models to optimize.
            config ([`~transformers.PretrainedConfig`]):
                An instance of the configuration associated to the model to optimize.
            from_ortmodel (`bool`, defaults to `False`):
                Whether the model being optimized is already loaded into an ORTModel, or if it was passed from disk.
        z-Tried to use ORTOptimizer for the model type zm, but it is not available yet. Please open an issue or submit a PR at https://github.com/huggingface/optimum.N)super__init__r   r   
model_typer   r   get_normalized_config_classnormalized_configKeyErrorNotImplementedError)selfr   r   r   	__class__ n/lsinfo/ai/hellotax_ai/llm_service/venv_embed/lib/python3.10/site-packages/optimum/onnxruntime/optimization.pyr!   3   s   


zORTOptimizer.__init__Nmodel_or_path
file_namesreturnc                 C   s  g }d}t |tr=d}t |tr'||jj|jjg7 }|jdur&||jj nt |tr3|j	r3t
d||j |j}n?tj|rtd}|du rLtgn|}t|}tt|vratdt dt|}|D ]
}||| qhntd| d| |||dS )	a  
        Args:
            model_or_path (`Union[str, os.PathLike, ORTModel]`):
                The path to a local directory hosting the model to optimize or an instance of an `ORTModel` to quantize.
                Can be either:
                    - A path to a local *directory* containing the model to optimize.
                    - An instance of [`~optimum.onnxruntime.ORTModel`].
            file_names(`Optional[List[str]]`, defaults to `None`):
                The list of file names of the models to optimize.
        NTa  ORTOptimizer does not support ORTModelForCausalLM models when without/with past models are merged. Please re-export your model. This can be done by using the optimum-cli ONNX export tool or `ORTModelForCausalLM.from_pretrained(..., export=True, use_merged=False)`.Fz<The local directory does not contain the configuration file .zUnable to load the model from )r   r   )
isinstancer   r   encoderpathdecoderdecoder_with_pastappendr   
use_mergedr&   r   osisdirr   r   r   listdir
ValueErrorr
   from_pretrainedjoinpath)clsr,   r-   r   r   r   	file_namer*   r*   r+   r;   K   s<   



zORTOptimizer.from_pretrained	optimizedToptimization_configsave_dirfile_suffixuse_external_data_formatone_external_filec                 C   sR  |dur	t d t|}|jddd t| j| t| jj}|	|}t 
d d}| jD ]}	tjt|	dd}
t|
du rFd} nq1~
t  t|||d}| jD ]}	|r_d	| nd
}||	j | |	j}z5t|	 || jj| jj|j||j|j d}|jr|r|j | ||d | |_!|j"|j# dd W n t$y } zdt|v rt%t| d}|| d}~ww |j | ||d t|	 d & r| j'du rt()|	 d  qV| j*| |*| t+| jd j,| zt-.| jd j,}|*| W n
 t$y   Y nw t 
d| d| d| d t|S )a  
        Optimizes a model given the optimization specifications defined in `optimization_config`.

        Args:
            optimization_config ([`~optimum.onnxruntime.OptimizationConfig`]):
                The configuration containing the parameters related to optimization.
            save_dir (`Union[str, os.PathLike]`):
                The path used to save the optimized model.
            file_suffix (`str`, defaults to `"optimized"`):
                The file suffix used to save the optimized model.
            use_external_data_format (`Optional[bool]`, defaults to `None`):
                Whether to use external data format to store model of size >= 2Gb. This argument is deprecated.
            one_external_file (`bool`, defaults to `True`):
                When `use_external_data_format=True`, whether to save all tensors to one external file.
                If False, save each tensor to a file named with the tensor name.

        Nz}The argument use_external_data_format in the ORTOptimizer.optimize() method is deprecated and will be removed in optimum 2.0.T)parentsexist_okzOptimizing model...F)load_external_data)optimizationrC   rD   _ )	opt_leveloptimization_optionsuse_gpuonly_onnxruntime)rC   all_tensors_to_one_file)use_symbolic_shape_inferkeep_io_typesz#Incomplete symbolic shape inferencezO. Try to set `disable_shape_inference=True` in your optimization configuration._datar   zOptimized model saved at: z (external data format: z ; saved all tensor to one file: ))/loggerwarningr   mkdirr   "check_optimization_supported_modelr"   get_model_ort_typer   create_fusion_optionsinfor   onnxloadstrr   gccollectr   r<   stemwith_suffixsuffixr   as_posixr$   num_attention_headshidden_sizeoptimization_leveloptimize_for_gpu*enable_transformers_specific_optimizationsfp16save_model_to_filemodelconvert_float_to_float16disable_shape_inference	ExceptionRuntimeErroris_filer   r7   removesave_pretrainedr   parentr	   r;   )r'   r@   rA   rB   rC   rD   r"   rL   model_uses_external_data
model_path
onnx_model
ort_configrb   output_path	optimizereerrgeneration_configr*   r*   r+   optimizez   s   




	
zORTOptimizer.optimizec                 C   sJ   t t| }| }tdddd | D   dd | D S )an  
        Computes the dictionary mapping the name of the fused operators to their number of apparition in the model.

        Args:
            onnx_model_path (`Union[str, os.PathLike]`):
                Path of the ONNX model.

        Returns:
            The dictionary mapping the name of the fused operators to their number of apparition in the model.
        z%The following operators were fused : z, c                 S   s   g | ]
\}}|d kr|qS r   r*   .0kvr*   r*   r+   
<listcomp>  s    z4ORTOptimizer.get_fused_operators.<locals>.<listcomp>c                 S   s   i | ]\}}|d kr||qS r~   r*   r   r*   r*   r+   
<dictcomp>      z4ORTOptimizer.get_fused_operators.<locals>.<dictcomp>)r   r   get_fused_operator_statisticsrT   rZ   joinitems)r   onnx_optimized_modelfused_operatorr*   r*   r+   get_fused_operators   s   z ORTOptimizer.get_fused_operatorsonnx_optimized_model_pathc                 C   sX   t t| }t t|}t| }t| }|| }td| d| d|  |S )a  
        Compute the difference in the number of nodes between the original and the optimized model.

        Args:
            onnx_model_path (`Union[str, os.PathLike]`):
                Path of the ONNX model.
            onnx_optimized_model_path (`Union[str, os.PathLike]`):
                Path of the optimized ONNX model.

        Returns:
            The difference in the number of nodes between the original and the optimized model.
        z
There are z nodes before optimization and z,nodes after. The number of nodes removed is )r   r   lennodesrT   rZ   )r   r   rv   r   nodes_number_onnx_model!nodes_number_onnx_optimized_modeldifference_nodes_numberr*   r*   r+   get_nodes_number_difference  s   z(ORTOptimizer.get_nodes_number_differencec                    sx   t t| t t|fdd t }fD ]}| D ]}||j q q fdd|D }dd | D S )aN  
        Compute the dictionary mapping the operators name to the difference in the number of corresponding nodes between
        the original and the optimized model.

        Args:
            onnx_model_path (`Union[str, os.PathLike]`):
                Path of the ONNX model.
            onnx_optimized_model_path (`Union[str, os.PathLike]`):
                Path of the optimized ONNX model.

        Returns:
            The dictionary mapping the operators name to the difference in the number of corresponding nodes between the
            original and the optimized model.
        c                    s$   t  | }t | }|| S N)r   get_nodes_by_op_type)op_typeonnx_model_nodes_with_op_type'onnx_optimized_model_nodes_with_op_type)rv   r   r*   r+   nodes_difference_given_typeA  s   zJORTOptimizer.get_operators_difference.<locals>.nodes_difference_given_typec                    s   i | ]}| |qS r*   r*   )r   r   )r   r*   r+   r   L  s    z9ORTOptimizer.get_operators_difference.<locals>.<dictcomp>c                 S   s   i | ]\}}|d kr||qS r~   r*   r   r*   r*   r+   r   M  r   )r   r   setr   addr   r   )r   r   op_typesrk   nodeoperators_differencer*   )r   rv   r   r+   get_operators_difference,  s   z%ORTOptimizer.get_operators_difference)Fr   )r?   NT)__name__
__module____qualname____doc__r   r7   PathLikeboolr!   classmethodr   r]   r   r   r;   r   r}   staticmethodr   intr   r   r   __classcell__r*   r*   r(   r+   r   .   sZ    "
2
 &
r   ),r   r^   r7   pathlibr   typingr   r   r   r   r   r[   r   transformersr	   +transformers.models.auto.configuration_autor
   (onnxruntime.transformers.onnx_model_bertr   "onnxruntime.transformers.optimizerr   
onnx.utilsr   utilsr   r   r   utils.save_utilsr   configurationr   r   modeling_decoderr   modeling_ortr   modeling_seq2seqr   r   r   r   
get_loggerrT   r   r*   r*   r*   r+   <module>   s.   