o
    ‰dËiD(  ã                   @   s  d dl Z d dlZd dlmZ d dlmZ d dlmZmZ d dl	Z
d dlZd dlZd dlZd dlmZ ddlmZ ddlmZmZmZmZ dd	lmZmZ erTd d
lmZ de jd< dd„ ZG dd„ dƒZG dd„ dƒZ dZ!dZ"dd„ Z#G dd„ dƒZ$eeeedœZ%dS )é    N)Úcontextmanager)Úperf_counter_ns)ÚTYPE_CHECKINGÚSet)Útrangeé   )Úversion)ÚImageClassificationProcessingÚQuestionAnsweringProcessingÚTextClassificationProcessingÚTokenClassificationProcessing)Ú	RunConfigÚcpu_info_command)ÚDatasetÚfalseÚTOKENIZERS_PARALLELISMc                 C   s   | dv rd}|S | }|S )N)útext-classificationzaudio-classificationzsequence-classification© )ÚtaskÚautoclass_namer   r   ú_/lsinfo/ai/hellotax_ai/llm_service/venv_embed/lib/python3.10/site-packages/optimum/runs_base.pyÚget_autoclass_name   s
   ÿr   c                   @   s"   e Zd Z		ddd„Zdd„ ZdS )	Ú
CalibratorÚcalibration_datasetr   c                 C   s(   || _ || _|| _|| _|| _|| _d S ©N)r   Ú	quantizerÚ
model_pathÚqconfigÚcalibration_paramsÚnode_exclusion)Úselfr   r   r   r   r   r   r   r   r   Ú__init__&   s   
zCalibrator.__init__c                 C   ó   t ƒ ‚r   ©ÚNotImplementedError©r    r   r   r   Úfit0   s   zCalibrator.fitN)r   r   )Ú__name__Ú
__module__Ú__qualname__r!   r&   r   r   r   r   r   %   s
    
ÿ
r   c                   @   sR   e Zd Zdefdd„Zdd„ Zdd„ Zdd	„ Zd
d„ Zdd„ Z	dd„ Z
dd„ ZdS )ÚRunÚ
run_configc                 C   sb  t d$i |¤Ž |d | _|d dkrd| _nd| _|d |d dœ}tjd	d
gtj |¡d| _tj	t
ƒ gdd d¡}d}dtjv rRtj	ddd}| d¡ d¡}i d|d “d| j“d|d “d|d “d|d “d|d “d|d “d|d “d|d “d|d “d|d “d|d “d|“dtjtj|dœ“dg i i d œd!œ“d"|d" “d#|d# “| _dS )%ad  Initialize the Run class holding methods to perform inference and evaluation given a config.

        A run compares a transformers model and an optimized model on latency/throughput, model size, and provided metrics.

        Args:
            run_config (dict): Parameters to use for the run. See [`~utils.runs.RunConfig`] for the expected keys.
        r   Úquantization_approachÚstaticTFÚbatch_sizesÚinput_lengths)Ú
batch_sizeÚinput_lengthÚmaximizeÚminimize)Ú
directionsÚsampler)Úshellzutf-8NÚdevzQgit ls-remote https://github.com/huggingface/optimum.git HEAD | awk '{ print $1}'Ú
Úmodel_name_or_pathÚ	task_argsÚdatasetÚoperators_to_quantizer   Úaware_trainingÚper_channelÚcalibrationÚ	frameworkÚframework_argsÚhardwareÚversions)ÚtransformersÚoptimumÚoptimum_hashÚ
evaluation)ÚbaselineÚ	optimized)ÚtimeÚothersÚmax_eval_samplesÚtime_benchmark_argsr   )r   r   Ústatic_quantizationÚoptunaÚcreate_studyÚsamplersÚGridSamplerÚstudyÚ
subprocessÚcheck_outputr   ÚdecodeÚoptimum_versionÚ__version__ÚstriprD   Úreturn_body)r    r+   Úsearch_spaceÚcpu_inforF   r   r   r   r!   5   sv   

þ
ÿÿþýüûúùø	÷
öõôóýòþíé
èzRun.__init__c                 C   sB   z| j  | j¡ |  ¡  W |  ¡  tdƒ | jS |  ¡  tdƒ w )a  Launch inference to compare metrics between the original and optimized model.

        These metrics are latency, throughput, model size, and user provided metrics.

        Returns:
            `dict`: Finalized run data with metrics stored in the "evaluation" key.
        zFinished run.)rS   ÚoptimizeÚ_launch_timeÚlaunch_evalÚfinalizeÚprintrZ   r%   r   r   r   Úlaunchq   s   
ý
z
Run.launchc                 C   r"   )zÓOptuna objective function to measure latency/throughput.

        Populate the `["evaluation"]["time"]` list of the run for various batch size and input length.

        Returns:
            Dummy data.
        r#   )r    Útrialr   r   r   r^   ‚   s   zRun._launch_timec                 C   r"   )z
        Run evaluation on the original and optimized model.

        Populate the `["evaluation"]["others"]` subdictionary of the run.
        r#   r%   r   r   r   r_   Œ   s   zRun.launch_evalc                 C   s,   | j  ¡ }|d | _| jr|d | _dS dS )zTLoad evaluation dataset, and if needed, calibration dataset for static quantization.Úevalr?   N)Útask_processorÚload_datasetsÚ_eval_datasetrN   Ú_calibration_dataset)r    Údatasets_dictr   r   r   rf   ”   s
   

ÿzRun.load_datasetsc                 C   ó   t | dƒs	tdƒ‚| jS )z¼Get calibration dataset. The dataset needs to be loaded first with [`~optimum.runs_base.Run.load_datasets`].

        Returns:
            `datasets.Dataset`: Calibration dataset.
        rh   z,No calibration dataset defined for this run.)ÚhasattrÚKeyErrorrh   r%   r   r   r   Úget_calibration_datasetœ   s   
zRun.get_calibration_datasetc                 C   rj   )zÄ
        Get evaluation dataset.  The dataset needs to be loaded first with [`~optimum.runs_base.Run.load_datasets`].

        Returns:
            `datasets.Dataset`: Evaluation dataset.
        rg   z+No evaluation dataset defined for this run.)rk   rl   rg   r%   r   r   r   Úget_eval_dataset¦   s   
zRun.get_eval_datasetc                 C   r"   )zCleanup intermediary files.r#   r%   r   r   r   r`   ±   s   zRun.finalizeN)r'   r(   r)   Údictr!   rb   r^   r_   rf   rm   rn   r`   r   r   r   r   r*   4   s    <

r*   i Êš;ç    €„.Ac                 C   s   | t  S r   )ÚNS_TO_MS_SCALE)Úns_timer   r   r   Úns_to_msº   s   rs   c                
   @   sj   e Zd Zdededee dedef
dd„Zedefd	d
„ƒZ	e
dd„ ƒZdefdd„Zdd„ Zdd„ ZdS )ÚTimeBenchmarkr0   r1   Úmodel_input_namesÚwarmup_runsÚdurationc                 C   s8   || _ || _|| _|| _|| _g | _tdƒ| _|| _d S )Nz-inf)	r0   r1   Úmodelrv   Úbenchmark_durationÚ	latenciesÚfloatÚ
throughputru   )r    rx   r0   r1   ru   rv   rw   r   r   r   r!   ¿   s   

zTimeBenchmark.__init__Úreturnc                 C   s
   t | jƒS r   )Úlenrz   r%   r   r   r   Únum_runsÏ   s   
zTimeBenchmark.num_runsc                 c   sL    t ƒ }d V  t ƒ }| j || ¡ td|| › d|| d d›dƒ d S )NzTracked function took: zns (rp   z.3fzms))r   rz   Úappendra   )r    ÚstartÚendr   r   r   ÚtrackÓ   s   €(zTimeBenchmark.trackÚduration_nsc                 C   s   t t| jƒ| t dƒ| _d S )Né   )Úroundr~   rz   ÚSEC_TO_NS_SCALEr|   )r    r„   r   r   r   r`   Þ   s   zTimeBenchmark.finalizec                 C   s‚   t | jƒ| jtt | j¡ƒtt | j¡ƒtt | jd¡ƒtt | jd¡ƒtt | jd¡ƒtt | jd¡ƒtt | jd¡ƒdœ	}|S )Ng      à?gÍÌÌÌÌÌì?gffffffî?g®Gáz®ï?g+‡ÙÎ÷ï?)	Únb_forwardsr|   Úlatency_meanÚlatency_stdÚ
latency_50Ú
latency_90Ú
latency_95Ú
latency_99Úlatency_999)r~   rz   r|   rs   ÚnpÚmeanÚstdÚquantile)r    Úbenchmarks_statsr   r   r   Úto_dictá   s   ÷zTimeBenchmark.to_dictc                    sœ  i }h d£‰ d| j v rtjd| j| jfd|d< d| j v r*tj| j| jtjd|d< d| j v r<tj| j| jtjd|d< d| j v rUtj| jd	| jj	j
| jj	j
tjd|d< t ‡ fd
d„| j D ƒ¡rktd| j › dƒ‚t| jddD ]}| jjdi |¤Ž qr| jdkrÆ| jt }td| jd›dƒ t| jƒ|k r½|  ¡  | jjdi |¤Ž W d   ƒ n1 s±w   Y  t| jƒ|k s™|  |¡ |  ¡ S ddddœ}|S )N>   Ú	input_idsÚpixel_valuesÚattention_maskÚtoken_type_idsr–   iè  )ÚhighÚsizer˜   )Údtyper™   r—   é   c                    s   g | ]}|ˆ v‘qS r   r   )Ú.0Úk©Úchecked_inputsr   r   Ú
<listcomp>  s    z)TimeBenchmark.execute.<locals>.<listcomp>zAt least an input in z, has no dummy generation for time benchmark.z
Warming up)Údescr   zRunning time tracking in z.1fzs.éÿÿÿÿ)rˆ   r|   r‰   r   )ru   ÚtorchÚrandintr0   r1   ÚonesÚint64Úrandrx   ÚconfigÚ
image_sizeÚfloat32r   Úanyr$   r   rv   Úforwardry   r‡   ra   Úsumrz   rƒ   r`   r•   )r    ÚinputsÚ_Úbenchmark_duration_nsr”   r   r    r   Úexecuteñ   sB   




ÿÿ


ÿþ
ýzTimeBenchmark.executeN)r'   r(   r)   Úintr   Ústrr{   r!   Úpropertyr   r   rƒ   r`   r•   r³   r   r   r   r   rt   ¾   s&    ÿÿÿÿ
ÿ

rt   )r   ztoken-classificationzquestion-answeringzimage-classification)&ÚosrT   Ú
contextlibr   rJ   r   Útypingr   r   Únumpyr   rO   r¥   rD   Útqdmr   Ú r   rW   Úutils.preprocessingr	   r
   r   r   Ú
utils.runsr   r   Údatasetsr   Úenvironr   r   r*   r‡   rq   rs   rt   Útask_processing_mapr   r   r   r   Ú<module>   s:    
 a
ü