o
    diD(                     @   s  d dl Z d dlZd dlmZ d dlmZ d dlmZmZ d dl	Z
d dlZd dlZd dlZd dlmZ ddlmZ ddlmZmZmZmZ dd	lmZmZ erTd d
lmZ de jd< dd ZG dd dZG dd dZ dZ!dZ"dd Z#G dd dZ$eeeedZ%dS )    N)contextmanager)perf_counter_ns)TYPE_CHECKINGSet)trange   )version)ImageClassificationProcessingQuestionAnsweringProcessingTextClassificationProcessingTokenClassificationProcessing)	RunConfigcpu_info_command)DatasetfalseTOKENIZERS_PARALLELISMc                 C   s   | dv rd}|S | }|S )N)text-classificationzaudio-classificationzsequence-classification )taskautoclass_namer   r   _/lsinfo/ai/hellotax_ai/llm_service/venv_embed/lib/python3.10/site-packages/optimum/runs_base.pyget_autoclass_name   s
   r   c                   @   s"   e Zd Z		dddZdd ZdS )	
Calibratorcalibration_datasetr   c                 C   s(   || _ || _|| _|| _|| _|| _d S N)r   	quantizer
model_pathqconfigcalibration_paramsnode_exclusion)selfr   r   r   r   r   r   r   r   r   __init__&   s   
zCalibrator.__init__c                 C      t  r   NotImplementedErrorr    r   r   r   fit0   s   zCalibrator.fitN)r   r   )__name__
__module____qualname__r!   r&   r   r   r   r   r   %   s
    

r   c                   @   sR   e Zd ZdefddZdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd ZdS )Run
run_configc                 C   sb  t d$i | |d | _|d dkrd| _nd| _|d |d d}tjd	d
gtj|d| _tj	t
 gddd}d}dtjv rRtj	ddd}|dd}i d|d d| jd|d d|d d|d d|d d|d d|d d|d d|d d|d d|d d|dtjtj|ddg i i d d!d"|d" d#|d# | _dS )%ad  Initialize the Run class holding methods to perform inference and evaluation given a config.

        A run compares a transformers model and an optimized model on latency/throughput, model size, and provided metrics.

        Args:
            run_config (dict): Parameters to use for the run. See [`~utils.runs.RunConfig`] for the expected keys.
        r   quantization_approachstaticTFbatch_sizesinput_lengths)
batch_sizeinput_lengthmaximizeminimize)
directionssampler)shellzutf-8NdevzQgit ls-remote https://github.com/huggingface/optimum.git HEAD | awk '{ print $1}'
model_name_or_path	task_argsdatasetoperators_to_quantizer   aware_trainingper_channelcalibration	frameworkframework_argshardwareversions)transformersoptimumoptimum_hash
evaluation)baseline	optimized)timeothersmax_eval_samplestime_benchmark_argsr   )r   r   static_quantizationoptunacreate_studysamplersGridSamplerstudy
subprocesscheck_outputr   decodeoptimum_version__version__striprD   return_body)r    r+   search_spacecpu_inforF   r   r   r   r!   5   sv   


	

zRun.__init__c                 C   sB   z| j | j |   W |   td | jS |   td w )a  Launch inference to compare metrics between the original and optimized model.

        These metrics are latency, throughput, model size, and user provided metrics.

        Returns:
            `dict`: Finalized run data with metrics stored in the "evaluation" key.
        zFinished run.)rS   optimize_launch_timelaunch_evalfinalizeprintrZ   r%   r   r   r   launchq   s   

z
Run.launchc                 C   r"   )zOptuna objective function to measure latency/throughput.

        Populate the `["evaluation"]["time"]` list of the run for various batch size and input length.

        Returns:
            Dummy data.
        r#   )r    trialr   r   r   r^      s   zRun._launch_timec                 C   r"   )z
        Run evaluation on the original and optimized model.

        Populate the `["evaluation"]["others"]` subdictionary of the run.
        r#   r%   r   r   r   r_      s   zRun.launch_evalc                 C   s,   | j  }|d | _| jr|d | _dS dS )zTLoad evaluation dataset, and if needed, calibration dataset for static quantization.evalr?   N)task_processorload_datasets_eval_datasetrN   _calibration_dataset)r    datasets_dictr   r   r   rf      s
   

zRun.load_datasetsc                 C      t | ds	td| jS )zGet calibration dataset. The dataset needs to be loaded first with [`~optimum.runs_base.Run.load_datasets`].

        Returns:
            `datasets.Dataset`: Calibration dataset.
        rh   z,No calibration dataset defined for this run.)hasattrKeyErrorrh   r%   r   r   r   get_calibration_dataset   s   
zRun.get_calibration_datasetc                 C   rj   )z
        Get evaluation dataset.  The dataset needs to be loaded first with [`~optimum.runs_base.Run.load_datasets`].

        Returns:
            `datasets.Dataset`: Evaluation dataset.
        rg   z+No evaluation dataset defined for this run.)rk   rl   rg   r%   r   r   r   get_eval_dataset   s   
zRun.get_eval_datasetc                 C   r"   )zCleanup intermediary files.r#   r%   r   r   r   r`      s   zRun.finalizeN)r'   r(   r)   dictr!   rb   r^   r_   rf   rm   rn   r`   r   r   r   r   r*   4   s    <

r*   i ʚ;    .Ac                 C   s   | t  S r   )NS_TO_MS_SCALE)ns_timer   r   r   ns_to_ms   s   rs   c                
   @   sj   e Zd Zdededee dedef
ddZedefd	d
Z	e
dd ZdefddZdd Zdd ZdS )TimeBenchmarkr0   r1   model_input_nameswarmup_runsdurationc                 C   s8   || _ || _|| _|| _|| _g | _td| _|| _d S )Nz-inf)	r0   r1   modelrv   benchmark_duration	latenciesfloat
throughputru   )r    rx   r0   r1   ru   rv   rw   r   r   r   r!      s   

zTimeBenchmark.__init__returnc                 C   s
   t | jS r   )lenrz   r%   r   r   r   num_runs   s   
zTimeBenchmark.num_runsc                 c   sL    t  }d V  t  }| j||  td||  d|| d dd d S )NzTracked function took: zns (rp   z.3fzms))r   rz   appendra   )r    startendr   r   r   track   s   (zTimeBenchmark.trackduration_nsc                 C   s   t t| j| t d| _d S )N   )roundr~   rz   SEC_TO_NS_SCALEr|   )r    r   r   r   r   r`      s   zTimeBenchmark.finalizec                 C   s   t | j| jtt| jtt| jtt| jdtt| jdtt| jdtt| jdtt| jdd	}|S )Ng      ?g?gffffff?gGz?g+?)	nb_forwardsr|   latency_meanlatency_std
latency_50
latency_90
latency_95
latency_99latency_999)r~   rz   r|   rs   npmeanstdquantile)r    benchmarks_statsr   r   r   to_dict   s   zTimeBenchmark.to_dictc                    s  i }h d d| j v rtjd| j| jfd|d< d| j v r*tj| j| jtjd|d< d| j v r<tj| j| jtjd|d< d| j v rUtj| jd	| jj	j
| jj	j
tjd|d< t fd
d| j D rktd| j  dt| jddD ]}| jjdi | qr| jdkr| jt }td| jdd t| j|k r|   | jjdi | W d    n1 sw   Y  t| j|k s| | |  S dddd}|S )N>   	input_idspixel_valuesattention_masktoken_type_idsr   i  )highsizer   )dtyper   r      c                    s   g | ]}| vqS r   r   ).0kchecked_inputsr   r   
<listcomp>  s    z)TimeBenchmark.execute.<locals>.<listcomp>zAt least an input in z, has no dummy generation for time benchmark.z
Warming up)descr   zRunning time tracking in z.1fzs.)r   r|   r   r   )ru   torchrandintr0   r1   onesint64randrx   config
image_sizefloat32r   anyr$   r   rv   forwardry   r   ra   sumrz   r   r`   r   )r    inputs_benchmark_duration_nsr   r   r   r   execute   sB   








zTimeBenchmark.executeN)r'   r(   r)   intr   strr{   r!   propertyr   r   r   r`   r   r   r   r   r   r   rt      s&    


rt   )r   ztoken-classificationzquestion-answeringzimage-classification)&osrT   
contextlibr   rJ   r   typingr   r   numpyr   rO   r   rD   tqdmr    r   rW   utils.preprocessingr	   r
   r   r   
utils.runsr   r   datasetsr   environr   r   r*   r   rq   rs   rt   task_processing_mapr   r   r   r   <module>   s:    
 a
