
    )j                     z    d dl Z d dlmZ d dlmZmZmZ d dlm	Z	 d dl
mZmZ d Zd Zedk    r e             dS dS )    N)batch_generateloadstream_generate)DEFAULT_MODEL)pipeline_loadsharded_loadc                     t          j        d          } |                     dt          dt           dd           |                     dd	d
dt
                     |                     ddddt
                     |                     ddddt
                     |                     ddddt
                     |                     ddd           |                     dddd           |                     d t
          d!d"#           | S )$z&Set up and return the argument parser.zLLM benchmarking script)descriptionz--modelz[The path to the local model directory or Hugging Face repo. If no model is specified, then z	 is used.N)typehelpdefaultz--prompt-tokensz-pi   zLength of prompt)r   r   r   z--generation-tokensz-gi   zLength of completionz--batch-sizez-b   z
Batch sizez--num-trialsz-n   zNumber of timing trialsz
--pipeline
store_truez,Use pipelining instead of tensor parallelism)actionr   z--quantize-activationsz-qazSQuantize activations using the same quantization config as the corresponding layer.z--prefill-step-sizei   z0Step size for prefill processing (default: 2048))r   r   r   )argparseArgumentParseradd_argumentstrr   int)parsers    Z/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/benchmark.pysetup_arg_parserr      s   $1JKKKF
G.;G G G           #          &     ;    
  b	     ?	     M    c            	         t                      } |                                 t          j                            d           t          j                                        }|                                j        r|nd }j        s|nd }fd}j	        pt          }|                                dk    rt          |||d          \  }n t          |dddidj        i          \  }i _        j        }j        j        }|                    d	          p|d
         d	         }	t          j                            d|	||f                                          d         fd}
fd}|dk    r|
}n|} |d            |             g d} |d|dd|d           g t-          j                  D ]c} |                                           fd|D             }d |D             } |d|dz    dd                    |          z              dfdfd|D             }d |D             } |dd                    |          z              d S )Nr   c                  0    dk    rt          | i | d S d S )Nr   )print)argskwargsranks     r   rprintzmain.<locals>.rprintR   s,    1994"6""""" 9r   r   T)return_configtrust_remote_codequantize_activations)r"   tokenizer_configmodel_config
vocab_sizetext_configc                  @    t          j                  D ]} | S N)
max_tokensprefill_step_size)r   r,   )responser   generation_tokensmodelprompt	tokenizers    r   single_benchzmain.<locals>.single_benchn   s=    '("4
 
 
 	 	H r   c                  @    t           j                  j        S r*   )r   r,   stats)r   r.   r/   promptsr1   s   r   batch_benchzmain.<locals>.batch_benchy   s1    ("4
 
 
 	r   zRunning warmup..)
prompt_tpsgeneration_tpspeak_memoryzTiming with prompt_tokens=z, generation_tokens=z, batch_size=.c                 4    g | ]}|t          |          fS  getattr).0kr-   s     r   
<listcomp>zmain.<locals>.<listcomp>   s(    BBBAwx++,BBBr   c                 $    g | ]\  }}| d |dS =z.3fr<   r?   r@   vs      r   rA   zmain.<locals>.<listcomp>   s(    666daa>>!>>>666r   zTrial z:  z, c                 P      fdD             }t          |          j        z  S )Nc              3   8   K   | ]}t          |          V  d S )Nr=   )r?   r-   r@   s     r   	<genexpr>z$main.<locals>.avg.<locals>.<genexpr>   s-      ??!$$??????r   )sum
num_trials)r@   valsr   	responsess   ` r   avgzmain.<locals>.avg   s0    ????Y???4yy4?**r   c                 *    g | ]}| |          fS r<   r<   )r?   r@   rN   s     r   rA   zmain.<locals>.<listcomp>   s%    000q33q66{000r   c                 $    g | ]\  }}| d |dS rC   r<   rE   s      r   rA   zmain.<locals>.<listcomp>   s(    222$!Q!~~a~~~222r   z
Averages: )r   
parse_argsmxrandomseeddistributedinitr    pipeliner/   r   sizer   r   r$   _eos_token_idsprompt_tokensr.   
batch_sizegetrandinttolistrangerK   appendjoin)r   grouppipeline_grouptensor_groupr!   
model_pathconfigrZ   r[   r'   r2   r6   _benchreport_keysiresultsr   rN   r.   r/   r0   r5   r    r-   rM   r1   s                   @@@@@@@@@@r   mainrk   H   s\   FDINN1N!!E::<<D"m5UUN $7554L# # # # # ,}Jzz||a#/D$
 $
 $
 y&& $(1480$2KL	$
 $
 $
 y&  "I&M.JL))PVM-B<-PJi:
M/JKKRRTTGQZF	 	 	 	 	 	 	 	 	         Q
F
FHHHAAAK
FP-PP,=PP*PPPQQQI4?## 7 7688"""BBBBkBBB66g666 !   499W#5#556666+ + + + + + 1000K000G22'222G
F=499W---.....r   __main__)r   mlx.corecorerR   mlx_lmr   r   r   mlx_lm.generater   mlx_lm.utilsr   r   r   rk   __name__r<   r   r   <module>rs      s           8 8 8 8 8 8 8 8 8 8 ) ) ) ) ) ) 4 4 4 4 4 4 4 49 9 9xR/ R/ R/j zDFFFFF r   