
    )jx                         d dl Z d dlZd dlZd dlZd dlmZ ddlmZ ddl	m
Z
mZ ddlmZ dZd Zd Zed	k    r ed
            e             dS dS )    N   )generate_step)make_prompt_cachesave_prompt_cache)loadi  c                  l   t          j        d          } |                     dt          dd           |                     dt          d	           |                     d
dd           |                     dt          dd           |                     dt          dd           |                     ddd           |                     ddd           |                     dt          dd           |                     dt          dd           |                     d d!t          t
          "           | S )#z&Set up and return the argument parser.z=Cache the state of a prompt to be reused with mlx_lm.generate)descriptionz--model	mlx_modelz;The path to the local model directory or Hugging Face repo.)typedefaulthelpz--adapter-pathz9Optional path for the trained adapter weights and config.)r   r   z--trust-remote-code
store_truez)Enable trusting remote code for tokenizer)actionr   z--eos-tokenNz#End of sequence token for tokenizerz--max-kv-sizez$Set the maximum key-value cache sizez--prompt-cache-filez$The file to save the prompt cache inT)r   requiredz--promptz;Message to be processed by the model ('-' reads from stdin))r   r   z	--kv-bitszFNumber of bits for KV cache quantization. Defaults to no quantization.)r   r   r   z--kv-group-sizez%Group size for KV cache quantization.@   z--quantized-kv-startzLWhen --kv-bits is set, start quantizing the KV cache from this step onwards.)r   r   r   )argparseArgumentParseradd_argumentstrintDEFAULT_QUANTIZED_KV_START)parsers    ]/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/cache_prompt.pysetup_arg_parserr      s   $S  F J	     H    
 8    
 2	     3	     3    
 J    
 '     4	     "*     M    c            
         t                      } |                                 }d|j        rdnd i}|j        
|j        |d<   t	          |j        |j        |          \  }}|j        dk    rt          j	        
                                n|j        |_        |j        r$d|j        dg}|                    |dd	          }n|                    |j                  }t          ||j                  }t!          j        |          }t%          j                    d
fd}	t'          ||d
||j        |j        |j        |	          D ]}
t/                       t/          dt!          j                    dz  dd           t/          d           i }|j        |d<   t3          j        |          |d<   t7          |j        ||           d S )Ntrust_remote_codeT	eos_token)adapter_pathtokenizer_config-user)rolecontentF)add_generation_promptcontinue_final_messager   c                     t          j                     }| |z
  z  }d| dd|dd}t          t          |                    t          |dt          |          z
  z  z   dd	           d S )
NzProcessed 6dz	 tokens (z6.2fz tok/s)  T)endflush)timemaxlenprint)	processedtotal_tokenscurrentspeedmsgmax_msg_lenstarts        r   callbackzmain.<locals>.callbackv   s    )++Wu_-GYGGGEGGGG+s3xx00cC;S122$GGGGGGr   )
max_tokensprompt_cachekv_bitskv_group_sizequantized_kv_startprompt_progress_callbackzPeak memory: g    eAz.3fz GBz	Saving...modelr    )r   
parse_argsr   r   r   r?   r   promptsysstdinreadhas_chat_templateapply_chat_templateencoder   max_kv_sizemxarrayr-   r   r;   r<   r=   r0   get_peak_memoryjsondumpsr   prompt_cache_file)r   argsr    r?   	tokenizermessagesrA   cacheyr8   _metadatar6   r7   s               @@r   mainrV   S   s   FD ,T5K-UTTQUV~!(,%
&)  E9 '+kS&8&8#).."""dkDK" 	/#<<=.."'#' / 
 
 !!$+..eT%566E
A IKKEKH H H H H H 	(2!)	 	 	 
 
 		GGG	
=",..4
=
=
=
=>>>	+H
HW#':.>#?#?H d,eX>>>>>r   __main__zCalling `python -m mlx_lm.cache_prompt...` directly is deprecated. Use `mlx_lm.cache_prompt...` or `python -m mlx_lm cache_prompt ...` instead.)r   rL   rB   r-   mlx.corecorerI   generater   models.cacher   r   utilsr   r   r   rV   __name__r0    r   r   <module>r_      s      



        # # # # # # > > > > > > > >      ! ? ? ?D>? >? >?B z	E	X   	DFFFFF r   