
    )jf                         d dl Z d dlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZmZ dZdZdZdZd Zd	Zd
Zd Zd Zedk    r ed            e             dS dS )    N   )stream_generate)make_prompt_cache)make_sampler)loadsharded_load        g      ?   z(mlx-community/Llama-3.2-3B-Instruct-4bitc                     t          j        d          } |                     dt          dt                     |                     ddd	           |                     d
t          d           |                     dt
          t          d           |                     dt
          t          d           |                     dt
          t          d           |                     dt
          dd           |                     dt          t          d           |                     dt          dd           |                     ddt          t          d           |                     ddd !           |                     d"dd#	           | S )$z&Set up and return the argument parser.zChat with an LLM)descriptionz--modelz;The path to the local model directory or Hugging Face repo.)typehelpdefaultz--trust-remote-code
store_truez)Enable trusting remote code for tokenizer)actionr   z--adapter-pathz9Optional path for the trained adapter weights and config.)r   r   z--tempzSampling temperature)r   r   r   z--top-pzSampling top-pz--xtc-probabilityz5Probability of XTC sampling to happen each next tokenz--xtc-thresholdr	   zDThresold the probs of each next token candidate to be sampled by XTCz--seedz	PRNG seedz--max-kv-sizez$Set the maximum key-value cache sizeNz--max-tokensz-mz$Maximum number of tokens to generatez--system-promptz.System prompt to be used for the chat template)r   r   z
--pipelinez,Use pipelining instead of tensor parallelism)argparseArgumentParseradd_argumentstrDEFAULT_MODELfloatDEFAULT_TEMPDEFAULT_TOP_PDEFAULT_XTC_PROBABILITYintDEFAULT_SEEDDEFAULT_MAX_TOKENS)parsers    U/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/chat.pysetup_arg_parserr       s   $1CDDDF
J	     8    
 H    
 ul9O     };K     'D	     S	     	     3	     "3     =    
 ;    
 M    c                     t                      } |                                 }t          j                                        }|                                |j        r|nd }|j        s|nd }fdt          j                            |j                   |	                                dk    r6|j
        r|                     d           t          |j        ||          \  }}n*t          |j        |j
        d|j        rdnd i          \  }}fd} d|j         d	            |             t!          ||j                  }	 t%          d
k    rdnd          }	|	dk    rd S |	dk    rt!          ||j                  }<|	dk    r |             Mg }
|j        |
                    d|j        d           |
                    d|	d           |                    |
d          }t-          ||||j        t1          |j        |j        |j        |j        |                    d          t=          |j                  z             |          D ]} |j         dd                         +)Nc                  0    dk    rt          | i | d S d S )Nr   )print)argskwargsranks     r   rprintzmain.<locals>.rprintc   s,    1994"6""""" 9r!   r   z*Adapters not supported in distributed modetrust_remote_codeT)adapter_pathtokenizer_configc                  `      d             d             d             d           d S )NzThe command list:z- 'q' to exitz- 'r' to reset the chatz- 'h' to display these commands )r(   s   r   
print_helpzmain.<locals>.print_helpv   sL    "###()))011111r!   z"[INFO] Starting chat session with .r   z>>  qrhsystem)rolecontentuser)add_generation_prompt
)xtc_thresholdxtc_probabilityxtc_special_tokens)
max_tokenssamplerprompt_cache)flushend)!r    
parse_argsmxdistributedinitr'   pipelinerandomseedsizer*   errorr   modelr   r)   r   max_kv_sizeinputsystem_promptappendapply_chat_templater   r=   r   temptop_pr:   r;   encodelisteos_token_idstext)r   r%   grouppipeline_grouptensor_grouprK   	tokenizerr.   r?   querymessagespromptresponser'   r(   s                @@r   mainr_   Z   s   FDN!!E::<<D"m5UUN $7554L# # # # # INN49zz||a 	GLLEFFF'
NLQQyyJ*#T-C%MTT
 
 
y2 2 2 2 2 F=
===>>>JLLL$UD,<==L#tqyyeeb11C<<EC<<,UD4DEELC<<JLLL)OOX$:LMMNNNE::;;;.."& / 
 
 ( 	
"0 $ 4$$T**T)2I-J-JJ   &
 
 
 	6 	6H  F8="55555G#r!   __main__zwCalling `python -m mlx_lm.chat...` directly is deprecated. Use `mlx_lm.chat...` or `python -m mlx_lm chat ...` instead.)r   mlx.corecorerC   generater   models.cacher   sample_utilsr   utilsr   r   r   r   r   DEFAULT_XTC_THRESHOLDr   r   r   r    r_   __name__r$   r-   r!   r   <module>ri      s           % % % % % % + + + + + + & & & & & & % % % % % % % %   :B B BJH H HV z	E	H   	DFFFFF r!   