
    )jz                        d dl Z d dlmZ d dlmZmZmZ d dlmZ	 d dl
mZ d dlmZ ddlmZmZmZmZmZ 	 d#ded	ej        d
edeeej        egeeef         f         fdZg dZg dZ	 	 	 	 	 	 	 	 	 	 	 d$dedededee         dee         dedee         dedee         dedeeeeej        egeeef         f         ef                  defdZde j        fdZd  Ze d!k    r e!d"            e             dS dS )%    N)Path)CallableOptionalUnion)tree_map_with_path   )dequantize_modelloadquantize_modelsaveupload_to_hub@   recipemodel
group_sizereturnc                 (  	
 d	d| dk    rdn/| dk    rddn$| dk    rdn| d	k    rdnt          d
|            d |                                D             }t          |          dk    rt          d          t          |d                             d                    D ]\  }|                                r nt          |j                  
dt          dt          j	        dt          t          t          f         f	
fd}|S )Naffine   	mixed_2_6   	mixed_3_4      	mixed_3_6	mixed_4_6zInvalid quant recipe c                      g | ]\  }}d |v 	|S )	down_proj ).0k_s      X/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/convert.py
<listcomp>z1mixed_quant_predicate_builder.<locals>.<listcomp>&   s&    IIItq!q8H8H8H8H8H    r   z2Model does not have expected keys for mixed quant..pathmoduler   c                 L   t          |                     d                    k    r(t          |                     d                             nd}|	dz  k     p|d	z  dz  k    p|	dz  z
  dz  dk    }d| v sd| v sd	| v r|rd
S d| v r|rd
S d| v rd
S d
S )a=  Implements mixed quantization predicates with similar choices to, for example, llama.cpp's Q4_K_M.
        Ref: https://github.com/ggerganov/llama.cpp/blob/917786f43d0f29b7c77a0c56767c0fa4df68b1c5/src/llama.cpp#L5265
        By Alex Barron: https://gist.github.com/barronalex/84addb8078be21969f1690c1454855f3
        r&   r         r   r   v_projv_a_projv_b_proj)r   bitsmoder   lm_head)lensplitint)
r'   r(   indexuse_more_bitsr   	high_bitslayer_locationlow_bitsr0   
num_layerss
       r#   mixed_quant_predicatez<mixed_quant_predicate_builder.<locals>.mixed_quant_predicate0   s    4::c??##n44 

3/000 	 J!O# 2J!++2
a'1,1 	 
d 2 2jD6H6H 7I",iNNN$=",iNNN",iNNN((DIIIr%   )
ValueErrornamed_modulesr2   	enumerater3   isdigitlayersstrnnModuler   booldict)r   r   r   	down_keysr!   r;   r7   r8   r9   r0   r:   s     `   @@@@@r#   mixed_quant_predicate_builderrG      s    DI	;					;			;		999:::IIu2244IIII
9~~MNNN 'y|'9'9#'>'>??  99;; 	E	U\""JJJ	J 
tTz	J J J J J J J J J J J: ! r%   )r   r   r   r   )float16bfloat16float32	mlx_modelFr   hf_pathmlx_pathquantizeq_group_sizeq_bitsq_modedtypeupload_reporevision
dequantizequant_predicatetrust_remote_codec                 L   t          |t                    rt          |          }|                                rt	          d| d          t          d           t          | |dd|id          \  }}}t          |
t                    r&|dk    rt	          d          t          |
||          }
|                    d	d           .|                    d
d           x}r|                    dd           t          v rrt          d           t          t                    t          |dd           fd}|                    t          ||                                                     |r|	rt	          d          |r't          d           t          ||||||
          \  }}|	rJt          d           |                    dd            |                    dd            t#          |          }t%          || |||           |t'          ||           d S d S )NzCannot save to the path zY as it already exists. Please delete the file/directory or specify a new path to save to.z[INFO] LoadingTrW   )rT   return_configtokenizer_configlazyr   z4Quant predicates only support 'affine' quantization.torch_dtypetext_configrR   z[INFO] Using dtype:cast_predicatec                     dS )NTr   )r"   s    r#   <lambda>zconvert.<locals>.<lambda>   s    D r%   c                      |           r9t          j        |j        t           j                  r|                              S |S )N)mx
issubdtyperR   floatingastype)r!   vr^   rR   s     r#   	set_dtypezconvert.<locals>.set_dtype   sA    ~a   R]17BK%H%H xx&r%   z/Choose either quantize or dequantize, not both.z[INFO] Quantizing)r0   rV   z[INFO] Dequantizingquantizationquantization_config)
isinstancerA   r   existsr<   printr
   rG   getMODEL_CONVERSION_DTYPESgetattrrb   updater   
parametersr   popr	   r   r   )rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   r   	tokenizerconfigr]   rg   r^   s         `          @r#   convertru   U   s   " (C   ">> 
Rx R R R
 
 	

 

#-/@A     E9f /3'' 
XTUUU7
 
 }

=$//}M4)H)HH+}..'''#U+++E"" (8..II	 	 	 	 	 	 	'	53C3C3E3EFFGGG LJ LJKKK 	
!"""&+
 
 
v  (#$$$

>4(((

($/// ''   h,,,,, r%   c                     t          j        d          } |                     ddt          d           |                     dt          dd	
           |                     dddd           |                     ddt          d           |                     ddt          d           |                     ddt          dg d           |                     ddt
          t          d           |                     dd t          t          d!           |                     d"d#t          d           |                     d$d%d&dd'           |                     d(d)dd'           | S )*z
    Configures and returns the argument parser for the script.

    Returns:
        argparse.ArgumentParser: Configured argument parser.
    z(Convert Hugging Face model to MLX format)descriptionz	--hf-pathz--modelzSPath to the model. This can be a local path or a Hugging Face Hub model identifier.)typehelpz
--mlx-pathrK   zPath to save the MLX model.)rx   defaultry   z-qz
--quantizezGenerate a quantized model.
store_true)ry   actionz--q-group-sizezGroup size for quantization.N)ry   rx   rz   z--q-bitsz!Bits per weight for quantization.z--q-modezThe quantization mode.r   )r   mxfp4nvfp4mxfp8)ry   rx   rz   choicesz--quant-predicatezMixed-bit quantization recipe.F)ry   r   rx   requiredz--dtypezvType to save the non-quantized parameters. Defaults to config.json's `torch_dtype` or the current model weights dtype.)ry   rx   r   rz   z--upload-repoz-The Hugging Face repo to upload the model to.z-dz--dequantizezDequantize a quantized model.)ry   r|   rz   z--trust-remote-codez)Trust remote code when loading tokenizer.)argparseArgumentParseradd_argumentrA   r4   QUANT_RECIPESrn   )parsers    r#   configure_parserr      s    $>  F b	     3:W     l!>|     +	     0	     %555     .      F'     <	     ,     8	     Mr%   c                  |    t                      } |                                 }t          di t          |           d S )Nr   )r   
parse_argsru   vars)r   argss     r#   mainr      s>    FDd4jjr%   __main__zCalling `python -m mlx_lm.convert ...` directly is deprecated. Use `mlx_lm.convert ...` or `python -m mlx_lm convert ...` instead.)r   )rK   FNNr   NNNFNF)"r   pathlibr   typingr   r   r   mlx.corecorerb   mlx.nnrB   	mlx.utilsr   utilsr	   r
   r   r   r   rA   rC   r4   rE   rD   rG   r   rn   ru   r   r   r   __name__rl   r   r%   r#   <module>r      s          , , , , , , , , , ,             ( ( ( ( ( (              689! 9!9!	9!/29!sBIt$eD$J&7789! 9! 9! 9!x EDD<<< 
  "& " 	#Z- Z-Z-Z- Z- 3-	Z-
 SMZ- Z- C=Z- Z- smZ- Z- hRY-uT4Z/@@A3FGZ- Z- Z- Z- Z-zK(1 K K K K\   z	E	O   	DFFFFF r%   