
    )jE                         d dl Z d dlZd dlmZmZ d dlmZ d dlmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ e G d d                      Z e G d d                      Z!d Z" e!ddddg e ddg ddg           e ddg           e dddd g!          g"          Z# e!ddddg e ddg ddg           e ddg           e dd#dd g!          g"          Z$ e"e$d$%          Z% e!ddg d& e dddd'gdg           e d(d)g           e ddgd* +           e d,d-gd. +           e d/d0gd1 d2g3           e dddd gd4 5           e ddg d6d7 5          g"          Z&e#e#e#e#e$ e"e$d$%          e&d8Z'd9 Z(d: Z)	 	 dbd<ej*        d=ej+        d2ej+        dz  d>e,fd?Z-d=ej+        d@ej.        j/        fdAZ0dBe1ej*                 dCe	dDej*        dz  dEe2dFe,f
dGZ3dH Z4dDej*        dIe1e          dCe	dEe2dFe,f
dJZ5	 	 	 dcdNej*        dCe	dOe,dFe,dPe6d>e,dQe,fdRZ7	 dddDej*        dTe1e8         dCe	dOe,dFe,f
dUZ9	 	 	 	 	 dedXej+        dYe!dOe,dZe,d[e,d\e,dFe,fd]Z:d^ej*        d_e
e8ef         fd`Z;da Z<dS )f    N)	dataclassfield)Path)AnyCallableDict)request)tree_flattentree_maptree_map_with_path)tqdm)create_attention_mask)SwitchLinear)	load_data)loadsavec                       e Zd ZU ej        ed<   eej                 ed<   dZej        dz  ed<    ee          Z	eed<   dZ
eej        gef         dz  ed<   dS )ScaleConfigprevlayersNblock)default_factorykwargs
use_config)__name__
__module____qualname__nnModule__annotations__listr   r   r   r   r   bool     Z/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/quant/awq.pyr   r      s         
)OOOO"E29t"""5...FD...59J")d*+d299999r$   r   c                   d    e Zd ZU eed<   eed<   ee         ed<   ee         ed<   dZedz  ed<   dS )	AWQConfigembedlm_headno_clipscale_configsNlm_key)r   r   r   strr    r!   r   r,   r#   r$   r%   r'   r'   !   sY         JJJLLL#Y$$$$FC$Jr$   r'   c                     t          j        |           } |                                D ]\  }}t          | ||           | S N)copydeepcopyitemssetattr)cfgr   kvs       r%   updater7   *   sF    
-

C  1QJr$   embed_tokensr)   q_projk_proj	self_attninput_layernorm)r9   r:   v_projmask)r   r   r   r   zmlp.up_projzmlp.down_proj)r   r   mlppost_attention_layernorm	gate_projup_proj)r   r   r   )r(   r)   r*   r+   pre_feedforward_layernormlanguage_model)r,   )r9   q_a_projq_b_projkv_a_proj_with_mqa	kv_b_projrG   zself_attn.kv_a_layernormzself_attn.kv_b_projc                     d| j         vS N
switch_mlpr?   r   s    r%   <lambda>rN   m       )B r$   )r   r   r   zmlp.shared_experts.up_projzmlp.shared_experts.down_projc                     d| j         v S rJ   rL   rM   s    r%   rN   rN   r       \UY%> r$   zmlp.switch_mlp.up_projzmlp.switch_mlp.down_projc                     d| j         v S rJ   rL   rM   s    r%   rN   rN   w   rQ   r$   indices)r   r   r   r   c                     d| j         vS rJ   rL   rM   s    r%   rN   rN   ~   rO   r$   )r   r   r   r   )zswitch_mlp.gate_projzswitch_mlp.up_projzshared_experts.gate_projzshared_experts.up_projgatec                     d| j         v S rJ   rL   rM   s    r%   rN   rN      rQ   r$   )llamamistralqwen2qwen3gemma3_textgemma3deepseek_v2c                 L    | |z
                       t          j                  dz  S )N   )astypemxfloat32)xys     r%   msere      s    UNN2:&&1,,r$   c                 J    |                     d          }|D ]
}| |         } | S )N.)split)modulekeykeysr5   s       r%   submodule_from_keyrl      s0    99S>>D  Mr$       layerrc   
batch_sizec           
      V   g }t          d|j        d         |          D ]s}|4|                     | ||||z            ||||z            fi |           n'|                     | ||||z            fi |           t          j        |           tt          j        |d          }|S )Nr   axis)rangeshapeappendra   evalconcatenate)rn   rc   rS   ro   r   rd   is          r%   	run_layerry      s     	A1agaj*--  HHaA
N*+WQZ5G-HSSFSS    HHUU1QZ/0;;F;;<<<





qq!!!AHr$   groupc                     |                                 }|dk    r| S | j        d         }||z  dk    sJ |                                }||z   dz
  |z  }| ||z  |dz   |z           S )N   r   )sizert   rank)rc   rz   NBrlocal_Bs         r%   
dist_splitr      su    

AAvv	
Aq5A::::

A1uqyQGQ[AEW,,--r$   r   quantize_funcr   layer_kwargsn_gridc                    t           j                                        }|pi }| d         j        }|p| d         } ||fi |}|                                                    d          }t          d          }	d }
t          |                                          }t          |          D ]y}||z  }t          j
        ||z  d                              d          }||                                |                                z                                  z  }| D ]>}t          |t           j        t$          f          r ||j        |z            |z  |_        ?t)          ||fi |}t+          ||                                          }|4t           j                            |          |                                z  }||j        z  }t          j        |           |                                |	k     r|                                }	|}
|                    |           {|
                    d          }
t          j        |
           |
S )Nr   )r   r|   rq   infg-C6?)ra   distributedinit
input_featabsmeanfloatr
   
parametersrs   maximumreshapemaxminsqrt
isinstancer   Linearr   weightry   re   sumall_sumr}   rv   itemload_weights)r   r   r   r   r   rz   rc   outx_max
best_errorbest_scalesweightsratioscalesrn   out_qlosss                    r%   search_best_scaler      s%    N!!E%2Lq	AVAYE
%
"
"\
"
"CEEGGLLfL%%EuJK5++--..G v $ $E5L$//77;;6::<<&**,,6<<>>> 	M 	ME%")\!:;; M,}U\F-BCCfL%33l333""$$>))$//%**,,>D
99;;##J K 	7####%%b))KGKr$   c                 X   t          | t          j        t          f          rst	          |          dk    sJ | j        |d d t          j        f         z  | _        t          | d          r| j	        |z  | _	        |d         j        |z  |d         _        nt          | t          j
        t          j        f          rC| j        |z  | _        t          | d          r| j	        |z  | _	        |D ]}|j        |z  |_        n| j        j        dk    rf| j        j        }d| j                            t          j                  z   |z  dz
                      |          | _        |D ]}|j        |z  |_        nt#          d|            |D ]!}t          |d          r|j        |z  |_        "d S )Nr|   biasr   RMSNormg      ?z"Could not apply scale to prev_op: r   )r   r   r   r   lenr   ra   newaxishasattrr   	LayerNormr   	__class__r   dtyper`   rb   NotImplementedErrorr   )prev_opr   r   rn   dts        r%   apply_scaler      s   'BI|455 R6{{a &BJ*??7F## 	1"<&0GL!!9+f4q		GblBJ7	8	8 R &07F## 	1"<&0GL 	1 	1E <&0ELL	1			#y	0	0^!7>((444>D
&** 	  	1 	1E <&0ELL	1 ""Pw"P"PQQQ 9 95,'' 	9$/&8E9 9r$   configsc                    
 |D ]}|j         |                                s|j        ! |j                 

fd|j        D             }nd 
 fd|j        D             }fd|j        D             }|j        D ]1}t	          |d         |          rt          |d         |          ||<   2t          |
|||          }	t          t           |j	                  ||	           d S )Nc                 0    g | ]}t          |          S r#   rl   ).0llocal_blocks     r%   
<listcomp>zscale_block.<locals>.<listcomp>   s$    NNNQ(a88NNNr$   c                 0    g | ]}t          |          S r#   r   )r   r   r   s     r%   r   zscale_block.<locals>.<listcomp>#  s$    HHHq(22HHHr$   c                 *    i | ]}|v ||         S r#   r#   )r   r5   r   s     r%   
<dictcomp>zscale_block.<locals>.<dictcomp>$  s)    UUUq1CTCT<?CTCTCTr$   r   )r   r   r   r   r   )
r   r   r   r   r   getattrr   r   rl   r   )r   r   r   r   r   confr   local_kwargsr5   r   r   s   `  `      @r%   scale_blockr     s2     J J?&tu/E/E&:!
+KNNNN$+NNNFFKHHHHDKHHHFUUUUDKUUU 	8 	8Avay!$$ 8")&)Q"7"7Q"%'
 
 
 	&udi88&&IIII+J Jr$         ?@      ri   
group_size
max_shrinkn_framesc                    t           j                                        }| j                            dd          }|j        d         |z   dz
  |z  }	|d d |	         }| j        }
|                    |j        d         d|          }|
j        }t          j        |
d|
j        dz
            }g }t          d|j        d         |          D ]E}||||z            }
|
j        d         |
j        d         |z  f}t          j
        |t          d                    }t          j        g |dR |j                  }|
j        } |
j        g |
j        d d         d|R  }
t          j        d||
          }|
                                                    dd	          }t          t#          ||z                      D ]<}d||z  z
  }||z  }t          j        |
| |                              |          } ||          } |j        g |j        d d         d|R  }t          j        d||          }t'          ||                              d
          }|4t           j                            |          |                                z  }||j        d         z  }||k     }t          j        |||          }t          j        |dt           j        f         ||          }t          j        ||           >|                    |           Gt          j        |d
          } |j        g |j        d d         d|R  }t          j        || |          }|                    |          }t          j        |           |S )Nr   r|   r   r_   r   )r   zbdg,odg->bodT)rr   keepdimsrq   .)ra   r   r   r   flattenrt   r   r   ndimrs   fullr   zerosr   einsumr   r   intclipre   r   r   r}   wherer   rv   ru   rw   )ri   r   r   r   r   ro   r   rz   rc   strideww_init_shapew_all	w_max_allbgroup_shaper   
best_w_maxw_shaper   init_maxrx   pw_maxw_mw_qr   r   best_indicesw_rbest_ws                                  r%   search_best_clipr   3  sr    N!!E 	!!!Q''Agaj8#a'H4F	((F(AA			!'!*b*--A7LJq!QVaZ((EI 1ek!nj11 "% "%!a*n$%wqz172;*#<=W[%,,77
X//Q//qw???
'AI4qwss|4R4444i1--5577;;B;66 s:.//00 	, 	,AAJALE'!eVU++33G<<C-$$C#+>sy"~>r>:>>>CIna55E sE??&&A&..D ~--d33ejjllBCIaL D*,L,jAAJ,sBJ"?
SSJGJ
++++$$$$	222J
%-
:SbS)
:2
:z
:
:
:CWS:+z22F^^L))FGFOOOMr$      no_clip_keysc                     fd}t          ||                                 t          j        j                   d S )Nc                      t          |t          j        t          f          r7t	           fdD                       rt          |          }||_        d S d S d S )Nc              3       K   | ]}|vV  	d S r/   r#   )r   r5   paths     r%   	<genexpr>z1clip_block.<locals>.apply_clip.<locals>.<genexpr>  s@       A
 A
ATMA
 A
 A
 A
 A
 A
r$   )r   r   r   )r   r   r   r   allr   r   )r   ri   best_weightr   r   r   r   s   `  r%   
apply_clipzclip_block.<locals>.apply_clip  s    fry,788 		(S A
 A
 A
 A
#/A
 A
 A
 >
 >
 		( ++%	  K (FMMM		( 		( 		( 		(r$   is_leafr   leaf_modulesr   r   	is_module)r   r   r   r   r   r   s    ```` r%   
clip_blockr   y  s\    
( 
( 
( 
( 
( 
( 
( 
( z5#5#5#7#7ATUUUUUUr$         inputs
awq_configbitsembed_group_size
embed_bitsc           	         |j         | |j                  } t          j                                        }fd}	t	          |          }
|j        }| j        |                             ||          | j        |<    | j        |         |          }d }t          t          | j
                            D ]\  }}|                                }t          ||t          j        j                  }|                    |           t#          |||
          }|                    |           ~t          j        |           t#          |||
          }t'          ||                                          }|4t          j                            |          |                                z  }||j        z  }|                    |           |                                }t1          ||j        |	|d|
i           t5          ||j        |	|           t          j        |           t#          |||
          }t'          ||                                          }|4t          j                            |          |                                z  }||j        z  }t          j        d	||z              ||k    rU|                    |           |                    |           t          j        |           t          j        d
           |}t          j        |           t          j                     |j         x}| v r"| |                             ||          | |<   d S d S )Nc                 V    t          j        |           }t          j        |dS )N)r   r   )ra   quantize
dequantize)r   wqr   r   s     r%   r   z#awq_quantize.<locals>.quantize_func  s0    [*===}bt
CCCCr$   r   r   c                      t           t          j        t          f          s S  G  fddt          j                  } |            S )Nc                   *    e Zd Zdej        f fdZdS ).awq_quantize.<locals>.capture.<locals>.Catcherrc   c                 >   t          d          r#t          j        j        |gd          _        n|_        t	          t
                    rB|d         }t          d          r#t          j        j        |gd          _        n|_         |g|R i |S )Nr   r   rq   rS   )r   ra   rw   r   r   r   rS   )selfrc   argsr   rS   ri   s        r%   __call__z7awq_quantize.<locals>.capture.<locals>.Catcher.__call__  s    6<00 *(*8I17MTU(V(V(VF%%()F% fl33 1"1gGvy11 1)+#^W5A* * * *1va1$111&111r$   N)r   r   r   ra   arrayr  )ri   s   r%   Catcherr    s<        2"( 2 2 2 2 2 2 2 2r$   r
  )r   r   r   r   r   )ri   r
  s   ` r%   capturezawq_quantize.<locals>.capture  se    &29l";<< 	M	2 	2 	2 	2 	2 	2 	2bi 	2 	2 	2( wyyr$   r   )r>   r>   )r   r   r   r   r   )r   r   r   r   r   zLoss reduction: z6Loss is not reduced, falling back to original weights.)!r,   ra   r   r   r   r(   modelto_quantized	enumerater   r   r   r   r   r   r   update_modulesry   r   re   r   r   r}   r   r   r+   r   r*   writer7   rv   clear_cacher)   )r  r   r   r   r   r   r   r   rz   r   r>   	embed_keyr  er   orig_leavescapture_leavesoutputs	outputs_qbefore_lossorig_params
after_lossr)   s      ``                  r%   awq_quantizer    s    $j'(N!!ED D D D D D !((D I"[3@@#* A  EK	 $U[#F++F  4 d5<0011 5 55((**!';	@STTT^,,,E6555[))) 	Ejt<<<<eV$777	'9--1133.00==

LKw|#[)))&&((,' $	
 	
 	
 	
 	#+'!	
 	
 	
 	
 	Ejt<<<<eV$777	),,0022
//
;;ejjllJJgl"

@j;&>@@AAA##  ---LL%%%K*4@@@@JOPPP

%%%//w44'j 5 
 
g 0/r$   r  configc                     dddd<   fd}t          ||                                 t          j        j                   S )Nr   r   r  quantizationc                 r    t          |d          r|j        |j        dd         | <   d S dd         | <   d S )Nr   r  r  F)r   r   r   )r   ri   r  s     r%   update_configz$update_config.<locals>.update_config  sS    66"" 	1$/, ,F>"4(((
 ,1F>"4(((r$   r   r   )r  r  r   s    ` r%   r   r     s]    
 -/::F>1 1 1 1 1 }e&8&8&:&:BIDWXXXXMr$   c            
      b   t          j                    } |                     ddd           |                     dd           |                     dt          d	           |                     d
t          d	           |                     dt          d	           |                     dt          d	           |                     dt          d	           |                     dt          d	           |                     dt          d	           |                     dt          d	           |                                 }t
          j                                        }|j        }|J||	                                z  dk    r/||	                                ||	                                z  z
  z  }t
          j
                            |j                   t          |j        dd          \  }}}|d         }t                              |d           x}t!          d| d          t#          ||j        |j                  }	t'          |	|          }	t)          ||	||j        |j        |j        |j        |j                   t5          ||          }t7          |j        |j        |||           d S )Nz--modelz-mz&mlx-community/Qwen2.5-7B-Instruct-bf16)defaultz
--mlx-path	mlx_modelz--bitsr   )typer"  z--group-sizer   z--embed-bitsz--embed-group-sizerm   z--num-samples   z--sequence-lengthr   z--n-gridr   z--seed{   r   T)lazyreturn_config
model_typezAWQ support for z models NYI.)r   r   r   r   r   )argparseArgumentParseradd_argumentr   
parse_argsra   r   r   num_samplesr}   randomseedr   r  AWQ_MODEL_CONFIGSgetr   r   sequence_lengthr   r  r   r   r   r   r   r   r   mlx_path)
parserr  rz   r.  r  	tokenizerr  r)  r   calibration_datas
             r%   mainr8    s   $&&F
4!I     k:::
sA666
S"===
S!<<<
,3CCC
c3???
+#sCCC

b999
sC888DN!!E"K[5::<<7!;;uzz||kEJJLL&@@@INN49#DJTNNNE9f%J'++J===
F!"MZ"M"M"MNNN D,<d>RSS!"2E::Y??.{	 	 	 	 5&))F
    r$   )Nrm   )r   r   r   )r   )r   r   rm   r   r   )=r*  r0   dataclassesr   r   pathlibr   typingr   r   r   urllibr	   mlx.corecorera   mlx.nnr   	mlx.utilsr
   r   r   r   mlx_lm.models.baser   mlx_lm.models.switch_layersr   mlx_lm.quant.utilsr   mlx_lm.utilsr   r   r   r'   r7   	llama_awqgemma3_text_awq
gemma3_awqdeepseek_v2_awqr1  re   rl   r   r	  r   ry   r   Groupr   r!   dictr   r   r   r   r   r-   r   r  r   r8  r#   r$   r%   <module>rK     s      ( ( ( ( ( ( ( (       & & & & & & & & & &                   @ @ @ @ @ @ @ @ @ @       4 4 4 4 4 4 4 4 4 4 4 4 ( ( ( ( ( (        : : : : : : : :           I
x "1118		
 	
 	
 	/@AAA++	
 	
 	
	  	( )
x "1118		
 	
 	
 	/@AAA,+	
 	
 	
	  ( VO,<===
)
QQQ"238		
 	
 	
 	+)*	
 	
 	
 	#$BB	
 	
 	

 	-23>>	
 	
 	

 	)./>>;		
 	
 	
 	++BB		
 	
 	
 	+   ?>	
 	
 	
C-	2 2 2j "f_-=>>>"  - - -    $	 9	x X_ 	   (."( .2>#7 . . . .0O00 9t0 	0
 0 0 0 0f9 9 9:J9J+J J 	J
 J J J JH C CICC C 	C
 C C C C C CV V V9Vs)V V 	V
 V V V V4 o
 o
Ho
 o
 	o

 o
 o
 o
 o
 o
 o
 o
d9cN   (4 4 4 4 4r$   