
    )j-                         d Z ddlZddlmZ ddlmZ ddlmZm	Z	 ddl
m
Z
 ddlmZmZ ddlmZ ddlmZmZmZ d Z G d	 d
ej                  Z	 ddZd Zedk    r e             dS dS )zS
Implements GPTQ

- https://arxiv.org/abs/2210.17323
- https://github.com/AutoGPTQ
    N)tree_flattentree_unflatten)tqdm)QuantizedSwitchLinearSwitchLinear)	load_data)compute_bits_per_weightloadsavec           	      (   |dv sJ d|             d|z  }d|z  dz
  }t          j        | d|j        d         df          } t          j        t          j        | |d         z
  |d         z            d|                              t           j                  } t          j        dt          j        d	d|t           j                            }t          j        | dd|f          } t          j	        | |z  d
          } | 
                    dd          S )N>            zUnsupported bits     r      ).N        r   axis)mx	unflattenshapecliproundastypeuint32powerarangesumflatten)wbitsscalesbiases
el_per_intn_binsshiftss          [/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/quant/gptq.pyquantizer*      s
   98$88tJWq[F
QV\"-r233A

!fY''6)+<<==sF	 	fRY  Xa1b$	::;;F
QR,--A
q6z###A99R    c                   $     e Zd Z fdZd Z xZS )Catcherc                     t                                                       || _        t          j        d          | _        d S )Nr   )super__init__moduler   arrayH)selfr1   	__class__s     r)   r0   zCatcher.__init__)   s3    #r+   c                 ~    |                     dd          }| j        |j        |z  z   | _         | j        |g|R i |S )Nr   r   )r!   r3   Tr1   )r4   xargskwargsxfs        r)   __call__zCatcher.__call__.   sK    YYq""$)#t{1.t...v...r+   )__name__
__module____qualname__r0   r<   __classcell__)r5   s   @r)   r-   r-   (   sG            
/ / / / / / /r+   r-   r   c                 0	   g }t           j        t          h}t          |                                 t           j        j                  D ]:\  }	}
t          |
          |v r$|                    |	t          |
          f           ;| 
                    t          |                     t          t          t          dt          |          |                    t          |          |z  d          D ]1\  }}||||z            } | |           t!          j        |           2d }t           j        fd            }t          t          |          t          |          d          D ]E\  }\  }}
 ||
j                  }|
`t!          j        |           |
j        j        j        }|
j        j                            t           j                  }g }g }t          d|j        d         |          D ]}||z   }|d	||f         }t!          j        |          }t!          j        ||
          \  }}}|                    |           |                    |           t          |          D ]{}	|	|z  }	|d	|	|	dz   f         }||	|	f         } |||||          }|d	|	|	|z   fxx         |||	|	dz   |	|	|z   f         z  z  cc<   ||d	|	|	dz   f<   t!          j        ||           ||d	|d fxx         |||||d f         z  z  cc<   t!          j        |d          }t!          j        |d          }t7          |||          } |
j                            |
          }!| |!_        ||!_        ||!_        |!                     |           t!          j        |!           ||!f||<   G| 
                    t          |                     t          |                                 t           j        j                  }|d
}"||d
}#g }$t          |          D ]?\  }\  }	}
tC          |
d          r'|#|"|	<   |$                    |	 |
j        di |#f           @t          |$          dk    r"| 
                    t          |$                     | |"fS )N)is_leafr   zComputing Hessians)totaldescc                    t          j        t           j                  5  dt          j        t          j        |                     z  }t          j        | j        d                   }| ||fxx         |z  cc<   t           j                            |           } t           j        	                    |           } t           j                            | d          }|cd d d            S # 1 swxY w Y   d S )Ng{Gz?r   T)upper)
r   streamcpumeandiagr   r   linalgcholeskycholesky_inv)r3   damprJ   Hinvs       r)   compute_inverse_hessianz.gptq_quantize.<locals>.compute_inverse_hessianN   s
   Yrv 	 	"'"'!**---D9QWQZ((DdDjMMMT!MMM	""1%%A	&&q))A9%%at%44D	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   B;C''C+.C+c                     dz  dz
  }t          j        t          j        | |z
  |z            d|          }||z  |z   }| |z
  |z  S )Nr   r   r   )r   r   r   )r"   dr$   r%   r'   qr#   s         r)   
gptq_errorz!gptq_quantize.<locals>.gptq_errorX   sR    D1GBHa&jF233S&AAQJA{r+   
Quantizingr   .)r#   
group_sizer   r   to_quantized )"nnLinearr   r   leaf_modulesModule	is_moduletypeappendr-   update_modulesr   r   	enumeraterangelenr   evalcompiler3   r1   weightdtyper   float32r   
zeros_liker*   concatenaterW   r$   r%   	set_dtypehasattr)%modeldatar#   rV   fallback_bitsfallback_group_size
batch_sizelayers
gptq_typesklesbatchrP   rT   lidkeyrO   	orig_typeW
all_scales
all_biasesijWlerr_r$   r%   r"   rR   Wqlayerconfigfallback_configq_layerss%     `                                  r)   gptq_quantizer   4   s    F)\*JU//1129;NOOO + +177j  MM1gajj/***	//000 %3t99j1122$ii:%!    1
 QZ'(e
   Z    Z &&kk   /# /#Xc1
 '&qs++C
HO)	HO""2:..

q!'"+z22 	. 	.AJA3!8B-##C !#BTj Q Q QAvvf%%%f%%%:&& 	  	 Qc1q1u9n%AJJq!VV44#q1q5y.!!!Qa!a%iQU.B)C%CC!!!&'CQUN#Qc122gJJJ#QqS!""W--JJJJ 
444
444avv..%%4J%GG	"""
Els	//000	#  F *55F,<OPPOHv&& D D	6Aq1n%% 	D'F1IOOQ A A A ABCCC
8}}q^H55666&=r+   c                     t          j                    } |                     ddd           |                     dd           |                     dt          dd	
           |                     dt          dd
           |                     dt          dd
           |                     dt          dd
           |                     dt          dd
           |                     dt          dd
           |                     dt          d           |                                 }t
          j                            |j                   t          |j	        dd          \  }}}t          ||j        |j                  }t          |||j        |j        |j        |j                  \  }|d<   t%          |          }t'          d|d d!           t)          |j        |j	        |||           d S )"Nz--modelz-mzQwen/Qwen3-0.6B-base)defaultz
--mlx-path	mlx_modelz--bitsr   z!Quantization bits for GPTQ layers)r^   r   helpz--group-size@   z'Quantization group size for GPTQ layersz--fallback-bits   z%Quantization bits for non-GPTQ layersz--fallback-group-sizez+Quantization group size for non-GPTQ layersz--num-samplesr   z?Number of samples from the calibration dataset, use -1 for all.z--sequence-lengthi   z)Sequence length for the calibration data.z--seed{   )r^   r   T)lazyreturn_configquantizationzQuantized model with z.3fz bits per weight.)argparseArgumentParseradd_argumentint
parse_argsr   randomseedr
   rm   r   num_samplessequence_lengthr   r#   rV   ro   rp   r	   printr   mlx_path)parserr9   rm   	tokenizerr   calibration_databpws          r)   mainr      sL   $&&F
	41GHHH
k:::
sA,O     6	     4	     :	     N	     8	     sC888DINN49#DJTNNNE9f D,<d>RSS$1	 % %!E6.! "%
(
(C	
<#
<
<
<
<===
    r+   __main__)r   )__doc__r   mlx.corecorer   mlx.nnrY   	mlx.utilsr   r   r   mlx_lm.models.switch_layersr   r   mlx_lm.quant.utilsr   mlx_lm.utilsr	   r
   r   r*   r\   r-   r   r   r=   rX   r+   r)   <module>r      sQ                 2 2 2 2 2 2 2 2       K K K K K K K K ( ( ( ( ( (           	/ 	/ 	/ 	/ 	/bi 	/ 	/ 	/& k k k k\? ? ?D zDFFFFF r+   