
    #j>8                     ,   d dl Z ddlmZ ddlmZ ddlmZmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddl m!Z! dd	l"m#Z# dd
l$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3 ddl4m5Z5 ddl6m7Z7 ddl8m9Z9 ddl:m;Z; ddl<m=Z= ddl>m?Z? ddl@mAZA ddlBmCZC ddlDmEZE ddlFmGZG ddlHmIZI ddlJmKZK ddlLmMZM i de%d e)d!e+d"e9d#e!d$eCd%eEd&e5d'e7d(e/d)e;d*e=d+e-d,e1d-eKd.e'd/eMeIe3e#eAe?eGd0ZNi ded e
d!e
d(ed"ed#ed$ed%ed&ed'ed*ed+ed,ed)ed-ed.e	d/eeeeeeed0ZOeeeeeeeefZP ejQ        eR          ZS G d1 d2          ZT G d3 d4          ZUd5eVfd6ZWd7eVfd8ZXd9 ZYdS ):    N   )
AutoConfig)logging)
AqlmConfigAutoRoundConfig	AwqConfigBitNetQuantConfigBitsAndBytesConfigCompressedTensorsConfig
EetqConfigFbgemmFp8ConfigFineGrainedFP8ConfigFourOverSixConfigFPQuantConfig
GPTQConfigHiggsConfig	HqqConfigMetalConfigMxfp4ConfigQuantizationConfigMixinQuantizationMethodQuantoConfigQuarkConfig
SinqConfig
SpQRConfigTorchAoConfig
VptqConfig   )HfQuantizer)AqlmHfQuantizer)AutoRoundQuantizer)AwqQuantizer)BitNetHfQuantizer)Bnb4BitHfQuantizer)Bnb8BitHfQuantizer)CompressedTensorsHfQuantizer)EetqHfQuantizer)FbgemmFp8HfQuantizer)FineGrainedFP8HfQuantizer)FourOverSixHfQuantizer)FPQuantHfQuantizer)GptqHfQuantizer)HiggsHfQuantizer)HqqHfQuantizer)MetalHfQuantizer)Mxfp4HfQuantizer)QuantoHfQuantizer)QuarkHfQuantizer)SinqHfQuantizer)SpQRHfQuantizer)TorchAoHfQuantizer)VptqHfQuantizerawqbitsandbytes_4bitbitsandbytes_8bitgptqaqlmquantoquarkfouroversixfp_quanteetqhiggshqqzcompressed-tensors
fbgemm_fp8torchaobitnetvptq)spqrfp8z
auto-roundmxfp4metalsinqc                   D    e Zd ZdZedefd            Zed             ZdS )AutoQuantizationConfigz
    The Auto-HF quantization config class that takes care of automatically dispatching to the correct
    quantization config given a quantization config stored in a dictionary.
    quantization_config_dictc           	         |                     d          }|                     dd          s|                     dd          r*|                     dd          rdnd}t          j        |z   }n|t          d          |t          vr9t          d| d	t          t                                                               t          |         }|                    |          S )
Nquant_methodload_in_8bitFload_in_4bit_4bit_8bitThe model's quantization config from the arguments has no `quant_method` attribute. Make sure that the model has been correctly quantizedUnknown quantization type, got  - supported types are: )	getr   BITS_AND_BYTES
ValueError AUTO_QUANTIZATION_CONFIG_MAPPINGlistAUTO_QUANTIZER_MAPPINGkeys	from_dict)clsrN   rP   suffix
target_clss        f/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/transformers/quantizers/auto.pyr_   z AutoQuantizationConfig.from_dict   s   /33NCC#''>> 	BZB^B^_motBuBu 	 8 < <^U S S`WWY`F-<vELL! \   ???:, : :/446677: :  
 6lC
##$<===    c                     t          j        |fi |}t          |dd           t          d| d          |j        }|                     |          } |j        di | |S )Nquantization_configz)Did not found a `quantization_config` in z2. Make sure that the model is correctly quantized. )r   from_pretrainedgetattrrZ   rf   r_   update)r`   pretrained_model_name_or_pathkwargsmodel_configrN   rf   s         rc   rh   z&AutoQuantizationConfig.from_pretrained   s    !12OZZSYZZ<!6==E N<Y  N  N  N   $0#C !mm,DEE"",,V,,,""rd   N)__name__
__module____qualname____doc__classmethoddictr_   rh   rg   rd   rc   rM   rM      s`         
 > > > > [>( 
# 
# [
# 
# 
#rd   rM   c                       e Zd ZdZedeez  fd            Zed             Zedeez  dedz  fd            Z	e
d             ZdS )	AutoHfQuantizerz
     The Auto-HF quantizer class that takes care of automatically instantiating to the correct
    `HfQuantizer` given the `QuantizationConfig`.
    rf   c           	         t          |t                    rt                              |          }|j        }|t
          j        k    r6t          |t                    st          d          |j	        r|dz  }n|dz  }|t          vr9t          d| dt          t                                                               t          |         } ||fi |S )NzZFound `quant_method=bitsandbytes` but `quantization_config` is not a `BitsAndBytesConfig`.rT   rS   rV   rW   )
isinstancers   rM   r_   rP   r   rY   r
   	TypeErrorrQ   r]   rZ   r\   r^   )r`   rf   rl   rP   rb   s        rc   from_configzAutoHfQuantizer.from_config   s    )400 	X"8"B"BCV"W"W*7 -<<<13EFF p   #/ (''555:, : :/446677: :  
 ,L9
z-88888rd   c                 P    t          j        |fi |}|                     |          S )N)rM   rh   ry   )r`   rk   rl   rf   s       rc   rh   zAutoHfQuantizer.from_pretrained   s0    4DEbmmflmm2333rd   quantization_config_from_argsNc                 &   |d}nd}t          |t                    rDt          |t                    rt          j        |          }nt                              |          }|D|j        j        |j        j        k    r*t          d|j        j         d|j        j         d          t          |t                    r~t          |t                    ri|	                                }|
                                D ]\  }}t          |||           |r(|dt          |                                           dz  }|dk    r7t          |t          t          t           f          st#          j        |           nt&                              |           |S )	z
        handles situations where both quantization_config from args and quantization_config from model config are present.
        NzYou passed `quantization_config` or equivalent parameters to `from_pretrained` but the model you're loading already has a `quantization_config` attribute. The `quantization_config` from the model will be used. zThe model is quantized with z but you are passing a z| config. Please make sure to pass the same quantization config class to `from_pretrained` with different loading attributes.z"However, loading attributes (e.g. z]) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.)rw   rs   r   r_   rM   	__class__rn   rZ   LOADING_ATTRIBUTES_CONFIG_TYPESget_loading_attributesitemssetattrr\   r^   r   r   r   warningswarnloggerinfo)r`   rf   r{   warning_msgloading_attr_dictattrvals          rc   merge_quantization_configsz*AutoHfQuantizer.merge_quantization_configs   s    )4y K
 K)400 	\7II \&5&?@S&T&T##&<&F&FGZ&[&[# *5#-6:W:a:jjjF/B/L/U F F  oL  oV  o_ F F F  
 )+JKK 	BPZ)+JQ
 Q
 	B !> T T V V.4466 8 8	c+T37777  B   BDIZI_I_IaIaDbDb   B   B   B  B"Z0CkS^`tEu%v%vM+&&&& KK$$$""rd   c           	         |                      dd           }|                      dd          s|                      dd          r*|                      dd          rdnd}t          j        |z   }n|t          d          |t          vrGt
                              d| d	t          t          	                                           d
           dS dS )NrP   rQ   FrR   rS   rT   rU   rV   rW   z~. Hence, we will skip the quantization. To remove the warning, you can delete the quantization_config attribute in config.jsonT)
rX   r   rY   rZ   r[   r   warningr\   r]   r^   )rN   rP   ra   s      rc   supports_quant_methodz%AutoHfQuantizer.supports_quant_method  s   /33NDII#''>> 	BZB^B^_motBuBu 	 8 < <^U S S`WWY`F-<vELL! \   ???NNi, i i/446677i i i  
 5trd   )rn   ro   rp   rq   rr   r   rs   ry   rh   r   staticmethodr   rg   rd   rc   ru   ru      s         
 9.E.L 9 9 9 [98 4 4 [4 /#!$;;/# (?'E/# /# /# [/#b   \  rd   ru   methodc                       fd}|S )z-Register a custom quantization configuration.c                     t           v rt          d d          t          | t                    st	          d          | t           <   | S )NzConfig '' already registeredz*Config must extend QuantizationConfigMixin)r[   rZ   
issubclassr   rx   )r`   r   s    rc   register_config_fnz8register_quantization_config.<locals>.register_config_fn$  s\    555DDDDEEE#677 	JHIII36(0
rd   rg   )r   r   s   ` rc   register_quantization_configr   !  s$         rd   namec                       fd}|S )zRegister a custom quantizer.c                     t           v rt          d d          t          | t                    st	          d          | t           <   | S )NzQuantizer 'r   z!Quantizer must extend HfQuantizer)r]   rZ   r   r   rx   )r`   r   s    rc   register_quantizer_fnz1register_quantizer.<locals>.register_quantizer_fn4  s[    )))E4EEEFFF#{++ 	A?@@@'*t$
rd   rg   )r   r   s   ` rc   register_quantizerr   1  s$         ! rd   c                 <   t          | d          }|r!t                              | j                  sd}|s|Q|r&t                              | j        |          | _        n|| _        t                              | j        |          }nd }||                    ||           |                    |          }|                    |           } |	                    |           } t          |j        dd          s |j        j        }t          |d|          |d<   || |fS )Nrf   F)pre_quantized)
device_mapweights_only
dequantizevaluequant)hasattrru   r   rf   r   ry   validate_environmentupdate_device_mapupdate_tp_planupdate_ep_planri   rP   )configrf   r   r   
user_agentr   hf_quantizerrP   s           rc   get_hf_quantizerr   A  sS   F$9::M _BB6C]^^  +7 	=)8)S)S*,?* *F&& *=F&&22&' 3 
 

 ))!% 	* 	
 	
 	
 "33J??
,,V44,,V44 |7uMM 	O';HL"),"N"NJw++rd   )Zr   models.auto.configuration_autor   utilsr   utils.quantization_configr   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   baser   quantizer_aqlmr    quantizer_auto_roundr!   quantizer_awqr"   quantizer_bitnetr#   quantizer_bnb_4bitr$   quantizer_bnb_8bitr%   quantizer_compressed_tensorsr&   quantizer_eetqr'   quantizer_fbgemm_fp8r(   quantizer_finegrained_fp8r)   quantizer_fouroversixr*   quantizer_fp_quantr+   quantizer_gptqr,   quantizer_higgsr-   quantizer_hqqr.   quantizer_metalr/   quantizer_mxfp4r0   quantizer_quantor1   quantizer_quarkr2   quantizer_sinqr3   quantizer_spqrr4   quantizer_torchaor5   quantizer_vptqr6   r]   r[   r   
get_loggerrn   r   rM   ru   strr   r   r   rg   rd   rc   <module>r      s    7 7 7 7 7 7                                                         4       + + + + + + 4 4 4 4 4 4 ' ' ' ' ' ' / / / / / / 2 2 2 2 2 2 2 2 2 2 2 2 F F F F F F + + + + + + 6 6 6 6 6 6 @ @ @ @ @ @ 9 9 9 9 9 9 2 2 2 2 2 2 + + + + + + - - - - - - ) ) ) ) ) ) - - - - - - - - - - - - / / / / / / - - - - - - + + + + + + + + + + + + 1 1 1 1 1 1 + + + + + +	<+ + O	
 O   ) " O  
> 6 & !  !" O#$ $$/   4$	9$+$ +$ J	$
 J$ J$ l$ [$ $$ $ 
9$ 1$ /$ [$ }$  !$" J#$$ !/$ $ $  6 	#  
	H	%	%&# &# &# &# &# &# &# &#Rl l l l l l l l^     !S ! ! ! ! !, !, !, !, !,rd   