o
    ri>8                     @   s  d dl Z ddlmZ ddlmZ ddlmZmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddl m!Z! dd	l"m#Z# dd
l$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3 ddl4m5Z5 ddl6m7Z7 ddl8m9Z9 ddl:m;Z; ddl<m=Z= ddl>m?Z? ddl@mAZA ddlBmCZC ddlDmEZE ddlFmGZG ddlHmIZI ddlJmKZK ddlLmMZM i de%d e)d!e+d"e9d#e!d$eCd%eEd&e5d'e7d(e/d)e;d*e=d+e-d,e1d-eKd.e'd/eMeIe3e#eAe?eGd0ZNi ded e
d!e
d(ed"ed#ed$ed%ed&ed'ed*ed+ed,ed)ed-ed.e	d/eeeeeeed0ZOeeeeeeeefZPeQeRZSG d1d2 d2ZTG d3d4 d4ZUd5eVfd6d7ZWd8eVfd9d:ZXd;d< ZYdS )=    N   )
AutoConfig)logging)
AqlmConfigAutoRoundConfig	AwqConfigBitNetQuantConfigBitsAndBytesConfigCompressedTensorsConfig
EetqConfigFbgemmFp8ConfigFineGrainedFP8ConfigFourOverSixConfigFPQuantConfig
GPTQConfigHiggsConfig	HqqConfigMetalConfigMxfp4ConfigQuantizationConfigMixinQuantizationMethodQuantoConfigQuarkConfig
SinqConfig
SpQRConfigTorchAoConfig
VptqConfig   )HfQuantizer)AqlmHfQuantizer)AutoRoundQuantizer)AwqQuantizer)BitNetHfQuantizer)Bnb4BitHfQuantizer)Bnb8BitHfQuantizer)CompressedTensorsHfQuantizer)EetqHfQuantizer)FbgemmFp8HfQuantizer)FineGrainedFP8HfQuantizer)FourOverSixHfQuantizer)FPQuantHfQuantizer)GptqHfQuantizer)HiggsHfQuantizer)HqqHfQuantizer)MetalHfQuantizer)Mxfp4HfQuantizer)QuantoHfQuantizer)QuarkHfQuantizer)SinqHfQuantizer)SpQRHfQuantizer)TorchAoHfQuantizer)VptqHfQuantizerawqbitsandbytes_4bitbitsandbytes_8bitgptqaqlmquantoquarkfouroversixfp_quanteetqhiggshqqzcompressed-tensors
fbgemm_fp8torchaobitnetvptq)spqrfp8z
auto-roundmxfp4metalsinqc                   @   s.   e Zd ZdZedefddZedd ZdS )AutoQuantizationConfigz
    The Auto-HF quantization config class that takes care of automatically dispatching to the correct
    quantization config given a quantization config stored in a dictionary.
    quantization_config_dictc                 C   s   | d}| dds| ddr!| ddrdnd}tj| }n|d u r)td|tvr;td| d	tt  t| }||S )
Nquant_methodload_in_8bitFload_in_4bit_4bit_8bitThe model's quantization config from the arguments has no `quant_method` attribute. Make sure that the model has been correctly quantizedUnknown quantization type, got  - supported types are: )	getr   BITS_AND_BYTES
ValueError AUTO_QUANTIZATION_CONFIG_MAPPINGlistAUTO_QUANTIZER_MAPPINGkeys	from_dict)clsrL   rM   suffix
target_cls r`   j/lsinfo/ai/hellotax_ai/llm_service/venv_embed/lib/python3.10/site-packages/transformers/quantizers/auto.pyr\      s    


z AutoQuantizationConfig.from_dictc                 K   sV   t j|fi |}t|dd d u rtd| d|j}| |}|jdi | |S )Nquantization_configz)Did not found a `quantization_config` in z2. Make sure that the model is correctly quantized.r`   )r   from_pretrainedgetattrrW   rb   r\   update)r]   pretrained_model_name_or_pathkwargsmodel_configrL   rb   r`   r`   ra   rc      s   

z&AutoQuantizationConfig.from_pretrainedN)__name__
__module____qualname____doc__classmethoddictr\   rc   r`   r`   r`   ra   rK      s    rK   c                   @   s\   e Zd ZdZedeeB fddZedd ZedeeB dedB fd	d
Z	e
dd ZdS )AutoHfQuantizerz
     The Auto-HF quantizer class that takes care of automatically instantiating to the correct
    `HfQuantizer` given the `QuantizationConfig`.
    rb   c                 K   s   t |tr
t|}|j}|tjkr't |tstd|j	r#|d7 }n|d7 }|t
vr9td| dtt
  t
| }||fi |S )NzZFound `quant_method=bitsandbytes` but `quantization_config` is not a `BitsAndBytesConfig`.rQ   rP   rS   rT   )
isinstancern   rK   r\   rM   r   rV   r	   	TypeErrorrN   rZ   rW   rY   r[   )r]   rb   rg   rM   r_   r`   r`   ra   from_config   s&   





zAutoHfQuantizer.from_configc                 K   s   t j|fi |}| |S )N)rK   rc   rr   )r]   rf   rg   rb   r`   r`   ra   rc      s   
zAutoHfQuantizer.from_pretrainedquantization_config_from_argsNc                 C   s   |durd}nd}t |trt |trt|}nt|}|dur9|jj|jjkr9td|jj d|jj dt |trdt |trd|	 }|
 D ]
\}}t||| qK|rd|dt|  d7 }|dkrwt |tttfswt| |S t| |S )	z
        handles situations where both quantization_config from args and quantization_config from model config are present.
        NzYou passed `quantization_config` or equivalent parameters to `from_pretrained` but the model you're loading already has a `quantization_config` attribute. The `quantization_config` from the model will be used. zThe model is quantized with z but you are passing a z| config. Please make sure to pass the same quantization config class to `from_pretrained` with different loading attributes.z"However, loading attributes (e.g. z]) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.)rp   rn   r   r\   rK   	__class__ri   rW   LOADING_ATTRIBUTES_CONFIG_TYPESget_loading_attributesitemssetattrrY   r[   r   r   r   warningswarnloggerinfo)r]   rb   rs   warning_msgloading_attr_dictattrvalr`   r`   ra   merge_quantization_configs   s4   	




z*AutoHfQuantizer.merge_quantization_configsc                 C   s   |  dd }|  dds|  ddr"|  ddrdnd}tj| }n|d u r*td|tvr@td| d	tt	  d
 dS dS )NrM   rN   FrO   rP   rQ   rR   rS   rT   z~. Hence, we will skip the quantization. To remove the warning, you can delete the quantization_config attribute in config.jsonT)
rU   r   rV   rW   rX   r|   warningrY   rZ   r[   )rL   rM   r^   r`   r`   ra   supports_quant_method  s    
z%AutoHfQuantizer.supports_quant_method)ri   rj   rk   rl   rm   r   rn   rr   rc   r   staticmethodr   r`   r`   r`   ra   ro      s    
1ro   methodc                        fdd}|S )z-Register a custom quantization configuration.c                    6    t v rtd  dt| tstd| t  < | S )NzConfig '' already registeredz*Config must extend QuantizationConfigMixin)rX   rW   
issubclassr   rq   r]   r   r`   ra   register_config_fn$     
z8register_quantization_config.<locals>.register_config_fnr`   )r   r   r`   r   ra   register_quantization_config!     
r   namec                    r   )zRegister a custom quantizer.c                    r   )NzQuantizer 'r   z!Quantizer must extend HfQuantizer)rZ   rW   r   r   rq   r   r   r`   ra   register_quantizer_fn4  r   z1register_quantizer.<locals>.register_quantizer_fnr`   )r   r   r`   r   ra   register_quantizer1  r   r   c                 C   s   t | d}|rt| jsd}|s|d ur,|r t| j|| _n|| _tj| j|d}nd }|d ur[|j||d ||}|| } |	| } t
|jdds[|jj}t
|d||d< || |fS )Nrb   F)pre_quantized)
device_mapweights_only
dequantizevaluequant)hasattrro   r   rb   r   rr   validate_environmentupdate_device_mapupdate_tp_planupdate_ep_planrd   rM   )configrb   r   r   
user_agentr   hf_quantizerrM   r`   r`   ra   get_hf_quantizerA  s4   




r   )Zrz   models.auto.configuration_autor   utilsr   utils.quantization_configr   r   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   baser   quantizer_aqlmr   quantizer_auto_roundr    quantizer_awqr!   quantizer_bitnetr"   quantizer_bnb_4bitr#   quantizer_bnb_8bitr$   quantizer_compressed_tensorsr%   quantizer_eetqr&   quantizer_fbgemm_fp8r'   quantizer_finegrained_fp8r(   quantizer_fouroversixr)   quantizer_fp_quantr*   quantizer_gptqr+   quantizer_higgsr,   quantizer_hqqr-   quantizer_metalr.   quantizer_mxfp4r/   quantizer_quantor0   quantizer_quarkr1   quantizer_sinqr2   quantizer_spqrr3   quantizer_torchaor4   quantizer_vptqr5   rZ   rX   rv   
get_loggerri   r|   rK   ro   strr   r   r   r`   r`   r`   ra   <module>   s   h	
	

)o