o
    ~ri                     @   sl   d dl Z ddlmZ ddlmZ e rd dlmZ d dlmZ ddl	m
Z
 G dd	 d	e
Zd
efddZdS )    N   )get_module_from_name)is_fouroversix_available)ModelQuantizationConfig)FourOverSixConfig)ConversionOpsc                   @   sb   e Zd Zdd Z			ddeeejf dejj	dB dedB de
e dB deeejf f
d	d
ZdS )FourOverSixQuantizec                 C   s
   || _ d S )N)hf_quantizer)selfr	    r   s/lsinfo/ai/hellotax_ai/llm_service/venv_embed/lib/python3.10/site-packages/transformers/integrations/fouroversix.py__init__   s   
zFourOverSixQuantize.__init__N
input_dictmodelfull_layer_namemissing_keysreturnc                    s   | j jjr|S t||\}}|ddd  t| d }|  ddd}	|| d }
||	|
t	||	r>t
||	 |D ]}|| q@ fddD S )aZ  
        We need to store some parameters to create the quantized weight. For example, fouroversix
        requires 4 values that are stored in the checkpoint to recover the quantized weight. So we
        store them in a dict that is stored in hf_quantizer for now as we can't save it in the op
        since we create an op per tensor.
        .   r    c                    s    i | ]}  d | | qS )r   r   ).0quantized_keymodule_namequantized_parametersr   r   
<dictcomp>6   s    z/FourOverSixQuantize.convert.<locals>.<dictcomp>)r	   quantization_configkeep_master_weightsr   rsplitlistkeysreplaceget_quantized_parametershasattrdelattrdiscard)r
   r   r   r   r   kwargsmodule_full_parameter_nameparameter_name	parameterkeyr   r   r   convert   s   


zFourOverSixQuantize.convert)NNN)__name__
__module____qualname__r   dictstrtorchTensornnModuler   r-   r   r   r   r   r      s     

r   configc                 C   s8   t | j| j| j| j| j| j| j| j| j	| j
| j| jdS )N)activation_scale_ruledtypegradient_scale_ruler   matmul_backendoutput_dtypequantize_backend
scale_ruleweight_scale_2dweight_scale_rulemodules_to_not_convertmodule_config_overrides)r   r8   r9   r:   r   r;   r<   r=   r>   r?   r@   rA   rB   )r7   r   r   r   adapt_fouroversix_config<   s   rC   )r3   quantizers.quantizers_utilsr   utilsr   fouroversixr   &transformers.utils.quantization_configr   core_model_loadingr   r   rC   r   r   r   r   <module>   s    -