
    j9                         d dl Z d dlmZmZmZ d dlmZ d dlm	Z	 d dl
mZ d Z	 	 dddddd	e	d
ededededeeee	geeef         f                  fdZ G d de	          Z G d de	          Z G d de	          ZdS )    N)CallableOptionalUnion)Module)tree_map_with_pathc                 6    ddddd}||          \  }}|p||p|fS )N)@      )    r
   )   r
   )r      )affinemxfp4nvfp4mxfp8 )mode
group_sizebitsmode_defaultsdefault_group_sizedefault_bitss         a/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx/nn/layers/quantized.py_defaults_for_moder      sB    	 M (5T':$++T-A\AA    r   F)r   quantize_inputclass_predicatemodelr   r   r   r   r   c                    pd fd}|                                  }t          ||t          j                  }|                     |           dS )a  Quantize the sub-modules of a module according to a predicate.

    By default all layers that define a ``to_quantized()`` method will be
    quantized. Both :obj:`Linear` and :obj:`Embedding` layers will be
    quantized. The module is updated in-place.

    Note:
        ``quantize_input=True`` is only supported for ``"nvfp4"`` and ``"mxfp8"``
        modes and :obj:`Linear` layers.

    Args:
        model (mlx.nn.Module): The model whose leaf modules may be quantized.
        group_size (Optional[int]): The quantization group size (see
           :func:`mlx.core.quantize`). Default: ``None``.
        bits (Optional[int]): The number of bits per parameter (see
           :func:`mlx.core.quantize`). Default: ``None``.
        mode (str): The quantization method to use (see
           :func:`mlx.core.quantize`). Default: ``"affine"``.
        quantize_input (bool): Whether to quantize activations. Default: ``False``.
        class_predicate (Optional[Callable]): A callable which receives the
           :obj:`Module` path and :obj:`Module` itself and returns ``True`` or a
           dict of params for ``to_quantized`` if it should be quantized and
           ``False`` otherwise. If ``None``, then all layers that define a
           ``to_quantized()`` method are quantized. Default: ``None``.

    Example:
        Weight only quantization for all layers that define a ``to_quantized()`` method:

        >>> import mlx.nn as nn
        >>> nn.quantize(model, group_size=64, bits=4, mode="affine")

        Weight and input quantization for all linear layers:

        >>> predicate = lambda p, m: isinstance(m, nn.Linear)
        >>> nn.quantize(model, mode="nvfp4", quantize_input=True, class_predicate=predicate)
    c                 "    t          |d          S )Nto_quantized)hasattr)_ms     r   <lambda>zquantize.<locals>.<lambda>C   s    wq.7Q7Q r   c                     | |          x}rt          |d          rt          |t                    rd}r|d<    |j        di |S t          |t                    r.d|v r|d         s|                    d            |j        di |S t          d          t          dt          |                     |S )Nr!   )r   r   r   r   zZ``class_predicate`` must return a bool or a dict of parameters to pass to ``to_quantized``z!Unable to quantize model of type r   )r"   
isinstanceboolr!   dictpop
ValueErrortype)	pathr$   bool_or_paramskwargsr   r   r   r   r   s	       r   _maybe_quantizez!quantize.<locals>._maybe_quantizeE   s   ,_T1555> 	q.)) Pnd33 ,6dSSF% B3A/0)1>33F33355 
(N::N(E: '**+;<<<)1>;;N;;;$O  
 !!NT!WW!N!NOOOHr   )is_leafN)leaf_modulesr   r   	is_moduleupdate_modules)r   r   r   r   r   r   r0   leavess    `````  r   quantizer6      s    Z &R*Q*QO        0 !!FAQRRRF	     r   c                        e Zd ZdZ	 	 	 ddededededef
 fd	Zd
 Zd Zd Z	e
	 	 	 ddedededefd            Z xZS )QuantizedEmbeddinga8  The same as :obj:`Embedding` but with a  quantized weight matrix.

    :obj:`QuantizedEmbedding` also provides a :meth:`from_embedding`
    classmethod to convert embedding layers to :obj:`QuantizedEmbedding`
    layers.

    Args:
        num_embeddings (int): How many possible discrete tokens can we embed.
           Usually called the vocabulary size.
        dims (int): The dimensionality of the embeddings.
        group_size (Optional[int]): The group size to use for the quantized
            weight. See :func:`~mlx.core.quantize`. Default: ``None``.
        bits (Optional[int]): The bit width to use for the quantized weight.
            See :func:`~mlx.core.quantize`. Default: ``None``.
        mode (str): The quantization method to use (see
           :func:`mlx.core.quantize`). Default: ``"affine"``.
    Nr   num_embeddingsdimsr   r   r   c                    t                                                       t          |||          \  | _        | _        || _        t          j        d|z            }t          j	        
                    ||f|          }t          j        ||||          ^| _        | _        }|r|d         nd | _        || _        || _        |                                  d S )N   )shapescaler   r   )super__init__r   r   r   r   mathsqrtmxrandomnormalr6   weightscalesbiasesr9   r:   freeze)
selfr9   r:   r   r   r   r>   rG   rI   	__class__s
            r   rA   zQuantizedEmbedding.__init__u   s     	 &8j$%O%O"	 	!d(##!!(>e!LL,.KJ4-
 -
 -
)T[6 $*3fQiit,	 	r   c                     |                      d          }t          j        | d         |         | d         |         |||         nd | j        | j        | j                  S )NrI   rG   rH   )rH   rI   r   r   r   )getrD   
dequantizer   r   r   )rK   xrI   s      r   __call__zQuantizedEmbedding.__call__   sc    (##}N1>!$ & 26!99
 
 
 	
r   c           
          t          j        || d         | d         |                     d          d| j        | j        | j                  S )z
        Call the quantized embedding layer as a quantized linear layer.

        Use this for example when input embedding and output projection
        weights are tied.
        rG   rH   rI   TrH   rI   	transposer   r   r   rD   quantized_matmulrN   r   r   r   rK   rP   s     r   	as_linearzQuantizedEmbedding.as_linear   sP     "N>88H%%	
 	
 	
 		
r   c           	      T    | j          d| j         d| j         d| j         d| j         	S )Nz, , group_size=, bits=, mode=)r9   r:   r   r   r   rK   s    r   _extra_reprzQuantizedEmbedding._extra_repr   s\    " P Pdi P P/P P26)P PDHIP P	
r   embedding_layerc                     |j         j        \  }} | |||||          }t          j        |j         |||          ^|_         |_        }|r|d         nd|_        |S )zHCreate a :obj:`QuantizedEmbedding` layer from an :obj:`Embedding` layer.r?   r   N)rG   r=   rD   r6   rH   rI   )	clsr_   r   r   r   embedding_dimsr:   qlrI   s	            r   from_embeddingz!QuantizedEmbedding.from_embedding   sy      /5;Sz4dCCC(*"	)
 )
 )
%	29v "(1F1IIT		r   NNr   )__name__
__module____qualname____doc__intstrrA   rQ   rX   r^   classmethodr   rd   __classcell__rL   s   @r   r8   r8   b   s
        ,    	
       6	
 	
 	

 
 
$
 
 
     	
    [    r   r8   c                        e Zd ZdZ	 	 	 	 ddedededed	ed
ef fdZd Zd Z	e
	 	 	 ddeded	ed
efd            Z xZS )QuantizedLinearaI  Applies an affine transformation to the input using a quantized weight matrix.

    It is the quantized equivalent of :class:`mlx.nn.Linear`. For now its
    parameters are frozen and will not be included in any gradient computation
    but this will probably change in the future.

    :obj:`QuantizedLinear` also provides a classmethod :meth:`from_linear` to
    convert linear layers to :obj:`QuantizedLinear` layers.

    Args:
        input_dims (int): The dimensionality of the input features.
        output_dims (int): The dimensionality of the output features.
        bias (bool, optional): If set to ``False`` then the layer will not use
            a bias. Default: ``True``.
        group_size (Optional[int]): The group size to use for the quantized
            weight. See :func:`~mlx.core.quantize`. Default: ``None``.
        bits (Optional[int]): The bit width to use for the quantized weight.
            See :func:`~mlx.core.quantize`. Default: ``None``.
        mode (str): The quantization method to use (see
           :func:`mlx.core.quantize`). Default: ``"affine"``.
    TNr   
input_dimsoutput_dimsbiasr   r   r   c                    t                                                       t          |||          \  | _        | _        || _        t          j        d|z            }t          j	        
                    | |||f          }t          j        ||||          ^| _        | _        }	|	r|	d         nd | _        |rt          j        |f          | _        |                                  d S )Nr<   lowhighr=   r?   r   )r@   rA   r   r   r   r   rB   rC   rD   rE   uniformr6   rG   rH   rI   zerosrs   rJ   )rK   rq   rr   rs   r   r   r   r>   rG   rI   rL   s             r   rA   zQuantizedLinear.__init__   s     	 &8j$%O%O"	 	!j.))""
+ # 
 

 -/KJ4-
 -
 -
)T[6 $*3fQiit  	1+00DI 	r   c                     | j         j        \  }}|dz  | j        z  }d| d| dd| v  d| j         d| j         d| j         S )	Nr   input_dims=, output_dims=z, bias=rs   rZ   r[   r\   )rG   r=   r   r   r   rK   out_dimsin_dimss      r   r^   zQuantizedLinear._extra_repr  s     K-'R<DI-P' P P P P&D. P P/P P26)P PDHIP P	
r   c           
          t          j        || d         | d         |                     d          d| j        | j        | j                  }d| v r|| d         z   }|S )NrG   rH   rI   TrS   rs   rU   rW   s     r   rQ   zQuantizedLinear.__call__	  si    N>88H%%	
 	
 	
 T>>DL Ar   linear_layerc                     |j         j        \  }} | ||d|||          }t          j        |j         |||          ^|_         |_        }|r|d         nd|_        d|v r|j        |_        |S )zACreate a :obj:`QuantizedLinear` layer from a :obj:`Linear` layer.Fr?   r   Nrs   )rG   r=   rD   r6   rH   rI   rs   )	ra   r   r   r   r   rr   rq   rc   rI   s	            r   from_linearzQuantizedLinear.from_linear  s     #/"5";ZS[%TMMM(*	)
 )
 )
%	29v "(1F1IIT	\!!"'BG	r   )TNNr   re   )rf   rg   rh   ri   rj   r(   rk   rA   r^   rQ   rl   r   r   rm   rn   s   @r   rp   rp      s	        4        	 
                D
 
 
       	
    [    r   rp   c                        e Zd ZdZ	 	 	 ddededededef
 fd	Zd
 Zd Zd Z	de
f fdZd Ze	 	 	 ddedededefd            Z xZS )QQLineara  Quantizes the input and applies an affine transformation using quantized weights.

    Two use cases are supported:

    1) **Eval**:  The weights are frozen and stored in quantized form together with
       their scales (``self.weight`` is quantized and ``self.scales`` is provided).
    2) **Train**: The weights are stored in higher precision and are quantized on
         the fly during computation so that gradients with respect to the weights
         can be computed.

    To switch between the two cases, use ``layer.eval()`` and ``layer.train()`` respectively.

    Compared to the :class:`mlx.nn.QuantizedLinear` layer, this layer
    quantizes the input as well and includes weights in gradient computations.

    :obj:`QQLinear` also provides the class method :meth:`from_linear` to
    convert :class:`mlx.nn.Linear` layers to :obj:`QQLinear` layers.

    Note: This layer does not support a bias term yet.

    Args:
        input_dims (int): The dimensionality of the input features.
        output_dims (int): The dimensionality of the output features.
        group_size (Optional[int]): The group size to use for the quantized weight.
            See :func:`~mlx.core.quantize`. Default: ``None``.
        bits (Optional[int]): The bit width to use for the quantized weight.
            See :func:`~mlx.core.quantize`. Default: ``None``.
        mode (Optional[str]): The quantization method to use (see
            :func:`mlx.core.quantize`). Currently, only ``"nvfp4"`` and ``"mxfp8"``
            are supported. Default: ``"nvfp4"``.
    Nr   rq   rr   r   r   r   c                 "   t                                                       t          |||          \  | _        | _        || _        t          j        d|z            }t          j	        
                    | |||f          | _        d| _        d S )Nr<   ru   F)r@   rA   r   r   r   r   rB   rC   rD   rE   rx   rG   
_quantized)rK   rq   rr   r   r   r   r>   rL   s          r   rA   zQQLinear.__init__R  s     	 &8j$%O%O"		!j.))i''
+ ( 
 

  r   c           
          | j         j        \  }}| j         j        t          j        k    r|dz  | j        z  }d| d| d| j         d| j         d| j         
S )Nr   r{   r|   rZ   r[   r\   )rG   r=   dtyperD   uint32r   r   r   r}   s      r   r^   zQQLinear._extra_reprh  s     K-';	))|	1GP' P P P P/P P26)P PDHIP P	
r   c                     | j         sBt          j        | j        | j        | j        | j                  \  | _        | _        d| _         d S d S )Nr?   T)r   rD   r6   rG   r   r   r   rH   r]   s    r   r6   zQQLinear.quantizeq  sV     	#'){	Y	( ( ($DK #DOOO	# 	#r   c                     | j         rUt          j        | j        | j        | j        | j        | j                  | _        |                     d           d| _         d S d S )NrH   r   r   r   rH   F)	r   rD   rO   rG   rH   r   r   r   __delattr__r]   s    r   rO   zQQLinear.dequantize{  sh    ? 		$-{?YY  DK X&&&#DOOO		$ 		$r   c                     t                                          |           | j        r|                                  d S |                                  d S )N)r@   _set_training_mode	_trainingrO   r6   )rK   r   rL   s     r   r   zQQLinear._set_training_mode  sL    ""4(((> 	OOMMOOOOOr   c                     t          j        || d         |                     d          | j        | j        | j                  }|S )NrG   rH   r   )rD   qqmmrN   r   r   r   rW   s     r   rQ   zQQLinear.__call__  sG    GN88H%%
 
 
 r   r   c                     |j         j        \  }}|                    d          t          d           | |||||          }|j         |_         |                    |j                   |S )z:Create a :obj:`QQLinear` layer from a :obj:`Linear` layer.rs   Nz#QQLinear does not support bias yet.r?   )rG   r=   rN   NotImplementedErrortraintraining)ra   r   r   r   r   rr   rq   rc   s           r   r   zQQLinear.from_linear  su     #/"5";ZF##/%&KLLLS[*dFFF '	
&'''	r   )NNr   )rf   rg   rh   ri   rj   rk   rA   r^   r6   rO   r(   r   rQ   rl   r   r   rm   rn   s   @r   r   r   1  s?        H        	 
              ,
 
 
# # #
$ 
$ 
$t      	 	 	     	
    [    r   r   )NN)rB   typingr   r   r   mlx.corecorerD   mlx.nn.layers.baser   	mlx.utilsr   r   rj   rk   r(   r)   r6   r8   rp   r   r   r   r   <module>r      s    , , , , , , , , , ,       % % % % % % ( ( ( ( ( (B B B I!
  LPI! I! I!I!I! I!
 I! I! hV}eD$J6G'GHII! I! I! I!Xc c c c c c c cLf f f f ff f f fRy y y y yv y y y y yr   