
import math
from functools import partial
from typing import Any

import mlx.core as mx
from mlx.nn.layers.base import Module


def _make_activation_module(f):
    def decorator(klass):
        klass.__call__ = lambda self, x: f(x)
        return klass

    return decorator
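

# Illustrative sketch (not part of the original file): the decorator above
# forwards calls on a Module subclass to the functional activation `f`, e.g.
#
#     @_make_activation_module(relu)
#     class ReLU(Module):
#         ...
#
#     ReLU()(mx.array([-1.0, 2.0]))  # same result as relu(mx.array([-1.0, 2.0]))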
   )r   r   s   ` r   _make_activation_moduler      s$         r   T)	shapelessc                 *    t          j        |           S )zpApplies the sigmoid function.

    .. math::
        \text{Sigmoid}(x) = \sigma(x) = \frac{1}{1 + \exp(-x)}
    mxsigmoidr   s    r   r   r      s     :a==r   c                 ,    t          j        | d          S )zIApplies the Rectified Linear Unit.

    Simply ``mx.maximum(x, 0)``.
    """
    return mx.maximum(x, 0)


@partial(mx.compile, shapeless=True)
def relu2(x):
    r"""Applies the ReLU² activation function.

    Applies :math:`\max(0, x)^2` element wise.
    """
    return mx.square(mx.maximum(x, 0))


@partial(mx.compile, shapeless=True)
def relu6(x):
    r"""Applies the Rectified Linear Unit 6.

    Applies :math:`\min(\max(x, 0), 6)` element wise.
    """
    return mx.minimum(mx.maximum(x, 0), 6.0)


@partial(mx.compile, shapeless=True)
def leaky_relu(x, negative_slope=0.01):
    r"""Applies the Leaky Rectified Linear Unit.

    Simply ``mx.maximum(negative_slope * x, x)``.
    """
    return mx.maximum(negative_slope * x, x)


@partial(mx.compile, shapeless=True)
def log_softmax(x, axis=-1):
    r"""Applies the Log Softmax function.

    Applies :math:`x - \log \sum_i e^{x_i}` element wise.
    """
    return x - mx.logsumexp(x, axis=axis, keepdims=True)


@partial(mx.compile, shapeless=True)
def elu(x, alpha=1.0):
    r"""Applies the Exponential Linear Unit.

    Simply ``mx.where(x > 0, x, alpha * (mx.exp(x) - 1))``.
    """
    return mx.where(x > 0, x, alpha * (mx.exp(x) - 1))


@partial(mx.compile, shapeless=True)
def softmax(x, axis=-1):
    r"""Applies the Softmax function.

    Applies :math:`\frac{e^{x_i}}{\sum_j e^{x_j}}` element wise.
    """
    return mx.softmax(x, axis=axis)


@partial(mx.compile, shapeless=True)
def softplus(x):
    r"""Applies the Softplus function.

    Applies :math:`\log(1 + \exp(x))` element wise.
    """
    return mx.logaddexp(x, 0)


@partial(mx.compile, shapeless=True)
def softsign(x):
    r"""Applies the Softsign function.

    Applies :math:`\frac{x}{1 + |x|}` element wise.
    """
    return mx.divide(x, 1 + mx.abs(x))
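

# Hedged illustration of the softshrink function defined below (example values
# assumed): with the default lambd=0.5,
#
#     softshrink(mx.array([-1.0, -0.25, 0.0, 0.8]))  # -> [-0.5, 0.0, 0.0, 0.3]
#
# inputs inside [-lambd, lambd] collapse to zero; the rest shrink toward zero
# by lambd.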


@partial(mx.compile, shapeless=True)
def softshrink(x, lambd: float = 0.5):
    r"""Applies the Softshrink activation function.

    .. math::
        \text{softshrink}(x) = \begin{cases}
        x - \lambda & \text{if } x > \lambda \\
        x + \lambda & \text{if } x < -\lambda \\
        0 & \text{otherwise}
        \end{cases}
    """
    return mx.where(mx.abs(x) > lambd, x - mx.sign(x) * lambd, 0)


@partial(mx.compile, shapeless=True)
def celu(x, alpha=1.0):
    r"""Applies the Continuously Differentiable Exponential Linear Unit.

    Applies :math:`\max(0, x) + \min(0, \alpha * (\exp(x / \alpha) - 1))`
    element wise.
    """
    return mx.maximum(x, 0.0) + alpha * (mx.exp(mx.minimum(x, 0.0) / alpha) - 1)


@partial(mx.compile, shapeless=True)
def silu(x):
    r"""Applies the Sigmoid Linear Unit. Also known as Swish.

    Applies :math:`x \sigma(x)` element wise, where :math:`\sigma(\cdot)` is
    the logistic sigmoid.
    """
    return x * mx.sigmoid(x)


@partial(mx.compile, shapeless=True)
def log_sigmoid(x):
    r"""Applies the Log Sigmoid function.

    Applies :math:`\log(\sigma(x)) = -\log(1 + e^{-x})` element wise.
    """
    return -softplus(-x)


@partial(mx.compile, shapeless=True)
def gelu(x) -> mx.array:
    r"""Applies the Gaussian Error Linear Units function.

    .. math::
        \textrm{GELU}(x) = x * \Phi(x)

    where :math:`\Phi(x)` is the Gaussian CDF.

    See also :func:`gelu_approx` and :func:`gelu_fast_approx` for faster
    approximations.
    """
    return x * (1 + mx.erf(x / math.sqrt(2))) / 2
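

# Hedged note (tolerances quoted from the docstrings of the two functions
# below, not re-verified here): the approximations trade accuracy for speed.
#
#     xs = mx.linspace(-6, 6, 1000)
#     mx.max(mx.abs(gelu(xs) - gelu_approx(xs)))       # < ~0.0005
#     mx.max(mx.abs(gelu(xs) - gelu_fast_approx(xs)))  # < ~0.015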


@partial(mx.compile, shapeless=True)
def gelu_approx(x):
    r"""An approximation to Gaussian Error Linear Unit.

    See :func:`gelu` for the exact computation.

    This function approximates ``gelu`` with a maximum absolute error :math:`<
    0.0005` in the range :math:`[-6, 6]` using the following

    .. math::

        x = 0.5 * x * \left(1 + \text{Tanh}\left(\sqrt{2 / \pi} * \left(x + 0.044715 * x^3\right)\right)\right)

    """
    return 0.5 * x * (1 + mx.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * x**3)))


@partial(mx.compile, shapeless=True)
def gelu_fast_approx(x):
    r"""A fast approximation to Gaussian Error Linear Unit.

    See :func:`gelu` for the exact computation.

    This function approximates ``gelu`` with a maximum absolute error :math:`<
    0.015` in the range :math:`[-6, 6]` using the following

    .. math::

        x = x \sigma\left(1.702 x\right)

    where :math:`\sigma(\cdot)` is the logistic sigmoid.

    References:
    - https://github.com/hendrycks/GELUs
    - https://arxiv.org/abs/1606.08415
    """
    return x * mx.sigmoid(1.702 * x)


def glu(x: mx.array, axis: int = -1) -> mx.array:
    r"""Applies the gated linear unit function.

    This function splits the ``axis`` dimension of the input into two halves
    (:math:`a` and :math:`b`) and applies :math:`a * \sigma(b)`.

    .. math::
        \textrm{GLU}(x) = a * \sigma(b)

    Args:
        axis (int): The dimension to split along. Default: ``-1``
    """
    a, b = mx.split(x, indices_or_sections=2, axis=axis)
    return a * mx.sigmoid(b)
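

# Hedged shape sketch (example assumed): glu halves the `axis` dimension, so
# an input of shape (..., 2 * d) yields an output of shape (..., d).
#
#     x = mx.ones((4, 8))
#     glu(x, axis=-1).shape  # -> (4, 4)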


@partial(mx.compile, shapeless=True)
def step(x: mx.array, threshold: float = 0.0):
    r"""Applies the Step Activation Function.

    This function implements a binary step activation, where the output is set
    to 1 if the input is greater than a specified threshold, and 0 otherwise.

    .. math::
        \text{step}(x) = \begin{cases}
        0 & \text{if } x < \text{threshold} \\
        1 & \text{if } x \geq \text{threshold}
        \end{cases}

    Args:
        threshold: The value to threshold at.
    """
    return mx.where(x > threshold, 1, 0)


@partial(mx.compile, shapeless=True)
def selu(x):
    r"""Applies the Scaled Exponential Linear Unit.

    .. math::
        \text{selu}(x) = \begin{cases}
        \lambda x & \text{if } x > 0 \\
        \lambda \alpha (\exp(x) - 1) & \text{if } x \leq 0
        \end{cases}

    where :math:`\lambda = 1.0507` and :math:`\alpha = 1.67326`.

    See also :func:`elu`.
    """
    return elu(x, 1.67326) * 1.0507


@partial(mx.compile, shapeless=True)
def prelu(x: mx.array, alpha: mx.array) -> mx.array:
    r"""Applies the element-wise parametric ReLU.

    .. math::
        \text{PReLU}(x) = \max(0,x) + a * \min(0,x)

    where :math:`a` is an array.
    """
    return mx.maximum(0, x) + alpha * mx.minimum(0, x)
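

# Hedged usage note (assumption based on standard broadcasting): `alpha`
# broadcasts against `x`, so a single slope can gate a whole tensor.
#
#     prelu(mx.array([-2.0, 3.0]), mx.array([0.25]))  # -> [-0.5, 3.0]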


@partial(mx.compile, shapeless=True)
def mish(x: mx.array) -> mx.array:
    r"""Applies the Mish function, element-wise.

    Mish: A Self Regularized Non-Monotonic Neural Activation Function.

    Reference: https://arxiv.org/abs/1908.08681

    .. math::
        \text{Mish}(x) = x * \text{Tanh}(\text{Softplus}(x))

    """
    return x * mx.tanh(softplus(x))


@partial(mx.compile, shapeless=True)
def hardswish(x):
    r"""Applies the hardswish function, element-wise.

    .. math::
        \text{Hardswish}(x) = x * \min(\max(x + 3, 0), 6) / 6
    """
    max_x_3 = mx.maximum(x + 3, 0)
    return x * mx.minimum(max_x_3, 6) / 6


@partial(mx.compile, shapeless=True)
def hard_tanh(x, min_val=-1.0, max_val=1.0):
    r"""Applies the HardTanh function.

    Applies :math:`\max(\min(x, \mathrm{max\_val}), \mathrm{min\_val})` element-wise.
    """
    return mx.minimum(mx.maximum(x, min_val), max_val)


@partial(mx.compile, shapeless=True)
def hard_shrink(x, lambd: float = 0.5):
    r"""Applies the HardShrink activation function.

    .. math::
        \text{hardshrink}(x) = \begin{cases}
        x & \text{if } x > \lambda \\
        x & \text{if } x < -\lambda \\
        0 & \text{otherwise}
        \end{cases}
    """
    return mx.where(mx.abs(x) > lambd, x, 0)


@partial(mx.compile, shapeless=True)
def softmin(x, axis=-1):
    r"""Applies the Softmin function.

    Applies :math:`\frac{e^{-x_i}}{\sum_j e^{-x_j}}` element-wise.
    """
    return mx.softmax(-x, axis=axis)


@partial(mx.compile, shapeless=True)
def tanh(x):
    r"""Applies the hyperbolic tangent function.

    Simply ``mx.tanh(x)``.
    """
    return mx.tanh(x)


class GLU(Module):
    r"""Applies the gated linear unit function.

    This function splits the ``axis`` dimension of the input into two halves
    (:math:`a` and :math:`b`) and applies :math:`a * \sigma(b)`.

    .. math::
        \textrm{GLU}(x) = a * \sigma(b)

    Args:
        axis (int): The dimension to split along. Default: ``-1``
    """

    def __init__(self, axis: int = -1):
        super().__init__()
        self.axis = axis

    def __call__(self, x) -> Any:
        return glu(x=x, axis=self.axis)


@_make_activation_module(sigmoid)
class Sigmoid(Module):
    r"""Applies the sigmoid function, element-wise.

    .. math::
        \text{Sigmoid}(x) = \sigma(x) = \frac{1}{1 + \exp(-x)}
    """


@_make_activation_module(mish)
class Mish(Module):
    r"""Applies the Mish function, element-wise.

    Reference: https://arxiv.org/abs/1908.08681

    .. math::
        \text{Mish}(x) = x * \text{Tanh}(\text{Softplus}(x))

    """


@_make_activation_module(relu)
class ReLU(Module):
    r"""Applies the Rectified Linear Unit.

    Simply ``mx.maximum(x, 0)``.

    See :func:`relu` for the functional equivalent.
    """


@_make_activation_module(relu2)
class ReLU2(Module):
    r"""Applies the ReLU² activation function.

    See :func:`relu2` for the functional equivalent.
    """


@_make_activation_module(relu6)
class ReLU6(Module):
    r"""Applies the Rectified Linear Unit 6.

    See :func:`relu6` for the functional equivalent.
    """


class LeakyReLU(Module):
    r"""Applies the Leaky Rectified Linear Unit.

    Simply ``mx.maximum(negative_slope * x, x)``.

    Args:
        negative_slope: Controls the angle of the negative slope. Default: ``1e-2``
    """

    def __init__(self, negative_slope=1e-2):
        super().__init__()
        self._negative_slope = negative_slope

    def __call__(self, x):
        return leaky_relu(x, self._negative_slope)


class ELU(Module):
    r"""Applies the Exponential Linear Unit.

    Simply ``mx.where(x > 0, x, alpha * (mx.exp(x) - 1))``.

    See :func:`elu` for the functional equivalent.

    Args:
        alpha: the :math:`\alpha` value for the ELU formulation. Default: ``1.0``
    """

    def __init__(self, alpha=1.0):
        super().__init__()
        self._alpha = alpha

    def __call__(self, x):
        return elu(x, self._alpha)


@_make_activation_module(softmax)
class Softmax(Module):
    r"""Applies the Softmax function.

    See :func:`softmax` for the functional equivalent.
    """


@_make_activation_module(softplus)
class Softplus(Module):
    r"""Applies the Softplus function.

    See :func:`softplus` for the functional equivalent.
    """


@_make_activation_module(softsign)
class Softsign(Module):
    r"""Applies the Softsign function.

    See :func:`softsign` for the functional equivalent.
    """


class Softshrink(Module):
    r"""Applies the Softshrink function.

    See :func:`softshrink` for the functional equivalent.

    Args:
        lambd: the :math:`\lambda` value for Softshrink. Default: ``0.5``
    """

    def __init__(self, lambd=0.5):
        super().__init__()
        self.lambd = lambd

    def __call__(self, x):
        return softshrink(x, self.lambd)


class CELU(Module):
    r"""Applies the Continuously Differentiable Exponential Linear Unit.

    Applies :math:`\max(0, x) + \min(0, \alpha * (\exp(x / \alpha) - 1))`
    element wise.

    See :func:`celu` for the functional equivalent.

    Args:
        alpha: the :math:`\alpha` value for the CELU formulation. Default: ``1.0``
    """

    def __init__(self, alpha=1.0):
        super().__init__()
        self._alpha = alpha

    def __call__(self, x):
        return celu(x, self._alpha)


@_make_activation_module(silu)
class SiLU(Module):
    r"""Applies the Sigmoid Linear Unit. Also known as Swish.

    See :func:`silu` for the functional equivalent.
    """


@_make_activation_module(log_softmax)
class LogSoftmax(Module):
    r"""Applies the Log Softmax function.

    See :func:`log_softmax` for the functional equivalent.
    """


@_make_activation_module(log_sigmoid)
class LogSigmoid(Module):
    r"""Applies the Log Sigmoid function.

    See :func:`log_sigmoid` for the functional equivalent.
    """


class PReLU(Module):
    r"""Applies the element-wise parametric ReLU.

    Applies :math:`\max(0, x) + a * \min(0, x)` element wise, where :math:`a`
    is an array.

    See :func:`prelu` for the functional equivalent.

    Args:
        num_parameters: number of :math:`a` to learn. Default: ``1``
        init: the initial value of :math:`a`. Default: ``0.25``
    """

    def __init__(self, num_parameters=1, init=0.25):
        super().__init__()
        self.weight = mx.full([num_parameters], init)

    def __call__(self, x: mx.array):
        return prelu(x, self.weight)


class GELU(Module):
    r"""Applies the Gaussian Error Linear Units.

    .. math::
        \textrm{GELU}(x) = x * \Phi(x)

    where :math:`\Phi(x)` is the Gaussian CDF.

    However, if ``approx`` is set to 'precise' or 'fast' it applies

    .. math::
        \textrm{GELUApprox}(x) &= 0.5 * x * \left(1 + \text{Tanh}\left(\sqrt{2 / \pi} * \left(x + 0.044715 * x^3\right)\right)\right) \\
        \textrm{GELUFast}(x) &= x * \sigma\left(1.702 * x\right)

    respectively.

    .. note::
       For compatibility with the PyTorch API, 'tanh' can be used as an alias
       for 'precise'.

    See :func:`gelu`, :func:`gelu_approx` and :func:`gelu_fast_approx` for the
    functional equivalents and information regarding error bounds.


    Args:
        approx ('none' | 'precise' | 'fast'): Which approximation to gelu to use if any.
    """

    def __init__(self, approx="none"):
        super().__init__()
        self._approx = approx
        allowed = ["none", "precise", "tanh", "fast"]
        if approx not in allowed:
            raise ValueError(
                f"The approximation should be in {allowed} but '{approx}' was given"
            )

    def __call__(self, x):
        if self._approx == "none":
            return gelu(x)
        elif self._approx in ["precise", "tanh"]:
            return gelu_approx(x)
        return gelu_fast_approx(x)


@_make_activation_module(tanh)
class Tanh(Module):
    r"""Applies the hyperbolic tangent function.

    See :func:`tanh` for the functional equivalent.
    """


@_make_activation_module(hardswish)
class Hardswish(Module):
    r"""Applies the hardswish function, element-wise.

    See :func:`hardswish` for the functional equivalent.
    """


class Step(Module):
    r"""Applies the Step Activation Function.

    This function implements a binary step activation, where the output is set
    to 1 if the input is greater than a specified threshold, and 0 otherwise.

    .. math::
        \text{step}(x) = \begin{cases}
        0 & \text{if } x < \text{threshold} \\
        1 & \text{if } x \geq \text{threshold}
        \end{cases}

    Args:
        threshold: The value to threshold at.
    """

    def __init__(self, threshold: float = 0.0):
        super().__init__()
        self.threshold = threshold

    def __call__(self, x: mx.array):
        return step(x, self.threshold)
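

# Hedged example (values assumed): with a threshold of 0.5,
#
#     Step(threshold=0.5)(mx.array([0.2, 0.7]))  # -> [0, 1]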


@_make_activation_module(selu)
class SELU(Module):
    r"""Applies the Scaled Exponential Linear Unit.

    See :func:`selu` for the functional equivalent.
    """


@_make_activation_module(hard_tanh)
class HardTanh(Module):
    r"""Applies the HardTanh function.

    See :func:`hard_tanh` for the functional equivalent.
    """


@_make_activation_module(hard_shrink)
class HardShrink(Module):
    r"""Applies the HardShrink function.

    See :func:`hard_shrink` for the functional equivalent.

    Args:
        lambd: the :math:`\lambda` value for Hardshrink. Default: ``0.5``
    """


@_make_activation_module(softmin)
class Softmin(Module):
    r"""Applies the Softmin function.

    See :func:`softmin` for the functional equivalent.
    """
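

# Minimal usage sketch (assumed example, not part of the original file): the
# functional and Module forms are interchangeable.
#
#     import mlx.core as mx
#     import mlx.nn as nn
#
#     x = mx.array([-1.0, 0.0, 2.0])
#     nn.relu(x)    # functional form
#     nn.ReLU()(x)  # Module form, usable inside nn.Sequential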