
    |j[9                        d dl mZ d dlZd dlmZ d dlZd dlmZ ddlmZm	Z	m
Z
 ddlmZmZmZ dd	lmZmZ erdd
lmZ g Z G d de          Z G d de          Z G d de          ZdS )    )annotationsN)TYPE_CHECKING)_C_ops   )core	frameworkunique_name)_current_expected_placein_dygraph_modein_pir_mode   )Initializercalculate_gain)_NonLinearityc                  >     e Zd ZdZ	 	 	 	 	 	 dd fdZ	 dddZ xZS )MSRAInitializera]  Implements the MSRA initializer a.k.a. Kaiming Initializer

    This class implements the weight initialization from the paper
    `Delving Deep into Rectifiers: Surpassing Human-Level Performance on
    ImageNet Classification <https://arxiv.org/abs/1502.01852>`_
    by Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. This is a
    robust initialization method that particularly considers the rectifier
    nonlinearities. In case of Uniform distribution, the range is [-x, x], where

    .. math::

        x = gain \times \sqrt{\frac{3}{fan\_in}}

    In case of Normal distribution, the mean is 0 and the standard deviation
    is

    .. math::

        \frac{gain}{\sqrt{{fan\_in}}}

    Args:
        uniform (bool, optional): whether to use uniform or normal distribution. Default is True.
        fan_in (float32|None, optional): fan_in (in_features) of trainable Tensor, If None, it will be inferred automatically. If you don't want to use in_features of the Tensor, you can set the value of 'fan_in' smartly by yourself. Default is None.
        seed (int32, optional): random seed. Default is 0.
        negative_slope (float, optional): negative_slope (only used with leaky_relu). Default is 0.0.
        nonlinearity(str, optional): the non-linear function. Default is relu.
        mode(str, optional): the mode of initialization, can be 'fan_in' or 'fan_out'. When set to 'fan_in', the fan_in parameter is used for initialization. When set to 'fan_out', the out_features of trainable Tensor will be used. Default is 'fan_in'.

    Note:
        It is recommended to set fan_in to None for most cases.

    TNr   relufan_inuniformboolfloat | Noneseedintnegative_slopefloatnonlinearityr   modestrreturnNonec                4   |J |J t                                                       || _        || _        || _        || _        || _        || _        | j        dvrt          d| j         d          | j        dk    r| j        t          d          dS dS )zConstructor for MSRAInitializerN)r   fan_outzWThe mode of KaimingNormal/KaimingUniform should be 'fan_in' or 'fan_out', but received .r"   zdThe mode of KaimingNormal/KaimingUniform is 'fan_out', but fan_in is set. Please set fan_in to None.)	super__init___uniform_fan_in_seed_negative_slope_nonlinearity_mode
ValueError)selfr   r   r   r   r   r   	__class__s          m/lsinfo/ai/hellotax_ai/data_center/backend/venv/lib/python3.11/site-packages/paddle/nn/initializer/kaiming.pyr%   zMSRAInitializer.__init__H   s     """
-)
:222. $
. . .   :""t|'?@   #"'?'?    varpaddle.Tensorblockpaddle.pir.Block | Nonepaddle.Tensor | Nonec                   t          |t          j                  r|                                r
J d            |                     |          }t          |t          j        t          j        j        t          j        j	        j
        f          sJ t          |t          j        t          j        j        f          sJ |                     |          \  }}| j        dk    r| j        |n| j        }| j        dk    r|}| j        dk    r|j        j        | _        |j        }|t          j        j        j        k    s!|t          j        j        j        k    r~| j        swt          j        j        j        }|                    t5          j        d                    d|j        dg                    |j        |t          j        j        j        d	
          }nC|t          j         j!        t          j         j"        fv r| j        st          j         j#        }|}n|}|}tI                      r| j        rtK          | j&        | j'                  }	|	tQ          j)        dtU          |          z            z  }
tW          j,        |j        ||
 |
| j        |j-        .                                r|j-        nt_                                }ntK          | j&        | j'                  }	|	tQ          j)        tU          |                    z  }|j-        .                                r|j-        nt_                      }tW          j0        |j        d|| j        ||          }|t          j        j        j        k    s@|t          j        j        j        t          j         j!        t          j         j"        fv r2| j        s+tW          j1        ||          }|2                    |           n|2                    |           dS tg                      r*| j        rrtK          | j&        | j'                  }	|	tQ          j)        dtU          |          z            z  }
tW          j,        |j        ||
 |
| j        t_                                }notK          | j&        | j'                  }	|	tQ          j)        tU          |                    z  }t_                      }tW          j0        |j        d|| j        ||          }|t          j         j!        t          j         j"        fv r| j        stW          j1        ||          S |S | j        r|tK          | j&        | j'                  }	|	tQ          j)        dtU          |          z            z  }
|4                    di d|i|j        tk          |          |
 |
| j        dd          }nvtK          | j&        | j'                  }	|	tQ          j)        tU          |                    z  }|4                    dd|i|j        tk          |          d|| j        dd          }|t          j        j        j        k    s!|t          j        j        j        k    r,| j        s%|4                    dd|id|i|j        |d           ||_6        |S )a\  Initialize the input tensor with MSRA initialization.

        Args:
            var(Tensor): Tensor that needs to be initialized.
            block(Block|None, optional): The block in which initialization ops
                   should be added. Used in static graph only, default None.

        Returns:
            The initialization op.
        zDCurrently, kaiming initializer not support lazy init for dist param.r   Nr"   r   r#   
masra_inittmpF)nameshapedtypetypepersistableg      @        uniform_randomOut)r:   r;   minmaxr   T)r<   inputsoutputsattrsstop_gradientgaussian_random)r:   r;   meanstdr   )r<   rD   rE   rF   castX)in_dtype	out_dtype)r<   rC   rD   rE   )7
isinstancer   EagerParamBaseis_dist_check_blockVariablepaddlepirValuer   ParameterMetaBlock_compute_fansr+   r'   r(   programrandom_seedr;   VarDescVarTypeFP16BF16r&   FP32
create_varr	   generatejoinr9   r:   DENSE_TENSORDataTypeFLOAT16BFLOAT16FLOAT32r   r   r*   r)   mathsqrtr   r   r   place_typer
   gaussianrJ   _share_underline_tensor_tor   	append_opr   op)r-   r1   r3   f_inf_outr   origin_dtyperM   out_vargainlimitrI   rj   var_tmpro   s                  r/   forwardzMSRAInitializer.forwardf   sE    sI455	
:=++--	
 	
 S	
 	
 

 !!%(("
 
-
 
 	
 	
 
 %)/6:3C!DEEEEE((--e :!!!\1TTt|F:""F:??2DJ y4</444DL0555dm5,1I&& )HHlCHe<==  i\)6! '  GG T]2DM4JKKKM L -IGG$IG t	} %d&8$:NOOtyuV}})<=== .IFJy((3CII022	 	 &d&8$:NOOTYuV}}555 y((3CII022 
 !/M3TZE  t|3888L(-M)M*   !+g|<<22377772237774]] K	} %d&8$:NOOtyuV}})<=== .IFJ+--  &d&8$:NOOTYuV}}555/11 /M3TZE 
 !68N OOO P {7L999N} %d&8$:NOOtyuV}})<===__)"G,!(!$Y %v$ $
  #' %   &d&8$:NOOTYuV}}555__*"G,!(!$Y #" $
  #' %   t|3888 4 999$-9>"CL$+M%1 	      CFIr0   )TNr   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r    )N)r1   r2   r3   r4   r   r5   )__name__
__module____qualname____doc__r%   rw   __classcell__r.   s   @r/   r   r   &   s         F # !&,      > DHt t t t t t t t tr0   r   c                  .     e Zd ZdZ	 	 	 	 dd fdZ xZS )KaimingNormala_  Implements the Kaiming Normal initializer

    This class implements the weight initialization from the paper
    `Delving Deep into Rectifiers: Surpassing Human-Level Performance on
    ImageNet Classification <https://arxiv.org/abs/1502.01852>`_
    by Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. This is a
    robust initialization method that particularly considers the rectifier
    nonlinearities.

    In case of Normal distribution, the mean is 0 and the standard deviation
    is

    .. math::

        \frac{gain}{\sqrt{{fan\_in}}}

    Args:
        fan_in (float32|None, optional): fan_in (in_features) of trainable Tensor, If None, it will be inferred automatically. If you don't want to use in_features of the Tensor, you can set the value of 'fan_in' smartly by yourself. Default is None.
        negative_slope (float, optional): negative_slope (only used with leaky_relu). Default is 0.0.
        nonlinearity(str, optional): the non-linear function. Default is relu.
        mode(str, optional): the mode of initialization, can be 'fan_in' or 'fan_out'. When set to 'fan_in', the fan_in parameter is used for initialization. When set to 'fan_out', the out_features of trainable Tensor will be used. Default is 'fan_in'.

    Note:
        It is recommended to set fan_in to None for most cases.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.nn as nn

            >>> linear = nn.Linear(2, 4, weight_attr=nn.initializer.KaimingNormal())
            >>> data = paddle.rand([30, 10, 2], dtype='float32')
            >>> res = linear(data)

    Nr>   r   r   r   r   r   r   r   r   r   r    c                V    t                                          d|d|||           d S )NFr   r   r   r   r   r   r   r$   r%   r-   r   r   r   r   r.   s        r/   r%   zKaimingNormal.__init__C  sB     	)% 	 	
 	
 	
 	
 	
r0   Nr>   r   r   
r   r   r   r   r   r   r   r   r   r    rx   ry   rz   r{   r%   r|   r}   s   @r/   r   r     s[        # #N  $ #"
 
 
 
 
 
 
 
 
 
 
r0   r   c                  .     e Zd ZdZ	 	 	 	 dd fdZ xZS )KaimingUniformaY  Implements the Kaiming Uniform initializer

    This class implements the weight initialization from the paper
    `Delving Deep into Rectifiers: Surpassing Human-Level Performance on
    ImageNet Classification <https://arxiv.org/abs/1502.01852>`_
    by Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. This is a
    robust initialization method that particularly considers the rectifier
    nonlinearities.

    In case of Uniform distribution, the range is [-x, x], where

    .. math::

        x = gain \times \sqrt{\frac{3}{fan\_in}}

    Args:
        fan_in (float32|None, optional): fan_in (in_features) of trainable Tensor, If None, it will be inferred automatically. If you don't want to use in_features of the Tensor, you can set the value of 'fan_in' smartly by yourself. Default is None.
        negative_slope (float, optional): negative_slope (only used with leaky_relu). Default is 0.0.
        nonlinearity(str, optional): the non-linear function. Default is relu.
        mode(str, optional): the mode of initialization, can be 'fan_in' or 'fan_out'. When set to 'fan_in', the fan_in parameter is used for initialization. When set to 'fan_out', the out_features of trainable Tensor will be used. Default is 'fan_in'.

    Note:
        It is recommended to set fan_in to None for most cases.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.nn as nn

            >>> linear = nn.Linear(2, 4, weight_attr=nn.initializer.KaimingUniform())
            >>> data = paddle.rand([30, 10, 2], dtype='float32')
            >>> res = linear(data)

    Nr>   r   r   r   r   r   r   r   r   r   r    c                V    t                                          d|d|||           d S )NTr   r   r   r   s        r/   r%   zKaimingUniform.__init__y  sB     	)% 	 	
 	
 	
 	
 	
r0   r   r   r   r}   s   @r/   r   r   T  s[        " "L  $ #"
 
 
 
 
 
 
 
 
 
 
r0   r   )
__future__r   rh   typingr   rS   r   baser   r   r	   base.frameworkr
   r   r   initializerr   r   r   __all__r   r   r    r0   r/   <module>r      sl   # " " " " "                     0 0 0 0 0 0 0 0 0 0         
 5 4 4 4 4 4 4 4 +******
t t t t tk t t tn4
 4
 4
 4
 4
O 4
 4
 4
n3
 3
 3
 3
 3
_ 3
 3
 3
 3
 3
r0   