
    |j9<                        d dl mZ d dlZd dlZd dlmZ ddlmZmZmZ ddl	m
Z
 ddlmZmZmZ dd	lmZ g Z G d
 de          Z G d de          Z G d de          ZdS )    )annotationsN)_C_ops   )core	frameworkunique_name)check_variable_and_dtype)_current_expected_placein_dygraph_modein_pir_mode   )Initializerc                  <     e Zd ZdZ	 	 	 	 	 dd fdZ	 dddZ xZS )XavierInitializera  
    This class implements the Xavier weight initializer from the paper
    `Understanding the difficulty of training deep feedforward neural
    networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
    by Xavier Glorot and Yoshua Bengio.

    This initializer is designed to keep the scale of the gradients
    approximately same in all the layers. In case of Uniform distribution,
    the range is [-x, x], where

    .. math::

        x = gain \times \sqrt{\\frac{6.0}{fan\_in + fan\_out}}

    In case of Normal distribution, the mean is 0 and the standard deviation
    is

    .. math::

       gain \times \sqrt{\\frac{2.0}{fan\_in + fan\_out}}


    Args:
        uniform (bool, optional): whether to use uniform ,if False use normal distribution. Default is True.
        fan_in (float|None, optional): fan_in for Xavier initialization. If None, it is
                inferred from the variable. Default is None.
        fan_out (float|None, optional): fan_out for Xavier initialization. If None, it is
                 inferred from the variable. Default is None.
        seed (int, optional): Random seed. Default is 0.
        gain (float, optional): Scaling Tensor. Default is 1.0.

    Note:
        It is recommended to set fan_in and fan_out to None for most cases.

    TNr         ?uniformboolfan_infloat | Nonefan_outseedintgainfloatreturnNonec                    |J |J t                                                       || _        || _        || _        || _        || _        d S N)super__init___uniform_fan_in_fan_out_seed_gain)selfr   r   r   r   r   	__class__s         l/lsinfo/ai/hellotax_ai/data_center/backend/venv/lib/python3.11/site-packages/paddle/nn/initializer/xavier.pyr    zXavierInitializer.__init__G   sZ     """



    varpaddle.Tensorblockpaddle.pir.Block | Nonepaddle.Tensor | Nonec                t   |                      |          }t          |t          j        t          j        j        f          sJ t          |t          j        j        j                  st          |dg dd           | 	                    |          \  }}| j
        |n| j
        }| j        |n| j        }| j        dk    r|j        j        | _        t          |t          j                  r|                                r|j        n|j        }|j        }|t          j        j        j        k    s!|t          j        j        j        k    ry| j        srt          j        j        j        }	|                    t7          j        d                    d|j        dg                    ||	t          j        j        j        d	          }
nC|t          j         j!        t          j         j"        fv r| j        st          j         j#        }	|}
n|}	|}
tI                      r| j        rdd||fv rd
}n/| j%        tM          j'        dtQ          ||z             z            z  }tS          j*        ||	| || j        tW                                }
ndd||fv rd
}n/| j%        tM          j'        dtQ          ||z             z            z  }tW                      }tS          j,        |d
|| j        |	|          }
|t          j        j        j        k    s@|t          j        j        j        t          j         j!        t          j         j"        fv r| j        stS          j-        |
|          }
t          |t          j                  rI|                                r5t          j.        j/        j0        1                    |
|j2        |j3                  }
|
4                    |           dS tk                      r$| j        rtd||fv rd
}n/| j%        tM          j'        dtQ          ||z             z            z  }t          j6        *                    |
j        |	| || j        tW                                }
ngd||fv rd
}n/| j%        tM          j'        dtQ          ||z             z            z  }tS          j,        |
j        d
|| j        |	tW                                }
|t          j         j!        t          j         j"        fv r| j        stS          j-        |
|          S |
S | j        rfd||fv rd
}n/| j%        tM          j'        dtQ          ||z             z            z  }|7                    di d|
i|
j        |	| || j        dd          }nhd||fv rd
}n/| j%        tM          j'        dtQ          ||z             z            z  }|7                    dd|
i|
j        |
j        d
|| j        dd          }|t          j        j        j        k    s!|t          j        j        j        k    r,| j        s%|7                    dd|
id|i|
j        |d           ||_8        |S )a]  Initialize the input tensor with Xavier initialization.

        Args:
            var(Tensor): Tensor that needs to be initialized.
            block(Block|None, optional): The block in which initialization ops
                   should be added. Used in static graph only, default None.

        Returns:
            The initialization op
        Out)uint16float16float32float64xavier_initNr   .tmpF)nameshapedtypetypepersistableg        g      @g       @uniform_random)r9   r:   minmaxr   T)r;   inputsoutputsattrsstop_gradientgaussian_random)r9   r:   meanstdr   )r;   rA   rB   rC   castX)in_dtype	out_dtype)r;   r@   rA   rB   )9_check_block
isinstancer   Blockpaddlepirr   ParameterMetar	   _compute_fansr"   r#   r$   programrandom_seedEagerParamBaseis_dist_local_shaper9   r:   VarDescVarTypeFP16BF16r!   FP32
create_varr   generatejoinr8   DENSE_TENSORDataTypeFLOAT16BFLOAT16FLOAT32r   r%   mathsqrtr   r   r   r
   gaussianrG   distributedauto_parallelapidtensor_from_localprocess_mesh
placements_share_underline_tensor_tor   _pir_ops	append_opop)r&   r*   r,   f_inf_outr   r   out_var_shapeorigin_dtyperJ   out_varlimitrF   placerp   s                  r(   forwardzXavierInitializer.forwardX   s    !!%((%)/6:3C!DEEEEE#vz<== 	$;;;	   ((--e -4<=0%%dm:??2DJ 3	 899>AkkmmC 	 y4</444DL0555dm5,1I&& )HHmSXu=>>  $\)6! '  GG T]2DM4JKKKM L -IGG$IG M	} )))EE JeFW$4555* * E !.!FJ+--  )))CC*tyuVg=M7N7N1N'O'OOC/11 /!J  t|3888L(-M)M*   !+g|<<#y788 S[[]]  &48KK!13>  
 ..s3334]] Z	} )))EE JeFW$4555* * E !/11MFJ+--  )))CC*tyuVg=M7N7N1N'O'OOC /MJ+--  !68N OOO P {7L999N} $)))EE JeFW$4555* * E __)"G,!(!* %v$ $
  #' %   )))CC*tyuVg=M7N7N1N'O'OOC__*"G,!(!( #" $
  #' %   t|3888 4 999$-9>"CL$+M%1 	      CFIr)   )TNNr   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   )r*   r+   r,   r-   r   r.   )__name__
__module____qualname____doc__r    rx   __classcell__r'   s   @r(   r   r   "   s        " "L # $      $ DHL L L L L L L L Lr)   r   c                  .     e Zd ZdZ	 	 	 	 dd fdZ xZS )XavierNormala	  
    This class implements the Xavier weight initializer from the paper
    `Understanding the difficulty of training deep feedforward neural
    networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
    by Xavier Glorot and Yoshua Bengio, using a normal distribution whose mean is :math:`0` and standard deviation is

    .. math::

        gain \times \sqrt{\frac{2.0}{fan\_in + fan\_out}}.


    Args:
        fan_in (float|None, optional): fan_in for Xavier initialization, which is
                inferred from the Tensor. Default is None.
        fan_out (float|None, optional): fan_out for Xavier initialization, which is
                 inferred from the Tensor. Default is None.
        gain (float, optional): Scaling Tensor. Default is 1.0.
        name (str|None, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.

    Returns:
        A parameter initialized by Xavier weight, using a normal distribution.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> paddle.seed(1)
            >>> data = paddle.ones(shape=[3, 1, 2], dtype='float32')
            >>> weight_attr = paddle.framework.ParamAttr(
            ...     name="linear_weight",
            ...     initializer=paddle.nn.initializer.XavierNormal())
            >>> bias_attr = paddle.framework.ParamAttr(
            ...     name="linear_bias",
            ...     initializer=paddle.nn.initializer.XavierNormal())
            >>> linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
            >>> print(linear.weight)
            Parameter containing:
            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
            [[-0.21607460,  0.08382989],
             [ 0.29147008, -0.07049121]])

            >>> print(linear.bias)
            Parameter containing:
            Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
            [1.06076419, 0.87684733])

            >>> res = linear(data)
            >>> print(res)
            Tensor(shape=[3, 1, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
            [[[1.13615966, 0.89018601]],
             [[1.13615966, 0.89018601]],
             [[1.13615966, 0.89018601]]])
    Nr   r   r   r   r   r   r8   
str | Noner   r   c                T    t                                          d||d|           d S )NFr   r   r   r   r   r   r   r    r&   r   r   r   r8   r'   s        r(   r    zXavierNormal.__init__^  s;     	&' 	 	
 	
 	
 	
 	
r)   NNr   N
r   r   r   r   r   r   r8   r   r   r   ry   rz   r{   r|   r    r}   r~   s   @r(   r   r   '  s[        4 4p  $ $	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
r)   r   c                  .     e Zd ZdZ	 	 	 	 dd fdZ xZS )XavierUniforma	  
    This class implements the Xavier weight initializer from the paper
    `Understanding the difficulty of training deep feedforward neural
    networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
    by Xavier Glorot and Yoshua Bengio.

    This initializer is designed to keep the scale of the gradients
    approximately same in all the layers. In case of Uniform distribution,
    the range is :math:`[-x,x]`, where

    .. math::

        x = gain \times \sqrt{\frac{6.0}{fan\_in + fan\_out}}.

    Args:
        fan_in (float|None, optional): fan_in for Xavier initialization, which is
                inferred from the Tensor. Default is None.
        fan_out (float|None, optional): fan_out for Xavier initialization, which is
                 inferred from the Tensor. Default is None.
        gain (float, optional): Scaling Tensor. Default is 1.0.
        name (str|None, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.

    Returns:
        A parameter initialized by Xavier weight, using a uniform distribution.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> paddle.seed(1)
            >>> data = paddle.ones(shape=[3, 1, 2], dtype='float32')
            >>> weight_attr = paddle.framework.ParamAttr(
            ...     name="linear_weight",
            ...     initializer=paddle.nn.initializer.XavierUniform())
            >>> bias_attr = paddle.framework.ParamAttr(
            ...     name="linear_bias",
            ...     initializer=paddle.nn.initializer.XavierUniform())
            >>> linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
            >>> print(linear.weight)
            Parameter containing:
            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
            [[-1.18095720,  0.64892638],
             [ 0.43125069, -1.11156428]])
            >>> print(linear.bias)
            Parameter containing:
            Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
            [-0.27524316,  1.13808715])

            >>> res = linear(data)
            >>> print(res)
            Tensor(shape=[3, 1, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
            [[[-1.02494967,  0.67544925]],
             [[-1.02494967,  0.67544925]],
             [[-1.02494967,  0.67544925]]])
    Nr   r   r   r   r   r   r8   r   r   r   c                T    t                                          d||d|           d S )NTr   r   r   r   s        r(   r    zXavierUniform.__init__  s;     	qt 	 	
 	
 	
 	
 	
r)   r   r   r   r~   s   @r(   r   r   j  s[        6 6t  $ $	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
r)   r   )
__future__r   rd   rN   r   baser   r   r   base.data_feederr	   base.frameworkr
   r   r   initializerr   __all__r   r   r    r)   r(   <module>r      s^   # " " " " "         0 0 0 0 0 0 0 0 0 0 8 8 8 8 8 8         
 % $ $ $ $ $
B B B B B B B BJ@
 @
 @
 @
 @
$ @
 @
 @
FB
 B
 B
 B
 B
% B
 B
 B
 B
 B
r)   