
from __future__ import annotations

import warnings
from typing import TYPE_CHECKING

from paddle import _C_ops, pir

from ..base import framework
from ..base.dygraph import no_grad
from ..base.framework import in_dynamic_or_pir_mode
from .optimizer import Optimizer

if TYPE_CHECKING:
    from collections.abc import Sequence

    from paddle import Tensor
    from paddle.nn.clip import GradientClipBase
    from paddle.regularizer import WeightDecayRegularizer

    from .lr import LRScheduler
    from .optimizer import _ParameterConfig

__all__ = []


class SGD(Optimizer):
    r"""
    Optimizer of the stochastic gradient descent algorithm.

    .. math::

        param\_out = param - learning\_rate * grad

    Parameters:
        learning_rate (float|LRScheduler, optional): The learning rate used to update ``Parameter``.
            It can be a float value or an ``LRScheduler`` instance. The default value is 0.001.
        parameters (list|tuple|None, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
            The default value is None in static graph mode, at this time all parameters will be updated.
        weight_decay (int|float|WeightDecayRegularizer|None, optional): The strategy of regularization. \
            It can be an int or float value used as the coefficient of L2 regularization, or \
            :ref:`api_paddle_regularizer_L1Decay`, :ref:`api_paddle_regularizer_L2Decay`.
            If a parameter has set regularizer using :ref:`api_paddle_ParamAttr` already, \
            the regularization setting here in optimizer will be ignored for this parameter. \
            Otherwise, the regularization setting here in optimizer will take effect. \
            Default None, meaning there is no regularization.
        grad_clip (GradientClipBase|None, optional): Gradient clipping strategy, it's an instance of
            some derived class of ``GradientClipBase`` . There are three clipping strategies
            ( :ref:`api_paddle_nn_ClipGradByGlobalNorm` , :ref:`api_paddle_nn_ClipGradByNorm` ,
            :ref:`api_paddle_nn_ClipGradByValue` ). Default None, meaning there is no gradient clipping.
        multi_precision (bool, optional): Whether to use multi-precision during weight updating.
            This is mainly useful when the parameters are float16 or bfloat16. Default is False.
        name (str|None, optional): The default value is None. Normally there is no need for the user
            to set this property. For more information, please refer to
            :ref:`api_guide_Name`.

    Examples:
        .. code-block:: python

            >>> import paddle

            >>> inp = paddle.uniform(min=-0.1, max=0.1, shape=[10, 10], dtype='float32')
            >>> linear = paddle.nn.Linear(10, 10)
            >>> out = linear(inp)
            >>> loss = paddle.mean(out)
            >>> sgd = paddle.optimizer.SGD(
            ...     learning_rate=0.1,
            ...     parameters=linear.parameters(),
            ...     weight_decay=0.01
            ... )
            >>> out.backward()
            >>> sgd.step()
            >>> sgd.clear_grad()
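
        A further sketch of passing an ``LRScheduler`` as the learning rate,
        here using the built-in ``paddle.optimizer.lr.StepDecay`` scheduler and
        ``paddle.nn.ClipGradByGlobalNorm`` clipping (any scheduler or clipping
        strategy can be passed the same way):

        .. code-block:: python

            >>> import paddle

            >>> linear = paddle.nn.Linear(10, 10)
            >>> inp = paddle.uniform(min=-0.1, max=0.1, shape=[10, 10], dtype='float32')
            >>> # Start at 0.1 and halve the learning rate every 10 epochs.
            >>> scheduler = paddle.optimizer.lr.StepDecay(
            ...     learning_rate=0.1, step_size=10, gamma=0.5
            ... )
            >>> sgd = paddle.optimizer.SGD(
            ...     learning_rate=scheduler,
            ...     parameters=linear.parameters(),
            ...     grad_clip=paddle.nn.ClipGradByGlobalNorm(1.0),
            ... )
            >>> loss = paddle.mean(linear(inp))
            >>> loss.backward()
            >>> sgd.step()
            >>> scheduler.step()  # advance the schedule, typically once per epoch
            >>> sgd.clear_grad()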

    strtypeMbP?NFlearning_ratefloat | LRScheduler
parameters4Sequence[Tensor] | Sequence[_ParameterConfig] | Noneweight_decay%float | WeightDecayRegularizer | None	grad_clipGradientClipBase | Nonemulti_precisionboolname
str | NonereturnNonec                    |t          d          t                                          |||||           d| _        || _        i | _        d S )Nzlearning_rate is not set)r   r   r   r   r"   sgd)
ValueErrorsuper__init__r   _multi_precision_master_weights)selfr   r   r   r   r    r"   	__class__s          d/lsinfo/ai/hellotax_ai/data_center/backend/venv/lib/python3.11/site-packages/paddle/optimizer/sgd.pyr*   zSGD.__init__]   si      7888'!% 	 	
 	
 	
 	 /!    c                   t          |t          j        t          j        f          sJ t          |t                    r|                     |          }|D ]}|j        | j        v r| j        rO| 	                    |j
                  r5|                     |          }| j                            |j                   g| 	                    |j
                  r| j        st          j        d           d S )NzAccumulating with FP16/BF16 in optimizer can lead to poor accuracy or slow convergence.Consider using multi_precision=True option of the Adam optimizer.)
isinstancer   Blockr   dict_update_param_groupr"   _already_create_accumulatorr+   _is_dtype_fp16_or_bf16dtype_create_master_weightaddwarningswarn)r-   blockr   pmaster_ps        r/   _create_accumulatorszSGD._create_accumulatorss   s   %)/39!=>>>>>j$'' 	>11*==J  	 	Av999$ )D)DQW)M)M 55a88044QV<<<++AG44- X  	 	r0   c                @   t          |t                    r|                     |          }| j        o|                     |d         j                  }|r| j        |d         j                 nd }|                     |          }t                      r&t          j        |d         ||d         ||           d S t          |t          j                  sJ |d         |d         |d}d|d         i}d|i}|r
||d<   ||d<   |                    | j        |||d	          }	|	S )
Nr   r   )ParamGradLearningRateParamOutr    MasterParamMasterParamOutT)r   inputsoutputsattrsstop_gradient)r2   r4   r5   r+   r7   r8   r,   r"   _create_param_lrr
   r   sgd_r   r3   	append_opr   )
r-   r=   param_and_gradfind_mastermaster_weightlrrH   rI   rJ   sgd_ops
             r/   _append_optimize_opzSGD._append_optimize_op   sq   nd++ 	F!55nEEN+ 
0K0K1#1
 1

 D !2!788 	 "">22!## "	Kq!q!   4eY_55555 (*&q) " F ">!#45G&4E :(5}%,9()__Y" %  F Mr0   c                0    |                     d          }|S )Nparams)get)r-   r   s     r/   r5   zSGD._update_param_group   s    ^^H--
r0   )r   NNNFN)r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   )__name__
__module____qualname____doc____annotations__r*   r@   r	   rT   r5   __classcell__)r.   s   @r/   r   r   (   s         0 0d III .3KO>B-1 %" " " " " " ",  , 0 0 W0d      r0   r   )