
    zjY                     r    d dl Z d dlZd dlmZmZmZ d dlmZ d dlm	Z	m
Z
mZ d dlmZ  G d de          ZdS )    N)core	frameworkunique_name)append_backward)Variablein_dygraph_modeprogram_guard)	Optimizerc                       e Zd ZdZd Zd Zd Zej        d             Z	d Z
d Zd Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd ZddZ	 	 	 	 ddZd Z	 ddZdS )RecomputeOptimizera  
        :api_attr: Static Graph

    Recompute Optimizer Wrapper

    Normally, a training step contains three sub-steps: first, run forward
    Operators to calculate the loss; second, run backward Operators to
    calculate gradient of the parameters; third, apply optimization method
    to update the value of the parameters.

    In the forward computation process, all variables that are needed by
    backward computation process will be kept in memory, which occupy a great
    amount of memory when the network becomes very deep.

    Recompute split the network to k segments. In each segment, It will
    recompute the forward Operators, before running backward operators. It is
    very helpful for saving memory.

    The Variables that separate a network to segments are called as checkpoints,
    and users should set it manually. The usage is very simple:

    Args:
        optimizer (Optimizer): The optimizer that is applied to parameters.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> import numpy as np

            >>> paddle.enable_static()

            >>> def gen_data():
            ...     return {"x": np.random.random(size=(32, 32)).astype('float32'),
            ...     "y": np.random.randint(2, size=(32, 1)).astype('int64')}
            >>> def mlp(input_x, input_y, hid_dim=128, label_dim=2):
            ...     print(input_x)
            ...     fc_1 = paddle.static.nn.fc(x=input_x, size=hid_dim)
            ...     prediction = paddle.static.nn.fc(x=[fc_1], size=label_dim, activation='softmax')
            ...     cost = paddle.nn.functional.cross_entropy(
            ...         input=prediction, label=input_y,
            ...         reduction='none', use_softmax=False
            ...     )
            ...     sum_cost = paddle.mean(cost)
            ...     return sum_cost, fc_1, prediction
            >>> input_x = paddle.static.data(name="x", shape=[-1,32], dtype='float32')
            >>> input_y = paddle.static.data(name="y", shape=[-1,1], dtype='int64')
            >>> cost, fc_1, pred = mlp(input_x, input_y)

            >>> sgd = paddle.optimizer.Adam(learning_rate=0.01)
            >>> sgd = paddle.incubate.optimizer.RecomputeOptimizer(sgd)
            >>> sgd._set_checkpoints([fc_1, pred])
            >>> sgd.minimize(cost)

            >>> print("Finished optimize")
            Finished optimize
            >>> place = paddle.CPUPlace()
            >>> exe = paddle.static.Executor(place)
            >>> exe.run(paddle.static.default_startup_program())
            >>> step = 10

            >>> for i in range(step):
            ...     cost_val = exe.run(feed=gen_data(),
            ...             program=paddle.static.default_main_program(),
            ...             fetch_list=[cost.name])
            ...     print("step=%d cost=%f" % (i, cost_val[0]))
            var x : DENSE_TENSOR.shape(-1, 32).dtype(float32).stop_gradient(True)
            Finished optimize
            step=0 cost=0.737203
            step=1 cost=1.308077
            step=2 cost=0.768422
            step=3 cost=1.239475
            step=4 cost=0.882643
            step=5 cost=0.738027
            step=6 cost=0.819374
            step=7 cost=0.818534
            step=8 cost=0.753692
            step=9 cost=0.787448

    c                     t                      rt          d          || _        d | _        | j        j        | _        | j        j        | _        d| _        d S )Nz-In dygraph, don't support RecomputeOptimizer.F)r   	Exception
_optimizer_checkpoints_learning_rate_learning_rate_mapenable_offload)self	optimizers     s/lsinfo/ai/hellotax_ai/data_center/backend/venv/lib/python3.11/site-packages/paddle/incubate/optimizer/recompute.py__init__zRecomputeOptimizer.__init__j   sW     	MKLLL# "o<"&/"D#    c                     t          |t                    s
J d            |D ](}t          |t          t          f          s
J d            )|| _        dS )zR
        Args:
            checkpoints (list): List of Variable or string
        z=_checkpoints should be a list of Variable or a list of StringN)
isinstancelistr   strr   )r   checkpointsckpts      r   _set_checkpointsz#RecomputeOptimizer._set_checkpointss   s|    
 +t,, 	
 	
K	
 	
,   	 	DdXsO44  O 4  (r   c                     d| _         d S )NT)r   r   s    r   _enable_offloadz"RecomputeOptimizer._enable_offload   s    "r   c                      t          d          )a  
            :api_attr: Static Graph

        load function is not supported by Recompute Optimizer for now.
        :return: None

        Args:
            state_dict: the dict load by load_persistable method

        Examples:
            .. code-block:: python

                >>> import paddle

                >>> paddle.enable_static()
                >>> def mlp(input_x, input_y, hid_dim=128, label_dim=2):
                ...     fc_1 = paddle.static.nn.fc(x=input_x, size=hid_dim)
                ...     prediction = paddle.static.nn.fc(x=[fc_1], size=label_dim, activation='softmax')
                ...     cost = paddle.nn.functional.cross_entropy(
                ...         input=prediction, label=input_y,
                ...         reduction='none', use_softmax=False
                ...     )
                ...     sum_cost = paddle.mean(cost)
                ...     return sum_cost, fc_1, prediction

                >>> input_x = paddle.static.data(name="x", shape=[-1,32], dtype='float32')
                >>> input_y = paddle.static.data(name="y", shape=[-1,1], dtype='int64')
                >>> cost, fc_1, pred = mlp(input_x, input_y)
                >>> print("Finished FF")
                Finished FF

                >>> sgd = paddle.optimizer.Adam(learning_rate=0.01)
                >>> sgd = paddle.incubate.optimizer.RecomputeOptimizer(sgd)
                >>> sgd._set_checkpoints([fc_1, pred])
                >>> try:
                ...     state_dict = {}
                ...     sgd.load(state_dict)
                >>> except NotImplementedError as e:
                ...     print(e)
                load function is not supported by Recompute Optimizer for now
        z=load function is not supported by Recompute Optimizer for now)NotImplementedError)r   
state_dicts     r   loadzRecomputeOptimizer.load   s    V "K
 
 	
r   c                 8    | j                             |          S )a  
        call apply_gradients function of self._optimizer.

        Args:
            params_grads (list): list of (param, grad) pair to do optimization.

        Returns:
            list: A list of operators appended to the current program.

        Examples:
            .. code-block:: python

                >>> import paddle
                >>> import paddle.base.framework as framework

                >>> paddle.enable_static()

                >>> def mlp(input_x, input_y, hid_dim=128, label_dim=2):
                ...     fc_1 = paddle.static.nn.fc(x=input_x, size=hid_dim)
                ...     prediction = paddle.static.nn.fc(x=[fc_1], size=label_dim, activation='softmax')
                ...     cost = paddle.nn.functional.cross_entropy(
                ...         input=prediction, label=input_y,
                ...         reduction='none', use_softmax=False
                ...     )
                ...     sum_cost = paddle.mean(cost)
                ...     return sum_cost, fc_1, prediction

                >>> input_x = paddle.static.data(name="x", shape=[-1,32], dtype='float32')
                >>> input_y = paddle.static.data(name="y", shape=[-1,1], dtype='int64')
                >>> cost, fc_1, pred = mlp(input_x, input_y)
                >>> print("Finished FF")
                Finished FF

                >>> sgd = paddle.optimizer.Adam(learning_rate=0.01)
                >>> sgd = paddle.incubate.optimizer.RecomputeOptimizer(sgd)
                >>> sgd._set_checkpoints([fc_1, pred])
                >>> params_grads = sgd.backward(
                ...     cost,
                ...     startup_program=None,
                ...     parameter_list=None,
                ...     no_grad_set=None)

                >>> program = cost.block.program
                >>> with framework.program_guard(program, None):
                ...     optimize_ops = sgd.apply_gradients(params_grads)

                >>> print("Finished apply gradients")
                Finished apply gradients
        )params_grads)r   apply_gradients)r   r(   s     r   r)   z"RecomputeOptimizer.apply_gradients   s    f ..L.IIIr   c                    t          j        |dz             }t          j        |dz             }| j                                                            || j        | j                                                            |          j        dd          }| j                                                            || j        | j                                                            |          j        dd          }||fS )Nz@Pinnedz@FetchFTnameshapedtypepersistablestop_gradient)r   generate_main_programglobal_block
create_varcheckpoint_shapevarr.   )r   varnamepinned_var_namefetched_var_name
pinned_var	fetch_vars         r   _create_varszRecomputeOptimizer._create_vars   s    %.w/BCC&/(0BCC'4466AA '$113377@@F B 
 

 &3355@@!'$113377@@F A 
 
	  000r   c                    d}|                                 }| j                                        }t          j                                        }|D ]}| j                                                             |          }|                    || j	        | j                                                             |j
                  j        dd          }|                    dd|id|j        d|j        d	d
dd||i           dS )a0  
        add fill_constant_ops to the end of the prog

        we should fill the pinned vars before running the main_prog
        to instantiate their tensor hold_, which could tell us whether
        the host memory could hold all the checkpoints from all the
        GPU devices in this node.
        r   FTr+   fill_constantOutr-   r.   valueg        
place_type   )typeoutputsattrsN)r3   checkpoint_name2pinned_namevaluesr   op_proto_and_checker_makerkOpRoleAttrNamer2   r6   r4   r5   r,   r.   	append_opr-   )	r   startup_programop_roleblockfill_constant_varsOP_ROLE_KEYr7   r6   r:   s	            r   _append_fill_constant_opsz,RecomputeOptimizer._append_fill_constant_ops   s    ,,..!=DDFF5EEGG) 	 	G$113377@@C))+(5577;;CHEEK!" *  J OO$(SYSYS !  
 
 
 
	 	r   c           
      V   t           j                                        }| j                            |dd| j                                                            |          gid| j                                                            |          gidt          |          ||i           d S )NmemcpyXr?   dst_place_type)rC   inputsrD   rE   )	r   rH   rI   rM   _insert_op_without_syncr2   r3   r6   int)r   
insert_idxsrc_varnamedst_varnamerL   rT   rO   s          r   _insert_async_memcpy_opz*RecomputeOptimizer._insert_async_memcpy_op"  s     5EEGG
**$,99;;??LLMN*7799==kJJK $S%8%8+wO 	+ 	
 	
 	
 	
 	
r   c                     || j         v sJ d| d            | j         |         }| j        |         }|                     |||dd           d S )NzTry to fetch z/ from Pinned Memory, but it is NOT a checkpoint   )rF   checkpoint_name2fetch_namer[   )r   idxr7   pinned_varnamefetch_varnames        r   _insert_fetch_opz#RecomputeOptimizer._insert_fetch_op0  sj    $::::TGTTT ;:: 9'B7@$$S.-ANNNNNr   c                     || j         v sJ d| d            | j         |         }|                     |||dd           d S )NzTry to offload z- to Pinned Memory, but it is NOT a checkpointr   rB   )rF   r[   )r   r_   r7   r`   s       r   _insert_offload_opz%RecomputeOptimizer._insert_offload_op9  s]    $::::TgTTT ;:: 9'B$$S'>1aHHHHHr   c                     d S N )r   op_idxcheckpoint_names      r   _insert_sync_opz"RecomputeOptimizer._insert_sync_op@      r   c                     t          | j                  dk    s
J d            | j                            d          }t          j        d| d           d|f| j        |<   |S )Nr   z#Could NOT found checkpoint to fetchzRecord fetch []fetch)lenun_fetch_checkpoint_namespoploggingdebugidx2insertionsr   r_   ri   s      r   _record_fetch_opz#RecomputeOptimizer._record_fetch_opD  su    4122Q6661 766 8<<R@@9999:::$+_#=C r   c                     | j                             d          }||k    sJ d| d| d            t          j        d| d           d|f| j        |<   d S )Nr   zexpected to offload [z] but got [rn   zRecord offload [offload)un_offload_checkpoint_namesrr   rs   rt   ru   )r   r_   ri   expected_checkpoint_names       r   _record_offload_opz%RecomputeOptimizer._record_offload_opN  s    #'#C#G#G#J#J "::::[$<[[[[[ ;:: 	;;;;<<<$-#?C   r   c                     || j         vsJ d| d            | j                             |           t          j        d| d           d|f| j        |<   d S )NzTry to sync the checkpoint [z] twicezRecord offload sync [rn   sync)synced_checkpointsaddrs   rt   ru   rv   s      r   _record_sync_opz"RecomputeOptimizer._record_sync_opV  sy    d&====C?CCC >== 	##O444@o@@@AAA$*O#<C   r   c                    i | _         | j        d d          | _        | j                            d           | j        d d          }i | _        | j        D ]}d| j        |<   t          | j        j                  | _        t          | j        j                  D ]9\  }}t          |j                            d                    dk    r	|| _         n:| j        t          | j        j                  k     s
J d            |                     | j                  }d }t          | j        j        | j        d                    D ]\  }}| j        |z   }|j                                        }|D ]}	|	|v r|	| j        vr| j        |	         dk    r(|}
|	| j        d         k    r|                     |          }|
|	k    sJ d|
 d|	 d            | j        j        |                             |	| j        |	                    | j        |	xx         dz  cc<   t#          d	|	 d
          t          | j                  dk    sJ | j         d            d S )Nrm   r   rL   r]   z#Could NOT found backward op in progz&Current recompute segment should use [z] BUT got [rn   zuse checkpoint [z] before fetch in BW# checkpoints have NOT been Recorded)ru   sorted_checkpoint_namesrq   rr   checkpoint_usage_countrp   rM   opsbw_start_op_idx	enumeraterW   descattrrw   input_arg_names_rename_inputr^   
ValueError)r   need_fetch_checkpoint_namesri   r_   opfetched_checkpoint_varnamelast_last_fetch_checkpointi
input_vars	input_varsecond_to_last_fetch_checkpoints              r   _parse_backwardz"RecomputeOptimizer._parse_backward^  s    )-)Eaaa)H&&**2...&*&DQQQ&G#&(##= 	= 	=O;<D'88"4:>22 00 	 	GC27<<	**++q00'*$ 1 #c$*.&9&99991 :99
 &*%:%:4;O%P%P"%)"tz~d.B.D.DEFF  	  	EAr&*C0022J'  	 ;;; (FFF6yAQFF !; <  )D,H,KKK$($9$9#$>$> !;
  ?)KKK}Ed}}qz}}}  LKK 
s+99% ;IF   3I>>>!C>>>>(NyNNN  3 << 4122a777-RRR 87777r   c                    t          | j                  dk    rd S t          | j        j                  }t	          t          | j        |                    D ]}|| j        v r| j        |         \  }}|dk    r7|                     ||           t          j	        d| d           | j        |= X|dk    r.| 
                    ||           t          j	        d| d           | j                                         t          | j                  dk    s.J d | j                                        D              d            d S )	Nr   ro   Insert [z] fetch op.r~   zSync [c                     g | ]
}|d          S r]   rg   .0eles     r   
<listcomp>z7RecomputeOptimizer._update_backward.<locals>.<listcomp>      ???3A???r   z checkpoints left un-Fetched)rp   ru   rM   r   reversedranger   rb   rs   rt   rj   _sync_with_cpprG   )r   total_oprh   	operationri   s        r   _update_backwardz#RecomputeOptimizer._update_backward  sd   t"##q((Ftz~&&uT%98DDEE 		I 		IF,,,-1-@-H*	?''))&/BBBM"I_"I"I"IJJJ+F33&((((AAAM"G?"G"G"GHHH
!!###4&''1,,,??$"5"<"<">">???]]] -,,,,r   c           
         i | _         | j        d d          | _        | j                            d          }| j        d d          }i | _        | j        D ]}ddd| j        |<   t                      | _        t          | j        j	                  | _
        t          | j        j	                  D ]9\  }}t          |j                            d                    dk    r	|| _
         n:| j
        t          | j        j	                  k     s
J d            d }t          | j        j	        | j
        | j                           D ]-\  }}| j
        |z   }|j                                        }|j                                        }	|D ]}
|
|v rt          |          dk    sJ d|
 d| d	            |
| j        v r|n| j        |         d
         dk    r|                     ||           n@| j        |         d         }|dk    sJ d| d            |                     |dz   |           |                     |dz   |
           |
}nt)          d|
 d	          |
|k    rt          |          dk    sJ d|
 d| d	            || j        d         k    sJ d| d| j        d          d| d	            | j        |         d         dk    r|                     ||           \| j        |         d         }|dk    sJ d| d            |                     |dz   |           |	D ]H}||v rB|| j        vsJ d| d            | j        |         d
xx         dz  cc<   || j        |         d<   I/t          | j                  dk    sJ | j         d            t          | j                  t          |          k    s/J t          |          t          | j                  z
   d            d S )Nrm   r   )countr_   rL   z"Could NOT found Forward op in progr]   z;checkpoint should be the only Output of a certain op, but [z] is from [rn   r   r_   zlast_usage_idx of checkpoint [z] should large than 0z4There should be just ONE op that output checkpoint [z$the last offload checkpoint before [z] is suppose to be [z], but got [zcheckpoint [z] used after syncr   )ru   r   rz   rr   checkpoint_usage_count_and_idxsetr   rp   rM   r   fw_start_op_idxr   rW   r   r   r   output_arg_namesr   r   r|   r   rq   )r   last_checkpointneed_offload_checkpoint_namesri   r_   r   last_offload_checkpointr   output_varsr   
output_varlast_usage_idxr   s                r   _parse_forwardz!RecomputeOptimizer._parse_forward  s    +/+G+J(:>>rBB(,(H(K%.0+#? 	 	OD DD/@@ #&%%"4:>22 00 	 	GC27<<	**++q00'*$ 1 #c$*.&9&99990 :99 #'JN4/$2FFG
 
 P	P P	PEAr &*C'2244K0022J) A A
!>>>{++q000rV`rrmorrr 100 "T%EEE2> $ C$;!"")!+ $%!% !%
 !% 4 4$')@!" !" !" !"
 %)$G(?%&&+%- !/
 (6'9'9'9$sE\$s$s$s (:'9'9 !% 4 4$2Q$68O!" !" !" //aDDD2<//(`S]```   00{++q000rV`rrmorrr 100 07;< < < m  m  mdh  eA  BD  eE  m  m  Sj  m  m  m< < < ;3!  
 ,,S2IJJJJ)-)L3**!  .111k=Tkkk  211 ,,*Q.0G   ( P P	 ===$D,CCCCCyCCC DCC 7	B7KKKqPKKKLOD7	B5IP 4344999-RRR :99 4*++s)0
 0
 
 
 
 011C8O4P4PPuuu
 
 
 
 
r   c                 p   t          | j                  dk    rd S t          t          | j        | j                            D ]}|| j        v r| j        |         \  }}|dk    r7|                     ||           t          j        d| d           | j        |= X|dk    r6| 	                    ||           t          j        d| d           | j        |= | j
                                         t          | j                  dk    s.J d | j                                        D              d            d S )	Nr   ry   r   z] offload op.r~   z] offload_sync op.c                     g | ]
}|d          S r   rg   r   s     r   r   z6RecomputeOptimizer._update_forward.<locals>.<listcomp>7  r   r   z checkpoints left un-Offloaded)rp   ru   r   r   r   r   rd   rs   rt   rj   rM   r   rG   )r   rh   r   ri   s       r   _update_forwardz"RecomputeOptimizer._update_forward"  sn   t"##q((F$&(<==
 
 	4 	4F ,,,-1-@-H*	?	))++FODDDM"K_"K"K"KLLL+F33&((((AAAMF?FFF   +F3
!!###4&''1,,,??$"5"<"<">">???___ -,,,,r   c                     d S rf   rg   r!   s    r   _check_offload_fetchz'RecomputeOptimizer._check_offload_fetch:  rk   r   Nc                    |j         j        | _        |j         | _         |t          j                                        }t          | j        |          5  t          | j                  dk    sJ d| j         d            t          d | j        D                       sJ d| j         d            i | _
        i | _        | j        D ].}|                     |          \  }}|| j
        |<   || j        |<   /|                     |           |                                  |                                  |                                  |                                  |                                  ddd           dS # 1 swxY w Y   dS )z
        core steps for recompute offload
        1. create pinned vars and temp vars
        2. parse & update Forward pass: offload, sync
        3. parse & update Backward pass: rename, fetch, sync
        4. verify the correctness
        Nr   zcheckpoints shape z2 should be an non empty list like: [12, 512, 1024]c              3   "   K   | ]
}|d k    V  dS )r   Nrg   r   s     r   	<genexpr>z.RecomputeOptimizer._offload.<locals>.<genexpr>O  s&      @@3sQw@@@@@@r   zall ele in checkpoints shape z- should be a determined integer larger than 0)rM   programr2   paddlestaticdefault_startup_programr	   rp   r5   allrF   r^   r   r<   rP   r   r   r   r   r   )r   lossrK   checkpoint_varnamer8   fetch_var_names         r   _offloadzRecomputeOptimizer._offload>  s    "Z/Z
"$mCCEEO4-?? 	( 	(t,--111nT%:nnn 211 @@$*?@@@@@  t0Ettt @ 02D,.0D+&*&B 	 	"262C2C&3 3/ $ 01CD # /0BCC **?;;;   """!!###!!!  """%%'''9	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	(s   DE::E>E>c                 n   | j         
J d            t                      rt          d          |j        | _        |j        j        }t          ||          5  g }| j         D ]Z}t          |t                    r|
                    |           -|
                    |j                            |                     [t          |          dk    rt          ||||          \  }	}
nt          ||||          }	ddd           n# 1 swxY w Y   | j        r|
| _        |                     ||           |	S )a^  
        call append_backward with checkpoints.

        Args:
            loss (Variable): loss variable to run optimizations.
            startup_program (Program): startup_program for initializing parameters
                in `parameter_list`.
            parameter_list (list): list of Variables or Variable.names to update.
            no_grad_set (set|None): set of Variables or Variables.names should be ignored.
            callbacks (list|None): list of callables to run when appending backward
                operator for one parameter.
            checkpoints (list): list of Variables as checkpoints

        Examples:
            .. code-block:: python

                >>> import paddle

                >>> paddle.enable_static()

                >>> def mlp(input_x, input_y, hid_dim=128, label_dim=2):
                ...     fc_1 = paddle.static.nn.fc(x=input_x, size=hid_dim)
                ...     prediction = paddle.static.nn.fc(x=[fc_1], size=label_dim, activation='softmax')
                ...     cost = paddle.nn.functional.cross_entropy(
                ...         input=prediction, label=input_y,
                ...         reduction='none', use_softmax=False
                ...     )
                ...     sum_cost = paddle.mean(cost)
                ...     return sum_cost, fc_1, prediction

                >>> input_x = paddle.static.data(name="x", shape=[-1,32], dtype='float32')
                >>> input_y = paddle.static.data(name="y", shape=[-1,1], dtype='int64')
                >>> cost, fc_1, pred = mlp(input_x, input_y)
                >>> print("Finished FF")
                Finished FF

                >>> sgd = paddle.optimizer.Adam(learning_rate=0.01)
                >>> sgd = paddle.incubate.optimizer.RecomputeOptimizer(sgd)
                >>> sgd._set_checkpoints([fc_1, pred])
                >>> params_grads = sgd.backward(
                ...     cost,
                ...     startup_program=None,
                ...     parameter_list=None,
                ...     no_grad_set=None)
                >>> print("Finished backward")
                Finished backward
        N&You should call _set_checkpoints first*DyGraph current does not support recomputer   )r   )rK   )r   r   r$   r.   _dtyperM   r   r	   r   r   appendr6   rp   r   r   r   r   )r   r   rK   parameter_listno_grad_set	callbacksr   checkpoint_varsr   r(   r   s              r   backwardzRecomputeOptimizer.backwardi  s   n  ,,4 -,,  	%<   j*$7O44 	 	 O) A AdH-- A#**40000#**4:>>$+?+?@@@@ ?##a''8G" /	9 9 9555  /" /	     #	 	 	 	 	 	 	 	 	 	 	 	 	 	 	0  	A+BD(MM$M@@@s   B"DD	D	c                 x    t          | j        d          r| j        j        n| j        j        } ||||          S )a  
        call the apply_optimize function of self._optimizer
        Args:
            loss (Variable): loss variable to run optimizations.
            startup_program (Program): startup_program for initializing parameters
                in `parameter_list`.
            params_grads (list): list of (param, grad) pair to do optimization.
        Examples:
            .. code-block:: python

                >>> import paddle

                >>> paddle.enable_static()

                >>> def mlp(input_x, input_y, hid_dim=128, label_dim=2):
                ...     fc_1 = paddle.static.nn.fc(x=input_x, size=hid_dim)
                ...     prediction = paddle.static.nn.fc(x=[fc_1], size=label_dim, activation='softmax')
                ...     cost = paddle.nn.functional.cross_entropy(
                ...         input=prediction, label=input_y,
                ...         reduction='none', use_softmax=False
                ...     )
                ...     sum_cost = paddle.mean(cost)
                ...     return sum_cost, fc_1, prediction

                >>> input_x = paddle.static.data(name="x", shape=[-1,32], dtype='float32')
                >>> input_y = paddle.static.data(name="y", shape=[-1,1], dtype='int64')
                >>> cost, fc_1, pred = mlp(input_x, input_y)
                >>> print("Finished FF")
                Finished FF

                >>> sgd = paddle.optimizer.Adam(learning_rate=0.01)
                >>> sgd = paddle.incubate.optimizer.RecomputeOptimizer(sgd)
                >>> sgd._set_checkpoints([fc_1, pred])
                >>> params_grads = sgd.backward(
                ...     cost,
                ...     startup_program=None,
                ...     parameter_list=None,
                ...     no_grad_set=None)

                >>> optimize_ops = sgd.apply_optimize(
                ...     cost, startup_program=None, params_grads=params_grads)

                >>> print("Finished apply_optimize")
                Finished apply_optimize
        apply_optimizerK   r(   )hasattrr   r   _apply_optimize)r   r   rK   r(   funcs        r   r   z!RecomputeOptimizer.apply_optimize  sS    b t(8991DO**0 	
 t/
 
 
 	
r   c                    t          |t                    s
J d            | j        
J d            t                      rt	          d          |                     ||||          }|                     |||          }||fS )NzThe loss should be an Variable.r   r   )rK   r   r   r   )r   r   r   r   r$   r   r   )r   r   rK   r   r   r(   optimize_opss          r   minimizezRecomputeOptimizer.minimize  s     $))LL+LLL) ,,4 -,,  	%<   }}+)#	 % 
 
 **/ + 
 
 \))r   rf   )NNNN)NNN)__name__
__module____qualname____doc__r   r   r"   r   deprecate_stat_dictr&   r)   r<   rP   r[   rb   rd   rj   rw   r|   r   r   r   r   r   r   r   r   r   r   rg   r   r   r   r      s       O Ob$ $ $( ( (# # # ",
 ,
 #",
\3J 3J 3Jj1 1 1,! ! !F
 
 
O O OI I I    @ @ @= = =<
 <
 <
|
 
 
&q
 q
 q
f
 
 
0  )( )( )( )(\ ^ ^ ^ ^@6
 6
 6
r LP* * * * * *r   r   )rs   r   paddle.baser   r   r   paddle.base.backwardr   paddle.base.frameworkr   r   r	   paddle.optimizerr
   r   rg   r   r   <module>r      s      4 4 4 4 4 4 4 4 4 4 0 0 0 0 0 0 J J J J J J J J J J & & & & & &* * * * * * * * * *r   