
    zj                     P    d dl Z d dlmZ d dlmZ ddlmZmZ ddlm	Z	 d	dZ
d ZdS )
    N)fleet)in_dynamic_mode   )HeterParallelOptimizerHybridParallelOptimizer)loggerc                    t           j         }| |_        |4|j        rt          j        d           t          j        |          |_        i |_        |	                                dk    r|j        j
        sqt          | |j        |j                  }|j        j        d         j        rd|_        d|_        |j        j        d         j        rd|_        |j        r
J d            |S t'          | |j                  S | S )a  
    Optimizer for distributed training.
    For the distributed training, this method would rebuild a new instance of DistributedOptimizer.
    Which has basic Optimizer function and special features for distributed training.
    Args:
        optimizer(Optimizer): The executor to run for init server.
        strategy(DistributedStrategy): Extra properties for distributed optimizer.
            It is recommended to use DistributedStrategy in fleet.init(). The strategy
            here is for compatibility. If the strategy in fleet.distributed_optimizer()
            is not None, then it will overwrite the DistributedStrategy in fleet.init(),
            which will take effect in distributed training.
    Returns:
        Fleet: instance of fleet.
    Examples:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.distributed.fleet as fleet
            >>> fleet.init(is_collective=True)
            >>> strategy = fleet.DistributedStrategy()
            >>> linear = paddle.nn.Linear(10, 10)
            >>> optimizer = paddle.optimizer.SGD(learning_rate=0.001, parameters=linear.parameters())
            >>> optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)

    Na$  It is recommended to use DistributedStrategy in fleet_env.init(). The strategy here is only for compatibility. If the strategy in fleet_env.distributed_optimizer() is not None, then it will overwrite the DistributedStrategy in fleet_env.init(), which will take effect in distributed training.r   
pp_configsFz7sep parallel can not coexist with sharding_comm_overlap)r   user_defined_optimizer_is_collectiver   warningcopydeepcopy_user_defined_strategy_context
worker_numheter_ccl_moder   _hcghybrid_configsdp_comm_overlap
_dp_enable_sep_enablesharding_comm_overlap_sharding_enabler   )	optimizerstrategy	fleet_envhp_optims       r/lsinfo/ai/hellotax_ai/data_center/backend/venv/lib/python3.11/site-packages/paddle/distributed/fleet/optimizer.py_dygraph_distributed_optimizerr       s3   4 I'0I$# 	NB   ,0=+B+B	(I!!/> 	.9>9+K H />- ',#',$/># -2)#/  M / O)9;       c                  b    t                      rt          | i |S t          j        j        | i |S N)r   r    r   distributed_optimizer)argskwargss     r   r$   r$   `   s<     B-t>v>>>{0$A&AAAr!   r#   )r   paddle.distributedr   paddle.frameworkr   meta_optimizersr   r   utils.log_utilr   r    r$    r!   r   <module>r,      s     $ $ $ $ $ $ , , , , , , L L L L L L L L " " " " " "E E E EPB B B B Br!   