
    zj                     <    d dl Z d dlmZmZmZmZ g Zd Zd Zd Z	dS )    N)get_clusterget_cluster_from_argsget_gpusloggerc           
         t          j        d          }|
J d            t          j        d          }|
J d            t          j        d          }|
J d            t          t          j        d                    }|
J d	            |                    d
          }t	          |          }t          |          }|dk    r!||k    rt          j        d| d| d           | dk    r4| d
                    |          k    rt          j        d|  d| d           t          j        d          }	|	|}
|dk    r}	 t          t          j        dd                    }|t	          |          k    r ||k    rt          j        d| d           |}
n&# t          $ r}t          |           Y d}~nd}~ww xY w|
d}
t          t          |
|
t	          |          z                       }g }	|D ]#|	                    fd|D                        $ng|	                    d
          }g }	||z  t	          |          k    sJ t          |          D ](}|	                    |||z  |dz   |z                      )t          j        d| d| d| d|	            t          |||	|          \  }}||j        |         fS )zZ
    args_node_ips:string, args_node_ip:string, args_port: int, selected_devices:list
    PADDLE_TRAINERSNz"PADDLE_TRAINERS should not be NonePOD_IPzPOD_IP should not be NonePADDLE_TRAINER_IDz$PADDLE_TRAINER_ID should not be NoneTRAINER_PORTS_NUMz$TRAINER_PORTS_NUM should not be None,z	127.0.0.1zcPlease NOTE: When using paddlecloud, node_ip is automatically got from POD_IP. Your input node_ip: z doesn't equals to node_ip: z from paddlecloud environment.zPlease NOTE: When using paddlecloud, cluster_node_ips is automatically got from PADDLE_TRAINERS(multi nodes) or POD_IP(single node).Your input cluster_node_ips: z doesn't equals to IPs: DISTRIBUTED_TRAINER_ENDPOINTS   PADDLE_PORT zUse Cloud specified port:.i  c                     g | ]	} d | 
S ): ).0portips     n/lsinfo/ai/hellotax_ai/data_center/backend/venv/lib/python3.11/site-packages/paddle/distributed/cloud_utils.py
<listcomp>z%get_cloud_cluster.<locals>.<listcomp>W   s#    %G%G%Gnndnn%G%G%G    zparsed from args: node_ips:z         node_ip:z node_rank:z trainer_endpoints:)osgetenvintsplitlenr   warningjoin	Exceptionprintlistrangeappenddebugr   pods)args_node_ipsargs_node_ip	args_portselected_devicesnode_ipsnode_ip	node_rankpaddle_ports_num	num_nodestrainer_endpointsstarted_portpaddle_porteportstrainer_endpoints_oriiclusterpodr   s                     @r   get_cloud_clusterr;      s   
 y*++H!Ei!!G ;	-..I  "H   29%899::'')O'''~~c""HHIII+'\"9"924@2 2
2 2 2	
 	
 	
 ##(9K9K(K(K+ EM  	
 	
 	
 	"ABB  q==!")M2">">?? %,<(=(===#y00N#M{#M#M#MNNN#.L   a LU<<L8M8M)MNNOO 	I 	IB$$%G%G%G%G%G%G%GHHHH	I !2 7 7 < <++s3H/I/IIIIIy!! 	 	A$$%((AE5E+EE    L	Wh 	W 	W	W 	W%.	W 	WCT	W 	W  
 ',.> LGS GL+++s   AF 
F;"F66F;c                  F    t          t          j        dd                    S )NPADDLE_TRAINERS_NUM1)r   r   r   r   r   r   _get_trainers_numr?   n   s    ry.44555r   c                 p   t          | j                  }t                      }t          j        d| d|            d }d }| j        rB|dk    r<t          | j        | j        | j	        |          \  }}t          j
        d|            n*t          | |          \  }}t          j
        d|            ||fS )Nzparsed from args trainerss_num:z selected_devices:r   zget cluster from cloud:zget cluster from args:)r   r,   r?   r   r'   use_paddlecloudr;   cluster_node_ipsr.   r3   infor   )argsr,   trainers_numr9   r:   s        r   get_cluster_and_podrF   r   s     566$&&L
L\,\\JZ\\   G
C 
8 1 1(!L	
 
 	7g778888,T3CDD6W66777C<r   )
r   %paddle.distributed.utils.launch_utilsr   r   r   r   __all__r;   r?   rF   r   r   r   <module>rI      s    
			            P, P, P,f6 6 6    r   