
    j                        d dl Z d dlZd dlmZ d dlZg dZd Zd Zd Z	d Z
d Zdej        eej                 z  d	dfd
ZdeddfdZdd eddfdddeej                 dej        eej                 z  dz  dededeej        j                 dz  deej                 dz  d	dfdZ	 ddeej                 ded	dfdZ	 	 ddeej                 deej                 d	dfdZeddfdeej                 deej                 ded	dfdZdS )    N)Sequence)
all_reducereduce	broadcast
all_gatherreduce_scatterc                 F   t          t          j        d          st          j        dd           dS t                      }| D ]]}|j        r dS |                                s dS |j        s dS |	                                }||v r dS |
                    |           ^dS )N_nccl_all_reducez)PyTorch is not compiled with NCCL support   
stacklevelFT)hasattrtorch_Cwarningswarnset	is_sparseis_contiguousis_cuda
get_deviceadd)tensorsdevicestensordevices       Y/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/torch/cuda/nccl.pyis_availabler      s    58/00 AaPPPPueeG 
 
 	55##%% 	55~ 	55""$$W55F4    c                      t           j                                        } | dz	  }| dz	  dz  }| dz  }t           j                                                            d          }|dk    r|||fS ||||fS )a  
    Returns the version of the NCCL.


    This function returns a tuple containing the major, minor, and patch version numbers of the NCCL.
    The suffix is also included in the tuple if a version suffix exists.
    Returns:
        tuple: The version information of the NCCL.
           i  zutf-8 )r   r   _nccl_version_nccl_version_suffixdecode)vermajorminorpatchsuffixs        r   versionr,   #   s}     (
 
 
"
"C2IEBY%E%KEX**,,33G<<F||ue$$ueV,,r   c                  >    t           j                                        S N)r   r   _nccl_unique_id r   r   	unique_idr1   8   s    8##%%%r   c                 D    t           j                            | ||          S r.   )r   r   _nccl_init_rank)	num_ranksuidranks      r   	init_rankr7   <   s    8##IsD999r   inputsreturnc                     t          | t          j        j                  rt          | t          j                  rt          d          d S )Nz(Inputs should be a collection of tensors)
isinstancecollectionsabc	Containerr   Tensor	TypeError)r8   s    r   _check_sequence_typerA   @   sP    fko788 DJ= = D BCCCD Dr   c                     t          |            || }t          |           t          j                            | ||||           d S r.   )rA   r   r   r
   r8   outputsopstreamscommss        r   r   r   G   sL       !!!	Hfgr7EBBBBBr   )rD   outputrootrE   rF   rD   c                   t          |            |6|t          d          t          j        dt          d           ||         }njt          |t          j                  sDt          |t          j	        j
                  r%t          j        dt          d           ||         }n|| |         n|}t          j                            | |||||           d S )Nz'output' and 'outputs' can not be both specified. 'outputs' is deprecated in favor of 'output', taking in a single output tensor. The signature of reduce is: reduce(inputs, output=None, root=0, op=SUM, streams=None, comms=None).z`nccl.reduce` with an output tensor list is deprecated. Please specify a single output tensor with argument 'output' instead instead.r   r   z\nccl.reduce with an output tensor list is deprecated. Please specify a single output tensor.)rA   
ValueErrorr   r   FutureWarningr;   r   r?   r<   r=   r   r   _nccl_reduce)r8   rH   rI   rE   rF   rG   rD   _outputs           r   r   r   Q   s       Y   M`	    dmGG-- =*(3 3 = 	5		
 	
 	
 	
 ,"(.&,,f	H&'4WeDDDDDr   c                 h    t          |            t          j                            | |||           d S r.   )rA   r   r   _nccl_broadcast)r8   rI   rF   rG   s       r   r   r   |   s4        	HVT7E:::::r   c                     t          |            t          |           t          j                            | |||           d S r.   )rA   r   r   _nccl_all_gather)r8   rD   rF   rG   s       r   r   r      sB        !!!	Hfgw>>>>>r   c                     t          |            t          |           t          j                            | ||||           d S r.   )rA   r   r   _nccl_reduce_scatterrC   s        r   r   r      sD        !!!	H!!&'2wFFFFFr   )r   NN)NN)r<   r   collections.abcr   
torch.cudar   __all__SUMr   r,   r1   r7   r?   rA   r   intcudaStreamr   r   r   r   r0   r   r   <module>r\      s        $ $ $ $ $ $     P
O
O  *- - -*& & &: : :D0F!F D4 D D D D  $T C C C C <@26
(E .2(E (E (EU\"(EL8EL11D8(E (E 		(E
 ej'(4/(E el#d*(E 
(E (E (E (EX HL; ;U\";*-;	; ; ; ; 
	? ?U\"?el#?
 
? ? ? ? 
	G 	GU\"	Gel#	G 		G 
	G 	G 	G 	G 	G 	Gr   