
    |jb(                         d dl Zd dlZd dlmZ d dlmZ d dlm	Z	m
Z
 ddddZej        dej        dej        diZdad Zd	 Zd
 Z G d d          Z G d d          ZdS )    N)_current_expected_place_)async_offload_with_offsetcreate_async_load   )gpunpuxpu      c                     | d S t          j                    rKt          j                    s8|                                                                                                 S d S N)paddleis_compiled_with_cudais_compiled_with_rocmvalue
get_tensor_share_cuda)tensors    m/lsinfo/ai/hellotax_ai/data_center/backend/venv/lib/python3.11/site-packages/paddle/optimizer/fusion_utils.py_share_tensor_ipc_metar   *   sX    ~t#%% 9f.J.L.L 9||~~((**668884    c                  Z   t           t          j                    rd} nCt          j                    rd} n-t	                      }	 |                                } n	#  d} Y nxY w| t                                          v s(J dt                                           d|  d            | a t           S )Nr   r	   unknownz&tensor fusion helper now only support z, but got device z	 instead.)__current_device_type__r   r   is_compiled_with_xpur   get_device_type	alignmentkeys)device_typecurrent_devices     r   get_current_device_typer!   2   s    &')) 		(KK(** 	(KK577N(,<<>>('inn.....nY^^5E5EnnXcnnn /.. #.""s   A Ac                    t          j        | j                  t          | j                 z  }|t
          t                               z  }|dk    rdnt
          t                               |z
  }|t          | j                 z  }|S Nr   )npprodshapealigndtyper   r!   )tsize	remainingalialign_s        r   	get_alignr.   F   sv    717eAGn,Dy!8!:!:;;I >> 	
.001I= 
 E!'N"FMr   c                       e Zd Zdej        fdZ ej                    d             Z ej                    d             Z	 ej                    d             Z
d Zed             ZdS )FusionStorageNc                    t          |t                    s
J d            t          |t                    s
J d            t          |t                    s|
J d            || _        || _        || _        i | _        i | _        i | _        || _        d | _	        d| _
        |                                  |                                  d S )Nzaccumulators must be a dictzmaster_weights must be a dictz*merged_model_params must be a dict or Noner   )
isinstancedictaccumulatorsmaster_weightsmerged_model_paramsaccumulators_metamaster_weights_metamerged_model_params_metar(   bufferoffsetbuild_buffermapping_tensor)selfr4   r5   r6   r(   s        r   __init__zFusionStorage.__init__S   s     ,--LL/LLL-.$//PP1PPP/*D11	85H5P5P7 6Q5PP(,#6 !##% (*%
r   c                    d| _         | j                                        D ]\  }}|| j        vr
i | j        |<   |                                D ]w\  }}|j        | j        k    sJ |                                t          |          z   }| j         | j         |z   |j        |j        d| j        |         |<   | xj         |z  c_         x| j	                                        D ]q\  }}|j        | j        k    sJ |                                t          |          z   }| j         | j         |z   |j        |j        d| j
        |<   | xj         |z  c_         r| j        | j                                        D ]q\  }}|j        | j        k    sJ |                                t          |          z   }| j         | j         |z   |j        |j        d| j        |<   | xj         |z  c_         rt          j        | j         f| j                  | _        d S )Nr   )startendnamer&   )r(   )r;   r4   itemsr7   r(   _numelr.   rC   r&   r5   r8   r6   r9   r   zerosr:   )r>   kv	para_namevar_tmpsrc_lens         r   r<   zFusionStorage.build_bufferk   s)   %++-- 	' 	'DAq...,.&q)&'ggii 	' 	'"	7}
2222!..**Yw-?-??![;0#L$]	8 8&q))4 w&	' '--// 		# 		#DAq7dj((((hhjj9Q<</G{W,	+ +D$Q' KK7"KKK#/06688 	' 	'1w$*,,,,((**y||3![;0FW	4 4-a0 w&lDK>DDDr   c                    | j                                         D ]T\  }}|                                D ]:\  }}|                     | j        |         |         |d         |d                    ;U| j                                        D ]4\  }}|                     | j        |         |d         |d                    5| j                                        D ]4\  }}|                     | j        |         |d         |d                    5d S )NrA   rB   )srcrA   rB   )r7   rD   mapping_tensor_implr4   r8   r5   r9   r6   )r>   rG   rH   rI   metas        r   r=   zFusionStorage.mapping_tensor   sJ   *0022 	 	DAq#$7799  	4(()!,Y7w-U )     ,2244 	 	DAq$$'*!G*!E( %     17799 	 	DAq$$,Q/jeH %    	 	r   c                 l   |j         }|j        }d|_        |                                 t          j        || j                            ||                     |                                                    |           ||_        | j                            ||          	                    |           d S )NT)
r&   stop_gradientflatten_r   assignr:   _slicer   	_set_dims_share_buffer_to)r>   rM   rA   rB   tensor_shaperQ   s         r   rN   z!FusionStorage.mapping_tensor_impl   s    y) Kuc**	
 	
 	
 	""<000)5#&&77<<<<<r   c                 *    t          | j                  S r   )r   r:   r>   s    r   _refresh_buffer_ipc_metaz&FusionStorage._refresh_buffer_ipc_meta   s    %dk222r   c                 *    |                                  S r   )rZ   rY   s    r   buffer_ipc_metazFusionStorage.buffer_ipc_meta   s    ,,...r   )__name__
__module____qualname__r   float32r?   imperative_baseno_gradr<   r=   rN   rZ   propertyr\    r   r   r0   r0   R   s        
 !n   0 _(E (E (ET _  * _= = =3 3 3 / / X/ / /r   r0   c                       e Zd Zd Z ej                    d             Zd Z ej                    d             Zd Z	d Z
 ej                    d             ZdS )	FusionStorageHelperc                     t                      | _        d | _        d | _        d | _        d | _        d | _        d | _        g | _        | 	                    ||||           d S r   )
r   async_loaderr7   r8   r9   r:   
cpu_bufferbuffer_lengthtasks
reset_meta)r>   r7   r8   r9   r\   s        r   r?   zFusionStorageHelper.__init__   ss     .//!%#' (,%!
$		
 	
 	
 	
 	
r   c                 ^   t          |t                    s
J d            || _        t          |t                    s
J d            || _        t          |t                    s|
J d            || _        t          |t
                    s
J d            t          |          dv s
J d            t          j        j	        j
                            |          }t          j        |          | _        | j                                        | _        | j                                        | _        d S )Nz accumulators_meta must be a dictz"master_weights_meta must be a dictz/merged_model_params_meta must be a dict or Nonezbuffer_ipc_meta must be a tuple)      zbuffer_ipc_meta must be a tuple with length 5 when FLAGS_use_virtual_memory_auto_growth is True or 7 when FLAGS_use_virtual_memory_auto_growth is False.)r2   r3   r7   r8   r9   tuplelenr   basecoreDenseTensor_new_shared_cuda	to_tensorr:   
pin_memoryri   rE   rj   )r>   r7   r8   r9   r\   
new_tensors         r   rl   zFusionStorageHelper.reset_meta   sV    +T22 	
 	
.	
 	
2 "3-t44 	
 	
0	
 	
4 $7 /66	='//< 0/0 )A%/511 	
 	
-	
 	
1 ?##v--- g .-- [%1BB
 

 &z22+0022![//11r   c                 <    |                      d| j                   d S r#   )sync_partial_paramrj   rY   s    r   
sync_paramzFusionStorageHelper.sync_param   s!    4#566666r   c                 N   t          |t                    s
J d            t          |t                    s
J d            |dk    s
J d            || j        k    s
J d            t          | j        | j        ||||z
  | j                  }| j                            |           d S )Nzstart must be an integerzend must be an integerr   zstart must be non-negativez9end must be less than or equal to the total buffer length)
src_tensor
dst_tensor
src_offset
dst_offsetoffload_sizerh   )	r2   intrj   r   r:   ri   rh   rk   append)r>   rA   rB   tasks       r   rz   z&FusionStorageHelper.sync_partial_param   s    %%%AA'AAA%#s##==%===#zzz7zzzd((((G )(( ){+*
 
 
 	
$r   c                 R   t          | j                  dk    rd S | j                            d          }t          | j                  dk    rF| j                            d          }|                                 t          | j                  dk    F|                                 d S )Nr   )rq   rk   pop	cuda_waitcpu_wait)r>   	last_taskr   s      r   wait_allzFusionStorageHelper.wait_all  s    tz??aFJNN2&&	$*oo!!:>>!$$DNN $*oo!! 	r   c                    di i}| j                                         D ]>\  }}|                                D ]$\  }}|                     |          }|||j        <   %?| j                                        D ]%\  }}|                     |          }||d         |<   &| j        rDi |d<   | j                                        D ]%\  }}|                     |          }||d         |<   &|S )Nr5   r6   )r7   rD   restore_tensor_from_metarC   r8   r9   )r>   
state_dictrG   rH   rI   tensor_metarJ   s          r   r   zFusionStorageHelper.state_dict  s   &+
*0022 	3 	3DAq*+'')) 3 3&	;77DD+2
7<((3 ,2244 	6 	6DAq33A66G.5J'(++( 	?02J,-5;;== ? ?177::7>
01!44r   c                     |d         }|d         }|d         }|d         }| j                             ||          }|                                                    |           ||_        |S )Nr&   rC   rA   rB   )ri   rT   r   rU   rC   )r>   r   r&   rC   rA   rB   r   s          r   r   z,FusionStorageHelper.restore_tensor_from_meta+  sm    G$6"G$% ''s33%%e,,,r   N)r]   r^   r_   r?   ra   rb   rl   r{   rz   r   r   r   rd   r   r   rf   rf      s        
 
 
, _!2 !2 !2F7 7 7 _     "      _    r   rf   )numpyr$   r   paddle.autogradautogradra   paddle.frameworkr   #paddle.incubate.tensor.manipulationr   r   r   float16bfloat16r`   r'   r   r   r!   r.   r0   rf   rd   r   r   <module>r      sK        ) ) ) ) ) )              	 NA
OQ
NA	    # # #(	 	 	m/ m/ m/ m/ m/ m/ m/ m/`r r r r r r r r r rr   