
    |j?                    ,   d dl Z d dlZd dlmZ d dlmZmZ d dlmZ ddl	m
Z
mZmZmZ ej        ej        ej        ej        ej        ej        ej        ej        ej        ej        ej        ej        ej        ej        ej        gZg dZ G d d	e          Zd
 Z  G d d          Z!d Z"d Z#d Z$d&dZ%d&dZ&d'dZ'd'dZ(d Z) G d d          Z* G d d          Z+ G d d          Z, G d d          Z- G d d          Z.ej/        d d!d"d#d$dfd%Z0dS )(    N)Enum)TracerEventTypeTracerMemEventType)flops   )intersection_rangesmerge_rangesmerge_self_ranges
sum_ranges)	allreduce	broadcastrpcc                   2    e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
S )
SortedKeysav  
    SortedKeys is used to specify how to sort items when printing ``paddle.profiler.Profiler.summary`` table.

    The meaning of each SortedKeys is as following

    - **SortedKeys.CPUTotal** : Sorted by CPU total time.

    - **SortedKeys.CPUAvg**  : Sorted by CPU average time.

    - **SortedKeys.CPUMax**  : Sorted by CPU max time.

    - **SortedKeys.CPUMin**  : Sorted by CPU min time.

    - **SortedKeys.GPUTotal**  : Sorted by GPU total time.

    - **SortedKeys.GPUAvg**  : Sorted by GPU average time.

    - **SortedKeys.GPUMax**  : Sorted by GPU max time.

    - **SortedKeys.GPUMin**  : Sorted by GPU min time.
    r   r                     N)__name__
__module____qualname____doc__CPUTotalCPUAvgCPUMaxCPUMinGPUTotalGPUAvgGPUMaxGPUMin     r/lsinfo/ai/hellotax_ai/data_center/backend/venv/lib/python3.11/site-packages/paddle/profiler/profiler_statistic.pyr   r   1   sB         , HFFFHFFFFFr$   r   c                     |                      dd          }|                     dd          }|                     dd          }|S )z8
    convert static host node name to operator name
    z compute z dygraphz pybind_imperative_func)replace)nameop_names     r%   _nodename2opnamer+   R   sC     ll:r**Gooj"--Goo7<<GNr$   c                   V    e Zd ZdZd Zd Zd Zed             Zed             Z	d Z
dS )	HostStatisticNodez?
    Wrap original node for calculating statistic metrics.
    c                     || _         g | _        g | _        d| _        d| _        d| _        d| _        d| _        d| _        d| _	        d S Nr   )
hostnodechildren_noderuntime_nodecpu_timeself_cpu_timegpu_timeself_gpu_timegeneral_gpu_timeself_general_gpu_timer   )selfr0   s     r%   __init__zHostStatisticNode.__init__a   sR      !%&"


r$   c                     | j         j        t          j        k    rZt	          | j         d          rGt          | j         j                  }t          || j         j        | j         j	                  | _        d S d S d S )Ninput_shapes)
r0   typer   Operatorhasattrr+   r)   r   r<   
attributes)r9   r*   s     r%   	cal_flopszHostStatisticNode.cal_flopsm   su    =!999t}n55 *4=+=>>"M.M, 


 :9 r$   c                    | j         j        | j         j        z
  | _        | j        | _        |                                  | j        D ]}|                                 |                                 | xj        |j        z  c_        | xj	        |j	        z  c_	        | xj        |j        |j        z
  z  c_        | xj
        |j
        z  c_
        | j        D ]}|                                 | xj        |j        |j        z
  z  c_        | xj        |j        z  c_        | xj        |j        z  c_        | xj	        |j	        z  c_	        | xj        |j	        z  c_        | j         j        D ]}|j        t           j        k    r:| xj        |j        |j        z
  z  c_        | xj        |j        |j        z
  z  c_        | xj	        |j        |j        z
  z  c_	        | xj        |j        |j        z
  z  c_        d S N)r0   end_nsstart_nsr3   r4   rA   r1   cal_statisticr5   r7   r   r2   r6   r8   device_noder=   r   Kernel)r9   childrtdevices       r%   rF   zHostStatisticNode.cal_statisticw   s   ,t}/EE!]' 	& 	&EOO!!!MMU^+MM!!U%;;!!%,"??JJ%+%JJJ# 	> 	>B")bk"99MMR[(MM"+-!!R%88!!&&"*==&&&m/ 	J 	JF{o444!@@""fmfo&EE""!!V]V_%DD!!&&&-&/*II&&&	J 	Jr$   c                     | j         j        S rC   )r0   rD   r9   s    r%   rD   zHostStatisticNode.end_ns   s    }##r$   c                     | j         j        S rC   )r0   rE   rM   s    r%   rE   zHostStatisticNode.start_ns   s    }%%r$   c                 ,    t          | j        |          S rC   )getattrr0   r9   r)   s     r%   __getattr__zHostStatisticNode.__getattr__   s    t}d+++r$   N)r   r   r   r   r:   rA   rF   propertyrD   rE   rR   r#   r$   r%   r-   r-   \   s         
 
 
  J J J6 $ $ X$ & & X&, , , , ,r$   r-   c                 B   t          j        t                    }|                                 D ]p\  }}g }|                    |           ||         }|rJ|                                }|                    |           |j        D ]}|                    |           |Jq|S rC   )collectionsdefaultdictlistitemsappendpopr1   )	nodetreesresults	thread_idrootnodestack
threadlistcurrent_node	childnodes           r%   traverse_treerc      s    %d++G(00 ( (	8XY'
 	( 99;;Ll+++)7 ( (	Y''''	  	(
 Nr$   c                     g }g }|                     |            |r^|                                }|j        D ]}|                     |           |j        D ]!}|j        D ]}|                     |           "|^|S )zD
    Get all device nodes called in the time range of hostnode.
    )rY   rZ   r1   r2   rG   )r0   r_   device_nodesra   rb   runtimenode
devicenodes          r%   get_device_nodesrh      s     EL	LL
 0yy{{%3 	$ 	$ILL####'4 	0 	0K)5 0 0
##J////0  0 r$   c                     dfd	g }|                                  D ](\  }} |          \  }}|                    |           )|S )Nr   c                    d| j         v rg dfS | j        t          j        t          j        fv rg dfS | j        t          j        k    r,t          |           }|                                 ||j        fS g }d}| j	        D ]0} ||dz             \  }}|r||z  }|
                    |           1| j        t          j        k    r0t          |           }|                                 ||_        ||g|fS ||fS )NGradNoder   r   )r)   r=   r   BackwardOptimizationr>   r-   rF   r   r1   rY   Forward)	nodedepth	stat_nodelayernflopsclfbuild_layers	           r%   rw   z+_build_layer_from_tree.<locals>.build_layer   s&   ""q5L9$(
 
 
 q5L9000)$//I##%%%io--# 	  	 A;q%!),,DAq  !Q9///)$//I##%%%$IOu%v--f}r$   r   )rX   rY   )r[   ret_r^   rr   rw   s        @r%   _build_layer_from_treer{      sr         > C ((  8;x((q

5Jr$   r   c                    | dz  dk    rt          | dz  |           dS | dz  dk    rt          | dz  |           dS | dz  dk    rt          | dz  |           dS | dz  dk    rt          | dz  |           d	S t          | |           S )
Ng   mBr   z T    eAz G    .Az M     @@z Kroundn	precisions     r%   _format_large_numberr      s    Dy1}}D),,0000Cx!||C++////Cx!||C++////Cx!||C++////Ay!!##r$   c                     | dz  dk    rt          | dz  |           dS | dz  dk    rt          | dz  |           dS | dz  dk    rt          | dz  |           dS t          | |           dS )	Nr}   r   z sr~   z msr   z usz nsr   r   s     r%   _format_timer      s    Cx!||C++////Cx!||C++0000Cx!||C++0000Ay!!&&&&r$   c                     g g g dfd	| dd          D ]} |           d                               S )Nr   c                    t          | t                    r| D ]} 
||dz              d S | j        t          j        t          j        fv r-t          	          dk    r	                    |           t          | j	                  }|	d         k    rCt                    dk    r0d         
                    |          r                    d           t                    k    rd                              S d|	d         z
  z  }t          | j                  }t          | j                  }t          | j        dz  | j        z            }                    | | d| d| d	| d
	           d S d S )Nr   r   r'    r}   z
 latency: z	, FLOPs: z	, FLOPS: 
)
isinstancerW   r=   r   rn   r>   lenrY   r+   r)   
startswithjoinr   r3   r   r   )ro   rp   r   r)   aligntmflops_nflops_sloopoffsetprint_layer_treerepeatry   s           r%   r   z*_gen_layer_flops.<locals>.print_layer_tree   s   dD!! 	 / /  EAI..../ / Y?2O4LMMM6{{ae$$$#DI..D ##C1Q9J9J49P9PA4yyF""wws||#56":-.Edm,,B*4:66G*4:+;dm+KLLGJJT$TT"TTwTTTTT    % NMr$   r   r'   rx   )r   )ro   r   r   r   r   r   ry   s    ` @@@@r%   _gen_layer_flopsr      s    
CFD         6 !""X  773<<r$   c                 @    t          |           }t          ||          S )zV
    gen_layer_flops generate flops/runtime information depend on layer/operator.
    )r{   r   )r[   r   
layer_trees      r%   gen_layer_flopsr   !  s!     (	22JJ///r$   c                 v   i }t          j        t                    }t          j        t                    }|                                 D ]>\  }}g }|                    |           t          |          }g }|                    |           |||<   ||         }	||         }
|r|                                }|	                    |           |                                }|
                    |           |j        D ]U}|                    |           t          |          }|j                            |           |                    |           V|j        D ]+}t          |          }|j                            |           ,|@|                                D ]\  }}|	                                 ||fS )zn
    Using HostStatisticNode to wrap original profiler result tree, and calculate node statistic metrics.
    )
rU   rV   rW   rX   rY   r-   rZ   r1   r2   rF   )r[   node_statistic_treer\   
newresultsr]   r^   r_   root_statistic_nodenewstackr`   newthreadlistra   current_statistic_noderb   child_statistic_noderf   runtime_statistic_nodes                    r%   	wrap_treer   )  s    %d++G(..J(00  	8X/99+,,,)<I&Y'
"9- 	 99;;Ll+++%-\\^^"  !7888)7 6 6	Y''''8'C'C$&4;;(    45555+8  ):;)G)G&&3::*     	$ +>*C*C*E*E , ,&	&))++++
**r$   c                   0    e Zd ZdZd Zd Zd Zd Zd ZdS )TimeRangeSummaryzO
    Analyse time ranges for each TracerEventType, and summarize the time.
    c                 "   t          j        t                    | _        t          j        d           | _        t          j        t
                    | _        t          j        d           | _        t          j        t
                    | _        d S )Nc                  4    t          j        t                    S rC   rU   rV   rW   r#   r$   r%   <lambda>z+TimeRangeSummary.__init__.<locals>.<lambda>Y  s    K+D11 r$   c                  4    t          j        t                    S rC   )rU   rV   intr#   r$   r%   r   z+TimeRangeSummary.__init__.<locals>.<lambda>]  s    K+C00 r$   )	rU   rV   rW   CPUTimeRangeGPUTimeRanger   CPUTimeRangeSumGPUTimeRangeSum
call_timesrM   s    r%   r:   zTimeRangeSummary.__init__V  sx    '3D99'311
 
  +6s;;*600 
  
 &1#66r$   c           	         t          |          }|                                D ]\  }}t          j        t                    }t          j        d           }|dd         D ]}||j                                     |j        |j        f           | j	        |j        xx         dz  cc<   |j
        D ]}||j                                     |j        |j        f           | j	        |j        xx         dz  cc<   |j        D ]^}	||	j                 |	j                 |	j                                     |	j        |	j        f           | j	        |	j        xx         dz  cc<   _|                                D ];\  }
}t          |d          }t          | j        |
         |d          | j        |
<   <|                                D ]{\  }}|                                D ]a\  }
}|                                D ]G\  }}t          |d          }t          | j        |         |
         |d          | j        |         |
<   Hb|| j                                        D ]\  }
}t%          |          | j        |
<   | j                                        D ]<\  }}|                                D ]"\  }
}t%          |          | j        |         |
<   #=dS )zm
        Analysis node trees in profiler result, and get time range for different tracer event type.
        c                  ,    t          j        d           S )Nc                  4    t          j        t                    S rC   r   r#   r$   r%   r   z:TimeRangeSummary.parse.<locals>.<lambda>.<locals>.<lambda>j  s    K3D99 r$   )rU   rV   r#   r$   r%   r   z(TimeRangeSummary.parse.<locals>.<lambda>i  s    /99  r$   r   NF	is_sortedT)rc   rX   rU   rV   rW   r=   rY   rE   rD   r   r2   rG   	device_id	stream_idr
   r	   r   r   r   r   r   )r9   r[   thread2hostnodesthreadid	hostnodesr   r   r0   rf   rg   
event_typetime_rangesr   device_time_rangesevent_time_rangesr   s                   r%   parsezTimeRangeSummary.parsea  s    )33#3#9#9#;#; &	 &	Hi&2488L&2  L
 &abbM > >X]+22&8   ...!3...#+#8 	> 	>K !1299$-{/AB   OK$4555:555&1&= > >
$Z%9::?K&0 &*"5z7H!IJJJ
888A=8888	>	> ,8+=+=+?+?  '
K/uMMM0<%j1;$1 1 1!*-- 2>1C1C1E1E 
 
-	-5G5M5M5O5O 	 	1J 12C2I2I2K2K  .	;&7'5' ' ' DP -i8D'&*D D D))4Z@@		
 (,'8'>'>'@'@ 	G 	G#J/9+/F/FD ,,-1->-D-D-F-F 	 	)I)+=+C+C+E+E  '
K>H? ?$Y/
;;	 	r$   c                 4    | j                                         S rC   )r   keysrM   s    r%   get_gpu_devicesz TimeRangeSummary.get_gpu_devices  s     %%'''r$   c                 (    | j         |         |         S rC   )r   )r9   r   r   s      r%   get_gpu_range_sumz"TimeRangeSummary.get_gpu_range_sum  s    #I.z::r$   c                     | j         |         S rC   )r   )r9   r   s     r%   get_cpu_range_sumz"TimeRangeSummary.get_cpu_range_sum  s    #J//r$   N)	r   r   r   r   r:   r   r   r   r   r#   r$   r%   r   r   Q  sj         	7 	7 	73 3 3j( ( (; ; ;0 0 0 0 0r$   r   c                       e Zd ZdZd Zd ZdS )DistributedSummaryz
    Analysis communication and computation time range, and their overlap.
    The computation time is all kernel except kernels for communication like nccl.
    c                 h    g | _         g | _        g | _        g | _        g | _        d| _        d| _        d S r/   )cpu_communication_rangegpu_communication_rangecommunication_rangecomputation_rangeoverlap_range	cpu_calls	gpu_callsrM   s    r%   r:   zDistributedSummary.__init__  s<    ')$')$#% !#r$   c                   
 t          |          }|                                D ]\  }}|dd         D ]ъ

j        t          j        k    rv| j                            
j        
j        f           t          
          }|D ]=}|j        t          j
        k    r&| j                            |j        |j        f           >
j        t          j        k    rt          
fdt          D                       rw| j                            
j        
j        f           t          
          }|D ]=}|j        t          j
        k    r&| j                            |j        |j        f           >:
j        D ]}|j        D ]}|j        t          j
        k    rn|j                                        }	d|	v sd|	v r'| j                            |j        |j        f           _| j                            |j        |j        f           Ӑt)          t+          | j                            | _        t)          t+          | j                            | _        t1          | j        d          | _        t1          | j        d          | _        t3          | j        | j        d          | _        t1          | j        d          | _        t7          | j        | j        d          | _        dS )	zH
        Collect all communication and computation time ranges.
        r   Nc              3   N   K   | ]}|j                                         v V   d S rC   )r)   lower).0r)   r0   s     r%   	<genexpr>z+DistributedSummary.parse.<locals>.<genexpr>  sR       G G HM//111G G G G G Gr$   ncclxcclFr   T)rc   rX   r=   r   Communicationr   rY   rE   rD   rh   rH   r   r>   any_CommunicationOpNamer2   rG   r)   r   r   r   setr   r   r
   r	   r   r   r   )r9   r[   r   r   r   re   rG   rf   rg   kernel_namer0   s             @r%   r   zDistributedSummary.parse  sl    )33#3#9#9#;#; -	& -	&Hi%abbM ,& ,&=O$AAA077!*HO<   $4H#=#=L'3  &+/EEE 8??!,!5{7I J   ]o&>>>3 G G G G 4G G G D D> 077!*HO<   $4H#=#=L'3  &+/EEE 8??!,!5{7I J   (0'< & &*5*A & &J)/2HHH.8o.C.C.E.E$*k$9$9'-'<'<$($@$G$G)3)<j>O(P%& %& %& %& %)$:$A$A)3)<j>O(P%& %& %&&&=,&Z S!=>>??S!=>>??'8(E(
 (
 (
$ (9(E(
 (
 (
$ $0(($
 $
 $
 
 "3"e"
 "
 "
 1$d&<
 
 
r$   N)r   r   r   r   r:   r   r#   r$   r%   r   r     sA         
  E
 E
 E
 E
 E
r$   r   c                       e Zd ZdZ G d d          Z G d de          Z G d de          Z G d d	e          Z G d
 de          Zd Z	d Z
d Zd Zd Zd Zd Zd ZdS )EventSummaryzT
    Analyse operator event in profiling data, correlate with its device event.
    c                       e Zd Zd Zed             Zed             Zed             Zed             Zd Z	d Z
d Zd	 Zd
 Zd ZdS )EventSummary.ItemBasec                    || _         d| _        d| _        d| _        d| _        t          d          | _        d| _        t          d          | _        i | _	        i | _
        d| _        t          d          | _        d| _        d| _        d S )Nr   inf)r)   callr3   r5   max_cpu_timefloatmin_cpu_timemax_gpu_timemin_gpu_timedevicesoperator_innersr7   min_general_gpu_timemax_general_gpu_time_flopsrQ   s     r%   r:   zEventSummary.ItemBase.__init__  s    DIDIDMDM !D %eD !D %eDDL#%D $%D!(-eD%()D%DKKKr$   c                     | j         S rC   r   rM   s    r%   r   zEventSummary.ItemBase.flops  s
    ;r$   c                      | j         | j        z  S rC   )r3   r   rM   s    r%   avg_cpu_timez"EventSummary.ItemBase.avg_cpu_time      =49,,r$   c                      | j         | j        z  S rC   )r5   r   rM   s    r%   avg_gpu_timez"EventSummary.ItemBase.avg_gpu_time  r   r$   c                      | j         | j        z  S rC   )r7   r   rM   s    r%   avg_general_gpu_timez*EventSummary.ItemBase.avg_general_gpu_time  s    (4944r$   c                 n    || j         k    r|| _         || j        k     r|| _        | xj        |z  c_        d S rC   )r   r   r3   r9   times     r%   add_cpu_timez"EventSummary.ItemBase.add_cpu_time  B    d'''$(!d'''$(!MMT!MMMMr$   c                 n    || j         k    r|| _         || j        k     r|| _        | xj        |z  c_        d S rC   )r   r   r5   r   s     r%   add_gpu_timez"EventSummary.ItemBase.add_gpu_time$  r   r$   c                 n    || j         k    r|| _         || j        k     r|| _        | xj        |z  c_        d S rC   )r   r   r7   r   s     r%   add_general_gpu_timez*EventSummary.ItemBase.add_general_gpu_time+  sH    d///,0)d///,0)!!T)!!!!r$   c                 &    | xj         dz  c_         d S Nr   )r   rM   s    r%   add_callzEventSummary.ItemBase.add_call2  s    IINIIIIr$   c                 &    | xj         |z  c_         d S rC   r   )r9   r   s     r%   	add_flopszEventSummary.ItemBase.add_flops5  s    KK5 KKKKr$   c                     t           rC   )NotImplementedErrorr9   ro   s     r%   add_itemzEventSummary.ItemBase.add_item8  s    %%r$   N)r   r   r   r:   rS   r   r   r   r   r   r   r  r  r  r  r#   r$   r%   ItemBaser     s        	 	 	  
	 	 
	 
	- 	- 
	- 
	- 	- 
	- 
	5 	5 
	5	" 	" 	"	" 	" 	"	* 	* 	*	 	 		! 	! 	!	& 	& 	& 	& 	&r$   r  c                       e Zd Zd ZdS )EventSummary.DeviceItemc                 j    | xj         dz  c_         |                     |j        |j        z
             d S r  )r   r   rD   rE   r
  s     r%   r  z EventSummary.DeviceItem.add_item<  s4    IINIIdkDM9:::::r$   Nr   r   r   r  r#   r$   r%   
DeviceItemr  ;  s#        	; 	; 	; 	; 	;r$   r  c                       e Zd Zd ZdS )EventSummary.OperatorItemc                    |                                   |                     |j                   |                     |j                   |                     |j                   |                     |j                   |j	        D ]v}|j
        t          j        k    r_|j        | j        vr,t                              |j                  | j        |j        <   | j        |j                                     |           w|j        D ]^}|j        D ]T}|j        }|| j        vr"t                              |          | j        |<   | j        |                             |           U_d S rC   )r  r   r3   r   r5   r  r7   r  r   r1   r=   r   r>   r)   r   r   OperatorItemr  r2   rG   r   r  )r9   ro   rI   rf   rg   r)   s         r%   r  z"EventSummary.OperatorItem.add_itemA  s\   MMOOOdm,,,dm,,,%%d&;<<<NN4:&&&+ E E:!999z)===(55ejAA ,UZ8 (4==eDDD#0 < <"-"9 < <J%?D4<//-9-D-DT-J-JT*L&//
;;;;	<< <r$   Nr  r#   r$   r%   r  r  @  s#        	< 	< 	< 	< 	<r$   r  c                       e Zd Zd ZdS )EventSummary.ForwardItemc                    |                                   |                     |j                   |                     |j                   |                     |j                   |                     |j                   |j	        D ]v}|j
        t          j        k    r_|j        | j        vr,t                              |j                  | j        |j        <   | j        |j                                     |           wd S rC   )r  r   r3   r   r5   r  r7   r  r   r1   r=   r   r>   r)   r   r   r  r  )r9   ro   rI   s      r%   r  z!EventSummary.ForwardItem.add_itemW  s    MMOOOdm,,,dm,,,%%d&;<<<NN4:&&&+ E E:!999z)===(55ejAA ,UZ8 (4==eDDDE Er$   Nr  r#   r$   r%   ForwardItemr  V  s(        	E 	E 	E 	E 	Er$   r  c                       e Zd Zd ZdS )EventSummary.GeneralItemc                     |                                   |                     |j                   |                     |j                   |                     |j                   d S rC   )r  r   r3   r   r5   r  r7   r
  s     r%   r  z!EventSummary.GeneralItem.add_itemf  sX    MMOOOdm,,,dm,,,%%d&;<<<<<r$   Nr  r#   r$   r%   GeneralItemr  e  s#        	= 	= 	= 	= 	=r$   r  c                     i | _         t          j        t                    | _        i | _        t          j        t                    | _        i | _        i | _        i | _	        d S rC   )
rX   rU   rV   dictthread_itemsuserdefined_itemsuserdefined_thread_itemsmodel_perspective_itemsmemory_manipulation_itemskernel_itemsrM   s    r%   r:   zEventSummary.__init__l  sc    
'3
 
 "$(3(?)
 )
% (*$)+&r$   c                    t          |          \  }}|                                D ]\  }}|dd         D ]}|j        t          j        k    r|                     |           |j        t          j        k    s|j        t          j        k    rd|j        	                                v s6d|j        	                                v sd|j        	                                v r| 
                    |           |j        t          j        k    r|                     |           |                     |d                    |                                D ]\  }}t          j                    }|                    |           |r|                                }	|	j        D ]}
|
j        t          j        k    s?|
j        t          j        k    s*|
j        t          j        k    s|
j        t          j        k    r|                     |
           l|
j        t          j        k    r|                     |
           |                    |
           |ɌdS )z;
        Analysis operator event in the nodetress.
        r   Nmemcpy
memorycopymemsetr   )r   rX   r=   r   r>   add_operator_itemUserDefinedPythonUserDefinedr)   r   add_memory_manipulation_itemadd_userdefined_itemadd_kernel_itemrU   dequerY   popleftr1   rn   
Dataloaderrl   rm   add_model_perspective_itemProfileStep)r9   r[   node_statistic_treesthread2host_statistic_nodesr   host_statistic_nodeshost_statistic_noder   r0  ra   rI   s              r%   r   zEventSummary.parsey  s    =Fi<P<P99 )..00	: 	: 
 ';( K K# '+/GGG**+>???',0KKK*/&89 9 !$7$<$B$B$D$DDD'+>+C+I+I+K+KKK#':'?'E'E'G'GGG99:MNNNN 04.@A A !556IJJJ  !5a!89999-A-G-G-I-I 	, 	,)H)%''ELL,--- ,$}})7 , ,E
o&=== :)CCC :)AAA :)EEE77!    !:)DDD ;;EBBBU++++  ,	, 	,r$   c                     d S rC   r#   r9   operator_nodes     r%   add_forward_itemzEventSummary.add_forward_item  s    r$   c                    |j         | j        vr,t                              |j                   | j        |j         <   | j        |j                                      |           |j         | j        |j                 vr7t                              |j                   | j        |j                 |j         <   | j        |j                 |j                                      |           d S rC   )r)   rX   r   r  r  r   r]   r:  s     r%   r*  zEventSummary.add_operator_item  s    TZ//-9-F-F". .DJ})* 	
=%&//>>>T%6}7N%OOO))-*<== m56}7IJ 	-12=3EFOO	
 	
 	
 	
 	
r$   c                    |j         | j        vr,t                              |j                   | j        |j         <   | j        |j                                      |           |j         | j        |j                 vr7t                              |j                   | j        |j                 |j         <   | j        |j                 |j                                      |           d S rC   )r)   r!  r   r  r  r"  r]   )r9   userdefined_nodes     r%   r.  z!EventSummary.add_userdefined_item  s     (>>>(()9)>?? "#3#89 	/45>>?OPPP !01A1KLM M
 (()9)>?? )*:*DE % 	%&6&@A!	

(#
$
$
$
$
$r$   c                     |j         | j        vr,t                              |j                   | j        |j         <   | j        |j                                      |           d S rC   )r)   r$  r   r  r  )r9   memory_manipulation_nodes     r%   r-  z)EventSummary.add_memory_manipulation_item  sl    #(0NNN(()A)FGG *+C+HI 	&'?'DENN$	
 	
 	
 	
 	
r$   c                    |j         t          j        k    rd}nb|j         t          j        k    rd}nJ|j         t          j        k    rd}n2|j         t          j        k    rd}n|j         t          j        k    rd}nd S || j        vr"t          	                    |          | j        |<   | j        |         
                    |           d S )Nrn   rl   rm   r2  r4  )r=   r   rn   rl   rm   r2  r4  r#  r   r  r  )r9   model_perspective_noder)   s      r%   r3  z'EventSummary.add_model_perspective_item  s    !&/*AAADD#(O,DDDDD#(O,HHH!DD#(O,FFFDD#(O,GGG DDFt3331=1I1I$1O1OD(.$T*334JKKKKKr$   c                     t          |          }|D ]i}|j        t          j        k    rR|j        }|| j        vr"t                              |          | j        |<   | j        |                             |           jd S rC   )	rh   r=   r   rH   r)   r%  r   r  r  )r9   	root_nodere   rG   r)   s        r%   r/  zEventSummary.add_kernel_item  s    '	22' 	> 	>K?#999"'t000.:.E.Ed.K.KD%d+!$'00===	> 	>r$   N)r   r   r   r   r  r  r  r  r  r:   r   r<  r*  r.  r-  r3  r/  r#   r$   r%   r   r     ss        =& =& =& =& =& =& =& =&~; ; ; ; ;X ; ; ;
< < < < <x < < <,E E E E Eh E E E= = = = =h = = =  3, 3, 3,j  
 
 
 % % %&
 
 
L L L"> > > > >r$   r   c                   >    e Zd ZdZ G d d          Zd Zd Zd ZdS )MemorySummaryz2
    Analyse memory events in profiling data.
    c                       e Zd ZddZd ZdS )MemorySummary.MemoryItem	Allocatedc                 v    || _         || _        d| _        d| _        d| _        d| _        d| _        || _        d S r/   )
event_nameplaceallocation_count
free_countallocation_size	free_sizeincrease_sizememory_type)r9   rL  rM  rS  s       r%   r:   z!MemorySummary.MemoryItem.__init__  sD    (DODJ$%D!DO#$D DN!"D*Dr$   c                 P   |t           j        k    s|t           j        k    r!| xj        dz  c_        | xj        |z  c_        nP|t           j        k    s|t           j        k    r!| xj        dz  c_        | xj        |z  c_        nt          d           | j        | j        z
  | _
        d S )Nr   zNo corresponding type.)r   AllocateReservedAllocaterN  rP  FreeReservedFreerO  rQ  printrR  )r9   sizeallocation_types      r%   add_memory_recordz*MemorySummary.MemoryItem.add_memory_record  s    #5#>>>"&8&III%%*%%$$,$$$  #5#:::"&8&EEE1$$& .///!%!5!FDr$   N)rJ  )r   r   r   r:   r\  r#   r$   r%   
MemoryItemrI    s<        	+ 	+ 	+ 	+	G 	G 	G 	G 	Gr$   r]  c                     t          j        t                    | _        t          j        t                    | _        t          j        t
                    | _        t          j        t
                    | _        d S rC   )rU   rV   r  allocated_itemsreserved_itemsr   peak_allocation_valuespeak_reserved_valuesrM   s    r%   r:   zMemorySummary.__init__  sb    *6 
  
 *5
 
 '2&=c&B&B#$/$;C$@$@!!!r$   c                    |j         D ]}|j        t          j        k    s|j        t          j        k    r|| j        |j                 vr4t                              ||j        d          | j        |j                 |<   | j        |j                 |         	                    |j
        |j                   n|j        t          j        k    s|j        t          j        k    r~|| j        |j                 vr4t                              ||j        d          | j        |j                 |<   | j        |j                 |         	                    |j
        |j                   t          | j        |j                 |j                  | j        |j        <   t          | j        |j                 |j                  | j        |j        <   d S )NrJ  Reserved)mem_noder=   r   rU  rW  r_  rM  rG  r]  r\  increase_bytesrV  rX  r`  maxra  peak_allocatedrb  peak_reserved)r9   rL  ro   memnodes       r%   _analyse_node_memoryz"MemorySummary._analyse_node_memory%  s   } !	 !	G 2 ;;;<#5#:::T%9'-%HHH%00&{  (7
C
 $W]3##G$:GLIIII 2 CCC<#5#BBBT%8%GGG%00&z  '6zB
 #GM2##G$:GLIII9<+GM:&: :D'6 8;)'-8':O8 8D%gm44?!	 !	r$   c                 H   t          |          }|                                D ]}\  }}|dd         D ]m}|j        t          j        k    r|j        t          j        k    r%|j        D ]}|                     |j        |           |                     |j        |           n~dS )z8
        Analyse memory event in the nodetress.
        r   N)	rc   rX   r=   r   OperatorInnerr>   r1   rk  r)   )r9   r[   r   r   
host_nodes	host_noderI   s          r%   r   zMemorySummary.parseI  s     )33$4$:$:$<$< 	E 	E Hj'^ E E	>_%BBB>_%===!*!8 I I11).%HHHH))).)DDDDE	E 	Er$   N)r   r   r   r   r]  r:   rk  r   r#   r$   r%   rG  rG    s         G G G G G G G G<A A A" " "HE E E E Er$   rG  c                       e Zd ZdZd ZdS )StatisticDataz$
    Hold all analysed results.
    c                    || _         || _        t                      | _        t	                      | _        t                      | _        t                      | _	        | j        
                    |           | j        
                    |           | j        
                    |           | j	        
                    |           d S rC   )
node_trees
extra_infor   time_range_summaryr   event_summaryr   distributed_summaryrG  memory_summaryr   )r9   rs  rt  s      r%   r:   zStatisticData.__init__]  s    $$"2"4"4)^^#5#7#7 +oo%%j111  ,,, &&z222!!*-----r$   N)r   r   r   r   r:   r#   r$   r%   rq  rq  X  s-         
. 
. 
. 
. 
.r$   rq  TFmsd   K   c                 I  MNOPQ ddl m} 	 dMdgQdgNM gOdjMNOQfd	}	d }
g PPfd}dkd}dld}| j                            t          j                  }|
|j        |v rddg}d}d} |	|           |dd          D ]} |	|           Qd
         }Nd
         }Od
         } | |
|d                      ||            | |j        |             ||           d |t          | j	        d                             g} | |j        |            d |t          | j	        d                             g} | |j        |            | j        
                                D ]\}t          | j                            |t          j                            }||z  }d|  ||          g} | |j        |            ] ||            |d            |d|z              |d            |d           |d
k    rd                    P          S |
|j        |v rgg d}dgQdgNM gOd}|D ]} |	|           Qd
         }Nd
         }Od
         } | |
|d                      |d|             ||            | |j        |             ||           t          j        t"                    }t          j        t"                    }t          j        t"                    }t          j        t"                    }|                    | j        j                   |                    | j        j                   | j        j                                        D ]\  }}|t          j        k    r|||<   | j        j        r?t3          | j        j                  |t          j        <   | j        j        |t          j        <   t          j        t          j        t          j        t          j        fD ]p}t?          |                               d          d         } ||v rB| | j!        j"        v r4| j!        j"        |          j#        ||<   | j!        j"        |          j$        ||<   qt          j        tJ                    }!| j        j&                                        D ]:\  }"}#|#                                D ] \  }}$tO          |!|         |$d          |!|<   !;|!                                D ]\  }}$t3          |$          ||<   | j        j(        r?t3          | j        j(                  |t          j        <   | j        j)        |t          j        <   tU          |                                d  d!          }%|%d
         \  }}&d"                    t?          |                               d          d                   ||          ||&|#           |t          |&          |z            g} | |j        |            |%dd          D ]\  }}&d$                    t?          |                               d          d                   ||          ||&|#           |t          |&          |z            g} | |j        |             ||           g d%} | |j        |             ||           |                                D ]\  }}&d$                    t?          |                               d          d                   ||          ||&|#           |t          |&          |z            g} | |j        |             ||            |d&            |d|z              |d            |d           |
|j+        |v r| j!        j"        }'tY          |'          dk    rg }(d
})d
}*| j!        j"        d'         j-        }+d(D ]8},|,|'v r0|'|,         }-|+d
k    rd
}.nt          |-j-                  |+z  }.d'|,v r|, nd)|, },|, |-j#         ||-j$        |#           d* ||-j.        |#           d* ||-j/        |#           d* ||-j0        |#           d* |t          |-j$                  |z             	 ||-j-        |#           d* ||-j1        |#           d* ||-j2        |#           d* ||-j3        |#           d* ||.           	g}|(4                    |           d'|,vr|)|-j$        z  })|*|-j-        z  }*:||)z
  }/|+|*z
  }0|+d
k    rd
}.nt          |0          |+z  }.d+d ||/|#           d, |t          |/          |z              ||0|#           d, ||.           g}|(4                    |           d-}1d.}2d.}3|(D ]}tk          |d         t"                    rHtY          t?          |d                             |1k    r"tY          t?          |d                             }1tY          |d                   |2k    rtY          |d                   }2tY          |d/                   |3k    rtY          |d/                   }3g d0}dgQdgNM gOd1} |	|            |	|1            |	|2            |	|3           Qd
         }Nd
         }Od
         } | |
|d2                      |d|             ||            | |j        |             ||           |(D ]} | |j        |             ||            |d3            |d|z              |d            |d           |
|j6        |v r| j        j7        rg d4}dgQdgNM gOd}|D ]} |	|           Qd
         }Nd
         }Od
         } | |
|d5                      |d|             ||            | |j        |             ||           t3          | j        j7                  }4t3          | j        j8                  }5t3          | j        j9                  }6d' |||#           |t          |          |z            g} | |j        |            d6 ||4|#           |t          |4          |z            g} | |j        |            d7 ||5|#           |t          |5          |z            g} | |j        |            d8 ||6|#           |t          |6          |z            g} | |j        |             ||            |d9            |d|z              |d            |d           |
|j:        |v r| j!        j        rg }(d:}|r| j!        j;        }7nd;| j!        j        i}7|7                                D ]\  }8}9|(4                    d<|8            |tx          j=        k    r'tU          |9                                d= d!          }%ny|tx          j>        k    r'tU          |9                                d> d!          }%nB|tx          j?        k    r'tU          |9                                d? d!          }%n|tx          j@        k    r%tU          |9                                d@ A          }%n|tx          jA        k    r&tU          |9                                dB d!          }%n|tx          jB        k    r&tU          |9                                dC d!          }%nj|tx          jC        k    r&tU          |9                                dD d!          }%n4|tx          jD        k    r$tU          |9                                dE A          }%d
}:d
};|%D ]\  },}-|:|-j$        z  }:|;|-jE        z  };|%D ]\  },}-|:d
k    rd
}<nt          |-j$                  |:z  }<|;d
k    rd
}.nt          |-jE                  |;z  }.|,|-j#         ||-j$        |#           d* ||-j.        |#           d* ||-j/        |#           d* ||-j0        |#           d* ||<           	dF                     ||-jE        |#           ||-jF        |#           ||-jG        |#           ||-jH        |#           ||.                    |-jI        g}|(4                    |           |rk|-jJ                                        D ][\  }=}>|-j$        d
k    rd
}<nt          |>j$                  |-j$        z  }<|-jE        d
k    rd
}.nt          |>jE                  |-jE        z  }.tY          |=          dz   |k    r|=d |dGz
           }=|=dHz  }=d)|= |>j#        dF                     ||>j$        |#           ||>j.        |#           ||>j/        |#           ||>j0        |#           ||<                    dF                     ||>jE        |#           ||>jF        |#           ||>jG        |#           ||>jH        |#           ||.                    dg}|(4                    |           |>jK                                        D ]\  }?}@|>jE        d
k    rd
}.nt          |@j-                  |>jE        z  }.tY          |?          dIz   |k    r|?d |dJz
           }?|?dHz  }?dK|? |@j#        dLdF                     ||@j-        |#           ||@j1        |#           ||@j2        |#           ||@j3        |#           ||.                    dg}|(4                    |           ܐ]|-jK                                        D ]\  }?}@|-jE        d
k    rd
}.nt          |@j-                  |-jE        z  }.tY          |?          dz   |k    r|?d |dGz
           }?|?dHz  }?d)|? |@j#        dLdF                     ||@j-        |#           ||@j1        |#           ||@j2        |#           ||@j3        |#           ||.                    dg}|(4                    |           ܐd-}1d.}2d.}3dM}A|(D ]}tk          |t>                    rtk          |d         t"                    rHtY          t?          |d                             |1k    r"tY          t?          |d                             }1tY          |d                   |2k    rtY          |d                   }2tY          |d/                   |3k    rtY          |d/                   }3g dN}dgQdgNM gO |	|            |	|1            |	|2            |	|3            |	|A           Qd
         }Nd
         }Od
         } | |
|dO                      |d|             ||            | |j        |             ||           |(D ]@}tk          |t>                    r | |
||                     - | |j        |            A ||            |d            |d           |
|jL        |v r| j!        jM        rg }(| j!        jM        }B|tx          jB        k    r&tU          |B                                dP d!          }%n|tx          jC        k    r&tU          |B                                dQ d!          }%nZ|tx          jD        k    r%tU          |B                                dR A          }%n%tU          |B                                dS d!          }%d
}C|%D ]\  },}-|C|-j-        z  }C|%D ]\  },}-|Cd
k    rd
}.nt          |-j-                  |Cz  }.|,|-j#         ||-j-        |#           d* ||-j1        |#           d* ||-j2        |#           d* ||-j3        |#           d* ||.           	g}|(4                    |           g dT}dU}d-}1d.}3|(D ]}tk          |d         t"                    rHtY          t?          |d                             |1k    r"tY          t?          |d                             }1tY          |d                   |3k    rtY          |d                   }3dgQdgNM gO |	|            |	|1            |	|3           Qd
         }Nd
         }Od
         } | |
|dV                      |d|             ||            | |j        |             ||           t          jO        dW          }D|(D ]}|DP                    |d
                   }E|Er,|EQ                    d          |EQ                    d          z   },n|d
         },tY          |,          |k    r|,d |d/z
           dHz   |d
<   n|,|d
<    | |j        |             ||            |d            |d           |
|jR        |v r"| j!        jS        rg }(| j!        jS        }F| j!        j"        d'         jE        }+|F                                D ]	\  },}-|+d
k    rd
}.nt          |-jE                  |+z  }.|,|-j#         ||-j$        |#           d* ||-j.        |#           d* ||-j/        |#           d* ||-j0        |#           d* |t          |-j$                  |z             	 ||-jE        |#           d* ||-jF        |#           d* ||-jG        |#           d* ||-jH        |#           d* ||.           	g}|(4                    |           g d0}d
}d-}1d.}2d.}3|(D ]}tY          |d
                   |k    rtY          |d
                   }tk          |d         t"                    rHtY          t?          |d                             |1k    r"tY          t?          |d                             }1tY          |d                   |2k    rtY          |d                   }2tY          |d/                   |3k    rtY          |d/                   }3dgQdgNM gO |	|            |	|1            |	|2            |	|3           Qd
         }Nd
         }Od
         } | |
|dX                      |d|             ||            | |j        |             ||           |(D ]} | |j        |             ||            |d            |d           |
|jT        |v rF| j!        jU        r9g }(| j!        j"        d'         jE        }+|r| j!        jV        }Gnd;| j!        jU        i}G|G                                D ]\  }8}9|(4                    d<|8            |tx          j=        k    r'tU          |9                                dY d!          }%ny|tx          j>        k    r'tU          |9                                dZ d!          }%nB|tx          j?        k    r'tU          |9                                d[ d!          }%n|tx          j@        k    r%tU          |9                                d\ A          }%n|tx          jA        k    r&tU          |9                                d] d!          }%n|tx          jB        k    r&tU          |9                                d^ d!          }%nj|tx          jC        k    r&tU          |9                                d_ d!          }%n4|tx          jD        k    r$tU          |9                                d` A          }%|%D ]\  },}-|+d
k    rd
}.nt          |-jE                  |+z  }.|,|-j#         ||-j$        |#           d* ||-j.        |#           d* ||-j/        |#           d* ||-j0        |#           d* |t          |-j$                  |z             	dF                     ||-jE        |#           ||-jF        |#           ||-jG        |#           ||-jH        |#           ||.                    g}|(4                    |           d
}d-}1d.}2d.}3|(D ]}tk          |t>                    rtY          |d
                   |k    rtY          |d
                   }tk          |d         t"                    rHtY          t?          |d                             |1k    r"tY          t?          |d                             }1tY          |d                   |2k    rtY          |d                   }2tY          |d/                   |3k    rtY          |d/                   }3g d0}dgQdgNM gO |	|            |	|1            |	|2            |	|3           Qd
         }Nd
         }Od
         } | |
|da                      |d|             ||            | |j        |             ||           |(D ]@}tk          |t>                    r | |
||                     - | |j        |            A |d            |d           |
|jW        |v r| jX        jY        s| jX        jZ        rw| jX        jY                                        D ]W\  }H}Ig }(tU          |I                                db d!          }%|%D ]A\  }J}-|J|-j[        |-j\        |-j]        |-j^        |-j_        |-j`        g}|(4                    |           BtU          | jX        jZ        |H                                         dc d!          }K|KD ]A\  }J}-|J|-j[        |-j\        |-j]        |-j^        |-j_        |-j`        g}|(4                    |           Bg dd}dgQdgNM gOde}d1}L |	|            |	df            |	|L            |	|L            |	|L            |	|L            |	|L           Qd
         }Nd
         }Od
         } | |
|dg|H                       |dh                    | jX        ja        |H                               |di                    | jX        jb        |H                               ||            | |j        |             ||           |(D ]@}tk          |t>                    r | |
||                     - | |j        |            A |d            |d           Yd                    P          S )mNr   )SummaryViewr   r'   <c                     dxx         d|z   t          |           z   dz   dz  z   z  cc<   dxx         d| z  dz  z   z  cc<   dxx         | z   z  cc<   d S )Nr   z{: }r   -)str)paddingtext_dirSPACING_SIZEheader_sep_listline_length_listrow_format_lists     r%   
add_columnz _build_table.<locals>.add_column}  s    Hs7||+c1S<5GH	
 	cGms\/ABBw55r$   c                 R    | t          |          z
  }|dz  }d|z  |z   d||z
  z  z   S )Nr   r  )r   )r  textleft_lengthhalfs       r%   	add_titlez_build_table.<locals>.add_title  s:    D		)aTzD 3+*<#===r$   c                 \                         |                                 d           d S )Nr   )rY   )sresults    r%   rY   z_build_table.<locals>.append  s-    adr$   ry  r   c                     | t          d          k    rdS t          |           }|dk    r|dz  }n|dk    r|dz  }n|dk    r|dz  }d	                    d
|z  |          S )z7
        Transform time in ns to time in unit.
        r   r  r  r}   ry  r~   usr   {}{:.2f}r   )r   format)r   unitindentr  s       r%   format_timez!_build_table.<locals>.format_time  sy     5<<34[[Fs{{###$$S6\6:::r$   c                 :    d                     d|z  | dz            S )zK
        Transform ratio within [0, 1] to percentage presentation.
        r  r   rz  )r  )ratior  s     r%   format_ratioz"_build_table.<locals>.format_ratio  s"       vus{;;;r$   DevicezUtilization (%)      zDevice SummaryzCPU(Process)zProcess Cpu UtilizationzCPU(System)zSystem Cpu UtilizationGPUa?  Note:
CPU(Process) Utilization = Current process CPU time over all cpu cores / elapsed time, so max utilization can be reached 100% * number of cpu cores.
CPU(System) Utilization = All processes CPU time over all cpu cores(busy time) / (busy time + idle time).
GPU Utilization = Current process GPU time / elapsed time.r  )z
Event TypeCallszCPU Time	Ratio (%)   zOverview SummaryzTime unit: .Tr   c                     | d         S r  r#   xs    r%   r   z_build_table.<locals>.<lambda>6  s
    1 r$   )keyreversez{})r  z  {})r'   r  zGPU Timer  a@  Note:
In this table, We sum up all collected events in terms of event type.
The time of events collected on host are presented as CPU Time, and as GPU Time if on device.
Events with different types may overlap or inclusion, e.g. Operator includes OperatorInner, so the sum of ratios is not 100%.
The time of events in the same type with overlap will not calculate twice, and all time is summed after merged.
Example:
Thread 1:
  Operator: |___________|     |__________|
Thread 2:
  Operator:   |____________|     |___|
After merged:
  Result:   |______________|  |__________|
r4  )r4  r2  rn   rl   rm   z  z / z  Othersz / - / - / - / r   (   r   )Namer  &CPU Total / Avg / Max / Min / Ratio(%)&GPU Total / Avg / Max / Min / Ratio(%)   zModel SummaryzNote:
In this table, GPU time is the sum of all device(GPU) events called in the phase.
Unlike overview summary, if two device(GPU) events execute on different streams with overlap time, we sum them directly here.
)r  z
Total Timer  zDistribution Summaryz  Communicationz  Computationz	  Overlapa  Note:
Communication time: Communication Event time, Communication Op time and its kernel time on gpu.
Computation time: Kernel time, except kernels belong to communication(nccl kernels).
Overlap time: Communication time intersects with computation time.
Example:
Communication:
  CPU:              |_________________|
  GPU:                                  |______________|
  Total:            |_________________| |______________|
Computation time(Kernel):
  GPU:         |________________|
Overlap time:       |___________|
4   zAll threads mergedzThread: c                     | d         j         S r  r3   r  s    r%   r   z_build_table.<locals>.<lambda>+      QqT] r$   c                     | d         j         S r  r   r  s    r%   r   z_build_table.<locals>.<lambda>0      ad&7 r$   c                     | d         j         S r  r   r  s    r%   r   z_build_table.<locals>.<lambda>6  r  r$   c                     | d         j         S r  r   r  s    r%   r   z_build_table.<locals>.<lambda>;      QqT5F r$   )r  c                     | d         j         S r  r7   r  s    r%   r   z_build_table.<locals>.<lambda>@      ad&; r$   c                     | d         j         S r  r   r  s    r%   r   z_build_table.<locals>.<lambda>F      ad&? r$   c                     | d         j         S r  r   r  s    r%   r   z_build_table.<locals>.<lambda>L  r  r$   c                     | d         j         S r  r   r  s    r%   r   z_build_table.<locals>.<lambda>Q      QqT5N r$   z{} / {} / {} / {} / {}r   z...r   r   z    z- / - / - / - / -
   )r  r  r  r  FLOPszOperator Summaryc                     | d         j         S r  )r   r  s    r%   r   z_build_table.<locals>.<lambda>I      !A$"3 r$   c                     | d         j         S r  )r   r  s    r%   r   z_build_table.<locals>.<lambda>O  r  r$   c                     | d         j         S r  )r   r  s    r%   r   z_build_table.<locals>.<lambda>T  s    !8I r$   c                     | d         j         S r  )r5   r  s    r%   r   z_build_table.<locals>.<lambda>Y  s    !A$- r$   )r  r  r  Z   zKernel Summaryz(.+?)(<.*>)(\(.*\))zMemory Manipulation Summaryc                     | d         j         S r  r  r  s    r%   r   z_build_table.<locals>.<lambda>  r  r$   c                     | d         j         S r  r  r  s    r%   r   z_build_table.<locals>.<lambda>  r  r$   c                     | d         j         S r  r  r  s    r%   r   z_build_table.<locals>.<lambda>  r  r$   c                     | d         j         S r  r  r  s    r%   r   z_build_table.<locals>.<lambda>  r  r$   c                     | d         j         S r  r  r  s    r%   r   z_build_table.<locals>.<lambda>  r  r$   c                     | d         j         S r  r  r  s    r%   r   z_build_table.<locals>.<lambda>  r  r$   c                     | d         j         S r  r  r  s    r%   r   z_build_table.<locals>.<lambda>  r  r$   c                     | d         j         S r  r  r  s    r%   r   z_build_table.<locals>.<lambda>#  r  r$   zUserDefined Summaryc                     | d         j         S r  rR  r  s    r%   r   z_build_table.<locals>.<lambda>      !A$"4 r$   c                     | d         j         S r  r  r  s    r%   r   z_build_table.<locals>.<lambda>  r  r$   )r  TypezAllocation Countz
Free CountzAllocation Sizez	Free SizezIncreased Size2      zMemory Summary - zPeak Allocated Memory: {}zPeak Reserved Memory: {})r~  )ry  r   rx   )cprofilerr}  ru  r   r   r4  
DeviceViewr  r   rt  r   r   rH   r   OverViewrU   rV   r   updater   r   rX   r   rw  r   r   r   r2  rn   rl   rm   r  splitrv  r#  r   r3   rW   r   r	   r   r   sorted	ModelViewr   r5   r   r   r   r   r   r   rY   r   DistributedViewr   r   r   OperatorViewr   r   r   r   r   r   r   r    r!   r"   r7   r   r   r   r   r   r   
KernelViewr%  recompilematchgroupMemoryManipulationViewr$  UDFViewr!  r"  
MemoryViewrx  r_  r`  rS  rN  rO  rP  rQ  rR  ra  rb  )Rstatistic_data	sorted_by	op_detail
thread_sep	time_unit	row_limitmax_src_column_widthviewsr}  r  r  rY   r  r  
total_timeheadersname_column_widthDEFAULT_COLUMN_WIDTHrz   
row_format
header_sepline_length
row_valuesgpu_namer5   utilizationcpu_type_timegpu_type_timecpu_call_timesgpu_call_timesr   valueevent_type_namegpu_time_ranger   r   
time_rangesorted_itemsr   r#  all_row_valuesaccumulation_timegpu_accumulation_timegpu_total_timer)   item	gpu_ratio
other_timeother_gpu_timecalltime_widthcpu_data_description_widthgpu_data_description_widthcommunication_timecomputation_timeoverlap_timer   r]   rX   total_op_cpu_timetotal_op_gpu_time	cpu_ratioinnerop_nameinnerop_nodedevice_node_namerG   flops_widthr%  total_kernel_gpu_timekernel_name_patternr  r$  r"  device_typememory_eventsrL  sorted_reserved_itemsnumber_column_widthr  r  r  r  r  sR                                                                                @@@@@r%   _build_tabler#  j  sl4    &%%%%%%LdOdO%6 6 6 6 6 6 6 6 6> > >
 F    ; ; ; ; < < < <  2DD# J }.%77./!
$%%% 	- 	-AJ+,,,,$Q'
$Q'
&q) 	yy&677888z z '*+++zLn/0IJKK 

 	 z *-...Ln/0HIJJ 

 	 z *-...&9IIKK 	3 	3H1CCo4  H
 #Z/K***LL,E,EFJF$:$j12222zI	
 	
 	

 	s[ !!!r


r


??776??"},55BBB$$)M?! 	- 	-AJ+,,,,$Q'
$Q'
&q) 	yy&899:::(Y(()))z z '*+++z#/44#/44$055$055n?JKKKn?JKKK
 .>DDFF	2 	2 
_:::,1j)-E 	;E2J< <M/78 2< ?89
 &#$(	
 	 	J "*oo33C88;On,,#!/GH H #0H' z* #0H' j) %066 .;AACC	 	 
*<*B*B*D*D  &
J-9":.
d. . .z** '5&:&:&<&< 	? 	?"J
(2:(>(>M*%%-E 	;E2J< <M/78 2< ?89 !!~~t
 
 
 (?
DKKJ--c221566:&K9---Ltz122	

 	 z *-... ,QRR 0 	3 	3Jc*oo33C88;<<z*Dy111U4[[:566	J F$:$j12222z888 z '*+++z - 3 3 5 5 	3 	3Jc*oo33C88;<<z*Dy111U4[[:566	J F$:$j12222z
;	
 	
 	
 	s[ !!!r


r


}-66 (@ 	  &''!++N !$%!,D! 
 ? ? 222248D%**$%		$)$-$8$8>$I	(5(=(=d99;;;D		&;t}9EEE  U  U++VZVgnwJxJxJx  U  U  ~I  ~I  JN  J[  bk  ~l  ~l  ~l  U  U  q|  q|  }A  }N  U^  q_  q_  q_  U  U  dp  dp  qv  w{  wD  qE  qE  HR  qR  dS  dS  U  U&;t}9EEE  }  }++VZVgnwJxJxJx  }  }  ~I  ~I  JN  J[  bk  ~l  ~l  ~l  }  }  q|  q|  }A  }N  U^  q_  q_  q_  }  }  dp  dp  qz  d{  d{  }  }	"J #))*555$D00)T]:)->-#&77J+.CCN""		!.11NB	;z	:::yy<<X]^hXiXilvXvKwKwyy;~I>>>hh||\eOfOfhh	J !!*---N)+&)+&, 	D 	D
z!}c22=C
1..//.@@%(Z]););%<%<Nz!}%%(BBB14Z]1C1C.z!}%%(BBB14Z]1C1C.  G  "dO!dO!- "J()))J~&&&J1222J1222(+J(+J*1-K F99[/::;;;F,,,---F:F$:$g.///F:, 7 7
(z(*56666F:FR   F3$%%%F2JJJF2JJJ}3u<<-A K	  G
  "dO!dO!-#%  1 1
/0000(+J(+J*1-K F99[*@AABBBF,,,---F:F$:$g.///F:!+2F" "  *2D    &2@ L JY777U:..;<<J
 F$:$j1222!.Y???U#566CDDJ
 F$:$j1222  ,9===U#344zABBJ
 F$:$j1222 Ly999U<00:=>>J
 F$:$j1222F:F
6   F3$%%%F2JJJF2JJJ}0E99'- a	N " -;H ).*F*L  %1$6$6$8$8 e> e> 	5%%&<&<&<===
 333#)+B+BD$ $ $LL *"333#)77 $$ $ $LL
 *"333#)77 $$ $ $LL
 *"333#)+F+F$ $ $LL *"555#);; $$ $ $LL
 *"333#)?? $$ $ $LL
 *"333#)?? $$ $ $LL
 *"333#)+N+N$ $ $L %&!$%!". ? ?JD$%6%%)>>%%". r> r>JD$(A--$%		$)$-$8$8;L$L	(A--$%		 "$"788;LL " 	&;t}9EEE  }  }++VZVgnwJxJxJx  }  }  ~I  ~I  JN  J[  bk  ~l  ~l  ~l  }  }  q|  q|  }A  }N  U^  q_  q_  q_  }  }  dp  dp  qz  d{  d{  }  }077'K(=INNN'K $ 9	   (K $ 9	   (K $ 9	   )L33  
#"J& #))*555  S> "17799lB lB ((#}11,-		 %*,*?$@$@4=$P !*  $499,-		 %*,*G$H$H&*&;%< !*  #<00147HHH/;$;&7!&;$;0" !- 5 3\ 3 3 , 1 8 ? ?$/K(4(=I%& %& %& %0K(4(A-6%& %& %& %0K(4(A-6%& %& %& %0K(4(A-6%& %& %& %1L$;$;!!" !"$ !9 ? ?$/K(4(E-6%& %& %& %0K(4(I-6%& %& %& %0K(4(I-6%& %& %& %0K(4(I-6%& %& %& %1L$;$;#!" !"& !$Q)*JT +11*=== ".!5!;!;!=!=+B +B ! 0 +#/#@A#E#E01II ).k.B(C(C*6*G)H %.
 %((8$9$9A$=&7%8 %8 8H(?*;a*?(?8&$4 %5$=$4$=+;$=$=$/$4$7$<$C$C(3,7,@y)* )* )* )4,7,D1:)* )* )* )4,7,D1:)* )* )* )4,7,D1:)* )* )* )5Y(?(?!%& %&$ %(-."
0 !/ 5 5j A A A AW+B^ "\//11%> %> ,'#499,-		 %*+*>$?$?&*&;%< !*  ##344q8;LLL3C$;&7!&;$;4" 0 !1E 9 0 7%5 7 7 + 0 3 8 ? ?$/K(3(<9%& %& %& %0K(3(@y%& %& %& %0K(3(@y%& %& %& %0K(3(@y%& %& %& %1L$;$;!" !" !$'*J* +11*====er>h N)+&)+&K, D D
j#.. z!}c22=C
1..//.@@%(Z]););%<%<Nz!}%%(BBB14Z]1C1C.z!}%%(BBB14Z]1C1C.  G  "dO!dO!-J()))J~&&&J1222J1222J{###(+J(+J*1-K F99[*<==>>>F,,,---F:F$:$g.///F:, ; ;
j#.. ;F99[*==>>>>F,:,j9::::F:F2JJJF2JJJ}.%77'4 Z	N)7DLJ---% &&((33      
 j///% &&((33      
 j///% &&((.I.I       & &&((//       %&!* 7 7
d%6%%* 
2 
2
d(A-- !II %dm 4 47L LII"{4=yAAA  y  ykkRVRcjsFtFtFt  y  y  zE  zE  FJ  FW  ^g  zh  zh  zh  y  y  mx  mx  y}  yJ  QZ  m[  m[  m[  y  y  `l  `l  mv  `w  `w  y  y

 %%j1111  G !#N)+&, D D
z!}c22=C
1..//.@@%(Z]););%<%<Nz!}%%(BBB14Z]1C1C.!dO!dO!-J()))J~&&&J1222(+J(+J*1-K F99[*:;;<<<F,,,---F:F$:$g.///F:"$*-C"D"D, 
7 
7
+11*Q-@@ ) ;;q>>EKKNN:DD%a=Dt99000$()@+<q+@)@$AE$IJqMM$(JqM(z(*56666F:F2JJJF2JJJ}:eCC'A E	N,F & ,D!" 
 8==?? 2 2
d!Q&& !II %d&; < <~ MII"{4=yAAA  Q  QkkRVRcjsFtFtFt  Q  Q  zE  zE  FJ  FW  ^g  zh  zh  zh  Q  Q  mx  mx  y}  yJ  QZ  m[  m[  m[  Q  Q  `l  `l  mr  sw  s@  mA  mA  DN  mN  `O  `O  Q  Q"{4#8yIII  Y  YkkZ^Zs  {D  OE  OE  OE  Y  Y  JU  JU  VZ  Vo  v  J@  J@  J@  Y  Y  EP  EP  QU  Qj  qz  E{  E{  E{  Y  Y  @L  @L  MV  @W  @W  Y  Y	
 %%j1111  G !"N)+&)+&, D D
z!}%%(999(+JqM(:(:%z!}c22=C
1..//.@@%(Z]););%<%<Nz!}%%(BBB14Z]1C1C.z!}%%(BBB14Z]1C1C.!dO!dO!-J()))J~&&&J1222J1222(+J(+J*1-K F99[*GHHIIIF,,,---F:F$:$g.///F:, 7 7
(z(*56666F:F2JJJF2JJJ}+u44'9 J	N,D!" 
  "0I )(
 ).*F*X,( %=$B$B$D$D F6 F6 	5%%&<&<&<===
 333#)+B+BD$ $ $LL *"333#)77 $$ $ $LL
 *"333#)77 $$ $ $LL
 *"333#)+F+F$ $ $LL *"555#);; $$ $ $LL
 *"333#)?? $$ $ $LL
 *"333#)?? $$ $ $LL
 *"333#)+N+N$ $ $L #/ 6 6JD$%**$%		 "$"788>I " 	&;t}9EEE  U  U++VZVgnwJxJxJx  U  U  ~I  ~I  JN  J[  bk  ~l  ~l  ~l  U  U  q|  q|  }A  }N  U^  q_  q_  q_  U  U  dp  dp  qv  w{  wD  qE  qE  HR  qR  dS  dS  U  U077'K(=INNN'K $ 9	   (K $ 9	   (K $ 9	   )L33 	"J$ #))*5555368 !"N)+&)+&, D D
j#.. z!}%%(999(+JqM(:(:%z!}c22=C
1..//.@@%(Z]););%<%<Nz!}%%(BBB14Z]1C1C.z!}%%(BBB14Z]1C1C.  G  "dO!dO!-J()))J~&&&J1222J1222(+J(+J*1-K F99[*?@@AAAF,,,---F:F$:$g.///F:, ; ;
j#.. ;F99[*==>>>>F,:,j9::::F2JJJF2JJJ}.%77 )9d	,;d	  .>DDFF` ` !#%!''))44       )5 
6 
6$J"(-,*"J #))*5555(."1@#egg44 ) ) )% )> 
6 
6$J"(-,*"J #))*5555   $&$#%$%1M? $&!&(#
,---
2
.///
.///
.///
.///
.///,Q/
,Q/
.q1 Ik+L{+L+LMM   /66&5L'    .55&5J'    z"""(z('2333z""""0 ? ?J!*c22 ?yyjAABBBB0z0*=>>>>r


r



776??r$   )r   )r   )1rU   r  enumr   paddle.base.corer   r   paddle.utils.flopsr   statistic_helperr   r	   r
   r   r>   r2  r4  CudaRuntimerH   MemcpyMemsetr+  rm  rn   rl   rm   r   PythonOpr,  _AllTracerEventTyper   r   r+   r-   rc   rh   r{   r   r   r   r   r   r   r   r   rG  rq  r   r#  r#   r$   r%   <module>r-     s*       				       @ @ @ @ @ @ @ @ $ $ $ $ $ $            ! !% $ 988        B  ?, ?, ?, ?, ?, ?, ?, ?,D    "% % %P	$ 	$ 	$ 	$' ' ' '# # # #L0 0 0 0%+ %+ %+PL0 L0 L0 L0 L0 L0 L0 L0^T
 T
 T
 T
 T
 T
 T
 T
n~> ~> ~> ~> ~> ~> ~> ~>B]E ]E ]E ]E ]E ]E ]E ]E@. . . . . . . .( !
t t t t t tr$   