o
    :/i6                     @   sx  U d dl Z d dlmZ d dlmZ d dlmZmZmZ d dl	m
Z
mZmZmZ d dlmZmZmZ d dlmZmZmZ d dlmZ d d	lmZmZ d d
lmZmZmZmZm Z m!Z! d dl"m#Z# zd dl$Z%W n e&yr   e#dZ%Y nw eG dd dZ'eG dd dZ(eG dd dZ)e)e(B Z*ee+d< ede*dZ,eG dd dee, Z-eG dd deZ.G dd deZ/dS )    N)defaultdict)Callable)asdict	dataclassfield)AnyGeneric	TypeAliasTypeVar)
DeviceType_KinetoEvent_ProfilerResult)
_EventType_ExperimentalConfig_ProfilerEvent)FunctionEvent)ProfilerActivityprofile)TablePrinterevent_has_moduleevent_is_torch_opevent_module_reprevent_torch_op_stack_traceindent_string)PlaceholderModulepandasc                   @   sh   e Zd ZU eed< dZded< eedZed  ed< dZ	e
ed< ed	d
 Zedd Zedd ZdS )_ModuleTreeNodeeventNz_ModuleTreeNode | Noneparentdefault_factorychildren tracec                 C   s   | j jd u pt| j jdkS Nr   )r   r!   lenself r(   l/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/vllm/profiler/layerwise_profile.pyis_leaf&   s   z_ModuleTreeNode.is_leafc                 C   s
   t | jS N)r   r   r&   r(   r(   r)   is_torch_op*   s   
z_ModuleTreeNode.is_torch_opc                 C   s"   | j jtjko| j jd jtjkS )N   )r   tagr   Kinetotypeddevice_typer   CUDAr&   r(   r(   r)   is_cuda.   s   z_ModuleTreeNode.is_cuda)__name__
__module____qualname__r   __annotations__r   r   listr!   r#   strpropertyr*   r,   r3   r(   r(   r(   r)   r      s   
 

r   c                   @   s.   e Zd ZU eed< eed< eed< eed< dS )SummaryStatsEntrynamecuda_time_uspct_cuda_timeinvocationsN)r4   r5   r6   r9   r7   floatintr(   r(   r(   r)   r;   6   s
   
 r;   c                   @   s6   e Zd ZU eed< eed< eed< eed< eed< dS )ModelStatsEntryr<   cpu_time_usr=   r>   r#   N)r4   r5   r6   r9   r7   r@   r(   r(   r(   r)   rB   >   s   
 rB   
StatsEntryStatsEntryT)boundc                   @   s8   e Zd ZU eed< eedZed ed< dZded< dS )_StatsTreeNodeentryr   z_StatsTreeNode[StatsEntryT]r!   Nz"_StatsTreeNode[StatsEntryT] | Noner   )	r4   r5   r6   rE   r7   r   r8   r!   r   r(   r(   r(   r)   rG   K   s   
 rG   c                   @   s  e Zd ZU eed< eddZeee	e
 f ed< eddZeee	e f ed< eddZe	e ed< eddZe	ee  ed< eddZe	ee  ed< d	Zed	B ed
< dd Zd1deeef d	B fddZd1deeef d	B fddZdefddZdefddZdeeef fddZe	d2de	eeef  de egef eB fddZ!dd  Z"d!d" Z#d#efd$d%Z$d#efd&d'Z%d(d) Z&d*d+ Z'd,e	ee  de	eeef  fd-d.Z(d,e	ee  de	eeef  fd/d0Z)d	S )3LayerwiseProfileResults_kineto_resultsF)init_kineto_event_correlation_map_event_correlation_map_module_tree_model_stats_tree_summary_stats_treeNnum_running_seqsc                 C   s   |    |   |   d S r+   )_build_correlation_map_build_module_tree_build_stats_treesr&   r(   r(   r)   __post_init__^   s   z%LayerwiseProfileResults.__post_init__column_widthsc                 C   s^   t dddddd}|r|jd	i | dd | | jD }tt|| j|dd d d S )
N<      r<   rC   r=   r>   r#   c                 S   s,   g | ]\}}|j d ks|jd kr||fqS r   )r=   rC   .0depthrowr(   r(   r)   
<listcomp>i   s
    z=LayerwiseProfileResults.print_model_table.<locals>.<listcomp>c                 S      dd|   d S N|- r(   indentr(   r(   r)   <lambda>q       z;LayerwiseProfileResults.print_model_table.<locals>.<lambda>indent_styler(   )dictupdate_flatten_stats_treerO   r   rB   print_table _indent_row_names_based_on_depth)r'   rV   _column_widthsfiltered_model_tabler(   r(   r)   print_model_tablec   s   


z)LayerwiseProfileResults.print_model_tablec                 C   s\   t ddddd}|r|jd
i | dd | | jD }tt|| j|dd d	 d S )NP   rX      r<   r=   r>   r?   c                 S   s"   g | ]\}}|j d kr||fqS rZ   r=   r[   r(   r(   r)   r_   {   s
    
z?LayerwiseProfileResults.print_summary_table.<locals>.<listcomp>c                 S   r`   ra   r(   re   r(   r(   r)   rg      rh   z=LayerwiseProfileResults.print_summary_table.<locals>.<lambda>ri   r(   )rk   rl   rm   rP   r   r;   rn   ro   )r'   rV   rp   filtered_summary_tabler(   r(   r)   print_summary_tableu   s   

z+LayerwiseProfileResults.print_summary_tablefilenamec                 C   *   t dd | | jD }|| d S )Nc                 S      g | ]\}}t |qS r(   r   r\   _r^   r(   r(   r)   r_      s    zHLayerwiseProfileResults.export_model_stats_table_csv.<locals>.<listcomp>)pd	DataFramerm   rO   to_csvr'   ry   dfr(   r(   r)   export_model_stats_table_csv   s   z4LayerwiseProfileResults.export_model_stats_table_csvc                 C   rz   )Nc                 S   r{   r(   r|   r}   r(   r(   r)   r_      s    zJLayerwiseProfileResults.export_summary_stats_table_csv.<locals>.<listcomp>)r   r   rm   rP   r   r   r(   r(   r)   export_summary_stats_table_csv   s   
z6LayerwiseProfileResults.export_summary_stats_table_csvreturnc                 C   s"   d| j i| | j| | jdS )NrQ   )metadatasummary_statsmodel_stats)rQ   _convert_stats_tree_to_dictrP   rO   r&   r(   r(   r)   convert_stats_to_dict   s   

z-LayerwiseProfileResults.convert_stats_to_dictrd   depths_rowsrj   c                 C   sF   g }| D ]\}}|j dkrqt|}t|j|||_|| q|S r$   )r=   copydeepcopyr   r<   append)r   rj   indented_rowsr]   r^   indented_rowr(   r(   r)   ro      s   

z8LayerwiseProfileResults._indent_row_names_based_on_depthc                 C   s2   t t| _| j D ]}| j|  | q
d S r+   )r   r8   rL   rJ   eventscorrelation_idr   )r'   r   r(   r(   r)   rR      s   
z.LayerwiseProfileResults._build_correlation_mapc                    sF   g _ j }	 ddtdtd B f fdd |D ]} | qd S )Nr   	curr_nodec                    s   | j dkrd S t| r"t| |d}|r|j| nj| |}| jd u p-t| jdk}|rG|rGt| |t| dd dd}|j| |}| jD ]} || qJd S )Nr-   )r   r   r   c                 S   s   t | S r+   )r   )xr(   r(   r)   rg      s    zSLayerwiseProfileResults._build_module_tree.<locals>._df_traversal.<locals>.<lambda>)until)r   r   r#   )	start_tidr   r   r!   r   rN   r%   r   )r   r   noder*   child_df_traversalr'   r(   r)   r      s,   

zALayerwiseProfileResults._build_module_tree.<locals>._df_traversalr+   )rN   rJ   experimental_event_treer   r   )r'   
event_treerootr(   r   r)   rS      s   

z*LayerwiseProfileResults._build_module_treer   c                    s@    j jtjkr	d S | j j jg } fdd|D }t|d S )Nc                 3   s2    | ]}|  tjkr|  jjkr|V  qd S r+   )r1   r   r2   r<   r   )r\   r   r   r(   r)   	<genexpr>   s    z@LayerwiseProfileResults._get_kineto_gpu_event.<locals>.<genexpr>)r   r.   r   r/   rL   getr   next)r'   r   correlated_kineto_eventsiteratorr(   r   r)   _get_kineto_gpu_event   s   

z-LayerwiseProfileResults._get_kineto_gpu_eventc                    s   dt f fdd  |S )z Return cuda time in microsecondsr   c                    s@   | j r|  }r| d S d}| jD ]}| |7 }q|S )N     @@r   )r*   r   duration_nsr!   )r   gpu_kineto_eventcumulative_cuda_timer   _cumulative_cuda_time_recursiver'   r(   r)   r      s   
zVLayerwiseProfileResults._cumulative_cuda_time.<locals>._cumulative_cuda_time_recursive)r   )r'   r   r(   r   r)   _cumulative_cuda_time   s   	z-LayerwiseProfileResults._cumulative_cuda_timec                    s   t  fdd jD S )Nc                    s   g | ]}  |qS r(   )r   )r\   r   r&   r(   r)   r_      s    z<LayerwiseProfileResults._total_cuda_time.<locals>.<listcomp>)sumrN   r&   r(   r&   r)   _total_cuda_time   s   z(LayerwiseProfileResults._total_cuda_timec              
      s   i   fdd	 	ddtdtt d B dttdf dtt d B ffd	d
g _jD ]}|}|d urCj| q3	 ddtdtt	 d B dtt	 d B f fdd g _
jD ]} |}|d ursj
| qcd S )Nc                    s   |   d S )Nd   r(   rv   )total_cuda_timer(   r)   r>      s   zALayerwiseProfileResults._build_stats_trees.<locals>.pct_cuda_timer(   r   r   summary_trace.r   c           	         s   t | jrt| j}| }n|  }r"| }| d }nd S ||f }|v rG| j}| j|7  _| j	d7  _	|j|_
ntt|||dd|d}|r]|j| ||< | jD ]
} || | qd| S )Nr   r-   ru   rH   r   )r   r   r   r   r   r<   r   rH   r=   r?   r>   rG   r;   r!   r   )	r   r   r   r<   r=   r   rH   new_noder   )build_summary_stats_tree_dfr>   r'   summary_dictr(   r)   r      s<   



	

zOLayerwiseProfileResults._build_stats_trees.<locals>.build_summary_stats_tree_dfc           	         s   t | jrt| j}| }| jjd }d}n|  }r/| }| d }d}| j}nd S t	t
|||||d|d}|rH|j| | jD ]} || qK|S )Ni  r"   r   r   rY   r   )r   r   r   r   duration_time_nsr   r<   r   r#   rG   rB   r!   r   )	r   r   r<   r=   rC   r#   r   r   r   )build_model_stats_tree_dfr>   r'   r(   r)   r   )  s8   



zMLayerwiseProfileResults._build_stats_trees.<locals>.build_model_stats_tree_df)Nr(   r+   )r   r   rG   r;   tupler9   rP   rN   r   rB   rO   )r'   r   summary_node
model_noder(   )r   r   r>   r'   r   r   r)   rT      sF   


)


%
z*LayerwiseProfileResults._build_stats_treestreec                    s8   g ddt t dtf fdd |D ]} | qS )Nr   r   r]   c                    s0    || jf | jD ]
} ||d d qd S )Nr-   )r]   )r   rH   r!   )r   r]   r   df_traversalentriesr(   r)   r   Y  s   
zALayerwiseProfileResults._flatten_stats_tree.<locals>.df_traversalrZ   )rG   rE   rA   )r'   r   r   r(   r   r)   rm   T  s
   
z+LayerwiseProfileResults._flatten_stats_treec                    sB   g }dt t dttttf  f fdd |D ]} || q|S )Nr   curr_json_listc                    s8   | t| jg d | jD ]} ||d d  qd S )N)rH   r!   r!   )r   r   rH   r!   )r   r   r   r   r(   r)   r   h  s   
zILayerwiseProfileResults._convert_stats_tree_to_dict.<locals>.df_traversal)rG   rE   r8   rk   r9   r   )r'   r   
root_dictsr   r(   r   r)   r   c  s   z3LayerwiseProfileResults._convert_stats_tree_to_dictr+   )rd   )*r4   r5   r6   r   r7   r   rL   rk   rA   r8   r   rM   r   rN   r   rO   rG   rB   rP   r;   rQ   rU   r9   rr   rx   r   r   r   r   staticmethodr   rE   r   ro   rR   rS   r   r   r   rT   rm   r   r(   r(   r(   r)   rI   R   sJ   
 	%a


rI   c                       s@   e Zd Zd	dedB f fddZ fddZ fddZ  ZS )
layerwise_profileNrQ   c                    s.   t  jtjtjgdddtddd || _dS )a  
        layerwise profile constructor.

        Args:
            num_running_seqs (Optional[int], optional): When given,
                num_running_seqs will be passed to LayerProfileResults
                for metadata update. Defaults to None.
        T)verbose)
activitiesrecord_shapes
with_stackwith_modulesexperimental_configN)super__init__r   CPUr2   r   rQ   )r'   rQ   	__class__r(   r)   r   v  s   	

zlayerwise_profile.__init__c                    s
   t   S r+   )r   	__enter__r&   r   r(   r)   r     s   
zlayerwise_profile.__enter__c                    s(   t  ||| t| jj| jd| _d S )N)rQ   )r   __exit__rI   profilerkineto_resultsrQ   results)r'   exc_typeexc_valexc_tbr   r(   r)   r     s   
zlayerwise_profile.__exit__r+   )r4   r5   r6   rA   r   r   r   __classcell__r(   r(   r   r)   r   u  s    r   )0r   collectionsr   collections.abcr   dataclassesr   r   r   typingr   r   r	   r
   torch._C._autogradr   r   r   torch._C._profilerr   r   r   torch.autograd.profilerr   torch.profilerr   r   vllm.profiler.utilsr   r   r   r   r   r   vllm.utils.import_utilsr   r   r   ImportErrorr   r;   rB   rD   r7   rE   rG   rI   r   r(   r(   r(   r)   <module>   s>   
   $