o
    di%                  	   @   s`  d dl Z d dlmZmZmZ d dlZd dlm  mZ	 d dl
m  mZ d dlmZmZmZ d dlmZ d dlmZmZ d dlmZmZmZ d dlmZmZmZmZmZ d dl m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z) d d	l*m+Z+ d d
l,m-Z- dd Z.e j/dd Z0G dd de+Z1G dd deZ2e e	j3e	j4gfdedeej5j6ef dee defddZ7dS )    N)CallableDictList)SymBoolSymFloatSymInt)core_aten_decompositions)from_funto_fun)FunctionalTensorFunctionalTensorModedisable_functional_mode)GraphGraphModuleInterpreterProxy	traceback)	ProxyTorchDispatchMode_ProxyTensor_SymNodeDict	decomposedisable_proxy_modes_tracingfetch_object_proxyfetch_sym_proxyget_proxy_slottrack_tensor_tree)GraphAppendingTracer)WeakTensorKeyDictionaryc                 C   s(   | j ds| j dot| tjj S )Nztorch.nnztorch.ao.nn)
__module__
startswith
isinstancetorchnn
Sequential)m r%   o/lsinfo/ai/hellotax_ai/llm_service/venv_embed/lib/python3.10/site-packages/optimum/fx/parallelization/decomp.pyis_leaf_module)   s   r'   c                  #   s6    t j  fdd} z| t _d V  W  t _d S  t _w )Nc                     s$    | i |}t  d |jd< |S )N	from_nodetraced_from)r   get_current_metameta)argskwargsnode
creat_noder%   r&   create_node_3   s   z)trace_decomp_origin.<locals>.create_node_)r   create_node)r1   r%   r/   r&   trace_decomp_origin/   s   r3   c                       s&   e Zd ZdZdef fddZ  ZS )DecompTracera  
    DecompTracer is a tracer class which works together with `DecompositionInterpreter`, it keeps track of tensors and their
    corresponding proxy objects during execution process. When invoked with `create_proxy`, it creates a node in the containing
    graph and associate the output tensor of the node with the created proxy.

    See https://github.com/pytorch/pytorch/blob/main/torch/fx/experimental/proxy_tensor.py for more details.
    graphc                    s    t  | t | _t | _d S N)super__init__r   tensor_trackerr   symnode_tracker)selfr5   	__class__r%   r&   r8   H   s   zDecompTracer.__init__)__name__r   __qualname____doc__r   r8   __classcell__r%   r%   r<   r&   r4   ?   s    r4   c                       sv   e Zd ZdZ	ddedef fddZ fddZ fd	d
Z fddZ	 fddZ
 fddZ fddZ  ZS )DecompositionInterpretera  
    DecompositionInterpreter takes the high-level graph module, run the iternal nodes following the topo order, and decompose
    high-level pytorch operators into core aten operators by utilizing torch dispatch infrastructure along the way.

    Notes:
        - Certain primitive layers(like `nn.Linear`, `nn.Embedding`, and activation layers) are preserved because we have specific
          heuristic based parallelization strategy for them so that we can conveniently replace them into their parallelized counterparts
          in the orignal graph module.

        - The traced graph is a low-level equivalent representation of the original graph module, and is only used for
          parallel axis propagation and analysis, the original graph module is still used for real execution.
    Nmodule	new_graphc                    sl   t  j|fi | || _t|| _|| _| jd u ri | _|| _| jd u r(g | _t | _t	| jdd| _
d S )Nreal)tracing_mode)r7   r8   rD   r4   tracerdecomposition_tableleaf_function_targetsr   fun_moder   mode)r;   rC   rD   rH   rI   r-   r<   r%   r&   r8   \   s   


z!DecompositionInterpreter.__init__c                    s   t  |||}ttdd |}| jd|||}t  t||d | jd W d    n1 s1w   Y  tt	j
dd |}|S )Nc                 S      t | S r6   r	   xr%   r%   r&   <lambda>p       z6DecompositionInterpreter.placeholder.<locals>.<lambda>placeholderconstantrG   c                 S   rL   r6   r
   rN   r%   r%   r&   rP   v   rQ   )r7   rR   pytreetree_map_onlyr   rG   create_proxyr   r   r!   Tensorr;   targetr,   r-   outproxyr<   r%   r&   rR   n   s   z$DecompositionInterpreter.placeholderc              
      sJ  || j v rttdd |}ttdd |}t " t  ||i |}W d    n1 s0w   Y  W d    n1 s?w   Y  ttjft| j	||f\}}tt
ttft| j	ttdd ||f\}}| j	d|||}t  t||d | j	d W d    n1 sw   Y  ttjdd |}|S t |||S )Nc                 S   rL   r6   rM   rN   r%   r%   r&   rP   {   rQ   z8DecompositionInterpreter.call_function.<locals>.<lambda>c                 S   rL   r6   rM   rN   r%   r%   r&   rP   |   rQ   c                 S      | j S r6   r]   er%   r%   r&   rP          call_functionrS   c                 S   rL   r6   rU   rN   r%   r%   r&   rP      rQ   )rI   rV   rW   r   r   r   r!   rY   r   rG   r   r   r   r   r   rX   r   r7   rc   )r;   r[   r,   r-   r\   
proxy_argsproxy_kwargsr]   r<   r%   r&   rc   y   s*   
  z&DecompositionInterpreter.call_functionc           	   
      s`  t |tsJ | |}t|st |||S ttdd |}ttdd |}t	 " t
  ||i |}W d    n1 sCw   Y  W d    n1 sRw   Y  ttjft| j||f\}}ttttft| jttdd ||f\}}| jd|||}t	  t||d | jd W d    n1 sw   Y  ttjdd |}|S )Nc                 S   rL   r6   rM   rN   r%   r%   r&   rP      rQ   z6DecompositionInterpreter.call_module.<locals>.<lambda>c                 S   rL   r6   rM   rN   r%   r%   r&   rP      rQ   c                 S   r^   r6   r_   r`   r%   r%   r&   rP      rb   call_modulerS   c                 S   rL   r6   rU   rN   r%   r%   r&   rP      rQ   )r    str
fetch_attrr'   r7   rf   rV   rW   r   r   r   r!   rY   r   rG   r   r   r   r   r   rX   r   )	r;   r[   r,   r-   submodr\   rd   re   r]   r<   r%   r&   rf      s.   
  z$DecompositionInterpreter.call_modulec                    sb   t  |||}t| j|| j}t  t||d | jd W d    |S 1 s*w   Y  |S )NrS   )r7   get_attrr   rD   rG   r   r   rZ   r<   r%   r&   rj      s   
z!DecompositionInterpreter.get_attrc                    sX   t tdd |}t tdd |}t |||} fdd} jt || |S )Nc                 S   rL   r6   rM   rN   r%   r%   r&   rP      rQ   z1DecompositionInterpreter.output.<locals>.<lambda>c                 S   rL   r6   rM   rN   r%   r%   r&   rP      rQ   c                    s   t |  j| dd S )Nc                 S   s   | j jS r6   )r]   r.   rN   r%   r%   r&   rP      rQ   zADecompositionInterpreter.output.<locals>.unwrap.<locals>.<lambda>)r   rG   r`   r;   r%   r&   unwrap   s   z/DecompositionInterpreter.output.<locals>.unwrap)rV   rW   r   r7   outputrD   tree_map)r;   r[   r,   r-   r\   rl   r<   rk   r&   rm      s   zDecompositionInterpreter.outputc                    sL  | j  ttjdd |}ttjdd |}t h t U t| j	@ | j
- t j|i |W  d    W  d    W  d    W  d    W  d    S 1 sZw   Y  W d    n1 siw   Y  W d    n1 sxw   Y  W d    n1 sw   Y  W d    d S W d    d S 1 sw   Y  d S )Nc                 S   rL   r6   rU   rN   r%   r%   r&   rP      rQ   z.DecompositionInterpreter.run.<locals>.<lambda>c                 S   rL   r6   rU   rN   r%   r%   r&   rP      rQ   )rJ   rV   rW   r!   rY   r   preserve_node_metar3   r   rH   rK   r7   run)r;   r,   r-   r<   r%   r&   rp      s   &8.  "zDecompositionInterpreter.run)NN)r>   r   r?   r@   r   r   r8   rR   rc   rf   rj   rm   rp   rA   r%   r%   r<   r&   rB   N   s    rB   graph_modulerH   rI   returnc                    s*   t | dt| ||  fdd}|S )a{  
    API to decompose and functionalize a high-level graph module.

    Args:
        graph_module (`GraphModule`):
            The high-level graph module to be decomposed and functionalized.
        decomposition_table (`Dict[torch._ops.OperatorBase, Callable]`, defaults to `core_aten_decompostions()`):
            The lookup table which maps high-level torch op to their equivalent low-level implementation.
        leaf_function_targets (`List[Callable]`, defaults to `[F.scaled_dot_product_attention]`):
            Functions which will not be traced through for convenience, `F.scaled_dot_product_attention` is
            treated as a leaf function by default so that we don't have to deal with all detailed version of
            sdpas in the traced graph.

    Returns:
        Callable: a wrapper which returns the traced low-level graph when called with concrete arguments.
    )owning_modulec                     s    j | i | S r6   )rp   )r,   r-   interprD   r%   r&   wrapper   s   z,decompose_and_functionalize.<locals>.wrapper)r   rB   )rq   rH   rI   rv   r%   rt   r&   decompose_and_functionalize   s   
rw   )8
contextlibtypingr   r   r   r!   torch.nn.functionalr"   
functionalFtorch.utils._pytreeutils_pytreerV   r   r   r   torch._decompr   /torch._functorch._aot_autograd.functional_utilsr	   r
   #torch._subclasses.functional_tensorr   r   r   torch.fxr   r   r   r   r   "torch.fx.experimental.proxy_tensorr   r   r   r   r   r   r   r   r   torch.fx.proxyr   torch.utils.weakr   r'   contextmanagerr3   r4   rB   scaled_dot_product_attentioncross_entropy_opsOperatorBaserw   r%   r%   r%   r&   <module>   s:   ,
y
