o
    3/ia                     @   s^   d Z ddlmZmZ ddlmZ ddlmZ ddlm	Z	m
Z
 dd Zdd	 ZG d
d dZdS )a  ComposableEpiMixin: composes EpiOps into epilogue hook methods.

Subclasses declare _epi_ops as a tuple of EpiOp instances. The mixin auto-generates
epi_smem_bytes_per_stage, epi_get_smem_struct, epi_get_smem_tensors, epi_begin,
epi_begin_loop, epi_end, and EpilogueParams by querying each op.

epi_begin and epi_begin_loop return dicts keyed by op name, so epi_visit_subtile
can access values by name (e.g. epi_loop_tensors["alpha"]).

EpilogueParams is auto-generated from _epi_ops (via param_fields()) plus any
_extra_param_fields declared on the subclass. Subclasses still define
EpilogueArguments and epi_to_underlying_arguments manually.
    )make_dataclassMISSINGN)
const_expr)
EpiContextScalarc                 C   s2   i }d}| D ]}t |ts|||j< |d7 }q|S )u>   Pre-compute name → smem tensor index for each non-Scalar op.r      )
isinstancer   name)opssmem_mapidxop r   a/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/quack/epi_composable.py_compute_smem_map   s   

r   c           
      C   s   g g }}| D ]}|  D ]\}}}|tu r|n||||f qq|D ]\}}}|tu r/|n||||f q$dd |D dd |D  }	td|	|dS )zBuild EpilogueParams dataclass from epi_ops + extra fields.

    Required fields (default=MISSING) are placed first, then optional fields.
    c                 S   s   g | ]	\}}}||fqS r   r   ).0nt_r   r   r   
<listcomp>.   s    z$_make_epi_params.<locals>.<listcomp>c                 S   s   g | ]
\}}}|||fqS r   r   )r   r   r   dr   r   r   r   .   s    EpilogueParams)bases)param_fieldsr   appendr   )
epi_opsextra_fieldsr   requiredoptionalr   r	   typdefaultfieldsr   r   r   _make_epi_params#   s   
r"   c                       s   e Zd ZdZdZdZdZi ZdZ fddZ	dd Z
edd	 Zd
d Zdd ZdddddZejdd Zdd Zejdd Z  ZS )ComposableEpiMixinzABase mixin that composes EpiOps into the standard epilogue hooks.r   Fc                    sd   t  jdi | | jr.t| j| _tdd | jD | _d| jvr0t| j| j	| j
| _d S d S d S )Nc                 s   s    | ]}|  V  qd S N)needs_async_fencer   r   r   r   r   	<genexpr>?   s    z7ComposableEpiMixin.__init_subclass__.<locals>.<genexpr>r   r   )super__init_subclass___epi_opsr   _epi_smem_mapany_epi_has_async_ops__dict__r"   _extra_param_fields_epi_param_basesr   )clskwargs	__class__r   r   r)   ;   s   

z$ComposableEpiMixin.__init_subclass__c                 C   s&   i }| j D ]}||| | q|S )zMerge each op's to_params into a single dict. Subclasses call this,
        add custom fields, then construct self.EpilogueParams(**d).)r*   update	to_params)selfargsr   r   r   r   r   _epi_ops_to_params_dictH   s   
z*ComposableEpiMixin._epi_ops_to_params_dictc                    s   t  fdd| jD S )Nc                 3   s(    | ]}| t |jd V  qd S r$   )
smem_bytesgetattrr	   r&   r8   cta_tile_shape_mnkepi_tiler   r   r'   T   s
    
z>ComposableEpiMixin.epi_smem_bytes_per_stage.<locals>.<genexpr>)sumr*   )r1   r8   r=   r>   r   r<   r   epi_smem_bytes_per_stageR   s   z+ComposableEpiMixin.epi_smem_bytes_per_stagec                 C   sN   i }| j D ]}|| |}|d ur|\}}|||< qtddd|i}t|S )NEpiSharedStorager   __annotations__)r*   smem_struct_fieldtypecutestruct)r7   paramsr!   r   resultr	   ftyperA   r   r   r   epi_get_smem_structY   s   

z&ComposableEpiMixin.epi_get_smem_structc                    s   t  fddjD S )Nc                 3   s*    | ]}t |ts| jV  qd S r$   )r   r   get_smem_tensorepir&   rG   r7   storager   r   r'   d   s    
z:ComposableEpiMixin.epi_get_smem_tensors.<locals>.<genexpr>)tupler*   )r7   rG   rN   r   rM   r   epi_get_smem_tensorsc   s   z'ComposableEpiMixin.epi_get_smem_tensorsN)locipc                C   s&   g }| j D ]}||| | q|S r$   )r*   extend	tma_atoms)r7   rG   rQ   rR   atomsr   r   r   r   epi_get_tma_atomsj   s   
z$ComposableEpiMixin.epi_get_tma_atomsc
              	      s   t |||||||	 j fddjD }
tjr@tfddjD }t|r@tj  tj	d |
  |
S )Nc              
      sB   i | ]}|j |t|j d |j v r|j   nd  qS r$   )r	   beginr;   r&   ctxepi_smem_tensorsrG   r7   r   r   r   
<dictcomp>   s    z0ComposableEpiMixin.epi_begin.<locals>.<dictcomp>c                 3   s*    | ]}|  rt |jd d uV  qd S r$   )r%   r;   r	   r&   )rG   r   r   r'      s    
z/ComposableEpiMixin.epi_begin.<locals>.<genexpr>r   )r   r+   r*   r   r-   r,   rE   archcp_async_commit_groupcp_async_wait_grouparrive_and_wait)r7   rG   rZ   r>   tiled_copy_t2rtiled_copy_r2stile_coord_mnklvarlen_managerepilogue_barriertidxresultshas_async_datar   rX   r   	epi_beginr   s.   

	
zComposableEpiMixin.epi_beginc                    s    fddj D S )Nc                    s$   i | ]}|j ||j   qS r   )r	   
begin_loopr&   	epi_coordepi_tensorsr7   r   r   r[      s    z5ComposableEpiMixin.epi_begin_loop.<locals>.<dictcomp>)r*   )r7   rG   rl   rk   r   rj   r   epi_begin_loop   s   z!ComposableEpiMixin.epi_begin_loopc	           
      C   s:   | j D ]}	|	| t||	jd ||	j ||||||	 qd S r$   )r*   endr;   r	   )
r7   rG   rl   r>   r`   ra   rb   rc   re   r   r   r   r   epi_end   s   
zComposableEpiMixin.epi_end)__name__
__module____qualname____doc__r*   r/   r0   r+   r-   r)   r9   classmethodr@   rJ   rP   rV   rE   jitrh   rm   ro   __classcell__r   r   r3   r   r#   2   s&    



,r#   )rs   dataclassesr   r   cutlass.cuterE   cutlassr   quack.epi_opsr   r   r   r"   r#   r   r   r   r   <module>   s   