o
    3/i                     @   s`   d Z ddlZddlmZ ddlm  mZ ddlm	Z	 ddl
mZ dd Zdd Zd
dd	ZdS )z>Epilogue utilities: shared helpers for epilogue mixin classes.    Nc                    s>    du rdS t  fdd jD }t jtj j|dS )zAssume all strides are divisible by 32 bits (except static strides).

    Used for broadcast vectors and similar tensors where stride alignment is guaranteed.
    Returns a new tensor with the assumed strides.
    Nc                 3   s4    | ]}t |st j|d  jj dn|V  qdS )    )divbyN)cute	is_staticassumeelement_typewidth).0stensor \/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/quack/epi_utils.py	<genexpr>   s
    "
z-assume_stride_divisibility.<locals>.<genexpr>)stride)tupler   r   make_tensoriteratormake_layoutshape)r   
new_strider   r   r   assume_stride_divisibility   s   r   c                  G   s   dd | D S )zApply stride divisibility assumptions to multiple broadcast vectors.

    Returns a list with None preserved for None inputs.
    c                 S   s   g | ]}t |qS r   )r   )r	   tr   r   r   
<listcomp>    s    z,assume_broadcast_strides.<locals>.<listcomp>r   )tensorsr   r   r   assume_broadcast_strides   s   r   storec                 C   s   |du r| j }|j}tjj|}| jdkrtnt}|	|||| j
}t|dkr2tj|dddn|}| j||||d\}	}
|	|
||fS )a  Create TMA atom + smem layout for a supplemental epilogue tensor.

    Args:
        gemm: The GEMM object (provides arch, epi_stage, _make_tma_epi_atoms_and_tensors).
        tensor: The global memory tensor to set up TMA for.
        epi_tile: Epilogue tile shape. Defaults to gemm.epi_tile.
        op_type: "store" or "load".

    Returns:
        (tma_atom, tma_tensor, smem_layout_staged, epi_tile)
    Nd      r   T)
ragged_dim	ptr_shift)op_type)epi_tiler   cutlassutils
LayoutEnumfrom_tensorarchsm100_utils
sm90_utilsmake_smem_layout_epi	epi_stager   rank
copy_utilscreate_ragged_tensor_for_tma_make_tma_epi_atoms_and_tensors)gemmr   r"   r!   dtypelayout	utils_clssmem_layout_staged	tma_inputtma_atom
tma_tensorr   r   r   setup_epi_tensor#   s"   
r8   )Nr   )__doc__r#   cutlass.cuter   cutlass.utils.blackwell_helpersr$   blackwell_helpersr(   quack.sm90_utilsr)   quack.copy_utilsr-   r   r   r8   r   r   r   r   <module>   s   