o
    3/iA                     @   s   d dl Z d dlmZmZ d dlmZ d dlmZ eddG dd dZ		dd	ee	 d
e
dee fddZ	dd	ee	 dee fddZ		dd	ee	 d
e
dee fddZdS )    N)OptionalList)partial)	dataclassT)frozenc                   @   sz   e Zd ZU dZeed< dZeed< dZeed< dZ	eed< dZ
eed	< d
Zeed< dZeed< dZeed< dZeed< dS )
GemmConfig   tile_m   tile_nTpingpongis_dynamic_persistent   	cluster_m   	cluster_nFswap_ab   max_swizzle_size	   device_capacityN)__name__
__module____qualname__r	   int__annotations__r   r   boolr   r   r   r   r   r    r   r   ^/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/quack/gemm_config.pyr      s   
 r   epilogue	tune_coopreturnc                 C   s   g d}dd |D ddg }dd |D dg }| dv r,d	d |D }d
d |D }n| dv r7dd |D }g }|rD|dd |D 7 }|dd |D 7 }ddg}| dv rYddg}ddg}| dv rddg}dd t |||D S )N)r      r
      c                 S      g | ]}d |fqS )   r   .0r   r   r   r   
<listcomp>       z%_get_sm90_configs.<locals>.<listcomp>)r      )r   r%   c                 S   r$   )r   r   r&   r   r   r   r(   !   r)   )r
   r   )gatedc                 S   s,   g | ]\}}|d  dkr|dkr||fqS )    r   r
   r   r'   mnr   r   r   r(   #   s   , c                 S   s$   g | ]\}}|d  dkr||fqS )r,   r   r   r-   r   r   r   r(   $   s   $ )lsec                 S   s    g | ]\}}|d kr||fqS )r
   r   r-   r   r   r   r(   &   s     c                 S      g | ]	\}}||d fqS )Fr   r-   r   r   r   r(   )       c                 S   r1   )Tr   r-   r   r   r   r(   *   r2   r   r   r   r   FTr0   r+   c                 S   s4   g | ]\\}}}\}}}t ||||||d ddqS )r   F)r	   r   r   r   r   r   r   r   )r   )r'   r	   r   r   r   r   r   r   r   r   r(   3   s    )	itertoolsproduct)r   r    tile_n_valstile_mn_coop_valstile_mn_pingpong_valstile_mn_valsclusterswap_ab_valsr   r   r   _get_sm90_configs   s8   r>   c                    s   g d}dd |D dd |D  dd |D  dd |D  dd |D  dd |D  }d	d
g}| dv r9d	g}t td	dd d
d	g} fddt|||D S )N)@   r   r"   r
   r*   r%   c                 S      g | ]}d |dfqS )r   )r   r   r   r&   r   r   r   r(   K       z&_get_sm100_configs.<locals>.<listcomp>c                 S   r@   )r   r3   r   r&   r   r   r   r(   L   rA   c                 S   r@   )r   r4   r   r&   r   r   r   r(   M   rA   c                 S   r@   )r   r   r   r   r&   r   r   r   r(   N   rA   c                 S   r@   )r%   r4   r   r&   r   r   r   r(   O   rA   c                 S   r@   )r%   rB   r   r&   r   r   r   r(   P   rA   FTr5   
   )r   r   c                    s2   g | ]\\}}\}}}} |||||d |dqS )r   )r	   r   r   r   r   r   r   r   )r'   r.   r/   cmcnsabuse_clcGemmConfigClsr   r   r(   Y   s    
)r   r   r6   r7   )r   r8   tile_mn_cluster_valsr=   use_clc_valsr   rH   r   _get_sm100_configsF   s2   

rL   c                 C   s   t | |t|  S )a.  Return autotuning configs for all supported device capabilities (sm90 + sm100).

    Each GemmConfig is tagged with its target device_capacity, so the caller can
    filter at runtime based on the actual device. This avoids querying the device
    (and initializing a CUDA context) at import time.
    )r>   rL   )r   r    r   r   r   get_all_configsi   s   
rM   )NT)N)r6   typingr   r   	functoolsr   dataclassesr   r   strr   r>   rL   rM   r   r   r   r   <module>   s<   
0
$