o
    3/i                  
   @   s  d dl mZmZ d dlmZ d dlmZmZ d dlZd dl	Z	d dl
Z
zd dlmZ W n ey5   dZY nw d dlZd dlmZ d dlmZmZmZmZmZ d dlmZ d dlmZ ejeeeeeedfZ ej!j"j#j$Z%ej&Z'd dl(m  m)Z* e*j+Z,d	d
 Z-e-e*_+e
j.ee
j/ee
j0ee
j1ee
j2eiZ3edd Z4dedeeef fddZ5edde
j6deeef fddZ7dd Z8dd Z9dd Z:dd Z;eG dd dZ<dS )    )Tuple
get_origin)	lru_cache)	dataclassfieldsN)extract)Int32Int64Float16BFloat16Float32)spec)NumericMetac                 C   sj   |d urt |tju rt|S t| tr.tt| dr.|d u s%t|ds.t	| |t| |S t	| |||S )N_fields)
r   cutlass	Constexprr   	ConstNone
isinstancetuplehasattrtype_original_convert_single_arg)argarg_namearg_typectx r   a/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/quack/cute_dsl_utils.py_patched_convert_single_arg'   s   
r   c                 C   s   t j j| dS )Ncluster_size)r   utilsHardwareInfoget_max_active_clustersr   r   r   r   r#   A   s   r#   arch_strreturnc                 C   sF   t d|  t j}|std| d| \}}}t|t|fS )zRParse arch string (e.g. 'sm_90', 'sm90', '90', 'sm_100a') to (major, minor) tuple.z^(?:sm_?)?(\d+)(\d)([af]?)$zInvalid QUACK_ARCH format: z (expected e.g. '90', 'sm_90'))rematchstrip
IGNORECASE
ValueErrorgroupsint)r$   r'   majorminor_r   r   r   _parse_arch_strF   s
   r0   devicec                 C   s(   t jd}|durt|S tj| S )zReturn (major, minor) device capability.

    Override with QUACK_ARCH (e.g. 'sm_90' or '90') for CPU-only compilation
    without a GPU present.
    
QUACK_ARCHN)osenvirongetr0   torchcudaget_device_capability)r1   arch_overrider   r   r   get_device_capacityO   s   r:   c                    sB    fddt  D }dd | D }dd | D }||fS )zISplit dataclass fields into (constexpr_dict, non_constexpr_dict) by type.c                    s   i | ]
}|j t |j qS r   )namegetattr).0fieldobjr   r   
<dictcomp>^   s    z%_partition_fields.<locals>.<dictcomp>c                 S   s    i | ]\}}t |tr||qS r   r   StaticTypesr=   nfr   r   r   rA   _        c                 S   s    i | ]\}}t |ts||qS r   rB   rD   r   r   r   rA   `   rG   )r   items)r@   
all_fields	constexprnon_constexprr   r?   r   _partition_fields\   s   rL   c                 C   sb   t | \}}t| | jD ]\\}}}t||d | ||< ||d  }q| jdi ||S )Nr   )rL   ziprH   _values_posr   new_from_mlir_values	__class__)selfvaluesconstexpr_fieldsnon_constexpr_fieldsr;   r>   n_itemsr   r   r   _new_from_mlir_valuesd   s
   rV   c              	   C   s|   ddl m} t|}g }| D ]*}|du st|tr|| qt||}|t||d|  ||d }q| j	| S )aC  Generic __new_from_mlir_values__ for NamedTuples.

    Applied to NamedTuple classes via the ``@mlir_namedtuple`` decorator.

    Fields that are None or Constexpr (StaticTypes) are preserved from ``self`` (the compile-time
    template). Only non-static fields consume MLIR values. Multi-value fields (e.g. cute.Tensor)
    consume the correct number of values via ``cutlass.new_from_mlir_values``.

    Constexpr fields (annotated ``cutlass.Constexpr[T]``) are baked into the compiled kernel via
    a converter patch (see above). At call time, pass None for these fields.
    r   )get_mlir_typesN)
cutlass.base_dsl.typingrW   listr   rC   appendlenr   rO   rP   )rQ   rR   rW   
new_fields	field_valrU   r   r   r    _namedtuple_new_from_mlir_valuesl   s   
r^   c                 C   s
   t | _| S )zDecorator that adds MLIR value reconstruction to a NamedTuple class.

    Usage::

        @mlir_namedtuple
        class MyArgs(NamedTuple):
            tensor_arg: cute.Tensor
            const_arg: cutlass.Constexpr[int] = 0
    )r^   __new_from_mlir_values__)clsr   r   r   mlir_namedtuple   s   
ra   c                   @   s   e Zd Zdd ZeZdS )
ParamsBasec                 C   sL   t | \}}g g }| _| D ]}t|}||7 }| jt| q|S N)rL   rN   rR   r   extract_mlir_valuesrZ   r[   )rQ   r/   rT   rR   r@   
obj_valuesr   r   r   __extract_mlir_values__   s   
z"ParamsBase.__extract_mlir_values__N)__name__
__module____qualname__rf   rV   r_   r   r   r   r   rb      s    	rb   rc   )=typingr   r   	functoolsr   dataclassesr   r   r3   r&   r6   triton.tools.disasmr   ImportErrorr   cutlass.cutecuter   r	   r
   r   r    cutlass.base_dsl.tvm_ffi_builderr   cutlass.cutlass_dslr   r   r,   boolstrfloatr   rC   base_dslruntimer7   load_cubin_module_dataload_cubin_module_data_ogcompilecute_compile_og)cutlass.cute._tvm_ffi_args_spec_converter_tvm_ffi_args_spec_converter_converter_module_convert_single_argr   r   float16bfloat16float32int32int64torch2cute_dtype_mapr#   r0   r1   r:   rL   rV   r^   ra   rb   r   r   r   r   <module>   sP   	
	 