o
    :/iL                     @   sr  U d Z ddlmZ ddlmZmZmZ ddlZddlm	Z	 ddl
mZ ddlmZ ddlmZ e s6ed	ddlZdd
lmZ ddlmZ ddlmZ ddlmZ edZerwddlm  mZ ddlm Z m!Z! ddl"m#Z# ddl$m%Z%m&Z& ee'Z(e	ddZ)ddde*ddfddZ+		d4deddde,e*ef dB defddZ-G dd deZ.G d d! d!Z/G d"d# d#Z0i Z1e,e*e0f e2d$< de,e*e0f fd%d&Z3d'e*de0dB fd(d)Z4	d5d*edddefd+d,Z5eddd-d.ed/edB ddde0fd0d1Z6e	d5ddd-d.e*dB d/edB dddeege0f fd2d1Z6	d5ddd-d.e*eB dB d/edB ddde0eege0f B fd3d1Z6dS )6a  
vLLM Helion kernel registration with pre-tuned config selection.

This module leverages Helion's internal config selection infrastructure to use
pre-tuned configs instead of runtime autotuning.

How Helion Normally Works
-------------------------
For each kernel invocation, Helion:
1. Computes a cache key from input arguments
2. Looks up the key in its internal compilation cache
3. On cache miss, runs autotuning to find the best config
4. Compiles and caches the kernel with that config

How We Override It
------------------
We override two Helion hooks to use pre-tuned configs:

1. **key**: We provide a key function (derived from config_picker) that
   computes cache keys matching our pre-tuned config keys. This ensures Helion's
   internal cache uses keys that correspond to configs we've prepared.

2. **autotuner_fn**: We provide PresetConfigSearch which, instead of autotuning,
   simply returns the pre-tuned config for the computed key. On cache miss,
   Helion calls our autotuner which returns the author-prepared config.

Both hooks use the same config_picker logic to ensure the cache key computed
by key matches the config returned by the autotuner.

Key Classes
-----------
- HelionKernelWrapper: Wraps raw kernel + config_picker, creates configured kernels
- ConfiguredHelionKernel: Platform-specific kernel with pre-tuned configs
- PresetConfigSearch: Custom autotuner that returns pre-tuned configs
    )Callable)AnycastoverloadN)Library)init_logger)
has_helion)direct_register_custom_opzTregister module requires helion to be installed. Install it with: pip install helion)requires_torch_version)BaseAutotuner)Config)default_autotuner_fnz2.11)helion_kernel_side_tablehelion_kernel_wrapper_mutation)infer_output_spec)disable_proxy_modes_tracingget_proxy_modevllm_helionFRAGMENThelion_settingshelion.Settings | Noneop_namereturnc                 C   sl   | d u rd S |   }d|v r%|d d ur%|d tur%td| d| d|ddu r4td| d S d S )Nautotuner_fnzHelionKernelWrapper for 'za' uses a custom autotuner via config picker. Remove 'autotuner_fn' from helion_settings and use @z .register_config_picker instead.static_shapesTzKernel '%s' has static_shapes=True in helion_settings, which will be overridden to False. vLLM requires dynamic shapes for variable batch sizes and sequence lengths.)to_dictr   
ValueErrorgetloggerwarning)r   r   settings_dict r!   i/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/vllm/kernels/helion/register.pyvalidate_helion_settingsR   s"   r#   raw_kernel_funcextra_kwargsc                 C   s@   i }|r| |  d|d< |r| | tjdi || S )NFr   r!   )updater   helionkernel)r$   r   r%   kernel_kwargsr!   r!   r"   create_helion_decorated_kerneln   s   
r*   c                   @   sT   e Zd ZdZdeedf deeedf gef fddZddd	e	d
efddZ
dS )PresetConfigSearchzJCustom autotuner that uses a preset config selector instead of autotuning.args.config_selectorc                 C   s   || _ || _d S N)r,   r-   )selfr,   r-   r!   r!   r"   __init__   s   
zPresetConfigSearch.__init__F)
skip_cacher1   r   c                C   s   |  | jS r.   )r-   r,   )r/   r1   r!   r!   r"   autotune   s   zPresetConfigSearch.autotuneN)__name__
__module____qualname____doc__tupler   r   r   r0   boolr2   r!   r!   r!   r"   r+      s    

r+   c                	   @   s   e Zd ZdZ	ddedeeedf ee gedB f dB deddfd	d
Z	dd Z
dd Zdd ZdddZdedef fddZdS )ConfiguredHelionKernelz8A configured Helion kernel bound to a specific platform.Nr   config_picker.r$   r   r   c                 C   s&   || _ || _|| _|| _|  | _d S r.   )r   r:   r$   r   _create_decorated_kernel_decorated_kernel)r/   r   r:   r$   r   r!   r!   r"   r0      s
   zConfiguredHelionKernel.__init__c                 O   s   | j |i |S r.   )r<   )r/   r,   kwargsr!   r!   r"   __call__   s   zConfiguredHelionKernel.__call__c                    sB    j du rtd j d j d j dusJ  fdd}|S )z
        Create a key computer function derived from the config picker.

        The returned function receives kernel arguments unpacked (*args) to match
        Helion's key signature (called as self._key_fn(*args)).
        N(No config picker registered for kernel ''. Use @(.register_config_picker to register one.c                     sZ   t  j }ttttdf t t gtd B f  j}|| |}|r$|S d jv r+dS d S )N.default)	listconfigskeysr   r   r7   r   strr:   )r,   config_keysr:   selected_keyr/   r!   r"   key_computer   s   "
zAConfiguredHelionKernel._create_key_computer.<locals>.key_computer)r:   RuntimeErrorr   )r/   rJ   r!   rI   r"   _create_key_computer   s   

z+ConfiguredHelionKernel._create_key_computerc                        fdd}|S )Nc              	      sj    |  }|d u rt dj dtj  |jvr0t d| dj dtj  j| S )Nz(Config picker returned None for kernel 'z' with available config keys: z+Config picker returned invalid config key 'z' for kernel 'z'. Available keys: )r   r   rC   rD   rE   )r,   selected_config_keyrJ   r/   r!   r"   r-      s$   


zGConfiguredHelionKernel._create_config_selector.<locals>.config_selectorr!   )r/   rJ   r-   r!   rO   r"   _create_config_selector   s   z.ConfiguredHelionKernel._create_config_selectorr   c                 C   s^   ddl m} ddlm} | | _| }|| j| j| _| js-t	d| j d| j dd S )Nr   )ConfigManager)get_canonical_gpu_namez!No configs available for kernel 'z' on platform '')
"vllm.kernels.helion.config_managerrQ   vllm.kernels.helion.utilsrR   platformget_instanceget_platform_configsr   rD   r   )r/   rQ   rR   config_managerr!   r!   r"   _load_platform_configs   s   
z-ConfiguredHelionKernel._load_platform_configsc                    sN   |    |  }| |  fdd|d}td| j| j t| j| j	|S )Nc                    s
   t | S r.   )r+   )_r,   r-   r!   r"   <lambda>   s   
 zAConfiguredHelionKernel._create_decorated_kernel.<locals>.<lambda>)r   keyzACreating decorated kernel %s with custom autotuner on platform %s)
rZ   rL   rP   r   debugr   rV   r*   r$   r   )r/   rJ   r%   r!   r\   r"   r;      s   


z/ConfiguredHelionKernel._create_decorated_kernelr.   )r   N)r3   r4   r5   r6   rF   r   r7   r   rC   r0   r>   rL   rP   rZ   r;   r!   r!   r!   r"   r9      s"    "

r9   c                   @   s  e Zd ZdZ	d&dedededdfdd	Zd
d Zdee	df de
ee	f de	fddZede	dee	df de
ee	f dee
ee	f e
ee	f f fddZdeee	df ee gedB f deee	df ee gedB f fddZdeg e
eee	df f f deg e
eee	df f f fddZde
eee	df f fddZ	d'dee	df dedefd d!Zdefd"d#Zde	fd$d%ZdS )(HelionKernelWrapperzKWrapper for Helion kernels with pre-tuned config selection and HOP support.Nr$   r   	fake_implr   r   c                 C   s8   t || || _|| _|| _|| _d | _d | _d | _d S r.   )r#   r$   r   
_fake_implr   _config_picker_configured_kernel_input_generator)r/   r$   r   ra   r   r!   r!   r"   r0      s   

zHelionKernelWrapper.__init__c                 O   sB   t s|  }||i |S t d ur| ||S |  |i |S r.   )_HOP_AVAILABLE_get_or_register_custom_opr   _call_via_hopget_configured_op)r/   r,   r=   	custom_opr!   r!   r"   r>     s   
zHelionKernelWrapper.__call__r,   .r=   r   c                    s  |   j}t|}| |||\}}i || t fdd|jj D }t	  t
||}W d    n1 s;w   Y  t||||d}	|d}
|
d u rSd S t|
}t|	}g }|d D ]#}|d dkost|dtj }|r~||d  qb|t| qbt||S )	Nc                 3   s6    | ]\}}| v s|j |jur ||j V  qd S r.   )rB   emptyr   ).0np	all_namedr!   r"   	<genexpr>&  s    z4HelionKernelWrapper._call_via_hop.<locals>.<genexpr>)
kernel_idxconstant_argstensor_argsoutput_spectree_spec_str
leaf_specstypescalarscalar_value)ri   r<   r   
add_kernel_partition_argsr7   	signature
parametersitemsr   r   r   r   pytreetreespec_loadsiter
isinstancetorchSymIntappendnexttree_unflatten)r/   r,   r=   r(   rr   rs   rt   	full_argsru   
hop_resultrv   	tree_spechop_iterreconstructedspecis_constant_scalarr!   ro   r"   rh     s<   




z!HelionKernelWrapper._call_via_hopr(   c           	      C   s   i }i }t | jj }t|D ]\}}|| }t|tjr#|||< q|||< q| D ]\}}t|tjr;|||< q,|||< q,||fS r.   )	rC   r}   r~   rE   	enumerater   r   Tensorr   )	r(   r,   r=   rs   rt   paramsivalnamer!   r!   r"   r|   G  s   



z#HelionKernelWrapper._partition_argspicker_funcc                 C   
   || _ |S r.   )rc   )r/   r   r!   r!   r"   register_config_picker]  s   z*HelionKernelWrapper.register_config_pickergenerator_funcc                 C   r   )a  
        Register a function to generate inputs for autotuning and benchmarking.

        Args:
            generator_func: Function that returns dict[str, tuple] where:
                - key: Configuration identifier (e.g., "4096", "hidden_4096")
                - value: Tuple of arguments to pass to the kernel

        Returns:
            The registered function (for decorator usage)

        Example:
            @kernel_wrapper.register_input_generator
            def generate_inputs():
                return {
                    "4096": (torch.randn(4096, device="cuda"), 0.5),
                    "8192": (torch.randn(8192, device="cuda"), 0.5),
                }
        )re   )r/   r   r!   r!   r"   register_input_generatorc  s   z,HelionKernelWrapper.register_input_generatorc                 C   s,   | j d u rtd| j d| j d|   S )Nz*No input generator registered for kernel 'r@   z*.register_input_generator to register one.)re   NotImplementedErrorr   rI   r!   r!   r"   
get_inputs|  s   

zHelionKernelWrapper.get_inputsquickinputsautotune_effortc                 C   s$   |dd}t | j| j|}||S )z0Run autotuning for a single input configuration.T)r   autotune_ignore_errors)r*   r$   r   r2   )r/   r   r   r%   autotune_kernelr!   r!   r"   run_autotune  s   

z HelionKernelWrapper.run_autotunec                 C   sN   | j d usJ d| j d| j d| jd u r$t| j| j | j| jd| _| jS )Nr?   r@   rA   )r   r:   r$   r   )rc   r   rd   r9   r$   r   rI   r!   r!   r"   ri     s   

z%HelionKernelWrapper.get_configured_opc                 C   s^   t tjj| jrttjj| jS |  }td| j t	| j|j
d | jtd ttjj| jS )NzRegistering op: vllm_helion::%s)r   op_funcmutates_argsra   
target_lib)hasattrr   opsr   r   getattrri   r   infor	   r<   rb   vllm_helion_lib)r/   configured_kernelr!   r!   r"   rg     s   z.HelionKernelWrapper._get_or_register_custom_opr.   )r   )r3   r4   r5   r6   r   rF   r0   r>   r7   r   dictrh   staticmethodr|   rC   r   r   r   r   r   r9   ri   rg   r!   r!   r!   r"   r`      sf    



,





r`   _REGISTERED_KERNELSc                   C   s   t  S r.   )r   copyr!   r!   r!   r"   get_registered_kernels  s   r   kernel_namec                 C   s
   t | S r.   )r   r   )r   r!   r!   r"   get_kernel_by_name  s   
r   kernel_funcc                    rM   )Nc                     sb   i } r|    tjdi |}|| }|j }||}|| i |ddd iS )N	_launcherc                  _      d S r.   r!   )akwr!   r!   r"   r]     s    z=infer_fake_impl.<locals>.helion_fake_kernel.<locals>.<lambda>r!   )r&   r   r'   r(   bindconfig_specdefault_configcompile_config)r,   r=   r)   temp_decorated_kernelboundr   compiled_runnerr   r   r!   r"   helion_fake_kernel  s   


z+infer_fake_impl.<locals>.helion_fake_kernelr!   )r   r   r   r!   r   r"   infer_fake_impl  s   r   )ra   r   op_name_or_funcra   c                C   r   r.   r!   r   ra   r   r!   r!   r"   register_kernel     r   c                C   r   r.   r!   r   r!   r!   r"   r     r   c                   s8   dt dtf fdd}trtts|S |S )z
    Decorator to register a Helion kernel function as a HelionKernelWrapper.

    Wraps the raw kernel function in a HelionKernelWrapper and registers it
    in the global kernel registry. Auto-generates fake_impl if not provided.
    r   r   c                    s   t trnd }|r|n| j}|tv rtd| d }|d u r.t| }td| j t| ||d}|t|< t	d| j |S )NzHelion kernel 'zV' is already registered. Use a different op_name or check for duplicate registrations.z/Auto-generated fake_impl for Helion kernel '%s')r$   r   ra   r   z4Registered Helion kernel '%s' as HelionKernelWrapper)
r   rF   r3   r   r   r   r   r_   r`   r   )r   r   final_op_namefinal_fake_implkernel_wrapperra   r   r   r!   r"   	decorator  s2   

z"register_kernel.<locals>.decorator)r   r`   callabler   rF   )r   ra   r   r   r!   r   r"   r     s   ")NNr.   )7r6   collections.abcr   typingr   r   r   r   torch.libraryr   vllm.loggerr   vllm.utils.import_utilsr   vllm.utils.torch_utilsr	   ImportErrorr'   helion._compatr
   helion.autotuner.base_searchr   helion.runtime.configr   helion.runtime.settingsr   rf   torch.utils._pytreeutils_pytreer   )helion._compiler._dynamo.higher_order_opsr   r   "helion._compiler._dynamo.variablesr   "torch.fx.experimental.proxy_tensorr   r   r3   r   r   rF   r#   r   r*   r+   r9   r`   r   __annotations__r   r   r   r   r!   r!   r!   r"   <module>   s   $


i >
	
