o
    5/ih                     @   s6   d Z ddlmZ ddlmZmZmZ defddZdS )a3  
Copyright (c) 2025 by FlashInfer team.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
   )env)JitSpecgen_jit_speccurrent_compilation_contextreturnc                  C   s$  ddl m} m} ddlm}m} | dtjd d d d	 |j d
|j||j d|j g d}|t	j
g dd7 }tdtjd tjd tjd tjd tjd tjd tjd tjd g|tjtjd tjd d tjd d d d tjd d d d d tjd d d d tjgdS )a  
    Generate a JitSpec for the MoE utilities module.

    This module contains:
    - moePermute: Permute input activations for MoE routing
    - moeUnpermute: Unpermute and scale outputs after expert computation
    - moeOutputMemset: Zero-initialize output buffers for scattered writes
    - moeActivation: Apply activation functions with optional FP4 quantization
    - moeSort: Sort tokens by expert assignment (DeepSeekV3 routing)
    r   )download_trtllm_headers	get_cubin   )ArtifactPathCheckSumHashbmm
flashinfertrtllmbatched_gemmtrtllmGen_bmm_exportz/include/trtllmGen_bmm_exportz/checksums.txt)z-DTLLM_GEN_EXPORT_INTERFACEz-DENABLE_BF16z-DENABLE_FP8z-DENABLE_FP4)	   
         )supported_major_versions	moe_utilszmoe_utils_binding.cuz;nv_internal/tensorrt_llm/kernels/cuteDslKernels/moeUtils.cuz#nv_internal/cpp/common/envUtils.cppz!nv_internal/cpp/common/logger.cppz&nv_internal/cpp/common/stringUtils.cppz(nv_internal/cpp/common/tllmException.cppz%nv_internal/cpp/common/memoryUtils.cuz$trtllm_fused_moe_routing_deepseek.cunv_internalincludetensorrt_llmcutlass_extensionskernelscutlass_kernels)extra_cuda_cflagsextra_include_paths)cubin_loaderr   r   	artifactsr
   r   jit_envFLASHINFER_CUBIN_DIRTRTLLM_GEN_BMMr   get_nvcc_flags_listr   FLASHINFER_CSRC_DIR)r   r   r
   r   
nvcc_flags r'   e/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/flashinfer/jit/moe_utils.pygen_moe_utils_module   s   
r)   N)	__doc__ r   r!   corer   r   r   r)   r'   r'   r'   r(   <module>   s    