import os
from datetime import timedelta
from functools import cache, lru_cache, wraps
from typing import TYPE_CHECKING

import regex as re
import torch
from torch.distributed import PrefixStore, ProcessGroup
from torch.distributed.distributed_c10d import is_nccl_available

import vllm.envs as envs
from vllm.logger import init_logger
from vllm.utils.torch_utils import cuda_device_count_stateless
from vllm.v1.attention.backends.registry import AttentionBackendEnum

from .interface import DeviceCapability, Platform, PlatformEnum

if TYPE_CHECKING:
    from vllm.config import VllmConfig
    from vllm.v1.attention.selector import AttentionSelectorConfig

logger = init_logger(__name__)

try:
    from amdsmi import (
        AmdSmiException,
        amdsmi_get_gpu_asic_info,
        amdsmi_get_processor_handles,
        amdsmi_init,
        amdsmi_shut_down,
        amdsmi_topo_get_link_type,
    )
except ImportError as e:
    logger.warning("Failed to import from amdsmi with %r", e)

try:
    import vllm._C  # noqa: F401
except ImportError as e:
    logger.warning("Failed to import from vllm._C with %r", e)

try:
    import vllm._rocm_C  # noqa: F401
except ImportError as e:
    logger.warning("Failed to import from vllm._rocm_C with %r", e)

_ROCM_UNSUPPORTED_MODELS: list[str] = []
_ROCM_PARTIALLY_SUPPORTED_MODELS: dict[str, str] = {}

# PCI device-id -> marketing name. Repeated names cover virtual-function
# variants of the same part.
_ROCM_DEVICE_ID_NAME_MAP: dict[str, str] = {
    "0x74a0": "AMD_Instinct_MI300A",
    "0x74a1": "AMD_Instinct_MI300X",
    "0x74b5": "AMD_Instinct_MI300X",
    "0x74a2": "AMD_Instinct_MI308X",
    "0x74a5": "AMD_Instinct_MI325X",
    "0x74b9": "AMD_Instinct_MI325X",
    "0x74a9": "AMD_Instinct_MI300X_HF",
    "0x74bd": "AMD_Instinct_MI300X_HF",
    "0x744c": "AMD_Radeon_RX7900XTX",
}


def _sync_hip_cuda_env_vars() -> None:
    """Ensure HIP_VISIBLE_DEVICES and CUDA_VISIBLE_DEVICES are consistent.

    Treats empty string as unset. Raises on genuine conflicts.
    """
    hip_val = os.environ.get("HIP_VISIBLE_DEVICES") or None
    cuda_val = os.environ.get("CUDA_VISIBLE_DEVICES") or None
    if hip_val is not None and cuda_val is not None:
        if hip_val != cuda_val:
            raise ValueError(
                "Inconsistent GPU visibility env vars: "
                f"HIP_VISIBLE_DEVICES='{hip_val}' vs "
                f"CUDA_VISIBLE_DEVICES='{cuda_val}'. "
                "Please set only one, or ensure they match."
            )
    elif hip_val is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = hip_val
    elif cuda_val is not None:
        os.environ["HIP_VISIBLE_DEVICES"] = cuda_val


_sync_hip_cuda_env_vars()


def with_amdsmi_context(fn):
    """Run fn inside an amdsmi_init()/amdsmi_shut_down() pair."""

    @wraps(fn)
    def wrapper(*args, **kwargs):
        amdsmi_init()
        try:
            return fn(*args, **kwargs)
        finally:
            amdsmi_shut_down()

    return wrapper


@with_amdsmi_context
def _query_gcn_arch_from_amdsmi() -> str:
    """Query GCN arch from amdsmi. Raises if not available."""
    handles = amdsmi_get_processor_handles()
    if handles:
        asic_info = amdsmi_get_gpu_asic_info(handles[0])
        target_gfx = asic_info.get("target_graphics_version", "")
        if target_gfx:
            return target_gfx
    raise RuntimeError("amdsmi did not return valid GCN arch")
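
# Usage sketch for with_amdsmi_context (illustrative only; the helper name
# below is hypothetical and not part of this module): any one-shot amdsmi
# query can be wrapped the same way, so library init/teardown is never
# leaked even when the query raises.
#
#     @with_amdsmi_context
#     def _query_device_id() -> str:
#         handles = amdsmi_get_processor_handles()
#         return amdsmi_get_gpu_asic_info(handles[0])["device_id"]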
   C   sR   zt  W S  ty! }  ztd|  td W Y d} ~ nd} ~ ww tjdjS )z
    Get GCN arch via amdsmi (no CUDA init), fallback to torch.cuda.
    Called once at module level; result stored in _GCN_ARCH.
    z%Failed to get GCN arch via amdsmi: %szFailed to get GCN arch via amdsmi, falling back to torch.cuda. This will initialize CUDA and may cause issues if CUDA_VISIBLE_DEVICES is not set yet.Ncuda)	rE   	Exceptionloggerdebugwarning_oncetorchrF   get_device_propertiesgcnArchName)er4   r4   r5   _get_gcn_arch|   s   rO   c                 c       | ]}|t v V  qd S r7   	_GCN_ARCH.0archr4   r4   r5   	<genexpr>       rV   )gfx11gfx12c                 c   rP   r7   rQ   rS   r4   r4   r5   rV      rW   )gfx942gfx950c                 c   rP   r7   rQ   rS   r4   r4   r5   rV      rW   )gfx90arZ   r[   rZ   r[   gcn_archc                 C   s   t d| }|s
dS |d}t|}|dk r"td|  d| d|dv r3t|d	 }t|d }n"|d
krFt|dd }t|d }n|dkrUtd|  d| d|dk rgtd|  d| d| d|dkrytd|  d| d| d||fS )ac  
    Parse (major, minor) from a GCN arch string, mirroring how
    HIP derives hipDeviceProp_t.major / .minor.

    Format: gfx<MAJOR><MINOR><STEPPING>
      - 1-digit major  (gfx9xx):  "gfx" + M + m + stepping
      - 2-digit major  (gfx1xxx): "gfx" + MM + m + stepping

    Examples:
      gfx90a  -> (9, 0)    gfx942  -> (9, 4)    gfx950 -> (9, 5)
      gfx1100 -> (11, 0)   gfx1101 -> (11, 0)   gfx1200 -> (12, 0)

    Returns None only when the string is not gfx-prefixed at all
    (i.e. not a ROCm arch string). Raises on any string that looks
    like a GCN arch but does not match a known layout.
    zgfx(\d+)Nr      z
GCN arch 'z' has too few digits (zb) after 'gfx' to derive a (major, minor) capability. Please file a vLLM issue with your GPU model.)r^      r         z' has z digits after 'gfx', which exceeds the known 4-digit layout (MMms). Cannot determine major/minor split unambiguously. Please file a vLLM issue with your GPU model.	   z0Parsed unknown ROCm architecture from GCN arch 'z	': major=z, minor=zl. Major version < 9 is not expected for any supported AMD GPU. Please file a vLLM issue with your GPU model.   z. Major version > 12 is beyond currently known AMD generations. Please file a vLLM issue with your GPU model so support can be added.)rematchgrouplenr1   int)r]   mdigitsnmajorminorr4   r4   r5   _capability_from_gcn_arch   sH   
rn   c                   C      t S r7   )	_ON_GFX1Xr4   r4   r4   r5   on_gfx1x      rq   c                   C   ro   r7   )	_ON_MI3XXr4   r4   r4   r5   on_mi3xx   rr   rt   c                   C   ro   r7   )_ON_GFX9r4   r4   r4   r5   on_gfx9   rr   rv   c                   C   ro   r7   )


def on_gfx942() -> bool:
    return _ON_GFX942


def on_gfx950() -> bool:
    return _ON_GFX950


@cache
def use_rocm_custom_paged_attention(
    qtype: torch.dtype,
    head_size: int,
    block_size: int,
    gqa_ratio: int,
    max_seq_len: int,
    sliding_window: int,
    kv_cache_dtype: str,
    alibi_slopes: torch.Tensor | None = None,
    sinks: torch.Tensor | None = None,
) -> bool:
    # The custom paged-attention kernel supports a restricted set of shapes;
    # sliding window must be disabled (0 or 1) and attention sinks are not
    # supported on either architecture family.
    if _ON_GFX9:
        return (
            (sliding_window == 0 or sliding_window == 1)
            and (qtype == torch.half or qtype == torch.bfloat16)
            and (head_size == 64 or head_size == 128)
            and (block_size == 16 or block_size == 32)
            and (gqa_ratio >= 1 and gqa_ratio <= 16)
            and max_seq_len <= 128 * 1024
            and envs.VLLM_ROCM_CUSTOM_PAGED_ATTN
            and sinks is None
        )

    return (
        _ON_GFX1X
        and (sliding_window == 0 or sliding_window == 1)
        and (qtype == torch.half or qtype == torch.bfloat16)
        and head_size == 128
        and block_size == 16
        and (gqa_ratio >= 3 and gqa_ratio <= 16)
        and max_seq_len <= 128 * 1024
        and alibi_slopes is None
        and kv_cache_dtype == "auto"
        and envs.VLLM_ROCM_CUSTOM_PAGED_ATTN
        and sinks is None
    )
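
# Illustrative call (hypothetical shapes, not executed at import): a 16-way
# GQA fp16 model with 128-dim heads and 16-token blocks on gfx9 takes the
# custom kernel path, provided VLLM_ROCM_CUSTOM_PAGED_ATTN is enabled.
#
#     use_rocm_custom_paged_attention(
#         qtype=torch.half, head_size=128, block_size=16, gqa_ratio=16,
#         max_seq_len=8192, sliding_window=0, kv_cache_dtype="auto",
#     )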


@cache
def flash_attn_triton_available() -> bool:
    if not on_gfx1x():
        return False
    try:
        from importlib.util import find_spec

        if find_spec("flash_attn") is None:
            return False
        if find_spec("flash_attn.flash_attn_triton_amd") is None:
            return False
        if os.environ.get("FLASH_ATTENTION_TRITON_AMD_ENABLE") != "TRUE":
            logger.info_once(
                "Set FLASH_ATTENTION_TRITON_AMD_ENABLE=TRUE to enable Flash "
                "Attention Triton backend on RDNA."
            )
            return False
        return True
    except ImportError:
        return False


def _get_backend_priorities(
    use_mla: bool,
    use_sparse: bool,
) -> list[AttentionBackendEnum]:
    from vllm._aiter_ops import rocm_aiter_ops

    if use_sparse:
        return [AttentionBackendEnum.ROCM_AITER_MLA_SPARSE]

    if use_mla:
        if rocm_aiter_ops.is_mla_enabled():
            return [
                AttentionBackendEnum.ROCM_AITER_MLA,
                AttentionBackendEnum.TRITON_MLA,
                AttentionBackendEnum.ROCM_AITER_TRITON_MLA,
            ]
        return [AttentionBackendEnum.TRITON_MLA]

    backends: list[AttentionBackendEnum] = []
    if envs.VLLM_ROCM_USE_AITER and envs.VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION:
        backends.append(AttentionBackendEnum.ROCM_AITER_UNIFIED_ATTN)
    if envs.VLLM_ROCM_USE_AITER and envs.VLLM_ROCM_USE_AITER_MHA:
        backends.append(AttentionBackendEnum.ROCM_AITER_FA)

    from vllm.config import get_current_vllm_config_or_none

    vllm_config = get_current_vllm_config_or_none()
    if vllm_config is not None and vllm_config.attention_config.use_prefill_decode_attention:
        backends.append(AttentionBackendEnum.ROCM_ATTN)

    backends.append(AttentionBackendEnum.TRITON_ATTN)
    return backends
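
# Sketch of how the priority list is consumed (see get_valid_backends and
# get_attn_backend_cls below; the values shown are illustrative): each
# backend is validated in priority order, and the first one that reports no
# failure reasons wins, with TRITON_ATTN as the always-present fallback.
#
#     priorities = _get_backend_priorities(use_mla=False, use_sparse=False)
#     # -> e.g. [ROCM_AITER_UNIFIED_ATTN, ROCM_AITER_FA, TRITON_ATTN]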
eed< dZeed< d	Zeed
< dZeed< g dZee ed< g dZee ed< edu fddZe	dvdedddedB deeedef  edee f f fddZe	dvdddddedB defddZeded fdd Ze	dvd!ed"ejd#d$ddfd%d&Zed'ejddfd(d)Zeed*d+dwd-ededB fd.d/Z ee!d0ee de"fd1d2Z#ee!ed*d+dwd-edefd3d4Z$edwd-edefd5d6Z%edxd9d:Z&edxd;d<Z'edxd=d>Z(ed?eddfd@dAZ)edBeddf fdCdDZ*edefdEdFZ+e	dvd'ej,j-dB de.fdGdHZ/edefdIdJZ0ede"fdKdLZ1ede"fdMdNZ2ede"fdOdPZ3edejfdQdRZ4ede"fdSdTZ5ede"fdUdVZ6ede"fdWdXZ7edefdYdZZ8ed#ed[e9d\ed]ed^e:de;fd_d`Z<edefdadbZ=ed"ejfdcddZ>edeej?dfej?dgej?dhej?ddf
didjZ@edeej?dfej?dgej?dhej?ddf
dkdlZAede"fdmdnZBede"fdodpZCedwd-edefdqdrZDede"fdsdtZE  ZFS )yRocmPlatformrocmdevice_namerF   device_typeCUDAdispatch_keyGPUray_device_keynccldist_backendr-   device_control_env_var)*RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES+RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES+RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICESray_noset_device_env_vars)awq
awq_marlingptqgptq_marlinfp8zcompressed-tensors
fbgemm_fp8ggufquarkptpc_fp8mxfp4petit_nvfp4torchaobitsandbytessupported_quantizationr>   Nc                    sJ   t    ddl}|t ddl}W d   dS 1 sw   Y  dS )zImport ROCm-specific kernels.r   N)superimport_kernels

    @classmethod
    def import_kernels(cls) -> None:
        """Import ROCm-specific kernels."""
        super().import_kernels()
        import contextlib

        with contextlib.suppress(ImportError):
            import vllm._rocm_C  # noqa: F401
W n ty1   dg}
Y nw |
r9|
||< q|||f q||fS )Nr   r   r4   )	r   r   r   	enumerate	get_classvalidate_configuration_asdictr   r   )r   r   r   r   valid_backends_prioritiesinvalid_reasonsbackend_prioritiesprioritybackendbackend_classinvalid_reasons_ir4   r4   r5   get_valid_backends  s*   



zRocmPlatform.get_valid_backendsselected_backendc              	      s|  |   }|d us
J |jd d}|d urIz| }|jdd|i| }W n ty2   dg}Y nw |r?td| d| td| |	 S | j
|||d\ }dd	d
d | D  d }| }td| j d| d| d t dkrtd| j d| d| dttt  fddd}	|	d }
 |
 d }tjd|jdd	dd  D  d dd |	 S )N)r}   r   r   zSelected backend z. is not valid for this configuration. Reason: zUsing %s backend.)r   r   r   {, c                 s   s,    | ]\}}|j  d d| dV  qdS )z: [r   ]N)namejoin)rT   r   reasonsr4   r4   r5   rV     s
    
z4RocmPlatform.get_attn_backend_cls.<locals>.<genexpr>}z*Some attention backends are not valid for z with z. Reasons: .r   z%No valid attention backend found for c                    s    |  d S )Nr   r4   )ir   r4   r5   <lambda>  s    z3RocmPlatform.get_attn_backend_cls.<locals>.<lambda>)keyz9Using %s attention backend out of potential backends: %s.[c                 s   s"    | ]}d |d j  d V  qdS )'r   N)r   )rT   br4   r4   r5   rV     s     r   local)scoper4   )get_device_capability_replacer   r   r   r   r1   rH   infoget_pathr   r   items__repr__
debug_oncer   rg   sortedranger   r   )r   r   r   r   r   r   r   reasons_str
config_strsorted_indicesselected_indexr4   r   r5   get_attn_backend_cls  s   







z!RocmPlatform.get_attn_backend_clsc                 C   s   t jt jt jt jgS r7   )r   

    @classmethod
    def get_supported_vit_attn_backends(cls) -> list[AttentionBackendEnum]:
        return [
            AttentionBackendEnum.FLASH_ATTN,
            AttentionBackendEnum.ROCM_AITER_FA,
            AttentionBackendEnum.TRITON_ATTN,
            AttentionBackendEnum.TORCH_SDPA,
        ]
S t rW|d	d urW|tjksO|tjkrWtd
 t	jS t rot ro|tjksg|tjkrotd t	jS td t	jS )NzBackend z= is not supported for vit attention. Supported backends are: zUsing backend z for vit attentionr   r   r   z2Using AITER Flash Attention backend for ViT model.r   z,Using Flash Attention backend for ViT model.z=Using Flash Attention (Triton backend) for ViT model on RDNA.z'Using Torch SDPA backend for ViT model.)r	  rH   r   r   r   r   r   
is_enabledrv   r   r   rK   float16r   r  rq   r   r  )r   r|   r
  r   r   r   r4   r4   r5   get_vit_attn_backend  s<   


z!RocmPlatform.get_vit_attn_backenddevicec                 C   s   t j| dS )z:

    @classmethod
    def set_device(cls, device: torch.device) -> None:
        """
        Set the device for the current platform.
        """
        torch.cuda.set_device(device)

    @classmethod
    @lru_cache(maxsize=8)
    def get_device_capability(cls, device_id: int = 0) -> DeviceCapability | None:
        cap = _capability_from_gcn_arch(_GCN_ARCH)
        if cap is not None:
            return DeviceCapability(major=cap[0], minor=cap[1])
        logger.debug(
            "Could not derive device capability from GCN arch '%s', falling "
            "back to torch.cuda (this will initialize CUDA).",
            _GCN_ARCH,
        )
        major, minor = torch.cuda.get_device_capability(device_id)
        return DeviceCapability(major, minor)
}~  dS d
}~ww qqdS )zN
        Query if the set of gpus are fully connected by xgmi (1 hop)
        c                 S   s   g | ]}t  | qS r4   )r   )rT   r   r4   r4   r5   
<listcomp>K  s    z3RocmPlatform.is_fully_connected.<locals>.<listcomp>hopsr   typer^   Fz AMD 1 hop XGMI detection failed.)exc_infoNT)r   r   r   rH   error)	r   r  rB   r   handlejpeer_handle	link_typer  r4   r4   r5   is_fully_connectedE  s"   


zRocmPlatform.is_fully_connectedc                 C   s<   |  |}t | }t|}|d }|tv rt| S |d S )Nr  market_name)device_id_to_physical_device_idr   r   r+   )r   r  physical_device_idr  rC   r   r4   r4   r5   get_device_nameY  s   

zRocmPlatform.get_device_namec                 C   s   t j|}|jS r7   )rK   rF   rL   total_memory)r   r  device_propsr4   r4   r5   get_device_total_memorye  s   z$RocmPlatform.get_device_total_memoryr   r   c           
      C   s  ddl m} ddlm} |j}|j|jk}| }| }|	 }|
 }	|r4|s4d|jvr4|jd |rAd|jvrA|jd |	rSd|jv rStd	 |jd |red
|jvred|jvre|jd
 | ryd|jvryd|jvry|jd |jd d S )Nr   r   CUDAGraphModez	-rms_normz	+rms_normz
-quant_fp8z
+quant_fp8z-grouped_topkzVLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS is enabled, which requires the 'grouped_topk' custom op. Overriding the user-provided '-grouped_topk'.z+grouped_topkz+rotary_embeddingz-rotary_embeddingz+sparse_attn_indexer)r   r   vllm.config.compilationr'  compilation_configcudagraph_modeNONEis_fused_moe_enabledis_rmsnorm_enabledis_linear_fp8_enabled$is_fusion_moe_shared_experts_enabled
custom_opsr   rH   rJ   remover  )
r   r   r   r'  r)  is_eager_executionuse_aiter_fused_moeuse_aiter_rms_normuse_aiter_fp8_linearuse_aiter_fused_ser4   r4   r5   apply_config_platform_defaultsj  s@   





    @classmethod
    def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
        from vllm.config.compilation import CUDAGraphMode

        cache_config = vllm_config.cache_config
        compilation_config = vllm_config.compilation_config
        parallel_config = vllm_config.parallel_config

        if compilation_config.cudagraph_mode.has_full_cudagraphs():
            if parallel_config.decode_context_parallel_size > 1:
                logger.debug(
                    "Decode context parallel (DCP) is enabled, which is "
                    "incompatible with full CUDA graphs. Overriding "
                    "cudagraph_mode to PIECEWISE."
                )
                compilation_config.cudagraph_mode = CUDAGraphMode.PIECEWISE
            elif parallel_config.prefill_context_parallel_size > 1:
                logger.debug(
                    "Prefill context parallel (PCP) is enabled, which is "
                    "incompatible with full CUDA graphs. Overriding "
                    "cudagraph_mode to PIECEWISE."
                )
                compilation_config.cudagraph_mode = CUDAGraphMode.PIECEWISE

        if cache_config and not cache_config.user_specified_block_size:
            if (
                envs.VLLM_ROCM_USE_AITER
                and envs.VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION
            ):
                cache_config.block_size = 64
                logger.warning(
                    "[ROCM_AITER_UNIFIED_ATTN]: Setting kv cache block size "
                    "to 64."
                )
            else:
                cache_config.block_size = 16

        if parallel_config.worker_cls == "auto":
            parallel_config.worker_cls = "vllm.v1.worker.gpu_worker.Worker"

    @classmethod
    def update_block_size_for_backend(cls, vllm_config: "VllmConfig") -> None:
        return None

    @classmethod
    def verify_model_arch(cls, model_arch: str) -> None:
        if model_arch in _ROCM_UNSUPPORTED_MODELS:
            raise ValueError(
                f"Model architecture '{model_arch}' is not supported by ROCm "
                "for now."
            )
        if model_arch in _ROCM_PARTIALLY_SUPPORTED_MODELS:
            msg = _ROCM_PARTIALLY_SUPPORTED_MODELS[model_arch]
            logger.warning(
                "Model architecture '%s' is partially supported by ROCm: %s",
                model_arch,
                msg,
            )

    @classmethod
    def verify_quantization(cls, quant: str) -> None:
        super().verify_quantization(quant)
        if quant == "awq" and not envs.VLLM_USE_TRITON_AWQ:
            logger.warning(
                "Using AWQ quantization with ROCm, but VLLM_USE_TRITON_AWQ "
                "is not set, enabling VLLM_USE_TRITON_AWQ."
            )
            os.environ["VLLM_USE_TRITON_AWQ"] = "1"

    @classmethod
    def get_punica_wrapper(cls) -> str:
        return "vllm.lora.punica_wrapper.punica_gpu.PunicaWrapperGPU"

    @classmethod
    def get_current_memory_usage(
        cls, device: torch.types.Device | None = None
    ) -> float:
        torch.cuda.reset_peak_memory_stats(device)
        free_mem, total_mem = torch.cuda.mem_get_info(device)
        return total_mem - free_mem

    @classmethod
    def get_device_communicator_cls(cls) -> str:
        return (
            "vllm.distributed.device_communicators.cuda_communicator"
            ".CudaCommunicator"
        )

    @classmethod
    def supports_mx(cls) -> bool:
        return any(gfx in _GCN_ARCH for gfx in ("gfx95",))

    @classmethod
    def supports_fp8(cls) -> bool:
        return any(gfx in _GCN_ARCH for gfx in ("gfx94", "gfx95", "gfx12"))

    @classmethod
    def is_fp8_fnuz(cls) -> bool:
        # Only MI300-family (gfx94*) uses the fnuz fp8 encoding.
        return "gfx94" in _GCN_ARCH

    @classmethod
    def fp8_dtype(cls) -> torch.dtype:
        if cls.is_fp8_fnuz():
            return torch.float8_e4m3fnuz
        return torch.float8_e4m3fn

    @classmethod
    def use_custom_allreduce(cls) -> bool:
        return any(gfx in _GCN_ARCH for gfx in ("gfx94", "gfx95"))

    @classmethod
    def opaque_attention_op(cls) -> bool:
        return True

    @classmethod
    def is_navi(cls) -> bool:
        return "gfx1" in _GCN_ARCH

    @classmethod
    def get_static_graph_wrapper_cls(cls) -> str:
        return "vllm.compilation.cuda_graph.CUDAGraphWrapper"

    @classmethod
    def stateless_init_device_torch_dist_pg(
        cls,
        backend: str,
        prefix_store: PrefixStore,
        group_rank: int,
        group_size: int,
        timeout: timedelta,
    ) -> ProcessGroup:
        assert is_nccl_available()
        pg = ProcessGroup(prefix_store, group_rank, group_size)
        from torch.distributed.distributed_c10d import ProcessGroupNCCL

        backend_options = ProcessGroupNCCL.Options()
        backend_options._timeout = timeout

        backend_class = ProcessGroupNCCL(
            prefix_store, group_rank, group_size, backend_options
        )
        backend_type = ProcessGroup.BackendType.NCCL
        device = torch.device("cuda")
        pg._set_default_backend(backend_type)
        backend_class._set_sequence_number_for_group()

        pg._register_backend(device, backend_type, backend_class)
        return pg
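
    # Usage sketch for the method above (hypothetical rendezvous values;
    # TCPStore is shown only to illustrate how a PrefixStore is typically
    # built by the caller):
    #
    #     from torch.distributed import TCPStore
    #     store = PrefixStore(
    #         "vllm/", TCPStore(host, port, world_size, rank == 0)
    #     )
    #     pg = RocmPlatform.stateless_init_device_torch_dist_pg(
    #         backend="nccl", prefix_store=store, group_rank=rank,
    #         group_size=world_size, timeout=timedelta(minutes=5),
    #     )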

    @classmethod
    def device_count(cls) -> int:
        return cuda_device_count_stateless()

    @classmethod
    def check_if_supports_dtype(cls, dtype: torch.dtype) -> None:
        if dtype == torch.bfloat16 and not cls.has_device_capability(80):
            capability = cls.get_device_capability()
            gpu_name = cls.get_device_name()
            if capability is None:
                compute_str = "does not have a compute capability"
            else:
                version_str = capability.as_version_str()
                compute_str = f"has compute capability {version_str}"
            raise ValueError(
                "Bfloat16 is only supported on GPUs with compute capability "
                f"of at least 8.0. Your {gpu_name} GPU {compute_str}. "
                "You can use float16 instead by explicitly setting the "
                "`dtype` flag in CLI, for example: --dtype=half."
            )

    @classmethod
    def insert_blocks_to_device(
        cls,
        src_cache: torch.Tensor,
        dst_cache: torch.Tensor,
        src_block_indices: torch.Tensor,
        dst_block_indices: torch.Tensor,
    ) -> None:
        """Copy blocks from src_cache to dst_cache on GPU."""
        _src_cache = src_cache[:, src_block_indices]
        dst_cache[:, dst_block_indices] = _src_cache.to(dst_cache.device)

    @classmethod
    def swap_out_blocks_to_host(
        cls,
        src_cache: torch.Tensor,
        dst_cache: torch.Tensor,
        src_block_indices: torch.Tensor,
        dst_block_indices: torch.Tensor,
    ) -> None:
        """Copy blocks from GPU to host (CPU)."""
        _src_cache = src_cache[:, src_block_indices]
        dst_cache[:, dst_block_indices] = _src_cache.cpu()

    @classmethod
    def support_hybrid_kv_cache(cls) -> bool:
        return True

    @classmethod
    def support_static_graph_mode(cls) -> bool:
        return True

    @classmethod
    def num_compute_units(cls, device_id: int = 0) -> int:
        return torch.cuda.get_device_properties(device_id).multi_processor_count

    @classmethod
    def use_custom_op_collectives(cls) -> bool:
        return True