import os
import sys
from abc import abstractmethod
from collections.abc import Callable, Generator
from contextlib import contextmanager, nullcontext
from types import CodeType
from typing import Any, ParamSpec, TypeVar

import torch

import vllm.envs as envs
from vllm.config import CompilationMode, CUDAGraphMode, get_current_vllm_config
from vllm.config.compilation import DynamicShapesType
from vllm.logger import init_logger
from vllm.utils.nvtx_pytorch_hooks import layerwise_nvtx_marker_context

logger = init_logger(__name__)

R = TypeVar("R")
P = ParamSpec("P")


@contextmanager
def _compilation_context() -> Generator[None, None, None]:
    """Context manager for compilation settings.

    This manager sets higher dynamo cache limits for compilation.
    (Needed for qwen2_5_vl; see test_qwen2_5_vl_evs_functionality.)
    Generally a recompilation can happen whenever we use a new
    backend instance in torch.compile.
    """
    original_cache_size = torch._dynamo.config.cache_size_limit
    original_accumulated_cache = torch._dynamo.config.accumulated_cache_size_limit
    try:
        # Raise both dynamo cache limits while compiling. The exact values
        # below are assumptions; the intent is simply "large enough".
        torch._dynamo.config.cache_size_limit = 2048
        torch._dynamo.config.accumulated_cache_size_limit = 8192
        yield
    finally:
        torch._dynamo.config.cache_size_limit = original_cache_size
        torch._dynamo.config.accumulated_cache_size_limit = original_accumulated_cache


class TorchCompileWithNoGuardsWrapper:
    """
    A wrapper class for torch.compile; it ensures that all guards are dropped
    when CompilationMode is not CompilationMode.STOCK_TORCH_COMPILE.
    When guards are dropped, the first time __call__ is invoked, a single
    compilation is triggered. Dynamo should never be traced again after that
    since we drop all guards.
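
    Illustrative usage (a sketch only; the subclass and tensors below are
    hypothetical, and a vLLM config is assumed to already be set as the
    current config)::

        class ToyModel(TorchCompileWithNoGuardsWrapper):
            def forward(self, x: torch.Tensor) -> torch.Tensor:
                return x + 1

        toy = ToyModel()          # compiles `forward` with torch.compile
        out = toy(torch.ones(4))  # first call triggers the single compilation
        out = toy(torch.ones(4))  # later calls reuse it without re-tracing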
    """

    def check_invariants_and_forward(self, *args: Any, **kwargs: Any) -> Any:
        assert hasattr(self, "_check_shape_invariants")
        self._check_shape_invariants(*args, **kwargs)
        return self.forward(*args, **kwargs)

    def _call_with_optional_nvtx_range(
        self, callable_fn: Callable[..., Any], *args: Any, **kwargs: Any
    ) -> Any:
        if self.layerwise_nvtx_tracing_enabled:
            args_list = list(args)
            kwargs_dict = dict(kwargs)
            ctx = layerwise_nvtx_marker_context(
                "Torch Compiled Module (input):{}".format(self.__class__.__name__),
                in_tensor=args_list,
                kwargs=kwargs_dict,
            )
            with ctx:
                ctx.result = callable_fn(*args_list, **kwargs_dict)
            return ctx.result
        return callable_fn(*args, **kwargs)

    def __init__(self) -> None:
        self.compiled = False
        vllm_config = get_current_vllm_config()
        self.vllm_config = vllm_config
        mode = vllm_config.compilation_config.mode
        self.layerwise_nvtx_tracing_enabled = (
            vllm_config.observability_config.enable_layerwise_nvtx_tracing)
        if mode is None:
            raise RuntimeError("Compilation mode cannot be NO_COMPILATION")
        backend = vllm_config.compilation_config.init_backend(vllm_config)
        options = {}
        if isinstance(backend, str) and backend == "inductor":
            options = vllm_config.compilation_config.inductor_compile_config
        self.first_compile = True
        self.evaluate_guards = (
            vllm_config.compilation_config.dynamic_shapes_config.evaluate_guards)
        ds_type = vllm_config.compilation_config.dynamic_shapes_config.type
        if mode != CompilationMode.STOCK_TORCH_COMPILE:
            if self.evaluate_guards:
                assert not envs.VLLM_USE_BYTECODE_HOOK, (
                    "compilation_config.dynamic_shapes_config.evaluate_guards "
                    "requires VLLM_USE_BYTECODE_HOOK=0. ")
                # Keep only dynamo's SHAPE_ENV guards so they can be evaluated.
                options["guard_filter_fn"] = lambda x: [
                    entry.guard_type == "SHAPE_ENV" for entry in x]
            elif hasattr(torch.compiler, "skip_all_guards_unsafe"):
                options["guard_filter_fn"] = torch.compiler.skip_all_guards_unsafe
            else:
                options["guard_filter_fn"] = lambda x: [False for _ in x]
        compiled_ptr = self.forward
        if ds_type == DynamicShapesType.UNBACKED:
            assert not envs.VLLM_USE_BYTECODE_HOOK, (
                "UNBACKED dynamic shapes requires VLLM_USE_BYTECODE_HOOK=0. ")
            assert not self.evaluate_guards, "UNBACKED dynamic shapes do not add guards"
            compiled_ptr = self.check_invariants_and_forward
        aot_context = nullcontext()
        if envs.VLLM_USE_AOT_COMPILE:
            if hasattr(torch._dynamo.config, "enable_aot_compile"):
                aot_context = torch._dynamo.config.patch(enable_aot_compile=True)
            else:
                logger.warning(
                    "torch._dynamo.config.enable_aot_compile is not "
                    "available. AOT compile is disabled and please "
                    "upgrade PyTorch version to use AOT compile.")
        with _compilation_context(), aot_context:
            # The exact fullgraph/dynamic flag values are assumptions.
            self._compiled_callable = torch.compile(
                compiled_ptr, fullgraph=True, dynamic=False,
                backend=backend, options=options)
        if envs.VLLM_USE_BYTECODE_HOOK and mode != CompilationMode.STOCK_TORCH_COMPILE:
            torch._dynamo.convert_frame.register_bytecode_hook(self.bytecode_hook)
            self._compiled_bytecode = None

    def aot_compile(self, *args: Any, **kwargs: Any) -> Any:
        if not hasattr(self._compiled_callable, "aot_compile"):
            raise RuntimeError(
                "aot_compile is not supported by the current configuration. "
                "Please make sure torch.compile is enabled with the latest "
                f"version of PyTorch (current using torch: {torch.__version__})")
        return self._compiled_callable.aot_compile((args, kwargs))

    def __call__(self, *args: Any, **kwargs: Any) -> Any:
        if envs.VLLM_USE_BYTECODE_HOOK:
            mode = self.vllm_config.compilation_config.mode
            if mode == CompilationMode.STOCK_TORCH_COMPILE:
                return self._compiled_callable(*args, **kwargs)
            if not self._compiled_bytecode:
                # First call: run through dynamo so that bytecode_hook can
                # capture the transformed bytecode.
                torch._dynamo.eval_frame.remove_from_cache(self.original_code_object)
                return self._call_with_optional_nvtx_range(
                    self._compiled_callable, *args, **kwargs)
            with self._dispatch_to_compiled_code():
                return self._call_with_optional_nvtx_range(self.forward, *args, **kwargs)
        # Without the bytecode hook, forbid recompiles once guards are being
        # evaluated (the exact stance condition here is an assumption).
        ctx = (nullcontext() if self.first_compile or not self.evaluate_guards
               else torch.compiler.set_stance("fail_on_recompile"))
        self.first_compile = False
        with _compilation_context(), ctx:
            return self._call_with_optional_nvtx_range(
                self._compiled_callable, *args, **kwargs)

    @abstractmethod
    def forward(self, *args: Any, **kwargs: Any) -> Any:
        ...

    @property
    def original_code_object(self) -> CodeType:
        """Return the original code object of the forward method."""
        return self.__class__.forward.__code__

    def bytecode_hook(self, old_code: CodeType, new_code: CodeType) -> None:
        """Hook to save the compiled bytecode for direct execution."""
        if old_code is not self.original_code_object:
            return
        # Walk up the stack to dynamo's convert_frame._compile frame and make
        # sure the reported bytecode belongs to this wrapper instance.
        frame = sys._getframe()
        while frame and frame.f_back:
            frame = frame.f_back
            code_name = frame.f_code.co_name
            file_name = frame.f_code.co_filename.split(os.path.sep)[-1]
            if code_name == "_compile" and file_name == "convert_frame.py":
                break
        frame = frame.f_locals["frame"]
        assert frame.f_code == old_code
        if frame.f_locals["self"] is not self:
            return
        self._compiled_bytecode = new_code
        path = self.vllm_config.compile_debug_dump_path()
        if path:
            decompiled_file = path / "transformed_code.py"
            if not decompiled_file.exists():
                try:
                    import depyf
                    src = depyf.decompile(new_code)
                    with open(decompiled_file, "w") as f:
                        f.write(src)
                    logger.debug("Dynamo transformed code saved to %s", decompiled_file)
                except Exception:
                    pass
        if (self.vllm_config.compilation_config.cudagraph_mode != CUDAGraphMode.NONE
                and "update" in new_code.co_names):
            import depyf
            src = depyf.decompile(new_code)
            msg = (
                "Assigning / modifying buffers of nn.Module during forward pass "
                "is not allowed when using cudagraph inside the compiler because "
                "it will cause silent errors. Please use eager mode or fix the "
                "code. The following code contains clues about which buffer is "
                "being modified (please search for the usage of the function "
                "`update`):\n" + src)
            raise RuntimeError(msg)

    @contextmanager
    def _dispatch_to_compiled_code(self) -> Generator[None, None, None]:
        """
        Context manager to dispatch to internally compiled code for torch<2.8.
        Why does this work? Because Dynamo guarantees that the compiled
        bytecode has exactly the same arguments, cell variables, and free
        variables as the original code. Therefore we can directly switch
        the code object in the function and call it.

        See https://dev-discuss.pytorch.org/t/what-is-the-relationship-requirement-among-original-bytecode-transformed-bytecode-and-bytecode-returned-by-hooks-in-dynamo/1693/7 for more details.
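
        A minimal sketch of the same code-object swap with hypothetical,
        non-vLLM functions::

            def fast():
                return "fast"

            def slow():
                return "slow"

            slow.__code__ = fast.__code__  # calls to slow() now run fast's body
            assert slow() == "fast"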
        """
        original = self.original_code_object
        assert self._compiled_bytecode is not None
        self.__class__.forward.__code__ = self._compiled_bytecode
        try:
            yield
        finally:
            self.__class__.forward.__code__ = original


def reset_compile_wrapper(model: torch.nn.Module) -> None:
    """
    Clean up compiled model and captured CUDA graphs for elastic EP.
    """
    if not isinstance(model, TorchCompileWithNoGuardsWrapper) and hasattr(model, "model"):
        model = model.model
    if not isinstance(model, TorchCompileWithNoGuardsWrapper):
        return
    if hasattr(model, "do_not_compile") and model.do_not_compile:
        return
    from vllm.compilation.counter import compilation_counter
    compilation_counter.num_models_seen = 0
    compilation_counter.num_graphs_seen = 0
    compilation_counter.num_piecewise_graphs_seen = 0
    compilation_counter.num_piecewise_capturable_graphs_seen = 0
    compilation_counter.num_backend_compilations = 0
    compilation_counter.num_gpu_runner_capture_triggers = 0
    compilation_counter.num_cudagraph_captured = 0
    compilation_counter.num_inductor_compiles = 0
    compilation_counter.num_eager_compiles = 0
    compilation_counter.num_cache_entries_updated = 0
    compilation_counter.num_compiled_artifacts_saved = 0
    compilation_counter.stock_torch_compile_count = 0
    compilation_counter.num_aot_compiles = 0
    compilation_counter.num_aot_artifacts_saved = 0
    compilation_counter.num_aot_artifacts_loaded = 0
    if hasattr(model, "aot_compiled_fn"):
        model.aot_compiled_fn = None
    if hasattr(model, "was_aot_compile_fn_loaded_from_disk"):
        model.was_aot_compile_fn_loaded_from_disk = False
    compilation_config = model.vllm_config.compilation_config
    # Clearing the cache dirs to empty strings is an assumption; the intent is
    # to force a fresh compile cache on the next compilation.
    compilation_config.cache_dir = ""
    compilation_config.local_cache_dir = ""
    # Restore the original forward bytecode and re-initialize the wrapper so
    # the model is recompiled on its next call.
    model.__class__.forward.__code__ = model.original_code_object
    TorchCompileWithNoGuardsWrapper.__init__(model)