o
    3/iv                     @   sV  U d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	m
Z
 ddlmZ ddlmZ ddlZddlmZ ddlZedddkZeed< ed	dZedB ed
< dZeed< g Zee ed< dZdZe
dg dZdd ZdefddZ deddfddZ!ej"dddefddZ#de$defdd Z%G d!d" d"Z&d#d$ Z'dS )%u  Persistent .o cache for CuTe DSL compiled kernels.

Compiled kernels are exported as object files (.o) via export_to_c.
On subsequent runs the .o is loaded via tvm_ffi (~1ms) instead of
re-generating IR + re-JIT'ing (~100ms per kernel).

Controls:
  QUACK_CACHE_ENABLED=0       — disable persistent .o cache (default: enabled)
  QUACK_CACHE_DIR=path        — override default cache directory
    N)
namedtuple)getuser)PathQUACK_CACHE_ENABLED1CACHE_ENABLEDQUACK_CACHE_DIR	CACHE_DIRFCOMPILE_ONLYEXTRA_SOURCE_DIRSfunc<   	CacheInfohitsmissesmaxsizecurrsizec                  O   s   d S N )argskwargsr   r   ^/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/quack/cache_utils.py_noop_kernel*   s   r   returnc                  C   s:   t d ur	tt } ntt t  d } | jddd | S )Nquack_cacheTparentsexist_ok)r	   r   tempfile
gettempdirr   mkdir)	cache_dirr   r   r   get_cache_path.   s
   
r#   rootc                 C   sb   t |dD ]'}| sq| ||   | }| t|	dd | | qdS )z.Hash all Python sources under *root* into *h*.z*.py   littleN)
sortedrglobis_fileupdaterelative_toas_posixencode
read_byteslento_bytes)hr$   srccontentr   r   r   _hash_source_dir7   s   r4      )r   c                  C   s   t  } | dtjj dtjj   | dtj	   | dt
j	   t| tt j tD ]}t| t|  q7|  S )zJHash quack + extra source dirs plus runtime ABI stamps into a fingerprint.py.zcutlass=ztvm_ffi=)hashlibsha256r*   sysversion_infomajorminorr-   cutlass__version__tvm_ffir4   r   __file__resolveparentr   	hexdigest)r1   	extra_dirr   r   r   _compute_source_fingerprintB   s   "rF   keyc                 C   s   t t|  S r   )r8   r9   pickledumpsrD   )rG   r   r   r   _key_to_hashO      rJ   c                   @   s<   e Zd ZdZddededefddZdd	d
ZdddZ	dS )FileLockz2Advisory file lock using fcntl.flock with timeout.   	lock_path	exclusivetimeoutc                 C   s   || _ || _|| _d| _d S )N)rN   rO   rP   _fd)selfrN   rO   rP   r   r   r   __init__[   s   
zFileLock.__init__r   c                 C   s   | j r	tjtjB ntjtjB }| j rtjntj}tt	| j
|| _t | j }t |k rQzt| j|tjB  | W S  tyJ   td Y nw t |k s/t| j d| _td| j
 )Ng?rQ   zTimed out waiting for lock: )rO   osO_WRONLYO_CREATO_RDONLYfcntlLOCK_EXLOCK_SHopenstrrN   rR   time	monotonicrP   flockLOCK_NBOSErrorsleepcloseRuntimeError)rS   flags	lock_typedeadliner   r   r   	__enter__a   s   zFileLock.__enter__Nc                 G   s4   | j dkrt| j tj t| j  d| _ d S d S )Nr   rQ   )rR   rY   r`   LOCK_UNrU   rd   )rS   excr   r   r   __exit__p   s
   

zFileLock.__exit__)rM   )r   rL   )r   N)
__name__
__module____qualname____doc__r   boolfloatrT   ri   rl   r   r   r   r   rL   X   s
    
rL   c                    s^   i  ddt  fdd} fdd} fdd} |_||_||_|S )zDecorator that caches compiled CuTe DSL kernels in-memory and on disk.

    The decorated function should return a compiled kernel (i.e. call cute.compile).
    The disk cache key is (fn.__qualname__, *args, **sorted_kwargs).
    r   c               
      s  |r| t t|  n| }| v rd7 trtS  | S jf| }trt|}t t	  }|j
ddd || d }|| d }zAt|dtd0 | rvtjjt|dd}|t }	|	 |< d7 trltn|	W  d    W S W d    n1 sw   Y  W n	 ty   Y nw d7 | i |}
|
 |< trz.t|dtd | s|jj
ddd |
jt|td	 W d    n1 sw   Y  W n ty } ztd
| d|  W Y d }~nd }~ww trtS |
S )Nr5   Tr   z.oz.lockF)rO   rP   )enable_tvm_ffi)object_file_pathfunction_namez#quack cache: export failed for key z: )tupler'   itemsr
   r   ro   r   rJ   r#   rF   r!   rL   LOCK_TIMEOUTexistscuteruntimeload_moduler]   EXPORT_FUNC_NAMEre   rC   export_to_c	Exceptionprint)r   r   	cache_keydisk_keysha
cache_patho_pathrN   mloadedcompiled_fnecachefnr   r   r   r   wrapper   s\   
 zjit_cache.<locals>.wrapperc                      s       ddd S )Nr   )clearr   r   r   r   r   r   cache_clear   s   zjit_cache.<locals>.cache_clearc                      s   t d t dS )Nr   )r   r/   r   r   r   r   
cache_info   rK   zjit_cache.<locals>.cache_info)	functoolswrapsr   r   r   )r   r   r   r   r   r   r   	jit_cache|   s   0r   )(rp   rY   r   r8   rU   rH   r:   r   r^   collectionsr   getpassr   pathlibr   r>   cutlass.cuterz   r@   getenvr   rq   __annotations__r	   r]   r
   r   listr}   rx   r   r   r#   r4   	lru_cacherF   rv   rJ   rL   r   r   r   r   r   <module>   s<   	
	$