
    )j(              	       \   d dl Z d dlmZmZmZmZ d dlmZ d dl	m
Z
 d dlmZmZmZ ddlmZ 	 d-de
j        dee         dee         fd	Zi fd
edee         deeef         fdZd.dZdee         defdZdee         dedee         fdZdedededee         fdZ G d d          Z G d de          Z G d de          Z G d de          Z G d  d!e          Z G d" d#e          Z  G d$ d%e          Z! G d& d'e          Z"d( Z# G d) d*e          Z$ G d+ d,e          Z%dS )/    N)AnyDictListOptional)tree_flattentree_maptree_unflatten   create_causal_maskmodelmax_kv_sizereturnc                     t          | d          r|                                 S t          | j                  }fdt	          |          D             S d t	          |          D             S )a  
    Construct the model's cache for use in generation.

    This function will defer the cache construction to the model if it has a
    ``make_cache`` method, otherwise it will make a default KV cache.

    Args:
        model (nn.Module): The language model.
        max_kv_size (Optional[int]): If provided and the model does not have a
            ``make_cache`` method, a ``RotatingKVCache`` is used with a maximum
            size of ``max_kv_size``
    
make_cacheNc                 2    g | ]}t          d           S )   )max_sizekeep)RotatingKVCache).0_r   s     ]/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/cache.py
<listcomp>z%make_prompt_cache.<locals>.<listcomp>"   s3     
 
 
>?O[q999
 
 
    c                 *    g | ]}t                      S  )KVCache)r   r   s     r   r   z%make_prompt_cache.<locals>.<listcomp>&   s    555a		555r   )hasattrr   lenlayersrange)r   r   
num_layerss    ` r   make_prompt_cacher$      s      ul## "!!!U\""J
 
 
 
CHCTCT
 
 
 	
 655#4#45555r   	file_namecachemetadatac                     d |D             }d |D             }t          t          |                    }d |D             }|||g}t          t          |                    }t          j        | ||           dS )a  
    Save a pre-computed prompt cache to a file.

    Args:
        file_name (str): The ``.safetensors`` file name.
        cache (List[Any]): The model state.
        metadata (Dict[str, str]): Optional metadata to save along with model
            state.
    c                     g | ]	}|j         
S r   stater   cs     r   r   z%save_prompt_cache.<locals>.<listcomp>3   s    )))a!')))r   c                     g | ]	}|j         
S r   
meta_stater,   s     r   r   z%save_prompt_cache.<locals>.<listcomp>4   s    ...1!,...r   c                 6    g | ]}t          |          j        S r   type__name__r,   s     r   r   z%save_prompt_cache.<locals>.<listcomp>6   s!    555!T!WW%555r   N)dictr   mxsave_safetensors)r%   r&   r'   
cache_data
cache_infocache_classescache_metadatas          r   save_prompt_cacher<   )   s     *)5)))J.....Jl:..//J55u555M (M:N,~6677N	:~>>>>>r   Fc                 >   t          j        | d          \  }}t          t          |                                                    }t          t          |                                                    }|\  }}}d t          |||          D             }|r||fS |S )aR  
    Load a prompt cache from a file.

    Args:
        file_name (str): The ``.safetensors`` file name.
        return_metadata (bool): Whether or not to return metadata.
            Default: ``False``.

    Returns:
        List[Any] or Tuple[List[Any], Dict[str, str]]: The prompt cache and
            the metadata if requested.
    T)return_metadatac                 f    g | ].\  }}}t                      |                             ||          /S r   globals
from_state)r   r-   r+   r0   s       r   r   z%load_prompt_cache.<locals>.<listcomp>M   sF        Auj 			!z22  r   )r6   loadr	   listitemszip)r%   r>   arraysr;   infor'   classesr&   s           r   load_prompt_cacherJ   <   s      WYEEEFND0011F#D)=)=)?)?$@$@AAN,D(G $'$>$>  E  hLr   c                 4    t          d | D                       S )z0
    Check if model's cache can be trimmed.
    c              3   >   K   | ]}|                                 V  d S Nis_trimmabler,   s     r   	<genexpr>z(can_trim_prompt_cache.<locals>.<genexpr>Z   s,      //Aq~~//////r   )allr&   s    r   can_trim_prompt_cacherS   V   s!     ////////r   
num_tokensc                 t    t          |           rt          |           dk    rdS fd| D             d         S )aj  
    Trim the model's cache by the given number of tokens.

    This function will trim the cache if possible (in-place) and return the
    number of tokens that were trimmed.

    Args:
        cache (List[Any]): The model's cache.
        num_tokens (int): The number of tokens to trim.

    Returns:
        (int): The number of tokens that were trimmed.
    r   c                 :    g | ]}|                               S r   )trim)r   r-   rT   s     r   r   z%trim_prompt_cache.<locals>.<listcomp>m   s%    ...1AFF:...r   )rS   r    )r&   rT   s    `r   trim_prompt_cacherX   ]   sG     !'' 3u::??q.......q11r   Noffsetreturn_arraywindow_sizec                 f    |t          | ||          S | dk    rd S |rt          | ||          S dS )Nr\   r
   causalr   )rY   rZ   r[   r\   s       r   create_attention_maskr`   p   sO     !!VEEEE	
at	 !!VEEEExr   c                       e Zd Zed             Zej        d             Zed             Zej        d             Zd Zd Zed             Z	d Z
ed	             Zd
S )
_BaseCachec                     g S rM   r   selfs    r   r+   z_BaseCache.state~   s    	r   c                 0    ||rt          d          d S d S )Nz,This cache has no state but a state was set.
ValueErrorre   vs     r   r+   z_BaseCache.state   s%    =Q=KLLL ===r   c                     dS )N r   rd   s    r   r0   z_BaseCache.meta_state   s    rr   c                 0    ||rt          d          d S d S )Nz6This cache has no meta_state but a meta_state was set.rg   ri   s     r   r0   z_BaseCache.meta_state   s%    =Q=UVVV ===r   c                     dS NFr   rd   s    r   rO   z_BaseCache.is_trimmable   s    ur   c                     dS )z
        Return the size (i.e. sequence length) of the cache.

        Not every cache is required to implement this, in which case the size
        will always be 0 (though the cache may not be empty).
        r   r   rd   s    r   sizez_BaseCache.size   s	     qr   c                      t          d          )z&Return the size of this cache in bytesz%Cache sub-class must implement nbytesNotImplementedErrorrd   s    r   nbytesz_BaseCache.nbytes   s     ""IJJJr   c                      t          d          )z6
        Return if the cache is empty or not.
        z$Cache sub-class must implement this.rs   rd   s    r   emptyz_BaseCache.empty   s     ""HIIIr   c                 L    |                      |           }||_        ||_        |S rM   )__new__r+   r0   clsr+   r0   objs       r   rB   z_BaseCache.from_state   s(     kk#	#
r   N)r4   
__module____qualname__propertyr+   setterr0   rO   rq   ru   rw   classmethodrB   r   r   r   rb   rb   }   s          X \M M \M   X W W W     K K XKJ J J   [  r   rb   c                       e Zd ZdZd Zd Zed             Zej        d             Zd Z	d Z
d Zd	 Zed
             ZdS )ConcatenateKVCachea  ConcatenateKVCache the simplest KV cache implementation.

    Can be used as a mock KV cache or when large blocks are being processed at
    a time in which case KVCache isn't necessarily faster. Consider using the
    KVCache with a larger step size before using this cache.
    c                 0    d | _         d | _        d| _        d S Nr   keysvaluesrZ   rd   s    r   __init__zConcatenateKVCache.__init__       	r   c                     | j         || _         || _        nDt          j        | j         |gd          | _         t          j        | j        |gd          | _        | j         j        d         | _        | j         | j        fS )Naxis)r   r   r6   concatenateshaperZ   re   r   r   s      r   update_and_fetchz#ConcatenateKVCache.update_and_fetch   ss    9DI DKK	4'8rBBBDI.$+v)>RHHHDKiob)y$+%%r   c                     | j         | j        fS rM   r   r   rd   s    r   r+   zConcatenateKVCache.state   s    y$+%%r   c                 R    |\  | _         | _        | j         j        d         | _        d S )Nr   r   r   r   rZ   ri   s     r   r+   zConcatenateKVCache.state   s$    !"	4;iob)r   c                     dS NTr   rd   s    r   rO   zConcatenateKVCache.is_trimmable       tr   c                 P    t          | j        |          }| xj        |z  c_        |S rM   minrZ   re   ns     r   rW   zConcatenateKVCache.trim   (    Qqr   c                 (    t          |d| j        i|S NrZ   r`   rZ   re   argskwargss      r   	make_maskzConcatenateKVCache.make_mask       $dI4;I&IIIr   c                     | j         d u S rM   r   rd   s    r   rw   zConcatenateKVCache.empty       yD  r   c                 F    | j         dS | j         j        | j        j        z   S r   r   ru   r   rd   s    r   ru   zConcatenateKVCache.nbytes   $    91y$+"444r   N)r4   r}   r~   __doc__r   r   r   r+   r   rO   rW   r   rw   ru   r   r   r   r   r      s           
	& 	& 	& & & X& \* * \*    
J J J! ! ! 5 5 X5 5 5r   r   c                       e Zd ZdZddedefdZd Zed             Zej	        d	             Zed
             Z
e
j	        d             Z
d Zd Zd Zd Zed             ZdS )QuantizedKVCache   @      
group_sizebitsc                 L    d | _         d | _        d| _        || _        || _        d S r   )r   r   rZ   r   r   re   r   r   s      r   r   zQuantizedKVCache.__init__   s)    	$			r   c                     j         \  }}}}|j         d         } j         j        |z    j        d         j         d         k    rdt          j        j        z   j        z   j        |z   dz
   j        z   j        z  }|||f fd}	fd}
 j        d j        z  dk    r,t          fd j         j	        f          \   _         _	        t          |
 j         j	        f          \   _         _	        n! |	|           |	|          c _         _	         xj        |z  c_        t          j
         j         j        	          t          j
        | j         j        	          }t          t           j                            D ]F}|          j        |         d
 j        d d f<   ||          j	        |         d
 j        d d f<   Gt           fd j         j	        f          S )Nr   r   r   r
   c                     t          j        g | z  R t           j                  t          j        g | j        z  R j                  t          j        g | j        z  R j                  fS )Ndtype)r6   zerosuint32r   r   )dim
el_per_intr   re   r   s    r   
init_quantz5QuantizedKVCache.update_and_fetch.<locals>.init_quant   s    H8u8cZ&788	JJJH=u=cT_&<==TZPPPH=u=cT_&<==TZPPP r   c                     t          j        g | j        d         R | j                  }t          j        | |gd          S )Nr   r   r   r   )r6   r   r   r   r   )xnew_xr   s     r   expand_quantz7QuantizedKVCache.update_and_fetch.<locals>.expand_quant  sF    !65!6!'"+!6!6agFFF~q%jr::::r   c                 "    | dd d d f         S N.r   )r   prevs    r   <lambda>z3QuantizedKVCache.update_and_fetch.<locals>.<lambda>  s    !C$M"2 r   r   r   .c                 ,    | dd j         d d f         S r   rZ   r   re   s    r   r   z3QuantizedKVCache.update_and_fetch.<locals>.<lambda>  s    !C4;$9": r   )r   rZ   r   r6   r   rq   r   stepr   r   quantizer   r"   r    )re   r   r   B
n_kv_heads	num_steps
k_head_dim
v_head_dim	new_stepsr   r   ir   r   r   s   ``          @@@r   r   z!QuantizedKVCache.update_and_fetch   sB   /3z,:y*\"%
{9	!1TYq\5G5K K KRY^+ty8JY.2ty@49LI
I.E       ; ; ; ; ; y$$)#q((-52222TY4L. .*DIt{ *2 49dk":* *&	4;; *4J)?)?JAWAW&	4;y {4DO$)LLLVdiPPPs49~~&& 	C 	CA7;AwDIaLdT[0!!!349?DKN3t{ 2AAA566::::TY<TUUUr   c                       j          j        d         j        d         k    r j         j        fS t	           fd j         j        f          S )Nr      c                 ,    | dd j         d d f         S r   r   r   s    r   r   z(QuantizedKVCache.state.<locals>.<lambda>!  s    !C4;12 r   )rZ   r   r   r   r   rd   s   `r   r+   zQuantizedKVCache.state  sV    ;$)A,,Q///9dk))2222TY4L  r   c                 $    |\  | _         | _        d S rM   r   ri   s     r   r+   zQuantizedKVCache.state$      !"	4;;;r   c                 j    t          t          t          | j        | j        | j        f                    S rM   )tuplemapstrrZ   r   r   rd   s    r   r0   zQuantizedKVCache.meta_state(  s'    St{DOTYGHHIIIr   c                 V    t          t          |          \  | _        | _        | _        d S rM   )r   intrZ   r   r   ri   s     r   r0   zQuantizedKVCache.meta_state,  s     25c1++/T_diiir   c                     dS r   r   rd   s    r   rO   zQuantizedKVCache.is_trimmable0  r   r   c                 P    t          | j        |          }| xj        |z  c_        |S rM   r   r   s     r   rW   zQuantizedKVCache.trim3  r   r   c                 (    t          |d| j        i|S r   r   r   s      r   r   zQuantizedKVCache.make_mask8  r   r   c                     | j         d u S rM   r   rd   s    r   rw   zQuantizedKVCache.empty;  r   r   c                 >    t          d | j        | j        fd          S )Nc                     | |j         z   S rM   ru   )ar   s     r   r   z)QuantizedKVCache.nbytes.<locals>.<lambda>@  s    AH r   r   )tree_reducer   r   rd   s    r   ru   zQuantizedKVCache.nbytes>  s"    44ty$+6NPQRRRr   N)r   r   )r4   r}   r~   r   r   r   r   r   r+   r   r0   rO   rW   r   rw   ru   r   r   r   r   r      s'       D 3 3    )V )V )VV   X \# # \# J J XJ > > >    
J J J! ! ! S S XS S Sr   r   c                       e Zd ZdZd Zd Zd Zed             Zej	        d             Zd Z
d ZddededefdZd Zed             Zd Zed             ZdS )r   r   c                 0    d | _         d | _        d| _        d S r   r   rd   s    r   r   zKVCache.__init__F  r   r   c                    | j         }| j        %||j        d         z   | j        j        d         k    r$|j        \  }}}}|j        d         }| j        |j        d         z   dz
  | j        z  }	|||	| j        z  |f}
|||	| j        z  |f}t	          j        |
|j                  }t	          j        ||j                  }| j        || j        z  dk    r2| j        dd |d d f         | _        | j        dd |d d f         | _        t	          j        | j        |gd          | _        t	          j        | j        |gd          | _        n||c| _        | _        | xj         |j        d         z  c_         || j        d|| j         d d f<   || j        d|| j         d d f<   | j        dd | j         d d f         | j        dd | j         d d f         fS Nr      r
   r   .r   )	rZ   r   r   r   r6   r   r   r   r   re   r   r   r   r   r   r   r   r   n_stepsk_shapev_shapenew_knew_vs                 r   r   zKVCache.update_and_fetchK  s   {9
1!59K K K+/:(Az1jaJy4:a=014BG*g	&9:FG*g	&9:FGHWdj11EHWfl33Ey$$)#q(( $	#uuaaa- 8DI"&+c5D5!!!m"<DKNDIu+=AFFF	 ndk5-AJJJ).&	4;tz!}$04	#tdk)111,-28C+QQQ./ymmQQQ./S-DK-QRQRQR=R1SSSr   c                     | j         S rM   r   rd   s    r   rq   zKVCache.sizec  s
    {r   c                     | j         | j        j        d         k    r| j        | j        fS | j        dd | j         d d f         | j        dd | j         d d f         fS Nr   .rZ   r   r   r   rd   s    r   r+   zKVCache.statef  i    ;$)/!,,,9dk)) 	#}}aaa/0C4;12 r   c                 R    |\  | _         | _        | j         j        d         | _        d S Nr   r   ri   s     r   r+   zKVCache.statep  $    !"	4;ioa(r   c                     dS r   r   rd   s    r   rO   zKVCache.is_trimmableu  r   r   c                 P    t          | j        |          }| xj        |z  c_        |S rM   r   r   s     r   rW   zKVCache.trimx  r   r   r   r   r   r   r   c                     t          ||          }| j        |_        | j        Bt          j        | j        ||          |_        t          j        | j        ||          |_        |S )Nr   )r   rZ   r   r6   r   r   )re   r   r   quant_caches       r   to_quantizedzKVCache.to_quantized}  sl    &*4HHH![9 !{49RVWWWK!#
" " "K r   c                 (    t          |d| j        i|S r   r   r   s      r   r   zKVCache.make_mask  r   r   c                 6    t                               |          S rM   )BatchKVCachemerger   cachess     r   r  zKVCache.merge  s    !!&)))r   c                     | j         d u S rM   r   rd   s    r   rw   zKVCache.empty  r   r   c                 F    | j         dS | j         j        | j        j        z   S r   r   rd   s    r   ru   zKVCache.nbytes  r   r   Nr   r   )r4   r}   r~   r   r   r   rq   r   r+   r   rO   rW   r   r   r  r   r   r  rw   ru   r   r   r   r   r   C  s%       D  
T T T0     X \) ) \)    
 s s CS    J J J * * [*! ! ! 5 5 X5 5 5r   r   c                   $   e Zd ZdZddZd dZd Zd Zd Zd	 Z	d
 Z
ed             Zej        d             Zed             Zej        d             Zd Zd Zd!dededefdZ	 d"dedee         defdZed             Zd Zed             ZdS )#r   r   r   c                 Z    || _         d | _        d | _        d| _        || _        d| _        d S r   )r   r   r   rZ   r   _idx)re   r   r   s      r   r   zRotatingKVCache.__init__  s0    		 			r   Nc                     g }|dk    r,|dd | j         d d f         |d|| j         z   d d d f         g}n|g}||                    |           t          j        |d          S Nr   .r   r   )r   appendr6   r   )re   	trim_sizerj   r  to_cats        r   _trimzRotatingKVCache._trim  s    q==[ty[!!!+,aY5J5L5Laaa0O.PQFFSFMM&!!!~f1----r   c           	      "   | j         |j        d         k    r|S | j         | j        k     rTt          j        |dd| j        ddf         |d| j         dddf         |d| j        | j         ddf         gd          S |dd| j         ddf         S )zY
        Rearrange the cache into temporal order, slicing off the end if unused.
        r   .Nr   )r  r   rZ   r6   r   r   ri   s     r   _temporal_orderzRotatingKVCache._temporal_order  s     9
""HY$$>c;TY;)*c49;;)*c49ty0!!!34
     S+DI+qqq())r   c                     | j         || _         || _        n|                     | j                   | _         |                     | j                  | _        | j         j        d         | _        | j        | j        z
  dz   }|                     || j         |          | _         |                     || j        |          | _        | xj        |j        d         z  c_        | j         j        d         | _        | j         | j        fS Nr   r
   )r   r   r  r   r  r   r  rZ   )re   r   r   r  s       r   _update_concatzRotatingKVCache._update_concat  s    9DI DKK ,,TY77DI..t{;;DK	*DI 	DM1A5I

9di>>DI**YVDDDKtz!}$IOA&	y$+%%r   c                    |j         \  }}}}| j        }| j        1|| j        j         d         k    r| j        j         d         | j        k     r|j         d         }t	          | j        | j        |z
            }	|||	|f}
|||	|f}t          j        |
|j                  }t          j        ||j                  }| j        Et          j	        | j        |gd          | _        t          j	        | j
        |gd          | _
        n||c| _        | _
        || _        | j        j         d         | j        z
  }|dk    rL|                     || j                  | _        |                     || j
                  | _
        | j        | _        | j        | j        k    r| j        | _        || j        d| j        | j        |z   d d f<   || j
        d| j        | j        |z   d d f<   | xj        |z  c_        | xj        |z  c_        | j        | j        k     r2| j        dd | j        d d f         | j
        dd | j        d d f         fS | j        | j
        fS )Nr   r   r   r   .)r   rZ   r   r   r   r   r6   r   r   r   r   r  r  r   re   r   r   r   r   Sr   r   r   new_sizer   r   r   r   r  s                  r   _update_in_placez RotatingKVCache._update_in_place  sD    (,z$:q*{9DIOA&&&49?1+=+M+MaJ49dmd&:;;H*h
;G*h
;GHWdj11EHWfl33Ey$NDIu+=AFFF	 ndk5-AJJJ).&	4;DI IOA&6	q==

9di88DI**Y<<DKDI 9%%	DI 8<	#ty49q=0!!!349?CTY]2AAA56q		Q		 ;&&9S-DK-23T[mmUVUVUVAV5WWWy$+%%r   c                 |    |j         d         dk    r|                     ||          S |                     ||          S r  r   r  r  r   s      r   r   z RotatingKVCache.update_and_fetch  ?    :a=A((v666""4000r   c                 6    t          | j        | j                  S rM   )r   rZ   r   rd   s    r   rq   zRotatingKVCache.size  s    4;...r   c                     | j         | j        j        d         k     r2| j        dd | j         d d f         | j        dd | j         d d f         fS | j        | j        fS r   r   rd   s    r   r+   zRotatingKVCache.state  se    ;+++9S-DK-23T[mmUVUVUVAV5WWW9dk))r   c                 $    |\  | _         | _        d S rM   r   ri   s     r   r+   zRotatingKVCache.state  r   r   c           	      v    t          t          t          | j        | j        | j        | j        f                    S rM   )r   r   r   r   r   rZ   r  rd   s    r   r0   zRotatingKVCache.meta_state  s+    Sty$-diPQQRRRr   c                 b    t          t          |          \  | _        | _        | _        | _        d S rM   )r   r   r   r   rZ   r  ri   s     r   r0   zRotatingKVCache.meta_state  s,    ;><
 <
8	4=$+tyyyr   c                 "    | j         | j        k     S rM   )rZ   r   rd   s    r   rO   zRotatingKVCache.is_trimmable  s    {T]**r   c                 p    t          | j        |          }| xj        |z  c_        | xj        |z  c_        |S rM   )r   rZ   r  r   s     r   rW   zRotatingKVCache.trim  s7    Qq		Q		r   r   r   r   r   r   c                      t          d          )Nz RotatingKVCache Quantization NYIrs   r   s      r   r  zRotatingKVCache.to_quantized%  s    !"DEEEr   FrY   r\   r[   c                    |dk    rE|p| j         }t          | j         dz
  | j                  }||z   |k    s|rt          |||          S dS |d S | j        |k    rw| j         |k    rn| j        }|| j         k    rd}| j        | j         k     r| j        dz   }n| j         }t          j        |          ||z
  k    }t          j        ||dz             }|S d S d S )Nr
   r^   r_   r   )shift)r   r   rZ   r   r  r6   arangeroll)re   rY   r\   r[   rZ   idx	mask_sizemasks           r   r   zRotatingKVCache.make_mask(  s    q55%6K*DK88FzK''<')!VMMMMx"t{k))dmk.I.Ii$-''C;.. $aII $Iy++	K0GHwt37333 *).I.Ir   c                 6    t                               |          S rM   )BatchRotatingKVCacher  r  s     r   r  zRotatingKVCache.mergeB  s    #))&111r   c                     | j         d u S rM   r   rd   s    r   rw   zRotatingKVCache.emptyF  r   r   c                 F    | j         dS | j         j        | j        j        z   S r   r   rd   s    r   ru   zRotatingKVCache.nbytesI  r   r   )r   rM   r  ro   )r4   r}   r~   r   r   r  r  r  r  r   rq   r   r+   r   r0   rO   rW   r   r   r  r   boolr   r   r  rw   ru   r   r   r   r   r     s       D   . . . .* * *$& & &()& )& )&V1 1 1
/ / / * * X* \# # \# S S XS 
 
 
+ + +  F Fs Fs FCS F F F F OT #+C=GK   4 2 2 [2! ! ! 5 5 X5 5 5r   r   c                        e Zd Z fdZddeee                  fdZd Zd Z	e
d             Zej        d             Zd	 Zd
 Zd ZddZd Zd ZdefdZed             Zd Ze
d             Z xZS )ArraysCachec                 f    t                                          |           }d |_        d |_        |S rM   )superry   left_paddinglengths)r{   r   r   instance	__class__s       r   ry   zArraysCache.__new__Q  s-    77??3'' $r   Nr:  c                 V    d g|z  | _         |rt          j        |          | _        d S d S rM   )r&   r6   arrayr:  )re   rq   r:  s      r   r   zArraysCache.__init__W  s9    Vd]
 	7 " 6 6D	7 	7r   c                     || j         |<   d S rM   rR   )re   r.  values      r   __setitem__zArraysCache.__setitem__\  s    
3r   c                     | j         |         S rM   rR   re   r.  s     r   __getitem__zArraysCache.__getitem___  s    z#r   c                     | j         S rM   rR   rd   s    r   r+   zArraysCache.stateb  s
    zr   c                     || _         d S rM   rR   ri   s     r   r+   zArraysCache.statef  s    


r   c                 8    fd| j         D             | _         dS )N
        In-place filter to keep just the given indices in the cache.
        c                      g | ]
}|         S r   r   )r   r-   batch_indicess     r   r   z&ArraysCache.filter.<locals>.<listcomp>n  s    ;;;1a&;;;r   NrR   re   rK  s    `r   filterzArraysCache.filterj  s&     <;;;
;;;


r   c                 X    d t          | j        |j                  D             | _        dS )B
        In-place extend this cache with the other cache.
        c                 @    g | ]\  }}t          j        ||g          S r   r6   r   )r   r-   os      r   r   z&ArraysCache.extend.<locals>.<listcomp>t  s*    VVVAbnaV,,VVVr   N)rF   r&   )re   others     r   extendzArraysCache.extendp  s,     WVTZ9U9UVVV


r   c                 z    t          t          | j                            }fd| j        D             |_        |S )Nc                 *    g | ]}|d z            S )r
   r   r   r-   r.  s     r   r   z'ArraysCache.extract.<locals>.<listcomp>x  s&    <<<AqsQw'<<<r   )r7  r    r&   )re   r.  r&   s    ` r   extractzArraysCache.extractv  s:    C
OO,,<<<<<<<r   c                 8    t          j        |          | _        d S rM   )r6   r?  r;  )re   r;  r   s      r   preparezArraysCache.prepare{  s    x((r   c                 "    d | _         d | _        d S rM   r;  r:  rd   s    r   finalizezArraysCache.finalize~  s     r   c                 f    | j         | xj         |z  c_         | j        | xj        |z  c_        d S d S rM   r\  )re   rY   s     r   advancezArraysCache.advance  sE    <#LLALL(" )(r   rY   c                     | j         )t          j        |          }|| j         d d d f         k    S | j        )t          j        |          }|| j        d d d f         k     S d S rM   )r:  r6   r,  r;  )re   rY   poss      r   r   zArraysCache.make_mask  sg    ()A,,C$+AAAtG444\%)A,,Caaag...4r   c                    t          |d         j                  }t          |          } | |          }t          |          D ]t          t	          fd|D                                 }t          |j                  }||d<   t          j        ||j	                  |<   t          |          D ]-}||                  ||                  |         ||dz   <   .|S )Nr   c              3   8   K   | ]}|         
|         V  d S rM   r   )r   r-   es     r   rP   z$ArraysCache.merge.<locals>.<genexpr>  s0      HHqt7Gqt7G7G7G7GHHr   r
   )
r    r&   r"   nextiterrD   r   r6   r   r   )	r{   r  n_stater   r&   c_initr   r   rd  s	           @r   r  zArraysCache.merge  s    fQio&&KKGw 	3 	3A$HHHHVHHHHHIIF&&EE!Hxv|44E!H1XX 3 3!9Q<'&,QilaQU##3 r   c                      | j         d         d u S r   rR   rd   s    r   rw   zArraysCache.empty  s    z!}$$r   c                 >    t          d | j        D                       S )Nc              3   (   K   | ]}||j         V  d S rM   r   r,   s     r   rP   z%ArraysCache.nbytes.<locals>.<genexpr>  s$      AA1=18====AAr   )sumr&   rd   s    r   ru   zArraysCache.nbytes  s!    AATZAAAAAAr   rM   )r4   r}   r~   ry   r   r   r   r   rB  rE  r   r+   r   rM  rT  rX  rZ  r]  r_  r   r   r  rw   ru   __classcell__)r=  s   @r   r7  r7  P  s           7 78DI+> 7 7 7 7
          X \  \< < <W W W  
) ) ) )! ! !# # #3       [% % % B B XB B B B Br   r7  c                       e Zd ZdZd Zd Zd Zed             Zej	        d             Zd Z
d Zed	             Zej	        d
             Zd Zed             ZdS )ChunkedKVCacher   c                 L    d | _         d | _        d| _        || _        d| _        d S r   )r   r   rZ   
chunk_sizestart_position)re   rq  s     r   r   zChunkedKVCache.__init__  s,    	$r   c                    | j         | j         j        d         | j        k    rj| xj        | j         j        d         | j        z
  z  c_        | j         d| j         d d d f         | _         | j        d| j         d d d f         | _        d S d S d S r   )r   r   rq  rr  r   rd   s    r   maybe_trim_frontzChunkedKVCache.maybe_trim_front  s    9 TY_Q%74?%J%J49?1#5#GG	#'7'9'9111"<=DI+cDO+;+=+=qqq&@ADKKK ! %J%Jr   c                    | j         | j        z
  }| j        %||j        d         z   | j        j        d         k    r$|j        \  }}}}|j        d         }| j        |j        d         z   dz
  | j        z  }	|||	| j        z  |f}
|||	| j        z  |f}t          j        |
|j                  }t          j        ||j                  }| j        || j        z  dk    r2| j        dd |d d f         | _        | j        dd |d d f         | _        t          j	        | j        |gd          | _        t          j	        | j        |gd          | _        n||c| _        | _        | xj         |j        d         z  c_         | j         | j        z
  }|| j        d||d d f<   || j        d||d d f<   | j        dd |d d f         | j        dd |d d f         fS r   )
rZ   rr  r   r   r   r6   r   r   r   r   )re   r   r   r   r   r   r   r   r   r   r   r   r   r   ends                  r   r   zChunkedKVCache.update_and_fetch  s   {T009
1!59K K K+/:(Az1jaJy4:a=014BG*g	&9:FG*g	&9:FGHWdj11EHWfl33Ey$$)#q(( $	#uuaaa- 8DI"&+c5D5!!!m"<DKNDIu+=AFFF	 ndk5-AJJJ).&	4;tz!}$kD//&*	#tCx"#(.Cc111$%ydsdAAA&C#qqqL(AAAr   c                     | j         | j        j        d         k    r| j        | j        fS | j        dd | j         d d f         | j        dd | j         d d f         fS r   r   rd   s    r   r+   zChunkedKVCache.state  r   r   c                 R    |\  | _         | _        | j         j        d         | _        d S r   r   ri   s     r   r+   zChunkedKVCache.state  r   r   c                     dS r   r   rd   s    r   rO   zChunkedKVCache.is_trimmable  r   r   c                 `    t          | j        | j        z
  |          }| xj        |z  c_        |S rM   )r   rZ   rr  r   s     r   rW   zChunkedKVCache.trim  s0    d11155qr   c                 ^    t          t          t          | j        | j        f                    S rM   )r   r   r   rq  rr  rd   s    r   r0   zChunkedKVCache.meta_state  s$    St0CDEEFFFr   c                 J    t          t          |          \  | _        | _        d S rM   )r   r   rq  rr  ri   s     r   r0   zChunkedKVCache.meta_state  s    /23{{,,,,r   c                     | j         d u S rM   r   rd   s    r   rw   zChunkedKVCache.empty  r   r   c                 F    | j         dS | j         j        | j        j        z   S r   r   rd   s    r   ru   zChunkedKVCache.nbytes  r   r   N)r4   r}   r~   r   r   rt  r   r   r+   r   rO   rW   r0   rw   ru   r   r   r   ro  ro    s	       D     B B BB B B2   X \) ) \)    
 G G XG ; ; ;! ! ! 5 5 X5 5 5r   ro  c                       e Zd Zd Zd Zd Zd Zed             Zej	        d             Zed             Z
e
j	        d             Z
d	 Zd
 Zed             Zd Zd Zd Zd Zd Zed             Zed             ZdS )	CacheListc                     || _         d S rM   r  )re   r  s     r   r   zCacheList.__init__  s    r   c                     | j         |         S rM   r  rD  s     r   rE  zCacheList.__getitem__  s    {3r   c                 >    t          d | j        D                       S )Nc              3   >   K   | ]}|                                 V  d S rM   rN   r,   s     r   rP   z)CacheList.is_trimmable.<locals>.<genexpr>  s,      991>>##999999r   )rQ   r  rd   s    r   rO   zCacheList.is_trimmable  s!    99T[999999r   c                 D    | j         D ]}|                    |          }|S rM   )r  rW   )re   r   r-   ms       r   rW   zCacheList.trim  s)     	 	Aq		AAr   c                 $    d | j         D             S )Nc                     g | ]	}|j         
S r   r*   r,   s     r   r   z#CacheList.state.<locals>.<listcomp>  s    ---A---r   r  rd   s    r   r+   zCacheList.state  s    ------r   c                 J    t          | j        |          D ]\  }}||_        d S rM   )rF   r  r+   )re   rj   r-   ss       r   r+   zCacheList.state  s4    Q'' 	 	DAqAGG	 	r   c                 F    d | j         D             d | j         D             fS )Nc                 6    g | ]}t          |          j        S r   r2   r,   s     r   r   z(CacheList.meta_state.<locals>.<listcomp>  s!    333!T!WW333r   c                     g | ]	}|j         
S r   r/   r,   s     r   r   z(CacheList.meta_state.<locals>.<listcomp>  s    ///aQ\///r   r  rd   s    r   r0   zCacheList.meta_state  s4     43t{333//4;///
 	
r   c                 V    t          | j        |d                   D ]\  }}||_        d S Nr
   )rF   r  r0   )re   rj   r-   r  s       r   r0   zCacheList.meta_state  s8    QqT** 	 	DAqALL	 	r   c                 D    | j         D ]}|                    |           dS rI  N)r  rM  )re   rK  r-   s      r   rM  zCacheList.filter!  s4      	$ 	$AHH]####	$ 	$r   c                 p    t          | j        |j                  D ]\  }}|                    |           dS )rO  N)rF   r  rT  )re   rS  r-   rR  s       r   rT  zCacheList.extend(  s@     U\22 	 	DAqHHQKKKK	 	r   c           	           |             }t          fdt          t          d         j                            D                       |_        |S )Nc              3   ~   K   | ]6d          j                                      fdD                       V  7dS )r   c                 *    g | ]}|j                  S r   r  )r   r-   r   s     r   r   z-CacheList.merge.<locals>.<genexpr>.<listcomp>3  s    &C&C&Cqqx{&C&C&Cr   N)r  r  )r   r   r  s    @r   rP   z"CacheList.merge.<locals>.<genexpr>2  sg       
 
 1IQ%%&C&C&C&CF&C&C&CDD
 
 
 
 
 
r   r   )r   r"   r    r  )r{   r  r&   s    ` r   r  zCacheList.merge/  sc     
 
 
 
3vay/0011
 
 
 
 
 r   c                 8    t          fd| j        D              S )Nc              3   B   K   | ]}|                               V  d S rM   )rX  rW  s     r   rP   z$CacheList.extract.<locals>.<genexpr>9  s-      ??a199S>>??????r   )r  r  rD  s    `r   rX  zCacheList.extract8  s%    ????4;???@@r   c                 4    | j         D ]} |j        di | d S )Nr   )r  rZ  )re   r   r-   s      r   rZ  zCacheList.prepare;  s8     	  	 AAI	  	 r   c                 B    | j         D ]}|                                 d S rM   )r  r]  )re   r-   s     r   r]  zCacheList.finalize?  s,     	 	AJJLLLL	 	r   c                 >    t          d | j        D                       S )Nc              3   >   K   | ]}|                                 V  d S rM   rq   r,   s     r   rP   z!CacheList.size.<locals>.<genexpr>D  s*      1116688111111r   )maxr  rd   s    r   rq   zCacheList.sizeC  s!    11T[111111r   c                 @    | j         d                                         S r   )r  rw   rd   s    r   rw   zCacheList.emptyF  s    {1~##%%%r   c                 >    t          d | j        D                       S )Nc              3   $   K   | ]}|j         V  d S rM   r   r,   s     r   rP   z#CacheList.nbytes.<locals>.<genexpr>K  s$      1118111111r   )rl  r  rd   s    r   ru   zCacheList.nbytesI  s!    11T[111111r   c                 h    |                      |           }d t          |g|R  D             |_        |S )Nc                 f    g | ].\  }}}t                      |                             ||          /S r   r@   )r   r  r-   r  s       r   r   z(CacheList.from_state.<locals>.<listcomp>P  sC     
 
 
.5aAGIIaL##Aq))
 
 
r   )ry   rF   r  rz   s       r   rB   zCacheList.from_stateM  sJ    kk#
 
9<U9PZ9P9P9P
 
 

 
r   N)r4   r}   r~   r   rE  rO   rW   r   r+   r   r0   rM  rT  r   r  rX  rZ  r]  rq   rw   ru   rB   r   r   r   r  r    s              : : :  
 . . X. \  \ 
 
 X
   $ $ $     [A A A       2 2 2& & & 2 2 X2   [  r   r  c                     | j         |         }dd| j        |z
  z  z   }|d d         }t          j        |          |         ||         z
  |z  }t          j        | ||          }|S )N).rM   r   r   )r   ndimr6   r,  take_along_axis)r   shiftsr   r   expand_shiftsexpand_indicesr.  rolleds           r   dynamic_rollr  V  sm    	AW66M"3B3'N9Q<<'&*??1
DC3T222FMr   c                       e Zd ZdZdee         fdZd ZdddddZd Z	e
d	             Zej        d
             Zd Zd ZddedefdZd Zd Zd Zed             Zd Ze
d             ZdS )r  r   r:  c                     d| _         d| _        t          j        |          | _        t          j        d |D                       | _        d| _        d| _        dS )a  
        The BatchKV cache expects inputs to be left-padded.

        E.g. the following prompts:

            [1, 3, 5]
            [7]
            [2, 6, 8, 9]

        Should be padded like so:

            [0, 1, 3, 5]
            [0, 0, 0, 7]
            [2, 6, 8, 9]

        And ``left_padding`` specifies the amount of padding for each.
        In this case, ``left_padding = [1, 3, 0]``.
        Nc                     g | ]}| S r   r   r   ls     r   r   z)BatchKVCache.__init__.<locals>.<listcomp>x      999q999r   r   )r   r   r6   r?  r:  rZ   r  _right_padding)re   r:  s     r   r   zBatchKVCache.__init__b  sY    & 	H\22h99L999::	"r   c                    | j         }| j        %||j        d         z   | j        j        d         k    r$|j        \  }}}}|j        d         }| j        |j        d         z   dz
  | j        z  }	|||	| j        z  |f}
|||	| j        z  |f}t	          j        |
|j                  }t	          j        ||j                  }| j        || j        z  dk    r2| j        dd |d d f         | _        | j        dd |d d f         | _        t	          j        | j        |gd          | _        t	          j        | j        |gd          | _        n||c| _        | _        | xj	        |j        d         z  c_	        | xj         |j        d         z  c_         || j        d|| j         d d f<   || j        d|| j         d d f<   | j        dd | j         d d f         | j        dd | j         d d f         fS r   )
r  r   r   r   r6   r   r   r   r   rZ   r   s                 r   r   zBatchKVCache.update_and_fetch}  s   y9
1!59K K K+/:(Az1jaJy4:a=014BG*g	&9:FG*g	&9:FGHWdj11EHWfl33Ey$$)#q(( $	#uuaaa- 8DI"&+c5D5!!!m"<DKNDIu+=AFFF	 ndk5-AJJJ).&	4;tz!}$		TZ]"		.2	#tdi'*+06C	)111,-yk	k111,-t{3$)QQQ;N/OOOr   Nr:  r;  right_paddingc                   |J| j         t          d          t          j        |          }| xj        |z  c_        | xj        |z  c_        |.t          |          dk    rt          j        |          | _        d S d S d S )Nz7Left padding can only be added to an empty BatchKVCacher   )r   rh   r6   r?  r:  rZ   r  r  re   r:  r;  r  s       r   rZ  zBatchKVCache.prepare  s    #y$ M   8L11L-KK<'KK$]););a)?)?"$(="9"9D %$)?)?r   c                    | j         || j         }t          | j        |d d d f         d          | _        t          | j        |d d d f         d          | _        | xj        |z  c_        | xj        |z  c_        d | _         d S d S )Nr   r   )r  r  r   r   rZ   r:  )re   paddings     r   r]  zBatchKVCache.finalize  s    *)G$TY40@qIIIDI&t{GAAAtG4D1MMMDKKK7"KK("&D +*r   c                     | j         | j        }}| j        |j        d         k     r(|dd | j        d d f         }|dd | j        d d f         }||| j        | j        fS r   )r   r   r  r   rZ   r:  re   krj   s      r   r+   zBatchKVCache.state  sl    y$+19qwqz!!#{{AAA%&A#{{AAA%&A!T[$"333r   c                 j    |\  | _         | _        | _        | _        | j         j        d         | _        d S r   )r   r   rZ   r:  r   r  ri   s     r   r+   zBatchKVCache.state  s-    AB>	4;T->IOA&			r   c                     dS r   r   rd   s    r   rO   zBatchKVCache.is_trimmable  r   r   c                 p    t          | j        |          }| xj        |z  c_        | xj        |z  c_        |S rM   )r   r  rZ   r   s     r   rW   zBatchKVCache.trim  s7    	1		Q		qr   FrY   r[   c                 6    t          |f| j        | j        d|S )N)rZ   r:  )r   r  r:  )re   rY   r[   r   s       r   r   zBatchKVCache.make_mask  s3    !
id.?
 
CI
 
 	
r   c                    | j         |         | _         | j        |         | _        | j        |         | _        | j        |         | _        | j                                                                        }|dk    rT| j         d|dddf         | _         | j        d|dddf         | _        | xj        |z  c_        | xj        |z  c_        dS dS )rI  r   .N)r   r   rZ   r:  r   itemr  )re   rK  min_left_pads      r   rM  zBatchKVCache.filter  s     Im,	k-0k-0 -m< (,,..3355!	#|}}aaa"78DI+c<==!!!&;<DKII%II-	 r   c           	      T   t          | j        |j                  t          | j        j        d         |j        j        d                   fd}t	          t
          j        t           ||            ||          f           \  | _        | _        | _	        | _
        | _        dS )rO  r   c                 V   | j         z
  }| j        j        d         z
  |z
  }| j        | j        }}|dk     r |dd |d d f         }|dd |d d f         }d}|dk    s|dk    r2dd||fdg}t	          j        ||          }t	          j        ||          }| j        |z   }||| j        |fS Nr   r   .)r   r   r  r   r   r   r6   padr:  rZ   	r-   leftrightr  rj   r  r:  max_idxr   s	          r   r  z BatchKVCache.extend.<locals>.pad      QV#Dqv|A.5E618qAqyyc6E6111n%c6E6111n%qyyEQJJve}f=F1cNNF1cNN>D0La<//r   N)r  r  r   r   r   r6   r   rF   r   rZ   r:  re   rS  r  r  r   s      @@r   rT  zBatchKVCache.extend  s     di,,tyq)5:+;A+>??	0 	0 	0 	0 	0 	0 BENC##d))SSZZ!89B
 B
>	4;T-> 			r   c                 b   t                      }| j        |                                         }t          j        | j        ||dz   d d || j        f                   |_        t          j        | j        ||dz   d d || j        f                   |_        |j        j        d         |_	        |S )Nr
   r   )
r   r:  r  r6   
contiguousr   r  r   r   rZ   )re   r.  r&   r  s       r   rX  zBatchKVCache.extract  s    		#C(--//]49S37]AAAw?R-R#STT
}T[sQw7TYCV1V%WXXz'*r   c                 j   d |D             }t          |          fd|D             }t          |          }t          d |D                       }t          d |D                       }t          d |D                       }t          t          d |D                                 }t	          j        |||f|          }	t	          j        |||f|          }
t          t          ||                    D ]p\  }\  }}|j        |j        dd |j	        d d f         |	||d	z   d d |||j	        z   f<   |j
        dd |j	        d d f         |
||d	z   d d |||j	        z   f<   q | |          }|	|_        |
|_
        |xj	        |	j        d
         z  c_	        |	j        d
         |_        |S )Nc                 6    g | ]}|                                 S r   r  r,   s     r   r   z&BatchKVCache.merge.<locals>.<listcomp>       ,,,16688,,,r   c                     g | ]}|z
  S r   r   r   r  
max_lengths     r   r   z&BatchKVCache.merge.<locals>.<listcomp>      333a:>333r   c              3   H   K   | ]}|j         	|j         j        d         V  d S r  r   r   r,   s     r   rP   z%BatchKVCache.merge.<locals>.<genexpr>  1      FFA163EQ3E3E3E3EFFr   c              3   H   K   | ]}|j         	|j         j        d         V  d S Nr   r  r,   s     r   rP   z%BatchKVCache.merge.<locals>.<genexpr>  1      GGQAF4Fa4F4F4F4FGGr   c              3   H   K   | ]}|j         	|j         j        d         V  d S r  r   r   r,   s     r   rP   z%BatchKVCache.merge.<locals>.<genexpr>  2      KKqah6J"6J6J6J6JKKr   c              3   <   K   | ]}|j         	|j         j        V  d S rM   r   r   r,   s     r   rP   z%BatchKVCache.merge.<locals>.<genexpr>  -      JJqv7Iqv|7I7I7I7IJJr   r   .r
   r   )r  r    re  rf  r6   r   	enumeraterF   r   rZ   r   r   r  )r{   r  r;  r  r   HDkDvdtr   r   r   pr-   r&   r  s                  @r   r  zBatchKVCache.merge  s   ,,V,,,\\
33337333KKFFFFFFFGG&GGGGGKKFKKKKK$JJVJJJJJKKxAz2.b9991aR0;;;"3w#7#788 	R 	RIAv1v~346#zz111:L3MDQUAAAq1qx<//056Xc:QX:qqq>P5QF1q1u9aaaQ\!1122G

1%Z]
r   c                     | j         d u S rM   r   rd   s    r   rw   zBatchKVCache.empty  r   r   c                 F    | j         dS | j         j        | j        j        z   S r   r   rd   s    r   ru   zBatchKVCache.nbytes  r   r   F)r4   r}   r~   r   r   r   r   r   rZ  r]  r   r+   r   rO   rW   r5  r   rM  rT  rX  r   r  rw   ru   r   r   r   r  r  _  sj       D#T#Y # # # #6P P P2 '+D : : : : :' ' ' 4 4 X4 \' ' \'    
 
3 
d 
 
 
 

. . ."  :     [4! ! ! 5 5 X5 5 5r   r  c                   V   e Zd ZdZdee         fdZd$dZd Zd Z	d Z
d	 Zdddd
dZd Zed             Zej        d             Zed             Zej        d             Zd Zd Zd%dededefdZ	 d&dedee         defdZd Zd Zd  Zed!             Zd" Zed#             ZdS )'r2  r   r:  c                     d | _         d | _        t          j        |          | _        t          j        d |D                       | _        || _        d| _        d| _        d| _	        d | _
        d S )Nc                     g | ]}| S r   r   r  s     r   r   z1BatchRotatingKVCache.__init__.<locals>.<listcomp>*  r  r   r   F)r   r   r6   r?  r:  rZ   r   r  _offsetrotated_lengths)re   r   r:  s      r   r   zBatchRotatingKVCache.__init__%  sk    	H\22h99L999:: 	 r   Nc                 d    |dk    r|d|d d d f         }|t          j        ||gd          S |S r  rQ  )re   r  rj   r  s       r   r  zBatchRotatingKVCache._trim5  sH    q==#yzz111$%A>1f+A6666r   c                     | j         rnt          j        | j        | j         d          | _        t          j        | j        | j         d          | _        | j        j        d         | _        d| _         dS dS )z:
        Rearrange the cache into temporal order.
        r   r   FN)r  r6   r-  r   r  r   r   rd   s    r   r  z$BatchRotatingKVCache._temporal_order<  so     < 	!	DI:A>>>DI'$+	zBBBDK	*DI DLLL		! 	!r   c                    | j         || _         || _        nj|                                  | j         j        d         | j        k    r<| j         dd | j        d d f         | _         | j        dd | j        d d f         | _        | j        t          j        d| j        | j        z
            }t          | j         |d d d f         d          | _         t          | j        |d d d f         d          | _        | xj
        |z  c_
        | xj        |z  c_        | j        | j        z
  dz   }|dk    r| xj
        |z  c_
        |                     || j         |          | _         |                     || j        |          | _        | xj        |j        d         z  c_        | xj        |j        d         z  c_        | j         j        d         | _        t          j        | j         | j
        | j        f          | _         | j         | j        fS )Nr   .r   r   r
   )r   r   r  r   r  r  r6   maximumrZ   r  r:  r   r  r  depends)re   r   r   r-  r  s        r   r  z#BatchRotatingKVCache._update_concatF  s   9DI DKK   """ yq!DI-- Ic;TY;&9:	"k#{{AAA*=> }(z!T[4=%@AA(DDMJJJ	*4;QQQWANNN!!T)!!t# 	DM1A5I1}}!!Y.!!

9di>>DI**YVDDDKtz!}$
1%IOA&	 Jty4+<dk*JKK	y$+%%r   c                    | j         t          d          |j        \  }}}}| j        }| j        1|| j        j        d         k    r| j        j        d         | j        k     r|j        d         }t          | j        | j        |z
            }	|||	|f}
|||	|f}t          j	        |
|j
                  }t          j	        ||j
                  }| j        Et          j        | j        |gd          | _        t          j        | j        |gd          | _        n||c| _        | _        || _        | j        j        d         | j        z
  }|dk    r\|                     || j                  | _        |                     || j                  | _        | j        | _        | xj        |z  c_        | j        | j        k    rd| _        d| _        | j        r| xj        |z  c_        || j        d| j        | j        |z   d d f<   || j        d| j        | j        |z   d d f<   | xj        |z  c_        | xj        |z  c_        | xj        |z  c_        t          j        | j        | j        | j        f          | _        | j        | j        k     r2| j        dd | j        d d f         | j        dd | j        d d f         fS | j        | j        fS )NzFfinalize() should be called before deocoding with BatchRotatingKVCacher   r   r   r   T.)r  RuntimeErrorr   r  r   r   r   r   r6   r   r   r   r   r  r  r:  r  rZ   r  r  s                  r   r  z%BatchRotatingKVCache._update_in_placem  s   =$X   (,z$:q*|9DIOA&&&49?1+=+M+MaJ49dmd&:;;H*h
;G*h
;GHWdj11EHWfl33Ey$NDIu+=AFFF	 ndk5-AJJJ).&	4;DI IOA&6	q==

9di88DI**Y<<DKDI* 9%%DLDI< 	#" 8<	#ty49q=0!!!349?CTY]2AAA56q		Q		 Jty4+<dk*JKK	 <$-''	#~~qqq01C4<23  y$+%%r   c                 |    |j         d         dk    r|                     ||          S |                     ||          S r  r   r   s      r   r   z%BatchRotatingKVCache.update_and_fetch  r!  r   r  c                   |J| j         t          d          t          j        |          }| xj        |z  c_        | xj        |z  c_        |6t          |          dk    r%t          j        |          | j        z   | _        d S d S d S )Nz?Left padding can only be added to an empty BatchRotatingKVCacher   )r   rh   r6   r?  r:  rZ   r  r  r  s       r   rZ  zBatchRotatingKVCache.prepare  s    #y$ U   8L11L-KK<'KK$]););a)?)?HW--;DMMM %$)?)?r   c                 B   | j         t          j        d| j        | j         z
            }t	          | j        |d d d f         d          | _        t	          | j        |d d d f         d          | _        | xj        |z  c_        | xj        |z  c_        d | _         d S d S )Nr   r   r   )r  r6   r  rZ   r  r   r   r:  )re   r-  s     r   r]  zBatchRotatingKVCache.finalize  s    =$:at}!<==D$TYQQQWAFFFDI&t{DDMJJJDK%KK4KK DMMM %$r   c                     | j         | j        }}| j        |j        d         k     r(|dd | j        d d f         |dd | j        d d f         }}||| j        | j        fS r   )r   r   r  r   rZ   r:  r  s      r   r+   zBatchRotatingKVCache.state  sk    y$+1<!'!*$$S.DL.!!!+,a^t|^QQQ0F.GqA!T[$"333r   c                 <    |\  | _         | _        | _        | _        d S rM   r   r   rZ   r:  ri   s     r   r+   zBatchRotatingKVCache.state  s    AB>	4;T->->->r   c           	      v    t          t          t          | j        | j        | j        | j        f                    S rM   )r   r   r   r   r  r  r  rd   s    r   r0   zBatchRotatingKVCache.meta_state  s+    St}dlDIt|TUUVVVr   c                     t          t          |d d                   \  | _        | _        | _        t          |d                   | _        d S r  )r   r   r   r  r  r5  r  ri   s     r   r0   zBatchRotatingKVCache.meta_state  sC    14bqbE2
 2
.t|TY AaDzzr   c                 "    | j         | j        k     S rM   )r  r   rd   s    r   rO   z!BatchRotatingKVCache.is_trimmable  s    |dm++r   c                     t          | j        |          }| xj        |z  c_        | xj        |z  c_        | xj        |z  c_        |S rM   )r   r  r  rZ   r   s     r   rW   zBatchRotatingKVCache.trim  sG    a  		Q		qr   r   r   r   r   r   c                      t          d          )Nz%BatchRotatingKVCache Quantization NYIrs   r   s      r   r  z!BatchRotatingKVCache.to_quantized  s    !"IJJJr   FrY   r\   r[   c                 N   | j         }|p| j        }t          | j        dz
  | j                  }t	          j        ||z             }|rt	          j        |||z             n|}|d d d f         }|d          }||k    }||||z   k     z  }| j        | j        z
  t          |dk              z   x}	dk    r||	z
  }|dk    o| j        p| j        | j        k    }
|
r|dz
  }||t	          j	        |d          k    z  }|
r.| j        }|| j        k    rd}t	          j
        ||dz   d          }|S )Nr
   r   )r
   r   r   r   )r+  r   )r:  r   r   r  r6   r,  r  r   r  expand_dimsr-  )re   rY   r\   r[   r:  rZ   rindslindsr0  r  r  r.  s               r   r   zBatchRotatingKVCache.make_mask  sV    (!2T]T]Q&55	&1*%%17B	&&1*---Uaaagd~+++T]2SQZZ??I1DD')3Lq&IdlHdi4=.H 	,'!+Lu|Y G GGH 	9)Cdm##74sQwR888Dr   c                     | j         |         | _         | j        |         | _        | j        |         | _        | j        |         | _        dS r  r  rL  s     r   rM  zBatchRotatingKVCache.filter  sD     Im,	k-0k-0 -m<r   c           	      "   | j         |j         k    s| j        |j        k    r(|                                  |                                 t          | j        |j                  t          | j        j        d         |j        j        d                   fd}t          t          j        t           ||            ||          f           \  | _        | _
        | _        | _        | _        t          | j        |j                  | _        dS )rO  r   c                 V   | j         z
  }| j        j        d         z
  |z
  }| j        | j        }}|dk     r |dd |d d f         }|dd |d d f         }d}|dk    s|dk    r2dd||fdg}t	          j        ||          }t	          j        ||          }| j        |z   }||| j        |fS r  r  r  s	          r   r  z(BatchRotatingKVCache.extend.<locals>.pad  r  r   N)r  r  r  r  r   r   r   r6   r   rF   r   rZ   r:  r  r  s      @@r   rT  zBatchRotatingKVCache.extend  s    LEM))di5:.E.E  """!!###di,,tyq)5:+;A+>??	0 	0 	0 	0 	0 	0 BENC##d))SSZZ!89B
 B
>	4;T-> 	4<77r   c                    t          | j                  }| j        |                                         }| j        |                                         }| j        ||dz            |_        | j        ||dz            |_        | j        |_        | j        rZt          j
        |j        | j         d          |_        t          j
        |j        | j         d          |_        | j        |_        t          j        |j        d d d d ||j        f                   |_        t          j        |j        d d d d ||j        f                   |_        ||_        |j        j        d         |_        |S )Nr
   r   r   )r   r   r:  r  rZ   r   r   r  r  r6   r-  r  r   )re   r.  r&   r  rZ   s        r   rX  zBatchRotatingKVCache.extract-  s?   ..#C(--//S!&&((YsS1W}-
{3q=1Y
< 	'diZa@@@EJ75<$)!DDDELEJ]5:aaaGej4H.H#IJJ
}U\!!!QQQ%*8L2L%MNNZ%a(
r   c                    t          fdD                       st          d          d D             }d D             }t          |          fd|D             }t                    }t          d D                       }t          d D                       }t          d D                       }t	          t          d	 D                                 }	t          j        |||f|	
          }
t          j        |||f|	
          }t          t          |                    D ]r\  }\  }}|j
        |                    |j
                  |
||dz   d d |||j        z   f<   |                    |j                  |||dz   d d |||j        z   f<   s | d         j        |          }|
|_
        ||_        t          j        |          |_        |
j        d         |_        |
j        d         |_        |S )Nc              3   D   K   | ]}|j         d          j         k    V  dS )r   N)r   )r   r-   r  s     r   rP   z-BatchRotatingKVCache.merge.<locals>.<genexpr>@  s1      DD1:!33DDDDDDr   zEBatchRotatingKVCache can only merge caches with the same maximum sizec                     g | ]	}|j         
S r   r   r,   s     r   r   z.BatchRotatingKVCache.merge.<locals>.<listcomp>E  s    ,,,18,,,r   c                 6    g | ]}|                                 S r   r  r,   s     r   r   z.BatchRotatingKVCache.merge.<locals>.<listcomp>F  r  r   c                     g | ]}|z
  S r   r   r  s     r   r   z.BatchRotatingKVCache.merge.<locals>.<listcomp>H  r  r   c              3   H   K   | ]}|j         	|j         j        d         V  d S r  r  r,   s     r   rP   z-BatchRotatingKVCache.merge.<locals>.<genexpr>J  r  r   c              3   H   K   | ]}|j         	|j         j        d         V  d S r  r  r,   s     r   rP   z-BatchRotatingKVCache.merge.<locals>.<genexpr>K  r  r   c              3   H   K   | ]}|j         	|j         j        d         V  d S r  r  r,   s     r   rP   z-BatchRotatingKVCache.merge.<locals>.<genexpr>L  r  r   c              3   <   K   | ]}|j         	|j         j        V  d S rM   r  r,   s     r   rP   z-BatchRotatingKVCache.merge.<locals>.<genexpr>M  r  r   r   r
   r   r   )rQ   rh   r  r    re  rf  r6   r   r  rF   r   r  r  r   r   r?  rZ   r   r  )r{   r  offsetsr;  r  r   r  r  r  r  r   r   r   r  r-   r&   r  s    `              @r   r  zBatchRotatingKVCache.merge>  sC   DDDDVDDDDD 	W   -,V,,,,,V,,,\\
33337333KKFFFFFFFGG&GGGGGKKFKKKKK$JJVJJJJJKKxAz2.b9991aR0;;;"3w#7#788 	O 	OIAv1v~121B1B161J1JDQUAAAq1qv:~-.343D3DQX3N3NF1q1u9aaaQZ/00F1I&00
x((Z]

1r   c                     | j         d u S rM   r   rd   s    r   rw   zBatchRotatingKVCache.empty`  r   r   c                 F    | j         dS | j         j        | j        j        z   S r   r   rd   s    r   ru   zBatchRotatingKVCache.nbytesc  r   r   rM   r  ro   ) r4   r}   r~   r   r   r   r   r  r  r  r  r   rZ  r]  r   r+   r   r0   rO   rW   r   r  r   r5  r   rM  rT  rX  r   r  rw   ru   r   r   r   r2  r2  "  s8       DtCy        ! ! !%& %& %&N9& 9& 9&v1 1 1
 '+D < < < < <! ! ! 4 4 X4 \C C \C W W XW " " ", , ,  K Ks Ks KCS K K K K OT #+C=GK   := = =8 8 8@  "   [B! ! ! 5 5 X5 5 5r   r2  rM   r  )&copytypingr   r   r   r   mlx.corecorer6   mlx.nnnn	mlx.utilsr   r   r	   baser   Moduler   r$   r   r<   rJ   r5  rS   rX   r`   rb   r   r   r   r   r7  ro  r  r  r  r2  r   r   r   <module>r"     s    , , , , , , , , , , , ,             < < < < < < < < < < $ $ $ $ $ $
 "&6 696#6 
#Y6 6 6 68 TV ? ? ?T#Y ?$sCx. ? ? ? ?&   40c 0t 0 0 0 02T#Y 2C 2DI 2 2 2 2&



'+
:B3-
 
 
 
0 0 0 0 0 0 0 0f35 35 35 35 35 35 35 35lZS ZS ZS ZS ZSz ZS ZS ZSzR5 R5 R5 R5 R5j R5 R5 R5ju5 u5 u5 u5 u5j u5 u5 u5pWB WB WB WB WB* WB WB WBtP5 P5 P5 P5 P5Z P5 P5 P5fV V V V V
 V V Vr  @5 @5 @5 @5 @5: @5 @5 @5FE5 E5 E5 E5 E5: E5 E5 E5 E5 E5r   