
    )jD                        d dl Z d dlmZ d dlmZmZ d dlmZ d dl	m
Z
 e G d d                      Z	 	 	 	 ddeded	ee         d
eej                 deej                 f
dZ	 dd	ee         defdZd dZ	 	 d!dej        deej        ej        ej        f         deej        ej        ej        f         dedeej                 dededej        fdZ	 d dedeej                 deej                 dej        fdZdS )"    N)	dataclass)AnyOptional)tree_mapc                   $    e Zd Zed             ZdS )BaseModelArgsc                 P       di  fd|                                 D             S )Nc                 R    i | ]#\  }}|t          j                  j        v  ||$S  )inspect	signature
parameters).0kvclss      \/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/base.py
<dictcomp>z+BaseModelArgs.from_dict.<locals>.<dictcomp>   sB       Aq)#..999 1999    r   )items)r   paramss   ` r   	from_dictzBaseModelArgs.from_dict   sO    s 
 
   "LLNN  
 
 	
r   N)__name__
__module____qualname__classmethodr   r   r   r   r   r      s-        
 
 [
 
 
r   r   Noffsetwindow_sizeright_paddingleft_paddingc                 @   t          j        || z             }|rt          j        ||| z             n|}|d d d f         }|d          }||k    }|||||z   k     z  }|"||t          j        || z   |z
  d          k     z  }||t          j        |d          |k    z  }|S )N)         )mxarangeexpand_dims)r   r   r   r    r!   rindslindsmasks           r   create_causal_maskr,      s     Ifqj!!E-3>BIffqj)))E!!!T'NE$KEE>Duu{223 ur~vz].JIVVVWr~lI>>%GHKr   Freturn_arrayc                     | j         d         }|r(t          |d          r|                    |||          S |dk    rd S |s|r||k    rt          ||          S dS )Nr#   	make_mask)r-   r   )r   causal)shapehasattrr/   r,   )hcacher   r-   r   s        r   create_attention_maskr5   -   s     	

A V,, Vq|UUUAvvt > >K!!====8r   c                 j    |r0t          |d          r |                    | j        d                   S d S )Nr/   r#   )r2   r/   r1   )r3   r4   s     r   create_ssm_maskr7   :   s8     +,, +qwqz***4r   @      queriesq_keysq_valuesscaler+   
group_sizebitsreturnc                    | j         \  }}}	}
|d         j         d         }||z  }| |z  } |dk    r<t          j        | ||||	|
f          } t          d |          }t          d |          }t          j        | g|R d||d}|t          |t                    rT|j         dd          \  }}t          j        ||z
  |          }t          j        |          }|d d d f         |d          k    }|j        t          j	        k    r3t          j
        ||t          j        |j                  j                  }n||z  }t          j        |d	d
          }t          j        |g|R d||d}|dk    rt          j        ||||	|
f          }|S )Nr   r#   c                 .    t          j        | d          S NrB   )axisr&   r(   xs    r   <lambda>z8quantized_scaled_dot_product_attention.<locals>.<lambda>Q   s    BN12$>$>$> r   c                 .    t          j        | d          S rD   rF   rG   s    r   rI   z8quantized_scaled_dot_product_attention.<locals>.<lambda>R   s    bnQR&@&@&@ r   T)	transposer>   r?   )rE   preciseF)r1   r&   reshaper   quantized_matmul
isinstancestrr'   dtypebool_wherefinfominsoftmax)r:   r;   r<   r=   r+   r>   r?   B	n_q_headsLD
n_kv_heads	n_repeatsscoresqLkL	q_indices	k_indicesouts                      r   &quantized_scaled_dot_product_attentionre   @   s    !Ay!Q$JZ'IuG1}}*Wq*iA&FGG>>GG@@(KK  $(Zd  F dC   	9\"##&FB	"r'2..I	"IQQQW%48D:!!XdFBHV\,B,B,FGGFFdNFZR666F

 %*z  C 1}}jq)Q233Jr   sinksc           	          t          |d          r1|t          d          t          | |||||j        |j                  S t
          j                            | |||||          S )Nr?   z0Quantized SDPA does not support attention sinks.)r=   r+   r>   r?   )r=   r+   rf   )r2   
ValueErrorre   r>   r?   r&   fastscaled_dot_product_attention)r:   keysvaluesr4   r=   r+   rf   s          r   rj   rj   l   s     uf 
OPPP5'
 
 
 	
 w33 4 
 
 	
r   )r   NNN)NNF)N)r8   r9   )r   dataclassesr   typingr   r   mlx.corecorer&   	mlx.utilsr   r   intarrayr,   boolr5   r7   tuplefloatre   rj   r   r   r   <module>rw      s>    ! ! ! ! ! !                             	
 	
 	
 	
 	
 	
 	
 	
 !%(,'+ 
 # BH%	
 28$   , LQ
 
 (
DH
 
 
 
    ) )X)"(BHbh./) BHbh01) 	)
 28
) ) ) X) ) ) )f !%
 

 
 28

 BH
 X
 
 
 
 
 
r   