
    )j'                        d dl Z d dlmZ d dlmZ d dlmZmZ d dlm	Z
 d dlmZ ddlmZmZmZ e G d de                      Z ee
j        d	
          d             Zd Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    N)	dataclass)partial)AnyOptional   )BaseModelArgscreate_attention_maskscaled_dot_product_attentionc                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   dZeed<   dZe	ed<   dZ
eed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dS )	ModelArgs
model_typehidden_sizedense_attention_every_n_layersff_intermediate_sizegegelu_limitnum_hidden_layersnum_attention_headslayer_norm_epsilon
vocab_sizenum_key_value_heads      ?mup_attn_multiplierTmup_use_scalingg      $@mup_embedding_multiplierg       @mup_width_multiplieri@B rope_embedding_baserope_position_scale@   blocksparse_block_size   blocksparse_num_local_blocks   blocksparse_vert_strideN)__name__
__module____qualname__str__annotations__intfloatr   r   boolr   r   r   r   r   r!   r#        a/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/phi3small.pyr   r      s        OOO$''''OOO!$$$$ OT   &*e***"%%%%%!((((!$$$$"$C$$$(* #***#$S$$$$$r-   r   T)	shapelessc           	      <   t          j        t          j        |           | t          j        | d |                    } t          j        t          j        |          |t          j        || |                    }| t          j        d| z            z  }||dz   z  S )N)a_mina_maxgZd;?r   )mxwhereisinfclipsigmoid)a_gelua_linearlimitout_gelus       r.   gegelu_implr<   %   s    X

d%000 F
 x

e444 H
 
56>222Hx#~&&r-   c                 X    | dd d df         | ddd df         }}t          |||          S )N.   r   )r<   )xr:   r8   r9   s       r.   gegelur@   5   s9    ccc{Ac14a4iLHFvx///r-   c            	            e Zd Zdef fdZd Zd Z	 	 ddej        de	ej                 de	e
         d	ej        fd
Z xZS )	Attentionargsc                    t                                                       |j        }|j        x| _        }|j        x| _        }||z  | _        |j        |z  x| _        }t          j
        || j        d| j        z  z   |z            | _        t          j
        ||          | _        |j        r||j        z  }nt          j        |          }d|z  | _        t          j        |d|j        |j                  | _        ||j        z  dk    rMd| _        |j        | _        | j        dvrt3          d| j                   |j        | _        |j        | _        d S d| _        d S )	Nr>   r   F)traditionalbasescaler   T)    r   zUnsupported block size )super__init__r   r   n_headsr   
n_kv_heads
n_q_per_kvhead_dimnnLinearquery_key_valuedenser   r   mathsqrtrG   RoPEr   r   roper   block_sparser   
ValueErrorr!   r#   )	selfrC   	layer_idxdimrK   rL   rN   norm_factor	__class__s	           r.   rJ   zAttention.__init__;   s   !%!99w'+'??*!Z/#'#3w#>>!y$,T_!44@ 
  
 YsC((
 	."T%==KK)H--K;&
G)*	
 
 
	 t::a?? $D*.*ED'*(:: Kd.IKK   150QD-+/+GD((( %Dr-   c                 |   | j         }| j        }| j        }| j        }||z   dz
  |z  }||z   dz
  |z  }t	          j        ||z
  |          d d d d f         }	t	          j        |          d         }
t	          j        |          d d d f         t	          j        d|dz             d d d f         z   |z  }|dk    d d d d d f         }|	|
k    |	|
z
  |k     |z  z  } |j        | j        | j        g|j	        dd          R  }t	          j
        t	          j
        ||d          |d          }||d| d d |f         fS )Nr   NNr   )axis.)r#   r!   r   rK   r3   arangereshaperL   rM   shaperepeat)rY   q_lenkv_lenvert_stridelocal_blocks
block_sizerK   	kv_blocksq_blocksq_posk_posmask_vert_strided
block_mask
dense_masks                 r.   _block_sparse_maskzAttention._block_sparse_maskc   s   280
,j(1,;	J&*z9	)h.	::4D=I	)$$Z0 Ii  qqq)BIa1,E,Eaaag,NN /!3QQQaaaZ@unU]\)->>

 (Z'OT_
/9/?/D
 
 

 YIj*2666

 
 

 :cE677GVG&;<<<r-   c           
         ||z  }|j         d         }|j         d         }t          j        ||| j        | j        |df          }t          j        |d          }t          j        |d          }|                     ||j         d                   \  }}	|t          j        |dd          z  }
||
|z   }
|
t          j        |	t          j	        d|
j
                  t          j	        t          d           |
j
                            z   }
t          j        |
dd          }
|
|z  }t          j        ||| j        |df          S )Nr   r>   ra   r`   infT)rb   precise)re   r3   rd   rL   rM   expand_dimsrs   swapaxesr4   arraydtyper*   softmaxrK   )rY   querieskeysvaluesrG   maskBLrq   rr   scoresoutputs               r.   _block_sparse_attentionz!Attention._block_sparse_attention~   s3   '/M!M!*Wq$/4?Ar&RSS~dA&&** "&!8!8DJrN!K!K
J2;tR444 d]F"(FL1128U5\\M6<3X3X
 
 
 FT:::&
 z&1dlAr":;;;r-   Nr?   r   cachereturnc                    |j         \  }}}|                     |          }|                    ||d| j        dz   | j                  }|dd dd d f                             dd          }|ddd d f         }	|ddd d f         }
|                    dddd          }|	                    dddd          }	|
                    dddd          }
|R|                     ||j        	          }|                     |	|j        	          }	|	                    |	|
          \  }	}
n*|                     |          }|                     |	          }	| j
        r |                     ||	|
| j        |
          }nt          ||	|
|| j        |          }|                    dddd                              ||d          }|                     |          S )Nra   r>   .r`   r   r      )offset)rG   r   )r   rG   r   )re   rQ   rd   rM   rN   flatten	transposerV   r   update_and_fetchrW   r   rG   r
   rR   )rY   r?   r   r   r   r   Dqkvr|   r}   r~   r   s               r.   __call__zAttention.__call__   s    '1a""1%%kk!QDOa$7GGc3B3k"**2r223AAA:S"aaaZ ##Aq!Q//~~aAq))!!!Q1--iii==G99T%,977D 11$??LD&&ii((G99T??D 	11vTZd 2  FF 2vU$*4  F !!!Q1--55aB??zz&!!!r-   r_   )r$   r%   r&   r   rJ   rs   r   r3   ry   r   r   r   __classcell__r]   s   @r.   rB   rB   :   s        &&Y && && && && && &&P= = =6< < <H $(#	$" $"8$" rx $" }	$"
 
$" $" $" $" $" $" $" $"r-   rB   c                   4     e Zd Z fdZdej        fdZ xZS )MLPc                     t                                                       |j        }|j        }|j        | _        t          j        |d|z            | _        t          j        ||          | _        d S )Nr>   )	rI   rJ   r   r   r   rO   rP   up_proj	down_proj)rY   rC   r[   
hidden_dimr]   s       r.   rJ   zMLP.__init__   sb    .
 -ya*n55:s33r-   r   c                 |    |                      |          }|                     t          || j                            S N)r   r   r@   r   )rY   r?   s     r.   r   zMLP.__call__   s0    LLOO~~fQ(9::;;;r-   )r$   r%   r&   rJ   r3   ry   r   r   r   s   @r.   r   r      sU        4 4 4 4 4<RX < < < < < < < <r-   r   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
TransformerBlockrC   c                 l   t                                                       |j        | _        |j        | _        t	          ||          | _        t          |          | _        t          j	        |j        |j
                  | _        t          j	        |j        |j
                  | _        || _        d S )Neps)rI   rJ   r   r   rB   	self_attnr   mlprO   	LayerNormr   input_layernormpost_attention_layernormrC   )rY   rC   rZ   r]   s      r.   rJ   zTransformerBlock.__init__   s    #'#; +"433t99!|$"9 
  
  
 )+')
 )
 )
% 			r-   Nr?   r   r   r   c                     |                      |                     |          ||          }||z   }|                     |                     |                    }||z   }|S r   )r   r   r   r   )rY   r?   r   r   rhouts          r.   r   zTransformerBlock.__call__   s]     NN4//22D%@@EHHT2215566!e
r-   r_   )r$   r%   r&   r   rJ   r3   ry   r   r   r   r   r   s   @r.   r   r      s        Y      $ $(#	
 
8
 rx 
 }	

 

 
 
 
 
 
 
 
r-   r   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )	Phi3ModelrC   c                    t                                                       | _        j        | _        j        | _        | j        dk    sJ j        | _        t          j        j        j                  | _	        fdt          j                  D             | _        t          j        j        j                  | _        d S )Nr   c                 2    g | ]}t          |           S ))rC   rZ   )r   ).0lrC   s     r.   
<listcomp>z&Phi3Model.__init__.<locals>.<listcomp>   s6     
 
 
 $!444
 
 
r-   r   )rI   rJ   rC   r   r   r   rO   	Embeddingr   embed_tokensrangelayersr   r   final_layernormrY   rC   r]   s    `r.   rJ   zPhi3Model.__init__   s    	/!%!7""""(,(E%L$:JKK
 
 
 
4122
 
 
  "|$"9 
  
  
r-   Ninputsc                 ,   |                      |          }| j        r
| j        |z  }|d gt          | j                  z  }t	          ||d         d          }t          | j        |          D ]\  }} ||||          }|                     |          S )Nr   T)return_array)r   r   lenr   r	   zipr   )rY   r   r   r   r   layercs          r.   r   zPhi3Model.__call__  s    
 f%%( 	2-1A=FS---E$QatDDDDK// 	" 	"HE1aq!!AA##A&&&r-   r   )	r$   r%   r&   r   rJ   r3   ry   r   r   r   s   @r.   r   r      sl        
Y 
 
 
 
 
 
& ' '' ' ' ' ' ' ' 'r-   r   c                   Z     e Zd Zdef fdZ	 ddej        fdZed             Z	d Z
 xZS )	ModelrC   c           	          t                                                       |j        | _        t          |          | _        || _        |j        | _        t          j        g dt          t          dd                    z             | _        d S )N)i i i i i i i i  )rI   rJ   r   r   modelrC   r   r3   ry   listr   _dummy_tokenizer_idsr   s     r.   rJ   zModel.__init__  s~    /t__
	$($=!$&H<<<5(())*%
 %
!!!r-   Nr   c                     |                      ||          }| j         j                            |          }| j        r
|| j        z  }t	          d           || j        <   |S )Nru   )r   r   	as_linearr   r*   r   )rY   r   r   r   s       r.   r   zModel.__call__"  sa    
 jj''j%//44$ 	211C*/,,D%&
r-   c                     | j         j        S r   )r   r   )rY   s    r.   r   zModel.layers.  s    z  r-   c                 >    d |                                 D             S )Nc                 "    i | ]\  }}d |v	||S )zself_attn.rotary_emb.inv_freqr,   )r   kvs      r.   
<dictcomp>z"Model.sanitize.<locals>.<dictcomp>4  s1     
 
 
Q0OWX0X0XAq0X0X0Xr-   )items)rY   weightss     r.   sanitizezModel.sanitize2  s+    
 
$]]__
 
 
 	
r-   r   )r$   r%   r&   r   rJ   r3   ry   r   propertyr   r   r   r   s   @r.   r   r     s        	
Y 	
 	
 	
 	
 	
 	
 
 

 
 
 
 ! ! X!
 
 
 
 
 
 
r-   r   )rS   dataclassesr   	functoolsr   typingr   r   mlx.corecorer3   mlx.nnrO   rF   r   r	   r
   r   compiler<   r@   ModulerB   r   r   r   r   r,   r-   r.   <module>r      s    ! ! ! ! ! !                                   T T T T T T T T T T % % % % % % % %, 	t$$$' ' %$'0 0 0
I" I" I" I" I"	 I" I" I"X< < < < <") < < <    ry   :"' "' "' "' "'	 "' "' "'J 
  
  
  
  
BI  
  
  
  
  
r-   