
    )j                     0   d dl Z d dlmZ d dlmZ d dlmZ ddlm	Z	m
Z
mZ e G d de	                      Z G d dej                  Z G d	 d
ej                  Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    N)	dataclass   )BaseModelArgscreate_attention_maskscaled_dot_product_attentionc                       e Zd ZU dZeed<   dZeed<   dZeed<   dZ	eed<   d	Z
eed
<   d	Zeed<   d	Zeed<   dZeed<   dZeed<   dZeed<   dZeed<   d ZdS )	ModelArgsphi
model_typei   max_position_embeddingsi   
vocab_sizei 
  hidden_size    num_attention_headsnum_hidden_layersnum_key_value_headsg?partial_rotary_factori (  intermediate_sizegh㈵>layer_norm_epsg     @
rope_thetac                 0    | j         | j        | _         d S d S N)r   r   selfs    [/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/phi.py__post_init__zModelArgs.__post_init__   s$    #+'+'?D$$$ ,+    N)__name__
__module____qualname__r   str__annotations__r   intr   r   r   r   r   r   floatr   r   r   r    r   r   r	   r	      s         J#'S'''JK!!!!s!!!!#&5&&&"s""" NE   J@ @ @ @ @r   r	   c                   ,     e Zd Zdef fdZddZ xZS )PhiAttentionconfigc                 f   t                                                       |j        | _        |j        | _        | j        | j        z  | _        |j        | _        | j        | j        z  | _        |j        | _        |j	        | _	        | j        | j        z  | j        k    r t          d| j         d| j         d          t          j        | j        | j        | j        z  d          | _        t          j        | j        | j        | j        z  d          | _        t          j        | j        | j        | j        z  d          | _        t          j        | j        | j        z  | j        d          | _        t          j        t%          | j	        | j        z            d| j                  | _        d S )Nz?hidden_size must be divisible by num_heads (got `hidden_size`: z and `num_heads`: z).TbiasF)traditionalbase)super__init__r   r   	num_headshead_dimr   repeatsr   r   
ValueErrornnLinearq_projk_projv_projdenseRoPEr#   roper   r(   	__class__s     r   r/   zPhiAttention.__init__    s   !-3(DN:#)#= ~)AA +%+%A"MDN*t/???8RVRb 8 8%)^8 8 8  
 idnt}<4
 
 
 id6FT
 
 
 id6FT
 
 
 YNT]*D,<4
 
 

 G*T]:;;
 
 
			r   Nc                    |                      |          |                     |          |                     |          }}}|j        \  }}}	| j        | j        }}
|                    |||
d                              dd          }|                    |||d                              dd          }|                    |||d                              dd          }|R|                     ||j	                  }|                     ||j	                  }|
                    ||          \  }}n*|                     |          }|                     |          }t          j        d|j        d         z            }t          |                    t          j                  |||||                              |j                  }|                    dd                              ||d          }|                     |          S )Nr      )offset)cachescalemask)r6   r7   r8   shaper0   r   reshapemoveaxisr;   rA   update_and_fetchmathsqrtr   astypemxfloat32dtyper9   )r   xrD   rB   querieskeysvaluesBLDn_heads
n_kv_headsrC   outputs                 r   __call__zPhiAttention.__call__D   s    $AAAv -1a"nd.F //	
 

 (1a.. 	 ||Aq*b11::1a@@1j"55>>q!DD iii==G99T%,977D 11$??LD&&ii((G99T??D	!gmB//00-NN2:&&
 
 
 &

 	 A&&..q!R88zz&!!!r   )NNr   r   r    r	   r/   rY   __classcell__r=   s   @r   r'   r'      sY        "
y "
 "
 "
 "
 "
 "
H&" &" &" &" &" &" &" &"r   r'   c                   :     e Zd Zdef fdZdej        fdZ xZS )PhiMLPr(   c                     t                                                       t          j        |j        |j                  | _        t          j        |j        |j                  | _        d S r   )r.   r/   r4   r5   r   r   fc1fc2r<   s     r   r/   zPhiMLP.__init__n   sO    9V/1IJJ9V5v7IJJr   returnc                 v    |                      t          j        |                     |                              S r   )ra   r4   gelu_approxr`   )r   rO   s     r   rY   zPhiMLP.__call__s   s(    xxtxx{{33444r   )	r   r   r    r	   r/   rL   arrayrY   r[   r\   s   @r   r^   r^   m   sh        Ky K K K K K K
5RX 5 5 5 5 5 5 5 5r   r^   c                   *     e Zd Zdef fdZd Z xZS )PhiDecoderLayerr(   c                     t                                                       t          |          | _        t	          j        |j        |j                  | _        t          |          | _
        d S )N)r(   eps)r.   r/   r'   	self_attnr4   	LayerNormr   r   input_layernormr^   mlpr<   s     r   r/   zPhiDecoderLayer.__init__x   sb    %V444!|F$9 
  
  
 &>>r   c                     |                      |          }|                     |||          }|                     |          }||z   |z   S r   )rm   rk   rn   )r   rO   rD   rB   hattn_hff_hs          r   rY   zPhiDecoderLayer.__call__   sH      ##4//xx{{}q  r   rZ   r\   s   @r   rg   rg   w   sS        "y " " " " " "! ! ! ! ! ! !r   rg   c                   *     e Zd Zdef fdZd Z xZS )PhiModelr(   c                 &   t                                                       t          j        j        j                  | _        fdt          j                  D             | _	        t          j
        j        j                  | _        d S )Nc                 .    g | ]}t                    S r%   )rg   ).0ir(   s     r   
<listcomp>z%PhiModel.__init__.<locals>.<listcomp>   s!    XXX1v..XXXr   ri   )r.   r/   r4   	Embeddingr   r   embed_tokensranger   layersrl   r   final_layernormr<   s    `r   r/   zPhiModel.__init__   s    L):F<NOOXXXXf>V8W8WXXX!|F$9 
  
  
r   c                    |                      |          }|d gt          | j                  z  }t          ||d                   }t	          | j        |          D ]\  }} ||||          }|                     |          S )Nr   )r{   lenr}   r   zipr~   )r   rO   rB   rD   layercs         r   rY   zPhiModel.__call__   s    a  =FS---E$Qa11DK// 	" 	"HE1aq!!AA##A&&&r   rZ   r\   s   @r   rt   rt      sS        
y 
 
 
 
 
 
	' 	' 	' 	' 	' 	' 	'r   rt   c                   b     e Zd Zdef fdZ	 ddej        dej        fdZed             Z	 xZ
S )	Modelr(   c                     t                                                       |j        | _        t          |          | _        t          j        |j        |j        d          | _	        || _
        d S )NTr*   )r.   r/   r   rt   modelr4   r5   r   r   lm_headargsr<   s     r   r/   zModel.__init__   s[     +f%%
y!3V5FTRRR			r   NrO   rb   c                 X    |                      ||          }|                     |          S r   )r   r   )r   rO   rB   ys       r   rY   zModel.__call__   s'    
 JJq%  ||Ar   c                     | j         j        S r   )r   r}   r   s    r   r}   zModel.layers   s    z  r   r   )r   r   r    r	   r/   rL   re   rY   propertyr}   r[   r\   s   @r   r   r      s        y        8 
	    ! ! X! ! ! ! !r   r   )rI   dataclassesr   mlx.corecorerL   mlx.nnr4   r-   r   r   r   r	   Moduler'   r^   rg   rt   r   r%   r   r   <module>r      s    ! ! ! ! ! !             T T T T T T T T T T @ @ @ @ @ @ @ @$K" K" K" K" K"29 K" K" K"\5 5 5 5 5RY 5 5 5! ! ! ! !bi ! ! ! ' ' ' ' 'ry ' ' '*! ! ! ! !BI ! ! ! ! !r   