
    )j                        d dl Z d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 ddlmZmZ ddlmZ e G d d                      Z G d	 d
e
j                  Z G d de
j                  Z G d de
j                  Z G d de
j                  Z G d de
j                  Z G d de
j                  Z G d de
j                  ZdS )    N)	dataclass)Tuple   )create_attention_maskscaled_dot_product_attention)	SwitchMLPc                       e Zd ZU eed<   dZeed<   dZeed<   dZeed<   dZ	eed<   dZ
eed	<   d
Zeed<   dZeed<   ed             ZdS )	ModelArgs
model_typei   	num_vocabi 
  	model_dim    	num_heads
num_layers
rotary_dim   num_experts_per_tok   num_local_expertsc                 P       di  fd|                                 D             S )Nc                 R    i | ]#\  }}|t          j                  j        v  ||$S  )inspect	signature
parameters).0kvclss      `/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/phixtral.py
<dictcomp>z'ModelArgs.from_dict.<locals>.<dictcomp>   sB       Aq)#..999 1999    r   )items)r   paramss   ` r    	from_dictzModelArgs.from_dict   sO    s 
 
   "LLNN  
 
 	
r"   N)__name__
__module____qualname__str__annotations__r   intr   r   r   r   r   r   classmethodr%   r   r"   r    r
   r
      s         OOOIsIsIsJJ    s
 
 [
 
 
r"   r
   c                   4     e Zd Zdededef fdZddZ xZS )RoPEAttentiondimsr   r   c                     t                                                       || _        t          j        |d          | _        t          j        |d|z            | _        t          j        ||          | _        d S )NF)traditional   )	super__init__r   nnRoPEropeLinearWqkvout_proj)selfr/   r   r   	__class__s       r    r4   zRoPEAttention.__init__&   sd    "GJE:::	IdAH--		$--r"   Nc                 ,   |                      |          }t          j        |dd          \  }}}| j        }|j        \  }	}
}|                    |	|
|d                              dddd          }|                    |	|
|d                              dddd          }|                    |	|
|d                              dddd          }|R|                     ||j                  }|                     ||j                  }|	                    ||          \  }}n*|                     |          }|                     |          }|
                    t          j                  }t          j        d|j        d         z            }t          |
                    t          j                  |||||          
                    |j                  }|                    dd                              |	|
d          }|                     |          S )	Nr2   axisr   r   r   )offset)cachescalemask)r9   mxsplitr   shapereshape	transposer7   rA   update_and_fetchastypefloat32mathsqrtr   dtypemoveaxisr:   )r;   xrD   rB   qkvquerieskeysvaluesr   BLDrC   outputs                 r    __call__zRoPEAttention.__call__/   s   iill "ab 9 9 9v N	-1a //!Q	266@@Aq!LL||Aq)R00::1aAFF1i44>>q!QJJ iii==G99T%,977D 11$??LD&&ii((G99T??D..,, 	!gmB//00-NN2:&&
 
 
 &

 	 A&&..q!R88}}V$$$r"   NN)r&   r'   r(   r+   r4   rZ   __classcell__r<   s   @r    r.   r.   %   sf        .S .S .c . . . . . .%% %% %% %% %% %% %% %%r"   r.   c                   P     e Zd Zdededef fdZdej        dej        fdZ xZ	S )MOEargsdim
hidden_dimc                 .   t                                                       || _        || _        |j        | _        |j        | _        t          | j        | j        | j        d          | _        t          j
        |j        | j        d          | _        d S )NT)biasF)r3   r4   ra   rb   r   num_expertsr   r   
switch_mlpr5   r8   r   gate)r;   r`   ra   rb   r<   s       r    r4   zMOE.__init__X   s    $1#'#; #Hdot'7d
 
 
 Idnd.>UKKK			r"   rQ   returnc                 r   |                      |          }| j        }t          j        t          j        | |dz
  d                    dd |f         }t          j        ||d          }t          j        |dd          }|                     ||          }||d         z                      d	          }|S )
Nr   r>   )kthr@   .r?   T)r@   precise).N)	rg   r   rE   stop_gradientargpartitiontake_along_axissoftmaxrf   sum)r;   rQ   gatesr   indsscoresys          r    rZ   zMOE.__call__c   s    		!$AE K K KLLSRTSTRTWU#E4b999FT:::OOAt$$	""''R'00r"   )
r&   r'   r(   r
   r+   r4   rE   arrayrZ   r\   r]   s   @r    r_   r_   W   s        	LY 	LS 	Lc 	L 	L 	L 	L 	L 	L"( rx        r"   r_   c                   *     e Zd Zdef fdZd Z xZS )ParallelBlockconfigc                     t                                                       |j        }|dz  }t          ||j        |j                  | _        t          j        |          | _	        t          |||          | _        d S )Nr   )r3   r4   r   r.   r   r   mixerr5   	LayerNormlnr_   moe)r;   ry   r/   mlp_dimsr<   s       r    r4   zParallelBlock.__init__r   sk    !8"4)96;LMM
,t$$vtX..r"   c                     |                      |          }|                     |||          }|                     |          }||z   |z   S N)r}   r{   r~   )r;   rQ   rD   rB   hattn_hff_hs          r    rZ   zParallelBlock.__call__z   sD    GGAJJAtU++xx{{}q  r"   r&   r'   r(   r
   r4   rZ   r\   r]   s   @r    rx   rx   q   sS        /y / / / / / /! ! ! ! ! ! !r"   rx   c                   *     e Zd Zdef fdZd Z xZS )TransformerDecoderry   c                     t                                                       t                    | _        fdt	          j                  D             | _        d S )Nc                 .    g | ]}t                    S r   )rx   )r   iry   s     r    
<listcomp>z/TransformerDecoder.__init__.<locals>.<listcomp>   s!    JJJA-''JJJr"   )r3   r4   Embdembdranger   r   r;   ry   r<   s    `r    r4   zTransformerDecoder.__init__   sQ    LL	JJJJv7H1I1IJJJr"   c                     |                      |          }|d gt          | j                  z  }t          | j        |          D ]\  }} ||||          }|S r   )r   lenr   zip)r;   rQ   rD   rB   layercs         r    rZ   zTransformerDecoder.__call__   sc    IIaLL=FS[[(EDFE** 	" 	"HE1aq!!AAr"   r   r]   s   @r    r   r      sZ        Ky K K K K K K
      r"   r   c                   *     e Zd Zdef fdZd Z xZS )r   ry   c                     t                                                       t          j        |j        |j                  | _        d S r   )r3   r4   r5   	Embeddingr   r   wter   s     r    r4   zEmbd.__init__   s6    < 0&2BCCr"   c                 ,    |                      |          S r   )r   )r;   rQ   s     r    rZ   zEmbd.__call__   s    xx{{r"   r   r]   s   @r    r   r      sZ        Dy D D D D D D      r"   r   c                   .     e Zd Zdeddf fdZd Z xZS )
OutputHeadry   rh   Nc                     t                                                       t          j        |j                  | _        t          j        |j        |j                  | _        d S r   )	r3   r4   r5   r|   r   r}   r8   r   linearr   s     r    r4   zOutputHead.__init__   sJ    ,v/00i 0&2BCCr"   c                 R    |                      |                     |                    S r   )r   r}   )r;   inputss     r    rZ   zOutputHead.__call__   s    {{4776??+++r"   r   r]   s   @r    r   r      sb        Dy DT D D D D D D
, , , , , , ,r"   r   c                   x     e Zd Zdef fdZ	 	 d
dej        dej        dej        fdZd Ze	d	             Z
 xZS )Modelry   c                     t                                                       |j        | _        t          |          | _        t          |          | _        || _        d S r   )r3   r4   r   r   transformerr   lm_headr`   r   s     r    r4   zModel.__init__   sM     +-f55!&))			r"   NrQ   rD   rh   c                 ~    |t          ||          }|                     |||          }|                     |          S r   )r   r   r   )r;   rQ   rD   rB   ru   s        r    rZ   zModel.__call__   s?     <(E22DQe,,||Ar"   c           
         dvrS t          | j        j                  D ]d}d| dD ]ZdD ]U d d v rGfdt          | j        j                  D             }t	          j        |           d d <   V[eS )	Nz$transformer.h.0.moe.mlp.0.fc1.weightztransformer.h.)fc1fc2)weightscalesbiasesrd   z.moe.mlp.0..c                 P    g | ]"}                      d | d d           #S )z	.moe.mlp.r   )pop)r   er   nprefixweightss     r    r   z"Model.sanitize.<locals>.<listcomp>   sS     # # # ! $KK6(G(GA(G(G(G(GA(G(GHH# # #r"   z.moe.switch_mlp.)r   r`   r   r   rE   stack)r;   r   lto_joinr   r   r   s    `  @@@r    sanitizezModel.sanitize   s
   1@@Nty+,, 		X 		XA)a))F# X X? X XA 44Q4444??# # # # # # #%*49+F%G%G# # # GIhwFWFW6 B B1 B Bq B BCXX r"   c                     | j         j        S r   )r   r   )r;   s    r    layerszModel.layers   s    !!r"   r[   )r&   r'   r(   r
   r4   rE   rv   rZ   r   propertyr   r\   r]   s   @r    r   r      s        y       	 8 h
 
      " " X" " " " "r"   r   )r   rM   dataclassesr   typingr   mlx.corecorerE   mlx.nnr5   baser   r   switch_layersr   r
   Moduler.   r_   rx   r   r   r   r   r   r"   r    <module>r      s     ! ! ! ! ! !                   E E E E E E E E $ $ $ $ $ $ 
 
 
 
 
 
 
 
*/% /% /% /% /%BI /% /% /%d    ")   4! ! ! ! !BI ! ! !             29   , , , , , , , ,&" &" &" &" &"BI &" &" &" &" &"r"   