
    )j                     X   d dl mZ d dlmZmZmZmZ d dlmZ	 d dl
mZ ddlmZmZmZ ddlmZ ddlmZ e G d d	e                      Z G d
 dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    )	dataclass)AnyDictOptionalUnionN   )BaseModelArgscreate_attention_maskscaled_dot_product_attention)initialize_rope)	SwitchGLUc                   L   e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   d
Zeed<   dZ	e
e         ed<   dZe
e         ed<   dZe
e         ed<   d
Zeed<   d
Zeed<   dZeed<   d
Zeed<   dZe
eeeeef         f                  ed<   dZeed<   d ZdS )	ModelArgs
model_typehidden_sizenum_hidden_layersintermediate_sizenum_attention_headsrms_norm_eps
vocab_sizenum_expertsnum_experts_per_tokFnorm_topk_probNhead_dimmax_position_embeddingsnum_key_value_headsattention_biasmlp_biasi'  
rope_thetarope_traditionalrope_scalingTtie_word_embeddingsc                 0    | j         | j        | _         d S d S N)r   r   selfs    ]/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/olmoe.py__post_init__zModelArgs.__post_init__$   s$    #+'+'?D$$$ ,+    )__name__
__module____qualname__str__annotations__intfloatr   boolr   r   r   r   r   r   r   r    r!   r   r   r"   r(    r)   r'   r   r      sX        OOOOOO ND   "Hhsm"""-1Xc]111)-#--- ND   HdJ"d""";?L(4U5#:%6 678??? $$$$@ @ @ @ @r)   r   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
	Attentionargsc                    t                                                       |j        }|j        x| _        }|j        x| _        }|j        p	|j        |z  x| _        }|dz  | _        t          j
        |||z  |j                  | _        t          j
        |||z  |j                  | _        t          j
        |||z  |j                  | _        t          j
        ||z  ||j                  | _        t!          | j        |j        |j        |j        |j                  | _        t          j        ||z  |j                  | _        t          j        ||z  |j                  | _        d S )Ng      ࿩bias)super__init__r   r   n_headsr   
n_kv_headsr   scalennLinearr   q_projk_projv_projo_projr   r   r    r!   r   ropeRMSNormr   q_normk_norm)r&   r5   dimr;   r<   r   	__class__s         r'   r:   zAttention.__init__*   sN   !%!99w'+'??*#'=#OD4D4OOt^
iWx%7d>QRRRiZ(%:ATUUUiZ(%:ATUUUi( 2Cd>QRRR#MO!(
 
	 j8!3T5FGGjh!68IJJr)   Nxmaskcachereturnc                    |j         \  }}}|                     |          |                     |          |                     |          }	}}|                     |          }|                     |          }|                    ||| j        d                              dddd          }|                    ||| j	        d                              dddd          }|	                    ||| j	        d                              dddd          }	|R| 
                    ||j                  }| 
                    ||j                  }|                    ||	          \  }}	n*| 
                    |          }| 
                    |          }t          |||	|| j        |          }
|
                    dddd                              ||d          }
|                     |
          S )Nr      r      )offset)rL   r=   rK   )shaper@   rA   rB   rF   rG   reshaper;   	transposer<   rD   rR   update_and_fetchr   r=   rC   )r&   rJ   rK   rL   BLDquerieskeysvaluesoutputs              r'   __call__zAttention.__call__E   s    '1a $AAAv++g&&{{4  //!Qb99CCAq!QOO||Aq$/266@@Aq!LL1dor::DDQ1aPPiii==G99T%,977D 11$??LD&&ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??{{6"""r)   NNr*   r+   r,   r   r:   mxarrayr   r   r^   __classcell__rI   s   @r'   r4   r4   )   s        KY K K K K K K< $(#	# #8# rx # }	#
 
# # # # # # # #r)   r4   c                   H     e Zd Zdef fdZdej        dej        fdZ xZS )OlmoeSparseMoeBlockr5   c                 4   t                                                       |j        | _        |j        | _        |j        | _        t          j        |j        | j        d          | _	        t          |j        |j        | j        |j                  | _        d S NFr7   )r9   r:   r   r   top_kr   r>   r?   r   gater   r   r   
switch_mlpr&   r5   rI   s     r'   r:   zOlmoeSparseMoeBlock.__init__a   s    +-
"1Id.0@uMMM	#"	
 
 
r)   rJ   rM   c                     |j         \  }}}|                    d|          }|                     |          }t          j        |dd          }| j        }t          j        t          j        | |dz
  d          dd |f                   }	t          j        ||	d          }
| j	        r|
|

                    dd          z  }
|                     ||	          }||
d	         z  
                    d
          }|                    |||          S )NrO   r   T)axisprecise)kthrn   .)rn   )rn   keepdims).N)rS   rT   rj   ra   softmaxri   stop_gradientargpartitiontake_along_axisr   sumrk   )r&   rJ   rW   rX   rY   x_flatrouter_logitsrouting_weightskindicesscoresys               r'   r^   zOlmoeSparseMoeBlock.__call__o   s   '1a2q!!		&))*]DIIIJ"O_,!a%bAAA#rr'J
 
 #OW2FFF 	Afjjb4j@@@FOOFG,,	""''R'00yyAq!!!r)   	r*   r+   r,   r   r:   ra   rb   r^   rc   rd   s   @r'   rf   rf   `   sj        
Y 
 
 
 
 
 
""( "rx " " " " " " " "r)   rf   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
TransformerBlockr5   c                 ,   t                                                       t          |          | _        t	          |          | _        t          j        |j        |j	                  | _
        t          j        |j        |j	                  | _        d S )Neps)r9   r:   r4   	self_attnrf   mlpr>   rE   r   r   input_layernormpost_attention_layernormrl   s     r'   r:   zTransformerBlock.__init__   s|    "4&t,,!z$*:@QRRR(*
$"3)
 )
 )
%%%r)   NrJ   rK   rL   rM   c                     ||                      |                     |          ||          z   }||                     |                     |                    z   }|S r$   )r   r   r   r   )r&   rJ   rK   rL   s       r'   r^   zTransformerBlock.__call__   sU     t33A66eDDD66q99:::r)   r_   r`   rd   s   @r'   r   r      s        
Y 
 
 
 
 
 
 $(#	 8 rx  }	
 
       r)   r   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )
OlmoeModelr5   c                 ~   t                                                       | _        j        | _        j        | _        | j        dk    sJ t          j        j        j                  | _        fdt          j                  D             | _
        t          j        j        j                  | _        d S )Nr   c                 0    g | ]}t                     S ))r5   )r   ).0_r5   s     r'   
<listcomp>z'OlmoeModel.__init__.<locals>.<listcomp>   s2     
 
 
,-$'''
 
 
r)   r   )r9   r:   r5   r   r   r>   	Embeddingr   embed_tokensrangelayersrE   r   normrl   s    `r'   r:   zOlmoeModel.__init__   s    	/!%!7""""L$:JKK
 
 
 
16t7M1N1N
 
 
 Jt/T5FGGG			r)   Ninputsc                    |                      |          }|d gt          | j                  z  }t          ||d                   }t	          | j        |          D ]\  }} ||||          }|                     |          S )Nr   )rL   )r   lenr   r
   zipr   )r&   r   rL   hrK   layercs          r'   r^   zOlmoeModel.__call__   s    
 f%%=FS---E$Qa11DK// 	( 	(HE1aQ'''AAyy||r)   r$   r   rd   s   @r'   r   r      ss        
HY 
H 
H 
H 
H 
H 
H         r)   r   c                   Z     e Zd Zdef fdZ	 ddej        fdZd Ze	d             Z
 xZS )	Modelr5   c                     t                                                       || _        |j        | _        t	          |          | _        |j        s(t          j        |j	        |j
        d          | _        d S d S rh   )r9   r:   r5   r   r   modelr"   r>   r?   r   r   lm_headrl   s     r'   r:   zModel.__init__   sq    	/%%
' 	T9T%5tUSSSDLLL	T 	Tr)   Nr   c                     |                      ||          }| j        j        r | j         j                            |          }n|                     |          }|S r$   )r   r5   r"   r   	as_linearr   )r&   r   rL   outs       r'   r^   zModel.__call__   sT    
 jj''9( 	$*)33C88CC,,s##C
r)   c           
         dvrS t          | j        j                  D ]d}d| dD ]ZdD ]U d d v rGfdt          | j        j                  D             }t	          j        |           d d <   V[eS )	Nz+model.layers.0.mlp.experts.0.up_proj.weightzmodel.layers.)up_proj	down_proj	gate_proj)weightscalesbiasesz.mlp.experts.0..c                 P    g | ]"}                      d | d d           #S )z.mlp.experts.r   )pop)r   er{   nprefixweightss     r'   r   z"Model.sanitize.<locals>.<listcomp>   sS     # # # ! $KK6(K(K(K(KA(K(K(K(KLL# # #r)   z.mlp.switch_mlp.)r   r5   r   r   ra   stack)r&   r   lto_joinr{   r   r   s    `  @@@r'   sanitizezModel.sanitize   s
   8GGNty233 		X 		XA(Q((F: X X7 X XA 8888Q88GCC# # # # # # #%*49+@%A%A# # # GIhwFWFW6 B B1 B Bq B BCXX r)   c                     | j         j        S r$   )r   r   r%   s    r'   r   zModel.layers   s    z  r)   r$   )r*   r+   r,   r   r:   ra   rb   r^   r   propertyr   rc   rd   s   @r'   r   r      s        TY T T T T T T 
 

 
 
 
   ! ! X! ! ! ! !r)   r   )dataclassesr   typingr   r   r   r   mlx.corecorera   mlx.nnr>   baser	   r
   r   
rope_utilsr   switch_layersr   r   Moduler4   rf   r   r   r   r2   r)   r'   <module>r      s   " ! ! ! ! ! - - - - - - - - - - - -             T T T T T T T T T T ' ' ' ' ' ' $ $ $ $ $ $ @ @ @ @ @ @ @ @44# 4# 4# 4# 4#	 4# 4# 4#n" " " " "") " " "@    ry   *       6&! &! &! &! &!BI &! &! &! &! &!r)   