
    )jt                     X   d dl mZ d dlmZmZmZmZ d dlmZ	 d dl
mZ ddlmZ ddlmZmZmZ ddlmZ e G d d	e                      Z G d
 dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    )	dataclass)AnyDictOptionalUnionN   )swiglu)BaseModelArgscreate_attention_maskscaled_dot_product_attention)initialize_ropec                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   d	Zeed
<   dZeed<   dZ	e
ed<   dZeeeeeef         f                  ed<   dZe
ed<   dZeed<   dS )	ModelArgs
model_typehidden_sizenum_hidden_layersintermediate_sizenum_attention_headsrms_norm_eps
vocab_sizenum_key_value_headsi   max_position_embeddingsg     @
rope_thetaFrope_traditionalNrope_scalingtie_word_embeddings   num_nextn_predict_layers)__name__
__module____qualname__str__annotations__intfloatr   r   r   boolr   r   r   r   r   r        \/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/mimo.pyr   r      s         OOOOOO#(S(((J"d""";?L(4U5#:%6 678??? %%%%$%c%%%%%r(   r   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
	Attentionargsc                    t                                                       |j        }|j        x| _        }|j        J |j        x| _        }|j        |z  }|dz  | _        t          j	        |||z  d          | _
        t          j	        |||z  d          | _        t          j	        |||z  d          | _        t          j	        ||z  |d          | _        t          ||j        |j        |j        |j                  | _        d S )Ng      TbiasF)basetraditionalscaling_configr   )super__init__r   r   n_headsr   
n_kv_headsscalennLinearq_projk_projv_projo_projr   r   r   r   r   rope)selfr,   dimr5   r6   head_dim	__class__s         r)   r4   zAttention.__init__!   s   !%!99w'333'+'??*#w.t^
iWx%7dCCCiZ(%:FFFiZ(%:FFFi( 2CeDDD#-,$($@
 
 
			r(   Nxmaskcachereturnc                 |   |j         \  }}}|                     |          |                     |          |                     |          }	}}|                    ||| j        d                              dddd          }|                    ||| j        d                              dddd          }|	                    ||| j        d                              dddd          }	|R|                     ||j	                  }|                     ||j	                  }|
                    ||	          \  }}	n*|                     |          }|                     |          }t          |||	|| j        |          }
|
                    dddd                              ||d          }
|                     |
          S )Nr   r   r      )offset)rE   r7   rD   )shaper:   r;   r<   reshaper5   	transposer6   r>   rJ   update_and_fetchr   r7   r=   )r?   rC   rD   rE   BLDquerieskeysvaluesoutputs              r)   __call__zAttention.__call__9   s    '1a $AAAv//!Qb99CCAq!QOO||Aq$/266@@Aq!LL1dor::DDQ1aPPiii==G99T%,977D 11$??LD&&ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??{{6"""r(   NNr   r    r!   r   r4   mxarrayr   r   rV   __classcell__rB   s   @r)   r+   r+       s        
Y 
 
 
 
 
 
6 $(#	# #8# rx # }	#
 
# # # # # # # #r(   r+   c                   4     e Zd Z fdZdej        fdZ xZS )MLPc                     t                                                       t          j        ||d          | _        t          j        ||d          | _        t          j        ||d          | _        d S NFr.   )r3   r4   r8   r9   	gate_proj	down_projup_proj)r?   r@   
hidden_dimrB   s      r)   r4   zMLP.__init__W   se    3
???:s???yju===r(   rF   c                     |                      t          |                     |          |                     |                              S N)rb   r	   ra   rc   )r?   rC   s     r)   rV   zMLP.__call__]   s4    ~~fT^^A%6%6QHHIIIr(   )r   r    r!   r4   rY   rZ   rV   r[   r\   s   @r)   r^   r^   V   s^        > > > > >JRX J J J J J J J Jr(   r^   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
TransformerBlockr,   c                    t                                                       |j        | _        |j        | _        t	          |          | _        t          |j        |j                  | _        t          j
        |j        |j                  | _        t          j
        |j        |j                  | _        || _        d S )Neps)r3   r4   r   r   r+   	self_attnr^   r   mlpr8   RMSNormr   input_layernormpost_attention_layernormr,   r?   r,   rB   s     r)   r4   zTransformerBlock.__init__b   s    #'#; +"4t')?@@!z$*:@QRRR(*
$"3)
 )
 )
% 			r(   NrC   rD   rE   rF   c                     |                      |                     |          ||          }||z   }|                     |                     |                    }||z   }|S rf   )rl   ro   rm   rp   )r?   rC   rD   rE   rhouts          r)   rV   zTransformerBlock.__call__n   s]     NN4//22D%@@EHHT2215566!e
r(   rW   rX   r\   s   @r)   rh   rh   a   s        
Y 
 
 
 
 
 
 $(#	
 
8
 rx 
 }	

 

 
 
 
 
 
 
 
r(   rh   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )	MiMoModelr,   c                    t                                                       | _        j        | _        j        | _        j        | _        | j        dk    sJ t          j        j        j                  | _	        fdt          j                  D             | _        t          j        j        j                  | _        d S )Nr   c                 0    g | ]}t                     S ))r,   )rh   ).0_r,   s     r)   
<listcomp>z&MiMoModel.__init__.<locals>.<listcomp>   s2     
 
 
,-$'''
 
 
r(   rj   )r3   r4   r,   r   r   r   r8   	Embeddingr   embed_tokensrangelayersrn   r   normrq   s    `r)   r4   zMiMoModel.__init__|   s    	/!%!7(,(E%""""L$:JKK
 
 
 
16t7M1N1N
 
 
 Jt/T5FGGG			r(   Ninputsc                 
   |                      |          }|d gt          | j                  z  }t          ||d                   }t	          | j        |          D ]\  }} ||||          }|                     |          }|S )Nr   )r~   lenr   r   zipr   )r?   r   rE   rt   rD   layercs          r)   rV   zMiMoModel.__call__   s    
 f%%=FS---E$Qa11DK// 	" 	"HE1aq!!AAIIaLLr(   rf   )	r   r    r!   r   r4   rY   rZ   rV   r[   r\   s   @r)   rw   rw   {   ss        HY H H H H H H"         r(   rw   c                   Z     e Zd Zdef fdZ	 ddej        fdZd Ze	d             Z
 xZS )	Modelr,   c                     t                                                       || _        |j        | _        t	          |          | _        |j        s(t          j        |j	        |j
        d          | _        d S d S r`   )r3   r4   r,   r   rw   modelr   r8   r9   r   r   lm_headrq   s     r)   r4   zModel.__init__   so    	/t__
' 	T9T%5tUSSSDLLL	T 	Tr(   Nr   c                     |                      ||          }| j        j        r | j         j                            |          }n|                     |          }|S rf   )r   r,   r   r~   	as_linearr   )r?   r   rE   ru   s       r)   rV   zModel.__call__   sT    
 jj''9( 	$*)33C88CC,,s##C
r(   c                     | j         j        r|                    dd            d |                                D             S )Nzlm_head.weightc                 L    i | ]!\  }}d |v	|                     d          ||"S )zself_attn.rotary_emb.inv_freqzmodel.mtp_layers.)
startswith)rz   kvs      r)   
<dictcomp>z"Model.sanitize.<locals>.<dictcomp>   sH     
 
 
1.a77LL!455 8 q777r(   )r,   r   popitems)r?   weightss     r)   sanitizezModel.sanitize   sM    9( 	0KK($///
 

 
 
 	
r(   c                     | j         j        S rf   )r   r   )r?   s    r)   r   zModel.layers   s    z  r(   rf   )r   r    r!   r   r4   rY   rZ   rV   r   propertyr   r[   r\   s   @r)   r   r      s        TY T T T T T T     	
 	
 	
 ! ! X! ! ! ! !r(   r   )dataclassesr   typingr   r   r   r   mlx.corecorerY   mlx.nnr8   activationsr	   r0   r
   r   r   
rope_utilsr   r   Moduler+   r^   rh   rw   r   r'   r(   r)   <module>r      s   " ! ! ! ! ! - - - - - - - - - - - -                   T T T T T T T T T T ' ' ' ' ' ' & & & & & & & &"3# 3# 3# 3# 3#	 3# 3# 3#lJ J J J J") J J J    ry   4         	      F$! $! $! $! $!BI $! $! $! $! $!r(   