
    )j$                        d dl mZ d dlmZmZ d dlmZ d dlm	Z	 d dl
ZddlmZ ddlmZmZmZ e G d de                      Z G d	 d
e	j                  Z G d de	j                  Z G d de	j                  Z G d de	j                  Z G d de	j                  Z G d de	j                  Z G d de	j                  Z G d de	j                  ZdS )    )	dataclass)AnyOptionalN   )swiglu)BaseModelArgscreate_attention_maskscaled_dot_product_attentionc                   V    e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   dS )		ModelArgs
model_type
vocab_sized_model
ffn_configattn_confign_layersn_headsN)__name__
__module____qualname__str__annotations__intdict     \/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/dbrx.pyr   r      sU         OOOOOOLLLMMMLLLLLr   r   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
	Attentionargsc                 4   t                                                       |j        | _        |j        | _        |j        |j        z  | _        |j        d         | _        |j        d         | _        |j        d         | _	        | j        dz  | _
        t          j        |j        | j        dz  | j        z   | j        z  d          | _        t          j        |j        |j        d          | _        t          j        | j        d| j	                  | _        d S )	N
kv_n_headsclip_qkv
rope_thetag         Fbias)traditionalbase)super__init__r   	num_headsr   head_dimr   num_key_value_headsr#   r$   scalennLinearWqkvout_projRoPEropeselfr    	__class__s     r   r+   zAttention.__init__   s    |4#'#3L#A (4*<8]D(
IL%)DN:dmK
 
 
	
 	$,5IIIGM
 
 
			r   Nxmaskcachereturnc                    |                      |          }t          j        || j         | j                  }| j        | j        | j        | j        z  z   g}t          j        ||d          \  }}}|j        \  }	}
}|	                    |	|
| j
        d                              dddd          }|	                    |	|
| j        d                              dddd          }|	                    |	|
| j        d                              dddd          }|R|                     ||j                  }|                     ||j                  }|                    ||          \  }}n*|                     |          }|                     |          }t          ||||| j        |	          }|                    dddd          	                    |	|
d          }|                     |          S )
N)a_mina_maxaxisr   r%   r      )offset)r;   r/   r:   )r2   mxclipr#   r   r-   r.   splitshapereshaper,   	transposer5   rD   update_and_fetchr
   r/   r3   )r7   r9   r:   r;   qkvsplitsquerieskeysvaluesBLDoutputs                r   __call__zAttention.__call__1   s    iillgc$-t}EEE,t}t?W/W WX "f2 > > >v'1a //!Q;;EEaAqQQ||Aq$":B??II!QPQSTUU1d&>CCMMq!Q
 
 iii==G99T%,977D 11$??LD&&ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??}}V$$$r   NNr   r   r   r   r+   rE   arrayr   r   rU   __classcell__r8   s   @r   r   r      s        
Y 
 
 
 
 
 
4 $(#	!% !%8!% rx !% }	!%
 
!% !% !% !% !% !% !% !%r   r   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
NormAttnNormr    c                     t                                                       t          j        |j        d          | _        t          j        |j        d          | _        t          |          | _        d S NFr&   )	r*   r+   r0   	LayerNormr   norm_1norm_2r   attnr6   s     r   r+   zNormAttnNorm.__init__V   sZ    l4<e<<<l4<e<<<dOO			r   Nr9   r:   r;   r<   c                     |                      |                     |          ||          }||z   }||                     |          fS )N)r:   r;   )rb   r`   ra   )r7   r9   r:   r;   hs        r   rU   zNormAttnNorm.__call__\   sC     IIdkk!nn4uI==E$++a..  r   rV   rW   rZ   s   @r   r\   r\   U   s        $Y $ $ $ $ $ $ $(#	! !8! rx ! }	!
 
! ! ! ! ! ! ! !r   r\   c                   L     e Zd Zdedef fdZdej        dej        fdZ xZS )MLPr   ffn_dimc                     t                                                       t          j        ||d          | _        t          j        ||d          | _        t          j        ||d          | _        d S r^   )r*   r+   r0   r1   v1w1w2)r7   r   rg   r8   s      r   r+   zMLP.__init__h   se    )GW5999)GW5999)GW5999r   r9   r<   c                     t          |                     |          |                     |                    }|                     |          }|S N)r   rj   ri   rk   )r7   r9   current_hidden_statess      r   rU   zMLP.__call__n   s?     &twwqzz4771:: > > $(= > >$$r   	r   r   r   r   r+   rE   rX   rU   rY   rZ   s   @r   rf   rf   g   sq        : :c : : : : : :%"( %rx % % % % % % % %r   rf   c                   >     e Zd Zdedef fdZdej        fdZ xZS )Routerr   num_expertsc                     t                                                       t          j        ||d          | _        d S r^   )r*   r+   r0   r1   layer)r7   r   rr   r8   s      r   r+   zRouter.__init__u   s5    Yw%@@@


r   r9   c                 ,    |                      |          S rm   )rt   )r7   r9   s     r   rU   zRouter.__call__y   s    zz!}}r   ro   rZ   s   @r   rq   rq   t   sp        A A# A A A A A A"(        r   rq   c                   H     e Zd Zdef fdZdej        dej        fdZ xZS )SparseMoeBlockr    c                 V    t                                                       |j         _        |j        d          _        |j        d          _        |j        d          _        t           j         j                   _         fdt           j                  D              _
        d S )Nffn_hidden_sizemoe_num_experts	moe_top_kc                 D    g | ]}t          j        j                  S r   )rf   r   rg   ).0_r7   s     r   
<listcomp>z+SparseMoeBlock.__init__.<locals>.<listcomp>   s4     
 
 
01Cdl++
 
 
r   )r*   r+   r   r   rg   rr   num_experts_per_tokrq   routerrangeexpertsr6   s   ` r   r+   zSparseMoeBlock.__init__~   s    |'89?+<=#'?;#? T\4+;<<
 
 
 
5:4;K5L5L
 
 
r   r9   r<   c                 ,     j         }|j        }|                    d|j        d                   }                     |          }t	          j        |                    t          j                  d          }t	          j        t	          j	        | |dz
  d          d d d |f                   }t	          j
        ||d          }|t          j                            |ddd          z  }|                    |j                  } j        rt          j        |          }t	          j        |j        d         ||j        d         f|j                  }t%           j                  D ]Z\  }}	t)          t          j        t          j        ||k                        \  }
}|
j        dk    rD |	||
                   ||
|f<   [||d d d d d f         z                      d          }ng }t1          |||                                          D ]W\  }}t	          j         fd|D             d          }||z                      d          }|                    |           Xt	          j        |d          }|                    |          S )	Nr@   rA   r   )kthrB   T)ordrB   keepdimsr   c                 <    g | ]} j         |                   S r   )r   )r}   er7   xts     r   r   z+SparseMoeBlock.__call__.<locals>.<listcomp>   s)    ???qt|Ar22???r   )r   rH   rI   r   rE   softmaxastypefloat32stop_gradientargpartitiontake_along_axislinalgnormdtypetrainingnprX   zeros	enumerater   mapwheresizesumziptoliststackappend)r7   r9   ne
orig_shapegatesindsscoresyr   expertidx1idx2stitytr   s   `              @r   rU   zSparseMoeBlock.__call__   s`   %W
IIb!'"+&&A
5<<
33"===BF L L LQQQPSQSPSV TUU#E4b999")..QR$.OOOqw''= 	$8D>>D!'!*b!'"+6@@A&t|44 0 0	6 28DAI+>+>??
d9>> &qw$*VAAAqqq$J'',,!,44AAA!!VT[[]];;  
BX?????B???bIII2g]]]++###Ayy$$$r   	r   r   r   r   r+   rE   rX   rU   rY   rZ   s   @r   rw   rw   }   sj        

Y 

 

 

 

 

 

%"( %rx % % % % % % % %r   rw   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
DecoderLayerr    c                     t                                                       t          |          | _        t	          |          | _        d S rm   )r*   r+   rw   ffnr\   norm_attn_normr6   s     r   r+   zDecoderLayer.__init__   s>    !$''*400r   Nr9   r:   r;   r<   c                 j    |                      |||          \  }}|                     |          |z   }|S rm   )r   r   )r7   r9   r:   r;   rrd   outs          r   rU   zDecoderLayer.__call__   s7     ""1dE221hhqkkAo
r   rV   rW   rZ   s   @r   r   r      s        1Y 1 1 1 1 1 1 $(#	 8 rx  }	
 
       r   r   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )DBRXr    c                 4   t                                                       j        | _        t          j        j        j                  | _        fdt          j                  D             | _	        t          j
        j        d          | _        d S )Nc                 0    g | ]}t                     S ))r    )r   )r}   r~   r    s     r   r   z!DBRX.__init__.<locals>.<listcomp>   s$    MMM1|...MMMr   Fr&   )r*   r+   r   r0   	Embeddingr   wter   r   blocksr_   norm_fr6   s    `r   r+   zDBRX.__init__   s{    /<>>MMMMdm8L8LMMMl4<e<<<r   Ninputsc                    |                      |          }|d gt          | j                  z  }t          ||d                   }t	          | j        |          D ]\  }} ||||          }|                     |          S )Nr   )r   lenr   r	   r   r   )r7   r   r;   rd   r:   rt   cs          r   rU   zDBRX.__call__   s    
 HHV=FS---E$Qa11DK// 	" 	"HE1aq!!AA{{1~~r   rm   r   rZ   s   @r   r   r      sl        =Y = = = = = =         r   r   c                   Z     e Zd Zdef fdZ	 ddej        fdZed             Z	d Z
 xZS )	Modelr    c                     t                                                       |j        | _        t          |          | _        t          j        |j        |j        d          | _	        || _
        d S r^   )r*   r+   r   r   transformerr0   r1   r   r   lm_headr    r6   s     r   r+   zModel.__init__   sW    /::ytUKKK			r   Nr   c                 X    |                      ||          }|                     |          S rm   )r   r   )r7   r   r;   r   s       r   rU   zModel.__call__   s+    
 vu--||C   r   c                     | j         j        S rm   )r   r   )r7   s    r   layerszModel.layers   s    &&r   c           	         | j         j        d         }| j         j        d         }dfd|                                D             }|                                D ]o\  }v rffdt          t	          j        ||d                    D             }                    d          rd	 |D             }|                    |           p|S )
Nrz   ry   zexperts.mlpc                 $    i | ]\  }}|v	||S r   r   )r}   kvpatterns      r   
<dictcomp>z"Model.sanitize.<locals>.<dictcomp>   s)    LLL17!;K;Kq!;K;K;Kr   c                 R    g | ]#\  }}                     d d|           dz   |f$S )z.mlp.z.weight)replace)r}   r   svr   s      r   r   z"Model.sanitize.<locals>.<listcomp>   sK       2 YYvw1ww//);R@  r   r   rA   rk   c                 &    g | ]\  }}||j         fS r   )T)r}   sr   s      r   r   z"Model.sanitize.<locals>.<listcomp>   s"    >>>UQ24y>>>r   )r    r   itemsr   rE   rG   endswithupdate)	r7   weightsrr   dimnew_weightsr   r   r   r   s	          @@r   sanitizezModel.sanitize   s    i*+<=i"#45LLLLLLLMMOO 	, 	,DAq!||   !*28A{+K+K+K!L!L   ::d## ?>>g>>>G""7+++r   rm   )r   r   r   r   r+   rE   rX   rU   propertyr   r   rY   rZ   s   @r   r   r      s        Y       ! !! ! ! ! ' ' X'      r   r   )dataclassesr   typingr   r   mlx.corecorerE   mlx.nnr0   numpyr   activationsr   r)   r   r	   r
   r   Moduler   r\   rf   rq   rw   r   r   r   r   r   r   <module>r      s3   " ! ! ! ! !                                       T T T T T T T T T T        9% 9% 9% 9% 9%	 9% 9% 9%x! ! ! ! !29 ! ! !$
% 
% 
% 
% 
%") 
% 
% 
%    RY   ,% ,% ,% ,% ,%RY ,% ,% ,%^    29   "    29   4$ $ $ $ $BI $ $ $ $ $r   