
    )j                     ~   d dl mZ d dlmZmZmZmZ d dlmZ	 d dl
mZ ddlmZmZmZ ddlmZ ddlmZ e G d d	e                      Z G d
 dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    )	dataclass)AnyDictOptionalUnionN   )BaseModelArgscreate_attention_maskscaled_dot_product_attention)initialize_rope)	SwitchGLUc                      e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   dZe	e
eeeef         f                  ed<   dZeed<   dS )	ModelArgs
model_typehidden_sizenum_hidden_layersintermediate_sizenum_attention_headsrms_norm_eps
vocab_sizelogits_scalingattention_multiplierembedding_multiplierresidual_multipliermax_position_embeddingsnum_key_value_headsattention_bias
rope_thetanum_local_expertsnum_experts_per_tokNrope_scalingTtie_word_embeddings)__name__
__module____qualname__str__annotations__intfloatboolr!   r   r   r   r"        b/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/granitemoe.pyr   r      s        OOOOOO    ;?L(4U5#:%6 678??? $$$$$$r,   r   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
GraniteMoeAttentionargsc                 &   t                                                       |j        }|j        x| _        }|j        x| _        }|j        |z  x| _        }|j        | _	        |j
        }t          j        |||z  |          | _        t          j        |||z  |          | _        t          j        |||z  |          | _        t          j        ||z  ||          | _        t#          | j        |j        d|j        |j                  | _        d S )NbiasF)super__init__r   r   n_headsr   
n_kv_headshead_dimr   scaler   nnLinearq_projk_projv_projo_projr   r   r!   r   rope)selfr0   dimr6   r7   r8   r   	__class__s          r-   r5   zGraniteMoeAttention.__init__&   s   !%!99w'+'??*#'#3w#>>.
,iWx%7nMMMiZ(%:PPPiZ(%:PPPi( 2CnMMM#MO(
 
			r,   Nxmaskcachereturnc                 |   |j         \  }}}|                     |          |                     |          |                     |          }	}}|                    ||| j        d                              dddd          }|                    ||| j        d                              dddd          }|	                    ||| j        d                              dddd          }	|R|                     ||j	                  }|                     ||j	                  }|
                    ||	          \  }}	n*|                     |          }|                     |          }t          |||	|| j        |          }
|
                    dddd                              ||d          }
|                     |
          S )Nr      r      )offset)rF   r9   rE   )shaper<   r=   r>   reshaper6   	transposer7   r@   rL   update_and_fetchr   r9   r?   )rA   rD   rE   rF   BLDquerieskeysvaluesoutputs              r-   __call__zGraniteMoeAttention.__call__>   s    '1a $AAAv//!Qb99CCAq!QOO||Aq$/266@@Aq!LL1dor::DDQ1aPPiii==G99T%,977D 11$??LD&&ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??{{6"""r,   NNr#   r$   r%   r   r5   mxarrayr   r   rX   __classcell__rC   s   @r-   r/   r/   %   s        
Y 
 
 
 
 
 
6 $(#	# #8# rx # }	#
 
# # # # # # # #r,   r/   c                   B     e Zd Zdededef fdZdej        fdZ xZS )GraniteMoeTopKGating
input_sizenum_expertstop_kc                     t                                                       || _        || _        || _        t          j        ||d          | _        d S NFr2   )r4   r5   rb   ra   rc   r:   r;   layer)rA   ra   rb   rc   rC   s       r-   r5   zGraniteMoeTopKGating.__init__]   sK    &$
Yz;UCCC


r,   hidden_statesc                 "   |                      |          }t          j        || j         d          d| j         d f         }t          j        ||d          }t          j        |                    t          j                  d          }||fS )NrI   )kthaxis.rj   )rf   r[   argpartitionrc   take_along_axissoftmaxastypefloat32)rA   rg   logits	top_k_idxtop_k_logitstop_k_gatess         r-   rX   zGraniteMoeTopKGating.__call__d   s    M**OF"EEE$*
	 )&)"EEEj!4!4RZ!@!@rJJJ+%%r,   )	r#   r$   r%   r(   r5   r[   r\   rX   r]   r^   s   @r-   r`   r`   \   sx        D3 DS D D D D D D D&bh & & & & & & & &r,   r`   c                   H     e Zd Zdef fdZdej        dej        fdZ xZS )GraniteMoeMoEr0   c                    t                                                       |j        | _        |j        | _        t          | j        | j        |j                  | _        t          | j        |j        |j	                  | _
        d S )N)ra   rb   rc   )r4   r5   r   ra   r   r   r   
switch_mlpr`   r    routerrA   r0   rC   s     r-   r5   zGraniteMoeMoE.__init__o   s{    *1#OT-t/E
 
 +.*
 
 
r,   rD   rG   c                     |                      |          \  }}|                     ||          }||d         z                      d                              |j                  S )N).Nrk   )ry   rx   sumro   dtype)rA   rD   	token_idsgatesys        r-   rX   zGraniteMoeMoE.__call__}   sZ    ;;q>>	5OOAy))E)$$))r)2299!'BBBr,   	r#   r$   r%   r   r5   r[   r\   rX   r]   r^   s   @r-   rv   rv   n   st        
Y 
 
 
 
 
 
C"( Crx C C C C C C C Cr,   rv   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
GraniteMoeDecoderLayerr0   c                 D   t                                                       t          |          | _        t	          |          | _        t          j        |j        |j	                  | _
        t          j        |j        |j	                  | _        |j        | _        d S )Neps)r4   r5   r/   	self_attnrv   block_sparse_moer:   RMSNormr   r   input_layernormpost_attention_layernormr   rz   s     r-   r5   zGraniteMoeDecoderLayer.__init__   s    ,T22 -d 3 3!z$*:@QRRR(*
$"3)
 )
 )
% $(#;   r,   NrD   rE   rF   rG   c                     |                      |                     |          ||          }||| j        z  z   }|                     |                     |                    }||| j        z  z   }|S N)r   r   r   r   r   )rA   rD   rE   rF   rhouts          r-   rX   zGraniteMoeDecoderLayer.__call__   sq     NN4//22D%@@D,,,!!$"?"?"B"BCC!d...
r,   rY   rZ   r^   s   @r-   r   r      s        <Y < < < < < < $(#	
 
8
 rx 
 }	

 

 
 
 
 
 
 
 
r,   r   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )GraniteMoEModelr0   c                 L   t                                                       | _        t          j        j        j                  | _        fdt          j	                  D             | _
        t          j        j        j                  | _        j        | _        d S )Nc                 0    g | ]}t                     S ))r0   )r   ).0_r0   s     r-   
<listcomp>z,GraniteMoEModel.__init__.<locals>.<listcomp>   s2     
 
 
23"---
 
 
r,   r   )r4   r5   r0   r:   	Embeddingr   r   embed_tokensranger   layersr   r   normr   rz   s    `r-   r5   zGraniteMoEModel.__init__   s    	L$:JKK
 
 
 
7<T=S7T7T
 
 
 Jt/T5FGGG	$($=!!!r,   Ninputsc                    |                      |          | j        z  }|d gt          | j                  z  }t	          ||d                   }t          | j        |          D ]\  }} ||||          }|                     |          S )Nr   )rF   )r   r   lenr   r
   zipr   )rA   r   rF   r   rE   rf   cs          r-   rX   zGraniteMoEModel.__call__   s    
 f%%(AA=FS---E$Qa11DK// 	( 	(HE1aQ'''AAyy||r,   r   r   r^   s   @r-   r   r      sl        >Y > > > > > >         r,   r   c                   p     e Zd Zdef fdZ	 d	dej        fdZd Ze	d             Z
e	d             Z xZS )
Modelr0   c                    t                                                       || _        |j        | _        t	          |          | _        |j        s&t          j        |j	        |j
        d          | _        |j        | _        d S re   )r4   r5   r0   r   r   modelr"   r:   r;   r   r   lm_headr   rz   s     r-   r5   zModel.__init__   sr    	/$T**
' 	T9T%5tUSSSDL"1r,   Nr   c                     |                      ||          }| j        j        r | j         j                            |          }n|                     |          }|| j        z  S r   )r   r0   r"   r   	as_linearr   r   )rA   r   rF   r   s       r-   rX   zModel.__call__   s]    
 jj''9( 	$*)33C88CC,,s##CT(((r,   c                    d|vr|S t          | j        j                  D ]}d| d}| d}|                    |          }t	          j        |dd          \  }}|||                    dd	          <   |||                    dd
          <   | d}|                    |          ||                    dd          <   | j        j        r|                    dd            |S )Nz3model.layers.0.block_sparse_moe.input_linear.weightzmodel.layers.z.block_sparse_moez.input_linear.weightrJ   r   rk   input_linearzswitch_mlp.gate_projzswitch_mlp.up_projz.output_linear.weightoutput_linearzswitch_mlp.down_projzlm_head.weight)r   r0   r   popr[   splitreplacer"   )rA   weightslprefixkeyvalue	gate_projup_projs           r-   sanitizezModel.sanitize   s   @OONty233 
	 
	A9Q999F111CKK$$E!#%!;!;!;IwKTGCKK0FGGHIPGCKK0DEEF222CLSKKM MGCKK1GHHII 9( 	0KK($///r,   c                     d }|S )Nc                 :    |                      d          rdddS dS )Nzblock_sparse_moe.router.layer@      )
group_sizebitsT)endswith)pathr   s     r-   	predicatez(Model.quant_predicate.<locals>.predicate   s*    }}<== 5&(!4444r,   r+   )rA   r   s     r-   quant_predicatezModel.quant_predicate   s    	 	 	
 r,   c                     | j         j        S r   )r   r   )rA   s    r-   r   zModel.layers   s    z  r,   r   )r#   r$   r%   r   r5   r[   r\   rX   r   propertyr   r   r]   r^   s   @r-   r   r      s        2Y 2 2 2 2 2 2 
) 
)
) 
) 
) 
)  $   X ! ! X! ! ! ! !r,   r   )dataclassesr   typingr   r   r   r   mlx.corecorer[   mlx.nnr:   baser	   r
   r   
rope_utilsr   switch_layersr   r   Moduler/   r`   rv   r   r   r   r+   r,   r-   <module>r      s   " ! ! ! ! ! - - - - - - - - - - - -             T T T T T T T T T T ' ' ' ' ' ' $ $ $ $ $ $ % % % % % % % %,4# 4# 4# 4# 4#") 4# 4# 4#n& & & & &29 & & &$C C C C CBI C C C*    RY   0    bi   :3! 3! 3! 3! 3!BI 3! 3! 3! 3! 3!r,   