
    )j                      z   d dl mZ d dlmZmZmZ d dlmZ d dl	m
Z
 ddlmZ ddlmZmZmZ ddlmZ e G d d	e                      Z G d
 de
j                  Z G d de
j                  Z G d de
j                  Z G d de
j                  Z G d de
j                  Z G d de
j                  ZdS )    )	dataclass)AnyListOptionalN   )swiglu)BaseModelArgscreate_attention_maskscaled_dot_product_attention)	SwitchGLUc                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   ee         ed<   eed<   eed	<   eed
<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   dS )	ModelArgs
model_typehidden_sizenum_hidden_layersintermediate_sizenum_attention_headsattention_biasmlp_only_layersnum_expertsnum_experts_per_tokdecoder_sparse_stepn_shared_expertsmoe_intermediate_sizerms_norm_eps
vocab_sizenum_key_value_heads
rope_thetamax_position_embeddingsnorm_topk_probN)	__name__
__module____qualname__str__annotations__intboolr   float     ]/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/Klear.pyr   r      s         OOO#YOOO    r*   r   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
KlearAttentionargsc                 6   t                                                       |j        | _        |j        | _        |j        |j        z  | _        | j        dz  | _        t          j        |j        | j        | j        z  |j	                  | _
        t          j        |j        | j        | j        z  |j	                  | _        t          j        |j        | j        | j        z  |j	                  | _        t          j        | j        | j        z  |j        |j	                  | _        t          j        | j        |j                  | _        t          j        | j        |j                  | _        t          j        | j        d|j                  | _        d S )Ng      ࿩biasepsF)traditionalbase)super__init__r   r   r   head_dimscalennLinearr   q_projk_projv_projo_projRMSNormr   q_normk_normRoPEr   ropeselfr.   	__class__s     r+   r7   zKlearAttention.__init__%   sz   #'#; #'#; (D,DD]D(
i$t}4$
 
 

 i$t}4$
 
 

 i$t}4$
 
 

 i$t}4$
 
 
 jD4EFFFjD4EFFFGM
 
 
			r*   Nxmaskcachereturnc                    |j         \  }}}|                     |          |                     |          |                     |          }	}}|                     |                    ||| j        d                                        dddd          }|                     |                    ||| j	        d                                        dddd          }|	                    ||| j	        d                              dddd          }	|R| 
                    ||j                  }| 
                    ||j                  }|                    ||	          \  }}	n*| 
                    |          }| 
                    |          }t          |||	|| j        |          }
|
                    dddd                              ||d          }
|                     |
          S )Nr      r      )offset)rJ   r9   rI   )shaper<   r=   r>   rA   reshaper   	transposerB   r   rD   rP   update_and_fetchr   r9   r?   )rF   rH   rI   rJ   BLDquerieskeysvaluesoutputs              r+   __call__zKlearAttention.__call__K   s    '1a $AAAv++OOAq$":B??
 

)Aq!Q

 	 {{4<<1d.FKKLLVVq!Q
 
 1d&>CCMMq!Q
 
 iii==G99T%,977D 11$??LD&&ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??{{6"""r*   NNr!   r"   r#   r   r7   mxarrayr   r   r\   __classcell__rG   s   @r+   r-   r-   $   s        $
Y $
 $
 $
 $
 $
 $
R $(#	 #  #8 # rx  # }	 #
 
 #  #  #  #  #  #  #  #r*   r-   c                   4     e Zd Z fdZdej        fdZ xZS )KlearMLPc                     t                                                       t          j        ||d          | _        t          j        ||d          | _        t          j        ||d          | _        d S NFr0   )r6   r7   r:   r;   	gate_proj	down_projup_proj)rF   dim
hidden_dimrG   s      r+   r7   zKlearMLP.__init__o   se    3
???:s???yju===r*   rK   c                     |                      t          |                     |          |                     |                              S N)rh   r   rg   ri   )rF   rH   s     r+   r\   zKlearMLP.__call__u   s4    ~~fT^^A%6%6QHHIIIr*   )r!   r"   r#   r7   r_   r`   r\   ra   rb   s   @r+   rd   rd   n   s^        > > > > >JRX J J J J J J J Jr*   rd   c                   H     e Zd Zdef fdZdej        dej        fdZ xZS )KlearSparseMoeBlockr.   c                 
   t                                                       |j        | _        |j        | _        |j        | _        t          j        |j        |j        d          | _	        t          |j        |j        |j                  | _        t          |j        |j        |j        z            | _        t          j        |j        d          | _        t#          j        | j        ft"          j                  | _        d S )NFr0   )rk   rN   )dtype)r6   r7   r    r   r   top_kr:   r;   r   gater   r   expertsrd   r   shared_expertscoefficientr_   zerosfloat32expert_biasrE   s     r+   r7   zKlearSparseMoeBlock.__init__z   s    "1+-
Id.0@uMMM	 d8$:J
 
 '1D4II
 
 
 9T%5q998T%5$7rzJJJr*   rH   rK   c                    t          j        |                     |                              t           j                            }|| j                            d          z   }| j        }t          j        | |dz
  d          dd |f         }t          j	        ||d          }| j
        r|t          j        |dd          z  }|                    |j                  }|                     ||          }||d	         z                      d
          }t          j        |                     |          dd          }	|                     |          }
||	dd df         z  |
|	ddd f         z  z   }|S )N)r   r   rM   r   rM   )kthaxis.)r|   T)r|   keepdims).N)r|   precise)r_   sigmoidrs   astyperx   ry   rR   rr   argpartitiontake_along_axisr    sumrq   rt   softmaxrv   ru   )rF   rH   routing_weightsbiased_weightskindsscores
expert_out	y_expertscoefsharedys               r+   r\   zKlearSparseMoeBlock.__call__   sb   *TYYq\\%8%8%D%DEE(4+;+C+CJ+O+OOJAECCCC!GL#OTCCC 	EbfV"tDDDDFqw''\\!T**
&"3388b8AA	z$**1--BEEE$$Q''S"1"W%c122g(>>r*   )	r!   r"   r#   r   r7   r_   r`   r\   ra   rb   s   @r+   ro   ro   y   sq        KY K K K K K K""( rx        r*   ro   c            	       |     e Zd Zdedef fdZ	 	 d
dej        deej                 dee	         dej        fd	Z
 xZS )KlearDecoderLayerr.   	layer_idxc                    t                                                       t          |          | _        ||j        vr1|j        dk    r&|dz   |j        z  dk    rt          |          | _        nt          |j
        |j                  | _        t          j        |j
        |j                  | _        t          j        |j
        |j                  | _        d S )Nr   r   r2   )r6   r7   r-   	self_attnr   r   r   ro   mlprd   r   r   r:   r@   r   input_layernormpost_attention_layernorm)rF   r.   r   rG   s      r+   r7   zKlearDecoderLayer.__init__   s    '--T111q  i!mt7O%OST%T%T*400DHH 0$2HIIDH!z$*:@QRRR(*
$"3)
 )
 )
%%%r*   NrH   rI   rJ   rK   c                     |                      |                     |          ||          }||z   }|                     |                     |                    }||z   }|S rm   )r   r   r   r   )rF   rH   rI   rJ   rhouts          r+   r\   zKlearDecoderLayer.__call__   s]     NN4//22D%@@EHHT2215566!e
r*   r]   )r!   r"   r#   r   r&   r7   r_   r`   r   r   r\   ra   rb   s   @r+   r   r      s        
Y 
3 
 
 
 
 
 
& $(#	
 
8
 rx 
 }	

 

 
 
 
 
 
 
 
r*   r   c                   \     e Zd Zdef fdZ	 ddej        dee         dej        fdZ	 xZ
S )	
KlearModelr.   c                 &   t                                                       t          j        j        j                  | _        fdt          j                  D             | _	        t          j
        j        j                  | _        d S )Nc                 2    g | ]}t          |           S ))r.   r   )r   ).0ir.   s     r+   
<listcomp>z'KlearModel.__init__.<locals>.<listcomp>   s6     
 
 
 41555
 
 
r*   r2   )r6   r7   r:   	Embeddingr   r   embed_tokensranger   layersr@   r   normrE   s    `r+   r7   zKlearModel.__init__   s    L$:JKK
 
 
 
4122
 
 
 Jt/T5FGGG			r*   NinputsrJ   rK   c                    |                      |          }|d gt          | j                  z  }t          ||d                   }t	          | j        |          D ]\  }} ||||          }|                     |          S )Nr   )r   lenr   r
   zipr   )rF   r   rJ   r   rI   layercs          r+   r\   zKlearModel.__call__   s    
 f%%=FS---E$Qa11DK// 	" 	"HE1aq!!AAyy||r*   rm   r^   rb   s   @r+   r   r      s        HY H H H H H H  $  } 
	       r*   r   c                        e Zd Zdef fdZ	 ddej        dee         fdZ	d Z
ed             Zed	             Zed
             Z xZS )Modelr.   c                     t                                                       || _        |j        | _        t	          |          | _        t          j        |j        |j	        d          | _
        d S rf   )r6   r7   r.   r   r   modelr:   r;   r   r   lm_headrE   s     r+   r7   zModel.__init__   sY    	/%%
y!14?OOOr*   Nr   rJ   c                 X    |                      ||          }|                     |          S rm   )r   r   )rF   r   rJ   r   s       r+   r\   zModel.__call__   s)    
 jj''||C   r*   c                     dvrS t          | j        j                  D ]Q}d| ddD ]Ffdt          | j        j                  D             }t	          j        |           d d<   GRS )Nz-model.layers.0.mlp.experts.0.gate_proj.weightzmodel.layers.z.mlp.experts)rg   ri   rh   c           
      L    g | ] }                      d | d  d          !S )..weight)pop)r   enameprefixweightss     r+   r   z"Model.sanitize.<locals>.<listcomp>   sN        KK6 = =A = = = = =>>  r*   r   r   )r   r.   r   r   r_   stack)rF   r   lstackedr   r   s    `  @@r+   sanitizezModel.sanitize   s    :'IINty233 	G 	GA4Q444F= G G     "49#899   68Xg5F5F611D11122G r*   c                     | j         j        S rm   )r   r   )rF   s    r+   r   zModel.layers   s    z  r*   c                     d }|S )Nc                 :    |                      d          rdddS dS )Nzmlp.gate@      )
group_sizebitsT)endswith)path_s     r+   	predicatez(Model.quant_predicate.<locals>.predicate   s)    }}Z(( 5&(!4444r*   r)   rF   r   s     r+   quant_predicatezModel.quant_predicate   s    	 	 	
 r*   c                     d }|S )Nc                 
    d| vS )Nry   r)   )r   s    r+   r   z'Model.cast_predicate.<locals>.predicate  s     ))r*   r)   r   s     r+   cast_predicatezModel.cast_predicate  s    	* 	* 	* r*   rm   )r!   r"   r#   r   r7   r_   r`   r   r   r\   r   propertyr   r   r   ra   rb   s   @r+   r   r      s        PY P P P P P P  $! !! }! ! ! !   ! ! X!   X   X    r*   r   )dataclassesr   typingr   r   r   mlx.corecorer_   mlx.nnr:   activationsr   r5   r	   r
   r   switch_layersr   r   Moduler-   rd   ro   r   r   r   r)   r*   r+   <module>r      s   " ! ! ! ! ! & & & & & & & & & &                   T T T T T T T T T T $ $ $ $ $ $        *G# G# G# G# G#RY G# G# G#TJ J J J Jry J J J         ")      F    	   <       81 1 1 1 1BI 1 1 1 1 1r*   