
    )j'                        d dl mZ d dlmZ d dlmZmZmZmZ d dl	m
Z d dlmZ ddlmZ ddlmZmZmZ ddlmZ dd	lmZ e G d
 de                      Z G d dej                  Zej        d             Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z  G d dej                  Z! G d dej                  Z"dS )    )	dataclass)partial)AnyDictOptionalUnionN   )swiglu)BaseModelArgscreate_attention_maskscaled_dot_product_attention)initialize_rope)	SwitchGLUc                      e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   ee         ed<   eed	<   eed
<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   dZ	ee         ed<   dZ
eed<   dZee         ed<   dZee         ed<   dZeed<   dZeed<   dZeeeeeef         f                  ed<   dZeed<   dS )	ModelArgs
model_typehidden_sizenum_hidden_layersintermediate_sizenum_attention_headsrms_norm_eps
vocab_sizemax_position_embeddingsnum_key_value_headsfirst_k_dense_replacemoe_intermediate_sizen_routed_expertsn_shared_expertsnorm_topk_probnum_experts_per_tok
rope_thetarouted_scaling_factorNhead_dim)noaux_tcscoring_funcr	   n_group
topk_groupFattention_biasmlp_biasrope_scalingtie_word_embeddings)__name__
__module____qualname__str__annotations__intfloatr   boolr#   r%   r&   r'   r(   r)   r*   r   r   r+        ]/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/dots1.pyr   r      s        OOOOOO%c]***    "Hhsm"""%L#%%%GXc] !J!!! ND   Hd;?L(4U5#:%6 678??? %%%%%%r5   r   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
Dots1Attentionargsc                    t                                                       |j        }|j        x| _        }|j        x| _        }|j        p	|j        |z  }|dz  | _        t          j
        |||z  d          | _        t          j
        |||z  d          | _        t          j
        |||z  d          | _        t          j
        ||z  |d          | _        t          j        ||j                  | _        t          j        ||j                  | _        t'          ||j        d|j        |j                  | _        d S )Ng      Fbiaseps)basetraditionalscaling_configr   )super__init__r   r   n_headsr   
n_kv_headsr#   scalennLinearq_projk_projv_projo_projRMSNormr   q_normk_normr   r!   r*   r   rope)selfr9   dimrD   rE   r#   	__class__s         r6   rC   zDots1Attention.__init__.   s5   !%!99w'+'??*=?D$4$?t^
iWx%7eDDDiZ(%:GGGiZ(%:GGGi( 2CeDDDjt/@AAAjt/@AAA#,$($@
 
 
			r5   Nxmaskcachereturnc                    |j         \  }}}|                     |          |                     |          |                     |          }	}}|                     |                    ||| j        d                                        dddd          }|                     |                    ||| j	        d                                        dddd          }|	                    ||| j	        d                              dddd          }	|R| 
                    ||j                  }| 
                    ||j                  }|                    ||	          \  }}	n*| 
                    |          }| 
                    |          }t          |||	|| j        |          }
|
                    dddd                              ||d          }
|                     |
          S )Nr      r	      )offset)rV   rF   rU   )shaperI   rJ   rK   rN   reshaperD   	transposerO   rE   rP   r\   update_and_fetchr   rF   rL   )rQ   rT   rU   rV   BLDquerieskeysvaluesoutputs              r6   __call__zDots1Attention.__call__G   s    '1a $AAAv++gooaDL"EEFFPPq!Q
 
 {{4<<1dorBBCCMMq!Q
 
 1dor::DDQ1aPPiii==G99T%,977D 11$??LD&&ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??{{6"""r5   NN)r,   r-   r.   r   rC   mxarrayr   r   rh   __classcell__rS   s   @r6   r8   r8   -   s        
Y 
 
 
 
 
 
8 $(#	# #8# rx # }	#
 
# # # # # # # #r5   r8   c                    |}t          j        |                     t           j                            }|}	||z   }||z
  }|dk    rt          j        |d|df          }t          j        |dd                              dd          }
t          j        |
|dz
  d	
          dd |d d f         }t          j        ||t          j	        d          d	          }t          j
        |d	d          }|}t          j        | |dz
  d
          dd |f         }t          j        |	|d          }|dk    r|r|                    dd          }||z  }||z  }||fS )Nr   rY   )axisr]   rZ   ro   T)ro   keepdimsr	   )kthro   .g        )rj   sigmoidastypefloat32	unflattentopksumargpartitionput_along_axisrk   flattentake_along_axis)gatese_score_correction_biastop_kr&   r'   r"   r   kscoresorig_scoresgroup_scores	group_idxindsdenominators                 r6   group_expert_selectr   h   ss    	AZRZ0011FK--F*AAvvf2gr]CCCwvqr22266B6NNOLa!e"EEEc2A2qqqjQ	"69bhsmm"MMMFB++A?F7AB777RaR@DT;;;Fqyy^yjjb4j88+%++F<r5   c                   *     e Zd Zdef fdZd Z xZS )Dots1TopkRouterr9   c                 `   t                                                       |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        t          j
        | j        |j        f          | _        t          j
        | j        f          | _        d S N)rB   rC   r    r   r   r   r"   r&   r'   rj   zerosr   weightr   rQ   r9   rS   s     r6   rC   zDots1TopkRouter.__init__   s    -
"1 $ 5%)%?"|/h 5t7GHII')x1F0H'I'I$$$r5   c           	          t          || j        j        z  | j        | j        | j        | j        | j        | j                  S r   )	r   r   Tr   r   r&   r'   r"   r   rQ   rT   s     r6   rh   zDots1TopkRouter.__call__   sA    "(JLO&
 
 	
r5   r,   r-   r.   r   rC   rh   rl   rm   s   @r6   r   r      sZ        	JY 	J 	J 	J 	J 	J 	J	
 	
 	
 	
 	
 	
 	
r5   r   c                   F     e Zd Z	 ddededef fdZdej        fdZ xZ	S )	Dots1MLPNr9   r   r   c                    t                                                       ||j        n|| _        ||j        n|| _        t	          j        | j        | j        |j                  | _        t	          j        | j        | j        |j                  | _        t	          j        | j        | j        |j                  | _	        d S )Nr;   )
rB   rC   r   r   rG   rH   r)   	gate_projup_proj	down_proj)rQ   r9   r   r   rS   s       r6   rC   zDots1MLP.__init__   s     	/:/B4++&7&?D""EV 	 d44=
 
 
 yd44=
 
 
 "D$44=
 
 
r5   rW   c                     |                      t          |                     |          |                     |                              S r   )r   r
   r   r   r   s     r6   rh   zDots1MLP.__call__   s4    ~~fT^^A%6%6QHHIIIr5   ri   )
r,   r-   r.   r   r1   rC   rj   rk   rh   rl   rm   s   @r6   r   r      s        QU
 

,/
KN
 
 
 
 
 
(JRX J J J J J J J Jr5   r   c                   *     e Zd Zdef fdZd Z xZS )Dots1MoEr9   c                 0   t                                                       |j        | _        |j        | _        t	          |j        |j        |j                  | _        t          |          | _
        t          ||j        |j        z            | _        d S )N)r9   r   )rB   rC   r    r   r   r   r   r   expertsr   gater   shared_expertsr   s     r6   rC   zDots1MoE.__init__   s    #'#;  $ 5 &!
 
 $D))	&"84;PP
 
 
r5   c                    |                      |          \  }}|                     ||          }||d         z                      d                              |j                  }| j        ||                     |          z   }|S )N).Nrr   rp   )r   r   ry   ru   dtyper   r   )rQ   rT   r   r   ys        r6   rh   zDots1MoE.__call__   s~    yy||fLLD!!	""''R'0077@@ ,D''***Ar5   r   rm   s   @r6   r   r      sS        
Y 
 
 
 
 
 
$      r5   r   c            	       |     e Zd Zdedef fdZ	 	 d
dej        deej                 dee	         dej        fd	Z
 xZS )Dots1DecoderLayerr9   	layer_idxc                 l   t                                                       t          |          | _        ||j        k    rt          |          | _        nt          |          | _        t          j	        |j
        |j                  | _        t          j	        |j
        |j                  | _        d S )Nr=   )rB   rC   r8   	self_attnr   r   mlpr   rG   rM   r   r   input_layernormpost_attention_layernorm)rQ   r9   r   rS   s      r6   rC   zDots1DecoderLayer.__init__   s    '--222~~DHH~~DH!z$*:@QRRR(*
$"3)
 )
 )
%%%r5   NrT   rU   rV   rW   c                     |                      |                     |          ||          }||z   }|                     |                     |                    }||z   S r   )r   r   r   r   )rQ   rT   rU   rV   rhs         r6   rh   zDots1DecoderLayer.__call__   sX     NN4//22D%@@EHHT22155661ur5   ri   )r,   r-   r.   r   r1   rC   rj   rk   r   r   rh   rl   rm   s   @r6   r   r      s        
Y 
3 
 
 
 
 
 
" $(#		 	8	 rx 	 }		
 
	 	 	 	 	 	 	 	r5   r   c                   L     e Zd Zdef fdZ	 ddej        dej        fdZ xZS )
Dots1Modelr9   c                 &   t                                                       t          j        j        j                  | _        fdt          j                  D             | _	        t          j
        j        j                  | _        d S )Nc                 0    g | ]}t          |          S r4   )r   ).0r   r9   s     r6   
<listcomp>z'Dots1Model.__init__.<locals>.<listcomp>   s3     
 
 
 dI..
 
 
r5   r=   )rB   rC   rG   	Embeddingr   r   embed_tokensranger   layersrM   r   normr   s    `r6   rC   zDots1Model.__init__   s    L$:JKK
 
 
 
"4#9::
 
 
 Jt/T5FGGG			r5   NinputsrW   c                    |                      |          }|d gt          | j                  z  }t          ||d                   }t	          | j        |          D ]\  }} ||||          }|                     |          S )Nr   )r   lenr   r   zipr   )rQ   r   rV   r   rU   layercs          r6   rh   zDots1Model.__call__   s    
 f%%=FS---E$Qa11DK// 	" 	"HE1aq!!AAyy||r5   r   )	r,   r-   r.   r   rC   rj   rk   rh   rl   rm   s   @r6   r   r      s        HY H H H H H H   
	       r5   r   c                   Z     e Zd Zdef fdZ	 ddej        fdZd Ze	d             Z
 xZS )	Modelr9   c                     t                                                       || _        |j        | _        t	          |          | _        |j        s(t          j        |j	        |j
        d          | _        d S d S )NFr;   )rB   rC   r9   r   r   modelr+   rG   rH   r   r   lm_headr   s     r6   rC   zModel.__init__  sq    	/%%
' 	T9T%5tUSSSDLLL	T 	Tr5   Nr   c                     |                      ||          }| j        j        r | j         j                            |          }n|                     |          }|S r   )r   r9   r+   r   	as_linearr   )rQ   r   rV   outs       r6   rh   zModel.__call__  sT    
 jj''9( 	$*)33C88CC,,s##C
r5   c           
         | j         j        r                    dd            t          | j         j                  D ]w}d| || j         j        k    r`dD ]]\  }dD ]U d d v rGfdt          | j         j                  D             }t          j        |           d d <   V^xd	 	                                D             S )
Nzlm_head.weightzmodel.layers.))w1r   )w2r   )w3r   )r   scalesbiasesz.mlp.experts.0..c                 P    g | ]"}                      d | d d           #S ).mlp.experts.r   )pop)r   er   mprefixweightss     r6   r   z"Model.sanitize.<locals>.<listcomp>2  sS     ' ' '$% !(v,O,OA,O,O,O,OA,O,O P P' ' 'r5   r   c                 "    i | ]\  }}d |v	||S )zrotary_emb.inv_freqr4   )r   r   vs      r6   
<dictcomp>z"Model.sanitize.<locals>.<dictcomp>8  s*    SSSA4IQR4R4R14R4R4Rr5   )
r9   r+   r   r   r   r   r   rj   stackitems)rQ   r   lnto_joinr   r   r   s    `   @@@r6   sanitizezModel.sanitize$  sH   9( 	0KK($///ty233 	Y 	YA(Q((FDI333 Y YDAq
 < Y Y$<<Q<<<<GG' ' ' ' ' ' ').ty/I)J)J' ' 'G HJxPWGXGXGv$C$CA$C$C$C$CDY TSSSSSr5   c                     | j         j        S r   )r   r   )rQ   s    r6   r   zModel.layers:  s    z  r5   r   )r,   r-   r.   r   rC   rj   rk   rh   r   propertyr   rl   rm   s   @r6   r   r     s        TY T T T T T T 
 

 
 
 
T T T, ! ! X! ! ! ! !r5   r   )#dataclassesr   	functoolsr   typingr   r   r   r   mlx.corecorerj   mlx.nnrG   activationsr
   r?   r   r   r   
rope_utilsr   switch_layersr   r   Moduler8   compiler   r   r   r   r   r   r   r4   r5   r6   <module>r      sb   " ! ! ! ! !       - - - - - - - - - - - -                   T T T T T T T T T T ' ' ' ' ' ' $ $ $ $ $ $ & & & & & & & &88# 8# 8# 8# 8#RY 8# 8# 8#v   B
 
 
 
 
bi 
 
 
0J J J J Jry J J J2    ry   :    	   6       8-! -! -! -! -!BI -! -! -! -! -!r5   