
    )j+                        d dl mZ d dlmZmZmZmZmZ d dlm	Z
 d dlmZ ddlmZ ddlmZmZmZ ddlmZ e G d d	e                      Zd
 Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    )	dataclass)AnyDictOptionalTupleUnionN   )swiglu)BaseModelArgscreate_attention_maskscaled_dot_product_attention)	SwitchGLUc                   2   e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   eed<   eed<   eed<   eed<   eed<   eed<   ded<   dZe	e
eef                  ed<   dZe	eee
eef         f                  ed<   dZeed<   d ZdS )	ModelArgs
model_type
vocab_sizehidden_sizenum_hidden_layersintermediate_sizenum_attention_headsnum_key_value_headsattention_biasmoe_topknum_expertsnum_shared_expertuse_mixed_mlp_moeuse_qk_normrms_norm_eps
rope_thetause_cla   cla_share_factorNmoe_intermediate_sizerope_scalingFtie_word_embeddingsc                 |      j         r1ddh}t           fd|D                       st          d|           d S d S )Nfactortypec              3   *   K   | ]}|j         v V  d S N)r$   ).0keyselfs     _/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/hunyuan.py	<genexpr>z*ModelArgs.__post_init__.<locals>.<genexpr>)   s+      IICsd//IIIIII    zrope_scaling must contain keys )r$   all
ValueError)r-   required_keyss   ` r.   __post_init__zModelArgs.__post_init__%   sn     	T%v.MIIII=IIIII T !R=!R!RSSS	T 	TT Tr0   )__name__
__module____qualname__str__annotations__intboolfloatr#   r   r   listr$   r   r%   r4    r0   r.   r   r      sB        OOOOOOMMMMMM8<8E#t)$45<<<;?L(4U5#:%6 678??? %%%%T T T T Tr0   r   c                 @    t          | t                    r| |         S | S r*   )
isinstancer=   )argidxs     r.   _int_or_listrC   -   s"    #t 3xJr0   c                   @     e Zd Z	 	 d
dededef fdZddefd	Z xZS )DynamicNTKAlphaRoPE'        ?dimsbasescaling_alphac                     t                                                       || _        ||||dz
  z  z  z  }|t          j        d| j        d          | j        z  z  | _        d S )Nr!   r   )super__init__rH   mxarange_freqs)r-   rH   rI   rJ   	__class__s       r.   rM   zDynamicNTKAlphaRoPE.__init__4   sa     		mq(9::ryDIq99DIEFr0   r   offsetc           	      b    t           j                            || j        dd d|| j                  S )NFrG   )traditionalrI   scalerR   freqs)rN   fastroperH   rP   )r-   xrR   s      r.   __call__zDynamicNTKAlphaRoPE.__call__?   s8    w||I+  
 
 	
r0   )rF   rG   r   )r5   r6   r7   r:   r<   rM   rZ   __classcell__rQ   s   @r.   rE   rE   3   s         "		G 	G	G 	G 		G 	G 	G 	G 	G 	G	
 	
# 	
 	
 	
 	
 	
 	
 	
 	
r0   rE   c            	       ~     e Zd Zdedef fdZ	 	 	 d
dej        deej                 dee	         dej        fd	Z
 xZS )	Attentionkv_projargsc                    t                                                       |j        }|j        x| _        }|j        J |j        x| _        }|j        |z  }|dz  | _        t          j	        |||z  |j
                  | _        |rHt          j	        |||z  |j
                  | _        t          j	        |||z  |j
                  | _        t          j	        ||z  ||j
                  | _        |j        | _        | j        r>t          j        ||j                  | _        t          j        ||j                  | _        t)          ||j        |j        d                   | _        d S )Ng      ࿩biasalpha)rI   rJ   )rL   rM   r   r   n_headsr   
n_kv_headsrU   nnLinearr   q_projk_projv_projo_projr   RMSNormr   query_layernormkey_layernormrE   r   r$   rX   )r-   r`   ra   dimrf   rg   head_dimrQ   s          r.   rM   zAttention.__init__L   sr   !%!99w'333'+'??*#w.t^
iWx%7d>QRRR 	)Z(*1D  DK )Z(*1D  DK i( 2Cd>QRRR+ 	I#%:h8I#J#JD !#Hd6G!H!HD'+G4
 
 
			r0   NrY   maskcachereturnc                    |j         \  }}}|                     |          }|/|                     |          |                     |          }
}	|	|
f}n|\  }	}
|                    ||| j        d                              dddd          }|	                    ||| j        d                              dddd          }	|
                    ||| j        d                              dddd          }
|r|j        nd}| 	                    ||          }| 	                    |	|          }	| j
        r*|                     |          }|                     |	          }	||                    |	|
          \  }	}
t          ||	|
|| j        |          }|                    dddd                              ||d          }|                     |          |fS )Nr   r!   r	      )rR   )rt   rU   rs   )shaperj   rk   rl   reshaperf   	transposerg   rR   rX   r   ro   rp   update_and_fetchr   rU   rm   )r-   rY   rs   rt   	kv_statesBLDquerieskeysvaluesrR   outputs                r.   rZ   zAttention.__call__j   s    '1a++a..;;q>>4;;q>>&DfII$LD& //!Qb99CCAq!QOO||Aq$/266@@Aq!LL1dor::DDQ1aPP!&-A))GF)33yyfy-- 	,**733G%%d++D 11$??LD&-T6djt
 
 
 !!!Q1--55aB??{{6""I--r0   NNN)r5   r6   r7   r;   r   rM   rN   arrayr   r   rZ   r\   r]   s   @r.   r_   r_   K   s        
 
I 
 
 
 
 
 
B $(##. #.8#. rx #. }	#. 
#. #. #. #. #. #. #. #.r0   r_   c                   4     e Zd Z fdZdej        fdZ xZS )MLPc                     t                                                       t          j        ||d          | _        t          j        ||d          | _        t          j        ||d          | _        d S NFrc   )rL   rM   rh   ri   	gate_proj	down_projup_proj)r-   rq   
hidden_dimrQ   s      r.   rM   zMLP.__init__   se    3
???:s???yju===r0   ru   c                     |                      t          |                     |          |                     |                              S r*   )r   r
   r   r   r-   rY   s     r.   rZ   zMLP.__call__   s4    ~~fT^^A%6%6QHHIIIr0   r5   r6   r7   rM   rN   r   rZ   r\   r]   s   @r.   r   r      s^        > > > > >JRX J J J J J J J Jr0   r   c                   4     e Zd Z fdZdej        fdZ xZS )Gatec                     t                                                       t          j        ||d          | _        d S r   )rL   rM   rh   ri   wg)r-   rq   r   rQ   s      r.   rM   zGate.__init__   s5    )C5999r0   ru   c                 ,    |                      |          S r*   )r   r   s     r.   rZ   zGate.__call__   s    wwqzzr0   r   r]   s   @r.   r   r      sU        : : : : :RX        r0   r   c                   @     e Zd Zddedef fdZdej        fdZ xZ	S )MoeBlockr   ra   	layer_idxc                    t                                                       |j        }|j        }|j        | _        |j        r:t          |j        |          }t          |t          ||z                      | _
        |j        x| _        }t          |j        |          | _        t          ||          | _        |}|j        t          |j        |          }t#          |||          | _        d S r*   )rL   rM   r   r   r   use_shared_mlprC   r   r   r:   
shared_mlpr   r   top_kr   gater#   r   
switch_mlp)	r-   ra   r   rq   r   
num_sharedr   expert_intermediate_sizerQ   s	           r.   rM   zMoeBlock.__init__   s     2"4! 	L%d&<iHHJ!#s+<z+I'J'JKKDO)-)99;!$-;;
k**	 $5 %1'3*I( ($ $C)A;OOr0   rY   c                    |                      |          }t          j        |dd          }| j        }t          j        t          j        | |dz
  d          dd |f                   }t          j        ||d          }|                     ||          }||d                             t          j	                  z  
                    d	                              |j                  }| j        r|                     |          }||z   }|S )
Nrw   T)axispreciser	   )kthr   .r   ).N)r   rN   softmaxr   stop_gradientargpartitiontake_along_axisr   astypefloat32sumdtyper   r   )r-   rY   gateskindsscoresyshared_expert_outputs           r.   rZ   zMoeBlock.__call__   s     		!
5r4888JAE K K KCQSRSQSG TUU#E4b999OOAt$$	"))"*555:::CCJJ17SS 	)#'??1#5#5 ((Ar0   r[   )
r5   r6   r7   r   r:   rM   rN   r   rZ   r\   r]   s   @r.   r   r      sw        P PY P3 P P P P P P08       r0   r   c                        e Zd Zddededef fdZ	 	 	 ddej        de	ej                 d	e	e
         d
e	eej        ej        f                  fdZ xZS )DecoderLayerr   ra   r`   r   c                    t                                                       |j        | _        t          ||          | _        |j        dk    r t          |j        |j                  | _        nt          ||          | _        t          j        |j        |j                  | _        t          j        |j        |j                  | _        || _        d S )Nr	   eps)rL   rM   r   r_   	self_attnr   r   r   mlpr   rh   rn   r   input_layernormpost_attention_layernormra   )r-   ra   r`   r   rQ   s       r.   rM   zDecoderLayer.__init__   s    +"7D11q  4+T-CDDDHHi00DH!z$*:@QRRR(*
$"3)
 )
 )
% 			r0   NrY   rs   rt   shared_kv_statesc                     |                      |                     |          |||          \  }}||z   }|                     |                     |                    }||z   }||fS r*   )r   r   r   r   )r-   rY   rs   rt   r   rhouts           r.   rZ   zDecoderLayer.__call__   st     #nn  ##T52B
 
 EHHT2215566!e$$$r0   r[   r   )r5   r6   r7   r   r;   r:   rM   rN   r   r   r   r   rZ   r\   r]   s   @r.   r   r      s         Y  #      $ $(#@D% %8% rx % }	%
 #528);#<=% % % % % % % %r0   r   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )HunYuanModelra   c                 ~   t                                                       | _        j        | _        j        | _        | j        dk    sJ t          j        j        j                  | _        fdt          j                  D             | _
        t          j        j        j                  | _        d S )Nr   c                 \    g | ](}t          j         p|j        z  d k    |          )S )r   )ra   r`   r   )r   r    r"   )r+   ira   s     r.   
<listcomp>z)HunYuanModel.__init__.<locals>.<listcomp>   sZ     
 
 
  !\)Nq43H/HQ.N  
 
 
r0   r   )rL   rM   ra   r   r   rh   	Embeddingr   embed_tokensrangelayersrn   r   normr-   ra   rQ   s    `r.   rM   zHunYuanModel.__init__   s    	/!%!7""""L$:JKK
 
 
 
 4122
 
 
 Jt/T5FGGG			r0   Ninputsc                 p   |                      |          }|d gt          | j                  z  }t          ||d                   }t	          t          | j        |                    D ]:\  }\  }}| j        j        r|| j        j        z  dk    rd } |||||          \  }};| 	                    |          S )Nr   )
r   lenr   r   	enumeratezipra   r    r"   r   )	r-   r   rt   r   rs   r   layercr   s	            r.   rZ   zHunYuanModel.__call__  s    
 f%%=FS---E$Qa11&s4;'>'>?? 	F 	FMAzqI% (!di.H*HA*M*M#' "'%44D"E"EAyy||r0   r*   )	r5   r6   r7   r   rM   rN   r   rZ   r\   r]   s   @r.   r   r      ss        HY H H H H H H(         r0   r   c                   Z     e Zd Zdef fdZ	 ddej        fdZd Ze	d             Z
 xZS )	Modelra   c                     t                                                       || _        |j        | _        t	          |          | _        d S r*   )rL   rM   ra   r   r   modelr   s     r.   rM   zModel.__init__  s<    	/!$''


r0   Nr   c                 l    |                      ||          }| j         j                            |          S r*   )r   r   	as_linear)r-   r   rt   r   s       r.   rZ   zModel.__call__  s0    
 jj''z&00555r0   c           
      v   dv r+i }| j         j        }| j         j        }| j         j        |z  }|| j         j        z  }                                D ]\  }dv r|                    ||dz   |d          }|                    ||dz   gd          }t          g d|          D ]4\  }	}
                    d|	          }t          j
        |
dd          ||<   5d	v rK|                    dd          }t          d
dg|          D ] \  }	}
                    d	|	          }|
||<   !||<   |dvrS t          | j         j                  D ]d}d| dD ]ZdD ]U d d v rGfdt          | j         j                  D             }t          j        |           d d <   V[eS )Nz*model.layers.0.mlp.gate_and_up_proj.weightqkv_projr!   rw   r	   r   )rj   rk   rl   r   gate_and_up_projr   r   z+model.layers.0.mlp.experts.0.up_proj.weightzmodel.layers.)r   r   r   )weightscalesbiasesz.mlp.experts.0..c                 P    g | ]"}                      d | d d           #S )z.mlp.experts.r   )pop)r+   er   nprefixweightss     r.   r   z"Model.sanitize.<locals>.<listcomp>E  sS     # # # ! $KK6(K(K(K(KA(K(K(K(KLL# # #r0   z.mlp.switch_mlp.)ra   r   r   r   itemsrz   splitr   replacerN   flattenr   r   r   stack)r-   r   new_weightsr   rg   n_kv_groupsrr   vsplitsk_upv_newk_newlto_joinr   r   r   s    `            @@@r.   sanitizezModel.sanitize&  s   77BBK	%A6J)7:EKDI99H ' '1??		*kAoxLLAWWk;?%C!WLLF'*+I+I+I6'R'R E Ee !		*d ; ;-/Zq!-D-DE**E (1,,WWQQW//F'*I{+CV'L'L 3 3e !		*<d C C-2E**3 &'KNN!G8GGNty233 		X 		XA(Q((F: X X7 X XA 8888Q88GCC# # # # # # #%*49+@%A%A# # # GIhwFWFW6 B B1 B Bq B BCXX r0   c                     | j         j        S r*   )r   r   )r-   s    r.   r   zModel.layersL  s    z  r0   r*   )r5   r6   r7   r   rM   rN   r   rZ   r   propertyr   r\   r]   s   @r.   r   r     s        (Y ( ( ( ( ( ( 6 66 6 6 6$ $ $L ! ! X! ! ! ! !r0   r   ) dataclassesr   typingr   r   r   r   r   mlx.corecorerN   mlx.nnrh   activationsr
   rI   r   r   r   switch_layersr   r   rC   ModulerE   r_   r   r   r   r   r   r   r>   r0   r.   <module>r     st   " ! ! ! ! ! 4 4 4 4 4 4 4 4 4 4 4 4 4 4                   T T T T T T T T T T $ $ $ $ $ $ T T T T T T T T<  
 
 
 
 
") 
 
 
0B. B. B. B. B.	 B. B. B.JJ J J J J") J J J    29   + + + + +ry + + +\% % % % %29 % % %@" " " " "29 " " "J7! 7! 7! 7! 7!BI 7! 7! 7! 7! 7!r0   