
    )jf*                        d dl mZ d dlmZmZmZ d dlmZ d dl	m
Z
 ddlmZ ddlmZmZmZ ddlmZmZ ddlmZ dd	lmZ e G d
 de                      Ze G d de                      Z G d de
j                  Z G d de
j                  Z G d de
j                  Z G d de
j                  Z G d de
j                  Z G d de
j                  Z  G d de
j                  Z!dS )    )	dataclass)AnyOptionalUnionN   )swiglu)BaseModelArgscreate_attention_maskscaled_dot_product_attention)ChunkedKVCacheKVCache)initialize_rope)	SwitchGLUc                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   dZ	eed<   dZ
eed<   dZeed<   dS )TextArgsattention_biasattention_chunk_sizehead_dimhidden_sizeinterleave_moe_layer_stepintermediate_sizeintermediate_size_mlpmax_position_embeddings
model_typenum_attention_headsnum_experts_per_toknum_hidden_layersnum_key_value_headsnum_local_expertsrms_norm_epsrope_scaling
rope_thetause_qk_norm
vocab_size   attn_temperature_tuningi    floor_scaleg?
attn_scaleN)__name__
__module____qualname__bool__annotations__intstrfloatr   r&   r'   r(        ^/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/llama4.pyr   r      s        MMM""""    OOOOOO#$S$$$KJr2   r   c                   :    e Zd ZU eeef         ed<   eed<   d ZdS )	ModelArgstext_configr   c                 N    t                               | j                  | _        d S N)r   	from_dictr6   selfs    r3   __post_init__zModelArgs.__post_init__/   s!    #--d.>??r2   N)	r)   r*   r+   r   r   dictr-   r/   r<   r1   r2   r3   r5   r5   *   sH         x~&&&&OOO@ @ @ @ @r2   r5   c            	       |     e Zd Zdedef fdZ	 	 d
dej        deej                 dee	         dej        fd	Z
 xZS )	Attentionargs	layer_idxc                 
   t                                                       |j        }|j        x| _        }|j        x| _        }t          |dz   dz  dk              | _        |j	        | _	        |j
        | _
        |j        | _        |j        p	|j        |z  x| _        }|dz  | _        t          |d          r|j        }nd}t!          j        |||z  |          | _        t!          j        |||z  |          | _        t!          j        |||z  |          | _        t!          j        ||z  ||          | _        |j        o| j        | _        | j        r*t/          ||j        d|j        |j        	          | _        d S d S )
Nr   r%   r   g      r   FbiasT)traditionalscaling_configr   )super__init__r   r   n_headsr   
n_kv_headsr.   use_roper&   r'   r(   r   scalehasattrr   nnLinearq_projk_projv_projo_projr#   r   r"   r!   r   rope)	r;   r@   rA   dimrI   rJ   r   r   	__class__s	           r3   rH   zAttention.__init__4   s   !%!99w'+'??*Y]a/1455'+'C$+/#'=#OD4D4OOt^
4)** 	#!0NN"NiWx%7nMMMiZ(%:PPPiZ(%:PPPi( 2CnMMM+== 	' #0(,(D  DIII	 	r2   Nxmaskcachereturnc           	         |j         \  }}}|                     |          |                     |          |                     |          }	}}|                    ||| j        d                              dddd          }|                    ||| j        d                              dddd          }|	                    ||| j        d                              dddd          }	||j        }
nd}
| j	        r.| 
                    ||
          }| 
                    ||
          }| j        rDt          j                            |d d          }t          j                            |d d          }| j        r| j	        st          j        t          j        t          j        |
dz   |
|z   dz             | j        z            d	z             | j        z  d	z   }|d d d f         }||z                      |j                  }||                    ||	          \  }}	t1          |||	|| j        |
          }|                    dddd                              ||d          }|                     |          S )Nr      r      )offsetgư>)weightepsg      ?)rY   rL   rX   )shaperP   rQ   rR   reshaperI   	transposerJ   r_   rK   rT   r#   mxfastrms_normr&   logflooraranger'   r(   astypedtypeupdate_and_fetchr   rL   rS   )r;   rW   rX   rY   BLDquerieskeysvaluesr_   attn_scalesoutputs                r3   __call__zAttention.__call__X   ss    '1a $AAAv//!Qb99CCAq!QOO||Aq$/266@@Aq!LL1dor::DDQ1aPP\FFF= 	2iii77G99T&911D 	Ag&&wt&FFG7##D4#@@D' 
	D 
	DHRYvz6A:>BBTEUUVV  /	"
   &aaag.K,44W]CCG 11$??LD&-T6djt
 
 
 !!!Q1--55aB??{{6"""r2   NNr)   r*   r+   r   r.   rH   re   arrayr   r   rv   __classcell__rV   s   @r3   r?   r?   3   s        "X "# " " " " " "N $(#	.# .#8.# rx .# }	.#
 
.# .# .# .# .# .# .# .#r2   r?   c                   @     e Zd Zddedef fdZdej        fdZ xZ	S )MLPNr@   r   c                    t                                                       |j        }|p|j        }t	          j        ||d          | _        t	          j        ||d          | _        t	          j        ||d          | _        d S NFrC   )	rG   rH   r   r   rN   rO   	gate_proj	down_projup_proj)r;   r@   r   rU   
hidden_dimrV   s        r3   rH   zMLP.__init__   sz    &@$*@
3
???:s???yju===r2   rZ   c                     |                      t          |                     |          |                     |                              S r8   )r   r   r   r   )r;   rW   s     r3   rv   zMLP.__call__   s4    ~~fT^^A%6%6QHHIIIr2   r8   )
r)   r*   r+   r5   r.   rH   re   ry   rv   rz   r{   s   @r3   r}   r}      sv        > >Y >3 > > > > > >JRX J J J J J J J Jr2   r}   c                   4     e Zd Z fdZdej        fdZ xZS )MoEc                 `   t                                                       |j        | _        | j        dk    s
J d            |j        | _        t          |j        |j        | j                  | _	        t          j        |j        |j        d          | _        t          |          | _        d S )Nr   z!Only 1 expert per token supportedFrC   )rG   rH   r   top_kr   num_expertsr   r   r   expertsrN   rO   routerr}   shared_expertr;   r@   rV   s     r3   rH   zMoE.__init__   s    -
zQ C1 d4d6F
 
 i 0$2HuUUU YYr2   rZ   c                    |                      |          }| j        }t          j        | |dz
  d          dd |f         }t          j        ||d          }t          j        |                    t          j                                                |j                  }| 	                    ||z  |          
                    d          }||                     |          z   S )Nr   r\   )kthaxis.r   r]   )r   r   re   argpartitiontake_along_axissigmoidrk   float32rl   r   squeezer   )r;   rW   logitskindicesscoresouts          r3   rv   zMoE.__call__   s    QJ/6'q1u2>>>sBQBwG#FG"===FMM"*5566==agFFll1v:w//77::T''****r2   )r)   r*   r+   rH   re   ry   rv   rz   r{   s   @r3   r   r      sU        	' 	' 	' 	' 	'+RX + + + + + + + +r2   r   c            	       |     e Zd Zdedef fdZ	 	 d
dej        deej                 dee	         dej        fd	Z
 xZS )TransformerBlockr@   rA   c                    t                                                       |j        | _        |j        | _        t	          ||          | _        ||j        z  |j        dz
  k    | _        | j        rt          |          | _	        nt          ||j                  | _	        t          j        |j        |j                  | _        t          j        |j        |j                  | _        || _        d S )Nr   ra   )rG   rH   r   r   r?   	self_attnr   is_moe_layerr   feed_forwardr}   r   rN   RMSNormr    input_layernormpost_attention_layernormr@   )r;   r@   rA   rV   s      r3   rH   zTransformerBlock.__init__   s    #'#; +"433&)GG*Q.
  	F #D		D #D$*D E ED!z$*:@QRRR(*
$"3)
 )
 )
% 			r2   NrW   rX   rY   rZ   c                     |                      |                     |          ||          }||z   }|                     |                     |                    }||z   }|S r8   )r   r   r   r   )r;   rW   rX   rY   rhr   s          r3   rv   zTransformerBlock.__call__   s_     NN4//22D%@@Ed;;A>>??!e
r2   rw   rx   r{   s   @r3   r   r      s        X #      , $(#	
 
8
 rx 
 }	

 

 
 
 
 
 
 
 
r2   r   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )
LlamaModelr@   c                    t                                                       | _        j        | _        j        | _        | j        dk    sJ t          j        j        j                  | _        fdt          j                  D             | _
        t          j        j        j                  | _        j        | _        d S )Nr   c                 0    g | ]}t          |          S r1   )r   ).0ir@   s     r3   
<listcomp>z'LlamaModel.__init__.<locals>.<listcomp>   s$    XXXQ'a00XXXr2   r   )rG   rH   r@   r$   r   rN   	Embeddingr   embed_tokensrangelayersr   r    normr   r   s    `r3   rH   zLlamaModel.__init__   s    	/!%!7""""L$:JKKXXXX%@V:W:WXXXJt/T5FGGG	$($=!!!r2   Ninputsc                    |                      |          }|Pt          |          D ]%\  }}|dz   dz  dk    r|                                 &|d         j        }|d         j        }nd}d}||j        d         z   }t          j        ||          }	t          j        ||          d d d f         }
t          j        |	| j	        z  |
| j	        z  z
            }|	|
k    }|dk    |z  }|d gt          | j                  z  }t          ||d                   }t          t          | j        |                    D ](\  }\  }}|dz   dz  dk    }|r|n|} ||||          })|                     |          S )Nr   r%   r   r^   )rY   )r   	enumeratemaybe_trim_frontstart_positionr_   rb   re   rj   absr   lenr   r
   zipr   )r;   r   rY   r   idxcstartr_   endlindsrinds	block_pos	token_pos
chunk_maskglobal_masklayeruse_chunked_attentionrX   s                     r3   rv   zLlamaModel.__call__   s   
 f%%#E** ) )Q!Gq=A%%&&(((!H+E1X_FFEFqwqz!	%%%	&#&&qqq$w/Fd//ET=V4VW
 
	 UN	1n	1
=FS---E+AuQx88(T[%)@)@AA 	( 	(OC%%(1WMQ$6!!6G::KDaQ'''AAyy||r2   r8   	r)   r*   r+   r   rH   re   ry   rv   rz   r{   s   @r3   r   r      sl        	>X 	> 	> 	> 	> 	> 	> # ## # # # # # # #r2   r   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )LanguageModelr@   c                     t                                                       || _        |j        | _        t	          | j                  | _        t          j        | j        j        | j        j	        d          | _
        d S r   )rG   rH   r@   r   r   modelrN   rO   r   r$   lm_headr   s     r3   rH   zLanguageModel.__init__  sg    	/	**
yI!49#7e
 
 
r2   Nr   c                 X    |                      ||          }|                     |          S r8   )r   r   )r;   r   rY   r   s       r3   rv   zLanguageModel.__call__  s)    
 jj''||C   r2   r8   r   r{   s   @r3   r   r     sl        
X 
 
 
 
 
 
 ! !! ! ! ! ! ! ! !r2   r   c                   `     e Zd Zdef fdZ	 d	dej        fdZd Ze	d             Z
d Z xZS )
Modelr@   c                     t                                                       || _        |j        | _        t	          |j                  | _        d S r8   )rG   rH   r@   r   r   r6   language_modelr   s     r3   rH   zModel.__init__  sB    	/+D,<==r2   Nr   c                 .    |                      ||          S r8   )r   )r;   r   rY   s      r3   rv   zModel.__call__  s    
 ""65111r2   c                   
 d 

fd|                                 D             }t          | j        j        j                  D ]}d| d}| d|v rn|                    | d          }| d}| d}t          j        |dd	
          \  }}t          j        |dd          ||<   t          j        |dd          ||<   | d|v r4|                    | d          }	t          j        |	dd          || d<   |S )Nc                     d| v pd| v S )Nvision_modelmulti_modal_projectorr1   )r   s    r3   	to_removez!Model.sanitize.<locals>.to_remove$  s    !Q&F*AQ*FFr2   c                 2    i | ]\  }} |          ||S r1   r1   )r   r   vr   s      r3   
<dictcomp>z"Model.sanitize.<locals>.<dictcomp>(  s,    HHHDAq99Q<<H1aHHHr2   zlanguage_model.model.layers.z.feed_forward.expertsz.gate_up_projz.gate_proj.weightz.up_proj.weightr]   r\   r   r   z
.down_projz.down_proj.weight)	itemsr   r@   r6   r   popre   splitswapaxes)r;   weightslprefixr   gate_kup_kr   r   r   r   s             @r3   sanitizezModel.sanitize#  sU   	G 	G 	G IHHHGMMOOHHH ty,>?? 	U 	UALALLLF'''722KK6 8 8 899"555 111%'Xa%<%<%<"	7"$+iA">"> "GQ : :$$$//#KK6(=(=(=>>	8:IqRS8T8T64445r2   c                 $    | j         j        j        S r8   )r   r   r   r:   s    r3   r   zModel.layers9  s    "(//r2   c                    | j         j        j        }g }t          t	          | j                            D ]R}|dz   dz  dk    r#|                    t          |                     1|                    t                                 S|S )Nr   r%   r   )	r@   r6   r   r   r   r   appendr   r   )r;   
chunk_sizecachesr   s       r3   
make_cachezModel.make_cache=  s    Y*?
s4;''(( 	) 	)AA{anZ889999gii((((r2   r8   )r)   r*   r+   r5   rH   re   ry   rv   r   propertyr   r   rz   r{   s   @r3   r   r     s        >Y > > > > > > 2 22 2 2 2  , 0 0 X0      r2   r   )"dataclassesr   typingr   r   r   mlx.corecorere   mlx.nnrN   activationsr   baser	   r
   r   rY   r   r   
rope_utilsr   switch_layersr   r   r5   Moduler?   r}   r   r   r   r   r   r1   r2   r3   <module>r      s   " ! ! ! ! ! ' ' ' ' ' ' ' ' ' '                   T T T T T T T T T T * * * * * * * * ' ' ' ' ' ' $ $ $ $ $ $     }   2 @ @ @ @ @ @ @ @S# S# S# S# S#	 S# S# S#lJ J J J J") J J J+ + + + +") + + +.    ry   B/ / / / / / / /d! ! ! ! !BI ! ! !&0 0 0 0 0BI 0 0 0 0 0r2   