
    )j07                        d dl mZmZ d dlmZmZmZmZmZ d dl	m
Z d dlmZ ddlmZmZmZ ddlmZ ddlmZ  ed	           G d
 d                      Z ed	           G d d                      Z ed	           G d d                      ZdededefdZdededefdZej        ej        ej        ej         ej         dZ!e G d de                      Z" G d dej#                  Z$ G d dej#                  Z% G d dej#                  Z& G d  d!ej#                  Z' G d" d#ej#                  Z( G d$ d%ej#                  Z)dS )&    )	dataclassfield)AnyDictListOptionalUnionN   )BaseModelArgscreate_attention_maskscaled_dot_product_attention)KVCache)initialize_ropeT)frozenc                       e Zd ZU dZeed<   dZeed<   dZee	e
                  ed<   dZee         ed<   dZee         ed<   dZee         ed<   dZeed	<   dZeed
<   d ZdS )AttentionConfigFno_opreplace_with_linearNsparsifyn_heads_in_groupwindow_lengthnum_sink_tokens$use_prefill_window_in_sink_attentionunshifted_sinkc                 P   | j         s| j        rVt                              | dd            t                              | dd            t                              | dd            d S | j         s8| j        t          d          | j        dk    rt          d| j                   d S d S )Nr   r   r   z>n_heads_in_group must be specified for active attention blocksr   z'n_heads_in_group must be positive, got )r   r   object__setattr__r   
ValueErrorselfs    d/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/nemotron-nas.py__post_init__zAttentionConfig.__post_init__   s    : 	1 	t%7>>>t_d;;;t%6===== 	$, T   $)) Ud>SUU  	 	
 *)    )__name__
__module____qualname__r   bool__annotations__r   r   r   liststrr   intr   r   r   r   r"    r#   r!   r   r      s         E4 %%%%$(HhtCy!(((&*hsm***#'M8C='''%)OXc]))) )$    !ND       r#   r   c                   r    e Zd ZU dZeed<   dZeed<   dZee	e
                  ed<   dZee         ed<   d ZdS )	FFNConfigFr   r   Nr   ffn_multc                     | j         s| j        rt                              | dd            d S | j         sG| j        t          d          t                              | dt          | j        d                     d S d S )Nr/   z0ffn_mult must be specified for active FFN blocks   )r   r   r   r   r/   r   roundr   s    r!   r"   zFFNConfig.__post_init__5   s    : 	J1 	JtZ66666 	J}$ !STTTtZt}a1H1HIIIII		J 	Jr#   )r$   r%   r&   r   r'   r(   r   r   r   r)   r*   r/   floatr"   r,   r#   r!   r.   r.   .   sx         E4 %%%%$(HhtCy!((( $Hhuo$$$	J 	J 	J 	J 	Jr#   r.   c                   @    e Zd ZU eed<   eed<   edefd            ZdS )BlockConfig	attentionffndatac                     t          di |                    di           }t          di |                    di           } | ||          S )Nr6   r7   )r6   r7   r,   )r   getr.   )clsr8   	attn_confffn_confs       r!   	from_dictzBlockConfig.from_dictF   s[     $@@dhh{B&?&?@@	33txxr2233sYH5555r#   N)	r$   r%   r&   r   r(   r.   classmethoddictr>   r,   r#   r!   r5   r5   A   sP         	NNN6T 6 6 6 [6 6 6r#   r5   nkreturnc                 .    | |z  dk    r| S | |z   | |z  z
  S )z<Finds the smallest multiple of k greater than or equal to n.r   r,   )rA   rB   s     r!   _find_multiplerE   N   s&    1uzzq5AE?r#   r/   n_embdc                 R    t          d| z  |z  dz            }t          |d          S )zQCalculates intermediate size based on multiplier, rounding up to multiple of 256.         )r+   rE   )r/   rF   intermediate_sizes      r!   _ffn_mult_to_intermediate_sizerL   U   s/    AL61A566+S111r#   )silurelugelugelu_new	gelu_fastc                      e Zd ZU dZeed<   dZeed<   dZeed<   dZ	eed<   d	Z
eed
<   dZeed<    ee          Zeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeeeeeef         f                  ed<   dZeed<   dZeed<   d ZdS )	ModelArgsznemotron-nas
model_typei    hidden_sizeP   num_hidden_layers@   num_attention_headsgh㈵>rms_norm_epsi  
vocab_size)default_factoryblock_configsrM   
hidden_actFattention_biasmlp_biasg    A
rope_thetaNrope_scalingi   max_position_embeddingstie_word_embeddingsc           
      r   | j         r6t          | j         d         t                    rd | j         D             | _         t          | j                   | j        k    r-t          dt          | j                    d| j         d          | j        rCd| j        vrt          d          | j                            d          }|t          d	          t          | j                   D ]P\  }}|j	        }|j
        s=|j        s6| j        |j        z  dk    r#t          d
| d| j         d|j         d          Qd S )Nr   c                 B    g | ]}t                               |          S r,   )r5   r>   ).0confs     r!   
<listcomp>z+ModelArgs.__post_init__.<locals>.<listcomp>y   s3     " " "04%%d++" " "r#   zNumber of block_configs (z ) must match num_hidden_layers ()factorz"rope_scaling must contain 'factor'	rope_typez%rope_scaling must contain 'rope_type'zLayer z: num_attention_heads (z)) must be divisible by n_heads_in_group ()r]   
isinstancer@   lenrW   r   rb   r:   	enumerater6   r   r   rY   r   )r    rl   i
block_confr<   s        r!   r"   zModelArgs.__post_init__v   s    	*T-?-BD"I"I 	" "8<8J" " "D t!""d&<<<@C0B,C,C @ @&*&<@ @ @    	Jt000 !EFFF)--k::I  !HIII 't'9:: 	 	MAz",I? 9+H +i.HHAMM$` ` `4;S ` `BKB\` ` `  		 	r#   )r$   r%   r&   rT   r*   r(   rU   r+   rW   rY   rZ   r3   r[   r   r)   r]   r^   r_   r'   r`   ra   rb   r   r   r	   rc   rd   r"   r,   r#   r!   rS   rS   e   s.        $J$$$Ks!!!!L%J%555M4555J ND   Hd J   ;?L(4U5#:%6 678???#)S))) %%%%    r#   rS   c            	            e Zd ZdZdedef fdZ	 	 ddej        de	ej                 de	e
         d	ej        fd
Z xZS )	Attentionz8Standard GQA Attention mechanism for layers that use it.argsattention_configc                    t                                                       |j        }|j        x| _        }||j        z  x| _        }|j        |z  x| _        }| j        |z  |k    rt          d| d| d          |dz  | _	        t          j        |||z  |j                  | _        t          j        |||z  |j                  | _        t          j        |||z  |j                  | _        t          j        ||z  ||j                  | _        t#          | j        |j        d|j        |j                  | _        d S )Nzhidden_size (z,) must be divisible by num_attention_heads (rj   g      ࿩biasF)super__init__rU   rY   n_headsr   
n_kv_headshead_dimr   scalennLinearr_   q_projk_projv_projo_projr   ra   rb   rc   rope)r    rt   ru   dimr{   r|   r}   	__class__s          r!   rz   zAttention.__init__   sQ   !%!99w'.2B2S'SS*#'#3w#>>MG#++[[[QX[[[   t^
iWx%7d>QRRRiZ(%:ATUUUiZ(%:ATUUUi( 2Cd>QRRR $MO(
 
			r#   NxmaskcacherC   c                    |j         \  }}}|                     |          |                     |          |                     |          }	}}|                    ||| j        | j                                      dddd          }|                    ||| j        | j                                      dddd          }|	                    ||| j        | j                                      dddd          }	|R| 	                    ||j
                  }| 	                    ||j
                  }|                    ||	          \  }}	n*| 	                    |          }| 	                    |          }t          |||	|| j        |          }
|
                    dddd                              ||d          }
|                     |
          S )Nr   rH   r
   rI   )offset)r   r~   r   )shaper   r   r   reshaper{   r}   	transposer|   r   r   update_and_fetchr   r~   r   )r    r   r   r   BLDquerieskeysvaluesoutputs              r!   __call__zAttention.__call__   s    '1a $AAAv//!QdmDDNNq!Q
 
 ||Aq$/4=AAKKAqRSUVWW1dot}EEOOq!Q
 
 iii==G99T%,977D 11$??LD&&ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??{{6"""r#   NN)r$   r%   r&   __doc__rS   r   rz   mxarrayr   r   r   __classcell__r   s   @r!   rs   rs      s        BB
Y 
/ 
 
 
 
 
 
@ $(#	# #8# rx # }	#
 
# # # # # # # #r#   rs   c                   B     e Zd ZdZdedef fdZdej        fdZ	 xZ
S )MLPz5Standard Feed-Forward Network for layers that use it.rt   
ffn_configc                    t                                                       |j        }t          |j        |          }t          j        |||j                  | _        t          j        |||j                  | _	        t          j        |||j                  | _
        |j        | _        | j        t          vrt          d|j                   d S )Nrw   zUnknown activation function: )ry   rz   rU   rL   r/   r   r   r`   	gate_proj	down_projup_projr^   act_fn_ACT2FNr   )r    rt   r   r   
hidden_dimr   s        r!   rz   zMLP.__init__   s    3J4GMM
3
GGG:sGGGyjt}EEEo;g%%NT_NNOOO &%r#   rC   c                     t           | j                 }|                      ||                     |                    |                     |          z            S N)r   r   r   r   r   )r    r   r   s      r!   r   zMLP.__call__   sE    %~~ffT^^A%6%677$,,q//IJJJr#   )r$   r%   r&   r   rS   r.   rz   r   r   r   r   r   s   @r!   r   r      s        ??PY PI P P P P P PKRX K K K K K K K Kr#   r   c                   P     e Zd ZdZdedef fdZdej        dej        fdZ	 xZ
S )LinearSubblockReplacementz>A simple linear layer used to replace Attention or MLP blocks.rU   rx   c                     t                                                       t          j        |||          | _        d S )Nrw   )ry   rz   r   r   linear)r    rU   rx   r   s      r!   rz   z"LinearSubblockReplacement.__init__   s5    i[tDDDr#   r   rC   c                 ,    |                      |          S r   )r   )r    r   rt   kwargss       r!   r   z"LinearSubblockReplacement.__call__   s    {{1~~r#   )r$   r%   r&   r   r+   r'   rz   r   r   r   r   r   s   @r!   r   r      s        HHEC Et E E E E E E"(         r#   r   c            	            e Zd ZdZdedef fdZ	 	 ddej        de	ej                 de	e
         d	ej        fd
Z xZS )TransformerBlockzFA single transformer block, potentially heterogeneous based on config.rt   	layer_idxc                    t                                                       |j        | _        |j        |         }|j        | _        |j        | _        | j        j        s&t          j
        |j        |j                  | _        nd | _        | j        j        rd | _        nF| j        j        r t          |j        |j                  | _        nt#          || j                  | _        | j        j        s&t          j
        |j        |j                  | _        nd | _        | j        j        r	d | _        d S | j        j        r!t          |j        |j                  | _        d S t+          || j                  | _        d S )Neps)ry   rz   rU   r]   r6   ru   r7   r   r   r   RMSNormrZ   input_layernorm	self_attnr   r   r_   rs   post_attention_layernormmlpr`   r   )r    rt   r   block_configr   s       r!   rz   zTransformerBlock.__init__   su   +))4 , 6&* $* 	(#%:d.>DDU#V#V#VD  #'D   & 	D!DNN"6 	D6 $"5 DNN
 'tT-BCCDN $ 	1,.J d&7- - -D)) -1D) ?  	2DHHH_0 	201A4=QQDHHH 411DHHHr#   Nr   r   r   rC   c                     | j         4|}|                     |          }|                      |||          }||z   }| j        1|}|                     |          }|                     |          }||z   }|S )N)r   r   )r   r   r   r   )r    r   r   r   residualhattn_outmlp_outs           r!   r   zTransformerBlock.__call__'  s     >%H$$Q''A~~ad%~@@H8#A 8H--a00AhhqkkG7"Ar#   r   )r$   r%   r&   r   rS   r+   rz   r   r   r   r   r   r   r   s   @r!   r   r      s        PP(2Y (23 (2 (2 (2 (2 (2 (2Z $(#	 8 rx  }	
 
       r#   r   c                   ^     e Zd ZdZdef fdZ	 ddej        dee	e
                  fdZ xZS )	NemotronNASModelz.The core Nemotron-NAS style transformer model.rt   c                    t                                                       | _        j        | _        j        | _        t          j        j        j                  | _        fdt          j                  D             | _
        t          j        j        j                  | _        t          d | j
        D                       | _        d S )Nc                 2    g | ]}t          |           S ))rt   r   )r   )rg   rp   rt   s     r!   ri   z-NemotronNASModel.__init__.<locals>.<listcomp>H  s6     
 
 
 $!444
 
 
r#   r   c              3   (   K   | ]}|j         	dV  d S )Nr
   )r   rg   layers     r!   	<genexpr>z,NemotronNASModel.__init__.<locals>.<genexpr>M  s2       #
 #
%/*EA*E*E*E*E#
 #
r#   )ry   rz   rt   r[   rW   r   	EmbeddingrU   embed_tokensrangelayersr   rZ   normsumnum_attn_layersr    rt   r   s    `r!   rz   zNemotronNASModel.__init__B  s    	/!%!7L$:JKK
 
 
 
4122
 
 
 Jt/T5FGGG	" #
 #
;#
 #
 #
  
  
r#   Ninputsr   c                     |                      |          }|d g| j        z  }t          ||d                   }d}| j        D ]'}|j        ||         }|dz  }nd } ||||          }(|                     |          S )Nr   r
   r   )r   r   r   r   r   r   )r    r   r   r   r   	cache_idxr   cs           r!   r   zNemotronNASModel.__call__Q  s    
 f%%=FT11E$Qa11	[ 	( 	(E*)$Q		aQ'''AAyy||r#   r   )r$   r%   r&   r   rS   rz   r   r   r   r   r   r   r   r   s   @r!   r   r   ?  s        88
Y 
 
 
 
 
 
$ &*  S	"       r#   r   c                   `     e Zd Zdef fdZ	 d	dej        fdZd Ze	d             Z
d Z xZS )
Modelrt   c                    t                                                       || _        |j        | _        t	          |          | _        |j        s(t          j        |j	        |j
        d          | _        d S d | _        d S )NFrw   )ry   rz   rt   rT   r   modelrd   r   r   rU   r[   lm_headr   s     r!   rz   zModel.__init__k  so    	/%d++
' 	 9T%5tUSSSDLLLDLLLr#   Nr   c                     |                      ||          }| j        j        r | j         j                            |          }n|                     |          }|S )Nr   )r   rt   rd   r   	as_linearr   )r    r   r   outs       r!   r   zModel.__call__u  sV    
 jjuj--9( 	$*)33C88CC,,s##C
r#   c                 J    | j         j        r|                    dd            |S )Nzlm_head.weight)rt   rd   pop)r    weightss     r!   sanitizezModel.sanitize  s)    9( 	0KK($///r#   c                     | j         j        S r   )r   r   r   s    r!   r   zModel.layers  s    z  r#   c                 $    d | j         D             S )Nc                 8    g | ]}|j         	t                      S r   )r   r   r   s     r!   ri   z$Model.make_cache.<locals>.<listcomp>  s#    RRReeo6Q		6Q6Q6Qr#   )r   r   s    r!   
make_cachezModel.make_cache  s    RRt{RRRRr#   r   )r$   r%   r&   rS   rz   r   r   r   r   propertyr   r   r   r   s   @r!   r   r   i  s         Y             
 

 
 
 
  
 ! ! X!S S S S S S Sr#   r   )*dataclassesr   r   typingr   r   r   r   r	   mlx.corecorer   mlx.nnr   baser   r   r   r   r   
rope_utilsr   r   r.   r5   r+   rE   r3   rL   rM   rN   rO   gelu_approxr   rS   Modulers   r   r   r   r   r   r,   r#   r!   <module>r      s\   ) ( ( ( ( ( ( ( 3 3 3 3 3 3 3 3 3 3 3 3 3 3             T T T T T T T T T T       ' ' ' ' ' ' $       > $J J J J J J J J$ $	6 	6 	6 	6 	6 	6 	6 	6c c c    2U 2C 2C 2 2 2 2 GGG  - - - - - - - -`># ># ># ># >#	 ># ># >#BK K K K K") K K K.	 	 	 	 		 	 	 	B B B B Bry B B BJ' ' ' ' 'ry ' ' 'T"S "S "S "S "SBI "S "S "S "S "Sr#   