
    )j                        d dl mZ d dlmZ d dlmZmZmZmZ d dl	m
Z d dlmZ ddlmZmZmZ e G d de                      Z eej        d	
          d             Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    )	dataclass)partial)AnyDictOptionalUnionN   )BaseModelArgscreate_attention_maskscaled_dot_product_attentionc                   2   e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   d
Zee         ed<   d
Z	ee         ed<   dZ
eed<   dZeed<   dZeed<   dZeed<   dZeed<   d
Zeeeeeef         f                  ed<   dZeed<   d Zd
S )	ModelArgs
model_typehidden_size
hidden_actnum_hidden_layersintermediate_sizenum_attention_headsnorm_eps
vocab_sizenum_key_value_headsNhead_dimmax_position_embeddingsFattention_biasmlp_biasg      ?partial_rotary_factorg     @
rope_thetarope_traditionalrope_scalingtie_word_embeddingsc                     | j         rpd| j         vrt          d          | j                             d          p| j                             d          }|t          d          |dvrt          d          d S d S )Nfactorz"rope_scaling must contain 'factor'type	rope_typez6rope_scaling must contain either 'type' or 'rope_type')linearz4rope_scaling 'type' currently only supports 'linear')r   
ValueErrorget)selfr$   s     `/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/nemotron.py__post_init__zModelArgs.__post_init__"   s     	Yt000 !FGGG)--f55 9J9N9N: :I   M   
** !WXXX	Y 	Y +*    )__name__
__module____qualname__str__annotations__intfloatr   r   r   r   boolr   r   r   r   r   r   r   r    r*    r+   r)   r   r      s>        OOOOOOOOOOOO"Hhsm"""-1Xc]111 ND   Hd#&5&&&J"d""";?L(4U5#:%6 678??? %%%%Y Y Y Y Yr+   r   T)	shapelessc                 N    t          j        |                                           S N)nnrelusquare)xs    r)   relu_squaredr<   1   s    71::r+   c                       e Zd Zd ZdS )NemotronLayerNorm1Pc                     d| v r
| j         dz   nd }d| v r| j        nd }t          j                            |||| j                  S )Nweightr	   bias)r@   rA   mxfast
layer_normeps)r(   r;   r@   rA   s       r)   __call__zNemotronLayerNorm1P.__call__7   sL    $,$4$4q$"dNNtyyw!!!VT48<<<r+   N)r,   r-   r.   rF   r4   r+   r)   r>   r>   6   s#        = = = = =r+   r>   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
	Attentionargsc                 $   t                                                       |j        }|j        x| _        }|j        x| _        }|j        p	|j        |z  x| _        }|j        | _        |dz  | _	        t          |d          r|j        }nd}t          j        |||z  |          | _        t          j        |||z  |          | _        t          j        |||z  |          | _        t          j        ||z  ||          | _        d}|j        rC|j        d         dk    r2t'          |j        d         t(                    sJ d	|j        d         z  }t          j        t-          | j        | j        z            |j        |
          | _        d S )Ng      r   FrA   g      ?r#   r%   r"   r	   )basescale)super__init__r   r   n_headsr   
n_kv_headsr   r   rM   hasattrr   r8   Linearq_projk_projv_projo_projr   
isinstancer2   RoPEr1   r   rope)	r(   rI   dimrP   rQ   r   r   
rope_scale	__class__s	           r)   rO   zAttention.__init__>   s   !%!99w'+'??*#'=#OD4D4OO%)%?"t^
4)** 	#!0NN"NiWx%7nMMMiZ(%:PPPiZ(%:PPPi( 2CnMMM
 	9!26!:h!F!Fd/95AAAAAT.x88JG*T]:;;
 
 
			r+   Nr;   maskcachereturnc                 |   |j         \  }}}|                     |          |                     |          |                     |          }	}}|                    ||| j        d                              dddd          }|                    ||| j        d                              dddd          }|	                    ||| j        d                              dddd          }	|R|                     ||j	                  }|                     ||j	                  }|
                    ||	          \  }}	n*|                     |          }|                     |          }t          |||	|| j        |          }
|
                    dddd                              ||d          }
|                     |
          S )Nr      r	      )offset)r_   rM   r^   )shaperT   rU   rV   reshaperP   	transposerQ   rZ   re   update_and_fetchr   rM   rW   )r(   r;   r^   r_   BL_querieskeysvaluesoutputs              r)   rF   zAttention.__call__]   s    '1a $AAAv //!Qb99CCAq!QOO||Aq$/266@@Aq!LL1dor::DDQ1aPPiii==G99T%,977D 11$??LD&&ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??{{6"""r+   NNr,   r-   r.   r   rO   rB   arrayr   r   rF   __classcell__r]   s   @r)   rH   rH   =   s        
Y 
 
 
 
 
 
D $(#	# #8# rx # }	#
 
# # # # # # # #r+   rH   c                   :     e Zd Zdef fdZdej        fdZ xZS )MLPrI   c                     t                                                       |j        }|j        }|j        }t          j        |||          | _        t          j        |||          | _        d S )NrK   )	rN   rO   r   r   r   r8   rS   	down_projup_proj)r(   rI   r[   
hidden_dimr   r]   s        r)   rO   zMLP.__init__|   sd    +
=:sBBByjx@@@r+   r`   c                 l    |                      t          |                     |                              S r7   )ry   r<   rz   )r(   r;   s     r)   rF   zMLP.__call__   s&    ~~l4<<??;;<<<r+   	r,   r-   r.   r   rO   rB   rs   rF   rt   ru   s   @r)   rw   rw   {   sh        AY A A A A A A=RX = = = = = = = =r+   rw   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
TransformerBlockrI   c                 H   t                                                       |j        | _        |j        | _        t	          |          | _        t          |          | _        t          |j        |j	                  | _
        t          |j        |j	                  | _        d S )NrE   )rN   rO   r   r   rH   	self_attnrw   mlpr>   r   input_layernormpost_attention_layernormr(   rI   r]   s     r)   rO   zTransformerBlock.__init__   s    #'#; +"4t99243CWWW(;$-)
 )
 )
%%%r+   Nr;   r^   r_   r`   c                     |                      |                     |          ||          }||z   }|                     |                     |                    }||z   }|S r7   )r   r   r   r   )r(   r;   r^   r_   rhouts          r)   rF   zTransformerBlock.__call__   s]     NN4//22D%@@EHHT2215566!e
r+   rq   rr   ru   s   @r)   r   r      s        	
Y 	
 	
 	
 	
 	
 	
 $(#	
 
8
 rx 
 }	

 

 
 
 
 
 
 
 
r+   r   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )NemotronModelrI   c                 t   t                                                       | _        j        | _        j        | _        | j        dk    sJ t          j        j        j                  | _        fdt          j                  D             | _
        t          j        j                  | _        d S )Nr   c                 0    g | ]}t                     S ))rI   )r   ).0rl   rI   s     r)   
<listcomp>z*NemotronModel.__init__.<locals>.<listcomp>   s2     
 
 
,-$'''
 
 
r+   r   )rN   rO   rI   r   r   r8   	Embeddingr   embed_tokensrangelayersr>   r   normr   s    `r)   rO   zNemotronModel.__init__   s    	/!%!7""""L$:JKK
 
 
 
16t7M1N1N
 
 
 ((8dmLLL			r+   Ninputsc                    |                      |          }|d gt          | j                  z  }t          ||d                   }t	          | j        |          D ]\  }} ||||          }|                     |          S )Nr   )r_   )r   lenr   r   zipr   )r(   r   r_   r   r^   layercs          r)   rF   zNemotronModel.__call__   s    
 f%%=FS---E$Qa11DK// 	( 	(HE1aQ'''AAyy||r+   r7   r}   ru   s   @r)   r   r      ss        
MY 
M 
M 
M 
M 
M 
M         r+   r   c                   T     e Zd Zdef fdZ	 ddej        fdZed             Z	 xZ
S )ModelrI   c                     t                                                       || _        |j        | _        t	          |          | _        |j        s(t          j        |j	        |j
        d          | _        d S d S )NFrK   )rN   rO   rI   r   r   modelr    r8   rS   r   r   lm_headr   s     r)   rO   zModel.__init__   sq    	/"4((
' 	T9T%5tUSSSDLLL	T 	Tr+   Nr   c                     |                      ||          }| j        j        r | j         j                            |          }n|                     |          }|S r7   )r   rI   r    r   	as_linearr   )r(   r   r_   r   s       r)   rF   zModel.__call__   sT    
 jj''9( 	$*)33C88CC,,s##C
r+   c                     | j         j        S r7   )r   r   )r(   s    r)   r   zModel.layers   s    z  r+   r7   )r,   r-   r.   r   rO   rB   rs   rF   propertyr   rt   ru   s   @r)   r   r      s        TY T T T T T T 
 

 
 
 
 ! ! X! ! ! ! !r+   r   )dataclassesr   	functoolsr   typingr   r   r   r   mlx.corecorerB   mlx.nnr8   rL   r
   r   r   r   compiler<   	LayerNormr>   ModulerH   rw   r   r   r   r4   r+   r)   <module>r      s   " ! ! ! ! !       - - - - - - - - - - - -             T T T T T T T T T T  Y  Y  Y  Y  Y  Y  Y  YF 	t$$$  %$= = = = =", = = =;# ;# ;# ;# ;#	 ;# ;# ;#|= = = = =") = = =    ry   2    BI   <! ! ! ! !BI ! ! ! ! !r+   