
    )j                     r   d dl mZ d dlmZmZmZmZ d dlmZ	 d dl
mZ ddlmZ ddlmZmZmZ e G d de                      Z G d	 d
ej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    )	dataclass)AnyDictOptionalUnionN   )swiglu)BaseModelArgscreate_attention_maskscaled_dot_product_attentionc                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   dZeed	<   d
Z	eed<   dZ
eed<   dZeed<   dZeed<   dZeeeeeef         f                  ed<   dZeed<   d ZdS )	ModelArgs
model_typehidden_sizenum_hidden_layersintermediate_sizenum_attention_headsrms_norm_eps
vocab_sizeTbiasi   max_position_embeddingsNnum_key_value_heads'  
rope_thetaFrope_traditionalrope_scalingtie_word_embeddingsc                       j          j         _          j        rOddh}t           fd|D                       st	          d|            j        d         dvrt	          d          d S d S )Nfactortypec              3   *   K   | ]}|j         v V  d S N)r   ).0keyselfs     a/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/internlm2.py	<genexpr>z*ModelArgs.__post_init__.<locals>.<genexpr>$   s+      IICsd//IIIIII    zrope_scaling must contain keys )lineardynamicz@rope_scaling 'type' currently only supports 'linear' or 'dynamic)r   r   r   all
ValueError)r%   required_keyss   ` r&   __post_init__zModelArgs.__post_init__   s    #+'+'?D$ 	%v.MIIII=IIIII T !R=!R!RSSS (0EEE V  	 	
 FEr(   )__name__
__module____qualname__str__annotations__intfloatr   boolr   r   r   r   r   r   r   r   r   r.    r(   r&   r   r      s         OOOOOOD$#(S(((####J"d""";?L(4U5#:%6 678??? %%%%    r(   r   c                   V     e Zd ZdZ	 	 	 	 ddededed	ed
ef
 fdZd ZddefdZ	 xZ
S )DynamicNTKScalingRoPEzCImplements the rotary positional encoding with Dynamic NTK scaling.   Fr         ?dimsr   traditionalbasescalec                     t                                                       || _        || _        || _        || _        || _        d S r"   )super__init__r   original_baser<   r=   r?   )r%   r<   r   r=   r>   r?   	__class__s         r&   rB   zDynamicNTKScalingRoPE.__init__0   sE     	'>$!	&


r(   c                 D    | j          d| j         d| j         d| j         S )Nz, traditional=z, max_position_embeddings=z, scaling_factor=)r<   r=   r   scaling_factorr%   s    r&   
extra_reprz DynamicNTKScalingRoPE.extra_repr?   sV    )  \  \4+;  \  \W[Ws  \  \  GK  GZ  \  \  	\r(   r   offsetc                     |j         d         |z   }|| j        k    r9| j        | j        |z  | j        z  | j        dz
  z
  | j        | j        dz
  z  z  z  }n| j        }t
          j                            || j        | j        || j        |          S )Nr      )r=   r>   r?   rI   )	shaper   rC   r?   r<   mxfastroper=   )r%   xrI   seq_lenr>   s        r&   __call__zDynamicNTKScalingRoPE.__call__B   s    '!*v%T111%g%(DDVWX)ty1}-)/ /DD %Dw||I(*  
 
 	
r(   )r:   Fr   r;   )r   )r/   r0   r1   __doc__r4   r6   r5   rB   rH   rR   __classcell__rD   s   @r&   r9   r9   -   s        MM
 (,!  "% 	
       \ \ \
 
# 
 
 
 
 
 
 
 
r(   r9   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
	Attentionargsc                 "   t                                                       |j        }|j        x| _        }|j        x| _        }||j        z  | _        |j        |z  x| _        }|dz  | _	        t          j        ||d|z  z   |z  |j                  | _        t          j        ||z  ||j                  | _        |j        !|j        d         dk    rd|j        d         z  nd}t!          ||j        |j        |j        |	          | _        d S )
Ng      rK   r   r    r)   r   r   g       @)r   r=   r>   r?   )rA   rB   r   r   n_headsr   
n_kv_headsn_kv_groupshead_dimr?   nnLinearr   wqkvwor   r9   r   r   r   rO   )r%   rX   dimr[   r\   r^   
rope_scalerD   s          r&   rB   zAttention.__init__V   s+   !%!99w'+'??*"d&>>#'#3w#>>t^
I'A
N*h6TY
 
 
	 )Gh.$)DDD  ,1B61Jh1V1V !(+++ 	 *$($@-
 
 
			r(   NrP   maskcachereturnc                 
   |j         \  }}}|                     |          }|                    ||dd| j        z   | j                  }|dd | j        d d f         }|                    ||d| j                  }|ddd d f         }	|ddd d f         }
|                    ||| j        d                              dddd          }|	                    ||| j        d                              dddd          }	|
                    ||| j        d                              dddd          }
|R|                     ||j	                  }|                     |	|j	                  }	|
                    |	|
          \  }	}
n*|                     |          }|                     |	          }	t          ||	|
|| j        |	          }|                    dddd                              ||d          }|                     |          S )
NrK   .r   r      )rI   )rf   r?   re   )rL   ra   reshaper]   r^   r[   	transposer\   rO   rI   update_and_fetchr   r?   rb   )r%   rP   re   rf   BLD
qkv_statesquerieskeysvaluesoutputs               r&   rR   zAttention.__call__t   s    '1aYYq\\
''1b!d6F2FVV
S"4D$4"4aaa78//!QDM::#r111*%CQQQJ' //!Qb99CCAq!QOO||Aq$/266@@Aq!LL1dor::DDQ1aPPiii==G99T%,977D 11$??LD&&ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??wwvr(   NNr/   r0   r1   r   rB   rM   arrayr   r   rR   rT   rU   s   @r&   rW   rW   U   s        
Y 
 
 
 
 
 
B $(#	! !8! rx ! }	!
 
! ! ! ! ! ! ! !r(   rW   c                   4     e Zd Z fdZdej        fdZ xZS )MLPc                     t                                                       t          j        ||d          | _        t          j        ||d          | _        t          j        ||d          | _        d S NFrZ   )rA   rB   r_   r`   w1w2w3)r%   rc   
hidden_dimrD   s      r&   rB   zMLP.__init__   se    )C%888)J%888)C%888r(   rg   c                     |                      t          |                     |          |                     |                              S r"   )r   r	   r~   r   )r%   rP   s     r&   rR   zMLP.__call__   s2    wwvdggajj$''!**55666r(   )r/   r0   r1   rB   rM   ry   rR   rT   rU   s   @r&   r{   r{      sU        9 9 9 9 97RX 7 7 7 7 7 7 7 7r(   r{   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
TransformerBlockrX   c                 B   t                                                       t          |          | _        t	          |j        |j                  | _        t          j	        |j        |j
                  | _        t          j	        |j        |j
                  | _        d S )Neps)rA   rB   rW   	attentionr{   r   r   feed_forwardr_   RMSNormr   attention_normffn_normr%   rX   rD   s     r&   rB   zTransformerBlock.__init__   s{    "4 0$2HII j)9t?PQQQ
4#39JKKKr(   NrP   re   rf   rg   c                     |                      |                     |          ||          }||z   }|                     |                     |                    }||z   }|S r"   )r   r   r   r   )r%   rP   re   rf   rhouts          r&   rR   zTransformerBlock.__call__   s]     NN4..q114??EdmmA..//!e
r(   rw   rx   rU   s   @r&   r   r      s        LY L L L L L L $(#	
 
8
 rx 
 }	

 

 
 
 
 
 
 
 
r(   r   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )InternLM2ModelrX   c                 @   t                                                       j        dk    sJ t          j        j        j                  | _        fdt          j                  D             | _	        t          j
        j        j                  | _        d S )Nr   c                 0    g | ]}t                     S ))rX   )r   )r#   _rX   s     r&   
<listcomp>z+InternLM2Model.__init__.<locals>.<listcomp>   s2     
 
 
,-$'''
 
 
r(   r   )rA   rB   r   r_   	Embeddingr   tok_embeddingsranger   layersr   r   normr   s    `r&   rB   zInternLM2Model.__init__   s    """" l4?D<LMM
 
 
 
16t7M1N1N
 
 
 Jt/T5FGGG			r(   Ninputsc                    |                      |          }|d gt          | j                  z  }t          ||d                   }t	          | j        |          D ]\  }} ||||          }|                     |          S )Nr   )rf   )r   lenr   r   zipr   )r%   r   rf   r   re   layercs          r&   rR   zInternLM2Model.__call__   s    
 ''=FS---E$Qa11DK// 	( 	(HE1aQ'''AAyy||r(   r"   )	r/   r0   r1   r   rB   rM   ry   rR   rT   rU   s   @r&   r   r      ss        HY H H H H H H         r(   r   c                   Z     e Zd Zdef fdZ	 ddej        fdZd Ze	d             Z
 xZS )	ModelrX   c                     t                                                       || _        |j        | _        t	          |          | _        |j        s(t          j        |j	        |j
        d          | _        d S d S r}   )rA   rB   rX   r   r   modelr   r_   r`   r   r   rv   r   s     r&   rB   zModel.__init__   sq    	/#D))
' 	S)D$4doERRRDKKK	S 	Sr(   Nr   c                     |                      ||          }| j        j        r | j         j                            |          }n|                     |          }|S r"   )r   rX   r   r   	as_linearrv   )r%   r   rf   r   s       r&   rR   zModel.__call__   sT    
 jj''9( 	#*+55c::CC++c""C
r(   c                 >    d |                                 D             S )Nc                 "    i | ]\  }}d |v	||S )zattention.rope.inv_freqr7   )r#   kvs      r&   
<dictcomp>z"Model.sanitize.<locals>.<dictcomp>   s*    WWWA4MUV4V4V14V4V4Vr(   )items)r%   weightss     r&   sanitizezModel.sanitize   s    WWWWWWr(   c                     | j         j        S r"   )r   r   rG   s    r&   r   zModel.layers   s    z  r(   r"   )r/   r0   r1   r   rB   rM   ry   rR   r   propertyr   rT   rU   s   @r&   r   r      s        SY S S S S S S 
 

 
 
 
X X X ! ! X! ! ! ! !r(   r   )dataclassesr   typingr   r   r   r   mlx.corecorerM   mlx.nnr_   activationsr	   r>   r
   r   r   r   Moduler9   rW   r{   r   r   r   r7   r(   r&   <module>r      s   " ! ! ! ! ! - - - - - - - - - - - -                   T T T T T T T T T T        >%
 %
 %
 %
 %
BI %
 %
 %
P@ @ @ @ @	 @ @ @F7 7 7 7 7") 7 7 7    ry   *    RY   6! ! ! ! !BI ! ! ! ! !r(   