
    )jD                     r   d dl mZ d dlmZmZmZmZ d dlmZ	 d dl
mZ ddlmZ ddlmZmZmZ e G d de                      Z G d	 d
ej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    )	dataclass)AnyDictOptionalUnionN   )swiglu)BaseModelArgscreate_attention_maskscaled_dot_product_attentionc                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   dZeed	<   dZ	eed
<   dZ
eed<   dZeed<   dZeed<   dZeed<   dZeeeeeef         f                  ed<   dZeed<   d ZdS )	ModelArgs
model_typehidden_sizenum_hidden_layersintermediate_sizenum_attention_headsrms_norm_eps
vocab_sizeFbiasqkv_biasi   max_position_embeddingsNnum_key_value_heads'  
rope_thetarope_traditionalrope_scalingtie_word_embeddingsc                       j          j         _          j        rOddh}t           fd|D                       st	          d|            j        d         dvrt	          d          d S d S )Nfactor	rope_typec              3   *   K   | ]}|j         v V  d S N)r   ).0keyselfs     a/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/internlm3.py	<genexpr>z*ModelArgs.__post_init__.<locals>.<genexpr>%   s+      IICsd//IIIIII    zrope_scaling must contain keys )lineardynamiczErope_scaling 'rope_type' currently only supports 'linear' or 'dynamic)r   r   r   all
ValueError)r&   required_keyss   ` r'   __post_init__zModelArgs.__post_init__   s    #+'+'?D$ 	%{3MIIII=IIIII T !R=!R!RSSS -5JJJ [  	 	
 KJr)   )__name__
__module____qualname__str__annotations__intfloatr   boolr   r   r   r   r   r   r   r   r   r   r/    r)   r'   r   r      s        OOOOOOD$Hd#(S(((####J"d""";?L(4U5#:%6 678??? %%%%    r)   r   c                   V     e Zd ZdZ	 	 	 	 ddededed	ed
ef
 fdZd ZddefdZ	 xZ
S )DynamicNTKScalingRoPEzCImplements the rotary positional encoding with Dynamic NTK scaling.   Fr         ?dimsr   traditionalbasescalec                     t                                                       || _        || _        || _        || _        || _        d S r#   )super__init__r   original_baser=   r>   r@   )r&   r=   r   r>   r?   r@   	__class__s         r'   rC   zDynamicNTKScalingRoPE.__init__1   sE     	'>$!	&


r)   c                 D    | j          d| j         d| j         d| j         S )Nz, traditional=z, max_position_embeddings=z, scaling_factor=)r=   r>   r   scaling_factorr&   s    r'   
extra_reprz DynamicNTKScalingRoPE.extra_repr@   sV    )  \  \4+;  \  \W[Ws  \  \  GK  GZ  \  \  	\r)   r   offsetc                     |j         d         |z   }|| j        k    r9| j        | j        |z  | j        z  | j        dz
  z
  | j        | j        dz
  z  z  z  }n| j        }t
          j                            || j        | j        || j        |          S )Nr      )r>   r?   r@   rJ   )	shaper   rD   r@   r=   mxfastroper>   )r&   xrJ   seq_lenr?   s        r'   __call__zDynamicNTKScalingRoPE.__call__C   s    '!*v%T111%g%(DDVWX)ty1}-)/ /DD %Dw||I(*  
 
 	
r)   )r;   Fr   r<   )r   )r0   r1   r2   __doc__r5   r7   r6   rC   rI   rS   __classcell__rE   s   @r'   r:   r:   .   s        MM
 (,!  "% 	
       \ \ \
 
# 
 
 
 
 
 
 
 
r)   r:   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
	Attentionargsc                    t                                                       |j        }|j        }|j        x| _        }|j        x| _        }||j        z  | _        |j        |z  x| _	        }|dz  | _
        t          j        |||z  |          | _        t          j        |||z  |          | _        t          j        |||z  |          | _        t          j        ||z  ||          | _        |j        !|j        d         dk    rd|j        d         z  nd}t%          ||j        |j        |j        |          | _        d S )	Ng      ࿩r   r!   r*   r   r    g       @)r   r>   r?   r@   )rB   rC   r   r   r   n_headsr   
n_kv_headsn_kv_groupshead_dimr@   nnLinearq_projk_projv_projo_projr   r:   r   r   r   rP   )	r&   rY   dimr   r\   r]   r_   
rope_scalerE   s	           r'   rC   zAttention.__init__W   sY   =!%!99w'+'??*"d&>>#'#3w#>>t^
iWx%7hGGGiZ(%:JJJiZ(%:JJJi( 2ChGGG  ,!+.(:: !(+++ 	 	 *$($@-
 
 
			r)   NrQ   maskcachereturnc                 |   |j         \  }}}|                     |          |                     |          |                     |          }	}}|                    ||| j        d                              dddd          }|                    ||| j        d                              dddd          }|	                    ||| j        d                              dddd          }	|R|                     ||j	                  }|                     ||j	                  }|
                    ||	          \  }}	n*|                     |          }|                     |          }t          |||	|| j        |          }
|
                    dddd                              ||d          }
|                     |
          S )Nr   rL   r      )rJ   )ri   r@   rh   )rM   rb   rc   rd   reshaper\   	transposer]   rP   rJ   update_and_fetchr   r@   re   )r&   rQ   rh   ri   BLDquerieskeysvaluesoutputs              r'   rS   zAttention.__call__w   s    '1a $AAAv //!Qb99CCAq!QOO||Aq$/266@@Aq!LL1dor::DDQ1aPPiii==G99T%,977D 11$??LD&&ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??{{6"""r)   NNr0   r1   r2   r   rC   rN   arrayr   r   rS   rU   rV   s   @r'   rX   rX   V   s        
Y 
 
 
 
 
 
F $(#	# #8# rx # }	#
 
# # # # # # # #r)   rX   c                   4     e Zd Z fdZdej        fdZ xZS )MLPc                     t                                                       t          j        |||          | _        t          j        |||          | _        t          j        |||          | _        d S )Nr[   )rB   rC   r`   ra   	gate_proj	down_projup_proj)r&   rf   
hidden_dimr   rE   s       r'   rC   zMLP.__init__   se    3
>>>:s>>>yjt<<<r)   rj   c                     |                      t          |                     |          |                     |                              S r#   )r   r	   r~   r   )r&   rQ   s     r'   rS   zMLP.__call__   s4    ~~fT^^A%6%6QHHIIIr)   )r0   r1   r2   rC   rN   rz   rS   rU   rV   s   @r'   r|   r|      s^        = = = = =JRX J J J J J J J Jr)   r|   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
TransformerBlockrY   c                 N   t                                                       t          |          | _        t	          |j        |j        |j                  | _        t          j
        |j        |j                  | _        t          j
        |j        |j                  | _        d S )Neps)rB   rC   rX   	self_attnr|   r   r   r   mlpr`   RMSNormr   input_layernormpost_attention_layernormr&   rY   rE   s     r'   rC   zTransformerBlock.__init__   s    "4t')?KK!z$*:@QRRR(*
$"3)
 )
 )
%%%r)   NrQ   rh   ri   rj   c                     |                      |                     |          ||          }||z   }|                     |                     |                    }||z   }|S r#   )r   r   r   r   )r&   rQ   rh   ri   rhouts          r'   rS   zTransformerBlock.__call__   s]     NN4//22D%@@EHHT2215566!e
r)   rx   ry   rV   s   @r'   r   r      s        
Y 
 
 
 
 
 
 $(#	
 
8
 rx 
 }	

 

 
 
 
 
 
 
 
r)   r   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )InternLM2ModelrY   c                 @   t                                                       j        dk    sJ t          j        j        j                  | _        fdt          j                  D             | _	        t          j
        j        j                  | _        d S )Nr   c                 0    g | ]}t                     S ))rY   )r   )r$   _rY   s     r'   
<listcomp>z+InternLM2Model.__init__.<locals>.<listcomp>   s2     
 
 
,-$'''
 
 
r)   r   )rB   rC   r   r`   	Embeddingr   embed_tokensranger   layersr   r   normr   s    `r'   rC   zInternLM2Model.__init__   s    """"L$:JKK
 
 
 
16t7M1N1N
 
 
 Jt/T5FGGG			r)   Ninputsc                    |                      |          }|d gt          | j                  z  }t          ||d                   }t	          | j        |          D ]\  }} ||||          }|                     |          S )Nr   )ri   )r   lenr   r   zipr   )r&   r   ri   r   rh   layercs          r'   rS   zInternLM2Model.__call__   s    
 f%%=FS---E$Qa11DK// 	( 	(HE1aQ'''AAyy||r)   r#   )	r0   r1   r2   r   rC   rN   rz   rS   rU   rV   s   @r'   r   r      ss        HY H H H H H H         r)   r   c                   Z     e Zd Zdef fdZ	 ddej        fdZd Ze	d             Z
 xZS )	ModelrY   c                     t                                                       || _        |j        | _        t	          |          | _        |j        s(t          j        |j	        |j
        d          | _        d S d S )NFr[   )rB   rC   rY   r   r   modelr   r`   ra   r   r   lm_headr   s     r'   rC   zModel.__init__   sq    	/#D))
' 	T9T%5tUSSSDLLL	T 	Tr)   Nr   c                     |                      ||          }| j        j        r | j         j                            |          }n|                     |          }|S r#   )r   rY   r   r   	as_linearr   )r&   r   ri   r   s       r'   rS   zModel.__call__   sT    
 jj''9( 	$*)33C88CC,,s##C
r)   c                 >    d |                                 D             S )Nc                 "    i | ]\  }}d |v	||S )zattention.rope.inv_freqr8   )r$   kvs      r'   
<dictcomp>z"Model.sanitize.<locals>.<dictcomp>   s*    WWWA4MUV4V4V14V4V4Vr)   )items)r&   weightss     r'   sanitizezModel.sanitize   s    WWWWWWr)   c                     | j         j        S r#   )r   r   rH   s    r'   r   zModel.layers   s    z  r)   r#   )r0   r1   r2   r   rC   rN   rz   rS   r   propertyr   rU   rV   s   @r'   r   r      s        TY T T T T T T 
 

 
 
 
X X X ! ! X! ! ! ! !r)   r   )dataclassesr   typingr   r   r   r   mlx.corecorerN   mlx.nnr`   activationsr	   r?   r
   r   r   r   Moduler:   rX   r|   r   r   r   r8   r)   r'   <module>r      s   " ! ! ! ! ! - - - - - - - - - - - -                   T T T T T T T T T T        @%
 %
 %
 %
 %
BI %
 %
 %
P=# =# =# =# =#	 =# =# =#@J J J J J") J J J    ry   .    RY   6! ! ! ! !BI ! ! ! ! !r)   