
    )j                     X   d dl mZ d dlmZmZmZmZ d dlmZ	 d dl
mZ ddlmZ ddlmZmZmZ ddlmZ e G d d	e                      Z G d
 dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    )	dataclass)AnyDictOptionalUnionN   )swiglu)BaseModelArgscreate_attention_maskscaled_dot_product_attention)initialize_ropec                   *   e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   dZee         ed	<   dZ	ee         ed
<   dZ
ee         ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeeeeeef         f                  ed<   dZeed<   d ZdS )	ModelArgs
model_typehidden_sizenum_hidden_layersintermediate_sizenum_attention_headsrms_norm_eps
vocab_sizeNhead_dimmax_position_embeddingsnum_key_value_headsFattention_biasmlp_biasi'  
rope_thetarope_traditionalrope_scalingTtie_word_embeddingsc                 0    | j         | j        | _         d S d S N)r   r   selfs    ]/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/olmo2.py__post_init__zModelArgs.__post_init__!   s$    #+'+'?D$$$ ,+    )__name__
__module____qualname__str__annotations__intfloatr   r   r   r   r   boolr   r   r   r   r   r   r   r%    r&   r$   r   r      s0        OOOOOO"Hhsm"""-1Xc]111)-#--- ND   HdJ"d""";?L(4U5#:%6 678??? $$$$@ @ @ @ @r&   r   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
	Attentionargsc                    t                                                       |j        }|j        x| _        }|j        x| _        }|j        p	|j        |z  x| _        }|dz  | _        t          |d          r|j
        }nd}t          j        |||z  |          | _        t          j        |||z  |          | _        t          j        |||z  |          | _        t          j        ||z  ||          | _        t#          | j        |j        |j        |j        |j                  | _        t          j        ||z  |j                  | _        t          j        ||z  |j                  | _        d S )Ng      r   Fbias)super__init__r   r   n_headsr   
n_kv_headsr   scalehasattrr   nnLinearq_projk_projv_projo_projr   r   r   r   r   ropeRMSNormr   q_normk_norm)r#   r2   dimr8   r9   r   r   	__class__s          r$   r7   zAttention.__init__'   sd   !%!99w'+'??*#'=#OD4D4OOt^
4)** 	#!0NN"NiWx%7nMMMiZ(%:PPPiZ(%:PPPi( 2CnMMM#MO!(
 
	 j8!3T5FGGjh!68IJJr&   Nxmaskcachereturnc                    |j         \  }}}|                     |          |                     |          |                     |          }	}}|                     |          }|                     |          }|                    ||| j        d                              dddd          }|                    ||| j	        d                              dddd          }|	                    ||| j	        d                              dddd          }	|R| 
                    ||j                  }| 
                    ||j                  }|                    ||	          \  }}	n*| 
                    |          }| 
                    |          }t          |||	|| j        |          }
|
                    dddd                              ||d          }
|                     |
          S )Nr      r      )offset)rJ   r:   rI   )shaper>   r?   r@   rD   rE   reshaper8   	transposer9   rB   rP   update_and_fetchr   r:   rA   )r#   rH   rI   rJ   BLDquerieskeysvaluesoutputs              r$   __call__zAttention.__call__F   s    '1a $AAAv++g&&{{4   //!Qb99CCAq!QOO||Aq$/266@@Aq!LL1dor::DDQ1aPPiii==G99T%,977D 11$??LD&&ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??{{6"""r&   NNr'   r(   r)   r   r7   mxarrayr   r   r\   __classcell__rG   s   @r$   r1   r1   &   s        KY K K K K K KD $(#	# #8# rx # }	#
 
# # # # # # # #r&   r1   c                   :     e Zd Zdef fdZdej        fdZ xZS )MLPr2   c                 @   t                                                       |j        }|j        }t	          |d          r|j        }nd}t          j        |||          | _        t          j        |||          | _	        t          j        |||          | _
        d S )Nr   Fr4   )r6   r7   r   r   r;   r   r<   r=   	gate_proj	down_projup_proj)r#   r2   rF   
hidden_dimr   rG   s        r$   r7   zMLP.__init__h   s    +
4$$ 	}HHH3
BBB:sBBByjx@@@r&   rK   c                     |                      t          |                     |          |                     |                              S r!   )rg   r	   rf   rh   )r#   rH   s     r$   r\   zMLP.__call__v   s4    ~~fT^^A%6%6QHHIIIr&   	r'   r(   r)   r   r7   r_   r`   r\   ra   rb   s   @r$   rd   rd   g   sq        AY A A A A A AJRX J J J J J J J Jr&   rd   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
TransformerBlockr2   c                 j   t                                                       |j        | _        |j        | _        t	          |          | _        t          |          | _        t          j	        |j        |j
                  | _        t          j	        |j        |j
                  | _        || _        d S )Neps)r6   r7   r   r   r1   	self_attnrd   mlpr<   rC   r   post_attention_layernormpost_feedforward_layernormr2   r#   r2   rG   s     r$   r7   zTransformerBlock.__init__{   s    #'#; +"4t99(*
$"3)
 )
 )
% +-*$"3+
 +
 +
' 			r&   NrH   rI   rJ   rK   c                     |                      |                     |||                    }||z   }|                     |                     |                    }||z   }|S r!   )rs   rq   rt   rr   )r#   rH   rI   rJ   rhouts          r$   r\   zTransformerBlock.__call__   s[     ))$..D%*H*HIIE++DHHQKK88!e
r&   r]   r^   rb   s   @r$   rm   rm   z   s        Y      " $(#	
 
8
 rx 
 }	

 

 
 
 
 
 
 
 
r&   rm   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )
LlamaModelr2   c                 ~   t                                                       | _        j        | _        j        | _        | j        dk    sJ t          j        j        j                  | _        fdt          j                  D             | _
        t          j        j        j                  | _        d S )Nr   c                 0    g | ]}t                     S ))r2   )rm   ).0_r2   s     r$   
<listcomp>z'LlamaModel.__init__.<locals>.<listcomp>   s2     
 
 
,-$'''
 
 
r&   ro   )r6   r7   r2   r   r   r<   	Embeddingr   embed_tokensrangelayersrC   r   normru   s    `r$   r7   zLlamaModel.__init__   s    	/!%!7""""L$:JKK
 
 
 
16t7M1N1N
 
 
 Jt/T5FGGG			r&   Ninputsc                    |                      |          }|d gt          | j                  z  }t          ||d                   }t	          | j        |          D ]\  }} ||||          }|                     |          S )Nr   )rJ   )r   lenr   r   zipr   )r#   r   rJ   rx   rI   layercs          r$   r\   zLlamaModel.__call__   s    
 f%%=FS---E$Qa11DK// 	( 	(HE1aQ'''AAyy||r&   r!   rk   rb   s   @r$   r{   r{      ss        
HY 
H 
H 
H 
H 
H 
H         r&   r{   c                   Z     e Zd Zdef fdZ	 ddej        fdZd Ze	d             Z
 xZS )	Modelr2   c                     t                                                       || _        |j        | _        t	          |          | _        |j        s(t          j        |j	        |j
        d          | _        d S d S )NFr4   )r6   r7   r2   r   r{   modelr   r<   r=   r   r   lm_headru   s     r$   r7   zModel.__init__   sq    	/%%
' 	T9T%5tUSSSDLLL	T 	Tr&   Nr   c                     |                      ||          }| j        j        r | j         j                            |          }n|                     |          }|S r!   )r   r2   r   r   	as_linearr   )r#   r   rJ   ry   s       r$   r\   zModel.__call__   sT    
 jj''9( 	$*)33C88CC,,s##C
r&   c                 >    d |                                 D             S )Nc                 "    i | ]\  }}d |v	||S )zself_attn.rotary_emb.inv_freqr/   )r~   kvs      r$   
<dictcomp>z"Model.sanitize.<locals>.<dictcomp>   s1     
 
 
Q0OWX0X0XAq0X0X0Xr&   )items)r#   weightss     r$   sanitizezModel.sanitize   s+    
 
$]]__
 
 
 	
r&   c                     | j         j        S r!   )r   r   r"   s    r$   r   zModel.layers   s    z  r&   r!   )r'   r(   r)   r   r7   r_   r`   r\   r   propertyr   ra   rb   s   @r$   r   r      s        TY T T T T T T 
 

 
 
 

 
 
 ! ! X! ! ! ! !r&   r   )dataclassesr   typingr   r   r   r   mlx.corecorer_   mlx.nnr<   activationsr	   baser
   r   r   
rope_utilsr   r   Moduler1   rd   rm   r{   r   r/   r&   r$   <module>r      s   " ! ! ! ! ! - - - - - - - - - - - -                   T T T T T T T T T T ' ' ' ' ' ' @ @ @ @ @ @ @ @.># ># ># ># >#	 ># ># >#BJ J J J J") J J J&    ry   8       <! ! ! ! !BI ! ! ! ! !r&   