
    )j                     l   d dl mZ d dlmZmZmZmZmZ d dlm	Z
 d dlmZ ddlmZ ddlmZmZmZ ddlmZmZ ddlmZ e G d	 d
e                      Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    )	dataclass)AnyDictListOptionalUnionN   )swiglu)BaseModelArgscreate_attention_maskscaled_dot_product_attention)KVCacheRotatingKVCache)initialize_ropec                   *   e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   dZeed<   dZ	e
ee                  ed<   dZe
e         ed<   dZe
e         ed<   dZe
eeeeef         f                  ed<   dZeed<   d ZdS )	ModelArgs
model_typehidden_sizenum_hidden_layersintermediate_sizenum_attention_headsrms_norm_eps
vocab_sizemax_position_embeddingssliding_window
rope_thetaFattention_biasNlayer_typesnum_key_value_headshead_dimrope_scalingtie_word_embeddingsc                     | j         | j        | _         | j        %d t          | j                  D             | _        d S d S )Nc                 .    g | ]}|d z   dz  dk    rdndS )r	      r   full_attentionsliding_attention ).0is     ]/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/olmo3.py
<listcomp>z+ModelArgs.__post_init__.<locals>.<listcomp>&   sA           &'UaK1$4$4  :M         )r   r   r   ranger   selfs    r+   __post_init__zModelArgs.__post_init__"   sY    #+'+'?D$#   t566     D $#r-   )__name__
__module____qualname__str__annotations__intfloatr   boolr   r   r   r   r    r!   r   r   r"   r1   r(   r-   r+   r   r      s"        OOOOOO     ND   '+K$s)$+++)-#---"Hhsm""";?L(4U5#:%6 678??? %%%%    r-   r   c            	       |     e Zd Zdedef fdZ	 	 d
dej        deej                 dee	         dej        fd	Z
 xZS )Olmo3Attentionargs	layer_idxc                    t                                                       |j        | _        |j        | _        || _        |j        p|j        |j        z  | _        | j        dz  | _        t          j	        |j        |j        | j        z  |j
                  | _        t          j	        |j        |j        | j        z  |j
                  | _        t          j	        |j        |j        | j        z  |j
                  | _        t          j	        |j        | j        z  |j        |j
                  | _        t          j        |j        | j        z  |j                  | _        t          j        |j        | j        z  |j                  | _        |j        |         dk    r(t          j        | j        d|j                  | _        d S t/          | j        d|j        |j        |j                  | _        d S )Ng      ࿩biasepsr&   F)traditionalbase)rC   rD   scaling_configr   )super__init__r   r   r=   r    r   scalennLinearr   q_projk_projv_projo_projRMSNormr   q_normk_normr   RoPEr   roper   r!   r   r0   r<   r=   	__class__s      r+   rG   zOlmo3Attention.__init__-   s   #'#; #'#; "U)9T=U)U]D(
i$t}4$
 
 

 i$t}4$
 
 

 i$t}4$
 
 

 i$t}4$
 
 
 j$t}4$:K
 
 
 j$t}4$:K
 
 
 I&*:::5tWWWDIII'!_#0(,(D  DIIIr-   Nxmaskcachereturnc                    |j         \  }}}|                     |                     |                    }|                     |                     |                    }|                     |          }	|                    ||| j        d                              dddd          }|                    ||| j	        d                              dddd          }|	                    ||| j	        d                              dddd          }	|R| 
                    ||j                  }| 
                    ||j                  }|                    ||	          \  }}	n*| 
                    |          }| 
                    |          }t          |||	|| j        |          }
|
                    dddd                              ||d          }
|                     |
          S )Nr      r	      )offset)rX   rH   rW   )shaperP   rK   rQ   rL   rM   reshaper   	transposer   rS   r^   update_and_fetchr   rH   rN   )r0   rV   rW   rX   BL_querieskeysvaluesoutputs              r+   __call__zOlmo3Attention.__call__]   s    '1a++dkk!nn--{{4;;q>>**Q//!Q(@"EEOOq!Q
 
 ||Aq$":B??II!QPQSTUU1d&>CCMMq!Q
 
 iii==G99T%,977D 11$??LD&&ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??{{6"""r-   NNr2   r3   r4   r   r7   rG   mxarrayr   r   rj   __classcell__rU   s   @r+   r;   r;   ,   s        .Y .3 . . . . . .f $(#	# #8# rx # }	#
 
# # # # # # # #r-   r;   c                   H     e Zd Zdef fdZdej        dej        fdZ xZS )Olmo3MLPr<   c                 ,   t                                                       t          j        |j        |j        d          | _        t          j        |j        |j        d          | _        t          j        |j        |j        d          | _        d S NFr?   )	rF   rG   rI   rJ   r   r   	gate_proj	down_projup_projr0   r<   rU   s     r+   rG   zOlmo3MLP.__init__   sz    4#3T5KRWXXX4#94;KRWXXXy!143IPUVVVr-   rV   rY   c                     |                      t          |                     |          |                     |                              S N)rv   r
   ru   rw   )r0   rV   s     r+   rj   zOlmo3MLP.__call__   s4    ~~fT^^A%6%6QHHIIIr-   )	r2   r3   r4   r   rG   rm   rn   rj   ro   rp   s   @r+   rr   rr      s{        WY W W W W W WJ"( Jrx J J J J J J J Jr-   rr   c            	       |     e Zd Zdedef fdZ	 	 d
dej        deej                 dee	         dej        fd	Z
 xZS )Olmo3DecoderLayerr<   r=   c                 n   t                                                       |j        | _        |j        | _        t	          ||          | _        t          |          | _        t          j	        |j        |j
                  | _        t          j	        |j        |j
                  | _        || _        d S )N)r=   rA   )rF   rG   r   r   r;   	self_attnrr   mlprI   rO   r   post_attention_layernormpost_feedforward_layernormr<   rT   s      r+   rG   zOlmo3DecoderLayer.__init__   s    #'#; +'	BBBD>>(*
$"3)
 )
 )
% +-*$"3+
 +
 +
' 			r-   NrV   rW   rX   rY   c                     |                      |                     |||                    }||z   }|                     |                     |                    }||z   }|S rz   )r   r~   r   r   )r0   rV   rW   rX   rhouts          r+   rj   zOlmo3DecoderLayer.__call__   s[     ))$..D%*H*HIIE++DHHQKK88!e
r-   rk   rl   rp   s   @r+   r|   r|      s        Y 3      " $(#	
 
8
 rx 
 }	

 

 
 
 
 
 
 
 
r-   r|   c                   \     e Zd Zdef fdZ	 ddej        dee         dej        fdZ	 xZ
S )	
Olmo3Modelr<   c                    t                                                       j        | _        t          j        j        j                  | _        fdt          j	                  D             | _
        t          j        j        j                  | _        j                            d          | _        j                            d          | _        j        | _        d S )Nc                 2    g | ]}t          |           S ))r<   r=   )r|   )r)   r*   r<   s     r+   r,   z'Olmo3Model.__init__.<locals>.<listcomp>   s6     
 
 
 41555
 
 
r-   rA   r'   r&   )rF   rG   r   rI   	Embeddingr   r   embed_tokensr.   r   layersrO   r   normr   indexswa_idxga_idxrx   s    `r+   rG   zOlmo3Model.__init__   s    "1L$:JKK
 
 
 
4122
 
 
 Jt/T5FGGG	'--.ABB&,,-=>>+r-   NinputsrX   rY   c                 x   |                      |          }|d gt          | j                  z  }t          ||| j                           }t          ||| j                 | j                  }t          | j        || j                  D ]\  }}}|dk    r|n|}	 |||	|          }| 	                    |          S )N)window_sizer&   )rX   )
r   lenr   r   r   r   r   zipr   r   )
r0   r   rX   r   	full_masksliding_window_masklayerc
layer_typerW   s
             r+   rj   zOlmo3Model.__call__   s    
 f%%=FS---E)!U4;-?@@	3uT\"0C
 
 
 %(UD<L$M$M 	( 	( E1j *.> > >99DWDaQ'''AAyy||r-   rz   )r2   r3   r4   r   rG   rm   rn   r   r   rj   ro   rp   s   @r+   r   r      s        ,Y , , , , , ,$  $  } 
	       r-   r   c                   x     e Zd Zdef fdZ	 d
dej        dee         dej        fdZ	e
d             Zd	 Z xZS )Modelr<   c                     t                                                       || _        |j        | _        t	          |          | _        |j        s(t          j        |j	        |j
        d          | _        d S d S rt   )rF   rG   r<   r   r   modelr"   rI   rJ   r   r   lm_headrx   s     r+   rG   zModel.__init__   sq    	/%%
' 	T9T%5tUSSSDLLL	T 	Tr-   Nr   rX   rY   c                     |                      ||          }| j        j        r | j         j                            |          }n|                     |          }|S rz   )r   r<   r"   r   	as_linearr   )r0   r   rX   r   s       r+   rj   zModel.__call__   sT    
 jj''9( 	$*)33C88CC,,s##C
r-   c                     | j         j        S rz   )r   r   r/   s    r+   r   zModel.layers   s    z  r-   c                     g }| j         j        D ]W}|dk    r"|                    t                                 *|                    t	          | j        j                             X|S )Nr&   )max_size)r   r   appendr   r   r<   r   )r0   cacheslts      r+   
make_cachezModel.make_cache   sl    *( 	R 	RB%%%gii((((oty7OPPPQQQQr-   rz   )r2   r3   r4   r   rG   rm   rn   r   r   rj   propertyr   r   ro   rp   s   @r+   r   r      s        TY T T T T T T  $
 

 }
 
	
 
 
 
 ! ! X!      r-   r   )dataclassesr   typingr   r   r   r   r   mlx.corecorerm   mlx.nnrI   activationsr
   rD   r   r   r   rX   r   r   
rope_utilsr   r   Moduler;   rr   r|   r   r   r(   r-   r+   <module>r      s   " ! ! ! ! ! 3 3 3 3 3 3 3 3 3 3 3 3 3 3                   T T T T T T T T T T + + + + + + + + ' ' ' ' ' '        8P# P# P# P# P#RY P# P# P#fJ J J J Jry J J J    	   8# # # # # # # #L         BI          r-   