
    )j                     h   d dl mZ d dlmZmZmZmZ d dlmZ	 d dl
mZ ddlmZ ddlmZmZmZ ddlmZmZ ddlmZ e G d	 d
e                      Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    )	dataclass)AnyDictOptionalUnionN   )swiglu)BaseModelArgscreate_attention_maskscaled_dot_product_attention)KVCacheRotatingKVCache)initialize_ropec                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   eed<   eed<   eee	eef         f         ed<   e
e         ed<   e
e         ed<   dS )	ModelArgs
model_typehidden_sizenum_hidden_layersintermediate_sizenum_attention_headsrms_norm_eps
vocab_sizenum_key_value_headsmax_position_embeddings
rope_thetahead_dimtie_word_embeddingsrope_scalingsliding_windowsliding_window_patternN)__name__
__module____qualname__str__annotations__intfloatboolr   r   r        _/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/exaone4.pyr   r      s         OOOOOO    MMMsE%*--....SM!!!$SM)))))r*   r   c            	            e Zd Zdedee         f fdZ	 	 d
dej        deej                 dee	         dej        fd	Z
 xZS )	Attentionargsis_localc                    t                                                       |j        }|j        x| _        }|j        J |j        x| _        }|j        }|dz  | _        t          j
        |||z  d          | _        t          j
        |||z  d          | _        t          j
        |||z  d          | _        t          j
        ||z  |d          | _        t          j        ||j                  | _        t          j        ||j                  | _        |pd| _        |d u p|| _        | j        r*t+          ||j        d|j        |j                  | _        d S d S )Ng      Fbiaseps)basetraditionalscaling_configr   )super__init__r   r   n_headsr   
n_kv_headsr   scalennLinearq_projk_projv_projo_projRMSNormr   q_normk_normr/   use_roper   r   r   r   rope)selfr.   r/   dimr:   r;   r   	__class__s          r+   r9   zAttention.__init__#   sh   !%!99w'333'+'??*=t^
iWx%7eDDDiZ(%:GGGiZ(%:GGGi( 2CeDDDjt/@AAAjt/@AAA )E D(4H= 	'_!#0(,(D  DIII	 	r*   Nxmaskcachereturnc                    |j         \  }}}|                     |          |                     |          |                     |          }	}}|                     |                    ||| j        d                                        dddd          }|                     |                    ||| j	        d                                        dddd          }|	                    ||| j	        d                              dddd          }	|Y| j
        r8|                     ||j                  }|                     ||j                  }|                    ||	          \  }}	n1| j
        r*|                     |          }|                     |          }t          |||	|| j        |          }
|
                    dddd                              ||d          }
|                     |
          S )Nr      r      )offset)rM   r<   rL   )shaper?   r@   rA   rD   reshaper:   	transposerE   r;   rF   rG   rS   update_and_fetchr   r<   rB   )rH   rK   rL   rM   BLDquerieskeysvaluesoutputs              r+   __call__zAttention.__call__@   s    '1a $AAAv++gooaDL"EEFFPPq!Q
 
 {{4<<1dorBBCCMMq!Q
 
 1dor::DDQ1aPP} <))GEL)AAyyely;; 11$??LD&&] 	#ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??{{6"""r*   NN)r!   r"   r#   r   r   r(   r9   mxarrayr   r_   __classcell__rJ   s   @r+   r-   r-   "   s        Y (4.      @ $(#	# #8# rx # }	#
 
# # # # # # # #r*   r-   c                   4     e Zd Z fdZdej        fdZ xZS )MLPc                     t                                                       t          j        ||d          | _        t          j        ||d          | _        t          j        ||d          | _        d S NFr1   )r8   r9   r=   r>   	gate_proj	down_projup_proj)rH   rI   
hidden_dimrJ   s      r+   r9   zMLP.__init__c   se    3
???:s???yju===r*   rN   c                     |                      t          |                     |          |                     |                              S N)rj   r	   ri   rk   )rH   rK   s     r+   r_   zMLP.__call__i   s4    ~~fT^^A%6%6QHHIIIr*   )r!   r"   r#   r9   ra   rb   r_   rc   rd   s   @r+   rf   rf   b   s^        > > > > >JRX J J J J J J J Jr*   rf   c            	       |     e Zd Zdedef fdZ	 	 d
dej        deej                 dee	         dej        fd	Z
 xZS )TransformerBlockr.   r/   c                    t                                                       |j        | _        |j        | _        t	          ||          | _        t          |j        |j                  | _        t          j
        |j        |j                  | _        t          j
        |j        |j                  | _        || _        d S )Nr3   )r8   r9   r   r   r-   	self_attnrf   r   mlpr=   rC   r   post_attention_layernormpost_feedforward_layernormr.   )rH   r.   r/   rJ   s      r+   r9   zTransformerBlock.__init__n   s    #'#; +"422t')?@@(*
$"3)
 )
 )
% +-*$"3+
 +
 +
' 			r*   NrK   rL   rM   rN   c                     |                      |||          }||                     |          z   }|                     |          }||                     |          z   }|S rn   )rr   rt   rs   ru   )rH   rK   rL   rM   rhouts          r+   r_   zTransformerBlock.__call__|   s]     NN1dE**--a000HHQKK$11!444
r*   r`   )r!   r"   r#   r   r(   r9   ra   rb   r   r   r_   rc   rd   s   @r+   rp   rp   m   s        Y $      " $(#	
 
8
 rx 
 }	

 

 
 
 
 
 
 
 
r*   rp   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )ExaoneModelr.   c                 2   t                                                       | _        j        | _        j        | _        | j        dk    sJ t          j        j        j                  | _        j	        fdt          j                  D             | _        r5                    d          | _                            d          | _        nd | _        d| _        j        | _        t          j        j        j                  | _        d S )Nr   c           
      n    g | ]1}t          r|t                    z           d k    nd          2S )rY   N)r.   r/   )rp   len).0ir.   patterns     r+   
<listcomp>z(ExaoneModel.__init__.<locals>.<listcomp>   s_     
 
 

 	 =DNS\\!12c99$  
 
 
r*   rY   Gr3   )r8   r9   r.   r   r   r=   	Embeddingr   embed_tokensr    rangelayersindexswa_idxfull_idxr   window_sizerC   r   norm)rH   r.   r   rJ   s    `@r+   r9   zExaoneModel.__init__   s   	/!%!7""""L$:JKK-
 
 
 
 

 4122
 
 
  	"==--DL#MM#..DMMDLDM.Jt/T5FGGG			r*   Ninputsc                    |                      |          }|d gt          | j                  z  }t          ||| j                           }| j        #t          ||| j                 | j                  }nd }t          | j        |          D ]"\  }}|j        j	        r|n|} ||||          }#| 
                    |          S )N)r   )r   r~   r   r   r   r   r   ziprr   r/   r   )	rH   r   rM   rx   global_maskswa_masklayercrL   s	            r+   r_   zExaoneModel.__call__   s    
 f%%=FS---E+AuT]/CDD<#,5&D4D  HH HDK// 	" 	"HE1$7H88[Daq!!AAyy||r*   rn   )	r!   r"   r#   r   r9   ra   rb   r_   rc   rd   s   @r+   r{   r{      ss        HY H H H H H H8         r*   r{   c                   Z     e Zd Zdef fdZ	 ddej        fdZd Ze	d             Z
 xZS )	Modelr.   c                     t                                                       || _        |j        | _        t	          |          | _        |j        s(t          j        |j	        |j
        d          | _        d S d S rh   )r8   r9   r.   r   r{   modelr   r=   r>   r   r   lm_head)rH   r.   rJ   s     r+   r9   zModel.__init__   sq    	/ &&
' 	T9T%5tUSSSDLLL	T 	Tr*   Nr   c                     |                      ||          }| j        j        r | j         j                            |          }n|                     |          }|S rn   )r   r.   r   r   	as_linearr   )rH   r   rM   ry   s       r+   r_   zModel.__call__   sT    
 jj''9( 	$*)33C88CC,,s##C
r*   c                 *      fd j         D             S )Nc                 z    g | ]7}|j         j        rt          j        j        d           nt                      8S )r   )max_sizekeep)rr   r/   r   r.   r   r   )r   lrH   s     r+   r   z$Model.make_cache.<locals>.<listcomp>   sS     
 
 
  ;')AJJJJYY	
 
 
r*   )r   rH   s   `r+   
make_cachezModel.make_cache   s2    
 
 
 
 [
 
 
 	
r*   c                     | j         j        S rn   )r   r   r   s    r+   r   zModel.layers   s    z  r*   rn   )r!   r"   r#   r   r9   ra   rb   r_   r   propertyr   rc   rd   s   @r+   r   r      s        TY T T T T T T 
 

 
 
 

 
 
 ! ! X! ! ! ! !r*   r   )dataclassesr   typingr   r   r   r   mlx.corecorera   mlx.nnr=   activationsr	   r5   r
   r   r   rM   r   r   
rope_utilsr   r   Moduler-   rf   rp   r{   r   r)   r*   r+   <module>r      s   " ! ! ! ! ! - - - - - - - - - - - -                   T T T T T T T T T T + + + + + + + + ' ' ' ' ' ' * * * * * * * *$=# =# =# =# =#	 =# =# =#@J J J J J") J J J    ry   8/ / / / /") / / /d!! !! !! !! !!BI !! !! !! !! !!r*   