
    )jr                     ~   d dl mZ d dlmZmZmZmZ d dlmZ	 d dl
mZ ddlmZ ddlmZmZmZ ddlmZ e G d d	e                      Z G d
 dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    )	dataclass)AnyDictOptionalUnionN   )swiglu)BaseModelArgscreate_attention_maskscaled_dot_product_attention)initialize_ropec                      e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   d
Zee         ed<   d
Z	ee         ed<   dZ
eed<   d
Zeeeeeef         f                  ed<   dZeed<   dZeed<   dZeed<   d
S )	ModelArgs
model_typehidden_size
num_layersintermediate_sizenum_attention_heads
vocab_size
rope_thetalayer_norm_epsilonnum_key_value_headsNhead_dimmax_position_embeddingsFrope_traditionalrope_scalingTtie_word_embeddingsattention_biasmlp_bias)__name__
__module____qualname__str__annotations__intfloatr   r   r   r   boolr   r   r   r   r   r        ^/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/exaone.pyr   r      s        OOOOOOOOO"Hhsm"""-1Xc]111"d""";?L(4U5#:%6 678??? $$$$ ND   Hdr)   r   c            	       v     e Zd Zdef fdZ	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
AttentionModuleargsc                 J   t                                                       |j        }|j        x| _        }|j        x| _        }|j        p||z  x| _        }|dz  | _        t          j
        |||z  |j                  | _        t          j
        |||z  |j                  | _        t          j
        |||z  |j                  | _        t          j
        ||z  ||j                  | _        t!          | j        |j        |j        |j        |j                  | _        d S )Ng      ࿩bias)super__init__r   r   n_headsr   
n_kv_headsr   scalennLinearr   q_projk_projv_projout_projr   r   r   r   r   rope)selfr-   dimr3   r4   r   	__class__s         r*   r2   zAttentionModule.__init__#   s   !%!99w'+'??*#'=#DSG^Dt^
iWx%7d>QRRRiZ(%:ATUUUiZ(%:ATUUU	'H"4c@STTT#MO!(
 
			r)   Nxmaskcachereturnc                 p   |j         \  }}}|                     |                              ||| j        d                              dddd          }|                     |                              ||| j        d                              dddd          }|                     |                              ||| j        d                              dddd          }	|R|                     ||j	                  }|                     ||j	                  }|
                    ||	          \  }}	n*|                     |          }|                     |          }t          |||	|| j        |          }
|
                    dddd                              |||          }
|                     |
          S )Nr      r      )offset)rB   r5   rA   )shaper8   reshaper3   	transposer9   r4   r:   r<   rH   update_and_fetchr   r5   r;   )r=   r@   rA   rB   BLDqkvouts              r*   __call__zAttentionModule.__call__8   s    '1aKKNN""1ar::DDQ1aPPKKNN""1a"==GG1aQRSSKKNN""1a"==GG1aQRSS		!EL	11A		!EL	11A))!Q//DAqq		!A		!A*q!5

 
 
 mmAq!Q''//1a88}}S!!!r)   NNr    r!   r"   r   r2   mxarrayr   r   rT   __classcell__r?   s   @r*   r,   r,   "   s        
Y 
 
 
 
 
 
, TX" ""!)"(!3"CKC="	" " " " " " " "r)   r,   c                   $     e Zd Zdef fdZ xZS )	Attentionr-   c                 p    t                                                       t          |          | _        d S N)r1   r2   r,   	attentionr=   r-   r?   s     r*   r2   zAttention.__init__P   s,    (..r)   )r    r!   r"   r   r2   rY   rZ   s   @r*   r\   r\   O   sD        /Y / / / / / / / / / /r)   r\   c                   H     e Zd Zdef fdZdej        dej        fdZ xZS )MLPr-   c                 *   t                                                       |j        }|j        }t	          j        |||j                  | _        t	          j        |||j                  | _        t	          j        |||j                  | _	        d S )Nr/   )
r1   r2   r   r   r6   r7   r   c_fc_0c_fc_1c_proj)r=   r-   r>   
hidden_dimr?   s       r*   r2   zMLP.__init__V   s{    +
iZdmDDDiZdmDDDi
CdmDDDr)   r@   rC   c                     |                      t          |                     |          |                     |                              S r^   )rf   r	   rd   re   )r=   r@   s     r*   rT   zMLP.__call__^   s2    {{6$++a..$++a..AABBBr)   	r    r!   r"   r   r2   rW   rX   rT   rY   rZ   s   @r*   rb   rb   U   s{        EY E E E E E EC"( Crx C C C C C C C Cr)   rb   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
TransformerBlockr-   c                 ,   t                                                       t          j        |j        |j                  | _        t          |          | _        t          j        |j        |j                  | _	        t          |          | _        d S )Neps)r1   r2   r6   RMSNormr   r   ln_1r\   attnln_2rb   mlpr`   s     r*   r2   zTransformerBlock.__init__c   so    Jt/T5LMMM	dOO	Jt/T5LMMM	t99r)   Nr@   rA   rB   rC   c                     || j                             |                     |          ||          z   }||                     |                     |                    z   }|S r^   )rq   r_   rp   rs   rr   )r=   r@   rA   rB   hrS   s         r*   rT   zTransformerBlock.__call__j   sQ     	##DIIaLL$>>>$((499Q<<(((
r)   rU   rV   rZ   s   @r*   rk   rk   b   s        Y       $(#	 8 rx  }	
 
       r)   rk   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )ExaoneModelr-   c                 &   t                                                       t          j        j        j                  | _        fdt          j                  D             | _	        t          j
        j        j                  | _        d S )Nc                 .    g | ]}t                    S r(   )rk   ).0_r-   s     r*   
<listcomp>z(ExaoneModel.__init__.<locals>.<listcomp>y   s"    IIIQ"4((IIIr)   rm   )r1   r2   r6   	Embeddingr   r   wteranger   ru   ro   r   ln_fr`   s    `r*   r2   zExaoneModel.__init__v   sw    <1ABBIIII%2H2HIIIJt/T5LMMM			r)   Ninputsc                    |                      |          }|d gt          | j                  z  }t          ||d                   }t	          | j        |          D ]\  }} ||||          }|                     |          S )Nr   )rB   )r~   lenru   r   zipr   )r=   r   rB   ru   rA   layercs          r*   rT   zExaoneModel.__call__|   s    
 HHV=FS[[(E$Qa11DFE** 	( 	(HE1aQ'''AAyy||r)   r^   ri   rZ   s   @r*   rw   rw   u   ss        NY N N N N N N         r)   rw   c                   T     e Zd Zdef fdZ	 ddej        fdZed             Z	 xZ
S )Modelr-   c                     t                                                       || _        |j        | _        t	          |          | _        |j        s(t          j        |j	        |j
        d          | _        d S d S )NFr/   )r1   r2   r-   r   rw   transformerr   r6   r7   r   r   lm_headr`   s     r*   r2   zModel.__init__   sr    	/&t,,' 	T9T%5tUSSSDLLL	T 	Tr)   Nr   c                     |                      ||          }| j        j        r | j         j                            |          }n|                     |          }|S r^   )r   r-   r   r~   	as_linearr   )r=   r   rB   rS   s       r*   rT   zModel.__call__   sW    
 vu--9( 	$"&0055CC,,s##C
r)   c                     | j         j        S r^   )r   ru   )r=   s    r*   layerszModel.layers   s    !!r)   r^   )r    r!   r"   r   r2   rW   rX   rT   propertyr   rY   rZ   s   @r*   r   r      s        TY T T T T T T 
 

 
 
 
 " " X" " " " "r)   r   )dataclassesr   typingr   r   r   r   mlx.corecorerW   mlx.nnr6   activationsr	   baser
   r   r   
rope_utilsr   r   Moduler,   r\   rb   rk   rw   r   r(   r)   r*   <module>r      s   " ! ! ! ! ! - - - - - - - - - - - -                   T T T T T T T T T T ' ' ' ' ' '        &*" *" *" *" *"bi *" *" *"Z/ / / / /	 / / /
C 
C 
C 
C 
C") 
C 
C 
C    ry   &    ")   2" " " " "BI " " " " "r)   