
    )j                     8   d dl mZ d dlmZmZ d dlmZ d dlm	Z	 ddl
mZmZmZ e G d de                      Z G d d	e	j                  Z G d
 de	j                  Z G d de	j                  Z G d de	j                  Z G d de	j                  ZdS )    )	dataclass)AnyOptionalN   )BaseModelArgscreate_attention_maskscaled_dot_product_attentionc                   t    e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   d	Zeed
<   d Zd	S )	ModelArgs
model_typen_ctxn_embdn_headn_layern_positionslayer_norm_epsilon
vocab_sizeNnum_key_value_headsc                 0    | j         | j        | _         d S d S N)r   r   selfs    \/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/gpt2.py__post_init__zModelArgs.__post_init__   s#    #+'+{D$$$ ,+    )	__name__
__module____qualname__str__annotations__intfloatr   r    r   r   r   r      s         OOOJJJKKKKKKLLLOOO####3 3 3 3 3r   r   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
	Attentionargsc                    t                                                       |j        |j        z  dk    s
J d            |j        | _        |j        | _        | j        | j        z  | _        | j        dz  | _        t          j        | j        d| j        z  d          | _        t          j        | j        | j        d          | _	        d S )Nr   z"n_embd must be divisible by n_headg         T)bias)
super__init__r   r   head_dimscalennLinearc_attnc_projr   r&   	__class__s     r   r+   zAttention.__init__   s    {T[(A---/S---kkt{2]D(
iQ_4HHHiT[tDDDr   Nxmaskcachereturnc                    |j         \  }}}|                     |          }t          j        |dd          \  }}	}
|                    ||| j        d                              dddd          }|	                    ||| j        d                              dddd          }	|
                    ||| j        d                              dddd          }
||                    |	|
          \  }	}
t          ||	|
|| j	        |          }|                    dddd                              ||d          }| 
                    |          S )Nr(   )axisr      r   )r6   r-   r5   )shaper0   mxsplitreshaper   	transposeupdate_and_fetchr	   r-   r1   )r   r4   r5   r6   BLDqkvquerieskeysvaluesoutputs               r   __call__zAttention.__call__,   sG    '1akk!nn "ab 9 9 9v //!QR88BB1aANN||Aq$+r22<<Q1aHH1dk266@@Aq!LL 11$??LD&-T6djt
 
 
 !!!Q1--55aB??{{6"""r   NNr   r   r   r   r+   r=   arrayr   r   rJ   __classcell__r3   s   @r   r%   r%      s        EY E E E E E E" $(#	# #8# rx # }	#
 
# # # # # # # #r   r%   c                   :     e Zd Zdef fdZdej        fdZ xZS )MLPr&   c                     t                                                       |j        | _        t          j        | j        d| j        z            | _        t          j        d| j        z  | j                  | _        d S )N   )r*   r+   r   r.   r/   c_fcr1   r2   s     r   r+   zMLP.__init__H   s\    kIdk1t{?;;	iDK==r   r7   c                 v    |                      t          j        |                     |                              S r   )r1   r.   gelu_approxrT   )r   r4   s     r   rJ   zMLP.__call__O   s(    {{2>$))A,,77888r   	r   r   r   r   r+   r=   rM   rJ   rN   rO   s   @r   rQ   rQ   G   sa        >Y > > > > > >9RX 9 9 9 9 9 9 9 9r   rQ   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
TransformerBlockr&   c                 t   t                                                       |j        | _        |j        | _        |j        | _        t          |          | _        t          |          | _        t          j
        | j        | j                  | _        t          j
        | j        | j                  | _        d S )Neps)r*   r+   r   r   r   r%   attnrQ   mlpr.   	LayerNormln_1ln_2r2   s     r   r+   zTransformerBlock.__init__T   s    kk"&"9dOO	t99LK'
 
 
	 L$2IJJJ			r   Nr4   r5   r6   r7   c                     |                      |                     |          ||          }||z   }|                     |                     |                    }||z   }|S r   )r]   r`   r^   ra   )r   r4   r5   r6   rhouts          r   rJ   zTransformerBlock.__call__b   sU     IIdiillD%00EHHTYYq\\""!e
r   rK   rL   rO   s   @r   rY   rY   S   s        KY K K K K K K" $(#	
 
8
 rx 
 }	

 

 
 
 
 
 
 
 
r   rY   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )	GPT2Modelr&   c                     t                                                       j        | _        j        | _        j        | _        j        | _        j        | _        | j        dk    sJ t          j        | j        | j                  | _	        t          j        | j        | j                  | _
        fdt          | j                  D             | _        t          j        | j        | j                  | _        d S )Nr   c                 0    g | ]}t                     S ))r&   )rY   ).0_r&   s     r   
<listcomp>z&GPT2Model.__init__.<locals>.<listcomp>z   s%    KKK!"---KKKr   r[   )r*   r+   r   r   r   r   r   r.   	Embeddingwtewperangerd   r_   ln_fr2   s    `r   r+   zGPT2Model.__init__p   s    k+/|"&"9""""<==< 0$+>>KKKKuT\7J7JKKKL$2IJJJ			r   Ninputsc                    |j         \  }}|                     |          }|d gt          | j                  z  }d}|d         |d         j        }t          j        |          }t          j        |          |d         z   }||                     |          z  }t          ||d                   }t          | j        |          D ]\  }	}
 |	|||
          }|                     |          S )Nr   ).N)r6   )r<   rn   lenrd   offsetr=   rM   arangero   r   ziprq   )r   rr   r6   rk   rC   hidden_statesru   position_idsr5   layercs              r   rJ   zGPT2Model.__call__}   s    
 |1((=FS[[(E81X_F&!!y||fY&77,///$]E!H==DFE** 	@ 	@HE1!E-Q???MMyy'''r   r   rW   rO   s   @r   rg   rg   o   ss        KY K K K K K K  ( (( ( ( ( ( ( ( (r   rg   c                   Z     e Zd Zdef fdZ	 ddej        fdZd Ze	d             Z
 xZS )	Modelr&   c                     t                                                       || _        |j        | _        t	          |          | _        d S r   )r*   r+   r&   r   rg   modelr2   s     r   r+   zModel.__init__   s:    	/t__


r   Nrr   c                 p    |                      ||          }| j         j                            |          }|S r   )r   rn   	as_linear)r   rr   r6   re   s       r   rJ   zModel.__call__   s3    
 jj''jn&&s++
r   c                 @   i }t          | j        j                  D ]}d| d|v r|d| d= d| d|v r'|d| d                             dd          |d| d<   d| d|v r'|d| d                             dd          |d| d<   d| d|v r'|d| d                             dd          |d| d<   d| d|v r'|d| d                             dd          |d| d<   |D ]1}|                    d	          s||         |d	| <   &||         ||<   2|S )
Nzh.z
.attn.biasz.attn.c_attn.weightr   r   z.attn.c_proj.weightz.mlp.c_fc.weightz.mlp.c_proj.weightzmodel.)rp   r&   r   r@   
startswith)r   weightsnew_weightsiweights        r   sanitizezModel.sanitize   s   ty()) 	" 	"A!A!!!W,,..../*A***g557>////8)Aq// 3Q3334 +A***g557>////8)Aq// 3Q3334 (A'''7224;,,,,5)Aq// 0Q0001 *A)))W446=....7)Aq// 2Q2223  	6 	6F$$X.. 618-V--..&-foF##r   c                     | j         j        S r   )r   rd   r   s    r   layerszModel.layers   s    z|r   r   )r   r   r   r   r+   r=   rM   rJ   r   propertyr   rN   rO   s   @r   r}   r}      s        %Y % % % % % %       8   X    r   r}   )dataclassesr   typingr   r   mlx.corecorer=   mlx.nnr.   baser   r   r	   r   Moduler%   rQ   rY   rg   r}   r#   r   r   <module>r      s   " ! ! ! ! !                             T T T T T T T T T T 3 3 3 3 3 3 3 3 '# '# '# '# '#	 '# '# '#T	9 	9 	9 	9 	9") 	9 	9 	9    ry   8(( (( (( (( ((	 (( (( ((V. . . . .BI . . . . .r   