
    )j\                     D   d dl mZ d dlmZmZ d dlmZ d dlm	Z	 ddl
mZ ddlmZmZmZ e G d de                      Z G d	 d
e	j                  Z G d de	j                  Z G d de	j                  Z G d de	j                  Z G d de	j                  ZdS )    )	dataclass)AnyOptionalN   )swiglu)BaseModelArgscreate_attention_maskscaled_dot_product_attentionc                   x    e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   d
Zeed<   dS )	ModelArgs
model_type
block_sizelayer_norm_epsn_embdn_head
n_kv_headsn_layer
rope_theta
vocab_sizeTtie_word_embeddingsN)	__name__
__module____qualname__str__annotations__intfloatr   bool     b/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/lille-130m.pyr   r      sz         OOOOOOKKKKKKOOOLLLOOO $$$$$$r    r   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
Lille130mAttentionargsc                 0   t                                                       |j        | _        |j        | _        |j        |j        z  | _        | j        dz  | _        t          j        |j        |j        d|j        z  z   | j        z  d          | _	        t          j        |j        | j        z  |j        d          | _
        t          j        |j        |j                  | _        t          j        |j        |j        z  d|j                  | _        d S )Ng         FbiasepsT)super__init__r   r   r   head_dimscalennLinearqkv_projout_projRMSNormr   normRoPEr   ropeselfr$   	__class__s     r!   r,   zLille130mAttention.__init__   s    k/t{2]D(
	K$+DO(;;t}LSX
 
 
 	$+"=t{QVWWWJt{0CDDD	GDK4;6doNN			r    Nxmaskcachereturnc                    |j         \  }}}|                     |                     |                    }| j        | j        z  }| j        | j        z  }	t          j        ||||	z   gd          \  }
}}|
                    ||| j        d          	                    dddd          }
|                    ||| j        d          	                    dddd          }|                    ||| j        d          	                    dddd          }|R| 
                    |
|j                  }
| 
                    ||j                  }|                    ||          \  }}n*| 
                    |
          }
| 
                    |          }t          |
|||| j        |          }|	                    dddd                              ||d          }|                     |          S )	N)axisr   r&   r      )offset)r<   r.   r;   )shaper1   r4   r   r-   r   mxsplitreshape	transposer6   rB   update_and_fetchr
   r.   r2   )r8   r:   r;   r<   BLDqkvq_sizekv_sizequerieskeysvaluesoutputs                 r!   __call__zLille130mAttention.__call__,   s    '1ammDIIaLL))t},/DM1 "vv7G.Hr R R Rv//!QR88BB1aANN||Aq$/266@@Aq!LL1dor::DDQ1aPPiii==G99T%,977D 11$??LD&&ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??}}V$$$r    NNr   r   r   r   r,   rD   arrayr   r   rS   __classcell__r9   s   @r!   r#   r#      s        OY O O O O O O& $(#	 %  %8 % rx  % }	 %
 
 %  %  %  %  %  %  %  %r    r#   c                   H     e Zd Zdef fdZdej        dej        fdZ xZS )Lille130mMLPr$   c                    t                                                       dt          t          d|j        z  dz            dz            z  }t          j        |j        |j                  | _        t          j	        |j        |d          | _
        t          j	        |j        |d          | _        t          j	        ||j        d          | _        d S )N      rA   r)   Fr'   )r+   r,   roundr   r   r/   r3   r   r4   r0   	gate_projup_proj	down_proj)r8   r$   
hidden_dimr9   s      r!   r,   zLille130mMLP.__init__P   s    5Q_q%8!9!9C!?@@@
Jt{0CDDD	4;
GGGyjuEEE:t{GGGr    r:   r=   c                     |                      |          }|                     t          |                     |          |                     |                              S N)r4   ra   r   r_   r`   )r8   r:   hs      r!   rS   zLille130mMLP.__call__Y   sC    IIaLL~~fT^^A%6%6QHHIIIr    )	r   r   r   r   r,   rD   rV   rS   rW   rX   s   @r!   rZ   rZ   O   s{        HY H H H H H HJ"( Jrx J J J J J J J Jr    rZ   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
Lille130Blockr$   c                     t                                                       t          |          | _        t	          |          | _        d S rd   )r+   r,   r#   	attentionrZ   feed_forwardr7   s     r!   r,   zLille130Block.__init___   s>    +D11(..r    Nr:   r;   r<   r=   c                 j    ||                      |||          z   }||                     |          z   }|S rd   )ri   rj   )r8   r:   r;   r<   re   outs         r!   rS   zLille130Block.__call__d   s;     q$...$##A&&&
r    rT   rU   rX   s   @r!   rg   rg   ^   s        /Y / / / / / / $(#	 8 rx  }	
 
       r    rg   c                   \     e Zd Zdef fdZ	 ddej        dee         dej        fdZ	 xZ
S )	Lille130r$   c                 &   t                                                       t          j        j        j                  | _        fdt          j                  D             | _	        t          j
        j        j                  | _        d S )Nc                 0    g | ]}t                     S ))r$   )rg   ).0_r$   s     r!   
<listcomp>z%Lille130.__init__.<locals>.<listcomp>s   s$    MMMA}$///MMMr    r)   )r+   r,   r/   	Embeddingr   r   tok_embeddingsranger   layersr3   r   r4   r7   s    `r!   r,   zLille130.__init__p   sv     l4?DKHHMMMMt|9L9LMMMJt{0CDDD			r    Ninputsr<   r=   c                 8   |                      |          }|d gt          | j                  z  }t          ||d                   }t	          | j        |          D ]\  }} ||||          }| j                             |                     |                    S )Nr   r<   )ru   lenrw   r	   zip	as_linearr4   )r8   rx   r<   re   r;   layercs          r!   rS   zLille130.__call__v   s    
 ''=FS---E$Qa11DK// 	( 	(HE1aQ'''AA",,TYYq\\:::r    rd   rU   rX   s   @r!   rn   rn   o   s        EY E E E E E E  $; ;; }; 
	; ; ; ; ; ; ; ;r    rn   c                   x     e Zd Zdef fdZ	 d
dej        dee         dej        fdZ	e
d             Zd	 Z xZS )Modelr$   c                     t                                                       || _        |j        | _        t	          |          | _        d S rd   )r+   r,   r$   r   rn   transformerr7   s     r!   r,   zModel.__init__   s=    	/#D>>r    Nrx   r<   r=   c                 0    |                      ||          S )Nrz   )r   )r8   rx   r<   s      r!   rS   zModel.__call__   s    
 e444r    c                     | j         j        S rd   )r   rw   )r8   s    r!   rw   zModel.layers   s    &&r    c                 >    d |                                 D             S )Nc                 "    i | ]\  }}d |v	||S )
rotary_embr   )rq   kvs      r!   
<dictcomp>z"Model.sanitize.<locals>.<dictcomp>   s(    JJJAL4I4I14I4I4Ir    )items)r8   weightss     r!   sanitizezModel.sanitize   s    JJJJJJr    rd   )r   r   r   r   r,   rD   rV   r   r   rS   propertyrw   r   rW   rX   s   @r!   r   r      s        *Y * * * * * *  $5 55 }5 
	5 5 5 5 ' ' X'K K K K K K Kr    r   )dataclassesr   typingr   r   mlx.corecorerD   mlx.nnr/   activationsr   baser   r	   r
   r   Moduler#   rZ   rg   rn   r   r   r    r!   <module>r      s   " ! ! ! ! !                                   T T T T T T T T T T 
% 
% 
% 
% 
% 
% 
% 
%1% 1% 1% 1% 1% 1% 1% 1%hJ J J J J29 J J J    BI   "; ; ; ; ;ry ; ; ;2K K K K KBI K K K K Kr    