
    )j	                     8   d dl mZ d dlmZmZ d dlmZ d dlm	Z	 ddl
mZmZmZ e G d de                      Z G d d	e	j                  Z G d
 de	j                  Z G d de	j                  Z G d de	j                  Z G d de	j                  ZdS )    )	dataclass)AnyOptionalN   )BaseModelArgscreate_attention_maskscaled_dot_product_attentionc                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   dZeed<   d	Zeed
<   dZ	eed<   dZ
eed<   dS )	ModelArgs
model_typehidden_sizenum_hidden_layersintermediate_sizenum_attention_headsnum_key_value_headsgh㈵>norm_epsiloni   
vocab_sizei 
rope_thetaTtie_word_embeddingsN)__name__
__module____qualname__str__annotations__intr   floatr   r   r   bool     b/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/starcoder2.pyr   r      s         OOOL%JJ $$$$$$r   r   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
	Attentionargsc                    t                                                       || _        |j        }|j        x| _        }|j        x| _        }|j        |j        z  }|dz  | _        t          j
        |||z  d          | _        t          j
        |||z  d          | _        t          j
        |||z  d          | _        t          j
        ||z  |d          | _        t          j        |d|j                  | _        d S )Ng      TbiasF)traditionalbase)super__init__r#   r   r   n_headsr   
n_kv_headsscalennLinearq_projk_projv_projo_projRoPEr   rope)selfr#   dimr+   r,   head_dim	__class__s         r    r*   zAttention.__init__   s    	!%!99w'+'??*#t'??t^
iWx%7dCCCiZ(%:FFFiZ(%:FFFi( 2CdCCCGH%doNNN			r   Nxmaskcachereturnc                 |   |j         \  }}}|                     |          |                     |          |                     |          }	}}|                    ||| j        d                              dddd          }|                    ||| j        d                              dddd          }|	                    ||| j        d                              dddd          }	|R|                     ||j	                  }|                     ||j	                  }|
                    ||	          \  }}	n*|                     |          }|                     |          }t          |||	|| j        |          }
|
                    dddd                              ||d          }
|                     |
          S )Nr      r      )offset)r<   r-   r;   )shaper0   r1   r2   reshaper+   	transposer,   r5   rB   update_and_fetchr	   r-   r3   )r6   r:   r;   r<   BLDquerieskeysvaluesoutputs              r    __call__zAttention.__call__,   s    '1a $AAAv //!Qb99CCAq!QOO||Aq$/266@@Aq!LL1dor::DDQ1aPPiii==G99T%,977D 11$??LD&&ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??{{6"""r   NNr   r   r   r   r*   mxarrayr   r   rN   __classcell__r9   s   @r    r"   r"      s        OY O O O O O O( $(#	# #8# rx # }	#
 
# # # # # # # #r   r"   c                   $     e Zd Z fdZd Z xZS )MLPc                     t                                                       t          j        ||d          | _        t          j        ||d          | _        d S )NTr%   )r)   r*   r.   r/   c_fcc_proj)r6   r7   
hidden_dimr9   s      r    r*   zMLP.__init__L   sM    Ic:D999	i
Cd;;;r   c                 v    |                      t          j        |                     |                              S N)rY   r.   gelurX   )r6   r:   s     r    rN   zMLP.__call__Q   s(    {{27499Q<<00111r   )r   r   r   r*   rN   rS   rT   s   @r    rV   rV   K   sG        < < < < <
2 2 2 2 2 2 2r   rV   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
TransformerBlockr#   c                    t                                                       |j        | _        |j        | _        t          |          | _        t          |j        |j                  | _	        t          j        |j        |j                  | _        t          j        |j        |j                  | _        || _        d S )Neps)r)   r*   r   r   r+   r"   	self_attnrV   r   mlpr.   	LayerNormr   input_layernormpost_attention_layernormr#   r6   r#   r9   s     r    r*   zTransformerBlock.__init__V   s    +/"4t')?@@!|D,<$BSTTT(*$"3)
 )
 )
% 			r   Nr:   r;   r<   r=   c                     |                      |                     |          ||          }||z   }|                     |                     |                    }||z   }|S r\   )rc   rf   rd   rg   )r6   r:   r;   r<   rhouts          r    rN   zTransformerBlock.__call__c   s]     NN4//22D%@@EHHT2215566!e
r   rO   rP   rT   s   @r    r_   r_   U   s        Y        $(#	
 
8
 rx 
 }	

 

 
 
 
 
 
 
 
r   r_   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )Starcoder2Modelr#   c                 ~   t                                                       | _        j        | _        j        | _        | j        dk    sJ t          j        j        j                  | _        fdt          j                  D             | _
        t          j        j        j                  | _        d S )Nr   c                 0    g | ]}t                     S ))r#   )r_   ).0_r#   s     r    
<listcomp>z,Starcoder2Model.__init__.<locals>.<listcomp>x   s2     
 
 
,-$'''
 
 
r   ra   )r)   r*   r#   r   r   r.   	Embeddingr   embed_tokensrangelayersre   r   normrh   s    `r    r*   zStarcoder2Model.__init__q   s    	/!%!7""""L$:JKK
 
 
 
16t7M1N1N
 
 
 L!1t7HIII			r   Ninputsc                    |                      |          }|d gt          | j                  z  }t          ||d                   }t	          | j        |          D ]\  }} ||||          }|                     |          S )Nr   )ru   lenrw   r   ziprx   )r6   ry   r<   rk   r;   layercs          r    rN   zStarcoder2Model.__call__}   s    
 f%%=FS---E$Qa11DK// 	" 	"HE1aq!!AAyy||r   r\   )	r   r   r   r   r*   rQ   rR   rN   rS   rT   s   @r    rn   rn   p   ss        
JY 
J 
J 
J 
J 
J 
J         r   rn   c                   T     e Zd Zdef fdZ	 ddej        fdZed             Z	 xZ
S )Modelr#   c                     t                                                       || _        |j        | _        t	          |          | _        |j        s(t          j        |j	        |j
        d          | _        d S d S )NFr%   )r)   r*   r#   r   rn   modelr   r.   r/   r   r   lm_headrh   s     r    r*   zModel.__init__   sq    	/$T**
' 	T9T%5tUSSSDLLL	T 	Tr   Nry   c                     |                      ||          }| j        j        r | j         j                            |          }n|                     |          }|S r\   )r   r#   r   ru   	as_linearr   )r6   ry   r<   rl   s       r    rN   zModel.__call__   sT    
 jj''9( 	$*)33C88CC,,s##C
r   c                     | j         j        S r\   )r   rw   )r6   s    r    rw   zModel.layers   s    z  r   r\   )r   r   r   r   r*   rQ   rR   rN   propertyrw   rS   rT   s   @r    r   r      s        TY T T T T T T 
 

 
 
 
 ! ! X! ! ! ! !r   r   )dataclassesr   typingr   r   mlx.corecorerQ   mlx.nnr.   r(   r   r   r	   r   Moduler"   rV   r_   rn   r   r   r   r    <module>r      s   " ! ! ! ! !                             T T T T T T T T T T 
% 
% 
% 
% 
% 
% 
% 
%.# .# .# .# .#	 .# .# .#b2 2 2 2 2") 2 2 2    ry   6    bi   >! ! ! ! !BI ! ! ! ! !r   