
    )jL                        d dl Z d dlmZ d dlmZ d dlmZmZ d dlm	Z
 d dlmZ ddlmZmZmZ e G d de                      Zd	 ZddZ G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z ee
j        d          dd            Z G d dej                  ZdS )    N)	dataclass)partial)AnyOptional   )BaseModelArgscreate_attention_maskscaled_dot_product_attentionc                       e Zd ZU dZeed<   dZeed<   dZeed<   dZ	eed<   dZ
eed	<   d
Zeed<   dZeed<   dZeed<   dZeed<   dS )	ModelArgsnanochat
model_typei   hidden_size   num_hidden_layers
   num_attention_headsnum_key_value_headsi   
vocab_sizei   max_position_embeddingsi   intermediate_size     @
rope_thetaN)__name__
__module____qualname__r   str__annotations__r   intr   r   r   r   r   r   r   float     `/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/nanochat.pyr   r      s          J   Ks!!!!!!!!J#'S'''!s!!!Jr"   r   c                 D    t           j                            | dd          S )z0Functional RMSNorm with no learnable parameters.Ngh㈵>)mxfastrms_norm)xs    r#   r'   r'      s    7AtT***r"   r   c           	         | j         d         }|Q|dz  }t          j        t          j        d|t          j                  t          j        |          |z  z             }t          j                            | |dd|d|          S )	a@  Apply RoPE with blocked layout.


    Args:
        x: Input tensor in (B, H, T, D) format
        offset: Position offset for KV caching
        base: RoPE base frequency (default 10000.0)
        freqs: Precomputed negated frequencies (optional)

    Returns:
        Tensor with RoPE applied, same shape as input
    N           dtypeFg      ?)dimstraditionalbasefreqsscaleoffset)	shaper%   exparangefloat32mathlogr&   rope)r(   r4   r1   r2   head_dimhalf_Ds         r#   apply_rotary_embr>       s     wr{H}QIc64448OP
 
 

 7<<	    r"   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
	Attentionargsc                    t                                                       |j        | _        |j        | _        |j        | _        | j        | j        z  | _        | j        dz  | _        |j	        | _	        t          j        | j        | j        | j        z  d          | _        t          j        | j        | j        | j        z  d          | _        t          j        | j        | j        | j        z  d          | _        t          j        | j        | j        d          | _        | j        dz  }t!          j        t!          j        d|t           j                  t)          j        | j	                  |z  z             | _        d S )Ng      Fbiasr+   r,   r-   )super__init__r   r   	num_headsr   num_kv_headsr<   r3   r   nnLinearc_qc_kc_vc_projr%   r6   r7   r8   r9   r:   _rope_freqs)selfrA   r=   	__class__s      r#   rF   zAttention.__init__C   sd   +1 4(DN:]D(
/9dnt}<5
 
 
 9d/$-?e
 
 
 9d/$-?e
 
 
 i 0$2BOOO !#FIc6444x((613
 
 
r"   Nr(   maskcachereturnc                    |j         \  }}}|                     |          }|                     |          }|                     |          }	|                    ||| j        | j                                      dddd          }|                    ||| j        | j                                      dddd          }|	                    ||| j        | j                                      dddd          }	||j	        nd}
t          ||
| j        | j                  }t          ||
| j        | j                  }t          |          }t          |          }||                    ||	          \  }}	t          |||	|| j        |          }|                    dddd                              ||| j                  }|                     |          S )Nr   r+   r      )r4   r1   r2   )rS   r3   rR   )r5   rK   rL   rM   reshaperG   r<   	transposerH   r4   r>   r   rO   r'   update_and_fetchr
   r3   r   rN   )rP   r(   rR   rS   BL_querieskeysvaluesr4   outputs               r#   __call__zAttention.__call___   s    '1a((1++xx{{! //!QFFPPq!Q
 
 ||Aq$"3T]CCMMq!Q
 
 1d&7GGQQq!Q
 

 "'!2"F@P
 
 
  doT=M
 
 

 7##~~  11$??LD&-T6djt
 
 

 !!!Q1--55aD<LMM{{6"""r"   NNr   r   r   r   rF   r%   arrayr   r   ra   __classcell__rQ   s   @r#   r@   r@   B   s        
Y 
 
 
 
 
 
> $(#	.# .#8.# rx .# }	.#
 
.# .# .# .# .# .# .# .#r"   r@   c                   H     e Zd Zdef fdZdej        dej        fdZ xZS )MLPrA   c                     t                                                       t          j        |j        |j        d          | _        t          j        |j        |j        d          | _        d S NFrC   )rE   rF   rI   rJ   r   r   c_fcrN   rP   rA   rQ   s     r#   rF   zMLP.__init__   sY    Id.0FUSSS	i 68HuUUUr"   r(   rT   c                 ~    |                      |          }t          j        |          }|                     |          S N)rk   rI   relu2rN   )rP   r(   s     r#   ra   zMLP.__call__   s.    IIaLLHQKK{{1~~r"   	r   r   r   r   rF   r%   rd   ra   re   rf   s   @r#   rh   rh      sq        VY V V V V V V
"( rx        r"   rh   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
TransformerBlockrA   c                     t                                                       t          |          | _        t	          |          | _        d S rn   )rE   rF   r@   attnrh   mlprl   s     r#   rF   zTransformerBlock.__init__   s7    dOO	t99r"   Nr(   rR   rS   rT   c                     ||                      t          |          ||          z   }||                     t          |                    z   }|S )NrR   rS   )rt   r'   ru   )rP   r(   rR   rS   houts         r#   ra   zTransformerBlock.__call__   sG     		(1++D	>>>$((8A;;'''
r"   rb   rc   rf   s   @r#   rr   rr      s        Y       $(#		 	8	 rx 	 }		
 
	 	 	 	 	 	 	 	r"   rr   c                   L     e Zd Zdef fdZ	 ddej        dej        fdZ xZS )NanoChatModelrA   c                     t                                                       | _        t          j        j        j                  | _        fdt          j	                  D             | _
        d S )Nc                 .    g | ]}t                    S r!   )rr   ).0r\   rA   s     r#   
<listcomp>z*NanoChatModel.__init__.<locals>.<listcomp>   s"    PPPQ"4((PPPr"   )rE   rF   rA   rI   	Embeddingr   r   wteranger   rx   rl   s    `r#   rF   zNanoChatModel.__init__   sc    	<1ABBPPPP%8N2O2OPPPr"   NinputsrT   c                    |                      |          }t          |          }|d gt          | j                  z  }t	          ||d                   }t          | j        |          D ]\  }} ||||          }t          |          }|S )Nr   rw   )r   r'   lenrx   r	   zip)rP   r   rS   rx   rR   layercs          r#   ra   zNanoChatModel.__call__   s    
 HHVQKK=FS[[(E$Qa11DFE** 	- 	-HE1ad!,,,AA QKKr"   rn   rp   rf   s   @r#   r{   r{      s        QY Q Q Q Q Q Q   
	       r"   r{   T)	shapeless      .@c                 6    |t          j        | |z            z  S rn   )r%   tanh)logitscaps     r#   softcapr      s    #&&&&r"   c                   b     e Zd Zdef fdZ	 ddej        dej        fdZed             Z	 xZ
S )	ModelrA   c                     t                                                       || _        |j        | _        t	          |          | _        t          j        |j        |j	        d          | _
        d S rj   )rE   rF   rA   r   r{   transformerrI   rJ   r   r   lm_headrl   s     r#   rF   zModel.__init__   sZ    	/(..y!14?OOOr"   Nr   rT   c                 |    |                      ||          }|                     |          }t          |          }|S )N)rS   )r   r   r   )rP   r   rS   ry   r   s        r#   ra   zModel.__call__   s?    
 vU33c"" r"   c                     | j         j        S rn   )r   rx   )rP   s    r#   layerszModel.layers   s    !!r"   rn   )r   r   r   r   rF   r%   rd   ra   propertyr   re   rf   s   @r#   r   r      s        PY P P P P P P   
	    " " X" " " " "r"   r   )r   N)r   )r9   dataclassesr   	functoolsr   typingr   r   mlx.corecorer%   mlx.nnrI   r1   r   r	   r
   r   r'   r>   Moduler@   rh   rr   r{   compiler   r   r!   r"   r#   <module>r      s	    ! ! ! ! ! !                                   T T T T T T T T T T 	  	  	  	  	  	  	  	 + + +
   DK# K# K# K# K#	 K# K# K#\
 
 
 
 
") 
 
 
    ry   $    BI   < 	t$$$' ' ' %$'" " " " "BI " " " " "r"   