
    )j                     j   d dl mZ d dlmZmZ d dlmZ d dlm	Z	 ddl
mZ ddlmZmZmZ e G d de                      Z G d	 d
e	j                  Z G d de	j                  Z G d de	j                  Z G d de	j                  Z G d de	j                  Z G d de	j                  ZdS )    )	dataclass)AnyOptionalN   )swiglu)BaseModelArgscreate_attention_maskscaled_dot_product_attentionc                       e Zd ZU eed<   dZeed<   dZeed<   dZeed<   dZ	eed	<   dZ
eed
<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dS )	ModelArgs
model_typei    hidden_size(   num_hidden_layersi X  intermediate_size@   num_attention_headsnum_key_value_headsg    ^A
rope_thetai  
vocab_sizegh㈵>layer_norm_epsg      ?logit_scaleFattention_biaslayer_norm_biasuse_qk_normN)__name__
__module____qualname__str__annotations__r   intr   r   r   r   r   floatr   r   r   r   boolr   r        ^/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/cohere.pyr   r      s         OOOKs"s"""!!!!!!!!!J!!!J!NE!!!K ND   !OT!!!Kr%   r   c                   $     e Zd Z fdZd Z xZS )LayerNorm2Dc                     t                                                       t          j        ||f          | _        || _        d S N)super__init__mxzerosweighteps)selfd1d2r0   	__class__s       r&   r,   zLayerNorm2D.__init__    s9    hBx((r%   c                 `    | j         t          j                            |d d | j                  z  S r*   )r/   r-   fast
layer_normr0   r1   xs     r&   __call__zLayerNorm2D.__call__%   s'    {RW//4txHHHHr%   r   r   r   r,   r:   __classcell__r4   s   @r&   r(   r(      sN            
I I I I I I Ir%   r(   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
	Attentionargsc                    t                                                       || _        |j        }|j        x| _        }|j        x| _        }|j        |j        z  }|dz  | _        |j	        }t          j        |||z  |          | _        t          j        |||z  |          | _        t          j        |||z  |          | _        t          j        ||z  ||          | _        |j        | _        | j        rBt#          | j        ||j                  | _        t#          | j        ||j                  | _        t          j        |d|j                  | _        d S )Ng      ࿩bias)r0   T)traditionalbase)r+   r,   r@   r   r   n_headsr   
n_kv_headsscaler   nnLinearq_projk_projv_projo_projr   r(   r   q_normk_normRoPEr   rope)r1   r@   dimrF   rG   head_dimattetion_biasr4   s          r&   r,   zAttention.__init__*   sP   	!%!99w'+'??*#t'??t^
+iWx%7mLLLiZ(%:OOOiZ(%:OOOi( 2CmLLL+ 	%dlH$BUVVVDK%t/B  DK GH$T_MMM			r%   Nr9   maskcachereturnc                    |j         \  }}}|                     |          |                     |          |                     |          }	}}|                    ||| j        d          }|                    ||| j        d          }| j        r*|                     |          }| 	                    |          }|
                    dddd          }|
                    dddd          }|	                    ||| j        d          
                    dddd          }	|R|                     ||j                  }|                     ||j                  }|                    ||	          \  }}	n*|                     |          }|                     |          }t          |||	|| j        |          }
|

                    dddd                              ||d          }
|                     |
          S )Nr      r      )offset)rW   rH   rV   )shaperK   rL   rM   reshaperF   rG   r   rO   rP   	transposerR   r]   update_and_fetchr
   rH   rN   )r1   r9   rV   rW   BLDquerieskeysvaluesoutputs              r&   r:   zAttention.__call__E   s    '1a $AAAv//!Qb99||Aq$/266 	%kk'**G;;t$$D##Aq!Q//~~aAq))1dor::DDQ1aPPiii==G99T%,977D 11$??LD&&ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??{{6"""r%   NNr   r   r   r   r,   r-   arrayr   r   r:   r<   r=   s   @r&   r?   r?   )   s        NY N N N N N N< $(#	!# !#8!# rx !# }	!#
 
!# !# !# !# !# !# !# !#r%   r?   c                   $     e Zd Z fdZd Z xZS )MLPc                     t                                                       t          j        ||d          | _        t          j        ||d          | _        t          j        ||d          | _        d S )NFrB   )r+   r,   rI   rJ   	gate_projup_proj	down_proj)r1   rS   
hidden_dimr4   s      r&   r,   zMLP.__init__j   se    3
???yju===:s???r%   c                     |                      t          |                     |          |                     |                              S r*   )rq   r   ro   rp   r8   s     r&   r:   zMLP.__call__p   s4    ~~fT^^A%6%6QHHIIIr%   r;   r=   s   @r&   rm   rm   i   sS        @ @ @ @ @J J J J J J Jr%   rm   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
TransformerBlockr@   c                 B   t                                                       |j        | _        |j        | _        t          |          | _        t          |j        |j                  | _	        t          j        |j        |j        |j                  | _        || _        d S )Nr0   rC   )r+   r,   r   r   rF   r?   	self_attnrm   r   mlprI   	LayerNormr   r   input_layernormr@   r1   r@   r4   s     r&   r,   zTransformerBlock.__init__u   s    +/"4t')?@@!|$"5D<P 
  
  
 			r%   Nr9   rV   rW   rX   c                     |                      |          }|                     |||          }|                     |          }||z   |z   S r*   )r{   rx   ry   )r1   r9   rV   rW   hattn_hff_hs          r&   r:   zTransformerBlock.__call__   sJ       ##4//xx{{}q  r%   ri   rj   r=   s   @r&   ru   ru   t   s        
Y 
 
 
 
 
 
 $(#		! 	!8	! rx 	! }		!
 
	! 	! 	! 	! 	! 	! 	! 	!r%   ru   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )CohereModelr@   c                    t                                                       | _        j        | _        j        | _        | j        dk    sJ t          j        j        j                  | _        fdt          j                  D             | _
        t          j        j        j        j                  | _        d S )Nr   c                 0    g | ]}t                     S ))r@   )ru   ).0_r@   s     r&   
<listcomp>z(CohereModel.__init__.<locals>.<listcomp>   s2     
 
 
,-$'''
 
 
r%   rw   )r+   r,   r@   r   r   rI   	Embeddingr   embed_tokensrangelayersrz   r   r   normr|   s    `r&   r,   zCohereModel.__init__   s    	/!%!7""""L$:JKK
 
 
 
16t7M1N1N
 
 
 L$"5D<P
 
 
			r%   Ninputsc                    |                      |          }|d gt          | j                  z  }t          ||d                   }t	          | j        |          D ]\  }} ||||          }|                     |          S )Nr   )r   lenr   r	   zipr   )r1   r   rW   r~   rV   layercs          r&   r:   zCohereModel.__call__   s    
 f%%=FS---E$Qa11DK// 	" 	"HE1aq!!AAyy||r%   r*   )	r   r   r   r   r,   r-   rk   r:   r<   r=   s   @r&   r   r      sl        
Y 
 
 
 
 
 
"         r%   r   c                   T     e Zd Zdef fdZ	 ddej        fdZed             Z	 xZ
S )Modelr@   c                     t                                                       |j        | _        t          |          | _        || _        d S r*   )r+   r,   r   r   modelr@   r|   s     r&   r,   zModel.__init__   s<    / &&
			r%   Nr   c                     |                      ||          }| j         j                            |          }|| j         j        j        z  }|S r*   )r   r   	as_linearr@   r   )r1   r   rW   outs       r&   r:   zModel.__call__   sE    
 jj''j%//44DJO//
r%   c                     | j         j        S r*   )r   r   )r1   s    r&   r   zModel.layers   s    z  r%   r*   )r   r   r   r   r,   r-   rk   r:   propertyr   r<   r=   s   @r&   r   r      s        Y            ! ! X! ! ! ! !r%   r   )dataclassesr   typingr   r   mlx.corecorer-   mlx.nnrI   activationsr   rE   r   r	   r
   r   Moduler(   r?   rm   ru   r   r   r$   r%   r&   <module>r      s   " ! ! ! ! !                                   T T T T T T T T T T         I I I I I") I I I=# =# =# =# =#	 =# =# =#@J J J J J") J J J! ! ! ! !ry ! ! !2    ")   B! ! ! ! !BI ! ! ! ! !r%   