
    )j[                     T   d dl mZ d dlmZmZ d dlmZ d dlm	Z	 ddl
mZ ddlmZmZmZ ddlmZmZ e G d d	e                      Z G d
 de	j                  Z G d de	j                  Z G d de	j                  Z G d de	j                  Z G d de	j                  ZdS )    )	dataclass)OptionalTupleN   )swiglu)BaseModelArgscreate_attention_maskscaled_dot_product_attention)KVCacheRotatingKVCachec                       e Zd ZU eed<   dZeed<   dZeed<   dZeed<   dZ	eed	<   dZ
eed
<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dS )	ModelArgs
model_typei   hidden_size   head_dim    num_hidden_layersi 8  intermediate_sizenum_attention_heads   num_key_value_headsg     j@
rope_thetai  
vocab_sizegh㈵>layer_norm_epsg      ?logit_scaleFattention_biaslayer_norm_biassliding_window   sliding_window_patternN)__name__
__module____qualname__str__annotations__r   intr   r   r   r   r   r   floatr   r   r   r   boolr   r   r!        _/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/cohere2.pyr   r      s         OOOKHcs"s"""!!!!    JJ!NE!!!K ND   !OT!!!NC"#C#####r+   r   c            
            e Zd Zdedef fdZ	 	 d
dej        deej                 dee	ej        ej        f                  dej        fd	Z
 xZS )	Attentionargs	layer_idxc                    t                                                       || _        || _        |j        }|j        x| _        }|j        x| _        }|j	        x| _	        }||z  |k    rt          d| d| d          |dz  | _        |j        }t          j        |||z  |          | _        t          j        |||z  |          | _        t          j        |||z  |          | _        t          j        ||z  ||          | _        t          j        |d|j                  | _        |dz   |j        z  d	k    | _        d S )
Nz?hidden_size must be divisible by num_heads (got `hidden_size`: z and `num_heads`: z).g      ࿩biasT)traditionalbaser   r   )super__init__r/   r0   r   r   n_headsr   
n_kv_headsr   
ValueErrorscaler   nnLinearq_projk_projv_projo_projRoPEr   roper!   use_sliding_window)	selfr/   r0   dimr8   r9   r   attetion_bias	__class__s	           r,   r7   zAttention.__init__"   sd   	"!%!99w'+'??*#'=0w3&&1RU 1 1%,1 1 1   t^
+iWx%7mLLLiZ(%:OOOiZ(%:OOOi( 2CmLLLGH$T_MMM	#,q=D4O"OST"Tr+   Nxmaskcachereturnc                 4   |j         \  }}}|                     |          |                     |          |                     |          }	}}|                    ||| j        d                              dddd          }|                    ||| j        d                              dddd          }|	                    ||| j        d                              dddd          }	| j        re|+| 	                    |          }| 	                    |          }n8| 	                    ||j
                  }| 	                    ||j
                  }||                    ||	          \  }}	|j        t          j        k    rt          j        n|j        }
t!          |                    |
          ||	|| j        |                              |j                  }|                    dddd                              ||d          }|                     |          S )Nr      r      )offset)rK   r;   rJ   )shaper>   r?   r@   reshaper8   	transposer9   rD   rC   rQ   update_and_fetchdtypemxfloat16float32r
   astyper;   rA   )rE   rI   rJ   rK   BLDquerieskeysvalues	sdpa_typeoutputs               r,   __call__zAttention.__call__=   s    '1a $AAAv//!Qb99CCAq!QOO||Aq$/266@@Aq!LL1dor::DDQ1aPP " 	<}))G,,yy))GEL)AAyyely;; 11$??LD& #*-2:"="=BJJ7=	-NN9%%*
 
 
 &

 	 !!!Q1--55aB??{{6"""r+   NNr"   r#   r$   r   r'   r7   rW   arrayr   r   rc   __classcell__rH   s   @r,   r.   r.   !   s        UY U3 U U U U U U< $(59	&# &#8&# rx &# bh012	&#
 
&# &# &# &# &# &# &# &#r+   r.   c                   $     e Zd Z fdZd Z xZS )MLPc                     t                                                       t          j        ||d          | _        t          j        ||d          | _        t          j        ||d          | _        d S )NFr2   )r6   r7   r<   r=   	gate_projup_proj	down_proj)rE   rF   
hidden_dimrH   s      r,   r7   zMLP.__init__g   se    3
???yju===:s???r+   c                     |                      t          |                     |          |                     |                              S N)rn   r   rl   rm   )rE   rI   s     r,   rc   zMLP.__call__m   s4    ~~fT^^A%6%6QHHIIIr+   )r"   r#   r$   r7   rc   rg   rh   s   @r,   rj   rj   f   sS        @ @ @ @ @J J J J J J Jr+   rj   c            
            e Zd Zdedef fdZ	 	 d
dej        deej                 dee	ej        ej        f                  dej        fd	Z
 xZS )TransformerBlockr/   r0   c                 D   t                                                       |j        | _        |j        | _        t          ||          | _        t          |j        |j                  | _	        t          j        |j        |j        |j                  | _        || _        d S )Nepsr3   )r6   r7   r   r   r8   r.   	self_attnrj   r   mlpr<   	LayerNormr   r   input_layernormr/   )rE   r/   r0   rH   s      r,   r7   zTransformerBlock.__init__r   s    +/"433t')?@@!|$"5D<P 
  
  
 			r+   NrI   rJ   rK   rL   c                     |                      |          }|                     |||          }|                     |          }||z   |z   S rq   )rz   rw   rx   )rE   rI   rJ   rK   hattn_hff_hs          r,   rc   zTransformerBlock.__call__~   sJ       ##4//xx{{}q  r+   rd   re   rh   s   @r,   rs   rs   q   s        
Y 
3 
 
 
 
 
 
 $(59	! !8! rx ! bh012	!
 
! ! ! ! ! ! ! !r+   rs   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )CohereModelr/   c                    t                                                       | _        j        | _        j        | _        | j        dk    sJ j        | _        t          j        j        j	                  | _
        fdt          j                  D             | _        t          j        j	        j        j                  | _        d S )Nr   c                 2    g | ]}t          |           S ))r/   r0   )rs   ).0ir/   s     r,   
<listcomp>z(CohereModel.__init__.<locals>.<listcomp>   s6     
 
 
 $!444
 
 
r+   ru   )r6   r7   r/   r   r   r   window_sizer<   	Embeddingr   embed_tokensrangelayersry   r   r   normrE   r/   rH   s    `r,   r7   zCohereModel.__init__   s    	/!%!7"""".L$:JKK
 
 
 
4122
 
 
 L$"5D<P
 
 
			r+   Ninputsc                    |                      |          }|d gt          | j                  z  }| j        j        }t          |||dz
                     }t          ||d         | j                  }t          t          | j        |                    D ];\  }\  }}	|| j        j        z  | j        j        dz
  k    }
|
r|n|} ||||	          }<| 	                    |          S )Nr   r   )r   )
r   lenr   r/   r!   r	   r   	enumeratezipr   )rE   r   rK   r|   j	full_maskswa_maskr   layerc	is_globalrJ   s               r,   rc   zCohereModel.__call__   s    
 f%%=FS---EI,)!U1q5\::	(E!H$BRSSS&s4;'>'>?? 	" 	"MAzqDI4493a78 
 !*799xDaq!!AAyy||r+   rq   )	r"   r#   r$   r   r7   rW   rf   rc   rg   rh   s   @r,   r   r      sl        
Y 
 
 
 
 
 
&         r+   r   c                   Z     e Zd Zdef fdZ	 ddej        fdZd Ze	d             Z
 xZS )	Modelr/   c                     t                                                       |j        | _        t          |          | _        || _        d S rq   )r6   r7   r   r   modelr/   r   s     r,   r7   zModel.__init__   s<    / &&
			r+   Nr   c                     |                      ||          }| j         j                            |          }|| j         j        j        z  }|S rq   )r   r   	as_linearr/   r   )rE   r   rK   outs       r,   rc   zModel.__call__   sE    
 jj''j%//44DJO//
r+   c                 "   g }t          | j        j                  D ]r}|| j        j        z  | j        j        dz
  k    r"|                    t                                 D|                    t          | j        j        d                     s|S )Nr   r   )max_sizekeep)r   r/   r   r!   appendr   r   r   )rE   cachesr   s      r,   
make_cachezModel.make_cache   s    ty233 		 		ADI4493a78 8 gii((((#TY-EANNN    r+   c                     | j         j        S rq   )r   r   )rE   s    r,   r   zModel.layers   s    z  r+   rq   )r"   r#   r$   r   r7   rW   rf   rc   r   propertyr   rg   rh   s   @r,   r   r      s        Y              ! ! X! ! ! ! !r+   r   )dataclassesr   typingr   r   mlx.corecorerW   mlx.nnr<   activationsr   r5   r   r	   r
   rK   r   r   r   Moduler.   rj   rs   r   r   r*   r+   r,   <module>r      s   " ! ! ! ! ! " " " " " " " "                   T T T T T T T T T T + + + + + + + + $ $ $ $ $ $ $ $$B# B# B# B# B#	 B# B# B#JJ J J J J") J J J! ! ! ! !ry ! ! !6) ) ) ) )") ) ) )X!! !! !! !! !!BI !! !! !! !! !!r+   