
    )j                     X   d dl mZ d dlmZmZmZmZ d dlmZ	 d dl
mZ ddlmZ ddlmZmZmZ ddlmZ e G d d	e                      Z G d
 dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    )	dataclass)AnyDictOptionalUnionN   )swiglu)BaseModelArgscreate_attention_maskscaled_dot_product_attention)initialize_ropec                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   eed<   dZee         ed<   dZ	eed<   dZ
eed<   dZeeeeeef         f                  ed<   dZeed<   dS )	ModelArgs
model_typehidden_sizedim_model_basenum_hidden_layersintermediate_sizenum_attention_headsrms_norm_eps
vocab_sizenum_key_value_headsscale_depth	scale_embNmax_position_embeddingsg    .A
rope_thetaFrope_traditionalrope_scalingtie_word_embeddings)__name__
__module____qualname__str__annotations__intfloatr   r   r   r   boolr   r   r   r        _/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/minicpm.pyr   r      s         OOOOOO-1Xc]111!J!!!"d""";?L(4U3:%6 678??? %%%%%%r)   r   c                   $     e Zd Z fdZd Z xZS )MLPc                 ,   t                                                       t          j        |j        |j        d          | _        t          j        |j        |j        d          | _        t          j        |j        |j        d          | _        d S NFbias)	super__init__nnLinearr   r   	gate_projup_proj	down_projselfargs	__class__s     r*   r2   zMLP.__init__#   sz    4#3T5KRWXXXy!143IPUVVV4#94;KRWXXXr)   c                     |                      t          |                     |          |                     |                              S N)r7   r	   r5   r6   )r9   xs     r*   __call__zMLP.__call__)   s4    ~~fT^^A%6%6QHHIIIr)   )r    r!   r"   r2   r?   __classcell__r;   s   @r*   r,   r,   "   sS        Y Y Y Y YJ J J J J J Jr)   r,   c                   j     e Zd Zdef fdZ	 	 ddej        deej                 dee         fdZ	 xZ
S )		Attentionr:   c                    t                                                       || _        |j        | _        |j        x| _        }|j        | _        |j        |z  x| _        }|dz  | _        |j	        | _	        | j        | j	        z  | _
        t          j        | j        | j        | j        z  d          | _        t          j        | j        | j	        | j        z  d          | _        t          j        | j        | j	        | j        z  d          | _        t          j        | j        | j        z  | j        d          | _        t#          | j        |j        |j        |j        |j                  | _        d S )Ng      Fr/   )r1   r2   r:   r   r   	num_headsr   head_dimscaler   num_key_value_groupsr3   r4   q_projk_projv_projo_projr   r   r   r   rope)r9   r:   n_headsrF   r;   s       r*   r2   zAttention.__init__.   sf   	+#'#;;/#'#3w#>>t^
#'#; $(Nd6N$N!idnt}<5
 
 
 id6FU
 
 
 id6FU
 
 
 iNT]*D,<5
 
 
 $MO!(
 
			r)   Nr>   maskcachec                 |   |j         \  }}}|                     |          |                     |          |                     |          }	}}|                    ||| j        d                              dddd          }|                    ||| j        d                              dddd          }|	                    ||| j        d                              dddd          }	|R|                     ||j	                  }|                     ||j	                  }|
                    ||	          \  }}	n*|                     |          }|                     |          }t          |||	|| j        |          }
|
                    dddd                              ||d          }
|                     |
          S )Nr      r      )offset)rP   rG   rO   )shaperI   rJ   rK   reshaperE   	transposer   rM   rU   update_and_fetchr   rG   rL   )r9   r>   rO   rP   BL_querieskeysvaluesattn_outputs              r*   r?   zAttention.__call__Q   s    '1a $AAAv//!Q;;EEaAqQQ||Aq$":B??II!QPQSTUU1d&>CCMMq!Q
 
 iii==G99T%,977D 11$??LD&&ii((G99T??D2T6djt
 
 
 "++Aq!Q77??1bII{{;'''r)   NNr    r!   r"   r   r2   mxarrayr   r   r?   r@   rA   s   @r*   rC   rC   -   s        !
Y !
 !
 !
 !
 !
 !
L $(#	( (8( rx ( }	( ( ( ( ( ( ( (r)   rC   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
DecoderLayerr:   c                    t                                                       || _        |j        | _        |j        | _        t          |          | _        t          |          | _        t          j
        |j        |j                  | _        t          j
        |j        |j                  | _        |j        | _        |j        | _        d S )Neps)r1   r2   r:   r   r   rC   	self_attnr,   mlpr3   RMSNormr   input_layernormpost_attention_layernormr   r8   s     r*   r2   zDecoderLayer.__init__s   s    	+!%!7"4t99!z$*:@QRRR(*
$"3)
 )
 )
%  +!%!7r)   Nr>   rO   rP   returnc                 
   |                      |                     |          ||          }||| j        | j        dz  z  z  z   }|                     |                     |                    }||| j        | j        dz  z  z  z   }|S )Ng      ?)rj   rm   r   r   rk   rn   )r9   r>   rO   rP   rhouts          r*   r?   zDecoderLayer.__call__   s     NN4//22D%@@T%(>(CCDDHHT2215566!t'$*@#*EEFF
r)   ra   rb   rA   s   @r*   rf   rf   r   s        8Y 8 8 8 8 8 8& $(#	
 
8
 rx 
 }	

 

 
 
 
 
 
 
 
r)   rf   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )MiniCPMModelr:   c                 f   t                                                       | _        j        | _        | j        dk    sJ t	          j        j        j                  | _        fdt          j	                  D             | _
        t	          j        j        j                  | _        d S )Nr   c                 .    g | ]}t                    S r(   )rf   ).0r\   r:   s     r*   
<listcomp>z)MiniCPMModel.__init__.<locals>.<listcomp>   s!    QQQa|D))QQQr)   rh   )r1   r2   r:   r   r3   	Embeddingr   embed_tokensranger   layersrl   r   normr8   s    `r*   r2   zMiniCPMModel.__init__   s    	/""""L$:JKKQQQQ59O3P3PQQQJt/T5FGGG			r)   Ninputsc                     |                      |          | j        j        z  }|d gt          | j                  z  }t          ||d                   }t          | j        |          D ]\  }} ||||          }|                     |          S )Nr   )r{   r:   r   lenr}   r   zipr~   )r9   r   rP   rr   rO   layercs          r*   r?   zMiniCPMModel.__call__   s    
 f%%	(;;=FS---E$Qa11DK// 	" 	"HE1aq!!AAyy||r)   r=   )	r    r!   r"   r   r2   rc   rd   r?   r@   rA   s   @r*   ru   ru      ss        HY H H H H H H         r)   ru   c                   Z     e Zd Zdef fdZ	 ddej        fdZd Ze	d             Z
 xZS )	Modelr:   c                     t                                                       || _        |j        | _        t	          |          | _        | j        j        s(t          j        |j	        |j
        d          | _        d S d S r.   )r1   r2   r:   r   ru   modelr   r3   r4   r   r   lm_headr8   s     r*   r2   zModel.__init__   ss    	/!$''
y, 	T9T%5tUSSSDLLL	T 	Tr)   Nr   c                     |                      ||          }| j        j        s0|                     || j        j        | j        j        z  z            }n|| j         j        j        j        z  }|S r=   )	r   r:   r   r   r   r   r{   weightT)r9   r   rP   rs   s       r*   r?   zModel.__call__   se    
 jj''y, 	9,,sdi&;di>V&VWXXCC
/688C
r)   c                 $    d|vr|d         |d<   |S )Nzlm_head.weightzmodel.embed_tokens.weightr(   )r9   weightss     r*   sanitizezModel.sanitize   s#    7**(/0K(LG$%r)   c                     | j         j        S r=   )r   r}   )r9   s    r*   r}   zModel.layers   s    z  r)   r=   )r    r!   r"   r   r2   rc   rd   r?   r   propertyr}   r@   rA   s   @r*   r   r      s        TY T T T T T T       
 ! ! X! ! ! ! !r)   r   )dataclassesr   typingr   r   r   r   mlx.corecorerc   mlx.nnr3   activationsr	   baser
   r   r   
rope_utilsr   r   Moduler,   rC   rf   ru   r   r(   r)   r*   <module>r      s   " ! ! ! ! ! - - - - - - - - - - - -                   T T T T T T T T T T ' ' ' ' ' ' & & & & & & & &&J J J J J") J J JB( B( B( B( B(	 B( B( B(J    29   <    29   :! ! ! ! !BI ! ! ! ! !r)   