
    )j                     d   d dl mZ d dlmZ d dlmZmZmZmZ d dl	m
Z d dlmZ ddlmZ ddlmZmZmZ ddlmZ e G d	 d
e                      Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    )	dataclass)partial)AnyDictOptionalUnionN   )XieLU)BaseModelArgscreate_attention_maskscaled_dot_product_attention)initialize_ropec                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   eed<   eed<   eed<   eed<   eed<   dZeed<   dZ	e
eeeeef         f                  ed<   dS )	ModelArgs
model_typehidden_sizenum_hidden_layersintermediate_sizemlp_biasnum_attention_headsattention_biasrms_norm_eps
vocab_sizenum_key_value_headsmax_position_embeddings
rope_theta	post_normqk_normtie_word_embeddingsFrope_traditionalNrope_scaling)__name__
__module____qualname__str__annotations__intboolfloatr    r!   r   r   r        _/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/apertus.pyr   r      s         OOONNNOOO    OOOMMM"d""";?L(4U5#:%6 678?????r+   r   c                   H     e Zd Zdef fdZdej        dej        fdZ xZS )
ApertusMLPargsc                    t                                                       t          j        |j        |j        |j                  | _        t          j        |j        |j        |j                  | _        t                      | _
        d S )Nbias)super__init__nnLinearr   r   r   up_proj	down_projr
   act_fnselfr/   	__class__s     r,   r4   zApertusMLP.__init__%   sz    yd44=
 
 
 "D$44=
 
 
 ggr+   xreturnc                 x    |                      |                     |                     |                              S N)r8   r9   r7   )r;   r=   s     r,   __call__zApertusMLP.__call__/   s*    ~~dkk$,,q//::;;;r+   )	r"   r#   r$   r   r4   mxarrayrA   __classcell__r<   s   @r,   r.   r.   $   sj        Y      <"( <rx < < < < < < < <r+   r.   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
ApertusAttentionr/   c                 $   t                                                       |j        | _        |j        | _        |j        |j        z  | _        | j        dz  | _        t          j        |j        |j        | j        z  d          | _	        t          j        |j        |j        | j        z  d          | _
        t          j        |j        |j        | j        z  d          | _        t          j        |j        | j        z  |j        d          | _        t          j        | j        |j                  | _        t          j        | j        |j                  | _        t#          | j        |j        |j        |j        |j                  | _        d S )Ng      Fr1   eps)r3   r4   r   r   r   head_dimscaler5   r6   q_projk_projv_projo_projRMSNormr   q_normk_normr   r   r    r!   r   roper:   s     r,   r4   zApertusAttention.__init__4   sn   #'#; #'#; (D,DD]D(
id6FU
 
 
 id6FU
 
 
 id6FU
 
 
 i$t}4d6FU
 
 
 jD4EFFFjD4EFFF#MO!(
 
			r+   Nr=   maskcacher>   c                    |j         \  }}}|                     |          |                     |          |                     |          }	}}|                     |                    ||| j        d                                        dddd          }|                     |                    ||| j	        d                                        dddd          }|	                    ||| j	        d                              dddd          }	|R| 
                    ||j                  }| 
                    ||j                  }|                    ||	          \  }}	n*| 
                    |          }| 
                    |          }t          |||	|| j        |          }
|
                    dddd                              ||d          }
|                     |
          S )Nr      r	      )offset)rV   rL   rU   )shaperM   rN   rO   rR   reshaper   	transposerS   r   rT   r[   update_and_fetchr   rL   rP   )r;   r=   rU   rV   BLDquerieskeysvaluesoutputs              r,   rA   zApertusAttention.__call__T   s    '1a $AAAv++OOAq$":B??
 

)Aq!Q

 	 {{4<<1d.FKKLLVVq!Q
 
 1d&>CCMMq!Q
 
 iii==G99T%,977D 11$??LD&&ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??{{6"""r+   NNr"   r#   r$   r   r4   rB   rC   r   r   rA   rD   rE   s   @r,   rG   rG   3   s        
Y 
 
 
 
 
 
F $(#	# #8# rx # }	#
 
# # # # # # # #r+   rG   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
ApertusDecoderLayerr/   c                 ,   t                                                       t          |          | _        t	          |          | _        t          j        |j        |j	                  | _
        t          j        |j        |j	                  | _        d S )NrI   )r3   r4   rG   	self_attnr.   mlpr5   rQ   r   r   attention_layernormfeedforward_layernormr:   s     r,   r4   zApertusDecoderLayer.__init__v   sw    )$//d###%:d.>DDU#V#V#V %'Z0@dFW%X%X%X"""r+   Nr=   rU   rV   r>   c                     ||                      |                     |          ||          z   }||                     |                     |                    z   }|S r@   )rl   rn   rm   ro   )r;   r=   rU   rV   houts         r,   rA   zApertusDecoderLayer.__call__~   sU     t77::D%HHH$((455a88999
r+   rg   rh   rE   s   @r,   rj   rj   u   s        YY Y Y Y Y Y Y $(#	 8 rx  }	
 
       r+   rj   c                   \     e Zd Zdef fdZ	 ddej        dee         dej        fdZ	 xZ
S )	ApertusModelr/   c                 &   t                                                       t          j        j        j                  | _        fdt          j                  D             | _	        t          j
        j        j                  | _        d S )Nc                 0    g | ]}t                     S ))r/   )rj   ).0_r/   s     r,   
<listcomp>z)ApertusModel.__init__.<locals>.<listcomp>   s2     
 
 
/0T***
 
 
r+   rI   )r3   r4   r5   	Embeddingr   r   embed_tokensranger   layersrQ   r   normr:   s    `r,   r4   zApertusModel.__init__   s    L$:JKK
 
 
 
49$:P4Q4Q
 
 
 Jt/T5FGGG			r+   NinputsrV   r>   c                    |                      |          }|d gt          | j                  z  }t          ||d                   }t	          | j        |          D ]\  }} ||||          }|                     |          S )Nr   )rU   rV   )r{   lenr}   r   zipr~   )r;   r   rV   rq   rU   layercs          r,   rA   zApertusModel.__call__   s    
 f%%=FS---E$Qa11DK// 	- 	-HE1ad!,,,AAyy||r+   r@   rh   rE   s   @r,   rt   rt      s        HY H H H H H H  $  } 
	       r+   rt   c                   x     e Zd Zdef fdZ	 d
dej        dee         dej        fdZ	d Z
ed	             Z xZS )Modelr/   c                     t                                                       || _        |j        | _        t	          |          | _        t          j        |j        |j	        d          | _
        d S )NFr1   )r3   r4   r/   r   rt   modelr5   r6   r   r   lm_headr:   s     r,   r4   zModel.__init__   sY    	/!$''
y!14?OOOr+   Nr   rV   r>   c                 X    |                      ||          }|                     |          S r@   )r   r   )r;   r   rV   rr   s       r,   rA   zModel.__call__   s)    
 jj''||C   r+   c                     |                                 D ]F\  }}|                    d          s|                    d          r|                                ||<   G|S )Nalpha_palpha_n)itemsendswithsqueeze)r;   weightskvs       r,   sanitizezModel.sanitize   s[    MMOO 	) 	)DAqzz)$$ )

9(=(= )YY[[
r+   c                     | j         j        S r@   )r   r}   )r;   s    r,   r}   zModel.layers   s    z  r+   r@   )r"   r#   r$   r   r4   rB   rC   r   r   rA   r   propertyr}   rD   rE   s   @r,   r   r      s        PY P P P P P P  $! !! }! 
	! ! ! !   ! ! X! ! ! ! !r+   r   )dataclassesr   	functoolsr   typingr   r   r   r   mlx.corecorerB   mlx.nnr5   activationsr
   baser   r   r   
rope_utilsr   r   Moduler.   rG   rj   rt   r   r*   r+   r,   <module>r      s   " ! ! ! ! !       - - - - - - - - - - - -                   T T T T T T T T T T ' ' ' ' ' ' @ @ @ @ @ @ @ @(< < < < < < < <?# ?# ?# ?# ?#ry ?# ?# ?#D    ")   (    29   6! ! ! ! !BI ! ! ! ! !r+   