
    )jI&                        d dl mZ d dlmZmZmZ d dlmZ d dl	m
Z
 ddlmZ ddlmZmZmZmZ ddlmZmZ e G d d	e                      Z G d
 de
j                  Z G d de
j                  Z G d de
j                  Z G d de
j                  Z G d de
j                  Z G d de
j                  ZdS )    )	dataclass)AnyListOptionalN   )swiglu)BaseModelArgscreate_attention_maskcreate_ssm_maskscaled_dot_product_attention)ArraysCacheKVCachec                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   eed<   eed<   eed<   eed<   eed<   dZeed<   dZ	e
e         ed<   dZe
ee                  ed<   dZe
ee                  ed<   d ZdS )	ModelArgs
model_type
vocab_sizehidden_sizenum_hidden_layersnum_attention_headsnum_key_value_headsmax_position_embeddingsnorm_eps	conv_biasconv_L_cache	block_dimblock_ff_dimblock_multiple_ofblock_ffn_dim_multiplierblock_auto_adjust_ff_dimg    .A
rope_thetaNrope_parametersfull_attn_idxslayer_typesc                     | j         d| j         v r| j         d         | _        | j        | j        | _        | j        %d t          | j                  D             | _        d S d S )Nr    c                 $    g | ]\  }}|d k    |S )full_attention ).0i
layer_types      \/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/lfm2.py
<listcomp>z+ModelArgs.__post_init__.<locals>.<listcomp>.   s2     # # #!Az!111 111    )r!   r    r   r   r"   	enumerater#   selfs    r+   __post_init__zModelArgs.__post_init__(   s~    +@T0T0T"2<@DO#+'+'?D$&# #%.t/?%@%@# # #D '&r-   )__name__
__module____qualname__str__annotations__intfloatboolr    r!   r   dictr"   r   r#   r1   r'   r-   r+   r   r      s         OOOOOO    OOOOOONNN####""""!J!!!&*OXd^****.NHT#Y'...'+K$s)$+++
 
 
 
 
r-   r   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
	Attentionargsc                    t                                                       |j        }|j        x| _        }|j        x| _        }|j        |z  x| _        }|dz  | _        t          j
        ||j                  | _        t          j
        ||j                  | _        t          j        |||z  d          | _        t          j        |||z  d          | _        t          j        |||z  d          | _        t          j        ||z  |d          | _        t          j        | j        |j        d          | _        d S )Ng      ࿩epsFbias)basetraditional)super__init__r   r   n_headsr   
n_kv_headshead_dimscalennRMSNormr   q_layernormk_layernormLinearq_projk_projv_projout_projRoPEr    rope)r0   r=   dimrG   rH   rI   	__class__s         r+   rF   zAttention.__init__6   s-   !%!99w'+'??*#'#3w#>>t^
:hDMBBB:hDMBBBiWx%7eDDDiZ(%:GGGiZ(%:GGG	'H"4cFFFGM
 
 
			r-   Nxmaskcachereturnc                    |j         \  }}}|                     |          |                     |          |                     |          }	}}|                     |                    ||| j        d                                        dddd          }|                     |                    ||| j	        d                                        dddd          }|	                    ||| j	        d                              dddd          }	|R| 
                    ||j                  }| 
                    ||j                  }|                    ||	          \  }}	n*| 
                    |          }| 
                    |          }t          |||	||| j                  }
|
                    dddd                              ||d          }
|                     |
          S )Nr      r      )offset)rZ   rY   rJ   )shaperP   rQ   rR   rM   reshaperG   	transposerN   rH   rU   r`   update_and_fetchr   rJ   rS   )r0   rX   rY   rZ   BLDquerieskeysvaluesoutputs              r+   __call__zAttention.__call__O   s    '1a $AAAv""7??1ar#J#JKKUUq!Q
 
 Q4?B G GHHRRq!Q
 
 1dor::DDQ1aPPiii==G99T%,977D 11$??LD&&ii((G99T??D-T6T
 
 
 !!!Q1--55aB??}}V$$$r-   NN)r2   r3   r4   r   rF   mxarrayr   r   rl   __classcell__rW   s   @r+   r<   r<   5   s        
Y 
 
 
 
 
 
8 $(#	% %8% rx % }	%
 
% % % % % % % %r-   r<   c                   n     e Zd Zdedef fdZ	 	 d	dej        deej                 dee	         fdZ
 xZS )
	ShortConvr=   	layer_idxc                    t                                                       || _        || _        |j        | _        |j        | _        t          j	        |j
        |j
        | j        |j
        | j                  | _        t          j        |j
        d|j
        z  | j                  | _        t          j        |j
        |j
        | j                  | _        d S )N)in_channelsout_channelskernel_sizegroupsrB   r_   rA   )rE   rF   r=   rt   r   L_cacher   rB   rK   Conv1dr   convrO   in_projrS   r0   r=   rt   rW   s      r+   rF   zShortConv.__init__q   s    
 		"(N	I()#
 
 
	 y!11t7G3GdiXXX	$"2D4D49UUUr-   NrX   rY   rZ   c                 d   |                      |          }t          j        |dd          \  }}}||z  }|t          j        |d         |d          }|
|d         <t          j        |j        d         | j        dz
  | j        j        f|j	                  }n|d         }t          j
        ||gd          }| j        dz
  }	|j        d         }
|j        ]t          j        |j        d|
          }|d d d f         t          j        |	          z   d         }t          j        ||d          |d<   n|d d |	 d d d f         |d<   |                    |
           n"t          j        |d| j        dz
  dfdg          }|                     |          }||z  }|                     |          S )	Nr_   r]   )axis).Nr   r   )dtype)r   r   )r}   rn   splitwherezerosra   rz   r=   r   r   concatenatelengthscliparangetake_along_axisadvancepadr|   rS   )r0   rX   rY   rZ   BCxre   CBxstaten_keeptends	positionsconv_outys                  r+   rl   zShortConv.__call__   s    ll1oo(3+++1aU$y/2q11BQxXa[$,"2DI4IJ(  
 a!444B\A%F
A}(wu}a33!!!!T']RYv->->>	J	-b)!DDDaaaa&111n-aMM!VdlQ&6%:FCDDB99R==L}}Qr-   rm   r2   r3   r4   r   r7   rF   rn   ro   r   r   rl   rp   rq   s   @r+   rs   rs   p   s        VV V V V V V V0 $(#	$  $ 8$  rx $  }	$  $  $  $  $  $  $  $ r-   rs   c                   V     e Zd Zdededededee         f
 fdZdej	        fdZ
 xZS )	MLPrV   ff_dimmultiple_ofauto_adjust_ff_dimffn_dim_multiplierc                 b   t                                                       |r7t          d|z  dz            }|t          ||z            }|||z   dz
  |z  z  }t          j        ||d          | _        t          j        ||d          | _        t          j        ||d          | _        d S )Nr^   r_   r   FrA   )rE   rF   r7   rK   rO   w1w3w2)r0   rV   r   r   r   r   rW   s         r+   rF   zMLP.__init__   s     	 	OVa((F!-/&899 Vk%9A%=+$MNF)Ce444)Ce444)FCe444r-   r[   c                     |                      t          |                     |          |                     |                              S N)r   r   r   r   )r0   rX   s     r+   rl   zMLP.__call__   s2    wwvdggajj$''!**55666r-   )r2   r3   r4   r7   r9   r   r8   rF   rn   ro   rl   rp   rq   s   @r+   r   r      s        55 5 	5
 !5 %UO5 5 5 5 5 5&7RX 7 7 7 7 7 7 7 7r-   r   c            	       |     e Zd Zdedef fdZ	 	 d
dej        deej                 dee	         dej        fd	Z
 xZS )Lfm2DecoderLayerr=   rt   c                    t                                                       ||j        v | _        | j        rt	          |          | _        nt          ||          | _        t          |j	        |j
        |j        |j        |j                  | _        t          j        |j        |j                  | _        t          j        |j        |j                  | _        d S )N)rV   r   r   r   r   r?   )rE   rF   r"   is_attention_layerr<   	self_attnrs   r|   r   r   r   r   r   r   feed_forwardrK   rL   r   r   operator_normffn_normr~   s      r+   rF   zLfm2DecoderLayer.__init__   s    "+t/B"B" 	3&t__DNN!$	22DI$.#<#<
 
 
  Z(8dmLLL
4#3GGGr-   NrX   rY   rZ   r[   c                 "   | j         r,|                     |                     |          ||          }n+|                     |                     |          ||          }||z   }||                     |                     |                    z   }|S )N)rY   rZ   )r   r   r   r|   r   r   )r0   rX   rY   rZ   rhouts          r+   rl   zLfm2DecoderLayer.__call__   s     " 	t11!444uMMAA		""1%%   A
 E$##DMM!$4$4555
r-   rm   r   rq   s   @r+   r   r      s        HY H3 H H H H H H, $(#	 8 rx  }	
 
       r-   r   c                   Z     e Zd Zdef fdZ	 	 ddej        deej                 fdZ xZ	S )	Lfm2Modelr=   c                    t                                                       | _        j        | _        j        | _        t          j        j        j                  | _        fdt          j                  D             | _
        t          j        j        j                  | _        j        d         | _        d| _        t          j                  D ]}|j        v r| xj        dz  c_         d S d S )Nc                 2    g | ]}t          |           S ))rt   )r   )r(   r)   r=   s     r+   r,   z&Lfm2Model.__init__.<locals>.<listcomp>   s4     
 
 
45TQ///
 
 
r-   r?   r   r   )rE   rF   r=   r   r   rK   	Embeddingr   embed_tokensrangelayersrL   r   embedding_normr"   fa_idxconv_idx)r0   r=   r)   rW   s    ` r+   rF   zLfm2Model.__init__   s   	/!%!7L$:JKK
 
 
 
9>t?U9V9V
 
 
 !j)9t}MMM)!,t-.. 	 	AD'''"		 	r-   Ninputsinput_embeddingsc                 h   ||}n|                      |          }|d gt          | j                  z  }t          ||| j                           }t          ||| j                           }t          | j        |          D ]\  }}|j        r|n|}	 |||	|          }| 	                    |          S )N)rZ   )
r   lenr   r
   r   r   r   zipr   r   )
r0   r   rZ   r   r   	attn_mask	conv_masklayercrY   s
             r+   rl   zLfm2Model.__call__  s     ' AA!!&))A=FS---E)!U4;-?@@	#AuT]';<<	DK// 	( 	(HE1 % 8G99iDaQ'''AA""1%%%r-   rm   )
r2   r3   r4   r   rF   rn   ro   r   rl   rp   rq   s   @r+   r   r      s        Y      . /3	& && #28,	& & & & & & & &r-   r   c                   |     e Zd Zdef fdZ	 	 d
dej        deej                 fdZd Z	e
d             Zd	 Z xZS )Modelr=   c                     t                                                       || _        |j        | _        t	          |          | _        d S r   )rE   rF   r=   r   r   model)r0   r=   rW   s     r+   rF   zModel.__init__  s:    	/t__


r-   Nr   r   c                 n    |                      |||          }| j         j                            |          S r   )r   r   	as_linear)r0   r   rZ   r   r   s        r+   rl   zModel.__call__!  s3     jj(899z&00555r-   c                     i }|                                 D ]A\  }}d|v r3|j        d         |j        d         k    r|                    ddd          }|||<   B|S )Nzconv.weightr]   r   r   r^   )itemsra   rc   )r0   weightssanitized_weightsnameparams        r+   sanitizezModel.sanitize*  sn    "==?? 	, 	,KD%$$;r?U[^33!OOAq!44E&+d##  r-   c                     | j         j        S r   )r   r   r/   s    r+   r   zModel.layers4  s    z  r-   c                 $    d | j         D             S )Nc                 X    g | ]'}|j         rt                      nt          d           (S )r   )size)r   r   r   )r(   ls     r+   r,   z$Model.make_cache.<locals>.<listcomp>9  sC     
 
 
 -FGIII;A3F3F3F
 
 
r-   )r   r/   s    r+   
make_cachezModel.make_cache8  s%    
 
[
 
 
 	
r-   rm   )r2   r3   r4   r   rF   rn   ro   r   rl   r   propertyr   r   rp   rq   s   @r+   r   r     s        %Y % % % % % % /3	6 66 #28,	6 6 6 6! ! ! ! ! X!
 
 
 
 
 
 
r-   r   )dataclassesr   typingr   r   r   mlx.corecorern   mlx.nnrK   activationsr   rC   r	   r
   r   r   rZ   r   r   r   Moduler<   rs   r   r   r   r   r'   r-   r+   <module>r      s   ! ! ! ! ! ! & & & & & & & & & &                              ( ' ' ' ' ' ' '        D8% 8% 8% 8% 8%	 8% 8% 8%v:  :  :  :  : 	 :  :  : z7 7 7 7 7") 7 7 70% % % % %ry % % %P*& *& *& *& *&	 *& *& *&Z"
 "
 "
 "
 "
BI "
 "
 "
 "
 "
r-   