
    )jw!                     d   d dl Z d dlmZ d dlmZmZmZ d dlmZ	 d dl
mZ ddlmZ ddlmZmZ ddlmZ ddlmZ e G d	 d
e                      Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    N)	dataclass)OptionalTupleUnion   )swiglu)BaseModelArgscreate_ssm_maskArraysCache)
ssm_updatec                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   eed<   eed<   eed<   eed<   eeef         ed<   e	eef         ed<   dZ
ee         ed<   dZeed<   d ZdS )	ModelArgs
model_type	num_headshead_dim
vocab_sizehidden_sizeintermediate_size
state_sizenum_hidden_layerslayer_norm_epsilonconv_kerneln_groupsuse_biasuse_conv_biastie_word_embeddingstime_step_limittime_step_rankNssm_state_sizei  max_position_embeddingsc                     | j         dk    r!t          j        | j        dz            | _         | j        | j        | _        d S d S )Nauto   )r   mathceilr   r    r   selfs    ^/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/mamba2.py__post_init__zModelArgs.__post_init__%   sK    &(("&)D,<r,A"B"BD&"&/D '&    )__name__
__module____qualname__str__annotations__intfloatboolr   r   r    r   r!   r*    r+   r)   r   r      s        OOONNNMMMOOOOOOMMMNNN5%<((((#s(O###$(NHSM(((#'S'''2 2 2 2 2r+   r   c                   ^     e Zd Zd
dedef fdZddej        dej        dej        fd	Z xZ	S )MambaRMSNormGatedư>r   epsc                     t                                                       || _        t          j        |          | _        d S N)super__init__r8   mxonesweight)r(   r   r8   	__class__s      r)   r<   zMambaRMSNormGated.__init__-   s5    gk**r+   Nhidden_statesgatereturnc                 |    |t          ||          }t          j                            || j        | j                  S r:   )r   r=   fastrms_normr?   r8   )r(   rA   rB   s      r)   __call__zMambaRMSNormGated.__call__2   s5    "477Mwt{DHEEEr+   )r7   r:   )
r,   r-   r.   r1   r2   r<   r=   arrayrG   __classcell__r@   s   @r)   r6   r6   ,   s        + +C +e + + + + + +
F Fbh Fbh F"( F F F F F F F Fr+   r6   c                   @    e Zd Zdedef fdZdej        dee	         deej                 dej        fdZ
d	ej        d
ej        dej        dej        dee	         deej                 dej        fdZ	 dd	ej        deej                 dee	         dej        fdZ xZS )Mamba2Blockargs	layer_idxc                 4   t                                                       || _        |j        | _        |j        | _        |j        | _        |j        | _        |j        |j        z  | _	        |j
        | _
        |j        | _        |j        | _        |j        | _        | j        | j        z  | _        |j        | _        | j	        d| j        z  | j        z  z   | _        t!          j        | j        | j        |j        d| j        |j
                  | _        | j	        | j        z   | j        z   }t!          j        | j        ||j                  | _        t+          j        | j                  | _        t+          j        t+          j        d| j        dz   t*          j                            | _        t+          j        | j                  | _        t;          | j	        |j                  | _        t!          j        | j	        | j        |j                  | _         d S )N   r   )in_channelsout_channelskernel_sizepaddinggroupsbiasrV   r   dtyper8   )!r;   r<   rN   r   r   r    r   conv_kernel_sizer   r   r   r   r   heads_per_groupr   conv_dimnnConv1dconv1dLinearin_projr=   r>   dt_biaslogarangefloat32A_logDr6   r   normout_proj)r(   rM   rN   projection_sizer@   s       r)   r<   zMamba2Block.__init__9   s   "+"1 $ 0!%$-!?!/#3#~>.T]1BTEX1XXi(=#
 
 
 04=@4>Qy!1?WWWwt~..VBIa!);2:NNNOO
((%"(?
 
 
	 	"D$44=
 
 
r+   
conv_inputcachemaskrC   c                    |t          j        |d         |d          }||d         7t          j        |j        d         | j        dz
  | j        f|j                  }n|d         }t          j        ||gd          }| j        dz
  }|j        m|j        d         }t          j	        |j        d||z
            }|d d d f         t          j
        |          z   d         }	t          j        ||	d          |d<   n8|d d | d d d f         |d<   n"t          j        |d| j        dz
  dfdg          }|                     |          }
t          j        |
          S )N).Nr   r   rX   axis)r   r   )r=   wherezerosshaper[   r]   rY   concatenatelengthsclipre   take_along_axispadr`   r^   silu)r(   rl   rm   rn   
conv_statepadded_inputn_keeptends	positionsconv_outputs              r)   _convzMamba2Block._conva   s}    $y/:qAAJQxX%a($*?!*CT]S$*  


 #1X
>:z*BKKKL*Q.F}( &q)wu}aV<<!!!!T']RYv->->>	J	-lIANNNa'F788QQQ7a6Vd&;a&?%CVL L kk,//w{###r+   rA   BCdtc                    |j         \  }}}	|                    ||| j        | j                  }|                    ||| j        | j                  }|                    ||| j        | j                  }|r|d         }
|j        }nd\  }
}t          || j        ||| j	        || j
        |
| j        ||          \  }}
|r|
|d<   |                    ||| j                  S )Nr   )NN)rt   reshaper   r   r   r    rv   r   rg   rh   rc   r   r   )r(   rA   r   r   r   rm   rn   
batch_sizeseq_len_staterv   ys                r)   _ssmzMamba2Block._ssm   s     "/!4
GQ%--
 
 IIj'4=$:MNNIIj'4=$:MNN 	(!HEmGG'NE7JFL 
 
5  	E!HyyWd.DEEEr+   Nc                    |                      |          }t          j        || j        | j        | j        z   gd          \  }}}|                     |||          }t          j        || j        | j        | j        | j        z  z   gd          \  }}	}
|                     ||	|
|||          }|r |	                    |j
        d                    |                     ||          }|                     |          S )Nrp   )rn   r   )rb   r=   splitr   r]   r   r   r    r   advancert   ri   rj   )r(   rA   rn   rm   	projectedrB   rl   r   r   r   r   r   s               r)   rG   zMamba2Block.__call__   s    LL//	!x#T%;dm%KL 
  
  
j"
 jjUD99 h&&9L)LL 
 
 
q! IImQ2u4I@@ 	&MM!'!*%%%IIa}}Qr+   r:   )r,   r-   r.   r   r1   r<   r=   rH   r   r   r   r   rG   rI   rJ   s   @r)   rL   rL   8   se       &
Y &
3 &
 &
 &
 &
 &
 &
P $H $ $ $ rx 	 $
 
 $  $  $  $D#Fx#F 8#F 8	#F
 H#F $#F rx #F 
#F #F #F #FR (,	   x  rx   $	 
 
               r+   rL   c            	       z     e Zd Zdedef fdZ	 d
dej        deej                 dee	         dej        fd	Z
 xZS )ResidualBlockrM   rN   c                     t                                                       t          ||          | _        t	          j        |j                  | _        d S r:   )r;   r<   rL   mixerr^   RMSNormr   ri   )r(   rM   rN   r@   s      r)   r<   zResidualBlock.__init__   sB     y11
Jt/00			r+   Nxrn   rm   rC   c                 `    |                      |                     |          ||          }||z   S r:   )r   ri   )r(   r   rn   rm   outputs        r)   rG   zResidualBlock.__call__   s-     DIIaLL$66zr+   r:   )r,   r-   r.   r   r1   r<   r=   rH   r   r   rG   rI   rJ   s   @r)   r   r      s        1Y 13 1 1 1 1 1 1 UY !)"(!3<D[<Q	       r+   r   c                   h     e Zd Zdef fdZ	 ddej        deee	                  dej        fdZ
 xZS )	Mamba2rM   c                 4   t                                                       | _        t          j        j        j                  | _        fdt          j	                  D             | _
        t          j        j        j                  | _        d S )Nc                 0    g | ]}t          |          S r4   )r   ).0irM   s     r)   
<listcomp>z#Mamba2.__init__.<locals>.<listcomp>   s#    UUU!}T1--UUUr+   rZ   )r;   r<   rM   r^   	Embeddingr   r   
embeddingsranger   layersr   r   norm_fr(   rM   r@   s    `r)   r<   zMamba2.__init__   s    	,t8HIIUUUUuT=S7T7TUUUj!1t7NOOOr+   Nr   rm   rC   c                    |                      |          }|d gt          | j                  z  }t          ||d                   }t	          | j        |          D ]\  }} ||||          }|                     |          S )Nr   )r   lenr   r
   zipr   )r(   r   rm   hiddenrn   layercs          r)   rG   zMamba2.__call__   s     ##=FS---EvuQx00DK// 	, 	,HE1U64++FF{{6"""r+   r:   )r,   r-   r.   r   r<   r=   rH   r   listr   rG   rI   rJ   s   @r)   r   r      s        PY P P P P P P AE# ##"*4+<"=#	# # # # # # # #r+   r   c                        e Zd Zdef fdZ	 ddej        deee	                  dej        fdZ
dd	edee	         fd
Zed             Zd Z xZS )ModelrM   c                     t                                                       || _        |j        | _        t	          |          | _        |j        s(t          j        |j	        |j
        d          | _        d S d S )NFrW   )r;   r<   rM   r   r   backboner   r^   ra   r   r   lm_headr   s     r)   r<   zModel.__init__   so    	/t' 	T9T%5tUSSSDLLL	T 	Tr+   Ninputsrm   rC   c                     |                      ||          }| j        j        r | j         j                            |          }n|                     |          }|S r:   )r   rM   r   r   	as_linearr   )r(   r   rm   r   logitss        r)   rG   zModel.__call__   sT     vu--9( 	*]-77??FF\\&))Fr+   r   r   c                 H    d t          | j        j                  D             S )Nc                 .    g | ]}t          d           S )rP   )sizer   )r   r   s     r)   r   z$Model.make_cache.<locals>.<listcomp>   s#    PPP###PPPr+   )r   rM   r   )r(   r   s     r)   
make_cachezModel.make_cache   s#    PPU493N-O-OPPPPr+   c                     | j         j        S r:   )r   r   r'   s    r)   r   zModel.layers   s    }##r+   c                     |                                 D ]3\  }}d|v r*|j        d         dk    r|                    dd          ||<   4|S )Nzconv1d.weightr   r   rP   )itemsrt   moveaxis)r(   weightskvs       r)   sanitizezModel.sanitize  sU    MMOO 	. 	.DAq!##q(8(8ZZ1--
r+   r:   )r   )r,   r-   r.   r   r<   r=   rH   r   r   r   rG   r1   r   propertyr   r   rI   rJ   s   @r)   r   r      s        TY T T T T T T FJ	 	h	'/[0A'B			 	 	 	Q QS Qk1B Q Q Q Q $ $ X$      r+   r   )r%   dataclassesr   typingr   r   r   mlx.corecorer=   mlx.nnr^   activationsr   baser	   r
   rm   r   ssmr   r   Moduler6   rL   r   r   r   r4   r+   r)   <module>r      s    ! ! ! ! ! ! ) ) ) ) ) ) ) ) ) )                   0 0 0 0 0 0 0 0             2 2 2 2 2 2 2 26	F 	F 	F 	F 	F	 	F 	F 	FI  I  I  I  I ") I  I  I X
 
 
 
 
BI 
 
 
# # # # #RY # # #.         BI          r+   