
    )j                        d dl Z d dlmZ d dlmZ d dlmZ ddlm	Z	 ddl
mZ ddlmZ e G d de                      Z G d	 d
ej                  Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    N)	dataclass   )swiglu)BaseModelArgsArraysCachec                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   dZeed<   dZeed<   dZ	e
ed<   d ZdS )	ModelArgs
model_type
vocab_sizehidden_sizeintermediate_size
state_sizenum_hidden_layersconv_kerneluse_biasuse_conv_biastime_step_rankTtie_word_embeddingsFuse_bcdt_rmsgư>mixer_rms_epsc                 F   t          | d          st          | d          r| j        | _        t          | d          st          | d          r| j        | _        t          | d          st          | d          r| j        | _        t          | d          st          | d          r| j        | _        t          | d          st          | d	          r| j	        | _        t          | d
          st          | d          r| j
        | _        t          | d          st          | d          r| j        | _        t          | d          st          | d          r| j        | _        | j        dk    r!t#          j        | j        dz            | _        | j        dk    r	d| _        d S d S )Nr   d_modelr   d_innerr   d_stater   n_layern_layersr   d_convr   biasr   	conv_biasauto   falcon_mambaT)hasattrr   r   r   r   r   r   r   r   r   r   r   r   r   r    r   r   mathceilr   r   selfs    ]/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/mamba.py__post_init__zModelArgs.__post_init__   s   t]++ 	,i0H0H 	,#|Dt011 	2gdI6N6N 	2%)\D"t\** 	+wtY/G/G 	+"lDOt011 	2gdI6N6N 	2%)\D"t011 	3gdJ6O6O 	3%)]D"t]++ 	+h0G0G 	+#{DtZ(( 	&WT6-B-B 	& IDMt_-- 	0'$2L2L 	0!%D&(("&)D,<r,A"B"BD?n,, $D -,    N)__name__
__module____qualname__str__annotations__intboolr   r   r   floatr*    r+   r)   r
   r
      s         OOOOOOOOONNN $$$$L$M5% % % % %r+   r
   c                   8     e Zd Zdef fdZddZd Zd Z xZS )
MambaBlockargsc                    t                                                       | _        j        | _        j        | _        j        | _        j        | _        t          j
                  | _
        j        | _        j        | _        | j        r
fd| _        t          j        | j        | j        dz  j                  | _        t          j        | j        | j        | j        | j        | j        d          | _        t          j        | j        | j
        d| j        z  z   d          | _        t          j        | j
        | j        d          | _        t-          j        t-          j        d| j        dz                                 d	| j        g          | j        d
          }t-          j        |          | _        t-          j        | j        g          | _        t          j        | j        | j        j                  | _        d S )Nc                     t           j                            | t          j        | j        d         | j                  j                  S )N)eps)mxfastrms_normonesshapedtyper   )xr7   s    r)   <lambda>z%MambaBlock.__init__.<locals>.<lambda>C   s<    (8(827172;00d6H )9 ) ) r+      r   r   )in_channelsout_channelskernel_sizegroupsr   paddingFTg      ?r   )repeatsaxis)super__init__r7   r   r   ssm_state_sizer   conv_kernel_sizer   r1   r   r   r   
mixer_normnnLinearr   in_projConv1dconv1dx_projdt_projr<   repeatarangereshapelogA_logr?   Dout_proj)r(   r7   A	__class__s    ` r)   rN   zMambaBlock.__init__7   s   	+"o $ 0!%!7!$"566!/ - 	   DO yd4q8t}
 
 
 i./-)#
 
 
 i"!d&9"99
 
 

 y!4d6LSWXXXIIc4.455==q$BU>VWW*
 
 

 VAYY
$0122	"D$44=
 
 
r+   Nc           	         | j         }|                     |          }t          | j        r| j        nd t          j        || j        | j        | j        z   gd                    \  }}}| j        rt          | j        |||f          \  }}}t          j
        |                     |                    }t          j        ||z  d          t          j        |d          z  }	|0|	|t          j        t          j        |d          |z            z  z  }	|	t          j        |d          z                      d          }
|
||z  z   }
|
|	fS )Nc                     | S Nr4   )rB   s    r)   rC   z%MambaBlock.ssm_step.<locals>.<lambda>k   s     r+   r:   rL   r   rD   )r^   rW   mapr   rQ   r<   splitr   rO   rR   softplusrX   expand_dimsexpsqueeze)r(   rB   r`   stater^   deltaBCdeltaBC	new_stateys              r)   ssm_stepzMambaBlock.ssm_stepg   sF   F++a..#0ADOOkkH$d&9D<O&OP  
 
q!  	>doq!}==KE1aDLL//00N519b11BN1a4H4HH	ub(A(AA(E!F!FFFI2...77::AI)|r+   c                    |j         \  }}}|                     |          }|                    dd          \  }}| j        }	|t	          j        ||gd          }
nt	          j        |d|	dz
  dfdg          }
|                     |
          }|
d d |	dz
   d d d f         }t          j	        |          }t	          j
        | j                   }|}g }t          |          D ];}|                     |d d |f         ||          \  }}|                    |           <t	          j        |d          }|                     t#          ||                    }|||ffS )NrD   r:   )indices_or_sectionsrL   r   re   )r   r   r   )r@   rT   rg   rP   r<   concatenatepadrV   rR   silurj   r]   rangers   appendstackr_   r   )r(   rB   
conv_cachestate_cachero   Tr^   xzzKx_fullconv_outnew_conv_cacher`   current_staterr   ty_ts                     r)   _process_sequencezMambaBlock._process_sequence|   si   '1a\\!__xxABx771!!^ZO!<<<FFVAQ
F;<<F;;v&&QU8::qqq 01GHVDJ#q 	 	A!%qAw=!I!ICHHSMMMMHQQMM&A,,''>=111r+   c                     |d\  }}n|d         |d         }}|                      |||          \  }\  }}t          |t                    r
||d<   ||d<   |S )N)NNr   r   )r   
isinstancer   )r(   rB   cacher|   r}   outputr   new_state_caches           r)   __call__zMambaBlock.__call__   su    =&0#J&+AhaJ484J4Jz;5
 5
11 e[)) 	'%E!H&E!Hr+   rd   )	r,   r-   r.   r
   rN   rs   r   r   __classcell__ra   s   @r)   r6   r6   6   sw        .
Y .
 .
 .
 .
 .
 .
`   *2 2 2,      r+   r6   c                   :     e Zd Zdef fdZdej        fdZ xZS )ResidualBlockr7   c                     t                                                       t          |          | _        t	          j        |j                  | _        d S rd   )rM   rN   r6   mixerrR   RMSNormr   normr(   r7   ra   s     r)   rN   zResidualBlock.__init__   s@    %%
Jt/00			r+   rB   c                 Z    |                      |                     |          |          |z   S rd   )r   r   )r(   rB   r   s      r)   r   zResidualBlock.__call__   s%    zz$))A,,..22r+   	r,   r-   r.   r
   rN   r<   arrayr   r   r   s   @r)   r   r      sa        1Y 1 1 1 1 1 1
3"( 3 3 3 3 3 3 3 3r+   r   c                   :     e Zd Zdef fdZdej        fdZ xZS )Mambar7   c                    t                                                       t          j        j        j                  | _        fdt          j                  D             | _	        t          j
        j                  | _        d S )Nc                 .    g | ]}t                    S r4   )r   ).0_r7   s     r)   
<listcomp>z"Mamba.__init__.<locals>.<listcomp>   s!    RRRq}T**RRRr+   )rM   rN   rR   	Embeddingr   r   
embeddingsry   r   layersr   norm_fr   s    `r)   rN   zMamba.__init__   sp    ,t8HIIRRRRE$:P4Q4QRRRj!122r+   rB   c                     |                      |          }|d gt          | j                  z  }t          | j        |          D ]\  }} |||          }|                     |          S rd   )r   lenr   zipr   )r(   rB   r   layercs        r)   r   zMamba.__call__   sn    OOA=FS---EDK// 	 	HE1aAA{{1~~r+   r   r   s   @r)   r   r      sa        3Y 3 3 3 3 3 3"(        r+   r   c                   ^     e Zd Zdef fdZd	dej        fdZd Ze	d             Z
d Z xZS )
Modelr7   c                     t                                                       || _        |j        | _        t	          |          | _        |j        s(t          j        |j	        |j
        d          | _        d S d S )NFrE   )rM   rN   r7   r   r   backboner   rR   rS   r   r   lm_headr   s     r)   rN   zModel.__init__   so    	/d' 	T9T%5tUSSSDLLL	T 	Tr+   Ninputsc                     |j         \  }}|                     ||          }| j        j        r | j        j                            |          }n|                     |          }|S rd   )r@   r   r7   r   r   	as_linearr   )r(   r   r   ro   r~   rB   logitss          r)   r   zModel.__call__   s[    |1MM&%((9( 	%]-77::FF\\!__Fr+   c                 X    d t          t          | j                            D             S )Nc                 .    g | ]}t          d           S )rD   )sizer   )r   r   s     r)   r   z$Model.make_cache.<locals>.<listcomp>   s#    EEE###EEEr+   )ry   r   r   r'   s    r)   
make_cachezModel.make_cache   s(    EEU3t{3C3C-D-DEEEEr+   c                     | j         j        S rd   )r   r   r'   s    r)   r   zModel.layers   s    }##r+   c                     |                                 D ]3\  }}d|v r*|j        d         dk    r|                    dd          ||<   4|S )Nzconv1d.weightr:   r   rD   )itemsr@   moveaxis)r(   weightskvs       r)   sanitizezModel.sanitize   sU    MMOO 	. 	.DAq!##q(8(8ZZ1--
r+   rd   )r,   r-   r.   r
   rN   r<   r   r   r   propertyr   r   r   r   s   @r)   r   r      s        TY T T T T T T
 
rx 
 
 
 
F F F $ $ X$      r+   r   )r%   dataclassesr   mlx.corecorer<   mlx.nnrR   activationsr   baser   r   r   r
   Moduler6   r   r   r   r4   r+   r)   <module>r      sv    ! ! ! ! ! !                               $% $% $% $% $% $% $% $%Nj j j j j j j jZ3 3 3 3 3BI 3 3 3    BI             BI          r+   