
    )j 7                        d dl mZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 ddlmZ ddlmZ e G d d	e                      Z eej        d
          d             Z eej        d
          d             Zej        d             Zd Z e            Zdej        dej        dej        dej        dej        dej        dej        fdZ G d de
j                  Z G d de
j                  Z G d de
j                  Z G d de
j                  Z G d  d!e
j                  Z G d" d#e
j                  Z G d$ d%e
j                  Z G d& d'e
j                  Z dS )(    )	dataclass)partial)OptionalN   )BaseModelArgsArraysCachec                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   eed<   dZeed<   dS )	ModelArgs
model_type
vocab_sizehidden_sizeintermediate_sizenorm_epshead_dimnum_hidden_layersa_low_rank_dimv_low_rank_dimgate_low_rank_dimdecay_low_rank_dimFtie_word_embeddingsN)	__name__
__module____qualname__str__annotations__intfloatr   bool     ]/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/rwkv7.pyr   r      s         OOOOOOOOOMMM %%%%%%r!   r   T)	shapelessc                     | ||z  z   S Nr    )xyzs      r"   addcmulr)      s    q1u9r!   c                 r    | t          j        t           j                            | dd          d          z  S )NTaxiskeepdimsgHz>)mxmaximumlinalgnorm)r&   s    r"   l2_normr3   #   s-    rz")..d.CCTJJJJr!   c                     ||d         z  |dd d d f         z  }||d d d d d d d f         z  |d         |dd d d f         z  z   |z   }|| d         z  }||fS )N).N.r    )	rwkvabstatesabr'   s	            r"   _wkv7_step_opsr=   (   s~    1Y<1S$\?
2CAaaaD!!!m$$q|aT111o'EEKE)Ae8Or!   c                      t           j                                        sd S d} g d}t           j                            d|ddg|           S )Na  
        auto n = thread_position_in_grid.z;
        auto b_idx = n / H;
        auto h_idx = n % H;
        constexpr int n_per_t = D / 32;

        // [B, T, H, D]
        auto r_ = r + b_idx * T * H * D + h_idx * D;
        auto w_ = w + b_idx * T * H * D + h_idx * D;
        auto k_ = k + b_idx * T * H * D + h_idx * D;
        auto v_ = v + b_idx * T * H * D + h_idx * D;
        auto a_ = a + b_idx * T * H * D + h_idx * D;
        auto b_ = b + b_idx * T * H * D + h_idx * D;
        y += b_idx * T * H * D + h_idx * D;

        auto dk_idx = thread_position_in_threadgroup.x;
        auto dv_idx = thread_position_in_grid.y;

        // state_in, state_out: [B, H, D, D]
        auto i_state = state_in  + (n * D + dv_idx) * D;
        auto o_state = state_out + (n * D + dv_idx) * D;

        float state[n_per_t];
        for (int i = 0; i < n_per_t; ++i) {
          auto s_idx = n_per_t * dk_idx + i;
          state[i] = static_cast<float>(i_state[s_idx]);
        }

        for (int t = 0; t < T; ++t) {
          float sa = 0.0f;
          for (int i = 0; i < n_per_t; ++i) {
            auto s_idx = n_per_t * dk_idx + i;
            sa += state[i] * a_[s_idx];
            state[i] = state[i] * w_[s_idx];
          }
          sa = simd_sum(sa);

          float out = 0.0f;
          for (int i = 0; i < n_per_t; ++i) {
            auto s_idx = n_per_t * dk_idx + i;
            state[i] = state[i] + k_[s_idx] * v_[dv_idx] + sa * b_[s_idx];
            out += state[i] * r_[s_idx];
          }
          out = simd_sum(out);
          if (thread_index_in_simdgroup == 0) {
            y[dv_idx] = static_cast<InT>(out);
          }

          // Increment data pointers to next time step
          r_ += H * D;
          w_ += H * D;
          k_ += H * D;
          v_ += H * D;
          a_ += H * D;
          b_ += H * D;
          y  += H * D;
        }
        for (int i = 0; i < n_per_t; ++i) {
          auto s_idx = n_per_t * dk_idx + i;
          o_state[s_idx] = static_cast<InT>(state[i]);
        }
    )r5   r6   r7   r8   r9   r:   state_inTwkv7_kernelr'   	state_out)nameinput_namesoutput_namessource)r/   metalis_availablefastmetal_kernel)rF   inputss     r"   _make_wkv7_kernelrL   0   sb    8  "" t=F| =<<F7;'	     r!   r5   r6   r7   r8   r9   r:   r;   c           
          | j         \  }}}	}
| j        }t          | |||||||gd|fd|	fd|
fgd|
||	z  fd|||	|
f|j         g||g          S )NInTHD    )rQ      r   )rK   templategridthreadgroupoutput_shapesoutput_dtypes)shapedtype_wkv7_kernel)r5   r6   r7   r8   r9   r:   r;   Br@   rO   rP   input_dtypes               r"   rA   rA   }   s     JAq!Q'K1aAq%+K !H!H

 !QU^1a|U[1"K0   r!   c                   $     e Zd Z fdZd Z xZS )LayerNormPerHeadc                     t                                                       t          j        ||f          | _        t          j        ||f          | _        || _        d S r%   )super__init__r/   zerosweightbiaseps)selfr   	num_headsre   	__class__s       r"   ra   zLayerNormPerHead.__init__   sP    h	8455Hi233	r!   c                 p    | j         t          j                            |d d | j                  z  | j        z   S r%   )rc   r/   rI   
layer_normre   rd   rf   r&   s     r"   __call__zLayerNormPerHead.__call__   s.    {RW//4txHHH49TTr!   )r   r   r   ra   rl   __classcell__rh   s   @r"   r^   r^      sN            U U U U U U Ur!   r^   c                   h     e Zd Z	 	 ddedededee         dee         f
 fdZd	ej	        fd
Z
 xZS )LoRATtanh	input_dim
output_dimlow_rank_dimrd   
activationc                 :   t                                                       || _        || _        || _        || _        |t          j                    | _        np|dk    rt          j	                    | _        nQ|dk    rt          j
                    | _        n2|dk    rt          j                    | _        nt          d| d          t          j        | j        | j        d          | j        t          j        | j        | j        | j                  g| _        d S )Nsigmoidrq   reluzUnsupported activation type: .Frd   )r`   ra   rr   rs   rt   rd   nnIdentityru   SigmoidTanhReLU
ValueErrorLinearlora)rf   rr   rs   rt   rd   ru   rh   s         r"   ra   zLoRA.__init__   s     	"$(	 kmmDOO9$$ jllDOO6!! giiDOO6!! giiDOOJZJJJKKK Idnd&7eDDDOId'tyIII
			r!   returnc                 ~     | j         d          | j         d          | j         d         |                              S )N   r   r   )r   rk   s     r"   rl   zLoRA.__call__   s6    ty|LDIaL1a99:::r!   )Trq   )r   r   r   r   r   r   r   ra   r/   arrayrl   rm   rn   s   @r"   rp   rp      s          $$*
 

 
 	

 tn
 SM
 
 
 
 
 
>;RX ; ; ; ; ; ; ; ;r!   rp   c                       e Zd Zd ZdS )
TokenShiftc                     |j         \  }}}|t          j        |d|f|j                  }|dk    r|S t          j        ||d d d dd d f         gd          S )Nr   r+   r-   )rX   r/   rb   rY   concatenate)rf   r&   r;   r[   LrP   s         r"   rl   zTokenShift.__call__   sk    '1a=HaAY00E66L>5!AAAssAAAI,"7a@@@@r!   N)r   r   r   rl   r    r!   r"   r   r      s(        A A A A Ar!   r   c                   :     e Zd Zdef fdZdej        fdZ xZS )Rwkv7ChannelMixingargsc                 ,   t                                                       |j        }|j        }t	          j        ||d          | _        t	          j        ||d          | _        t          j	        |          | _
        t                      | _        d S NFrz   )r`   ra   r   r   r{   r   keyvaluer/   rb   x_kr   token_shift)rf   r   
hidden_dimr   rh   s       r"   ra   zRwkv7ChannelMixing.__init__   s    %
 29Z):GGGY0*5III
8Z))%<<r!   r   c                    ||d         nd }|                      ||          }t          |||z
  | j                  }||d d dd d d f         |d<   |                     t	          j        |                     |                              S )Nr   r+   )r   r)   r   r   r{   relu2r   )rf   r&   cacher;   x_prevxxs         r"   rl   zRwkv7ChannelMixing.__call__   s    !-a4!!!U++Q
DH--BCC|E!Hzz"(488B<<00111r!   	r   r   r   r   ra   r/   r   rl   rm   rn   s   @r"   r   r      sa        (Y ( ( ( ( ( (2BH 2 2 2 2 2 2 2 2r!   r   c                   4     e Zd Zdedef fdZd Zd Z xZS )Rwkv7TimeMixingr   	layer_idxc                 n   t                                                       || _        || _        |j        | _        |j        | _        | j        | j        z  | _        |j        | _        |j        | _        |j	        | _	        |j
        | _
        t                      | _        t          j        dd| j        f          | _        t          j        dd| j        f          | _        t          j        dd| j        f          | _        t          j        dd| j        f          | _        t          j        dd| j        f          | _        t          j        dd| j        f          | _        t          j        | j        | j        f          | _        t          j        | j        | j        f          | _        t          j        | j        | j        f          | _        t1          j        | j        | j        d          | _        t1          j        | j        | j        d          | _        t1          j        | j        | j        d          | _        t1          j        | j        | j        d          | _        t=          | j        | j        d          | _        tA          | j        | j        | j
        d          | _!        | j        dk    r'tA          | j        | j        | j        d           | _"        tA          | j        | j        | j        d           | _#        tA          | j        | j        | j	        d	d
          | _$        d S )Nr   Frz   gh㈵D?re   rq   )rt   ru   r   rw   )rt   ru   rd   )%r`   ra   r   r   r   r   rg   r   r   r   r   r   r   r/   rb   x_rx_wr   x_vx_ax_gk_kk_ar_kr{   r   r_projk_projv_projo_projr^   g_normrp   w_lorav_loraa_lorag_lorarf   r   r   rh   s      r"   ra   zRwkv7TimeMixing.__init__   s   "	+)T]:"1"1!%!7"&"9%<<8Q4#34558Q4#34558Q4#34558Q4#34558Q4#34558Q4#34558T^T];<<8T^T];<<8T^T];<<i 0$2BOOOi 0$2BOOOi 0$2BOOOi 0$2BOOO&t}dn%PPP0	
 
 
 >A  !0	  DK ,	
 
 
 / 
 
 
r!   c                    |j         \  }}	}
}
|.t          j        || j        | j        | j        f|j                  }t          j                    t          j        k    r3t          j        	                                rt          |||||||          S g }t          |	          D ]k}t          |d d |f         |d d |f         |d d |f         |d d |f         |d d |f         |d d |f         |          \  }}|                    |           lt          j        |d                              |j                  }||fS )N)rY   r   r   )rX   r/   rb   rg   r   rY   default_devicegpurG   rH   rA   ranger=   appendstackastype)rf   r5   r6   r7   r8   r9   r:   r;   r[   r   _ystr'   s                 r"   _wkv7zRwkv7TimeMixing._wkv7'  sK   W
1a=HDNDM4=A  E "&((RX-B-B-D-D(q!Q1a777B1XX  )aaadGQqqq!tWa1gqAw!!!Q$111a4% 5 		!!$$$++AG44Ae8Or!   c           	      h   |d\  }}n|d         |d         }}|j         \  }}}|                     ||          }	|	|z
  }
t          ||
| j                  }t          ||
| j                  }t          ||
| j                  }t          ||
| j                  }t          ||
| j                  }t          ||
| j                  }| 	                    |          
                    ||| j        | j                  }|                     |          
                    ||| j        | j                  }|                     |          
                    ||| j        | j                  }t          j        |                     |                    
                    ||| j        | j                  }|                     |          }| j        dk    r|}n[t          j        |                     |                    
                    ||| j        | j                  }t          |||z
  |          }t          j        |                     |          
                    ||| j        | j                                                t          j                  }t          j        d|z                                |j                  }t5          || j        z            }|d|dz
  | j        z  z   z  }| }||z  }|                     |||||||          \  }}|                     |
                    ||| j        | j                            }|||z  | j        z                       dd          |z  z   
                    |||g          }||d d dd d d f         |d<   ||d<   | !                    ||z            }||fS )N)NNr   r   g]S hr+   Tr,   )"rX   r   r)   r   r   r   r   r   r   r   reshaperg   r   r   r   r/   rw   r   r   r   r   r   r   float32exprY   r3   r   r   r   r   r   sumr   )rf   r&   v_firstr   token_shift_cachestate_cacher[   r   rP   r   r   xrxwxkxvxaxgr   r   
receptanceiclrgatevvdecaykkr9   r:   outnew_state_caches                                r"   rl   zRwkv7TimeMixing.__call__;  si   =-7*{{-21XuQx{'1a!!!%677aZQDH%%QDH%%QDH%%QDH%%QDH%%QDH%%kk"oo%%aDNDMJJB''1dndmLL[[__,,Q4>4=QQ
z$++b//**221aWW{{2>QGGDKKOO,,441dndm B E7U?B77E
KKOO##Aq$.$-HH
 

&

 	 y5())001ABBcDHn&&Q$(dh../CI#zzsE1a 
  
_ kk#++aDNDMJJKK:#dh.33d3KKeSS
'1a)

 	 BCC|E!H&E!Hkk#*%%G|r!   )	r   r   r   r   r   ra   r   rl   rm   rn   s   @r"   r   r      sj        <
Y <
3 <
 <
 <
 <
 <
 <
|  (5 5 5 5 5 5 5r!   r   c                   .     e Zd Zdedef fdZd Z xZS )
Rwkv7Layerr   r   c                    t                                                       || _        | j        dk    r%t          j        |j        |j                  | _        t          || j                  | _	        t          |          | _        t          j        |j        |j                  | _        t          j        |j        |j                  | _        d S )Nr   r   r   )r`   ra   r   r{   	LayerNormr   r   pre_normr   attnr   ffn	attn_normffn_normr   s      r"   ra   zRwkv7Layer.__init__t  s    ">QL)9t}MMMDM#DDNCCC	%d++d&6DMJJJT%54=IIIr!   c                    | j         dk    r|                     |          }|                     |                     |          ||          \  }}||z   }||                     |                     |          |          z   }||fS )Nr   )r   r   r   r   r   r   )rf   r&   r   r   hr   s         r"   rl   zRwkv7Layer.__call__~  s|    >Qa  AYYt~~a00'5AA
7E$((4==++U333G|r!   )r   r   r   r   r   ra   rl   rm   rn   s   @r"   r   r   s  sb        JY J3 J J J J J J      r!   r   c                   :     e Zd Zdef fdZdej        fdZ xZS )
Rwkv7Modelr   c                 &   t                                                       t          j        j        j                  | _        fdt          j                  D             | _	        t          j
        j        j                  | _        d S )Nc                 2    g | ]}t          |           S )r   )r   ).0ir   s     r"   
<listcomp>z'Rwkv7Model.__init__.<locals>.<listcomp>  s3     
 
 
./Jtq)))
 
 
r!   r   )r`   ra   r{   	Embeddingr   r   
embeddingsr   r   layersr   r   r2   rf   r   rh   s    `r"   ra   zRwkv7Model.__init__  s    ,t8HII
 
 
 
389O3P3P
 
 
 L!1t}EEE			r!   r&   c                     |                      |          }|d gt          | j                  z  }d }t          | j        |          D ]\  }} ||||          \  }}|                     |          S r%   )r   lenr   zipr2   )rf   r&   r   r   layercs         r"   rl   zRwkv7Model.__call__  s{    OOA=FS---EDK// 	. 	.HE1q'1--JAwwyy||r!   r   rn   s   @r"   r   r     sh        FY F F F F F F"(        r!   r   c                   t     e Zd Zdef fdZd
dej        fdZd Ze	d             Z
d Ze	d	             Z xZS )Modelr   c                     t                                                       || _        |j        | _        t	          |          | _        |j        s(t          j        |j	        |j
        d          | _        d S d S r   )r`   ra   r   r   r   modelr   r{   r   r   r   lm_headr   s     r"   ra   zModel.__init__  sq    	/%%
' 	T9T%5tUSSSDLLL	T 	Tr!   NrK   c                     |                      ||          }| j        j        r | j         j                            |          }n|                     |          }|S r%   )r   r   r   r   	as_linearr   )rf   rK   r   r&   logitss        r"   rl   zModel.__call__  sP    JJvu%%9( 	%Z*44Q77FF\\!__Fr!   c                 X    d t          t          | j                            D             S )Nc                 .    g | ]}t          d           S )   )sizer   )r   r   s     r"   r   z$Model.make_cache.<locals>.<listcomp>  s#    EEE###EEEr!   )r   r   r   rf   s    r"   
make_cachezModel.make_cache  s(    EEU3t{3C3C-D-DEEEEr!   c                     | j         j        S r%   )r   r   r   s    r"   r   zModel.layers  s    z  r!   c                     |                                 D ]Q\  }}d|v sd|v sd|v r@||                             | j        j        | j        j        z  | j        j                  ||<   R|S )Nr   r   r   )itemsr   r   r   r   )rf   weightsr7   r8   s       r"   sanitizezModel.sanitize  st    MMOO 	 	DAqzzUaZZ8q==$QZ//I)TY-??AS 
 r!   c                     d }|S )Nc                     d| v sd| v rddiS dS )Nzlora.2r   bits   Tr    )pathr   s     r"   	predicatez(Model.quant_predicate.<locals>.predicate  s&    4<4#7#7{"4r!   r    )rf   r	  s     r"   quant_predicatezModel.quant_predicate  s    	 	 	
 r!   r%   )r   r   r   r   ra   r/   r   rl   r   propertyr   r  r
  rm   rn   s   @r"   r   r     s        TY T T T T T T rx    F F F ! ! X!     X    r!   r   )!dataclassesr   	functoolsr   typingr   mlx.corecorer/   mlx.nnr{   baser   r   r	   r   compiler)   r3   r=   rL   rZ   r   rA   Moduler^   rp   r   r   r   r   r   r   r    r!   r"   <module>r     s@   " ! ! ! ! !                                     & & & & & & & & 	t$$$  %$ 	t$$$K K %$K   G G GT ! ""	x	x 
x 
x	
 
x 
x 8   4U U U U Ury U U U!; !; !; !; !;29 !; !; !;HA A A A A A A A2 2 2 2 2 2 2 2.H H H H Hbi H H HV       *       () ) ) ) )BI ) ) ) ) )r!   