
    )js/                     |   d dl Z d dlmZ d dlmZ d dlmZ d dlmZm	Z	m
Z
mZ d dlmZ d dlmZ ddlmZ ddlmZmZmZ dd	lmZmZ dd
lmZ e G d de                      Z G d dej                  Z G d dej                  Z  G d dej!                  Z" eej#        d          d             Z$ G d dej                  Z% G d dej                  Z& G d dej                  Z' G d dej                  Z( G d dej                  Z) G d  d!ej                  Z* G d" d#ej                  Z+dS )$    N)	dataclass)partial)
accumulate)AnyDictOptionalUnion   )swiglu)BaseModelArgscreate_attention_maskscaled_dot_product_attention)ConcatenateKVCacheKVCache)initialize_ropec                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   dZeed	<   d
Zeed<   dZ	eed<   dS )	ModelArgs
model_type
vocab_size
hidden_dim
num_layersnum_kv_reuse_layers	num_headsnum_kv_headsg      
@hidden_dim_scale_factoriP  
rope_thetagh㈵>rms_norm_epsN)
__name__
__module____qualname__str__annotations__intr   floatr   r        \/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/afm7.pyr   r      s         OOOOOOOOOOOONNN%)U)))JL%r&   r   c                   \     e Zd Z	 	 	 ddedee         dededef
 fd	ZddefdZd Z	 xZ
S )FusedLoRALinear                 4@
input_dimsoutput_dimsrdropoutscalec                 8   t                                                       t          |          | _        t	          j        |          | _        | _        dt          j	                  z  fd|D             | _
        fd|D             | _        d S )N)pr
   c                 Z    g | ]'}t           j                             f           (S ))lowhighshape)mxrandomuniform).0_r-   r/   r1   s     r'   
<listcomp>z,FusedLoRALinear.__init__.<locals>.<listcomp>0   sF     
 
 
 I5&uZOLL
 
 
r&   c                 <    g | ]}t          j        |f          S r%   )r8   zeros)r;   odr/   s     r'   r=   z,FusedLoRALinear.__init__.<locals>.<listcomp>4   s'    ???RrxB((???r&   )super__init__FusedLinearlinearnnDropoutr0   r1   mathsqrtlora_alora_b)selfr-   r.   r/   r0   r1   	__class__s    ` ` `r'   rB   zFusedLoRALinear.__init__!   s     	!*k::zG,,,
DIj)))
 
 
 
 
 
 
 
 
 @???;???r&   F
dequantizec                    
  j         }|j        }t          |t                    }|j        
|r8|j        j        
t          j        ||j        |j        |j	        |j
                  }|j        d         }|j        }t          ||          }||_        
 fdt           j         j                  D             }t          j        |d          }	||	z   |_        |r"|s |                    |j	        |j
                  }|S )Nc                 j    g | ]/\  }}j         |j        z  |j        z                                0S r%   )r1   Tastype)r;   abdtyperK   s      r'   r=   z(FusedLoRALinear.fuse.<locals>.<listcomp>L   sK     
 
 
1 j13!#%--e44
 
 
r&   r   axis)rD   weight
isinstanceFusedQuantizedLinearrU   scalesr8   rM   biases
group_sizebitsr7   r.   rC   ziprI   rJ   concatenateto_quantized)rK   rM   rD   rX   is_quantizedr-   r.   fused_lineardeltasdeltarU   s   `         @r'   fusezFusedLoRALinear.fuse6   s!   !&*>??  	M'E]! F \"%
(":{;;$
 
 
 
 
DK55
 
 
 vA...$un 	U
 	U'44V5FTTLr&   c                 
    j                                        }                               fdt           j         j                  D             }t           fdt          ||          D                       S )Nc                 &    g | ]\  }}|z  |z  S r%   r%   )r;   rS   rT   xs      r'   r=   z,FusedLoRALinear.__call__.<locals>.<listcomp>\   s%    CCCTQa!eq[CCCr&   c              3   ^   K   | ]'\  }}|j         |z                                z   V  (d S N)r1   rR   )r;   yizidtrK   s      r'   	<genexpr>z+FusedLoRALinear.__call__.<locals>.<genexpr>]   s@      OO62rR4:?222666OOOOOOr&   )rU   rD   r0   r_   rI   rJ   tuple)rK   ri   yzrn   s   ``  @r'   __call__zFusedLoRALinear.__call__X   s    WKKNNLLOOCCCCSdk%B%BCCCOOOOOSAYYOOOOOOr&   r*   r+   r,   )F)r   r   r    r#   listr$   rB   boolrf   rs   __classcell__rL   s   @r'   r)   r)       s        
 @ @@ #Y@ 	@
 @ @ @ @ @ @ @*   t        DP P P P P P Pr&   r)   c                   v     e Zd Zddedef fdZed             Zed             Z fdZddede	de	fdZ
 xZS )rZ   @      r]   r^   c                     t          |          ^ }}|| _        t                                          ||d||           d S )NF)biasr]   r^   r   indicesrA   rB   )rK   r-   r.   r]   r^   r   rL   s         r'   rB   zFusedQuantizedLinear.__init__a   sR     *; 7 7+%JT 	 	
 	
 	
 	
 	
r&   c                 6    | j         j        d         | j        z  S NrO   )r[   r7   r]   rK   s    r'   r-   zFusedQuantizedLinear.input_dimsh   s    { $t66r&   c                     dg| j         z   | j        j        d         gz   fdt          dt	                              D             S )Nr   c                 8    g | ]}|         |d z
           z
  S r
   r%   r;   ir   s     r'   r=   z4FusedQuantizedLinear.output_dims.<locals>.<listcomp>o   *    LLL
WQU^+LLLr&   r
   r   rX   r7   rangelenrK   r   s    @r'   r.   z FusedQuantizedLinear.output_dimsl   M    #$(9!(<'==LLLLU1c'll5K5KLLLLr&   c                 ~    t                                          |          }|                    | j        d          S NrO   rV   rA   rs   splitr   rK   ri   rL   s     r'   rs   zFusedQuantizedLinear.__call__q   2    GGQwwt|"w---r&   r*   r+   r,   r/   r0   r1   c                 N    t          | j        | j        |||          }| |_        |S rk   r)   r-   r.   rD   rK   r/   r0   r1   lora_lins        r'   to_lorazFusedQuantizedLinear.to_lorau   )    "4?D4DaRWXXr&   rz   r{   rt   )r   r   r    r#   rB   propertyr-   r.   rs   r$   r   rw   rx   s   @r'   rZ   rZ   `   s        
 
C 
C 
 
 
 
 
 
 7 7 X7 M M XM. . . . .  5 u        r&   rZ   c                   |     e Zd Z fdZed             Zed             Z fdZddedefd	Z	ddede
de
fdZ xZS )rC   c                     t          |          ^ }}|| _        t                                          ||d           d S NFr}   r~   )rK   r-   r.   r   rL   s       r'   rB   zFusedLinear.__init__|   sA     *; 7 7+[u=====r&   c                 &    | j         j        d         S r   )rX   r7   r   s    r'   r-   zFusedLinear.input_dims   s    { $$r&   c                     dg| j         z   | j        j        d         gz   fdt          dt	                              D             S )Nr   c                 8    g | ]}|         |d z
           z
  S r   r%   r   s     r'   r=   z+FusedLinear.output_dims.<locals>.<listcomp>   r   r&   r
   r   r   s    @r'   r.   zFusedLinear.output_dims   r   r&   c                 ~    t                                          |          }|                    | j        d          S r   r   r   s     r'   rs   zFusedLinear.__call__   r   r&   rz   r{   r]   r^   c                     | j         }| j        }t          ||||          }t          j        | j        ||          \  |_        |_        |_        |S rk   )r-   r.   rZ   r8   quantizerX   r[   r\   )rK   r]   r^   r-   r.   qls         r'   ra   zFusedLinear.to_quantized   sK    _
&!*k:tLL*,+dk:t*T*T'	29bi	r&   r*   r+   r,   r/   r0   r1   c                 N    t          | j        | j        |||          }| |_        |S rk   r   r   s        r'   r   zFusedLinear.to_lora   r   r&   r   rt   )r   r   r    rB   r   r-   r.   rs   r#   ra   r$   r   rw   rx   s   @r'   rC   rC   {   s        > > > > >
 % % X% M M XM. . . . . s s      5 u        r&   rC   T)	shapelessc                     | j         }|                     t          j                  } | |z                                  } t          j        | dd          } | |z                      |          S )Ni   )rU   rR   r8   float32roundclip)ri   r1   rn   s      r'   fake_8bit_quantr      s\    	
B	A	
UA
4AIb!!!r&   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
	Attentionargsc                 `   t                                                       |j        }|j        x| _        }|j        x| _        }|j        |z  x| _        }|dz  | _        |d|z  z   |z  }t          |||z  gd||z  gz  z             | _
        t          j        ||d          | _        t          | j        |j        d          | _        t          j        |          | _        t          j        |          | _        t)          j        d          | _        t)          j        d          | _        d S )N         Fr   Tg      ?)rA   rB   r   r   n_headsr   
n_kv_headshead_dimr1   rC   qkv_projrE   Linearout_projr   r   ropeRMSNormq_normk_normr8   arrayquant_key_scalequant_value_scale)rK   r   dimr   r   r   qkv_dimrL   s          r'   rB   zAttention.__init__   s    o!%/w'+'88*#'?g#==t^
Q^+x7#'H$%Z(-B,C(CC
 
 	#s777#MO
 
	
 j**j**!x}}!##r&   Nri   maskcachereturnc                 p   |j         \  }}}|                     |          \  }}}	|                    ||| j        d                              dddd          }|                    ||| j        d                              dddd          }|	                    ||| j        d                              dddd          }	||                     |                     ||j                            }| 	                    |                     ||j                            }t          || j                  }t          |	| j                  }	|                    ||	          \  }}	nz|                     |                     |                    }| 	                    |                     |                    }t          || j                  }t          |	| j                  }	t          |||	|| j        |          }
|
                    dddd                              ||d          }
|                     |
          S NrO   r   r   r
      )offset)r   r1   r   )r7   r   reshaper   	transposer   r   r   r   r   r   r   r   update_and_fetchr   r1   r   )rK   ri   r   r   BLDquerieskeysvaluesoutputs              r'   rs   zAttention.__call__   s    '1a !%a 0 0v //!Qb99CCAq!QOO||Aq$/266@@Aq!LL1dor::DDQ1aPPkk$))GEL)"I"IJJG;;tyyelyCCDDD"4)=>>D$VT-CDDF 11$??LD&&kk$))G"4"455G;;tyy//D"4)=>>D$VT-CDDF-T6djt
 
 
 !!!Q1--55aB??}}V$$$r&   NNr   r   r    r   rB   r8   r   r   r   rs   rw   rx   s   @r'   r   r      s        /Y / / / / / /6 $(#	!% !%8!% rx !% }	!%
 
!% !% !% !% !% !% !% !%r&   r   c                        e Zd Zdef fdZ	 d
dej        dej        dej        deej                 dej        f
d	Z xZ	S )KVReuseAttentionr   c                    t                                                       |j        }|j        x| _        }|j        |z  x| _        }|dz  | _        t          j        ||d          | _	        t          j        ||d          | _
        t          | j        |j        d          | _        t          j        |          | _        d S )Nr   Fr   T)rA   rB   r   r   r   r   r1   rE   r   q_projr   r   r   r   r   r   )rK   r   r   r   r   rL   s        r'   rB   zKVReuseAttention.__init__   s    o!%/w#'?g#==t^
iSu555	#s777#MO
 
	
 j**r&   Nri   r   r   r   r   c                    |j         \  }}}|j         \  }}}	}|                     |          }
|
                    ||| j        d                              dddd          }
|                     |                     |
|	|z
                      }
t          |
||d | j        |          }|                    dddd                              ||d          }| 	                    |          S r   )
r7   r   r   r   r   r   r   r   r1   r   )rK   ri   r   r   r   r   r   r   r<   Sr   r   s               r'   rs   zKVReuseAttention.__call__   s     '1aZ
1a++a..//!Qb99CCAq!QOO++diiAi>>??-T6TZd
 
 
 !!!Q1--55aB??}}V$$$r&   rk   
r   r   r    r   rB   r8   r   r   rs   rw   rx   s   @r'   r   r      s        +Y + + + + + +, $(% %8% h% 	%
 rx % 
% % % % % % % %r&   r   c                   :     e Zd Zdef fdZdej        fdZ xZS )MLPr   c                 ,   t                                                       |j        }t          ||j        z            }t          j        ||d          | _        t          j        ||d          | _        t          j        ||d          | _	        d S r   )
rA   rB   r   r#   r   rE   r   	gate_proj	down_projup_proj)rK   r   r   r   rL   s       r'   rB   zMLP.__init__  s    ot;;<<
3
???:s???yju===r&   r   c                     |                      |          }|                     |          }|                     t          ||                    S rk   )r   r   r   r   )rK   ri   gs      r'   rs   zMLP.__call__  s<    NN1LLOO~~fQll+++r&   	r   r   r    r   rB   r8   r   rs   rw   rx   s   @r'   r   r   
  sa        >Y > > > > > >,RX , , , , , , , ,r&   r   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
TransformerBlockr   c                 ,   t                                                       t          |          | _        t	          |          | _        t          j        |j        |j	                  | _
        t          j        |j        |j	                  | _        d S Neps)rA   rB   r   	self_attnr   mlprE   r   r   r   input_layernormpost_attention_layernormrK   r   rL   s     r'   rB   zTransformerBlock.__init__  sx    "4t99!z$/t?PQQQ(*
O!2)
 )
 )
%%%r&   Nri   r   r   r   c                     |                      |                     |          ||          }||z   }|                     |                     |                    }||z   }|S rk   r   r   r   r   )rK   ri   r   r   r/   houts          r'   rs   zTransformerBlock.__call__%  s]     NN4//22D%@@EHHT2215566!e
r&   r   r   rx   s   @r'   r   r     s        
Y 
 
 
 
 
 
 $(#	
 
8
 rx 
 }	

 

 
 
 
 
 
 
 
r&   r   c                        e Zd Zdef fdZ	 d
dej        dej        dej        deej                 dej        f
d	Z xZ	S )KVReuseTransformerBlockr   c                 ,   t                                                       t          |          | _        t	          |          | _        t          j        |j        |j	                  | _
        t          j        |j        |j	                  | _        d S r   )rA   rB   r   r   r   r   rE   r   r   r   r   r   r   s     r'   rB   z KVReuseTransformerBlock.__init__3  sz    )$//t99!z$/t?PQQQ(*
O!2)
 )
 )
%%%r&   Nri   r   r   r   r   c                     |                      |                     |          |||          }||z   }|                     |                     |                    }||z   }|S rk   r   )rK   ri   r   r   r   r/   r   r   s           r'   rs   z KVReuseTransformerBlock.__call__<  s_     NN4//22D&$GGEHHT2215566!e
r&   rk   r   rx   s   @r'   r   r   2  s        
Y 
 
 
 
 
 
 $( 8 h 	
 rx  
       r&   r   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )AFMModelr   c                    t                                                       | _        j        | _        t	          j        j        j                  | _        fdt          j	        j
        z
            D             | _        fdt          j
                  D             | _        t	          j        j        j                  | _        d S )Nc                 .    g | ]}t                    S r%   )r   r;   r<   r   s     r'   r=   z%AFMModel.__init__.<locals>.<listcomp>Q  s1     
 
 
 T""
 
 
r&   c                 .    g | ]}t                    S r%   )r   r   s     r'   r=   z%AFMModel.__init__.<locals>.<listcomp>U  s/      
  
  
./#D)) 
  
  
r&   r   )rA   rB   r   r   rE   	Embeddingr   	embeddingr   r   r   layerskv_reuse_layersr   r   output_normr   s    `r'   rB   zAFMModel.__init__K  s    	/dotGG
 
 
 
4?T-EEFF
 
 
 
  
  
  
389Q3R3R 
  
  
 :do4;LMMMr&   Ninputsc                 z   |                      |          }|)d gt          | j                  z  }t                      |d<   t	          ||d                   }t          | j        |          D ]\  }} ||||          }|d         j        \  }}| j        D ]} |||||          }|                     |          S )NrO   r   )r   )	r  r   r  r   r   r_   stater  r  )	rK   r  r   r   r   layercr   r   s	            r'   rs   zAFMModel.__call__Z  s    
 NN6""=FS---E*,,E"I$Qa11DK// 	( 	(HE1aQ'''AARyf) 	- 	-Eavt,,AA"""r&   rk   r   rx   s   @r'   r   r   J  ss        NY N N N N N N$ # ## # # # # # # #r&   r   c                   Z     e Zd Zdef fdZ	 ddej        fdZd Ze	d             Z
 xZS )	Modelr   c                     t                                                       || _        |j        | _        t	          |          | _        d S rk   )rA   rB   r   r   r   modelr   s     r'   rB   zModel.__init__r  s:    	/d^^


r&   Nr  c                 p    |                      ||          }| j         j                            |          }|S rk   )r  r  	as_linear)rK   r  r   r   s       r'   rs   zModel.__call__x  s4    
 jj''j",,S11
r&   c                 b    d t          t          | j        j                            D             S )Nc                 *    g | ]}t                      S r%   )r   )r;   r<   s     r'   r=   z$Model.make_cache.<locals>.<listcomp>  s    AAAa		AAAr&   )r   r   r  r  r   s    r'   
make_cachezModel.make_cache  s+    AA5TZ->)?)?#@#@AAAAr&   c                 4    | j         j        | j         j        z   S rk   )r  r  r  r   s    r'   r  zModel.layers  s    z 4:#===r&   rk   )r   r   r    r   rB   r8   r   rs   r  r   r  rw   rx   s   @r'   r  r  q  s        $Y $ $ $ $ $ $     B B B > > X> > > > >r&   r  ),rG   dataclassesr   	functoolsr   	itertoolsr   typingr   r   r   r	   mlx.corecorer8   mlx.nnrE   activationsr   baser   r   r   r   r   r   
rope_utilsr   r   Moduler)   QuantizedLinearrZ   r   rC   compiler   r   r   r   r   r   r   r  r%   r&   r'   <module>r!     s    ! ! ! ! ! !                   - - - - - - - - - - - -                   T T T T T T T T T T . . . . . . . . ' ' ' ' ' ' 
 
 
 
 
 
 
 
=P =P =P =P =Pbi =P =P =P@    2-   6    ")   B 	t$$$" " %$":% :% :% :% :%	 :% :% :%z%% %% %% %% %%ry %% %% %%P, , , , ,") , , ,"    ry   .    bi   0$# $# $# $# $#ry $# $# $#N> > > > >BI > > > > >r&   