
    )j&              	          d dl mZ d dlmZ d dlmZmZmZmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ ddlmZmZmZ ddlmZmZ dd	lmZ  eej        d
          dej        dej        dej        dej        fd            Z eej        d
          dej        dej        dej        fd            Z eej        d
          dej        dej        dej        dej        fd            Ze G d de                      Z  G d dej!                  Z" G d dej!                  Z# G d dej!                  Z$ G d d ej!                  Z% G d! d"ej!                  Z& G d# d$ej!                  Z'dS )%    )	dataclass)partial)AnyDictListOptionalTupleUnionN)shard_linear   )BaseModelArgscreate_attention_maskscaled_dot_product_attention)KVCacheRotatingKVCache)initialize_ropeT)	shapelessqueryweightbiasreturnc                     | |d d d d d f                              dd          z  }||d         z   }t          j        |          S )N).NN)swapaxesmxsigmoid)r   r   r   gate_logitss       g/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/iquestloopcoder.py_compute_gater       sN    &D!!!,55b"===K_ 55K:k"""    gateupc                 0    t          j        |           |z  S N)nnsilu)r"   r#   s     r   	_silu_mulr(      s    74==2r!   attn_global
attn_localc                     | |z  d| z
  |z  z   S )Nr    )r"   r)   r*   s      r   _mix_attentionr-      s     +TZ 777r!   c                      e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   d
Zeed<   dZe	ed<   dZ
e	ed<   dZeed<   dZeeeeeef         f                  ed<   dZe	ed<   dZeed<   dZeed<   dS )	ModelArgs
model_typehidden_sizenum_hidden_layersintermediate_sizenum_attention_headsrms_norm_eps
vocab_sizehead_dimnum_key_value_headsi   max_position_embeddingsFattention_biasmlp_biasg    A
rope_thetaNrope_scalingtie_word_embeddings   loop_num@   loop_window_size)__name__
__module____qualname__str__annotations__intfloatr9   r:   boolr;   r<   r=   r   r   r
   r>   r@   rB   r,   r!   r   r/   r/   #   s        OOOOOOMMM#)S))) ND   Hd J   ;?L(4U5#:%6 678??? %%%%Hccr!   r/   c                   L     e Zd Zdedef fdZdej        dej        fdZ xZS )LoopGateProjection	num_headsr7   c                     t                                                       || _        || _        t	          j        ||f          | _        t	          j        |f          | _        d S r%   )super__init__rM   r7   r   zerosr   r   )selfrM   r7   	__class__s      r   rP   zLoopGateProjection.__init__9   sT    " h	8455Hi\**			r!   r   r   c                 8    t          || j        | j                  S r%   )r    r   r   )rR   r   s     r   __call__zLoopGateProjection.__call__@   s    UDK;;;r!   )	rC   rD   rE   rH   rP   r   arrayrU   __classcell__rS   s   @r   rL   rL   8   sq        +# + + + + + + +<bh <28 < < < < < < < <r!   rL   c                        e Zd Zdef fdZ	 ddej        dedeej        ej        ej        f         fdZ		 	 dd	ej        d
ej        dej        de
ej                 de
e         dej        fdZ xZS )	Attentionargsc                 .   t                                                       |j        }|j        x| _        }|j        x| _        }|j        x| _        }|dz  | _        t          j
        |||z  |j                  | _        t          j
        |||z  |j                  | _        t          j
        |||z  |j                  | _        t          j
        ||z  ||j                  | _        t!          ||j        d|j        |j                  | _        d S )Ng      ࿩r   F)traditionalscaling_configr9   )rO   rP   r1   r4   n_headsr8   
n_kv_headsr7   scaler&   Linearr:   q_projk_projv_projo_projr   r<   r=   r9   rope)rR   r[   dimr`   ra   r7   rS   s         r   rP   zAttention.__init__E   s	   !%!99w'+'??*#'=0t^
iWx%7d>QRRRiZ(%:ATUUUiZ(%:ATUUUi( 2Cd>QRRR#O,$($@
 
 
			r!   r   xoffsetr   c                 "   |j         \  }}}|                     |                              ||| j        d                              dddd          }|                     |                              ||| j        d                              dddd          }|                     |                              ||| j        d                              dddd          }|                     ||          }|                     ||          }|||fS )Nr   r   r?   r      )rk   )	shaperd   reshaper`   	transposere   ra   rf   rh   )	rR   rj   rk   BL_querieskeysvaluess	            r   get_qkvzAttention.get_qkv[   s     '1a++a..((At|R@@JJ1aQRTUVV{{1~~%%aDOR@@JJ1aQRTUVVQ''1dorBBLLQPQSTVWXX))GF)33yyfy--f$$r!   Nrt   ru   rv   maskcachec                 6    t          ||||| j        |          S )N)ry   rb   rx   )r   rb   )rR   rt   ru   rv   rx   ry   s         r   	attentionzAttention.attentionh   s*     ,T6djt
 
 
 	
r!   )r   )NN)rC   rD   rE   r/   rP   r   rV   rH   r	   rw   r   r   r{   rW   rX   s   @r   rZ   rZ   D   s        
Y 
 
 
 
 
 
. *+% %%#&%	rx28+	,% % % %$ $(#

 



 h

 	


 rx 

 }

 


 

 

 

 

 

 

 

r!   rZ   c                   H     e Zd Zdef fdZdej        dej        fdZ xZS )MLPr[   c                 *   t                                                       |j        }|j        }t	          j        |||j                  | _        t	          j        |||j                  | _        t	          j        |||j                  | _	        d S )Nr]   )
rO   rP   r1   r3   r&   rc   r;   	gate_proj	down_projup_proj)rR   r[   ri   
hidden_dimrS   s       r   rP   zMLP.__init__v   s{    +
3
GGG:sGGGyjt}EEEr!   rj   r   c                     |                      t          |                     |          |                     |                              S r%   )r   r(   r   r   )rR   rj   s     r   rU   zMLP.__call__~   s4    ~~iq(9(94<<??KKLLLr!   )	rC   rD   rE   r/   rP   r   rV   rU   rW   rX   s   @r   r}   r}   u   s{        FY F F F F F FM"( Mrx M M M M M M M Mr!   r}   c                   $     e Zd Zdef fdZ xZS )TransformerBlockr[   c                 ,   t                                                       t          |          | _        t	          |          | _        t          j        |j        |j	                  | _
        t          j        |j        |j	                  | _        d S )Neps)rO   rP   rZ   	self_attnr}   mlpr&   RMSNormr1   r5   input_layernormpost_attention_layernormrR   r[   rS   s     r   rP   zTransformerBlock.__init__   sz    "4t99!z$*:@QRRR(*
$"3)
 )
 )
%%%r!   )rC   rD   rE   r/   rP   rW   rX   s   @r   r   r      sD        
Y 
 
 
 
 
 
 
 
 
 
r!   r   c                   Z     e Zd Zdef fdZ	 ddej        deee	                  fdZ
 xZS )IQuestLoopCoderModelr[   c                     t                                                       j        dk    sJ dj                     | _        j        | _        t          j        j        j                  | _        fdt          j
                  D             | _        t          j        j        j                  | _        fdt          j
                  D             | _        j        | _        j        | _        d S )Nr?   z"Only loop_num=2 is supported, got c                 0    g | ]}t                     S ))r[   )r   .0rs   r[   s     r   
<listcomp>z1IQuestLoopCoderModel.__init__.<locals>.<listcomp>   s2     
 
 
,-$'''
 
 
r!   r   c                 D    g | ]}t          j        j                  S r,   )rL   r4   r7   r   s     r   r   z1IQuestLoopCoderModel.__init__.<locals>.<listcomp>   s8     !
 !
 !
 t7GG!
 !
 !
r!   )rO   rP   r@   r[   r6   r&   	Embeddingr1   embed_tokensranger2   layersr   r5   normgate_projectionsrB   r   s    `r   rP   zIQuestLoopCoderModel.__init__   s   }!!!#W#W#W!!!	/L$:JKK
 
 
 
16t7M1N1N
 
 
 Jt/T5FGGG	!
 !
 !
 !
4122!
 !
 !
  $ 5r!   Ninputsry   c           	      f   |j         d d         \  }}|                     |          }|d gdt          | j                  z  z  }t	          ||d                   }t	          ||t          | j                           | j                  }g }t          | j        |          D ]\  }	}
|	                    |          }|
|
j        nd}|	j	        
                    ||          \  }}}|
|
                    ||          \  }}|                    ||f           |	j	                            |||||
          }|	j	                            |                    dddd                              ||d                    }||z   }|	                    |	                    |                    }||z   }t          | j        | j        |t          | j                  d          |          D ]6\  }	}}
\  }}|	                    |          }|
|
j        nd}|	j	        
                    ||          \  }}} ||          }|	j	                            |||||
          }|
|
                    ||          \  }}|	j	                            |||||
          }t)          |||          }|	j	                            |                    dddd                              ||d                    }||z   }|	                    |	                    |                    }||z   }8|                     |          S )Nr?   r   )window_size)ry   r   rm   r   )rn   r   lenr   r   rB   zipr   rk   r   rw   update_and_fetchappendr{   rg   rp   ro   r   r   r   r-   r   )rR   r   ry   rq   rr   hrx   window_maskloop1_kvlayerch_normrk   q1k1v1outrr   q2k2v2r"   r)   r*   mixeds                             r   rU   zIQuestLoopCoderModel.__call__   s@   
 |BQB1f%%=Fa#dk"2"223E$Qa11+uS%%&D4I
 
 
 DK// 	 	HE1**1--F!"QXXAF00@@JBB}++B33BOORH%%%/++BBA+FFC&&s}}Q1a'@'@'H'HAr'R'RSSAAA		%88;;<<AAAA-0K.c$+6F6F6H6H0I8.
 .
 	 	)E9a"b **1--F!"QXXAF00@@JBB9R==D/33BBA3NNK}++B33B22 3  J #4jAAE&&uq!Q'B'B'J'J1aQS'T'TUUAAA		%88;;<<AAAAyy||r!   r%   )rC   rD   rE   r/   rP   r   rV   r   r   r   rU   rW   rX   s   @r   r   r      s        6Y 6 6 6 6 6 6( &*9 99 S	"9 9 9 9 9 9 9 9r!   r   c                        e Zd Zdef fdZ	 d
dej        fdZd
deej	        j
                 fdZed             Zd	 Z xZS )Modelr[   c                     t                                                       || _        |j        | _        t	          |          | _        |j        s(t          j        |j	        |j
        d          | _        d S d S )NFr]   )rO   rP   r[   r0   r   modelr>   r&   rc   r1   r6   lm_headr   s     r   rP   zModel.__init__   sq    	/)$//
' 	T9T%5tUSSSDLLL	T 	Tr!   Nr   c                     |                      ||          }| j        j        r | j         j                            |          }n|                     |          }|S r%   )r   r[   r>   r   	as_linearr   )rR   r   ry   r   s       r   rU   zModel.__call__   sT    
 jj''9( 	$*)33C88CC,,s##C
r!   groupc                    |pt           j                                        }|                                }|                                }t          | j        j                  D ]\  }}t          |j	        j
        d|          |j	        _
        t          |j	        j        d|          |j	        _        t          |j	        j        d|          |j	        _        t          |j	        j        d|          |j	        _        |j	        xj        |z  c_        |j	        xj        |z  c_        t          |j        j        d|          |j        _        t          |j        j        d|          |j        _        t          |j        j        d|          |j        _        | j        j        |         }|j        |z  }||z  }||z   }	|j        ||	d d f         |_        |j        ||	         |_        ||_        d S )Nzall-to-sharded)r   zsharded-to-all)r   distributedinitsizerank	enumerater   r   r   r   rd   re   rf   rg   r`   ra   r   r   r   r   r   rM   r   r   )
rR   r   Nr   ir   r   heads_per_rankstartends
             r   shardzModel.shard   s   .,,..JJLLzz||!$*"344  	1  	1HAu%1&(8& & &EO" &2&(8& & &EO" &2&(8& & &EO" &2&(8& & &EO" O##)##O&&1,&&".	#%5U# # #EI #/	#%5U# # #EI !-	!#35! ! !EI 
3A6I&0A5N>)E.(C(/c	111=I&^E#I6IN"0IA 	1  	1r!   c                     | j         j        S r%   )r   r   rR   s    r   r   zModel.layers  s    z  r!   c                 N     d  j         D              fd j         D             z   S )Nc                 *    g | ]}t                      S r,   )r   )r   rs   s     r   r   z$Model.make_cache.<locals>.<listcomp>  s    ///a		///r!   c                 D    g | ]}t          j        j                   S ))max_size)r   r[   rB   )r   rs   rR   s     r   r   z$Model.make_cache.<locals>.<listcomp>  s6     3
 3
 3
EFOTY%?@@@3
 3
 3
r!   )r   r   s   `r   
make_cachezModel.make_cache  sH    //4;/// 3
 3
 3
 3
JN+3
 3
 3
 
 	
r!   r%   )rC   rD   rE   r/   rP   r   rV   rU   r   r   Groupr   propertyr   r   rW   rX   s   @r   r   r      s        TY T T T T T T 
 

 
 
 
%1 %18BN$89 %1 %1 %1 %1N ! ! X!
 
 
 
 
 
 
r!   r   )(dataclassesr   	functoolsr   typingr   r   r   r   r	   r
   mlx.corecorer   mlx.nnr&   mlx.nn.layers.distributedr   baser   r   r   ry   r   r   
rope_utilsr   compilerV   r    r(   r-   r/   ModulerL   rZ   r}   r   r   r   r,   r!   r   <module>r      s:   " ! ! ! ! !       : : : : : : : : : : : : : : : :             2 2 2 2 2 2 T T T T T T T T T T + + + + + + + + ' ' ' ' ' ' 	t$$$# #28 #28 # # # # %$# 	t$$$BH "( rx    %$ 	t$$$8
(8!#879x8X8 8 8 %$8        (	< 	< 	< 	< 	< 	< 	< 	<.
 .
 .
 .
 .
	 .
 .
 .
b
M 
M 
M 
M 
M") 
M 
M 
M
 
 
 
 
ry 
 
 
K K K K K29 K K K\C
 C
 C
 C
 C
BI C
 C
 C
 C
 C
r!   