
    )j#                     x   d dl mZ d dlmZmZmZmZmZ d dlm	Z
 d dlmZ d dlmZ ddlmZ ddlmZmZmZ ddlmZmZ dd	lmZ e G d
 de                      Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z dS )    )	dataclass)AnyDictListOptionalUnionN)shard_linear   )swiglu)BaseModelArgscreate_attention_maskscaled_dot_product_attention)KVCacheRotatingKVCache)initialize_ropec                   j   e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   dZee         ed	<   dZ	ee         ed
<   dZ
ee         ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeeeeeef         f                  ed<   dZeed<   dZeee                  ed<   dZee         ed<   d ZdS )	ModelArgs
model_typehidden_sizenum_hidden_layersintermediate_sizenum_attention_headsrms_norm_eps
vocab_sizeNhead_dimmax_position_embeddingsnum_key_value_headsFattention_biasmlp_biasi'  
rope_thetarope_traditionalrope_scalingTtie_word_embeddingslayer_typessliding_windowc                 ^    | j         | j        | _         | j        dg| j        z  | _        d S d S )Nfull_attention)r   r   r$   r   selfs    ]/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/llama.py__post_init__zModelArgs.__post_init__%   s@    #+'+'?D$# 01D4JJD $#    )__name__
__module____qualname__str__annotations__intfloatr   r   r   r   r   boolr   r    r!   r"   r   r   r#   r$   r   r%   r+    r,   r*   r   r      s]        OOOOOO"Hhsm"""-1Xc]111)-#--- ND   HdJ"d""";?L(4U5#:%6 678??? $$$$'+K$s)$+++$(NHSM(((K K K K Kr,   r   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
	Attentionargsc                 `   t                                                       |j        }|j        x| _        }|j        x| _        }|j        p	|j        |z  x| _        }|dz  | _        t          |d          r|j
        }nd}t          j        |||z  |          | _        t          j        |||z  |          | _        t          j        |||z  |          | _        t          j        ||z  ||          | _        t#          | j        |j        |j        |j        |j                  | _        d S )Ng      r   Fbias)super__init__r   r   n_headsr   
n_kv_headsr   scalehasattrr   nnLinearq_projk_projv_projo_projr   r    r!   r"   r   rope)r)   r8   dimr>   r?   r   r   	__class__s          r*   r=   zAttention.__init__.   s,   !%!99w'+'??*#'=#OD4D4OOt^
4)** 	#!0NN"NiWx%7nMMMiZ(%:PPPiZ(%:PPPi( 2CnMMM#MO!(
 
			r,   Nxmaskcachereturnc                 |   |j         \  }}}|                     |          |                     |          |                     |          }	}}|                    ||| j        d                              dddd          }|                    ||| j        d                              dddd          }|	                    ||| j        d                              dddd          }	|R|                     ||j	                  }|                     ||j	                  }|
                    ||	          \  }}	n*|                     |          }|                     |          }t          |||	|| j        |          }
|
                    dddd                              ||d          }
|                     |
          S )Nr      r
      )offset)rM   r@   rL   )shaperD   rE   rF   reshaper>   	transposer?   rH   rS   update_and_fetchr   r@   rG   )r)   rK   rL   rM   BLDquerieskeysvaluesoutputs              r*   __call__zAttention.__call__J   s    '1a $AAAv //!Qb99CCAq!QOO||Aq$/266@@Aq!LL1dor::DDQ1aPPiii==G99T%,977D 11$??LD&&ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??{{6"""r,   NN)r-   r.   r/   r   r=   mxarrayr   r   r_   __classcell__rJ   s   @r*   r7   r7   -   s        
Y 
 
 
 
 
 
> $(#	# #8# rx # }	#
 
# # # # # # # #r,   r7   c                   :     e Zd Zdef fdZdej        fdZ xZS )MLPr8   c                 @   t                                                       |j        }|j        }t	          |d          r|j        }nd}t          j        |||          | _        t          j        |||          | _	        t          j        |||          | _
        d S )Nr   Fr:   )r<   r=   r   r   rA   r   rB   rC   	gate_proj	down_projup_proj)r)   r8   rI   
hidden_dimr   rJ   s        r*   r=   zMLP.__init__j   s    +
4$$ 	}HHH3
BBB:sBBByjx@@@r,   rN   c                     |                      t          |                     |          |                     |                              S N)ri   r   rh   rj   )r)   rK   s     r*   r_   zMLP.__call__x   s4    ~~fT^^A%6%6QHHIIIr,   )	r-   r.   r/   r   r=   ra   rb   r_   rc   rd   s   @r*   rf   rf   i   sq        AY A A A A A AJRX J J J J J J J Jr,   rf   c            	       ~     e Zd Zddedef fdZ	 	 ddej        deej                 dee	         d	ej        fd
Z
 xZS )TransformerBlockFr8   use_slidingc                 x   t                                                       |j        | _        |j        | _        || _        t          |          | _        t          |          | _        t          j
        |j        |j                  | _        t          j
        |j        |j                  | _        || _        d S )Neps)r<   r=   r   r   rp   r7   	self_attnrf   mlprB   RMSNormr   input_layernormpost_attention_layernormr8   )r)   r8   rp   rJ   s      r*   r=   zTransformerBlock.__init__}   s    #'#; +&"4t99!z$*:@QRRR(*
$"3)
 )
 )
% 			r,   NrK   rL   rM   rN   c                     |                      |                     |          ||          }||z   }|                     |                     |                    }||z   }|S rm   )rt   rw   ru   rx   )r)   rK   rL   rM   rhouts          r*   r_   zTransformerBlock.__call__   s]     NN4//22D%@@EHHT2215566!e
r,   )Fr`   )r-   r.   r/   r   r4   r=   ra   rb   r   r   r_   rc   rd   s   @r*   ro   ro   |   s         Y T        $(#	
 
8
 rx 
 }	

 

 
 
 
 
 
 
 
r,   ro   c                   Z     e Zd Zdef fdZ	 	 ddej        deej                 fdZ xZ	S )
LlamaModelr8   c                 6   t                                                       | _        j        | _        j        | _        j        | _        j        | _        | j        dk    sJ t          j        j        j	                  | _
        fd| j        D             | _        t          j        j	        j                  | _        | j                            d          | _        d | _        t%          | j                  D ]\  }}|j        r
|| _         d S d S )Nr   c                 :    g | ]}t          |d k              S )sliding_attention)r8   rp   )ro   ).0
layer_typer8   s     r*   
<listcomp>z'LlamaModel.__init__.<locals>.<listcomp>   s<     
 
 
 $JBU4UVVV
 
 
r,   rr   r'   )r<   r=   r8   r   r   r$   r%   rB   	Embeddingr   embed_tokenslayersrv   r   normindexfa_idxswa_idx	enumeraterp   )r)   r8   elrJ   s    `  r*   r=   zLlamaModel.__init__   s&   	/!%!7+"1""""L$:JKK
 
 
 
".
 
 
 Jt/T5FGGG	&,,-=>>dk** 	 	DAq}  	 	r,   Ninputsinput_embeddingsc                    ||}n|                      |          }|d gt          | j                  z  }t          ||| j                           }| j        "t          ||| j                 | j                  }t          | j        |          D ]\  }}|j        r|n|} ||||          }| 	                    |          S )N)window_size)rM   )
r   lenr   r   r   r   r%   ziprp   r   )	r)   r   rM   r   r{   fa_maskswa_masklayerrL   s	            r*   r_   zLlamaModel.__call__   s     ' AA!!&))A=FS---E'5+=>><#,5&D4G  H  U33 	, 	,LE5$0=88gDaU+++AAyy||r,   r`   )
r-   r.   r/   r   r=   ra   rb   r   r_   rc   rd   s   @r*   r~   r~      s        Y      0 /3	  #28,	       r,   r~   c                        e Zd Zdef fdZ	 	 ddej        deej                 fdZd Z	ddeej
        j                 fd	Zed
             Zd Z xZS )Modelr8   c                     t                                                       || _        |j        | _        t	          |          | _        |j        s(t          j        |j	        |j
        d          | _        d S d S )NFr:   )r<   r=   r8   r   r~   modelr#   rB   rC   r   r   lm_head)r)   r8   rJ   s     r*   r=   zModel.__init__   sq    	/%%
' 	T9T%5tUSSSDLLL	T 	Tr,   Nr   r   c                     |                      |||          }| j        j        r | j         j                            |          }n|                     |          }|S rm   )r   r8   r#   r   	as_linearr   )r)   r   rM   r   r|   s        r*   r_   zModel.__call__   sW     jj(8999( 	$*)33C88CC,,s##C
r,   c                     d |                                 D             }| j        j        r|                    dd            |S )Nc                 "    i | ]\  }}d |v	||S )zself_attn.rotary_emb.inv_freqr5   )r   kvs      r*   
<dictcomp>z"Model.sanitize.<locals>.<dictcomp>   s1     
 
 
Q0OWX0X0XAq0X0X0Xr,   zlm_head.weight)itemsr8   r#   pop)r)   weightss     r*   sanitizezModel.sanitize   sQ    
 
$]]__
 
 
 9( 	0KK($///r,   groupc                    |pt           j                                        }|                                }| j        j        D ]7}t          |j        j        d|          |j        _        t          |j        j	        d|          |j        _	        t          |j        j
        d|          |j        _
        t          |j        j        d|          |j        _        |j        xj        |z  c_        |j        xj        |z  c_        t          |j        j        d|          |j        _        t          |j        j        d|          |j        _        t          |j        j        d|          |j        _        9d S )Nzall-to-sharded)r   zsharded-to-all)ra   distributedinitsizer   r   r	   rt   rD   rE   rF   rG   r>   r?   ru   rh   ri   rj   )r)   r   Nr   s       r*   shardzModel.shard   s   .,,..JJLLZ& 	 	E%1&(8& & &EO" &2&(8& & &EO" &2&(8& & &EO" &2&(8& & &EO" O##)##O&&1,&& #/	#%5U# # #EI #/	#%5U# # #EI !-	!#35! ! !EI1	 	r,   c                     | j         j        S rm   )r   r   r(   s    r*   r   zModel.layers  s    z  r,   c                 *      fd j         D             S )Nc                 n    g | ]1}|j         rt          j        j                   nt	                      2S ))max_size)rp   r   r   r%   r   )r   r   r)   s     r*   r   z$Model.make_cache.<locals>.<listcomp>  sO     
 
 
  $)BCCCCYY	
 
 
r,   )r   r(   s   `r*   
make_cachezModel.make_cache
  s2    
 
 
 
 
 
 
 	
r,   r`   rm   )r-   r.   r/   r   r=   ra   rb   r   r_   r   r   Groupr   propertyr   r   rc   rd   s   @r*   r   r      s        TY T T T T T T /3	  #28,	      8BN$89    > ! ! X!
 
 
 
 
 
 
r,   r   )!dataclassesr   typingr   r   r   r   r   mlx.corecorera   mlx.nnrB   mlx.nn.layers.distributedr	   activationsr   baser   r   r   rM   r   r   
rope_utilsr   r   Moduler7   rf   ro   r~   r   r5   r,   r*   <module>r      s   " ! ! ! ! ! 3 3 3 3 3 3 3 3 3 3 3 3 3 3             2 2 2 2 2 2       T T T T T T T T T T + + + + + + + + ' ' ' ' ' ' K K K K K K K K89# 9# 9# 9# 9#	 9# 9# 9#xJ J J J J") J J J&    ry   6. . . . . . . .bJ
 J
 J
 J
 J
BI J
 J
 J
 J
 J
r,   