
    )ju                     `   d dl mZ d dlmZmZmZmZmZmZ d dl	m
Z d dlmZ ddlmZ ddlmZmZmZ ddlmZ e G d d	e                      Z G d
 dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    )	dataclass)AnyDictListOptionalTupleUnionN   )swiglu)BaseModelArgscreate_attention_maskscaled_dot_product_attention)SuScaledRoPEc                      e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   dZee         ed	<   d
Z	eed<   dZ
eed<   dZeeeeeee         f         f                  ed<   dZeed<   dZeed<   dZeed<   dZeed<   d ZdS )	ModelArgs
model_typehidden_sizenum_hidden_layersintermediate_sizenum_attention_headsrms_norm_eps
vocab_sizeNnum_key_value_headsi'  
rope_thetaFrope_traditionalrope_scaling      ?partial_rotary_factori   max_position_embeddingsi    original_max_position_embeddingstie_word_embeddingsc                       j          j         _          j        rXddh}t           fd|D                       st	          d|            j        d         dvrt          d           d  _        d S d S d S )Nlong_factortypec              3   *   K   | ]}|j         v V  d S N)r   ).0keyselfs     \/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/phi3.py	<genexpr>z*ModelArgs.__post_init__.<locals>.<genexpr>&   s+      IICsd//IIIIII    zrope_scaling must contain keys )longropesulinearzt[WARNING] rope_scaling 'type' currently only supports 'linear', 'su', and 'longrope'; setting rope scaling to false.)r   r   r   all
ValueErrorprint)r)   required_keyss   ` r*   __post_init__zModelArgs.__post_init__    s    #+'+'?D$ 		)*F3MIIII=IIIII T !R=!R!RSSS (0LLL K   %)!!!		) 		)
 MLr,   )__name__
__module____qualname__str__annotations__intfloatr   r   r   r   boolr   r   r	   r   r   r   r    r!   r4    r,   r*   r   r      s        OOOOOO)-#---J"d"""CGL(4U5$u++=%> >?@GGG#&5&&&#)S))),0$c000 %%%%) ) ) ) )r,   r   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
	Attentionargsc           	      B   t                                                       |j        }|j        x| _        }|j        J |j        x| _        }|j        | _        |j        |z  x| _        }|dz  | _	        ||z  d||z  z  z   }t          j        ||d          | _        t          j        ||z  |d          | _        t          ||j        z            }|j        rP|j        d         dv rAt#          ||j        |j        |j        |j        d         |j        d         	          | _        d S d
}|j        rC|j        d         dk    r2t-          |j        d         t.                    sJ d|j        d         z  }t          j        ||j        |j        |          | _        d S )Ng         Fbiasr$   )r-   r.   short_factorr#   )baser   r    rE   r#   r   r/   factorr
   )traditionalrF   scale)super__init__r   r   n_headsr   
n_kv_headsr   head_dimrI   nnLinearqkv_projo_projr:   r   r   r   r   r   r    rope
isinstancer;   RoPEr   )
r)   r@   dimrL   rM   rN   op_sizerope_dim
rope_scale	__class__s
            r*   rK   zAttention.__init__1   s   !%!99w'333'+'??*!%!7#'#3w#>>t^
H$qJ,A'BB	#wU;;;i( 2CeDDDx$"<<== 	!26!:>P!P!P$_(,(D151V!.~> -m<  DIII J  =T%6v%>(%J%J!$"3H"=uEEEEE!28!<<
 1_ 	  DIIIr,   Nxmaskcachereturnc                    |j         \  }}}|                     |          }| j        | j        z  }t	          j        |||| j        | j        z  z   gd          \  }	}
}|	                    ||| j        d                              dddd          }	|
                    ||| j        d                              dddd          }
|                    ||| j        d                              dddd          }|R| 	                    |	|j
                  }	| 	                    |
|j
                  }
|                    |
|          \  }
}n*| 	                    |	          }	| 	                    |
          }
t          |	|
||| j        |          }|                    dddd                              ||d          }|                     |          S )	Naxisr   rB   r
      )offset)r]   rI   r\   )shaperQ   rL   rN   mxsplitrM   reshape	transposerS   rd   update_and_fetchr   rI   rR   )r)   r[   r\   r]   BLDqkv	query_posquerieskeysvaluesoutputs                r*   __call__zAttention.__call__W   s    '1ammAL4=0	 ")Y4=)HHIPR!
 !
 !
v
 //!Qb99CCAq!QOO||Aq$/266@@Aq!LL1dor::DDQ1aPPiii==G99T%,977D 11$??LD&&ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??{{6"""r,   NNr5   r6   r7   r   rK   rf   arrayr   r   rt   __classcell__rZ   s   @r*   r?   r?   0   s        $Y $ $ $ $ $ $R $(#	# #8# rx # }	#
 
# # # # # # # #r,   r?   c                   4     e Zd Z fdZdej        fdZ xZS )MLPc                     t                                                       t          j        |d|z  d          | _        t          j        ||d          | _        d S )NrB   FrC   )rJ   rK   rO   rP   gate_up_proj	down_proj)r)   rV   
hidden_dimrZ   s      r*   rK   zMLP.__init__z   sR    Ic1z>FFF:s???r,   r^   c                     |                      |          }t          j        |dd          \  }}|                     t	          ||                    S )NrB   r`   ra   )r}   rf   rg   r~   r   )r)   r[   gates      r*   rt   zMLP.__call__   sI    a  (1ab)))a~~fT1oo...r,   )r5   r6   r7   rK   rf   rw   rt   rx   ry   s   @r*   r{   r{   y   sZ        @ @ @ @ @
/RX / / / / / / / /r,   r{   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
TransformerBlockr@   c                    t                                                       |j        | _        |j        | _        t	          |          | _        t          |j        |j                  | _        t          j
        |j        |j                  | _        t          j
        |j        |j                  | _        || _        d S )Neps)rJ   rK   r   r   r?   	self_attnr{   r   mlprO   RMSNormr   input_layernormpost_attention_layernormr@   r)   r@   rZ   s     r*   rK   zTransformerBlock.__init__   s    #'#; +"4t')?@@!z$*:@QRRR(*
$"3)
 )
 )
% 			r,   Nr[   r\   r]   r^   c                     |                      |                     |          ||          }||z   }|                     |                     |                    }||z   }|S r&   )r   r   r   r   )r)   r[   r\   r]   rhouts          r*   rt   zTransformerBlock.__call__   s]     NN4//22D%@@EHHT2215566!e
r,   ru   rv   ry   s   @r*   r   r      s        
Y 
 
 
 
 
 
 $(#	
 
8
 rx 
 }	

 

 
 
 
 
 
 
 
r,   r   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )	Phi3Modelr@   c                 ~   t                                                       | _        j        | _        j        | _        | j        dk    sJ t          j        j        j                  | _        fdt          j                  D             | _
        t          j        j        j                  | _        d S )Nr   c                 0    g | ]}t                     S ))r@   )r   )r'   _r@   s     r*   
<listcomp>z&Phi3Model.__init__.<locals>.<listcomp>   s2     
 
 
,-$'''
 
 
r,   r   )rJ   rK   r@   r   r   rO   	Embeddingr   embed_tokensrangelayersr   r   normr   s    `r*   rK   zPhi3Model.__init__   s    	/!%!7""""L$:JKK
 
 
 
16t7M1N1N
 
 
 Jt/T5FGGG			r,   Ninputsc                    |                      |          }|d gt          | j                  z  }t          ||d                   }t	          | j        |          D ]\  }} ||||          }|                     |          S )Nr   )r   lenr   r   zipr   )r)   r   r]   r   r\   layercs          r*   rt   zPhi3Model.__call__   s    
 f%%=FS---E$Qa11DK// 	" 	"HE1aq!!AAyy||r,   r&   )	r5   r6   r7   r   rK   rf   rw   rt   rx   ry   s   @r*   r   r      ss        
HY 
H 
H 
H 
H 
H 
H         r,   r   c                   T     e Zd Zdef fdZ	 ddej        fdZed             Z	 xZ
S )Modelr@   c                     t                                                       |j        | _        t          |          | _        |j        s&t          j        |j        |j	        d          | _
        || _        d S )NFrC   )rJ   rK   r   r   modelr!   rO   rP   r   r   lm_headr@   r   s     r*   rK   zModel.__init__   sc    /t__
' 	T9T%5tUSSSDL			r,   Nr   c                     |                      ||          }| j        j        r | j         j                            |          }n|                     |          }|S r&   )r   r@   r!   r   	as_linearr   )r)   r   r]   r   s       r*   rt   zModel.__call__   sT    
 jj''9( 	$*)33C88CC,,s##C
r,   c                     | j         j        S r&   )r   r   )r)   s    r*   r   zModel.layers   s    z  r,   r&   )r5   r6   r7   r   rK   rf   rw   rt   propertyr   rx   ry   s   @r*   r   r      s        Y       
 

 
 
 
 ! ! X! ! ! ! !r,   r   )dataclassesr   typingr   r   r   r   r   r	   mlx.corecorerf   mlx.nnrO   activationsr   rF   r   r   r   
rope_utilsr   r   Moduler?   r{   r   r   r   r=   r,   r*   <module>r      s   " ! ! ! ! ! : : : : : : : : : : : : : : : :                   T T T T T T T T T T $ $ $ $ $ $ ) ) ) ) ) ) ) )BF# F# F# F# F#	 F# F# F#R	/ 	/ 	/ 	/ 	/") 	/ 	/ 	/    ry   4    	   >! ! ! ! !BI ! ! ! ! !r,   