
    )j                        d dl mZ d dlmZmZmZmZmZ d dlm	Z
 d dlmZ ddlmZ ddlmZmZmZ e G d de                      Z	 	 dd
eeef         dee         deeeef                  deeef         fdZ G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    )	dataclass)AnyDictListOptionalUnionN   )swiglu)BaseModelArgscreate_attention_maskscaled_dot_product_attentionc                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   d
Zeed<   d
Z	eed<   d
Z
eed<   dZeed<   dZeed<   dS )	ModelArgs
model_typehead_dimnum_transformer_layers	model_dim
vocab_sizeffn_dim_divisornum_query_headsnum_kv_headsffn_multipliersTffn_with_glunormalize_qk_projectionsshare_input_output_layersgư>rms_norm_epsi'  rope_freq_constantN)__name__
__module____qualname__str__annotations__intr   r   boolr   r   r   floatr        _/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/openelm.pyr   r      s         OOOMMMNNNOOOL$%)d)))&*t***L% %%%%%%r'   r      vdivisor	min_valuereturnc                 |    ||}t          |t          | |dz  z             |z  |z            }|d| z  k     r||z  }|S )a  
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by the divisor
    It can be seen at:
    https://github.com/tensorflow/models/blob/2cfc99eff5e5eb729c6793d2f3d03aa1c9be2b15/research/slim/nets/mobilenet/mobilenet.py#L62
    Args:
        v: input value
        divisor: default to 8
        min_value: minimum divisor value
    Returns:
        new_v: new divisible value
    N   g?)maxr#   )r*   r+   r,   new_vs       r(   make_divisibler2      sT    " 		3q7Q;//7:WDEEEsQwLr'   c            	       |     e Zd Zdedef fdZ	 	 d
dej        deej                 dee	         dej        fd	Z
 xZS )	Attentionargslayer_idc                 f   t                                                       |j        x| _        }|| _        |j        x| _        }|j        |         x| _        }|j        |         x| _        }|dz  | _	        ||dz  z   |z  }t          j        ||d          | _        t          j        ||z  |d          | _        |j        | _        | j        r@t          j        ||j                  | _        t          j        ||j                  | _        t          j        |d|j                  | _        d S )Ng      r/   Fbiaseps)traditionalbase)super__init__r   r6   r   r   n_headsr   
n_kv_headsscalennLinearqkv_projout_projr   RMSNormr   q_normk_normRoPEr   rope)	selfr5   r6   r   r   r@   rA   op_size	__class__s	           r(   r?   zAttention.__init__:   s   #'=0 %)^3!%!5h!??w'+'8'BB*t^
j1n-9	)W5AAA	'H"4ieLLL(,(E%( 	F*X43DEEEDK*X43DEEEDKGH%d>UVVV			r'   Nxmaskcacher-   c                 <   |j         \  }}}|                     |          }|                    ||| j        | j        dz  z   | j                                      dddd          }t          j        || j        | j        | j        z   gd          \  }}	}
| j	        r*| 
                    |          }|                     |	          }	|R|                     ||j                  }|                     |	|j                  }	|                    |	|
          \  }	}
n*|                     |          }|                     |	          }	t          ||	|
|| j        |          }|                    dddd                              ||d          }|                     |          S )	Nr/   r   r	      axis)offset)rQ   rB   rP   )shaperE   reshaper@   rA   r   	transposemxsplitr   rH   rI   rK   rV   update_and_fetchr   rB   rF   )rL   rO   rP   rQ   BLDqkvquerieskeysvaluesoutputs               r(   __call__zAttention.__call__P   s    '1ammAkkq$,$/A"56
 

)Aq!Q

 	 !#$,t >?a!
 !
 !
v
 ( 	%kk'**G;;t$$Diii==G99T%,977D 11$??LD&&ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??}}V$$$r'   NNr   r   r    r   r#   r?   r[   arrayr   r   rf   __classcell__rN   s   @r(   r4   r4   9   s        WY W# W W W W W W2 $(#	%% %%8%% rx %% }	%%
 
%% %% %% %% %% %% %% %%r'   r4   c                   >     e Zd Zdedef fdZdej        fdZ xZ	S )MLPr5   r6   c                 J   t                                                       || _        |j        }|j        |         }t          t          ||j        z  |j                            }t          j	        |d|z  d          | _
        t          j	        ||d          | _        d S )N)r+   r/   Fr8   )r>   r?   r5   r   r   r#   r2   r   rC   rD   proj_1proj_2)rL   r5   r6   dimffn_multiplierintermediate_dimrN   s         r(   r?   zMLP.__init__y   s    	n-h7/,  
 
 iQ)9%9FFFi 0#EBBBr'   r-   c                     |                      |          }t          j        |dd          \  }}|                     t	          ||                    S )Nr/   rW   rT   )ro   r[   r\   rp   r
   )rL   rO   gates      r(   rf   zMLP.__call__   sE    KKNN(1ab)))a{{6$??+++r'   )
r   r   r    r   r#   r?   r[   ri   rf   rj   rk   s   @r(   rm   rm   x   sp        CY C# C C C C C C ,RX , , , , , , , ,r'   rm   c            	       |     e Zd Zdedef fdZ	 	 d
dej        deej                 dee	         dej        fd	Z
 xZS )TransformerBlockr5   r6   c                 .   t                                                       |j        }t          ||          | _        t          ||          | _        t          j        ||j	                  | _
        t          j        ||j	                  | _        d S )Nr6   r:   )r>   r?   r   r4   attnrm   ffnrC   rG   r   ffn_norm	attn_norm)rL   r5   r6   rq   rN   s       r(   r?   zTransformerBlock.__init__   s~    ndX666	th///
3D,=>>>CT->???r'   NrO   rP   rQ   r-   c                     |                      |                     |          ||          }||z   }|                     |                     |                    }||z   }|S N)rz   r}   r{   r|   )rL   rO   rP   rQ   rhouts          r(   rf   zTransformerBlock.__call__   sY     IIdnnQ''u55EHHT]]1%%&&!e
r'   rg   rh   rk   s   @r(   rw   rw      s        @Y @# @ @ @ @ @ @ $(#	
 
8
 rx 
 }	

 

 
 
 
 
 
 
 
r'   rw   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )OpenELMModelr5   c                 ~   t                                                       | _        j        | _        j        | _        | j        dk    sJ t          j        j        j                  | _        fdt          | j                  D             | _
        t          j        j        j                  | _        d S )Nr   c                 2    g | ]}t          |           S )ry   )rw   ).0r6   r5   s     r(   
<listcomp>z)OpenELMModel.__init__.<locals>.<listcomp>   s6     
 
 
 TH555
 
 
r'   r:   )r>   r?   r5   r   r   rC   	Embeddingr   token_embeddingsrangelayersrG   r   normrL   r5   rN   s    `r(   r?   zOpenELMModel.__init__   s    	/&*&A#"""" "T_dn M M
 
 
 
!$"=>>
 
 
 Jt~43DEEE			r'   Ninputsc                    |                      |          }|d gt          | j                  z  }t          ||d                   }t	          | j        |          D ]\  }} ||||          }|                     |          S )Nr   )rQ   )r   lenr   r   zipr   )rL   r   rQ   r   rP   layercs          r(   rf   zOpenELMModel.__call__   s    
 !!&))=FS---E$Qa11DK// 	( 	(HE1aQ'''AAyy||r'   r   )	r   r   r    r   r?   r[   ri   rf   rj   rk   s   @r(   r   r      ss        FY F F F F F F          r'   r   c                   T     e Zd Zdef fdZ	 ddej        fdZed             Z	 xZ
S )Modelr5   c                     t                                                       || _        |j        | _        t	          |          | _        |j        s(t          j        |j	        |j
        d          | _        d S d S )NFr8   )r>   r?   r5   r   r   transformerr   rC   rD   r   r   lm_headr   s     r(   r?   zModel.__init__   sq    	/'--- 	R9T^T_5QQQDLLL	R 	Rr'   Nr   c                     |                      ||          }| j        j        r | j         j                            |          }n|                     |          }|S r   )r   r5   r   r   	as_linearr   )rL   r   rQ   r   s       r(   rf   zModel.__call__   sW    
 vu--9. 	$"3==cBBCC,,s##C
r'   c                     | j         j        S r   )r   r   )rL   s    r(   r   zModel.layers   s    &&r'   r   )r   r   r    r   r?   r[   ri   rf   propertyr   rj   rk   s   @r(   r   r      s        RY R R R R R R      ' ' X' ' ' ' 'r'   r   )r)   N)dataclassesr   typingr   r   r   r   r   mlx.corecorer[   mlx.nnrC   activationsr
   r=   r   r   r   r   r%   r#   r2   Moduler4   rm   rw   r   r   r&   r'   r(   <module>r      s   " ! ! ! ! ! 3 3 3 3 3 3 3 3 3 3 3 3 3 3                   T T T T T T T T T T & & & & & & & && -1 UCZc] eSj)* 5#:	   4<% <% <% <% <%	 <% <% <%~, , , , ,") , , ,.    ry   ,    29   >' ' ' ' 'BI ' ' ' ' 'r'   