
    )j|                     d   d dl mZ d dlmZ d dlmZmZmZmZ d dl	m
Z d dlmZ ddlmZmZmZ ddlmZ ddlmZ e G d	 d
e                      Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  ZdS )    )	dataclass)partial)AnyDictOptionalUnionN   )BaseModelArgscreate_attention_maskscaled_dot_product_attention)	BitLinear)initialize_ropec                      e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   d	Zee         ed
<   d	Z	ee         ed<   dZ
eed<   dZeed<   dZeed<   dZeed<   d	Zeeeeeef         f                  ed<   dZeed<   d	S )	ModelArgs
model_typehidden_sizenum_hidden_layersintermediate_sizenum_attention_headsnum_key_value_headsrms_norm_eps
vocab_sizeNhead_dimmax_position_embeddingsFattention_biasmlp_biasi'  
rope_thetarope_traditionalrope_scalingTtie_word_embeddings)__name__
__module____qualname__str__annotations__intfloatr   r   r   r   boolr   r   r   r   r   r   r         ^/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/bitnet.pyr   r      s        OOOOOO"Hhsm"""-1Xc]111 ND   HdJ"d""";?L(4U5#:%6 678??? $$$$$$r*   r   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
	Attentionargsc                 \   t                                                       |j        }|j        x| _        }|j        x| _        }|j        p	|j        |z  x| _        }|dz  | _        |j	        }t          |||z  |          | _        t          |||z  |          | _        t          |||z  |          | _        t          ||z  ||          | _        t          | j        |j        |j        |j        |j                  | _        t+          j        |j        |j                  | _        d S )Ng      ࿩biaseps)super__init__r   r   n_headsr   
n_kv_headsr   scaler   r   q_projk_projv_projo_projr   r   r   r   r   ropennRMSNormr   attn_sub_norm)selfr.   dimr6   r7   r   r   	__class__s          r+   r5   zAttention.__init__$   s+   !%!99w'+'??*#'=#OD4D4OOt^
,Wx%7nMMMZ(%:PPPZ(%:PPP( 2CnMMM#MO!(
 
	  Z(8d>OPPPr*   Nxmaskcachereturnc                    |j         \  }}}|                     |          |                     |          |                     |          }	}}|                    ||| j        d                              dddd          }|                    ||| j        d                              dddd          }|	                    ||| j        d                              dddd          }	|R|                     ||j	                  }|                     ||j	                  }|
                    ||	          \  }}	n*|                     |          }|                     |          }t          |||	|| j        |          }
|
                    dddd                              ||d          }
|                     |
          }
|                     |
          }
|
S )Nr      r	      )offset)rF   r8   rE   )shaper9   r:   r;   reshaper6   	transposer7   r=   rL   update_and_fetchr   r8   r@   r<   )rA   rD   rE   rF   BLDquerieskeysvaluesoutputs              r+   __call__zAttention.__call__>   s    '1a $AAAv //!Qb99CCAq!QOO||Aq$/266@@Aq!LL1dor::DDQ1aPPiii==G99T%,977D 11$??LD&&ii((G99T??D-T6djt
 
 
 !!!Q1--55aB??##F++V$$r*   NNr!   r"   r#   r   r5   mxarrayr   r   rX   __classcell__rC   s   @r+   r-   r-   #   s        QY Q Q Q Q Q Q: $(#	 8 rx  }	
 
       r*   r-   c                   :     e Zd Zdef fdZdej        fdZ xZS )MLPr.   c                 l   t                                                       |j        }|j        }t	          |d          r|j        }nd}t          |||          | _        t          |||          | _        t          |||          | _	        t          j        |j        |j                  | _        d S )Nr   Fr0   r2   )r4   r5   r   r   hasattrr   r   	gate_proj	down_projup_projr>   r?   r   ffn_sub_norm)rA   r.   rB   
hidden_dimr   rC   s        r+   r5   zMLP.__init__a   s    +
4$$ 	}HHH"3
BBB":sBBB jx@@@Jt'=4CTUUUr*   rG   c                     t          j        |                     |                    |                     |          z  }|                     |          }|                     |          }|S N)r>   relu2rc   re   rf   rd   )rA   rD   s     r+   rX   zMLP.__call__q   sU    HT^^A&&''$,,q//9a  NN1r*   	r!   r"   r#   r   r5   r[   r\   rX   r]   r^   s   @r+   r`   r`   `   sh        VY V V V V V V RX        r*   r`   c            	       x     e Zd Zdef fdZ	 	 d	dej        deej                 dee         dej        fdZ	 xZ
S )
TransformerBlockr.   c                 \   t                                                       |j        | _        |j        | _        t	          |          | _        t          |          | _        t          j	        |j        |j
                  | _        t          j	        |j        |j
                  | _        d S )Nr2   )r4   r5   r   r   r-   	self_attnr`   mlpr>   r?   r   input_layernormpost_attention_layernormrA   r.   rC   s     r+   r5   zTransformerBlock.__init__y   s    #'#; +"4t99!z$*:@QRRR(*
$"3)
 )
 )
%%%r*   NrD   rE   rF   rG   c                     |                      |                     |          ||          }||z   }|                     |                     |                    }||z   }|S ri   )ro   rq   rp   rr   )rA   rD   rE   rF   rhouts          r+   rX   zTransformerBlock.__call__   s]     NN4//22D%@@EHHT2215566!e
r*   rY   rZ   r^   s   @r+   rm   rm   x   s        	
Y 	
 	
 	
 	
 	
 	
 $(#	 8 rx  }	
 
       r*   rm   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )
LlamaModelr.   c                 d   t                                                       | _        j        | _        j        | _        t          j        j        j                  | _        fdt          j                  D             | _
        t          j        j        j                  | _        d S )Nc                 0    g | ]}t                     S ))r.   )rm   ).0_r.   s     r+   
<listcomp>z'LlamaModel.__init__.<locals>.<listcomp>   s2     
 
 
,-$'''
 
 
r*   r2   )r4   r5   r.   r   r   r>   	Embeddingr   embed_tokensrangelayersr?   r   normrs   s    `r+   r5   zLlamaModel.__init__   s    	/!%!7L$:JKK
 
 
 
16t7M1N1N
 
 
 Jt/T5FGGG			r*   Ninputsc                    |                      |          }|d gt          | j                  z  }t          ||d                   }t	          | j        |          D ]\  }} ||||          }|                     |          S )Nr   )rF   )r   lenr   r   zipr   )rA   r   rF   rv   rE   layercs          r+   rX   zLlamaModel.__call__   s    
 f%%=FS---E$Qa11DK// 	( 	(HE1aQ'''AAyy||r*   ri   rk   r^   s   @r+   ry   ry      ss        	HY 	H 	H 	H 	H 	H 	H         r*   ry   c                   Z     e Zd Zdef fdZ	 ddej        fdZd Ze	d             Z
 xZS )	Modelr.   c                     t                                                       || _        |j        | _        t	          |          | _        |j        s(t          j        |j	        |j
        d          | _        d S d S )NFr0   )r4   r5   r.   r   ry   modelr    r>   Linearr   r   lm_headrs   s     r+   r5   zModel.__init__   sq    	/%%
' 	T9T%5tUSSSDLLL	T 	Tr*   Nr   c                     |                      ||          }| j        j        r | j         j                            |          }n|                     |          }|S ri   )r   r.   r    r   	as_linearr   )rA   r   rF   rw   s       r+   rX   zModel.__call__   sT    
 jj''9( 	$*)33C88CC,,s##C
r*   c                     d |                                 D             }| j        j        r|                    dd            |S )Nc                 "    i | ]\  }}d |v	||S )zself_attn.rotary_emb.inv_freqr)   )r|   kvs      r+   
<dictcomp>z"Model.sanitize.<locals>.<dictcomp>   s1     
 
 
Q0OWX0X0XAq0X0X0Xr*   zlm_head.weight)itemsr.   r    pop)rA   weightss     r+   sanitizezModel.sanitize   sQ    
 
$]]__
 
 
 9( 	0KK($///r*   c                     | j         j        S ri   )r   r   )rA   s    r+   r   zModel.layers   s    z  r*   ri   )r!   r"   r#   r   r5   r[   r\   rX   r   propertyr   r]   r^   s   @r+   r   r      s        TY T T T T T T 
 

 
 
 
   ! ! X! ! ! ! !r*   r   )dataclassesr   	functoolsr   typingr   r   r   r   mlx.corecorer[   mlx.nnr>   baser
   r   r   bitlinear_layersr   
rope_utilsr   r   Moduler-   r`   rm   ry   r   r)   r*   r+   <module>r      s   " ! ! ! ! !       - - - - - - - - - - - -             T T T T T T T T T T ' ' ' ' ' ' ' ' ' ' ' ' % % % % % % % %&: : : : :	 : : :z    ")   0    ry   4       < !  !  !  !  !BI  !  !  !  !  !r*   