
    )j                     z   d dl Z d dlmZ d dlmZmZ d dlmZ d dl	m
Z
 ddlmZ ddlmZmZ 	 d dlZn&# e$ r  ed            e j        d           Y nw xY we G d d	e                      Z G d
 de
j                  Z G d de
j                  Z G d de
j                  Z G d de
j                  ZdS )    N)	dataclass)AnyOptional   )swiglu)BaseModelArgscreate_attention_maskz2To run olmo install ai2-olmo: pip install ai2-olmoc                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   dZeed	<   d
Ze	ed<   dZ
eed<   d
Ze	ed<   d ZdS )	ModelArgs
model_typed_modeln_layersmlp_hidden_sizen_heads
vocab_sizeembedding_sizei'  
rope_thetaFrope_traditional   	mlp_ratioweight_tyingc                 J    | j         | j         n| j        | j        z  | _         d S N)r   r   r   selfs    \/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/models/olmo.py__post_init__zModelArgs.__post_init__"   s2     #/   $,. 	    N)__name__
__module____qualname__str__annotations__intr   floatr   boolr   r   r    r   r   r   r      s         OOOLLLMMMLLLOOOJ"d"""IsL$
 
 
 
 
r   r   c            	            e Zd Zdef fdZ	 	 d
dej        deej                 dee         dej        fdZ		 	 d
dej        deej                 dee         dej        fd	Z
 xZS )TransformerBlockargsc                 \   t                                                       |j        | _        |j        }t	          j        ||j        d          | _        t	          j        |j        dz  |d          | _        t	          j	        |d          | _
        t	          j	        |d          | _        || j        z  }|dz  | _        t	          j        |d|z  d          | _        t	          j        ||d          | _        t	          j        ||j        |j                  | _        || _        d S )NFbias   affineg         )traditionalbase)super__init__r   r   nnLinearr   ff_projff_out	LayerNormatt_normff_normscaleatt_projattn_outRoPEr   r   roper*   )r   r*   dimhead_dim	__class__s       r   r5   zTransformerBlock.__init__+   s   |lyd&:GGGi 4 93UKKKS777|C666$,&t^
	#q3wU;;;	#s777G-
 
 
	 			r   Nxmaskcachereturnc                    |j         \  }}}t          j        |                     |          dd          \  }}}	|                    ||| j        d                              dddd          }|                    ||| j        d                              dddd          }|	                    ||| j        d                              dddd          }	|R|                     ||j                  }|                     ||j                  }|	                    ||	          \  }}	n*|                     |          }|                     |          }|| j
        z  |                    dddd          z  }
||
|z  }
t          j        |
                    t          j                  d                              |
j                  }
|
|	z                      dddd                              ||d          }|                     |          S )Nr1   axisr   r.   r   )offset)shapemxsplitr>   reshaper   	transposerA   rM   update_and_fetchr=   softmaxastypefloat32dtyper?   )r   rE   rF   rG   BLDquerieskeysvaluesscoresoutputs               r   attendzTransformerBlock.attendD   s    '1a "q)9)912 F F Fv //!Qb99CCAq!QOO||Aq$,33==aAqII1dlB77AA!Q1MMiii==G99T%,977D 11$??LD&&ii((G99T??DDJ&$..Aq!*D*DDdNFFMM"*55B???FFv|TT6/,,Q1a88@@ArJJ}}V$$$r   c                 0   |                      |                     |          ||          }||z   }t          j        |                     |                     |                    dd          \  }}||                     t          ||                    z   }|S )Nr.   rJ   rK   )r`   r;   rO   rP   r8   r<   r9   r   )	r   rE   rF   rG   rhx1x2outs	            r   __call__zTransformerBlock.__call__b   s     KKa(($66E$,,t||A77DDDB$++fRnn---
r   )NN)r   r    r!   r   r5   rO   arrayr   r   r`   rg   __classcell__rD   s   @r   r)   r)   *   s        Y      8 $(#	% %8% rx % }	%
 
% % % %B $(#	 8 rx  }	
 
       r   r)   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )Transformerr*   c                    t                                                       j        | _        j        | _        t	          j        j        j                  | _        fdt          j                  D             | _
        | j        s&t	          j        j        j        d          | _        t	          j        j        d          | _        d S )Nc                 0    g | ]}t                     S ))r*   )r)   ).0_r*   s     r   
<listcomp>z(Transformer.__init__.<locals>.<listcomp>x   s%    QQQq'T222QQQr   Fr,   r/   )r4   r5   r   r   r6   	Embeddingr   r   wterangeblocksr7   r9   r:   normr   r*   rD   s    `r   r5   zTransformer.__init__r   s     -< 3T\BBQQQQE$-<P<PQQQ  	S)DL$2EERRRDKLe<<<			r   Ninputsc                 v   |                      |          }|d gt          | j                  z  }t          ||d                   }t	          | j        |          D ]\  }} ||||          }|                     |          }| j        r| j                             |          |fS |                     |          S )Nr   )	rs   lenru   r	   ziprv   r   	as_linearr9   )r   rx   rG   rc   rF   blockcs          r   rg   zTransformer.__call__}   s    
 HHV=FS---E$Qa11DK// 	" 	"HE1aq!!AAIIaLL 	08%%a((%//{{1~~r   r   	r   r    r!   r   r5   rO   rh   rg   ri   rj   s   @r   rl   rl   q   sl        	=Y 	= 	= 	= 	= 	= 	=         r   rl   c                   >     e Zd Zdef fdZ	 ddej        fdZ xZS )	OlmoModelr*   c                 p    t                                                       t          |          | _        d S r   )r4   r5   rl   transformerrw   s     r   r5   zOlmoModel.__init__   s/    &t,,r   Nrx   c                 .    |                      ||          S r   )r   r   rx   rG   s      r   rg   zOlmoModel.__call__   s    
 ...r   r   r   rj   s   @r   r   r      sl        -Y - - - - - - / // / / / / / / /r   r   c                   T     e Zd Zdef fdZ	 ddej        fdZed             Z	 xZ
S )Modelr*   c                     t                                                       |j        | _        t          |          | _        || _        d S r   )r4   r5   r   r   modelr*   rw   s     r   r5   zModel.__init__   s:    /t__
			r   Nrx   c                 .    |                      ||          S r   )r   r   s      r   rg   zModel.__call__   s    
 zz&%(((r   c                 $    | j         j        j        S r   )r   r   ru   r   s    r   layerszModel.layers   s    z%,,r   r   )r   r    r!   r   r5   rO   rh   rg   propertyr   ri   rj   s   @r   r   r      s        Y       ) )) ) ) ) - - X- - - - -r   r   )sysdataclassesr   typingr   r   mlx.corecorerO   mlx.nnr6   activationsr   r3   r   r	   hf_olmoImportErrorprintexitr   Moduler)   rl   r   r   r'   r   r   <module>r      s   


 ! ! ! ! ! !                                   6 6 6 6 6 6 6 6NNNN   	E
>???CHQKKKKK
 
 
 
 
 
 
 
 
*D D D D Dry D D DN         ")      F
/ 
/ 
/ 
/ 
/	 
/ 
/ 
/- - - - -BI - - - - -s   3  AA