
    )j/              	          d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZmZmZmZm Z  d
 Z!ej"        ddfdej#        de$de%fdZ&	 dde'de(de(de(fdZ)d Z*dS )    N)Path)tree_map)tqdm)load_dataset)kl_div_loss)grad_checkpointiterate_batches)print_trainable_parameters)loadload_tokenizerpipeline_loadquantize_modelsavec                      t           j                                                                         fd} |||d            |||d           d S )Nc           
      J   dk    r||z  }|                     dd           t          t          t          | 
                    t	          |           
z  d| dk              x}D ]\  }\  }}|d d d df         } |          }t          j        |t
          j                  }t          j        |           dk    rYt          j	        |d	d
          dd	d f         }t          j
        ||d          }||ddz  }	t          j        |	||d           d S )Nr   T)parentsexist_okseedzComputing targets for )totaldescdisablestreami )kthaxis.r   010d.safetensors)logitsindices)mkdirr   	enumerater	   lenmxstop_gradientcpuevalargpartitiontake_along_axissave_safetensors)datapathsplitpbaribatch_r!   idxfile
batch_sizemax_seq_lengthmodelrankr   s             Z/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/quant/dwq.py_compute_targetsz-compute_dwq_targets.<locals>._compute_targets(   s`   199%<DJJtdJ333/$
NQUVVVWW$ii:-5e55		   D	N 	NMAzq !!!SbS&MEU5\\F%fRV<<<FGFOOOqyyof%bAAA#uvv+N+FCbAAA55555#DV*L*LMMM%	N 	N    validtrain)r&   distributedinitr9   )	r8   save_dir
train_data
valid_datar6   r7   r   r;   r9   s	   `   ``` @r:   compute_dwq_targetsrD      s     >  %%''DN N N N N N N N N2 Z7333Z733333r<   Fg       @dtypegradient_checkpointtemperaturec                     !"#$ t           j                                        }|                                $|                                !!fd"d }                                                       |           t                      |	rt           j	        d                    d|
z  # #fd  fd} "$fd}t          d                                            }d	}d}d}t          j                    } ||d
          x}}t          t          t          |                    t!          |          z            x}D ]\  }\  }}|d d d df         } ||d          }t          j        |            |||||          \  }}}t          j        ||           t           j                            |t           j                                                  $z  }t           j                            |t           j                                                  }||z  }|||z  z  }!dk    r|                    d|d           |dz   dz  dk    rm|t          j                    |z
  z  }t          j                    dz  }||z  }||z  } "d|d|dd|d|dd|d
           t          j                    }d}d}|dz   dz  dk    r |||
          } |||
          }||k     r "d|dd|dd                                t          fd |                     d S )!Nc                  :    dk    rt          j        | i | d S d S )Nr   )r   write)argskwargsr9   s     r:   rprintzdwq_quantize.<locals>.rprintV   s.    199J'''''' 9r<   c                     t          |d          rAt          |d          r3|j        dk    r*|j        dk     r!|                    ddgd           d S d S d S d S d S )	Nbits
group_sizeaffine   scalesbiasesF)keysrecurse)hasattrmoderO   unfreeze)r3   ms     r:   rY   zdwq_quantize.<locals>.unfreezeZ   s    Av	A<((	A (""

JJXx0%J@@@@@	A 	A 	A 	A #"
r<   r      c                                         t          
fd|                       |          }t          |t                    r|\  }}t	          j        ||d          }t          |z  |z            }t	          j        dd|j        d         z             |d d dd f         k     }|	                                }||z  	                                |z  }	|	|fS )Nc                 .    |                                S NastypexrE   s    r:   <lambda>z/dwq_quantize.<locals>.loss_fn.<locals>.<lambda>m   s     r<   r   r   r[   )
updater   
isinstancetupler&   r+   r   arangeshapesum)paramsrb   targetslengthsr!   idslossesmaskntokslossrE   r8   scales             r:   loss_fnzdwq_quantize.<locals>.loss_fnl   s    X7777@@AAAqgu%% 	>"LGS'"===FUV^UW_==yAa 0011GAAAqrrENB

v""$$u,U{r<   c                      t          j                  || ||          \  \  }}}t          j        |          }                    ||          }|||fS r^   )r&   value_and_gradnnaverage_gradientsapply_gradients)	inputsrk   rl   rj   rq   rp   gradsrs   opts	          r:   stepzdwq_quantize.<locals>.stepx   se    9r099FGW 
  
uu $U++$$UF33UF""r<   c           
         d}d}t          t          t          
                    t                    
z  dd          D ]\  }\  }}|d d d df         } ||d	          }t	          j        |            | |||          \  }}	t	          j        ||	           t          j                            |t          j        
          	                                z  }t          j                            |	t          j        
          	                                }	||	z  }|||	z  z  }||z  } d|d|d           |S )N        r   r   zComputing validation lossF)r   r   leaver   r=   r/   r   zValidation: it=z, loss=.3f)
r   r$   r	   r%   r&   r)   r?   all_sumr(   item)rj   itv_lossv_tokensr1   r2   rl   rk   rq   rp   r6   rs   r7   rM   r   	target_fnrC   
world_sizes             r:   validatezdwq_quantize.<locals>.validate   sv   #'
JTRRR  j//Z/,$
 $
 $
 	# 	#Aw !!!SbS&MEiq888GGG!'&%'BBKD%GD%   >))$rv)>>CCEE
RDN**5*@@EEGGEHdUl"FF 0b00T000111r<   c                 @    |                      t          j                  S r^   )r`   r&   float32)rb   s    r:   rc   zdwq_quantize.<locals>.<lambda>   s    !((2:&& r<   r~   )r   r   )r   r   r>   r   r   zloss=z.4f)r      g    eAzit=z, avg_loss=z, total_tokens=z, toks_per_sec=r   z, peak_memory_gb=   u*   ❌❌❌
[WARNING] Final validation loss z' is worse than initial validation loss u2   . Model quality will likely be degraded.
❌❌❌c                 .    |                                S r^   r_   ra   s    r:   rc   zdwq_quantize.<locals>.<lambda>   s    AHHUOO r<   )r&   r?   r@   sizer9   r>   apply_to_modulesr
   r   layersr   trainable_parameterstimer   r$   r	   r%   r)   r   r(   r   set_descriptionget_peak_memoryrd   )%r8   r   r{   rB   rC   r6   r7   r   rE   rF   rG   grouprY   r|   r   rj   
total_losstotal_tokenstokensticinitial_valid_loss
valid_lossr0   r   r2   rl   rk   rq   rp   toks_per_secpeak_memory_gbavg_lossrs   r9   rM   rr   r   s%   ``` `````                       @@@@@r:   dwq_quantizer   E   s    N!!EJ::<<D( ( ( ( (A A A 
KKMMM	8$$$u%%% )Q(((OE
 
 
 
 
 
 
# # # # # #           2 &&""$$ F
 JLF
)++C '/hv!&<&<&<< 
JTRRR  j//Z/	
 
 
 	
 1  1UG aaa"f)E2W555
"d5'7FCCeV
f~%%d26%::??AAJN&&uRV&<<AACC%dUl"
199  nnnn 555Q"}!!%s):;!#!3!5!5!;%.&Cr C Ch? C C| C C$BC C-;BC C   ikk
Fc>Q!&R000J&R(((JJ&&A*N A A2DKA A A	
 	
 	
 
LL3333V<<=====r<       	data_pathnum_samplesr7   num_valid_samplesc                    t          j        |ddddd          }t          ||           d         t          j                            t                              }|d |                                         }||||z                                            }fdfd	|D             }	fd
|D             }
|	|
fS )Nr>   z	train[:1])r.   train_splitvalid_splitTF)
hf_datasetr>   testr   c                 X                         |                    \  }}|d          |fS r^   )process)r4   r   offsetdatasetr7   s      r:   r   zload_data.<locals>.process   s0     66'00r<   c                 &    g | ]} |          S  r   .0r1   r   s     r:   
<listcomp>zload_data.<locals>.<listcomp>   !    ,,,AWWQZZ,,,r<   c                 &    g | ]} |          S r   r   r   s     r:   r   zload_data.<locals>.<listcomp>   r   r<   )typesSimpleNamespacer   nprandompermutationr%   tolist)	tokenizerr   r   r7   r   rK   perm
train_perm
valid_permr>   r=   r   r   s      `       @@r:   	load_datar      s     "&
 

   D 4++A.G9  W..Dl{l#**,,JkK2C$CCDKKMMJ1 1 1 1 1 1 -,,,,,,E,,,,,,,E%<r<   c                  	   t          j                    } |                     dddt          d           |                     dt          d d           |                     d	d
d           |                     dt          dd           |                     dt          dd           |                     dt          dd           |                     dt          d           |                     dt          d           |                     dt
          d           |                     dt          d           |                     dt          dd            |                     d!d"d#$           |                     d%t          d d&           |                     d'd"d($           |                     d)d"d*$           |                                 }t          j        	                                }|j
        }|j        sJ||                                z  d+k    r/||                                ||                                z  z
  z  }t          j                            |j                   t          j                            |j                   |j        )t#          |j                                                  }nd,}d t'          |j                  }t+          ||j        |j
        |j                  \  }}|r|j        V|j        r3|                                d-k    rt3          |j        d.          \  }}	nt5          |j        dd/          \  }}	nd |s)'t7          |||j        |j        |j        0           d}|j        rt=          d+           |rfd1}
nfd2}
|j        /t5          |j        dd3          \  }}}	d4|	vrt?          d5          n4tA          j!                  }tE          ||	|j#        |j$        6          \  }}	|rt          j%        &                                r-t          j'                    d7         }t          j(        |           tS          j*        |j+        d8          }tY          ||
||||j        |j        |j        |j-        9	  	         t]          |j/        |j        |||	           d S ):Nz--modelz-mzA model to distill from for DWQ. If `quantized-model` is not given the student model will be this model quantized according to `bits` and `group-size`.T)helptyperequiredz--quantized-modelzCAn already quantized model (the student model) to improve with DWQ.)r   defaultr   z
--mlx-path	mlx_modelz!Path to save the quantized model.)r   r   z--bits   z!Bits per weight for quantization.z--group-size@   zGroup size for quantization.z--num-samplesi   z&Number of samples to use for training.z--max-seq-lengthi  )r   r   z--seed{   z--learning-rategư>z--batch-sizez--data-pathzallenai/tulu-3-sft-mixturezIA Hugging Face dataset which is compatible with an mlx-lm dataset format.z--grad-checkpoint
store_truez0Use gradient checkpointing to reduce memory use.)actionr   z--target-dirzDirectory to save/load targets.z--targets-onlyzCompute the targets and exit.z
--pipelinez/Use pipeline parallel instead of data parallel.r   Fr[   )return_config)r   lazy)r6   r7   r   c                 `    t          j        |z  |ddz            }|d         |d         fS )Nr   r    r!   r"   )r&   r   )r3   r4   r/   rk   
target_dirs       r:   r   zmain.<locals>.target_fnc  s=    gj50c3L3L3L3LLMMG8$gi&888r<   c                      |           S r^   r   )r2   r4   r/   r8   s      r:   r   zmain.<locals>.target_fni  s    5<<r<   )r   r   quantizationz*Quantized model must already be quantized.)rP   rO    max_recommended_working_set_size)learning_ratebias_correction)r6   r7   r   rF   )0argparseArgumentParseradd_argumentstrintfloat
parse_argsr&   r?   r@   r   pipeliner   r   r   r   r   r   existsr   r8   r   r   r7   quantized_modelr   r   rD   r6   targets_onlyexit
ValueErrorcopydeepcopyr   rP   rO   metalis_availabledevice_infoset_wired_limit
optimizersAdamr   r   r   r   mlx_path)parserrK   r   r   has_targetsr   rB   rC   r3   configr   q_modelmax_rec_sizer{   r8   r   s                 @@r:   mainr      s   $&&F
'      R	     k0S     0	     S"3Q     5	     *dCCC
sC888
)tDDD
S!<<<
,X	     ?    
 S$5V     4S     >     DN!!E"K= A[5::<<7!;;uzz||kEJJLL&@@@INN49INN49"$/**
 ''))
tz**I&4>4#3T5H J

  $.6= 	OUZZ\\A--,TZtLLLE1ff#DJdNNNE1ff  
:1.	
 	
 	
 	
  Q 	 	9 	9 	9 	9 	9 	9	  	  	  	  	  '%) &
 &
 &
"F
 ''IJJJ ( -&&"	
 
 
	6  u(	x )~''(JK
<(((
/(:D
Q
Q
QC?*Y 0
 
 
 
 	
    r<   )r   )+r   r   r   r   pathlibr   mlx.corecorer&   mlx.nnrv   mlx.optimizersr   numpyr   	mlx.utilsr   r   mlx_lm.tuner.datasetsr   mlx_lm.tuner.lossesr   mlx_lm.tuner.trainerr   r	   mlx_lm.tuner.utilsr
   mlx_lm.utilsr   r   r   r   r   rD   bfloat16Dtypeboolr   r   r   r   r   r   r   r<   r:   <module>r      s                         # # # # # #                 . . . . . . + + + + + + A A A A A A A A 9 9 9 9 9 9             %4 %4 %4b k %L> L> 8L> L> L> L> L> L>h     	
    <e e e e er<   