
    )jL                     B   d Z ddlZddlZddlZddlZddlZddlZddlmZ ddl	m
Z
 ddlmZmZmZ ddlZddlmZ ddlmZ ddlZddlmZ ddlmZ ddlmZ d	d
lmZ d	dlmZ d	dl m!Z! d	dl"m#Z# dZ$d Z%d Z&d Z'd Z( ed           G d de                      Z)d Z*dS )z9
Adapted from a PyTorch implementation by David Grangier
    N)version)Path)AnyCallableOptional)LM)register_model)tqdm   )batch_generate)make_prompt_cache)make_sampler)loadi    c                      t                      fd|D             }fd|D             } dt          |                   S )zFLimit a string <s> to the first occurrence of any substring in untils.c                 :    g | ]}                     |          S  )find).0uss     Y/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/evaluate.py
<listcomp>z!_rstrip_until.<locals>.<listcomp>$   s#    ###q###    c                 $    g | ]}|d k     rn|S r   r   )r   xls     r   r   z!_rstrip_until.<locals>.<listcomp>%   s%    &&&qa!ee&&&r   N)lenmin)r   untilsfr   s   `  @r   _rstrip_untilr"   !   sU    AA####F###A&&&&A&&&AXs1vvX;r   c                 p    |                      |          x}dk    r| |t          |          z   d         S | S )zHTruncate the prefix of the string after the first occurrence of pattern.N)r   r   )r   patternidxs      r   _lstripr'   )   s=    vvg2%%s7||#%%&&Hr   c                     t          j        d | D                       }|                                t          j        fd| D             d          }t	          j        |          t	          j        |          fS )Nc                 ,    g | ]}t          |          S r   r   )r   r   s     r   r   z_pad_inputs.<locals>.<listcomp>1   s    ///1A///r   c           
      ^    g | ])}t          j        |d t          |          z
  f          *S r   )nppadr   )r   r   maxlens     r   r   z_pad_inputs.<locals>.<listcomp>4   s4    999QAvA'	(	(999r   r   axis)r,   arraymaxstackmx)inputslengthspaddedr.   s      @r   _pad_inputsr8   0   s    h/////00G[[]]FX9999&999  F 8FRXg....r   c                  $     ddt           f fd}|S )NTreturnc                 4     | j         j        |fd|| dS )NF)tokenizeadd_generation_promptcontinue_final_message)	tokenizerapply_chat_template)selfchat_historyr=   extra_kwargss      r   r@   z-chat_template_fn.<locals>.apply_chat_template;   s?    1t~1
"7'<#<	
 

 
 
 	
r   )T)str)rC   r@   s   ` r   chat_template_fnrE   :   s5    
 
s 
 
 
 
 
 
 r   mlxlmc                   L    e Zd Z e            Z	 	 	 	 	 ddedee         dedee         ded	ee	e
j        ge
j        f                  d
df fdZddefdZddee         defdZd Zed
efd            Zd
eeeef                  fdZd
ee         fdZd
ee         fdZ xZS )MLXLMN   Fpath_or_hf_repo
max_tokens
batch_sizeuse_chat_templatetrust_remote_codesamplerr:   c                     t                                                       d|rdnd i}t          ||          \  | _        | _        || _        || _        || _        || j        j        d u| _        || _	        d S )NrN   T)tokenizer_config)
super__init__r   _modelr?   _max_tokens_batch_sizerM   chat_template_sampler)	rA   rJ   rK   rL   rM   rN   rO   rQ   	__class__s	           r   rS   zMLXLM.__init__L   s     	/9J1TPTU&*.>'
 '
 '
#T^ &%!2$%)^%A%MD"r      	step_sizec                    t          j        |          d          }t          | j                  }t	          d|j        d         |          D ]Y}|                     |d d |||z   f         |          }t          j        d |D                        t          j                     Zt          j	        |d d dd d f         
                    t           j                            }||fS )Nr   r   cachec                     g | ]	}|j         
S r   )stater   cs     r   r   z)MLXLM._process_prompt.<locals>.<listcomp>f   s    ,,,QW,,,r   r$   )r4   r1   r   rT   rangeshapeevalclear_cachennlog_softmaxastypefloat32)rA   promptr[   r^   ilogitslogprobss          r   _process_promptzMLXLM._process_prompta   s    &!!$'!$+..q&,q/955 	 	A[[1q9}+<(<!=U[KKFG,,e,,,---N>&B"2"9"9"*"E"EFFr   r^   c           	         t          |          \  }}|dd df         |ddd f         }}|pt          | j                  }d}g g }}t          d|j        d         |          D ]Z}	|d d |	|	|z   f         }
|
j        d         }|                     |
|          }t          j        |                    t          j	                            }t          j
        ||d d |	|	|z   t          j        f         d          d         }|d d |	|	|z   f         t          j        |d          k    }t          j        t          j        |||z             |d d d f         k     |d          }t          j        ||           t          j                     |                    |           |                    |           ||z  }\t          j        |d          }t          j        |d          }|||fS )	N.r$   r   r   r]   r/   ).r   F)r8   r   rT   rc   rd   rg   rh   ri   r4   rj   take_along_axisnewaxisargmaxwherearangere   rf   appendconcatenate)rA   r5   r^   r[   r6   targetsoffsetscores	is_greedyrl   inpTrm   	log_probsscoreigs                   r   	_score_fnzMLXLM._score_fnk   s   %f-- crc*F37O7*4;77	q&,q/955 	 	AAI--.C	!A[[E[22Fv}}RZ'@'@AAI&7111a!i-&7#CD2  E AI--.")F2L2L2LLB")FAJ77'!!!T':JJBPUVVBGE2NR   MM%   aKFFQ///N91555	w	))r   c                        fd|D             S )Nc                 n    g | ]1}t          j                            |j                              2S )add_special_tokens)tupler?   encoderM   )r   trA   s     r   r   z#MLXLM._tokenize.<locals>.<listcomp>   sS     
 
 
  %%a@V<V%WW 
 
 
r   r   )rA   textss   ` r   	_tokenizezMLXLM._tokenize   s0    
 
 
 
 	
 
 
 	
r   c                 B    | j         j                            dd          S )N/__)r?   name_or_pathreplace)rA   s    r   tokenizer_namezMLXLM.tokenizer_name   s    ~*223===r   c                 2  $ t          j        dt          |          z             t          j                                        }t          j        t                    }t          |          D ]8\  }}||j
        d                                      ||j
        d         f           9t          |                                          }g }g }|                                D ]8}	t          |	 \  }}
|                    |           |                    |
           9||                                d|                                         }||                                d|                                         }d}g g }}t#          t          ||          t          |                    D ]\  $}|                     $g          d         }|                     $fd|D                       }t'          d |D                       }| j        pt*          }t'          d||z
  dz
            }t          |          }t'          t          |          |z
  d          }|t          |          |z
  d         }|dk    rk|dz  }t,                              t1          d           gt          |          z             t2                              d	gt          |          z             :|                     |          \  }}t          j        |                                          }|D ]3}|t          |          d         }|                    |d|d         f                                                    |                    |d         |k               t          |          dk    r|                     t          j        |          dddf         t?          j         |          
          \  }}}|dxx         t          j!        |                                          z  cc<   |dxx         t          j"        |                                          z  cc<   5|dk    rt          j        d| ddz              t          |          }t          j        #                    t          |          t          j$                                                  }|dg|t          |          z
  z  z   }|d	g|t          |          z
  z  z   }t          j        |          }t          j        |          }t          j        %                    |t          j$                  }t          j        %                    |t          j$                  }t          j&        ||           g } tO          |                                          D ]W}!d ||!d|                                         D             }"|"|g|t          |"          z
  z  z  }"|                     |"           Xt          j(        t          j        |                     }#|d|         |#         }|d|         |#         }t          t          |)                                |)                                                    S )a  Compute log-likelihood of generating a continuation from a context.
        Downstream tasks should attempt to use loglikelihood instead of other
        LM calls whenever possible.
        :param requests: list[Instance]
            A list of Instance objects, with property `args` which returns a tuple (context, continuation).
            `context: str`
                Context string. Implementations of LM must be able to handle an
                empty context string.
            `continuation: str`
                The continuation over which log likelihood will be calculated. If
                there is a word boundary, the space should be in the continuation.
                For example, context="hello" continuation=" world" is correct.
        :return: list[tuple[float, bool]]
            A list of pairs (logprob, isgreedy)
            `logprob: float`
                The log probability of `continuation`.
            `isgreedy`:
                Whether `continuation` would be generated by greedy sampling from `context`.
        z&Estimating loglikelihood for %d pairs.r   r   N)totalc                     g | ]}|z   S r   r   )r   rqs     r   r   z'MLXLM.loglikelihood.<locals>.<listcomp>   s    ,?,?,?qQU,?,?,?r   c              3   4   K   | ]}t          |          V  d S Nr*   )r   r   s     r   	<genexpr>z&MLXLM.loglikelihood.<locals>.<genexpr>   s(      !A!AQ#a&&!A!A!A!A!A!Ar   infFr]   r$   zPrefix eliminated for z requests with zcompletion longer than context.streamc                     g | ]	}|D ]}|
S r   r   )r   questionr&   s      r   r   z'MLXLM.loglikelihood.<locals>.<listcomp>   s9        X FI   r   )*logginginfor   r4   distributedinitcollectionsdefaultdictlist	enumerateargsrv   keysvalueszipranksizer
   r   r2   rU   DEFAULT_MAX_TOKENS
all_scoresextendfloatall_is_greedyro   rs   itemr   r1   copydeepcopysumallall_maxcpu
all_gatherre   rc   argsorttolist)%rA   requestsgroup
group_reqsr&   req	questions	responsesindicesvresplong_completionsrz   r{   rsprefixfull_sequencesmax_completed_lrK   
truncationorig_prefix_lprefix_lrn   r^   max_idxr   r5   r   _r   num_results	per_groupall_indicesr   rank_indicesinv_sortr   s%                                       @r   loglikelihoodzMLXLM.loglikelihood   s   ( 	=HMNNN##%% !,T22
!(++ 	? 	?HCsx{#**C!+=>>>>**++		""$$ 	# 	#AQICNN3T"""" ejjll:ejjll:;	ejjll:ejjll:;		#i333y>>JJJ $	3 $	3EAr^^QC((+F!^^,?,?,?,?B,?,?,?@@N!!A!A.!A!A!AAAO )?-?JQ* <q @AAJKKM3v;;3Q77HCKK(2445F 1}} A% !!E%LL=/CGG";<<<$$eWs2ww%6777 #226::OHei))..00G# 3 33v;;==) hq&)|499;;<<<  &)w"6888v;;!###~~HV$$T111W-T]55I5I  .    q" r


bfUmm00222


"!2!223 aLJ)9JJJ34   (mmN**3v;;rv*FFKKMM	1#S[[!8999s9~~+E FF	&!!HY''	**6"&*AAN--i-GG	
	""" %**,,'' 	- 	-D $+D,@EJJLL,@$A  L [MY\9J9J-JKKL|,,,,:bh{3344%h/l{l+H5	C)9)9););<<===r   c                 "   t          j        dt          |          z             |                     d |D                       }g }t	          t          dt          |          | j                            D ]}|||| j        z            }|                     |          \  }}}t          j	        |j
        d                   |dddf         k     }	|                    |	|z                      d                                                     |S )a  Compute full log-likelihood of a string, with no truncation, for perplexity computation
        - We will use the full max context length of the model.
        - For inputs that exceed the max context length, we divide the tokenized string into chunks of up to
        the max context length.
        - IMPORTANT: Each document's loglikelihood/perplexity is computed *separately*, unlike other implementations
          which may simply concatenate multiple documents together.
        - IMPORTANT: We maximize the amount of context for each prediction. Specifically, for inputs that we break into
          multiple chunks, the last input will still a full-sized context.
          Example:
            Input tokens: [ 0 1 2 3 4 5 6 7 8 9 ]
            Prefix: EOT
            Max context length: 4
            Resulting input/prediction pairs:
                INPUT:  EOT   0   1   2
                PRED:     0   1   2   3
                INPUT:    3   4   5   6
                PRED:     4   5   6   7
                INPUT:    5   6   7   8
                PRED:             8   9
          Observe that:
            1. Each token is predicted exactly once
            2. For the last pair, we provide the full context, but only score the last two tokens
        :param requests: list[Instance]
            A list of Instance objects with property `args` which returns a tuple (context,).
            string: str
                String for which we are computing overall loglikelihood
        :return: list[tuple[float]]
            A list of tuples (logprob,)
            logprob: float
                The log probability of `context` conditioned on the EOT token.
        z2Estimating loglikelihood rolling for %d sequences.c                 (    g | ]}|j         d          S r   r   r   r   s     r   r   z/MLXLM.loglikelihood_rolling.<locals>.<listcomp>+  s     A A A! A A Ar   r   r$   Nr/   )r   r   r   r   r
   rc   rV   r   r4   ru   rd   r   r   r   )
rA   r   r5   r   rl   batchrz   r6   r   masks
             r   loglikelihood_rollingzMLXLM.loglikelihood_rolling  s   @ 	@3x==P	
 	
 	
  A A A A ABB
eAs6{{D,<==>> 	E 	EA1q4#3334E!%!6!6FGQ9V\"-..D1AADtf}11r1::AACCDDDDr   c                     t           j                                        }t          |          }||                                d|                                         }t          j        dt          |          z             t          d |D              \  }} fd|D             } fd|D             }t           j
         j        ||d j                  j        }t          t          ||                    D ]J\  }\  }	}
t          |	|
d                   ||<    j        j        rt#          |	 j        j                  ||<   K|                                d	k    r&t          j        t           j                  5  ||                                z   d	z
  |                                z  }|t          |          z
  }d
 |D             }t          j        t-          d |D                                 t           j                                                                      t          j        d |D             dg|z  z             }t          j        fd|D             dgz  g|z  z   t           j                  }t           j                            |d                                       dd	                              dd	                                          }t           j                            |d                                       dd	                              dd	                                          }|d|         }|d|         }d t          ||          D             }ddd           n# 1 swxY w Y   |S )aC  Generate greedily until a stopping sequence
        :param requests: list[Instance]
            A list of Instance objects with property `args` which returns a tuple (context, until).
            context: str
                Context string
            until: [str]
                The string sequences to generate until. These string sequences
                may each span across multiple tokens, or may be part of one token.
        :return: list[str]
            A list of strings continuation
            continuation: str
                The generated continuation.
        Nz)Generating continuation for %d sequences.c                     g | ]	}|j         
S r   r   r   s     r   r   z(MLXLM.generate_until.<locals>.<listcomp>J  s    !?!?!?s#(!?!?!?r   c                 T    g | ]$}j                             |j                    %S r   )r?   r   rM   )r   contextrA   s     r   r   z(MLXLM.generate_until.<locals>.<listcomp>O  sM     
 
 
  N!!0F,F "  
 
 
r   c                 T    g | ]$}j         p|                    d t                    %S )max_gen_tokens)rU   getr   )r   optrA   s     r   r   z(MLXLM.generate_until.<locals>.<listcomp>W  sA     
 
 
 M(8:L M M
 
 
r   T)modelr?   promptsrK   verboserO   untilr   c                 R    g | ]$}t          |                    d                     %S )zutf-8)r   r   ra   s     r   r   z(MLXLM.generate_until.<locals>.<listcomp>o  s,    LLL1tAHHW$5$566LLLr   c              3   4   K   | ]}t          |          V  d S r   r*   ra   s     r   r   z'MLXLM.generate_until.<locals>.<genexpr>p  s(      &C&C!s1vv&C&C&C&C&C&Cr   c                 ,    g | ]}t          |          S r   r*   ra   s     r   r   z(MLXLM.generate_until.<locals>.<listcomp>r  s    #@#@#@qCFF#@#@#@r   r   c                 B    g | ]}|d gt          |          z
  z  z   S r   r*   )r   rb   max_lens     r   r   z(MLXLM.generate_until.<locals>.<listcomp>t  s0    GGGaQ!#a&& 011GGGr   c                 f    g | ].\  }}t          |d |                                                   /S r   )	bytearraydecode)r   rb   r   s      r   r   z(MLXLM.generate_until.<locals>.<listcomp>  sE       26!QIae$$++--  r   )r4   r   r   r   r   r   r   r   r   r   rT   r?   rX   r   r   r"   has_thinkingr'   	think_endr   r   r1   r2   r   r   uint8r   swapaxesflattenr   )rA   r   r   total_requestscontextsoptionsrK   completionsetextr   pad_tor-   r6   r   s   `             @r   generate_untilzMLXLM.generate_until5  s    ##%% XEJJLL8EJJLL89@3x==PQQQ!?!?h!?!?!?@'

 
 
 
 $	
 
 

 
 
 

 
 


 %+n!M
 
 
  	 (K(A(ABB 	I 	INA{c*4W>>KN~* I!(t~/G!H!HA ::<<!26""  (5::<<7!;

Ls;///LLLLL(3&C&C{&C&C&C#C#CDD.0099>>@@(#@#@K#@#@#@A39#LMM hGGGG;GGGsW}o+,H  N--k$.?@@Xa^^WQ]]VXX	  N--gdm<<Xa^^WQ]]VXX	  */>/:!/>/2 :=k7:S:S  5              < s   =G1M::M>M>)NrI   NFN)rZ   )NrZ   )__name__
__module____qualname__rE   r@   rD   r   intboolr   r4   r1   rS   ro   r   r   r   propertyr   r   r   r   r   r   r   __classcell__)rY   s   @r   rH   rH   G   s        +*,,
 %),0"'<@     SM  	 
 $D>     (BH:rx#789  
           *     * *x} * * * * *B
 
 
 > > > > X>n>eE4K.@)A n> n> n> n>`+e + + + +ZU$s) U U U U U U U Ur   rH   c                  n	   t          j        d          } |                     ddd           |                     ddd           |                     d	d
d           |                     dt          dd           |                     dt          d d           |                     dt          dd            |                     dd dt                     |                     dt          dd           |                     dddd            |                     d!t           j        d"d             |                     d#t
          j        d$d%           |                     d&dd'd            |                     d(dd)*           |                     d+t          d,d-           |                     d.t          d/d0           |                     d1t          d2d3           |                                 }t          |j
                  }|                    dd4           d5t          j        d6<   t          j                            |j                   t          j                                        }t          j        t          j                            d7t          j        8                     |                                d7k    r=|                                d2k    r%t1          d9|                                 d:           t3          |j        |j        |j        ;          }t;          |j        |j        |j         |j!        |j"        |<          }tG          dGi |j$        t:          _!        tK          j&        ||j'        |j(        |j)        |j*        |j+        |j        |j        |j        |j        |j,        =          }d>|j        -                    d?d@          t]          dA          g}|j*        ||j*        dBgz  }||j'        z  }d@/                    |          }|                                d2k    r||z  }	|	0                    t          j1        |dC         dDE                     t1          dF           |dC         2                                D ]'}
t1          t          j1        |
dDE                     &d S d S )HNz2Evaluate an MLX model using lm-evaluation-harness.z--modelzModel to evaluateT)helprequiredz--tasks+)nargsr  z--output-dir.z"Output directory for result files.)defaultr  z--batch-size   z
Batch size)typer	  r  z--num-shotszNumber of shotsz--max-tokenszhMaximum number of tokens to generate. When set, this value takes precedence over task specific defaults.)r  r  r	  z--limitz&Limit the number of examples per task.)r	  r  r  z--seed{   zRandom seed.z--fewshot-as-multiturn
store_truezZWhether to provide the fewshot examples as a multiturn conversation or a single user turn.F)actionr  r	  z--apply-chat-templatezSpecifies whether to apply a chat template to the prompt. If the model has a chat template, this defaults to `True`, otherwise `False`.z--chat-template-argszvA JSON formatted string of arguments for the tokenizer's
        apply_chat_template, e.g. '{"enable_thinking":false}'z{}z--confirm-run-unsafe-codez?Confirm that you want to run tasks that execute untrusted code.z--trust-remote-codez)Enable trusting remote code for tokenizer)r  r  z--tempg        zSampling temperaturez--top-pg      ?zSampling top-pz--top-kr   zSampling top-k)parentsexist_okfalseTOKENIZERS_PARALLELISMr   r   zEvaluating with z nodes)temptop_ptop_k)rK   rL   rM   rN   rO   )r   tasksfewshot_as_multiturnr@   num_fewshotlimitrandom_seednumpy_random_seedtorch_random_seedfewshot_random_seedconfirm_run_unsafe_codere   r   r   lm_eval02dresults   )indentzResults:r   )3argparseArgumentParseradd_argumentr   BooleanOptionalActionjsonloadsr   
parse_argsr   
output_dirmkdirosenvironr4   randomseedr   r   re   all_sumr   r   r   printr   r  r  r  rH   r   rK   rL   r@   rN   rE   chat_template_argsr  simple_evaluater  r  rM   	num_shotsr  r  r   r   join
write_textdumpsr   )parserr   r+  worldrO   lmr!  	file_keysfilenameoutput_pathresults              r   mainr@    s   $< F 	(;dKKK
	t<<<
*N     S"<PPP
CDUVVV
3     5	     sCnMMM
 .     -      ZA     #N	     8    
 uc@VWWW
	sAQRRR
	Q=MNNNDdo&&JTD111 ,3BJ'(INN49 N!!EGBN""1RV"44555zz||aEJJLLA--5555666Yjj  G
 

??20
 
 
B !1 K K43J K KE%j!60NjI)) I $ <  G ++C55wy7I7IJI~!--..	Ixx	""Hzz||q 8+tz')*<QGGGHHHji(//11 	0 	0F$*VA...//// 	0 	0r   )+__doc__r$  r   r   r(  r   r-  importlib.metadatar   pathlibr   typingr   r   r   r  mlx.corecorer4   mlx.nnrg   numpyr,   lm_eval.api.modelr   lm_eval.api.registryr	   r
   generater   models.cacher   sample_utilsr   utilsr   r   r"   r'   r8   rE   rH   r@  r   r   r   <module>rO     s            				 & & & & & &       * * * * * * * * * *                              / / / / / /       $ $ $ $ $ $ + + + + + + & & & & & &           / / /
 
 
 B B B B BB B B BJ
t0 t0 t0 t0 t0r   