o
    :/iq                     @   s6  d Z ddlZddlZddlmZ ddlmZmZ ddl	m
Z
 ddlmZ ddlZddlmZ ddlmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ eeZdZ dZ!G dd deZ"eG dd dZ#eG dd dZ$G dd deZ%G dd deej&dddZ'G dd dej&dddZ(dS )z(Sampling parameters for text generation.    N)field)EnumIntEnum)cached_property)Any)	dataclass)ModelConfigSpeculativeConfigStructuredOutputsConfig)VLLMValidationError)init_logger)TokenizerLike)is_mistral_tokenizer)PydanticMsgspecMixingh㈵>g{Gz?c                   @      e Zd ZdZdZdZdS )SamplingTyper         N)__name__
__module____qualname__GREEDYRANDOMRANDOM_SEED r   r   a/lsinfo/ai/hellotax_ai/llm_service/venv_vllm/lib/python3.10/site-packages/vllm/sampling_params.pyr      s    r   c                   @   s   e Zd ZU dZeeB dB ed< dZedB ed< dZe	e dB ed< dZ
edB ed< dZedB ed< dZeed< dZeed	< dZedB ed
< dZedB ed< edddZedB ed< 	 edddZeed< 	 dd ZdefddZdefddZdS )StructuredOutputsParamsNjsonregexchoicegrammarjson_objectFdisable_any_whitespacedisable_additional_propertieswhitespace_patternstructural_tag)defaultinit_backend_backend_was_autoc                 C   sl   t | jdu| jdu| jdu| jdu| jdu| jdug}|dkr(td| j |dk r4td| j dS )z1Validate that some fields are mutually exclusive.Nr   zWYou can only use one kind of structured outputs constraint but multiple are specified: zOYou must use one kind of structured outputs constraint but none are specified: )	sumr   r   r   r    r!   r%   
ValueError__dict__)selfcountr   r   r   __post_init__6   s,   
z%StructuredOutputsParams.__post_init__returnc                       t  fdddD S )S
        Returns True if all structured-output constraint fields are None.
        c                 3       | ]
}t  |d u V  qd S Ngetattr.0r   r-   r   r   	<genexpr>Q   
    
z?StructuredOutputsParams.all_constraints_none.<locals>.<genexpr>)r   r   r   r    r!   r%   allr9   r   r9   r   all_constraints_noneM      z,StructuredOutputsParams.all_constraints_nonec                    r1   )r2   c                 3   r3   r4   r5   r7   r9   r   r   r:   a   r;   zRStructuredOutputsParams.all_non_structural_tag_constraints_none.<locals>.<genexpr>)r   r   r   r    r!   r<   r9   r   r9   r   'all_non_structural_tag_constraints_none]   r?   z?StructuredOutputsParams.all_non_structural_tag_constraints_none)r   r   r   r   strdict__annotations__r   r   listr    r!   boolr"   r#   r$   r%   r   r(   r)   r/   r>   r@   r   r   r   r   r   #   s"   
 r   c                   @   sD   e Zd ZU dZdZeed< 	 dZeed< 	 dZeed< 	 dd Z	dS )	RepetitionDetectionParamszEParameters for detecting repetitive N-gram patterns in output tokens.r   max_pattern_sizemin_pattern_size	min_countc                 C   sL   | j dk s| jdk s| j| j krtd| j dkr"| jdk r$tdd S d S )Nr   zmax_pattern_size, min_pattern_size must be >=0, with min_pattern_size <= max_pattern_size. Set both to 0 to disable repetitive pattern detection.r   zmin_count must be >= 2 to detect repetitive patterns in engine output. If you do not wish to detect repetitive patterns, set max_pattern_size to 0.)rG   rH   r+   rI   r9   r   r   r   r/      s   

z'RepetitionDetectionParams.__post_init__N)
r   r   r   __doc__rG   intrC   rH   rI   r/   r   r   r   r   rF   m   s   
 rF   c                   @   r   )RequestOutputKindr   r   r   N)r   r   r   
CUMULATIVEDELTA
FINAL_ONLYr   r   r   r   rL      s    rL   c                <   @   s  e Zd ZU dZdZeed< 	 dZeed< 	 dZ	eed< 	 dZ
eed< 	 dZeed	< 	 dZeed
< 	 dZeed< 	 dZeed< 	 dZedB ed< 	 dZeee B dB ed< 	 dZee dB ed< 	 dZeed< 	 dZedB ed< 	 dZeed< 	 dZedB ed< 	 dZedB ed< 	 dZeed< 	 dZeed< 	 dZeed< 	 dZeed< 	 dZeed< 	 ej Z!eed< dZ"eed < 	 dZ#eed!< dZ$edB ed"< e%j&e'd#Z(e'e ed$< dZ)e*dB ed%< 	 dZ+e,eef dB ed&< 	 dZ-ee dB ed'< 	 dZ.e,ee/f dB ed(< 	 dZ0ee dB ed)< 	 dZ1eee  dB ed*< dZ2edB ed+< dZ3e4dB ed,< 	 e5dddddddddddddddddddddej ddddddfdedB dedB dedB dedB d	edB d
edB dedededB deee B dB dee dB d)ee dB dedededB dededB dedB dedededed%e*dB d&e,eef e,eef B dB d'ee dB d(e,ee/f dB d ed,e4dB d-d f:d.d/Z6d\d0d1Z7d\d2d3Z8d\d4d5Z9	d]d6e,ee/f d7edB d-dfd8d9Z:d:e;d-dfd;d<Z<e=d-e>fd=d>Z?e@d-edB fd?d@ZAe@d-e'e fdAdBZBe@d-eee  dB fdCdDZCd^dEdFZDdGeEdHeFdB dIeGdB d:e;dB d-df
dJdKZHdGeEd-dfdLdMZIdGeEd-dfdNdOZJdGeEd-dfdPdQZKd:e;dB d-dfdRdSZLdHeFdB d-dfdTdUZMdIeGdB d:e;dB d-dfdVdWZNd-efdXdYZOe5d^dZd[ZPdS )_SamplingParamsa  Sampling parameters for text generation.

    Overall, we follow the sampling parameters from the OpenAI text completion
    API (https://platform.openai.com/docs/api-reference/completions/create).
    In addition, we support beam search, which is not supported by OpenAI.
    r   n        presence_penaltyfrequency_penalty      ?repetition_penaltytemperaturetop_pr   top_kmin_pNseedstopstop_token_idsF
ignore_eos   
max_tokens
min_tokenslogprobsprompt_logprobsflat_logprobsT
detokenizeskip_special_tokensspaces_between_special_tokensinclude_stop_str_in_outputoutput_kind
skip_cloneoutput_text_buffer_length_eos_token_id)default_factory_all_stop_token_idsstructured_outputs
logit_biasallowed_token_ids
extra_args	bad_words_bad_words_token_idsskip_reading_prefix_cacherepetition_detectionr0   c                 C   s  |d urdd |  D }td"i d| d u rdn| d|d u r dn|d|d u r)dn|d|d u r2d	n|d
|d u r;d	n|d|d u rDd	nF|d|d|d|d|	d|
d|d|d|d|d|d|d|d|d|d|d|d|d|d|d|d |d!|S d|d|d|d|	d|
d|d|d|d|d|d|d|d|d|d|d|d|d|d|d|d |d!|S )#Nc                 S   s&   i | ]\}}t |td td|qS )g      Y@g      Y)rK   minmax)r8   tokenbiasr   r   r   
<dictcomp>G  s    z0SamplingParams.from_optional.<locals>.<dictcomp>rQ   r   rS   rR   rT   rV   rU   rW   rX   rY   rZ   r[   r\   r]   rs   rh   r^   r`   ra   rb   rc   re   rf   rg   ri   ro   rp   rq   rr   rj   rv   r   )itemsrP   )rQ   rS   rT   rV   rW   rX   rY   rZ   r[   r\   r]   rs   rh   r^   r`   ra   rb   rc   re   rf   rg   ri   ro   rp   rq   rr   rj   rv   r   r   r   from_optional%  s   	
	
zSamplingParams.from_optionalc                 C   s0  d| j   k rtk rn ntd| j tt t| j t| _ | jdkr%d | _| jd u r.g | _nt| jtr9| jg| _| j	d u rAg | _	| j
d u rIg | _
| jdu rQd| _| jdu rYd| _| jrl| jsltdd | jD d | _|   | j tk rd| _d| _d	| _|   | j| j	 | jd u r| jd u| _d S d S )
Nr   zstemperature %s is less than %s, which may cause numerical errors nan or inf in tensors. We have maxed it out to %s.Tr   c                 s   s    | ]}t |V  qd S r4   )len)r8   sr   r   r   r:     s    z/SamplingParams.__post_init__.<locals>.<genexpr>rU   rR   )rW   	_MAX_TEMPloggerwarningrx   r[   r\   
isinstancerA   r]   rs   rb   rc   rh   rk   _verify_args_SAMPLING_EPSrX   rY   rZ   _verify_greedy_samplingrn   updateru   r9   r   r   r   r/   m  sD   








zSamplingParams.__post_init__c                 C   s  t | jtstdt| j | jdk rtd| j dd| j  kr)dks3n td| j dd| j  kr>dksHn td| j d| jd	krVtd
| j d| jd	k rht	d| j dd| jdd	| j
  k rsdksn t	d| j
 dd| j
d| jdk rtd| j dt | jtstdt| jj d	| j  krdksn td| j d| jd ur| jdk rt	d| j dd| jd| jdk rtd| j d| jd ur| j| jkrtd| j d| j d| jd ur| jdkr| jdk rt	d| j dd| jd| jd ur0| jdkr0| jdk r0t	d| j dd| jdt | jts9J tdd  | jD sMtd!| j dt | jtsVJ td"d  | jD retd#| jrq| jsstd$d S d S )%Nz!n must be an int, but is of type r   zn must be at least 1, got .g       g       @z)presence_penalty must be in [-2, 2], got z*frequency_penalty must be in [-2, 2], got rR   z2repetition_penalty must be greater than zero, got z&temperature must be non-negative, got rW   	parametervaluerU   ztop_p must be in (0, 1], got rX   r~   z.top_k must be 0 (disable), or at least 1, got ztop_k must be an integer, got zmin_p must be in [0, 1], got z#max_tokens must be at least 1, got r`   r   z3min_tokens must be greater than or equal to 0, got z4min_tokens must be less than or equal to max_tokens=z, got z)logprobs must be non-negative or -1, got rb   z0prompt_logprobs must be non-negative or -1, got rc   c                 s   s    | ]}t |tV  qd S r4   )r   rK   )r8   st_idr   r   r   r:     s    z.SamplingParams._verify_args.<locals>.<genexpr>z/stop_token_ids must contain only integers, got c                 s   s    | ]}| V  qd S r4   r   )r8   stop_strr   r   r   r:     s    z$stop cannot contain an empty string.zYstop strings are only supported when detokenize is True. Set detokenize=True to use stop.)r   rQ   rK   r+   typerS   rT   rV   rW   r   rX   rY   	TypeErrorr   rZ   r`   ra   rb   rc   r]   rD   r=   r\   anyre   r9   r   r   r   r     s   




$zSamplingParams._verify_argsc                 C   s    | j dkrtd| j  dd S )Nr   z,n must be 1 when using greedy sampling, got r   )rQ   r+   r9   r   r   r   r     s   
z&SamplingParams._verify_greedy_samplinggeneration_configeos_token_idc                 C   s   | j s|| _|dur| j| |d }durMt|tr!|hnt|}|dur.|| |rO| j	| | j sQ| j
dus@J |	| j
 t|| _
dS dS dS dS )z=Update if there are non-default values from generation_configNr   )r^   rl   rn   addgetr   rK   setdiscardr   r]   rD   )r-   r   r   eos_idsr   r   r   update_from_generation_config  s"   
z,SamplingParams.update_from_generation_config	tokenizerc                    s   | j sd S g | _| j D ]:}dD ]5}|rdnd}||  } j|dd}|r>|rD|d | jd d krDt|t| jd krD| j| qq fdd	| jD }t|dkrltd
 jd  d| d j dd| j dd S )N)FT  F)textadd_special_tokensr   r~   c                    s,   g | ]}|D ]}|d k s| j kr|qqS r   )max_token_id)r8   bad_words_token_idstoken_idr   r   r   
<listcomp>4  s    z8SamplingParams.update_from_tokenizer.<locals>.<listcomp>zThe model vocabulary size is r   z2, but the following tokens were specified as bad: zF. All token id values should be integers satisfying: 0 <= token_id <= r   rs   r   )rs   rt   lstripencoder   appendr   r   )r-   r   bad_wordadd_prefix_spaceprefixpromptprompt_token_idsinvalid_token_idsr   r   r   update_from_tokenizer  s@   

z$SamplingParams.update_from_tokenizerc                 C   s&   | j tk rtjS | jd urtjS tjS r4   )rW   r   r   r   r[   r   r   r9   r   r   r   sampling_typeE  s
   

zSamplingParams.sampling_typec                 C      | j S r4   )rl   r9   r   r   r   r   M     zSamplingParams.eos_token_idc                 C   r   r4   )rn   r9   r   r   r   all_stop_token_idsQ  r   z!SamplingParams.all_stop_token_idsc                 C   r   r4   )rt   r9   r   r   r   r   U  s   z"SamplingParams.bad_words_token_idsc                 C   s   | j rt| S t| S )z>If skip_clone is True, uses shallow copy instead of deep copy.)rj   copydeepcopyr9   r   r   r   cloneZ  s   

zSamplingParams.clonemodel_configspeculative_configstructured_outputs_configc                 C   sB   |  | | | | | | | | | | || d S r4   )_validate_logprobs_validate_logit_bias_validate_logits_processors_validate_allowed_token_ids_validate_spec_decode_validate_structured_outputs)r-   r   r   r   r   r   r   r   verifya  s   




zSamplingParams.verifyc                 C   s   |j }|dkr| }| j }r)|dkr| }||kr)td| d| d|d| j }rG|dkr6| }||krItd| d| d|dd S d S )Nr~   zRequested sample logprobs of z%, which is greater than max allowed: rb   r   zRequested prompt logprobs of rc   )max_logprobsget_vocab_sizerb   r   rc   )r-   r   r   num_logprobsnum_prompt_logprobsr   r   r   r   o  s6   

z!SamplingParams._validate_logprobsc                    sH   | j sdS |   fdd| j D }|r"td| d  d|ddS )z:Validate logit_bias token IDs are within vocabulary range.Nc                        g | ]}|d k s| kr|qS r   r   r8   r   
vocab_sizer   r   r     
    z7SamplingParams._validate_logit_bias.<locals>.<listcomp>ztoken_id(s) z@ in logit_bias contain out-of-vocab token ids. Vocabulary size: rp   r   )rp   r   r   )r-   r   r   r   r   r   r     s   
z#SamplingParams._validate_logit_biasc                 C   s   ddl m} ||j|  d S )Nr   )%validate_logits_processors_parameters)vllm.v1.sample.logits_processorr   logits_processors)r-   r   r   r   r   r   r     s   z*SamplingParams._validate_logits_processorsc                    sh   | j }|d u r	d S t|dkrtdd|d|d ur0t|  fdd|D }|r2tdd|dd S d S )Nr   z(allowed_token_ids is not None and empty!rq   r   c                    r   r   r   r   r   r   r   r     r   z>SamplingParams._validate_allowed_token_ids.<locals>.<listcomp>z1allowed_token_ids contains out-of-vocab token id!)rq   r   r   )r-   r   rq   r   r   r   r   r     s,   
z*SamplingParams._validate_allowed_token_idsc                 C   s(   |d u rd S | j tks| jrtdd S )Nz]The min_p and logit_bias sampling parameters are not yet supported with speculative decoding.)rZ   r   rp   r+   )r-   r   r   r   r   r     s   z$SamplingParams._validate_spec_decodec                 C   s  |d u s	| j d u rd S |d u rtd|j}| j j }r4||kr3|dkr(| j js3td| d| dn|| j _t| j jtrM| j jsMtd| j j dt| j jt	r`| j j
 dkr`td	d
dlm}m} d
dlm} d
dlm} d
dlm}	 |dr|	|  n{|drt|rtd|| d d ng|dkr||  n^|dkrt|rtd||  nMz
|	|  d| j _W n> ty   | j }
d}|
jrt|
jt	rt|
j}n|
j}||}t|s|r||  d| j _n
|| d d d| j _Y nw d| j _| j   d S )NzVStructured outputs requires a tokenizer so it can't be used with 'skip_tokenizer_init'autoz[Request-level structured output backend selection is not supported. The request specified 'z"', but vLLM was initialised with 'zF'. This error can be resolved by removing '_backend' from the request.zChoice 'z' cannot be an empty listr   z4structured_outputs.grammar cannot be an empty stringr   )&has_guidance_unsupported_json_featuresvalidate_guidance_grammar)5validate_structured_output_request_lm_format_enforcer)+validate_structured_output_request_outlines)validate_xgrammar_grammarxgrammarguidancezMistral tokenizer is not supported for the 'guidance' structured output backend. Please use ['xgrammar', 'outlines'] backends or tokenizer_mode='hf' instead.r   outlineszlm-format-enforcerzMistral tokenizer is not supported for the 'lm-format-enforcer' structured output backend. Please use ['xgrammar', 'outlines'] backends or tokenizer_mode='hf' instead.FT)ro   r+   backendr(   r)   r   r   rD   r    rA   strip*vllm.v1.structured_output.backend_guidancer   r   4vllm.v1.structured_output.backend_lm_format_enforcerr   *vllm.v1.structured_output.backend_outlinesr   *vllm.v1.structured_output.backend_xgrammarr   
startswithr   r   json_modloadsr/   )r-   r   r   r   r(   r   r   r   r   r   	so_paramsskip_guidanceschemar   r   r   r     s   





z+SamplingParams._validate_structured_outputsc                 C   s  d g d| j d| j d| j d| j d| j d| j d| j d	| j d
| j	 d| j
 d| j d| j d| j d| j d| j d| j d| j d| j d| j d| j d| j d| j dS )Nr   zSamplingParams(n=z, presence_penalty=z, frequency_penalty=z, repetition_penalty=z, temperature=z, top_p=z, top_k=z, min_p=z, seed=z, stop=z, stop_token_ids=z, bad_words=z, include_stop_str_in_output=z, ignore_eos=z, max_tokens=z, min_tokens=z, logprobs=z, prompt_logprobs=z, skip_special_tokens=z , spaces_between_special_tokens=z, structured_outputs=z, extra_args=))joinrQ   rS   rT   rV   rW   rX   rY   rZ   r[   r\   r]   rs   rh   r^   r`   ra   rb   rc   rf   rg   ro   rr   r9   r   r   r   __repr__O  sX   	

zSamplingParams.__repr__c                   C   s0   t dddddddddddd	gd
dggdd
dS )z-Set parameters to exercise all sampler logic.g?2   g?g      ?g333333?r   g      )r   r   r   r      )rW   rX   rY   rZ   rT   rS   rV   ra   rp   rt   rb   rc   )rP   r   r   r   r   for_sampler_warmupj  s   z!SamplingParams.for_sampler_warmup)r0   Nr4   )r0   rP   )Qr   r   r   rJ   rQ   rK   rC   rS   floatrT   rV   rW   rX   rY   rZ   r[   r\   rA   rD   r]   r^   rE   r`   ra   rb   rc   rd   re   rf   rg   rh   rL   rM   ri   rj   rk   rl   msgspecr   r   rn   ro   r   rp   rB   rq   rr   r   rs   rt   ru   rv   rF   staticmethodr}   r/   r   r   r   r   r   r   r   r   propertyr   r   r   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   rP      s  
 	




G
6
W

)



 rP   T)omit_defaultsrB   c                   @   sR   e Zd ZU dZeed< eed< dZeed< dZe	ed< dZ
e	ed	< dZeed
< dS )BeamSearchParamsz+Beam search parameters for text generation.
beam_widthr`   Fr^   rR   rW   rU   length_penaltyrh   N)r   r   r   rJ   rK   rC   r^   rE   rW   r   r   rh   r   r   r   r   r   }  s   
 r   ))rJ   r   r   r   dataclassesr   enumr   r   	functoolsr   typingr   r   pydantic.dataclassesr   vllm.configr   r	   r
   vllm.exceptionsr   vllm.loggerr   vllm.tokenizersr   vllm.utils.mistralr   vllm.v1.serial_utilsr   r   r   r   r   r   r   rF   rL   StructrP   r   r   r   r   r   <module>   sP   I$
	     
g
