o
    Q5i@                  4   @   s  d dl Z d dlZd dlZd dlZd dlmZmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZmZmZmZmZmZ d dlmZ defd	d
ZG dd dZdd ZdefddZejre  e  d dlZd dlZdZ ej!dk rzd dl"Z"e #e"$  dZ W n	 e%y   Y nw e& Z'e'(de
j)d  e
j*d  e
j+d  e
j,d  e
j-d  e
j.e
j/d  e
j0d  e
j1d  e
j2d  e
j3d  e
j4d  e5 e
j6d  e
j7d  e
j8d  e
j9e
j:e
j;e
j<e
j=e
j>e
j?fdedede@dedeAdedddeAdeAd d!d"eAd#ed$d%d&d'd(eAd)eAd*eAd+eAd,ed-ed.ed/e@d0d1f.d2dZBe'(d3ejCdVi ed4d5d6iejCdVi edd5d7iejCdVi edd5d8iejCdVi edd5d9iejCdVi edd5d:iejCdVi edd5d;iejCdVi edd5d<iejCdVi edd5d=iejCdVi ed d5d>iejCdVi ed?d5d@iejCdVi ed"d5dAiejCdVi ed#d5dBiejCdVi ed$d5dCiejCdVi ed&d5dDiejCdVi ed(d5dEiejCdVi ed)d5dFiejCdVi ed*d5dGiejCdVi ed.d5dHiejCdVi ed/d5dIiejCdVi ed-edJdKejCdVi edd5dLiejCdVi ed0d5dMiejCdVi ed+d5dNiejCdVi ed,d5dOiejCdVi edPd5dQifd4eDe deDe deDe@ deDe deDeA deDe deDeA deDeA d eDe d?eDe d"eDeA d#eDe d$eDe d&eDe d(eDeA d)eDeA d*eAd.ed/e@d-eded0d1d+eAd,edPef2dRd3ZEdSdT ZFeGdUkrdTeH v reF  dS dS dS )W    N)CHECK_TYPERCHECK_UVICORN)
EngineArgs)MANAGER)UVICORN_LOG_LEVELSlogger)DeviceDeviceIDDtypeEmbeddingDtypeInferenceEnginePoolingMethod)create_serverpathc                 C   s   t d| r| S td)z
    This regex matches:
    - An empty string or A single '/'
    - A string that starts with '/' and does not end with '/'
    z^$|^/$|^/.*[^/]$z2Path must start with '/' and must not end with '/')rematchtyperBadParameter)r    r   ^/lsinfo/ai/hellotax_ai/llm_service/venv_embed/lib/python3.10/site-packages/infinity_emb/cli.pyvalidate_url   s   
r   c                   @   s4   e Zd ZdZdefddZdefddZdd	 Zd
S )AutoPaddingz:itertools.cycle with custom behaviour to pad to max lengthlengthc                 K   s   || _ || _d S )N)r   kwargs)selfr   r   r   r   r   __init__*   s   
zAutoPadding.__init__	iterationc                 C   s^   t |}t|ttfs|S t|dkr|d S t|| jkr"|| S td| j dt| )zpad x to length of self.length   r   zExpected length z	 but got )typer_option_resolve
isinstancelisttuplelenr   
ValueError)r   xr   r   r   r   _resolve.   s   zAutoPadding._resolvec                 c   sD    t | jD ]}i }| j D ]\}}| ||||< q|V  qdS )z9iterate over kwargs and pad them to length of self.lengthN)ranger   r   itemsr%   )r   r   r   keyvaluer   r   r   __iter__:   s   zAutoPadding.__iter__N)__name__
__module____qualname____doc__intr   r%   r*   r   r   r   r   r   '   s
    r   c                  G   sH   t | dkrt| d drt| d dr| d jS | d S dd | D S )z&returns the value or the default valuer   r   defaultenvvarc                 s   s.    | ]}t |d rt |dr|jn|V  qdS )r0   r1   N)hasattrr0   ).0ar   r   r   	<genexpr>K   s   , z'typer_option_resolve.<locals>.<genexpr>)r"   r2   r0   )argsr   r   r   r   C   s   
r   namec                 C   s    t tt| dt|  ddS )z@constructs the default entry and type hint for the variable name`)r0   r1   )dictgetattrr   to_name)r7   r   r   r   
_constructN   s   r<   auto)      uvloopv1model_name_or_pathserved_model_name
batch_sizerevisiontrust_remote_coderedirect_slashenginer   model_warmupvector_disk_cachedevicer   lengths_via_tokenizedtypeembedding_dtyper   pooling_methodr   compilebettertransformerpreload_onlypermissive_corsapi_key
url_prefixhostport	log_levelr   c                 C   s   |rt d|tjks|t kst dtd tdi d| gd|gd|gd|gd|gd	|gd
|gd|gd|	gd|gd|gd|
gd|gd|gdtjgd|d|d|d|d|d|d|d|dd dS )uT   Infinity API ♾️  cli v1 - deprecated, consider use cli v2 via `infinity_emb v2`.z9api_key is not supported in `v1`. Please migrate to `v2`.zKselecting embedding_dtype is not supported in `v1`. Please migrate to `v2`.zOCLI v1 is deprecated. Consider use CLI `v2`, by specifying `v2` as the command.model_idrC   rD   rE   rF   rH   rM   rO   rK   rI   rJ   rL   rP   rQ   rN   rR   rU   rV   rW   rG   rX   rS   rT   proxy_root_path Nr   )r#   r   float32default_valuer   warningv2)rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   rX   r   r   r   rA   n   sv   
	
r_   rY   helpztHuggingface model repo id. Subset of possible models: https://huggingface.co/models?other=text-embeddings-inference&zBthe nickname for the API, under which the model_id can be selectedz maximum batch size for inferencez!huggingface  model repo revision.zCif potential remote modeling code from huggingface repo is trusted.zWhich backend to use. `torch` uses Pytorch GPU/CPU, optimum uses ONNX on GPU/CPU/NVIDIA-TensorRT, `CTranslate2` uses torch+ctranslate2 on CPU/GPU.z=if model should be warmed up after startup, and before ready.zLIf hash(request)/results should be cached to SQLite for latency improvement.z3device to use for computing the model forward pass.	device_idzcdevice id defines the model placement. e.g. `0,1` will place the model on MPS/CUDA/GPU 0 and 1 eachz`if True, returned tokens is based on actual tokenizer count. If false, uses len(input) as proxy.zdtype for the model weights.zQdtype post-forward pass. If != `float32`, using Post-Forward Static quantization.z5overwrite the pooling method if inferred incorrectly.zEEnable usage of `torch.compile(dynamic=True)` if engine relies on it.ziEnables varlen flash-attention-2 via the `BetterTransformer` implementation. If available for this model.zwIf true, only downloads models and verifies setup, then exit. Recommended for pre-caching the download in a Dockerfile.z#host for the FastAPI uvicorn serverz#port for the FastAPI uvicorn serverzeprefix for all routes of the FastAPI uvicorn server. Useful if you run behind a proxy / cascaded API.)callbackr`   z"where to redirect `/` requests to.zconsole log level.z!whether to allow permissive cors.z(api_key used for authentication headers.rZ   z\Proxy prefix for the application. See: https://fastapi.tiangolo.com/advanced/behind-a-proxy/c              
   C   s  	 t |  dd t|	D }tdi dt| d| d|d|d|d|d	|d
|d|d|d|
d|d|d|d|d|d|}g }|D ]}|tdi | qOt|||||||||	\	}}}}}}}}}t||t	||d|||||d}t
j||||jdtd dS )uA  Infinity API ♾️  cli v2. MIT License. Copyright (c) 2023-now Michael Feil 

        

        Multiple Model CLI Playbook: 

        - 1. cli options can be overloaded i.e. `v2 --model-id model/id1 --model-id model/id2 --batch-size 8 --batch-size 4` 

        - 2. or adapt the defaults by setting ENV Variables separated by `;`: INFINITY_MODEL_ID="model/id1;model/id2;" && INFINITY_BATCH_SIZE="8;4;" 

        - 3. single items are broadcasted to `--model-id` length, making `v2 --model-id model/id1 --model-id/id2 --batch-size 8` both models have batch-size 8. 

        c                 S   s   g | ]}t |qS r   )r	   )r3   dr   r   r   
<listcomp>:  s    zv2.<locals>.<listcomp>r   rB   rD   rE   rF   rH   rI   vector_disk_cache_pathrK   ra   rL   rM   rN   rO   rP   rQ   rC   )rV   rW   )engine_args_listrU   	doc_extrarG   rR   rS   rT   rZ   	httptools)rV   rW   rX   httploopNr   )r   setLevelto_intr   r   r"   appendr   r   r9   uvicornrunr7   loopname)rY   rC   rD   rE   rF   rH   rI   rJ   rK   ra   rL   rM   rN   rO   rP   rQ   rR   rV   rW   rU   rG   rX   rS   rT   rZ   device_id_typedpadderengine_argsr   appr   r   r   r_      s   f	


c                   C   sl   t   ttjdkstjd dvr1ttjdks tjd dvr1tdtj d tj	dd t
  d S )Nr   )rA   r_   r`   --helpz--show-completionz--install-completion)rA   r_   r`   ru   zpError: No command given. Please use infinity with the `v2` command. This is deprecated since 0.0.32. You are on z. Usage: `infinity_emb v2 --model-id BAAI/bge-large-en-v1.5. defaulting to `v2` since 0.0.75. Please pin your revision for future upgrades.r_   )r   mark_requiredr"   sysargvr   errorinfinity_emb__version__inserttpr   r   r   r   cli~  s   
r~   __main__r   )Iasyncior   rw   rz   infinity_emb._optional_importsr   r   infinity_emb.argsr   infinity_emb.envr   infinity_emb.log_handlerr   r   infinity_emb.primitivesr   r	   r
   r   r   r   infinity_emb.infinity_serverr   strr   r   r   r<   is_availablerv   r   rn   rp   version_infor@   set_event_loop_policyEventLoopPolicyImportErrorTyperr}   commandrY   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   r]   rO   rP   rQ   rR   rS   rT   rU   rV   rW   rX   r/   boolrA   Optionr    r_   r~   r+   localsr   r   r   r   <module>   s*   
	
E!%)-048<AEFGLORUX J

