o
    Q5iD=                     @  s   d dl mZ d dlmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZmZ d dlmZ d dlmZmZmZmZmZ G dd	 d	ZG d
d dZdS )    )annotations)	Semaphore)IterableIteratorOptionalUnion)
EngineArgs)BatchHandlerselect_model)logger)ClassifyReturnTypeEmbeddingReturnTypeImageClassTypeModelCapabilitesRerankReturnTypec                   @  s   e Zd ZdZ		dFdGdd	ZedHddZdIddZdd Zdd Z	dd Z
dd Zdd ZdJddZedJdd ZedKd"d#ZedLd$d%Z	dMdNd+d,Zd-dd.dOd5d6Zd-d7dPd9d:Zdd;dQd>d?Zdd;dRdBdCZdDdE ZdS )SAsyncEmbeddingEnginea  
    An LLM engine that receives requests and embeds them asynchronously.

    This is the main worker of the infinity-emb library. It is responsible for
    handling the requests and embedding them asynchronously.

    Initialize via `from_args` method.
    NTmodel_name_or_pathOptional[str]returnNonec                 K  sT   |rt d |dur||d< tdi || _d| _d| _t| j\| _| _| _	dS )zvCreating a Async EmbeddingEngine object.
        preferred way to create an engine is via `from_args` method.
        zaAsyncEmbeddingEngine() is deprecated since 0.0.25. Use `AsyncEmbeddingEngine.from_args()` insteadNr   F )
r   warningr   _engine_argsrunning_running_sepamorer
   _model_replicas_min_inference_t_max_inference_t)selfr   _show_deprecation_warningkwargsr   r   a/lsinfo/ai/hellotax_ai/llm_service/venv_embed/lib/python3.10/site-packages/infinity_emb/engine.py__init__#   s   
zAsyncEmbeddingEngine.__init__engine_argsr   'AsyncEmbeddingEngine'c                 C  s*   t d| | di | ddi}|S )zpcreate an engine from EngineArgs

        Args:
            engine_args (EngineArgs): EngineArgs object
        z'Creating AsyncEmbeddingEngine from `%s`r   FNr   )r   debugto_dict)clsr#   enginer   r   r!   	from_args<   s   
zAsyncEmbeddingEngine.from_argsstrc                 C  s$   d| j  d| j| jg d| j dS )NzAsyncEmbeddingEngine(running=z, inference_time=z, ))r   r   r   r   r   r   r   r!   __str__K   s   

zAsyncEmbeddingEngine.__str__c              	     s   | j du rtd| _ | j 4 I dH 9 | js@d| _t| jj| j| jjtj	dk| jj
d| _| j I dH  W d  I dH  dS W d  I dH  dS 1 I dH sQw   Y  dS )zstartup engineN   T
   )max_batch_sizemodel_replicasvector_disk_cache_pathverboselengths_via_tokenize)r   r   r   r	   r   
batch_sizer   r2   r   levelr4   _batch_handlerspawnr,   r   r   r!   astartR   s"   

.zAsyncEmbeddingEngine.astartc              	     s   | j du rdS | j 4 I dH % | jr)d| _| j I dH  W d  I dH  dS W d  I dH  dS 1 I dH s:w   Y  dS )zstop engineNF)r   r   r7   shutdownr,   r   r   r!   astopc   s   
.zAsyncEmbeddingEngine.astopc                      |   I d H  d S N)r9   r,   r   r   r!   
__aenter__l      zAsyncEmbeddingEngine.__aenter__c                   r<   r=   )r;   )r   argsr   r   r!   	__aexit__o   r?   zAsyncEmbeddingEngine.__aexit__c                 C     |    | j S r=   )_assert_runningr7   overload_statusr,   r   r   r!   rD   r      
z$AsyncEmbeddingEngine.overload_statusboolc                 C  rB   r=   )rC   r7   is_overloadedr,   r   r   r!   rG   v   rE   z"AsyncEmbeddingEngine.is_overloadedc                 C     | j S r=   )r   r,   r   r   r!   
is_runningz      zAsyncEmbeddingEngine.is_runningset[ModelCapabilites]c                 C  s   | j d jS )Nr   )r   capabilitiesr,   r   r   r!   rL   ~   s   z!AsyncEmbeddingEngine.capabilitiesc                 C  rH   r=   )r   r,   r   r   r!   r#      rJ   z AsyncEmbeddingEngine.engine_args	sentences	list[str]matryoshka_dim
int | None'tuple[list['EmbeddingReturnType'], int]c                   ,   |    | jj||dI dH \}}||fS )a
  embed multiple sentences

        Kwargs:
            sentences (list[str]): sentences to be embedded
            matryoshka_dim (int): Length of matryoshka embedding

        Raises:
            ValueError: raised if engine is not started yet
            ModelNotDeployedError: If loaded model does not expose `embed`
                capabilities

        Returns:
            list["EmbeddingReturnType"]: embeddings
                2D list-array of shape( len(sentences),embed_dim )
            int: token usage
        )rM   rO   N)rC   r7   embed)r   rM   rO   
embeddingsusager   r   r!   rS         zAsyncEmbeddingEngine.embedF
raw_scorestop_nquerydocsrX   rY   Optional[int]$tuple[list['RerankReturnType'], int]c                  s0   |    | jj||||dI dH \}}||fS )a  rerank multiple sentences

        Kwargs:
            query (str): query to be reranked
            docs (list[str]): docs to be reranked
            raw_scores (bool): return raw scores instead of sigmoid
            top_n (Optional[int]): number of top scores to return after reranking
                if top_n is None, <= 0 or out of range, all scores are returned

        Raises:
            ValueError: raised if engine is not started yet
            ModelNotDeployedError: If loaded model does not expose `rerank`
                capabilities

        Returns:
            list[float]: list of scores
            int: token usage
        rZ   r[   rX   rY   N)rC   r7   rerank)r   rZ   r[   rX   rY   scoresrU   r   r   r!   r_      s   zAsyncEmbeddingEngine.rerankrX   $tuple[list[ClassifyReturnType], int]c                  rR   )a  classify multiple sentences

        Kwargs:
            sentences (list[str]): sentences to be classified
            raw_scores (bool): if True, return raw scores, else softmax

        Raises:
            ValueError: raised if engine is not started yet
            ModelNotDeployedError: If loaded model does not expose `embed`
                capabilities

        Returns:
            list[ClassifyReturnType]: list of class encodings
            int: token usage
        rM   rX   N)rC   r7   classify)r   rM   rX   r`   rU   r   r   r!   rd      s   zAsyncEmbeddingEngine.classifyrO   images)list[Union[str, 'ImageClassType', bytes]]c                  rR   )aE  embed multiple images

        Kwargs:
            images (list[Union[str, ImageClassType]]): list of image urls or ImageClassType objects, to be embedded
            matryoshka_dim (int): Length of matryoshka embedding

        Raises:
            ValueError: raised if engine is not started yet
            ModelNotDeployedError: If loaded model does not expose `image_embed`
                capabilities

        Returns:
            list["EmbeddingReturnType"]: embeddings
                2D list-array of shape( len(sentences),embed_dim )
            int: token usage
        rf   rO   N)rC   r7   image_embed)r   rf   rO   rT   rU   r   r   r!   ri      s   z AsyncEmbeddingEngine.image_embedaudioslist[Union[str, bytes]]c                  rR   )a(  embed multiple audios

        Kwargs:
            audios (list[Union[str, Audiobytes]]): list of audio data, to be embedded
            matryoshka_dim (int): Length of matryoshka embedding

        Raises:
            ValueError: raised if engine is not started yet
            ModelNotDeployedError: If loaded model does not expose `audio_embed`
                capabilities

        Returns:
            list["EmbeddingReturnType"]: embeddings
                2D list-array of shape( len(sentences), embed_dim )
            int: token usage
        rj   rO   N)rC   r7   audio_embed)r   rj   rO   rT   rU   r   r   r!   rm      rV   z AsyncEmbeddingEngine.audio_embedc                 C  s   | j stdd S )Nzgdidn't start `AsyncEmbeddingEngine`  recommended use is via AsyncContextManager `async with engine: ..`)r   
ValueErrorr,   r   r   r!   rC     s
   z$AsyncEmbeddingEngine._assert_running)NT)r   r   r   r   )r#   r   r   r$   )r   r*   r   rF   )r   rK   )r   r   r=   )rM   rN   rO   rP   r   rQ   )
rZ   r*   r[   rN   rX   rF   rY   r\   r   r]   )rM   rN   rX   rF   r   rb   )rf   rg   rO   rP   r   rQ   )rj   rk   rO   rP   r   rQ   )__name__
__module____qualname____doc__r"   classmethodr)   r-   r9   r;   r>   rA   rD   rG   propertyrI   rL   r#   rS   r_   rd   ri   rm   rC   r   r   r   r!   r      s@    
	
%r   c                   @  s   e Zd ZdZd;ddZed<d
dZd=ddZdd Zdd Z	ddd>ddZ
d?dd Zd!dd"d@d(d)Zd!d*dAd,d-ZdddBd0d1ZdCd5d6ZdddDd9d:ZdS )EAsyncEngineArrayz<EngineArray is a collection of AsyncEmbeddingEngine objects.engines Iterable['AsyncEmbeddingEngine']c                 C  sJ   |st dtt|ttdd |D krt ddd |D | _d S )NzEngines cannot be emptyc                 s  s    | ]}|j jV  qd S r=   r#   served_model_name.0r(   r   r   r!   	<genexpr>$  s    z,AsyncEngineArray.__init__.<locals>.<genexpr>z$Engines must have unique model namesc                 S  s   i | ]}|j j|qS r   ry   r{   r   r   r!   
<dictcomp>'  s    z-AsyncEngineArray.__init__.<locals>.<dictcomp>)rn   lenlistsetengines_dict)r   rw   r   r   r!   r"      s   zAsyncEngineArray.__init__engine_args_arrayIterable[EngineArgs]r   'AsyncEngineArray'c                 C  s   t tj|}| t|dS )z|create an engine from EngineArgs

        Args:
            engine_args_array (list[EngineArgs]): EngineArgs object
        )rw   )mapr   r)   tuple)r'   r   rw   r   r   r!   r)   )  s   zAsyncEngineArray.from_args Iterator['AsyncEmbeddingEngine']c                 C  s   t | j S r=   )iterr   valuesr,   r   r   r!   __iter__4  s   zAsyncEngineArray.__iter__c                   $   | j  D ]	}| I dH  qdS )zstartup enginesN)r   r   r9   r   r(   r   r   r!   r9   7     zAsyncEngineArray.astartc                   r   )zstop enginesN)r   r   r;   r   r   r   r!   r;   <  r   zAsyncEngineArray.astopNre   modelr*   rM   rN   rO   r\   rQ   c                     | | j ||dI dH S )a9  embed multiple sentences

        Kwargs:
            model (str): model name to be used
            sentences (list[str]): sentences to be embedded
            matryoshka_dim (int): Length of matryoshka embedding

        Raises:
            ValueError: raised if engine is not started yet
            ModelNotDeployedError: If loaded model does not expose `embed`
                capabilities

        Returns:
            list["EmbeddingReturnType"]: embeddings
                2D list-array of shape( len(sentences),embed_dim )
            int: token usage
        re   N)rS   )r   r   rM   rO   r   r   r!   rS   A     zAsyncEngineArray.embedrF   c                 C  s   t dd | j D S )Nc                 s  s    | ]}|j V  qd S r=   )rI   r{   r   r   r!   r}   X  s    z.AsyncEngineArray.is_running.<locals>.<genexpr>)allr   r   r,   r   r   r!   rI   W  s   zAsyncEngineArray.is_runningFrW   rZ   r[   rX   rY   r]   c                  s   | | j ||||dI dH S )ae  rerank multiple sentences

        Kwargs:
            model (str): model name to be used
            query (str): query to be reranked
            docs (list[str]): docs to be reranked
            raw_scores (bool): return raw scores instead of sigmoid
            top_n (Optional[int]): number of top scores to return after reranking

        Raises:
            ValueError: raised if engine is not started yet
            ModelNotDeployedError: If loaded model does not expose `rerank`
                capabilities

        Returns:
            list[float]: list of scores
            int: token usage
        r^   N)r_   )r   r   rZ   r[   rX   rY   r   r   r!   r_   Z  s   zAsyncEngineArray.rerankra   rb   c                  r   )a  classify multiple sentences

        Kwargs:
            model (str): model name to be used
            sentences (list[str]): sentences to be classified
            raw_scores (bool): if True, return raw scores, else softmax

        Raises:
            ValueError: raised if engine is not started yet
            ModelNotDeployedError: If loaded model does not expose `embed`
                capabilities

        Returns:
            list[ClassifyReturnType]: list of class encodings
            int: token usage
        rc   N)rd   )r   r   rM   rX   r   r   r!   rd   w  s   zAsyncEngineArray.classifyrf   "list[Union[str, 'ImageClassType']]c                  r   )at  embed multiple images

        Kwargs:
            model (str): model name to be used
            images (list[Union[str, ImageClassType]]): list of image urls or ImageClassType objects, to be embedded
            matryoshka_dim (int): Length of matryoshka embedding

        Raises:
            ValueError: raised if engine is not started yet
            ModelNotDeployedError: If loaded model does not expose `image_embed`
                capabilities

        Returns:
            list["EmbeddingReturnType"]: embeddings
                2D list-array of shape( len(sentences),embed_dim )
            int: token usage
        rh   N)ri   )r   r   rf   rO   r   r   r!   ri     s   zAsyncEngineArray.image_embedindex_or_nameUnion[str, int]r$   c                 C  sx   t | jdkrt| j d S t|trt| j | S t|tr-|| jv r-| j| S td| dt| j  )zresolve engine by model name -> Auto resolve if only one engine is present

        Args:
            model_name (str): model name to be used
        r.   r   zEngine for model name `z'` not found. Available model names are )	r   r   r   r   
isinstanceintr*   
IndexErrorkeys)r   r   r   r   r!   __getitem__  s   

zAsyncEngineArray.__getitem__rj   rk   c                  r   )aQ  embed multiple audios

        Kwargs:
            model (str): model name to be used
            audios (list[Union[str, bytes]]): list of audio data, to be embedded
            matryoshka_dim (int): Length of matryoshka embedding

        Raises:
            ValueError: raised if engine is not started yet
            ModelNotDeployedError: If loaded model does not expose `audio_embed`
                capabilities

        Returns:
            list["EmbeddingReturnType"]: embeddings
                2D list-array of shape( len(sentences),embed_dim )
            int: token usage
        rl   N)rm   )r   r   rj   rO   r   r   r!   rm     r   zAsyncEngineArray.audio_embed)rw   rx   )r   r   r   r   )r   r   )r   r*   rM   rN   rO   r\   r   rQ   ro   )r   r*   rZ   r*   r[   rN   rX   rF   rY   r\   r   r]   )r   r*   rM   rN   rX   rF   r   rb   )r   r*   rf   r   rO   r\   r   rQ   )r   r   r   r$   )r   r*   rj   rk   rO   r\   r   rQ   )rp   rq   rr   rs   r"   rt   r)   r   r9   r;   rS   rI   r_   rd   ri   r   rm   r   r   r   r!   rv     s*    
	


	
rv   N)
__future__r   asyncior   typingr   r   r   r   infinity_emb.argsr   infinity_emb.inferencer	   r
   infinity_emb.log_handlerr   infinity_emb.primitivesr   r   r   r   r   r   rv   r   r   r   r!   <module>   s   	  