o
    Q5iX                     @   sZ  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	m
Z
mZmZ d dlZd dlmZ d dlmZmZ d dlmZ d dlmZmZ d dlmZ d d	lmZmZmZmZmZm Z  d d
l!m"Z"m#Z#m$Z$ errd dl%m&Z& de'e de'e(e  fddZ)ej*i ej+ej,ej-ej.ej/dde'e de0de1e0e	f de0de2de2de0de0fddZ3dS )    N)asynccontextmanager)AnyOptionalUnionTYPE_CHECKING)
EngineArgs)AsyncEmbeddingEngineAsyncEngineArray)MANAGER)docserrors)logger)AudioCorruptionImageCorruptionModalityModelCapabilitesMatryoshkaDimErrorModelNotDeployedError)PostHogStartupTelemetrytelemetry_log_info)DataURIorURLengine_args_listcapabilities_listc              	   C   sF   t d t j}t| |D ]\}}tt|t	| ||d qd S )N<   )engine_argsnum_enginescapabilities
session_id)
timesleepuuiduuid4hexzipr   capturer   len)r   r   r   argr    r(   j/lsinfo/ai/hellotax_ai/llm_service/venv_embed/lib/python3.10/site-packages/infinity_emb/infinity_server.pysend_telemetry_start!   s   

r*   )
url_prefix	doc_extraredirect_slashpreload_onlypermissive_corsapi_keyproxy_root_pathr+   r,   r-   r.   r/   r0   r1   c           !         s  ddl m}m}	mm}
m ddlm} ddlm	}m
} ddlm} ddlm}m}m m}m}mm}m}m td|	f	
fdd	}|	tjtjtjtjtd
d d dddd||d
g }|rvj|dgddgdgd r|dd}||fdt | ffdd}|!|| | "#t$j%t$j& j'dd|
j(ddtt)t*f fdd}rdd l+m, -d!sJ d"'d!fd#d$}j' d%||
j(|d&d'fd(d)}d*t)dd+ffd,d-d.t.d/t/d/ f dt/t.t)t0f  fd0d1j1 d2|
j(|d3d'd4|ffd5d6}j1 d7|
j(|d8d'd4|ffd9d:}j1 d;|
j(| d<d=d4|f fd>d?}j1 d@|
j(|dAddBdCd4|ffdDdE}j1 dF|
j(|dGddHdCd4|ffdIdJ} S )Kz>
    creates the FastAPI server for a set of EngineArgs.

    r   )DependsFastAPIHTTPException	responsesstatus)CORSMiddleware)HTTPAuthorizationCredentials
HTTPBearer)Instrumentator)	AudioEmbeddingInputClassifyInputClassifyResultImageEmbeddingInputMultiModalOpenAIEmbeddingOpenAIEmbeddingResultOpenAIModelInfoRerankInputReRankResultappc                   s    |  tdt ddd D   t  t| _tj	t
dd | jD fd}d|_|  | j I d H  ttj dd  d	d d
 rkdtfdd}td d t|d d V  | j I d H  d S )Nz	Creating z
 engines: c                 S      g | ]}|j qS r(   )served_model_name.0er(   r(   r)   
<listcomp>U       z3create_server.<locals>.lifespan.<locals>.<listcomp>c                 S   rE   r(   )r   rG   r(   r(   r)   rJ   [   rK   )targetargsThostport)rN   rO   prefixsecondsc                    s(   t | I d H  tt tj d S )N)asyncior    oskillgetpidsignalSIGINT)rQ   r(   r(   r)   
kill_laterl   s   z3create_server.<locals>.lifespan.<locals>.kill_laterz&Preloaded configuration successfully. z  -> exit .   )exposer   infor&   r   r	   	from_argsengine_array	threadingThreadr*   daemonstartastartr   startup_messagepopintrR   create_taskastop)rD   thrX   )r,   r   instrumentatorr.   r+   r(   r)   lifespanQ   s6   


zcreate_server.<locals>.lifespanzMichael Feil, Raphael Wirth)namez/docsz/openapi.jsonzMIT LicenseMIT)rk   
identifier)
titlesummarydescriptionversioncontactdocs_urlopenapi_urllicense_inforj   	root_path*T)allow_originsallow_credentialsallow_methodsallow_headersF)
auto_error
credentialc                    s,   | d u s
| j kr jdddidd S )NUnauthorizedzWWW-AuthenticateBearer)status_codedetailheaders)credentialsHTTP_401_UNAUTHORIZED)r}   )r4   r0   r6   r(   r)   validate_token   s   z%create_server.<locals>.validate_tokenz/healthhealth)operation_idresponse_classreturnc                      s   dt   iS )zq
        health check endpoint

        Returns:
            dict(unix=float): dict with unix time stamp
        unix)r   r(   r(   r(   r)   _health   s   zcreate_server.<locals>._health)RedirectResponse/z redirect_slash must start with /c                     s    d} | S )N)urlr(   )response)r   r-   r(   r)   redirect   s   
zcreate_server.<locals>.redirectz/modelsmodels)response_modelr   dependenciesr   c                     s~    j } g }| D ]1}|j}|t|jt| j| j| j|j	d|j
|jj|jj|jj|j|j|jjd	 qt|dS )zget models endpoint)queue_fractionqueue_absoluteresults_pending
batch_size)	idstatsr   backendembedding_dtypedtyperevisionlengths_via_tokenizedevice)data)r]   r   appenddictrF   overload_statusr   r   results_absoluter   r   enginerk   r   r   r   r   r   )r]   r   r   r   )rD   r(   r)   _models   s0   	
zcreate_server.<locals>._modelsmodelr   c              
      sb   z j |  }W n ty } ztjd| jdd }~ww | r/tjd|  djd|S )NzInvalid model: codezmodel z is currently overloaded)r]   
IndexErrorr   OpenAIExceptionHTTP_400_BAD_REQUESTis_overloadedHTTP_429_TOO_MANY_REQUESTS)r   r   ex)rD   r6   r(   r)   _resolve_engine   s   
z&create_server.<locals>._resolve_engineinputsr   c                 S   s@   t | drt| g}|S t | dr| jg}|S dd | D }|S )NrN   mimetypec                 S   s$   g | ]}t |d rt|n|jqS )rN   hasattrstrr   )rH   dr(   r(   r)   rJ      s    z?create_server.<locals>._resolve_mixed_input.<locals>.<listcomp>r   )r   urls_or_bytesr(   r(   r)   _resolve_mixed_input   s   

	
z+create_server.<locals>._resolve_mixed_inputz/embeddings
embeddingsr   c              
      s  | j j}| j }|j}zt }|tjkr;t|jt	r"|jg}n|j}t
dt| |j||jdI dH \}}n?|tjkr[|j}t
dt| |j||jdI dH \}}n|tjkrz|j}t
dt| |j||jdI dH \}}t | d }	t
d	|	  j||j|j|d
W S  ty }
 ztjd|j d|j d|
 jdd}
~
w tttfy }
 ztj|
j d|
 jdd}
~
w ty }
 ztjd|
 j dd}
~
ww )a^  Encode Embeddings. Supports with multimodal inputs. Aligned with OpenAI Embeddings API.

        ## Running Text Embeddings
        ```python
        import requests, base64
        requests.post("http://..:7997/embeddings",
            json={"model":"openai/clip-vit-base-patch32","input":["Two cute cats."]})
        ```

        ## Running Image Embeddings
        ```python
        requests.post("http://..:7997/embeddings",
            json={
                "model": "openai/clip-vit-base-patch32",
                "encoding_format": "base64",
                "input": [
                    "http://images.cocodataset.org/val2017/000000039769.jpg",
                    # can also be base64 encoded
                ],
                # set extra modality to image to process as image
                "modality": "image"
        )
        ```

        ## Running Audio Embeddings
        ```python
        import requests, base64
        url = "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav"

        def url_to_base64(url, modality = "image"):
            '''small helper to convert url to base64 without server requiring access to the url'''
            response = requests.get(url)
            response.raise_for_status()
            base64_encoded = base64.b64encode(response.content).decode('utf-8')
            mimetype = f"{modality}/{url.split('.')[-1]}"
            return f"data:{mimetype};base64,{base64_encoded}"

        requests.post("http://localhost:7997/embeddings",
            json={
                "model": "laion/larger_clap_general",
                "encoding_format": "float",
                "input": [
                    url, url_to_base64(url, "audio")
                ],
                # set extra modality to audio to process as audio
                "modality": "audio"
            }
        )
        ```

        ## Running via OpenAI Client
        ```python
        from openai import OpenAI # pip install openai==1.51.0
        client = OpenAI(base_url="http://localhost:7997/")
        client.embeddings.create(
            model="laion/larger_clap_general",
            input=[url_to_base64(url, "audio")],
            encoding_format="float",
            extra_body={
                "modality": "audio"
            }
        )

        client.embeddings.create(
            model="laion/larger_clap_general",
            input=["the sound of a beep", "the sound of a cat"],
            encoding_format="base64", # base64: optional high performance setting
            extra_body={
                "modality": "text"
            }
        )
        ```

        ### Hint: Run all the above models on one server:
        ```bash
        infinity_emb v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id laion/larger_clap_general
        ```
        u,   [📝] Received request with %s input texts )	sentencesmatryoshka_dimNu-   [📝] Received request with %s input audios )audiosr   u-   [📝] Received request with %s input images )imagesr        [✅] Done in %s msr   r   encoding_formatusageModelNotDeployedError: model=`z)` does not support `embed` for modality `z`. Reason: r    -> InternalServerError: )!rootmodalityr   r   perf_counterr   text
isinstanceinputr   r   debugr&   embed
dimensionsaudioaudio_embedimageimage_embedto_embeddings_responser   r   r   r   r   valuer   r   r   r   	__class__	ExceptionHTTP_500_INTERNAL_SERVER_ERROR)r   r   	data_rootr   ra   input_	embeddingr   r   durationr   r@   r   r   r6   r(   r)   _embeddings   s|   W






z"create_server.<locals>._embeddingsz/rerankrerankc              
      s   | j }z:tdt| j t }|j| j| j| j	| j
dI dH \}}t | d }td|  j||jj|| jdW S  ty[ } ztjd| j  d| jd	d}~w tyq } ztjd
| jd	d}~ww )a  Rerank documents. Aligned with Cohere API (https://docs.cohere.com/reference/rerank)

        ```python
        import requests
        requests.post("http://..:7997/rerank",
            json={
                "model":"mixedbread-ai/mxbai-rerank-xsmall-v1",
                "query":"Where is Munich?",
                "documents":["Munich is in Germany.", "The sky is blue."]
            })
        ```
        %   [📝] Received request with %s docs )queryr   
raw_scorestop_nNr   r   )scoresr   r   return_documentsr   z%` does not support `rerank`. Reason: r   r   )r   r   r   r&   	documentsr   r   r   r   r   r   to_rerank_responser   rF   r   r   r   r   r   r   r   )r   r   ra   r   r   r   r   )rC   r   r6   r(   r)   _rerank  s@   
zcreate_server.<locals>._rerankz	/classifyclassify)r   r   r   r   c              
      s   | j }z4tdt| j t }|j| j| jdI dH \}}t | d }td|  j	||j
j|dW S  tyU } ztjd| j  d| jd	d}~w tyk } ztjd
| jd	d}~ww )zScore or Classify Sentiments

        ```python
        import requests
        requests.post("http://..:7997/classify",
            json={"model":"SamLowe/roberta-base-go_emotions","input":["I am not having a great day."]})
        ```
        r   )r   r   Nr   r   )scores_labelsr   r   r   z'` does not support `classify`. Reason: r   r   )r   r   r   r&   r   r   r   r   r   to_classify_responser   rF   r   r   r   r   r   r   )r   r   ra   r   r   r   r   )r=   r   r6   r(   r)   	_classify  s8   
z create_server.<locals>._classifyz/embeddings_imageembeddings_imagez;Deprecated: Use `embeddings` with `modality` set to `image`)r   r   r   r   
deprecatedro   c              
        | j }| j}z1tdt| t }|j|dI dH \}}t | d }td|  j||j	| j
|dW S  ttfyX } ztj|j d| jdd}~w tyr } ztjd	| j  d
| jdd}~w ty } ztjd| jdd}~ww )a  Encode Embeddings from Image files

        Supports URLs of Images and Base64-encoded Images

        ```python
        import requests
        requests.post("http://..:7997/embeddings_image",
            json={
                "model":"openai/clip-vit-base-patch32",
                "input": [
                    "http://images.cocodataset.org/val2017/000000039769.jpg",
                    "data:image/png;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDIMAGE"
                ]
            })
        ```
        %   [📝] Received request with %s Urls )r   Nr   r   r   r   r   r   z*` does not support `image_embed`. Reason: r   )r   r   r   r   r&   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   ra   r   r   r   r   r   r(   r)   _embeddings_image  D   

z(create_server.<locals>._embeddings_imagez/embeddings_audioembeddings_audioz;Deprecated: Use `embeddings` with `modality` set to `audio`c              
      r   )aB  Encode Embeddings from Audio files

        Supports URLs of Audios and Base64-encoded Audios

        ```python
        import requests
        requests.post("http://..:7997/embeddings_audio",
            json={
                "model":"laion/larger_clap_general",
                "input": [
                    "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav",
                    "data:audio/wav;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDAUDIO"
                ]
            })
        ```
        r   )r   Nr   r   r   r   r   r   z*` does not support `audio_embed`. Reason: r   )r   r   r   r   r&   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r(   r)   _embeddings_audio'  r   z(create_server.<locals>._embeddings_audio)2fastapir2   r3   r4   r5   r6   fastapi.middleware.corsr7   fastapi.securityr8   r9   !prometheus_fastapi_instrumentatorr:   %infinity_emb.fastapi_schemas.pymodelsr;   r<   r=   r>   r?   r@   rA   rB   rC   r   r   FASTAPI_TITLEFASTAPI_SUMMARYFASTAPI_DESCRIPTIONinfinity_emb__version__r   add_middlewarer   r   
instrumentadd_exception_handlerr   r   openai_exception_handlergetORJSONResponser   floatfastapi.responsesr   
startswithr   listbytespost)!r   r+   r,   r-   r.   r/   r0   r1   r2   r3   r5   r7   r8   r9   r:   r;   r<   r>   r?   rA   rB   rj   route_dependenciesoauth2_schemer   r   r   r   r   r   r   r   r   r(   )r=   r4   r@   rC   r   r   r   r0   rD   r,   r   ri   r.   r-   r6   r+   r)   create_server2   s   ,%

	
 -%	2	2r  )4rR   rS   rV   r   r^   r!   
contextlibr   typingr   r   r   r   r   infinity_emb.argsr   infinity_emb.enginer   r	   infinity_emb.envr
   infinity_emb.fastapi_schemasr   r   infinity_emb.log_handlerr   infinity_emb.primitivesr   r   r   r   r   r   infinity_emb.telemetryr   r   r   r   r   r  setr*   r+   r-   r.   r/   r0   r1   r   r   boolr  r(   r(   r(   r)   <module>   s^    


	