
    jx                         d dl mZmZ d dlmZmZmZ d dlZd dl	m
Z
 d dlmZmZ d dlmZ d dlmZ erd dlmZ e G d	 d
e                      Ze G d de                      ZdS )    )	dataclassfield)TYPE_CHECKINGListOptionalN)Element)BaseEmbeddingEncoderEmbeddingConfig)EmbeddingEncoderConnectionError)requires_dependenciesHuggingFaceEmbeddingsc                       e Zd ZU dZee         ed<    ed           Zee	         ed<    ed           Z
ee	         ed<   dZee	         ed	<   dS )
HuggingFaceEmbeddingConfigz&sentence-transformers/all-MiniLM-L6-v2
model_namec                  
    ddiS )Ndevicecpu r       h/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/unstructured/embed/huggingface.py<lambda>z#HuggingFaceEmbeddingConfig.<lambda>   s    (EAR r   )default_factorymodel_kwargsc                  
    ddiS )Nnormalize_embeddingsFr   r   r   r   r   z#HuggingFaceEmbeddingConfig.<lambda>   s    CY[`Ba r   encode_kwargsNcache_folder)__name__
__module____qualname__r   r   str__annotations__r   r   dictr   r   r   r   r   r   r      s          HJHHH#(59R9R#S#S#SL(4.SSS$)E:a:a$b$b$bM8D>bbb#'L(4.'''''r   r   c                   j   e Zd ZU eed<    edd          Zed         ed<    edd          Zee	e
                  ed<   edd	            Zede	e
         fd
            Zd Zd Zd Zd Zde	e         de	e         fdZde	e         fdZej         eddgd          dd                        ZdS )HuggingFaceEmbeddingEncoderconfigFN)initdefaultr   _client_exemplary_embeddingreturnc                 P    | j         |                                 | _         | j         S N)r*   create_clientselfs    r   clientz"HuggingFaceEmbeddingEncoder.client   s%    <--//DL|r   c                 \    | j         | j                            d          | _         | j         S )NQ)r+   r2   embed_queryr0   s    r   exemplary_embeddingz/HuggingFaceEmbeddingEncoder.exemplary_embedding%   s,    $,(,(?(?(D(DD%((r   c                     | j         }dS )z9Creates a langchain HuggingFace object to embed elements.N)r2   )r1   _s     r   
initializez&HuggingFaceEmbeddingEncoder.initialize+   s    Kr   c                 4    t          j        | j                  S r.   )npshaper6   r0   s    r   num_of_dimensionsz-HuggingFaceEmbeddingEncoder.num_of_dimensions/   s    x0111r   c                 p    t          j        t           j                            | j                  d          S )Ng      ?)r;   iscloselinalgnormr6   r0   s    r   is_unit_vectorz*HuggingFaceEmbeddingEncoder.is_unit_vector2   s%    z")..)ABBCHHHr   c                 P    | j                             t          |                    S r.   )r2   r5   r"   )r1   querys     r   r5   z'HuggingFaceEmbeddingEncoder.embed_query5   s    {&&s5zz222r   elementsc                 z    | j                             d |D                       }|                     ||          }|S )Nc                 ,    g | ]}t          |          S r   )r"   ).0es     r   
<listcomp>z?HuggingFaceEmbeddingEncoder.embed_documents.<locals>.<listcomp>9   s    1K1K1KQ#a&&1K1K1Kr   )r2   embed_documents_add_embeddings_to_elements)r1   rE   
embeddingselements_with_embeddingss       r   rK   z+HuggingFaceEmbeddingEncoder.embed_documents8   sB    [001K1K(1K1K1KLL
#'#C#CHj#Y#Y ''r   c                     t          |          t          |          k    sJ g }t          |          D ]'\  }}||         |_        |                    |           (|S r.   )len	enumeraterM   append)r1   rE   rM   elements_w_embeddingielements         r   rL   z7HuggingFaceEmbeddingEncoder._add_embeddings_to_elements=   sh    8}}J////!#H-- 	1 	1JAw!+AG ''0000r   	langchainsentence_transformerszembed-huggingface)extrasc                 P    ddl m}  |di | j                                        }|S )z@Creates a langchain Huggingface python client to embed elements.r   r   r   )langchain.embeddingsr   r'   to_dict)r1   r   r2   s      r   r/   z)HuggingFaceEmbeddingEncoder.create_clientF   s?     	?>>>>>&&??)<)<)>)>??r   )r,   r   )r   r    r!   r   r#   r   r*   r   r+   r   floatpropertyr2   r6   r9   r=   rB   r5   r   rK   rL   r   wrapr   r/   r   r   r   r&   r&      s        &&&&16E41P1P1PGX-.PPP27%UD2Q2Q2Q(4;/QQQ   X
 )T%[ ) ) ) X)
  2 2 2I I I3 3 3(W ($w- ( ( ( (
4=     %)	-."    	  *)
  r   r&   )dataclassesr   r   typingr   r   r   numpyr;   unstructured.documents.elementsr   unstructured.embed.interfacesr	   r
   unstructured.ingest.errorr   unstructured.utilsr   rZ   r   r   r&   r   r   r   <module>rf      s5   ( ( ( ( ( ( ( ( 0 0 0 0 0 0 0 0 0 0          P O O O O O O O E E E E E E 4 4 4 4 4 4 ;:::::: ( ( ( ( ( ( ( ( 6 6 6 6 6"6 6 6 6 6 6r   