
    -j.                    T   d dl mZ d dlZd dlZd dlZd dlZd dlmZm	Z	 d dl
mZmZ d dlmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dl m!Z! d dl"m#Z# ej$        rd dl%mZ& d dl'm(Z( ej)        ej*        e+         ej,        ej-                 f         Z. ej/        e0          Z1 G d de          Z G d de2e          Z3 G d de          Z4 G d de          Z5 G d de2e          Z6e6j7        fd/dZ8e8Z9	 	 	 	 d0d1d,Z:e G d- d.e5                      Z;dS )2    )annotationsN)ABCabstractmethod)	dataclassfield)Enum)TextSplitter)Document)Field)BaseRagasEmbeddings)ExceptionInRunner)Executor)	RunConfig)rng)	Extractorc                      e Zd ZU  ed           Zded<    edd          Zded	<   ed
             Ze	dd            Z
e	dd            ZddZdS )r
   c                 B    t          t          j                              S N)struuiduuid4     `/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/ragas/testset/docstore.py<lambda>zDocument.<lambda>!   s    DJLL0A0A r   default_factoryr   doc_idNFdefaultreprzt.Optional[t.List[float]]	embeddingc                    | j                             d          }|| j         d         }n't                              d| j                   | j        }|S )Nfilenamez9Document [ID: %s] has no filename, using `doc_id` instead)metadatagetloggerinfor   )selfr$   s     r   r$   zDocument.filename$   sV    =$$Z00}Z0HHKKKT[   {Hr   doc
LCDocumentc                r    t          t          j                              } | |j        |j        |          S N)page_contentr%   r   )r   r   r   r.   r%   clsr*   r   s      r   from_langchain_documentz Document.from_langchain_document1   s<    TZ\\""s)\
 
 
 	
r   LlamaindexDocumentc                r    t          t          j                              } | |j        |j        |          S r-   )r   r   r   textr%   r/   s      r   from_llamaindex_documentz!Document.from_llamaindex_document:   s;    TZ\\""s\
 
 
 	
r   returnboolc                "    | j         |j         k    S r   r   )r)   others     r   __eq__zDocument.__eq__C   s    {el**r   )r*   r+   )r*   r2   )r6   r7   )__name__
__module____qualname__r   r   __annotations__r"   propertyr$   classmethodr1   r5   r;   r   r   r   r
   r
       s         %(A(ABBBFBBBB+05E+J+J+JIJJJJ
 
 X
 
 
 
 [
 
 
 
 [
+ + + + + +r   r
   c                  "    e Zd ZdZdZdZdZdZdS )	Directionz0
    Direction for getting adjascent nodes.
    nextprevupdownN)r<   r=   r>   __doc__NEXTPREVUPDOWNr   r   r   rC   rC   H   s.          DD	BDDDr   rC   c                      e Zd ZU  eed          Zded<    eed          Zded<    edd          Z	d	ed
<   dZ
ded<   ed             Zed             ZdS )NodeF)r   r!   zt.List[str]
keyphraseszt.Dict[Direction, t.Any]relationshipsNr   t.Optional[float]doc_similarityr   intwinsc                J    | j                             t          j                  S r   )rP   r&   rC   rI   r)   s    r   rD   z	Node.nextY       !%%in555r   c                J    | j                             t          j                  S r   )rP   r&   rC   rJ   rV   s    r   rE   z	Node.prev]   rW   r   )r<   r=   r>   r   listrO   r?   dictrP   rR   rT   r@   rD   rE   r   r   r   rN   rN   S   s         #eDuEEEJEEEE.3eDu.U.U.UMUUUU(-d(G(G(GNGGGGDMMMM6 6 X6 6 6 X6 6 6r   rN   c                      e Zd Zd Zeddd            Zedd d            Zed!d            Zed"d#d            Ze	 d$d%d            Z	d&dZ
dS )'DocumentStorec                    i | _         d S r   )	documentsrV   s    r   __init__zDocumentStore.__init__c   s    r   Tdocst.Sequence[Document]c                    d S r   r   )r)   r`   show_progresss      r   add_documentszDocumentStore.add_documentsf       r   nodest.Sequence[Node]c                    d S r   r   )r)   rf   rc   s      r   	add_nodeszDocumentStore.add_nodesj   re   r   node_idr   r6   rN   c                    d S r   r   r)   rj   s     r   get_nodezDocumentStore.get_noden   re   r      t.List[Node]c                    d S r   r   )r)   ks     r   get_random_nodeszDocumentStore.get_random_nodesr   re   r   ffffff?   node	thresholdfloattop_krS   't.Union[t.List[Document], t.List[Node]]c                    d S r   r   )r)   ru   rv   rx   s       r   get_similarzDocumentStore.get_similarv   s	     	r   
run_configr   c                    d S r   r   r)   r|   s     r   set_run_configzDocumentStore.set_run_config|       r   NTr`   ra   rf   rg   rj   r   r6   rN   )rn   r6   ro   rs   rt   ru   rN   rv   rw   rx   rS   r6   ry   r|   r   )r<   r=   r>   r_   r   rd   ri   rm   rr   r{   r   r   r   r   r\   r\   b   s               ^     ^    ^     ^ ?@    ^
     r   r\   c                      e Zd ZdZdZdZdZdS )SimilarityModezModes for similarity/distance.cosinedot_product	euclideanN)r<   r=   r>   rH   DEFAULTDOT_PRODUCT	EUCLIDEANr   r   r   r   r      s#        ((GKIIIr   r   
embedding1	Embedding
embedding2moder6   rw   c                   |t           j        k    rTt          t          j                            t          j        |           t          j        |          z
                       S |t           j        k    rt          j        | |          S t          j        | |          }t          j                            |           t          j                            |          z  }||z  S )zGet embedding similarity.)	r   r   rw   nplinalgnormarrayr   dot)r   r   r   productr   s        r   
similarityr      s     ~'''binnRXj%9%9BHZ<P<P%PQQRRRR	+	+	+vj*---&Z00y~~j))BINN:,F,FF~r   query_embedding
embeddingst.List[Embedding]similarity_fn"t.Optional[t.Callable[..., float]]similarity_top_kt.Optional[int]embedding_idst.Optional[t.List]similarity_cutoffrQ   t.Tuple[t.List[float], t.List]c                   |)t          t          t          |                              }|pt          }t	          j        |          }t	          j        |           }g }t          |          D ]_\  }	}
 |||
          }|||k    rFt          j        ||||	         f           |r't          |          |k    rt          j	        |           `t          |d d          }d |D             }d |D             }||fS )zm
    Get top nodes by similarity to the query.
    returns the scores and the embedding_ids of the nodes
    Nc                    | d         S )Nr   r   )xs    r   r   z&get_top_k_embeddings.<locals>.<lambda>   s
    ! r   T)keyreversec                    g | ]\  }}|S r   r   ).0s_s      r   
<listcomp>z(get_top_k_embeddings.<locals>.<listcomp>   s    555A1555r   c                    g | ]\  }}|S r   r   )r   r   ns      r   r   z(get_top_k_embeddings.<locals>.<listcomp>   s    ,,,1!,,,r   )rY   rangelendefault_similarity_fnsr   r   	enumerateheapqheappushheappopsorted)r   r   r   r   r   r   embeddings_npquery_embedding_npsimilarity_heapiembr   result_tupsresult_similarities
result_idss                  r   get_top_k_embeddingsr      s"    U3z??3344!;%;MHZ((M/2257OM** / /3"]#5s;;
$
5F(F(FN?Zq9I,JKKK /C$8$8;K$K$Ko...nndKKKK55555,,,,,J
**r   c                  ,   e Zd ZU ded<    edd          Zded<    edd          Zded	<    ee
          Zded<    ee
          Z	ded<    ee

          Zded<   dZded<   d3dZd4d5dZd4d6dZd Zd Zd7d"Zd8d$Zd9d:d'Z	 d;d<d0Zd=d2ZdS )>InMemoryDocumentStorer	   splitterNFr   zt.Optional[Extractor]	extractorzt.Optional[BaseRagasEmbeddings]r   r   ro   rf   r   node_embeddings_listzt.Dict[str, Node]node_mapzt.Optional[RunConfig]r|   items/t.Union[t.Sequence[Document], t.Sequence[Node]]c                    d S r   r   )r)   r   s     r   _embed_itemsz"InMemoryDocumentStore._embed_items   r   r   Tr`   ra   c                    | j         
J d            d | j                            |          D             }|                     ||           dS )z.
        Add documents in batch mode.
        NEmbeddings must be setc                B    g | ]}t                               |          S r   )rN   r1   )r   ds     r   r   z7InMemoryDocumentStore.add_documents.<locals>.<listcomp>   s6     
 
 
 ((++
 
 
r   )rc   )r   r   transform_documentsri   )r)   r`   rc   rf   s       r   rd   z#InMemoryDocumentStore.add_documents   sg     **,D***
 
]66t<<
 
 
 	uM:::::r   rg   c                \   | j         
J d            | j        
J d            i }i }t          ddd| j                  }d}t	          |          D ]\  }}|j        G|                    ||i           |                    | j         j        |j	        d| d	
           |dz  }|j
        sB|                    ||i           |                    | j        j        |d| d	
           |dz  }|                                }	|	st                      t	          |          D ]\  }}||                                v r|	||                  |_        ||                                v r|	||                  }
|
|_
        |j        |j
        g k    rx| j                            |           || j        |j        <   t'          |j        t(          t*          j        f          s
J d            | j                            |j                   |                                  |                                  d S )Nr   zExtractor must be setzembedding nodesFT)desckeep_progress_barraise_exceptionsr|   r   zembed_node_task[])namern   zkeyphrase-extraction[z$Embedding must be list or np.ndarray)r   r   r   r|   r   r"   updatesubmit
embed_textr.   rO   extractresultsr   keysrf   appendr   r   
isinstancerY   r   ndarrayr   calculate_nodes_docs_similarityset_node_relataionships)r)   rf   rc   nodes_to_embednodes_to_extractexecutor
result_idxr   r   r   rO   s              r   ri   zInMemoryDocumentStore.add_nodes   s   **,D***~))+B)))  "#!	
 
 
 
e$$ 	  	 DAq{"%%q*o666O.N0A000     
 a
<   ''J888N*5555     
 a
""$$ 	&#%%%e$$ 	> 	>DAqN''))))%nQ&78$))++++$%5a%89
){&1<2+=+=
!!!$$$*+ah'!K$
!3  : :9: :  )00===,,...$$&&&&&r   c                   t          | j                  D ]\  }}|dk    rq| j        |dz
           }|j        |j        k    r)||j        t          j        <   ||j        t          j        <   n(d |j        t          j        <   d |j        t          j        <   |t          | j                  dz
  k    rd |j        t          j        <   d S )Nr   rn   )r   rf   r$   rP   rC   rJ   rI   r   )r)   r   ru   	prev_nodes       r   r   z-InMemoryDocumentStore.set_node_relataionships  s     ,, 
	: 
	:GAt1uu Jq1u-	%669BD&y~6>BI+IN;;9=D&y~6>BI+IN;C
OOa'''59"9>2
	: 
	:r   c                j   i }t          d | j        D                       }t          d | j        D                       }t          |          t          |          k    r-t                              d           | j        D ]	}d|_        
d S |D ]ct          j        fd| j        D                       }|                    t          |          d          }t          j	        |d          |<   d| j        D ]8}|j
        
J d	            t          |j
        ||j                           |_        9d S )
Nc                *    g | ]}|j         	|j         S r   )r$   r   ru   s     r   r   zIInMemoryDocumentStore.calculate_nodes_docs_similarity.<locals>.<listcomp>"  s!    OOOtT]5NT]5N5N5Nr   c                    g | ]	}|j         
S r   r9   r   s     r   r   zIInMemoryDocumentStore.calculate_nodes_docs_similarity.<locals>.<listcomp>$  s    ;;;;;;r   z/Filename and doc_id are the same for all nodes.g      ?c                4    g | ]}|j         k    |j        S r   )r$   r"   )r   ru   file_ids     r   r   zIInMemoryDocumentStore.calculate_nodes_docs_similarity.<locals>.<listcomp>.  s'    WWWdmw>V>VT^>V>V>Vr   r   )axiszEmbedding cannot be None)setrf   r   r'   warningrR   r   r   reshapemeanr"   r   r$   )r)   doc_embeddingsfilename_idsnode_idsru   nodes_embeddingr   s         @r   r   z5InMemoryDocumentStore.calculate_nodes_docs_similarity  sj   OOtzOOO
 
 ;;
;;;<<|H--NNLMMM
 * *&)##* * ( K K"$(WWWW
WWW# # #2"9"9#o:N:NPR"S"S*,'/*J*J*Jw''
  ~113M111&0NN4=$A' '## r   rj   r   r6   rN   c                    | j         |         S r   )r   rl   s     r   rm   zInMemoryDocumentStore.get_node9  s    }W%%r   r   c                    t           r   )NotImplementedError)r)   r   s     r   get_documentz"InMemoryDocumentStore.get_document<  s    !!r   rn   皙?c                  	 d 		fd| j         D             }d | j         D             }t          j        |          t          j        |          z  }|t          j        |          z  }t	          j        t          j        | j                   ||                                          }|D ]7}| j                             |          }| j         |         xj        dz  c_        8|S )Nc                2    t          j        | | z            S r   )r   exp)rT   alphas     r   adjustment_factorzAInMemoryDocumentStore.get_random_nodes.<locals>.adjustment_factor@  s    65&4-(((r   c                2    g | ]} |j                   S r   )rT   )r   ru   r  r  s     r   r   z:InMemoryDocumentStore.get_random_nodes.<locals>.<listcomp>C  s)    MMM$##DIu55MMMr   c                    g | ]	}|j         
S r   )rR   r   s     r   r   z:InMemoryDocumentStore.get_random_nodes.<locals>.<listcomp>D  s    HHHTT0HHHr   )sizeprn   )	rf   r   r   sumr   choicetolistindexrT   )
r)   rq   r  scoressimilarity_scoresprobrf   ru   idxr  s
     `      @r   rr   z&InMemoryDocumentStore.get_random_nodes?  s    	) 	) 	) NMMMM$*MMMHHTZHHHx"(+<"="==bfTll"
28DJ//a4@@@GGII 	& 	&D*""4((CJsO  A%   r   rs   rt   ru   rv   rw   rx   rS   ry   c                     |}|j         t          d          t          |j          j        t          ||dz             \  }}|dd          |dd          }} fd|D             }|S )NzDocument has no embedding.rn   )r   r   r   r   r   c                *    g | ]}j         |         S r   )rf   )r   r   r)   s     r   r   z5InMemoryDocumentStore.get_similar.<locals>.<listcomp>`  s     :::F#:::r   )r"   
ValueErrorr   r   r   )r)   ru   rv   rx   r*   r  doc_idsr   s   `       r   r{   z!InMemoryDocumentStore.get_similarP  s     = 9:::.M0$'"QY
 
 
 !*gabbk::::':::r   r   c                V    | j         r| j                             |           || _        d S r   )r   r   r|   r~   s     r   r   z$InMemoryDocumentStore.set_run_configc  s-    ? 	7O**:666$r   )r   r   r   r   r   r   )r   r   r6   rN   )rn   r   r   r   r   r   )r<   r=   r>   r?   r   r   r   rY   rf   r   rZ   r   r|   r   rd   ri   r   r   rm   r   rr   r{   r   r   r   r   r   r      s        ',uT'F'F'FIFFFF27%52Q2Q2QJQQQQ%555E5555.3eD.I.I.IIIII"'%"="="=H====(,J,,,,   ; ; ; ; ;7' 7' 7' 7' 7'r: : :  4& & & &" " " "    $ @A    &% % % % % %r   r   )r   r   r   r   r   r   r6   rw   )NNNN)r   r   r   r   r   r   r   r   r   r   r   rQ   r6   r   )<
__future__r   r   loggingtypingtr   abcr   r   dataclassesr   r   enumr   numpyr   numpy.typingnptlangchain.text_splitterr	   langchain_core.documentsr
   r+   langchain_core.pydantic_v1r   ragas.embeddings.baser   ragas.exceptionsr   ragas.executorr   ragas.run_configr   ragas.testset.utilsr   TYPE_CHECKINGllama_index.core.schemar2   ragas.testset.extractorr   UnionListrw   NDArrayfloat64r   	getLoggerr<   r'   r   rC   rN   r\   r   r   r   r   r   r   r   r   r   <module>r0     s    " " " " " "        # # # # # # # # ( ( ( ( ( ( ( (                 0 0 0 0 0 0 ; ; ; ; ; ; , , , , , , 5 5 5 5 5 5 . . . . . . # # # # # # & & & & & & # # # # # #? 2FFFFFF111111GAF5M3;rz#::;			8	$	$%+ %+ %+ %+ %+z %+ %+ %+P    T   6 6 6 6 68 6 6 6    C   <    S$    *1    " $  9=(,(,+/ +  +  +  +  +F f% f% f% f% f%M f% f% f% f% f%r   