
    Xj-                     |    U d dl mZ d dlmZ d dlmZ  ee          Z G d d          Zda	edz  e
d<   defd	ZdS )
    )Any)Neo4jClient)
get_loggerc            
           e Zd ZdefdZdededefdZdeee	e
f                  deee	e
f                  dededef
d	Z	 ddededefdZdede	dedefdZdS )RelationExtractorneo4j_clientc                     || _         d S )Nr   )selfr   s     M/lsinfo/ai/hellotax_ai/base_platform/app/services/graph/relation_extractor.py__init__zRelationExtractor.__init__   s    (    document_id	tenant_idkb_idc                    d}	 | j                             ||||d          }t                              dt	          |           d|            d S # t
          $ r(}t                              d|            Y d }~d S d }~ww xY w)Na/  
        MATCH (d1:Document {id: $doc_id, tenant_id: $tenant_id, kb_id: $kb_id})
        MATCH (d1)-[:CONTAINS]->(e:Entity)
        MATCH (e)<-[:CONTAINS]-(d2:Document)
        WHERE d2.tenant_id = $tenant_id
          AND d2.kb_id = $kb_id
          AND d2.id <> $doc_id
        WITH d1, d2, count(e) as shared_entities
        WHERE shared_entities >= 2
        MERGE (d1)-[r:SIMILAR_TO]-(d2)
        SET r.similarity = toFloat(shared_entities) / 10.0,
            r.method = 'entity_overlap'
        RETURN d2.id as related_doc_id, shared_entities
        )doc_idr   r   
parameterszCreated z document relations for doc z%Failed to create document relations: r   execute_writeloggerinfolen	Exceptionerror)r   r   r   r   queryresultses          r   create_document_relationsz+RelationExtractor.create_document_relations   s     O		F'55[y[`"a"a 6  G KKZ3w<<ZZ[ZZ[[[[[ 	F 	F 	FLLDDDEEEEEEEEE	Fs   AA 
BB  Bentities	relationsc                 <   |sd S d |D             }|D ]}|d         }|d         }	||vs|	|vrd}
	 | j                             |
||	|d         |d         |||d           O# t          $ r-}t                              d	| d
|	 d|            Y d }~d }~ww xY wd S )Nc                     h | ]
}|d          S )name ).0r   s     r   	<setcomp>z<RelationExtractor.create_entity_relations.<locals>.<setcomp>%   s    444a&	444r   sourcetargetaG  
            MATCH (e1:Entity {name: $source, tenant_id: $tenant_id, kb_id: $kb_id})
            MATCH (e2:Entity {name: $target, tenant_id: $tenant_id, kb_id: $kb_id})
            MERGE (e1)-[r:RELATED_TO {type: $rel_type}]->(e2)
            SET r.confidence = $confidence,
                r.document_id = $doc_id
            type
confidence)r)   r*   rel_typer,   r   r   r   r   z!Failed to create entity relation z->z: )r   r   r   r   r   )r   r!   r"   r   r   r   entity_namesrelationr)   r*   r   r   s               r   create_entity_relationsz)RelationExtractor.create_entity_relations   s,     	F448444! 	Z 	ZHh'Fh'F\))V<-G-G dEZ!//"("($,V$4&.|&<"-%.!&    0      Z Z ZXXX6XXUVXXYYYYYYYYZ'	Z 	Zs   0A""
B,#BB   min_co_occurrencec                     d}	 | j                             ||||d           t                              d|            d S # t          $ r(}t                              d|            Y d }~d S d }~ww xY w)Na  
        MATCH (d:Document {tenant_id: $tenant_id, kb_id: $kb_id})
        MATCH (d)-[:CONTAINS]->(e1:Entity)
        MATCH (d)-[:CONTAINS]->(e2:Entity)
        WHERE e1.name < e2.name
        WITH e1, e2, collect(d.id) as docs, count(d) as co_count
        WHERE co_count >= $min_count
        MERGE (e1)-[r:CO_OCCURS_WITH]-(e2)
        SET r.count = co_count,
            r.documents = docs
        )r   r   	min_countr   z'Created co-occurrence relations for kb z*Failed to create co-occurrence relations: )r   r   r   r   r   r   )r   r   r   r2   r   r   s         r   create_co_occurrence_relationsz0RelationExtractor.create_co_occurrence_relations<   s     n	K++)2UQbcc ,    KKI%IIJJJJJ 	K 	K 	KLLIaIIJJJJJJJJJ	Ks   =A 
A5A00A5contentc                    d}	 | j                             |||||d          }|r/t                              dt	          |           d|            d S d S # t
          $ r(}t                              d|            Y d }~d S d }~ww xY w)Na  
        MATCH (d1:Document {id: $doc_id, tenant_id: $tenant_id, kb_id: $kb_id})
        MATCH (d2:Document {tenant_id: $tenant_id, kb_id: $kb_id})
        WHERE d2.id <> $doc_id
          AND d2.title IS NOT NULL
          AND $content CONTAINS d2.title
        MERGE (d1)-[r:REFERENCES]->(d2)
        SET r.context = 'title_mention',
            r.confidence = 0.8
        RETURN d2.id as referenced_doc_id
        )r   r6   r   r   r   z	Inferred z document references for doc z%Failed to infer document references: r   )r   r   r6   r   r   r   r   r   s           r   infer_document_referencesz+RelationExtractor.infer_document_referencesI   s     ~	F'55)&!*"	  6  G  b`G``S^``aaaaab b 	F 	F 	FLLDDDEEEEEEEEE	Fs   AA 
B
"BB
N)r1   )__name__
__module____qualname__r   r   intr    listdictstrr   r0   r5   r8   r&   r   r   r   r      sD       )[ ) ) ) )FS FS FQT F F F FZtCH~&Z S#X'Z 	Z
 Z Z Z Z ZD DEK KK%(K=@K K K KFS F3 FSV F_b F F F F F Fr   r   Nrelation_extractorreturnc                  H    t           ddlm}  t          |           a t           S )Nr   r
   )r@   app.services.graph.neo4j_clientr   r   r
   s    r   get_relation_extractorrD   ^   s0    !@@@@@@ /|<<r   )typingr   rC   r   common_loggingr   r9   r   r   r@   __annotations__rD   r&   r   r   <module>rH      s           7 7 7 7 7 7 % % % % % %	H		
LF LF LF LF LF LF LF LF^ 04 %, 3 3 3 1      r   