
    Xj[8                         U d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	  e	e
          Z G d d          Zdaedz  ed	<   d
efdZdS )    )Any)EntityExtractor)Neo4jClient)RelationExtractor)
get_loggerc                      e Zd ZdededefdZ	 	 	 	 	 	 	 ddededed	edz  d
edededz  de	dz  dedz  dedz  de	e         dz  dedz  de
eef         fdZ	 	 ddeded	edz  d
edededz  dedz  fdZde	ded
ededef
dZdede	d
edefdZdeded
edefdZdede	e
eef                  d
edede
eef         f
dZ	 dded
ede	de
eef         fdZdS ) GraphBuilderneo4j_cliententity_extractorrelation_extractorc                 0    || _         || _        || _        d S N)r
   r   r   )selfr
   r   r   s       H/lsinfo/ai/hellotax_ai/base_platform/app/services/graph/graph_builder.py__init__zGraphBuilder.__init__   s"     ) 0"4    Ndocument_idtitlecontentsummary	tenant_idkb_idcategory_idtag_ids
doc_number
doc_statussupersedes_doc_idsparent_doc_idreturnc           
          	 t                               d| d|            |                     ||||||
|           | j                            |||	          }|                    dg           }|                    dg           }|                     ||||          }| j                            |||||           | j        	                    |||           | j        
                    ||||           |r|                     ||||           |r|                     ||||           |r |D ]}| j                            ||           |r#d}| j                            |||||d	           t                               d
| d| dt!          |           d           d||t!          |          dS # t"          $ r=}t                               d| d|            d|t'          |          dcY d }~S d }~ww xY w)NzBuilding graph for document : )r   r   )r   r   
db_sessionentities	relationsa;  
                MATCH (parent:Document {id: $parent_id, tenant_id: $tenant_id, kb_id: $kb_id})
                MATCH (att:Document {id: $doc_id, tenant_id: $tenant_id, kb_id: $kb_id})
                MERGE (parent)-[:HAS_ATTACHMENT]->(att)
                MERGE (att)-[:IS_ATTACHMENT_OF]->(parent)
                )doc_id	parent_idr   r   
parametersz&Graph built successfully for document z entities, z
 relationsT)successr   entity_countrelation_countz#Failed to build graph for document Fr)   r   error)loggerinfo_create_document_noder   extract_entitiesget_create_entity_nodesr   create_entity_relationscreate_document_relationsinfer_document_references
_link_tags_link_categoryr
   create_supersedes_relationexecute_writelen	Exceptionr-   str)r   r   r   r   r   r   r   r   r   r"   r   r   r   r   extraction_resultr#   r$   r*   	target_idqueryes                        r   build_document_graphz!GraphBuilder.build_document_graph   s    5	SKKM{MMeMMNNN&&%% '    !% 5 F FW !G ! ! ),,Z<<H)--k2>>I44X{IW\]]L#;;)[)U   #==k9V[\\\#==Wi    HWiGGG P##KiOOO! Y!3 Y YI%@@iXXXX 
 [!//"-%2%.!&	    0    KK{{{{{adenaoao{{{    * ,"%i..	    	S 	S 	SLLQ{QQaQQRRR$[3q66RRRRRRRR	Ss   F3F6 6
G= 2G82G=8G=c           
      j    ||nd}d}| j                             ||||pd||pd|pdd|           d S )Nr   a  
        MERGE (d:Document {id: $doc_id, tenant_id: $tenant_id, kb_id: $kb_id})
        SET d.title = $title,
            d.summary = $summary,
            d.doc_number = $doc_number,
            d.doc_status = $doc_status,
            d.updated_at = datetime()
         	effective)r%   r   r   r   r   r   r(   r   r
   r:   )	r   r   r   r   r   r   r   r   r@   s	            r   r0   z"GraphBuilder._create_document_node^   sv     "+!6IIA	 h''%"=b(.B(7K    	( 	
 	
 	
 	
 	
r   r#   c                 n    |sdS d}| j                             ||||d|           t          |          S )Nr   a  
        UNWIND $entities AS entity
        MERGE (e:Entity {
            name: entity.name,
            tenant_id: $tenant_id,
            kb_id: $kb_id
        })
        SET e.type = entity.type,
            e.description = entity.description,
            e.confidence = entity.salience
        WITH e, entity
        MATCH (d:Document {id: $doc_id, tenant_id: $tenant_id, kb_id: $kb_id})
        MERGE (d)-[r:CONTAINS]->(e)
        SET r.salience = entity.salience,
            r.frequency = 1
        )r#   r%   r   rF   )r
   r:   r;   )r   r#   r   r   r   r@   s         r   r3   z!GraphBuilder._create_entity_nodesw   sZ      	1 \''$,eTT 	( 	
 	
 	

 8}}r   c                 L    d}| j                             |||||d           d S )Nz
        MATCH (d:Document {id: $doc_id, tenant_id: $tenant_id, kb_id: $kb_id})
        UNWIND $tag_ids AS tag_id
        MERGE (t:Tag {id: tag_id, tenant_id: $tenant_id, kb_id: $kb_id})
        MERGE (d)-[:HAS_TAG]->(t)
        )r%   r   r   r   r'   rG   )r   r   r   r   r   r@   s         r   r7   zGraphBuilder._link_tags   sM     }''%"&	  	( 	
 	
 	
 	
 	
r   c                 L    d}| j                             |||||d           d S )Nz
        MATCH (d:Document {id: $doc_id, tenant_id: $tenant_id, kb_id: $kb_id})
        MERGE (c:Category {id: $cat_id, tenant_id: $tenant_id, kb_id: $kb_id})
        MERGE (d)-[:IN_CATEGORY]->(c)
        )r%   cat_idr   r   r'   rG   )r   r   r   r   r   r@   s         r   r8   zGraphBuilder._link_category   sM     d''%%&	  	( 	
 	
 	
 	
 	
r   chunksc                    	 t                               d| dt          |           d           d}d}|D ]'}|                    d          }|s2t                               d|                    dd                      L| j                            |||                    d	d
          |                    dd
          |                    dd          |                    dd          |                    d          |||                    di                               d          |                    di                               d                     |dz  })|D ]}|                    d          }|                    dg           }	|r|	s2|	D ]}
t          |
t                    s|
                    d          }|s0	 | j        	                    |||
                    d          |
                    dd          ||           |dz  }~# t          $ r-}t                               d| d| d|            Y d }~d }~ww xY wt                               d| d| d| d           d|||d S # t          $ r=}t                               d!| d|            d|t          |          d"cY d }~S d }~ww xY w)#Nz"Building chunk graph for document r!   z chunksr   chunk_idz"Chunk missing chunk_id, skipping: chunk_indexunknowntextrD   chunk_level	is_parentFparent_chunk_idmetadatadoc_typer   )rN   r   
chunk_textrR   rO   rS   rT   r   r   rV   r      
referencestarget_doc_numberarticle_number
confidenceg?)source_chunk_idrZ   target_articler\   r   r   z Failed to create reference from z to z,Chunk graph built successfully for document z	 chunks, z referencesT)r)   r   chunk_countreference_countz)Failed to build chunk graph for document r,   )r.   r/   r;   r2   warningr
   create_chunk_node
isinstancedictcreate_chunk_referencer<   r-   r=   )r   r   rL   r   r   r_   r`   chunkrN   rY   refrZ   rA   s                r   build_chunk_graphzGraphBuilder.build_chunk_graph   s   =	SKK`[``CPVKK```aaaKO ! ! 99Z00 NNbUYY}V_=`=`bb   !33% +$yy44 %		- < < %		- ; ;#iiU;;$)II.?$@$@'"YYz266:::FF$yyR88<<\JJ 4    q    99Z00"YY|R88
 z %  C%c400 ! (+0C(D(D%, ! )@@,4.?+.773C+D+D'*ww|S'A'A&/"' A    (1,$   exeeM^eebcee       !( KK A{  A  Ak  A  Ads  A  A  A    **#2	    	S 	S 	SLLW[WWTUWWXXX$[3q66RRRRRRRR	SsI   GI9 AHI9 
I
#I I9 I

.I9 9
K 2J;5K ;K 	documentsc                    ddl m} ddlm}m} t
                              d|             |            }	 |                    |                              |j	        |k              
                                }	|	rd|	_        |                                 d}
d}d}|D ]}	 |                     |d         |d         |d         |                    d	          |||                    d
          |                    dg           |	  	        }|d         r|
dz  }
||d         z  }|                    |                              |j	        |d         k              
                                }|r(d|_        |d         |_        |                                 nk|dz  }|                    |                              |j	        |d         k              
                                }|rd|_        |                                 t
                              d|d          d|d          d|                    dd                      # t           $ r6}|dz  }t
                              d|d          d|            Y d }~d }~ww xY w	 | j                            ||           n4# t           $ r'}t
                              d|            Y d }~nd }~ww xY w|                    |                              |j	        |k              
                                }	|	r*|dk    rdnd|	_        ||	_        |                                 t
                              d| d|
 d| d| d	           d||
||d|                                 S # t           $ r}t
                              d| d|            	 |                    |                              |j	        |k              
                                }	|	rd|	_        |                                 n# t           $ r Y nw xY w d }~ww xY w# |                                 w xY w)Nr   )SessionLocal)KnowledgeBaseKnowledgeDocumentz$Rebuilding graph for knowledge base buildingidr   r   r   r   r   )	r   r   r   r   r   r   r   r   r"   r)   rX   r*   	completedfailedzProcessed document r!   z, entities: zFailed to process document z*Failed to create co-occurrence relations: zGraph rebuild completed for kb z
 success, z	 failed, z total entitiesT)r)   r   success_countfailed_counttotal_entitieszGraph rebuild failed for kb )app.db.sessionrk   app.models.knowledge_baserl   rm   r.   r/   r@   filterro   firstgraph_statuscommitrB   r2   r*   r<   r-   r   create_co_occurrence_relationsclose)r   r   r   ri   r"   rk   rl   rm   dbkbrr   rs   rt   docresultdoc_objrA   s                    r   rebuild_knowledge_base_graphz)GraphBuilder.rebuild_knowledge_base_graph   s    	0/////NNNNNNNNB5BBCCC\^^O	-((//0@E0IJJPPRRB ",		MLN  (Q (Q'Q!66$'I!'l #I #	 2 2"+#$'GGM$:$: #	2 6 6#% 7 
 
F i( (%*&&*@@HH%677#V$5$8CI$EFF"UWW  
 # (3>G039.3IG0IIKKK$)HH%677#V$5$8CI$EFF"UWW  
 # (3;G0IIKKKKKyc$iyy6);LyyZ`ZdZdesuvZwZwyy    ! Q Q Q A%LLL!Os4y!O!OA!O!OPPPPPPPPQO'FFyRWXXXX O O OM!MMNNNNNNNNO-((//0@E0IJJPPRRB 1=1B1B++"0		KK K%  K  K=  K  KT`  K  Kky  K  K  K    !. ,"0 $ HHJJJJ  		 		 		LLDDDDDEEEXXm,,33M4D4MNNTTVV  &.BOIIKKK   		 HHJJJJs   A+N #F$I	N 	
J	+J>N J		N J) (N )
K3KN KB N 
P1 P,:A PP,
P(%P,'P((P,,P11P4 4Q
)NNNNNNN)NNr   )__name__
__module____qualname__r   r   r   r   intr=   listrd   r   rB   r0   r3   r7   r8   rh   r    r   r   r	   r	      s       5!5 *5 .	5 5 5 5$ #'#!%!%/3$(ES ESES ES 	ES
 tES ES ES 4ZES ES $JES $JES !I,ES TzES 
c3hES ES ES ES\ "&!%
 

 
 t	

 
 
 $J
 $J
 
 
 
2+.;>GJ	   

c 

D 

S 

QT 

 

 

 



# 

C 

C 

X[ 

 

 

 

@S@S(,T#s(^(<@SIL@SUX@S	c3h@S @S @S @SF GKW WW%(W59W	c3hW W W W W Wr   r	   Ngraph_builderr   c                  t    t           +ddlm}  ddlm} ddlm} t          ||  |                      a t           S )Nr   )r   )r
   )get_relation_extractor)r   #app.services.graph.entity_extractorr   app.services.graph.neo4j_clientr
   %app.services.graph.relation_extractorr   r	   )r   r
   r   s      r   get_graph_builderr   ;  s`    HHHHHH@@@@@@PPPPPP$\3CE[E[E]E]^^r   )typingr   r   r   r   r   r   r   common_loggingr   r   r.   r	   r   __annotations__r   r   r   r   <module>r      s           ? ? ? ? ? ? 7 7 7 7 7 7 C C C C C C % % % % % %	H		j j j j j j j jZ	 &*|d" ) ) )<      r   