o
    Ղi/R                     @   s   U d Z ddlmZmZmZmZ ddlZddlZddlm	Z	 ddl
mZ ddlmZ eeZG dd dZdaee ed	< d
efddZdS )zP
Graph Builder Service
Orchestrates knowledge graph construction from documents
    )DictAnyOptionalListN)Neo4jClient)EntityExtractor)RelationExtractorc                   @   s  e Zd ZdZdededefddZ							d'ded	e	d
e	de
e	 dedede
e de
e de
e	 de
e	 de
ee  de
e dee	ef fddZ		d(ded	e	de
e	 dedede
e	 de
e	 fddZdededededef
ddZdedededefddZdedededefddZded eee	ef  dededee	ef f
d!d"Zded eee	ef  dededee	ef f
d#d"Z	d)deded$edee	ef fd%d&ZdS )*GraphBuilderzb
    Knowledge graph builder service
    Coordinates entity extraction and graph construction
    neo4j_cliententity_extractorrelation_extractorc                 C   s   || _ || _|| _d S N)r
   r   r   )selfr
   r   r    r   H/lsinfo/ai/hellotax_ai/base_platform/app/services/graph/graph_builder.py__init__   s   
zGraphBuilder.__init__Ndocument_idtitlecontentsummary	tenant_idkb_idcategory_idtag_ids
doc_number
doc_statussupersedes_doc_idsparent_doc_idreturnc              
   C   s  zt d| d|  | j||||||
|d | jj|||	d}|dg }|dg }| ||||}| j||||| | j	||| | j
|||| |rZ| |||| |rd| |||| |rr|D ]	}| j|| qh|rd}| jj|||||dd	 t d
| d| dt| d d||t|dW S  ty } zt d| d|  d|t|dW  Y d}~S d}~ww )a  
        Build knowledge graph for a single document

        Args:
            document_id: Document ID
            title: Document title
            content: Document content
            summary: Document summary
            tenant_id: Tenant ID
            kb_id: Knowledge base ID
            category_id: Optional category ID
            tag_ids: Optional list of tag IDs

        Returns:
            Build result with entity and relation counts
        zBuilding graph for document : )r   r   )r   r   
db_sessionentities	relationsa;  
                MATCH (parent:Document {id: $parent_id, tenant_id: $tenant_id, kb_id: $kb_id})
                MATCH (att:Document {id: $doc_id, tenant_id: $tenant_id, kb_id: $kb_id})
                MERGE (parent)-[:HAS_ATTACHMENT]->(att)
                MERGE (att)-[:IS_ATTACHMENT_OF]->(parent)
                )doc_id	parent_idr   r   
parametersz&Graph built successfully for document z entities, z
 relationsT)successr   entity_countrelation_countz#Failed to build graph for document Fr'   r   errorN)loggerinfo_create_document_noder   extract_entitiesget_create_entity_nodesr   create_entity_relationscreate_document_relationsinfer_document_references
_link_tags_link_categoryr
   create_supersedes_relationexecute_writelen	Exceptionr+   str)r   r   r   r   r   r   r   r   r   r    r   r   r   r   extraction_resultr!   r"   r(   	target_idqueryer   r   r   build_document_graph    sx    


z!GraphBuilder.build_document_graphc           	   	   C   sD   |dur|nd}d}| j j||||pd||pd|pdd|d dS )z'Create or update document node in Neo4jNr   a  
        MERGE (d:Document {id: $doc_id, tenant_id: $tenant_id, kb_id: $kb_id})
        SET d.title = $title,
            d.summary = $summary,
            d.doc_number = $doc_number,
            d.doc_status = $doc_status,
            d.updated_at = datetime()
         	effective)r#   r   r   r   r   r   r&   r   r
   r8   )	r   r   r   r   r   r   r   r   r>   r   r   r   r.      s   	
z"GraphBuilder._create_document_noder!   c                 C   s.   |sdS d}| j j||||d|d t|S )zs
        Create entity nodes and link to document

        Returns:
            Number of entities created
        r   a  
        UNWIND $entities AS entity
        MERGE (e:Entity {
            name: entity.name,
            tenant_id: $tenant_id,
            kb_id: $kb_id
        })
        SET e.type = entity.type,
            e.description = entity.description,
            e.confidence = entity.salience
        WITH e, entity
        MATCH (d:Document {id: $doc_id, tenant_id: $tenant_id, kb_id: $kb_id})
        MERGE (d)-[r:CONTAINS]->(e)
        SET r.salience = entity.salience,
            r.frequency = 1
        )r!   r#   r   rC   )r
   r8   r9   )r   r!   r   r   r   r>   r   r   r   r1      s   
z!GraphBuilder._create_entity_nodesc                 C   "   d}| j j|||||dd dS )zLink document to tagsz
        MATCH (d:Document {id: $doc_id, tenant_id: $tenant_id, kb_id: $kb_id})
        UNWIND $tag_ids AS tag_id
        MERGE (t:Tag {id: tag_id, tenant_id: $tenant_id, kb_id: $kb_id})
        MERGE (d)-[:HAS_TAG]->(t)
        )r#   r   r   r   r%   NrD   )r   r   r   r   r   r>   r   r   r   r5      s   
zGraphBuilder._link_tagsc                 C   rE   )zLink document to categoryz
        MATCH (d:Document {id: $doc_id, tenant_id: $tenant_id, kb_id: $kb_id})
        MERGE (c:Category {id: $cat_id, tenant_id: $tenant_id, kb_id: $kb_id})
        MERGE (d)-[:IN_CATEGORY]->(c)
        )r#   cat_idr   r   r%   NrD   )r   r   r   r   r   r>   r   r   r   r6     s   
zGraphBuilder._link_categorychunksc                 C     zt d| dt| d d}d}|D ]L}|d}|s+t d|dd  q| jj|||d	d
|dd
|dd|dd|d|||di d|di dd |d7 }q|D ]^}|d}|dg }	|ru|	svqd|	D ]I}
t|
tsqx|
d}|sqxz| jj	|||
d|
dd||d |d7 }W qx t
y } zt d| d| d|  W Y d}~qxd}~ww qdt d| d| d| d d |||d!W S  t
y } zt d"| d|  d|t|d#W  Y d}~S d}~ww $aS  
        Build chunk-level knowledge graph with REFERENCES relationships

        Args:
            document_id: Document ID
            chunks: List of chunk dictionaries with metadata
            tenant_id: Tenant ID
            kb_id: Knowledge base ID

        Returns:
            Build result with chunk and reference counts
        z"Building chunk graph for document r   z chunksr   chunk_idz"Chunk missing chunk_id, skipping: chunk_indexunknowntextrA   chunk_level	is_parentFparent_chunk_idmetadatadoc_typer   )rJ   r   
chunk_textrN   rK   rO   rP   r   r   rR   r      
referencestarget_doc_numberarticle_number
confidenceg?)source_chunk_idrV   target_articlerX   r   r   z Failed to create reference from z to Nz,Chunk graph built successfully for document z	 chunks, z referencesT)r'   r   chunk_countreference_countz)Failed to build chunk graph for document r*   r,   r-   r9   r0   warningr
   create_chunk_node
isinstancedictcreate_chunk_referencer:   r+   r;   r   r   rG   r   r   r[   r\   chunkrJ   rU   refrV   r?   r   r   r   build_chunk_graph     









(zGraphBuilder.build_chunk_graphc                 C   rH   rI   r]   rc   r   r   r   rf   y  rg   	documentsc                 C   s  ddl m}m} ddlm} td|  | }zcz(|||j	|k
 }	|	r3d|	_|  d}
d}d}|D ]}z~| j|d |d |d |d	|||d
|dg |d	}|d r|
d7 }
||d 7 }|||j	|d k
 }|rd|_|d |_|  n|d7 }|||j	|d k
 }|rd|_|  td|d  d|d  d|dd  W q; ty } z|d7 }td|d  d|  W Y d}~q;d}~ww z	| j|| W n ty } ztd|  W Y d}~nd}~ww |||j	|k
 }	|	r%|dkrdnd|	_||	_|  td| d|
 d| d| d	 d||
||dW W |  S  ty} } z,td| d|  z|||j	|k
 }	|	rrd|	_|  W  W     Y  d}~ww |  w )a*  
        Rebuild entire knowledge base graph

        Args:
            kb_id: Knowledge base ID
            tenant_id: Tenant ID
            documents: List of documents to process
            db_session: Database session for updating status

        Returns:
            Build statistics
        r   )KnowledgeBaseKnowledgeDocument)SessionLocalz$Rebuilding graph for knowledge base buildingidr   r   r   r   r   )	r   r   r   r   r   r   r   r   r    r'   rT   r(   	completedfailedzProcessed document r   z, entities: zFailed to process document Nz*Failed to create co-occurrence relations: zGraph rebuild completed for kb z
 success, z	 failed, z total entitiesT)r'   r   success_countfailed_counttotal_entitieszGraph rebuild failed for kb )app.models.knowledge_baseri   rj   app.db.sessionrk   r,   r-   r>   filterrm   firstgraph_statuscommitr@   r0   r(   r:   r+   r   create_co_occurrence_relationsclose)r   r   r   rh   r    ri   rj   rk   dbkbrp   rq   rr   docresultdoc_objr?   r   r   r   rebuild_knowledge_base_graph  s   



0&


z)GraphBuilder.rebuild_knowledge_base_graph)NNNNNNN)NNr   )__name__
__module____qualname____doc__r   r   r   r   intr;   r   listr   r   r   r@   r.   r1   r5   r6   rf   r   r   r   r   r   r	      s    
	


}
$
.



`

e
r	   graph_builderr   c                  C   s>   t du rddlm}  ddlm} ddlm} t| || a t S )z$Get or create graph builder instanceNr   )r
   )r   )get_relation_extractor)r   app.services.graph.neo4j_clientr
   #app.services.graph.entity_extractorr   %app.services.graph.relation_extractorr   r	   )r
   r   r   r   r   r   get_graph_builderW  s   r   )r   typingr   r   r   r   logginguuidr   r   r   r   r   r   	getLoggerr   r,   r	   r   __annotations__r   r   r   r   r   <module>   s    
    H