
    Xj"                         U d dl Z d dlmZ d dlmZ d dlmZ  ee          ZdZ	dZ
 G d d          Zdaedz  ed	<   d
efdZdS )    N)Any)Neo4jClient)
get_loggerg333333?g333333?c                   *   e Zd ZdefdZ	 	 ddeeeef                  dede	de	deee
f         dz  d	eeeef                  fd
Zdee	         de	de	d	ee	e
f         fdZdee	         de	de	d	ee	e
f         fdZdee	         de	de	d	ee	e
f         fdZdS )GraphRerankerneo4j_clientc                     || _         d S )Nr   )selfr   s     I/lsinfo/ai/hellotax_ai/base_platform/app/services/graph/graph_reranker.py__init__zGraphReranker.__init__   s    (    Nresultsquery	tenant_idkb_idweightsreturnc                    |s|S |ddddd}d |D             }i }|	 ddl m}	 |                    |	j        |	j                                      |	j                            |                                                    }
d |
D             }n4# t          $ r'}t          
                    d|            Y d }~nd }~ww xY w|                     |||          }|                     |||          }|                     |||          }|D ]}|d	         }|                    d
d          }|                    |d          }|                    |d          }|                    |d          }|d         |z  |d         |z  z   |d         |z  z   |d         |z  z   }|                    |d          }|dk    r|t          z  }d|d<   d|d<   n%|dk    r|t           z  }d|d<   d|d<   n
||d<   d|d<   ||d<   ||d<   ||d<   ||d<   ||d
<   |                    d d           t%          d |D                       }t                              dt)          |           d | d!           |S )"Ng?g?)vector_scorepagerankentity_overlappath_distancec                     g | ]
}|d          S )document_id .0rs     r   
<listcomp>z(GraphReranker.rerank.<locals>.<listcomp>&   s    5551]#555r   r   )KnowledgeDocumentc                 ,    i | ]}|j         |j        pd S )	effective)id
doc_status)r   rows     r   
<dictcomp>z(GraphReranker.rerank.<locals>.<dictcomp>1   s#    !X!X!XC#&#.*GK!X!X!Xr   u8   [GraphReranker] doc_status 查询失败（非致命）: r   score      ?        r   r   r   r   r#   obsoleter%   Tstatus_penalizedamendedForiginal_scorepagerank_scoreentity_overlap_scorepath_distance_scorec                     | d         S )Nr(   r   )xs    r   <lambda>z&GraphReranker.rerank.<locals>.<lambda>T   s
    1W: r   )keyreversec              3   D   K   | ]}|                     d           dV  dS )r,      N)getr   s     r   	<genexpr>z'GraphReranker.rerank.<locals>.<genexpr>U   s4      NNAAEE:L4M4MNaNNNNNNr   z	Reranked z results using graph features (z obsolete/amended penalized))app.models.knowledge_baser!   r   r$   r%   filterin_all	Exceptionloggerwarning_calculate_pagerank_calculate_entity_overlap_calculate_path_distancesr9   _OBSOLETE_SCORE_PENALTY_AMENDED_SCORE_PENALTYsortsuminfolen)r   r   r   r   r   r   dbdoc_idsdoc_status_mapr!   rowsepagerank_scoresentity_overlap_scorespath_distancesresultdoc_idr   r   r   r   combined_scorestatuspenalized_counts                           r   rerankzGraphReranker.rerank   s?     	N? #"%!$	 G 65W555)+>
_GGGGGG HH.13D3OPPV-044W==>>SUU 
 "Y!XSW!X!X!X _ _ _]Z[]]^^^^^^^^_227IuMM $ > >w	SX Y Y77ERR 	- 	-FM*F!::gs33L&**6377H266vsCCN*..vs;;M',6*%01*+n<= /*]:;  $''<<F##"99'1|$-1)**9$$"88'0|$-1)**'-|$-2)*'3F#$'/F#$-;F)*,9F(),F7OO--t<<<NNNNNNNrGrr_rrr	
 	
 	
 s   A/B 
C B;;C rL   c                 V  	
 	 d}d}	 | j                             ||||d          }n[# t          $ rN | j                             ||||d          }|rt          d |D                       nd		fd|D             }Y nw xY wd |D             }|rGt          |                                          

d	k    r 
fd
|                                D             }|S # t          $ rB}t                              d|            t          	                    |d          cY d }~S d }~ww xY w)NaT  
            MATCH (d:Document)
            WHERE d.id IN $doc_ids
              AND d.tenant_id = $tenant_id
              AND d.kb_id = $kb_id
            CALL gds.pageRank.stream({
                nodeProjection: 'Document',
                relationshipProjection: {
                    REFERENCES: {
                        type: 'REFERENCES',
                        orientation: 'NATURAL'
                    },
                    SIMILAR_TO: {
                        type: 'SIMILAR_TO',
                        orientation: 'UNDIRECTED'
                    }
                },
                maxIterations: 20,
                dampingFactor: 0.85
            })
            YIELD nodeId, score
            WITH gds.util.asNode(nodeId) AS doc, score
            WHERE doc.id IN $doc_ids
            RETURN doc.id as doc_id, score
            a  
            MATCH (d:Document)
            WHERE d.id IN $doc_ids
              AND d.tenant_id = $tenant_id
              AND d.kb_id = $kb_id
            OPTIONAL MATCH (d)-[r:REFERENCES|SIMILAR_TO]-(other:Document)
            WHERE other.tenant_id = $tenant_id
              AND other.kb_id = $kb_id
            WITH d.id as doc_id, count(r) as degree
            RETURN doc_id, degree
            rL   r   r   
parametersc                     g | ]
}|d          S )degreer   r   s     r   r    z5GraphReranker._calculate_pagerank.<locals>.<listcomp>j   s    !?!?!?!!H+!?!?!?r   r8   c                 8    g | ]}|d          |d         z  dS )rT   r^   rT   r(   r   )r   r   
max_degrees     r   r    z5GraphReranker._calculate_pagerank.<locals>.<listcomp>k   s;       STq{Qx[:5MNN  r   c                 ,    i | ]}|d          |d         S r`   r   r   s     r   r'   z5GraphReranker._calculate_pagerank.<locals>.<dictcomp>n   s"    ???!ak1W:???r   r   c                 "    i | ]\  }}||z  S r   r   r   kv	max_scores      r   r'   z5GraphReranker._calculate_pagerank.<locals>.<dictcomp>r   #    JJJ41aaYJJJr   zFailed to calculate PageRank: r*   )
r   execute_queryr?   maxvaluesitemsr@   errordictfromkeys)r   rL   r   r   r   fallback_queryr   scoresrO   ra   rg   s            @@r   rB   z!GraphReranker._calculate_pagerank[   s   	/ CE }N+99'	\a&b&b :      +99"+2UZ[[ :   ELRS!?!?w!?!?!?@@@QR
   X_   @?w???F K00	q==JJJJ6<<>>JJJFM 	/ 	/ 	/LL=!==>>>==#........	/s?   C  * C AB?C BAC 
D(&7D#D(#D(c                   	 	 d}| j                             ||||d          }d |D             }|rGt          |                                          		dk    r 	fd|                                D             }|D ]}||vrd||<   |S # t
          $ rB}t                              d|            t          	                    |d          cY d }~S d }~ww xY w)	Na  
            MATCH (d1:Document)
            WHERE d1.id IN $doc_ids
              AND d1.tenant_id = $tenant_id
              AND d1.kb_id = $kb_id
            MATCH (d1)-[:CONTAINS]->(e:Entity)<-[:CONTAINS]-(d2:Document)
            WHERE d2.id IN $doc_ids
              AND d2.tenant_id = $tenant_id
              AND d2.kb_id = $kb_id
              AND d1.id <> d2.id
            WITH d1.id as doc_id, count(DISTINCT e) as shared_entities
            RETURN doc_id, shared_entities
            rZ   r[   c                 ,    i | ]}|d          |d         S )rT   shared_entitiesr   r   s     r   r'   z;GraphReranker._calculate_entity_overlap.<locals>.<dictcomp>   s$    IIIAak1%6#7IIIr   r   c                 "    i | ]\  }}||z  S r   r   rd   s      r   r'   z;GraphReranker._calculate_entity_overlap.<locals>.<dictcomp>   rh   r   r*   z$Failed to calculate entity overlap: )
r   ri   rj   rk   rl   r?   r@   rm   rn   ro   )
r   rL   r   r   r   r   rq   rT   rO   rg   s
            @r   rC   z'GraphReranker._calculate_entity_overlapx   s!   	/ UE'55gIX]"^"^ 6  G JIIIIF K00	q==JJJJ6<<>>JJJF! ) )''%(F6NM 	/ 	/ 	/LLCCCDDD==#........	/s   BB
 

C7CCCc                 `   	 d}| j                             ||||d          }i }|D ](}|d         }t          j        |           ||d         <   )|D ]}	|	|vrd||	<   |S # t          $ rB}
t
                              d|
            t                              |d          cY d }
~
S d }
~
ww xY w)	Na  
            MATCH (d1:Document)
            WHERE d1.id IN $doc_ids
              AND d1.tenant_id = $tenant_id
              AND d1.kb_id = $kb_id
            MATCH (d2:Document)
            WHERE d2.id IN $doc_ids
              AND d2.tenant_id = $tenant_id
              AND d2.kb_id = $kb_id
              AND d1.id <> d2.id
            MATCH path = shortestPath((d1)-[*..3]-(d2))
            WITH d1.id as doc_id, avg(length(path)) as avg_distance
            RETURN doc_id, avg_distance
            rZ   r[   avg_distancerT   g?z$Failed to calculate path distances: r)   )	r   ri   mathexpr?   r@   rm   rn   ro   )r   rL   r   r   r   r   rq   r   distancerT   rO   s              r   rD   z'GraphReranker._calculate_path_distances   s    	/ ^E'55gIX]"^"^ 6  G F : :^,&*hy&9&9q{##! ) )''%(F6NM 	/ 	/ 	/LLCCCDDD==#........	/s   AA! !
B-+7B("B-(B-)NN)__name__
__module____qualname__r   r   listrn   strr   intfloatrX   rB   rC   rD   r   r   r   r   r      s       )[ ) ) ) ) ,0E Ed38n%E E 	E
 E c5j!D(E 
d38n	E E E EN/Cy/-0/9</	c5j	/ / / /:/Cy/-0/9</	c5j	/ / / /*/Cy/-0/9</	c5j	/ / / / / /r   r   graph_rerankerr   c                  H    t           ddlm}  t          |           a t           S )Nr   r
   )r   app.services.graph.neo4j_clientr   r   r
   s    r   get_graph_rerankerr      s/    @@@@@@ '|44r   )rx   typingr   r   r   common_loggingr   r{   r@   rE   rF   r   r   __annotations__r   r   r   r   <module>r      s           7 7 7 7 7 7 % % % % % %	H		   P/ P/ P/ P/ P/ P/ P/ P/f (,$ + + +M      r   