
    jM                         d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ  ee          Z G d d          Zde	dedefdZdS )    N)partial)Any)Session)settings)Agent)KnowledgeBase)RetrievalService)
get_logger)rag_retrieval_durationrag_rerank_durationc                   j   e Zd Z ej        d          Z ej        d          Zh dZddddddd	d
Zde	de
fdZdedefdZdedefdZdedefdZd7dededefdZ	 d8dedededee         fdZdee         deded ed!ed"ed#z  deeeef                  fd$Zd%eeeef                  dedeeeef                  fd&Z	 	 	 	 	 	 	 	 	 d9deded ed!ed*ed+ed,ed-ed#z  d.edeeef         fd/Zd0eeeef                  defd1Zd0eeeef                  deeeef                  fd2Zd:d4ed5edefd6Zd#S );AgentRAGServicez[\u3400-\u4dbf\u4e00-\u9fff]z[A-Za-z0-9]+>      哦   嗨   嗯   你好   哈喽   哦哦   嗯嗯   在么   在吗   多谢   好的   您好   收到   谢谢hiokheyokayhellou   个人所得税u   企业所得税u   研发费用加计扣除u   加计扣除u   增值税专用发票u   增值税普通发票)u   个税u	   个所税u   企税u   研发加计减扣u   加计减扣u   专票u   普票dbagentc                     || _         || _        |j        pg | _        t                              d|j         d| j                    d S )Nz%AgentRAGService initialized - Agent: z, Knowledge base IDs: )r"   r#   knowledge_basesknowledge_base_idsloggerinfoname)selfr"   r#   s      F/lsinfo/ai/hellotax_ai/base_platform/app/services/agent/rag_service.py__init__zAgentRAGService.__init__9   sY    
"'"7"=2oEJooVZVmoo	
 	
 	
 	
 	
    user_messagereturnc                 V    t          j        dd|pd                                          S )N\s+  )resubstrip)r*   r.   s     r+   _normalize_messagez"AgentRAGService._normalize_messageA   s'    vfc<#5266<<>>>r-   queryc                     |                      |          }| j                                        D ]\  }}|                    ||          }|S N)r7   _QUERY_REPLACEMENTSitemsreplace)r*   r8   normalized_querysourcetargets        r+   _normalize_queryz AgentRAGService._normalize_queryD   sY    22599"6<<>> 	H 	HNFF/77GGr-   messagec                 d    t          j        dd|                                          }|| j        v S )Nr1   r3   )r4   r5   lower_SMALL_TALK_MESSAGES)r*   rB   compact_messages      r+   _is_small_talkzAgentRAGService._is_small_talkJ   s/    &W55;;==$";;;r-      
min_lengthc                    | j         st                              d           dS |                     |          }|st                              d           dS |                     |          r t                              d| d           dS t          | j                            |                    }| j                            |          }t          d |D                       }t          |          |k    p|dk    p|}|s6t                              d| d	t          |           d
| d| d	           dS t                              d| d	t          |           d
| d| j                     dS )Nz9should_retrieve=False: Agent has no bound knowledge basesFz;should_retrieve=False: Message is empty after normalizationz should_retrieve=False: Message 'z' identified as small talkc              3   <   K   | ]}t          |          d k    V  dS )   N)len).0tokens     r+   	<genexpr>z2AgentRAGService.should_retrieve.<locals>.<genexpr>]   s,      $O$OSZZ1_$O$O$O$O$O$Or-      zDshould_retrieve=False: message too short after heuristics (message='z
', length=z, cjk_chars=z, alnum_tokens=)zshould_retrieve=True: Message='z, knowledge_base_ids=T)
r&   r'   r(   r7   rG   rM   _CJK_PATTERNfindall_ALNUM_TOKEN_PATTERNany)r*   r.   rI   normalized_messagecjk_char_countalnum_tokenshas_meaningful_acronymshould_retrieves           r+   r[   zAgentRAGService.should_retrieveN   s   & 	KKSTTT5!44\BB! 	KKUVVV5122 	KKa3Eaaa   5T.667IJJKK0889KLL!$$O$O,$O$O$O!O!O"##z1b^q5HbLb 	  	KK IWi  I  Iux  zL  vM  vM  I  I  [i  I  I  zF  I  I  I   5 p.@  p  pCPbLcLc  p  pq  p  p  W[  Wn  p  p	
 	
 	
 tr-   rL   
user_querymodelmax_queriesc                   K   d| d| d}	 |                     d|dg|t          j        t          j                   d {V }|                    |          }t          j        |          }|                    d|g          }	t          	                    d|                    d	d
           dt          |	           d           |	d |         S # t          $ r*}
t                              d|
            |gcY d }
~
S d }
~
ww xY w)NuS   你是一个税务知识库检索专家。请分析用户的查询意图，生成1-uY  个优化的检索查询。

要求：
1. 标准化税务术语（如"研发加计减扣"→"研发费用加计扣除"，"个税"→"个人所得税"）
2. 拆解复合问题（如"高新企业优惠和申报条件"→两个独立查询）
3. 扩展同义词和相关概念
4. 保持查询简洁，每个查询聚焦单一主题

用户查询：u   

请以JSON格式返回，包含intent（意图分析）和queries（查询列表）字段。
示例：{"intent": "咨询研发费用加计扣除", "queries": ["研发费用加计扣除政策", "研发费用加计扣除计算方法"]}user)rolecontent)messagesr]   temperature
max_tokensquerieszIntent analysis: intentzN/Az, Generated z queriesz.Intent analysis failed, using original query: )chat_completionr   AGENTIC_RAG_INTENT_TEMPERATUREAGENTIC_RAG_INTENT_MAX_TOKENSextract_response_textjsonloadsgetr'   r(   rM   	Exceptionwarning)r*   r\   chat_clientr]   r^   promptresponsetextresultrf   es              r+   _analyze_intentzAgentRAGService._analyze_intentk   sl      Rgr  R  R  V`  R  R  R	 (88#)f==>$C#A	 9        H 44X>>DZ%%FjjZL99GKKcFJJx$?$?ccSQX\\ccc   <K<(( 	  	  	 NNOAOOPPP<	 s   CC 
DC?9D?Drf   kb_idtop_k	thresholdmodemodel_idNc           	      d   g }|D ])}		 t          | j        |          }
dd l}|                                }|
                    |	||||          }t
          r=t          j        |                              |                                |z
             |D ]+}||d<   |j        |d<   |	|d<   |	                    |           ,t                              d|	 dt          |           d	|j                    # t          $ r+}t                              d
|	 d|            Y d }~#d }~ww xY w|S )Nr   )r8   r{   krz   r|   )r{   knowledge_base_idknowledge_base_namesource_queryzQuery 'z' retrieved z results from zFailed to retrieve for query 'z': )r	   r"   timeperf_counterretriever   labelsobserver)   appendr'   r(   rM   ro   error)r*   rf   rx   kbry   rz   r{   r|   all_resultsr8   retrieval_servicer   _t0resultsru   rv   s                   r+   _multi_query_retrievez%AgentRAGService._multi_query_retrieve   s     	 	E$4TWe$D$D!''))+44deyS[ 5   * `*1t<<<DDTEVEVEXEX[^E^___% / /F27F./46GF01-2F>*&&v....^e^^W^^UWU\^^____   KeKKKKLLL s   C.C88
D- D((D-r   c                    i }|D ]o}|                     d          |                     d          f}||vr|||<   6|                     dd          ||                              dd          k    r|||<   pt          |                                          }|                    d d           t                              dt          |           d	t          |           d
           |d |         S )Ndocument_idchunk_indexscorer   c                 .    |                      dd          S )Nr   r   rn   )xs    r+   <lambda>z8AgentRAGService._merge_and_deduplicate.<locals>.<lambda>   s    !%%*;*; r-   T)keyreversezDeduplication: u    → z unique results)rn   listvaluessortr'   r(   rM   )r*   r   ry   seenru   r   unique_resultss          r+   _merge_and_deduplicatez&AgentRAGService._merge_and_deduplicate   s     ! 	# 	#F::m,,fjj.G.GHC$"S		GQ''$s)--*C*CCC"S	dkkmm,, ; ;TJJJac+&6&6aaS=P=Paaabbbfuf%%r-         ?hybridT	tenant_id	user_roleenable_agentic
chat_modeluse_rerankerc                 2  K   |ot           j        o|d uo|	d u}|
ot           j        }|r+t          t           j        t          |dz  |                    n|}t                              d| d| d|            |                     |          }|r9t           j	        dk    r)| 
                    |||	t           j	                   d {V }n|g}g }i }t          j                    }| j        D ]}	 |dk    rP| j                            t                                         t           j        |k    t           j        dk              }n{|t                              d           w| j                            t                                         t           j        |k    t           j        dk    t           j        |k              }|                                }|s t                              d	| d
           |j        ||<   d }|j        r"ddlm} |j                                        r[| j                            |                              |j        t9          |j                  k                                              }nM| j                            |                              |j        |j        k                                              }|r6|j        }t                              d	|j         d|j         d| d           n#t                              d|j         d           |                    d t=          | j        |||||||                     d {V }|                     |           t                              dtC          |           d|j                    # tD          $ r+}t          #                    d| d|            Y d }~d }~ww xY w| $                    ||          }t           j%        rPtC          |          dk    r<	 ddl&m'} tQ          d |D                       }|r| j        r |            }| j        D ]}|)                    |||ddgdd          }d |D             }|D ]} | *                    d          }!|!r|!|vr|+                    |!| *                    dd           | *                    d!d           | *                    d!d           | *                    d"d#          d$d%           |,                    |!           n4# tD          $ r'}t                              d&|            Y d }~nd }~ww xY w|rGtC          |          |k    r3	 ddlm} dd'l-m.}" t                              d(tC          |           d)|            t           j/        }#ta          |#                                          rV| j                            |                              |j        t9          |#          k                                              }$nH| j                            |                              |j        |#k                                              }$|$r |"            }%dd l1}&|&2                                }'|                    d t=          |%j3        |||$j        | j        |*                     d {V }th          r)ti          j5        |&2                                |'z
             n1t                              d+t           j/                    |d |         }t                              d,tC          |           d-           n[# tD          $ r1}t          #                    d.|            |d |         }Y d }~n%d }~ww xY wtC          |          |k    r
|d |         }| 6                    |          }(| 7                    |          })|(|)tC          |          tQ          |8                                          d/S )0NrL   zRetrieval strategy: initial_k=z
, final_k=z, reranker_enabled=   platform_adminenabledz)tenant_id is required for non-admin userszKnowledge base z8 does not exist, is disabled, or access denied, skippingr   )Modelz using vector model: z (ID: rR   zVector model z* not found, will use default configuration)rf   rx   r   ry   rz   r{   r|   z
Retrieved z results from knowledge base z'Failed to retrieve from knowledge base z: )get_graph_query_servicec                 H    h | ]}|                     d           |d           S r   r   rN   rs     r+   	<setcomp>z3AgentRAGService.retrieve_context.<locals>.<setcomp>  s.    ^^^Q}I]I]^- 0^^^r-   
REFERENCES
SUPERSEDESrH   )document_idsr   rx   relation_typesdepthlimitc                 H    h | ]}|                     d           |d           S r   r   r   s     r+   r   z3AgentRAGService.retrieve_context.<locals>.<setcomp>"  s>     , , ,12QUU=EYEY,m,, , ,r-   idtitler3   summaryr   r   graph)r   r   rt   
chunk_textr   r?   z$Graph expansion failed (non-fatal): )get_rerank_factoryzStarting BGE Reranker: z candidates -> top )r8   	documentsr|   r"   ry   z-Rerank model not found in provider registry: zReranking completed: z results returnedz3Reranker failed, falling back to original results: )context_textsourcesretrieved_countr%   )9r   ENABLE_AGENTIC_RAGENABLE_BGE_RERANKERminBGE_RERANKER_INITIAL_Kmaxr'   r(   rA   AGENTIC_RAG_MAX_QUERIESrw   asyncioget_event_loopr&   r"   r8   r   filterr   statusrp   r   firstr)   codeapp.models.providerr   isdigitintrun_in_executorr   r   extendrM   ro   r   r   ENABLE_KNOWLEDGE_GRAPHapp.services.graph.graph_queryr   r   expand_neighborsrn   r   add0app.services.llm.backends.rerank_backend_factoryr   BGE_RERANKER_MODELstrr   r   rerankr   r   _format_context_format_sourcesr   )*r*   r8   ry   rz   r{   r   r   r   rq   r   r   use_agenticenable_reranker	initial_kr>   rf   r   knowledge_base_nameslooprx   query_kbr   r|   r   r]   
kb_resultsrv   r   doc_idsgraph_querygraph_resultsexisting_doc_idsgr	gr_doc_idr   rerank_model_keyrerank_modelrerank_factoryr   r   r   r   s*                                             r+   retrieve_contextz AgentRAGService.retrieve_context   sc	       )+)D() 4'	 	 'G8+G C/UQY1F1FGGG 	
 	mYmm%mm\kmm	
 	
 	
  0077 	)8;a?? 00 +z8;[       GG ((G!%'', 8	 8	E7 000#w}}];;BB%(E1=3G93T   HH !('RSSS #w}}];;BB%(E1%,	9%/9<   H
 ^^%% NNi%iii   .0g$U+7 999999w(( [ $e 4 4 ; ;EHBG<T U U [ [ ] ] $e 4 4 ; ;EJ"'<Q R R X X Z Z #(8fbgffBGff[cfff    _BG___   $(#7#72 '#'"+!!)	 	 	$ $      
 "":...`Z``WYW^``aaaa   SuSSPQSSTTT 11+yII* "	Ks;/?/?!/C/C!KRRRRRR^^+^^^__ @t6 @"9"9";";K!%!8 @ @(3(D(D)0&/"',8,+G"#"# )E ) ), ,6A, , ,( #0 @ @B(*tI( @Y>N-N-N + 2 27@131D1D02y"0E0E68ffY6K6K131E1E29%& %&	!" 	!" 	!" !1 4 4Y ? ? ?@  K K KIaIIJJJJJJJJK *	.s;//%77'2555555______bc+6F6Fbb[`bbccc#+#> '((0022 e,,33EHDT@U@U4UVV\\^^ !L
 e,,33EJBR4RSSYY[[ !   6%7%7%9%9NKKK++--C(,(<(<*1"'&1%1_#w"'  
) 
) 
# 
# 
# 
# 
# 
#K + O+3D4E4E4G4G#4MNNNNNeHcee   #.fuf"5KWC4D4DWWWXXXX 2 2 2VSTVVWWW)&5&12 %%%fuf-K++K88&&{33(";//#$8$?$?$A$ABB	
 
 	
sY   .A2N*!BN*5F3N**
O4 OODT! !
U+UU-G3]! !
^+'^^r   c                    |sdS dg}t          |d          D ]\  }}|                    dd          }|                    dd          }|                    dd          }|                    d	d
          }|                    d| d| d| d|dd| d           |                    d           d                    |          S )Nr3   u4   以下是从知识库中检索到的相关信息：
r   r   u   未知知识库r   u	   无标题rt   r   r   u   【参考资料 u   】
来源：z - u   
相关度：z.2fu
   
内容：
u   
请基于以上参考资料回答用户问题。如果参考资料中没有相关信息，请使用你的通用知识回答。)	enumeratern   r   join)	r*   r   context_partsiru   kb_namer   rt   r   s	            r+   r   zAgentRAGService._format_contexti  s    	2PQ"7A.. 	 	IAvjj!68IJJGJJw44E::fb))DJJw**E  s1ssGssssUZssskosss    	 O	
 	
 	
 yy'''r-   c                    ddl m} ddlm} d |D             }i }|ru| j        rn| j                            |j        |j                                      |j        	                    |                    
                                }d |D             }i }d |                                D             }|r	  |            5 }	|	rEddlm}
 |	                     |
d          d	|i                                          }d
 |D             }d d d            n# 1 swxY w Y   n4# t           $ r'}t"                              d|            Y d }~nd }~ww xY wg }t'                      }|D ]]}|                    d          }|r||v r|r|                    |           |                    d          }|ddt-          j        |           z   z  n|                    d          }|                    ||                    |          |                    d          ||                    d          ||                    d          |                    d          |                    d          |                    d          |                    d          |                    |                    |                    d           _|S )Nr   )get_data_center_db)KnowledgeDocumentc                 b    g | ],}|                     d           |                     d           -S r   r   r   s     r+   
<listcomp>z3AgentRAGService._format_sources.<locals>.<listcomp>~  s5    QQQAAEE-<P<PQ155''QQQr-   c                 (    i | ]}|j         |j        S  )r   data_center_doc_idrN   rows     r+   
<dictcomp>z3AgentRAGService._format_sources.<locals>.<dictcomp>  s    HHHC!7HHHr-   c                     g | ]}||S r   r   )rN   vs     r+   r   z3AgentRAGService._format_sources.<locals>.<listcomp>  s    55515!555r-   )rt   z=SELECT id, source_url FROM tax_documents WHERE id = ANY(:ids)idsc                 (    i | ]}|j         |j        S r   )r   
source_urlr   s     r+   r   z3AgentRAGService._format_sources.<locals>.<dictcomp>  s    )Q)Q)QS#&#.)Q)Q)Qr-   z(Failed to query data_center source_url: r   rerank_scorer   r   r   r   r   r   r   reference_url)r   r   r   r   	raw_scorer  r   r   r   r   r  reference_source_url)app.db.sessionr   app.models.knowledge_baser   r"   r8   r   r   r   in_allr   
sqlalchemyrt   executefetchallro   r'   rp   setrn   r   mathexpr   )r*   r   r   r   r   	dc_id_maprowssource_url_mapdc_idsdc_dbrt   rv   r   seen_doc_idsru   doc_idr  display_scores                     r+   r   zAgentRAGService._format_sourcesz  s   555555??????QQQQQ	 	Itw 	I/24E4XYY),0099:: 
 IH4HHHI55Y--//555 	OO'')) 	RU R333333  %}} D!`aa"FO    #(**  *R)QD)Q)Q)Q	R 	R 	R 	R 	R 	R 	R 	R 	R 	R 	R 	R 	R 	R 	R  O O OM!MMNNNNNNNNOuu 	 	FZZ..F &L00 )  (((!::n55L  + Q<-00011ZZ(( 
 NN#)*3--*?*?#ZZ00*!'G!4!4$0)/4G)H)H+1::6K+L+L#)::m#<#<$*JJ~$>$>%+ZZ%@%@,:,>,>y}}V?T?T,U,U      s=   5
D ?ADD DD DD 
E)EEForiginal_prompthas_contextc                     |s|S d}||z   S )Nud  

【知识库增强模式】
你已接入专业知识库。在回答问题时：
1. 优先使用知识库中提供的参考资料
2. 如果参考资料充分，请基于资料给出准确回答
3. 如果参考资料不足，可以结合你的通用知识补充
4. 回答时保持专业、准确、简洁
5. 必要时可以引用具体的参考资料来源r   )r*   r  r  rag_instructions       r+   enhance_system_promptz%AgentRAGService.enhance_system_prompt  s#     	#"" I00r-   )rH   )rL   )	rH   r   r   NNTNNT)F)__name__
__module____qualname__r4   compilerS   rU   rE   r;   r   r   r,   r   r7   rA   boolrG   r   r[   r   rw   floatdictr   r   r   r   r   r   r  r   r-   r+   r   r      sk       2:@AAL%2:n55  , $&#8&)) 
7 
5 
 
 
 
?s ?s ? ? ? ? c  c        <c <d < < < < C S     < LM    36 EH 	c       .c 
    * 
d38n	   @&S#X/&8;&	d38n	& & & &$ #!%!v
 v
v
 v
 	v

 v
 v
 v
 v
 $Jv
 v
 
c3hv
 v
 v
 v
p(tDcN'; ( ( ( ( (";tDcN'; ;T#s(^@T ; ; ; ;z1 1S 1t 1X[ 1 1 1 1 1 1r-   r   r"   r#   r/   c                 "    t          | |          S r:   )r   )r"   r#   s     r+   get_agent_rag_servicer(    s    2u%%%r-   )r   rl   r  r4   	functoolsr   typingr   sqlalchemy.ormr   
app.configr   app.models.agentr   r
  r   $app.services.rag.langchain_retrievalr	   common_loggingr
   common_metricsr   r   r   r'   r   r(  r   r-   r+   <module>r1     sD      				             " " " " " "       " " " " " " 3 3 3 3 3 3 A A A A A A % % % % % % F F F F F F F F	H		
d1 d1 d1 d1 d1 d1 d1 d1N&g &e & & & & & & &r-   