
    Xjx+                        d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d d	lmZ  ee          Z G d
 d          Z G d d          Zddededz  defdZ	 	 	 	 ddededz  dedz  dedz  dedefdZdS )    )Any)StrOutputParser)ChatPromptTemplate)RunnablePassthrough)
ChatOpenAI)Session)get_embedding_factory)get_vector_store)
get_loggerc                   f   e Zd Zddededz  fdZ	 	 	 	 dded	ed
edededz  dee	ee
f                  fdZded
edededz  dee	ee
f                  f
dZded
edee	ee
f                  fdZded
edededz  dededee	ee
f                  fdZ	 	 	 	 dded	ed
edededz  de	ee
f         fdZdS )RetrievalServiceNdbknowledge_base_idc                 r    || _         || _        t          ||          | _        t	                      | _        d S N)r   r   r
   vector_storer	   embedding_factory)selfr   r   s      L/lsinfo/ai/hellotax_ai/base_platform/app/services/rag/langchain_retrieval.py__init__zRetrievalService.__init__   s8    !2,R1BCC!6!8!8    semantic   ffffff?querymodek	thresholdmodel_idreturnc                    	 |dk    r|                      ||||          S |dk    r|                     ||          S |dk    rF|                    dd          }|                    dd          }|                     ||||||          S t	          d|           # t
          $ r#}	t                              d	|	             d }	~	ww xY w)
Nr   keywordhybridkeyword_weightg333333?semantic_weightr   zUnsupported retrieval mode: zRetrieval failed: )_semantic_search_keyword_searchget_hybrid_search
ValueError	Exceptionloggererror)
r   r   r   r   r   r   kwargsr$   r%   es
             r   retrievezRetrievalService.retrieve   s    	z!!,,UAy(KKK""++E1555!!!',<c!B!B"(**->"D"D**1i>?   !!F!F!FGGG 	 	 	LL1a11222	s(   B B AB B 
C$CCc                 N   | j                             || j        |          }|st          d          | j                            |||          }g }|D ]O\  }}	|                    |d         |d         |d         |d         |d         |	|d	         |d
         dd           P|S )Ntextr   r   "Failed to generate query embedding)query_embeddingr   r   iddocument_idtitler3   chunk_indexcategory_id
model_namer:   r;   r6   r7   r8   r3   r9   scoremetadata)r   generate_embeddingr   r*   r   similarity_searchappend)
r   r   r   r   r   r5   resultsformatted_resultsdocr>   s
             r   r&   z!RetrievalService._semantic_search5   s     0CC47X D 
 
  	CABBB#55+qI 6 
 
 ! 	 	JC$$d)#&}#5 \K#&}#5"'*='9&),&7! !     ! r   c                    ddl m} dg}||d}| j        |                    d           | j        |d<    |dd                    |           d	          }| j                            ||          }g }|D ]W}	|                    |	j        |	j        |	j	        |	j
        |	j        t          |	j                  |	j        |	j        d
d           X|S )Nr   )r3   z(setweight(to_tsvector('simple', COALESCE(kd.title, '')), 'A') ||  setweight(to_tsvector('simple', COALESCE(dv.chunk_text, '')), 'B')) @@ plainto_tsquery('simple', :query))r   r   z)dv.knowledge_base_id = :knowledge_base_idr   as  
            SELECT
                dv.id,
                dv.document_id,
                dv.chunk_index,
                dv.chunk_text,
                dv.model_name,
                kd.title,
                kd.category_id,
                ts_rank(
                    setweight(to_tsvector('simple', COALESCE(kd.title, '')), 'A') ||
                    setweight(to_tsvector('simple', COALESCE(dv.chunk_text, '')), 'B'),
                    plainto_tsquery('simple', :query)
                ) as score
            FROM document_vectors dv
            JOIN knowledge_documents kd ON dv.document_id = kd.id
            WHERE z AND z>
            ORDER BY score DESC
            LIMIT :k
        r<   r=   )
sqlalchemyr3   r   rB   joinr   executer6   r7   r8   
chunk_textr9   floatr>   r:   r;   )
r   r   r   r3   where_clausesparamskeyword_queryresultrD   rows
             r   r'   z RetrievalService._keyword_searchR   s?   ###### z
 !q))!-  !LMMM*.*@F&' q  S
Z
  S
_
  S
_
  `
m
  S
n
  S
n
  q  q  q
 
 77 	 	C$$&#&? YN#&?"39--03s~ ^ ^ 
 
 
 
 ! r   r$   r%   c                 T   | j                             || j        |          }|st          d          | j                            ||||||          }g }	|D ]O\  }
}|	                    |
d         |
d         |
d         |
d         |
d         ||
d	         |
d
         dd           P|	S )Nr2   r4   )r   r5   r   r   r$   r%   r6   r7   r8   r3   r9   r:   r;   r<   r=   )r   r@   r   r*   r   hybrid_searchrB   )r   r   r   r   r   r$   r%   r5   rC   rD   rE   r>   s               r   r)   zRetrievalService._hybrid_searcho   s     0CC47X D 
 
  	CABBB#11+)+ 2 
 
 ! 	 	JC$$d)#&}#5 \K#&}#5"'*='9&),&7! !     ! r   c           
         	 dd l }|                                 } | j        d|||||d|}	|                                 }
|
|z
  }d |	D             }|rt          |          t          |          z  nd}|rt	          |          nd}|rt          |          nd}|||||d||	t          |	          ||||ddS # t          $ r#}t                              d|             d }~ww xY w)	Nr   r   r   r   r   r   c                     g | ]
}|d          S )r>    ).0rs     r   
<listcomp>z0RetrievalService.recall_test.<locals>.<listcomp>   s    222Qaj222r   )r   r   r   )total_results	avg_score	max_score	min_scoreelapsed_time)r   r   rM   rC   
statisticszRecall test failed: rV   )	timer0   sumlenmaxminr+   r,   r-   )r   r   r   r   r   r   r.   r`   
start_timerC   end_timer^   scoresr[   r\   r]   r/   s                    r   recall_testzRetrievalService.recall_test   sL   	KKK J#dm $!y8 W] G yy{{H#j0L22'222F5;BFc&kk11I'-4F1I'-4F1I !	xZZSYZ"%(\\!*!*!*$0     	 	 	LL333444	s   B9B< <
C)C$$C)r   )r   r   r   N)__name__
__module____qualname__r   intr   strrK   listdictr   r0   r&   r'   r)   rh   rV   r   r   r   r      s       9 97 9sTz 9 9 9 9 #   	
  * 
d38n	   4!! !-2!>ADj!	d38n	! ! ! !:!S !S !T$sCx.5I ! ! ! !:&!&! &! 	&!
 *&! &! &! 
d38n	&! &! &! &!V #& && & 	&
 & *& 
c3h& & & & & &r   r   c                       e Zd Z	 	 	 	 ddededz  dedz  dedz  def
dZ	 	 	 	 	 ddedededededz  dedz  deee	f         fdZ
dS )
RAGServiceNgpt-3.5-turbor   r   llm_api_keyllm_base_url	llm_modelc                     || _         || _        t          ||          | _        |rt	          |||d          | _        d S d | _        d S )Nr   )api_keybase_urlmodeltemperature)r   r   r   retrieval_servicer   llm)r   r   r   rs   rt   ru   s         r   r   zRAGService.__init__   s_     !2!1"6G!H!H 	!#l)Y\  DHHH DHHHr   r   r   r   questionr   r   r   r   system_promptr    c                    	 | j         st          d          | j                            |||||          }|sdg g dS d                    d t          |          D                       d}t          j        d|p|fd	g          }	fd
t                      d|	z  | j         z  t                      z  }
|

                    |          }|d |D             |dS # t          $ r#}t                              d|             d }~ww xY w)Nz*LLM not configured, cannot generate answerrT   zCSorry, I could not find relevant information in the knowledge base.)answersourcesretrieved_docsz

c           	      H    g | ]\  }}d |dz    d|d          d|d           S )z	Document    z
 (Source: r8   z):
r3   rV   )rW   irE   s      r   rY   z.RAGService.answer_question.<locals>.<listcomp>   sQ       3 QAPPWPP3v;PP  r   a  You are a professional knowledge base assistant. Please answer the user's question based on the provided document content.

Requirements:
1. Only use the provided document content to answer questions
2. If there is no relevant information in the documents, clearly inform the user
3. Answers should be accurate, concise, and professional
4. If possible, cite specific document sourcessystem)humanz9Reference documents:
{context}

User question: {question}c                     S r   rV   )xcontexts    r   <lambda>z,RAGService.answer_question.<locals>.<lambda>   s    g r   )r   r}   c                 b    g | ],}|d          |d         |d         dd         dz   |d         d-S )r7   r8   r3   N   z...r>   )r7   r8   r3   r>   rV   )rW   rE   s     r   rY   z.RAGService.answer_question.<locals>.<listcomp>   s\         (+='9!$W #FDSD 1E 9!$W	   r   zRAG Q&A failed: )r|   r*   r{   r0   rH   	enumerater   from_messagesr   r   invoker+   r,   r-   )r   r}   r   r   r   r   r~   r   default_system_promptprompt	rag_chainr   r/   r   s                @r   answer_questionzRAGService.answer_question   s   /	8 O !MNNN!3<<TQ)h =  N " c!&(  
 kk "+N";";   G %m!'5}E0EF] F .---;N;P;PQQ( "##$  %%h//F    .   #1    	 	 	LL/A//000	s   <C  BC 
D%DDNNNrr   )r   r   r   NN)ri   rj   rk   r   rl   rm   r   rK   ro   r   r   rV   r   r   rq   rq      s        
 )-"&#'(  : 4Z	
 Dj    * #$(8 88 8 	8
 8 *8 Tz8 
c3h8 8 8 8 8 8r   rq   Nr   r   r    c                 "    t          | |          S r   )r   )r   r   s     r   get_retrieval_servicer     s    B 1222r   rr   rs   rt   ru   c                 (    t          | ||||          S r   )rq   )r   r   rs   rt   ru   s        r   get_rag_servicer     s     b+[,	RRRr   r   r   )typingr   langchain_core.output_parsersr   langchain_core.promptsr   langchain_core.runnablesr   langchain_openair   sqlalchemy.ormr   3app.services.llm.backends.embedding_backend_factoryr	   )app.services.storage.vector_store_factoryr
   common_loggingr   ri   r,   r   rq   rl   r   rm   r   rV   r   r   <module>r      s         9 9 9 9 9 9 5 5 5 5 5 5 8 8 8 8 8 8 ' ' ' ' ' ' " " " " " " U U U U U U F F F F F F % % % % % %	H		
j j j j j j j jZL L L L L L L L^3 3g 3#* 3P` 3 3 3 3 %)"#$S SSTzS tS *	S
 S S S S S S Sr   