
    Xj3                         d dl mZ d dlmZ d dlmZmZmZmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ  ee          Z G d d	e          Z G d
 d          Z	 	 	 	 ddededz  dedededefdZdS )    )Any)
Embeddings)
CollectionCollectionSchemaDataTypeFieldSchemaconnectionsutility)Session)KnowledgeDocument)get_embedding_factory)
get_loggerc                   v    e Zd Zd
dededz  fdZdee         deee                  fdZ	dedee         fd	Z
dS )CustomEmbeddingsNdbmodel_idc                 H    || _         || _        t                      | _        d S N)r   r   r   embedding_factory)selfr   r   s      I/lsinfo/ai/hellotax_ai/base_platform/app/services/storage/milvus_basic.py__init__zCustomEmbeddings.__init__   s$     !6!8!8    textsreturnc                     g }|D ]Z}| j                             || j        | j                  }|r|                    |           A|                    dgdz             [|S N)textr   r              )r   generate_embeddingr   r   append)r   r   
embeddingsr   	embeddings        r   embed_documentsz CustomEmbeddings.embed_documents   s    
 	0 	0D.AAdg B  I  0!!),,,,!!3%$,////r   r   c                 d    | j                             || j        | j                  }|r|ndgdz  S r   )r   r!   r   r   )r   r   r$   s      r   embed_queryzCustomEmbeddings.embed_query$   sA    *==$'DM > 
 
	 &7yyC54<7r   r   )__name__
__module____qualname__r   intr   liststrfloatr%   r'    r   r   r   r      s        9 97 9cDj 9 9 9 9

T#Y 
4U3D 
 
 
 
8 8U 8 8 8 8 8 8r   r   c                      e Zd Z	 	 	 	 	 d$dededz  ded	ed
edefdZdededz  defdZdededz  defdZd Z	dede
eeef                  de
e
e                  dede
e         f
dZ	 	 	 d%de
e         dedededz  de
eeef                  f
dZ	 	 	 	 d&dede
e         dededed ede
eeef                  fd!Zdedefd"Zdeeef         fd#ZdS )'MilvusBasicStoreNdocument_vectors	localhost19530Tr   knowledge_base_idcollection_namehostportuse_litec                 4   || _         || _        || _        || _        |                     ||          | _        	 dt          j                    v rt          	                    d           nj|r1t          j
        dd           t          	                    d           n7t          j
        d||           t          	                    d| d|            n0# t          $ r#}t                              d	|             d }~ww xY w|                                  d S )
Ndefaultu   使用现有的 Milvus 连接z./data/milvus_data.db)aliasuriu*   成功连接到 Milvus Lite (本地存储))r<   r7   r8   u   成功连接到Milvus: :u   连接Milvus失败: )r   r5   r6   r9   _get_embedding_dimensiondimr	   list_connectionsloggerinfoconnect	Exceptionerror_ensure_collection)r   r   r5   r6   r7   r8   r9   es           r   r   zMilvusBasicStore.__init__-   s2    !2. 005FGG	K8::::;<<<< E#)9PQQQQHIIII#)$TJJJJCdCCTCCDDD 	 	 	LL333444	 	!!!!!s   BC 
DC<<Dr   c           	      D   	 |rIddl m}m} |                    |                              |j        |k                                              }|r|j        r|j                                        rV|                    |                              |j        t          |j                  k                                              }nH|                    |                              |j        |j        k                                              }|r<|j
        r5t                              d|j
         d| d|j         d           |j
        S t                              d|            dS # t          $ r(}t                              d	|            Y d }~dS d }~ww xY w
Nr   )KnowledgeBaseModelzUsing dimension z for knowledge base z	 (model: )z0Using default dimension 2560 for knowledge base i 
  z#Failed to get embedding dimension: 
app.modelsrK   rL   queryfilteridfirstcodeisdigitr+   	dimensionrB   rC   namerE   rF   r   r   r5   rK   rL   kbmodelrH   s           r   r?   z)MilvusBasicStore._get_embedding_dimensionI      	  /;;;;;;;;XXm,,33M4DHY4YZZ``bb 	/"' 	/w(( V " 6 6ux3rw<<7O P P V V X X " 6 6uzRW7L M M S S U U / /}u}}Te}}pupz}}}    %.KK^K\^^___4 	 	 	LLBqBBCCC44444	   EE- E- -
F7FFc           	      D   	 |rIddl m}m} |                    |                              |j        |k                                              }|r|j        r|j                                        rV|                    |                              |j        t          |j                  k                                              }nH|                    |                              |j        |j        k                                              }|r<|j
        r5t                              d|j
         d| d|j         d           |j
        S t                              d|            dS # t          $ r(}t                              d	|            Y d }~dS d }~ww xY wrJ   rN   rX   s           r   r?   z)MilvusBasicStore._get_embedding_dimension_   r[   r\   c           
         	 t          j        | j                  r=t          | j                  | _        t
                              d| j                    n4t          dt          j	        dd          t          dt          j	                  t          dt          j	                  t          dt          j
        d	
          t          dt          j        | j                  t          dt          j
        d
          t          dt          j	                  g}t          |d          }t          | j        |          | _        ddddid}| j                            d|           t
                              d| j                    | j                                         d S # t           $ r#}t
                              d|             d }~ww xY w)Nu   使用现有集合: rR   T)rW   dtype
is_primaryauto_iddocument_id)rW   r_   chunk_index
chunk_texti  )rW   r_   
max_lengthvector)rW   r_   r@   
model_named   r5   z#Document vectors for knowledge base)fieldsdescription)rW   schemaCOSINEIVF_FLATnlistr    )metric_type
index_typeparams)
field_nameindex_paramsu   创建新集合: u   确保集合存在失败: )r
   has_collectionr6   r   
collectionrB   rC   r   r   INT64VARCHARFLOAT_VECTORr@   r   create_indexloadrE   rF   )r   ri   rk   rs   rH   s        r   rG   z#MilvusBasicStore._ensure_collectionu   s   	%d&:;; H",T-A"B"BI43GIIJJJJ  TDZ^___](.III](.III\9IV[\\\XX5JPTPXYYY\9IVYZZZ%8OOO *!/T   #-$2Fv"V"V"V#+",&o   
 ,,|,\\\F0DFFGGGO  """"" 	 	 	LL9a99:::	s   F#F' '
G1GGrb   chunksr#   rg   c           	         	 |gt          |          z  d |D             d |D             ||gt          |          z  | j        pdgt          |          z  g}| j                            |          }| j                                         t
                              dt          |           d|            |j        S # t          $ r#}t
          	                    d|             d }~ww xY w)Nc                     g | ]
}|d          S )rc   r/   .0chunks     r   
<listcomp>z2MilvusBasicStore.add_documents.<locals>.<listcomp>   s    :::%}%:::r   c                     g | ]
}|d          S r   r/   r~   s     r   r   z2MilvusBasicStore.add_documents.<locals>.<listcomp>   s    3335v333r   r   u   成功添加 u    个向量到文档 u   添加文档向量失败: )
lenr5   ru   insertflushrB   rC   primary_keysrE   rF   )r   rb   r{   r#   rg   entitiesinsert_resultrH   s           r   add_documentszMilvusBasicStore.add_documents   s   	F+::6:::33F333s6{{*',1-F;H !O228<<MO!!###KKVFVVVVWWW -- 	 	 	LL9a99:::	s   B<B? ?
C,	C''C,   ffffff?query_embeddingk	thresholdfilter_dictc           
         	 d}| j         r
d| j          }|rl|r|dz  }|                                D ]1\  }}t          |t                    r|| d| dz  }&|| d| dz  }2|                    d          r
|d d         }dd	d
id}| j                            |gd||dz  |r|nd g d          }	g }
|	D ]Q}|D ]5}t          |j                  }||k    r| j	        
                    t                                        t          j        |j                            d          k                                              }|j        |j                            d          |j                            d          |j                            d          |j                            d          |r|j        nd|r|j        nd d}|
                    ||f           t)          |
          |k    r n7t)          |
          |k    r nSt*                              dt)          |
           d           |
d |         S # t.          $ r#}t*                              d|             d }~ww xY w)N zknowledge_base_id == z && z == "z" && z == rl   nprobe
   )ro   rq   rf      )rb   rc   rd   rg   )data
anns_fieldparamlimitexproutput_fieldsrb   rc   rd   rg   Unknown)rR   rb   rc   r   rg   titlecategory_idu   相似度搜索完成，找到 
    个结果u   相似度搜索失败: )r5   items
isinstancer-   endswithru   searchr.   scorer   rP   r   rQ   rR   entitygetrS   r   r   r"   r   rB   rC   rE   rF   )r   r   r   r   r   r   keyvaluesearch_paramsresultsformatted_resultshitshitr   docdoc_dictrH   s                    r   similarity_searchz"MilvusBasicStore.similarity_search   s   3	D% HGt/EGG 	% #FND"-"3"3"5"5 8 8JC!%-- 83 9 9U 9 9 993 7 7E 7 7 77==(( %9D,42OOMo,,%&##!e!+TTtXXX -  G !#    C!#),,E	)) GMM*;<<#V$5$8CJNN=<Y<Y$YZZ"UWW  #&&+.:>>-+H+H+.:>>-+H+H$'JNN<$@$@*-*..*F*F25%DSYY9>A+K3??t$ $ *00(E1BCCC,--22 3())Q..E /KK\#>O:P:P\\\]]]$RaR(( 	 	 	LL6166777	s   H=I   
I-
I((I-333333?rP   keyword_weightsemantic_weightc                 r   	 |                      ||dz  |dz            }ddlm}  |d          }	| j                            |	||dz  d          }
i }|
D ] }dt          |j                  i||j        <   !i }|D ]\  }}|d	         }||d
d||<   |                                D ]\  }}||v r|d         ||         d<   g }|                                D ]\\  }}|d         }|d         dk    rt          |d         dz  d          nd
}||z  ||z  z   }|
                    |d         |f           ]|                    d d           t                              dt          |d |                    d           |d |         S # t          $ r#}t                              d|             d }~ww xY w)Nr   g?)r   r   r   r   r   a  
                SELECT
                    id,
                    title,
                    content,
                    category_id,
                    ts_rank(to_tsvector('simple', content), plainto_tsquery('simple', :query)) as rank
                FROM knowledge_documents
                WHERE to_tsvector('simple', content) @@ plainto_tsquery('simple', :query)
                ORDER BY rank DESC
                LIMIT :k
            )rP   r   r   rb   r   )r   semantic_scorekeyword_scorer   r   g?g      ?r   c                     | d         S )N   r/   )xs    r   <lambda>z0MilvusBasicStore.hybrid_search.<locals>.<lambda>  s
    QqT r   T)r   reverseu   混合搜索完成，找到 r   u   混合搜索失败: )r   
sqlalchemyr   r   executer.   rankrR   r   minr"   sortrB   rC   r   rE   rF   )r   rP   r   r   r   r   r   semantic_resultssql_textkeyword_querykeyword_resultkeyword_resultsrowcombined_resultsr   r   doc_idr   final_results_doc_idsemantic_normkeyword_normcombined_scorerH   s                           r   hybrid_searchzMilvusBasicStore.hybrid_search   sl   &	#55 /1q5IPSO  6     433333 %H Y M "W__]eRSVWRW<X<XYYN O% E E+2E#(OO*D''!.  
U]+&+%(, , ((
 !0 5 5 7 7 N N---@DW$V,_=M!1!7!7!9!9 D D $%5 6=A/=RUV=V=VC_-3S999\_  "1=!@>T`C`!`$$d5k>%BCCCC>>4@@@KKYs=!;L7M7MYYYZZZ !$$ 	 	 	LL333444	s   FF	 	
F6F11F6c                    	 d| }| j                             |           | j                                          t                              d| d           dS # t
          $ r#}t                              d|             d }~ww xY w)Nzdocument_id == u   删除文档 u
    的向量r   u   删除文档向量失败: )ru   deleter   rB   rC   rE   rF   )r   rb   r   rH   s       r   delete_document_vectorsz(MilvusBasicStore.delete_document_vectors  s    	2[22DO""4(((O!!###KK????@@@1 	 	 	LL9a99:::	s   AA 
B$BBc                     	 | j         j        }|| j        dS # t          $ r1}t                              d|            d| j        dcY d }~S d }~ww xY w)N)total_vectorsr6   u   获取统计信息失败: r   )ru   num_entitiesr6   rE   rB   rF   )r   statsrH   s      r   get_collection_statsz%MilvusBasicStore.get_collection_stats$  s    	QO0E%*t?STTT 	Q 	Q 	QLL9a99:::%&4;OPPPPPPPP	Qs    
A&AAA)Nr2   r3   r4   T)r   r   N)r   r   r   r   )r(   r)   r*   r   r+   r-   boolr   r?   rG   r,   dictr   r.   r   tupler   r   r   r   r/   r   r   r1   r1   +   sz       
 )-1" "" :" 	"
 " " " " " "87 sTz VY    ,7 sTz VY    ,  > T#s(^$ e%	
  
c   6 #': :e: : 	:
 D[: 
eD%K 	!: : : :@  #!$/ // e/ 	/
 / / / 
eD%K 	!/ / / /b	3 	3 	 	 	 	Qd38n Q Q Q Q Q Qr   r1   Nr3   r4   Tr   r5   r7   r8   r9   r   c                 *    t          | ||||          S )N)r7   r8   r9   )r1   )r   r5   r7   r8   r9   s        r   get_vector_storer   -  s     B 14RZ[[[[r   )Nr3   r4   T)typingr   langchain_core.embeddingsr   pymilvusr   r   r   r   r	   r
   sqlalchemy.ormr   rO   r   3app.services.llm.backends.embedding_backend_factoryr   common_loggingr   r(   rB   r   r1   r+   r-   r   r   r/   r   r   <module>r      s         0 0 0 0 0 0 ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ " " " " " " ( ( ( ( ( ( U U U U U U % % % % % %	H		
8 8 8 8 8z 8 8 84Q Q Q Q Q Q Q QH %)\ \\Tz\ \ 	\
 \ \ \ \ \ \ \r   