
    Xj[                        d dl Z d dlZd dlmZ d dlmZ d dlmZmZm	Z	m
Z
mZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ  ee          Z G d
 de          Z G d d          Z	 	 	 	 	 ddededz  dededededz  defdZdS )    N)Any)
Embeddings)
CollectionCollectionSchemaDataTypeFieldSchema	Partitionconnectionsutility)Session)VectorStoreConnectionError)KnowledgeDocument)get_embedding_factory)
get_loggerc                   v    e Zd Zd
dededz  fdZdee         deee                  fdZ	dedee         fd	Z
dS )CustomEmbeddingsNdbmodel_idc                 H    || _         || _        t                      | _        d S N)r   r   r   embedding_factory)selfr   r   s      C/lsinfo/ai/hellotax_ai/base_platform/app/services/storage/milvus.py__init__zCustomEmbeddings.__init__   s$     !6!8!8    textsreturnc                     g }|D ]s}| j                             || j        | j                  }|r|                    |           A|rt          |d                   nd}|                    dg|z             t|S )Ntextr   r   r              )r   generate_embeddingr   r   appendlen)r   r   
embeddingsr    	embeddingdims         r   embed_documentsz CustomEmbeddings.embed_documents#   s    
 	/ 	/D.AAdg B  I  /!!),,,,,6@c*Q-(((D!!3%#+....r   r    c                 d    | j                             || j        | j                  }|r|ndgdz  S )Nr   r"   r!   )r   r#   r   r   )r   r    r'   s      r   embed_queryzCustomEmbeddings.embed_query0   sA    *==$'DM > 
 
	 &7yyC54<7r   r   )__name__
__module____qualname__r   intr   liststrfloatr)   r+    r   r   r   r      s        9 97 9cDj 9 9 9 9
T#Y 4U3D    8 8U 8 8 8 8 8 8r   r   c                      e Zd Z	 	 	 	 	 	 d*dededz  ded	ed
edededz  fdZdededz  defdZd Z	d Z
defdZ	 	 d+dedeeeef                  deee                  dedee         dz  dee         dz  dee         fdZ	 	 	 d,dee         dedededz  deeeef                  f
d Z	 	 	 	 d-d"edee         deded#ed$edeeeef                  fd%Zdedefd&Zdeeef         fd'Zdee         fd(Zd) ZdS ).MilvusVectorStoreNdocument_vectors	localhost19530Tr   knowledge_base_idcollection_namehostportuse_partition	tenant_idc                 D   || _         || _        || _        || _        || _        |                     ||          | _        	 dt          j                    v rt          
                    d           n7t          j        d||           t          
                    d| d|            n># t          $ r1}t                              d|            t                      d d }~ww xY w|                                  | j        r| j        r|                                  d S d S d S )Ndefaultz Using existing Milvus connection)aliasr;   r<   z"Successfully connected to Milvus: :zFailed to connect to Milvus: )r   r9   r:   r=   r>   _get_embedding_dimensionr(   r
   list_connectionsloggerinfoconnect	Exceptionerrorr   _ensure_collection_ensure_partition)	r   r   r9   r:   r;   r<   r=   r>   es	            r   r   zMilvusVectorStore.__init__9   sJ    !2.*"005FGG	9K8::::>????#)$TJJJJNNNNNOOO 	9 	9 	9LL<<<===,..D8	9 	!!! 	%$"8 	%""$$$$$	% 	% 	% 	%s    A'B( (
C#2,CC#r   c                    	 |st                               d           dS ddlm} ddlm} |                    |                              |j        |k              	                                }|r|j
        s t                               d| d           dS |j
                                        rV|                    |                              |j        t          |j
                  k              	                                }nH|                    |                              |j
        |j
        k              	                                }|r9|j        r2t                               d|j         d	|j         d
           |j        S t                               d|j
         d           dS # t           $ r)}t                               d| d           Y d }~dS d }~ww xY w)Nz<No knowledge_base_id provided, using default dimension: 1536r!   r   )KnowledgeBase)ModelzKnowledge base z> not found or has no model code, using default dimension: 1536z%Using model dimension from database: z	 (model: )z#Model dimension not found for code z, using default dimension: 1536z#Failed to get embedding dimension: z, using default: 1536)rE   rF   
app.modelsrN   app.models.providerrO   queryfilteridfirstcodewarningisdigitr/   	dimensionnamerH   rI   )r   r   r9   rN   rO   kbmodelrL   s           r   rC   z*MilvusVectorStore._get_embedding_dimensionV   s   	$ Z[[[t000000111111-((//0@DU0UVV\\^^B RW w&7www   tw   N..ux3rw<</GHHNNPP..uzRW/DEEKKMM 	 	cEOccV[V`ccc   &b"'bbb   t 	 	 	LLWqWWWXXX44444	s*   F/ A6F/ C1F/ 
#F/ /
G"9GG"c                 X   	 t          j        | j                  r=t          | j                  | _        t
                              d| j                    n	t          dt          j	        dd          t          dt          j	                  t          dt          j	                  t          dt          j
        d	
          t          dt          j        | j                  t          dt          j
        d
          t          dt          j	                  t          dt          j	                  t          dt          j
        d
          t          dt          j
        d
          t          dt          j                  t          dt          j
        d
          t          dt          j
        d
          t          dt          j
        d
          t          dt          j	                  g}t          |d          }t          | j        |          | _        dddd id!}| j                            d|"           t
                              d#| j                    | j                                         d S # t"          $ r#}t
                              d$|             d }~ww xY w)%NzUsing existing collection: rU   T)r[   dtype
is_primaryauto_iddocument_id)r[   r_   chunk_index
chunk_text  )r[   r_   
max_lengthvector)r[   r_   r(   
model_named   r9   r>   chunk_idparent_chunk_id	is_parentchunk_level   
references  
doc_statusissue_date_intz:Document vectors for knowledge base with partition support)fieldsdescription)r[   schemaCOSINEIVF_FLATnlisti   )metric_type
index_typeparams)
field_nameindex_paramszCreated new collection: z$Failed to ensure collection exists: )r   has_collectionr:   r   
collectionrE   rF   r   r   INT64VARCHARFLOAT_VECTORr(   BOOLr   create_indexloadrH   rI   )r   rs   ru   r}   rL   s        r   rJ   z$MilvusVectorStore._ensure_collectionv   ss   %	%d&:;;  O",T-A"B"BP$:NPPQQQQ  TDZ^___](.III](.III\9IV[\\\XX5JPTPXYYY\9IVYZZZ%8OOO[GGGZx7GTWXXX%6h>N[^___[FFF](:JWYZZZ\9IVZ[[[\9IVXYYY%5X^LLL" *! \   #-$2Fv"V"V"V#+",&o   
 ,,|,\\\Mt7KMMNNNO  """"" 	 	 	LLCCCDDD	s   I8I< <
J)J$$J)c                    	 d| j          }| j                            |          s8| j                            |           t                              d|            nt                              d|            || _        d S # t          $ r6}t                              d|            d| _	        d | _        Y d }~d S d }~ww xY w)Nkb_zCreated partition: zUsing existing partition: z#Failed to ensure partition exists: F)
r9   r   has_partitioncreate_partitionrE   rF   partition_namerH   rI   r=   r   r   rL   s      r   rK   z#MilvusVectorStore._ensure_partition   s    	';4#9;;N?00@@ K00@@@B.BBCCCCIIIJJJ"0D 	' 	' 	'LLBqBBCCC!&D"&D	's   B B 
C+B??Cc                    | j         st                              d           dS d| j          }	 | j                            |          rR| j                                         | j                            |           t                              d|            dS t                              d| d           dS # t          $ r&}t          	                    d| d	|             d }~ww xY w)
Nz9drop_partition called without knowledge_base_id, skippingFr   zDropped Milvus partition: TzMilvus partition z does not exist, skipping dropz Failed to drop Milvus partition z: )
r9   rE   rX   r   r   releasedrop_partitionrF   rH   rI   r   s      r   r   z MilvusVectorStore.drop_partition   s   % 	NNVWWW57t577	,,^<< '')))..~>>>IIIJJJt^^^^___u 	 	 	LLQNQQaQQRRR	s   A*B; B; ;
C+!C&&C+rb   chunksr&   rh   doc_status_listissue_date_int_listc                    	 g }g }g }	g }
g }|D ]l}|                     dt          t          j                                        }|                     d          }|d}|                     dd          }|                     dd          }|                     dg           }t	          |t
                    r|rt          j        |          nd}n|rt          |          nd}|                    |           |                    |           |	                    |           |
                    |           |                    |	                    d	          d d
         
                    d	d                     nd | j        j        j        D             }|gt          |          z  d |D             d |D             ||gt          |          z  | j        pdgt          |          z  | j        pdgt          |          z  |||	|
|g}d|v r*|                    ||ndgt          |          z             d|v r*|                    ||ndgt          |          z             | j        r[| j        rT| j                            || j                  }t(                              dt          |           d| j                    nG| j                            |          }t(                              dt          |           d|            | j                                         |j        S # t0          $ r#}t(                              d|             d }~ww xY w)Nrj   rk    rl   Frm   leafro   utf-8rp   ignoreerrorsc                     h | ]	}|j         
S r3   r[   .0fs     r   	<setcomp>z2MilvusVectorStore.add_documents.<locals>.<setcomp>       MMM!qvMMMr   c                     g | ]
}|d          S )rc   r3   r   chunks     r   
<listcomp>z3MilvusVectorStore.add_documents.<locals>.<listcomp>   s    :::%}%:::r   c                 ~    g | ]:}|d                               d          dd                             dd          ;S )r    r   Nre   r   r   )encodedecoder   s     r   r   z3MilvusVectorStore.add_documents.<locals>.<listcomp>   sW        &M((11&5&9@@QY@ZZ  r   r   rq   	effectiverr   )r   zSuccessfully added z vectors to partition z vectors to document z Failed to add document vectors: )getr1   uuiduuid4
isinstancer0   jsondumpsr$   r   r   r   ru   rs   r%   r9   r>   r=   r   insertrE   rF   flushprimary_keysrH   rI   )r   rb   r   r&   rh   r   r   	chunk_idsparent_chunk_ids
is_parentschunk_levelsreferences_listr   rj   rk   rl   rm   ro   references_strexisting_fieldsentitiesinsert_resultrL   s                          r   add_documentszMilvusVectorStore.add_documents   s   ?	I!JL O   99ZTZ\\1B1BCC"')),=">">"*&(O!IIk599	#iiv>>"YY|R88
j$// K?I%QTZ
%;%;%;rNN8B%JS___N  *** ''888!!),,,##K000&&"))'225D59@@QY@ZZ    NMt/E/LMMMOF+::6::: !'   s6{{*',1-F;$1%F3 H" ..'6'BOOX[\bXcXcHc    ?22+>+J''QRPSVYZ`VaVaPa   ! cd&9 c $ 6 6xPTPc 6 d db#f++bbTM`bb    !% 6 6x @ @a#f++aaT_aabbbO!!### -- 	 	 	LL?A??@@@	s   L-L0 0
M:MM   ffffff?query_embeddingk	thresholdfilter_dictc                 	   	 g }| j         r|                    d| j                     | j        r|                    d| j                    |r|                                D ]\  }}|dv r
t	          |t
                    r:|                    | dt          |                                                      Yt	          |t                    r|                    | d| d           |                    | d|            	 d | j        j	        j
        D             }d|v r|                    d	           n# t          $ r Y nw xY w|rf|                    d
          r|                    d|d
                     |                    d          r|                    d|d                     |rd                    |          nd}	dddid}
|dk    r|n|dz  }g d}|gd|
||	r|	nd |d}| j        r4| j        r-| j        g|d<   t                               d| j                     | j        j        d,i |}g }|D ]H}|D ],}t'          |j                  }||k    r| j                            t.                                        t.          j        |j                            d          k    t.          j        dk    t.          j        t.          j        dk                                              }||j                            d          pd}	 |rt?          j         |          ng }n# t>          j!        tD          f$ r g }Y nw xY w|j        |j                            d          |j                            d          |j                            d          |j                            d           |r|j#        nd!|r|j$        nd |r|j%        nd |j                            d"          pd|j                            d#          pd|j                            d$          pd%|j                            d&          pd'|d(}|                    ||f           tM          |          |k    r n.tM          |          |k    r nJt           '                    d)tM          |           d*           |d |         S # t          $ r#}t           (                    d+|             d }~ww xY w)-Nztenant_id == zknowledge_base_id == )issue_date_gteissue_date_ltez == z == ""c                     g | ]	}|j         
S r3   r   r   s     r   r   z7MilvusVectorStore.similarity_search.<locals>.<listcomp>   r   r   rq   z)doc_status not in ["obsolete", "expired"]r   zissue_date_int >= r   zissue_date_int <= z && r   rv   nprobe
   )ry   r{      )	rb   rc   rd   rh   rj   rk   rl   rm   ro   rg   )data
anns_fieldparamlimitexproutput_fieldspartition_nameszSearching in partition rb   	publishedobsoletero   rc   rd   rh   Unknownrj   rk   rl   Frm   r   )rU   rb   rc   r    rh   titlecategory_idreference_urlrj   rk   rl   rm   ro   z#Similarity search completed, found  resultszSimilarity search failed: r3   ))r>   r$   r9   itemsr   boolr1   lowerr   ru   rs   rH   r   joinr=   r   rE   debugsearchr2   scorer   rS   r   rT   rU   entitystatusis_vectorizedrq   rV   r   loadsJSONDecodeError	TypeErrorr   r   r   r%   rF   rI   )r   r   r   r   r   
conditionskeyvaluefield_namesr   search_paramssearch_limitr   search_kwargsresultsformatted_resultshitshitr   docr   ro   doc_dictrL   s                           r   similarity_searchz#MilvusVectorStore.similarity_search  s   c	J~ D!!"B$."B"BCCC% T!!"R$:P"R"RSSS 	?"-"3"3"5"5 ? ?JCBBB !%.. ?"))S*J*Jc%jj6F6F6H6H*J*JKKKK#E3// ?"))S*?*?u*?*?*?@@@@"))S*=*=e*=*=>>>>MMt/E/LMMM;..%%&QRRR    \??#344 \%%&Z;GW;X&Z&Z[[[??#344 \%%&Z;GW;X&Z&Z[[[.8@6;;z***bD,42OOM !B11AEL
 
 
M ))&&% $.$!. M ! Nd&9 N484G3H/0Lt7JLLMMM,do,==}==G " ( ( % %C!#),,E	)) GMM*;<<#V 1 4
}8U8U U 1 8K G 1 ? 1 <
 J	  #UWW  ;$),)E)E)K,GU)]N)C)C)C[]JJ $ 4i@ , , ,)+JJJ, #&&+.:>>-+H+H+.:>>-+H+H$'JNN<$@$@*-*..*F*F25%DSYY9>A+K3??tBE-OS->->4(+
z(B(B(Hb/2z~~>O/P/P/VTV),)D)D)M+.:>>-+H+H+RF*4$ $ *00(E1BCCC,--22 3())Q..E /KK^cBS>T>T^^^___$RaR(( 	 	 	LL9a99:::	sb   DS 4D= <S =
E
S 	E

GS L43S 4MS ME4S 
S1S,,S1333333?rS   keyword_weightsemantic_weightc           
      R   	 |dk    r|n|dz  }|                      |||dz            }ddlm}	 dg}
||d}| j        r|
                    d	           | j        |d
<   | j        |
                    d           | j        |d<   |
                    d           |
                    d           |
                    d            |	dd                    |
           d          }t          | j        	                    ||                    }t          d |D             d          }i }|D ]\  }}|d         |d         f}||dd||<   |D ]n}|j        |j        f}t          |j                  }||vr;|j        |j        |j        |j        |j        |j        |j        |j        dd|d||<   c|||         d<   og }|                                D ]Q}|d         }|dk    r|d         dk    r|d         |z  nd}||z  ||z  z   }|                    |d         |f           R|                    d d           t.                              d t3          |d |                    d!           |d |         S # t4          $ r#}t.                              d"|             d }~ww xY w)#Nr   r   g?)r   r   r   r   )r    zdv.id IN (SELECT id FROM document_vectors WHERE to_tsvector('simple', COALESCE(chunk_text, '')) @@ plainto_tsquery('simple', :query)))rS   r   zkd.tenant_id = :tenant_idr>   z)kc.knowledge_base_id = :knowledge_base_idr9   zkd.status = 'published'zkd.is_vectorized = truez6(kd.doc_status IS NULL OR kd.doc_status != 'obsolete')a$  
                SELECT
                    dv.id,
                    dv.document_id,
                    dv.chunk_index,
                    dv.chunk_text,
                    dv.model_name,
                    kd.title,
                    kd.category_id,
                    kd.reference_url,
                    ts_rank(
                        setweight(to_tsvector('simple', COALESCE(kd.title, '')), 'A') ||
                        setweight(to_tsvector('simple', COALESCE(dv.chunk_text, '')), 'B'),
                        plainto_tsquery('simple', :query)
                    ) as rank
                FROM document_vectors dv
                JOIN knowledge_documents kd ON dv.document_id = kd.id
                LEFT JOIN knowledge_categories kc ON kd.category_id = kc.id
                WHERE z AND zI
                ORDER BY rank DESC
                LIMIT :k
            c              3   >   K   | ]}t          |j                  V  d S r   )r2   rank)r   rows     r   	<genexpr>z2MilvusVectorStore.hybrid_search.<locals>.<genexpr>  s*      #L#LE#(OO#L#L#L#L#L#Lr   r"   )r@   rb   rc   )r   semantic_scorekeyword_score)rU   rb   rc   r    rh   r   r   r   r   r   r   c                     | d         S )N   r3   )xs    r   <lambda>z1MilvusVectorStore.hybrid_search.<locals>.<lambda>  s
    QqT r   T)r   reversezHybrid search completed, found r   zHybrid search failed: )r   
sqlalchemyr    r>   r$   r9   r   r0   r   executemaxrb   rc   r2   r   rU   rd   rh   r   r   r   valuessortrE   rF   r%   rH   rI   )r   rS   r   r   r   r   r   search_ksemantic_resultssql_textwhere_clausesr{   keyword_querykeyword_rowsmax_keyword_rankcombined_resultsr   r   
result_keyr   r   final_resultsr   semantic_normkeyword_normcombined_scorerL   s                              r   hybrid_searchzMilvusVectorStore.hybrid_searcht  s   D	FFqqAH#55 /8ySV  6     433333 XM  %844F~ 5$$%@AAA&*n{#%1$$%PQQQ.2.D*+  !:;;;  !:;;;  !YZZZ$H s  JQ  JV  JV  Wd  Je  Je  s  s  s M  v F FGGL"#L#L|#L#L#LVYZZZ!.  
U!-0#m2DE
&+%(0 0 ,,
 $ R R!os?
 %ch%555 #&&+.?+.?$'N*-.%(Y+.?-0->	  	  +.)64 4$Z00 ER$Z0AAM(//11 D D $%5 6 (!++_0E0I0I ),<<< 
 "1=!@>T`C`!`$$d5k>%BCCCC>>4@@@KKZ#mBQB>O:P:PZZZ[[[ !$$ 	 	 	LL5!55666	s   I6I9 9
J&J!!J&c                    	 d| }| j                             |           | j                                          t                              d|            dS # t
          $ r#}t                              d|             d }~ww xY w)Nzdocument_id == zDeleted vectors for document r   z#Failed to delete document vectors: )r   deleter   rE   rF   rH   rI   )r   rb   r   rL   s       r   delete_document_vectorsz)MilvusVectorStore.delete_document_vectors  s    	2[22DO""4(((O!!###KKEEEFFF1 	 	 	LLBqBBCCC	s   AA 
B#BBc                 <   	 | j         j        | j        | j        d}| j        r5| j        r.t          | j         | j                  }| j        |d<   |j        |d<   |S # t          $ r7}t                              d|            d| j        | j        dcY d }~S d }~ww xY w)N)total_vectorsr:   r=   r   partition_vectorszFailed to get statistics: r   )	r   num_entitiesr:   r=   r   r	   rH   rE   rI   )r   stats	partitionrL   s       r   get_collection_statsz&MilvusVectorStore.get_collection_stats  s    	!%!=#'#7!%!3 E
 ! Dd&9 D%dot7JKK	*.*=&'-6-C)*L 	 	 	LL9a99:::!"#'#7!%!3       	s   AA 
B$,BBBc                     	 | j         j        S # t          $ r)}t                              d|            g cY d }~S d }~ww xY w)NzFailed to list partitions: )r   
partitionsrH   rE   rI   r   rL   s     r   list_partitionsz!MilvusVectorStore.list_partitions  s[    	?-- 	 	 	LL:q::;;;IIIIII	s    
A<AAc                    	 | j         rE| j        r@| j                                         t                              d| j         d           d S d S d S # t          $ r(}t                              d|            Y d }~d S d }~ww xY w)Nz
Partition z compaction completedzFailed to compact partition: )r=   r   r   compactrE   rF   rH   rI   r   s     r   compact_partitionz#MilvusVectorStore.compact_partition  s    	>! Ud&9 U'')))S)<SSSTTTTTU U U U  	> 	> 	>LL<<<=========	>s   A
A 
BA??B)Nr6   r7   r8   TN)NN)r   r   N)r   r   r   r   )r,   r-   r.   r   r/   r1   r   r   rC   rJ   rK   r   r0   dictr   r2   r   tupler   r  r  r  r!  r$  r3   r   r   r5   r5   7   s       
 )-1" $% %% :% 	%
 % % % :% % % %:7 sTz VY    @& & &P' ' '    0 -104H HH T#s(^$H e%	H
 H cT)H "#Y-H 
cH H H HZ #'j jej j 	j
 D[j 
eD%K 	!j j j j`  #!$M MM eM 	M
 M M M 
eD%K 	!M M M M^	3 	3 	 	 	 	d38n    (c    > > > > >r   r5   r7   r8   Tr   r9   r;   r<   r=   r>   r   c                 P    |ddl m}  |            }t          | |||||          S )Nr   )get_current_tenant_id)r;   r<   r=   r>   )app.core.tenant_contextr(  r5   )r   r9   r;   r<   r=   r>   r(  s          r   get_vector_storer*    sU     AAAAAA *)++	
#   r   )Nr7   r8   TN) r   r   typingr   langchain_core.embeddingsr   pymilvusr   r   r   r   r	   r
   r   sqlalchemy.ormr   app.core.exceptionsr   rQ   r   3app.services.llm.backends.embedding_backend_factoryr   common_loggingr   r,   rE   r   r5   r/   r1   r   r*  r3   r   r   <module>r2     s           0 0 0 0 0 0	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 # " " " " " : : : : : : ( ( ( ( ( ( U U U U U U % % % % % %	H		8 8 8 8 8z 8 8 86x> x> x> x> x> x> x> x>z %)  Tz  	
  Tz      r   