
    Xj              
       &   d dl mZmZmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZ d dlmZ  ee          Z e            Ze                    d	           ee           e ed
                    fdedededefd            ZdS )    )	APIRouterDependsRequest)Session)require_read)DocumentNotFoundError)get_translator)get_db)KnowledgeDocumentUser)
get_loggerz/documents/{document_id}/chunksknowledge_basesrequestdocument_iddbcurrent_userc                    ddl m} t          |            |                    t                                        t          j        |k                                              }|st          |          d }|j	        rq|                    |                              |j        |j	        k                                              }|r'|j
        }t                              d| d|            	 ddlm} ddlm}	  |	            }
|
j        s'|
                    |j        |j        |j                   |
                    d          }d	| }t                              d
| d| d|            |                    |g dd          }t                              dt/          |           d|            |rt                              d|d                             d           d|d                             d           d|d                             d                      t                              dt3          |d                                                               nt                              d| d           |                    ddgd          }t                              dt/          |           dd |D                         |rjd	| d | }t                              d!|            |                    |g dd          }t                              d"t/          |           d#           |r`d$ |d d%         D             }t                              d&|            t                              d'd( |d d%         D                         g }g }|D ]C}|                    d)d*          r|                    |           .|                    |           Dt                              d+t/          |           d,t/          |           d-           |                    d. /           |                    d0 /           ||z   }g }t=                      }|D ]u}|                    d1          pd2                                }|s.|                    dd          |f}||v rK|                     |           |                    |           vg }tC          |          D ]\  }}|                    d1d2          }|                    ||                    dd          |t/          |          |                    d)d*          |                    d3d2          d4           t                              d5|t/          |          6           ||j"        |t/          |          d7S # tF          $ rG}t          $                    d8| %                    d9:                     ||j"        g dd7cY d }~S d }~ww xY w);Nr   )KnowledgeCategoryz	Document z belongs to knowledge base ID: )settings)get_milvus_manager)hostportuse_litedocument_vectorszdocument_id == z#Querying chunk data - Document ID: z, Knowledge base ID: z, Expression: )chunk_index
chunk_textknowledge_base_idr   parent_chunk_id	is_parenti  )exproutput_fieldslimitzQuery result: Found z chunks for document_id=z!First chunk example: document_id=r   z, knowledge_base_id=r   z, chunk_index=r   zAvailable fields in result: z No chunks found for document_id=z%, trying to check collection stats...zdocument_id > 0
   zCollection has z% total records, sample document_ids: c                 8    g | ]}|                     d           S )r   get.0rs     C/lsinfo/ai/hellotax_ai/base_platform/app/api/v1/knowledge/chunks.py
<listcomp>z'get_document_chunks.<locals>.<listcomp>N   sA      ZE  ZE  ZEstZ[Z_Z_`mZnZn  ZE  ZE  ZE    z && knowledge_base_id == zRetrying query - Expression: zRetry found z chunksc                 8    g | ]}|                     d           S )r   r%   r'   s     r*   r+   z'get_document_chunks.<locals>.<listcomp>a   s$    HHHqk 2 2HHHr,      z%is_parent values for first 5 chunks: z chunk sizes for first 5 chunks: c                 T    g | ]%}t          |                    d d                    &S )r    )lenr&   r'   s     r*   r+   z'get_document_chunks.<locals>.<listcomp>d   s/    3f3f3fUVClB8O8O4P4P3f3f3fr,   r   Fz	Sorting: z parent chunks, z child chunksc                 .    |                      dd          S Nr   r   r%   xs    r*   <lambda>z%get_document_chunks.<locals>.<lambda>p   s    }a)@)@ r,   )keyc                 .    |                      dd          S r3   r%   r4   s    r*   r6   z%get_document_chunks.<locals>.<lambda>q   s    mQ(?(? r,   r   r0   r   )r   original_chunk_indextextcharacter_countr   r   zChunks retrieved)doc_idchunk_count)r   is_vectorizedchunkstotalzFailed to get document chunks: T)	exception)&app.models.knowledge_baser   r	   queryr   filteridfirstr   category_idr   loggerinfo
app.configr   !app.services.vector.milvus_clientr   	connectedconnectMILVUS_HOSTMILVUS_PORTUSE_MILVUS_LITEget_collectionr1   r&   listkeyswarningappendsortsetstripadd	enumerater>   	Exceptionerroropt)r   r   r   r   r   docr   categoryr   r   manager
collectionr    resultsall_resultsis_parent_valuesparent_chunkschild_chunksresultdeduplicated_resultsseenr   	signaturer?   display_indexes                             r*   get_document_chunksrm      s    <;;;;;7
(($
%
%
,
,->-A[-P
Q
Q
W
W
Y
YC 1#K000
 eHH&''../@/Cs/VWW]]__ 	  	e ( :KKcKccPaccdddu
''''''HHHHHH$$&&  	OO))!1    
 ++,>??
...{+{{Te{{uy{{	
 	
 	
 ""    # 
 
 	^3w<<^^Q\^^___ 	BKK zGAJNN=4Q4Q  z  zgnopgqgugu  wJ  hK  hK  z  z  [b  cd  [e  [i  [i  jw  [x  [x  z  z   KKPtGAJOO<M<M7N7NPPQQQQNNe;eee   %**&}oR +  K KK G#k"2"2  G  G  ZE  ZE  yD  ZE  ZE  ZE  G  G   ! BbbbO`bbBDBBCCC$**# # #  +   @3w<<@@@AAA 	HHGBQBKHHHKKR@PRRSSSKKh3f3fZabdcdbdZe3f3f3fhh    	, 	,Fzz+u-- ,$$V,,,,##F++++\M**\\C<M<M\\\	
 	
 	
 	@@AAA??@@@,.!uu 	0 	0F **\228b??AAJ M155zBID  HHY ''////%./C%D%D 	 	!M6L"55JMM#0,2JJ}a,H,H&'*:!'K!?!?'-zz2CR'H'H 	 	 	 	 	&{FTTT& .[[	
 
 	
  
 
 
:q::>>>NNOOO& .	
 
 	
 	
 	
 	
 	
 	

s   /SV; ;
X<XXXN)fastapir   r   r   sqlalchemy.ormr   app.api.permissionsr   app.core.exceptionsr   app.core.i18nr	   app.db.sessionr
   
app.modelsr   r   common_loggingr   __name__rH   routerr&   intrm    r,   r*   <module>rz      sR   / / / / / / / / / / " " " " " " , , , , , , 5 5 5 5 5 5 ( ( ( ( ( ( ! ! ! ! ! ! . . . . . . . . % % % % % %	H			 -.. '&// .?!@!@AA	I
 I
I
I
 	I
 	I
 I
 I
 /.I
 I
 I
r,   