
    j                         d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ  e j        e          Z G d d          Zd	efd
ZdS )    N)ThreadPoolExecutor)SessionLocal)KnowledgeDocument)DocumentVectorizationService)get_task_managerc                       e Zd ZdZ ej                    ZdZed             Z	e
	 	 	 	 ddededz  dedz  dedz  dedz  f
d	            Ze
dededz  dedz  dedz  dedz  f
d
            ZdS )AsyncVectorizationServiceN   c                     | j         d| j        5  | j         >t          | j        d          | _         t                              d| j         d           d d d            n# 1 swxY w Y   | j         S )Nvectorization)max_workersthread_name_prefixz Created ThreadPoolExecutor with z workers)	_executor_lockr   _max_workersloggerinfo)clss    T/lsinfo/ai/hellotax_ai/base_platform/app/services/rag/async_vectorization_service.pyget_executorz&AsyncVectorizationService.get_executor   s    =  _ _=($6$'$4% % %CM KK ]3CS ] ] ]^^^_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ }s   AA!!A%(A%document_idmodel_id
chunk_sizechunk_overlapsplitter_typec                     t                                           }|                    t           j        | ||||           t                              d|             d S )Nz?Submitted async vectorization task to thread pool: document_id=)r	   r   submit_vectorize_document_workerr   r   )r   r   r   r   r   executors         r   vectorize_document_asyncz2AsyncVectorizationService.vectorize_document_async   sd     -99;;%@	
 	
 	
 	cVaccddddd    c           
         t                      }t                      }	 |                    t                                        t          j        | k                                              }|s4t                              d|             	 |	                                 d S d|_
        d|_        |                                 |                    | d          }|                                 t          |          }	|j        dk    r|j        p|pd}
|j        p|pd}|j        p|pd	}t                              d
|  d|
 d| d|            |	                    | ||
|||j        |j                  }|r d|v r|                    | |d                    nAt                              d
|  d           |	                    | ||j        |j                  }d|_
        d|_        d|_        |                                 |                    |            t                              d|             n# t6          $ r}t                              d|  dt9          |           d           	 |                    t                                        t          j        | k                                              }|r/d|_
        t9          |          |_        |                                 n4# t6          $ r'}t                              d|            Y d }~nd }~ww xY w|                    | t9          |                     Y d }~nd }~ww xY w|	                                 d S # |	                                 w xY w)Nz%Document does not exist: document_id=
processingr      )total_chunksnonei     	recursivez Starting document vectorization z: chunk_size=z, chunk_overlap=z, chunk_strategy=)r   r   r   r   chunk_strategy	tenant_iduser_idchunks_countz (without chunking))r   r   r*   r+   	completedd   Tz.Document vectorization completed: document_id=z+Document vectorization failed: document_id=z, error=)exc_infofailedz"Failed to update document status: )r   r   queryr   filteridfirstr   errorclosevectorization_statusvectorization_progresscommitcreate_taskstartr   segmentation_moder   r   r   r   vectorize_documentr*   	author_idupdate_taskis_vectorizedcomplete_task	Exceptionstrvectorization_error	fail_task)r   r   r   r   r   dbtask_managerdocumenttaskvectorization_serviceactual_chunk_sizeactual_chunk_overlapactual_chunk_strategyresulteupdate_errors                   r   r   z4AsyncVectorizationService._vectorize_document_worker0   s    ^^'))>	*++223D3G;3VWW]]__   R[RRSSSp HHJJJJJo -9H)./H+IIKKK++Ka+HHDJJLLL$@$D$D!)V33$,$7$M:$M!'/'='U'URU$(0(>(^-(^S^% s{  s  sQb  s  s  uI  s  s  \q  s  s   /AA +%0"6#8&0$. B    Rn66 ,,[&:PQQQ_{___```.AA +%&0$.	 B   -8H).1H+%)H"IIKKK&&{333KKVVVWWWW 	8 	8 	8LL[k[[SVWXSYSY[[    	RHH.//667H7K{7Z[[aacc    4<H136q66H0IIKKK R R RP,PPQQQQQQQQR"";A77777777	8" HHJJJJJBHHJJJJsb   A,H6 !FH6 4M* 6M/M1A>K0/M0
L!:LML!!&MM* MM* *N )NNNN)__name__
__module____qualname__r   	threadingLockr   r   classmethodr   staticmethodintrC   r    r    r!   r   r	   r	      s*       IINEL  [   $!%$($(e ee*e $Je Tz	e
 Tze e e \e$ GG*G $JG Tz	G
 TzG G G \G G Gr!   r	   returnc                      t                      S )N)r	   rY   r!   r   get_async_vectorization_servicer\   {   s    $&&&r!   )loggingrT   concurrent.futuresr   app.db.sessionr   app.models.knowledge_baser   ,app.services.knowledge.vectorization_servicer   1app.services.knowledge.vectorization_task_managerr   	getLoggerrQ   r   r	   r\   rY   r!   r   <module>rd      s         1 1 1 1 1 1 ' ' ' ' ' ' 7 7 7 7 7 7 U U U U U U N N N N N N		8	$	$k k k k k k k k\')B ' ' ' ' ' 'r!   