o
    "ÆiU  ã                   @   s’   d Z ddlZddlZddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZ e e¡ZG d
d„ dƒZdefdd„ZdS )u|   
å¼‚æ­¥å‘é‡åŒ–æœåŠ¡
åœ¨åŽå°çº¿ç¨‹ä¸­æ‰§è¡Œæ–‡æ¡£å‘é‡åŒ–ä»»åŠ¡
ä½¿ç”¨çº¿ç¨‹æ± æŽ§åˆ¶å¹¶å‘æ•°é‡ï¼Œé¿å…èµ„æºè€—å°½
é    N)ÚOptional)ÚThreadPoolExecutor)ÚSession)ÚSessionLocal)ÚKnowledgeDocument)Úget_task_manager)ÚDocumentVectorizationServicec                   @   sš   e Zd ZdZdZe ¡ ZdZe	dd„ ƒZ
e				ddedee dee d	ee d
ee f
dd„ƒZededee dee d	ee d
ee f
dd„ƒZdS )ÚAsyncVectorizationServiceu*   å¼‚æ­¥å‘é‡åŒ–æœåŠ¡ï¼ˆå¸¦å¹¶å‘æŽ§åˆ¶ï¼‰Né   c                 C   sz   | j du r:| j* | j du r)t| jdd| _ t d| j› d¡ W d  ƒ | j S W d  ƒ | j S 1 s5w   Y  | j S )u'   èŽ·å–çº¿ç¨‹æ± å®žä¾‹ï¼ˆå•ä¾‹æ¨¡å¼ï¼‰NÚvectorization)Úmax_workersÚthread_name_prefixz Created ThreadPoolExecutor with z workers)Ú	_executorÚ_lockr   Ú_max_workersÚloggerÚinfo)Úcls© r   úT/lsinfo/ai/hellotax_ai/base_platform/app/services/rag/async_vectorization_service.pyÚget_executor   s   

þ
úú
ÿùz&AsyncVectorizationService.get_executorÚdocument_idÚmodel_idÚ
chunk_sizeÚchunk_overlapÚsplitter_typec                 C   s2   t  ¡ }| t j| ||||¡}t d| › ¡ dS )u*   åœ¨åŽå°çº¿ç¨‹æ± ä¸­å¼‚æ­¥å‘é‡åŒ–æ–‡æ¡£z?Submitted async vectorization task to thread pool: document_id=N)r	   r   ÚsubmitÚ_vectorize_document_workerr   r   )r   r   r   r   r   ÚexecutorÚfuturer   r   r   Úvectorize_document_async)   s   	
þz2AsyncVectorizationService.vectorize_document_asyncc                 C   s^  t ƒ }tƒ }z"z°| t¡ tj| k¡ ¡ }|s(t d| › ¡ W W | 	¡  dS d|_
d|_| ¡  |j| dd}| ¡  t|ƒ}	|jdkr‰|jpL|pLd}
|jpS|pSd	}|jpZ|pZd
}t d| › d|
› d|› d|› ¡ |	j| ||
|||j|jd}|rˆd|v rˆ| | |d ¡ nt d| › d¡ |	j| ||j|jd}d|_
d|_d|_| ¡  | | ¡ t d| › ¡ W nc ty } zVtjd| › dt|ƒ› dd z| t¡ tj| k¡ ¡ }|rìd|_
t|ƒ|_| ¡  W n ty } zt d|› ¡ W Y d}~nd}~ww | | t|ƒ¡ W Y d}~nd}~ww W | 	¡  dS W | 	¡  dS | 	¡  w )u   å‘é‡åŒ–å·¥ä½œçº¿ç¨‹z%Document does not exist: document_id=NÚ
processingr   é   )Útotal_chunksÚnoneiè  éÈ   Ú	recursivez Starting document vectorization z: chunk_size=z, chunk_overlap=z, chunk_strategy=)r   r   r   r   Úchunk_strategyÚ	tenant_idÚuser_idÚchunks_countz (without chunking))r   r   r(   r)   Ú	completedéd   Tz.Document vectorization completed: document_id=z+Document vectorization failed: document_id=z, error=)Úexc_infoÚfailedz"Failed to update document status: )r   r   Úqueryr   ÚfilterÚidÚfirstr   ÚerrorÚcloseÚvectorization_statusÚvectorization_progressÚcommitÚcreate_taskÚstartr   Úsegmentation_moder   r   r   r   Úvectorize_documentr(   Ú	author_idÚupdate_taskÚis_vectorizedÚcomplete_taskÚ	ExceptionÚstrÚvectorization_errorÚ	fail_task)r   r   r   r   r   ÚdbÚtask_managerÚdocumentÚtaskÚvectorization_serviceÚactual_chunk_sizeÚactual_chunk_overlapÚactual_chunk_strategyÚresultÚeÚupdate_errorr   r   r   r   9   s   	
ÿþL·
"ù€ü

ÿþ
€€ÿ€ðþýz4AsyncVectorizationService._vectorize_document_worker)NNNN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   Ú	threadingÚLockr   r   Úclassmethodr   ÚstaticmethodÚintr   rA   r    r   r   r   r   r   r	      sF    
ûÿþýüûÿþýüûr	   Úreturnc                   C   s   t ƒ S )u!   èŽ·å–å¼‚æ­¥å‘é‡åŒ–æœåŠ¡å®žä¾‹)r	   r   r   r   r   Úget_async_vectorization_serviceœ   s   rY   )rR   rS   ÚloggingÚtypingr   Úconcurrent.futuresr   Úsqlalchemy.ormr   Úapp.db.sessionr   Úapp.models.knowledge_baser   Ú1app.services.knowledge.vectorization_task_managerr   Ú,app.services.knowledge.vectorization_servicer   Ú	getLoggerrO   r   r	   rY   r   r   r   r   Ú<module>   s    
 	