
    Xj                         d dl Z d dlmZ d dlmZ d dlmZmZ d dlm	Z	 d dl
mZ  ee          Zdedefd	Z ej        d
e	d          d             ZdS )    N)Path)
celery_app)_rebuild_content_text_switch_mode)DatabaseTask)
get_logger
local_pathreturnc                 ^   |                      dd          d         dz   }	 t          j        dd| ddd	d
d|dg
dd           ddlm}  |d          }|                    |          }d                    d |D                       t          |                              d           S # t          $ rN}t                              d|  d|            Y d }~t          |                              d           dS d }~ww xY w# t          |                              d           w xY w)N.   r   z
_audio.wavffmpegz-iz-vnz-ar16000z-ac1z-yT)checkcapture_output)	AutoModelzparaformer-zh)model)input c              3   P   K   | ]!}|                     d           |d          V  "dS )textNget).0rs     A/lsinfo/ai/hellotax_ai/data_center/backend/app/tasks/asr_tasks.py	<genexpr>z_asr_video.<locals>.<genexpr>   s5      CCaQUU6]]C&	CCCCCC    )
missing_oku   [asr_tasks] ASR 失败: u    —  )rsplit
subprocessrunfunasrr   generatejoinr   unlink	Exceptionloggerwarning)r	   
audio_pathr   r   resultes         r   
_asr_videor/      sz   ""3**1-<J
1$
E5'5RUWacghpt  FJ  	K  	K  	K  	K$$$$$$	000j11xxCC6CCCCC
 	Z40000	    F*FF1FFGGGrrrZ400000	 	Z40000s*   A'B, ,
D6 C?D ?DD %D,Tz!app.tasks.asr_tasks.run_asr_batch)bindbasenamec                    ddl m} ddlm} ddlm} | j        }|                    |                              |j	        
                    d                                                     }d |D             }|st                              d           dddS t                              d	t          |           d
            |            }t          d           d}	 |D ]}	d}
|	j	        D ]K}|                    d          s|                    d          s-t#          |d                   }|r||d<   d}
L|
rt%          |	|            ||	d           	 |                                 |dz  }ddlm}  ||	j        |	j        |	j	                   # t0          $ rC}|                                 t                              d|	j         d|            Y d }~d }~ww xY w	 t          d           n# t          d           w xY wt                              d| dt          |                      d|t          |          dS )Nr   )flag_modified)TaxDocument)DocumentCleanerc                 R    g | ]$}t          d  |j        pg D                       "|%S )c              3   l   K   | ]/}|                     d            o|                     d          V  0dS )
transcriptpathNr   )r   vs     r   r   z+run_asr_batch.<locals>.<listcomp>.<genexpr>"   sA      %o%oTU!%%*=*=&=&O!%%--%o%o%o%o%o%or   )anyinline_videos)r   ds     r   
<listcomp>z!run_asr_batch.<locals>.<listcomp>"   s>    pppQ#%o%oYZYhYnln%o%o%o"o"opqpppr   u'   [asr_tasks] 无待处理文档，跳过T)success	processedu   [asr_tasks] 待处理文档: u    条media_processingFr9   r:   r=   r   )index_document_mediaz[asr_tasks] doc_id=u    写库失败: 	inferenceu   [asr_tasks] 完成: processed=/)r@   rA   total)sqlalchemy.orm.attributesr4   app.models.tax_datar5   0app.services.tax_data_processor.document_cleanerr6   dbqueryfilterr=   isnotallr*   infolenr   r   r/   r   commitapp.services.multimodal_indexerrC   idinline_imagesr)   rollbackerror)selfr4   r5   r6   rJ   docspendingcleanerrA   docupdatedvidr9   rC   r.   s                  r   run_asr_batchr^      s   777777//////PPPPPP	B88K  ''(A(G(G(M(MNNRRTTDpp$pppG 1=>>>a000
KKBGBBBCCCoG#$$$I" 	S 	SCG( # #77<((  'F44
 #(2C%"G S%c7333c?333	SIIKKKNITTTTTT )(1BCDUVVVV  S S SKKMMMLL!Qsv!Q!Qa!Q!QRRRRRRRRSS	S. 	[!!!![!!!!
KKKKKS\\KKLLL)c'llKKKs7    A8G8 ;FG8 
G"9GG8 G""G8 8H	)r#   pathlibr   app.celery_appr   app.tasks.ocr_tasksr   r   app.tasks.processor_tasksr   common_loggingr   __name__r*   strr/   taskr^    r   r   <module>rh      s              & % % % % % C C C C C C C C 2 2 2 2 2 2 % % % % % %	H		13 13 1 1 1 1 d4WXXX)L )L YX)L )L )Lr   