
    Xj1                        d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlmZ	 d dl
mZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlm Z  d dl!m"Z"  e"e#          Z$ e            Z% G d de          Z&de'de(de'defdZ)defdZ*de'de+fdZ,de+de-fdZ.d e(d!e/ddfd"Z0d_d$ed%e/e(         d&e(d'e+dz  de(f
d(Z1 ej2        d)e&d*d+,          d`de'de(de'd/e-fd0            Z3 ej2        e&d)d12          d3e'de(fd4            Z4 ej2        e&d)d52          d6e(fd7            Z5 ej2        e&d)d82          d6e(fd9            Z6 ej2        e&d)d:2          dad;e/e(         dz  fd<            Z7 ej2        e&d)d=2          dbd?e(fd@            Z8 ej2        e&d)dA2          dcdCe/e(         dz  dDe(dEe/e(         dz  de'dz  fdF            Z9 ej2        e&d)dG2          dbd?e(fdH            Z: ej2        e&d)dI2          dbd?e(fdJ            Z; ej2        e&d)dK2          ddd?e(fdM            Z< ej2        e&d)dN2          dO             Z= ej2        e&d)dP2          dQ             Z> ej2        e&d)dR2          dS             Z? ej2        e&d)dT2          dbd?e(fdU            Z@dVe(de-fdWZA ej2        d)e&d*dX,          dede'dVe(de'd/e-dYe(dZe(fd[            ZB ej2        d)e&d*d\,          dfd?e(fd^            ZCdS )g    NdatetimePath)Task)insert)IntegrityError)
celery_app)get_settings)DataProcessingTaskTaxDocumentCategoryProcessor)CheckpointManager)IncrementalUpdater)KnowledgeBaseClientProcessorEngine)normalize_legal_doc_number)StagingPackageBuilder)
get_loggerc                   .    e Zd ZdZed             Zd ZdS )DatabaseTaskNc                 H    | j         ddlm}  |            | _         | j         S )Nr   )SessionLocal)_dbapp.databaser   )selfr   s     G/lsinfo/ai/hellotax_ai/data_center/backend/app/tasks/processor_tasks.pydbzDatabaseTask.db   s0    8111111#|~~DHx    c                 X    | j         "| j                                          d | _         d S d S N)r   close)r   argskwargss      r   after_returnzDatabaseTask.after_return#   s/    8HNNDHHH  r!   )__name__
__module____qualname__r   propertyr    r'    r!   r   r   r      sA        
C  X    r!   r   task_idcategory_idmodereturnc           
      N   |                      t                                        t          j        |k                                              }|sUt          |||ddddd          }|                     |           |                                  |                     |           |S )Npendingr   )r-   r.   r/   statusprogresstotal_countsuccess_countfailed_count)queryr   filterr-   firstaddcommitrefresh)r    r-   r.   r/   tasks        r   _get_or_create_taskr?   (   s    88&''../A/IW/TUU[[]]D !'{QU^grs  BC  ST  cd  e  e  e
t
		


4Kr!   r>   c                     |                                 D ]\  }}t          |||           t          j                    |_        |                                  d S r#   )itemssetattrr   utcnow
updated_atr<   )r    r>   r&   kvs        r   _update_taskrG   1   sR      1ao''DOIIKKKKKr!   
source_url
new_resultc                    |                      t                                        t          j        |k                                              }|sd S |j        }|                    d          }|r|r||k    rd S 	 dd l}t          j	                    
                    d          }t          j        dz  t          |j                  z  }|                    dd           || dz  }	|j        |j        t          j	                                                    |j        |j        |j        |j        r|j                                        nd |j        r|j                                        nd ||j        |j        |j        |j        r|j                                        nd d
||                    d	          d
d}
t5          |	dd          5 }|                    |
|dd           d d d            n# 1 swxY w Y   t8                              d|	            d S # t<          $ r+}t8                              d| d|            Y d }~d S d }~ww xY w)Ncontent_hashr   z%Y%m%d%H%M%SversionsTparentsexist_okz.json)
title
doc_numberissuing_authority
issue_dateeffective_daterK   content_htmlcontent_markdownattachmentsrD   rP   rK   rP   )
tax_doc_idrH   archived_atpreviousincomingwutf-8encodingF   ensure_asciiindentu   [version] 已归档旧版本: u+   [version] 归档旧版本失败 source_url=: ) r8   r   r9   rH   r:   rK   getjsonr   rC   strftimesettingsraw_data_dirstridmkdir	isoformatrP   rQ   rR   rS   rT   rU   rV   rW   rD   opendumploggerinfo	Exceptionwarning)r    rH   rI   old_docold_hashnew_hashrg   tsarchive_dirarchive_filepayloadfes                r   _archive_previous_versionr~   7   s   hh{##**;+AZ+OPPVVXXG #H~~n--H 8 x8';';X_''77+j83wz??J$666"\\\1!(7;M^f^m^o^o^y^y^{^{  T[  Ta  qx  qC  Za  Zs  el  ew  CA  CJ  CU  C_  C_  Ca  Ca  Ca  }A  {B  {Q  U[  U\  Uk  Uu  Uu  Uw  Uw  Uw  W[  mu  GN  G[  qx  qI  Za  Zm  _	f	  _	q	  }{	  }D	  }O	  }Y	  }Y	  }[	  }[	  }[	  w	{	  J|	  J|	  [
c
  n
x
  n
|
  n
|
  }
D  n
E  n
E  J
F  J
F  G  G,g666 	@!IIgquQI???	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@C\CCDDDDD X X XVZVVSTVVWWWWWWWWWXs=   ;EH G4(H 4G88H ;G8< H 
I( IIresultc                 
   	 |                     d          r|d                             dd          |d<   |                     d          r|d                             dd          |d<   |                     d          r|d                             dd          |d<   t          | |d         |           t          t                                        |d         |d         |d         |                     d	          |                     d
          |                     d          |                     d          |                     d          pd |                     d          |                     d          |                     d          |                     d          |                     d          |                     d          |                     d          |                     d          |                     d          |                     d          pd|                     d          |                     d          d                              dgi d|d         d	|                     d	          d
|                     d
          d|                     d          d|                     d          d|                     d          pd d|                     d          d|                     d          d|                     d          d|                     d          d|                     d          d|                     d          d|                     d          d|                     d          d|                     d          d|                     d          pdd|                     d          |                     d          dt          j                    d          }| 	                    |           | 
                                 dS # t          $ rR}|                                  t                              d|                     d           d|            Y d }~dS d }~wt          $ r{ |                                  d d l}t                              d!|                     d                      t                              d"|                                            Y dS w xY w)#NrU     rV   content_textrH   r.   rP   rQ   rR   rS   rT   
doc_statusrK   rW   inline_imagesinline_videos	file_pathfile_directoryfetch_strategynormal
supersedes
references	completed)rH   r.   rP   rQ   rR   rS   rT   r   rU   rV   r   rK   rW   r   r   r   r   r   r   r   processing_status)r   r   rD   index_elementsset_Tu    [upsert] 写入冲突已忽略:     — Fr   u   [upsert] 写入失败: u   [upsert] 异常详情: )rf   replacer~   	pg_insertr   valueson_conflict_do_updater   rC   executer<   r	   rollbackrq   rt   rs   	tracebackerror
format_exc)r    r   stmtr}   r   s        r   _upsert_tax_documentr   L   sO	   ::n%% 	P%+N%;%C%CFB%O%OF>"::()) 	X)/0B)C)K)KFTV)W)WF%&::n%% 	P%+N%;%C%CFB%O%OF>"!"f\&:FCCC%%,,|8LZ`anZow}  F  xG  TZ  T^  T^  _k  Tl  Tl  @F  @J  @J  K^  @_  @_  lr  lv  lv  wC  lD  lD  U[  U_  U_  `p  Uq  Uq  ~D  ~H  ~H  IU  ~V  ~V  ~^  Z^  ms  mw  mw  xF  mG  mG  Z`  Zd  Zd  ew  Zx  Zx  GM  GQ  GQ  R`  Ga  Ga  pv  pz  pz  {I  pJ  pJ  X^  Xb  Xb  cp  Xq  Xq  A	G	  A	K	  A	K	  L	[	  A	\	  A	\	  l	r	  l	v	  l	v	  w	F
  l	G
  l	G
  S
Y
  S
]
  S
]
  ^
i
  S
j
  S
j
  {
A  {
E  {
E  FV  {
W  {
W  hn  hr  hr  sC  hD  hD  hP  HP  ]c  ]g  ]g  ht  ]u  ]u  BH  BL  BL  MY  BZ  BZ  ny,  z  z  P  P  am  `n  uX  v}  E  FM  N  uX  P\  ^d  ^h  ^h  iu  ^v  ^v  uX  xK  MS  MW  MW  Xk  Ml  Ml  uX  nz  |B  |F  |F  GS  |T  |T  uX  Vf  hn  hr  hr  sC  hD  hD  uX  FR  TZ  T^  T^  _k  Tl  Tl  Tt  pt  uX  vD  FL  FP  FP  Q_  F`  F`  uX  bt  v|  v@  v@  AS  vT  vT  uX  Vd  fl  fp  fp  q  f@  f@  uX  BP  RX  R\  R\  ]k  Rl  Rl  uX  n{  }C  }G  }G  HU  }V  }V  uX  Xg  io  is  is  tC  iD  iD  uX  FU  W]  Wa  Wa  bq  Wr  Wr  uX  t  AG  AK  AK  LW  AX  AX  uX  Zj  lr  lv  lv  wG  lH  lH  uX  JZ  \b  \f  \f  gw  \x  \x  \D  |D  uX  FR  TZ  T^  T^  _k  Tl  Tl  uX  |B  |F  |F  GS  |T  |T  kv  FN  FU  FW  FW  uX  uX  uX  P  Y  Y


4
		t   
\&**\:R:R\\YZ\\]]]uuuuu   
Ivzz,/G/GIIJJJGy/C/C/E/EGGHHHuus    PP" "
T,AQ99BT Tdoc_idrW   c                    |sd S |                      t                                        t          j        |k                                              }|r|j        sd S |j        }d}t          |          D ]\  }}|                    dd          }|                    dd          }	|rd| d|vr<|	rt          j	        |	z  nd }
|
r|

                                r
d| d| d	}n|}||k    rA|                    d| dd| d          }d
}t                              d| d| d           |r||_        |                                  d S d S )NFurlr   pathz]()z/api/v1/documents/z/attachments/z	/downloadTz[attachment-url] doc_id=    附件[u   ] URL 已替换为本地端点)r8   r   r9   rl   r:   rV   	enumeraterf   ri   project_rootis_filer   rq   rr   r<   )r    r   rW   tax_doccontentchangedidxattrH   
local_path	abs_localnew_urls               r   _rewrite_attachment_urlsr   d   s    hh{##**;>V+CDDJJLLG '2 &GGk** h hSWWUB''
WWVR((
 	/*///w>>:DNH)J66$	 	!**,, 	!N6NNNNNGG Gj  oo&8:&8&8&8/w///JJGGKKf6ff3fffggg #* 
		 r!      	kb_clientbatch_doc_idskb_idcategory_id_mapc           	         |sdS |pi }d}|D ]}|                      t                                        t          j        |k                                              }|sS|j        r|dz  }`|                    |j                  }	 t          j	        |
                    |||                    }	|	rP|	                    d          r;d|_        |	                    d          |_        |                                  |dz  }n7t                              d| d|	r|	                    d	          nd
            0# t          $ rS}
|                                  t                              d| d|
                     d                     Y d }
~
d }
~
ww xY wt                              dt)          |           d|            |S )Nr   r   r   r.   successTknowledge_doc_idu#   [batch-import] 导入失败 doc_id=z error=r   unknownu#   [batch-import] 导入异常 doc_id=re   	exceptionu'   [batch-import] 本批完成: requested=z
 imported=)r8   r   r9   rl   r:   is_importedrf   r.   asynciorunimport_documentr   r<   rq   rt   rs   r   r   optrr   len)r    r   r   r   r   importedr   r   kb_category_idr   r}   s              r   _flush_import_batchr   ~   s    q%+OH b b((;''..{~/GHHNNPP 	 	MH(,,W-@AA	b[!:!:7%]k!:!l!lmmF F&**Y// F&*#+1::6H+I+I(		A   EV   E   Elr  VBU[U_U_`gUhUhUh  yB   E   E  F  F  F 	b 	b 	bKKMMMLLLvLLLLPP[_P``aaaaaaaa	b KKb#m:L:LbbX`bbcccOs   B3D==
FAFFTdata_center_queuez/app.tasks.processor_tasks.process_category_task)bindbasequeuenamefullFforcec                    t                               d| d| d|            t          | j        |||          }t	          ||          }|                                }|                                }	 t          | j        |dt          j	                    d           t                      }	|	                    |          }
|
st          | j        |dd	d
           dd	dS t          t          j        t          j        t          j        t          j                  }|	                    |          }g }|dz   }t'          ||dz             D ]}|	                    ||          }t                               d| d| d| d|            t+          j        |                    ||                    }|r|                    |           t3          ||z  dz            }t          | j        ||           |                    |           t7          |          }t                               d| d| dt7          |           d           t          | j        ||t7          |          z   d           |dk    rF|sDt          | j        |ddt          j	                               |                                 d|ddS t;          t          j        |
d         z            }t?          |                               dd           t7          |          }d}tC          t          j"        t          j#                  }t          j$        }t          j%        }tM                      }d }g }d}|D ]\}|d!         }|'                    |          r|dz  }&|d"k    rv|st| j        (                    tR          j*                  +                    tR          j,        |k              -                                } | r|.                    |           |dz  }	 t+          j        |/                    |||                    }!|!r|!0                    d#          rtc          | j        |!           |.                    |           |dz  }| j        (                    tR                    +                    tR          j,        |k              -                                }"|"r	 |2                    |"           n?# tf          $ r2}#t           4                    d| d$|"j*         d|#            Y d }#~#nd }#~#ww xY w|!0                    d%          r!tk          | j        |"j*        |!d%                    |6                    |"j*                   t7          |          |k    rNt                               d| d&t7          |           d'           to          | j        ||||(          }$||$z  }g }nB|dz  }|!r|!0                    d)d*          nd+}%t           4                    d| d,| d-|%            nS# tf          $ rF}&|dz  }t           8                    d| d.| d-|& 9                    d/                     Y d }&~&nd }&~&ww xY w|t7          |          z   }'||z   }(dt3          |(tu          |'d          z  d0z            z   })t          | j        ||||)1           ^|;                                 |rNt                               d| d2t7          |           d'           to          | j        ||||(          }$||$z  }g }t                               d| d3|            t          | j        |ddt          j	                               |dk    r|                                 t                               d| d4|t7          |          z    d5| d6|            d|||t7          |          z   ||d7S # tf          $ r}*t           8                    d| d8|* 9                    d/                     |;                                 t          | j        |dt;          |*          t          j	                    9           dt;          |*          dcY d }*~*S d }*~*ww xY w):N[u   ] 开始处理分类 u   ，模式: r-   r.   runningr   r3   
started_atr4   failed   未知的分类ID)r3   error_messager4   Fr   r   timeoutmax_retries	delay_min	delay_maxr   u   ] 获取列表页 /re      r4   u   ] 共获取 u)    个新文档链接（已有断点记录 u    条）)r5   r4   r   d   r3   r4   completed_atT)r   r-   totalr   rM   base_urlapi_key  r   r   r   u!   ] staging 包生成失败 doc_id=rW   u   ] 触发批量推送:     条)r   r   r   u   未知错误   处理失败u   ] 文档处理失败: r   u   ] 文档异常: r   P   r6   r7   r4   u   ] 推送最终批次: u(   ] 知识库导入完成: total_imported=u   ] 完成: total=	 success= failed=)r   r-   r.   r   r6   r7   u   ] 处理分类任务失败: r3   r   r   )<rq   rr   r?   r    r   load_completed_urlsload_page_cursorrG   r   rC   r   get_category_configr   ri   crawler_request_timeoutcrawler_max_retriescrawler_delay_mincrawler_delay_maxcalculate_total_pagesrangebuild_page_urlr   r   process_list_pageextendintsave_page_cursorr   clearrk   rj   r   rm   r   base_platform_urlbase_platform_api_keybase_platform_kb_idkb_category_id_mapr   is_doner8   r   rl   r9   rH   r:   	mark_doneprocess_documentrf   r   save_packagers   rt   r   appendr   r   r   maxflush)+r   r-   r.   r/   r   r>   
checkpointcompleted_urlsresume_pagecategory_processorconfigprocessor_enginetotal_pagesall_documents
start_pagepagepage_urldocslist_progressr   save_dirr6   r7   r   r   r   staging_builderIMPORT_BATCH_SIZEpending_import_idstotal_importeddocdoc_urlexistingr   	saved_docpkg_errr   errdoc_err	all_total	processedr4   r}   s+                                              r   process_category_taskr"     ss	   
KKPGPP+PP$PPQQQtwdCCD"7LLLJ3355N--//K`3TWd9ARAR]^____.00#77DD 	D$xGZefgggg$/BCCC*83Saia}  JR  Jd  px  pJ  K  K  K(>>{KK 1_
*kAo66 	. 	.D)88dKKHKKWGWWtWWkWWXWWXXX;/AA(KXXYYD +$$T***{ 2R 788M$????''----M""yyyUyy]`ao]p]pyyyzzzTWdN8K8K0KVXYYYYA::~:${SW_WfWhWhiiii#!DDDx,vf~=>>XTD999N++'1KU]Usttt	,"5/11(*  )	s )	sC%jG!!'** "v~~u~7==88??@VZa@abbhhjj ((111!Q&Mg %5%F%FwP[]e%f%fgg [fjj33 [(&999((111!Q&M $k : : A A+BX\cBc d d j j l lI  @t+88CCCC( t t t"NN+rw+r+rYbYe+r+rip+r+rsssssssst!::m44 c4TWilFS`Labbb*11),???-..2CCC$d$d$dsK]G^G^$d$d$deee#6tw	K]ej  }L  $M  $M  $M&(2-/* A%LAG[&**Wn===^CNN#Yw#Y#Yg#Y#YTW#Y#YZZZ g g g!QQQ'QQQQUU`dUeeffffffffg N 3 33I%4IC	C	1,=,= = BCCCH$mR^iqrrrrr 	$KKXGXX3?Q;R;RXXXYYY*47I?QY^p  A  A  AHh&N!#YYYYYZZZTWd;S[SbSdSdeeee1|||^9L9L1L||Wd||nz||}}}GKZ_beftbubuZu  IV  ht  u  u  	u 3 3 3AAAaAAEEPTEUUVVVTWd83q66X`XgXiXijjjj 3q6622222222	3s   5A,_ "G_ ?E_ CXS('X(
T$2(TXT$$C7X_ 
Y,&<Y'"_ 'Y,,E1_ 
b(Ba>8b>bz/app.tasks.processor_tasks.process_document_task)r   r   r   document_urlc                 r   t                               d|            	 t          t          j        t          j        t          j        t          j                  }t                      }|	                    |          }|sdddS t          t          j        |d         z            }t          |                              dd           t          j        |                    |||                    }|r|                    d	          rt%          | j        |           | j                            t*                                        t*          j        |k                                              }|r6|                    d
          r!t3          | j        |j        |d
                    d||                    d          dS d|r|                    dd          nddS # t6          $ rM}	t                               d|	                     d                     dt          |	          dcY d }	~	S d }	~	ww xY w)Nu   开始处理文档: r   Fr   r   r   TrM   r   rW   rP   )r   r#  rP   r   r   u   处理文档任务失败: r   )rq   rr   r   ri   r   r   r   r   r   r   rk   rj   r   rm   r   r   r  rf   r   r    r8   r   r9   rH   r:   r   rl   rs   r   r   )
r   r#  r.   r  r
  r  r  r   r  r}   s
             r   process_document_taskr%    sC   
KK5|556663*83Saia}  JR  Jd  px  pJ  K  K  K.00#77DD 	D$/BCCCx,vf~=>>XTD999->>|[Zbccdd 	rfjj++ 	r &111k2299+:PT`:`aaggiiI WVZZ66 W(),}@UVVV#\FJJW^L_L_```$V\/pvzz'>/R/R/Rbpqqq 3 3 35!5599D9IIJJJ 3q66222222223s,   A$G D=G G 
H6)AH1+H61H6z3app.tasks.processor_tasks.reprocess_from_local_taskrY   c                 4   | j                             t                                        t          j        |k                                              }|sdddS t          t          j        t          j	        t          j
        t          j                  }t                      }|                    |j                  }|r"t          t          j        |d         z            nt          t          j                  }t#          |                              dd           |j        r:t)          j        |                    |j        |j        |j        |                    }n3t)          j        |                    |j        |j        |                    }|ri|                    d          rT|                    d	          s|j        r
|j        |d	<   t7          | j         |           d||                    d
          dS d|r|                    dd          nddS )NF   文档不存在r   r   r   TrM   r   rW   rP   )r   r   rP   r   r   )r    r8   r   r9   rl   r:   r   ri   r   r   r   r   r   r   r.   rk   rj   r   rm   rU   r   r   reprocess_from_htmlrH   r  rf   rW   r   )r   rY   r  r  r
  r  r  r   s           r   reprocess_from_local_taskr)    s   
'--
$
$
+
+KNj,H
I
I
O
O
Q
QC > +<===&x/O]e]y  FN  F`  lt  lF  G  G  G*,,33CODDF>Dds8(6&>9:::#hNcJdJdHNN555
 k-AA#BRTWTbdgdsu}~~->>s~s`hiijj n&**Y'' nzz-(( 	4S_ 	4$'OF=!TWf---:

7@S@STTT RX+l6::g~+N+N+N^lmmmr!   z7app.tasks.processor_tasks.import_to_knowledge_base_taskc                    t                               d| d           	 | j                            t                                        t          j        |k                                              }|sdddS |j        rdd|j	        dS t          t          j        t          j        	          }t          j        }t          j                            |j                  }t%          j        |                    |||
                    }|rh|                    d          rSd|_        |                    d          |_	        | j                                         d||                    d          dS d|r|                    dd          nddS # t,          $ rM}t                               d|                     d                     dt3          |          dcY d }~S d }~ww xY w)Nu   开始导入文档 u    到知识库Fr'  r   Tu   文档已导入)r   messager   r   r   r   r   )r   rY   r   r   u   导入失败u   导入文档任务失败: r   )rq   rr   r    r8   r   r9   rl   r:   r   r   r   ri   r   r   r   r   rf   r.   r   r   r   r<   rs   r   r   rk   )r   rY   r   r   r   r   r   r}   s           r   import_to_knowledge_base_taskr,  1  s   
KK?j???@@@3'--,,33KNj4PQQWWYY 	B$/@AAA 	q#0AW^Woppp'1KU]Usttt	,!4889LMMY66weYg6hhii 	rfjj++ 	r"&G'-zz2D'E'EG$GNN#:SYS]S]^pSqSqrrr$V\/pvzz'>/R/R/Rbpqqq 3 3 35!5599D9IIJJJ 3q66222222223s2   AF 9F C(F 4F 
G(AG#G(#G(z:app.tasks.processor_tasks.backfill_attachment_content_taskdoc_idsc                    dd l }ddlm} ddlm} ddlm} d}g }|ddlm} | j        	                    t          j                                      t          j                            d            |d           |d          t          j                            d	                                                    }	d
 |	D             }t%          |          }
t&                              d|
             |            } |            }d}d}d}d}|D ]}| j        	                    t                                        t          j        |k                                              }|s|dz  }]|j        pg }|s|dz  }ng }h d}dh}d}t-          |          D ]\  }}|                    dd          }|rt0          j                            |          s!t&                              d| d|            ^t0          j                            |          d                                         }||v rt&                              d| d|                    d           d| d           |                    ||                    d          |d|t0          j                            |          r%t0          j                             |          dz  dz  ndd           E||v rt&                              d| d|                    d           d| d           |                    ||                    d          |d|t0          j                            |          r%t0          j                             |          dz  dz  ndd           ||v rt&                              d| d|                    d           d| d           |                    ||                    d          |d|t0          j                            |          r%t0          j                             |          dz  dz  ndd           t0          j                             |          }||k    rt&          !                    d| d|                    d           d|dz  dz  d d!|dz  dz  d"d#	           |                    ||                    d          |d$|dz  dz  d d%|dz  dz  d"d#||dz  dz  d           ^	 tE          j#        |$                    |                    }|                    d&          r|                    d'd          %                                rv|                    |                    dd(|dz              |d'         d)           t&                              d| d*|                    d           d+|d,         d d           ;# tL          $ rA}t&          !                    d| d-|                    d           d.|            Y d }~d }~ww xY w|s|dz  }t%          |          dk    }g } t-          |          D ]o\  }!}"|r<|!t%          |          k     r||!         ntO          |!dz             }#d(|# d/|"d          }$nd0|"d          }$|                     d1|$ d2|"d'                     pd(                    |           }%|j        pd|%z   }&|)                    |&          *                    d3d          }'	 |&*                    d3d          |_        |'|_+        | j        ,                                 |dz  }t&                              d| d4t%          |           d5           i# tL          $ rI}| j        -                                 |dz  }t&          .                    d| d6|            Y d }~d }~ww xY w|r	 t_          |d7d89          5 }(|0                     |j1                    2                                t%          |          |d:|(d;d<=           d d d            n# 1 swxY w Y   t&                              d>| d?t%          |           d@           n4# tL          $ r'}t&          .                    dA|            Y d }~nd }~ww xY wt&                              dB|
 dC| dD| dE|            dF|
|||dGS )HNr   r   )DocConverterDocumentCleanerz!/tmp/skipped_ocr_attachments.jsontextattachments::text != 'null'attachments::text != '[]'u   %## 附件%c                     g | ]
}|d          S )r   r,   .0rs     r   
<listcomp>z4backfill_attachment_content_task.<locals>.<listcomp>U  s    &&&A1Q4&&&r!   u*   [backfill_attachment] 待补填文档数: u   一二三四五六七八九十r   >   .bmp.gif.png.tif.jpeg.tiff.webp.jpgz.pdfi   r   r   z[backfill_attachment] doc_id=u    附件文件不存在: u    跳过扫描附件: r   z (r   u   扫描附件（图片）i   )r   attachment_namer   reasonfile_extensionfile_size_mbu    跳过PDF文件: u$   PDF文件（需要OCR，处理慢）u    跳过大文件: u
    (大小: z.2fu   MB, 阈值: z.0fzMB)u   文件过大 (zMB > r   markdownu   附件)r   rG  u    附件转换成功: z
 (quality=parse_quality_scoreu    附件转换异常: r   u   ：u	   附件：z


---

## z

r   u    补填完成，追加 u
    个附件u    写库失败: r]   r^   r_   )	timestamptotal_skippedskipped_itemsFra   rb   u0   [backfill_attachment] 跳过记录已保存到: u    (共 u    条)u0   [backfill_attachment] 保存跳过记录失败: u$   [backfill_attachment] 完成: total=r   r   z	 skipped=T)r   r   r6   r7   skipped_count)3rg   r   8app.services.tax_data_processor.converters.doc_converterr/  0app.services.tax_data_processor.document_cleanerr1  
sqlalchemyr3  r    r8   r   rl   r9   rW   isnotrV   notlikeallr   rq   rr   r:   r   rf   osr   isfiledebugsplitextlowerr  existsgetsizert   r   r   convert_localstriprs   rk   joinmarkdown_to_rag_textr   r   r<   r   r   ro   rp   nowrn   ))r   r-  rg   r   r/  r1  skipped_log_pathskipped_recordssa_textrowsr   doc_convertercleanerlabelsr6   r7   rL  r   r  rW   attachment_sectionsimage_extensionsskipped_extensionsLARGE_FILE_THRESHOLDir   r   file_ext	file_sizeconv_resultr}   use_ordinalpartsjseclabelheadingextra_mdnew_markdownnew_textr|   s)                                            r    backfill_attachment_content_taskrw  I  s\   KKK!!!!!!UUUUUUPPPPPP:O......w}}[^,,33K4K4Q4QRV4W4WY`Y`a~YY  BI  BI  Je  Bf  Bf  hs  hD  hL  hL  MZ  h[  h[  \  \  `  `  b  b&&&&&LLE
KKDUDDEEE LNNMoG-FMLM CU CUgmmK((//&0HIIOOQQ 	QMO1r 	QM ^^^$X/,, 	w 	wFAs,,J RW^^J%?%? iVii]giijjjw''
33A6<<>>H+++xVxxZ]ZaZabhZiZixxmuxxxyyy&&&SWWU[__ku  B\  px  wy  w~  wE  wE  FP  wQ  wQ  JX  JL  JQ  JY  JY  Zd  Je  Je  hl  Jl  os  Js  Js  WX  (Y  (Y  Z  Z  Z---uVuuWZW^W^_eWfWfuujruuuvvv&&&SWWU[__ku  Bh  |D  CE  CJ  CQ  CQ  R\  C]  C]  Vd  VX  V]  Ve  Ve  fp  Vq  Vq  tx  Vx  {  V  V  cd  (e  (e  f  f  f---uVuuWZW^W^_eWfWfuujruuuvvv&&&SWWU[__ku  Bh  |D  CE  CJ  CQ  CQ  R\  C]  C]  Vd  VX  V]  Ve  Ve  fp  Vq  Vq  tx  Vx  {  V  V  cd  (e  (e  f  f  f
33I///   Iv   I   IY\Y`Y`agYhYh   I   It}  AE  uE  HL  uL   I   I   I  ^r  uy  ^y  |@  ^@   I   I   I   I  J  J  J&&&SWWU[__ku  B`  S\  _c  Sc  fj  Sj  B`  B`  B`  uI  LP  uP  SW  uW  B`  B`  B`  B`  t|  NW  Z^  N^  ae  Ne  (f  (f  g  g  gw%k-*E*Ej*Q*QRR??9-- c+//*b2Q2Q2W2W2Y2Y c'..HXQRUVQVHXHX8Y8Ygrs}g~//  A  A  AKK  !b  !b  !b]`]d]dek]l]l  !b  !b  yD  EZ  y[  !b  !b  !b  !b  c  c  c w w wuvuu\_\c\cdj\k\kuursuuvvvvvvvvw" 	QM-..2 344 	J 	JFAs 4%&V__q		#a!e**:5::S[::3c&k33LLH'HHs:HHIIII775>>,2h>//==EEfbQQ		U#/#7#7#C#CC 'CGNNQMKK{{{WZ[nWoWo{{{|||| 	U 	U 	UGALLLSSSPQSSTTTTTTTT	U  Q	Q&g>>> o!		(@(@(B(BUXYhUiUi  }L  M  M  OP  _d  mn	  o  o  oo o o o o o o o o o o o o o oKK~K[~~cfgvcwcw~~~ 	Q 	Q 	QLLOAOOPPPPPPPP	Q
KK  Eu  E  E}  E  E^j  E  E  vC  E  E  F  F  Fem]i  }J  K  K  Kso   8CW
X6XX
A.]::
_>__a5 %A
`;/a5 ;`??a5 `?1a5 5
b&?b!!b&z0app.tasks.processor_tasks.fill_content_text_task   
batch_sizec                    ddl m}  |            }| j                            t                                        t          j                            d           t          j        	                    d                     
                                }t                              d|            d}	 | j                            t                                        t          j                            d           t          j        	                    d                                         t          j                                      |                                          }|sn|D ]}	 |                    |j                  }|r|                    dd          nd|_        =# t&          $ r6}	t                              d|j         d|	            d|_        Y d }	~	xd }	~	ww xY w| j                                         |t-          |          z  }t                              d	| d
|            t                              d| d           d||dS )Nr   r0  u(   [fill_content_text] 待填充文档数: Tr   r   z[fill_content_text] doc_id=u    转换失败: u   [fill_content_text] 进度: r   u'   [fill_content_text] 完成，共处理 r   )r   r   r!  )rN  r1  r    r8   r   r9   rV   rP  r   is_countrq   rr   order_byrl   limitrR  r]  r   rs   rt   r<   r   )
r   ry  r1  rd  r   r!  r  r  r3  r}   s
             r   fill_content_text_taskr    s}   PPPPPPoGGMM+&&--k.J.P.PQU.V.VXcXpXtXtuyXzXz{{  B  B  D  DE
KKB5BBCCCIHw}}[))001M1S1STX1Y1Y[f[s[w[wx|[}[}~~  H  H  IT  IW  X  X  ^  ^  _i  j  j  n  n  p  p 	 	& 	&C&33C4HII?C#K4<<#;#;#;   & & &WSVWWTUWWXXX#%      & 	SYY	F9FFuFFGGGH KKI)IIIJJJe)DDDs   '9F!!
G!+,GG!z1app.tasks.processor_tasks.incremental_update_task   category_idswindow_days
source_idsc                   ( d|pd d| d}t                               d|            d }|rl| j                            t                                        t          j        |k                                              }|rt          | j        |dd           	 t          | j                  }|r|nt          t          d	d
                    }t          |          }	g }
t          |          D ]\  }}|r/dt          ||	z  dz            z   }t          | j        ||           t          j        |                    ||                    }|
                    |           t                               d| dt          |                    dg                      dt          |                    dg                                 d}d}d}d (dt(          f(fd}|rt          | j        |d           |
D ]\}|                    d          rGt                               d|                    d           d|                    d                      _|d         }|                    dg           D ](}t,                              |d         |           |d	z  })|                    dg           D ]}|d         }| j                            t0                                        t0          j        |k                                              }|r|j        r|j        r	 ddlm} ddlm} ddl m!}  |            }|"                    |          }|r"tG          tH          j%        |d         z            ntG          tH          j%        dz            } ||          &                    d d !            |tH          j'        tH          j(        tH          j)        tH          j*        "          }t          j        |+                    |||                    }|r|                    d#          r}|                    d$          p|                    d%          pd&}|rt          j         |            ,                    |j        |'                    } | r@|                     d#          r+t                               d(| d)|j                    |d	z  }n7t           -                    d*| d+| r|                     d          nd,            t]          | j        |           | j                            t0                                        t0          j        |k                                              }!|!r6|                    d-          r!t_          | j        |!j0        |d-                    n:|r|                    dd.          nd.}"t           -                    d/| d0|"            nf# tb          $ r>}#t                               d1| d0|# 2                    d 2                     Y d }#~#n#d }#~#ww xY wt,                              ||           |d	z  }^|r|D ]}$t          j        |3                    |$|                    }%|%                    d          r't                               d3|$ d|%d                     f|%                    dg           D ]P}&th                              tG          tk          d4          6                                          |$d56           |d	z  }Q|pt          t          d	d
                    D ]}|7                    |           t                               d7| d8| d| d9|            |r2t          | j        |d:d;||z   ||z   tq          j9                    <           d ||||d= |
D             d>S # tb          $ r}'t                               d?|' 2                    d 2                     |r7t          | j        |d@tG          |'          tq          j9                    A           dBtG          |'          dCcY d }'~'S d }'~'ww xY w)DNzcategories=rR  z window=u   天u   开始执行增量更新: r   
   )r3   r4   r   	   r   r   )r  u   [增量] 分类 u    检查完成: 新增=new_documentsu    更新=updated_documentsr   r0   c                  V     %t          t          j        t          j                    S )Nr   )r   ri   r   r   )kb_client_incrementals   r   _get_kb_clientz/incremental_update_task.<locals>._get_kb_client  s6    $,(;XE_iq  jH  )I  )I  )I%((r!   Z   r   r.   u    检查失败: r   r   r   r   r   incrementalTrM   r   r   r   rV   r   )r   new_contentu-   [增量] 增量重向量化成功 source_url=z knowledge_doc_id=u-   [增量] 增量重向量化失败 source_url=re   r   rW   r   u,   [增量] 重爬失败（已导入文档）: r   u&   [增量] 已导入文档更新异常: r   u   [增量] 数据源 uuidr   )r/   u   [增量] 完成: u    — 新增=u    重向量化=r   r   )r3   r4   r5   r6   r   c                 >   g | ]}|                     d           |                     d          t          |                     dg                     t          |                     dg                     |                     dd          |                     d          dS )r.   category_namer  r  skippedr   r   )r.   r  newupdatedr  r   )rf   r   r7  s     r   r:  z+incremental_update_task.<locals>.<listcomp>!  s|     _Q  _Q  _Q  DE  pq  pu  pu  vC  pD  pD  WX  W\  W\  ]l  Wm  Wm  vy  z{  z  z  @O  QS  zT  zT  vU  vU  be  fg  fk  fk  l  AC  fD  fD  bE  bE  RS  RW  RW  Xa  cd  Re  Re  pq  pu  pu  v}  p~  p~  `  `  _Q  _Q  _Qr!   )r   scope	total_newtotal_updatedtotal_revectorizedresultsu   增量更新任务失败: r   r   Fr   ):rq   rr   r    r8   r   r9   r-   r:   rG   r   listr   r   r   r   r   r   check_category_updatesr  rf   r   r   r%  delayr   rH   r   r   pathlibr   2app.services.tax_data_processor.category_processorr   0app.services.tax_data_processor.processor_enginer   r   rk   ri   rj   rm   r   r   r   r   r  update_document_contentrt   r   r   rl   rs   r   check_sourceprocess_source_task
__import__uuid4update_last_check_timer   rC   ))r   r  r  r  r-   r  r>   updatertarget_categoriestotal_categoriesr  r   cidr4   r   r  r  r  r  r.   r  r  r  r   r   r   r
  r  r  enginecrawl_resultr  	rv_resultinc_docr  inc_errsid
src_result_docr}   r  s)                                           @r   incremental_update_taskr    so
   I,/%IIIIIE
KK4U44555D Gw}}/00778J8RV]8]^^ddff 	G$y2FFFFY3$TW--,8OLLd5A;;>O>O011!"344 	` 	`HC ?C*:$:R$? @ @@TWdX>>>>[!?!?Q\!?!]!]^^FNN6"""KK  _3  _  _c&**UdfhJiJiFjFj  _  _twx~  yC  yC  DW  Y[  y\  y\  u]  u]  _  _  `  `  `  `	<@	) 3 	) 	) 	) 	) 	) 	)
  	5$4444 -	# -	#Fzz'"" o

=0I0IooZ`ZdZdelZmZmooppp /Kzz/266  %++CJDDDQ		zz"5r:: %# %#e*7==55<<[=SW^=^__eegg !F 4 !F9R !F{000000      eddddd->->-@-@*!3!G!G!T!TRX  $I3x'<vf~'M#N#N#N^abjbw  {H  cH  _I  _IX,,TD,III!09Ygo  hD  PX  Pj  v~  vP  "Q  "Q  "Q'.{63J3J7T_ai3j3j'k'k' oL,<,<Y,G,G o*6*:*:>*J*J*xlN^N^_qNrNr*xvxK* f,3K8H8H8`8`rz  sL  Ze8`  9f  9f  -g  -g	#, !fy1I1I !f$*KK  1W`g  1W  1W  |D  |U  1W  1W  %X  %X  %X$6!$;$6$6$*NN  4ecj  4e  4e  JS  pboxo|o|  ~E  pF  pF  pF  Yb  4e  4e  %f  %f  %f0,GGG&*gmmK&@&@&G&GH^biHi&j&j&p&p&r&rG& k<+;+;M+J+J k 8'*l[hNi j j jO["o,"2"27N"K"K"KaoC"NN+mZa+m+mhk+m+mnnn$ { { {%eg%e%e\c%e%e%i%itx%i%y%yzzzzzzzz{ *//EEE"K%#L  	#! # #$[)=)=c;)O)OPP
>>'** LL!`s!`!`:V]K^!`!`aaa&NN?B?? # #D'--c*V2D2D2J2J2L2L.M.MsY_-```NII#  44a#4#4 	0 	0C**3////9mk}  	A  	A  	A 	z${SV_boVo  @I  LY  @Y  hp  hw  hy  hy  z  z  z  z%iZg  @R  _Q  _Q  IP  _Q  _Q  _Q  R  R  	R 3 3 35!5599D9IIJJJ 	o$xs1vv\d\k\m\mnnnn 3q6622222222	3sL   J_ 'KW)(_ )
X134X,'_ ,X11F_ 
aA;aaaz.app.tasks.processor_tasks.build_relations_taskc                     ddl m}  || j        |dt                    }t                              d|                                            ddi|                                S )Nr   )rebuild_document_relationsF)ry  dry_runrq   u   [build_relations] 完成: r   T)0app.services.tax_data_processor.relation_builderr  r    rq   rr   as_dict)r   ry  r  statss       r   build_relations_taskr  (  sk    [[[[[[&&tw:u]cdddE
KK>U]]__>>???t/u}}//r!   z1app.tasks.processor_tasks.fill_superseded_by_taskc                 V   dd l                     d                              d                              d          g}| j        }|                    t                                        t          j        dk    t          j                            d           t          j	                            d           t          j
                            d                                                     }t                              dt          |                      |                    t          j        t          j        t          j                                      t          j                            d                                                     }i |D ]A}t'          |j                  }|r)                    |g                               |           Bdt,          dt,          d	t.          ffd
}d}	|D ]}
|
j
        pdd d         t1          fddD                       s0d }|D ]B}|                              }|r)|                    d                                          } nC|sz|                    t          j                                      t          j        |k    t          j        |
j        k                                              }|s |||
j                  }|r|j        |
_        n||
_	        |	dz  }		 |                                 nH# t<          $ r;}|                                 t                               d|            Y d }~nd }~ww xY wt                              d|	 dt          |           d           dt          |          |	dS )Nr   uF   根据[^，,。\n]{0,10}?《[^》]{2,60}》[（(]([^）)]{3,80})[）)]uH   根据([\u4e00-\u9fa5]{2,15}[〔\[（(]\d{4}[〕\]）)]第?\s*\d+\s*号)u1   根据((?:国务院令|主席令)第\s*\d+\s*号)obsoleteu#   [fill_superseded_by] 候选文档: rQ   headcurrent_doc_idc                 X   t          |           }|sd S fd                    |g           D             }|sd S fd|D             }t          |          dk    r|d         S t          |          dk    r1                    d|           st          |          dk    r|d         S d S )Nc                 *    g | ]}|j         k    |S r,   )rl   )r8  itemr  s     r   r:  zKfill_superseded_by_task.<locals>._safe_normalized_match.<locals>.<listcomp>A  s(    oooDUYU\`nUnUn4UnUnUnr!   c                 4    g | ]}|j         	|j         v |S r,   )rP   )r8  r  r  s     r   r:  zKfill_superseded_by_task.<locals>._safe_normalized_match.<locals>.<listcomp>D  s-    WWW$TZWDJRVDVDVDVDVDVr!   r   r   z\d{4}   )r   rf   r   search)rQ   r  r  
normalizedmatchestitle_matches_redocs_by_normalized_numbers    ``   r   _safe_normalized_matchz7fill_superseded_by_task.<locals>._safe_normalized_match=  s    /
;;
 	4oooo$=$A$A*b$Q$Qooo 	4WWWW'WWW}"" ##w<<1#**Xz"B"Bc*ooY[F[F[1:tr!   r   r   c              3       K   | ]}|v V  	d S r#   r,   )r8  rE   r  s     r   	<genexpr>z*fill_superseded_by_task.<locals>.<genexpr>M  s'      ;;19;;;;;;r!   )u   废止u   失效r   u$   [fill_superseded_by] commit 失败: u$   [fill_superseded_by] 完成: 填充 r   r   Tr   r   filled)!recompiler    r8   r   r9   r   superseded_by_doc_idr{  superseded_by_doc_numberrV   rP  rR  rq   rr   r   rl   rP   rQ   r   
setdefaultr  rk   r   anyr  groupr[  r:   r<   rs   r   r   )r   ry  	_PATTERNSr    
candidatesindexed_docsindexed_docr  r  r  r  rQ   patmmatchedr}   r  r  r  s                   @@@r   fill_superseded_by_taskr  /  s8   fggilitit  vH  jI  jI  KN  KV  KV  WM  KN  KN  OI	B+&&--k.D
.RT_TtTxTxy}T~T~  AL  Ae  Ai  Ai  jn  Ao  Ao  q|  qM  qS  qS  TX  qY  qY  Z  Z  ^  ^  `  `J
KKGc*ooGGHHH88KNK,={?UVV]]^i^t^z^z{  _A  _A  B  B  F  F  H  HL13# U U/0FGG
 	U%00R@@GGTTT3 c 3        F  $*DSD1;;;;&:;;;;; 	
 	 	C

4  A WWQZZ--//
  	((;>**11+2HJ2VXcXfjmjpXpqqwwyy 	G,,ZsvFFG 	6'.zC$$+5C(!A
		 A A A
?A??@@@@@@@@A KKUvUUJUUUVVVc*ooHHHs   L" "
M',1M""M'z3app.tasks.processor_tasks.fill_effective_dates_taskr   c                    ddl m} h d} |            dt          dt          d z  ffd}| j        }|                    t
                                        t
          j                            |          t
          j	        
                    d                                                     }t                              d| d           d}d}	 |                    t
                                        t
          j                            |          t
          j	        
                    d                                         t
          j                                      |                                          }	|	sn|	D ]*}
|
j        pd
} |||
j                  }|r||
_	        |dz  }+	 |                                 nK# t*          $ r>}|                                 t                              d| d|            Y d }~nd }~ww xY w|t1          |	          z  }t                              d| d| d|            rt                              d| d|            d	||dS )Nr   MetadataExtractor>   	normative
regulationlocal_policymdr0   c                                          | |rt          |          nd           }|r|S |rt          |          nd S r#   )_extract_effective_daterk   )r  rS   explicit_datemetadata_extractors      r   _extractz+fill_effective_dates_task.<locals>._extractm  sL    *BB2ZdGns:jnoo 	!  ",6s:$6r!   u%   [fill_effective_dates] 需要回填: r   Tr   r   u,   [fill_effective_dates] commit 失败 offset=re   u   [fill_effective_dates] 进度: r   	    已填: u%   [fill_effective_dates] 完成: total= filled=r  ):app.services.tax_data_processor.parsers.metadata_extractorr  rk   r    r8   r   r9   doc_typein_rT   r{  r|  rq   rr   r}  rl   r~  rR  rV   rS   r<   rs   r   r   r   )r   ry  r  regulation_doc_typesr  r    r   r  offsetbatchr  r  r   r}   r  s                 @r   fill_effective_dates_taskr  g  s   \\\\\\FFF**,,7S 7t 7 7 7 7 7 7
 
BHH[!!(()=)A)ABV)W)WYdYsYwYwx|Y}Y}~~  E  E  G  GE
KKCCCCDDDFFY%%,,[-A-E-EFZ-[-[]h]w]{]{  }A  ^B  ^B  C  C  L  L  MX  M[  \  \  b  b  cm  n  n  r  r  t  t 	 	 	C%+BXb#.11F %+"!	WIIKKKK 	W 	W 	WKKMMMLLUUURSUUVVVVVVVV	W 	#e**WfWWuWWvWWXXX!Y" KKOOOvOOPPPev>>>s   ,G 
H	4HH	z0app.tasks.processor_tasks.strip_ocr_content_taskc                    ddl m} ddlm} | j        }|                    t                                         |t          j        	                    d          t          j        	                    d                              
                                }t          |          }t                              d| d           d}|D ]2}|j        pd}|                    |          }	|	|k    r|	|_        |d	z  }3	 |                                 nD# t           $ r7}
|                                 t                              d
|
             d }
~
ww xY wt                              d| d|            d||dS )Nr   )or_r0  u   %正文图片内容%u   %<!-- 图片%u   [strip_ocr_content] 找到 u    条含 OCR 内容的文档r   r   u#   [strip_ocr_content] commit 失败: u"   [strip_ocr_content] 完成: total=z	 cleaned=T)r   r   cleaned)rO  r  rN  r1  r    r8   r   r9   rV   likerR  r   rq   rr   strip_ocr_contentr<   rs   r   r   )r   r  r1  r    r  r   r  r  originalstrippedr}   s              r   strip_ocr_content_taskr    s   PPPPPP	B88K  ''K,H,M,MNd,e,egr  hD  hI  hI  JY  hZ  hZ  )[  )[  \  \  `  `  b  bDIIE
KKPePPPQQQG  '-2"44X>>x#+C qLG
		   
>1>>??? KKNUNNWNNOOOe@@@s   <D 
E2EEz=app.tasks.processor_tasks.redownload_missing_attachments_taskc                 N   ddl m} | j        } |dd           ddlm} |                    t                                        t          j        	                    d            |d           |d                    
                                }d}d}d}|D ]a}|j        pg }	d	}
t          |	          D ]\  }}|                    d
d          }|r t          j                            |          r>|                    d          p|                    dd          }|r|                    d          r1t"                              d|j         d| d           |dz  }|dz  }ddlm} d|v rf|j        r_|                    dd          }t1          |          dk    r6|j                            dd          d         } ||dz   |d                   }|st          j                            t4          j        d|j                  }t          j        |d           |                    d          p1t          j                            |                    d                    }t          j                            ||          }|dz  }t"                               d|j         d| d|            dd l!d|j        d}	 |||ffd	}tE          j#         |                      }n)# tH          $ r}d	tK          |          d}Y d }~nd }~ww xY w|d          r||d
<   |dz  }d}
q|dz  }t"                              d|j         d| d!|                    d"                      |
r	 dd#l&m'}  ||d           |(                    |           |)                                 # tH          $ rD}|*                                 t"          +                    d|j         d$|            Y d }~Yd }~ww xY wct"                               d%| d&| d'|            d|||d(S ))Nr   AttachmentParser<      r   r   r2  r4  r5  Fr   r   original_urlr   z/api/z[redownload] doc_id=r   u   ] 无有效 URL，跳过r   )urljoinz/zcfgk/r   ra   rW   T)rO   r   u   ] 缺失，重新下载: zBMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36)z
User-AgentRefererc                   K                        dd          4 d {V }|                    | |           d {V }|j        dk    rXt          |d          5 }|                    |j                   d d d            n# 1 swxY w Y   ddicd d d           d {V  S dd	|j         d
cd d d           d {V  S # 1 d {V swxY w Y   d S )NT   )follow_redirectsr   )headersrx  wbr   FzHTTP r   )AsyncClientrf   status_codero   writer   )r   r  r   clientr9  r|   httpxs         r   	_downloadz6redownload_missing_attachments_task.<locals>._download  s      $00$PR0SS X X X X X X XW]"(**S'*"B"BBBBBBB=C//!%dD!1!1 3Q !	 2 2 23 3 3 3 3 3 3 3 3 3 3 3 3 3 3$-t#4X X X X X X X X X X X X X X 05?Vq}?V?V#W#WX X X X X X X X X X X X X X X X X X X X X X X X X X X X X Xs;   9CB5CB	CB		C"C
CCr   r   u   ] 下载失败: r   flag_modifiedu    commit 失败: u   [redownload] 完成: missing=z downloaded=r   )r   missing_total
downloadedr   ),9app.services.tax_data_processor.parsers.attachment_parserr  r    rO  r3  r8   r   r9   rW   rP  rR  r   rf   rS  r   rT  
startswithrq   rt   rl   urllib.parser  rH   rsplitr   r\  ri   rj   rP   makedirsbasenamerstriprr   r  r   r   rs   rk   sqlalchemy.orm.attributesr  r;   r<   r   r   )r   r  r    ra  r  r  r  r   r  rW   r   r   r   r   r   r  ro  r   att_dirr   r  r  r   r}   r  r  s                            @r   #redownload_missing_attachments_taskr    s   ZZZZZZ	BRQ////******88K  ''(?(E(Ed(K(KWWUrMsMsu|u|  ~Y  vZ  vZ  [  [  _  _  a  aDMJF 9Q 9Qo+!+.. -	r -	rHC7762&&D t,, ''.))?SWWUB-?-?C #..11 ccfccccccddd"!,,,,,,CCN

3**u::??"~44S!<<Q?H!'(S.%(;;C 3',,x'<mSYWWGd3333wwvK"'*:*:3::c??*K*Kw||GT22QMKKbsvbbsbb]`bbcccLLL%ivy  wE  F  FG=(+W4 X X X X X X !YY[[11 = = =%*SVV<<=i  r"Fa
!pcfppcpp[a[e[efm[n[nppqqqq 	QQCCCCCCc=111s		 Q Q QOCFOOAOOPPPPPPPPQ	Q KKggg:gg_egghhhm:aghhhs0   %K))
L3L

L-;N**
O849O33O8z,app.tasks.processor_tasks.fix_json_null_taskc                    ddl m} | j        }|                     |d                    }|                     |d                    }	 |                                 nD# t
          $ r7}|                                 t                              d|             d }~ww xY w|j	        }|j	        }t          
                    d| d|            d||d	S )
Nr   r2  z
        UPDATE tax_documents
        SET supersedes = NULL
        WHERE supersedes IS NOT NULL AND jsonb_typeof(supersedes) = 'null'
    z
        UPDATE tax_documents
        SET "references" = NULL
        WHERE "references" IS NOT NULL AND jsonb_typeof("references") = 'null'
    u   [fix_json_null] commit 失败: u#   [fix_json_null] 完成: supersedes=z references=T)r   supersedes_fixedreferences_fixed)rO  r3  r    r   r<   rs   r   rq   r   rowcountrr   )r   r3  r    r1r2r}   	sup_fixed	ref_fixeds           r   fix_json_null_taskr$    s   	B	DD  k  l  l  
m  
mB	DD  s  t  t  
u  
uB
		   
:q::;;; II
KKXiXXYXXYYYPYZZZs   A   
B!*2BB!z5app.tasks.processor_tasks.fill_issuing_authority_taskc                    ddl m}  |            }| j        }|                    t                                        t          j                            d                                                     }t          
                    d| d           d}d}	 |                    t                                        t          j                            d                                         t          j                                      |                                          }|sn,|D ]}	d }
|	j        rHdd l}|                    d|	j                  }|r'|                    d                                          }
|
s.|	j        pdd d	         }|                    |	j        pd|          }
|
r|
|	_        |dz  }	 |                                 nK# t0          $ r>}|                                 t                              d
| d|            Y d }~nd }~ww xY w|t7          |          z  }t          
                    d| d| d|            t          
                    d| d|            d||dS )Nr   r  u'   [fill_issuing_authority] 需要回填: r   TuM   ^([\u4e00-\u9fa5][\u4e00-\u9fa5 ]{3,60}?)(?:公告|令|函|发|〔|\[|（|\()r   r   r   u.   [fill_issuing_authority] commit 失败 offset=re   u!   [fill_issuing_authority] 进度: r   r  u'   [fill_issuing_authority] 完成: total=r  r  )r  r  r    r8   r   r9   rR   r{  r|  rq   rr   r}  rl   r~  rR  rQ   r  matchr  r[  rV   _extract_issuing_authorityrP   r<   rs   r   r   r   )r   ry  r  	extractorr    r   r  r  r  r  	authorityr  r  r   r}   s                  r   fill_issuing_authority_taskr*    s   \\\\\\!!##I	BHH[!!(()F)J)J4)P)PQQWWYYE
KKE%EEEFFFFF[%%,,[-J-N-Nt-T-TUU^^_j_mnnttu  A  A  E  E  G  G 	 	 	CI~ 3    IIsux  vD  E  E 3 !

 0 0 2 2I [/52tt<%@@bRYZZ	 (1%!	YIIKKKK 	Y 	Y 	YKKMMMLLW&WWTUWWXXXXXXXX	Y 	#e**YYYYYQWYYZZZ/[0 KKQ%QQQQRRRev>>>s   <G 
H4HH	source_idc                    	 t          |dd           pd}t          |dd           pd}t          |dd           pd}|                    dd          }|                    dd          }|                    dd          }t          | |j        t          |dd           |j        d           t          t                                        |j        d||j        t          |d	d           t          |d
d           t          |dd           t          |dd           t          |dd           pd t          |dd           t          |dd           t          |dd           t          |dd           t          |dd           |||t          |dd           t          |dd           t          |dd           t          |dd           t          |dd           pdt          |dd           t          |dd           d                              dgi d|d|j        d	t          |d	d           d
t          |d
d           dt          |dd           dt          |dd           dt          |dd           pd dt          |dd           dt          |dd           dt          |dd           dt          |dd           dt          |dd           d|d|d|dt          |dd           dt          |dd           t          |dd           t          |dd           t          |dd           pdt          |dd           t          |dd           dt          j
                    d           }|                     |           |                                  d!S # t          $ rD}|                                  t                              d"|j         d#|            Y d }~d$S d }~wt"          $ rm |                                  dd l}t                              d%|j                    t                              d&|                                            Y d$S w xY w)'NrU   r   rV   r   r   rK   rX   r   rQ   rR   rS   rT   r   r  region_codeqa_question	qa_answerinterpretation_formrW   r   r   r   r   r   r   r   )rH   r.   r+  rP   rQ   rR   rS   rT   r   r  r-  r.  r/  r0  rU   rV   r   rK   rW   r   r   r   r   r   r   rH   r+  rP   )r   r   r   r   r   r   rD   r   Tu#   [upsert_v2] 写入冲突已忽略: r   Fu   [upsert_v2] 写入失败: u   [upsert_v2] 异常详情: )getattrr   r~   rH   rP   r   r   r   r   r   rC   r   r<   r	   r   rq   rt   rs   r   r   r   )	r    r   r+  rU   rV   r   r   r}   r   s	            r   _upsert_tax_document_v2r2     so	   v~t<<B"6+=tDDJv~t<<B#++FB77+33FB??#++FB77!"f&7'RXZhjnJoJo  {A  {G  :H  :H  	I  	I  	I%%,,8IWXdmu{  vB  OV  W]  _k  mq  Or  Or  FM  NT  Vi  ko  Fp  Fp  }D  EK  MY  [_  }`  }`  qx  y  AQ  SW  qX  qX  el  ms  uA  CG  eH  eH  eP  LP  [b  ci  ku  w{  [|  [|  JQ  RX  Zg  im  Jn  Jn  |C  DJ  LY  [_  |`  |`  ls  tz  |G	  I	M	  lN	  lN	  d	k	  l	r	  t	I
  K
O
  d	P
  d	P
  _
k
  ~
N  ]i  x  @F  HV  X\  x]  x]  kr  sy  {H  JN  kO  kO  _f  gm  o~  @D  _E  _E  U\  ]c  et  vz  U{  U{  LS  TZ  \l  nr  Ls  Ls  L  w  LS  TZ  \h  jn  Lo  Lo  |C  DJ  LX  Z^  |_  |_  s~,      U  U  fr  es  zE%  {F  HQ  zE%  SZ  \b  \h  zE%  jv  x  @F  HT  VZ  x[  x[  zE%  ]p  ry  z@  BU  W[  r\  r\  zE%  ^j  ls  tz  |H  JN  lO  lO  zE%  Qa  cj  kq  sC  EI  cJ  cJ  zE%  LX  Za  bh  jv  x|  Z}  Z}  ZE  AE  zE%  GQ  SZ  [a  cm  os  St  St  zE%  vC  EL  MS  Ub  dh  Ei  Ei  zE%  kx  zA  BH  JW  Y]  z^  z^  zE%  `k  mt  u{  }H  JN  mO  mO  zE%  Qf  ho  pv  xM  OS  hT  hT  zE%  Vd  fr  zE%  tF  HX  zE%  Zh  jv  zE%  xF  HO  PV  Xf  hl  Hm  Hm  zE%  o|  ~E  FL  N[  ]a  ~b  ~b  zE%  u|  }C   E T   V Z   u[   u[   n u   v |   ~ M!  O!S!  n T!  n T!  h!o!  p!v!  x!H"  J"N"  h!O"  h!O"  h!["  S"["  k"r"  s"y"  {"G#  I#M#  k"N#  k"N#  ^#e#  f#l#  n#z#  |#@$  ^#A$  ^#A$  X$c$  s${$  s$B%  s$D%  s$D%  zE%  zE%  zE%  U  F%  F%


4
		t   
XV=NXXUVXXYYYuuuuu   
E&2CEEFFFJ)2F2F2H2HJJKKKuus   N
N 
Q9OA6QQz-app.tasks.processor_tasks.process_source_taskr  end_pagec                 r    !" ddl m} ddlm}  j                            |                              |j        k                                                sdddS t           j        d          ""_
         j                                         t          t          j         j         j         j                  }	 | |	          t%                                                    }
	 t)           j        "d	t+          j                    d
           d  j                            t.          j                                      t.          j
        k                                              D             t4                              d dt9                     d           t;          t          j        t          j                  t          j         dg }d}t9          |
          }d}d}tC           dd           pd|||||d!ddl"m#}  |dd          tI          t          j%         j&        z            tO                    (                    dd           tS          j*                    !"fd!fd}tS          j+         |                       !d         }!d         }!d         }!d         },                                 |r|t[           j        |          z  }t)           j        "d d!t+          j                    "           |dk    r.                                 d||d#S # t^          $ r}dd l0}|1                                }t4          2                    d d$tg          |          j4         d%| d&|            ,                                 t)           j        "d'tg          |          j4         d%| t+          j                    (           dtI          |          dcY d }~S d }~ww xY w))Nr   )
DataSource)get_adapterFu   数据源不存在r   r   r   r   r   c                     h | ]\  }|S r,   r,   )r8  r   s     r   	<setcomp>z&process_source_task.<locals>.<setcomp>H  s,      F  F  Fdcc  F  F  Fr!   r   u   ] 已加载 u    条已有 URLr   r   concurrencyr  )r6   r7   r!  r  r  r  r  r  TrM   c                 ~  K   4 d {V  	                      |            d {V }|r-|j        rg }|j        D ]}	                    |d         t          t	                    |d         z            t
          j                   d {V }|d         rU|                    |d         |d         |d         |                    dd          |                    dd	          d
           ||_        t          j
        |           
                    | j                                       | j                   dxx         dz  cc<   j
                            t                                        t          j        | j        k                                              }|rR|j        r t'          j
        |j        |j                   j        dk    r d                             |j                   t-          d                   k    rFdxx         t/          j
        d                   z  cc<   d                                          ndxx         dz  cc<   nc# t2          $ rV}dxx         dz  cc<   t4                              d d| j         d|                     d                     Y d }~nd }~ww xY wdxx         dz  cc<   t;          j
        d         d         t=          dd         z   d                     d d d           d {V  d S # 1 d {V swxY w Y   d S )Nr   r   base_dirr   r   sizer   typer   )r   r   r   r=  r>  r6   r   qa_12366r  r  r   r7   r   u   ] 文档处理异常: r   Tr   r!  r   c   r   )parse_documentrW   download_attachmentrk   r   ri   r   r  rf   r2  r    r  r   r;   r8   r   r9   rH   r:   r   rl   coder   r   r   rs   rq   r   r   rG   min)r  parsedr  r   dlsavedr}   r  adapterattachment_parserr  existing_urlsr   r   save_dir_sourcer   semsourcer+  stater>   r-   s          r   _process_onez)process_source_task.<locals>._process_oneZ  s'      a a a a a a a al#*#9#9$#?#???????F 3!- <)+J'-'9 l l+<+P+PQTUZQ[]`aefuavavy|  ~D  zE  bE  ^F  ^F  QY  Qf+P  ,g  ,g  &g  &g  &g  &g  &g  &g#%i= !l$.$5$5s6{SVW\S]gijpgq{}  |B  |B  CI  KL  |M  |M  WZ  W^  W^  _e  gi  Wj  Wj  7k  7k  %l  %l  %l1;F./KKK",,TX666%))$(333o...!3... $k : : A A+BX\`\dBd e e k k m m  M%1 ` 8%(FL^ _ _ _%{j88 %&: ; B B58 L L Lu%9:;;?PPP!"23337J47T]_dey_z  CH  8I  8I  8I  I333!"67==???n---2---  l l l.)))Q.)))LL!VW!V!VDH!V!VST!V!V!Z!Zei!Z!j!jkkkkkkkkl k"""a'"""TWd%:P_des_t  @C  DF  IN  OZ  I[  D[  ]_  @`  @`  a  a  a  a=a a a a a a a a a a a a a a a a a a a a a a a a a a a a a as=   L,II*)L,*
K
4AK L,K

AL,,
L69L6c                  (  K   g }                                2 3 d {V }                    |j                  rdxx         dz  cc<   2dk    r6
s4|j        	v r+                    |j                   dxx         dz  cc<   n|                     t          j         |                               t          |           dz  k    rgt          j        | ddi d {V }|D ]9}t          |t                    r"t                              d d	| |
           :|                                  6 | rSt          j        | ddi d {V }|D ];}t          |t                    r"t                              d d	| |
           :d S d S )N)r  r3  r6   r   r      return_exceptionsTr   u   ] gather异常: )exc_info)list_all_documentsr   r   r  r  r   ensure_futurer   gather
isinstancers   rq   r   r   )tasksr  r  r9  rP  rI  r  r9  r3  rK  r   r/   r  rO  r-   s       r   _runz!process_source_task.<locals>._run{  s'     E%88JYa8bb " " " " " " "d%%dh// /***a/***6>>5>x=00",,TX666o...!3... W2<<3E3EFFGGGu::q00$+NE$RT$R$RRRRRRRG$ W W%a33 W"LL)IW)I)Ia)I)ITULVVVKKMMM c   S ' N N NNNNNNN  S SA!!Y// S%E%E%E!%E%EPQRRR	S SS Ss   D:r6   r7   r  r  r@  r   r   r   )r   r-   r+  r6   r7   u   ] process_source_task 异常: re   
r   r   )5app.models.tax_datar5  (app.services.tax_data_processor.adaptersr6  r    r8   r9   rl   r:   r?   r+  r<   r   ri   r   r   request_delay_minrequest_delay_maxr   r   rG   r   rC   r   rH   rR  rq   rr   r   r   r   r   r   r1  r  r  rk   rj   rD  r   rm   r   	Semaphorer   r  r   r   BaseExceptionr   r   r   r>  r(   )#r   r-   r+  r/   r   r  r3  r5  r6  r  r  r  r  r6   r7   r!  r  rZ  r}   r   tbr  rP  rI  rJ  r  r9  rK  r   r   rL  rM  rN  rO  r>   s#   ```````              @@@@@@@@@@@@@@r   r  r  8  sm   ......DDDDDDW]]:&&--jmy.HIIOOQQF A +?@@@twD99DDNGNNX%ESYSeqw  rJ  V\  Vn  o  o  oFk&&))G"7	JJJJ3355N^3TWd9ARAR]^____ F  FdgmmK<R.S.S.Z.Z[f[pt}[}.~.~  /C  /C  /E  /E  F  F  FOOOS-?-?OOOPPP'1KU]Usttt	,N++	fmT::?a"/\e  zH  `r  s  s^^^^^^,,RQGGGh3fkABB_##D4#@@@,,	a 	a 	a 	a 	a 	a 	a 	a 	a 	a 	a 	a 	a 	a 	a 	a 	a 	a 	aB	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S. 	DDFFo.^,/0"#78 	g1$'9FX`effffNTWd;S[SbSdSdeeee1G)^k  ~J  K  K  	K 3 3 3!!##]]]Q@P]]TU]]Y[]]^^^TWd8d1ggFVC]C]Z[C]C]ltl{l}l}~~~~ 3q66222222223s    I$M+ +
P65B6P1+P61P6z-app.tasks.processor_tasks.backfill_media_task2   c                 2   ddl m} ddlm} ddlm} ddlm} ddlm	} ddl
m} | j        } |dd	dd
          }	 |            }
 |dd          } |            }|                    t                                        t          j                            d           t          j                            d                                                     }t*                              d|            dx}}	 |                    t                                        t          j                            d           t          j                            d                                         t          j                                      |                                          }|snI|D ]#}	 |j        r|                    |j                  nd }t;          t<          j        |r|d         ndz            }tA          |          !                    dd           tE          j#        |	$                    |j        |j%        |j        pd|                    }|r|&                    d          s!|d	z  }g |_        |'                                 |j(        dk    r"g |_        |'                                 |d	z  }|
)                    |j        |j%                  } ||d         d          }|j*        }|dk    rdd l+}ddl,m-} t;          t<          j        dz  |.                    |j%        /                                          0                                d d         z  dz            }tE          j#         |||                    }g }n|dk    rdd l+}ddl,m1} t;          t<          j        dz  |.                    |j%        /                                          0                                d d         z  dz            }g }tE          j#         |||                    }nK|
2                    ||j%                  }g }|d         rtA          |          d|j         z  dz  }|!                    dd           tg          |d                   D ]\  } }!|!d         }"tA          |"4                    d           d                   j5        6                                pd!}#tE          j#        |7                    |"t;          |d"| d#|# z            t<          j8        $                    }$|$d         r&|9                    |"|$d%         |!d&         d'd(           g }|d         r
tA          |          d|j         z  dz  }|!                    dd           tg          |d                   D ]\  } }%|%d         }"tA          |"4                    d           d                   j5        6                                pd)}#tE          j#        |7                    |"t;          |d*| d#|# z            t<          j8        $                    }$|$d         r |9                    |"|$d%         d'g d+           tu          |j;        pg           }&d,}'|&D ]}(|(&                    d%          rtE          j#        |7                    |(d-         t;          tA          |          |(d         z            t<          j8        $                    }$|$d         r|$d%         |(d%<   d}'||_        |pd |_<        |j(        dk    r|d.         |_=        |d/         |_>        |'r|&|_;         ||d0           |'                                 |d	z  }# t~          $ rI})|@                                 |d	z  }t*          A                    d1|j         d2|)            Y d })~)d })~)ww xY wt*                              d3| d4|            t*                              d5| d6| d4|            d|||d7S )8Nr   )BeautifulSoupr  r   r  )
HTMLParserr   r  r   r   ra   r  u   [backfill_media] 待回填: Tr   backfillrM   r   qarU   lxmlimage)_download_inline_imageschinatax_zcjdr  imagesvideo)_download_inline_videosvideos	backfill_src?rB  img_03dr;  r   altr   )src_originalr   rv  ocr_textz.mp4vid_)rw  r   
transcript	keyframesFr   rV   r   rW   z[backfill_media] doc_id=u	    失败: u    [backfill_media] 进度 success=r   u   [backfill_media] 完成: total=r   )r   r   r6   r7   )Bbs4re  r  r  r  r   r  r  3app.services.tax_data_processor.parsers.html_parserrf  r  r   r    r8   r   r9   rU   rP  r   r{  r|  rq   rr   r}  rl   r~  rR  r.   r   rk   ri   rj   r   rm   r   r   r(  rH   rf   r<   r  parser0  hashlib>app.services.tax_data_processor.adapters.chinatax_zcjd.adapterrk  sha256encode	hexdigestro  extract_mediar   splitsuffixrW  rC  r   r  r  rW   r   rV   r   rs   r   r   )*r   ry  re  r  r   r  rf  r   r    r  html_parserrJ  r
  r   r6   r7   r  r  r  r  r   rF  cleaned_soupinterp_hashlibrk  img_dirr   r   ro  vid_dirmediar   img_inforr  extrG  vid_inforW   att_changedr   r}   s*                                             r   backfill_media_taskr    s	   !!!!!!777777TTTTTTZZZZZZNNNNNNPPPPPP	B_RQ!qQQQF*,,K((CCC**,,HH[!!(()A)G)G)M)M{OhOlOlmqOrOrssyy{{E
KK6u66777#$$MLX^%%,,[-E-K-KD-Q-QS^SlSpSpquSvSvww  A  A  BM  BP  Q  Q  W  W  Xb  c  c  g  g  i  i 	 S	N S	NCRNTWTcm+??PPPimx4&8`vV`abbX$$TD$AAA V%?%?@PRUR`bebqbvuv  yA  &B  &B  C  C VZZ	%:%:  A%L(*C%IIKKK<4''(*C%IIKKK!Q&M$**3+;S^LL,}VN-CVLL0W$$....      "("7/"IHOO\_\j\q\q\s\sLtLtL~L~  MA  MA  BE  CE  BE  MF  #F  IQ  #Q  R  RG$+K0G0GV]0^0^$_$_M$&MMw&&....     
 "("7/"IHOO\_\j\q\q\s\sLtLtL~L~  MA  MA  BE  CE  BE  MF  #F  IQ  #Q  R  RG$&M$+K0G0GV]0^0^$_$_MM'55lCNSSE$&MX H"&x..3Gsv3G3G"G("RdTBBB-6uX-G-G H HMC"*5/C"&syy~~a'8"9"9"@"F"F"H"H"RFC!(->-R-RSVX[\cf{mpf{f{vyf{f{\{X|X|  HP  H]-R  .^  .^  "_  "_B!)} H - 4 4cSUV\S]fnotfu  DF  6G  6G  !H  !H  !H$&MX C"&x..3Gsv3G3G"G("RdTBBB-6uX-G-G C CMC"*5/C"&syy~~a'8"9"9"@"F"F"H"H"RFC!(->-R-RSVX[\cf{mpf{f{vyf{f{\{X|X|  HP  H]-R  .^  .^  "_  "_B!)} C - 4 4cSUV\S]mo  A  6B  6B  !C  !C  !C"3?#8b99#& + +Cwwv !  %6%J%J3u:WZ[_`h[i[ilopvlw[wWxWx  DL  DY%J  &Z  &Z  [  [B)} +&(jF&*$1!$1$9T!<4''+12D+EC('-n'=C$ 6&1CO!M#}555		" N N N!LLLLLMMMMMMMMN 	\}\\l\\]]]qX^r KKg%gg-ggYegghhhem]ijjjs&   ,C]7+]79R<]77
_
>__
)r   N)r   Fr#   )rx  )Nr  NN)r   )r   FNN)rc  )Dr   rS  r   r  r   celeryr   sqlalchemy.dialects.postgresqlr   r   sqlalchemy.excr	   app.celery_appr
   
app.configr   r\  r   r   r  r   2app.services.tax_data_processor.checkpoint_managerr   3app.services.tax_data_processor.incremental_updaterr   5app.services.tax_data_processor.knowledge_base_clientr   r  r   1app.services.tax_data_processor.relation_identityr   7app.services.tax_data_processor.staging_package_builderr   common_loggingr   r(   rq   ri   r   rk   r   r?   rG   dictr~   boolr   r  r   r   r>   r"  r%  r)  r,  rw  r  r  r  r  r  r  r  r$  r*  r2  r  r  r,   r!   r   <module>r     s    				                   > > > > > > ) ) ) ) ) ) % % % % % % # # # # # # ? ? ? ? ? ? ? ? P P P P P P P P P P P P R R R R R R U U U U U U L L L L L L X X X X X X Y Y Y Y Y Y % % % % % %	H		<>>    4   S s # J\    -    Xc Xt X X X X*T d    0 4 D    4 ': 4PS9 ]` uy  }A  vA   KN    8 d5H  PA  B  B  Bf3 f3 f33 f3c f3Y] f3 f3 f3 B  Bf3P l4efff3c 3 3 3 3 gf3. l4ijjjn n n n kjn* l4mnnn3C 3 3 3 on3. l4pqqq`K `KDI4D `K `K `K rq`KD l4fgggE ES E E E hgE. l4ghhha3 a3S	D0@ a3TW a3gklogpswgw a3  HK  NR  HR a3 a3 a3 iha3F l4deee0 03 0 0 0 fe0 l4ghhh5I 5Ic 5I 5I 5I ih5In l4ijjj!? !? !? !? !? kj!?F l4fgggA A hgA0 l4stttDi Di utDiL l4bccc[ [ dc[  l4klll!? !?# !? !? !? ml!?F3 4    0 d5HO~k3 k3s k3s k3# k3UY k3mp k3  BE k3 k3 k3 @k3Z d5HO~jk jk# jk jk jk @jk jk jkr!   