
    Xj              
          d dl m Z mZ d dlmZmZmZ d dlmZmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZ  ee          Z e            Zd	Zd
ZdZdZe                    d           ee          fde
fd            Ze                    d           ee          fde
fd            Ze                    d           ee          fde
fd            Ze                    d          d ee          fdedz  de
fd            Z e                    d          d edd          d  ee          fdedz  dedede
fd            Z!dS )    )datetime	timedelta)	APIRouterDependsQuery)casefunc)Session)get_db)DataProcessingTaskTaxDocument)CategoryProcessor)
get_loggerg      ?   2   g?z	/overviewdbc                 J  K   |                      t          j        t          j                                                            pd}|                      t          j        t          j                                                t          j                            d                                                    pd}|                      t          j        t          j                                                t          j	        dk                                              pd}|                      t          j        t          j                                                t          j	        dk                                              pd}|                      t          j        t          j                                                            pd}|                      t          j        t          j                                                t          j        dk                                              pd}|                      t          j        t          j                                                t          j        dk                                              pd}|                      t          j        t          j                                                t          j        dk                                              pd}||||||z
  |rt          ||z  d          nd||||d	
S )
Nr   Tfailedpendingrunning	completed           )
total_documentsimported_documentspending_documentsfailed_documentsnot_imported_documentsimport_ratetotal_tasksrunning_taskscompleted_tasksfailed_tasks)queryr	   countr   idscalarfilteris_importedis_processing_statusr   statusround)	r   
total_docsimported_docsfailed_docspending_docsr    r!   r"   r#   s	            >/lsinfo/ai/hellotax_ai/data_center/backend/app/api/v1/stats.pyget_overview_statsr3      s     $*[^4455<<>>C!JHHTZ7788??@W@[@[\`@a@abbiikkpopM((4:kn5566==k>[_g>ghhooqqvuvK88DJ{~6677>>{?\`i?ijjqqssxwxL((4:&8&;<<==DDFFK!KHHTZ(:(=>>??FFGYG`dmGmnnuuww|{|Mhhtz*<*?@@AAHHI[IbfqIqrryy{{  A  @AO88DJ'9'<==>>EEFXF_ckFkllssuuzyzL)eq  HS  oy  |I  oI  BL  ZU  Z_  `m  pz  `z  |}  Z~  Z~  Z~  RU  fq  DQ  fu  GS  T  T  T    z/by-categoryc                   K   t                      }|                                }|                     t          j        t          j        t          j                                      d          t          j	        t          t          j                            d          dfd                                        d          t          j	        t          t          j        dk    dfd                                        d          t          j	        t          t          j        dk    dfd                                        d                                        t          j                                                  }d	 |D             }g }|D ]}|                    |d
         ddddd          }|                    |d
         |d         |d         |d         |d         |d         |d         z
  |d         |d         |d         rt%          |d         |d         z  d          nd|d         rt%          |d         |d         z  d          ndd
           d|iS )NtotalT   r   )else_importedr   r   c           	          i | ]P}|j         |j        t          |j        pd           t          |j        pd           t          |j        pd           dQS )r   r6   r9   r   r   )category_idr6   intr9   r   r   .0rows     r2   
<dictcomp>z&get_category_stats.<locals>.<dictcomp>&   s      q  q  q  eh39#clFWVWBXBXdghkhrhwvwdxdx  HK  LO  LY  L^  ]^  H_  H_  "`  "`  q  q  qr4   r&   r;   nametyper   r   )
r&   rB   rC   r6   r9   not_importedr   r   r   	fail_rate
categories)r   get_all_categoriesr$   r   r<   r	   r%   r&   labelsumr   r)   r*   r+   group_byallgetappendr-   )r   category_processorall_categoriesrowsstat_maprF   css           r2   get_category_statsrT   !   sB     *,,'::<<N88K+TZ-G-G-M-Mg-V-VX\X`aegrg~  hC  hC  DH  hI  hI  KL  gM  UV  bW  bW  bW  YX  YX  Y^  Y^  _i  Yj  Yj  lp  lt  uy  {F  {X  \d  {d  fg  zh  pq  ur  ur  ur  ls  ls  ly  ly  zB  lC  lC  EI  EM  NR  T_  Tq  u@  T@  BC  SD  LM  NN  NN  NN  EO  EO  EU  EU  Va  Eb  Eb  c  c  l  l  mx  mD  E  E  I  I  K  KD q  q  lp  q  q  qHJ u uLL4A1XY"Z"Z[[4!F)QvYYZ[bYcqrs}q~  QR  SZ  Q[  ^_  `j  ^k  Qk  wx  yA  wB  QR  S^  Q_  XY  Za  Xb  pk  pu  vw  xB  vC  FG  HO  FP  vP  RS  pT  pT  pT  hk  `a  bi  `j  zs  z  @A  BJ  @K  NO  PW  NX  @X  Z[  z\  z\  z\  ps  t  t  	u  	u  	u  	u*%%r4   z/processing-statusc                 P  K   |                      t          j        t          j        t          j                                      d                                        t          j                                                  }d |D             }t          |
                                          |                    dd          |                    dd          |                    dd          |                    dd          dfd	|                                D             d
S )Nr%   c                 (    i | ]}|j         |j        S  )r+   r%   r>   s     r2   rA   z)get_processing_status.<locals>.<dictcomp>0   s    EEEC)39EEEr4   r   r   
processingr   r   )r   rX   r   r   c                 F    i | ]\  }}|rt          |z  d           ndS )r   r   )r-   )r?   r,   r%   r6   s      r2   rA   z)get_processing_status.<locals>.<dictcomp>2   s      sP  sP  sP  jw  jp  rw  tz  W\  |e  |A  BG  JO  BO  QR  |S  |S  |S  be  sP  sP  sPr4   )r6   status_distributionrates)r$   r   r+   r	   r%   r&   rH   rJ   rK   rI   valuesrL   items)r   rP   distributionr6   s      @r2   get_processing_statusr_   -   s     88K14:kn3M3M3S3ST[3\3\]]ffgr  hE  F  F  J  J  L  LDEEEEEL##%%&&E|?O?OPY[\?]?]mym}m}  K  MN  nO  nO  ^j  ^n  ^n  oz  |}  ^~  ^~  JV  JZ  JZ  [c  ef  Jg  Jg  4h  4h  sP  sP  sP  sP  {G  {M  {M  {O  {O  sP  sP  sP  Q  Q  Qr4   z/qualityNr<   c                   K   |                     t                    }| r#|                    t          j        | k              }|                                }|                    t          j                            d           t          j        dk                                              }|                    t          j        dk                                              }t          j	                    t          t                    z
  }|                    t          j        dk    t          j        |k                                              }|rt          ||z  d          nd}g }	|t          k    r-|	                    dddt           d	| d
t           dd           |t           k    r'|	                    ddd|dd
t           ddd           t          |t#          |d          z  d          }
|
dk    r|	                    ddd|
ddd           |	r7t$                              dt)          |	           dd |	D                         |||||
||	t)          |	          t*          t          t          t           dd	S )Nr   r   )hoursr   r   criticalHIGH_RECENT_FAILURESu   最近 u   h 内失败文档数 u    超过阈值 u   ，请检查爬虫/解析服务)levelcodemessagewarningHIGH_FAILURE_RATEu   处理失败率 z.1%z.0%u-   ，建议检查目标网站结构是否变化r7   g?HIGH_EMPTY_CONTENT_RATEu   正文缺失率 uB   （成功文档中正文为空），建议检查 HTML 清洗规则u   [质量告警] u    条告警: c                     g | ]
}|d          S )re   rW   )r?   as     r2   
<listcomp>z%get_quality_stats.<locals>.<listcomp>H   s    B]B]B]QR1V9B]B]B]r4   )low_quality_scorefailed_alert_countfailed_alert_window_hoursfailed_rate_alert)	r   r   rE   no_content_documentsno_content_raterecent_failed_24halertsalert_count
thresholds)r$   r   r(   r<   r%   content_hashr*   r+   r   utcnowr   _FAILED_ALERT_WINDOW_HOURS
updated_atr-   _FAILED_ALERT_COUNTrM   _FAILED_RATE_ALERTmaxloggerrg   len_LOW_QUALITY_THRESHOLD)r<   r   r$   r6   
no_contentfailed_totalcutoffrecent_failedrE   rt   rr   s              r2   get_quality_statsr   4   sA     HH[!!E E[4CDDKKMMEk6::4@@+B_cnBnoouuwwJ<< = IJJPPRRL_1K!L!L!LLFLL!>(!JKLbflLlmmssuuM27@lU*A...SIF+++
4J  Xda{  Xd  Xd  S`  Xd  Xd  pC  Xd  Xd  Xd  e  e  	f  	f  	f&&&	3F  TIfo  TI  TI  TI  DV  TI  TI  TI  TI  J  J  	K  	K  	KJUA6::O$	3L  ZDl{  ZD  ZD  ZD  ZD  E  E  	F  	F  	F a_V__B]B]V\B]B]B]__```$,U^  yC  Xg  ~K  W]  nq  rx  ny  ny  _u  M`  Y  pB  IC  IC  D  D  Dr4   z/failed-documents   )lelimitskipc                   K   |                     t                                        t          j        dk              }| r#|                    t          j        | k              }|                                }|                    t          j                                                  	                    |          
                    |                                          }|||d |D             dS )Nr   c                     g | ]V}|j         |j        |j        |j        |j        |j        |j        |j        |j        r|j        	                                nd d	WS )N)	r&   title
source_urlr<   
doc_numberr+   rw   r)   rz   )
r&   r   r   r<   r   r+   rw   r)   rz   	isoformat)r?   ds     r2   rl   z(get_failed_documents.<locals>.<listcomp>R   s      Dr  Dr  Dr  hi14Z[Zaqrq}  OP  O\  lm  lx  OP  Ob  tu  tB  ST  S`  LM  LX  pb  pq  p|  pF  pF  pH  pH  pH  ^b  Ec  Ec  Dr  Dr  Drr4   )r6   r   r   r]   )r$   r   r(   r+   r<   r%   order_byrz   descoffsetr   rK   )r<   r   r   r   r$   r6   docss          r2   get_failed_documentsr   K   s      HH[!!(()F()RSSE E[4CDDKKMME>>+0557788??EEKKERRVVXXDD5  Dr  Dr  mq  Dr  Dr  Dr  s  s  sr4   )"r   r   fastapir   r   r   
sqlalchemyr   r	   sqlalchemy.ormr
   app.databaser   app.models.tax_datar   r   2app.services.tax_data_processor.category_processorr   common_loggingr   __name__r~   routerr   ry   r{   r|   rL   r3   rT   r_   r=   r   r   rW   r4   r2   <module>r      s   ( ( ( ( ( ( ( ( - - - - - - - - - - ! ! ! ! ! ! ! ! " " " " " "       ? ? ? ? ? ? ? ? P P P P P P % % % % % %	H		 
    K)0 	T 	T 	T 	T 	T 	T N)0 	& 	& 	& 	& 	& 	&  !!,3GFOO Q QG Q Q Q "!Q J48ggfoo D Dt Dg D D D D,   7;bUXHYHYHYeft{t{  }C  uD  uD s sC$J sC sad sls s s s ! s s sr4   