o
    ³æÝi7'  ã                
   @   sj  d Z ddlmZ ddlmZmZ ddlmZmZmZ ddl	m
Z
mZ ddlmZ ddlmZ ddlmZ dd	lmZmZmZ dd
lmZ eƒ ZdZdZdZdZe d¡eeƒfdefdd„ƒZe d¡eeƒfdefdd„ƒZe d¡eeƒfdefdd„ƒZ e d¡deeƒfdee! defdd„ƒZ"e d¡deddd deeƒfdee! d!e!d"e!defd#d$„ƒZ#dS )%uB   ç»Ÿè®¡ä¿¡æ¯ API â€” å®žçŽ°çœŸå®žæŸ¥è¯¢ + è§£æžè´¨é‡ç›‘æŽ§å‘Šè­¦é    )ÚOptional)ÚdatetimeÚ	timedelta)Ú	APIRouterÚDependsÚQuery)ÚfuncÚcase)ÚSession)Úlogger)Úget_db)ÚTaxDocumentÚDataProcessingTaskÚProcessingLog)ÚCategoryProcessorg      à?é   é2   gš™™™™™¹?z	/overviewÚdbc           	      Ã   sL  |   t tj¡¡ ¡ pd}|   t tj¡¡ tj d¡¡ ¡ p!d}|   t tj¡¡ tj	dk¡ ¡ p4d}|   t tj¡¡ tj	dk¡ ¡ pGd}|   t t
j¡¡ ¡ pTd}|   t t
j¡¡ t
jdk¡ ¡ pgd}|   t t
j¡¡ t
jdk¡ ¡ pzd}|   t t
j¡¡ t
jdk¡ ¡ pd}|||||| |ržt|| dƒnd||||d	œ
S )
u'   èŽ·å–æ€»ä½“ç»Ÿè®¡ï¼ˆæ–‡æ¡£ + ä»»åŠ¡ï¼‰r   TÚfailedÚpendingÚrunningÚ	completedé   ç        )
Útotal_documentsÚimported_documentsÚpending_documentsÚfailed_documentsÚnot_imported_documentsÚimport_rateÚtotal_tasksÚrunning_tasksÚcompleted_tasksÚfailed_tasks)Úqueryr   Úcountr   ÚidÚscalarÚfilterÚis_importedÚis_Úprocessing_statusr   ÚstatusÚround)	r   Ú
total_docsÚimported_docsÚfailed_docsÚpending_docsr    r!   r"   r#   © r2   ú>/lsinfo/ai/hellotax_ai/data_center/backend/app/api/v1/stats.pyÚget_overview_stats   sd   €þýþýþýþýþýþýör4   z/by-categoryc                 Ã   s\  t ƒ }| ¡ }|  tjt tj¡ d¡t 	t
tj d¡dfdd¡ d¡t 	t
tjdkdfdd¡ d¡t 	t
tjdkdfdd¡ d¡¡ tj¡ ¡ }d	d
„ |D ƒ}g }|D ]Q}| |d dddddœ¡}| |d |d |d |d |d |d |d  |d |d |d r”t|d |d  dƒnd|d r¤t|d |d  dƒnddœ
¡ qXd|iS )u9   èŽ·å–æŒ‰åˆ†ç±»çš„æ–‡æ¡£æ•°ã€å¯¼å…¥æ•°ã€å¤±è´¥æ•°ç»Ÿè®¡ÚtotalTé   r   )Úelse_Úimportedr   r   c                 S   s>   i | ]}|j |jt|jpd ƒt|jpd ƒt|jpd ƒdœ“qS )r   ©r5   r8   r   r   )Úcategory_idr5   Úintr8   r   r   ©Ú.0Úrowr2   r2   r3   Ú
<dictcomp>d   s    úüÿz&get_category_stats.<locals>.<dictcomp>r&   r9   ÚnameÚtyper   r   )
r&   r@   rA   r5   r8   Únot_importedr   r   r   Ú	fail_rateÚ
categories)r   Úget_all_categoriesr$   r   r:   r   r%   r&   ÚlabelÚsumr	   r)   r*   r+   Úgroup_byÚallÚgetÚappendr-   )r   Úcategory_processorÚall_categoriesÚrowsÚstat_maprD   ÚcÚsr2   r2   r3   Úget_category_statsN   sN   €"ÿÿ÷óù
  öÿrR   z/processing-statusc                 ƒ   sŒ   |   tjt tj¡ d¡¡ tj¡ ¡ }dd„ |D ƒ}t	| 
¡ ƒ‰ ˆ | dd¡| dd¡| dd¡| dd¡d	œ‡ fd
d„| ¡ D ƒdœS )u   èŽ·å–å¤„ç†çŠ¶æ€åˆ†å¸ƒr%   c                 S   s   i | ]}|j |j“qS r2   )r+   r%   r<   r2   r2   r3   r?   Ž   s    z)get_processing_status.<locals>.<dictcomp>r   r   Ú
processingr   r   )r   rS   r   r   c                    s(   i | ]\}}|ˆ rt |ˆ  d ƒnd“qS )r   r   )r-   )r=   r,   r%   ©r5   r2   r3   r?   ™   s    ÿÿ)r5   Ústatus_distributionÚrates)r$   r   r+   r   r%   r&   rF   rH   rI   rG   ÚvaluesrJ   Úitems)r   rN   Údistributionr2   rT   r3   Úget_processing_statusƒ   s(   €þú



ü
þørZ   z/qualityNr:   c                 Ã   s‚  |  t¡}| r| tj| k¡}| ¡ }| tj d¡tjdk¡ ¡ }| tjdk¡ ¡ }t 	¡ t
td }| tjdktj|k¡ ¡ }|rMt|| dƒnd}g }	|tkrh|	 ddd	t› d
|› dt› ddœ¡ |tkr~|	 ddd|d›dtd›ddœ¡ t|t|dƒ dƒ}
|
dkrš|	 ddd|
d›ddœ¡ |	r®t dt|	ƒ› ddd„ |	D ƒ› ¡ |||||
||	t|	ƒttttdœdœ	S )u©   
    è§£æžè´¨é‡ç»Ÿè®¡

    - æŒ‰ content_hash åˆ¤æ–­ï¼šNULL = è§£æžå¤±è´¥ï¼ˆæœªæˆåŠŸæå–æ­£æ–‡ï¼‰
    - æœ€è¿‘ 24h å¤±è´¥æ–‡æ¡£è¶‹åŠ¿
    - æ˜¯å¦è§¦å‘å‘Šè­¦
    Nr   r   )Úhoursr   r   ÚcriticalÚHIGH_RECENT_FAILURESu   æœ€è¿‘ u   h å†…å¤±è´¥æ–‡æ¡£æ•° u    è¶…è¿‡é˜ˆå€¼ u   ï¼Œè¯·æ£€æŸ¥çˆ¬è™«/è§£æžæœåŠ¡)ÚlevelÚcodeÚmessageÚwarningÚHIGH_FAILURE_RATEu   å¤„ç†å¤±è´¥çŽ‡ z.1%z.0%u-   ï¼Œå»ºè®®æ£€æŸ¥ç›®æ ‡ç½‘ç«™ç»“æž„æ˜¯å¦å˜åŒ–r6   gš™™™™™©?ÚHIGH_EMPTY_CONTENT_RATEu   æ­£æ–‡ç¼ºå¤±çŽ‡ uB   ï¼ˆæˆåŠŸæ–‡æ¡£ä¸­æ­£æ–‡ä¸ºç©ºï¼‰ï¼Œå»ºè®®æ£€æŸ¥ HTML æ¸…æ´—è§„åˆ™u   [è´¨é‡å‘Šè­¦] u    æ¡å‘Šè­¦: c                 S   s   g | ]}|d  ‘qS )r_   r2   )r=   Úar2   r2   r3   Ú
<listcomp>ñ   s    z%get_quality_stats.<locals>.<listcomp>)Úlow_quality_scoreÚfailed_alert_countÚfailed_alert_window_hoursÚfailed_rate_alert)	r   r   rC   Úno_content_documentsÚno_content_rateÚrecent_failed_24hÚalertsÚalert_countÚ
thresholds)r$   r   r(   r:   r%   Úcontent_hashr*   r+   r   Úutcnowr   Ú_FAILED_ALERT_WINDOW_HOURSÚ
updated_atr-   Ú_FAILED_ALERT_COUNTrK   Ú_FAILED_RATE_ALERTÚmaxr   ra   ÚlenÚ_LOW_QUALITY_THRESHOLD)r:   r   r$   r5   Ú
no_contentÚfailed_totalÚcutoffÚrecent_failedrC   rm   rk   r2   r2   r3   Úget_quality_stats    s|   €

þü	ÿþüÿüÿ
üÿ
üÿ$ü÷r}   z/failed-documentséÈ   )ÚleÚlimitÚskipc                 Ã   sl   |  t¡ tjdk¡}| r| tj| k¡}| ¡ }| tj ¡ ¡ 	|¡ 
|¡ ¡ }|||dd„ |D ƒdœS )u¿   
    èŽ·å–å¤„ç†å¤±è´¥çš„æ–‡æ¡£åˆ—è¡¨ï¼ˆä¾›äººå·¥å¤æ ¸ç•Œé¢ä½¿ç”¨ï¼‰

    - æŒ‰ updated_at é™åºï¼ˆæœ€æ–°å¤±è´¥çš„ä¼˜å…ˆï¼‰
    - è¿”å›ž source_url + title + category_id + updated_at
    r   c                 S   sD   g | ]}|j |j|j|j|j|j|j|j|jr|j 	¡ nd dœ	‘qS )N)	r&   ÚtitleÚ
source_urlr:   Ú
doc_numberr+   rp   r)   rs   )
r&   r‚   rƒ   r:   r„   r+   rp   r)   rs   Ú	isoformat)r=   Údr2   r2   r3   re   $  s    ö÷ÿz(get_failed_documents.<locals>.<listcomp>)r5   r   r€   rX   )r$   r   r(   r+   r:   r%   Úorder_byrs   ÚdescÚoffsetr€   rI   )r:   r€   r   r   r$   r5   Údocsr2   r2   r3   Úget_failed_documents  s&   €
ÿüôür‹   )$Ú__doc__Útypingr   r   r   Úfastapir   r   r   Ú
sqlalchemyr   r	   Úsqlalchemy.ormr
   Úlogurur   Úapp.databaser   Úapp.models.tax_datar   r   r   Ú2app.services.tax_data_processor.category_processorr   Úrouterrx   rr   rt   ru   rJ   r4   rR   rZ   r;   r}   r‹   r2   r2   r2   r3   Ú<module>   sV    24þÿþd
üÿþýü